/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <sys/firmware.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#include <netinet/in.h>
#include <netinet/ip.h>
#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#include <machine/cputypes.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#endif
#include <crypto/rijndael/rijndael.h>
#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "cudbg/cudbg.h"
#include "t4_clip.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"
#include "t4_if.h"
#include "t4_smt.h"

/* T4 bus driver interface */
static int t4_probe(device_t);
static int t4_attach(device_t);
static int t4_detach(device_t);
static int t4_child_location_str(device_t, device_t, char *, size_t);
static int t4_ready(device_t);
static int t4_read_port_device(device_t, int, device_t *);
static device_method_t t4_methods[] = {
	DEVMETHOD(device_probe,		t4_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t4_driver = {
	"t4nex",
	t4_methods,
	sizeof(struct adapter)
};


/* T4 port (cxgbe) interface */
static int cxgbe_probe(device_t);
static int cxgbe_attach(device_t);
static int cxgbe_detach(device_t);
device_method_t cxgbe_methods[] = {
	DEVMETHOD(device_probe,		cxgbe_probe),
	DEVMETHOD(device_attach,	cxgbe_attach),
	DEVMETHOD(device_detach,	cxgbe_detach),
	{ 0, 0 }
};
static driver_t cxgbe_driver = {
	"cxgbe",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T4 VI (vcxgbe) interface */
static int vcxgbe_probe(device_t);
static int vcxgbe_attach(device_t);
static int vcxgbe_detach(device_t);
static device_method_t vcxgbe_methods[] = {
	DEVMETHOD(device_probe,		vcxgbe_probe),
	DEVMETHOD(device_attach,	vcxgbe_attach),
	DEVMETHOD(device_detach,	vcxgbe_detach),
	{ 0, 0 }
};
static driver_t vcxgbe_driver = {
	"vcxgbe",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static d_ioctl_t t4_ioctl;

static struct cdevsw t4_cdevsw = {
       .d_version = D_VERSION,
       .d_ioctl = t4_ioctl,
       .d_name = "t4nex",
};

/* T5 bus driver interface */
static int t5_probe(device_t);
static device_method_t t5_methods[] = {
	DEVMETHOD(device_probe,		t5_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t5_driver = {
	"t5nex",
	t5_methods,
	sizeof(struct adapter)
};


/* T5 port (cxl) interface */
static driver_t cxl_driver = {
	"cxl",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T5 VI (vcxl) interface */
static driver_t vcxl_driver = {
	"vcxl",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

/* T6 bus driver interface */
static int t6_probe(device_t);
static device_method_t t6_methods[] = {
	DEVMETHOD(device_probe,		t6_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t6_driver = {
	"t6nex",
	t6_methods,
	sizeof(struct adapter)
};


/* T6 port (cc) interface */
static driver_t cc_driver = {
	"cc",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T6 VI (vcc) interface */
static driver_t vcc_driver = {
	"vcc",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

/* ifnet interface */
static void cxgbe_init(void *);
static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgbe_transmit(struct ifnet *, struct mbuf *);
static void cxgbe_qflush(struct ifnet *);
#ifdef RATELIMIT
static int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *,
    struct m_snd_tag **);
static int cxgbe_snd_tag_modify(struct m_snd_tag *,
    union if_snd_tag_modify_params *);
static int cxgbe_snd_tag_query(struct m_snd_tag *,
    union if_snd_tag_query_params *);
static void cxgbe_snd_tag_free(struct m_snd_tag *);
#endif

MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");

/*
 * Correct lock order when you need to acquire multiple locks is t4_list_lock,
 * then ADAPTER_LOCK, then t4_uld_list_lock.
 */
static struct sx t4_list_lock;
SLIST_HEAD(, adapter) t4_list;
#ifdef TCP_OFFLOAD
static struct sx t4_uld_list_lock;
SLIST_HEAD(, uld_info) t4_uld_list;
#endif
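
/*
 * Illustrative sketch only (not called from anywhere): a hypothetical code
 * path that needed all three locks at once would take and drop them in the
 * order documented above, e.g.
 *
 *	sx_slock(&t4_list_lock);
 *	ADAPTER_LOCK(sc);
 *	sx_slock(&t4_uld_list_lock);
 *	... work ...
 *	sx_sunlock(&t4_uld_list_lock);
 *	ADAPTER_UNLOCK(sc);
 *	sx_sunlock(&t4_list_lock);
 */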

/*
 * Tunables.  See tweak_tunables() too.
 *
 * Each tunable is set to a default value here if it's known at compile-time.
 * Otherwise it is set to -n as an indication to tweak_tunables() that it should
 * provide a reasonable default (up to n) when the driver is loaded.
 *
 * Tunables applicable to all adapters are under hw.cxgbe.  Those specific to
 * T5 and later are under hw.cxl.
 */
SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe(4) parameters");
SYSCTL_NODE(_hw, OID_AUTO, cxl, CTLFLAG_RD, 0, "cxgbe(4) T5+ parameters");
SYSCTL_NODE(_hw_cxgbe, OID_AUTO, toe, CTLFLAG_RD, 0, "cxgbe(4) TOE parameters");
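
/*
 * Example of the -n convention described above: t4_ntxq below starts out as
 * -16, and tweak_tunables() is expected to replace that with a sane value no
 * larger than 16 (typically derived from the number of CPUs) unless the
 * administrator set hw.cxgbe.ntxq explicitly in loader.conf.
 */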

/*
 * Number of queues for tx and rx, NIC and offload.
 */
#define NTXQ 16
int t4_ntxq = -NTXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq, CTLFLAG_RDTUN, &t4_ntxq, 0,
    "Number of TX queues per port");
TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);	/* Old name, undocumented */

#define NRXQ 8
int t4_nrxq = -NRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq, CTLFLAG_RDTUN, &t4_nrxq, 0,
    "Number of RX queues per port");
TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);	/* Old name, undocumented */

#define NTXQ_VI 1
static int t4_ntxq_vi = -NTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq_vi, CTLFLAG_RDTUN, &t4_ntxq_vi, 0,
    "Number of TX queues per VI");

#define NRXQ_VI 1
static int t4_nrxq_vi = -NRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq_vi, CTLFLAG_RDTUN, &t4_nrxq_vi, 0,
    "Number of RX queues per VI");

static int t4_rsrv_noflowq = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
    0, "Reserve TX queue 0 of each VI for non-flowid packets");

#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
#define NOFLDTXQ 8
static int t4_nofldtxq = -NOFLDTXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq, CTLFLAG_RDTUN, &t4_nofldtxq, 0,
    "Number of offload TX queues per port");

#define NOFLDRXQ 2
static int t4_nofldrxq = -NOFLDRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq, CTLFLAG_RDTUN, &t4_nofldrxq, 0,
    "Number of offload RX queues per port");

#define NOFLDTXQ_VI 1
static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq_vi, CTLFLAG_RDTUN, &t4_nofldtxq_vi, 0,
    "Number of offload TX queues per VI");

#define NOFLDRXQ_VI 1
static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq_vi, CTLFLAG_RDTUN, &t4_nofldrxq_vi, 0,
    "Number of offload RX queues per VI");

#define TMR_IDX_OFLD 1
int t4_tmr_idx_ofld = TMR_IDX_OFLD;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_ofld, CTLFLAG_RDTUN,
    &t4_tmr_idx_ofld, 0, "Holdoff timer index for offload queues");

#define PKTC_IDX_OFLD (-1)
int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_ofld, CTLFLAG_RDTUN,
    &t4_pktc_idx_ofld, 0, "Holdoff packet counter index for offload queues");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_idle = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_idle, CTLFLAG_RDTUN,
    &t4_toe_keepalive_idle, 0, "TOE keepalive idle timer (us)");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_interval = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_interval, CTLFLAG_RDTUN,
    &t4_toe_keepalive_interval, 0, "TOE keepalive interval timer (us)");

/* 0 means chip/fw default, non-zero number is # of keepalives before abort */
static int t4_toe_keepalive_count = 0;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, keepalive_count, CTLFLAG_RDTUN,
    &t4_toe_keepalive_count, 0, "Number of TOE keepalive probes before abort");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_min = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_min, CTLFLAG_RDTUN,
    &t4_toe_rexmt_min, 0, "Minimum TOE retransmit interval (us)");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_max = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_max, CTLFLAG_RDTUN,
    &t4_toe_rexmt_max, 0, "Maximum TOE retransmit interval (us)");

/* 0 means chip/fw default, non-zero number is # of rexmt before abort */
static int t4_toe_rexmt_count = 0;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, rexmt_count, CTLFLAG_RDTUN,
    &t4_toe_rexmt_count, 0, "Number of TOE retransmissions before abort");

/* -1 means chip/fw default, other values are raw backoff values to use */
static int t4_toe_rexmt_backoff[16] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
SYSCTL_NODE(_hw_cxgbe_toe, OID_AUTO, rexmt_backoff, CTLFLAG_RD, 0,
    "cxgbe(4) TOE retransmit backoff values");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 0, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[0], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 1, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[1], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 2, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[2], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 3, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[3], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 4, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[4], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 5, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[5], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 6, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[6], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 7, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[7], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 8, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[8], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 9, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[9], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 10, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[10], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 11, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[11], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 12, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[12], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 13, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[13], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[14], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[15], 0, "");
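
/*
 * Illustrative use of the knobs above: setting hw.cxgbe.toe.rexmt_backoff.0
 * through .15 in loader.conf is expected to override the retransmit backoff
 * table entry by entry; any entry left at -1 keeps the chip/firmware default.
 */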
#endif

#ifdef DEV_NETMAP
#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -NNMTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq_vi, CTLFLAG_RDTUN, &t4_nnmtxq_vi, 0,
    "Number of netmap TX queues per VI");

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -NNMRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq_vi, CTLFLAG_RDTUN, &t4_nnmrxq_vi, 0,
    "Number of netmap RX queues per VI");
#endif

/*
 * Holdoff parameters for ports.
 */
#define TMR_IDX 1
int t4_tmr_idx = TMR_IDX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx, CTLFLAG_RDTUN, &t4_tmr_idx,
    0, "Holdoff timer index");
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);	/* Old name */

#define PKTC_IDX (-1)
int t4_pktc_idx = PKTC_IDX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx, CTLFLAG_RDTUN, &t4_pktc_idx,
    0, "Holdoff packet counter index");
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);	/* Old name */

/*
 * Size (# of entries) of each tx and rx queue.
 */
unsigned int t4_qsize_txq = TX_EQ_QSIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, &t4_qsize_txq, 0,
    "Number of descriptors in each TX queue");

unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, &t4_qsize_rxq, 0,
    "Number of descriptors in each RX queue");

/*
 * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
 */
int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &t4_intr_types,
    0, "Interrupt types allowed (bit 0 = INTx, 1 = MSI, 2 = MSI-X)");
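
/*
 * For example, hw.cxgbe.interrupt_types="4" in loader.conf restricts the
 * driver to MSI-X, while the default of 7 lets it fall back to MSI and then
 * INTx if MSI-X vectors cannot be allocated.
 */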

/*
 * Configuration file.  All the _CF names here are special.
 */
#define DEFAULT_CF	"default"
#define BUILTIN_CF	"built-in"
#define FLASH_CF	"flash"
#define UWIRE_CF	"uwire"
#define FPGA_CF		"fpga"
static char t4_cfg_file[32] = DEFAULT_CF;
SYSCTL_STRING(_hw_cxgbe, OID_AUTO, config_file, CTLFLAG_RDTUN, t4_cfg_file,
    sizeof(t4_cfg_file), "Firmware configuration file");

/*
 * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
 * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
 * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
 *            mark or when signalled to do so, 0 to never emit PAUSE.
 * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
 *                 negotiated settings will override rx_pause/tx_pause.
 *                 Otherwise rx_pause/tx_pause are applied forcibly.
 */
static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, pause_settings, CTLFLAG_RDTUN,
    &t4_pause_settings, 0,
    "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
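
/*
 * For example, hw.cxgbe.pause_settings="0" forcibly disables both rx_pause
 * and tx_pause, while the default of 7 (rx_pause | tx_pause | pause_autoneg)
 * lets link autonegotiation decide.
 */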

/*
 * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
 * -1 to run with the firmware default.  Same as FEC_AUTO (bit 5).
 *  0 to disable FEC.
 */
static int t4_fec = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fec, CTLFLAG_RDTUN, &t4_fec, 0,
    "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");

/*
 * Link autonegotiation.
 * -1 to run with the firmware default.
 *  0 to disable.
 *  1 to enable.
 */
static int t4_autoneg = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, autoneg, CTLFLAG_RDTUN, &t4_autoneg, 0,
    "Link autonegotiation");

/*
 * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
 * encouraged respectively).  '-n' is the same as 'n' except the firmware
 * version used in the checks is read from the firmware bundled with the driver.
 */
static int t4_fw_install = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fw_install, CTLFLAG_RDTUN, &t4_fw_install, 0,
    "Firmware auto-install (0 = prohibited, 1 = allowed, 2 = encouraged)");

/*
 * ASIC features that will be used.  Disable the ones you don't want so that the
 * chip resources aren't wasted on features that will not be used.
 */
static int t4_nbmcaps_allowed = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nbmcaps_allowed, CTLFLAG_RDTUN,
    &t4_nbmcaps_allowed, 0, "Default NBM capabilities");

static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
SYSCTL_INT(_hw_cxgbe, OID_AUTO, linkcaps_allowed, CTLFLAG_RDTUN,
    &t4_linkcaps_allowed, 0, "Default link capabilities");

static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
    FW_CAPS_CONFIG_SWITCH_EGRESS;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, switchcaps_allowed, CTLFLAG_RDTUN,
    &t4_switchcaps_allowed, 0, "Default switch capabilities");

#ifdef RATELIMIT
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
	FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
#else
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
	FW_CAPS_CONFIG_NIC_HASHFILTER;
#endif
SYSCTL_INT(_hw_cxgbe, OID_AUTO, niccaps_allowed, CTLFLAG_RDTUN,
    &t4_niccaps_allowed, 0, "Default NIC capabilities");

static int t4_toecaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, toecaps_allowed, CTLFLAG_RDTUN,
    &t4_toecaps_allowed, 0, "Default TCP offload capabilities");

static int t4_rdmacaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, rdmacaps_allowed, CTLFLAG_RDTUN,
    &t4_rdmacaps_allowed, 0, "Default RDMA capabilities");

static int t4_cryptocaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cryptocaps_allowed, CTLFLAG_RDTUN,
    &t4_cryptocaps_allowed, 0, "Default crypto capabilities");

static int t4_iscsicaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, iscsicaps_allowed, CTLFLAG_RDTUN,
    &t4_iscsicaps_allowed, 0, "Default iSCSI capabilities");

static int t4_fcoecaps_allowed = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fcoecaps_allowed, CTLFLAG_RDTUN,
    &t4_fcoecaps_allowed, 0, "Default FCoE capabilities");

static int t5_write_combine = 0;
SYSCTL_INT(_hw_cxl, OID_AUTO, write_combine, CTLFLAG_RDTUN, &t5_write_combine,
    0, "Use WC instead of UC for BAR2");

static int t4_num_vis = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, num_vis, CTLFLAG_RDTUN, &t4_num_vis, 0,
    "Number of VIs per port");

/*
 * PCIe Relaxed Ordering.
 * -1: driver should figure out a good value.
 * 0: disable RO.
 * 1: enable RO.
 * 2: leave RO alone.
 */
static int pcie_relaxed_ordering = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, pcie_relaxed_ordering, CTLFLAG_RDTUN,
    &pcie_relaxed_ordering, 0,
    "PCIe Relaxed Ordering: 0 = disable, 1 = enable, 2 = leave alone");
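
/*
 * Note on the knob above: in the attach path below, only the 0 and 1
 * settings cause a write to the PCIe device control register (see the
 * PCIER_DEVICE_CTL handling in t4_attach()); -1 and 2 leave the enable bit
 * as the firmware/BIOS configured it at that point.
 */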

static int t4_panic_on_fatal_err = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, panic_on_fatal_err, CTLFLAG_RDTUN,
    &t4_panic_on_fatal_err, 0, "panic on fatal errors");

#ifdef TCP_OFFLOAD
/*
 * TOE tunables.
 */
static int t4_cop_managed_offloading = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cop_managed_offloading, CTLFLAG_RDTUN,
    &t4_cop_managed_offloading, 0,
    "COP (Connection Offload Policy) controls all TOE offload");
#endif

/* Functions used by VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
	FW_VI_FUNC_ETH,
	FW_VI_FUNC_OFLD,
	FW_VI_FUNC_IWARP,
	FW_VI_FUNC_OPENISCSI,
	FW_VI_FUNC_OPENFCOE,
	FW_VI_FUNC_FOISCSI,
	FW_VI_FUNC_FOFCOE,
};

struct intrs_and_queues {
	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
	uint16_t num_vis;	/* number of VIs for each port */
	uint16_t nirq;		/* Total # of vectors */
	uint16_t ntxq;		/* # of NIC txq's for each port */
	uint16_t nrxq;		/* # of NIC rxq's for each port */
	uint16_t nofldtxq;	/* # of TOE/ETHOFLD txq's for each port */
	uint16_t nofldrxq;	/* # of TOE rxq's for each port */

	/* The vcxgbe/vcxl interfaces use these and not the ones above. */
	uint16_t ntxq_vi;	/* # of NIC txq's */
	uint16_t nrxq_vi;	/* # of NIC rxq's */
	uint16_t nofldtxq_vi;	/* # of TOE txq's */
	uint16_t nofldrxq_vi;	/* # of TOE rxq's */
	uint16_t nnmtxq_vi;	/* # of netmap txq's */
	uint16_t nnmrxq_vi;	/* # of netmap rxq's */
};

static void setup_memwin(struct adapter *);
static void position_memwin(struct adapter *, int, uint32_t);
static int validate_mem_range(struct adapter *, uint32_t, uint32_t);
static int fwmtype_to_hwmtype(int);
static int validate_mt_off_len(struct adapter *, int, uint32_t, uint32_t,
    uint32_t *);
static int fixup_devlog_params(struct adapter *);
static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
static int contact_firmware(struct adapter *);
static int partition_resources(struct adapter *);
static int get_params__pre_init(struct adapter *);
static int set_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static bool fixed_ifmedia(struct port_info *);
static void build_medialist(struct port_info *);
static void init_link_config(struct port_info *);
static int fixup_link_config(struct port_info *);
static int apply_link_config(struct port_info *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
    driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void t4_init_atid_table(struct adapter *);
static void t4_free_atid_table(struct adapter *);
static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void vi_refresh_stats(struct adapter *, struct vi_info *);
static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
static void cxgbe_tick(void *);
static void cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
static int sysctl_fec(SYSCTL_HANDLER_ARGS);
static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
static int sysctl_vdd(SYSCTL_HANDLER_ARGS);
static int sysctl_loadavg(SYSCTL_HANDLER_ARGS);
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tids(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
#ifdef TCP_OFFLOAD
static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
#endif
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int load_fw(struct adapter *, struct t4_data *);
static int load_cfg(struct adapter *, struct t4_data *);
static int load_boot(struct adapter *, struct t4_bootrom *);
static int load_bootcfg(struct adapter *, struct t4_data *);
static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
static void free_offload_policy(struct t4_offload_policy *);
static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
static int clear_stats(struct adapter *, u_int);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, int);
#endif
static int mod_event(module_t, int, void *);
static int notify_siblings(device_t, int);

struct {
	uint16_t device;
	char *desc;
} t4_pciids[] = {
	{0xa000, "Chelsio Terminator 4 FPGA"},
	{0x4400, "Chelsio T440-dbg"},
	{0x4401, "Chelsio T420-CR"},
	{0x4402, "Chelsio T422-CR"},
	{0x4403, "Chelsio T440-CR"},
	{0x4404, "Chelsio T420-BCH"},
	{0x4405, "Chelsio T440-BCH"},
	{0x4406, "Chelsio T440-CH"},
	{0x4407, "Chelsio T420-SO"},
	{0x4408, "Chelsio T420-CX"},
	{0x4409, "Chelsio T420-BT"},
	{0x440a, "Chelsio T404-BT"},
	{0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
	{0xb000, "Chelsio Terminator 5 FPGA"},
	{0x5400, "Chelsio T580-dbg"},
	{0x5401,  "Chelsio T520-CR"},		/* 2 x 10G */
	{0x5402,  "Chelsio T522-CR"},		/* 2 x 10G, 2 X 1G */
	{0x5403,  "Chelsio T540-CR"},		/* 4 x 10G */
	{0x5407,  "Chelsio T520-SO"},		/* 2 x 10G, nomem */
	{0x5409,  "Chelsio T520-BT"},		/* 2 x 10GBaseT */
	{0x540a,  "Chelsio T504-BT"},		/* 4 x 1G */
	{0x540d,  "Chelsio T580-CR"},		/* 2 x 40G */
	{0x540e,  "Chelsio T540-LP-CR"},	/* 4 x 10G */
	{0x5410,  "Chelsio T580-LP-CR"},	/* 2 x 40G */
	{0x5411,  "Chelsio T520-LL-CR"},	/* 2 x 10G */
	{0x5412,  "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
	{0x5414,  "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
	{0x5415,  "Chelsio T502-BT"},		/* 2 x 1G */
	{0x5418,  "Chelsio T540-BT"},		/* 4 x 10GBaseT */
	{0x5419,  "Chelsio T540-LP-BT"},	/* 4 x 10GBaseT */
	{0x541a,  "Chelsio T540-SO-BT"},	/* 4 x 10GBaseT, nomem */
	{0x541b,  "Chelsio T540-SO-CR"},	/* 4 x 10G, nomem */

	/* Custom */
	{0x5483, "Custom T540-CR"},
	{0x5484, "Custom T540-BT"},
}, t6_pciids[] = {
	{0xc006, "Chelsio Terminator 6 FPGA"},	/* T6 PE10K6 FPGA (PF0) */
	{0x6400, "Chelsio T6-DBG-25"},		/* 2 x 10/25G, debug */
	{0x6401, "Chelsio T6225-CR"},		/* 2 x 10/25G */
	{0x6402, "Chelsio T6225-SO-CR"},	/* 2 x 10/25G, nomem */
	{0x6403, "Chelsio T6425-CR"},		/* 4 x 10/25G */
	{0x6404, "Chelsio T6425-SO-CR"},	/* 4 x 10/25G, nomem */
	{0x6405, "Chelsio T6225-OCP-SO"},	/* 2 x 10/25G, nomem */
	{0x6406, "Chelsio T62100-OCP-SO"},	/* 2 x 40/50/100G, nomem */
	{0x6407, "Chelsio T62100-LP-CR"},	/* 2 x 40/50/100G */
	{0x6408, "Chelsio T62100-SO-CR"},	/* 2 x 40/50/100G, nomem */
	{0x6409, "Chelsio T6210-BT"},		/* 2 x 10GBASE-T */
	{0x640d, "Chelsio T62100-CR"},		/* 2 x 40/50/100G */
	{0x6410, "Chelsio T6-DBG-100"},		/* 2 x 40/50/100G, debug */
	{0x6411, "Chelsio T6225-LL-CR"},	/* 2 x 10/25G */
	{0x6414, "Chelsio T61100-OCP-SO"},	/* 1 x 40/50/100G, nomem */
	{0x6415, "Chelsio T6201-BT"},		/* 2 x 1000BASE-T */

	/* Custom */
	{0x6480, "Custom T6225-CR"},
	{0x6481, "Custom T62100-CR"},
	{0x6482, "Custom T6225-CR"},
	{0x6483, "Custom T62100-CR"},
	{0x6484, "Custom T64100-CR"},
	{0x6485, "Custom T6240-SO"},
	{0x6486, "Custom T6225-SO-CR"},
	{0x6487, "Custom T6225-CR"},
};

#ifdef TCP_OFFLOAD
/*
 * service_iq_fl() has an iq and needs the fl.  Offset of fl from the iq should
 * be exactly the same for both rxq and ofld_rxq.
 */
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif
CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);

static int
t4_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xa000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t4_pciids); i++) {
		if (d == t4_pciids[i].device) {
			device_set_desc(dev, t4_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t5_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xb000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t5_pciids); i++) {
		if (d == t5_pciids[i].device) {
			device_set_desc(dev, t5_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t6_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	for (i = 0; i < nitems(t6_pciids); i++) {
		if (d == t6_pciids[i].device) {
			device_set_desc(dev, t6_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static void
t5_attribute_workaround(device_t dev)
{
	device_t root_port;
	uint32_t v;

	/*
	 * The T5 chips do not properly echo the No Snoop and Relaxed
	 * Ordering attributes when replying to a TLP from a Root
	 * Port.  As a workaround, find the parent Root Port and
	 * disable No Snoop and Relaxed Ordering.  Note that this
	 * affects all devices under this root port.
	 */
	root_port = pci_find_pcie_root_port(dev);
	if (root_port == NULL) {
		device_printf(dev, "Unable to find parent root port\n");
		return;
	}

	v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
	    PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
	if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
	    0)
		device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
		    device_get_nameunit(root_port));
}

static const struct devnames devnames[] = {
	{
		.nexus_name = "t4nex",
		.ifnet_name = "cxgbe",
		.vi_ifnet_name = "vcxgbe",
		.pf03_drv_name = "t4iov",
		.vf_nexus_name = "t4vf",
		.vf_ifnet_name = "cxgbev"
	}, {
		.nexus_name = "t5nex",
		.ifnet_name = "cxl",
		.vi_ifnet_name = "vcxl",
		.pf03_drv_name = "t5iov",
		.vf_nexus_name = "t5vf",
		.vf_ifnet_name = "cxlv"
	}, {
		.nexus_name = "t6nex",
		.ifnet_name = "cc",
		.vi_ifnet_name = "vcc",
		.pf03_drv_name = "t6iov",
		.vf_nexus_name = "t6vf",
		.vf_ifnet_name = "ccv"
	}
};

void
t4_init_devnames(struct adapter *sc)
{
	int id;

	id = chip_id(sc);
	if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
		sc->names = &devnames[id - CHELSIO_T4];
	else {
		device_printf(sc->dev, "chip id %d is not supported.\n", id);
		sc->names = NULL;
	}
}

static int
t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
{
	const char *parent, *name;
	long value;
	int line, unit;

	line = 0;
	parent = device_get_nameunit(sc->dev);
	name = sc->names->ifnet_name;
	while (resource_find_dev(&line, name, &unit, "at", parent) == 0) {
		if (resource_long_value(name, unit, "port", &value) == 0 &&
		    value == pi->port_id)
			return (unit);
	}
	return (-1);
}
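
/*
 * Example (hypothetical hint values): with these lines in /boot/device.hints
 *
 *	hint.cxgbe.4.at="t4nex0"
 *	hint.cxgbe.4.port="1"
 *
 * t4_ifnet_unit() would return 4 for port 1 of t4nex0, wiring that port to
 * unit number cxgbe4 regardless of attach order.
 */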

static int
t4_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, rqidx, tqidx, nports;
	struct make_dev_args mda;
	struct intrs_and_queues iaq;
	struct sge *s;
	uint32_t *buf;
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	int ofld_tqidx;
#endif
#ifdef TCP_OFFLOAD
	int ofld_rqidx;
#endif
#ifdef DEV_NETMAP
	int nm_rqidx, nm_tqidx;
#endif
	int num_vis;

	sc = device_get_softc(dev);
	sc->dev = dev;
	TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);

	if ((pci_get_device(dev) & 0xff00) == 0x5400)
		t5_attribute_workaround(dev);
	pci_enable_busmaster(dev);
	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
		uint32_t v;

		pci_set_max_read_req(dev, 4096);
		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
		sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
		if (pcie_relaxed_ordering == 0 &&
		    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
			v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
			pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
		} else if (pcie_relaxed_ordering == 1 &&
		    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
			v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
			pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
		}
	}

	sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
	sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
	sc->traceq = -1;
	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
	    device_get_nameunit(dev));
	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);

	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	t4_add_adapter(sc);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

	sc->policy = NULL;
	rw_init(&sc->policy_lock, "connection offload policy");

	rc = t4_map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));

	/* Prepare the adapter for operation. */
	buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
	rc = -t4_prep_adapter(sc, buf);
	free(buf, M_CXGBE);
	if (rc != 0) {
		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
		goto done;
	}

	/*
	 * This is the real PF# to which we're attaching.  Works from within PCI
	 * passthrough environments too, where pci_get_function() could return a
	 * different PF# depending on the passthrough configuration.  We need to
	 * use the real PF# in all our communication with the firmware.
	 */
	j = t4_read_reg(sc, A_PL_WHOAMI);
	sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
	sc->mbox = sc->pf;

	t4_init_devnames(sc);
	if (sc->names == NULL) {
		rc = ENOTSUP;
		goto done; /* error message displayed already */
	}

	/*
	 * Do this really early, with the memory windows set up even before the
	 * character device.  The userland tool's register i/o and mem read
	 * will work even in "recovery mode".
	 */
	setup_memwin(sc);
	if (t4_init_devlog_params(sc, 0) == 0)
		fixup_devlog_params(sc);
	make_dev_args_init(&mda);
	mda.mda_devsw = &t4_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (rc != 0)
		device_printf(dev, "failed to create nexus char device: %d.\n",
		    rc);

	/* Go no further if recovery mode has been requested. */
	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
		device_printf(dev, "recovery mode.\n");
		goto done;
	}

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/* Contact the firmware and try to become the master driver. */
	rc = contact_firmware(sc);
	if (rc != 0)
		goto done; /* error message displayed already */
	MPASS(sc->flags & FW_OK);

	rc = get_params__pre_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	if (sc->flags & MASTER_PF) {
		rc = partition_resources(sc);
		if (rc != 0)
			goto done; /* error message displayed already */
		t4_intr_clear(sc);
	}

	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.
	 */
	for_each_port(sc, i) {
		struct port_info *pi;

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		/*
		 * XXX: vi[0] is special so we can't delay this allocation until
		 * pi->nvi's final value is known.
		 */
		pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		/* All VIs on this port share this media. */
		ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
		    cxgbe_media_status);

		PORT_LOCK(pi);
		init_link_config(pi);
		fixup_link_config(pi);
		build_medialist(pi);
		if (fixed_ifmedia(pi))
			pi->flags |= FIXED_IFMEDIA;
		PORT_UNLOCK(pi);

		pi->dev = device_add_child(dev, sc->names->ifnet_name,
		    t4_ifnet_unit(sc, pi));
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	nports = sc->params.nports;
	rc = cfg_itype_and_nqueues(sc, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */

	num_vis = iaq.num_vis;
	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	s = &sc->sge;
	s->nrxq = nports * iaq.nrxq;
	s->ntxq = nports * iaq.ntxq;
	if (num_vis > 1) {
		s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
		s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
	}
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += nports;		/* ctrl queues: 1 per port */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
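
	/*
	 * Worked example, assuming the tunables resolved to their
	 * compiled-in maxima: a 2-port adapter with num_vis = 1 gets
	 * s->ntxq = 2 * 16 = 32 and s->nrxq = 2 * 8 = 16, so at this point
	 * s->neq = 32 + 16 + 2 (ctrl) = 50 and s->niq = 16 + 1 = 17, before
	 * any offload or netmap queues are added below.
	 */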
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	if (is_offload(sc) || is_ethoffload(sc)) {
		s->nofldtxq = nports * iaq.nofldtxq;
		if (num_vis > 1)
			s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
		s->neq += s->nofldtxq;

		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		s->nofldrxq = nports * iaq.nofldrxq;
		if (num_vis > 1)
			s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
		s->neq += s->nofldrxq;	/* free list */
		s->niq += s->nofldrxq;

		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef DEV_NETMAP
	if (num_vis > 1) {
		s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi;
		s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi;
	}
	s->neq += s->nnmtxq + s->nnmrxq;
	s->niq += s->nnmrxq;

	s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
	    M_CXGBE, M_ZERO | M_WAITOK);
	s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
	    M_CXGBE, M_ZERO | M_WAITOK);
#endif

	s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	t4_init_l2t(sc, M_WAITOK);
	t4_init_smt(sc, M_WAITOK);
	t4_init_tx_sched(sc);
	t4_init_atid_table(sc);
#ifdef RATELIMIT
	t4_init_etid_table(sc);
#endif
#ifdef INET6
	t4_init_clip_table(sc);
#endif
	if (sc->vres.key.size != 0)
		sc->key_map = vmem_create("T4TLS key map", sc->vres.key.start,
		    sc->vres.key.size, 32, 0, M_FIRSTFIT | M_WAITOK);

	/*
	 * Second pass over the ports.  This time we know the number of rx and
	 * tx queues that each port should get.
	 */
	rqidx = tqidx = 0;
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	ofld_tqidx = 0;
#endif
#ifdef TCP_OFFLOAD
	ofld_rqidx = 0;
#endif
#ifdef DEV_NETMAP
	nm_rqidx = nm_tqidx = 0;
#endif
	for_each_port(sc, i) {
		struct port_info *pi = sc->port[i];
		struct vi_info *vi;

		if (pi == NULL)
			continue;

		pi->nvi = num_vis;
		for_each_vi(pi, j, vi) {
			vi->pi = pi;
			vi->qsize_rxq = t4_qsize_rxq;
			vi->qsize_txq = t4_qsize_txq;

			vi->first_rxq = rqidx;
			vi->first_txq = tqidx;
			vi->tmr_idx = t4_tmr_idx;
			vi->pktc_idx = t4_pktc_idx;
			vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
			vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;

			rqidx += vi->nrxq;
			tqidx += vi->ntxq;

			if (j == 0 && vi->ntxq > 1)
				vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
			else
				vi->rsrv_noflowq = 0;

#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
			vi->first_ofld_txq = ofld_tqidx;
			vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;
			ofld_tqidx += vi->nofldtxq;
#endif
#ifdef TCP_OFFLOAD
			vi->ofld_tmr_idx = t4_tmr_idx_ofld;
			vi->ofld_pktc_idx = t4_pktc_idx_ofld;
			vi->first_ofld_rxq = ofld_rqidx;
			vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;

			ofld_rqidx += vi->nofldrxq;
#endif
#ifdef DEV_NETMAP
			if (j > 0) {
				vi->first_nm_rxq = nm_rqidx;
				vi->first_nm_txq = nm_tqidx;
				vi->nnmrxq = iaq.nnmrxq_vi;
				vi->nnmtxq = iaq.nnmtxq_vi;
				nm_rqidx += vi->nnmrxq;
				nm_tqidx += vi->nnmtxq;
			}
#endif
		}
	}

	rc = t4_setup_intr_handlers(sc);
	if (rc != 0) {
		device_printf(dev,
		    "failed to setup interrupt handlers: %d\n", rc);
		goto done;
	}

	rc = bus_generic_probe(dev);
	if (rc != 0) {
		device_printf(dev, "failed to probe child drivers: %d\n", rc);
		goto done;
	}

	/*
	 * Ensure thread-safe mailbox access (in debug builds).
	 *
	 * So far this was the only thread accessing the mailbox but various
	 * ifnets and sysctls are about to be created and their handlers/ioctls
	 * will access the mailbox from different threads.
	 */
	sc->flags |= CHK_MBOX_ACCESS;

	rc = bus_generic_attach(dev);
	if (rc != 0) {
		device_printf(dev,
		    "failed to attach all child ports: %d\n", rc);
		goto done;
	}

	device_printf(dev,
	    "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
	    sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
	    sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);

	t4_set_desc(sc);

	notify_siblings(dev, 0);

done:
	if (rc != 0 && sc->cdev) {
		/* cdev was created and so cxgbetool works; recover that way. */
		device_printf(dev,
		    "error during attach, adapter is now in recovery mode.\n");
		rc = 0;
	}

	if (rc != 0)
		t4_detach_common(dev);
	else
		t4_sysctls(sc);

	return (rc);
}

static int
t4_child_location_str(device_t bus, device_t dev, char *buf, size_t buflen)
{
	struct adapter *sc;
	struct port_info *pi;
	int i;

	sc = device_get_softc(bus);
	buf[0] = '\0';
	for_each_port(sc, i) {
		pi = sc->port[i];
		if (pi != NULL && pi->dev == dev) {
			snprintf(buf, buflen, "port=%d", pi->port_id);
			break;
		}
	}
	return (0);
}
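
/*
 * The "port=N" string generated above is the child's bus-specific location;
 * it is what userland tools such as devinfo(8) display for each cxgbe/cxl/cc
 * device hanging off the nexus.
 */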

static int
t4_ready(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);
	if (sc->flags & FW_OK)
		return (0);
	return (ENXIO);
}

static int
t4_read_port_device(device_t dev, int port, device_t *child)
{
	struct adapter *sc;
	struct port_info *pi;

	sc = device_get_softc(dev);
	if (port < 0 || port >= MAX_NPORTS)
		return (EINVAL);
	pi = sc->port[port];
	if (pi == NULL || pi->dev == NULL)
		return (ENXIO);
	*child = pi->dev;
	return (0);
}

static int
notify_siblings(device_t dev, int detaching)
{
	device_t sibling;
	int error, i;

	error = 0;
	for (i = 0; i < PCI_FUNCMAX; i++) {
		if (i == pci_get_function(dev))
			continue;
		sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
		    pci_get_slot(dev), i);
		if (sibling == NULL || !device_is_attached(sibling))
			continue;
		if (detaching)
			error = T4_DETACH_CHILD(sibling);
		else
			(void)T4_ATTACH_CHILD(sibling);
		if (error)
			break;
	}
	return (error);
}

/*
 * Idempotent
 */
static int
t4_detach(device_t dev)
{
	struct adapter *sc;
	int rc;

	sc = device_get_softc(dev);

	rc = notify_siblings(dev, 1);
	if (rc) {
		device_printf(dev,
		    "failed to detach sibling devices: %d\n", rc);
		return (rc);
	}

	return (t4_detach_common(dev));
}

int
t4_detach_common(device_t dev)
{
	struct adapter *sc;
	struct port_info *pi;
	int i, rc;

	sc = device_get_softc(dev);

	if (sc->cdev) {
		destroy_dev(sc->cdev);
		sc->cdev = NULL;
	}

	sx_xlock(&t4_list_lock);
	SLIST_REMOVE(&t4_list, sc, adapter, link);
	sx_xunlock(&t4_list_lock);

	sc->flags &= ~CHK_MBOX_ACCESS;
	if (sc->flags & FULL_INIT_DONE) {
		if (!(sc->flags & IS_VF))
			t4_intr_disable(sc);
	}

	if (device_is_attached(dev)) {
		rc = bus_generic_detach(dev);
		if (rc) {
			device_printf(dev,
			    "failed to detach child devices: %d\n", rc);
			return (rc);
		}
	}

	for (i = 0; i < sc->intr_count; i++)
		t4_free_irq(sc, &sc->irq[i]);

	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
		t4_free_tx_sched(sc);

	for (i = 0; i < MAX_NPORTS; i++) {
		pi = sc->port[i];
		if (pi) {
			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
			if (pi->dev)
				device_delete_child(dev, pi->dev);

			mtx_destroy(&pi->pi_lock);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
		}
	}

	device_delete_children(dev);

	if (sc->flags & FULL_INIT_DONE)
		adapter_full_uninit(sc);

	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
		t4_fw_bye(sc, sc->mbox);

	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
		pci_release_msi(dev);

	if (sc->regs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	if (sc->udbs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->msix_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
		    sc->msix_res);

	if (sc->l2t)
		t4_free_l2t(sc->l2t);
	if (sc->smt)
		t4_free_smt(sc->smt);
	t4_free_atid_table(sc);
#ifdef RATELIMIT
	t4_free_etid_table(sc);
#endif
	if (sc->key_map)
		vmem_destroy(sc->key_map);
#ifdef INET6
	t4_destroy_clip_table(sc);
#endif

#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	free(sc->sge.ofld_txq, M_CXGBE);
#endif
#ifdef TCP_OFFLOAD
	free(sc->sge.ofld_rxq, M_CXGBE);
#endif
#ifdef DEV_NETMAP
	free(sc->sge.nm_rxq, M_CXGBE);
	free(sc->sge.nm_txq, M_CXGBE);
#endif
	free(sc->irq, M_CXGBE);
	free(sc->sge.rxq, M_CXGBE);
	free(sc->sge.txq, M_CXGBE);
	free(sc->sge.ctrlq, M_CXGBE);
	free(sc->sge.iqmap, M_CXGBE);
	free(sc->sge.eqmap, M_CXGBE);
	free(sc->tids.ftid_tab, M_CXGBE);
	free(sc->tids.hpftid_tab, M_CXGBE);
	free_hftid_hash(&sc->tids);
	free(sc->tids.tid_tab, M_CXGBE);
	free(sc->tt.tls_rx_ports, M_CXGBE);
	t4_destroy_dma_tag(sc);

	callout_drain(&sc->sfl_callout);
	if (mtx_initialized(&sc->tids.ftid_lock)) {
		mtx_destroy(&sc->tids.ftid_lock);
		cv_destroy(&sc->tids.ftid_cv);
	}
	if (mtx_initialized(&sc->tids.atid_lock))
		mtx_destroy(&sc->tids.atid_lock);
	if (mtx_initialized(&sc->ifp_lock))
		mtx_destroy(&sc->ifp_lock);

	if (rw_initialized(&sc->policy_lock)) {
		rw_destroy(&sc->policy_lock);
#ifdef TCP_OFFLOAD
		if (sc->policy != NULL)
			free_offload_policy(sc->policy);
#endif
	}

	for (i = 0; i < NUM_MEMWIN; i++) {
		struct memwin *mw = &sc->memwin[i];

		if (rw_initialized(&mw->mw_lock))
			rw_destroy(&mw->mw_lock);
	}

	mtx_destroy(&sc->sfl_lock);
	mtx_destroy(&sc->reg_lock);
	mtx_destroy(&sc->sc_lock);

	bzero(sc, sizeof(*sc));

	return (0);
}

static int
cxgbe_probe(device_t dev)
{
	char buf[128];
	struct port_info *pi = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "port %d", pi->port_id);
	device_set_desc_copy(dev, buf);

	return (BUS_PROBE_DEFAULT);
}

#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS | \
    IFCAP_HWRXTSTMP | IFCAP_NOMAP)
#define T4_CAP_ENABLE (T4_CAP)
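
/*
 * Everything in T4_CAP is enabled by default (T4_CAP_ENABLE == T4_CAP).
 * Individual capabilities can still be toggled at runtime with ifconfig(8),
 * e.g. "ifconfig cxgbe0 -lro" or "ifconfig cxgbe0 -txcsum".
 */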

static int
cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
	struct ifnet *ifp;
	struct sbuf *sb;

	vi->xact_addr_filt = -1;
	callout_init(&vi->tick, 1);

	/* Allocate an ifnet and set it up */
	ifp = if_alloc_dev(IFT_ETHER, dev);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}
	vi->ifp = ifp;
	ifp->if_softc = vi;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;

	ifp->if_init = cxgbe_init;
	ifp->if_ioctl = cxgbe_ioctl;
	ifp->if_transmit = cxgbe_transmit;
	ifp->if_qflush = cxgbe_qflush;
	ifp->if_get_counter = cxgbe_get_counter;
#ifdef RATELIMIT
	ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc;
	ifp->if_snd_tag_modify = cxgbe_snd_tag_modify;
	ifp->if_snd_tag_query = cxgbe_snd_tag_query;
	ifp->if_snd_tag_free = cxgbe_snd_tag_free;
	ifp->if_ratelimit_query = cxgbe_ratelimit_query;
#endif

	ifp->if_capabilities = T4_CAP;
	ifp->if_capenable = T4_CAP_ENABLE;
#ifdef TCP_OFFLOAD
	if (vi->nofldrxq != 0)
		ifp->if_capabilities |= IFCAP_TOE;
#endif
#ifdef RATELIMIT
	if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0) {
		ifp->if_capabilities |= IFCAP_TXRTLMT;
		ifp->if_capenable |= IFCAP_TXRTLMT;
	}
#endif
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;

	ifp->if_hw_tsomax = IP_MAXPACKET;
	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
#ifdef RATELIMIT
	if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0)
		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
#endif
	ifp->if_hw_tsomaxsegsize = 65536;

	ether_ifattach(ifp, vi->hw_addr);
#ifdef DEV_NETMAP
	if (vi->nnmrxq != 0)
		cxgbe_nm_attach(vi);
#endif
	sb = sbuf_new_auto();
	sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	switch (ifp->if_capabilities & (IFCAP_TOE | IFCAP_TXRTLMT)) {
	case IFCAP_TOE:
		sbuf_printf(sb, "; %d txq (TOE)", vi->nofldtxq);
		break;
	case IFCAP_TOE | IFCAP_TXRTLMT:
		sbuf_printf(sb, "; %d txq (TOE/ETHOFLD)", vi->nofldtxq);
		break;
	case IFCAP_TXRTLMT:
		sbuf_printf(sb, "; %d txq (ETHOFLD)", vi->nofldtxq);
		break;
	}
#endif
#ifdef TCP_OFFLOAD
	if (ifp->if_capabilities & IFCAP_TOE)
		sbuf_printf(sb, ", %d rxq (TOE)", vi->nofldrxq);
#endif
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
		    vi->nnmtxq, vi->nnmrxq);
#endif
	sbuf_finish(sb);
	device_printf(dev, "%s\n", sbuf_data(sb));
	sbuf_delete(sb);

	vi_sysctls(vi);

	return (0);
}

static int
cxgbe_attach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct adapter *sc = pi->adapter;
	struct vi_info *vi;
	int i, rc;

	callout_init_mtx(&pi->tick, &pi->pi_lock, 0);

	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
	if (rc)
		return (rc);

	for_each_vi(pi, i, vi) {
		if (i == 0)
			continue;
		vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
		if (vi->dev == NULL) {
			device_printf(dev, "failed to add VI %d\n", i);
			continue;
		}
		device_set_softc(vi->dev, vi);
	}

	cxgbe_sysctls(pi);

	bus_generic_attach(dev);
1765 	return (0);
1766 }
1767 
1768 static void
1769 cxgbe_vi_detach(struct vi_info *vi)
1770 {
1771 	struct ifnet *ifp = vi->ifp;
1772 
1773 	ether_ifdetach(ifp);
1774 
1775 	/* Let detach proceed even if these fail. */
1776 #ifdef DEV_NETMAP
1777 	if (ifp->if_capabilities & IFCAP_NETMAP)
1778 		cxgbe_nm_detach(vi);
1779 #endif
1780 	cxgbe_uninit_synchronized(vi);
1781 	callout_drain(&vi->tick);
1782 	vi_full_uninit(vi);
1783 
1784 	if_free(vi->ifp);
1785 	vi->ifp = NULL;
1786 }
1787 
1788 static int
1789 cxgbe_detach(device_t dev)
1790 {
1791 	struct port_info *pi = device_get_softc(dev);
1792 	struct adapter *sc = pi->adapter;
1793 	int rc;
1794 
1795 	/* Detach the extra VIs first. */
1796 	rc = bus_generic_detach(dev);
1797 	if (rc)
1798 		return (rc);
1799 	device_delete_children(dev);
1800 
1801 	doom_vi(sc, &pi->vi[0]);
1802 
1803 	if (pi->flags & HAS_TRACEQ) {
1804 		sc->traceq = -1;	/* cloner should not create ifnet */
1805 		t4_tracer_port_detach(sc);
1806 	}
1807 
1808 	cxgbe_vi_detach(&pi->vi[0]);
1809 	callout_drain(&pi->tick);
1810 	ifmedia_removeall(&pi->media);
1811 
1812 	end_synchronized_op(sc, 0);
1813 
1814 	return (0);
1815 }
1816 
1817 static void
1818 cxgbe_init(void *arg)
1819 {
1820 	struct vi_info *vi = arg;
1821 	struct adapter *sc = vi->pi->adapter;
1822 
1823 	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
1824 		return;
1825 	cxgbe_init_synchronized(vi);
1826 	end_synchronized_op(sc, 0);
1827 }
1828 
1829 static int
1830 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
1831 {
1832 	int rc = 0, mtu, flags;
1833 	struct vi_info *vi = ifp->if_softc;
1834 	struct port_info *pi = vi->pi;
1835 	struct adapter *sc = pi->adapter;
1836 	struct ifreq *ifr = (struct ifreq *)data;
1837 	uint32_t mask;
1838 
1839 	switch (cmd) {
1840 	case SIOCSIFMTU:
1841 		mtu = ifr->ifr_mtu;
1842 		if (mtu < ETHERMIN || mtu > MAX_MTU)
1843 			return (EINVAL);
1844 
1845 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
1846 		if (rc)
1847 			return (rc);
1848 		ifp->if_mtu = mtu;
1849 		if (vi->flags & VI_INIT_DONE) {
1850 			t4_update_fl_bufsize(ifp);
1851 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1852 				rc = update_mac_settings(ifp, XGMAC_MTU);
1853 		}
1854 		end_synchronized_op(sc, 0);
1855 		break;
1856 
1857 	case SIOCSIFFLAGS:
1858 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4flg");
1859 		if (rc)
1860 			return (rc);
1861 
1862 		if (ifp->if_flags & IFF_UP) {
1863 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1864 				flags = vi->if_flags;
1865 				if ((ifp->if_flags ^ flags) &
1866 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1867 					rc = update_mac_settings(ifp,
1868 					    XGMAC_PROMISC | XGMAC_ALLMULTI);
1869 				}
1870 			} else {
1871 				rc = cxgbe_init_synchronized(vi);
1872 			}
1873 			vi->if_flags = ifp->if_flags;
1874 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1875 			rc = cxgbe_uninit_synchronized(vi);
1876 		}
1877 		end_synchronized_op(sc, 0);
1878 		break;
1879 
1880 	case SIOCADDMULTI:
1881 	case SIOCDELMULTI:
1882 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4multi");
1883 		if (rc)
1884 			return (rc);
1885 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1886 			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
1887 		end_synchronized_op(sc, 0);
1888 		break;
1889 
1890 	case SIOCSIFCAP:
1891 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
1892 		if (rc)
1893 			return (rc);
1894 
1895 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1896 		if (mask & IFCAP_TXCSUM) {
1897 			ifp->if_capenable ^= IFCAP_TXCSUM;
1898 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1899 
1900 			if (IFCAP_TSO4 & ifp->if_capenable &&
1901 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1902 				ifp->if_capenable &= ~IFCAP_TSO4;
1903 				if_printf(ifp,
1904 				    "tso4 disabled due to -txcsum.\n");
1905 			}
1906 		}
1907 		if (mask & IFCAP_TXCSUM_IPV6) {
1908 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1909 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1910 
1911 			if (IFCAP_TSO6 & ifp->if_capenable &&
1912 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1913 				ifp->if_capenable &= ~IFCAP_TSO6;
1914 				if_printf(ifp,
1915 				    "tso6 disabled due to -txcsum6.\n");
1916 			}
1917 		}
1918 		if (mask & IFCAP_RXCSUM)
1919 			ifp->if_capenable ^= IFCAP_RXCSUM;
1920 		if (mask & IFCAP_RXCSUM_IPV6)
1921 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1922 
1923 		/*
1924 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1925 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1926 		 * sending a TSO request our way, so it's sufficient to toggle
1927 		 * IFCAP_TSOx only.
1928 		 */
1929 		if (mask & IFCAP_TSO4) {
1930 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1931 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1932 				if_printf(ifp, "enable txcsum first.\n");
1933 				rc = EAGAIN;
1934 				goto fail;
1935 			}
1936 			ifp->if_capenable ^= IFCAP_TSO4;
1937 		}
1938 		if (mask & IFCAP_TSO6) {
1939 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1940 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1941 				if_printf(ifp, "enable txcsum6 first.\n");
1942 				rc = EAGAIN;
1943 				goto fail;
1944 			}
1945 			ifp->if_capenable ^= IFCAP_TSO6;
1946 		}
1947 		if (mask & IFCAP_LRO) {
1948 #if defined(INET) || defined(INET6)
1949 			int i;
1950 			struct sge_rxq *rxq;
1951 
1952 			ifp->if_capenable ^= IFCAP_LRO;
1953 			for_each_rxq(vi, i, rxq) {
1954 				if (ifp->if_capenable & IFCAP_LRO)
1955 					rxq->iq.flags |= IQ_LRO_ENABLED;
1956 				else
1957 					rxq->iq.flags &= ~IQ_LRO_ENABLED;
1958 			}
1959 #endif
1960 		}
1961 #ifdef TCP_OFFLOAD
1962 		if (mask & IFCAP_TOE) {
1963 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
1964 
1965 			rc = toe_capability(vi, enable);
1966 			if (rc != 0)
1967 				goto fail;
1968 
1969 			ifp->if_capenable ^= IFCAP_TOE;
1970 		}
1971 #endif
1972 		if (mask & IFCAP_VLAN_HWTAGGING) {
1973 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1974 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1975 				rc = update_mac_settings(ifp, XGMAC_VLANEX);
1976 		}
1977 		if (mask & IFCAP_VLAN_MTU) {
1978 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
1979 
1980 			/* Need to find out how to disable auto-mtu-inflation */
1981 		}
1982 		if (mask & IFCAP_VLAN_HWTSO)
1983 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1984 		if (mask & IFCAP_VLAN_HWCSUM)
1985 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
1986 #ifdef RATELIMIT
1987 		if (mask & IFCAP_TXRTLMT)
1988 			ifp->if_capenable ^= IFCAP_TXRTLMT;
1989 #endif
1990 		if (mask & IFCAP_HWRXTSTMP) {
1991 			int i;
1992 			struct sge_rxq *rxq;
1993 
1994 			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
1995 			for_each_rxq(vi, i, rxq) {
1996 				if (ifp->if_capenable & IFCAP_HWRXTSTMP)
1997 					rxq->iq.flags |= IQ_RX_TIMESTAMP;
1998 				else
1999 					rxq->iq.flags &= ~IQ_RX_TIMESTAMP;
2000 			}
2001 		}
2002 		if (mask & IFCAP_NOMAP)
2003 			ifp->if_capenable ^= IFCAP_NOMAP;
2004 
2005 #ifdef VLAN_CAPABILITIES
2006 		VLAN_CAPABILITIES(ifp);
2007 #endif
2008 fail:
2009 		end_synchronized_op(sc, 0);
2010 		break;
2011 
2012 	case SIOCSIFMEDIA:
2013 	case SIOCGIFMEDIA:
2014 	case SIOCGIFXMEDIA:
2015 		ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
2016 		break;
2017 
2018 	case SIOCGI2C: {
2019 		struct ifi2creq i2c;
2020 
2021 		rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2022 		if (rc != 0)
2023 			break;
2024 		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
2025 			rc = EPERM;
2026 			break;
2027 		}
2028 		if (i2c.len > sizeof(i2c.data)) {
2029 			rc = EINVAL;
2030 			break;
2031 		}
2032 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
2033 		if (rc)
2034 			return (rc);
2035 		rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
2036 		    i2c.offset, i2c.len, &i2c.data[0]);
2037 		end_synchronized_op(sc, 0);
2038 		if (rc == 0)
2039 			rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2040 		break;
2041 	}
2042 
2043 	default:
2044 		rc = ether_ioctl(ifp, cmd, data);
2045 	}
2046 
2047 	return (rc);
2048 }
2049 
2050 static int
2051 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
2052 {
2053 	struct vi_info *vi = ifp->if_softc;
2054 	struct port_info *pi = vi->pi;
2055 	struct adapter *sc = pi->adapter;
2056 	struct sge_txq *txq;
2057 #ifdef RATELIMIT
2058 	struct cxgbe_snd_tag *cst;
2059 #endif
2060 	void *items[1];
2061 	int rc;
2062 
2063 	M_ASSERTPKTHDR(m);
2064 	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */
2065 #ifdef RATELIMIT
2066 	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2067 		MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2068 #endif
2069 
2070 	if (__predict_false(pi->link_cfg.link_ok == false)) {
2071 		m_freem(m);
2072 		return (ENETDOWN);
2073 	}
2074 
2075 	rc = parse_pkt(sc, &m);
2076 	if (__predict_false(rc != 0)) {
2077 		MPASS(m == NULL);			/* was freed already */
2078 		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
2079 		return (rc);
2080 	}
2081 #ifdef RATELIMIT
2082 	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
2083 		cst = mst_to_cst(m->m_pkthdr.snd_tag);
2084 		if (cst->type == IF_SND_TAG_TYPE_RATE_LIMIT)
2085 			return (ethofld_transmit(ifp, m));
2086 	}
2087 #endif
2088 
2089 	/* Select a txq. */
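	/*
	 * Hashed traffic is spread over the txqs that follow the first
	 * rsrv_noflowq queues; packets without a flowid stay on the first
	 * queue, so the reserved queues are never starved by hashed flows.
	 */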
2090 	txq = &sc->sge.txq[vi->first_txq];
2091 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2092 		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
2093 		    vi->rsrv_noflowq);
2094 
2095 	items[0] = m;
2096 	rc = mp_ring_enqueue(txq->r, items, 1, 4096);
2097 	if (__predict_false(rc != 0))
2098 		m_freem(m);
2099 
2100 	return (rc);
2101 }
2102 
2103 static void
2104 cxgbe_qflush(struct ifnet *ifp)
2105 {
2106 	struct vi_info *vi = ifp->if_softc;
2107 	struct sge_txq *txq;
2108 	int i;
2109 
2110 	/* queues do not exist if !VI_INIT_DONE. */
2111 	if (vi->flags & VI_INIT_DONE) {
2112 		for_each_txq(vi, i, txq) {
2113 			TXQ_LOCK(txq);
2114 			txq->eq.flags |= EQ_QFLUSH;
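			/*
			 * While EQ_QFLUSH is set the tx path frees pending
			 * frames instead of transmitting them, so the drain
			 * below empties the ring quickly.
			 */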
2115 			TXQ_UNLOCK(txq);
2116 			while (!mp_ring_is_idle(txq->r)) {
2117 				mp_ring_check_drainage(txq->r, 0);
2118 				pause("qflush", 1);
2119 			}
2120 			TXQ_LOCK(txq);
2121 			txq->eq.flags &= ~EQ_QFLUSH;
2122 			TXQ_UNLOCK(txq);
2123 		}
2124 	}
2125 	if_qflush(ifp);
2126 }
2127 
2128 static uint64_t
2129 vi_get_counter(struct ifnet *ifp, ift_counter c)
2130 {
2131 	struct vi_info *vi = ifp->if_softc;
2132 	struct fw_vi_stats_vf *s = &vi->stats;
2133 
2134 	vi_refresh_stats(vi->pi->adapter, vi);
2135 
2136 	switch (c) {
2137 	case IFCOUNTER_IPACKETS:
2138 		return (s->rx_bcast_frames + s->rx_mcast_frames +
2139 		    s->rx_ucast_frames);
2140 	case IFCOUNTER_IERRORS:
2141 		return (s->rx_err_frames);
2142 	case IFCOUNTER_OPACKETS:
2143 		return (s->tx_bcast_frames + s->tx_mcast_frames +
2144 		    s->tx_ucast_frames + s->tx_offload_frames);
2145 	case IFCOUNTER_OERRORS:
2146 		return (s->tx_drop_frames);
2147 	case IFCOUNTER_IBYTES:
2148 		return (s->rx_bcast_bytes + s->rx_mcast_bytes +
2149 		    s->rx_ucast_bytes);
2150 	case IFCOUNTER_OBYTES:
2151 		return (s->tx_bcast_bytes + s->tx_mcast_bytes +
2152 		    s->tx_ucast_bytes + s->tx_offload_bytes);
2153 	case IFCOUNTER_IMCASTS:
2154 		return (s->rx_mcast_frames);
2155 	case IFCOUNTER_OMCASTS:
2156 		return (s->tx_mcast_frames);
2157 	case IFCOUNTER_OQDROPS: {
2158 		uint64_t drops;
2159 
2160 		drops = 0;
2161 		if (vi->flags & VI_INIT_DONE) {
2162 			int i;
2163 			struct sge_txq *txq;
2164 
2165 			for_each_txq(vi, i, txq)
2166 				drops += counter_u64_fetch(txq->r->drops);
2167 		}
2168 
2169 		return (drops);
2170 
2171 	}
2172 
2173 	default:
2174 		return (if_get_counter_default(ifp, c));
2175 	}
2176 }
2177 
2178 uint64_t
2179 cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
2180 {
2181 	struct vi_info *vi = ifp->if_softc;
2182 	struct port_info *pi = vi->pi;
2183 	struct adapter *sc = pi->adapter;
2184 	struct port_stats *s = &pi->stats;
2185 
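	/*
	 * Port-wide MAC statistics cannot be attributed to a single ifnet
	 * when the port carries multiple VIs (or on a VF), so fall back to
	 * the firmware's per-VI statistics.
	 */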
2186 	if (pi->nvi > 1 || sc->flags & IS_VF)
2187 		return (vi_get_counter(ifp, c));
2188 
2189 	cxgbe_refresh_stats(sc, pi);
2190 
2191 	switch (c) {
2192 	case IFCOUNTER_IPACKETS:
2193 		return (s->rx_frames);
2194 
2195 	case IFCOUNTER_IERRORS:
2196 		return (s->rx_jabber + s->rx_runt + s->rx_too_long +
2197 		    s->rx_fcs_err + s->rx_len_err);
2198 
2199 	case IFCOUNTER_OPACKETS:
2200 		return (s->tx_frames);
2201 
2202 	case IFCOUNTER_OERRORS:
2203 		return (s->tx_error_frames);
2204 
2205 	case IFCOUNTER_IBYTES:
2206 		return (s->rx_octets);
2207 
2208 	case IFCOUNTER_OBYTES:
2209 		return (s->tx_octets);
2210 
2211 	case IFCOUNTER_IMCASTS:
2212 		return (s->rx_mcast_frames);
2213 
2214 	case IFCOUNTER_OMCASTS:
2215 		return (s->tx_mcast_frames);
2216 
2217 	case IFCOUNTER_IQDROPS:
2218 		return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
2219 		    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
2220 		    s->rx_trunc3 + pi->tnl_cong_drops);
2221 
2222 	case IFCOUNTER_OQDROPS: {
2223 		uint64_t drops;
2224 
2225 		drops = s->tx_drop;
2226 		if (vi->flags & VI_INIT_DONE) {
2227 			int i;
2228 			struct sge_txq *txq;
2229 
2230 			for_each_txq(vi, i, txq)
2231 				drops += counter_u64_fetch(txq->r->drops);
2232 		}
2233 
2234 		return (drops);
2235 
2236 	}
2237 
2238 	default:
2239 		return (if_get_counter_default(ifp, c));
2240 	}
2241 }
2242 
2243 #ifdef RATELIMIT
2244 void
2245 cxgbe_snd_tag_init(struct cxgbe_snd_tag *cst, struct ifnet *ifp, int type)
2246 {
2247 
2248 	m_snd_tag_init(&cst->com, ifp);
2249 	cst->type = type;
2250 }
2251 
2252 static int
2253 cxgbe_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
2254     struct m_snd_tag **pt)
2255 {
2256 	int error;
2257 
2258 	switch (params->hdr.type) {
2259 #ifdef RATELIMIT
2260 	case IF_SND_TAG_TYPE_RATE_LIMIT:
2261 		error = cxgbe_rate_tag_alloc(ifp, params, pt);
2262 		break;
2263 #endif
2264 	default:
2265 		error = EOPNOTSUPP;
2266 	}
2267 	if (error == 0)
2268 		MPASS(mst_to_cst(*pt)->type == params->hdr.type);
2269 	return (error);
2270 }
2271 
2272 static int
2273 cxgbe_snd_tag_modify(struct m_snd_tag *mst,
2274     union if_snd_tag_modify_params *params)
2275 {
2276 	struct cxgbe_snd_tag *cst;
2277 
2278 	cst = mst_to_cst(mst);
2279 	switch (cst->type) {
2280 #ifdef RATELIMIT
2281 	case IF_SND_TAG_TYPE_RATE_LIMIT:
2282 		return (cxgbe_rate_tag_modify(mst, params));
2283 #endif
2284 	default:
2285 		return (EOPNOTSUPP);
2286 	}
2287 }
2288 
2289 static int
2290 cxgbe_snd_tag_query(struct m_snd_tag *mst,
2291     union if_snd_tag_query_params *params)
2292 {
2293 	struct cxgbe_snd_tag *cst;
2294 
2295 	cst = mst_to_cst(mst);
2296 	switch (cst->type) {
2297 #ifdef RATELIMIT
2298 	case IF_SND_TAG_TYPE_RATE_LIMIT:
2299 		return (cxgbe_rate_tag_query(mst, params));
2300 #endif
2301 	default:
2302 		return (EOPNOTSUPP);
2303 	}
2304 }
2305 
2306 static void
2307 cxgbe_snd_tag_free(struct m_snd_tag *mst)
2308 {
2309 	struct cxgbe_snd_tag *cst;
2310 
2311 	cst = mst_to_cst(mst);
2312 	switch (cst->type) {
2313 #ifdef RATELIMIT
2314 	case IF_SND_TAG_TYPE_RATE_LIMIT:
2315 		cxgbe_rate_tag_free(mst);
2316 		return;
2317 #endif
2318 	default:
2319 		panic("shouldn't get here");
2320 	}
2321 }
2322 #endif
2323 
2324 /*
2325  * The kernel picks a media from the list we provided but we still validate
2326  * the request.
2327  */
2328 int
2329 cxgbe_media_change(struct ifnet *ifp)
2330 {
2331 	struct vi_info *vi = ifp->if_softc;
2332 	struct port_info *pi = vi->pi;
2333 	struct ifmedia *ifm = &pi->media;
2334 	struct link_config *lc = &pi->link_cfg;
2335 	struct adapter *sc = pi->adapter;
2336 	int rc;
2337 
2338 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec");
2339 	if (rc != 0)
2340 		return (rc);
2341 	PORT_LOCK(pi);
2342 	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
2343 		/* ifconfig .. media autoselect */
2344 		if (!(lc->supported & FW_PORT_CAP32_ANEG)) {
2345 			rc = ENOTSUP; /* AN not supported by transceiver */
2346 			goto done;
2347 		}
2348 		lc->requested_aneg = AUTONEG_ENABLE;
2349 		lc->requested_speed = 0;
2350 		lc->requested_fc |= PAUSE_AUTONEG;
2351 	} else {
2352 		lc->requested_aneg = AUTONEG_DISABLE;
2353 		lc->requested_speed =
2354 		    ifmedia_baudrate(ifm->ifm_media) / 1000000;
2355 		lc->requested_fc = 0;
2356 		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
2357 			lc->requested_fc |= PAUSE_RX;
2358 		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
2359 			lc->requested_fc |= PAUSE_TX;
2360 	}
2361 	if (pi->up_vis > 0) {
2362 		fixup_link_config(pi);
2363 		rc = apply_link_config(pi);
2364 	}
2365 done:
2366 	PORT_UNLOCK(pi);
2367 	end_synchronized_op(sc, 0);
2368 	return (rc);
2369 }
2370 
2371 /*
2372  * Base media word (without ETHER, pause, link active, etc.) for the port at the
2373  * given speed.
2374  */
2375 static int
2376 port_mword(struct port_info *pi, uint32_t speed)
2377 {
2378 
2379 	MPASS(speed & M_FW_PORT_CAP32_SPEED);
2380 	MPASS(powerof2(speed));
2381 
2382 	switch (pi->port_type) {
2383 	case FW_PORT_TYPE_BT_SGMII:
2384 	case FW_PORT_TYPE_BT_XFI:
2385 	case FW_PORT_TYPE_BT_XAUI:
2386 		/* BaseT */
2387 		switch (speed) {
2388 		case FW_PORT_CAP32_SPEED_100M:
2389 			return (IFM_100_T);
2390 		case FW_PORT_CAP32_SPEED_1G:
2391 			return (IFM_1000_T);
2392 		case FW_PORT_CAP32_SPEED_10G:
2393 			return (IFM_10G_T);
2394 		}
2395 		break;
2396 	case FW_PORT_TYPE_KX4:
2397 		if (speed == FW_PORT_CAP32_SPEED_10G)
2398 			return (IFM_10G_KX4);
2399 		break;
2400 	case FW_PORT_TYPE_CX4:
2401 		if (speed == FW_PORT_CAP32_SPEED_10G)
2402 			return (IFM_10G_CX4);
2403 		break;
2404 	case FW_PORT_TYPE_KX:
2405 		if (speed == FW_PORT_CAP32_SPEED_1G)
2406 			return (IFM_1000_KX);
2407 		break;
2408 	case FW_PORT_TYPE_KR:
2409 	case FW_PORT_TYPE_BP_AP:
2410 	case FW_PORT_TYPE_BP4_AP:
2411 	case FW_PORT_TYPE_BP40_BA:
2412 	case FW_PORT_TYPE_KR4_100G:
2413 	case FW_PORT_TYPE_KR_SFP28:
2414 	case FW_PORT_TYPE_KR_XLAUI:
2415 		switch (speed) {
2416 		case FW_PORT_CAP32_SPEED_1G:
2417 			return (IFM_1000_KX);
2418 		case FW_PORT_CAP32_SPEED_10G:
2419 			return (IFM_10G_KR);
2420 		case FW_PORT_CAP32_SPEED_25G:
2421 			return (IFM_25G_KR);
2422 		case FW_PORT_CAP32_SPEED_40G:
2423 			return (IFM_40G_KR4);
2424 		case FW_PORT_CAP32_SPEED_50G:
2425 			return (IFM_50G_KR2);
2426 		case FW_PORT_CAP32_SPEED_100G:
2427 			return (IFM_100G_KR4);
2428 		}
2429 		break;
2430 	case FW_PORT_TYPE_FIBER_XFI:
2431 	case FW_PORT_TYPE_FIBER_XAUI:
2432 	case FW_PORT_TYPE_SFP:
2433 	case FW_PORT_TYPE_QSFP_10G:
2434 	case FW_PORT_TYPE_QSA:
2435 	case FW_PORT_TYPE_QSFP:
2436 	case FW_PORT_TYPE_CR4_QSFP:
2437 	case FW_PORT_TYPE_CR_QSFP:
2438 	case FW_PORT_TYPE_CR2_QSFP:
2439 	case FW_PORT_TYPE_SFP28:
2440 		/* Pluggable transceiver */
2441 		switch (pi->mod_type) {
2442 		case FW_PORT_MOD_TYPE_LR:
2443 			switch (speed) {
2444 			case FW_PORT_CAP32_SPEED_1G:
2445 				return (IFM_1000_LX);
2446 			case FW_PORT_CAP32_SPEED_10G:
2447 				return (IFM_10G_LR);
2448 			case FW_PORT_CAP32_SPEED_25G:
2449 				return (IFM_25G_LR);
2450 			case FW_PORT_CAP32_SPEED_40G:
2451 				return (IFM_40G_LR4);
2452 			case FW_PORT_CAP32_SPEED_50G:
2453 				return (IFM_50G_LR2);
2454 			case FW_PORT_CAP32_SPEED_100G:
2455 				return (IFM_100G_LR4);
2456 			}
2457 			break;
2458 		case FW_PORT_MOD_TYPE_SR:
2459 			switch (speed) {
2460 			case FW_PORT_CAP32_SPEED_1G:
2461 				return (IFM_1000_SX);
2462 			case FW_PORT_CAP32_SPEED_10G:
2463 				return (IFM_10G_SR);
2464 			case FW_PORT_CAP32_SPEED_25G:
2465 				return (IFM_25G_SR);
2466 			case FW_PORT_CAP32_SPEED_40G:
2467 				return (IFM_40G_SR4);
2468 			case FW_PORT_CAP32_SPEED_50G:
2469 				return (IFM_50G_SR2);
2470 			case FW_PORT_CAP32_SPEED_100G:
2471 				return (IFM_100G_SR4);
2472 			}
2473 			break;
2474 		case FW_PORT_MOD_TYPE_ER:
2475 			if (speed == FW_PORT_CAP32_SPEED_10G)
2476 				return (IFM_10G_ER);
2477 			break;
2478 		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
2479 		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
2480 			switch (speed) {
2481 			case FW_PORT_CAP32_SPEED_1G:
2482 				return (IFM_1000_CX);
2483 			case FW_PORT_CAP32_SPEED_10G:
2484 				return (IFM_10G_TWINAX);
2485 			case FW_PORT_CAP32_SPEED_25G:
2486 				return (IFM_25G_CR);
2487 			case FW_PORT_CAP32_SPEED_40G:
2488 				return (IFM_40G_CR4);
2489 			case FW_PORT_CAP32_SPEED_50G:
2490 				return (IFM_50G_CR2);
2491 			case FW_PORT_CAP32_SPEED_100G:
2492 				return (IFM_100G_CR4);
2493 			}
2494 			break;
2495 		case FW_PORT_MOD_TYPE_LRM:
2496 			if (speed == FW_PORT_CAP32_SPEED_10G)
2497 				return (IFM_10G_LRM);
2498 			break;
2499 		case FW_PORT_MOD_TYPE_NA:
2500 			MPASS(0);	/* Not pluggable? */
2501 			/* fall through */
2502 		case FW_PORT_MOD_TYPE_ERROR:
2503 		case FW_PORT_MOD_TYPE_UNKNOWN:
2504 		case FW_PORT_MOD_TYPE_NOTSUPPORTED:
2505 			break;
2506 		case FW_PORT_MOD_TYPE_NONE:
2507 			return (IFM_NONE);
2508 		}
2509 		break;
2510 	case FW_PORT_TYPE_NONE:
2511 		return (IFM_NONE);
2512 	}
2513 
2514 	return (IFM_UNKNOWN);
2515 }
2516 
2517 void
2518 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2519 {
2520 	struct vi_info *vi = ifp->if_softc;
2521 	struct port_info *pi = vi->pi;
2522 	struct adapter *sc = pi->adapter;
2523 	struct link_config *lc = &pi->link_cfg;
2524 
2525 	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0)
2526 		return;
2527 	PORT_LOCK(pi);
2528 
2529 	if (pi->up_vis == 0) {
2530 		/*
2531 		 * If all the interfaces are administratively down the firmware
2532 		 * does not report transceiver changes.  Refresh port info here
2533 		 * so that ifconfig displays accurate ifmedia at all times.
2534 		 * This is the only reason we have a synchronized op in this
2535 		 * function.  Just PORT_LOCK would have been enough otherwise.
2536 		 */
2537 		t4_update_port_info(pi);
2538 		build_medialist(pi);
2539 	}
2540 
2541 	/* ifm_status */
2542 	ifmr->ifm_status = IFM_AVALID;
2543 	if (lc->link_ok == false)
2544 		goto done;
2545 	ifmr->ifm_status |= IFM_ACTIVE;
2546 
2547 	/* ifm_active */
2548 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2549 	ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
2550 	if (lc->fc & PAUSE_RX)
2551 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2552 	if (lc->fc & PAUSE_TX)
2553 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2554 	ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
2555 done:
2556 	PORT_UNLOCK(pi);
2557 	end_synchronized_op(sc, 0);
2558 }
2559 
2560 static int
2561 vcxgbe_probe(device_t dev)
2562 {
2563 	char buf[128];
2564 	struct vi_info *vi = device_get_softc(dev);
2565 
2566 	snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2567 	    vi - vi->pi->vi);
2568 	device_set_desc_copy(dev, buf);
2569 
2570 	return (BUS_PROBE_DEFAULT);
2571 }
2572 
2573 static int
2574 alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2575 {
2576 	int func, index, rc;
2577 	uint32_t param, val;
2578 
2579 	ASSERT_SYNCHRONIZED_OP(sc);
2580 
2581 	index = vi - pi->vi;
2582 	MPASS(index > 0);	/* This function deals with _extra_ VIs only */
2583 	KASSERT(index < nitems(vi_mac_funcs),
2584 	    ("%s: VI %s doesn't have a MAC func", __func__,
2585 	    device_get_nameunit(vi->dev)));
2586 	func = vi_mac_funcs[index];
2587 	rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2588 	    vi->hw_addr, &vi->rss_size, &vi->vfvld, &vi->vin, func, 0);
2589 	if (rc < 0) {
2590 		device_printf(vi->dev, "failed to allocate virtual interface %d "
2591 		    "for port %d: %d\n", index, pi->port_id, -rc);
2592 		return (-rc);
2593 	}
2594 	vi->viid = rc;
2595 
2596 	if (vi->rss_size == 1) {
2597 		/*
2598 		 * This VI didn't get a slice of the RSS table.  Reduce the
2599 		 * number of VIs being created (hw.cxgbe.num_vis) or modify the
2600 		 * configuration file (nvi, rssnvi for this PF) if this is a
2601 		 * problem.
2602 		 */
2603 		device_printf(vi->dev, "RSS table not available.\n");
2604 		vi->rss_base = 0xffff;
2605 
2606 		return (0);
2607 	}
2608 
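	/* Ask the firmware for this VI's RSS slice: rss_size<<16 | rss_base. */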
2609 	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2610 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2611 	    V_FW_PARAMS_PARAM_YZ(vi->viid);
2612 	rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2613 	if (rc)
2614 		vi->rss_base = 0xffff;
2615 	else {
2616 		MPASS((val >> 16) == vi->rss_size);
2617 		vi->rss_base = val & 0xffff;
2618 	}
2619 
2620 	return (0);
2621 }
2622 
2623 static int
2624 vcxgbe_attach(device_t dev)
2625 {
2626 	struct vi_info *vi;
2627 	struct port_info *pi;
2628 	struct adapter *sc;
2629 	int rc;
2630 
2631 	vi = device_get_softc(dev);
2632 	pi = vi->pi;
2633 	sc = pi->adapter;
2634 
2635 	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
2636 	if (rc)
2637 		return (rc);
2638 	rc = alloc_extra_vi(sc, pi, vi);
2639 	end_synchronized_op(sc, 0);
2640 	if (rc)
2641 		return (rc);
2642 
2643 	rc = cxgbe_vi_attach(dev, vi);
2644 	if (rc) {
2645 		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2646 		return (rc);
2647 	}
2648 	return (0);
2649 }
2650 
2651 static int
2652 vcxgbe_detach(device_t dev)
2653 {
2654 	struct vi_info *vi;
2655 	struct adapter *sc;
2656 
2657 	vi = device_get_softc(dev);
2658 	sc = vi->pi->adapter;
2659 
2660 	doom_vi(sc, vi);
2661 
2662 	cxgbe_vi_detach(vi);
2663 	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2664 
2665 	end_synchronized_op(sc, 0);
2666 
2667 	return (0);
2668 }
2669 
2670 static struct callout fatal_callout;
2671 
2672 static void
2673 delayed_panic(void *arg)
2674 {
2675 	struct adapter *sc = arg;
2676 
2677 	panic("%s: panic on fatal error", device_get_nameunit(sc->dev));
2678 }
2679 
2680 void
2681 t4_fatal_err(struct adapter *sc, bool fw_error)
2682 {
2683 
2684 	t4_shutdown_adapter(sc);
2685 	log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n",
2686 	    device_get_nameunit(sc->dev));
2687 	if (fw_error) {
2688 		ASSERT_SYNCHRONIZED_OP(sc);
2689 		sc->flags |= ADAP_ERR;
2690 	} else {
2691 		ADAPTER_LOCK(sc);
2692 		sc->flags |= ADAP_ERR;
2693 		ADAPTER_UNLOCK(sc);
2694 	}
2695 
2696 	if (t4_panic_on_fatal_err) {
2697 		log(LOG_ALERT, "%s: panic on fatal error after 30s\n",
2698 		    device_get_nameunit(sc->dev));
2699 		callout_reset(&fatal_callout, hz * 30, delayed_panic, sc);
2700 	}
2701 }
2702 
2703 void
2704 t4_add_adapter(struct adapter *sc)
2705 {
2706 	sx_xlock(&t4_list_lock);
2707 	SLIST_INSERT_HEAD(&t4_list, sc, link);
2708 	sx_xunlock(&t4_list_lock);
2709 }
2710 
2711 int
2712 t4_map_bars_0_and_4(struct adapter *sc)
2713 {
2714 	sc->regs_rid = PCIR_BAR(0);
2715 	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2716 	    &sc->regs_rid, RF_ACTIVE);
2717 	if (sc->regs_res == NULL) {
2718 		device_printf(sc->dev, "cannot map registers.\n");
2719 		return (ENXIO);
2720 	}
2721 	sc->bt = rman_get_bustag(sc->regs_res);
2722 	sc->bh = rman_get_bushandle(sc->regs_res);
2723 	sc->mmio_len = rman_get_size(sc->regs_res);
2724 	setbit(&sc->doorbells, DOORBELL_KDB);
2725 
2726 	sc->msix_rid = PCIR_BAR(4);
2727 	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2728 	    &sc->msix_rid, RF_ACTIVE);
2729 	if (sc->msix_res == NULL) {
2730 		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2731 		return (ENXIO);
2732 	}
2733 
2734 	return (0);
2735 }
2736 
2737 int
2738 t4_map_bar_2(struct adapter *sc)
2739 {
2740 
2741 	/*
2742 	 * T4: only iWARP driver uses the userspace doorbells.  There is no need
2743 	 * to map it if RDMA is disabled.
2744 	 */
2745 	if (is_t4(sc) && sc->rdmacaps == 0)
2746 		return (0);
2747 
2748 	sc->udbs_rid = PCIR_BAR(2);
2749 	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2750 	    &sc->udbs_rid, RF_ACTIVE);
2751 	if (sc->udbs_res == NULL) {
2752 		device_printf(sc->dev, "cannot map doorbell BAR.\n");
2753 		return (ENXIO);
2754 	}
2755 	sc->udbs_base = rman_get_virtual(sc->udbs_res);
2756 
2757 	if (chip_id(sc) >= CHELSIO_T5) {
2758 		setbit(&sc->doorbells, DOORBELL_UDB);
2759 #if defined(__i386__) || defined(__amd64__)
2760 		if (t5_write_combine) {
2761 			int rc, mode;
2762 
2763 			/*
2764 			 * Enable write combining on BAR2.  This is the
2765 			 * userspace doorbell BAR and is split into 128B
2766 			 * (UDBS_SEG_SIZE) doorbell regions, each associated
2767 			 * with an egress queue.  The first 64B has the doorbell
2768 			 * and the second 64B can be used to submit a tx work
2769 			 * request with an implicit doorbell.
2770 			 */
2771 
2772 			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2773 			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2774 			if (rc == 0) {
2775 				clrbit(&sc->doorbells, DOORBELL_UDB);
2776 				setbit(&sc->doorbells, DOORBELL_WCWR);
2777 				setbit(&sc->doorbells, DOORBELL_UDBWC);
2778 			} else {
2779 				device_printf(sc->dev,
2780 				    "couldn't enable write combining: %d\n",
2781 				    rc);
2782 			}
2783 
2784 			mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2785 			t4_write_reg(sc, A_SGE_STAT_CFG,
2786 			    V_STATSOURCE_T5(7) | mode);
2787 		}
2788 #endif
2789 	}
2790 	sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0;
2791 
2792 	return (0);
2793 }
2794 
2795 struct memwin_init {
2796 	uint32_t base;
2797 	uint32_t aperture;
2798 };
2799 
2800 static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2801 	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
2802 	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
2803 	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2804 };
2805 
2806 static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
2807 	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
2808 	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
2809 	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
2810 };
2811 
2812 static void
2813 setup_memwin(struct adapter *sc)
2814 {
2815 	const struct memwin_init *mw_init;
2816 	struct memwin *mw;
2817 	int i;
2818 	uint32_t bar0;
2819 
2820 	if (is_t4(sc)) {
2821 		/*
2822 		 * Read low 32b of bar0 indirectly via the hardware backdoor
2823 		 * mechanism.  Works from within PCI passthrough environments
2824 		 * too, where rman_get_start() can return a different value.  We
2825 		 * need to program the T4 memory window decoders with the actual
2826 		 * addresses that will be coming across the PCIe link.
2827 		 */
2828 		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
2829 		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
2830 
2831 		mw_init = &t4_memwin[0];
2832 	} else {
2833 		/* T5+ use the relative offset inside the PCIe BAR */
2834 		bar0 = 0;
2835 
2836 		mw_init = &t5_memwin[0];
2837 	}
2838 
2839 	for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
2840 		rw_init(&mw->mw_lock, "memory window access");
2841 		mw->mw_base = mw_init->base;
2842 		mw->mw_aperture = mw_init->aperture;
2843 		mw->mw_curpos = 0;
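		/*
		 * V_WINDOW encodes the aperture as a power of two with 1KB
		 * (2^10) as the smallest size, hence the ilog2() - 10.
		 */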
2844 		t4_write_reg(sc,
2845 		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
2846 		    (mw->mw_base + bar0) | V_BIR(0) |
2847 		    V_WINDOW(ilog2(mw->mw_aperture) - 10));
2848 		rw_wlock(&mw->mw_lock);
2849 		position_memwin(sc, i, 0);
2850 		rw_wunlock(&mw->mw_lock);
2851 	}
2852 
2853 	/* flush */
2854 	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
2855 }
2856 
2857 /*
2858  * Positions the memory window at the given address in the card's address space.
2859  * There are some alignment requirements and the actual position may be at an
2860  * address prior to the requested address.  mw->mw_curpos always has the actual
2861  * position of the window.
2862  */
2863 static void
2864 position_memwin(struct adapter *sc, int idx, uint32_t addr)
2865 {
2866 	struct memwin *mw;
2867 	uint32_t pf;
2868 	uint32_t reg;
2869 
2870 	MPASS(idx >= 0 && idx < NUM_MEMWIN);
2871 	mw = &sc->memwin[idx];
2872 	rw_assert(&mw->mw_lock, RA_WLOCKED);
2873 
2874 	if (is_t4(sc)) {
2875 		pf = 0;
2876 		mw->mw_curpos = addr & ~0xf;	/* start must be 16B aligned */
2877 	} else {
2878 		pf = V_PFNUM(sc->pf);
2879 		mw->mw_curpos = addr & ~0x7f;	/* start must be 128B aligned */
2880 	}
2881 	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
2882 	t4_write_reg(sc, reg, mw->mw_curpos | pf);
2883 	t4_read_reg(sc, reg);	/* flush */
2884 }
2885 
2886 int
2887 rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2888     int len, int rw)
2889 {
2890 	struct memwin *mw;
2891 	uint32_t mw_end, v;
2892 
2893 	MPASS(idx >= 0 && idx < NUM_MEMWIN);
2894 
2895 	/* Memory can only be accessed in naturally aligned 4 byte units */
2896 	if (addr & 3 || len & 3 || len <= 0)
2897 		return (EINVAL);
2898 
2899 	mw = &sc->memwin[idx];
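	/*
	 * Common case: access the window under the read lock.  If the target
	 * address is outside the current window, upgrade to the write lock
	 * (or drop and reacquire it) to reposition, then downgrade again.
	 */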
2900 	while (len > 0) {
2901 		rw_rlock(&mw->mw_lock);
2902 		mw_end = mw->mw_curpos + mw->mw_aperture;
2903 		if (addr >= mw_end || addr < mw->mw_curpos) {
2904 			/* Will need to reposition the window */
2905 			if (!rw_try_upgrade(&mw->mw_lock)) {
2906 				rw_runlock(&mw->mw_lock);
2907 				rw_wlock(&mw->mw_lock);
2908 			}
2909 			rw_assert(&mw->mw_lock, RA_WLOCKED);
2910 			position_memwin(sc, idx, addr);
2911 			rw_downgrade(&mw->mw_lock);
2912 			mw_end = mw->mw_curpos + mw->mw_aperture;
2913 		}
2914 		rw_assert(&mw->mw_lock, RA_RLOCKED);
2915 		while (addr < mw_end && len > 0) {
2916 			if (rw == 0) {
2917 				v = t4_read_reg(sc, mw->mw_base + addr -
2918 				    mw->mw_curpos);
2919 				*val++ = le32toh(v);
2920 			} else {
2921 				v = *val++;
2922 				t4_write_reg(sc, mw->mw_base + addr -
2923 				    mw->mw_curpos, htole32(v));
2924 			}
2925 			addr += 4;
2926 			len -= 4;
2927 		}
2928 		rw_runlock(&mw->mw_lock);
2929 	}
2930 
2931 	return (0);
2932 }
2933 
2934 static void
2935 t4_init_atid_table(struct adapter *sc)
2936 {
2937 	struct tid_info *t;
2938 	int i;
2939 
2940 	t = &sc->tids;
2941 	if (t->natids == 0)
2942 		return;
2943 
2944 	MPASS(t->atid_tab == NULL);
2945 
2946 	t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
2947 	    M_ZERO | M_WAITOK);
2948 	mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
2949 	t->afree = t->atid_tab;
2950 	t->atids_in_use = 0;
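	/* Chain all entries into a singly-linked free list. */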
2951 	for (i = 1; i < t->natids; i++)
2952 		t->atid_tab[i - 1].next = &t->atid_tab[i];
2953 	t->atid_tab[t->natids - 1].next = NULL;
2954 }
2955 
2956 static void
2957 t4_free_atid_table(struct adapter *sc)
2958 {
2959 	struct tid_info *t;
2960 
2961 	t = &sc->tids;
2962 
2963 	KASSERT(t->atids_in_use == 0,
2964 	    ("%s: %d atids still in use.", __func__, t->atids_in_use));
2965 
2966 	if (mtx_initialized(&t->atid_lock))
2967 		mtx_destroy(&t->atid_lock);
2968 	free(t->atid_tab, M_CXGBE);
2969 	t->atid_tab = NULL;
2970 }
2971 
2972 int
2973 alloc_atid(struct adapter *sc, void *ctx)
2974 {
2975 	struct tid_info *t = &sc->tids;
2976 	int atid = -1;
2977 
2978 	mtx_lock(&t->atid_lock);
2979 	if (t->afree) {
2980 		union aopen_entry *p = t->afree;
2981 
2982 		atid = p - t->atid_tab;
2983 		MPASS(atid <= M_TID_TID);
2984 		t->afree = p->next;
2985 		p->data = ctx;
2986 		t->atids_in_use++;
2987 	}
2988 	mtx_unlock(&t->atid_lock);
2989 	return (atid);
2990 }
2991 
2992 void *
2993 lookup_atid(struct adapter *sc, int atid)
2994 {
2995 	struct tid_info *t = &sc->tids;
2996 
2997 	return (t->atid_tab[atid].data);
2998 }
2999 
3000 void
3001 free_atid(struct adapter *sc, int atid)
3002 {
3003 	struct tid_info *t = &sc->tids;
3004 	union aopen_entry *p = &t->atid_tab[atid];
3005 
3006 	mtx_lock(&t->atid_lock);
3007 	p->next = t->afree;
3008 	t->afree = p;
3009 	t->atids_in_use--;
3010 	mtx_unlock(&t->atid_lock);
3011 }
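
/*
 * Typical atid usage, as a sketch (ctx is whatever the caller wants back,
 * e.g. a TOE endpoint):
 *
 *	atid = alloc_atid(sc, ctx);	// -1 if the table is exhausted
 *	...				// atid travels in the active-open WR
 *	ctx = lookup_atid(sc, atid);	// recover the context from the reply
 *	free_atid(sc, atid);		// return the entry to the free list
 */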
3012 
3013 static void
3014 queue_tid_release(struct adapter *sc, int tid)
3015 {
3016 
3017 	CXGBE_UNIMPLEMENTED("deferred tid release");
3018 }
3019 
3020 void
3021 release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
3022 {
3023 	struct wrqe *wr;
3024 	struct cpl_tid_release *req;
3025 
3026 	wr = alloc_wrqe(sizeof(*req), ctrlq);
3027 	if (wr == NULL) {
3028 		queue_tid_release(sc, tid);	/* defer */
3029 		return;
3030 	}
3031 	req = wrtod(wr);
3032 
3033 	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
3034 
3035 	t4_wrq_tx(sc, wr);
3036 }
3037 
3038 static int
3039 t4_range_cmp(const void *a, const void *b)
3040 {
3041 	const struct t4_range *ra = a, *rb = b;
3042 	return ((ra->start > rb->start) - (ra->start < rb->start));
3043 }
3044 
3045 /*
3046  * Verify that the memory range specified by the addr/len pair is valid within
3047  * the card's address space.
3048  */
3049 static int
3050 validate_mem_range(struct adapter *sc, uint32_t addr, uint32_t len)
3051 {
3052 	struct t4_range mem_ranges[4], *r, *next;
3053 	uint32_t em, addr_len;
3054 	int i, n, remaining;
3055 
3056 	/* Memory can only be accessed in naturally aligned 4 byte units */
3057 	if (addr & 3 || len & 3 || len == 0)
3058 		return (EINVAL);
3059 
3060 	/* Enabled memories */
3061 	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3062 
3063 	r = &mem_ranges[0];
3064 	n = 0;
3065 	bzero(r, sizeof(mem_ranges));
3066 	if (em & F_EDRAM0_ENABLE) {
3067 		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3068 		r->size = G_EDRAM0_SIZE(addr_len) << 20;
3069 		if (r->size > 0) {
3070 			r->start = G_EDRAM0_BASE(addr_len) << 20;
3071 			if (addr >= r->start &&
3072 			    addr + len <= r->start + r->size)
3073 				return (0);
3074 			r++;
3075 			n++;
3076 		}
3077 	}
3078 	if (em & F_EDRAM1_ENABLE) {
3079 		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3080 		r->size = G_EDRAM1_SIZE(addr_len) << 20;
3081 		if (r->size > 0) {
3082 			r->start = G_EDRAM1_BASE(addr_len) << 20;
3083 			if (addr >= r->start &&
3084 			    addr + len <= r->start + r->size)
3085 				return (0);
3086 			r++;
3087 			n++;
3088 		}
3089 	}
3090 	if (em & F_EXT_MEM_ENABLE) {
3091 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3092 		r->size = G_EXT_MEM_SIZE(addr_len) << 20;
3093 		if (r->size > 0) {
3094 			r->start = G_EXT_MEM_BASE(addr_len) << 20;
3095 			if (addr >= r->start &&
3096 			    addr + len <= r->start + r->size)
3097 				return (0);
3098 			r++;
3099 			n++;
3100 		}
3101 	}
3102 	if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
3103 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3104 		r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
3105 		if (r->size > 0) {
3106 			r->start = G_EXT_MEM1_BASE(addr_len) << 20;
3107 			if (addr >= r->start &&
3108 			    addr + len <= r->start + r->size)
3109 				return (0);
3110 			r++;
3111 			n++;
3112 		}
3113 	}
3114 	MPASS(n <= nitems(mem_ranges));
3115 
3116 	if (n > 1) {
3117 		/* Sort and merge the ranges. */
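		/*
		 * e.g. EDC0 [0, 256M) and EDC1 [256M, 512M) merge into a
		 * single range [0, 512M), so a request spanning both passes.
		 */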
3118 		qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
3119 
3120 		/* Start from index 0 and examine the next n - 1 entries. */
3121 		r = &mem_ranges[0];
3122 		for (remaining = n - 1; remaining > 0; remaining--, r++) {
3123 
3124 			MPASS(r->size > 0);	/* r is a valid entry. */
3125 			next = r + 1;
3126 			MPASS(next->size > 0);	/* and so is the next one. */
3127 
3128 			while (r->start + r->size >= next->start) {
3129 				/* Merge the next one into the current entry. */
3130 				r->size = max(r->start + r->size,
3131 				    next->start + next->size) - r->start;
3132 				n--;	/* One fewer entry in total. */
3133 				if (--remaining == 0)
3134 					goto done;	/* short circuit */
3135 				next++;
3136 			}
3137 			if (next != r + 1) {
3138 				/*
3139 				 * Some entries were merged into r and next
3140 				 * points to the first valid entry that couldn't
3141 				 * be merged.
3142 				 */
3143 				MPASS(next->size > 0);	/* must be valid */
3144 				memcpy(r + 1, next, remaining * sizeof(*r));
3145 #ifdef INVARIANTS
3146 				/*
3147 				 * This is so that the size assertions in the
3148 				 * next iteration of the loop do the right
3149 				 * thing for entries that were pulled up and are
3150 				 * no longer valid.
3151 				 */
3152 				MPASS(n < nitems(mem_ranges));
3153 				bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
3154 				    sizeof(struct t4_range));
3155 #endif
3156 			}
3157 		}
3158 done:
3159 		/* Done merging the ranges. */
3160 		MPASS(n > 0);
3161 		r = &mem_ranges[0];
3162 		for (i = 0; i < n; i++, r++) {
3163 			if (addr >= r->start &&
3164 			    addr + len <= r->start + r->size)
3165 				return (0);
3166 		}
3167 	}
3168 
3169 	return (EFAULT);
3170 }
3171 
3172 static int
3173 fwmtype_to_hwmtype(int mtype)
3174 {
3175 
3176 	switch (mtype) {
3177 	case FW_MEMTYPE_EDC0:
3178 		return (MEM_EDC0);
3179 	case FW_MEMTYPE_EDC1:
3180 		return (MEM_EDC1);
3181 	case FW_MEMTYPE_EXTMEM:
3182 		return (MEM_MC0);
3183 	case FW_MEMTYPE_EXTMEM1:
3184 		return (MEM_MC1);
3185 	default:
3186 		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
3187 	}
3188 }
3189 
3190 /*
3191  * Verify that the memory range specified by the memtype/offset/len pair is
3192  * valid and lies entirely within the memtype specified.  The global address of
3193  * the start of the range is returned in addr.
3194  */
3195 static int
3196 validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, uint32_t len,
3197     uint32_t *addr)
3198 {
3199 	uint32_t em, addr_len, maddr;
3200 
3201 	/* Memory can only be accessed in naturally aligned 4 byte units */
3202 	if (off & 3 || len & 3 || len == 0)
3203 		return (EINVAL);
3204 
3205 	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3206 	switch (fwmtype_to_hwmtype(mtype)) {
3207 	case MEM_EDC0:
3208 		if (!(em & F_EDRAM0_ENABLE))
3209 			return (EINVAL);
3210 		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3211 		maddr = G_EDRAM0_BASE(addr_len) << 20;
3212 		break;
3213 	case MEM_EDC1:
3214 		if (!(em & F_EDRAM1_ENABLE))
3215 			return (EINVAL);
3216 		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3217 		maddr = G_EDRAM1_BASE(addr_len) << 20;
3218 		break;
3219 	case MEM_MC:
3220 		if (!(em & F_EXT_MEM_ENABLE))
3221 			return (EINVAL);
3222 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3223 		maddr = G_EXT_MEM_BASE(addr_len) << 20;
3224 		break;
3225 	case MEM_MC1:
3226 		if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
3227 			return (EINVAL);
3228 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3229 		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
3230 		break;
3231 	default:
3232 		return (EINVAL);
3233 	}
3234 
3235 	*addr = maddr + off;	/* global address */
3236 	return (validate_mem_range(sc, *addr, len));
3237 }
3238 
3239 static int
3240 fixup_devlog_params(struct adapter *sc)
3241 {
3242 	struct devlog_params *dparams = &sc->params.devlog;
3243 	int rc;
3244 
3245 	rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
3246 	    dparams->size, &dparams->addr);
3247 
3248 	return (rc);
3249 }
3250 
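/*
 * Total interrupt vectors needed: T4_EXTRA_INTR for the adapter-wide
 * interrupts (firmware event queue etc.), one per NIC/TOE rx queue on each
 * port's main VI, and one per rx queue on each extra VI.  An extra VI's NIC
 * and netmap rx queues share vectors (hence the max() below); its TOE rx
 * queues get their own.
 */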
3251 static void
3252 update_nirq(struct intrs_and_queues *iaq, int nports)
3253 {
3254 	int extra = T4_EXTRA_INTR;
3255 
3256 	iaq->nirq = extra;
3257 	iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq);
3258 	iaq->nirq += nports * (iaq->num_vis - 1) *
3259 	    max(iaq->nrxq_vi, iaq->nnmrxq_vi);
3260 	iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
3261 }
3262 
3263 /*
3264  * Adjust requirements to fit the number of interrupts available.
3265  */
3266 static void
3267 calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
3268     int navail)
3269 {
3270 	int old_nirq;
3271 	const int nports = sc->params.nports;
3272 
3273 	MPASS(nports > 0);
3274 	MPASS(navail > 0);
3275 
3276 	bzero(iaq, sizeof(*iaq));
3277 	iaq->intr_type = itype;
3278 	iaq->num_vis = t4_num_vis;
3279 	iaq->ntxq = t4_ntxq;
3280 	iaq->ntxq_vi = t4_ntxq_vi;
3281 	iaq->nrxq = t4_nrxq;
3282 	iaq->nrxq_vi = t4_nrxq_vi;
3283 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
3284 	if (is_offload(sc) || is_ethoffload(sc)) {
3285 		iaq->nofldtxq = t4_nofldtxq;
3286 		iaq->nofldtxq_vi = t4_nofldtxq_vi;
3287 	}
3288 #endif
3289 #ifdef TCP_OFFLOAD
3290 	if (is_offload(sc)) {
3291 		iaq->nofldrxq = t4_nofldrxq;
3292 		iaq->nofldrxq_vi = t4_nofldrxq_vi;
3293 	}
3294 #endif
3295 #ifdef DEV_NETMAP
3296 	iaq->nnmtxq_vi = t4_nnmtxq_vi;
3297 	iaq->nnmrxq_vi = t4_nnmrxq_vi;
3298 #endif
3299 
3300 	update_nirq(iaq, nports);
3301 	if (iaq->nirq <= navail &&
3302 	    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3303 		/*
3304 		 * This is the normal case -- there are enough interrupts for
3305 		 * everything.
3306 		 */
3307 		goto done;
3308 	}
3309 
3310 	/*
3311 	 * If extra VIs have been configured try reducing their count and see if
3312 	 * that works.
3313 	 */
3314 	while (iaq->num_vis > 1) {
3315 		iaq->num_vis--;
3316 		update_nirq(iaq, nports);
3317 		if (iaq->nirq <= navail &&
3318 		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3319 			device_printf(sc->dev, "virtual interfaces per port "
3320 			    "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
3321 			    "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
3322 			    "itype %d, navail %u, nirq %d.\n",
3323 			    iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
3324 			    iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
3325 			    itype, navail, iaq->nirq);
3326 			goto done;
3327 		}
3328 	}
3329 
3330 	/*
3331 	 * Extra VIs will not be created.  Log a message if they were requested.
3332 	 */
3333 	MPASS(iaq->num_vis == 1);
3334 	iaq->ntxq_vi = iaq->nrxq_vi = 0;
3335 	iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
3336 	iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
3337 	if (iaq->num_vis != t4_num_vis) {
3338 		device_printf(sc->dev, "extra virtual interfaces disabled.  "
3339 		    "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
3340 		    "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
3341 		    iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
3342 		    iaq->nnmrxq_vi, itype, navail, iaq->nirq);
3343 	}
3344 
3345 	/*
3346 	 * Keep reducing the number of NIC rx queues to the next lower power of
3347 	 * 2 (for even RSS distribution) and halving the TOE rx queues and see
3348 	 * if that works.
3349 	 */
3350 	do {
3351 		if (iaq->nrxq > 1) {
3352 			do {
3353 				iaq->nrxq--;
3354 			} while (!powerof2(iaq->nrxq));
3355 		}
3356 		if (iaq->nofldrxq > 1)
3357 			iaq->nofldrxq >>= 1;
3358 
3359 		old_nirq = iaq->nirq;
3360 		update_nirq(iaq, nports);
3361 		if (iaq->nirq <= navail &&
3362 		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3363 			device_printf(sc->dev, "running with reduced number of "
3364 			    "rx queues because of shortage of interrupts.  "
3365 			    "nrxq=%u, nofldrxq=%u.  "
3366 			    "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
3367 			    iaq->nofldrxq, itype, navail, iaq->nirq);
3368 			goto done;
3369 		}
3370 	} while (old_nirq != iaq->nirq);
3371 
3372 	/* One interrupt for everything.  Ugh. */
3373 	device_printf(sc->dev, "running with minimal number of queues.  "
3374 	    "itype %d, navail %u.\n", itype, navail);
3375 	iaq->nirq = 1;
3376 	MPASS(iaq->nrxq == 1);
3377 	iaq->ntxq = 1;
3378 	if (iaq->nofldrxq > 1)
3379 		iaq->nofldtxq = 1;
3380 done:
3381 	MPASS(iaq->num_vis > 0);
3382 	if (iaq->num_vis > 1) {
3383 		MPASS(iaq->nrxq_vi > 0);
3384 		MPASS(iaq->ntxq_vi > 0);
3385 	}
3386 	MPASS(iaq->nirq > 0);
3387 	MPASS(iaq->nrxq > 0);
3388 	MPASS(iaq->ntxq > 0);
3389 	if (itype == INTR_MSI) {
3390 		MPASS(powerof2(iaq->nirq));
3391 	}
3392 }
3393 
3394 static int
3395 cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
3396 {
3397 	int rc, itype, navail, nalloc;
3398 
3399 	for (itype = INTR_MSIX; itype; itype >>= 1) {
3400 
3401 		if ((itype & t4_intr_types) == 0)
3402 			continue;	/* not allowed */
3403 
3404 		if (itype == INTR_MSIX)
3405 			navail = pci_msix_count(sc->dev);
3406 		else if (itype == INTR_MSI)
3407 			navail = pci_msi_count(sc->dev);
3408 		else
3409 			navail = 1;
3410 restart:
3411 		if (navail == 0)
3412 			continue;
3413 
3414 		calculate_iaq(sc, iaq, itype, navail);
3415 		nalloc = iaq->nirq;
3416 		rc = 0;
3417 		if (itype == INTR_MSIX)
3418 			rc = pci_alloc_msix(sc->dev, &nalloc);
3419 		else if (itype == INTR_MSI)
3420 			rc = pci_alloc_msi(sc->dev, &nalloc);
3421 
3422 		if (rc == 0 && nalloc > 0) {
3423 			if (nalloc == iaq->nirq)
3424 				return (0);
3425 
3426 			/*
3427 			 * Didn't get the number requested.  Use whatever number
3428 			 * the kernel is willing to allocate.
3429 			 */
3430 			device_printf(sc->dev, "fewer vectors than requested, "
3431 			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
3432 			    itype, iaq->nirq, nalloc);
3433 			pci_release_msi(sc->dev);
3434 			navail = nalloc;
3435 			goto restart;
3436 		}
3437 
3438 		device_printf(sc->dev,
3439 		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
3440 		    itype, rc, iaq->nirq, nalloc);
3441 	}
3442 
3443 	device_printf(sc->dev,
3444 	    "failed to find a usable interrupt type.  "
3445 	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
3446 	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
3447 
3448 	return (ENXIO);
3449 }
3450 
3451 #define FW_VERSION(chip) ( \
3452     V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
3453     V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
3454     V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
3455     V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
3456 #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
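/*
 * e.g. FW_VERSION(T5) packs T5FW_VERSION_{MAJOR,MINOR,MICRO,BUILD} into a
 * single 32-bit word; the G_FW_HDR_FW_VER_* getters used below unpack it.
 */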
3457 
3458 /* Just enough of fw_hdr to cover all version info. */
3459 struct fw_h {
3460 	__u8	ver;
3461 	__u8	chip;
3462 	__be16	len512;
3463 	__be32	fw_ver;
3464 	__be32	tp_microcode_ver;
3465 	__u8	intfver_nic;
3466 	__u8	intfver_vnic;
3467 	__u8	intfver_ofld;
3468 	__u8	intfver_ri;
3469 	__u8	intfver_iscsipdu;
3470 	__u8	intfver_iscsi;
3471 	__u8	intfver_fcoepdu;
3472 	__u8	intfver_fcoe;
3473 };
3474 /* Spot check a couple of fields. */
3475 CTASSERT(offsetof(struct fw_h, fw_ver) == offsetof(struct fw_hdr, fw_ver));
3476 CTASSERT(offsetof(struct fw_h, intfver_nic) == offsetof(struct fw_hdr, intfver_nic));
3477 CTASSERT(offsetof(struct fw_h, intfver_fcoe) == offsetof(struct fw_hdr, intfver_fcoe));
3478 
3479 struct fw_info {
3480 	uint8_t chip;
3481 	char *kld_name;
3482 	char *fw_mod_name;
3483 	struct fw_h fw_h;
3484 } fw_info[] = {
3485 	{
3486 		.chip = CHELSIO_T4,
3487 		.kld_name = "t4fw_cfg",
3488 		.fw_mod_name = "t4fw",
3489 		.fw_h = {
3490 			.chip = FW_HDR_CHIP_T4,
3491 			.fw_ver = htobe32(FW_VERSION(T4)),
3492 			.intfver_nic = FW_INTFVER(T4, NIC),
3493 			.intfver_vnic = FW_INTFVER(T4, VNIC),
3494 			.intfver_ofld = FW_INTFVER(T4, OFLD),
3495 			.intfver_ri = FW_INTFVER(T4, RI),
3496 			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
3497 			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
3498 			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
3499 			.intfver_fcoe = FW_INTFVER(T4, FCOE),
3500 		},
3501 	}, {
3502 		.chip = CHELSIO_T5,
3503 		.kld_name = "t5fw_cfg",
3504 		.fw_mod_name = "t5fw",
3505 		.fw_h = {
3506 			.chip = FW_HDR_CHIP_T5,
3507 			.fw_ver = htobe32(FW_VERSION(T5)),
3508 			.intfver_nic = FW_INTFVER(T5, NIC),
3509 			.intfver_vnic = FW_INTFVER(T5, VNIC),
3510 			.intfver_ofld = FW_INTFVER(T5, OFLD),
3511 			.intfver_ri = FW_INTFVER(T5, RI),
3512 			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
3513 			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
3514 			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
3515 			.intfver_fcoe = FW_INTFVER(T5, FCOE),
3516 		},
3517 	}, {
3518 		.chip = CHELSIO_T6,
3519 		.kld_name = "t6fw_cfg",
3520 		.fw_mod_name = "t6fw",
3521 		.fw_h = {
3522 			.chip = FW_HDR_CHIP_T6,
3523 			.fw_ver = htobe32(FW_VERSION(T6)),
3524 			.intfver_nic = FW_INTFVER(T6, NIC),
3525 			.intfver_vnic = FW_INTFVER(T6, VNIC),
3526 			.intfver_ofld = FW_INTFVER(T6, OFLD),
3527 			.intfver_ri = FW_INTFVER(T6, RI),
3528 			.intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
3529 			.intfver_iscsi = FW_INTFVER(T6, ISCSI),
3530 			.intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
3531 			.intfver_fcoe = FW_INTFVER(T6, FCOE),
3532 		},
3533 	}
3534 };
3535 
3536 static struct fw_info *
3537 find_fw_info(int chip)
3538 {
3539 	int i;
3540 
3541 	for (i = 0; i < nitems(fw_info); i++) {
3542 		if (fw_info[i].chip == chip)
3543 			return (&fw_info[i]);
3544 	}
3545 	return (NULL);
3546 }
3547 
3548 /*
3549  * Is the given firmware API compatible with the one the driver was compiled
3550  * with?
3551  */
3552 static int
3553 fw_compatible(const struct fw_h *hdr1, const struct fw_h *hdr2)
3554 {
3555 
3556 	/* short circuit if it's the exact same firmware version */
3557 	if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3558 		return (1);
3559 
3560 	/*
3561 	 * XXX: Is this too conservative?  Perhaps I should limit this to the
3562 	 * features that are supported in the driver.
3563 	 */
3564 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3565 	if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3566 	    SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3567 	    SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3568 		return (1);
3569 #undef SAME_INTF
3570 
3571 	return (0);
3572 }
3573 
3574 static int
3575 load_fw_module(struct adapter *sc, const struct firmware **dcfg,
3576     const struct firmware **fw)
3577 {
3578 	struct fw_info *fw_info;
3579 
3580 	*dcfg = NULL;
3581 	if (fw != NULL)
3582 		*fw = NULL;
3583 
3584 	fw_info = find_fw_info(chip_id(sc));
3585 	if (fw_info == NULL) {
3586 		device_printf(sc->dev,
3587 		    "unable to look up firmware information for chip %d.\n",
3588 		    chip_id(sc));
3589 		return (EINVAL);
3590 	}
3591 
3592 	*dcfg = firmware_get(fw_info->kld_name);
3593 	if (*dcfg != NULL) {
3594 		if (fw != NULL)
3595 			*fw = firmware_get(fw_info->fw_mod_name);
3596 		return (0);
3597 	}
3598 
3599 	return (ENOENT);
3600 }
3601 
3602 static void
3603 unload_fw_module(struct adapter *sc, const struct firmware *dcfg,
3604     const struct firmware *fw)
3605 {
3606 
3607 	if (fw != NULL)
3608 		firmware_put(fw, FIRMWARE_UNLOAD);
3609 	if (dcfg != NULL)
3610 		firmware_put(dcfg, FIRMWARE_UNLOAD);
3611 }
3612 
3613 /*
3614  * Return values:
3615  * 0 means no firmware install attempted.
3616  * ERESTART means a firmware install was attempted and was successful.
3617  * +ve errno means a firmware install was attempted but failed.
3618  */
3619 static int
3620 install_kld_firmware(struct adapter *sc, struct fw_h *card_fw,
3621     const struct fw_h *drv_fw, const char *reason, int *already)
3622 {
3623 	const struct firmware *cfg, *fw;
3624 	const uint32_t c = be32toh(card_fw->fw_ver);
3625 	uint32_t d, k;
3626 	int rc, fw_install;
3627 	struct fw_h bundled_fw;
3628 	bool load_attempted;
3629 
3630 	cfg = fw = NULL;
3631 	load_attempted = false;
3632 	fw_install = t4_fw_install < 0 ? -t4_fw_install : t4_fw_install;
3633 
3634 	memcpy(&bundled_fw, drv_fw, sizeof(bundled_fw));
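	/*
	 * A negative hw.cxgbe.fw_install requests the same action as its
	 * absolute value, but with the KLD firmware's header (loaded below)
	 * standing in for the compiled-in one during the install checks.
	 */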
3635 	if (t4_fw_install < 0) {
3636 		rc = load_fw_module(sc, &cfg, &fw);
3637 		if (rc != 0 || fw == NULL) {
3638 			device_printf(sc->dev,
3639 			    "failed to load firmware module: %d. cfg %p, fw %p;"
3640 			    " will use compiled-in firmware version for "
3641 			    "hw.cxgbe.fw_install checks.\n",
3642 			    rc, cfg, fw);
3643 		} else {
3644 			memcpy(&bundled_fw, fw->data, sizeof(bundled_fw));
3645 		}
3646 		load_attempted = true;
3647 	}
3648 	d = be32toh(bundled_fw.fw_ver);
3649 
3650 	if (reason != NULL)
3651 		goto install;
3652 
3653 	if ((sc->flags & FW_OK) == 0) {
3654 
3655 		if (c == 0xffffffff) {
3656 			reason = "missing";
3657 			goto install;
3658 		}
3659 
3660 		rc = 0;
3661 		goto done;
3662 	}
3663 
3664 	if (!fw_compatible(card_fw, &bundled_fw)) {
3665 		reason = "incompatible or unusable";
3666 		goto install;
3667 	}
3668 
3669 	if (d > c) {
3670 		reason = "older than the version bundled with this driver";
3671 		goto install;
3672 	}
3673 
3674 	if (fw_install == 2 && d != c) {
3675 		reason = "different than the version bundled with this driver";
3676 		goto install;
3677 	}
3678 
3679 	/* No reason to do anything to the firmware already on the card. */
3680 	rc = 0;
3681 	goto done;
3682 
3683 install:
3684 	rc = 0;
3685 	if ((*already)++)
3686 		goto done;
3687 
3688 	if (fw_install == 0) {
3689 		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3690 		    "but the driver is prohibited from installing a firmware "
3691 		    "on the card.\n",
3692 		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3693 		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3694 
3695 		goto done;
3696 	}
3697 
3698 	/*
3699 	 * We'll attempt to install a firmware.  Load the module first (if it
3700 	 * hasn't been loaded already).
3701 	 */
3702 	if (!load_attempted) {
3703 		rc = load_fw_module(sc, &cfg, &fw);
3704 		if (rc != 0 || fw == NULL) {
3705 			device_printf(sc->dev,
3706 			    "failed to load firmware module: %d. cfg %p, fw %p\n",
3707 			    rc, cfg, fw);
3708 			/* carry on */
3709 		}
3710 	}
3711 	if (fw == NULL) {
3712 		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3713 		    "but the driver cannot take corrective action because it "
3714 		    "is unable to load the firmware module.\n",
3715 		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3716 		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3717 		rc = sc->flags & FW_OK ? 0 : ENOENT;
3718 		goto done;
3719 	}
3720 	k = be32toh(((const struct fw_hdr *)fw->data)->fw_ver);
3721 	if (k != d) {
3722 		MPASS(t4_fw_install > 0);
3723 		device_printf(sc->dev,
3724 		    "firmware in KLD (%u.%u.%u.%u) is not what the driver was "
3725 		    "expecting (%u.%u.%u.%u) and will not be used.\n",
3726 		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3727 		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k),
3728 		    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3729 		    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3730 		rc = sc->flags & FW_OK ? 0 : EINVAL;
3731 		goto done;
3732 	}
3733 
3734 	device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3735 	    "installing firmware %u.%u.%u.%u on card.\n",
3736 	    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3737 	    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3738 	    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3739 	    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3740 
3741 	rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3742 	if (rc != 0) {
3743 		device_printf(sc->dev, "failed to install firmware: %d\n", rc);
3744 	} else {
3745 		/* Installed successfully, update the cached header too. */
3746 		rc = ERESTART;
3747 		memcpy(card_fw, fw->data, sizeof(*card_fw));
3748 	}
3749 done:
3750 	unload_fw_module(sc, cfg, fw);
3751 
3752 	return (rc);
3753 }
3754 
3755 /*
3756  * Establish contact with the firmware and attempt to become the master driver.
3757  *
3758  * A firmware will be installed to the card if needed (if the driver is allowed
3759  * to do so).
3760  */
3761 static int
3762 contact_firmware(struct adapter *sc)
3763 {
3764 	int rc, already = 0;
3765 	enum dev_state state;
3766 	struct fw_info *fw_info;
3767 	struct fw_hdr *card_fw;		/* fw on the card */
3768 	const struct fw_h *drv_fw;
3769 
3770 	fw_info = find_fw_info(chip_id(sc));
3771 	if (fw_info == NULL) {
3772 		device_printf(sc->dev,
3773 		    "unable to look up firmware information for chip %d.\n",
3774 		    chip_id(sc));
3775 		return (EINVAL);
3776 	}
3777 	drv_fw = &fw_info->fw_h;
3778 
3779 	/* Read the header of the firmware on the card */
3780 	card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
3781 restart:
3782 	rc = -t4_get_fw_hdr(sc, card_fw);
3783 	if (rc != 0) {
3784 		device_printf(sc->dev,
3785 		    "unable to read firmware header from card's flash: %d\n",
3786 		    rc);
3787 		goto done;
3788 	}
3789 
3790 	rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw, NULL,
3791 	    &already);
3792 	if (rc == ERESTART)
3793 		goto restart;
3794 	if (rc != 0)
3795 		goto done;
3796 
3797 	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
3798 	if (rc < 0 || state == DEV_STATE_ERR) {
3799 		rc = -rc;
3800 		device_printf(sc->dev,
3801 		    "failed to connect to the firmware: %d, %d.  "
3802 		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3803 #if 0
3804 		if (install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
3805 		    "not responding properly to HELLO", &already) == ERESTART)
3806 			goto restart;
3807 #endif
3808 		goto done;
3809 	}
3810 	MPASS(be32toh(card_fw->flags) & FW_HDR_FLAGS_RESET_HALT);
3811 	sc->flags |= FW_OK;	/* The firmware responded to the FW_HELLO. */
3812 
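	/*
	 * On success t4_fw_hello returns the PF that holds mastership, so
	 * the comparison below tells us whether that is us.
	 */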
3813 	if (rc == sc->pf) {
3814 		sc->flags |= MASTER_PF;
3815 		rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
3816 		    NULL, &already);
3817 		if (rc == ERESTART)
3818 			rc = 0;
3819 		else if (rc != 0)
3820 			goto done;
3821 	} else if (state == DEV_STATE_UNINIT) {
3822 		/*
3823 		 * We didn't get to be the master so we definitely won't be
3824 		 * configuring the chip.  It's a bug if someone else hasn't
3825 		 * configured it already.
3826 		 */
3827 		device_printf(sc->dev, "couldn't be master(%d), "
3828 		    "device not already initialized either(%d).  "
3829 		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3830 		rc = EPROTO;
3831 		goto done;
3832 	} else {
3833 		/*
3834 		 * Some other PF is the master and has configured the chip.
3835 		 * This is allowed but untested.
3836 		 */
3837 		device_printf(sc->dev, "PF%d is master, device state %d.  "
3838 		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3839 		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", rc);
3840 		sc->cfcsum = 0;
3841 		rc = 0;
3842 	}
3843 done:
3844 	if (rc != 0 && sc->flags & FW_OK) {
3845 		t4_fw_bye(sc, sc->mbox);
3846 		sc->flags &= ~FW_OK;
3847 	}
3848 	free(card_fw, M_CXGBE);
3849 	return (rc);
3850 }
3851 
3852 static int
3853 copy_cfg_file_to_card(struct adapter *sc, char *cfg_file,
3854     uint32_t mtype, uint32_t moff)
3855 {
3856 	struct fw_info *fw_info;
3857 	const struct firmware *dcfg, *rcfg = NULL;
3858 	const uint32_t *cfdata;
3859 	uint32_t cflen, addr;
3860 	int rc;
3861 
3862 	load_fw_module(sc, &dcfg, NULL);
3863 
3864 	/* Card specific interpretation of "default". */
3865 	if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3866 		if (pci_get_device(sc->dev) == 0x440a)
3867 			snprintf(cfg_file, sizeof(t4_cfg_file), UWIRE_CF);
3868 		if (is_fpga(sc))
3869 			snprintf(cfg_file, sizeof(t4_cfg_file), FPGA_CF);
3870 	}
3871 
3872 	if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3873 		if (dcfg == NULL) {
3874 			device_printf(sc->dev,
3875 			    "KLD with default config is not available.\n");
3876 			rc = ENOENT;
3877 			goto done;
3878 		}
3879 		cfdata = dcfg->data;
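		/*
		 * Round the length down to a multiple of 4; the config is
		 * copied to the card as 32-bit words.
		 */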
3880 		cflen = dcfg->datasize & ~3;
3881 	} else {
3882 		char s[32];
3883 
3884 		fw_info = find_fw_info(chip_id(sc));
3885 		if (fw_info == NULL) {
3886 			device_printf(sc->dev,
3887 			    "unable to look up firmware information for chip %d.\n",
3888 			    chip_id(sc));
3889 			rc = EINVAL;
3890 			goto done;
3891 		}
3892 		snprintf(s, sizeof(s), "%s_%s", fw_info->kld_name, cfg_file);
3893 
3894 		rcfg = firmware_get(s);
3895 		if (rcfg == NULL) {
3896 			device_printf(sc->dev,
3897 			    "unable to load module \"%s\" for configuration "
3898 			    "profile \"%s\".\n", s, cfg_file);
3899 			rc = ENOENT;
3900 			goto done;
3901 		}
3902 		cfdata = rcfg->data;
3903 		cflen = rcfg->datasize & ~3;
3904 	}
3905 
3906 	if (cflen > FLASH_CFG_MAX_SIZE) {
3907 		device_printf(sc->dev,
3908 		    "config file too long (%d, max allowed is %d).\n",
3909 		    cflen, FLASH_CFG_MAX_SIZE);
3910 		rc = EINVAL;
3911 		goto done;
3912 	}
3913 
3914 	rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
3915 	if (rc != 0) {
3916 		device_printf(sc->dev,
3917 		    "%s: addr (%d/0x%x) or len %d is not valid: %d.\n",
3918 		    __func__, mtype, moff, cflen, rc);
3919 		rc = EINVAL;
3920 		goto done;
3921 	}
3922 	write_via_memwin(sc, 2, addr, cfdata, cflen);
3923 done:
3924 	if (rcfg != NULL)
3925 		firmware_put(rcfg, FIRMWARE_UNLOAD);
3926 	unload_fw_module(sc, dcfg, NULL);
3927 	return (rc);
3928 }
3929 
3930 struct caps_allowed {
3931 	uint16_t nbmcaps;
3932 	uint16_t linkcaps;
3933 	uint16_t switchcaps;
3934 	uint16_t niccaps;
3935 	uint16_t toecaps;
3936 	uint16_t rdmacaps;
3937 	uint16_t cryptocaps;
3938 	uint16_t iscsicaps;
3939 	uint16_t fcoecaps;
3940 };
3941 
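/*
 * These compose the 32-bit parameter IDs that t4_query_params() and
 * t4_set_params() exchange with the firmware: a mnemonic (device-wide
 * vs. per-PF/VF) plus the parameter index.
 */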
3942 #define FW_PARAM_DEV(param) \
3943 	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
3944 	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
3945 #define FW_PARAM_PFVF(param) \
3946 	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
3947 	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
3948 
3949 /*
3950  * Provide a configuration profile to the firmware and have it initialize the
3951  * chip accordingly.  This may involve uploading a configuration file to the
3952  * card.
3953  */
3954 static int
3955 apply_cfg_and_initialize(struct adapter *sc, char *cfg_file,
3956     const struct caps_allowed *caps_allowed)
3957 {
3958 	int rc;
3959 	struct fw_caps_config_cmd caps;
3960 	uint32_t mtype, moff, finicsum, cfcsum, param, val;
3961 
3962 	rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
3963 	if (rc != 0) {
3964 		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
3965 		return (rc);
3966 	}
3967 
3968 	bzero(&caps, sizeof(caps));
3969 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3970 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
3971 	if (strncmp(cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0) {
3972 		mtype = 0;
3973 		moff = 0;
3974 		caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3975 	} else if (strncmp(cfg_file, FLASH_CF, sizeof(t4_cfg_file)) == 0) {
3976 		mtype = FW_MEMTYPE_FLASH;
3977 		moff = t4_flash_cfg_addr(sc);
3978 		caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3979 		    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3980 		    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
3981 		    FW_LEN16(caps));
3982 	} else {
3983 		/*
3984 		 * Ask the firmware where it wants us to upload the config file.
3985 		 */
3986 		param = FW_PARAM_DEV(CF);
3987 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3988 		if (rc != 0) {
3989 			/* No support for config file?  Shouldn't happen. */
3990 			device_printf(sc->dev,
3991 			    "failed to query config file location: %d.\n", rc);
3992 			goto done;
3993 		}
3994 		mtype = G_FW_PARAMS_PARAM_Y(val);
3995 		moff = G_FW_PARAMS_PARAM_Z(val) << 16;
3996 		caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3997 		    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3998 		    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
3999 		    FW_LEN16(caps));
4000 
4001 		rc = copy_cfg_file_to_card(sc, cfg_file, mtype, moff);
4002 		if (rc != 0) {
4003 			device_printf(sc->dev,
4004 			    "failed to upload config file to card: %d.\n", rc);
4005 			goto done;
4006 		}
4007 	}
4008 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4009 	if (rc != 0) {
4010 		device_printf(sc->dev, "failed to pre-process config file: %d "
4011 		    "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
4012 		goto done;
4013 	}
4014 
4015 	finicsum = be32toh(caps.finicsum);
4016 	cfcsum = be32toh(caps.cfcsum);	/* actual */
4017 	if (finicsum != cfcsum) {
4018 		device_printf(sc->dev,
4019 		    "WARNING: config file checksum mismatch: %08x %08x\n",
4020 		    finicsum, cfcsum);
4021 	}
4022 	sc->cfcsum = cfcsum;
4023 	snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", cfg_file);
4024 
4025 	/*
4026 	 * Let the firmware know what features will (not) be used so it can tune
4027 	 * things accordingly.
4028 	 */
4029 #define LIMIT_CAPS(x) do { \
4030 	caps.x##caps &= htobe16(caps_allowed->x##caps); \
4031 } while (0)
4032 	LIMIT_CAPS(nbm);
4033 	LIMIT_CAPS(link);
4034 	LIMIT_CAPS(switch);
4035 	LIMIT_CAPS(nic);
4036 	LIMIT_CAPS(toe);
4037 	LIMIT_CAPS(rdma);
4038 	LIMIT_CAPS(crypto);
4039 	LIMIT_CAPS(iscsi);
4040 	LIMIT_CAPS(fcoe);
4041 #undef LIMIT_CAPS
4042 	if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
4043 		/*
4044 		 * TOE and hashfilters are mutually exclusive.  It is a config
4045 		 * file or firmware bug if both are reported as available.  Try
4046 		 * to cope with the situation in non-debug builds by disabling
4047 		 * TOE.
4048 		 */
4049 		MPASS(caps.toecaps == 0);
4050 
4051 		caps.toecaps = 0;
4052 		caps.rdmacaps = 0;
4053 		caps.iscsicaps = 0;
4054 	}
4055 
4056 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4057 	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
4058 	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4059 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
4060 	if (rc != 0) {
4061 		device_printf(sc->dev,
4062 		    "failed to process config file: %d.\n", rc);
4063 		goto done;
4064 	}
4065 
4066 	t4_tweak_chip_settings(sc);
4067 	set_params__pre_init(sc);
4068 
4069 	/* get basic stuff going */
4070 	rc = -t4_fw_initialize(sc, sc->mbox);
4071 	if (rc != 0) {
4072 		device_printf(sc->dev, "fw_initialize failed: %d.\n", rc);
4073 		goto done;
4074 	}
4075 done:
4076 	return (rc);
4077 }
4078 
4079 /*
4080  * Partition chip resources for use between various PFs, VFs, etc.
4081  */
4082 static int
4083 partition_resources(struct adapter *sc)
4084 {
4085 	char cfg_file[sizeof(t4_cfg_file)];
4086 	struct caps_allowed caps_allowed;
4087 	int rc;
4088 	bool fallback;
4089 
4090 	/* Only the master driver gets to configure the chip resources. */
4091 	MPASS(sc->flags & MASTER_PF);
4092 
4093 #define COPY_CAPS(x) do { \
4094 	caps_allowed.x##caps = t4_##x##caps_allowed; \
4095 } while (0)
4096 	bzero(&caps_allowed, sizeof(caps_allowed));
4097 	COPY_CAPS(nbm);
4098 	COPY_CAPS(link);
4099 	COPY_CAPS(switch);
4100 	COPY_CAPS(nic);
4101 	COPY_CAPS(toe);
4102 	COPY_CAPS(rdma);
4103 	COPY_CAPS(crypto);
4104 	COPY_CAPS(iscsi);
4105 	COPY_CAPS(fcoe);
4106 	fallback = sc->debug_flags & DF_DISABLE_CFG_RETRY ? false : true;
4107 	snprintf(cfg_file, sizeof(cfg_file), "%s", t4_cfg_file);
4108 retry:
4109 	rc = apply_cfg_and_initialize(sc, cfg_file, &caps_allowed);
4110 	if (rc != 0 && fallback) {
4111 		device_printf(sc->dev,
4112 		    "failed (%d) to configure card with \"%s\" profile, "
4113 		    "will fall back to a basic configuration and retry.\n",
4114 		    rc, cfg_file);
4115 		snprintf(cfg_file, sizeof(cfg_file), "%s", BUILTIN_CF);
4116 		bzero(&caps_allowed, sizeof(caps_allowed));
4117 		COPY_CAPS(switch);
4118 		caps_allowed.niccaps = FW_CAPS_CONFIG_NIC;
4119 		fallback = false;
4120 		goto retry;
4121 	}
4122 #undef COPY_CAPS
4123 	return (rc);
4124 }
4125 
4126 /*
4127  * Retrieve parameters that are needed (or nice to have) very early.
4128  */
4129 static int
4130 get_params__pre_init(struct adapter *sc)
4131 {
4132 	int rc;
4133 	uint32_t param[2], val[2];
4134 
4135 	t4_get_version_info(sc);
4136 
4137 	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
4138 	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
4139 	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
4140 	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
4141 	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
4142 
4143 	snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
4144 	    G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
4145 	    G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
4146 	    G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
4147 	    G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
4148 
4149 	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
4150 	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
4151 	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
4152 	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
4153 	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
4154 
4155 	snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
4156 	    G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
4157 	    G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
4158 	    G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
4159 	    G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
4160 
4161 	param[0] = FW_PARAM_DEV(PORTVEC);
4162 	param[1] = FW_PARAM_DEV(CCLK);
4163 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4164 	if (rc != 0) {
4165 		device_printf(sc->dev,
4166 		    "failed to query parameters (pre_init): %d.\n", rc);
4167 		return (rc);
4168 	}
4169 
4170 	sc->params.portvec = val[0];
4171 	sc->params.nports = bitcount32(val[0]);
4172 	sc->params.vpd.cclk = val[1];
4173 
4174 	/* Read device log parameters. */
4175 	rc = -t4_init_devlog_params(sc, 1);
4176 	if (rc == 0)
4177 		fixup_devlog_params(sc);
4178 	else {
4179 		device_printf(sc->dev,
4180 		    "failed to get devlog parameters: %d.\n", rc);
4181 		rc = 0;	/* devlog isn't critical for device operation */
4182 	}
4183 
4184 	return (rc);
4185 }
4186 
4187 /*
4188  * Any params that need to be set before FW_INITIALIZE.
4189  */
4190 static int
4191 set_params__pre_init(struct adapter *sc)
4192 {
4193 	int rc = 0;
4194 	uint32_t param, val;
4195 
4196 	if (chip_id(sc) >= CHELSIO_T6) {
4197 		param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT);
4198 		val = 1;
4199 		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4200 		/* firmwares < 1.20.1.0 do not have this param. */
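		/*
		 * The V_FW_HDR_FW_VER_* fields pack major.minor.micro.build
		 * from high bits to low, so a plain "<" compares versions.
		 */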
4201 		if (rc == FW_EINVAL && sc->params.fw_vers <
4202 		    (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4203 		    V_FW_HDR_FW_VER_MICRO(1) | V_FW_HDR_FW_VER_BUILD(0))) {
4204 			rc = 0;
4205 		}
4206 		if (rc != 0) {
4207 			device_printf(sc->dev,
4208 			    "failed to enable high priority filters: %d.\n",
4209 			    rc);
4210 		}
4211 	}
4212 
4213 	/* Enable opaque VIIDs with firmwares that support it. */
4214 	param = FW_PARAM_DEV(OPAQUE_VIID_SMT_EXTN);
4215 	val = 1;
4216 	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4217 	if (rc == 0 && val == 1)
4218 		sc->params.viid_smt_extn_support = true;
4219 	else
4220 		sc->params.viid_smt_extn_support = false;
4221 
4222 	return (rc);
4223 }
4224 
4225 /*
4226  * Retrieve various parameters that are of interest to the driver.  The device
4227  * has been initialized by the firmware at this point.
4228  */
4229 static int
4230 get_params__post_init(struct adapter *sc)
4231 {
4232 	int rc;
4233 	uint32_t param[7], val[7];
4234 	struct fw_caps_config_cmd caps;
4235 
4236 	param[0] = FW_PARAM_PFVF(IQFLINT_START);
4237 	param[1] = FW_PARAM_PFVF(EQ_START);
4238 	param[2] = FW_PARAM_PFVF(FILTER_START);
4239 	param[3] = FW_PARAM_PFVF(FILTER_END);
4240 	param[4] = FW_PARAM_PFVF(L2T_START);
4241 	param[5] = FW_PARAM_PFVF(L2T_END);
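	/*
	 * FW_PARAM_DEV() leaves PARAM_Y zero, so the VDD diagnostic
	 * parameter below is composed by hand.
	 */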
4242 	param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
4243 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
4244 	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
4245 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
4246 	if (rc != 0) {
4247 		device_printf(sc->dev,
4248 		    "failed to query parameters (post_init): %d.\n", rc);
4249 		return (rc);
4250 	}
4251 
4252 	sc->sge.iq_start = val[0];
4253 	sc->sge.eq_start = val[1];
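	/*
	 * A filter region exists only if its end is past its start; the
	 * casts to int are presumably there to keep the comparison sane
	 * should the firmware report an all-ones "no region" value.
	 */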
4254 	if ((int)val[3] > (int)val[2]) {
4255 		sc->tids.ftid_base = val[2];
4256 		sc->tids.ftid_end = val[3];
4257 		sc->tids.nftids = val[3] - val[2] + 1;
4258 	}
4259 	sc->vres.l2t.start = val[4];
4260 	sc->vres.l2t.size = val[5] - val[4] + 1;
4261 	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
4262 	    ("%s: L2 table size (%u) larger than expected (%u)",
4263 	    __func__, sc->vres.l2t.size, L2T_SIZE));
4264 	sc->params.core_vdd = val[6];
4265 
4266 	if (chip_id(sc) >= CHELSIO_T6) {
4267 
4268 		sc->tids.tid_base = t4_read_reg(sc,
4269 		    A_LE_DB_ACTIVE_TABLE_START_INDEX);
4270 
4271 		param[0] = FW_PARAM_PFVF(HPFILTER_START);
4272 		param[1] = FW_PARAM_PFVF(HPFILTER_END);
4273 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4274 		if (rc != 0) {
4275 			device_printf(sc->dev,
4276 			   "failed to query hpfilter parameters: %d.\n", rc);
4277 			return (rc);
4278 		}
4279 		if ((int)val[1] > (int)val[0]) {
4280 			sc->tids.hpftid_base = val[0];
4281 			sc->tids.hpftid_end = val[1];
4282 			sc->tids.nhpftids = val[1] - val[0] + 1;
4283 
4284 			/*
4285 			 * These should go off if the layout changes and the
4286 			 * driver needs to catch up.
4287 			 */
4288 			MPASS(sc->tids.hpftid_base == 0);
4289 			MPASS(sc->tids.tid_base == sc->tids.nhpftids);
4290 		}
4291 	}
4292 
4293 	/*
4294 	 * MPSBGMAP is queried separately because only recent firmwares support
4295 	 * it as a parameter and we don't want the compound query above to fail
4296 	 * on older firmwares.
4297 	 */
4298 	param[0] = FW_PARAM_DEV(MPSBGMAP);
4299 	val[0] = 0;
4300 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4301 	if (rc == 0)
4302 		sc->params.mps_bg_map = val[0];
4303 	else
4304 		sc->params.mps_bg_map = 0;
4305 
4306 	/*
4307 	 * Determine whether the firmware supports the filter2 work request.
4308 	 * This is queried separately for the same reason as MPSBGMAP above.
4309 	 */
4310 	param[0] = FW_PARAM_DEV(FILTER2_WR);
4311 	val[0] = 0;
4312 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4313 	if (rc == 0)
4314 		sc->params.filter2_wr_support = val[0] != 0;
4315 	else
4316 		sc->params.filter2_wr_support = 0;
4317 
4318 	/*
4319 	 * Find out whether we're allowed to use the ULPTX MEMWRITE DSGL.
4320 	 * This is queried separately for the same reason as other params above.
4321 	 */
4322 	param[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
4323 	val[0] = 0;
4324 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4325 	if (rc == 0)
4326 		sc->params.ulptx_memwrite_dsgl = val[0] != 0;
4327 	else
4328 		sc->params.ulptx_memwrite_dsgl = false;
4329 
4330 	/* get capabilities */
4331 	bzero(&caps, sizeof(caps));
4332 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4333 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
4334 	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4335 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4336 	if (rc != 0) {
4337 		device_printf(sc->dev,
4338 		    "failed to get card capabilities: %d.\n", rc);
4339 		return (rc);
4340 	}
4341 
4342 #define READ_CAPS(x) do { \
4343 	sc->x = htobe16(caps.x); \
4344 } while (0)
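	/* htobe16 is its own inverse; this is be16toh in effect. */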
4345 	READ_CAPS(nbmcaps);
4346 	READ_CAPS(linkcaps);
4347 	READ_CAPS(switchcaps);
4348 	READ_CAPS(niccaps);
4349 	READ_CAPS(toecaps);
4350 	READ_CAPS(rdmacaps);
4351 	READ_CAPS(cryptocaps);
4352 	READ_CAPS(iscsicaps);
4353 	READ_CAPS(fcoecaps);
4354 
4355 	if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
4356 		MPASS(chip_id(sc) > CHELSIO_T4);
4357 		MPASS(sc->toecaps == 0);
4358 		sc->toecaps = 0;
4359 
4360 		param[0] = FW_PARAM_DEV(NTID);
4361 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4362 		if (rc != 0) {
4363 			device_printf(sc->dev,
4364 			    "failed to query HASHFILTER parameters: %d.\n", rc);
4365 			return (rc);
4366 		}
4367 		sc->tids.ntids = val[0];
4368 		if (sc->params.fw_vers <
4369 		    (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4370 		    V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
4371 			MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4372 			sc->tids.ntids -= sc->tids.nhpftids;
4373 		}
4374 		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4375 		sc->params.hash_filter = 1;
4376 	}
4377 	if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
4378 		param[0] = FW_PARAM_PFVF(ETHOFLD_START);
4379 		param[1] = FW_PARAM_PFVF(ETHOFLD_END);
4380 		param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4381 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
4382 		if (rc != 0) {
4383 			device_printf(sc->dev,
4384 			    "failed to query NIC parameters: %d.\n", rc);
4385 			return (rc);
4386 		}
4387 		if ((int)val[1] > (int)val[0]) {
4388 			sc->tids.etid_base = val[0];
4389 			sc->tids.etid_end = val[1];
4390 			sc->tids.netids = val[1] - val[0] + 1;
4391 			sc->params.eo_wr_cred = val[2];
4392 			sc->params.ethoffload = 1;
4393 		}
4394 	}
4395 	if (sc->toecaps) {
4396 		/* query offload-related parameters */
4397 		param[0] = FW_PARAM_DEV(NTID);
4398 		param[1] = FW_PARAM_PFVF(SERVER_START);
4399 		param[2] = FW_PARAM_PFVF(SERVER_END);
4400 		param[3] = FW_PARAM_PFVF(TDDP_START);
4401 		param[4] = FW_PARAM_PFVF(TDDP_END);
4402 		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4403 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4404 		if (rc != 0) {
4405 			device_printf(sc->dev,
4406 			    "failed to query TOE parameters: %d.\n", rc);
4407 			return (rc);
4408 		}
4409 		sc->tids.ntids = val[0];
4410 		if (sc->params.fw_vers <
4411 		    (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4412 		    V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
4413 			MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4414 			sc->tids.ntids -= sc->tids.nhpftids;
4415 		}
4416 		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4417 		if ((int)val[2] > (int)val[1]) {
4418 			sc->tids.stid_base = val[1];
4419 			sc->tids.nstids = val[2] - val[1] + 1;
4420 		}
4421 		sc->vres.ddp.start = val[3];
4422 		sc->vres.ddp.size = val[4] - val[3] + 1;
4423 		sc->params.ofldq_wr_cred = val[5];
4424 		sc->params.offload = 1;
4425 	} else {
4426 		/*
4427 		 * The firmware attempts memfree TOE configuration for -SO cards
4428 		 * and will report toecaps=0 if it runs out of resources (this
4429 		 * depends on the config file).  It may not report 0 for other
4430 		 * capabilities dependent on the TOE in this case.  Set them to
4431 		 * 0 here so that the driver doesn't bother tracking resources
4432 		 * that will never be used.
4433 		 */
4434 		sc->iscsicaps = 0;
4435 		sc->rdmacaps = 0;
4436 	}
4437 	if (sc->rdmacaps) {
4438 		param[0] = FW_PARAM_PFVF(STAG_START);
4439 		param[1] = FW_PARAM_PFVF(STAG_END);
4440 		param[2] = FW_PARAM_PFVF(RQ_START);
4441 		param[3] = FW_PARAM_PFVF(RQ_END);
4442 		param[4] = FW_PARAM_PFVF(PBL_START);
4443 		param[5] = FW_PARAM_PFVF(PBL_END);
4444 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4445 		if (rc != 0) {
4446 			device_printf(sc->dev,
4447 			    "failed to query RDMA parameters(1): %d.\n", rc);
4448 			return (rc);
4449 		}
4450 		sc->vres.stag.start = val[0];
4451 		sc->vres.stag.size = val[1] - val[0] + 1;
4452 		sc->vres.rq.start = val[2];
4453 		sc->vres.rq.size = val[3] - val[2] + 1;
4454 		sc->vres.pbl.start = val[4];
4455 		sc->vres.pbl.size = val[5] - val[4] + 1;
4456 
4457 		param[0] = FW_PARAM_PFVF(SQRQ_START);
4458 		param[1] = FW_PARAM_PFVF(SQRQ_END);
4459 		param[2] = FW_PARAM_PFVF(CQ_START);
4460 		param[3] = FW_PARAM_PFVF(CQ_END);
4461 		param[4] = FW_PARAM_PFVF(OCQ_START);
4462 		param[5] = FW_PARAM_PFVF(OCQ_END);
4463 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4464 		if (rc != 0) {
4465 			device_printf(sc->dev,
4466 			    "failed to query RDMA parameters(2): %d.\n", rc);
4467 			return (rc);
4468 		}
4469 		sc->vres.qp.start = val[0];
4470 		sc->vres.qp.size = val[1] - val[0] + 1;
4471 		sc->vres.cq.start = val[2];
4472 		sc->vres.cq.size = val[3] - val[2] + 1;
4473 		sc->vres.ocq.start = val[4];
4474 		sc->vres.ocq.size = val[5] - val[4] + 1;
4475 
4476 		param[0] = FW_PARAM_PFVF(SRQ_START);
4477 		param[1] = FW_PARAM_PFVF(SRQ_END);
4478 		param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
4479 		param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
4480 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
4481 		if (rc != 0) {
4482 			device_printf(sc->dev,
4483 			    "failed to query RDMA parameters(3): %d.\n", rc);
4484 			return (rc);
4485 		}
4486 		sc->vres.srq.start = val[0];
4487 		sc->vres.srq.size = val[1] - val[0] + 1;
4488 		sc->params.max_ordird_qp = val[2];
4489 		sc->params.max_ird_adapter = val[3];
4490 	}
4491 	if (sc->iscsicaps) {
4492 		param[0] = FW_PARAM_PFVF(ISCSI_START);
4493 		param[1] = FW_PARAM_PFVF(ISCSI_END);
4494 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4495 		if (rc != 0) {
4496 			device_printf(sc->dev,
4497 			    "failed to query iSCSI parameters: %d.\n", rc);
4498 			return (rc);
4499 		}
4500 		sc->vres.iscsi.start = val[0];
4501 		sc->vres.iscsi.size = val[1] - val[0] + 1;
4502 	}
4503 	if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
4504 		param[0] = FW_PARAM_PFVF(TLS_START);
4505 		param[1] = FW_PARAM_PFVF(TLS_END);
4506 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4507 		if (rc != 0) {
4508 			device_printf(sc->dev,
4509 			    "failed to query TLS parameters: %d.\n", rc);
4510 			return (rc);
4511 		}
4512 		sc->vres.key.start = val[0];
4513 		sc->vres.key.size = val[1] - val[0] + 1;
4514 	}
4515 
4516 	t4_init_sge_params(sc);
4517 
4518 	/*
4519 	 * We've got the params we wanted to query via the firmware.  Now grab
4520 	 * some others directly from the chip.
4521 	 */
4522 	rc = t4_read_chip_settings(sc);
4523 
4524 	return (rc);
4525 }
4526 
4527 static int
4528 set_params__post_init(struct adapter *sc)
4529 {
4530 	uint32_t param, val;
4531 #ifdef TCP_OFFLOAD
4532 	int i, v, shift;
4533 #endif
4534 
4535 	/* ask for encapsulated CPLs */
4536 	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
4537 	val = 1;
4538 	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4539 
4540 	/* Enable 32b port caps if the firmware supports it. */
4541 	param = FW_PARAM_PFVF(PORT_CAPS32);
4542 	val = 1;
4543 	if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
4544 		sc->params.port_caps32 = 1;
4545 
4546 	/* Let filter + maskhash steer to a part of the VI's RSS region. */
4547 	val = 1 << (G_MASKSIZE(t4_read_reg(sc, A_TP_RSS_CONFIG_TNL)) - 1);
4548 	t4_set_reg_field(sc, A_TP_RSS_CONFIG_TNL, V_MASKFILTER(M_MASKFILTER),
4549 	    V_MASKFILTER(val - 1));
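	/*
	 * val is 1 << (MASKSIZE - 1), so MASKFILTER is written as a mask
	 * of MASKSIZE - 1 one-bits (e.g. 0x3f when MASKSIZE reads 7).
	 */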
4550 
4551 #ifdef TCP_OFFLOAD
4552 	/*
4553 	 * Override the TOE timers with user provided tunables.  This is not the
4554 	 * recommended way to change the timers (the firmware config file is) so
4555 	 * these tunables are not documented.
4556 	 *
4557 	 * All the timer tunables are in microseconds.
4558 	 */
4559 	if (t4_toe_keepalive_idle != 0) {
4560 		v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
4561 		v &= M_KEEPALIVEIDLE;
4562 		t4_set_reg_field(sc, A_TP_KEEP_IDLE,
4563 		    V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
4564 	}
4565 	if (t4_toe_keepalive_interval != 0) {
4566 		v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
4567 		v &= M_KEEPALIVEINTVL;
4568 		t4_set_reg_field(sc, A_TP_KEEP_INTVL,
4569 		    V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
4570 	}
4571 	if (t4_toe_keepalive_count != 0) {
4572 		v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
4573 		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4574 		    V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
4575 		    V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
4576 		    V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
4577 	}
4578 	if (t4_toe_rexmt_min != 0) {
4579 		v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
4580 		v &= M_RXTMIN;
4581 		t4_set_reg_field(sc, A_TP_RXT_MIN,
4582 		    V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
4583 	}
4584 	if (t4_toe_rexmt_max != 0) {
4585 		v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
4586 		v &= M_RXTMAX;
4587 		t4_set_reg_field(sc, A_TP_RXT_MAX,
4588 		    V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
4589 	}
4590 	if (t4_toe_rexmt_count != 0) {
4591 		v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
4592 		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4593 		    V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
4594 		    V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
4595 		    V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
4596 	}
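	/*
	 * Each 32-bit TP_TCP_BACKOFF register appears to hold four 8-bit
	 * backoff indices: i & ~3 selects the register and (i & 3) << 3
	 * the byte lane within it.
	 */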
4597 	for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
4598 		if (t4_toe_rexmt_backoff[i] != -1) {
4599 			v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
4600 			shift = (i & 3) << 3;
4601 			t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
4602 			    M_TIMERBACKOFFINDEX0 << shift, v << shift);
4603 		}
4604 	}
4605 #endif
4606 	return (0);
4607 }
4608 
4609 #undef FW_PARAM_PFVF
4610 #undef FW_PARAM_DEV
4611 
4612 static void
4613 t4_set_desc(struct adapter *sc)
4614 {
4615 	char buf[128];
4616 	struct adapter_params *p = &sc->params;
4617 
4618 	snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
4619 
4620 	device_set_desc_copy(sc->dev, buf);
4621 }
4622 
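/*
 * Add a media word and its three TX/RX pause variants to the list.
 */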
4623 static inline void
4624 ifmedia_add4(struct ifmedia *ifm, int m)
4625 {
4626 
4627 	ifmedia_add(ifm, m, 0, NULL);
4628 	ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL);
4629 	ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL);
4630 	ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
4631 }
4632 
4633 /*
4634  * This is the selected media, which is not quite the same as the active media.
4635  * The media line in ifconfig is "media: Ethernet selected (active)" if selected
4636  * and active are not the same, and "media: Ethernet selected" otherwise.
4637  */
4638 static void
4639 set_current_media(struct port_info *pi)
4640 {
4641 	struct link_config *lc;
4642 	struct ifmedia *ifm;
4643 	int mword;
4644 	u_int speed;
4645 
4646 	PORT_LOCK_ASSERT_OWNED(pi);
4647 
4648 	/* Leave current media alone if it's already set to IFM_NONE. */
4649 	ifm = &pi->media;
4650 	if (ifm->ifm_cur != NULL &&
4651 	    IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
4652 		return;
4653 
4654 	lc = &pi->link_cfg;
4655 	if (lc->requested_aneg != AUTONEG_DISABLE &&
4656 	    lc->supported & FW_PORT_CAP32_ANEG) {
4657 		ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
4658 		return;
4659 	}
4660 	mword = IFM_ETHER | IFM_FDX;
4661 	if (lc->requested_fc & PAUSE_TX)
4662 		mword |= IFM_ETH_TXPAUSE;
4663 	if (lc->requested_fc & PAUSE_RX)
4664 		mword |= IFM_ETH_RXPAUSE;
4665 	if (lc->requested_speed == 0)
4666 		speed = port_top_speed(pi) * 1000;	/* Gbps -> Mbps */
4667 	else
4668 		speed = lc->requested_speed;
4669 	mword |= port_mword(pi, speed_to_fwcap(speed));
4670 	ifmedia_set(ifm, mword);
4671 }
4672 
4673 /*
4674  * Returns true if the ifmedia list for the port cannot change.
4675  */
4676 static bool
4677 fixed_ifmedia(struct port_info *pi)
4678 {
4679 
4680 	return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
4681 	    pi->port_type == FW_PORT_TYPE_BT_XFI ||
4682 	    pi->port_type == FW_PORT_TYPE_BT_XAUI ||
4683 	    pi->port_type == FW_PORT_TYPE_KX4 ||
4684 	    pi->port_type == FW_PORT_TYPE_KX ||
4685 	    pi->port_type == FW_PORT_TYPE_KR ||
4686 	    pi->port_type == FW_PORT_TYPE_BP_AP ||
4687 	    pi->port_type == FW_PORT_TYPE_BP4_AP ||
4688 	    pi->port_type == FW_PORT_TYPE_BP40_BA ||
4689 	    pi->port_type == FW_PORT_TYPE_KR4_100G ||
4690 	    pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
4691 	    pi->port_type == FW_PORT_TYPE_KR_XLAUI);
4692 }
4693 
4694 static void
4695 build_medialist(struct port_info *pi)
4696 {
4697 	uint32_t ss, speed;
4698 	int unknown, mword, bit;
4699 	struct link_config *lc;
4700 	struct ifmedia *ifm;
4701 
4702 	PORT_LOCK_ASSERT_OWNED(pi);
4703 
4704 	if (pi->flags & FIXED_IFMEDIA)
4705 		return;
4706 
4707 	/*
4708 	 * Rebuild the ifmedia list.
4709 	 */
4710 	ifm = &pi->media;
4711 	ifmedia_removeall(ifm);
4712 	lc = &pi->link_cfg;
4713 	ss = G_FW_PORT_CAP32_SPEED(lc->supported); /* Supported Speeds */
4714 	if (__predict_false(ss == 0)) {	/* not supposed to happen. */
4715 		MPASS(ss != 0);
4716 no_media:
4717 		MPASS(LIST_EMPTY(&ifm->ifm_list));
4718 		ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL);
4719 		ifmedia_set(ifm, IFM_ETHER | IFM_NONE);
4720 		return;
4721 	}
4722 
4723 	unknown = 0;
4724 	for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
4725 		speed = 1 << bit;
4726 		MPASS(speed & M_FW_PORT_CAP32_SPEED);
4727 		if (ss & speed) {
4728 			mword = port_mword(pi, speed);
4729 			if (mword == IFM_NONE) {
4730 				goto no_media;
4731 			} else if (mword == IFM_UNKNOWN)
4732 				unknown++;
4733 			else
4734 				ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword);
4735 		}
4736 	}
4737 	if (unknown > 0) /* Add one unknown for all unknown media types. */
4738 		ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
4739 	if (lc->supported & FW_PORT_CAP32_ANEG)
4740 		ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);
4741 
4742 	set_current_media(pi);
4743 }
4744 
4745 /*
4746  * Initialize the requested fields in the link config based on driver tunables.
4747  */
4748 static void
4749 init_link_config(struct port_info *pi)
4750 {
4751 	struct link_config *lc = &pi->link_cfg;
4752 
4753 	PORT_LOCK_ASSERT_OWNED(pi);
4754 
4755 	lc->requested_speed = 0;
4756 
4757 	if (t4_autoneg == 0)
4758 		lc->requested_aneg = AUTONEG_DISABLE;
4759 	else if (t4_autoneg == 1)
4760 		lc->requested_aneg = AUTONEG_ENABLE;
4761 	else
4762 		lc->requested_aneg = AUTONEG_AUTO;
4763 
4764 	lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
4765 	    PAUSE_AUTONEG);
4766 
4767 	if (t4_fec == -1 || t4_fec & FEC_AUTO)
4768 		lc->requested_fec = FEC_AUTO;
4769 	else {
4770 		lc->requested_fec = FEC_NONE;
4771 		if (t4_fec & FEC_RS)
4772 			lc->requested_fec |= FEC_RS;
4773 		if (t4_fec & FEC_BASER_RS)
4774 			lc->requested_fec |= FEC_BASER_RS;
4775 	}
4776 }
4777 
4778 /*
4779  * Makes sure that all requested settings comply with what's supported by the
4780  * port.  Returns the number of settings that were invalid and had to be fixed.
4781  */
4782 static int
4783 fixup_link_config(struct port_info *pi)
4784 {
4785 	int n = 0;
4786 	struct link_config *lc = &pi->link_cfg;
4787 	uint32_t fwspeed;
4788 
4789 	PORT_LOCK_ASSERT_OWNED(pi);
4790 
4791 	/* Speed (when not autonegotiating) */
4792 	if (lc->requested_speed != 0) {
4793 		fwspeed = speed_to_fwcap(lc->requested_speed);
4794 		if ((fwspeed & lc->supported) == 0) {
4795 			n++;
4796 			lc->requested_speed = 0;
4797 		}
4798 	}
4799 
4800 	/* Link autonegotiation */
4801 	MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
4802 	    lc->requested_aneg == AUTONEG_DISABLE ||
4803 	    lc->requested_aneg == AUTONEG_AUTO);
4804 	if (lc->requested_aneg == AUTONEG_ENABLE &&
4805 	    !(lc->supported & FW_PORT_CAP32_ANEG)) {
4806 		n++;
4807 		lc->requested_aneg = AUTONEG_AUTO;
4808 	}
4809 
4810 	/* Flow control */
4811 	MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
4812 	if (lc->requested_fc & PAUSE_TX &&
4813 	    !(lc->supported & FW_PORT_CAP32_FC_TX)) {
4814 		n++;
4815 		lc->requested_fc &= ~PAUSE_TX;
4816 	}
4817 	if (lc->requested_fc & PAUSE_RX &&
4818 	    !(lc->supported & FW_PORT_CAP32_FC_RX)) {
4819 		n++;
4820 		lc->requested_fc &= ~PAUSE_RX;
4821 	}
4822 	if (!(lc->requested_fc & PAUSE_AUTONEG) &&
4823 	    !(lc->supported & FW_PORT_CAP32_FORCE_PAUSE)) {
4824 		n++;
4825 		lc->requested_fc |= PAUSE_AUTONEG;
4826 	}
4827 
4828 	/* FEC */
4829 	if ((lc->requested_fec & FEC_RS &&
4830 	    !(lc->supported & FW_PORT_CAP32_FEC_RS)) ||
4831 	    (lc->requested_fec & FEC_BASER_RS &&
4832 	    !(lc->supported & FW_PORT_CAP32_FEC_BASER_RS))) {
4833 		n++;
4834 		lc->requested_fec = FEC_AUTO;
4835 	}
4836 
4837 	return (n);
4838 }
4839 
4840 /*
4841  * Apply the requested L1 settings, which are expected to be valid, to the
4842  * hardware.
4843  */
4844 static int
4845 apply_link_config(struct port_info *pi)
4846 {
4847 	struct adapter *sc = pi->adapter;
4848 	struct link_config *lc = &pi->link_cfg;
4849 	int rc;
4850 
4851 #ifdef INVARIANTS
4852 	ASSERT_SYNCHRONIZED_OP(sc);
4853 	PORT_LOCK_ASSERT_OWNED(pi);
4854 
4855 	if (lc->requested_aneg == AUTONEG_ENABLE)
4856 		MPASS(lc->supported & FW_PORT_CAP32_ANEG);
4857 	if (!(lc->requested_fc & PAUSE_AUTONEG))
4858 		MPASS(lc->supported & FW_PORT_CAP32_FORCE_PAUSE);
4859 	if (lc->requested_fc & PAUSE_TX)
4860 		MPASS(lc->supported & FW_PORT_CAP32_FC_TX);
4861 	if (lc->requested_fc & PAUSE_RX)
4862 		MPASS(lc->supported & FW_PORT_CAP32_FC_RX);
4863 	if (lc->requested_fec & FEC_RS)
4864 		MPASS(lc->supported & FW_PORT_CAP32_FEC_RS);
4865 	if (lc->requested_fec & FEC_BASER_RS)
4866 		MPASS(lc->supported & FW_PORT_CAP32_FEC_BASER_RS);
4867 #endif
4868 	rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
4869 	if (rc != 0) {
4870 		/* Don't complain if the VF driver gets back an EPERM. */
4871 		if (!(sc->flags & IS_VF) || rc != FW_EPERM)
4872 			device_printf(pi->dev, "l1cfg failed: %d\n", rc);
4873 	} else {
4874 		/*
4875 		 * An L1_CFG will almost always result in a link-change event if
4876 		 * the link is up, and the driver will refresh the actual
4877 		 * fec/fc/etc. when the notification is processed.  If the link
4878 		 * is down then the actual settings are meaningless.
4879 		 *
4880 		 * This takes care of the case where a change in the L1 settings
4881 		 * may not result in a notification.
4882 		 */
4883 		if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
4884 			lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
4885 	}
4886 	return (rc);
4887 }
4888 
4889 #define FW_MAC_EXACT_CHUNK	7
4890 struct mcaddr_ctx {
4891 	struct ifnet *ifp;
4892 	const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
4893 	uint64_t hash;
4894 	int i;
4895 	int del;
4896 	int rc;
4897 };
4898 
4899 static u_int
4900 add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
4901 {
4902 	struct mcaddr_ctx *ctx = arg;
4903 	struct vi_info *vi = ctx->ifp->if_softc;
4904 	struct port_info *pi = vi->pi;
4905 	struct adapter *sc = pi->adapter;
4906 
4907 	if (ctx->rc < 0)
4908 		return (0);
4909 
4910 	ctx->mcaddr[ctx->i] = LLADDR(sdl);
4911 	MPASS(ETHER_IS_MULTICAST(ctx->mcaddr[ctx->i]));
4912 	ctx->i++;
4913 
4914 	if (ctx->i == FW_MAC_EXACT_CHUNK) {
4915 		ctx->rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, ctx->del,
4916 		    ctx->i, ctx->mcaddr, NULL, &ctx->hash, 0);
4917 		if (ctx->rc < 0) {
4918 			int j;
4919 
4920 			for (j = 0; j < ctx->i; j++) {
4921 				if_printf(ctx->ifp,
4922 				    "failed to add mc address"
4923 				    " %02x:%02x:%02x:"
4924 				    "%02x:%02x:%02x rc=%d\n",
4925 				    ctx->mcaddr[j][0], ctx->mcaddr[j][1],
4926 				    ctx->mcaddr[j][2], ctx->mcaddr[j][3],
4927 				    ctx->mcaddr[j][4], ctx->mcaddr[j][5],
4928 				    -ctx->rc);
4929 			}
4930 			return (0);
4931 		}
4932 		ctx->del = 0;
4933 		ctx->i = 0;
4934 	}
4935 
4936 	return (1);
4937 }
4938 
4939 /*
4940  * Program the port's XGMAC based on parameters in ifnet.  The caller also
4941  * indicates which parameters should be programmed (the rest are left alone).
4942  */
4943 int
4944 update_mac_settings(struct ifnet *ifp, int flags)
4945 {
4946 	int rc = 0;
4947 	struct vi_info *vi = ifp->if_softc;
4948 	struct port_info *pi = vi->pi;
4949 	struct adapter *sc = pi->adapter;
4950 	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
4951 
4952 	ASSERT_SYNCHRONIZED_OP(sc);
4953 	KASSERT(flags, ("%s: not told what to update.", __func__));
4954 
4955 	if (flags & XGMAC_MTU)
4956 		mtu = ifp->if_mtu;
4957 
4958 	if (flags & XGMAC_PROMISC)
4959 		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
4960 
4961 	if (flags & XGMAC_ALLMULTI)
4962 		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
4963 
4964 	if (flags & XGMAC_VLANEX)
4965 		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
4966 
4967 	if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
4968 		rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
4969 		    allmulti, 1, vlanex, false);
4970 		if (rc) {
4971 			if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
4972 			    rc);
4973 			return (rc);
4974 		}
4975 	}
4976 
4977 	if (flags & XGMAC_UCADDR) {
4978 		uint8_t ucaddr[ETHER_ADDR_LEN];
4979 
4980 		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
4981 		rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
4982 		    ucaddr, true, &vi->smt_idx);
4983 		if (rc < 0) {
4984 			rc = -rc;
4985 			if_printf(ifp, "change_mac failed: %d\n", rc);
4986 			return (rc);
4987 		} else {
4988 			vi->xact_addr_filt = rc;
4989 			rc = 0;
4990 		}
4991 	}
4992 
4993 	if (flags & XGMAC_MCADDRS) {
4994 		struct epoch_tracker et;
4995 		struct mcaddr_ctx ctx;
4996 		int j;
4997 
4998 		ctx.ifp = ifp;
4999 		ctx.hash = 0;
5000 		ctx.i = 0;
5001 		ctx.del = 1;
5002 		/*
5003 		 * Unlike other drivers, we accumulate a list of pointers into
5004 		 * the interface's address lists, and we need to keep that
5005 		 * list safe even after if_foreach_llmaddr() returns, so we
5006 		 * must enter the network epoch.
5007 		 */
5008 		NET_EPOCH_ENTER(et);
5009 		if_foreach_llmaddr(ifp, add_maddr, &ctx);
5010 		if (ctx.rc < 0) {
5011 			NET_EPOCH_EXIT(et);
5012 			rc = -ctx.rc;
5013 			return (rc);
5014 		}
5015 		if (ctx.i > 0) {
5016 			rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
5017 			    ctx.del, ctx.i, ctx.mcaddr, NULL, &ctx.hash, 0);
5018 			NET_EPOCH_EXIT(et);
5019 			if (rc < 0) {
5020 				rc = -rc;
5021 				for (j = 0; j < ctx.i; j++) {
5022 					if_printf(ifp,
5023 					    "failed to add mc address"
5024 					    " %02x:%02x:%02x:"
5025 					    "%02x:%02x:%02x rc=%d\n",
5026 					    ctx.mcaddr[j][0], ctx.mcaddr[j][1],
5027 					    ctx.mcaddr[j][2], ctx.mcaddr[j][3],
5028 					    ctx.mcaddr[j][4], ctx.mcaddr[j][5],
5029 					    rc);
5030 				}
5031 				return (rc);
5032 			}
5033 		} else
5034 			NET_EPOCH_EXIT(et);
5035 
5036 		rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, ctx.hash, 0);
5037 		if (rc != 0)
5038 			if_printf(ifp, "failed to set mc address hash: %d\n", rc);
5039 	}
5040 
5041 	return (rc);
5042 }
5043 
5044 /*
5045  * {begin|end}_synchronized_op must be called from the same thread.
5046  */
5047 int
5048 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
5049     char *wmesg)
5050 {
5051 	int rc, pri;
5052 
5053 #ifdef WITNESS
5054 	/* the caller thinks it's ok to sleep, but is it really? */
5055 	if (flags & SLEEP_OK)
5056 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
5057 		    "begin_synchronized_op");
5058 #endif
5059 
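	/* PCATCH makes the sleep below interruptible by signals. */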
5060 	if (flags & INTR_OK)
5061 		pri = PCATCH;
5062 	else
5063 		pri = 0;
5064 
5065 	ADAPTER_LOCK(sc);
5066 	for (;;) {
5067 
5068 		if (vi && IS_DOOMED(vi)) {
5069 			rc = ENXIO;
5070 			goto done;
5071 		}
5072 
5073 		if (!IS_BUSY(sc)) {
5074 			rc = 0;
5075 			break;
5076 		}
5077 
5078 		if (!(flags & SLEEP_OK)) {
5079 			rc = EBUSY;
5080 			goto done;
5081 		}
5082 
5083 		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
5084 			rc = EINTR;
5085 			goto done;
5086 		}
5087 	}
5088 
5089 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
5090 	SET_BUSY(sc);
5091 #ifdef INVARIANTS
5092 	sc->last_op = wmesg;
5093 	sc->last_op_thr = curthread;
5094 	sc->last_op_flags = flags;
5095 #endif
5096 
5097 done:
5098 	if (!(flags & HOLD_LOCK) || rc)
5099 		ADAPTER_UNLOCK(sc);
5100 
5101 	return (rc);
5102 }
5103 
5104 /*
5105  * Tell if_ioctl and if_init that the VI is going away.  This is a
5106  * special variant of begin_synchronized_op and must be paired with a
5107  * call to end_synchronized_op.
5108  */
5109 void
5110 doom_vi(struct adapter *sc, struct vi_info *vi)
5111 {
5112 
5113 	ADAPTER_LOCK(sc);
5114 	SET_DOOMED(vi);
5115 	wakeup(&sc->flags);
5116 	while (IS_BUSY(sc))
5117 		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
5118 	SET_BUSY(sc);
5119 #ifdef INVARIANTS
5120 	sc->last_op = "t4detach";
5121 	sc->last_op_thr = curthread;
5122 	sc->last_op_flags = 0;
5123 #endif
5124 	ADAPTER_UNLOCK(sc);
5125 }
5126 
5127 /*
5128  * {begin|end}_synchronized_op must be called from the same thread.
5129  */
5130 void
5131 end_synchronized_op(struct adapter *sc, int flags)
5132 {
5133 
5134 	if (flags & LOCK_HELD)
5135 		ADAPTER_LOCK_ASSERT_OWNED(sc);
5136 	else
5137 		ADAPTER_LOCK(sc);
5138 
5139 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
5140 	CLR_BUSY(sc);
5141 	wakeup(&sc->flags);
5142 	ADAPTER_UNLOCK(sc);
5143 }
5144 
5145 static int
5146 cxgbe_init_synchronized(struct vi_info *vi)
5147 {
5148 	struct port_info *pi = vi->pi;
5149 	struct adapter *sc = pi->adapter;
5150 	struct ifnet *ifp = vi->ifp;
5151 	int rc = 0, i;
5152 	struct sge_txq *txq;
5153 
5154 	ASSERT_SYNCHRONIZED_OP(sc);
5155 
5156 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
5157 		return (0);	/* already running */
5158 
5159 	if (!(sc->flags & FULL_INIT_DONE) &&
5160 	    ((rc = adapter_full_init(sc)) != 0))
5161 		return (rc);	/* error message displayed already */
5162 
5163 	if (!(vi->flags & VI_INIT_DONE) &&
5164 	    ((rc = vi_full_init(vi)) != 0))
5165 		return (rc); /* error message displayed already */
5166 
5167 	rc = update_mac_settings(ifp, XGMAC_ALL);
5168 	if (rc)
5169 		goto done;	/* error message displayed already */
5170 
5171 	PORT_LOCK(pi);
5172 	if (pi->up_vis == 0) {
5173 		t4_update_port_info(pi);
5174 		fixup_link_config(pi);
5175 		build_medialist(pi);
5176 		apply_link_config(pi);
5177 	}
5178 
5179 	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
5180 	if (rc != 0) {
5181 		if_printf(ifp, "enable_vi failed: %d\n", rc);
5182 		PORT_UNLOCK(pi);
5183 		goto done;
5184 	}
5185 
5186 	/*
5187 	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
5188 	 * if this changes.
5189 	 */
5190 
5191 	for_each_txq(vi, i, txq) {
5192 		TXQ_LOCK(txq);
5193 		txq->eq.flags |= EQ_ENABLED;
5194 		TXQ_UNLOCK(txq);
5195 	}
5196 
5197 	/*
5198 	 * The first iq of the first port to come up is used for tracing.
5199 	 */
5200 	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
5201 		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
5202 		t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
5203 		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
5204 		    V_QUEUENUMBER(sc->traceq));
5205 		pi->flags |= HAS_TRACEQ;
5206 	}
5207 
5208 	/* all ok */
5209 	pi->up_vis++;
5210 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
5211 
5212 	if (pi->nvi > 1 || sc->flags & IS_VF)
5213 		callout_reset(&vi->tick, hz, vi_tick, vi);
5214 	else
5215 		callout_reset(&pi->tick, hz, cxgbe_tick, pi);
5216 	if (pi->link_cfg.link_ok)
5217 		t4_os_link_changed(pi);
5218 	PORT_UNLOCK(pi);
5219 done:
5220 	if (rc != 0)
5221 		cxgbe_uninit_synchronized(vi);
5222 
5223 	return (rc);
5224 }
5225 
5226 /*
5227  * Idempotent.
5228  */
5229 static int
5230 cxgbe_uninit_synchronized(struct vi_info *vi)
5231 {
5232 	struct port_info *pi = vi->pi;
5233 	struct adapter *sc = pi->adapter;
5234 	struct ifnet *ifp = vi->ifp;
5235 	int rc, i;
5236 	struct sge_txq *txq;
5237 
5238 	ASSERT_SYNCHRONIZED_OP(sc);
5239 
5240 	if (!(vi->flags & VI_INIT_DONE)) {
5241 		if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5242 			KASSERT(0, ("uninited VI is running"));
5243 			if_printf(ifp, "uninited VI with running ifnet.  "
5244 			    "vi->flags 0x%016lx, if_flags 0x%08x, "
5245 			    "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
5246 			    ifp->if_drv_flags);
5247 		}
5248 		return (0);
5249 	}
5250 
5251 	/*
5252 	 * Disable the VI so that all its data in either direction is discarded
5253 	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
5254 	 * tick) intact as the TP can deliver negative advice or data that it's
5255 	 * holding in its RAM (for an offloaded connection) even after the VI is
5256 	 * disabled.
5257 	 */
5258 	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
5259 	if (rc) {
5260 		if_printf(ifp, "disable_vi failed: %d\n", rc);
5261 		return (rc);
5262 	}
5263 
5264 	for_each_txq(vi, i, txq) {
5265 		TXQ_LOCK(txq);
5266 		txq->eq.flags &= ~EQ_ENABLED;
5267 		TXQ_UNLOCK(txq);
5268 	}
5269 
5270 	PORT_LOCK(pi);
5271 	if (pi->nvi > 1 || sc->flags & IS_VF)
5272 		callout_stop(&vi->tick);
5273 	else
5274 		callout_stop(&pi->tick);
5275 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5276 		PORT_UNLOCK(pi);
5277 		return (0);
5278 	}
5279 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
5280 	pi->up_vis--;
5281 	if (pi->up_vis > 0) {
5282 		PORT_UNLOCK(pi);
5283 		return (0);
5284 	}
5285 
5286 	pi->link_cfg.link_ok = false;
5287 	pi->link_cfg.speed = 0;
5288 	pi->link_cfg.link_down_rc = 255;
5289 	t4_os_link_changed(pi);
5290 	PORT_UNLOCK(pi);
5291 
5292 	return (0);
5293 }
5294 
5295 /*
5296  * It is ok for this function to fail midway and return right away.  t4_detach
5297  * will walk the entire sc->irq list and clean up whatever is valid.
5298  */
5299 int
5300 t4_setup_intr_handlers(struct adapter *sc)
5301 {
5302 	int rc, rid, p, q, v;
5303 	char s[8];
5304 	struct irq *irq;
5305 	struct port_info *pi;
5306 	struct vi_info *vi;
5307 	struct sge *sge = &sc->sge;
5308 	struct sge_rxq *rxq;
5309 #ifdef TCP_OFFLOAD
5310 	struct sge_ofld_rxq *ofld_rxq;
5311 #endif
5312 #ifdef DEV_NETMAP
5313 	struct sge_nm_rxq *nm_rxq;
5314 #endif
5315 #ifdef RSS
5316 	int nbuckets = rss_getnumbuckets();
5317 #endif
5318 
5319 	/*
5320 	 * Set up interrupts.
5321 	 */
5322 	irq = &sc->irq[0];
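	/* INTx uses resource ID 0; MSI/MSI-X vectors start at rid 1. */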
5323 	rid = sc->intr_type == INTR_INTX ? 0 : 1;
5324 	if (forwarding_intr_to_fwq(sc))
5325 		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
5326 
5327 	/* Multiple interrupts. */
5328 	if (sc->flags & IS_VF)
5329 		KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
5330 		    ("%s: too few intr.", __func__));
5331 	else
5332 		KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
5333 		    ("%s: too few intr.", __func__));
5334 
5335 	/* The first one is always error intr on PFs */
5336 	if (!(sc->flags & IS_VF)) {
5337 		rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
5338 		if (rc != 0)
5339 			return (rc);
5340 		irq++;
5341 		rid++;
5342 	}
5343 
5344 	/* The second one is always the firmware event queue (first on VFs) */
5345 	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
5346 	if (rc != 0)
5347 		return (rc);
5348 	irq++;
5349 	rid++;
5350 
5351 	for_each_port(sc, p) {
5352 		pi = sc->port[p];
5353 		for_each_vi(pi, v, vi) {
5354 			vi->first_intr = rid - 1;
5355 
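			/*
			 * Interrupt names are "<port><vi><queue>" in hex,
			 * with a lowercase VI letter for NIC/netmap rx
			 * queues and an uppercase one for offload rx queues.
			 */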
5356 			if (vi->nnmrxq > 0) {
5357 				int n = max(vi->nrxq, vi->nnmrxq);
5358 
5359 				rxq = &sge->rxq[vi->first_rxq];
5360 #ifdef DEV_NETMAP
5361 				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
5362 #endif
5363 				for (q = 0; q < n; q++) {
5364 					snprintf(s, sizeof(s), "%x%c%x", p,
5365 					    'a' + v, q);
5366 					if (q < vi->nrxq)
5367 						irq->rxq = rxq++;
5368 #ifdef DEV_NETMAP
5369 					if (q < vi->nnmrxq)
5370 						irq->nm_rxq = nm_rxq++;
5371 
5372 					if (irq->nm_rxq != NULL &&
5373 					    irq->rxq == NULL) {
5374 						/* Netmap rx only */
5375 						rc = t4_alloc_irq(sc, irq, rid,
5376 						    t4_nm_intr, irq->nm_rxq, s);
5377 					}
5378 					if (irq->nm_rxq != NULL &&
5379 					    irq->rxq != NULL) {
5380 						/* NIC and Netmap rx */
5381 						rc = t4_alloc_irq(sc, irq, rid,
5382 						    t4_vi_intr, irq, s);
5383 					}
5384 #endif
5385 					if (irq->rxq != NULL &&
5386 					    irq->nm_rxq == NULL) {
5387 						/* NIC rx only */
5388 						rc = t4_alloc_irq(sc, irq, rid,
5389 						    t4_intr, irq->rxq, s);
5390 					}
5391 					if (rc != 0)
5392 						return (rc);
5393 #ifdef RSS
5394 					if (q < vi->nrxq) {
5395 						bus_bind_intr(sc->dev, irq->res,
5396 						    rss_getcpu(q % nbuckets));
5397 					}
5398 #endif
5399 					irq++;
5400 					rid++;
5401 					vi->nintr++;
5402 				}
5403 			} else {
5404 				for_each_rxq(vi, q, rxq) {
5405 					snprintf(s, sizeof(s), "%x%c%x", p,
5406 					    'a' + v, q);
5407 					rc = t4_alloc_irq(sc, irq, rid,
5408 					    t4_intr, rxq, s);
5409 					if (rc != 0)
5410 						return (rc);
5411 #ifdef RSS
5412 					bus_bind_intr(sc->dev, irq->res,
5413 					    rss_getcpu(q % nbuckets));
5414 #endif
5415 					irq++;
5416 					rid++;
5417 					vi->nintr++;
5418 				}
5419 			}
5420 #ifdef TCP_OFFLOAD
5421 			for_each_ofld_rxq(vi, q, ofld_rxq) {
5422 				snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
5423 				rc = t4_alloc_irq(sc, irq, rid, t4_intr,
5424 				    ofld_rxq, s);
5425 				if (rc != 0)
5426 					return (rc);
5427 				irq++;
5428 				rid++;
5429 				vi->nintr++;
5430 			}
5431 #endif
5432 		}
5433 	}
5434 	MPASS(irq == &sc->irq[sc->intr_count]);
5435 
5436 	return (0);
5437 }
5438 
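/*
 * Full initialization of adapter-wide state: the queues that belong to the
 * adapter itself, the driver taskqueues, and (under kernel RSS) the global
 * RSS key.  Interrupts are enabled at the end unless this is a VF.
 */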
5439 int
5440 adapter_full_init(struct adapter *sc)
5441 {
5442 	int rc, i;
5443 #ifdef RSS
5444 	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5445 	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5446 #endif
5447 
5448 	ASSERT_SYNCHRONIZED_OP(sc);
5449 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5450 	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
5451 	    ("%s: FULL_INIT_DONE already", __func__));
5452 
5453 	/*
5454 	 * Set up the queues that belong to the adapter (not to any particular port).
5455 	 */
5456 	rc = t4_setup_adapter_queues(sc);
5457 	if (rc != 0)
5458 		goto done;
5459 
5460 	for (i = 0; i < nitems(sc->tq); i++) {
5461 		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
5462 		    taskqueue_thread_enqueue, &sc->tq[i]);
5463 		if (sc->tq[i] == NULL) {
5464 			device_printf(sc->dev,
5465 			    "failed to allocate task queue %d\n", i);
5466 			rc = ENOMEM;
5467 			goto done;
5468 		}
5469 		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
5470 		    device_get_nameunit(sc->dev), i);
5471 	}
5472 #ifdef RSS
5473 	MPASS(RSS_KEYSIZE == 40);
5474 	rss_getkey((void *)&raw_rss_key[0]);
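	/*
	 * The loop below reverses the word order of the kernel's RSS key and
	 * converts each word to big-endian, which is the layout that
	 * t4_write_rss_key() hands to the hardware.
	 */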
5475 	for (i = 0; i < nitems(rss_key); i++) {
5476 		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
5477 	}
5478 	t4_write_rss_key(sc, &rss_key[0], -1, 1);
5479 #endif
5480 
5481 	if (!(sc->flags & IS_VF))
5482 		t4_intr_enable(sc);
5483 	sc->flags |= FULL_INIT_DONE;
5484 done:
5485 	if (rc != 0)
5486 		adapter_full_uninit(sc);
5487 
5488 	return (rc);
5489 }
5490 
5491 int
5492 adapter_full_uninit(struct adapter *sc)
5493 {
5494 	int i;
5495 
5496 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5497 
5498 	t4_teardown_adapter_queues(sc);
5499 
5500 	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
5501 		taskqueue_free(sc->tq[i]);
5502 		sc->tq[i] = NULL;
5503 	}
5504 
5505 	sc->flags &= ~FULL_INIT_DONE;
5506 
5507 	return (0);
5508 }
5509 
5510 #ifdef RSS
5511 #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
5512     RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
5513     RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
5514     RSS_HASHTYPE_RSS_UDP_IPV6)
5515 
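/*
 * The hardware's 4-tuple enables are per address family rather than per
 * protocol: IP4FOURTUPEN covers TCP/IPv4 always, and UDP/IPv4 only together
 * with UDPEN (likewise for IPv6).  Requesting a UDP hash therefore drags the
 * corresponding TCP hash along with it; vi_full_init() reports any hash that
 * had to be enabled this way.
 */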
5516 /* Translates kernel hash types to hardware. */
5517 static int
5518 hashconfig_to_hashen(int hashconfig)
5519 {
5520 	int hashen = 0;
5521 
5522 	if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
5523 		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
5524 	if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
5525 		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
5526 	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
5527 		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5528 		    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5529 	}
5530 	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
5531 		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5532 		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5533 	}
5534 	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
5535 		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5536 	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
5537 		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5538 
5539 	return (hashen);
5540 }
5541 
5542 /* Translates hardware hash types to kernel. */
5543 static int
5544 hashen_to_hashconfig(int hashen)
5545 {
5546 	int hashconfig = 0;
5547 
5548 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
5549 		/*
5550 		 * If UDP hashing was enabled it must have been enabled for
5551 		 * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
5552 		 * enabling any 4-tuple hash is a nonsensical configuration.
5553 		 */
5554 		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5555 		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
5556 
5557 		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5558 			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
5559 		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5560 			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
5561 	}
5562 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5563 		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
5564 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5565 		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
5566 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
5567 		hashconfig |= RSS_HASHTYPE_RSS_IPV4;
5568 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
5569 		hashconfig |= RSS_HASHTYPE_RSS_IPV6;
5570 
5571 	return (hashconfig);
5572 }
5573 #endif
5574 
5575 int
5576 vi_full_init(struct vi_info *vi)
5577 {
5578 	struct adapter *sc = vi->pi->adapter;
5579 	struct ifnet *ifp = vi->ifp;
5580 	uint16_t *rss;
5581 	struct sge_rxq *rxq;
5582 	int rc, i, j;
5583 #ifdef RSS
5584 	int nbuckets = rss_getnumbuckets();
5585 	int hashconfig = rss_gethashconfig();
5586 	int extra;
5587 #endif
5588 
5589 	ASSERT_SYNCHRONIZED_OP(sc);
5590 	KASSERT((vi->flags & VI_INIT_DONE) == 0,
5591 	    ("%s: VI_INIT_DONE already", __func__));
5592 
5593 	sysctl_ctx_init(&vi->ctx);
5594 	vi->flags |= VI_SYSCTL_CTX;
5595 
5596 	/*
5597 	 * Allocate tx/rx/fl queues for this VI.
5598 	 */
5599 	rc = t4_setup_vi_queues(vi);
5600 	if (rc != 0)
5601 		goto done;	/* error message displayed already */
5602 
5603 	/*
5604 	 * Setup RSS for this VI.  Save a copy of the RSS table for later use.
5605 	 */
5606 	if (vi->nrxq > vi->rss_size) {
5607 		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
5608 		    "some queues will never receive traffic.\n", vi->nrxq,
5609 		    vi->rss_size);
5610 	} else if (vi->rss_size % vi->nrxq) {
5611 		if_printf(ifp, "nrxq (%d) does not divide hw RSS table size (%d); "
5612 		    "expect uneven traffic distribution.\n", vi->nrxq,
5613 		    vi->rss_size);
5614 	}
5615 #ifdef RSS
5616 	if (vi->nrxq != nbuckets) {
5617 		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d); "
5618 		    "performance will be impacted.\n", vi->nrxq, nbuckets);
5619 	}
5620 #endif
5621 	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
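	/*
	 * Each slot in the indirection table holds the absolute ID of one of
	 * this VI's rx ingress queues.  Without kernel RSS the queues are
	 * simply used round-robin; with it, slot i is backed by whichever
	 * queue the kernel's bucket mapping selects for index i.
	 */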
5622 	for (i = 0; i < vi->rss_size;) {
5623 #ifdef RSS
5624 		j = rss_get_indirection_to_bucket(i);
5625 		j %= vi->nrxq;
5626 		rxq = &sc->sge.rxq[vi->first_rxq + j];
5627 		rss[i++] = rxq->iq.abs_id;
5628 #else
5629 		for_each_rxq(vi, j, rxq) {
5630 			rss[i++] = rxq->iq.abs_id;
5631 			if (i == vi->rss_size)
5632 				break;
5633 		}
5634 #endif
5635 	}
5636 
5637 	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
5638 	    vi->rss_size);
5639 	if (rc != 0) {
5640 		free(rss, M_CXGBE);
5641 		if_printf(ifp, "rss_config failed: %d\n", rc);
5642 		goto done;
5643 	}
5644 
5645 #ifdef RSS
5646 	vi->hashen = hashconfig_to_hashen(hashconfig);
5647 
5648 	/*
5649 	 * We may have had to enable some hashes even though the global config
5650 	 * wants them disabled.  This is a potential problem that must be
5651 	 * reported to the user.
5652 	 */
5653 	extra = hashen_to_hashconfig(vi->hashen) ^ hashconfig;
5654 
5655 	/*
5656 	 * If we consider only the supported hash types, then the enabled hashes
5657 	 * are a superset of the requested hashes.  In other words, there cannot
5658 	 * be any supported hash that was requested but not enabled, but there
5659 	 * can be hashes that were not requested but had to be enabled.
5660 	 */
5661 	extra &= SUPPORTED_RSS_HASHTYPES;
5662 	MPASS((extra & hashconfig) == 0);
5663 
5664 	if (extra) {
5665 		if_printf(ifp,
5666 		    "global RSS config (0x%x) cannot be accommodated.\n",
5667 		    hashconfig);
5668 	}
5669 	if (extra & RSS_HASHTYPE_RSS_IPV4)
5670 		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
5671 	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
5672 		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
5673 	if (extra & RSS_HASHTYPE_RSS_IPV6)
5674 		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
5675 	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
5676 		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
5677 	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
5678 		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
5679 	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
5680 		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
5681 #else
5682 	vi->hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
5683 	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
5684 	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5685 	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
5686 #endif
5687 	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, rss[0], 0, 0);
5688 	if (rc != 0) {
5689 		free(rss, M_CXGBE);
5690 		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
5691 		goto done;
5692 	}
5693 
5694 	vi->rss = rss;
5695 	vi->flags |= VI_INIT_DONE;
5696 done:
5697 	if (rc != 0)
5698 		vi_full_uninit(vi);
5699 
5700 	return (rc);
5701 }
5702 
5703 /*
5704  * Idempotent.
5705  */
5706 int
5707 vi_full_uninit(struct vi_info *vi)
5708 {
5709 	struct port_info *pi = vi->pi;
5710 	struct adapter *sc = pi->adapter;
5711 	int i;
5712 	struct sge_rxq *rxq;
5713 	struct sge_txq *txq;
5714 #ifdef TCP_OFFLOAD
5715 	struct sge_ofld_rxq *ofld_rxq;
5716 #endif
5717 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
5718 	struct sge_wrq *ofld_txq;
5719 #endif
5720 
5721 	if (vi->flags & VI_INIT_DONE) {
5722 
5723 		/* Need to quiesce queues.  */
5724 
5725 		/* XXX: Only for the first VI? */
5726 		if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
5727 			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
5728 
5729 		for_each_txq(vi, i, txq) {
5730 			quiesce_txq(sc, txq);
5731 		}
5732 
5733 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
5734 		for_each_ofld_txq(vi, i, ofld_txq) {
5735 			quiesce_wrq(sc, ofld_txq);
5736 		}
5737 #endif
5738 
5739 		for_each_rxq(vi, i, rxq) {
5740 			quiesce_iq(sc, &rxq->iq);
5741 			quiesce_fl(sc, &rxq->fl);
5742 		}
5743 
5744 #ifdef TCP_OFFLOAD
5745 		for_each_ofld_rxq(vi, i, ofld_rxq) {
5746 			quiesce_iq(sc, &ofld_rxq->iq);
5747 			quiesce_fl(sc, &ofld_rxq->fl);
5748 		}
5749 #endif
5750 		free(vi->rss, M_CXGBE);
5751 		free(vi->nm_rss, M_CXGBE);
5752 	}
5753 
5754 	t4_teardown_vi_queues(vi);
5755 	vi->flags &= ~VI_INIT_DONE;
5756 
5757 	return (0);
5758 }
5759 
5760 static void
5761 quiesce_txq(struct adapter *sc, struct sge_txq *txq)
5762 {
5763 	struct sge_eq *eq = &txq->eq;
5764 	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
5765 
5766 	(void) sc;	/* unused */
5767 
5768 #ifdef INVARIANTS
5769 	TXQ_LOCK(txq);
5770 	MPASS((eq->flags & EQ_ENABLED) == 0);
5771 	TXQ_UNLOCK(txq);
5772 #endif
5773 
5774 	/* Wait for the mp_ring to empty. */
5775 	while (!mp_ring_is_idle(txq->r)) {
5776 		mp_ring_check_drainage(txq->r, 0);
5777 		pause("rquiesce", 1);
5778 	}
5779 
5780 	/* Then wait for the hardware to finish. */
5781 	while (spg->cidx != htobe16(eq->pidx))
5782 		pause("equiesce", 1);
5783 
5784 	/* Finally, wait for the driver to reclaim all descriptors. */
5785 	while (eq->cidx != eq->pidx)
5786 		pause("dquiesce", 1);
5787 }
5788 
5789 static void
5790 quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
5791 {
5792 
5793 	/* XXXTX */
5794 }
5795 
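/*
 * Park the ingress queue in IQS_DISABLED.  The cmpset succeeds only while
 * the queue is IQS_IDLE, so this loop also waits for an interrupt handler
 * that is busy with the queue (and holding it in a non-idle state) to
 * finish.
 */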
5796 static void
5797 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
5798 {
5799 	(void) sc;	/* unused */
5800 
5801 	/* Synchronize with the interrupt handler */
5802 	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
5803 		pause("iqfree", 1);
5804 }
5805 
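/*
 * Mark the freelist doomed so that the starving-freelist callout leaves it
 * alone, then stop the callout.  A doomed freelist must not be on the
 * starving list, which is what the assertion below verifies.
 */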
5806 static void
5807 quiesce_fl(struct adapter *sc, struct sge_fl *fl)
5808 {
5809 	mtx_lock(&sc->sfl_lock);
5810 	FL_LOCK(fl);
5811 	fl->flags |= FL_DOOMED;
5812 	FL_UNLOCK(fl);
5813 	callout_stop(&sc->sfl_callout);
5814 	mtx_unlock(&sc->sfl_lock);
5815 
5816 	KASSERT((fl->flags & FL_STARVING) == 0,
5817 	    ("%s: still starving", __func__));
5818 }
5819 
5820 static int
5821 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
5822     driver_intr_t *handler, void *arg, char *name)
5823 {
5824 	int rc;
5825 
5826 	irq->rid = rid;
5827 	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
5828 	    RF_SHAREABLE | RF_ACTIVE);
5829 	if (irq->res == NULL) {
5830 		device_printf(sc->dev,
5831 		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
5832 		return (ENOMEM);
5833 	}
5834 
5835 	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
5836 	    NULL, handler, arg, &irq->tag);
5837 	if (rc != 0) {
5838 		device_printf(sc->dev,
5839 		    "failed to setup interrupt for rid %d, name %s: %d\n",
5840 		    rid, name, rc);
5841 	} else if (name)
5842 		bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
5843 
5844 	return (rc);
5845 }
5846 
5847 static int
5848 t4_free_irq(struct adapter *sc, struct irq *irq)
5849 {
5850 	if (irq->tag)
5851 		bus_teardown_intr(sc->dev, irq->res, irq->tag);
5852 	if (irq->res)
5853 		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
5854 
5855 	bzero(irq, sizeof(*irq));
5856 
5857 	return (0);
5858 }
5859 
5860 static void
5861 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
5862 {
5863 
5864 	regs->version = chip_id(sc) | chip_rev(sc) << 10;
5865 	t4_get_regs(sc, buf, regs->len);
5866 }
5867 
5868 #define	A_PL_INDIR_CMD	0x1f8
5869 
5870 #define	S_PL_AUTOINC	31
5871 #define	M_PL_AUTOINC	0x1U
5872 #define	V_PL_AUTOINC(x)	((x) << S_PL_AUTOINC)
5873 #define	G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
5874 
5875 #define	S_PL_VFID	20
5876 #define	M_PL_VFID	0xffU
5877 #define	V_PL_VFID(x)	((x) << S_PL_VFID)
5878 #define	G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)
5879 
5880 #define	S_PL_ADDR	0
5881 #define	M_PL_ADDR	0xfffffU
5882 #define	V_PL_ADDR(x)	((x) << S_PL_ADDR)
5883 #define	G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)
5884 
5885 #define	A_PL_INDIR_DATA	0x1fc
5886 
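/*
 * On a PF the VF MPS statistics are reached through the PL indirect register
 * window.  With V_PL_AUTOINC(1) set in A_PL_INDIR_CMD the target address
 * advances on every access of A_PL_INDIR_DATA, so the two back-to-back reads
 * below return the low and then the high 32 bits of a 64-bit counter.
 */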
5887 static uint64_t
5888 read_vf_stat(struct adapter *sc, u_int vin, int reg)
5889 {
5890 	u32 stats[2];
5891 
5892 	mtx_assert(&sc->reg_lock, MA_OWNED);
5893 	if (sc->flags & IS_VF) {
5894 		stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
5895 		stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
5896 	} else {
5897 		t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
5898 		    V_PL_VFID(vin) | V_PL_ADDR(VF_MPS_REG(reg)));
5899 		stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
5900 		stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
5901 	}
5902 	return (((uint64_t)stats[1]) << 32 | stats[0]);
5903 }
5904 
5905 static void
5906 t4_get_vi_stats(struct adapter *sc, u_int vin, struct fw_vi_stats_vf *stats)
5907 {
5908 
5909 #define GET_STAT(name) \
5910 	read_vf_stat(sc, vin, A_MPS_VF_STAT_##name##_L)
5911 
5912 	stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
5913 	stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
5914 	stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
5915 	stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
5916 	stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
5917 	stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
5918 	stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
5919 	stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
5920 	stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
5921 	stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
5922 	stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
5923 	stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
5924 	stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
5925 	stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
5926 	stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
5927 	stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
5928 
5929 #undef GET_STAT
5930 }
5931 
5932 static void
5933 t4_clr_vi_stats(struct adapter *sc, u_int vin)
5934 {
5935 	int reg;
5936 
5937 	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(vin) |
5938 	    V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
5939 	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
5940 	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
5941 		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
5942 }
5943 
5944 static void
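/*
 * Refresh the cached VI stats from hardware, at most once every 250ms.
 * Callers that arrive within the interval see the previously read values.
 */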
5945 vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
5946 {
5947 	struct timeval tv;
5948 	const struct timeval interval = {0, 250000};	/* 250ms */
5949 
5950 	if (!(vi->flags & VI_INIT_DONE))
5951 		return;
5952 
5953 	getmicrotime(&tv);
5954 	timevalsub(&tv, &interval);
5955 	if (timevalcmp(&tv, &vi->last_refreshed, <))
5956 		return;
5957 
5958 	mtx_lock(&sc->reg_lock);
5959 	t4_get_vi_stats(sc, vi->vin, &vi->stats);
5960 	getmicrotime(&vi->last_refreshed);
5961 	mtx_unlock(&sc->reg_lock);
5962 }
5963 
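/*
 * Same 250ms rate limit as vi_refresh_stats().  Besides the MPS port stats
 * this also walks the port's MPS buffer-group map and sums the per-group
 * tunnel congestion drops out of the TP MIB.
 */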
5964 static void
5965 cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
5966 {
5967 	u_int i, v, tnl_cong_drops, bg_map;
5968 	struct timeval tv;
5969 	const struct timeval interval = {0, 250000};	/* 250ms */
5970 
5971 	getmicrotime(&tv);
5972 	timevalsub(&tv, &interval);
5973 	if (timevalcmp(&tv, &pi->last_refreshed, <))
5974 		return;
5975 
5976 	tnl_cong_drops = 0;
5977 	t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
5978 	bg_map = pi->mps_bg_map;
5979 	while (bg_map) {
5980 		i = ffs(bg_map) - 1;
5981 		mtx_lock(&sc->reg_lock);
5982 		t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
5983 		    A_TP_MIB_TNL_CNG_DROP_0 + i);
5984 		mtx_unlock(&sc->reg_lock);
5985 		tnl_cong_drops += v;
5986 		bg_map &= ~(1 << i);
5987 	}
5988 	pi->tnl_cong_drops = tnl_cong_drops;
5989 	getmicrotime(&pi->last_refreshed);
5990 }
5991 
5992 static void
5993 cxgbe_tick(void *arg)
5994 {
5995 	struct port_info *pi = arg;
5996 	struct adapter *sc = pi->adapter;
5997 
5998 	PORT_LOCK_ASSERT_OWNED(pi);
5999 	cxgbe_refresh_stats(sc, pi);
6000 
6001 	callout_schedule(&pi->tick, hz);
6002 }
6003 
6004 void
6005 vi_tick(void *arg)
6006 {
6007 	struct vi_info *vi = arg;
6008 	struct adapter *sc = vi->pi->adapter;
6009 
6010 	vi_refresh_stats(sc, vi);
6011 
6012 	callout_schedule(&vi->tick, hz);
6013 }
6014 
6015 /*
6016  * Should match fw_caps_config_<foo> enums in t4fw_interface.h
6017  */
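/*
 * Each entry is a printf(9) %b format string: the leading \20 selects
 * hexadecimal output and each subsequent \NNN is a 1-based bit position
 * followed by that bit's name.  sysctl_bitfield_16b() feeds these to
 * sbuf_printf(sb, "%b", ...), so a niccaps value of 3 renders along the
 * lines of "3<NIC,VM>".
 */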
6018 static char *caps_decoder[] = {
6019 	"\20\001IPMI\002NCSI",				/* 0: NBM */
6020 	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
6021 	"\20\001INGRESS\002EGRESS",			/* 2: switch */
6022 	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
6023 	    "\006HASHFILTER\007ETHOFLD",
6024 	"\20\001TOE",					/* 4: TOE */
6025 	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
6026 	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
6027 	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
6028 	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
6029 	    "\007T10DIF"
6030 	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
6031 	"\20\001LOOKASIDE\002TLSKEYS",			/* 7: Crypto */
6032 	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
6033 		    "\004PO_INITIATOR\005PO_TARGET",
6034 };
6035 
6036 void
6037 t4_sysctls(struct adapter *sc)
6038 {
6039 	struct sysctl_ctx_list *ctx;
6040 	struct sysctl_oid *oid;
6041 	struct sysctl_oid_list *children, *c0;
6042 	static char *doorbells = "\20\1UDB\2WCWR\3UDBWC\4KDB";
6043 
6044 	ctx = device_get_sysctl_ctx(sc->dev);
6045 
6046 	/*
6047 	 * dev.t4nex.X.
6048 	 */
6049 	oid = device_get_sysctl_tree(sc->dev);
6050 	c0 = children = SYSCTL_CHILDREN(oid);
6051 
6052 	sc->sc_do_rxcopy = 1;
6053 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
6054 	    &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
6055 
6056 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
6057 	    sc->params.nports, "# of ports");
6058 
6059 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
6060 	    CTLTYPE_STRING | CTLFLAG_RD, doorbells, (uintptr_t)&sc->doorbells,
6061 	    sysctl_bitfield_8b, "A", "available doorbells");
6062 
6063 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
6064 	    sc->params.vpd.cclk, "core clock frequency (in kHz)");
6065 
6066 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
6067 	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
6068 	    sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
6069 	    "interrupt holdoff timer values (us)");
6070 
6071 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
6072 	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
6073 	    sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
6074 	    "interrupt holdoff packet counter values");
6075 
6076 	t4_sge_sysctls(sc, ctx, children);
6077 
6078 	sc->lro_timeout = 100;
6079 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
6080 	    &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
6081 
6082 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
6083 	    &sc->debug_flags, 0, "flags to enable runtime debugging");
6084 
6085 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
6086 	    CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
6087 
6088 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
6089 	    CTLFLAG_RD, sc->fw_version, 0, "firmware version");
6090 
6091 	if (sc->flags & IS_VF)
6092 		return;
6093 
6094 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
6095 	    NULL, chip_rev(sc), "chip hardware revision");
6096 
6097 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
6098 	    CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
6099 
6100 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
6101 	    CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
6102 
6103 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
6104 	    CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
6105 
6106 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
6107 	    CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");
6108 
6109 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
6110 	    CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
6111 
6112 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
6113 	    sc->er_version, 0, "expansion ROM version");
6114 
6115 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
6116 	    sc->bs_version, 0, "bootstrap firmware version");
6117 
6118 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
6119 	    NULL, sc->params.scfg_vers, "serial config version");
6120 
6121 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
6122 	    NULL, sc->params.vpd_vers, "VPD version");
6123 
6124 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
6125 	    CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
6126 
6127 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
6128 	    sc->cfcsum, "config file checksum");
6129 
6130 #define SYSCTL_CAP(name, n, text) \
6131 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
6132 	    CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], (uintptr_t)&sc->name, \
6133 	    sysctl_bitfield_16b, "A", "available " text " capabilities")
6134 
6135 	SYSCTL_CAP(nbmcaps, 0, "NBM");
6136 	SYSCTL_CAP(linkcaps, 1, "link");
6137 	SYSCTL_CAP(switchcaps, 2, "switch");
6138 	SYSCTL_CAP(niccaps, 3, "NIC");
6139 	SYSCTL_CAP(toecaps, 4, "TCP offload");
6140 	SYSCTL_CAP(rdmacaps, 5, "RDMA");
6141 	SYSCTL_CAP(iscsicaps, 6, "iSCSI");
6142 	SYSCTL_CAP(cryptocaps, 7, "crypto");
6143 	SYSCTL_CAP(fcoecaps, 8, "FCoE");
6144 #undef SYSCTL_CAP
6145 
6146 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
6147 	    NULL, sc->tids.nftids, "number of filters");
6148 
6149 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
6150 	    CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
6151 	    "chip temperature (in Celsius)");
6152 
6153 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg", CTLTYPE_STRING |
6154 	    CTLFLAG_RD, sc, 0, sysctl_loadavg, "A",
6155 	    "microprocessor load averages (debug firmware only)");
6156 
6157 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "core_vdd", CTLTYPE_INT |
6158 	    CTLFLAG_RD, sc, 0, sysctl_vdd, "I", "core Vdd (in mV)");
6159 
6160 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus",
6161 	    CTLTYPE_STRING | CTLFLAG_RD, sc, LOCAL_CPUS,
6162 	    sysctl_cpus, "A", "local CPUs");
6163 
6164 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus",
6165 	    CTLTYPE_STRING | CTLFLAG_RD, sc, INTR_CPUS,
6166 	    sysctl_cpus, "A", "preferred CPUs for interrupts");
6167 
6168 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "swintr", CTLFLAG_RW,
6169 	    &sc->swintr, 0, "software triggered interrupts");
6170 
6171 	/*
6172 	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
6173 	 */
6174 	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
6175 	    CTLFLAG_RD | CTLFLAG_SKIP, NULL,
6176 	    "logs and miscellaneous information");
6177 	children = SYSCTL_CHILDREN(oid);
6178 
6179 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
6180 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6181 	    sysctl_cctrl, "A", "congestion control");
6182 
6183 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
6184 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6185 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
6186 
6187 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
6188 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
6189 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
6190 
6191 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
6192 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
6193 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
6194 
6195 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
6196 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
6197 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
6198 
6199 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
6200 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
6201 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
6202 
6203 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
6204 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
6205 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
6206 
6207 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
6208 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_la,
6209 	    "A", "CIM logic analyzer");
6210 
6211 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
6212 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6213 	    sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
6214 
6215 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
6216 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
6217 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
6218 
6219 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
6220 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
6221 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
6222 
6223 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
6224 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
6225 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
6226 
6227 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
6228 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
6229 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
6230 
6231 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
6232 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
6233 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
6234 
6235 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
6236 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
6237 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
6238 
6239 	if (chip_id(sc) > CHELSIO_T4) {
6240 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
6241 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
6242 		    sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
6243 
6244 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
6245 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
6246 		    sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
6247 	}
6248 
6249 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
6250 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6251 	    sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
6252 
6253 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
6254 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6255 	    sysctl_cim_qcfg, "A", "CIM queue configuration");
6256 
6257 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
6258 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6259 	    sysctl_cpl_stats, "A", "CPL statistics");
6260 
6261 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
6262 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6263 	    sysctl_ddp_stats, "A", "non-TCP DDP statistics");
6264 
6265 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
6266 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6267 	    sysctl_devlog, "A", "firmware's device log");
6268 
6269 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
6270 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6271 	    sysctl_fcoe_stats, "A", "FCoE statistics");
6272 
6273 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
6274 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6275 	    sysctl_hw_sched, "A", "hardware scheduler");
6276 
6277 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
6278 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6279 	    sysctl_l2t, "A", "hardware L2 table");
6280 
6281 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt",
6282 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6283 	    sysctl_smt, "A", "hardware source MAC table");
6284 
6285 #ifdef INET6
6286 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "clip",
6287 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6288 	    sysctl_clip, "A", "active CLIP table entries");
6289 #endif
6290 
6291 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
6292 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6293 	    sysctl_lb_stats, "A", "loopback statistics");
6294 
6295 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
6296 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6297 	    sysctl_meminfo, "A", "memory regions");
6298 
6299 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
6300 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6301 	    chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
6302 	    "A", "MPS TCAM entries");
6303 
6304 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
6305 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6306 	    sysctl_path_mtus, "A", "path MTUs");
6307 
6308 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
6309 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6310 	    sysctl_pm_stats, "A", "PM statistics");
6311 
6312 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
6313 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6314 	    sysctl_rdma_stats, "A", "RDMA statistics");
6315 
6316 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
6317 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6318 	    sysctl_tcp_stats, "A", "TCP statistics");
6319 
6320 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
6321 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6322 	    sysctl_tids, "A", "TID information");
6323 
6324 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
6325 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6326 	    sysctl_tp_err_stats, "A", "TP error statistics");
6327 
6328 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
6329 	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
6330 	    "TP logic analyzer event capture mask");
6331 
6332 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
6333 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6334 	    sysctl_tp_la, "A", "TP logic analyzer");
6335 
6336 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
6337 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6338 	    sysctl_tx_rate, "A", "Tx rate");
6339 
6340 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
6341 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6342 	    sysctl_ulprx_la, "A", "ULPRX logic analyzer");
6343 
6344 	if (chip_id(sc) >= CHELSIO_T5) {
6345 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
6346 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6347 		    sysctl_wcwr_stats, "A", "write combined work requests");
6348 	}
6349 
6350 #ifdef TCP_OFFLOAD
6351 	if (is_offload(sc)) {
6352 		int i;
6353 		char s[4];
6354 
6355 		/*
6356 		 * dev.t4nex.X.toe.
6357 		 */
6358 		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
6359 		    NULL, "TOE parameters");
6360 		children = SYSCTL_CHILDREN(oid);
6361 
6362 		sc->tt.cong_algorithm = -1;
6363 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
6364 		    CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
6365 		    "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
6366 		    "3 = highspeed)");
6367 
6368 		sc->tt.sndbuf = -1;
6369 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
6370 		    &sc->tt.sndbuf, 0, "hardware send buffer");
6371 
6372 		sc->tt.ddp = 0;
6373 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp",
6374 		    CTLFLAG_RW | CTLFLAG_SKIP, &sc->tt.ddp, 0, "");
6375 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_zcopy", CTLFLAG_RW,
6376 		    &sc->tt.ddp, 0, "Enable zero-copy aio_read(2)");
6377 
6378 		sc->tt.rx_coalesce = -1;
6379 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
6380 		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
6381 
6382 		sc->tt.tls = 0;
6383 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW,
6384 		    &sc->tt.tls, 0, "Inline TLS allowed");
6385 
6386 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
6387 		    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports,
6388 		    "I", "TCP ports that use inline TLS+TOE RX");
6389 
6390 		sc->tt.tx_align = -1;
6391 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
6392 		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
6393 
6394 		sc->tt.tx_zcopy = 0;
6395 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
6396 		    CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
6397 		    "Enable zero-copy aio_write(2)");
6398 
6399 		sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
6400 		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
6401 		    "cop_managed_offloading", CTLFLAG_RW,
6402 		    &sc->tt.cop_managed_offloading, 0,
6403 		    "COP (Connection Offload Policy) controls all TOE offload");
6404 
6405 		sc->tt.autorcvbuf_inc = 16 * 1024;
6406 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "autorcvbuf_inc",
6407 		    CTLFLAG_RW, &sc->tt.autorcvbuf_inc, 0,
6408 		    "autorcvbuf increment");
6409 
6410 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
6411 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
6412 		    "TP timer tick (us)");
6413 
6414 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
6415 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
6416 		    "TCP timestamp tick (us)");
6417 
6418 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
6419 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
6420 		    "DACK tick (us)");
6421 
6422 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
6423 		    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
6424 		    "IU", "DACK timer (us)");
6425 
6426 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
6427 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
6428 		    sysctl_tp_timer, "LU", "Minimum retransmit interval (us)");
6429 
6430 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
6431 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
6432 		    sysctl_tp_timer, "LU", "Maximum retransmit interval (us)");
6433 
6434 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
6435 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
6436 		    sysctl_tp_timer, "LU", "Persist timer min (us)");
6437 
6438 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
6439 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
6440 		    sysctl_tp_timer, "LU", "Persist timer max (us)");
6441 
6442 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
6443 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
6444 		    sysctl_tp_timer, "LU", "Keepalive idle timer (us)");
6445 
6446 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
6447 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
6448 		    sysctl_tp_timer, "LU", "Keepalive interval timer (us)");
6449 
6450 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
6451 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
6452 		    sysctl_tp_timer, "LU", "Initial SRTT (us)");
6453 
6454 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
6455 		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
6456 		    sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");
6457 
6458 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
6459 		    CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX,
6460 		    sysctl_tp_shift_cnt, "IU",
6461 		    "Number of SYN retransmissions before abort");
6462 
6463 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
6464 		    CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2,
6465 		    sysctl_tp_shift_cnt, "IU",
6466 		    "Number of retransmissions before abort");
6467 
6468 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
6469 		    CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2,
6470 		    sysctl_tp_shift_cnt, "IU",
6471 		    "Number of keepalive probes before abort");
6472 
6473 		oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
6474 		    CTLFLAG_RD, NULL, "TOE retransmit backoffs");
6475 		children = SYSCTL_CHILDREN(oid);
6476 		for (i = 0; i < 16; i++) {
6477 			snprintf(s, sizeof(s), "%u", i);
6478 			SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
6479 			    CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff,
6480 			    "IU", "TOE retransmit backoff");
6481 		}
6482 	}
6483 #endif
6484 }
6485 
6486 void
6487 vi_sysctls(struct vi_info *vi)
6488 {
6489 	struct sysctl_ctx_list *ctx;
6490 	struct sysctl_oid *oid;
6491 	struct sysctl_oid_list *children;
6492 
6493 	ctx = device_get_sysctl_ctx(vi->dev);
6494 
6495 	/*
6496 	 * dev.v?(cxgbe|cxl).X.
6497 	 */
6498 	oid = device_get_sysctl_tree(vi->dev);
6499 	children = SYSCTL_CHILDREN(oid);
6500 
6501 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
6502 	    vi->viid, "VI identifier");
6503 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
6504 	    &vi->nrxq, 0, "# of rx queues");
6505 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
6506 	    &vi->ntxq, 0, "# of tx queues");
6507 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
6508 	    &vi->first_rxq, 0, "index of first rx queue");
6509 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
6510 	    &vi->first_txq, 0, "index of first tx queue");
6511 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_base", CTLFLAG_RD, NULL,
6512 	    vi->rss_base, "start of RSS indirection table");
6513 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
6514 	    vi->rss_size, "size of RSS indirection table");
6515 
6516 	if (IS_MAIN_VI(vi)) {
6517 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
6518 		    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
6519 		    "Reserve queue 0 for non-flowid packets");
6520 	}
6521 
6522 #ifdef TCP_OFFLOAD
6523 	if (vi->nofldrxq != 0) {
6524 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
6525 		    &vi->nofldrxq, 0,
6526 		    "# of rx queues for offloaded TCP connections");
6527 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
6528 		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
6529 		    "index of first TOE rx queue");
6530 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
6531 		    CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6532 		    sysctl_holdoff_tmr_idx_ofld, "I",
6533 		    "holdoff timer index for TOE queues");
6534 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
6535 		    CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6536 		    sysctl_holdoff_pktc_idx_ofld, "I",
6537 		    "holdoff packet counter index for TOE queues");
6538 	}
6539 #endif
6540 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6541 	if (vi->nofldtxq != 0) {
6542 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
6543 		    &vi->nofldtxq, 0,
6544 		    "# of tx queues for TOE/ETHOFLD");
6545 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
6546 		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
6547 		    "index of first TOE/ETHOFLD tx queue");
6548 	}
6549 #endif
6550 #ifdef DEV_NETMAP
6551 	if (vi->nnmrxq != 0) {
6552 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
6553 		    &vi->nnmrxq, 0, "# of netmap rx queues");
6554 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
6555 		    &vi->nnmtxq, 0, "# of netmap tx queues");
6556 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
6557 		    CTLFLAG_RD, &vi->first_nm_rxq, 0,
6558 		    "index of first netmap rx queue");
6559 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
6560 		    CTLFLAG_RD, &vi->first_nm_txq, 0,
6561 		    "index of first netmap tx queue");
6562 	}
6563 #endif
6564 
6565 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
6566 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
6567 	    "holdoff timer index");
6568 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
6569 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
6570 	    "holdoff packet counter index");
6571 
6572 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
6573 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
6574 	    "rx queue size");
6575 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
6576 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
6577 	    "tx queue size");
6578 }
6579 
6580 static void
6581 cxgbe_sysctls(struct port_info *pi)
6582 {
6583 	struct sysctl_ctx_list *ctx;
6584 	struct sysctl_oid *oid;
6585 	struct sysctl_oid_list *children, *children2;
6586 	struct adapter *sc = pi->adapter;
6587 	int i;
6588 	char name[16];
6589 	static char *tc_flags = "\20\1USER\2SYNC\3ASYNC\4ERR";
6590 
6591 	ctx = device_get_sysctl_ctx(pi->dev);
6592 
6593 	/*
6594 	 * dev.cxgbe.X.
6595 	 */
6596 	oid = device_get_sysctl_tree(pi->dev);
6597 	children = SYSCTL_CHILDREN(oid);
6598 
6599 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
6600 	   CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
6601 	if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
6602 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
6603 		    CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
6604 		    "PHY temperature (in Celsius)");
6605 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
6606 		    CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
6607 		    "PHY firmware version");
6608 	}
6609 
6610 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
6611 	    CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A",
6612     "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause, bit 2 = pause_autoneg)");
6613 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
6614 	    CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A",
6615 	    "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
6616 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
6617 	    CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I",
6618 	    "autonegotiation (-1 = not supported)");
6619 
6620 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
6621 	    port_top_speed(pi), "max speed (in Gbps)");
6622 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
6623 	    pi->mps_bg_map, "MPS buffer group map");
6624 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
6625 	    NULL, pi->rx_e_chan_map, "TP rx e-channel map");
6626 
6627 	if (sc->flags & IS_VF)
6628 		return;
6629 
6630 	/*
6631 	 * dev.(cxgbe|cxl).X.tc.
6632 	 */
6633 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
6634 	    "Tx scheduler traffic classes (cl_rl)");
6635 	children2 = SYSCTL_CHILDREN(oid);
6636 	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
6637 	    CTLFLAG_RW, &pi->sched_params->pktsize, 0,
6638 	    "pktsize for per-flow cl-rl (0 means up to the driver)");
6639 	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
6640 	    CTLFLAG_RW, &pi->sched_params->burstsize, 0,
6641 	    "burstsize for per-flow cl-rl (0 means up to the driver)");
6642 	for (i = 0; i < sc->chip_params->nsched_cls; i++) {
6643 		struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
6644 
6645 		snprintf(name, sizeof(name), "%d", i);
6646 		children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
6647 		    SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
6648 		    "traffic class"));
6649 		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
6650 		    CTLTYPE_STRING | CTLFLAG_RD, tc_flags, (uintptr_t)&tc->flags,
6651 		    sysctl_bitfield_8b, "A", "flags");
6652 		SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
6653 		    CTLFLAG_RD, &tc->refcount, 0, "references to this class");
6654 		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
6655 		    CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i,
6656 		    sysctl_tc_params, "A", "traffic class parameters");
6657 	}
6658 
6659 	/*
6660 	 * dev.cxgbe.X.stats.
6661 	 */
6662 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
6663 	    NULL, "port statistics");
6664 	children = SYSCTL_CHILDREN(oid);
6665 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
6666 	    &pi->tx_parse_error, 0,
6667 	    "# of tx packets with invalid length or segment count");
6668 
6669 #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
6670 	SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
6671 	    CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
6672 	    sysctl_handle_t4_reg64, "QU", desc)
6673 
6674 	SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
6675 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
6676 	SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
6677 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
6678 	SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
6679 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
6680 	SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
6681 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
6682 	SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
6683 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
6684 	SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
6685 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
6686 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
6687 	    "# of tx frames in this range",
6688 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
6689 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
6690 	    "# of tx frames in this range",
6691 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
6692 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
6693 	    "# of tx frames in this range",
6694 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
6695 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
6696 	    "# of tx frames in this range",
6697 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
6698 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
6699 	    "# of tx frames in this range",
6700 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
6701 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
6702 	    "# of tx frames in this range",
6703 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
6704 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
6705 	    "# of tx frames in this range",
6706 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
6707 	SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
6708 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
6709 	SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
6710 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
6711 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
6712 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
6713 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
6714 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
6715 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
6716 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
6717 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
6718 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
6719 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
6720 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
6721 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
6722 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
6723 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
6724 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
6725 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
6726 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
6727 
6728 	SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
6729 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
6730 	SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
6731 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
6732 	SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
6733 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
6734 	SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
6735 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
6736 	SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
6737 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
6738 	SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
6739 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
6740 	SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
6741 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
6742 	SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
6743 	    "# of frames received with bad FCS",
6744 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
6745 	SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
6746 	    "# of frames received with length error",
6747 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
6748 	SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
6749 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
6750 	SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
6751 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
6752 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
6753 	    "# of rx frames in this range",
6754 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
6755 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
6756 	    "# of rx frames in this range",
6757 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
6758 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
6759 	    "# of rx frames in this range",
6760 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
6761 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
6762 	    "# of rx frames in this range",
6763 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
6764 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
6765 	    "# of rx frames in this range",
6766 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
6767 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
6768 	    "# of rx frames in this range",
6769 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
6770 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
6771 	    "# of rx frames in this range",
6772 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
6773 	SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
6774 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
6775 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
6776 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
6777 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
6778 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
6779 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
6780 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
6781 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
6782 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
6783 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
6784 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
6785 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
6786 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
6787 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
6788 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
6789 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
6790 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
6791 
6792 #undef SYSCTL_ADD_T4_REG64
6793 
6794 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
6795 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
6796 	    &pi->stats.name, desc)
6797 
6798 	/* We get these from port_stats and they may be stale by up to 1s */
6799 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
6800 	    "# drops due to buffer-group 0 overflows");
6801 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
6802 	    "# drops due to buffer-group 1 overflows");
6803 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
6804 	    "# drops due to buffer-group 2 overflows");
6805 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
6806 	    "# drops due to buffer-group 3 overflows");
6807 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
6808 	    "# of buffer-group 0 truncated packets");
6809 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
6810 	    "# of buffer-group 1 truncated packets");
6811 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
6812 	    "# of buffer-group 2 truncated packets");
6813 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
6814 	    "# of buffer-group 3 truncated packets");
6815 
6816 #undef SYSCTL_ADD_T4_PORTSTAT
6817 
6818 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records",
6819 	    CTLFLAG_RD, &pi->tx_tls_records,
6820 	    "# of TLS records transmitted");
6821 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets",
6822 	    CTLFLAG_RD, &pi->tx_tls_octets,
6823 	    "# of payload octets in transmitted TLS records");
6824 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records",
6825 	    CTLFLAG_RD, &pi->rx_tls_records,
6826 	    "# of TLS records received");
6827 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets",
6828 	    CTLFLAG_RD, &pi->rx_tls_octets,
6829 	    "# of payload octets in received TLS records");
6830 }
6831 
6832 static int
6833 sysctl_int_array(SYSCTL_HANDLER_ARGS)
6834 {
6835 	int rc, *i, space = 0;
6836 	struct sbuf sb;
6837 
6838 	sbuf_new_for_sysctl(&sb, NULL, 64, req);
6839 	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
6840 		if (space)
6841 			sbuf_printf(&sb, " ");
6842 		sbuf_printf(&sb, "%d", *i);
6843 		space = 1;
6844 	}
6845 	rc = sbuf_finish(&sb);
6846 	sbuf_delete(&sb);
6847 	return (rc);
6848 }
6849 
6850 static int
6851 sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
6852 {
6853 	int rc;
6854 	struct sbuf *sb;
6855 
6856 	rc = sysctl_wire_old_buffer(req, 0);
6857 	if (rc != 0)
6858 		return (rc);
6859 
6860 	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6861 	if (sb == NULL)
6862 		return (ENOMEM);
6863 
6864 	sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
6865 	rc = sbuf_finish(sb);
6866 	sbuf_delete(sb);
6867 
6868 	return (rc);
6869 }
6870 
6871 static int
6872 sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
6873 {
6874 	int rc;
6875 	struct sbuf *sb;
6876 
6877 	rc = sysctl_wire_old_buffer(req, 0);
6878 	if (rc != 0)
6879 		return (rc);
6880 
6881 	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6882 	if (sb == NULL)
6883 		return (ENOMEM);
6884 
6885 	sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
6886 	rc = sbuf_finish(sb);
6887 	sbuf_delete(sb);
6888 
6889 	return (rc);
6890 }
6891 
6892 static int
6893 sysctl_btphy(SYSCTL_HANDLER_ARGS)
6894 {
6895 	struct port_info *pi = arg1;
6896 	int op = arg2;
6897 	struct adapter *sc = pi->adapter;
6898 	u_int v;
6899 	int rc;
6900 
6901 	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
6902 	if (rc)
6903 		return (rc);
6904 	/* XXX: magic numbers */
6905 	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
6906 	    &v);
6907 	end_synchronized_op(sc, 0);
6908 	if (rc)
6909 		return (rc);
6910 	if (op == 0)
6911 		v /= 256;
6912 
6913 	rc = sysctl_handle_int(oidp, &v, 0, req);
6914 	return (rc);
6915 }
6916 
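/*
 * Set or clear vi->rsrv_noflowq, which reserves the VI's txq 0 for
 * packets without a flowid.  Any nonzero input enables the reservation,
 * but only when the VI has more than one txq.
 */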
6917 static int
6918 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
6919 {
6920 	struct vi_info *vi = arg1;
6921 	int rc, val;
6922 
6923 	val = vi->rsrv_noflowq;
6924 	rc = sysctl_handle_int(oidp, &val, 0, req);
6925 	if (rc != 0 || req->newptr == NULL)
6926 		return (rc);
6927 
6928 	if ((val >= 1) && (vi->ntxq > 1))
6929 		vi->rsrv_noflowq = 1;
6930 	else
6931 		vi->rsrv_noflowq = 0;
6932 
6933 	return (rc);
6934 }
6935 
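/*
 * Change the holdoff timer index used by all of the VI's rx queues.  The
 * new value is applied to live queues immediately, with a release store
 * on platforms that provide atomic_store_rel_8.
 */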
6936 static int
6937 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
6938 {
6939 	struct vi_info *vi = arg1;
6940 	struct adapter *sc = vi->pi->adapter;
6941 	int idx, rc, i;
6942 	struct sge_rxq *rxq;
6943 	uint8_t v;
6944 
6945 	idx = vi->tmr_idx;
6946 
6947 	rc = sysctl_handle_int(oidp, &idx, 0, req);
6948 	if (rc != 0 || req->newptr == NULL)
6949 		return (rc);
6950 
6951 	if (idx < 0 || idx >= SGE_NTIMERS)
6952 		return (EINVAL);
6953 
6954 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6955 	    "t4tmr");
6956 	if (rc)
6957 		return (rc);
6958 
6959 	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
6960 	for_each_rxq(vi, i, rxq) {
6961 #ifdef atomic_store_rel_8
6962 		atomic_store_rel_8(&rxq->iq.intr_params, v);
6963 #else
6964 		rxq->iq.intr_params = v;
6965 #endif
6966 	}
6967 	vi->tmr_idx = idx;
6968 
6969 	end_synchronized_op(sc, LOCK_HELD);
6970 	return (0);
6971 }
6972 
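/*
 * Change the holdoff packet-count index of the VI.  Unlike the timer
 * index above, this cannot be applied to queues that already exist, so
 * writes fail with EBUSY once VI_INIT_DONE is set.
 */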
6973 static int
6974 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
6975 {
6976 	struct vi_info *vi = arg1;
6977 	struct adapter *sc = vi->pi->adapter;
6978 	int idx, rc;
6979 
6980 	idx = vi->pktc_idx;
6981 
6982 	rc = sysctl_handle_int(oidp, &idx, 0, req);
6983 	if (rc != 0 || req->newptr == NULL)
6984 		return (rc);
6985 
6986 	if (idx < -1 || idx >= SGE_NCOUNTERS)
6987 		return (EINVAL);
6988 
6989 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6990 	    "t4pktc");
6991 	if (rc)
6992 		return (rc);
6993 
6994 	if (vi->flags & VI_INIT_DONE)
6995 		rc = EBUSY; /* cannot be changed once the queues are created */
6996 	else
6997 		vi->pktc_idx = idx;
6998 
6999 	end_synchronized_op(sc, LOCK_HELD);
7000 	return (rc);
7001 }
7002 
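/*
 * Validate and set the requested rx queue size for the VI: at least 128
 * and a multiple of 8.  Like qsize_txq below, it can only be changed
 * before the VI's queues are created.
 */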
7003 static int
7004 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
7005 {
7006 	struct vi_info *vi = arg1;
7007 	struct adapter *sc = vi->pi->adapter;
7008 	int qsize, rc;
7009 
7010 	qsize = vi->qsize_rxq;
7011 
7012 	rc = sysctl_handle_int(oidp, &qsize, 0, req);
7013 	if (rc != 0 || req->newptr == NULL)
7014 		return (rc);
7015 
7016 	if (qsize < 128 || (qsize & 7))
7017 		return (EINVAL);
7018 
7019 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7020 	    "t4rxqs");
7021 	if (rc)
7022 		return (rc);
7023 
7024 	if (vi->flags & VI_INIT_DONE)
7025 		rc = EBUSY; /* cannot be changed once the queues are created */
7026 	else
7027 		vi->qsize_rxq = qsize;
7028 
7029 	end_synchronized_op(sc, LOCK_HELD);
7030 	return (rc);
7031 }
7032 
7033 static int
7034 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
7035 {
7036 	struct vi_info *vi = arg1;
7037 	struct adapter *sc = vi->pi->adapter;
7038 	int qsize, rc;
7039 
7040 	qsize = vi->qsize_txq;
7041 
7042 	rc = sysctl_handle_int(oidp, &qsize, 0, req);
7043 	if (rc != 0 || req->newptr == NULL)
7044 		return (rc);
7045 
7046 	if (qsize < 128 || qsize > 65536)
7047 		return (EINVAL);
7048 
7049 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7050 	    "t4txqs");
7051 	if (rc)
7052 		return (rc);
7053 
7054 	if (vi->flags & VI_INIT_DONE)
7055 		rc = EBUSY; /* cannot be changed once the queues are created */
7056 	else
7057 		vi->qsize_txq = qsize;
7058 
7059 	end_synchronized_op(sc, LOCK_HELD);
7060 	return (rc);
7061 }
7062 
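/*
 * Get/set the port's PAUSE settings.  Reads report the negotiated
 * settings when the link is up and the requested ones otherwise.  Writes
 * take a single digit that is a mask of RX (1), TX (2), and AUTO (4),
 * matching the %b string below, and are applied immediately if any VI on
 * the port is up.
 */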
7063 static int
7064 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
7065 {
7066 	struct port_info *pi = arg1;
7067 	struct adapter *sc = pi->adapter;
7068 	struct link_config *lc = &pi->link_cfg;
7069 	int rc;
7070 
7071 	if (req->newptr == NULL) {
7072 		struct sbuf *sb;
7073 		static char *bits = "\20\1RX\2TX\3AUTO";
7074 
7075 		rc = sysctl_wire_old_buffer(req, 0);
7076 		if (rc != 0)
7077 			return (rc);
7078 
7079 		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7080 		if (sb == NULL)
7081 			return (ENOMEM);
7082 
7083 		if (lc->link_ok) {
7084 			sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
7085 			    (lc->requested_fc & PAUSE_AUTONEG), bits);
7086 		} else {
7087 			sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
7088 			    PAUSE_RX | PAUSE_AUTONEG), bits);
7089 		}
7090 		rc = sbuf_finish(sb);
7091 		sbuf_delete(sb);
7092 	} else {
7093 		char s[2];
7094 		int n;
7095 
7096 		s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
7097 		    PAUSE_AUTONEG));
7098 		s[1] = 0;
7099 
7100 		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7101 		if (rc != 0)
7102 			return (rc);
7103 
7104 		if (s[1] != 0)
7105 			return (EINVAL);
7106 		if (s[0] < '0' || s[0] > '9')
7107 			return (EINVAL);	/* not a number */
7108 		n = s[0] - '0';
7109 		if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
7110 			return (EINVAL);	/* some other bit is set too */
7111 
7112 		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7113 		    "t4PAUSE");
7114 		if (rc)
7115 			return (rc);
7116 		PORT_LOCK(pi);
7117 		lc->requested_fc = n;
7118 		fixup_link_config(pi);
7119 		if (pi->up_vis > 0)
7120 			rc = apply_link_config(pi);
7121 		set_current_media(pi);
7122 		PORT_UNLOCK(pi);
7123 		end_synchronized_op(sc, 0);
7124 	}
7125 
7126 	return (rc);
7127 }
7128 
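/*
 * Get/set the requested FEC for the port.  Reads show the active FEC
 * when the link is up (plus AUTO if it was requested) and the requested
 * FEC otherwise.  Writes take -1 for auto, 0 for none, or exactly one
 * FW_PORT_CAP32_FEC bit that the hardware supports; if applying the new
 * setting to a live link fails, the old value is restored.
 */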
7129 static int
7130 sysctl_fec(SYSCTL_HANDLER_ARGS)
7131 {
7132 	struct port_info *pi = arg1;
7133 	struct adapter *sc = pi->adapter;
7134 	struct link_config *lc = &pi->link_cfg;
7135 	int rc;
7136 	int8_t old;
7137 
7138 	if (req->newptr == NULL) {
7139 		struct sbuf *sb;
7140 		static char *bits = "\20\1RS\2BASE-R\3RSVD1\4RSVD2\5RSVD3\6AUTO";
7141 
7142 		rc = sysctl_wire_old_buffer(req, 0);
7143 		if (rc != 0)
7144 			return (rc);
7145 
7146 		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7147 		if (sb == NULL)
7148 			return (ENOMEM);
7149 
7150 		/*
7151 		 * Display the requested_fec when the link is down -- the actual
7152 		 * FEC makes sense only when the link is up.
7153 		 */
7154 		if (lc->link_ok) {
7155 			sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
7156 			    (lc->requested_fec & FEC_AUTO), bits);
7157 		} else {
7158 			sbuf_printf(sb, "%b", lc->requested_fec, bits);
7159 		}
7160 		rc = sbuf_finish(sb);
7161 		sbuf_delete(sb);
7162 	} else {
7163 		char s[3];
7164 		int n;
7165 
7166 		snprintf(s, sizeof(s), "%d",
7167 		    lc->requested_fec == FEC_AUTO ? -1 :
7168 		    lc->requested_fec & M_FW_PORT_CAP32_FEC);
7169 
7170 		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7171 		if (rc != 0)
7172 			return (rc);
7173 
7174 		n = strtol(&s[0], NULL, 0);
7175 		if (n < 0 || n & FEC_AUTO)
7176 			n = FEC_AUTO;
7177 		else {
7178 			if (n & ~M_FW_PORT_CAP32_FEC)
7179 				return (EINVAL);/* some other bit is set too */
7180 			if (!powerof2(n))
7181 				return (EINVAL);/* one bit can be set at most */
7182 		}
7183 
7184 		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7185 		    "t4fec");
7186 		if (rc)
7187 			return (rc);
7188 		PORT_LOCK(pi);
7189 		old = lc->requested_fec;
7190 		if (n == FEC_AUTO)
7191 			lc->requested_fec = FEC_AUTO;
7192 		else if (n == 0)
7193 			lc->requested_fec = FEC_NONE;
7194 		else {
7195 			if ((lc->supported | V_FW_PORT_CAP32_FEC(n)) !=
7196 			    lc->supported) {
7197 				rc = ENOTSUP;
7198 				goto done;
7199 			}
7200 			lc->requested_fec = n;
7201 		}
7202 		fixup_link_config(pi);
7203 		if (pi->up_vis > 0) {
7204 			rc = apply_link_config(pi);
7205 			if (rc != 0) {
7206 				lc->requested_fec = old;
7207 				if (rc == FW_EPROTO)
7208 					rc = ENOTSUP;
7209 			}
7210 		}
7211 done:
7212 		PORT_UNLOCK(pi);
7213 		end_synchronized_op(sc, 0);
7214 	}
7215 
7216 	return (rc);
7217 }
7218 
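/*
 * Get/set link autonegotiation.  Reads return -1 on ports that are not
 * capable of autonegotiation, else 0/1 for disabled/enabled.  Writes of
 * 0 and 1 map to AUTONEG_DISABLE/AUTONEG_ENABLE; anything else means
 * AUTONEG_AUTO.
 */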
7219 static int
7220 sysctl_autoneg(SYSCTL_HANDLER_ARGS)
7221 {
7222 	struct port_info *pi = arg1;
7223 	struct adapter *sc = pi->adapter;
7224 	struct link_config *lc = &pi->link_cfg;
7225 	int rc, val;
7226 
7227 	if (lc->supported & FW_PORT_CAP32_ANEG)
7228 		val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
7229 	else
7230 		val = -1;
7231 	rc = sysctl_handle_int(oidp, &val, 0, req);
7232 	if (rc != 0 || req->newptr == NULL)
7233 		return (rc);
7234 	if (val == 0)
7235 		val = AUTONEG_DISABLE;
7236 	else if (val == 1)
7237 		val = AUTONEG_ENABLE;
7238 	else
7239 		val = AUTONEG_AUTO;
7240 
7241 	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7242 	    "t4aneg");
7243 	if (rc)
7244 		return (rc);
7245 	PORT_LOCK(pi);
7246 	if (val == AUTONEG_ENABLE && !(lc->supported & FW_PORT_CAP32_ANEG)) {
7247 		rc = ENOTSUP;
7248 		goto done;
7249 	}
7250 	lc->requested_aneg = val;
7251 	fixup_link_config(pi);
7252 	if (pi->up_vis > 0)
7253 		rc = apply_link_config(pi);
7254 	set_current_media(pi);
7255 done:
7256 	PORT_UNLOCK(pi);
7257 	end_synchronized_op(sc, 0);
7258 	return (rc);
7259 }
7260 
7261 static int
7262 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
7263 {
7264 	struct adapter *sc = arg1;
7265 	int reg = arg2;
7266 	uint64_t val;
7267 
7268 	val = t4_read_reg64(sc, reg);
7269 
7270 	return (sysctl_handle_64(oidp, &val, 0, req));
7271 }
7272 
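/*
 * Read the chip temperature via the firmware's diagnostics parameter.
 * The firmware reports "unknown" as 0, which is displayed as -1 here.
 */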
7273 static int
7274 sysctl_temperature(SYSCTL_HANDLER_ARGS)
7275 {
7276 	struct adapter *sc = arg1;
7277 	int rc, t;
7278 	uint32_t param, val;
7279 
7280 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
7281 	if (rc)
7282 		return (rc);
7283 	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7284 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7285 	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
7286 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7287 	end_synchronized_op(sc, 0);
7288 	if (rc)
7289 		return (rc);
7290 
7291 	/* unknown is returned as 0 but we display -1 in that case */
7292 	t = val == 0 ? -1 : val;
7293 
7294 	rc = sysctl_handle_int(oidp, &t, 0, req);
7295 	return (rc);
7296 }
7297 
7298 static int
7299 sysctl_vdd(SYSCTL_HANDLER_ARGS)
7300 {
7301 	struct adapter *sc = arg1;
7302 	int rc;
7303 	uint32_t param, val;
7304 
7305 	if (sc->params.core_vdd == 0) {
7306 		rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
7307 		    "t4vdd");
7308 		if (rc)
7309 			return (rc);
7310 		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7311 		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7312 		    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
7313 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7314 		end_synchronized_op(sc, 0);
7315 		if (rc)
7316 			return (rc);
7317 		sc->params.core_vdd = val;
7318 	}
7319 
7320 	return (sysctl_handle_int(oidp, &sc->params.core_vdd, 0, req));
7321 }
7322 
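/*
 * Report the firmware's load averages, three values packed one per byte
 * into the low 24 bits of the reply.  Per the comment below, only debug
 * and custom firmwares maintain these; 0xffffffff is shown as "not
 * available".
 */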
7323 static int
7324 sysctl_loadavg(SYSCTL_HANDLER_ARGS)
7325 {
7326 	struct adapter *sc = arg1;
7327 	struct sbuf *sb;
7328 	int rc;
7329 	uint32_t param, val;
7330 
7331 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
7332 	if (rc)
7333 		return (rc);
7334 	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7335 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
7336 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7337 	end_synchronized_op(sc, 0);
7338 	if (rc)
7339 		return (rc);
7340 
7341 	rc = sysctl_wire_old_buffer(req, 0);
7342 	if (rc != 0)
7343 		return (rc);
7344 
7345 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7346 	if (sb == NULL)
7347 		return (ENOMEM);
7348 
7349 	if (val == 0xffffffff) {
7350 		/* Only debug and custom firmwares report load averages. */
7351 		sbuf_printf(sb, "not available");
7352 	} else {
7353 		sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
7354 		    (val >> 16) & 0xff);
7355 	}
7356 	rc = sbuf_finish(sb);
7357 	sbuf_delete(sb);
7358 
7359 	return (rc);
7360 }
7361 
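/*
 * Dump the TP congestion control table: for each of the NCCTRL_WIN
 * windows, the sixteen per-MTU additive increments followed by the
 * window's a_wnd parameter and its b_wnd decrement factor (decoded via
 * dec_fac).
 */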
7362 static int
7363 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
7364 {
7365 	struct adapter *sc = arg1;
7366 	struct sbuf *sb;
7367 	int rc, i;
7368 	uint16_t incr[NMTUS][NCCTRL_WIN];
7369 	static const char *dec_fac[] = {
7370 		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
7371 		"0.9375"
7372 	};
7373 
7374 	rc = sysctl_wire_old_buffer(req, 0);
7375 	if (rc != 0)
7376 		return (rc);
7377 
7378 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7379 	if (sb == NULL)
7380 		return (ENOMEM);
7381 
7382 	t4_read_cong_tbl(sc, incr);
7383 
7384 	for (i = 0; i < NCCTRL_WIN; ++i) {
7385 		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
7386 		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
7387 		    incr[5][i], incr[6][i], incr[7][i]);
7388 		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
7389 		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
7390 		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
7391 		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
7392 	}
7393 
7394 	rc = sbuf_finish(sb);
7395 	sbuf_delete(sb);
7396 
7397 	return (rc);
7398 }
7399 
7400 static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
7401 	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
7402 	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
7403 	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
7404 };
7405 
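/*
 * Dump one CIM inbound or outbound queue.  arg2 identifies the queue:
 * 0..CIM_NUM_IBQ-1 are IBQs, the rest OBQs, named as in qname[] above.
 * The read routines return the number of 32-bit words actually read on
 * success and a negative error otherwise.
 */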
7406 static int
7407 sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
7408 {
7409 	struct adapter *sc = arg1;
7410 	struct sbuf *sb;
7411 	int rc, i, n, qid = arg2;
7412 	uint32_t *buf, *p;
7413 	char *qtype;
7414 	u_int cim_num_obq = sc->chip_params->cim_num_obq;
7415 
7416 	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
7417 	    ("%s: bad qid %d\n", __func__, qid));
7418 
7419 	if (qid < CIM_NUM_IBQ) {
7420 		/* inbound queue */
7421 		qtype = "IBQ";
7422 		n = 4 * CIM_IBQ_SIZE;
7423 		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7424 		rc = t4_read_cim_ibq(sc, qid, buf, n);
7425 	} else {
7426 		/* outbound queue */
7427 		qtype = "OBQ";
7428 		qid -= CIM_NUM_IBQ;
7429 		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
7430 		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7431 		rc = t4_read_cim_obq(sc, qid, buf, n);
7432 	}
7433 
7434 	if (rc < 0) {
7435 		rc = -rc;
7436 		goto done;
7437 	}
7438 	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */
7439 
7440 	rc = sysctl_wire_old_buffer(req, 0);
7441 	if (rc != 0)
7442 		goto done;
7443 
7444 	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7445 	if (sb == NULL) {
7446 		rc = ENOMEM;
7447 		goto done;
7448 	}
7449 
7450 	sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
7451 	for (i = 0, p = buf; i < n; i += 16, p += 4)
7452 		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
7453 		    p[2], p[3]);
7454 
7455 	rc = sbuf_finish(sb);
7456 	sbuf_delete(sb);
7457 done:
7458 	free(buf, M_CXGBE);
7459 	return (rc);
7460 }
7461 
7462 static void
7463 sbuf_cim_la4(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
7464 {
7465 	uint32_t *p;
7466 
7467 	sbuf_printf(sb, "Status   Data      PC%s",
7468 	    cfg & F_UPDBGLACAPTPCONLY ? "" :
7469 	    "     LS0Stat  LS0Addr             LS0Data");
7470 
7471 	for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
7472 		if (cfg & F_UPDBGLACAPTPCONLY) {
7473 			sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
7474 			    p[6], p[7]);
7475 			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
7476 			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
7477 			    p[4] & 0xff, p[5] >> 8);
7478 			sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
7479 			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7480 			    p[1] & 0xf, p[2] >> 4);
7481 		} else {
7482 			sbuf_printf(sb,
7483 			    "\n  %02x   %x%07x %x%07x %08x %08x "
7484 			    "%08x%08x%08x%08x",
7485 			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7486 			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
7487 			    p[6], p[7]);
7488 		}
7489 	}
7490 }
7491 
7492 static void
7493 sbuf_cim_la6(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
7494 {
7495 	uint32_t *p;
7496 
7497 	sbuf_printf(sb, "Status   Inst    Data      PC%s",
7498 	    cfg & F_UPDBGLACAPTPCONLY ? "" :
7499 	    "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
7500 
7501 	for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
7502 		if (cfg & F_UPDBGLACAPTPCONLY) {
7503 			sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
7504 			    p[3] & 0xff, p[2], p[1], p[0]);
7505 			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
7506 			    (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
7507 			    p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
7508 			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
7509 			    (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
7510 			    p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
7511 			    p[6] >> 16);
7512 		} else {
7513 			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
7514 			    "%08x %08x %08x %08x %08x %08x",
7515 			    (p[9] >> 16) & 0xff,
7516 			    p[9] & 0xffff, p[8] >> 16,
7517 			    p[8] & 0xffff, p[7] >> 16,
7518 			    p[7] & 0xffff, p[6] >> 16,
7519 			    p[2], p[1], p[0], p[5], p[4], p[3]);
7520 		}
7521 	}
7522 }
7523 
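/*
 * Read the CIM logic analyzer capture into a temporary buffer and format
 * it in the T4/T5 or T6 layout as appropriate.  flags must be M_WAITOK
 * or M_NOWAIT depending on whether the caller can sleep.
 */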
7524 static int
7525 sbuf_cim_la(struct adapter *sc, struct sbuf *sb, int flags)
7526 {
7527 	uint32_t cfg, *buf;
7528 	int rc;
7529 
7530 	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
7531 	if (rc != 0)
7532 		return (rc);
7533 
7534 	MPASS(flags == M_WAITOK || flags == M_NOWAIT);
7535 	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
7536 	    M_ZERO | flags);
7537 	if (buf == NULL)
7538 		return (ENOMEM);
7539 
7540 	rc = -t4_cim_read_la(sc, buf, NULL);
7541 	if (rc != 0)
7542 		goto done;
7543 	if (chip_id(sc) < CHELSIO_T6)
7544 		sbuf_cim_la4(sc, sb, buf, cfg);
7545 	else
7546 		sbuf_cim_la6(sc, sb, buf, cfg);
7547 
7548 done:
7549 	free(buf, M_CXGBE);
7550 	return (rc);
7551 }
7552 
7553 static int
7554 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
7555 {
7556 	struct adapter *sc = arg1;
7557 	struct sbuf *sb;
7558 	int rc;
7559 
7560 	rc = sysctl_wire_old_buffer(req, 0);
7561 	if (rc != 0)
7562 		return (rc);
7563 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7564 	if (sb == NULL)
7565 		return (ENOMEM);
7566 
7567 	rc = sbuf_cim_la(sc, sb, M_WAITOK);
7568 	if (rc == 0)
7569 		rc = sbuf_finish(sb);
7570 	sbuf_delete(sb);
7571 	return (rc);
7572 }
7573 
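/*
 * Dump the CIM LA to the kernel log, presumably for use from error
 * paths: allocations are M_NOWAIT and any failure simply results in no
 * output.  Note that the function always returns false as written.
 */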
7574 bool
7575 t4_os_dump_cimla(struct adapter *sc, int arg, bool verbose)
7576 {
7577 	struct sbuf sb;
7578 	int rc;
7579 
7580 	if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
7581 		return (false);
7582 	rc = sbuf_cim_la(sc, &sb, M_NOWAIT);
7583 	if (rc == 0) {
7584 		rc = sbuf_finish(&sb);
7585 		if (rc == 0) {
7586 			log(LOG_DEBUG, "%s: CIM LA dump follows.\n%s",
7587 			    device_get_nameunit(sc->dev), sbuf_data(&sb));
7588 		}
7589 	}
7590 	sbuf_delete(&sb);
7591 	return (false);
7592 }
7593 
7594 static int
7595 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
7596 {
7597 	struct adapter *sc = arg1;
7598 	u_int i;
7599 	struct sbuf *sb;
7600 	uint32_t *buf, *p;
7601 	int rc;
7602 
7603 	rc = sysctl_wire_old_buffer(req, 0);
7604 	if (rc != 0)
7605 		return (rc);
7606 
7607 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7608 	if (sb == NULL)
7609 		return (ENOMEM);
7610 
7611 	buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
7612 	    M_ZERO | M_WAITOK);
7613 
7614 	t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
7615 	p = buf;
7616 
7617 	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7618 		sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
7619 		    p[1], p[0]);
7620 	}
7621 
7622 	sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
7623 	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7624 		sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
7625 		    (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
7626 		    (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
7627 		    (p[1] >> 2) | ((p[2] & 3) << 30),
7628 		    (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
7629 		    p[0] & 1);
7630 	}
7631 
7632 	rc = sbuf_finish(sb);
7633 	sbuf_delete(sb);
7634 	free(buf, M_CXGBE);
7635 	return (rc);
7636 }
7637 
7638 static int
7639 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
7640 {
7641 	struct adapter *sc = arg1;
7642 	u_int i;
7643 	struct sbuf *sb;
7644 	uint32_t *buf, *p;
7645 	int rc;
7646 
7647 	rc = sysctl_wire_old_buffer(req, 0);
7648 	if (rc != 0)
7649 		return (rc);
7650 
7651 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7652 	if (sb == NULL)
7653 		return (ENOMEM);
7654 
7655 	buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
7656 	    M_ZERO | M_WAITOK);
7657 
7658 	t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
7659 	p = buf;
7660 
7661 	sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
7662 	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7663 		sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
7664 		    (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
7665 		    p[4], p[3], p[2], p[1], p[0]);
7666 	}
7667 
7668 	sbuf_printf(sb, "\n\nCntl ID               Data");
7669 	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7670 		sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
7671 		    (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
7672 	}
7673 
7674 	rc = sbuf_finish(sb);
7675 	sbuf_delete(sb);
7676 	free(buf, M_CXGBE);
7677 	return (rc);
7678 }
7679 
7680 static int
7681 sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
7682 {
7683 	struct adapter *sc = arg1;
7684 	struct sbuf *sb;
7685 	int rc, i;
7686 	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
7687 	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
7688 	uint16_t thres[CIM_NUM_IBQ];
7689 	uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
7690 	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
7691 	u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
7692 
7693 	cim_num_obq = sc->chip_params->cim_num_obq;
7694 	if (is_t4(sc)) {
7695 		ibq_rdaddr = A_UP_IBQ_0_RDADDR;
7696 		obq_rdaddr = A_UP_OBQ_0_REALADDR;
7697 	} else {
7698 		ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
7699 		obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
7700 	}
7701 	nq = CIM_NUM_IBQ + cim_num_obq;
7702 
7703 	rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
7704 	if (rc == 0)
7705 		rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
7706 	if (rc != 0)
7707 		return (rc);
7708 
7709 	t4_read_cimq_cfg(sc, base, size, thres);
7710 
7711 	rc = sysctl_wire_old_buffer(req, 0);
7712 	if (rc != 0)
7713 		return (rc);
7714 
7715 	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7716 	if (sb == NULL)
7717 		return (ENOMEM);
7718 
7719 	sbuf_printf(sb,
7720 	    "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
7721 
7722 	for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
7723 		sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
7724 		    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
7725 		    G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
7726 		    G_QUEREMFLITS(p[2]) * 16);
7727 	for ( ; i < nq; i++, p += 4, wr += 2)
7728 		sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
7729 		    base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
7730 		    wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
7731 		    G_QUEREMFLITS(p[2]) * 16);
7732 
7733 	rc = sbuf_finish(sb);
7734 	sbuf_delete(sb);
7735 
7736 	return (rc);
7737 }
7738 
7739 static int
7740 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
7741 {
7742 	struct adapter *sc = arg1;
7743 	struct sbuf *sb;
7744 	int rc;
7745 	struct tp_cpl_stats stats;
7746 
7747 	rc = sysctl_wire_old_buffer(req, 0);
7748 	if (rc != 0)
7749 		return (rc);
7750 
7751 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7752 	if (sb == NULL)
7753 		return (ENOMEM);
7754 
7755 	mtx_lock(&sc->reg_lock);
7756 	t4_tp_get_cpl_stats(sc, &stats, 0);
7757 	mtx_unlock(&sc->reg_lock);
7758 
7759 	if (sc->chip_params->nchan > 2) {
7760 		sbuf_printf(sb, "                 channel 0  channel 1"
7761 		    "  channel 2  channel 3");
7762 		sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
7763 		    stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
7764 		sbuf_printf(sb, "\nCPL responses:  %10u %10u %10u %10u",
7765 		    stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
7766 	} else {
7767 		sbuf_printf(sb, "                 channel 0  channel 1");
7768 		sbuf_printf(sb, "\nCPL requests:   %10u %10u",
7769 		    stats.req[0], stats.req[1]);
7770 		sbuf_printf(sb, "\nCPL responses:  %10u %10u",
7771 		    stats.rsp[0], stats.rsp[1]);
7772 	}
7773 
7774 	rc = sbuf_finish(sb);
7775 	sbuf_delete(sb);
7776 
7777 	return (rc);
7778 }
7779 
7780 static int
7781 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
7782 {
7783 	struct adapter *sc = arg1;
7784 	struct sbuf *sb;
7785 	int rc;
7786 	struct tp_usm_stats stats;
7787 
7788 	rc = sysctl_wire_old_buffer(req, 0);
7789 	if (rc != 0)
7790 		return (rc);
7791 
7792 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7793 	if (sb == NULL)
7794 		return (ENOMEM);
7795 
7796 	t4_get_usm_stats(sc, &stats, 1);
7797 
7798 	sbuf_printf(sb, "Frames: %u\n", stats.frames);
7799 	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
7800 	sbuf_printf(sb, "Drops:  %u", stats.drops);
7801 
7802 	rc = sbuf_finish(sb);
7803 	sbuf_delete(sb);
7804 
7805 	return (rc);
7806 }
7807 
7808 static const char * const devlog_level_strings[] = {
7809 	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
7810 	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
7811 	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
7812 	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
7813 	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
7814 	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
7815 };
7816 
7817 static const char * const devlog_facility_strings[] = {
7818 	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
7819 	[FW_DEVLOG_FACILITY_CF]		= "CF",
7820 	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
7821 	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
7822 	[FW_DEVLOG_FACILITY_RES]	= "RES",
7823 	[FW_DEVLOG_FACILITY_HW]		= "HW",
7824 	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
7825 	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
7826 	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
7827 	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
7828 	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
7829 	[FW_DEVLOG_FACILITY_VI]		= "VI",
7830 	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
7831 	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
7832 	[FW_DEVLOG_FACILITY_TM]		= "TM",
7833 	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
7834 	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
7835 	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
7836 	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
7837 	[FW_DEVLOG_FACILITY_RI]		= "RI",
7838 	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
7839 	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
7840 	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
7841 	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE",
7842 	[FW_DEVLOG_FACILITY_CHNET]	= "CHNET",
7843 };
7844 
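/*
 * Format the firmware's device log, a circular buffer of fw_devlog_e
 * entries in adapter memory.  The oldest entry (smallest timestamp) is
 * located first so the log can be printed in chronological order.
 */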
7845 static int
7846 sbuf_devlog(struct adapter *sc, struct sbuf *sb, int flags)
7847 {
7848 	int i, j, rc, nentries, first = 0;
7849 	struct devlog_params *dparams = &sc->params.devlog;
7850 	struct fw_devlog_e *buf, *e;
7851 	uint64_t ftstamp = UINT64_MAX;
7852 
7853 	if (dparams->addr == 0)
7854 		return (ENXIO);
7855 
7856 	MPASS(flags == M_WAITOK || flags == M_NOWAIT);
7857 	buf = malloc(dparams->size, M_CXGBE, M_ZERO | flags);
7858 	if (buf == NULL)
7859 		return (ENOMEM);
7860 
7861 	rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
7862 	if (rc != 0)
7863 		goto done;
7864 
7865 	nentries = dparams->size / sizeof(struct fw_devlog_e);
7866 	for (i = 0; i < nentries; i++) {
7867 		e = &buf[i];
7868 
7869 		if (e->timestamp == 0)
7870 			break;	/* end */
7871 
7872 		e->timestamp = be64toh(e->timestamp);
7873 		e->seqno = be32toh(e->seqno);
7874 		for (j = 0; j < 8; j++)
7875 			e->params[j] = be32toh(e->params[j]);
7876 
7877 		if (e->timestamp < ftstamp) {
7878 			ftstamp = e->timestamp;
7879 			first = i;
7880 		}
7881 	}
7882 
7883 	if (buf[first].timestamp == 0)
7884 		goto done;	/* nothing in the log */
7885 
7886 	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
7887 	    "Seq#", "Tstamp", "Level", "Facility", "Message");
7888 
7889 	i = first;
7890 	do {
7891 		e = &buf[i];
7892 		if (e->timestamp == 0)
7893 			break;	/* end */
7894 
7895 		sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
7896 		    e->seqno, e->timestamp,
7897 		    (e->level < nitems(devlog_level_strings) ?
7898 			devlog_level_strings[e->level] : "UNKNOWN"),
7899 		    (e->facility < nitems(devlog_facility_strings) ?
7900 			devlog_facility_strings[e->facility] : "UNKNOWN"));
7901 		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
7902 		    e->params[2], e->params[3], e->params[4],
7903 		    e->params[5], e->params[6], e->params[7]);
7904 
7905 		if (++i == nentries)
7906 			i = 0;
7907 	} while (i != first);
7908 done:
7909 	free(buf, M_CXGBE);
7910 	return (rc);
7911 }
7912 
7913 static int
7914 sysctl_devlog(SYSCTL_HANDLER_ARGS)
7915 {
7916 	struct adapter *sc = arg1;
7917 	int rc;
7918 	struct sbuf *sb;
7919 
7920 	rc = sysctl_wire_old_buffer(req, 0);
7921 	if (rc != 0)
7922 		return (rc);
7923 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7924 	if (sb == NULL)
7925 		return (ENOMEM);
7926 
7927 	rc = sbuf_devlog(sc, sb, M_WAITOK);
7928 	if (rc == 0)
7929 		rc = sbuf_finish(sb);
7930 	sbuf_delete(sb);
7931 	return (rc);
7932 }
7933 
7934 void
7935 t4_os_dump_devlog(struct adapter *sc)
7936 {
7937 	int rc;
7938 	struct sbuf sb;
7939 
7940 	if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
7941 		return;
7942 	rc = sbuf_devlog(sc, &sb, M_NOWAIT);
7943 	if (rc == 0) {
7944 		rc = sbuf_finish(&sb);
7945 		if (rc == 0) {
7946 			log(LOG_DEBUG, "%s: device log follows.\n%s",
7947 			    device_get_nameunit(sc->dev), sbuf_data(&sb));
7948 		}
7949 	}
7950 	sbuf_delete(&sb);
7951 }
7952 
7953 static int
7954 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
7955 {
7956 	struct adapter *sc = arg1;
7957 	struct sbuf *sb;
7958 	int rc;
7959 	struct tp_fcoe_stats stats[MAX_NCHAN];
7960 	int i, nchan = sc->chip_params->nchan;
7961 
7962 	rc = sysctl_wire_old_buffer(req, 0);
7963 	if (rc != 0)
7964 		return (rc);
7965 
7966 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7967 	if (sb == NULL)
7968 		return (ENOMEM);
7969 
7970 	for (i = 0; i < nchan; i++)
7971 		t4_get_fcoe_stats(sc, i, &stats[i], 1);
7972 
7973 	if (nchan > 2) {
7974 		sbuf_printf(sb, "                   channel 0        channel 1"
7975 		    "        channel 2        channel 3");
7976 		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
7977 		    stats[0].octets_ddp, stats[1].octets_ddp,
7978 		    stats[2].octets_ddp, stats[3].octets_ddp);
7979 		sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
7980 		    stats[0].frames_ddp, stats[1].frames_ddp,
7981 		    stats[2].frames_ddp, stats[3].frames_ddp);
7982 		sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
7983 		    stats[0].frames_drop, stats[1].frames_drop,
7984 		    stats[2].frames_drop, stats[3].frames_drop);
7985 	} else {
7986 		sbuf_printf(sb, "                   channel 0        channel 1");
7987 		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
7988 		    stats[0].octets_ddp, stats[1].octets_ddp);
7989 		sbuf_printf(sb, "\nframesDDP:  %16u %16u",
7990 		    stats[0].frames_ddp, stats[1].frames_ddp);
7991 		sbuf_printf(sb, "\nframesDrop: %16u %16u",
7992 		    stats[0].frames_drop, stats[1].frames_drop);
7993 	}
7994 
7995 	rc = sbuf_finish(sb);
7996 	sbuf_delete(sb);
7997 
7998 	return (rc);
7999 }
8000 
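/*
 * Display the hardware tx channel schedulers: each scheduler's mode
 * (class or flow pacing), channel map, rate, and the class/flow
 * inter-packet gaps, the latter from the pace table.
 */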
8001 static int
8002 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
8003 {
8004 	struct adapter *sc = arg1;
8005 	struct sbuf *sb;
8006 	int rc, i;
8007 	unsigned int map, kbps, ipg, mode;
8008 	unsigned int pace_tab[NTX_SCHED];
8009 
8010 	rc = sysctl_wire_old_buffer(req, 0);
8011 	if (rc != 0)
8012 		return (rc);
8013 
8014 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8015 	if (sb == NULL)
8016 		return (ENOMEM);
8017 
8018 	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
8019 	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
8020 	t4_read_pace_tbl(sc, pace_tab);
8021 
8022 	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
8023 	    "Class IPG (0.1 ns)   Flow IPG (us)");
8024 
8025 	for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
8026 		t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
8027 		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
8028 		    (mode & (1 << i)) ? "flow" : "class", map & 3);
8029 		if (kbps)
8030 			sbuf_printf(sb, "%9u     ", kbps);
8031 		else
8032 			sbuf_printf(sb, " disabled     ");
8033 
8034 		if (ipg)
8035 			sbuf_printf(sb, "%13u        ", ipg);
8036 		else
8037 			sbuf_printf(sb, "     disabled        ");
8038 
8039 		if (pace_tab[i])
8040 			sbuf_printf(sb, "%10u", pace_tab[i]);
8041 		else
8042 			sbuf_printf(sb, "  disabled");
8043 	}
8044 
8045 	rc = sbuf_finish(sb);
8046 	sbuf_delete(sb);
8047 
8048 	return (rc);
8049 }
8050 
8051 static int
8052 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
8053 {
8054 	struct adapter *sc = arg1;
8055 	struct sbuf *sb;
8056 	int rc, i, j;
8057 	uint64_t *p0, *p1;
8058 	struct lb_port_stats s[2];
8059 	static const char *stat_name[] = {
8060 		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
8061 		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
8062 		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
8063 		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
8064 		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
8065 		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
8066 		"BG2FramesTrunc:", "BG3FramesTrunc:"
8067 	};
8068 
8069 	rc = sysctl_wire_old_buffer(req, 0);
8070 	if (rc != 0)
8071 		return (rc);
8072 
8073 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8074 	if (sb == NULL)
8075 		return (ENOMEM);
8076 
8077 	memset(s, 0, sizeof(s));
8078 
8079 	for (i = 0; i < sc->chip_params->nchan; i += 2) {
8080 		t4_get_lb_stats(sc, i, &s[0]);
8081 		t4_get_lb_stats(sc, i + 1, &s[1]);
8082 
8083 		p0 = &s[0].octets;
8084 		p1 = &s[1].octets;
8085 		sbuf_printf(sb, "%s                       Loopback %u"
8086 		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
8087 
8088 		for (j = 0; j < nitems(stat_name); j++)
8089 			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
8090 				   *p0++, *p1++);
8091 	}
8092 
8093 	rc = sbuf_finish(sb);
8094 	sbuf_delete(sb);
8095 
8096 	return (rc);
8097 }
8098 
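/*
 * Report the reason for the last link-down event on the port, or "n/a"
 * when the link is up or no reason has been recorded (255 appears to be
 * the "unset" sentinel for link_down_rc).
 */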
8099 static int
8100 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
8101 {
8102 	int rc = 0;
8103 	struct port_info *pi = arg1;
8104 	struct link_config *lc = &pi->link_cfg;
8105 	struct sbuf *sb;
8106 
8107 	rc = sysctl_wire_old_buffer(req, 0);
8108 	if (rc != 0)
8109 		return (rc);
8110 	sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
8111 	if (sb == NULL)
8112 		return (ENOMEM);
8113 
8114 	if (lc->link_ok || lc->link_down_rc == 255)
8115 		sbuf_printf(sb, "n/a");
8116 	else
8117 		sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
8118 
8119 	rc = sbuf_finish(sb);
8120 	sbuf_delete(sb);
8121 
8122 	return (rc);
8123 }
8124 
8125 struct mem_desc {
8126 	unsigned int base;
8127 	unsigned int limit;
8128 	unsigned int idx;
8129 };
8130 
8131 static int
8132 mem_desc_cmp(const void *a, const void *b)
8133 {
8134 	return ((const struct mem_desc *)a)->base -
8135 	       ((const struct mem_desc *)b)->base;
8136 }
8137 
8138 static void
8139 mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
8140     unsigned int to)
8141 {
8142 	unsigned int size;
8143 
8144 	if (from == to)
8145 		return;
8146 
8147 	size = to - from + 1;
8148 	if (size == 0)
8149 		return;
8150 
8151 	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
8152 	sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
8153 }
8154 
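/*
 * Display the adapter's memory map: the enabled EDC/MC ranges, the
 * hardware regions carved out of them (sorted by base address, with
 * address-space holes shown), and Rx/Tx payload page usage per port and
 * loopback channel.
 */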
8155 static int
8156 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
8157 {
8158 	struct adapter *sc = arg1;
8159 	struct sbuf *sb;
8160 	int rc, i, n;
8161 	uint32_t lo, hi, used, alloc;
8162 	static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
8163 	static const char *region[] = {
8164 		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
8165 		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
8166 		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
8167 		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
8168 		"RQUDP region:", "PBL region:", "TXPBL region:",
8169 		"DBVFIFO region:", "ULPRX state:", "ULPTX state:",
8170 		"On-chip queues:", "TLS keys:",
8171 	};
8172 	struct mem_desc avail[4];
8173 	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
8174 	struct mem_desc *md = mem;
8175 
8176 	rc = sysctl_wire_old_buffer(req, 0);
8177 	if (rc != 0)
8178 		return (rc);
8179 
8180 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8181 	if (sb == NULL)
8182 		return (ENOMEM);
8183 
8184 	for (i = 0; i < nitems(mem); i++) {
8185 		mem[i].limit = 0;
8186 		mem[i].idx = i;
8187 	}
8188 
8189 	/* Find and sort the populated memory ranges */
8190 	i = 0;
8191 	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
8192 	if (lo & F_EDRAM0_ENABLE) {
8193 		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
8194 		avail[i].base = G_EDRAM0_BASE(hi) << 20;
8195 		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
8196 		avail[i].idx = 0;
8197 		i++;
8198 	}
8199 	if (lo & F_EDRAM1_ENABLE) {
8200 		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
8201 		avail[i].base = G_EDRAM1_BASE(hi) << 20;
8202 		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
8203 		avail[i].idx = 1;
8204 		i++;
8205 	}
8206 	if (lo & F_EXT_MEM_ENABLE) {
8207 		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
8208 		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
8209 		avail[i].limit = avail[i].base +
8210 		    (G_EXT_MEM_SIZE(hi) << 20);
8211 		avail[i].idx = is_t5(sc) ? 3 : 2;	/* Call it MC0 for T5 */
8212 		i++;
8213 	}
8214 	if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
8215 		hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
8216 		avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
8217 		avail[i].limit = avail[i].base +
8218 		    (G_EXT_MEM1_SIZE(hi) << 20);
8219 		avail[i].idx = 4;
8220 		i++;
8221 	}
8222 	if (i == 0) {                              /* no memory available */
		sbuf_delete(sb);
8223 		return (0);
	}
8224 	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
8225 
8226 	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
8227 	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
8228 	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
8229 	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
8230 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
8231 	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
8232 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
8233 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
8234 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
8235 
8236 	/* the next few have explicit upper bounds */
8237 	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
8238 	md->limit = md->base - 1 +
8239 		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
8240 		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
8241 	md++;
8242 
8243 	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
8244 	md->limit = md->base - 1 +
8245 		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
8246 		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
8247 	md++;
8248 
8249 	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8250 		if (chip_id(sc) <= CHELSIO_T5)
8251 			md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
8252 		else
8253 			md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
8254 		md->limit = 0;
8255 	} else {
8256 		md->base = 0;
8257 		md->idx = nitems(region);  /* hide it */
8258 	}
8259 	md++;
8260 
8261 #define ulp_region(reg) \
8262 	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
8263 	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
8264 
8265 	ulp_region(RX_ISCSI);
8266 	ulp_region(RX_TDDP);
8267 	ulp_region(TX_TPT);
8268 	ulp_region(RX_STAG);
8269 	ulp_region(RX_RQ);
8270 	ulp_region(RX_RQUDP);
8271 	ulp_region(RX_PBL);
8272 	ulp_region(TX_PBL);
8273 #undef ulp_region
8274 
8275 	md->base = 0;
8276 	md->idx = nitems(region);
8277 	if (!is_t4(sc)) {
8278 		uint32_t size = 0;
8279 		uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
8280 		uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
8281 
8282 		if (is_t5(sc)) {
8283 			if (sge_ctrl & F_VFIFO_ENABLE)
8284 				size = G_DBVFIFO_SIZE(fifo_size);
8285 		} else
8286 			size = G_T6_DBVFIFO_SIZE(fifo_size);
8287 
8288 		if (size) {
8289 			md->base = G_BASEADDR(t4_read_reg(sc,
8290 			    A_SGE_DBVFIFO_BADDR));
8291 			md->limit = md->base + (size << 2) - 1;
8292 		}
8293 	}
8294 	md++;
8295 
8296 	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
8297 	md->limit = 0;
8298 	md++;
8299 	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
8300 	md->limit = 0;
8301 	md++;
8302 
8303 	md->base = sc->vres.ocq.start;
8304 	if (sc->vres.ocq.size)
8305 		md->limit = md->base + sc->vres.ocq.size - 1;
8306 	else
8307 		md->idx = nitems(region);  /* hide it */
8308 	md++;
8309 
8310 	md->base = sc->vres.key.start;
8311 	if (sc->vres.key.size)
8312 		md->limit = md->base + sc->vres.key.size - 1;
8313 	else
8314 		md->idx = nitems(region);  /* hide it */
8315 	md++;
8316 
8317 	/* add any address-space holes, there can be up to 3 */
8318 	for (n = 0; n < i - 1; n++)
8319 		if (avail[n].limit < avail[n + 1].base)
8320 			(md++)->base = avail[n].limit;
8321 	if (avail[n].limit)
8322 		(md++)->base = avail[n].limit;
8323 
8324 	n = md - mem;
8325 	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
8326 
8327 	for (lo = 0; lo < i; lo++)
8328 		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
8329 				avail[lo].limit - 1);
8330 
8331 	sbuf_printf(sb, "\n");
8332 	for (i = 0; i < n; i++) {
8333 		if (mem[i].idx >= nitems(region))
8334 			continue;                        /* skip holes */
8335 		if (!mem[i].limit)
8336 			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
8337 		mem_region_show(sb, region[mem[i].idx], mem[i].base,
8338 				mem[i].limit);
8339 	}
8340 
8341 	sbuf_printf(sb, "\n");
8342 	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
8343 	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
8344 	mem_region_show(sb, "uP RAM:", lo, hi);
8345 
8346 	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
8347 	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
8348 	mem_region_show(sb, "uP Extmem2:", lo, hi);
8349 
8350 	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
8351 	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
8352 		   G_PMRXMAXPAGE(lo),
8353 		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
8354 		   (lo & F_PMRXNUMCHN) ? 2 : 1);
8355 
8356 	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
8357 	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
8358 	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
8359 		   G_PMTXMAXPAGE(lo),
8360 		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
8361 		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
8362 	sbuf_printf(sb, "%u p-structs\n",
8363 		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
8364 
8365 	for (i = 0; i < 4; i++) {
8366 		if (chip_id(sc) > CHELSIO_T5)
8367 			lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
8368 		else
8369 			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
8370 		if (is_t5(sc)) {
8371 			used = G_T5_USED(lo);
8372 			alloc = G_T5_ALLOC(lo);
8373 		} else {
8374 			used = G_USED(lo);
8375 			alloc = G_ALLOC(lo);
8376 		}
8377 		/* For T6 these are MAC buffer groups */
8378 		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
8379 		    i, used, alloc);
8380 	}
8381 	for (i = 0; i < sc->chip_params->nchan; i++) {
8382 		if (chip_id(sc) > CHELSIO_T5)
8383 			lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
8384 		else
8385 			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
8386 		if (is_t5(sc)) {
8387 			used = G_T5_USED(lo);
8388 			alloc = G_T5_ALLOC(lo);
8389 		} else {
8390 			used = G_USED(lo);
8391 			alloc = G_ALLOC(lo);
8392 		}
8393 		/* For T6 these are MAC buffer groups */
8394 		sbuf_printf(sb,
8395 		    "\nLoopback %d using %u pages out of %u allocated",
8396 		    i, used, alloc);
8397 	}
8398 
8399 	rc = sbuf_finish(sb);
8400 	sbuf_delete(sb);
8401 
8402 	return (rc);
8403 }
8404 
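/*
 * Convert a TCAM (x, y) pair to a value/mask.  A bit set in x or y is
 * matched (y appears to hold the bits that must be 1 -- the Ethernet
 * address is extracted from it); x & y != 0 marks an unused entry, and
 * the callers skip such entries.
 */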
8405 static inline void
8406 tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
8407 {
8408 	*mask = x | y;
8409 	y = htobe64(y);
8410 	memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
8411 }
8412 
8413 static int
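/*
 * Dump the MPS TCAM in the T4/T5 layout (sysctl_mps_tcam_t6 below
 * handles T6).  For each valid entry the address/mask, port map, PF/VF,
 * and priorities are shown; when an entry has replication enabled its
 * replication map is fetched from the firmware with an LDST command.
 */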
8414 sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
8415 {
8416 	struct adapter *sc = arg1;
8417 	struct sbuf *sb;
8418 	int rc, i;
8419 
8420 	MPASS(chip_id(sc) <= CHELSIO_T5);
8421 
8422 	rc = sysctl_wire_old_buffer(req, 0);
8423 	if (rc != 0)
8424 		return (rc);
8425 
8426 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8427 	if (sb == NULL)
8428 		return (ENOMEM);
8429 
8430 	sbuf_printf(sb,
8431 	    "Idx  Ethernet address     Mask     Vld Ports PF"
8432 	    "  VF              Replication             P0 P1 P2 P3  ML");
8433 	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8434 		uint64_t tcamx, tcamy, mask;
8435 		uint32_t cls_lo, cls_hi;
8436 		uint8_t addr[ETHER_ADDR_LEN];
8437 
8438 		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
8439 		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
8440 		if (tcamx & tcamy)
8441 			continue;
8442 		tcamxy2valmask(tcamx, tcamy, addr, &mask);
8443 		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8444 		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
8445 		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
8446 			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
8447 			   addr[3], addr[4], addr[5], (uintmax_t)mask,
8448 			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
8449 			   G_PORTMAP(cls_hi), G_PF(cls_lo),
8450 			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
8451 
8452 		if (cls_lo & F_REPLICATE) {
8453 			struct fw_ldst_cmd ldst_cmd;
8454 
8455 			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
8456 			ldst_cmd.op_to_addrspace =
8457 			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
8458 				F_FW_CMD_REQUEST | F_FW_CMD_READ |
8459 				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
8460 			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
8461 			ldst_cmd.u.mps.rplc.fid_idx =
8462 			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
8463 				V_FW_LDST_CMD_IDX(i));
8464 
8465 			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
8466 			    "t4mps");
8467 			if (rc)
8468 				break;
8469 			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
8470 			    sizeof(ldst_cmd), &ldst_cmd);
8471 			end_synchronized_op(sc, 0);
8472 
8473 			if (rc != 0) {
8474 				sbuf_printf(sb, "%36d", rc);
8475 				rc = 0;
8476 			} else {
8477 				sbuf_printf(sb, " %08x %08x %08x %08x",
8478 				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
8479 				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
8480 				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
8481 				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
8482 			}
8483 		} else
8484 			sbuf_printf(sb, "%36s", "");
8485 
8486 		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
8487 		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
8488 		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
8489 	}
8490 
8491 	if (rc)
8492 		(void) sbuf_finish(sb);
8493 	else
8494 		rc = sbuf_finish(sb);
8495 	sbuf_delete(sb);
8496 
8497 	return (rc);
8498 }
8499 
8500 static int
8501 sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
8502 {
8503 	struct adapter *sc = arg1;
8504 	struct sbuf *sb;
8505 	int rc, i;
8506 
8507 	MPASS(chip_id(sc) > CHELSIO_T5);
8508 
8509 	rc = sysctl_wire_old_buffer(req, 0);
8510 	if (rc != 0)
8511 		return (rc);
8512 
8513 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8514 	if (sb == NULL)
8515 		return (ENOMEM);
8516 
8517 	sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
8518 	    "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
8519 	    "                           Replication"
8520 	    "                                    P0 P1 P2 P3  ML\n");
8521 
8522 	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8523 		uint8_t dip_hit, vlan_vld, lookup_type, port_num;
8524 		uint16_t ivlan;
8525 		uint64_t tcamx, tcamy, val, mask;
8526 		uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
8527 		uint8_t addr[ETHER_ADDR_LEN];
8528 
8529 		ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
8530 		if (i < 256)
8531 			ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
8532 		else
8533 			ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
8534 		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
8535 		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
8536 		tcamy = G_DMACH(val) << 32;
8537 		tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
8538 		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
8539 		lookup_type = G_DATALKPTYPE(data2);
8540 		port_num = G_DATAPORTNUM(data2);
8541 		if (lookup_type && lookup_type != M_DATALKPTYPE) {
8542 			/* Inner header VNI */
8543 			vniy = ((data2 & F_DATAVIDH2) << 23) |
8544 				       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
8545 			dip_hit = data2 & F_DATADIPHIT;
8546 			vlan_vld = 0;
8547 		} else {
8548 			vniy = 0;
8549 			dip_hit = 0;
8550 			vlan_vld = data2 & F_DATAVIDH2;
8551 			ivlan = G_VIDL(val);
8552 		}
8553 
8554 		ctl |= V_CTLXYBITSEL(1);
8555 		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
8556 		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
8557 		tcamx = G_DMACH(val) << 32;
8558 		tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
8559 		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
8560 		if (lookup_type && lookup_type != M_DATALKPTYPE) {
8561 			/* Inner header VNI mask */
8562 			vnix = ((data2 & F_DATAVIDH2) << 23) |
8563 			       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
8564 		} else
8565 			vnix = 0;
8566 
8567 		if (tcamx & tcamy)
8568 			continue;
8569 		tcamxy2valmask(tcamx, tcamy, addr, &mask);
8570 
8571 		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8572 		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
8573 
8574 		if (lookup_type && lookup_type != M_DATALKPTYPE) {
8575 			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
8576 			    "%012jx %06x %06x    -    -   %3c"
8577 			    "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
8578 			    addr[1], addr[2], addr[3], addr[4], addr[5],
8579 			    (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
8580 			    port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
8581 			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
8582 			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
8583 		} else {
8584 			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
8585 			    "%012jx    -       -   ", i, addr[0], addr[1],
8586 			    addr[2], addr[3], addr[4], addr[5],
8587 			    (uintmax_t)mask);
8588 
8589 			if (vlan_vld)
8590 				sbuf_printf(sb, "%4u   Y     ", ivlan);
8591 			else
8592 				sbuf_printf(sb, "  -    N     ");
8593 
8594 			sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
8595 			    lookup_type ? 'I' : 'O', port_num,
8596 			    cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
8597 			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
8598 			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
8599 		}
8600 
8601 
8602 		if (cls_lo & F_T6_REPLICATE) {
8603 			struct fw_ldst_cmd ldst_cmd;
8604 
8605 			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
8606 			ldst_cmd.op_to_addrspace =
8607 			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
8608 				F_FW_CMD_REQUEST | F_FW_CMD_READ |
8609 				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
8610 			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
8611 			ldst_cmd.u.mps.rplc.fid_idx =
8612 			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
8613 				V_FW_LDST_CMD_IDX(i));
8614 
8615 			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
8616 			    "t6mps");
8617 			if (rc)
8618 				break;
8619 			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
8620 			    sizeof(ldst_cmd), &ldst_cmd);
8621 			end_synchronized_op(sc, 0);
8622 
8623 			if (rc != 0) {
8624 				sbuf_printf(sb, "%72d", rc);
8625 				rc = 0;
8626 			} else {
8627 				sbuf_printf(sb, " %08x %08x %08x %08x"
8628 				    " %08x %08x %08x %08x",
8629 				    be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
8630 				    be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
8631 				    be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
8632 				    be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
8633 				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
8634 				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
8635 				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
8636 				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
8637 			}
8638 		} else
8639 			sbuf_printf(sb, "%72s", "");
8640 
8641 		sbuf_printf(sb, "%4u%3u%3u%3u %#x",
8642 		    G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
8643 		    G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
8644 		    (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
8645 	}
8646 
8647 	if (rc)
8648 		(void) sbuf_finish(sb);
8649 	else
8650 		rc = sbuf_finish(sb);
8651 	sbuf_delete(sb);
8652 
8653 	return (rc);
8654 }
8655 
8656 static int
8657 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
8658 {
8659 	struct adapter *sc = arg1;
8660 	struct sbuf *sb;
8661 	int rc;
8662 	uint16_t mtus[NMTUS];
8663 
8664 	rc = sysctl_wire_old_buffer(req, 0);
8665 	if (rc != 0)
8666 		return (rc);
8667 
8668 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8669 	if (sb == NULL)
8670 		return (ENOMEM);
8671 
8672 	t4_read_mtu_tbl(sc, mtus, NULL);
8673 
8674 	sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
8675 	    mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
8676 	    mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
8677 	    mtus[14], mtus[15]);
8678 
8679 	rc = sbuf_finish(sb);
8680 	sbuf_delete(sb);
8681 
8682 	return (rc);
8683 }
8684 
8685 static int
8686 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
8687 {
8688 	struct adapter *sc = arg1;
8689 	struct sbuf *sb;
8690 	int rc, i;
8691 	uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
8692 	uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
8693 	static const char *tx_stats[MAX_PM_NSTATS] = {
8694 		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
8695 		"Tx FIFO wait", NULL, "Tx latency"
8696 	};
8697 	static const char *rx_stats[MAX_PM_NSTATS] = {
8698 		"Read:", "Write bypass:", "Write mem:", "Flush:",
8699 		"Rx FIFO wait", NULL, "Rx latency"
8700 	};
8701 
8702 	rc = sysctl_wire_old_buffer(req, 0);
8703 	if (rc != 0)
8704 		return (rc);
8705 
8706 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8707 	if (sb == NULL)
8708 		return (ENOMEM);
8709 
8710 	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
8711 	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
8712 
8713 	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
8714 	for (i = 0; i < 4; i++) {
8715 		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8716 		    tx_cyc[i]);
8717 	}
8718 
8719 	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
8720 	for (i = 0; i < 4; i++) {
8721 		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8722 		    rx_cyc[i]);
8723 	}
8724 
8725 	if (chip_id(sc) > CHELSIO_T5) {
8726 		sbuf_printf(sb,
8727 		    "\n              Total wait      Total occupancy");
8728 		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8729 		    tx_cyc[i]);
8730 		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8731 		    rx_cyc[i]);
8732 
8733 		i += 2;
8734 		MPASS(i < nitems(tx_stats));
8735 
8736 		sbuf_printf(sb,
8737 		    "\n                   Reads           Total wait");
8738 		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8739 		    tx_cyc[i]);
8740 		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8741 		    rx_cyc[i]);
8742 	}
8743 
8744 	rc = sbuf_finish(sb);
8745 	sbuf_delete(sb);
8746 
8747 	return (rc);
8748 }
8749 
8750 static int
8751 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
8752 {
8753 	struct adapter *sc = arg1;
8754 	struct sbuf *sb;
8755 	int rc;
8756 	struct tp_rdma_stats stats;
8757 
8758 	rc = sysctl_wire_old_buffer(req, 0);
8759 	if (rc != 0)
8760 		return (rc);
8761 
8762 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8763 	if (sb == NULL)
8764 		return (ENOMEM);
8765 
8766 	mtx_lock(&sc->reg_lock);
8767 	t4_tp_get_rdma_stats(sc, &stats, 0);
8768 	mtx_unlock(&sc->reg_lock);
8769 
8770 	sbuf_printf(sb, "NoRQEModDeferrals: %u\n", stats.rqe_dfr_mod);
8771 	sbuf_printf(sb, "NoRQEPktDeferrals: %u", stats.rqe_dfr_pkt);
8772 
8773 	rc = sbuf_finish(sb);
8774 	sbuf_delete(sb);
8775 
8776 	return (rc);
8777 }
8778 
8779 static int
8780 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
8781 {
8782 	struct adapter *sc = arg1;
8783 	struct sbuf *sb;
8784 	int rc;
8785 	struct tp_tcp_stats v4, v6;
8786 
8787 	rc = sysctl_wire_old_buffer(req, 0);
8788 	if (rc != 0)
8789 		return (rc);
8790 
8791 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8792 	if (sb == NULL)
8793 		return (ENOMEM);
8794 
8795 	mtx_lock(&sc->reg_lock);
8796 	t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
8797 	mtx_unlock(&sc->reg_lock);
8798 
8799 	sbuf_printf(sb,
8800 	    "                                IP                 IPv6\n");
8801 	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
8802 	    v4.tcp_out_rsts, v6.tcp_out_rsts);
8803 	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
8804 	    v4.tcp_in_segs, v6.tcp_in_segs);
8805 	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
8806 	    v4.tcp_out_segs, v6.tcp_out_segs);
8807 	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
8808 	    v4.tcp_retrans_segs, v6.tcp_retrans_segs);
8809 
8810 	rc = sbuf_finish(sb);
8811 	sbuf_delete(sb);
8812 
8813 	return (rc);
8814 }
8815 
8816 static int
8817 sysctl_tids(SYSCTL_HANDLER_ARGS)
8818 {
8819 	struct adapter *sc = arg1;
8820 	struct sbuf *sb;
8821 	int rc;
8822 	struct tid_info *t = &sc->tids;
8823 
8824 	rc = sysctl_wire_old_buffer(req, 0);
8825 	if (rc != 0)
8826 		return (rc);
8827 
8828 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8829 	if (sb == NULL)
8830 		return (ENOMEM);
8831 
8832 	if (t->natids) {
8833 		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
8834 		    t->atids_in_use);
8835 	}
8836 
8837 	if (t->nhpftids) {
8838 		sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n",
8839 		    t->hpftid_base, t->hpftid_end, t->hpftids_in_use);
8840 	}
8841 
8842 	if (t->ntids) {
8843 		sbuf_printf(sb, "TID range: ");
8844 		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8845 			uint32_t b, hb;
8846 
8847 			if (chip_id(sc) <= CHELSIO_T5) {
8848 				b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
8849 				hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
8850 			} else {
8851 				b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
8852 				hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
8853 			}
8854 
8855 			if (b)
8856 				sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1);
8857 			sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
8858 		} else
8859 			sbuf_printf(sb, "%u-%u", t->tid_base, t->ntids - 1);
8860 		sbuf_printf(sb, ", in use: %u\n",
8861 		    atomic_load_acq_int(&t->tids_in_use));
8862 	}
8863 
8864 	if (t->nstids) {
8865 		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
8866 		    t->stid_base + t->nstids - 1, t->stids_in_use);
8867 	}
8868 
8869 	if (t->nftids) {
8870 		sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base,
8871 		    t->ftid_end, t->ftids_in_use);
8872 	}
8873 
8874 	if (t->netids) {
8875 		sbuf_printf(sb, "ETID range: %u-%u, in use: %u\n", t->etid_base,
8876 		    t->etid_base + t->netids - 1, t->etids_in_use);
8877 	}
8878 
8879 	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
8880 	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
8881 	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
8882 
8883 	rc = sbuf_finish(sb);
8884 	sbuf_delete(sb);
8885 
8886 	return (rc);
8887 }
8888 
8889 static int
8890 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
8891 {
8892 	struct adapter *sc = arg1;
8893 	struct sbuf *sb;
8894 	int rc;
8895 	struct tp_err_stats stats;
8896 
8897 	rc = sysctl_wire_old_buffer(req, 0);
8898 	if (rc != 0)
8899 		return (rc);
8900 
8901 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8902 	if (sb == NULL)
8903 		return (ENOMEM);
8904 
8905 	mtx_lock(&sc->reg_lock);
8906 	t4_tp_get_err_stats(sc, &stats, 0);
8907 	mtx_unlock(&sc->reg_lock);
8908 
8909 	if (sc->chip_params->nchan > 2) {
8910 		sbuf_printf(sb, "                 channel 0  channel 1"
8911 		    "  channel 2  channel 3\n");
8912 		sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
8913 		    stats.mac_in_errs[0], stats.mac_in_errs[1],
8914 		    stats.mac_in_errs[2], stats.mac_in_errs[3]);
8915 		sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
8916 		    stats.hdr_in_errs[0], stats.hdr_in_errs[1],
8917 		    stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
8918 		sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
8919 		    stats.tcp_in_errs[0], stats.tcp_in_errs[1],
8920 		    stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
8921 		sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
8922 		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
8923 		    stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
8924 		sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
8925 		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
8926 		    stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
8927 		sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
8928 		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
8929 		    stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
8930 		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
8931 		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
8932 		    stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
8933 		sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
8934 		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
8935 		    stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
8936 	} else {
8937 		sbuf_printf(sb, "                 channel 0  channel 1\n");
8938 		sbuf_printf(sb, "macInErrs:      %10u %10u\n",
8939 		    stats.mac_in_errs[0], stats.mac_in_errs[1]);
8940 		sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
8941 		    stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
8942 		sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
8943 		    stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
8944 		sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
8945 		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
8946 		sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
8947 		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
8948 		sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
8949 		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
8950 		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
8951 		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
8952 		sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
8953 		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
8954 	}
8955 
8956 	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
8957 	    stats.ofld_no_neigh, stats.ofld_cong_defer);
8958 
8959 	rc = sbuf_finish(sb);
8960 	sbuf_delete(sb);
8961 
8962 	return (rc);
8963 }
8964 
8965 static int
8966 sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
8967 {
8968 	struct adapter *sc = arg1;
8969 	struct tp_params *tpp = &sc->params.tp;
8970 	u_int mask;
8971 	int rc;
8972 
8973 	mask = tpp->la_mask >> 16;
8974 	rc = sysctl_handle_int(oidp, &mask, 0, req);
8975 	if (rc != 0 || req->newptr == NULL)
8976 		return (rc);
8977 	if (mask > 0xffff)
8978 		return (EINVAL);
8979 	tpp->la_mask = mask << 16;
8980 	t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
8981 
8982 	return (0);
8983 }
8984 
8985 struct field_desc {
8986 	const char *name;
8987 	u_int start;
8988 	u_int width;
8989 };
8990 
8991 static void
8992 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
8993 {
8994 	char buf[32];
8995 	int line_size = 0;
8996 
8997 	while (f->name) {
8998 		uint64_t mask = (1ULL << f->width) - 1;
8999 		int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
9000 		    ((uintmax_t)v >> f->start) & mask);
9001 
9002 		if (line_size + len >= 79) {
9003 			line_size = 8;
9004 			sbuf_printf(sb, "\n        ");
9005 		}
9006 		sbuf_printf(sb, "%s ", buf);
9007 		line_size += len + 1;
9008 		f++;
9009 	}
9010 	sbuf_printf(sb, "\n");
9011 }
9012 
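/*
 * A minimal decoding sketch (the table and sample value below are
 * illustrative, not taken from the hardware documentation): given
 *
 *	static const struct field_desc example[] = {
 *		{ "Tid", 32, 10 },
 *		{ "RxDvld", 3, 1 },
 *		{ NULL }
 *	};
 *	field_desc_show(sb, 0x0000000500000008ULL, example);
 *
 * the output is "Tid: 5 RxDvld: 1" because each entry extracts
 * ((v >> start) & ((1ULL << width) - 1)) and prints it after its name.
 */
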
9013 static const struct field_desc tp_la0[] = {
9014 	{ "RcfOpCodeOut", 60, 4 },
9015 	{ "State", 56, 4 },
9016 	{ "WcfState", 52, 4 },
9017 	{ "RcfOpcSrcOut", 50, 2 },
9018 	{ "CRxError", 49, 1 },
9019 	{ "ERxError", 48, 1 },
9020 	{ "SanityFailed", 47, 1 },
9021 	{ "SpuriousMsg", 46, 1 },
9022 	{ "FlushInputMsg", 45, 1 },
9023 	{ "FlushInputCpl", 44, 1 },
9024 	{ "RssUpBit", 43, 1 },
9025 	{ "RssFilterHit", 42, 1 },
9026 	{ "Tid", 32, 10 },
9027 	{ "InitTcb", 31, 1 },
9028 	{ "LineNumber", 24, 7 },
9029 	{ "Emsg", 23, 1 },
9030 	{ "EdataOut", 22, 1 },
9031 	{ "Cmsg", 21, 1 },
9032 	{ "CdataOut", 20, 1 },
9033 	{ "EreadPdu", 19, 1 },
9034 	{ "CreadPdu", 18, 1 },
9035 	{ "TunnelPkt", 17, 1 },
9036 	{ "RcfPeerFin", 16, 1 },
9037 	{ "RcfReasonOut", 12, 4 },
9038 	{ "TxCchannel", 10, 2 },
9039 	{ "RcfTxChannel", 8, 2 },
9040 	{ "RxEchannel", 6, 2 },
9041 	{ "RcfRxChannel", 5, 1 },
9042 	{ "RcfDataOutSrdy", 4, 1 },
9043 	{ "RxDvld", 3, 1 },
9044 	{ "RxOoDvld", 2, 1 },
9045 	{ "RxCongestion", 1, 1 },
9046 	{ "TxCongestion", 0, 1 },
9047 	{ NULL }
9048 };
9049 
9050 static const struct field_desc tp_la1[] = {
9051 	{ "CplCmdIn", 56, 8 },
9052 	{ "CplCmdOut", 48, 8 },
9053 	{ "ESynOut", 47, 1 },
9054 	{ "EAckOut", 46, 1 },
9055 	{ "EFinOut", 45, 1 },
9056 	{ "ERstOut", 44, 1 },
9057 	{ "SynIn", 43, 1 },
9058 	{ "AckIn", 42, 1 },
9059 	{ "FinIn", 41, 1 },
9060 	{ "RstIn", 40, 1 },
9061 	{ "DataIn", 39, 1 },
9062 	{ "DataInVld", 38, 1 },
9063 	{ "PadIn", 37, 1 },
9064 	{ "RxBufEmpty", 36, 1 },
9065 	{ "RxDdp", 35, 1 },
9066 	{ "RxFbCongestion", 34, 1 },
9067 	{ "TxFbCongestion", 33, 1 },
9068 	{ "TxPktSumSrdy", 32, 1 },
9069 	{ "RcfUlpType", 28, 4 },
9070 	{ "Eread", 27, 1 },
9071 	{ "Ebypass", 26, 1 },
9072 	{ "Esave", 25, 1 },
9073 	{ "Static0", 24, 1 },
9074 	{ "Cread", 23, 1 },
9075 	{ "Cbypass", 22, 1 },
9076 	{ "Csave", 21, 1 },
9077 	{ "CPktOut", 20, 1 },
9078 	{ "RxPagePoolFull", 18, 2 },
9079 	{ "RxLpbkPkt", 17, 1 },
9080 	{ "TxLpbkPkt", 16, 1 },
9081 	{ "RxVfValid", 15, 1 },
9082 	{ "SynLearned", 14, 1 },
9083 	{ "SetDelEntry", 13, 1 },
9084 	{ "SetInvEntry", 12, 1 },
9085 	{ "CpcmdDvld", 11, 1 },
9086 	{ "CpcmdSave", 10, 1 },
9087 	{ "RxPstructsFull", 8, 2 },
9088 	{ "EpcmdDvld", 7, 1 },
9089 	{ "EpcmdFlush", 6, 1 },
9090 	{ "EpcmdTrimPrefix", 5, 1 },
9091 	{ "EpcmdTrimPostfix", 4, 1 },
9092 	{ "ERssIp4Pkt", 3, 1 },
9093 	{ "ERssIp6Pkt", 2, 1 },
9094 	{ "ERssTcpUdpPkt", 1, 1 },
9095 	{ "ERssFceFipPkt", 0, 1 },
9096 	{ NULL }
9097 };
9098 
9099 static const struct field_desc tp_la2[] = {
9100 	{ "CplCmdIn", 56, 8 },
9101 	{ "MpsVfVld", 55, 1 },
9102 	{ "MpsPf", 52, 3 },
9103 	{ "MpsVf", 44, 8 },
9104 	{ "SynIn", 43, 1 },
9105 	{ "AckIn", 42, 1 },
9106 	{ "FinIn", 41, 1 },
9107 	{ "RstIn", 40, 1 },
9108 	{ "DataIn", 39, 1 },
9109 	{ "DataInVld", 38, 1 },
9110 	{ "PadIn", 37, 1 },
9111 	{ "RxBufEmpty", 36, 1 },
9112 	{ "RxDdp", 35, 1 },
9113 	{ "RxFbCongestion", 34, 1 },
9114 	{ "TxFbCongestion", 33, 1 },
9115 	{ "TxPktSumSrdy", 32, 1 },
9116 	{ "RcfUlpType", 28, 4 },
9117 	{ "Eread", 27, 1 },
9118 	{ "Ebypass", 26, 1 },
9119 	{ "Esave", 25, 1 },
9120 	{ "Static0", 24, 1 },
9121 	{ "Cread", 23, 1 },
9122 	{ "Cbypass", 22, 1 },
9123 	{ "Csave", 21, 1 },
9124 	{ "CPktOut", 20, 1 },
9125 	{ "RxPagePoolFull", 18, 2 },
9126 	{ "RxLpbkPkt", 17, 1 },
9127 	{ "TxLpbkPkt", 16, 1 },
9128 	{ "RxVfValid", 15, 1 },
9129 	{ "SynLearned", 14, 1 },
9130 	{ "SetDelEntry", 13, 1 },
9131 	{ "SetInvEntry", 12, 1 },
9132 	{ "CpcmdDvld", 11, 1 },
9133 	{ "CpcmdSave", 10, 1 },
9134 	{ "RxPstructsFull", 8, 2 },
9135 	{ "EpcmdDvld", 7, 1 },
9136 	{ "EpcmdFlush", 6, 1 },
9137 	{ "EpcmdTrimPrefix", 5, 1 },
9138 	{ "EpcmdTrimPostfix", 4, 1 },
9139 	{ "ERssIp4Pkt", 3, 1 },
9140 	{ "ERssIp6Pkt", 2, 1 },
9141 	{ "ERssTcpUdpPkt", 1, 1 },
9142 	{ "ERssFceFipPkt", 0, 1 },
9143 	{ NULL }
9144 };
9145 
9146 static void
9147 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
9148 {
9149 
9150 	field_desc_show(sb, *p, tp_la0);
9151 }
9152 
9153 static void
9154 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
9155 {
9156 
9157 	if (idx)
9158 		sbuf_printf(sb, "\n");
9159 	field_desc_show(sb, p[0], tp_la0);
9160 	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9161 		field_desc_show(sb, p[1], tp_la0);
9162 }
9163 
9164 static void
9165 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
9166 {
9167 
9168 	if (idx)
9169 		sbuf_printf(sb, "\n");
9170 	field_desc_show(sb, p[0], tp_la0);
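	/* Bit 17 of the first word is TunnelPkt (see tp_la0). */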
9171 	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9172 		field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
9173 }
9174 
9175 static int
9176 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
9177 {
9178 	struct adapter *sc = arg1;
9179 	struct sbuf *sb;
9180 	uint64_t *buf, *p;
9181 	int rc;
9182 	u_int i, inc;
9183 	void (*show_func)(struct sbuf *, uint64_t *, int);
9184 
9185 	rc = sysctl_wire_old_buffer(req, 0);
9186 	if (rc != 0)
9187 		return (rc);
9188 
9189 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9190 	if (sb == NULL)
9191 		return (ENOMEM);
9192 
9193 	buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
9194 
9195 	t4_tp_read_la(sc, buf, NULL);
9196 	p = buf;
9197 
9198 	switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
9199 	case 2:
9200 		inc = 2;
9201 		show_func = tp_la_show2;
9202 		break;
9203 	case 3:
9204 		inc = 2;
9205 		show_func = tp_la_show3;
9206 		break;
9207 	default:
9208 		inc = 1;
9209 		show_func = tp_la_show;
9210 	}
9211 
9212 	for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
9213 		(*show_func)(sb, p, i);
9214 
9215 	rc = sbuf_finish(sb);
9216 	sbuf_delete(sb);
9217 	free(buf, M_CXGBE);
9218 	return (rc);
9219 }
9220 
9221 static int
9222 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
9223 {
9224 	struct adapter *sc = arg1;
9225 	struct sbuf *sb;
9226 	int rc;
9227 	u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
9228 
9229 	rc = sysctl_wire_old_buffer(req, 0);
9230 	if (rc != 0)
9231 		return (rc);
9232 
9233 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9234 	if (sb == NULL)
9235 		return (ENOMEM);
9236 
9237 	t4_get_chan_txrate(sc, nrate, orate);
9238 
9239 	if (sc->chip_params->nchan > 2) {
9240 		sbuf_printf(sb, "              channel 0   channel 1"
9241 		    "   channel 2   channel 3\n");
9242 		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
9243 		    nrate[0], nrate[1], nrate[2], nrate[3]);
9244 		sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
9245 		    orate[0], orate[1], orate[2], orate[3]);
9246 	} else {
9247 		sbuf_printf(sb, "              channel 0   channel 1\n");
9248 		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
9249 		    nrate[0], nrate[1]);
9250 		sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
9251 		    orate[0], orate[1]);
9252 	}
9253 
9254 	rc = sbuf_finish(sb);
9255 	sbuf_delete(sb);
9256 
9257 	return (rc);
9258 }
9259 
9260 static int
9261 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
9262 {
9263 	struct adapter *sc = arg1;
9264 	struct sbuf *sb;
9265 	uint32_t *buf, *p;
9266 	int rc, i;
9267 
9268 	rc = sysctl_wire_old_buffer(req, 0);
9269 	if (rc != 0)
9270 		return (rc);
9271 
9272 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9273 	if (sb == NULL)
9274 		return (ENOMEM);
9275 
9276 	buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
9277 	    M_ZERO | M_WAITOK);
9278 
9279 	t4_ulprx_read_la(sc, buf);
9280 	p = buf;
9281 
9282 	sbuf_printf(sb, "      Pcmd        Type   Message"
9283 	    "                Data");
9284 	for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
9285 		sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
9286 		    p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
9287 	}
9288 
9289 	rc = sbuf_finish(sb);
9290 	sbuf_delete(sb);
9291 	free(buf, M_CXGBE);
9292 	return (rc);
9293 }
9294 
9295 static int
9296 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
9297 {
9298 	struct adapter *sc = arg1;
9299 	struct sbuf *sb;
9300 	int rc, v;
9301 
9302 	MPASS(chip_id(sc) >= CHELSIO_T5);
9303 
9304 	rc = sysctl_wire_old_buffer(req, 0);
9305 	if (rc != 0)
9306 		return (rc);
9307 
9308 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9309 	if (sb == NULL)
9310 		return (ENOMEM);
9311 
9312 	v = t4_read_reg(sc, A_SGE_STAT_CFG);
9313 	if (G_STATSOURCE_T5(v) == 7) {
9314 		int mode;
9315 
9316 		mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
9317 		if (mode == 0) {
9318 			sbuf_printf(sb, "total %d, incomplete %d",
9319 			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
9320 			    t4_read_reg(sc, A_SGE_STAT_MATCH));
9321 		} else if (mode == 1) {
9322 			sbuf_printf(sb, "total %d, data overflow %d",
9323 			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
9324 			    t4_read_reg(sc, A_SGE_STAT_MATCH));
9325 		} else {
9326 			sbuf_printf(sb, "unknown mode %d", mode);
9327 		}
9328 	}
9329 	rc = sbuf_finish(sb);
9330 	sbuf_delete(sb);
9331 
9332 	return (rc);
9333 }
9334 
9335 static int
9336 sysctl_cpus(SYSCTL_HANDLER_ARGS)
9337 {
9338 	struct adapter *sc = arg1;
9339 	enum cpu_sets op = arg2;
9340 	cpuset_t cpuset;
9341 	struct sbuf *sb;
9342 	int i, rc;
9343 
9344 	MPASS(op == LOCAL_CPUS || op == INTR_CPUS);
9345 
9346 	CPU_ZERO(&cpuset);
9347 	rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset);
9348 	if (rc != 0)
9349 		return (rc);
9350 
9351 	rc = sysctl_wire_old_buffer(req, 0);
9352 	if (rc != 0)
9353 		return (rc);
9354 
9355 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9356 	if (sb == NULL)
9357 		return (ENOMEM);
9358 
9359 	CPU_FOREACH(i)
9360 		sbuf_printf(sb, "%d ", i);
9361 	rc = sbuf_finish(sb);
9362 	sbuf_delete(sb);
9363 
9364 	return (rc);
9365 }
9366 
9367 #ifdef TCP_OFFLOAD
9368 static int
9369 sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
9370 {
9371 	struct adapter *sc = arg1;
9372 	int *old_ports, *new_ports;
9373 	int i, new_count, rc;
9374 
9375 	if (req->newptr == NULL && req->oldptr == NULL)
9376 		return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
9377 		    sizeof(sc->tt.tls_rx_ports[0])));
9378 
9379 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
9380 	if (rc)
9381 		return (rc);
9382 
9383 	if (sc->tt.num_tls_rx_ports == 0) {
9384 		i = -1;
9385 		rc = SYSCTL_OUT(req, &i, sizeof(i));
9386 	} else
9387 		rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
9388 		    sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
9389 	if (rc == 0 && req->newptr != NULL) {
9390 		new_count = req->newlen / sizeof(new_ports[0]);
9391 		new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
9392 		    M_WAITOK);
9393 		rc = SYSCTL_IN(req, new_ports, new_count *
9394 		    sizeof(new_ports[0]));
9395 		if (rc)
9396 			goto err;
9397 
9398 		/* Allow setting to a single '-1' to clear the list. */
9399 		if (new_count == 1 && new_ports[0] == -1) {
9400 			ADAPTER_LOCK(sc);
9401 			old_ports = sc->tt.tls_rx_ports;
9402 			sc->tt.tls_rx_ports = NULL;
9403 			sc->tt.num_tls_rx_ports = 0;
9404 			ADAPTER_UNLOCK(sc);
9405 			free(old_ports, M_CXGBE);
9406 		} else {
9407 			for (i = 0; i < new_count; i++) {
9408 				if (new_ports[i] < 1 ||
9409 				    new_ports[i] > IPPORT_MAX) {
9410 					rc = EINVAL;
9411 					goto err;
9412 				}
9413 			}
9414 
9415 			ADAPTER_LOCK(sc);
9416 			old_ports = sc->tt.tls_rx_ports;
9417 			sc->tt.tls_rx_ports = new_ports;
9418 			sc->tt.num_tls_rx_ports = new_count;
9419 			ADAPTER_UNLOCK(sc);
9420 			free(old_ports, M_CXGBE);
9421 			new_ports = NULL;
9422 		}
9423 	err:
9424 		free(new_ports, M_CXGBE);
9425 	}
9426 	end_synchronized_op(sc, 0);
9427 	return (rc);
9428 }
9429 
9430 static void
9431 unit_conv(char *buf, size_t len, u_int val, u_int factor)
9432 {
9433 	u_int rem = val % factor;
9434 
9435 	if (rem == 0)
9436 		snprintf(buf, len, "%u", val / factor);
9437 	else {
9438 		while (rem % 10 == 0)
9439 			rem /= 10;
9440 		snprintf(buf, len, "%u.%u", val / factor, rem);
9441 	}
9442 }
9443 
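/*
 * Worked examples: unit_conv(buf, len, 2500, 1000) renders "2.5" (the
 * remainder 500 is reduced to 5) and unit_conv(buf, len, 3000, 1000)
 * renders "3".  Note that the reduced remainder drops leading zeroes, so
 * (1023, 1000) would render as "1.23"; the values the callers feed in
 * appear to avoid that case.
 */
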
9444 static int
9445 sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
9446 {
9447 	struct adapter *sc = arg1;
9448 	char buf[16];
9449 	u_int res, re;
9450 	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9451 
9452 	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
9453 	switch (arg2) {
9454 	case 0:
9455 		/* timer_tick */
9456 		re = G_TIMERRESOLUTION(res);
9457 		break;
9458 	case 1:
9459 		/* TCP timestamp tick */
9460 		re = G_TIMESTAMPRESOLUTION(res);
9461 		break;
9462 	case 2:
9463 		/* DACK tick */
9464 		re = G_DELAYEDACKRESOLUTION(res);
9465 		break;
9466 	default:
9467 		return (EDOOFUS);
9468 	}
9469 
9470 	unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
9471 
9472 	return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
9473 }
9474 
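/*
 * Worked example (values are illustrative): vpd.cclk is stored in kHz, so
 * 1000000000 / cclk yields picoseconds per core clock; a 200 MHz core
 * clock gives cclk_ps == 5000.  A resolution field of 8 then yields
 * (5000 << 8) == 1280000 ps per tick, which unit_conv() renders as "1.28"
 * (microseconds).
 */
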
9475 static int
9476 sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
9477 {
9478 	struct adapter *sc = arg1;
9479 	u_int res, dack_re, v;
9480 	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9481 
9482 	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
9483 	dack_re = G_DELAYEDACKRESOLUTION(res);
9484 	v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
9485 
9486 	return (sysctl_handle_int(oidp, &v, 0, req));
9487 }
9488 
9489 static int
9490 sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
9491 {
9492 	struct adapter *sc = arg1;
9493 	int reg = arg2;
9494 	u_int tre;
9495 	u_long tp_tick_us, v;
9496 	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9497 
9498 	MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
9499 	    reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
9500 	    reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
9501 	    reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
9502 
9503 	tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
9504 	tp_tick_us = (cclk_ps << tre) / 1000000;
9505 
9506 	if (reg == A_TP_INIT_SRTT)
9507 		v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
9508 	else
9509 		v = tp_tick_us * t4_read_reg(sc, reg);
9510 
9511 	return (sysctl_handle_long(oidp, &v, 0, req));
9512 }
9513 
9514 /*
9515  * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is
9516  * passed to this function.
9517  */
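/*
 * Example: arg2 == 12 reports bits 15:12, i.e. (reg >> 12) & 0xf.
 */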
9518 static int
9519 sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
9520 {
9521 	struct adapter *sc = arg1;
9522 	int idx = arg2;
9523 	u_int v;
9524 
9525 	MPASS(idx >= 0 && idx <= 24);
9526 
9527 	v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
9528 
9529 	return (sysctl_handle_int(oidp, &v, 0, req));
9530 }
9531 
9532 static int
9533 sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
9534 {
9535 	struct adapter *sc = arg1;
9536 	int idx = arg2;
9537 	u_int shift, v, r;
9538 
9539 	MPASS(idx >= 0 && idx < 16);
9540 
9541 	r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
9542 	shift = (idx & 3) << 3;
9543 	v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
9544 
9545 	return (sysctl_handle_int(oidp, &v, 0, req));
9546 }
9547 
9548 static int
9549 sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
9550 {
9551 	struct vi_info *vi = arg1;
9552 	struct adapter *sc = vi->pi->adapter;
9553 	int idx, rc, i;
9554 	struct sge_ofld_rxq *ofld_rxq;
9555 	uint8_t v;
9556 
9557 	idx = vi->ofld_tmr_idx;
9558 
9559 	rc = sysctl_handle_int(oidp, &idx, 0, req);
9560 	if (rc != 0 || req->newptr == NULL)
9561 		return (rc);
9562 
9563 	if (idx < 0 || idx >= SGE_NTIMERS)
9564 		return (EINVAL);
9565 
9566 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
9567 	    "t4otmr");
9568 	if (rc)
9569 		return (rc);
9570 
9571 	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
9572 	for_each_ofld_rxq(vi, i, ofld_rxq) {
9573 #ifdef atomic_store_rel_8
9574 		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
9575 #else
9576 		ofld_rxq->iq.intr_params = v;
9577 #endif
9578 	}
9579 	vi->ofld_tmr_idx = idx;
9580 
9581 	end_synchronized_op(sc, LOCK_HELD);
9582 	return (0);
9583 }
9584 
9585 static int
9586 sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
9587 {
9588 	struct vi_info *vi = arg1;
9589 	struct adapter *sc = vi->pi->adapter;
9590 	int idx, rc;
9591 
9592 	idx = vi->ofld_pktc_idx;
9593 
9594 	rc = sysctl_handle_int(oidp, &idx, 0, req);
9595 	if (rc != 0 || req->newptr == NULL)
9596 		return (rc);
9597 
9598 	if (idx < -1 || idx >= SGE_NCOUNTERS)
9599 		return (EINVAL);
9600 
9601 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
9602 	    "t4opktc");
9603 	if (rc)
9604 		return (rc);
9605 
9606 	if (vi->flags & VI_INIT_DONE)
9607 		rc = EBUSY; /* cannot be changed once the queues are created */
9608 	else
9609 		vi->ofld_pktc_idx = idx;
9610 
9611 	end_synchronized_op(sc, LOCK_HELD);
9612 	return (rc);
9613 }
9614 #endif
9615 
9616 static int
9617 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
9618 {
9619 	int rc;
9620 
9621 	if (cntxt->cid > M_CTXTQID)
9622 		return (EINVAL);
9623 
9624 	if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
9625 	    cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
9626 		return (EINVAL);
9627 
9628 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
9629 	if (rc)
9630 		return (rc);
9631 
9632 	if (sc->flags & FW_OK) {
9633 		rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
9634 		    &cntxt->data[0]);
9635 		if (rc == 0)
9636 			goto done;
9637 	}
9638 
9639 	/*
9640 	 * Read via firmware failed or wasn't even attempted.  Read directly via
9641 	 * the backdoor.
9642 	 */
9643 	rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
9644 done:
9645 	end_synchronized_op(sc, 0);
9646 	return (rc);
9647 }
9648 
9649 static int
9650 load_fw(struct adapter *sc, struct t4_data *fw)
9651 {
9652 	int rc;
9653 	uint8_t *fw_data;
9654 
9655 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
9656 	if (rc)
9657 		return (rc);
9658 
9659 	/*
9660 	 * The firmware, with the sole exception of the memory parity error
9661 	 * handler, runs from memory and not flash.  It is almost always safe to
9662 	 * install a new firmware on a running system.  Just set bit 1 in
9663 	 * hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
9664 	 */
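	/* e.g. sysctl dev.t4nex.0.dflags=0x2 (nexus name and unit assumed). */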
9665 	if (sc->flags & FULL_INIT_DONE &&
9666 	    (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
9667 		rc = EBUSY;
9668 		goto done;
9669 	}
9670 
9671 	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
9672 	if (fw_data == NULL) {
9673 		rc = ENOMEM;
9674 		goto done;
9675 	}
9676 
9677 	rc = copyin(fw->data, fw_data, fw->len);
9678 	if (rc == 0)
9679 		rc = -t4_load_fw(sc, fw_data, fw->len);
9680 
9681 	free(fw_data, M_CXGBE);
9682 done:
9683 	end_synchronized_op(sc, 0);
9684 	return (rc);
9685 }
9686 
9687 static int
9688 load_cfg(struct adapter *sc, struct t4_data *cfg)
9689 {
9690 	int rc;
9691 	uint8_t *cfg_data = NULL;
9692 
9693 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
9694 	if (rc)
9695 		return (rc);
9696 
9697 	if (cfg->len == 0) {
9698 		/* clear */
9699 		rc = -t4_load_cfg(sc, NULL, 0);
9700 		goto done;
9701 	}
9702 
9703 	cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
9704 	if (cfg_data == NULL) {
9705 		rc = ENOMEM;
9706 		goto done;
9707 	}
9708 
9709 	rc = copyin(cfg->data, cfg_data, cfg->len);
9710 	if (rc == 0)
9711 		rc = -t4_load_cfg(sc, cfg_data, cfg->len);
9712 
9713 	free(cfg_data, M_CXGBE);
9714 done:
9715 	end_synchronized_op(sc, 0);
9716 	return (rc);
9717 }
9718 
9719 static int
9720 load_boot(struct adapter *sc, struct t4_bootrom *br)
9721 {
9722 	int rc;
9723 	uint8_t *br_data = NULL;
9724 	u_int offset;
9725 
9726 	if (br->len > 1024 * 1024)
9727 		return (EFBIG);
9728 
9729 	if (br->pf_offset == 0) {
9730 		/* pfidx */
9731 		if (br->pfidx_addr > 7)
9732 			return (EINVAL);
9733 		offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
9734 		    A_PCIE_PF_EXPROM_OFST)));
9735 	} else if (br->pf_offset == 1) {
9736 		/* offset */
9737 		offset = G_OFFSET(br->pfidx_addr);
9738 	} else {
9739 		return (EINVAL);
9740 	}
9741 
9742 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
9743 	if (rc)
9744 		return (rc);
9745 
9746 	if (br->len == 0) {
9747 		/* clear */
9748 		rc = -t4_load_boot(sc, NULL, offset, 0);
9749 		goto done;
9750 	}
9751 
9752 	br_data = malloc(br->len, M_CXGBE, M_WAITOK);
9753 	if (br_data == NULL) {
9754 		rc = ENOMEM;
9755 		goto done;
9756 	}
9757 
9758 	rc = copyin(br->data, br_data, br->len);
9759 	if (rc == 0)
9760 		rc = -t4_load_boot(sc, br_data, offset, br->len);
9761 
9762 	free(br_data, M_CXGBE);
9763 done:
9764 	end_synchronized_op(sc, 0);
9765 	return (rc);
9766 }
9767 
9768 static int
9769 load_bootcfg(struct adapter *sc, struct t4_data *bc)
9770 {
9771 	int rc;
9772 	uint8_t *bc_data = NULL;
9773 
9774 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
9775 	if (rc)
9776 		return (rc);
9777 
9778 	if (bc->len == 0) {
9779 		/* clear */
9780 		rc = -t4_load_bootcfg(sc, NULL, 0);
9781 		goto done;
9782 	}
9783 
9784 	bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
9785 	if (bc_data == NULL) {
9786 		rc = ENOMEM;
9787 		goto done;
9788 	}
9789 
9790 	rc = copyin(bc->data, bc_data, bc->len);
9791 	if (rc == 0)
9792 		rc = -t4_load_bootcfg(sc, bc_data, bc->len);
9793 
9794 	free(bc_data, M_CXGBE);
9795 done:
9796 	end_synchronized_op(sc, 0);
9797 	return (rc);
9798 }
9799 
9800 static int
9801 cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
9802 {
9803 	int rc;
9804 	struct cudbg_init *cudbg;
9805 	void *handle, *buf;
9806 
9807 	/* buf is large, don't block if no memory is available */
9808 	buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
9809 	if (buf == NULL)
9810 		return (ENOMEM);
9811 
9812 	handle = cudbg_alloc_handle();
9813 	if (handle == NULL) {
9814 		rc = ENOMEM;
9815 		goto done;
9816 	}
9817 
9818 	cudbg = cudbg_get_init(handle);
9819 	cudbg->adap = sc;
9820 	cudbg->print = (cudbg_print_cb)printf;
9821 
9822 #ifndef notyet
9823 	device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
9824 	    __func__, dump->wr_flash, dump->len, dump->data);
9825 #endif
9826 
9827 	if (dump->wr_flash)
9828 		cudbg->use_flash = 1;
9829 	MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
9830 	memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));
9831 
9832 	rc = cudbg_collect(handle, buf, &dump->len);
9833 	if (rc != 0)
9834 		goto done;
9835 
9836 	rc = copyout(buf, dump->data, dump->len);
9837 done:
9838 	cudbg_free_handle(handle);
9839 	free(buf, M_CXGBE);
9840 	return (rc);
9841 }
9842 
9843 static void
9844 free_offload_policy(struct t4_offload_policy *op)
9845 {
9846 	struct offload_rule *r;
9847 	int i;
9848 
9849 	if (op == NULL)
9850 		return;
9851 
9852 	r = &op->rule[0];
9853 	for (i = 0; i < op->nrules; i++, r++) {
9854 		free(r->bpf_prog.bf_insns, M_CXGBE);
9855 	}
9856 	free(op->rule, M_CXGBE);
9857 	free(op, M_CXGBE);
9858 }
9859 
9860 static int
9861 set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
9862 {
9863 	int i, rc, len;
9864 	struct t4_offload_policy *op, *old;
9865 	struct bpf_program *bf;
9866 	const struct offload_settings *s;
9867 	struct offload_rule *r;
9868 	void *u;
9869 
9870 	if (!is_offload(sc))
9871 		return (ENODEV);
9872 
9873 	if (uop->nrules == 0) {
9874 		/* Delete installed policies. */
9875 		op = NULL;
9876 		goto set_policy;
9877 	} else if (uop->nrules > 256) { /* arbitrary */
9878 		return (E2BIG);
9879 	}
9880 
9881 	/* Copy userspace offload policy to kernel */
9882 	op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
9883 	op->nrules = uop->nrules;
9884 	len = op->nrules * sizeof(struct offload_rule);
9885 	op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
9886 	rc = copyin(uop->rule, op->rule, len);
9887 	if (rc) {
9888 		free(op->rule, M_CXGBE);
9889 		free(op, M_CXGBE);
9890 		return (rc);
9891 	}
9892 
9893 	r = &op->rule[0];
9894 	for (i = 0; i < op->nrules; i++, r++) {
9895 
9896 		/* Validate open_type */
9897 		if (r->open_type != OPEN_TYPE_LISTEN &&
9898 		    r->open_type != OPEN_TYPE_ACTIVE &&
9899 		    r->open_type != OPEN_TYPE_PASSIVE &&
9900 		    r->open_type != OPEN_TYPE_DONTCARE) {
9901 error:
9902 			/*
9903 			 * Rules 0 to i have malloc'd filters that need to be
9904 			 * freed.  Rules i+1 to nrules have userspace pointers
9905 			 * and should be left alone.
9906 			 */
9907 			op->nrules = i;
9908 			free_offload_policy(op);
9909 			return (rc);
9910 		}
9911 
9912 		/* Validate settings */
9913 		s = &r->settings;
9914 		if ((s->offload != 0 && s->offload != 1) ||
9915 		    s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
9916 		    s->sched_class < -1 ||
9917 		    s->sched_class >= sc->chip_params->nsched_cls) {
9918 			rc = EINVAL;
9919 			goto error;
9920 		}
9921 
9922 		bf = &r->bpf_prog;
9923 		u = bf->bf_insns;	/* userspace ptr */
9924 		bf->bf_insns = NULL;
9925 		if (bf->bf_len == 0) {
9926 			/* legal, matches everything */
9927 			continue;
9928 		}
9929 		len = bf->bf_len * sizeof(*bf->bf_insns);
9930 		bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
9931 		rc = copyin(u, bf->bf_insns, len);
9932 		if (rc != 0)
9933 			goto error;
9934 
9935 		if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
9936 			rc = EINVAL;
9937 			goto error;
9938 		}
9939 	}
9940 set_policy:
9941 	rw_wlock(&sc->policy_lock);
9942 	old = sc->policy;
9943 	sc->policy = op;
9944 	rw_wunlock(&sc->policy_lock);
9945 	free_offload_policy(old);
9946 
9947 	return (0);
9948 }
9949 
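/*
 * A sketch of the smallest policy a userland tool might install via
 * CHELSIO_T4_SET_OFLD_POLICY (field values are illustrative, and fd is
 * an open nexus device descriptor): one rule with an empty BPF program
 * (bf_len == 0), which the validation loop above treats as matching
 * everything.
 *
 *	struct offload_rule r = {
 *		.open_type = OPEN_TYPE_DONTCARE,
 *		.settings = { .offload = 1, .cong_algo = -1,
 *		    .sched_class = -1 },
 *	};
 *	struct t4_offload_policy uop = { .nrules = 1, .rule = &r };
 *	ioctl(fd, CHELSIO_T4_SET_OFLD_POLICY, &uop);
 */
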
9950 #define MAX_READ_BUF_SIZE (128 * 1024)
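/* Card memory is read in bounded chunks so the kernel buffer stays small. */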
9951 static int
9952 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
9953 {
9954 	uint32_t addr, remaining, n;
9955 	uint32_t *buf;
9956 	int rc;
9957 	uint8_t *dst;
9958 
9959 	rc = validate_mem_range(sc, mr->addr, mr->len);
9960 	if (rc != 0)
9961 		return (rc);
9962 
9963 	buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
9964 	addr = mr->addr;
9965 	remaining = mr->len;
9966 	dst = (void *)mr->data;
9967 
9968 	while (remaining) {
9969 		n = min(remaining, MAX_READ_BUF_SIZE);
9970 		read_via_memwin(sc, 2, addr, buf, n);
9971 
9972 		rc = copyout(buf, dst, n);
9973 		if (rc != 0)
9974 			break;
9975 
9976 		dst += n;
9977 		remaining -= n;
9978 		addr += n;
9979 	}
9980 
9981 	free(buf, M_CXGBE);
9982 	return (rc);
9983 }
9984 #undef MAX_READ_BUF_SIZE
9985 
9986 static int
9987 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
9988 {
9989 	int rc;
9990 
9991 	if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
9992 		return (EINVAL);
9993 
9994 	if (i2cd->len > sizeof(i2cd->data))
9995 		return (EFBIG);
9996 
9997 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
9998 	if (rc)
9999 		return (rc);
10000 	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
10001 	    i2cd->offset, i2cd->len, &i2cd->data[0]);
10002 	end_synchronized_op(sc, 0);
10003 
10004 	return (rc);
10005 }
10006 
10007 static int
10008 clear_stats(struct adapter *sc, u_int port_id)
10009 {
10010 	int i, v, bg_map;
10011 	struct port_info *pi;
10012 	struct vi_info *vi;
10013 	struct sge_rxq *rxq;
10014 	struct sge_txq *txq;
10015 	struct sge_wrq *wrq;
10016 #ifdef TCP_OFFLOAD
10017 	struct sge_ofld_rxq *ofld_rxq;
10018 #endif
10019 
10020 	if (port_id >= sc->params.nports)
10021 		return (EINVAL);
10022 	pi = sc->port[port_id];
10023 	if (pi == NULL)
10024 		return (EIO);
10025 
10026 	/* MAC stats */
10027 	t4_clr_port_stats(sc, pi->tx_chan);
10028 	pi->tx_parse_error = 0;
10029 	pi->tnl_cong_drops = 0;
10030 	mtx_lock(&sc->reg_lock);
10031 	for_each_vi(pi, v, vi) {
10032 		if (vi->flags & VI_INIT_DONE)
10033 			t4_clr_vi_stats(sc, vi->vin);
10034 	}
10035 	bg_map = pi->mps_bg_map;
10036 	v = 0;	/* reuse */
10037 	while (bg_map) {
10038 		i = ffs(bg_map) - 1;
10039 		t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
10040 		    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
10041 		bg_map &= ~(1 << i);
10042 	}
10043 	mtx_unlock(&sc->reg_lock);
10044 
10045 	/*
10046 	 * Since this command accepts a port, clear stats for
10047 	 * all VIs on this port.
10048 	 */
10049 	for_each_vi(pi, v, vi) {
10050 		if (vi->flags & VI_INIT_DONE) {
10051 
10052 			for_each_rxq(vi, i, rxq) {
10053 #if defined(INET) || defined(INET6)
10054 				rxq->lro.lro_queued = 0;
10055 				rxq->lro.lro_flushed = 0;
10056 #endif
10057 				rxq->rxcsum = 0;
10058 				rxq->vlan_extraction = 0;
10059 
10060 				rxq->fl.mbuf_allocated = 0;
10061 				rxq->fl.mbuf_inlined = 0;
10062 				rxq->fl.cl_allocated = 0;
10063 				rxq->fl.cl_recycled = 0;
10064 				rxq->fl.cl_fast_recycled = 0;
10065 			}
10066 
10067 			for_each_txq(vi, i, txq) {
10068 				txq->txcsum = 0;
10069 				txq->tso_wrs = 0;
10070 				txq->vlan_insertion = 0;
10071 				txq->imm_wrs = 0;
10072 				txq->sgl_wrs = 0;
10073 				txq->txpkt_wrs = 0;
10074 				txq->txpkts0_wrs = 0;
10075 				txq->txpkts1_wrs = 0;
10076 				txq->txpkts0_pkts = 0;
10077 				txq->txpkts1_pkts = 0;
10078 				txq->raw_wrs = 0;
10079 				mp_ring_reset_stats(txq->r);
10080 			}
10081 
10082 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10083 			for_each_ofld_txq(vi, i, wrq) {
10084 				wrq->tx_wrs_direct = 0;
10085 				wrq->tx_wrs_copied = 0;
10086 			}
10087 #endif
10088 #ifdef TCP_OFFLOAD
10089 			for_each_ofld_rxq(vi, i, ofld_rxq) {
10090 				ofld_rxq->fl.mbuf_allocated = 0;
10091 				ofld_rxq->fl.mbuf_inlined = 0;
10092 				ofld_rxq->fl.cl_allocated = 0;
10093 				ofld_rxq->fl.cl_recycled = 0;
10094 				ofld_rxq->fl.cl_fast_recycled = 0;
10095 			}
10096 #endif
10097 
10098 			if (IS_MAIN_VI(vi)) {
10099 				wrq = &sc->sge.ctrlq[pi->port_id];
10100 				wrq->tx_wrs_direct = 0;
10101 				wrq->tx_wrs_copied = 0;
10102 			}
10103 		}
10104 	}
10105 
10106 	return (0);
10107 }
10108 
10109 int
10110 t4_os_find_pci_capability(struct adapter *sc, int cap)
10111 {
10112 	int i;
10113 
10114 	return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
10115 }
10116 
10117 int
10118 t4_os_pci_save_state(struct adapter *sc)
10119 {
10120 	device_t dev;
10121 	struct pci_devinfo *dinfo;
10122 
10123 	dev = sc->dev;
10124 	dinfo = device_get_ivars(dev);
10125 
10126 	pci_cfg_save(dev, dinfo, 0);
10127 	return (0);
10128 }
10129 
10130 int
10131 t4_os_pci_restore_state(struct adapter *sc)
10132 {
10133 	device_t dev;
10134 	struct pci_devinfo *dinfo;
10135 
10136 	dev = sc->dev;
10137 	dinfo = device_get_ivars(dev);
10138 
10139 	pci_cfg_restore(dev, dinfo);
10140 	return (0);
10141 }
10142 
10143 void
10144 t4_os_portmod_changed(struct port_info *pi)
10145 {
10146 	struct adapter *sc = pi->adapter;
10147 	struct vi_info *vi;
10148 	struct ifnet *ifp;
10149 	static const char *mod_str[] = {
10150 		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
10151 	};
10152 
10153 	KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
10154 	    ("%s: port_type %u", __func__, pi->port_type));
10155 
10156 	vi = &pi->vi[0];
10157 	if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
10158 		PORT_LOCK(pi);
10159 		build_medialist(pi);
10160 		if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
10161 			fixup_link_config(pi);
10162 			apply_link_config(pi);
10163 		}
10164 		PORT_UNLOCK(pi);
10165 		end_synchronized_op(sc, LOCK_HELD);
10166 	}
10167 
10168 	ifp = vi->ifp;
10169 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
10170 		if_printf(ifp, "transceiver unplugged.\n");
10171 	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
10172 		if_printf(ifp, "unknown transceiver inserted.\n");
10173 	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
10174 		if_printf(ifp, "unsupported transceiver inserted.\n");
10175 	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
10176 		if_printf(ifp, "%dGbps %s transceiver inserted.\n",
10177 		    port_top_speed(pi), mod_str[pi->mod_type]);
10178 	} else {
10179 		if_printf(ifp, "transceiver (type %d) inserted.\n",
10180 		    pi->mod_type);
10181 	}
10182 }
10183 
10184 void
10185 t4_os_link_changed(struct port_info *pi)
10186 {
10187 	struct vi_info *vi;
10188 	struct ifnet *ifp;
10189 	struct link_config *lc;
10190 	int v;
10191 
10192 	PORT_LOCK_ASSERT_OWNED(pi);
10193 
10194 	for_each_vi(pi, v, vi) {
10195 		ifp = vi->ifp;
10196 		if (ifp == NULL)
10197 			continue;
10198 
10199 		lc = &pi->link_cfg;
10200 		if (lc->link_ok) {
10201 			ifp->if_baudrate = IF_Mbps(lc->speed);
10202 			if_link_state_change(ifp, LINK_STATE_UP);
10203 		} else {
10204 			if_link_state_change(ifp, LINK_STATE_DOWN);
10205 		}
10206 	}
10207 }
10208 
10209 void
10210 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
10211 {
10212 	struct adapter *sc;
10213 
10214 	sx_slock(&t4_list_lock);
10215 	SLIST_FOREACH(sc, &t4_list, link) {
10216 		/*
10217 		 * func should not make any assumptions about what state sc is
10218 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
10219 		 */
10220 		func(sc, arg);
10221 	}
10222 	sx_sunlock(&t4_list_lock);
10223 }
10224 
10225 static int
10226 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
10227     struct thread *td)
10228 {
10229 	int rc;
10230 	struct adapter *sc = dev->si_drv1;
10231 
10232 	rc = priv_check(td, PRIV_DRIVER);
10233 	if (rc != 0)
10234 		return (rc);
10235 
10236 	switch (cmd) {
10237 	case CHELSIO_T4_GETREG: {
10238 		struct t4_reg *edata = (struct t4_reg *)data;
10239 
10240 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10241 			return (EFAULT);
10242 
10243 		if (edata->size == 4)
10244 			edata->val = t4_read_reg(sc, edata->addr);
10245 		else if (edata->size == 8)
10246 			edata->val = t4_read_reg64(sc, edata->addr);
10247 		else
10248 			return (EINVAL);
10249 
10250 		break;
10251 	}
10252 	case CHELSIO_T4_SETREG: {
10253 		struct t4_reg *edata = (struct t4_reg *)data;
10254 
10255 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10256 			return (EFAULT);
10257 
10258 		if (edata->size == 4) {
10259 			if (edata->val & 0xffffffff00000000)
10260 				return (EINVAL);
10261 			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
10262 		} else if (edata->size == 8)
10263 			t4_write_reg64(sc, edata->addr, edata->val);
10264 		else
10265 			return (EINVAL);
10266 		break;
10267 	}
10268 	case CHELSIO_T4_REGDUMP: {
10269 		struct t4_regdump *regs = (struct t4_regdump *)data;
10270 		int reglen = t4_get_regs_len(sc);
10271 		uint8_t *buf;
10272 
10273 		if (regs->len < reglen) {
10274 			regs->len = reglen; /* hint to the caller */
10275 			return (ENOBUFS);
10276 		}
10277 
10278 		regs->len = reglen;
10279 		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
10280 		get_regs(sc, regs, buf);
10281 		rc = copyout(buf, regs->data, reglen);
10282 		free(buf, M_CXGBE);
10283 		break;
10284 	}
10285 	case CHELSIO_T4_GET_FILTER_MODE:
10286 		rc = get_filter_mode(sc, (uint32_t *)data);
10287 		break;
10288 	case CHELSIO_T4_SET_FILTER_MODE:
10289 		rc = set_filter_mode(sc, *(uint32_t *)data);
10290 		break;
10291 	case CHELSIO_T4_GET_FILTER:
10292 		rc = get_filter(sc, (struct t4_filter *)data);
10293 		break;
10294 	case CHELSIO_T4_SET_FILTER:
10295 		rc = set_filter(sc, (struct t4_filter *)data);
10296 		break;
10297 	case CHELSIO_T4_DEL_FILTER:
10298 		rc = del_filter(sc, (struct t4_filter *)data);
10299 		break;
10300 	case CHELSIO_T4_GET_SGE_CONTEXT:
10301 		rc = get_sge_context(sc, (struct t4_sge_context *)data);
10302 		break;
10303 	case CHELSIO_T4_LOAD_FW:
10304 		rc = load_fw(sc, (struct t4_data *)data);
10305 		break;
10306 	case CHELSIO_T4_GET_MEM:
10307 		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
10308 		break;
10309 	case CHELSIO_T4_GET_I2C:
10310 		rc = read_i2c(sc, (struct t4_i2c_data *)data);
10311 		break;
10312 	case CHELSIO_T4_CLEAR_STATS:
10313 		rc = clear_stats(sc, *(uint32_t *)data);
10314 		break;
10315 	case CHELSIO_T4_SCHED_CLASS:
10316 		rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
10317 		break;
10318 	case CHELSIO_T4_SCHED_QUEUE:
10319 		rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
10320 		break;
10321 	case CHELSIO_T4_GET_TRACER:
10322 		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
10323 		break;
10324 	case CHELSIO_T4_SET_TRACER:
10325 		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
10326 		break;
10327 	case CHELSIO_T4_LOAD_CFG:
10328 		rc = load_cfg(sc, (struct t4_data *)data);
10329 		break;
10330 	case CHELSIO_T4_LOAD_BOOT:
10331 		rc = load_boot(sc, (struct t4_bootrom *)data);
10332 		break;
10333 	case CHELSIO_T4_LOAD_BOOTCFG:
10334 		rc = load_bootcfg(sc, (struct t4_data *)data);
10335 		break;
10336 	case CHELSIO_T4_CUDBG_DUMP:
10337 		rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
10338 		break;
10339 	case CHELSIO_T4_SET_OFLD_POLICY:
10340 		rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
10341 		break;
10342 	default:
10343 		rc = ENOTTY;
10344 	}
10345 
10346 	return (rc);
10347 }
10348 
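/*
 * A userland sketch of the CHELSIO_T4_GETREG case above, in the style of
 * cxgbetool(8).  The device path, include path, and register offset are
 * assumptions for illustration.
 *
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include "t4_ioctl.h"
 *
 *	int
 *	main(void)
 *	{
 *		struct t4_reg reg = { .addr = 0x1e004, .size = 4 };
 *		int fd = open("/dev/t4nex0", O_RDWR);
 *
 *		if (fd < 0 || ioctl(fd, CHELSIO_T4_GETREG, &reg) < 0) {
 *			perror("t4nex0");
 *			return (1);
 *		}
 *		printf("0x%x = 0x%jx\n", reg.addr, (uintmax_t)reg.val);
 *		return (0);
 *	}
 *
 * As the GETREG case shows, the driver rejects unaligned or out-of-range
 * addresses with EFAULT and sizes other than 4 or 8 with EINVAL.
 */
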
10349 #ifdef TCP_OFFLOAD
10350 static int
10351 toe_capability(struct vi_info *vi, int enable)
10352 {
10353 	int rc;
10354 	struct port_info *pi = vi->pi;
10355 	struct adapter *sc = pi->adapter;
10356 
10357 	ASSERT_SYNCHRONIZED_OP(sc);
10358 
10359 	if (!is_offload(sc))
10360 		return (ENODEV);
10361 
10362 	if (enable) {
10363 		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
10364 			/* TOE is already enabled. */
10365 			return (0);
10366 		}
10367 
10368 		/*
10369 		 * We need the port's queues around so that we're able to send
10370 		 * and receive CPLs to/from the TOE even if the ifnet for this
10371 		 * port has never been UP'd administratively.
10372 		 */
10373 		if (!(vi->flags & VI_INIT_DONE)) {
10374 			rc = vi_full_init(vi);
10375 			if (rc)
10376 				return (rc);
10377 		}
10378 		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
10379 			rc = vi_full_init(&pi->vi[0]);
10380 			if (rc)
10381 				return (rc);
10382 		}
10383 
10384 		if (isset(&sc->offload_map, pi->port_id)) {
10385 			/* TOE is enabled on another VI of this port. */
10386 			pi->uld_vis++;
10387 			return (0);
10388 		}
10389 
10390 		if (!uld_active(sc, ULD_TOM)) {
10391 			rc = t4_activate_uld(sc, ULD_TOM);
10392 			if (rc == EAGAIN) {
10393 				log(LOG_WARNING,
10394 				    "You must kldload t4_tom.ko before trying "
10395 				    "to enable TOE on a cxgbe interface.\n");
10396 			}
10397 			if (rc != 0)
10398 				return (rc);
10399 			KASSERT(sc->tom_softc != NULL,
10400 			    ("%s: TOM activated but softc NULL", __func__));
10401 			KASSERT(uld_active(sc, ULD_TOM),
10402 			    ("%s: TOM activated but flag not set", __func__));
10403 		}
10404 
10405 		/* Activate iWARP and iSCSI too, if the modules are loaded. */
10406 		if (!uld_active(sc, ULD_IWARP))
10407 			(void) t4_activate_uld(sc, ULD_IWARP);
10408 		if (!uld_active(sc, ULD_ISCSI))
10409 			(void) t4_activate_uld(sc, ULD_ISCSI);
10410 
10411 		pi->uld_vis++;
10412 		setbit(&sc->offload_map, pi->port_id);
10413 	} else {
10414 		pi->uld_vis--;
10415 
10416 		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
10417 			return (0);
10418 
10419 		KASSERT(uld_active(sc, ULD_TOM),
10420 		    ("%s: TOM never initialized?", __func__));
10421 		clrbit(&sc->offload_map, pi->port_id);
10422 	}
10423 
10424 	return (0);
10425 }
10426 
10427 /*
10428  * Add an upper layer driver to the global list.
10429  */
10430 int
10431 t4_register_uld(struct uld_info *ui)
10432 {
10433 	int rc = 0;
10434 	struct uld_info *u;
10435 
10436 	sx_xlock(&t4_uld_list_lock);
10437 	SLIST_FOREACH(u, &t4_uld_list, link) {
10438 		if (u->uld_id == ui->uld_id) {
10439 			rc = EEXIST;
10440 			goto done;
10441 		}
10442 	}
10443 
10444 	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
10445 	ui->refcount = 0;
10446 done:
10447 	sx_xunlock(&t4_uld_list_lock);
10448 	return (rc);
10449 }
10450 
10451 int
10452 t4_unregister_uld(struct uld_info *ui)
10453 {
10454 	int rc = EINVAL;
10455 	struct uld_info *u;
10456 
10457 	sx_xlock(&t4_uld_list_lock);
10458 
10459 	SLIST_FOREACH(u, &t4_uld_list, link) {
10460 		if (u == ui) {
10461 			if (ui->refcount > 0) {
10462 				rc = EBUSY;
10463 				goto done;
10464 			}
10465 
10466 			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
10467 			rc = 0;
10468 			goto done;
10469 		}
10470 	}
10471 done:
10472 	sx_xunlock(&t4_uld_list_lock);
10473 	return (rc);
10474 }
10475 
10476 int
10477 t4_activate_uld(struct adapter *sc, int id)
10478 {
10479 	int rc;
10480 	struct uld_info *ui;
10481 
10482 	ASSERT_SYNCHRONIZED_OP(sc);
10483 
10484 	if (id < 0 || id > ULD_MAX)
10485 		return (EINVAL);
10486 	rc = EAGAIN;	/* kldload the module with this ULD and try again. */
10487 
10488 	sx_slock(&t4_uld_list_lock);
10489 
10490 	SLIST_FOREACH(ui, &t4_uld_list, link) {
10491 		if (ui->uld_id == id) {
10492 			if (!(sc->flags & FULL_INIT_DONE)) {
10493 				rc = adapter_full_init(sc);
10494 				if (rc != 0)
10495 					break;
10496 			}
10497 
10498 			rc = ui->activate(sc);
10499 			if (rc == 0) {
10500 				setbit(&sc->active_ulds, id);
10501 				ui->refcount++;
10502 			}
10503 			break;
10504 		}
10505 	}
10506 
10507 	sx_sunlock(&t4_uld_list_lock);
10508 
10509 	return (rc);
10510 }
10511 
10512 int
10513 t4_deactivate_uld(struct adapter *sc, int id)
10514 {
10515 	int rc;
10516 	struct uld_info *ui;
10517 
10518 	ASSERT_SYNCHRONIZED_OP(sc);
10519 
10520 	if (id < 0 || id > ULD_MAX)
10521 		return (EINVAL);
10522 	rc = ENXIO;
10523 
10524 	sx_slock(&t4_uld_list_lock);
10525 
10526 	SLIST_FOREACH(ui, &t4_uld_list, link) {
10527 		if (ui->uld_id == id) {
10528 			rc = ui->deactivate(sc);
10529 			if (rc == 0) {
10530 				clrbit(&sc->active_ulds, id);
10531 				ui->refcount--;
10532 			}
10533 			break;
10534 		}
10535 	}
10536 
10537 	sx_sunlock(&t4_uld_list_lock);
10538 
10539 	return (rc);
10540 }
10541 
10542 int
10543 uld_active(struct adapter *sc, int uld_id)
10544 {
10545 
10546 	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
10547 
10548 	return (isset(&sc->active_ulds, uld_id));
10549 }
10550 #endif
10551 
10552 /*
10553  * t  = ptr to tunable.
10554  * nc = number of CPUs.
10555  * c  = compiled in default for that tunable.
10556  */
10557 static void
10558 calculate_nqueues(int *t, int nc, const int c)
10559 {
10560 	int nq;
10561 
10562 	if (*t > 0)
10563 		return;
10564 	nq = *t < 0 ? -*t : c;
10565 	*t = min(nc, nq);
10566 }
10567 
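/*
 * Examples: with nc == 8, a tunable left at 0 becomes min(8, c), one set
 * to -16 becomes min(8, 16) == 8, and any positive setting is kept as-is.
 */
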
10568 /*
10569  * Come up with reasonable defaults for some of the tunables, provided they're
10570  * not set by the user (in which case we'll use the values as is).
10571  */
10572 static void
10573 tweak_tunables(void)
10574 {
10575 	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
10576 
10577 	if (t4_ntxq < 1) {
10578 #ifdef RSS
10579 		t4_ntxq = rss_getnumbuckets();
10580 #else
10581 		calculate_nqueues(&t4_ntxq, nc, NTXQ);
10582 #endif
10583 	}
10584 
10585 	calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);
10586 
10587 	if (t4_nrxq < 1) {
10588 #ifdef RSS
10589 		t4_nrxq = rss_getnumbuckets();
10590 #else
10591 		calculate_nqueues(&t4_nrxq, nc, NRXQ);
10592 #endif
10593 	}
10594 
10595 	calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);
10596 
10597 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10598 	calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
10599 	calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
10600 #endif
10601 #ifdef TCP_OFFLOAD
10602 	calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
10603 	calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);
10604 
10605 	if (t4_toecaps_allowed == -1)
10606 		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
10607 
10608 	if (t4_rdmacaps_allowed == -1) {
10609 		t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
10610 		    FW_CAPS_CONFIG_RDMA_RDMAC;
10611 	}
10612 
10613 	if (t4_iscsicaps_allowed == -1) {
10614 		t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
10615 		    FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
10616 		    FW_CAPS_CONFIG_ISCSI_T10DIF;
10617 	}
10618 
10619 	if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
10620 		t4_tmr_idx_ofld = TMR_IDX_OFLD;
10621 
10622 	if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
10623 		t4_pktc_idx_ofld = PKTC_IDX_OFLD;
10624 #else
10625 	if (t4_toecaps_allowed == -1)
10626 		t4_toecaps_allowed = 0;
10627 
10628 	if (t4_rdmacaps_allowed == -1)
10629 		t4_rdmacaps_allowed = 0;
10630 
10631 	if (t4_iscsicaps_allowed == -1)
10632 		t4_iscsicaps_allowed = 0;
10633 #endif
10634 
10635 #ifdef DEV_NETMAP
10636 	calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
10637 	calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
10638 #endif
10639 
10640 	if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
10641 		t4_tmr_idx = TMR_IDX;
10642 
10643 	if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
10644 		t4_pktc_idx = PKTC_IDX;
10645 
10646 	if (t4_qsize_txq < 128)
10647 		t4_qsize_txq = 128;
10648 
10649 	if (t4_qsize_rxq < 128)
10650 		t4_qsize_rxq = 128;
10651 	while (t4_qsize_rxq & 7)
10652 		t4_qsize_rxq++;
10653 
10654 	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
10655 
10656 	/*
10657 	 * Number of VIs to create per-port.  The first VI is the "main" regular
10658 	 * VI for the port.  The rest are additional virtual interfaces on the
10659 	 * same physical port.  Note that the main VI does not have native
10660 	 * netmap support but the extra VIs do.
10661 	 *
10662 	 * Limit the number of VIs per port to the number of available
10663 	 * MAC addresses per port.
10664 	 */
10665 	if (t4_num_vis < 1)
10666 		t4_num_vis = 1;
10667 	if (t4_num_vis > nitems(vi_mac_funcs)) {
10668 		t4_num_vis = nitems(vi_mac_funcs);
10669 		printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
10670 	}
10671 
10672 	if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
10673 		pcie_relaxed_ordering = 1;
10674 #if defined(__i386__) || defined(__amd64__)
10675 		if (cpu_vendor_id == CPU_VENDOR_INTEL)
10676 			pcie_relaxed_ordering = 0;
10677 #endif
10678 	}
10679 }
10680 
10681 #ifdef DDB
10682 static void
10683 t4_dump_tcb(struct adapter *sc, int tid)
10684 {
10685 	uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
10686 
10687 	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
10688 	save = t4_read_reg(sc, reg);
10689 	base = sc->memwin[2].mw_base;
10690 
10691 	/* Dump TCB for the tid */
10692 	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
10693 	tcb_addr += tid * TCB_SIZE;
10694 
10695 	if (is_t4(sc)) {
10696 		pf = 0;
10697 		win_pos = tcb_addr & ~0xf;	/* start must be 16B aligned */
10698 	} else {
10699 		pf = V_PFNUM(sc->pf);
10700 		win_pos = tcb_addr & ~0x7f;	/* start must be 128B aligned */
10701 	}
10702 	t4_write_reg(sc, reg, win_pos | pf);
10703 	t4_read_reg(sc, reg);
10704 
10705 	off = tcb_addr - win_pos;
10706 	for (i = 0; i < 4; i++) {
10707 		uint32_t buf[8];
10708 		for (j = 0; j < 8; j++, off += 4)
10709 			buf[j] = htonl(t4_read_reg(sc, base + off));
10710 
10711 		db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
10712 		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
10713 		    buf[7]);
10714 	}
10715 
10716 	t4_write_reg(sc, reg, save);
10717 	t4_read_reg(sc, reg);
10718 }
10719 
10720 static void
10721 t4_dump_devlog(struct adapter *sc)
10722 {
10723 	struct devlog_params *dparams = &sc->params.devlog;
10724 	struct fw_devlog_e e;
10725 	int i, first, j, m, nentries, rc;
10726 	uint64_t ftstamp = UINT64_MAX;
10727 
10728 	if (dparams->start == 0) {
10729 		db_printf("devlog params not valid\n");
10730 		return;
10731 	}
10732 
10733 	nentries = dparams->size / sizeof(struct fw_devlog_e);
10734 	m = fwmtype_to_hwmtype(dparams->memtype);
10735 
10736 	/* Find the first entry. */
10737 	first = -1;
10738 	for (i = 0; i < nentries && !db_pager_quit; i++) {
10739 		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
10740 		    sizeof(e), (void *)&e);
10741 		if (rc != 0)
10742 			break;
10743 
10744 		if (e.timestamp == 0)
10745 			break;
10746 
10747 		e.timestamp = be64toh(e.timestamp);
10748 		if (e.timestamp < ftstamp) {
10749 			ftstamp = e.timestamp;
10750 			first = i;
10751 		}
10752 	}
10753 
10754 	if (first == -1)
10755 		return;
10756 
10757 	i = first;
10758 	do {
10759 		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
10760 		    sizeof(e), (void *)&e);
10761 		if (rc != 0)
10762 			return;
10763 
10764 		if (e.timestamp == 0)
10765 			return;
10766 
10767 		e.timestamp = be64toh(e.timestamp);
10768 		e.seqno = be32toh(e.seqno);
10769 		for (j = 0; j < 8; j++)
10770 			e.params[j] = be32toh(e.params[j]);
10771 
10772 		db_printf("%10d  %15ju  %8s  %8s  ",
10773 		    e.seqno, e.timestamp,
10774 		    (e.level < nitems(devlog_level_strings) ?
10775 			devlog_level_strings[e.level] : "UNKNOWN"),
10776 		    (e.facility < nitems(devlog_facility_strings) ?
10777 			devlog_facility_strings[e.facility] : "UNKNOWN"));
10778 		db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
10779 		    e.params[3], e.params[4], e.params[5], e.params[6],
10780 		    e.params[7]);
10781 
10782 		if (++i == nentries)
10783 			i = 0;
10784 	} while (i != first && !db_pager_quit);
10785 }
10786 
10787 static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
10788 _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
10789 
10790 DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
10791 {
10792 	device_t dev;
10793 	int t;
10794 	bool valid;
10795 
10796 	valid = false;
10797 	t = db_read_token();
10798 	if (t == tIDENT) {
10799 		dev = device_lookup_by_name(db_tok_string);
10800 		valid = true;
10801 	}
10802 	db_skip_to_eol();
10803 	if (!valid) {
10804 		db_printf("usage: show t4 devlog <nexus>\n");
10805 		return;
10806 	}
10807 
10808 	if (dev == NULL) {
10809 		db_printf("device not found\n");
10810 		return;
10811 	}
10812 
10813 	t4_dump_devlog(device_get_softc(dev));
10814 }
10815 
10816 DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
10817 {
10818 	device_t dev;
10819 	int radix, tid, t;
10820 	bool valid;
10821 
10822 	valid = false;
10823 	radix = db_radix;
10824 	db_radix = 10;
10825 	t = db_read_token();
10826 	if (t == tIDENT) {
10827 		dev = device_lookup_by_name(db_tok_string);
10828 		t = db_read_token();
10829 		if (t == tNUMBER) {
10830 			tid = db_tok_number;
10831 			valid = true;
10832 		}
10833 	}
10834 	db_radix = radix;
10835 	db_skip_to_eol();
10836 	if (!valid) {
10837 		db_printf("usage: show t4 tcb <nexus> <tid>\n");
10838 		return;
10839 	}
10840 
10841 	if (dev == NULL) {
10842 		db_printf("device not found\n");
10843 		return;
10844 	}
10845 	if (tid < 0) {
10846 		db_printf("invalid tid\n");
10847 		return;
10848 	}
10849 
10850 	t4_dump_tcb(device_get_softc(dev), tid);
10851 }
10852 #endif /* DDB */
10853 
10854 /*
10855  * Borrowed from cesa_prep_aes_key().
10856  *
10857  * NB: The crypto engine wants the words in the decryption key in reverse
10858  * order.
10859  */
10860 void
10861 t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits)
10862 {
10863 	uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)];
10864 	uint32_t *dkey;
10865 	int i;
10866 
10867 	rijndaelKeySetupEnc(ek, enc_key, kbits);
10868 	dkey = dec_key;
10869 	dkey += (kbits / 8) / 4;
10870 
10871 	switch (kbits) {
10872 	case 128:
10873 		for (i = 0; i < 4; i++)
10874 			*--dkey = htobe32(ek[4 * 10 + i]);
10875 		break;
10876 	case 192:
10877 		for (i = 0; i < 2; i++)
10878 			*--dkey = htobe32(ek[4 * 11 + 2 + i]);
10879 		for (i = 0; i < 4; i++)
10880 			*--dkey = htobe32(ek[4 * 12 + i]);
10881 		break;
10882 	case 256:
10883 		for (i = 0; i < 4; i++)
10884 			*--dkey = htobe32(ek[4 * 13 + i]);
10885 		for (i = 0; i < 4; i++)
10886 			*--dkey = htobe32(ek[4 * 14 + i]);
10887 		break;
10888 	}
10889 	MPASS(dkey == dec_key);
10890 }
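/*
 * A minimal usage sketch (illustrative only; "cipher_key" stands in for a
 * caller-supplied 256-bit AES key).  dec_key must have room for kbits / 8
 * bytes; on return it holds the final round keys of the expanded schedule,
 * byte-swapped to big-endian and stored in reverse word order:
 *
 *	uint32_t deckey[256 / 32];
 *
 *	t4_aes_getdeckey(deckey, cipher_key, 256);
 */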
10891 
10892 static struct sx mlu;	/* serializes module load/unload */
10893 SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
10894 
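/*
 * mod_event is shared by the t4nex, t5nex, and t6nex modules (see the
 * DRIVER_MODULE declarations at the end of this file), so "loaded" counts
 * the modules currently loaded and the one-time setup/teardown runs only
 * for the first load and the last unload.  The mlu lock serializes
 * concurrent load/unload events.
 */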
10895 static int
10896 mod_event(module_t mod, int cmd, void *arg)
10897 {
10898 	int rc = 0;
10899 	static int loaded = 0;
10900 
10901 	switch (cmd) {
10902 	case MOD_LOAD:
10903 		sx_xlock(&mlu);
10904 		if (loaded++ == 0) {
10905 			t4_sge_modload();
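			/*
			 * Register the CPL handlers used by the base driver
			 * for filters, hash filters, packet tracing, and
			 * L2T/SMT write replies.
			 */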
10906 			t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10907 			    t4_filter_rpl, CPL_COOKIE_FILTER);
10908 			t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
10909 			    do_l2t_write_rpl, CPL_COOKIE_FILTER);
10910 			t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
10911 			    t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
10912 			t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10913 			    t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
10914 			t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
10915 			    t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
10916 			t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
10917 			t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
10918 			t4_register_cpl_handler(CPL_SMT_WRITE_RPL,
10919 			    do_smt_write_rpl);
10920 			sx_init(&t4_list_lock, "T4/T5 adapters");
10921 			SLIST_INIT(&t4_list);
10922 			callout_init(&fatal_callout, 1);
10923 #ifdef TCP_OFFLOAD
10924 			sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
10925 			SLIST_INIT(&t4_uld_list);
10926 #endif
10927 #ifdef INET6
10928 			t4_clip_modload();
10929 #endif
10930 			t4_tracer_modload();
10931 			tweak_tunables();
10932 		}
10933 		sx_xunlock(&mlu);
10934 		break;
10935 
10936 	case MOD_UNLOAD:
10937 		sx_xlock(&mlu);
10938 		if (--loaded == 0) {
10939 			int tries;
10940 
10941 			sx_slock(&t4_list_lock);
10942 			if (!SLIST_EMPTY(&t4_list)) {
10943 				rc = EBUSY;
10944 				sx_sunlock(&t4_list_lock);
10945 				goto done_unload;
10946 			}
10947 #ifdef TCP_OFFLOAD
10948 			sx_slock(&t4_uld_list_lock);
10949 			if (!SLIST_EMPTY(&t4_uld_list)) {
10950 				rc = EBUSY;
10951 				sx_sunlock(&t4_uld_list_lock);
10952 				sx_sunlock(&t4_list_lock);
10953 				goto done_unload;
10954 			}
10955 #endif
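			/*
			 * Give any clusters with custom free routines up to
			 * ~10s (5 tries x 2s) to drain before giving up.
			 */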
10956 			tries = 0;
10957 			while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
10958 				uprintf("%ju clusters with custom free routine "
10959 				    "still in use.\n", t4_sge_extfree_refs());
10960 				pause("t4unload", 2 * hz);
10961 			}
10962 #ifdef TCP_OFFLOAD
10963 			sx_sunlock(&t4_uld_list_lock);
10964 #endif
10965 			sx_sunlock(&t4_list_lock);
10966 
10967 			if (t4_sge_extfree_refs() == 0) {
10968 				t4_tracer_modunload();
10969 #ifdef INET6
10970 				t4_clip_modunload();
10971 #endif
10972 #ifdef TCP_OFFLOAD
10973 				sx_destroy(&t4_uld_list_lock);
10974 #endif
10975 				sx_destroy(&t4_list_lock);
10976 				t4_sge_modunload();
10977 				loaded = 0;
10978 			} else {
10979 				rc = EBUSY;
10980 				loaded++;	/* undo earlier decrement */
10981 			}
10982 		}
10983 done_unload:
10984 		sx_xunlock(&mlu);
10985 		break;
10986 	}
10987 
10988 	return (rc);
10989 }
10990 
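/*
 * One nexus driver per chip generation (T4/T5/T6), each with a port driver
 * (cxgbe/cxl/cc) and a virtual-interface driver (vcxgbe/vcxl/vcc) that
 * attach under it.
 */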
10991 static devclass_t t4_devclass, t5_devclass, t6_devclass;
10992 static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
10993 static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;
10994 
10995 DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
10996 MODULE_VERSION(t4nex, 1);
10997 MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
10998 #ifdef DEV_NETMAP
10999 MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
11000 #endif /* DEV_NETMAP */
11001 
11002 DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
11003 MODULE_VERSION(t5nex, 1);
11004 MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
11005 #ifdef DEV_NETMAP
11006 MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
11007 #endif /* DEV_NETMAP */
11008 
11009 DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
11010 MODULE_VERSION(t6nex, 1);
11011 MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
11012 #ifdef DEV_NETMAP
11013 MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
11014 #endif /* DEV_NETMAP */
11015 
11016 DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
11017 MODULE_VERSION(cxgbe, 1);
11018 
11019 DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
11020 MODULE_VERSION(cxl, 1);
11021 
11022 DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
11023 MODULE_VERSION(cc, 1);
11024 
11025 DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
11026 MODULE_VERSION(vcxgbe, 1);
11027 
11028 DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
11029 MODULE_VERSION(vcxl, 1);
11030 
11031 DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
11032 MODULE_VERSION(vcc, 1);
11033