xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision ddd5b8e9b4d8957fce018c520657cdfa4ecffad3)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_inet.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/bus.h>
39 #include <sys/module.h>
40 #include <sys/pciio.h>
41 #include <sys/conf.h>
42 #include <machine/bus.h>
43 #include <machine/resource.h>
44 #include <sys/bus_dma.h>
45 #include <sys/ktr.h>
46 #include <sys/rman.h>
47 #include <sys/ioccom.h>
48 #include <sys/mbuf.h>
49 #include <sys/linker.h>
50 #include <sys/firmware.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/queue.h>
57 #include <sys/taskqueue.h>
58 #include <sys/proc.h>
59 
60 #include <net/bpf.h>
61 #include <net/ethernet.h>
62 #include <net/if.h>
63 #include <net/if_arp.h>
64 #include <net/if_dl.h>
65 #include <net/if_media.h>
66 #include <net/if_types.h>
67 #include <net/if_vlan_var.h>
68 
69 #include <netinet/in_systm.h>
70 #include <netinet/in.h>
71 #include <netinet/if_ether.h>
72 #include <netinet/ip.h>
73 #include <netinet/ip.h>
74 #include <netinet/tcp.h>
75 #include <netinet/udp.h>
76 
77 #include <dev/pci/pcireg.h>
78 #include <dev/pci/pcivar.h>
79 #include <dev/pci/pci_private.h>
80 
81 #include <cxgb_include.h>
82 
83 #ifdef PRIV_SUPPORTED
84 #include <sys/priv.h>
85 #endif
86 
87 static int cxgb_setup_interrupts(adapter_t *);
88 static void cxgb_teardown_interrupts(adapter_t *);
89 static void cxgb_init(void *);
90 static int cxgb_init_locked(struct port_info *);
91 static int cxgb_uninit_locked(struct port_info *);
92 static int cxgb_uninit_synchronized(struct port_info *);
93 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
94 static int cxgb_media_change(struct ifnet *);
95 static int cxgb_ifm_type(int);
96 static void cxgb_build_medialist(struct port_info *);
97 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
98 static int setup_sge_qsets(adapter_t *);
99 static void cxgb_async_intr(void *);
100 static void cxgb_tick_handler(void *, int);
101 static void cxgb_tick(void *);
102 static void link_check_callout(void *);
103 static void check_link_status(void *, int);
104 static void setup_rss(adapter_t *sc);
105 static int alloc_filters(struct adapter *);
106 static int setup_hw_filters(struct adapter *);
107 static int set_filter(struct adapter *, int, const struct filter_info *);
108 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
111     unsigned int, u64, u64);
112 #ifdef TCP_OFFLOAD
113 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
114 #endif
115 
116 /* Attachment glue for the PCI controller end of the device.  Each port of
117  * the device is attached separately, as defined later.
118  */
119 static int cxgb_controller_probe(device_t);
120 static int cxgb_controller_attach(device_t);
121 static int cxgb_controller_detach(device_t);
122 static void cxgb_free(struct adapter *);
123 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
124     unsigned int end);
125 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
126 static int cxgb_get_regs_len(void);
127 static void touch_bars(device_t dev);
128 static void cxgb_update_mac_settings(struct port_info *p);
129 #ifdef TCP_OFFLOAD
130 static int toe_capability(struct port_info *, int);
131 #endif
132 
/*
 * Device method table for the "cxgbc" controller driver: routes the
 * probe, attach and detach device methods to the cxgb_controller_*
 * functions in this file.
 */
133 static device_method_t cxgb_controller_methods[] = {
134 	DEVMETHOD(device_probe,		cxgb_controller_probe),
135 	DEVMETHOD(device_attach,	cxgb_controller_attach),
136 	DEVMETHOD(device_detach,	cxgb_controller_detach),
137 
138 	DEVMETHOD_END
139 };
140 
/*
 * Driver description for the controller device, named "cxgbc".  The last
 * initializer is sizeof(struct adapter); cxgb_controller_attach() obtains
 * its struct adapter from device_get_softc().
 */
141 static driver_t cxgb_controller_driver = {
142 	"cxgbc",
143 	cxgb_controller_methods,
144 	sizeof(struct adapter)
145 };
146 
147 static int cxgbc_mod_event(module_t, int, void *);
148 static devclass_t	cxgb_controller_devclass;
149 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
150     cxgbc_mod_event, 0);
151 MODULE_VERSION(cxgbc, 1);
152 
153 /*
154  * Attachment glue for the ports.  Attachment is done directly to the
155  * controller device.
156  */
157 static int cxgb_port_probe(device_t);
158 static int cxgb_port_attach(device_t);
159 static int cxgb_port_detach(device_t);
160 
161 static device_method_t cxgb_port_methods[] = {
162 	DEVMETHOD(device_probe,		cxgb_port_probe),
163 	DEVMETHOD(device_attach,	cxgb_port_attach),
164 	DEVMETHOD(device_detach,	cxgb_port_detach),
165 	{ 0, 0 }
166 };
167 
/*
 * Driver description for the port devices, named "cxgb".  The last
 * initializer is 0: cxgb_controller_attach() hands each child its
 * struct port_info with device_set_softc().
 */
168 static driver_t cxgb_port_driver = {
169 	"cxgb",
170 	cxgb_port_methods,
171 	0
172 };
173 
174 static d_ioctl_t cxgb_extension_ioctl;
175 static d_open_t cxgb_extension_open;
176 static d_close_t cxgb_extension_close;
177 
/*
 * Character device switch for the per-port control node that
 * cxgb_makedev() creates with make_dev(); open, close and ioctl are
 * served by the cxgb_extension_* handlers.
 */
178 static struct cdevsw cxgb_cdevsw = {
179        .d_version =    D_VERSION,
180        .d_flags =      0,
181        .d_open =       cxgb_extension_open,
182        .d_close =      cxgb_extension_close,
183        .d_ioctl =      cxgb_extension_ioctl,
184        .d_name =       "cxgb",
185 };
186 
187 static devclass_t	cxgb_port_devclass;
188 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
189 MODULE_VERSION(cxgb, 1);
190 
191 static struct mtx t3_list_lock;
192 static SLIST_HEAD(, adapter) t3_list;
193 #ifdef TCP_OFFLOAD
194 static struct mtx t3_uld_list_lock;
195 static SLIST_HEAD(, uld_info) t3_uld_list;
196 #endif
197 
198 /*
199  * The driver uses the best interrupt scheme available on a platform in the
200  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
201  * of these schemes the driver may consider as follows:
202  *
203  * msi = 2: choose from among all three options
204  * msi = 1 : only consider MSI and pin interrupts
205  * msi = 0: force pin interrupts
206  */
207 static int msi_allowed = 2;
208 
209 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
210 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
211 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
212     "MSI-X, MSI, INTx selector");
213 
214 /*
215  * The driver uses an auto-queue algorithm by default.
216  * To disable it and force a single queue-set per port, use multiq = 0
217  */
218 static int multiq = 1;
219 TUNABLE_INT("hw.cxgb.multiq", &multiq);
220 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
221     "use min(ncpus/ports, 8) queue-sets per port");
222 
223 /*
224  * By default the driver will not update the firmware unless
225  * it was compiled against a newer version
226  *
227  */
228 static int force_fw_update = 0;
229 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
230 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
231     "update firmware even if up to date");
232 
/*
 * hw.cxgb.use_16k_clusters: tunable, also exported as a sysctl, that
 * selects 16kB clusters for the jumbo queue.  Not static, so it is visible
 * to other files of the driver.
 * NOTE(review): the default of -1 presumably means "let the driver decide";
 * the code that interprets it is not in this part of the file -- confirm.
 */
233 int cxgb_use_16k_clusters = -1;
234 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
235 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
236     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
237 
/*
 * hw.cxgb.nfilters: tunable, also exported as a sysctl, giving the maximum
 * number of entries in the filter table.
 * NOTE(review): the handling of the default -1 is not visible here --
 * confirm where the filter table is sized.
 */
238 static int nfilters = -1;
239 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
240 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
241     &nfilters, 0, "max number of entries in the filter table");
242 
/*
 * Upper (MAX_*) and lower (MIN_*) bounds for queue and buffer-ring sizes.
 * NOTE(review): the code that enforces these limits is not in this part of
 * the file; presumably it validates user-supplied queue-set parameters --
 * confirm.
 */
243 enum {
244 	MAX_TXQ_ENTRIES      = 16384,
245 	MAX_CTRL_TXQ_ENTRIES = 1024,
246 	MAX_RSPQ_ENTRIES     = 16384,
247 	MAX_RX_BUFFERS       = 16384,
248 	MAX_RX_JUMBO_BUFFERS = 16384,
249 	MIN_TXQ_ENTRIES      = 4,
250 	MIN_CTRL_TXQ_ENTRIES = 4,
251 	MIN_RSPQ_ENTRIES     = 32,
252 	MIN_FL_ENTRIES       = 32,
253 	MIN_FL_JUMBO_ENTRIES = 32
254 };
255 
/*
 * Software description of one packet filter; set_filter() takes a pointer
 * to one of these.  The address and port fields are followed by bit-fields
 * packed into u32 storage.
 * NOTE(review): field meanings below are inferred from the names only --
 * confirm against set_filter() and the hardware documentation.
 */
256 struct filter_info {
257 	u32 sip;		/* presumably source IP address */
258 	u32 sip_mask;		/* presumably mask applied to sip */
259 	u32 dip;		/* presumably destination IP address */
260 	u16 sport;		/* presumably source port */
261 	u16 dport;		/* presumably destination port */
262 	u32 vlan:12;		/* 12-bit field, presumably the VLAN id */
263 	u32 vlan_prio:3;	/* 3-bit field; see FILTER_NO_VLAN_PRI */
264 	u32 mac_hit:1;
265 	u32 mac_idx:4;
266 	u32 mac_vld:1;
267 	u32 pkt_type:2;
268 	u32 report_filter_id:1;
269 	u32 pass:1;
270 	u32 rss:1;
271 	u32 qset:3;		/* 3-bit field, presumably a queue-set index */
272 	u32 locked:1;
273 	u32 valid:1;
274 };
275 
276 enum { FILTER_NO_VLAN_PRI = 7 };
277 
278 #define EEPROM_MAGIC 0x38E2F10C
279 
280 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
281 
/*
 * vendor/device are compared with the PCI ids in cxgb_get_ident(); index is
 * handed to t3_get_adapter_info() by cxgb_get_adapter_info().
 */
282 /* Table for probing the cards.  The desc text is not used for the device description (probe takes that from the adapter_info), but cxgb_get_ident() relies on a NULL desc to mark the end of the table. */
283 struct cxgb_ident {
284 	uint16_t	vendor;
285 	uint16_t	device;
286 	int		index;
287 	char		*desc;
288 } cxgb_identifiers[] = {
289 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
290 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
291 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
303 	{0, 0, 0, NULL}	/* terminator: desc == NULL */
304 };
305 
306 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
307 
308 
309 static __inline char
310 t3rev2char(struct adapter *adapter)
311 {
312 	char rev = 'z';
313 
314 	switch(adapter->params.rev) {
315 	case T3_REV_A:
316 		rev = 'a';
317 		break;
318 	case T3_REV_B:
319 	case T3_REV_B2:
320 		rev = 'b';
321 		break;
322 	case T3_REV_C:
323 		rev = 'c';
324 		break;
325 	}
326 	return rev;
327 }
328 
329 static struct cxgb_ident *
330 cxgb_get_ident(device_t dev)
331 {
332 	struct cxgb_ident *id;
333 
334 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
335 		if ((id->vendor == pci_get_vendor(dev)) &&
336 		    (id->device == pci_get_device(dev))) {
337 			return (id);
338 		}
339 	}
340 	return (NULL);
341 }
342 
343 static const struct adapter_info *
344 cxgb_get_adapter_info(device_t dev)
345 {
346 	struct cxgb_ident *id;
347 	const struct adapter_info *ai;
348 
349 	id = cxgb_get_ident(dev);
350 	if (id == NULL)
351 		return (NULL);
352 
353 	ai = t3_get_adapter_info(id->index);
354 
355 	return (ai);
356 }
357 
358 static int
359 cxgb_controller_probe(device_t dev)
360 {
361 	const struct adapter_info *ai;
362 	char *ports, buf[80];
363 	int nports;
364 
365 	ai = cxgb_get_adapter_info(dev);
366 	if (ai == NULL)
367 		return (ENXIO);
368 
369 	nports = ai->nports0 + ai->nports1;
370 	if (nports == 1)
371 		ports = "port";
372 	else
373 		ports = "ports";
374 
375 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
376 	device_set_desc_copy(dev, buf);
377 	return (BUS_PROBE_DEFAULT);
378 }
379 
380 #define FW_FNAME "cxgb_t3fw"
381 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
382 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
383 
384 static int
385 upgrade_fw(adapter_t *sc)
386 {
387 	const struct firmware *fw;
388 	int status;
389 	u32 vers;
390 
391 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
392 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
393 		return (ENOENT);
394 	} else
395 		device_printf(sc->dev, "installing firmware on card\n");
396 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
397 
398 	if (status != 0) {
399 		device_printf(sc->dev, "failed to install firmware: %d\n",
400 		    status);
401 	} else {
402 		t3_get_fw_version(sc, &vers);
403 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
404 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
405 		    G_FW_VERSION_MICRO(vers));
406 	}
407 
408 	firmware_put(fw, FIRMWARE_UNLOAD);
409 
410 	return (status);
411 }
412 
413 /*
414  * The cxgb_controller_attach function is responsible for the initial
415  * bringup of the device.  Its responsibilities include:
416  *
417  *  1. Determine if the device supports MSI or MSI-X.
418  *  2. Allocate bus resources so that we can access the Base Address Register
419  *  3. Create and initialize mutexes for the controller and its control
420  *     logic such as SGE and MDIO.
421  *  4. Call hardware specific setup routine for the adapter as a whole.
422  *  5. Allocate the BAR for doing MSI-X.
423  *  6. Setup the line interrupt iff MSI-X is not supported.
424  *  7. Create the driver's taskq.
425  *  8. Start one task queue service thread.
426  *  9. Check if the firmware and SRAM are up-to-date.  They will be
427  *     auto-updated later (before FULL_INIT_DONE), if required.
428  * 10. Create a child device for each MAC (port)
429  * 11. Initialize T3 private state.
430  * 12. Trigger the LED
431  * 13. Setup offload iff supported.
432  * 14. Reset/restart the tick callout.
433  * 15. Attach sysctls
434  *
435  * NOTE: Any modification or deviation from this list MUST be reflected in
436  * the above comment.  Failure to do so will result in problems on various
437  * error conditions including link flapping.
438  */
439 static int
440 cxgb_controller_attach(device_t dev)
441 {
442 	device_t child;
443 	const struct adapter_info *ai;
444 	struct adapter *sc;
445 	int i, error = 0;
446 	uint32_t vers;
447 	int port_qsets = 1;
448 	int msi_needed, reg;
449 	char buf[80];
450 
451 	sc = device_get_softc(dev);
452 	sc->dev = dev;
453 	sc->msi_count = 0;
454 	ai = cxgb_get_adapter_info(dev);
455 
456 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
457 	    device_get_unit(dev));
458 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
459 
460 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
461 	    device_get_unit(dev));
462 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
463 	    device_get_unit(dev));
464 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
465 	    device_get_unit(dev));
466 
467 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
468 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
469 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
470 
471 	mtx_lock(&t3_list_lock);
472 	SLIST_INSERT_HEAD(&t3_list, sc, link);
473 	mtx_unlock(&t3_list_lock);
474 
475 	/* find the PCIe link width and set max read request to 4KB*/
476 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
477 		uint16_t lnk;
478 
479 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
480 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
481 		if (sc->link_width < 8 &&
482 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
483 			device_printf(sc->dev,
484 			    "PCIe x%d Link, expect reduced performance\n",
485 			    sc->link_width);
486 		}
487 
488 		pci_set_max_read_req(dev, 4096);
489 	}
490 
491 	touch_bars(dev);
492 	pci_enable_busmaster(dev);
493 	/*
494 	 * Allocate the registers and make them available to the driver.
495 	 * The registers that we care about for NIC mode are in BAR 0
496 	 */
497 	sc->regs_rid = PCIR_BAR(0);
498 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
499 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
500 		device_printf(dev, "Cannot allocate BAR region 0\n");
501 		error = ENXIO;
502 		goto out;
503 	}
504 
505 	sc->bt = rman_get_bustag(sc->regs_res);
506 	sc->bh = rman_get_bushandle(sc->regs_res);
507 	sc->mmio_len = rman_get_size(sc->regs_res);
508 
509 	for (i = 0; i < MAX_NPORTS; i++)
510 		sc->port[i].adapter = sc;
511 
512 	if (t3_prep_adapter(sc, ai, 1) < 0) {
513 		printf("prep adapter failed\n");
514 		error = ENODEV;
515 		goto out;
516 	}
517 
518 	sc->udbs_rid = PCIR_BAR(2);
519 	sc->udbs_res = NULL;
520 	if (is_offload(sc) &&
521 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
522 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
523 		device_printf(dev, "Cannot allocate BAR region 1\n");
524 		error = ENXIO;
525 		goto out;
526 	}
527 
528         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
529 	 * enough messages for the queue sets.  If that fails, try falling
530 	 * back to MSI.  If that fails, then try falling back to the legacy
531 	 * interrupt pin model.
532 	 */
533 	sc->msix_regs_rid = 0x20;
534 	if ((msi_allowed >= 2) &&
535 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
536 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
537 
538 		if (multiq)
539 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
540 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
541 
542 		if (pci_msix_count(dev) == 0 ||
543 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
544 		    sc->msi_count != msi_needed) {
545 			device_printf(dev, "alloc msix failed - "
546 				      "msi_count=%d, msi_needed=%d, err=%d; "
547 				      "will try MSI\n", sc->msi_count,
548 				      msi_needed, error);
549 			sc->msi_count = 0;
550 			port_qsets = 1;
551 			pci_release_msi(dev);
552 			bus_release_resource(dev, SYS_RES_MEMORY,
553 			    sc->msix_regs_rid, sc->msix_regs_res);
554 			sc->msix_regs_res = NULL;
555 		} else {
556 			sc->flags |= USING_MSIX;
557 			sc->cxgb_intr = cxgb_async_intr;
558 			device_printf(dev,
559 				      "using MSI-X interrupts (%u vectors)\n",
560 				      sc->msi_count);
561 		}
562 	}
563 
564 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
565 		sc->msi_count = 1;
566 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
567 			device_printf(dev, "alloc msi failed - "
568 				      "err=%d; will try INTx\n", error);
569 			sc->msi_count = 0;
570 			port_qsets = 1;
571 			pci_release_msi(dev);
572 		} else {
573 			sc->flags |= USING_MSI;
574 			sc->cxgb_intr = t3_intr_msi;
575 			device_printf(dev, "using MSI interrupts\n");
576 		}
577 	}
578 	if (sc->msi_count == 0) {
579 		device_printf(dev, "using line interrupts\n");
580 		sc->cxgb_intr = t3b_intr;
581 	}
582 
583 	/* Create a private taskqueue thread for handling driver events */
584 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
585 	    taskqueue_thread_enqueue, &sc->tq);
586 	if (sc->tq == NULL) {
587 		device_printf(dev, "failed to allocate controller task queue\n");
588 		goto out;
589 	}
590 
591 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
592 	    device_get_nameunit(dev));
593 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
594 
595 
596 	/* Create a periodic callout for checking adapter status */
597 	callout_init(&sc->cxgb_tick_ch, TRUE);
598 
599 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
600 		/*
601 		 * Warn user that a firmware update will be attempted in init.
602 		 */
603 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
604 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
605 		sc->flags &= ~FW_UPTODATE;
606 	} else {
607 		sc->flags |= FW_UPTODATE;
608 	}
609 
610 	if (t3_check_tpsram_version(sc) < 0) {
611 		/*
612 		 * Warn user that a firmware update will be attempted in init.
613 		 */
614 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
615 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
616 		sc->flags &= ~TPS_UPTODATE;
617 	} else {
618 		sc->flags |= TPS_UPTODATE;
619 	}
620 
621 	/*
622 	 * Create a child device for each MAC.  The ethernet attachment
623 	 * will be done in these children.
624 	 */
625 	for (i = 0; i < (sc)->params.nports; i++) {
626 		struct port_info *pi;
627 
628 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
629 			device_printf(dev, "failed to add child port\n");
630 			error = EINVAL;
631 			goto out;
632 		}
633 		pi = &sc->port[i];
634 		pi->adapter = sc;
635 		pi->nqsets = port_qsets;
636 		pi->first_qset = i*port_qsets;
637 		pi->port_id = i;
638 		pi->tx_chan = i >= ai->nports0;
639 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
640 		sc->rxpkt_map[pi->txpkt_intf] = i;
641 		sc->port[i].tx_chan = i >= ai->nports0;
642 		sc->portdev[i] = child;
643 		device_set_softc(child, pi);
644 	}
645 	if ((error = bus_generic_attach(dev)) != 0)
646 		goto out;
647 
648 	/* initialize sge private state */
649 	t3_sge_init_adapter(sc);
650 
651 	t3_led_ready(sc);
652 
653 	error = t3_get_fw_version(sc, &vers);
654 	if (error)
655 		goto out;
656 
657 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
658 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
659 	    G_FW_VERSION_MICRO(vers));
660 
661 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
662 		 ai->desc, is_offload(sc) ? "R" : "",
663 		 sc->params.vpd.ec, sc->params.vpd.sn);
664 	device_set_desc_copy(dev, buf);
665 
666 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
667 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
668 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
669 
670 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
671 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
672 	t3_add_attach_sysctls(sc);
673 
674 #ifdef TCP_OFFLOAD
675 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
676 		sc->cpl_handler[i] = cpl_not_handled;
677 #endif
678 
679 	t3_intr_clear(sc);
680 	error = cxgb_setup_interrupts(sc);
681 out:
682 	if (error)
683 		cxgb_free(sc);
684 
685 	return (error);
686 }
687 
688 /*
689  * The cxgb_controller_detach routine is called with the device is
690  * unloaded from the system.
691  */
692 
693 static int
694 cxgb_controller_detach(device_t dev)
695 {
696 	struct adapter *sc;
697 
698 	sc = device_get_softc(dev);
699 
700 	cxgb_free(sc);
701 
702 	return (0);
703 }
704 
705 /*
706  * The cxgb_free() is called by the cxgb_controller_detach() routine
707  * to tear down the structures that were built up in
708  * cxgb_controller_attach(), and should be the final piece of work
709  * done when fully unloading the driver.
710  *
711  *
712  *  1. Shutting down the threads started by the cxgb_controller_attach()
713  *     routine.
714  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
715  *  3. Detaching all of the port devices created during the
716  *     cxgb_controller_attach() routine.
717  *  4. Removing the device children created via cxgb_controller_attach().
718  *  5. Releasing PCI resources associated with the device.
719  *  6. Turning off the offload support, iff it was turned on.
720  *  7. Destroying the mutexes created in cxgb_controller_attach().
721  *
722  */
723 static void
724 cxgb_free(struct adapter *sc)
725 {
726 	int i, nqsets = 0;
727 
728 	ADAPTER_LOCK(sc);
729 	sc->flags |= CXGB_SHUTDOWN;
730 	ADAPTER_UNLOCK(sc);
731 
732 	/*
733 	 * Make sure all child devices are gone.
734 	 */
735 	bus_generic_detach(sc->dev);
736 	for (i = 0; i < (sc)->params.nports; i++) {
737 		if (sc->portdev[i] &&
738 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
739 			device_printf(sc->dev, "failed to delete child port\n");
740 		nqsets += sc->port[i].nqsets;
741 	}
742 
743 	/*
744 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
745 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
746 	 * all open devices have been closed.
747 	 */
748 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
749 					   __func__, sc->open_device_map));
750 	for (i = 0; i < sc->params.nports; i++) {
751 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
752 						  __func__, i));
753 	}
754 
755 	/*
756 	 * Finish off the adapter's callouts.
757 	 */
758 	callout_drain(&sc->cxgb_tick_ch);
759 	callout_drain(&sc->sge_timer_ch);
760 
761 	/*
762 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
763 	 * sysctls are cleaned up by the kernel linker.
764 	 */
765 	if (sc->flags & FULL_INIT_DONE) {
766  		t3_free_sge_resources(sc, nqsets);
767  		sc->flags &= ~FULL_INIT_DONE;
768  	}
769 
770 	/*
771 	 * Release all interrupt resources.
772 	 */
773 	cxgb_teardown_interrupts(sc);
774 	if (sc->flags & (USING_MSI | USING_MSIX)) {
775 		device_printf(sc->dev, "releasing msi message(s)\n");
776 		pci_release_msi(sc->dev);
777 	} else {
778 		device_printf(sc->dev, "no msi message to release\n");
779 	}
780 
781 	if (sc->msix_regs_res != NULL) {
782 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
783 		    sc->msix_regs_res);
784 	}
785 
786 	/*
787 	 * Free the adapter's taskqueue.
788 	 */
789 	if (sc->tq != NULL) {
790 		taskqueue_free(sc->tq);
791 		sc->tq = NULL;
792 	}
793 
794 	free(sc->filters, M_DEVBUF);
795 	t3_sge_free(sc);
796 
797 	if (sc->udbs_res != NULL)
798 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
799 		    sc->udbs_res);
800 
801 	if (sc->regs_res != NULL)
802 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
803 		    sc->regs_res);
804 
805 	MTX_DESTROY(&sc->mdio_lock);
806 	MTX_DESTROY(&sc->sge.reg_lock);
807 	MTX_DESTROY(&sc->elmer_lock);
808 	mtx_lock(&t3_list_lock);
809 	SLIST_REMOVE(&t3_list, sc, adapter, link);
810 	mtx_unlock(&t3_list_lock);
811 	ADAPTER_LOCK_DEINIT(sc);
812 }
813 
814 /**
815  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
816  *	@sc: the controller softc
817  *
818  *	Determines how many sets of SGE queues to use and initializes them.
819  *	We support multiple queue sets per port if we have MSI-X, otherwise
820  *	just one queue set per port.
821  */
822 static int
823 setup_sge_qsets(adapter_t *sc)
824 {
825 	int i, j, err, irq_idx = 0, qset_idx = 0;
826 	u_int ntxq = SGE_TXQ_PER_SET;
827 
828 	if ((err = t3_sge_alloc(sc)) != 0) {
829 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
830 		return (err);
831 	}
832 
833 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
834 		irq_idx = -1;
835 
836 	for (i = 0; i < (sc)->params.nports; i++) {
837 		struct port_info *pi = &sc->port[i];
838 
839 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
840 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
841 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
842 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
843 			if (err) {
844 				t3_free_sge_resources(sc, qset_idx);
845 				device_printf(sc->dev,
846 				    "t3_sge_alloc_qset failed with %d\n", err);
847 				return (err);
848 			}
849 		}
850 	}
851 
852 	return (0);
853 }
854 
855 static void
856 cxgb_teardown_interrupts(adapter_t *sc)
857 {
858 	int i;
859 
860 	for (i = 0; i < SGE_QSETS; i++) {
861 		if (sc->msix_intr_tag[i] == NULL) {
862 
863 			/* Should have been setup fully or not at all */
864 			KASSERT(sc->msix_irq_res[i] == NULL &&
865 				sc->msix_irq_rid[i] == 0,
866 				("%s: half-done interrupt (%d).", __func__, i));
867 
868 			continue;
869 		}
870 
871 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
872 				  sc->msix_intr_tag[i]);
873 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
874 				     sc->msix_irq_res[i]);
875 
876 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
877 		sc->msix_irq_rid[i] = 0;
878 	}
879 
880 	if (sc->intr_tag) {
881 		KASSERT(sc->irq_res != NULL,
882 			("%s: half-done interrupt.", __func__));
883 
884 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
885 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
886 				     sc->irq_res);
887 
888 		sc->irq_res = sc->intr_tag = NULL;
889 		sc->irq_rid = 0;
890 	}
891 }
892 
893 static int
894 cxgb_setup_interrupts(adapter_t *sc)
895 {
896 	struct resource *res;
897 	void *tag;
898 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
899 
900 	sc->irq_rid = intr_flag ? 1 : 0;
901 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
902 					     RF_SHAREABLE | RF_ACTIVE);
903 	if (sc->irq_res == NULL) {
904 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
905 			      intr_flag, sc->irq_rid);
906 		err = EINVAL;
907 		sc->irq_rid = 0;
908 	} else {
909 		err = bus_setup_intr(sc->dev, sc->irq_res,
910 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
911 		    sc->cxgb_intr, sc, &sc->intr_tag);
912 
913 		if (err) {
914 			device_printf(sc->dev,
915 				      "Cannot set up interrupt (%x, %u, %d)\n",
916 				      intr_flag, sc->irq_rid, err);
917 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
918 					     sc->irq_res);
919 			sc->irq_res = sc->intr_tag = NULL;
920 			sc->irq_rid = 0;
921 		}
922 	}
923 
924 	/* That's all for INTx or MSI */
925 	if (!(intr_flag & USING_MSIX) || err)
926 		return (err);
927 
928 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
929 	for (i = 0; i < sc->msi_count - 1; i++) {
930 		rid = i + 2;
931 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
932 					     RF_SHAREABLE | RF_ACTIVE);
933 		if (res == NULL) {
934 			device_printf(sc->dev, "Cannot allocate interrupt "
935 				      "for message %d\n", rid);
936 			err = EINVAL;
937 			break;
938 		}
939 
940 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
941 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
942 		if (err) {
943 			device_printf(sc->dev, "Cannot set up interrupt "
944 				      "for message %d (%d)\n", rid, err);
945 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
946 			break;
947 		}
948 
949 		sc->msix_irq_rid[i] = rid;
950 		sc->msix_irq_res[i] = res;
951 		sc->msix_intr_tag[i] = tag;
952 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
953 	}
954 
955 	if (err)
956 		cxgb_teardown_interrupts(sc);
957 
958 	return (err);
959 }
960 
961 
962 static int
963 cxgb_port_probe(device_t dev)
964 {
965 	struct port_info *p;
966 	char buf[80];
967 	const char *desc;
968 
969 	p = device_get_softc(dev);
970 	desc = p->phy.desc;
971 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
972 	device_set_desc_copy(dev, buf);
973 	return (0);
974 }
975 
976 
977 static int
978 cxgb_makedev(struct port_info *pi)
979 {
980 
981 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
982 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
983 
984 	if (pi->port_cdev == NULL)
985 		return (ENOMEM);
986 
987 	pi->port_cdev->si_drv1 = (void *)pi;
988 
989 	return (0);
990 }
991 
/*
 * Interface capabilities of a port.  cxgb_port_attach() uses CXGB_CAP as
 * the starting value of if_capabilities and CXGB_CAP_ENABLE (the same
 * set) as the starting value of if_capenable.
 */
992 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
993     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
994     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
995 #define CXGB_CAP_ENABLE CXGB_CAP
996 
997 static int
998 cxgb_port_attach(device_t dev)
999 {
1000 	struct port_info *p;
1001 	struct ifnet *ifp;
1002 	int err;
1003 	struct adapter *sc;
1004 
1005 	p = device_get_softc(dev);
1006 	sc = p->adapter;
1007 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1008 	    device_get_unit(device_get_parent(dev)), p->port_id);
1009 	PORT_LOCK_INIT(p, p->lockbuf);
1010 
1011 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1012 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1013 
1014 	/* Allocate an ifnet object and set it up */
1015 	ifp = p->ifp = if_alloc(IFT_ETHER);
1016 	if (ifp == NULL) {
1017 		device_printf(dev, "Cannot allocate ifnet\n");
1018 		return (ENOMEM);
1019 	}
1020 
1021 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1022 	ifp->if_init = cxgb_init;
1023 	ifp->if_softc = p;
1024 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1025 	ifp->if_ioctl = cxgb_ioctl;
1026 	ifp->if_transmit = cxgb_transmit;
1027 	ifp->if_qflush = cxgb_qflush;
1028 
1029 	ifp->if_capabilities = CXGB_CAP;
1030 #ifdef TCP_OFFLOAD
1031 	if (is_offload(sc))
1032 		ifp->if_capabilities |= IFCAP_TOE4;
1033 #endif
1034 	ifp->if_capenable = CXGB_CAP_ENABLE;
1035 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1036 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1037 
1038 	/*
1039 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1040 	 */
1041 	if (sc->params.nports > 2) {
1042 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1043 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1044 		ifp->if_hwassist &= ~CSUM_TSO;
1045 	}
1046 
1047 	ether_ifattach(ifp, p->hw_addr);
1048 
1049 #ifdef DEFAULT_JUMBO
1050 	if (sc->params.nports <= 2)
1051 		ifp->if_mtu = ETHERMTU_JUMBO;
1052 #endif
1053 	if ((err = cxgb_makedev(p)) != 0) {
1054 		printf("makedev failed %d\n", err);
1055 		return (err);
1056 	}
1057 
1058 	/* Create a list of media supported by this port */
1059 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1060 	    cxgb_media_status);
1061 	cxgb_build_medialist(p);
1062 
1063 	t3_sge_init_port(p);
1064 
1065 	return (err);
1066 }
1067 
/*
 * cxgb_port_detach() is called via the device_detach methods when
 * cxgb_free() calls the bus_generic_detach.  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interfaces lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 *
 * The port is first marked doomed and the adapter-wide BUSY flag is taken
 * (sleeping until any current holder releases it); the teardown itself runs
 * without the adapter lock held, and BUSY is dropped again at the end.
 * Always returns 0.
 */
static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;
	struct adapter *sc;
	int i;

	p = device_get_softc(dev);
	sc = p->adapter;

	/* Tell cxgb_ioctl and if_init that the port is going away */
	ADAPTER_LOCK(sc);
	SET_DOOMED(p);
	/* Wake anyone sleeping on sc->flags so they notice the doomed port. */
	wakeup(&sc->flags);
	while (IS_BUSY(sc))
		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
	SET_BUSY(sc);
	ADAPTER_UNLOCK(sc);

	if (p->port_cdev != NULL)
		destroy_dev(p->port_cdev);

	/* Bring the port down, then remove its ifnet from the network stack. */
	cxgb_uninit_synchronized(p);
	ether_ifdetach(p->ifp);

	/* Drain the Ethernet tx queue callouts of every qset of this port. */
	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
		struct sge_qset *qs = &sc->sge.qs[i];
		struct sge_txq *txq = &qs->txq[TXQ_ETH];

		callout_drain(&txq->txq_watchdog);
		callout_drain(&txq->txq_timer);
	}

	PORT_LOCK_DEINIT(p);
	if_free(p->ifp);
	p->ifp = NULL;

	/* Release BUSY and let one waiter on sc->flags proceed. */
	ADAPTER_LOCK(sc);
	CLR_BUSY(sc);
	wakeup_one(&sc->flags);
	ADAPTER_UNLOCK(sc);
	return (0);
}
1118 
1119 void
1120 t3_fatal_err(struct adapter *sc)
1121 {
1122 	u_int fw_status[4];
1123 
1124 	if (sc->flags & FULL_INIT_DONE) {
1125 		t3_sge_stop(sc);
1126 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1127 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1128 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1129 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1130 		t3_intr_disable(sc);
1131 	}
1132 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1133 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1134 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1135 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1136 }
1137 
1138 int
1139 t3_os_find_pci_capability(adapter_t *sc, int cap)
1140 {
1141 	device_t dev;
1142 	struct pci_devinfo *dinfo;
1143 	pcicfgregs *cfg;
1144 	uint32_t status;
1145 	uint8_t ptr;
1146 
1147 	dev = sc->dev;
1148 	dinfo = device_get_ivars(dev);
1149 	cfg = &dinfo->cfg;
1150 
1151 	status = pci_read_config(dev, PCIR_STATUS, 2);
1152 	if (!(status & PCIM_STATUS_CAPPRESENT))
1153 		return (0);
1154 
1155 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1156 	case 0:
1157 	case 1:
1158 		ptr = PCIR_CAP_PTR;
1159 		break;
1160 	case 2:
1161 		ptr = PCIR_CAP_PTR_2;
1162 		break;
1163 	default:
1164 		return (0);
1165 		break;
1166 	}
1167 	ptr = pci_read_config(dev, ptr, 1);
1168 
1169 	while (ptr != 0) {
1170 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1171 			return (ptr);
1172 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1173 	}
1174 
1175 	return (0);
1176 }
1177 
1178 int
1179 t3_os_pci_save_state(struct adapter *sc)
1180 {
1181 	device_t dev;
1182 	struct pci_devinfo *dinfo;
1183 
1184 	dev = sc->dev;
1185 	dinfo = device_get_ivars(dev);
1186 
1187 	pci_cfg_save(dev, dinfo, 0);
1188 	return (0);
1189 }
1190 
1191 int
1192 t3_os_pci_restore_state(struct adapter *sc)
1193 {
1194 	device_t dev;
1195 	struct pci_devinfo *dinfo;
1196 
1197 	dev = sc->dev;
1198 	dinfo = device_get_ivars(dev);
1199 
1200 	pci_cfg_restore(dev, dinfo);
1201 	return (0);
1202 }
1203 
1204 /**
1205  *	t3_os_link_changed - handle link status changes
1206  *	@sc: the adapter associated with the link change
1207  *	@port_id: the port index whose link status has changed
1208  *	@link_status: the new status of the link
1209  *	@speed: the new speed setting
1210  *	@duplex: the new duplex setting
1211  *	@fc: the new flow-control setting
1212  *
1213  *	This is the OS-dependent handler for link status changes.  The OS
1214  *	neutral handler takes care of most of the processing for these events,
1215  *	then calls this handler for any OS-specific processing.
1216  */
1217 void
1218 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1219      int duplex, int fc, int mac_was_reset)
1220 {
1221 	struct port_info *pi = &adapter->port[port_id];
1222 	struct ifnet *ifp = pi->ifp;
1223 
1224 	/* no race with detach, so ifp should always be good */
1225 	KASSERT(ifp, ("%s: if detached.", __func__));
1226 
1227 	/* Reapply mac settings if they were lost due to a reset */
1228 	if (mac_was_reset) {
1229 		PORT_LOCK(pi);
1230 		cxgb_update_mac_settings(pi);
1231 		PORT_UNLOCK(pi);
1232 	}
1233 
1234 	if (link_status) {
1235 		ifp->if_baudrate = IF_Mbps(speed);
1236 		if_link_state_change(ifp, LINK_STATE_UP);
1237 	} else
1238 		if_link_state_change(ifp, LINK_STATE_DOWN);
1239 }
1240 
1241 /**
1242  *	t3_os_phymod_changed - handle PHY module changes
1243  *	@phy: the PHY reporting the module change
1244  *	@mod_type: new module type
1245  *
1246  *	This is the OS-dependent handler for PHY module changes.  It is
1247  *	invoked when a PHY module is removed or inserted for any OS-specific
1248  *	processing.
1249  */
1250 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1251 {
1252 	static const char *mod_str[] = {
1253 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1254 	};
1255 	struct port_info *pi = &adap->port[port_id];
1256 	int mod = pi->phy.modtype;
1257 
1258 	if (mod != pi->media.ifm_cur->ifm_data)
1259 		cxgb_build_medialist(pi);
1260 
1261 	if (mod == phy_modtype_none)
1262 		if_printf(pi->ifp, "PHY module unplugged\n");
1263 	else {
1264 		KASSERT(mod < ARRAY_SIZE(mod_str),
1265 			("invalid PHY module type %d", mod));
1266 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1267 	}
1268 }
1269 
1270 void
1271 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1272 {
1273 
1274 	/*
1275 	 * The ifnet might not be allocated before this gets called,
1276 	 * as this is called early on in attach by t3_prep_adapter
1277 	 * save the address off in the port structure
1278 	 */
1279 	if (cxgb_debug)
1280 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1281 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1282 }
1283 
1284 /*
1285  * Programs the XGMAC based on the settings in the ifnet.  These settings
1286  * include MTU, MAC address, mcast addresses, etc.
1287  */
1288 static void
1289 cxgb_update_mac_settings(struct port_info *p)
1290 {
1291 	struct ifnet *ifp = p->ifp;
1292 	struct t3_rx_mode rm;
1293 	struct cmac *mac = &p->mac;
1294 	int mtu, hwtagging;
1295 
1296 	PORT_LOCK_ASSERT_OWNED(p);
1297 
1298 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1299 
1300 	mtu = ifp->if_mtu;
1301 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1302 		mtu += ETHER_VLAN_ENCAP_LEN;
1303 
1304 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1305 
1306 	t3_mac_set_mtu(mac, mtu);
1307 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1308 	t3_mac_set_address(mac, 0, p->hw_addr);
1309 	t3_init_rx_mode(&rm, p);
1310 	t3_mac_set_rx_mode(mac, &rm);
1311 }
1312 
1313 
1314 static int
1315 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1316 			      unsigned long n)
1317 {
1318 	int attempts = 5;
1319 
1320 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1321 		if (!--attempts)
1322 			return (ETIMEDOUT);
1323 		t3_os_sleep(10);
1324 	}
1325 	return 0;
1326 }
1327 
1328 static int
1329 init_tp_parity(struct adapter *adap)
1330 {
1331 	int i;
1332 	struct mbuf *m;
1333 	struct cpl_set_tcb_field *greq;
1334 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1335 
1336 	t3_tp_set_offload_mode(adap, 1);
1337 
1338 	for (i = 0; i < 16; i++) {
1339 		struct cpl_smt_write_req *req;
1340 
1341 		m = m_gethdr(M_WAITOK, MT_DATA);
1342 		req = mtod(m, struct cpl_smt_write_req *);
1343 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1344 		memset(req, 0, sizeof(*req));
1345 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1346 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1347 		req->iff = i;
1348 		t3_mgmt_tx(adap, m);
1349 	}
1350 
1351 	for (i = 0; i < 2048; i++) {
1352 		struct cpl_l2t_write_req *req;
1353 
1354 		m = m_gethdr(M_WAITOK, MT_DATA);
1355 		req = mtod(m, struct cpl_l2t_write_req *);
1356 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1357 		memset(req, 0, sizeof(*req));
1358 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1359 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1360 		req->params = htonl(V_L2T_W_IDX(i));
1361 		t3_mgmt_tx(adap, m);
1362 	}
1363 
1364 	for (i = 0; i < 2048; i++) {
1365 		struct cpl_rte_write_req *req;
1366 
1367 		m = m_gethdr(M_WAITOK, MT_DATA);
1368 		req = mtod(m, struct cpl_rte_write_req *);
1369 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1370 		memset(req, 0, sizeof(*req));
1371 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1372 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1373 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1374 		t3_mgmt_tx(adap, m);
1375 	}
1376 
1377 	m = m_gethdr(M_WAITOK, MT_DATA);
1378 	greq = mtod(m, struct cpl_set_tcb_field *);
1379 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1380 	memset(greq, 0, sizeof(*greq));
1381 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1382 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1383 	greq->mask = htobe64(1);
1384 	t3_mgmt_tx(adap, m);
1385 
1386 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1387 	t3_tp_set_offload_mode(adap, 0);
1388 	return (i);
1389 }
1390 
1391 /**
1392  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1393  *	@adap: the adapter
1394  *
1395  *	Sets up RSS to distribute packets to multiple receive queues.  We
1396  *	configure the RSS CPU lookup table to distribute to the number of HW
1397  *	receive queues, and the response queue lookup table to narrow that
1398  *	down to the response queues actually configured for each port.
1399  *	We always configure the RSS mapping for two ports since the mapping
1400  *	table has plenty of entries.
1401  */
1402 static void
1403 setup_rss(adapter_t *adap)
1404 {
1405 	int i;
1406 	u_int nq[2];
1407 	uint8_t cpus[SGE_QSETS + 1];
1408 	uint16_t rspq_map[RSS_TABLE_SIZE];
1409 
1410 	for (i = 0; i < SGE_QSETS; ++i)
1411 		cpus[i] = i;
1412 	cpus[SGE_QSETS] = 0xff;
1413 
1414 	nq[0] = nq[1] = 0;
1415 	for_each_port(adap, i) {
1416 		const struct port_info *pi = adap2pinfo(adap, i);
1417 
1418 		nq[pi->tx_chan] += pi->nqsets;
1419 	}
1420 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1421 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1422 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1423 	}
1424 
1425 	/* Calculate the reverse RSS map table */
1426 	for (i = 0; i < SGE_QSETS; ++i)
1427 		adap->rrss_map[i] = 0xff;
1428 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1429 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1430 			adap->rrss_map[rspq_map[i]] = i;
1431 
1432 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1433 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1434 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1435 	              cpus, rspq_map);
1436 
1437 }
1438 static void
1439 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1440 			      int hi, int port)
1441 {
1442 	struct mbuf *m;
1443 	struct mngt_pktsched_wr *req;
1444 
1445 	m = m_gethdr(M_NOWAIT, MT_DATA);
1446 	if (m) {
1447 		req = mtod(m, struct mngt_pktsched_wr *);
1448 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1449 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1450 		req->sched = sched;
1451 		req->idx = qidx;
1452 		req->min = lo;
1453 		req->max = hi;
1454 		req->binding = port;
1455 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1456 		t3_mgmt_tx(adap, m);
1457 	}
1458 }
1459 
1460 static void
1461 bind_qsets(adapter_t *sc)
1462 {
1463 	int i, j;
1464 
1465 	for (i = 0; i < (sc)->params.nports; ++i) {
1466 		const struct port_info *pi = adap2pinfo(sc, i);
1467 
1468 		for (j = 0; j < pi->nqsets; ++j) {
1469 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1470 					  -1, pi->tx_chan);
1471 
1472 		}
1473 	}
1474 }
1475 
1476 static void
1477 update_tpeeprom(struct adapter *adap)
1478 {
1479 	const struct firmware *tpeeprom;
1480 
1481 	uint32_t version;
1482 	unsigned int major, minor;
1483 	int ret, len;
1484 	char rev, name[32];
1485 
1486 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1487 
1488 	major = G_TP_VERSION_MAJOR(version);
1489 	minor = G_TP_VERSION_MINOR(version);
1490 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1491 		return;
1492 
1493 	rev = t3rev2char(adap);
1494 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1495 
1496 	tpeeprom = firmware_get(name);
1497 	if (tpeeprom == NULL) {
1498 		device_printf(adap->dev,
1499 			      "could not load TP EEPROM: unable to load %s\n",
1500 			      name);
1501 		return;
1502 	}
1503 
1504 	len = tpeeprom->datasize - 4;
1505 
1506 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1507 	if (ret)
1508 		goto release_tpeeprom;
1509 
1510 	if (len != TP_SRAM_LEN) {
1511 		device_printf(adap->dev,
1512 			      "%s length is wrong len=%d expected=%d\n", name,
1513 			      len, TP_SRAM_LEN);
1514 		return;
1515 	}
1516 
1517 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1518 	    TP_SRAM_OFFSET);
1519 
1520 	if (!ret) {
1521 		device_printf(adap->dev,
1522 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1523 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1524 	} else
1525 		device_printf(adap->dev,
1526 			      "Protocol SRAM image update in EEPROM failed\n");
1527 
1528 release_tpeeprom:
1529 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1530 
1531 	return;
1532 }
1533 
1534 static int
1535 update_tpsram(struct adapter *adap)
1536 {
1537 	const struct firmware *tpsram;
1538 	int ret;
1539 	char rev, name[32];
1540 
1541 	rev = t3rev2char(adap);
1542 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1543 
1544 	update_tpeeprom(adap);
1545 
1546 	tpsram = firmware_get(name);
1547 	if (tpsram == NULL){
1548 		device_printf(adap->dev, "could not load TP SRAM\n");
1549 		return (EINVAL);
1550 	} else
1551 		device_printf(adap->dev, "updating TP SRAM\n");
1552 
1553 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1554 	if (ret)
1555 		goto release_tpsram;
1556 
1557 	ret = t3_set_proto_sram(adap, tpsram->data);
1558 	if (ret)
1559 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1560 
1561 release_tpsram:
1562 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1563 
1564 	return ret;
1565 }
1566 
/**
 *	cxgb_up - enable the adapter
 *	@sc: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 *
 *	The block guarded by FULL_INIT_DONE runs only until it has succeeded
 *	once; the remaining steps run on every call.  Returns 0 on success or
 *	the error from the first one-time step that failed.
 */
static int
cxgb_up(struct adapter *sc)
{
	int err = 0;
	/* Upper bound applied to the number of filters below. */
	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;

	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
					   __func__, sc->open_device_map));

	if ((sc->flags & FULL_INIT_DONE) == 0) {

		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

		if ((sc->flags & FW_UPTODATE) == 0)
			if ((err = upgrade_fw(sc)))
				goto out;

		if ((sc->flags & TPS_UPTODATE) == 0)
			if ((err = update_tpsram(sc)))
				goto out;

		/*
		 * With filters requested on an offload-capable adapter, no
		 * servers are configured; a negative nfilters asks for the
		 * maximum, otherwise the request is capped at mxf.
		 */
		if (is_offload(sc) && nfilters != 0) {
			sc->params.mc5.nservers = 0;

			if (nfilters < 0)
				sc->params.mc5.nfilters = mxf;
			else
				sc->params.mc5.nfilters = min(nfilters, mxf);
		}

		err = t3_init_hw(sc, 0);
		if (err)
			goto out;

		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

		err = setup_sge_qsets(sc);
		if (err)
			goto out;

		alloc_filters(sc);
		setup_rss(sc);

		t3_add_configured_sysctls(sc);
		sc->flags |= FULL_INIT_DONE;
	}

	t3_intr_clear(sc);
	t3_sge_start(sc);
	t3_intr_enable(sc);

	/*
	 * TP parity initialization is attempted on rev C and later offload
	 * adapters until it has succeeded once.
	 */
	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
	    is_offload(sc) && init_tp_parity(sc) == 0)
		sc->flags |= TP_PARITY_INIT;

	if (sc->flags & TP_PARITY_INIT) {
		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
	}

	/* Queue binding and HW filter setup are done only once. */
	if (!(sc->flags & QUEUES_BOUND)) {
		bind_qsets(sc);
		setup_hw_filters(sc);
		sc->flags |= QUEUES_BOUND;
	}

	t3_sge_reset_adapter(sc);
out:
	return (err);
}
1646 
/*
 * Counterpart of cxgb_up(), run once the last open device has been closed.
 * It only quiesces the adapter: whatever cxgb_up() acquired under
 * FULL_INIT_DONE stays in place and is released during controller_detach.
 */
static void
cxgb_down(struct adapter *sc)
{

	/* Stop the SGE first, then disable the adapter's interrupts. */
	t3_sge_stop(sc);
	t3_intr_disable(sc);
}
1658 
1659 /*
1660  * if_init for cxgb ports.
1661  */
1662 static void
1663 cxgb_init(void *arg)
1664 {
1665 	struct port_info *p = arg;
1666 	struct adapter *sc = p->adapter;
1667 
1668 	ADAPTER_LOCK(sc);
1669 	cxgb_init_locked(p); /* releases adapter lock */
1670 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1671 }
1672 
/*
 * Brings a port up.  Must be entered with the adapter lock held; the lock
 * is always released before returning.
 *
 * Returns 0 on success (including when the port is already open and
 * running), EINTR if the wait for a busy adapter was interrupted, ENXIO if
 * the port is doomed, or the error from cxgb_up().
 */
static int
cxgb_init_locked(struct port_info *p)
{
	struct adapter *sc = p->adapter;
	struct ifnet *ifp = p->ifp;
	struct cmac *mac = &p->mac;
	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;

	ADAPTER_LOCK_ASSERT_OWNED(sc);

	/* Wait for the adapter to stop being busy, unless the port is doomed. */
	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
		gave_up_lock = 1;
		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
			rc = EINTR;
			goto done;
		}
	}
	if (IS_DOOMED(p)) {
		rc = ENXIO;
		goto done;
	}
	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));

	/*
	 * The code that runs during one-time adapter initialization can sleep
	 * so it's important not to hold any locks across it.
	 */
	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;

	/* Mark the adapter busy so it stays ours while the lock is dropped. */
	if (may_sleep) {
		SET_BUSY(sc);
		gave_up_lock = 1;
		ADAPTER_UNLOCK(sc);
	}

	/* The first port to open brings the whole adapter up. */
	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
			goto done;

	PORT_LOCK(p);
	/* Nothing to do if this port is already open and running. */
	if (isset(&sc->open_device_map, p->port_id) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		PORT_UNLOCK(p);
		goto done;
	}
	t3_port_intr_enable(sc, p->port_id);
	if (!mac->multiport)
		t3_mac_init(mac);
	cxgb_update_mac_settings(p);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	PORT_UNLOCK(p);

	/* Arm the tx watchdog of each of this port's Ethernet tx queues. */
	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
		struct sge_qset *qs = &sc->sge.qs[i];
		struct sge_txq *txq = &qs->txq[TXQ_ETH];

		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
				 txq->txq_watchdog.c_cpu);
	}

	/* all ok */
	setbit(&sc->open_device_map, p->port_id);
	/* Link check period: 3*hz ticks if the PHY has a link IRQ, else hz/4. */
	callout_reset(&p->link_check_ch,
	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
	    link_check_callout, p);

done:
	/* If the lock was dropped above, retake it and clear BUSY. */
	if (may_sleep) {
		ADAPTER_LOCK(sc);
		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
		CLR_BUSY(sc);
	}
	/* Others may have queued up on sc->flags while we slept or were busy. */
	if (gave_up_lock)
		wakeup_one(&sc->flags);
	ADAPTER_UNLOCK(sc);
	return (rc);
}
1752 
/*
 * Brings a port down.  Must be entered with the adapter lock held; the lock
 * is always released before returning.  The adapter is marked busy while
 * cxgb_uninit_synchronized() does the actual work without the lock.
 *
 * Returns EINTR if the wait for a busy adapter was interrupted, ENXIO if
 * the port is doomed, otherwise the result of cxgb_uninit_synchronized().
 */
static int
cxgb_uninit_locked(struct port_info *p)
{
	struct adapter *sc = p->adapter;
	int rc;

	ADAPTER_LOCK_ASSERT_OWNED(sc);

	/* Wait for the adapter to stop being busy, unless the port is doomed. */
	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
			rc = EINTR;
			goto done;
		}
	}
	if (IS_DOOMED(p)) {
		rc = ENXIO;
		goto done;
	}
	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
	SET_BUSY(sc);
	ADAPTER_UNLOCK(sc);

	/* Runs unlocked; it asserts that the adapter lock is not held. */
	rc = cxgb_uninit_synchronized(p);

	ADAPTER_LOCK(sc);
	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
	CLR_BUSY(sc);
	wakeup_one(&sc->flags);
done:
	ADAPTER_UNLOCK(sc);
	return (rc);
}
1785 
/*
 * Called on "ifconfig down", and from port_detach
 *
 * Takes the port out of the open device map, drains the tasks and callout
 * that may touch it, quiesces the MAC and PHY under the port lock, reports
 * the link as down and, if no port is left open, brings the adapter down.
 * Must be called without the adapter lock held.  Always returns 0.
 */
static int
cxgb_uninit_synchronized(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;

	/*
	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
	 */
	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/*
	 * Clear this port's bit from the open device map, and then drain all
	 * the tasks that can access/manipulate this port's port_info or ifp.
	 * We disable this port's interrupts here and so the slow/ext
	 * interrupt tasks won't be enqueued.  The tick task will continue to
	 * be enqueued every second but the runs after this drain will not see
	 * this port in the open device map.
	 *
	 * A well behaved task must take open_device_map into account and ignore
	 * ports that are not open.
	 */
	clrbit(&sc->open_device_map, pi->port_id);
	t3_port_intr_disable(sc, pi->port_id);
	taskqueue_drain(sc->tq, &sc->slow_intr_task);
	taskqueue_drain(sc->tq, &sc->tick_task);

	/* Stop the periodic link check and wait out a running instance. */
	callout_drain(&pi->link_check_ch);
	taskqueue_drain(sc->tq, &pi->link_check_task);

	PORT_LOCK(pi);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	/* disable pause frames */
	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);

	/* Reset RX FIFO HWM */
	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);

	DELAY(100 * 1000);

	/* Wait for TXFIFO empty */
	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
			F_TXFIFO_EMPTY, 1, 20, 5);

	DELAY(100 * 1000);
	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);

	pi->phy.ops->power_down(&pi->phy, 1);

	PORT_UNLOCK(pi);

	/* Record the link as down and tell the network stack. */
	pi->link_config.link_ok = 0;
	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);

	/* This was the last open port: quiesce the adapter as well. */
	if (sc->open_device_map == 0)
		cxgb_down(pi->adapter);

	return (0);
}
1850 
1851 /*
1852  * Mark lro enabled or disabled in all qsets for this port
1853  */
1854 static int
1855 cxgb_set_lro(struct port_info *p, int enabled)
1856 {
1857 	int i;
1858 	struct adapter *adp = p->adapter;
1859 	struct sge_qset *q;
1860 
1861 	for (i = 0; i < p->nqsets; i++) {
1862 		q = &adp->sge.qs[p->first_qset + i];
1863 		q->lro.enabled = (enabled != 0);
1864 	}
1865 	return (0);
1866 }
1867 
1868 static int
1869 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1870 {
1871 	struct port_info *p = ifp->if_softc;
1872 	struct adapter *sc = p->adapter;
1873 	struct ifreq *ifr = (struct ifreq *)data;
1874 	int flags, error = 0, mtu;
1875 	uint32_t mask;
1876 
1877 	switch (command) {
1878 	case SIOCSIFMTU:
1879 		ADAPTER_LOCK(sc);
1880 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1881 		if (error) {
1882 fail:
1883 			ADAPTER_UNLOCK(sc);
1884 			return (error);
1885 		}
1886 
1887 		mtu = ifr->ifr_mtu;
1888 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1889 			error = EINVAL;
1890 		} else {
1891 			ifp->if_mtu = mtu;
1892 			PORT_LOCK(p);
1893 			cxgb_update_mac_settings(p);
1894 			PORT_UNLOCK(p);
1895 		}
1896 		ADAPTER_UNLOCK(sc);
1897 		break;
1898 	case SIOCSIFFLAGS:
1899 		ADAPTER_LOCK(sc);
1900 		if (IS_DOOMED(p)) {
1901 			error = ENXIO;
1902 			goto fail;
1903 		}
1904 		if (ifp->if_flags & IFF_UP) {
1905 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1906 				flags = p->if_flags;
1907 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1908 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1909 					if (IS_BUSY(sc)) {
1910 						error = EBUSY;
1911 						goto fail;
1912 					}
1913 					PORT_LOCK(p);
1914 					cxgb_update_mac_settings(p);
1915 					PORT_UNLOCK(p);
1916 				}
1917 				ADAPTER_UNLOCK(sc);
1918 			} else
1919 				error = cxgb_init_locked(p);
1920 			p->if_flags = ifp->if_flags;
1921 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1922 			error = cxgb_uninit_locked(p);
1923 		else
1924 			ADAPTER_UNLOCK(sc);
1925 
1926 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1927 		break;
1928 	case SIOCADDMULTI:
1929 	case SIOCDELMULTI:
1930 		ADAPTER_LOCK(sc);
1931 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1932 		if (error)
1933 			goto fail;
1934 
1935 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1936 			PORT_LOCK(p);
1937 			cxgb_update_mac_settings(p);
1938 			PORT_UNLOCK(p);
1939 		}
1940 		ADAPTER_UNLOCK(sc);
1941 
1942 		break;
1943 	case SIOCSIFCAP:
1944 		ADAPTER_LOCK(sc);
1945 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1946 		if (error)
1947 			goto fail;
1948 
1949 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1950 		if (mask & IFCAP_TXCSUM) {
1951 			ifp->if_capenable ^= IFCAP_TXCSUM;
1952 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1953 
1954 			if (IFCAP_TSO4 & ifp->if_capenable &&
1955 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1956 				ifp->if_capenable &= ~IFCAP_TSO4;
1957 				if_printf(ifp,
1958 				    "tso4 disabled due to -txcsum.\n");
1959 			}
1960 		}
1961 		if (mask & IFCAP_TXCSUM_IPV6) {
1962 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1963 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1964 
1965 			if (IFCAP_TSO6 & ifp->if_capenable &&
1966 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1967 				ifp->if_capenable &= ~IFCAP_TSO6;
1968 				if_printf(ifp,
1969 				    "tso6 disabled due to -txcsum6.\n");
1970 			}
1971 		}
1972 		if (mask & IFCAP_RXCSUM)
1973 			ifp->if_capenable ^= IFCAP_RXCSUM;
1974 		if (mask & IFCAP_RXCSUM_IPV6)
1975 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1976 
1977 		/*
1978 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1979 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1980 		 * sending a TSO request our way, so it's sufficient to toggle
1981 		 * IFCAP_TSOx only.
1982 		 */
1983 		if (mask & IFCAP_TSO4) {
1984 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1985 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1986 				if_printf(ifp, "enable txcsum first.\n");
1987 				error = EAGAIN;
1988 				goto fail;
1989 			}
1990 			ifp->if_capenable ^= IFCAP_TSO4;
1991 		}
1992 		if (mask & IFCAP_TSO6) {
1993 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1994 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1995 				if_printf(ifp, "enable txcsum6 first.\n");
1996 				error = EAGAIN;
1997 				goto fail;
1998 			}
1999 			ifp->if_capenable ^= IFCAP_TSO6;
2000 		}
2001 		if (mask & IFCAP_LRO) {
2002 			ifp->if_capenable ^= IFCAP_LRO;
2003 
2004 			/* Safe to do this even if cxgb_up not called yet */
2005 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2006 		}
2007 #ifdef TCP_OFFLOAD
2008 		if (mask & IFCAP_TOE4) {
2009 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2010 
2011 			error = toe_capability(p, enable);
2012 			if (error == 0)
2013 				ifp->if_capenable ^= mask;
2014 		}
2015 #endif
2016 		if (mask & IFCAP_VLAN_HWTAGGING) {
2017 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2018 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2019 				PORT_LOCK(p);
2020 				cxgb_update_mac_settings(p);
2021 				PORT_UNLOCK(p);
2022 			}
2023 		}
2024 		if (mask & IFCAP_VLAN_MTU) {
2025 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2026 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2027 				PORT_LOCK(p);
2028 				cxgb_update_mac_settings(p);
2029 				PORT_UNLOCK(p);
2030 			}
2031 		}
2032 		if (mask & IFCAP_VLAN_HWTSO)
2033 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2034 		if (mask & IFCAP_VLAN_HWCSUM)
2035 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2036 
2037 #ifdef VLAN_CAPABILITIES
2038 		VLAN_CAPABILITIES(ifp);
2039 #endif
2040 		ADAPTER_UNLOCK(sc);
2041 		break;
2042 	case SIOCSIFMEDIA:
2043 	case SIOCGIFMEDIA:
2044 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2045 		break;
2046 	default:
2047 		error = ether_ioctl(ifp, command, data);
2048 	}
2049 
2050 	return (error);
2051 }
2052 
/*
 * ifmedia change callback.  Changing the media is not supported, so every
 * request is refused with EOPNOTSUPP; the ifnet argument is not examined.
 */
static int
cxgb_media_change(struct ifnet *ifp)
{
	const int rc = EOPNOTSUPP;

	return (rc);
}
2058 
2059 /*
2060  * Translates phy->modtype to the correct Ethernet media subtype.
2061  */
2062 static int
2063 cxgb_ifm_type(int mod)
2064 {
2065 	switch (mod) {
2066 	case phy_modtype_sr:
2067 		return (IFM_10G_SR);
2068 	case phy_modtype_lr:
2069 		return (IFM_10G_LR);
2070 	case phy_modtype_lrm:
2071 		return (IFM_10G_LRM);
2072 	case phy_modtype_twinax:
2073 		return (IFM_10G_TWINAX);
2074 	case phy_modtype_twinax_long:
2075 		return (IFM_10G_TWINAX_LONG);
2076 	case phy_modtype_none:
2077 		return (IFM_NONE);
2078 	case phy_modtype_unknown:
2079 		return (IFM_UNKNOWN);
2080 	}
2081 
2082 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2083 	return (IFM_UNKNOWN);
2084 }
2085 
2086 /*
2087  * Rebuilds the ifmedia list for this port, and sets the current media.
2088  */
2089 static void
2090 cxgb_build_medialist(struct port_info *p)
2091 {
2092 	struct cphy *phy = &p->phy;
2093 	struct ifmedia *media = &p->media;
2094 	int mod = phy->modtype;
2095 	int m = IFM_ETHER | IFM_FDX;
2096 
2097 	PORT_LOCK(p);
2098 
2099 	ifmedia_removeall(media);
2100 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2101 		/* Copper (RJ45) */
2102 
2103 		if (phy->caps & SUPPORTED_10000baseT_Full)
2104 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2105 
2106 		if (phy->caps & SUPPORTED_1000baseT_Full)
2107 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2108 
2109 		if (phy->caps & SUPPORTED_100baseT_Full)
2110 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2111 
2112 		if (phy->caps & SUPPORTED_10baseT_Full)
2113 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2114 
2115 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2116 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2117 
2118 	} else if (phy->caps & SUPPORTED_TP) {
2119 		/* Copper (CX4) */
2120 
2121 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2122 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2123 
2124 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2125 		ifmedia_set(media, m | IFM_10G_CX4);
2126 
2127 	} else if (phy->caps & SUPPORTED_FIBRE &&
2128 		   phy->caps & SUPPORTED_10000baseT_Full) {
2129 		/* 10G optical (but includes SFP+ twinax) */
2130 
2131 		m |= cxgb_ifm_type(mod);
2132 		if (IFM_SUBTYPE(m) == IFM_NONE)
2133 			m &= ~IFM_FDX;
2134 
2135 		ifmedia_add(media, m, mod, NULL);
2136 		ifmedia_set(media, m);
2137 
2138 	} else if (phy->caps & SUPPORTED_FIBRE &&
2139 		   phy->caps & SUPPORTED_1000baseT_Full) {
2140 		/* 1G optical */
2141 
2142 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2143 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2144 		ifmedia_set(media, m | IFM_1000_SX);
2145 
2146 	} else {
2147 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2148 			    phy->caps));
2149 	}
2150 
2151 	PORT_UNLOCK(p);
2152 }
2153 
2154 static void
2155 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2156 {
2157 	struct port_info *p = ifp->if_softc;
2158 	struct ifmedia_entry *cur = p->media.ifm_cur;
2159 	int speed = p->link_config.speed;
2160 
2161 	if (cur->ifm_data != p->phy.modtype) {
2162 		cxgb_build_medialist(p);
2163 		cur = p->media.ifm_cur;
2164 	}
2165 
2166 	ifmr->ifm_status = IFM_AVALID;
2167 	if (!p->link_config.link_ok)
2168 		return;
2169 
2170 	ifmr->ifm_status |= IFM_ACTIVE;
2171 
2172 	/*
2173 	 * active and current will differ iff current media is autoselect.  That
2174 	 * can happen only for copper RJ45.
2175 	 */
2176 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2177 		return;
2178 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2179 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2180 
2181 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2182 	if (speed == SPEED_10000)
2183 		ifmr->ifm_active |= IFM_10G_T;
2184 	else if (speed == SPEED_1000)
2185 		ifmr->ifm_active |= IFM_1000_T;
2186 	else if (speed == SPEED_100)
2187 		ifmr->ifm_active |= IFM_100_TX;
2188 	else if (speed == SPEED_10)
2189 		ifmr->ifm_active |= IFM_10_T;
2190 	else
2191 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2192 			    speed));
2193 }
2194 
2195 static void
2196 cxgb_async_intr(void *data)
2197 {
2198 	adapter_t *sc = data;
2199 
2200 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2201 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2202 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2203 }
2204 
2205 static void
2206 link_check_callout(void *arg)
2207 {
2208 	struct port_info *pi = arg;
2209 	struct adapter *sc = pi->adapter;
2210 
2211 	if (!isset(&sc->open_device_map, pi->port_id))
2212 		return;
2213 
2214 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2215 }
2216 
2217 static void
2218 check_link_status(void *arg, int pending)
2219 {
2220 	struct port_info *pi = arg;
2221 	struct adapter *sc = pi->adapter;
2222 
2223 	if (!isset(&sc->open_device_map, pi->port_id))
2224 		return;
2225 
2226 	t3_link_changed(sc, pi->port_id);
2227 
2228 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2229 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2230 }
2231 
2232 void
2233 t3_os_link_intr(struct port_info *pi)
2234 {
2235 	/*
2236 	 * Schedule a link check in the near future.  If the link is flapping
2237 	 * rapidly we'll keep resetting the callout and delaying the check until
2238 	 * things stabilize a bit.
2239 	 */
2240 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2241 }
2242 
2243 static void
2244 check_t3b2_mac(struct adapter *sc)
2245 {
2246 	int i;
2247 
2248 	if (sc->flags & CXGB_SHUTDOWN)
2249 		return;
2250 
2251 	for_each_port(sc, i) {
2252 		struct port_info *p = &sc->port[i];
2253 		int status;
2254 #ifdef INVARIANTS
2255 		struct ifnet *ifp = p->ifp;
2256 #endif
2257 
2258 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2259 		    !p->link_config.link_ok)
2260 			continue;
2261 
2262 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2263 			("%s: state mismatch (drv_flags %x, device_map %x)",
2264 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2265 
2266 		PORT_LOCK(p);
2267 		status = t3b2_mac_watchdog_task(&p->mac);
2268 		if (status == 1)
2269 			p->mac.stats.num_toggled++;
2270 		else if (status == 2) {
2271 			struct cmac *mac = &p->mac;
2272 
2273 			cxgb_update_mac_settings(p);
2274 			t3_link_start(&p->phy, mac, &p->link_config);
2275 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2276 			t3_port_intr_enable(sc, p->port_id);
2277 			p->mac.stats.num_resets++;
2278 		}
2279 		PORT_UNLOCK(p);
2280 	}
2281 }
2282 
2283 static void
2284 cxgb_tick(void *arg)
2285 {
2286 	adapter_t *sc = (adapter_t *)arg;
2287 
2288 	if (sc->flags & CXGB_SHUTDOWN)
2289 		return;
2290 
2291 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2292 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2293 }
2294 
2295 static void
2296 cxgb_tick_handler(void *arg, int count)
2297 {
2298 	adapter_t *sc = (adapter_t *)arg;
2299 	const struct adapter_params *p = &sc->params;
2300 	int i;
2301 	uint32_t cause, reset;
2302 
2303 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2304 		return;
2305 
2306 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2307 		check_t3b2_mac(sc);
2308 
2309 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2310 	if (cause) {
2311 		struct sge_qset *qs = &sc->sge.qs[0];
2312 		uint32_t mask, v;
2313 
2314 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2315 
2316 		mask = 1;
2317 		for (i = 0; i < SGE_QSETS; i++) {
2318 			if (v & mask)
2319 				qs[i].rspq.starved++;
2320 			mask <<= 1;
2321 		}
2322 
2323 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2324 
2325 		for (i = 0; i < SGE_QSETS * 2; i++) {
2326 			if (v & mask) {
2327 				qs[i / 2].fl[i % 2].empty++;
2328 			}
2329 			mask <<= 1;
2330 		}
2331 
2332 		/* clear */
2333 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2334 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2335 	}
2336 
2337 	for (i = 0; i < sc->params.nports; i++) {
2338 		struct port_info *pi = &sc->port[i];
2339 		struct ifnet *ifp = pi->ifp;
2340 		struct cmac *mac = &pi->mac;
2341 		struct mac_stats *mstats = &mac->stats;
2342 		int drops, j;
2343 
2344 		if (!isset(&sc->open_device_map, pi->port_id))
2345 			continue;
2346 
2347 		PORT_LOCK(pi);
2348 		t3_mac_update_stats(mac);
2349 		PORT_UNLOCK(pi);
2350 
2351 		ifp->if_opackets = mstats->tx_frames;
2352 		ifp->if_ipackets = mstats->rx_frames;
2353 		ifp->if_obytes = mstats->tx_octets;
2354 		ifp->if_ibytes = mstats->rx_octets;
2355 		ifp->if_omcasts = mstats->tx_mcast_frames;
2356 		ifp->if_imcasts = mstats->rx_mcast_frames;
2357 		ifp->if_collisions = mstats->tx_total_collisions;
2358 		ifp->if_iqdrops = mstats->rx_cong_drops;
2359 
2360 		drops = 0;
2361 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2362 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2363 		ifp->if_snd.ifq_drops = drops;
2364 
2365 		ifp->if_oerrors =
2366 		    mstats->tx_excess_collisions +
2367 		    mstats->tx_underrun +
2368 		    mstats->tx_len_errs +
2369 		    mstats->tx_mac_internal_errs +
2370 		    mstats->tx_excess_deferral +
2371 		    mstats->tx_fcs_errs;
2372 		ifp->if_ierrors =
2373 		    mstats->rx_jabber +
2374 		    mstats->rx_data_errs +
2375 		    mstats->rx_sequence_errs +
2376 		    mstats->rx_runt +
2377 		    mstats->rx_too_long +
2378 		    mstats->rx_mac_internal_errs +
2379 		    mstats->rx_short +
2380 		    mstats->rx_fcs_errs;
2381 
2382 		if (mac->multiport)
2383 			continue;
2384 
2385 		/* Count rx fifo overflows, once per second */
2386 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2387 		reset = 0;
2388 		if (cause & F_RXFIFO_OVERFLOW) {
2389 			mac->stats.rx_fifo_ovfl++;
2390 			reset |= F_RXFIFO_OVERFLOW;
2391 		}
2392 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2393 	}
2394 }
2395 
2396 static void
2397 touch_bars(device_t dev)
2398 {
2399 	/*
2400 	 * Don't enable yet
2401 	 */
2402 #if !defined(__LP64__) && 0
2403 	u32 v;
2404 
2405 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2406 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2407 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2408 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2409 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2410 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2411 #endif
2412 }
2413 
2414 static int
2415 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2416 {
2417 	uint8_t *buf;
2418 	int err = 0;
2419 	u32 aligned_offset, aligned_len, *p;
2420 	struct adapter *adapter = pi->adapter;
2421 
2422 
2423 	aligned_offset = offset & ~3;
2424 	aligned_len = (len + (offset & 3) + 3) & ~3;
2425 
2426 	if (aligned_offset != offset || aligned_len != len) {
2427 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2428 		if (!buf)
2429 			return (ENOMEM);
2430 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2431 		if (!err && aligned_len > 4)
2432 			err = t3_seeprom_read(adapter,
2433 					      aligned_offset + aligned_len - 4,
2434 					      (u32 *)&buf[aligned_len - 4]);
2435 		if (err)
2436 			goto out;
2437 		memcpy(buf + (offset & 3), data, len);
2438 	} else
2439 		buf = (uint8_t *)(uintptr_t)data;
2440 
2441 	err = t3_seeprom_wp(adapter, 0);
2442 	if (err)
2443 		goto out;
2444 
2445 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2446 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2447 		aligned_offset += 4;
2448 	}
2449 
2450 	if (!err)
2451 		err = t3_seeprom_wp(adapter, 1);
2452 out:
2453 	if (buf != data)
2454 		free(buf, M_DEVBUF);
2455 	return err;
2456 }
2457 
2458 
/*
 * Range check used for optional ioctl parameters.  A negative value is
 * always accepted; any other value must lie within [lo, hi].  Returns 1 when
 * the value is acceptable and 0 otherwise.
 */
static int
in_range(int val, int lo, int hi)
{
	if (val < 0)
		return (1);

	return (lo <= val && val <= hi);
}
2465 static int
2466 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2467 {
2468        return (0);
2469 }
2470 
2471 static int
2472 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2473 {
2474        return (0);
2475 }
2476 
2477 static int
2478 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2479     int fflag, struct thread *td)
2480 {
2481 	int mmd, error = 0;
2482 	struct port_info *pi = dev->si_drv1;
2483 	adapter_t *sc = pi->adapter;
2484 
2485 #ifdef PRIV_SUPPORTED
2486 	if (priv_check(td, PRIV_DRIVER)) {
2487 		if (cxgb_debug)
2488 			printf("user does not have access to privileged ioctls\n");
2489 		return (EPERM);
2490 	}
2491 #else
2492 	if (suser(td)) {
2493 		if (cxgb_debug)
2494 			printf("user does not have access to privileged ioctls\n");
2495 		return (EPERM);
2496 	}
2497 #endif
2498 
2499 	switch (cmd) {
2500 	case CHELSIO_GET_MIIREG: {
2501 		uint32_t val;
2502 		struct cphy *phy = &pi->phy;
2503 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2504 
2505 		if (!phy->mdio_read)
2506 			return (EOPNOTSUPP);
2507 		if (is_10G(sc)) {
2508 			mmd = mid->phy_id >> 8;
2509 			if (!mmd)
2510 				mmd = MDIO_DEV_PCS;
2511 			else if (mmd > MDIO_DEV_VEND2)
2512 				return (EINVAL);
2513 
2514 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2515 					     mid->reg_num, &val);
2516 		} else
2517 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2518 					     mid->reg_num & 0x1f, &val);
2519 		if (error == 0)
2520 			mid->val_out = val;
2521 		break;
2522 	}
2523 	case CHELSIO_SET_MIIREG: {
2524 		struct cphy *phy = &pi->phy;
2525 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2526 
2527 		if (!phy->mdio_write)
2528 			return (EOPNOTSUPP);
2529 		if (is_10G(sc)) {
2530 			mmd = mid->phy_id >> 8;
2531 			if (!mmd)
2532 				mmd = MDIO_DEV_PCS;
2533 			else if (mmd > MDIO_DEV_VEND2)
2534 				return (EINVAL);
2535 
2536 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2537 					      mmd, mid->reg_num, mid->val_in);
2538 		} else
2539 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2540 					      mid->reg_num & 0x1f,
2541 					      mid->val_in);
2542 		break;
2543 	}
2544 	case CHELSIO_SETREG: {
2545 		struct ch_reg *edata = (struct ch_reg *)data;
2546 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2547 			return (EFAULT);
2548 		t3_write_reg(sc, edata->addr, edata->val);
2549 		break;
2550 	}
2551 	case CHELSIO_GETREG: {
2552 		struct ch_reg *edata = (struct ch_reg *)data;
2553 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2554 			return (EFAULT);
2555 		edata->val = t3_read_reg(sc, edata->addr);
2556 		break;
2557 	}
2558 	case CHELSIO_GET_SGE_CONTEXT: {
2559 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2560 		mtx_lock_spin(&sc->sge.reg_lock);
2561 		switch (ecntxt->cntxt_type) {
2562 		case CNTXT_TYPE_EGRESS:
2563 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2564 			    ecntxt->data);
2565 			break;
2566 		case CNTXT_TYPE_FL:
2567 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2568 			    ecntxt->data);
2569 			break;
2570 		case CNTXT_TYPE_RSP:
2571 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2572 			    ecntxt->data);
2573 			break;
2574 		case CNTXT_TYPE_CQ:
2575 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2576 			    ecntxt->data);
2577 			break;
2578 		default:
2579 			error = EINVAL;
2580 			break;
2581 		}
2582 		mtx_unlock_spin(&sc->sge.reg_lock);
2583 		break;
2584 	}
2585 	case CHELSIO_GET_SGE_DESC: {
2586 		struct ch_desc *edesc = (struct ch_desc *)data;
2587 		int ret;
2588 		if (edesc->queue_num >= SGE_QSETS * 6)
2589 			return (EINVAL);
2590 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2591 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2592 		if (ret < 0)
2593 			return (EINVAL);
2594 		edesc->size = ret;
2595 		break;
2596 	}
2597 	case CHELSIO_GET_QSET_PARAMS: {
2598 		struct qset_params *q;
2599 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2600 		int q1 = pi->first_qset;
2601 		int nqsets = pi->nqsets;
2602 		int i;
2603 
2604 		if (t->qset_idx >= nqsets)
2605 			return EINVAL;
2606 
2607 		i = q1 + t->qset_idx;
2608 		q = &sc->params.sge.qset[i];
2609 		t->rspq_size   = q->rspq_size;
2610 		t->txq_size[0] = q->txq_size[0];
2611 		t->txq_size[1] = q->txq_size[1];
2612 		t->txq_size[2] = q->txq_size[2];
2613 		t->fl_size[0]  = q->fl_size;
2614 		t->fl_size[1]  = q->jumbo_size;
2615 		t->polling     = q->polling;
2616 		t->lro         = q->lro;
2617 		t->intr_lat    = q->coalesce_usecs;
2618 		t->cong_thres  = q->cong_thres;
2619 		t->qnum        = i;
2620 
2621 		if ((sc->flags & FULL_INIT_DONE) == 0)
2622 			t->vector = 0;
2623 		else if (sc->flags & USING_MSIX)
2624 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2625 		else
2626 			t->vector = rman_get_start(sc->irq_res);
2627 
2628 		break;
2629 	}
2630 	case CHELSIO_GET_QSET_NUM: {
2631 		struct ch_reg *edata = (struct ch_reg *)data;
2632 		edata->val = pi->nqsets;
2633 		break;
2634 	}
2635 	case CHELSIO_LOAD_FW: {
2636 		uint8_t *fw_data;
2637 		uint32_t vers;
2638 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2639 
2640 		/*
2641 		 * You're allowed to load a firmware only before FULL_INIT_DONE
2642 		 *
2643 		 * FW_UPTODATE is also set so the rest of the initialization
2644 		 * will not overwrite what was loaded here.  This gives you the
2645 		 * flexibility to load any firmware (and maybe shoot yourself in
2646 		 * the foot).
2647 		 */
2648 
2649 		ADAPTER_LOCK(sc);
2650 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2651 			ADAPTER_UNLOCK(sc);
2652 			return (EBUSY);
2653 		}
2654 
2655 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2656 		if (!fw_data)
2657 			error = ENOMEM;
2658 		else
2659 			error = copyin(t->buf, fw_data, t->len);
2660 
2661 		if (!error)
2662 			error = -t3_load_fw(sc, fw_data, t->len);
2663 
2664 		if (t3_get_fw_version(sc, &vers) == 0) {
2665 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2666 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2667 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2668 		}
2669 
2670 		if (!error)
2671 			sc->flags |= FW_UPTODATE;
2672 
2673 		free(fw_data, M_DEVBUF);
2674 		ADAPTER_UNLOCK(sc);
2675 		break;
2676 	}
2677 	case CHELSIO_LOAD_BOOT: {
2678 		uint8_t *boot_data;
2679 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2680 
2681 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2682 		if (!boot_data)
2683 			return ENOMEM;
2684 
2685 		error = copyin(t->buf, boot_data, t->len);
2686 		if (!error)
2687 			error = -t3_load_boot(sc, boot_data, t->len);
2688 
2689 		free(boot_data, M_DEVBUF);
2690 		break;
2691 	}
2692 	case CHELSIO_GET_PM: {
2693 		struct ch_pm *m = (struct ch_pm *)data;
2694 		struct tp_params *p = &sc->params.tp;
2695 
2696 		if (!is_offload(sc))
2697 			return (EOPNOTSUPP);
2698 
2699 		m->tx_pg_sz = p->tx_pg_size;
2700 		m->tx_num_pg = p->tx_num_pgs;
2701 		m->rx_pg_sz  = p->rx_pg_size;
2702 		m->rx_num_pg = p->rx_num_pgs;
2703 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2704 
2705 		break;
2706 	}
2707 	case CHELSIO_SET_PM: {
2708 		struct ch_pm *m = (struct ch_pm *)data;
2709 		struct tp_params *p = &sc->params.tp;
2710 
2711 		if (!is_offload(sc))
2712 			return (EOPNOTSUPP);
2713 		if (sc->flags & FULL_INIT_DONE)
2714 			return (EBUSY);
2715 
2716 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2717 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2718 			return (EINVAL);	/* not power of 2 */
2719 		if (!(m->rx_pg_sz & 0x14000))
2720 			return (EINVAL);	/* not 16KB or 64KB */
2721 		if (!(m->tx_pg_sz & 0x1554000))
2722 			return (EINVAL);
2723 		if (m->tx_num_pg == -1)
2724 			m->tx_num_pg = p->tx_num_pgs;
2725 		if (m->rx_num_pg == -1)
2726 			m->rx_num_pg = p->rx_num_pgs;
2727 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2728 			return (EINVAL);
2729 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2730 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2731 			return (EINVAL);
2732 
2733 		p->rx_pg_size = m->rx_pg_sz;
2734 		p->tx_pg_size = m->tx_pg_sz;
2735 		p->rx_num_pgs = m->rx_num_pg;
2736 		p->tx_num_pgs = m->tx_num_pg;
2737 		break;
2738 	}
2739 	case CHELSIO_SETMTUTAB: {
2740 		struct ch_mtus *m = (struct ch_mtus *)data;
2741 		int i;
2742 
2743 		if (!is_offload(sc))
2744 			return (EOPNOTSUPP);
2745 		if (offload_running(sc))
2746 			return (EBUSY);
2747 		if (m->nmtus != NMTUS)
2748 			return (EINVAL);
2749 		if (m->mtus[0] < 81)         /* accommodate SACK */
2750 			return (EINVAL);
2751 
2752 		/*
2753 		 * MTUs must be in ascending order
2754 		 */
2755 		for (i = 1; i < NMTUS; ++i)
2756 			if (m->mtus[i] < m->mtus[i - 1])
2757 				return (EINVAL);
2758 
2759 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2760 		break;
2761 	}
2762 	case CHELSIO_GETMTUTAB: {
2763 		struct ch_mtus *m = (struct ch_mtus *)data;
2764 
2765 		if (!is_offload(sc))
2766 			return (EOPNOTSUPP);
2767 
2768 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2769 		m->nmtus = NMTUS;
2770 		break;
2771 	}
2772 	case CHELSIO_GET_MEM: {
2773 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2774 		struct mc7 *mem;
2775 		uint8_t *useraddr;
2776 		u64 buf[32];
2777 
2778 		/*
2779 		 * Use these to avoid modifying len/addr in the return
2780 		 * struct
2781 		 */
2782 		uint32_t len = t->len, addr = t->addr;
2783 
2784 		if (!is_offload(sc))
2785 			return (EOPNOTSUPP);
2786 		if (!(sc->flags & FULL_INIT_DONE))
2787 			return (EIO);         /* need the memory controllers */
2788 		if ((addr & 0x7) || (len & 0x7))
2789 			return (EINVAL);
2790 		if (t->mem_id == MEM_CM)
2791 			mem = &sc->cm;
2792 		else if (t->mem_id == MEM_PMRX)
2793 			mem = &sc->pmrx;
2794 		else if (t->mem_id == MEM_PMTX)
2795 			mem = &sc->pmtx;
2796 		else
2797 			return (EINVAL);
2798 
2799 		/*
2800 		 * Version scheme:
2801 		 * bits 0..9: chip version
2802 		 * bits 10..15: chip revision
2803 		 */
2804 		t->version = 3 | (sc->params.rev << 10);
2805 
2806 		/*
2807 		 * Read 256 bytes at a time as len can be large and we don't
2808 		 * want to use huge intermediate buffers.
2809 		 */
2810 		useraddr = (uint8_t *)t->buf;
2811 		while (len) {
2812 			unsigned int chunk = min(len, sizeof(buf));
2813 
2814 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2815 			if (error)
2816 				return (-error);
2817 			if (copyout(buf, useraddr, chunk))
2818 				return (EFAULT);
2819 			useraddr += chunk;
2820 			addr += chunk;
2821 			len -= chunk;
2822 		}
2823 		break;
2824 	}
2825 	case CHELSIO_READ_TCAM_WORD: {
2826 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2827 
2828 		if (!is_offload(sc))
2829 			return (EOPNOTSUPP);
2830 		if (!(sc->flags & FULL_INIT_DONE))
2831 			return (EIO);         /* need MC5 */
2832 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2833 		break;
2834 	}
2835 	case CHELSIO_SET_TRACE_FILTER: {
2836 		struct ch_trace *t = (struct ch_trace *)data;
2837 		const struct trace_params *tp;
2838 
2839 		tp = (const struct trace_params *)&t->sip;
2840 		if (t->config_tx)
2841 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2842 					       t->trace_tx);
2843 		if (t->config_rx)
2844 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2845 					       t->trace_rx);
2846 		break;
2847 	}
2848 	case CHELSIO_SET_PKTSCHED: {
2849 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2850 		if (sc->open_device_map == 0)
2851 			return (EAGAIN);
2852 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2853 		    p->binding);
2854 		break;
2855 	}
2856 	case CHELSIO_IFCONF_GETREGS: {
2857 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2858 		int reglen = cxgb_get_regs_len();
2859 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2860 		if (buf == NULL) {
2861 			return (ENOMEM);
2862 		}
2863 		if (regs->len > reglen)
2864 			regs->len = reglen;
2865 		else if (regs->len < reglen)
2866 			error = ENOBUFS;
2867 
2868 		if (!error) {
2869 			cxgb_get_regs(sc, regs, buf);
2870 			error = copyout(buf, regs->data, reglen);
2871 		}
2872 		free(buf, M_DEVBUF);
2873 
2874 		break;
2875 	}
2876 	case CHELSIO_SET_HW_SCHED: {
2877 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2878 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2879 
2880 		if ((sc->flags & FULL_INIT_DONE) == 0)
2881 			return (EAGAIN);       /* need TP to be initialized */
2882 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2883 		    !in_range(t->channel, 0, 1) ||
2884 		    !in_range(t->kbps, 0, 10000000) ||
2885 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2886 		    !in_range(t->flow_ipg, 0,
2887 			      dack_ticks_to_usec(sc, 0x7ff)))
2888 			return (EINVAL);
2889 
2890 		if (t->kbps >= 0) {
2891 			error = t3_config_sched(sc, t->kbps, t->sched);
2892 			if (error < 0)
2893 				return (-error);
2894 		}
2895 		if (t->class_ipg >= 0)
2896 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2897 		if (t->flow_ipg >= 0) {
2898 			t->flow_ipg *= 1000;     /* us -> ns */
2899 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2900 		}
2901 		if (t->mode >= 0) {
2902 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2903 
2904 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2905 					 bit, t->mode ? bit : 0);
2906 		}
2907 		if (t->channel >= 0)
2908 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2909 					 1 << t->sched, t->channel << t->sched);
2910 		break;
2911 	}
2912 	case CHELSIO_GET_EEPROM: {
2913 		int i;
2914 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2915 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2916 
2917 		if (buf == NULL) {
2918 			return (ENOMEM);
2919 		}
2920 		e->magic = EEPROM_MAGIC;
2921 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2922 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2923 
2924 		if (!error)
2925 			error = copyout(buf + e->offset, e->data, e->len);
2926 
2927 		free(buf, M_DEVBUF);
2928 		break;
2929 	}
2930 	case CHELSIO_CLEAR_STATS: {
2931 		if (!(sc->flags & FULL_INIT_DONE))
2932 			return EAGAIN;
2933 
2934 		PORT_LOCK(pi);
2935 		t3_mac_update_stats(&pi->mac);
2936 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2937 		PORT_UNLOCK(pi);
2938 		break;
2939 	}
2940 	case CHELSIO_GET_UP_LA: {
2941 		struct ch_up_la *la = (struct ch_up_la *)data;
2942 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2943 		if (buf == NULL) {
2944 			return (ENOMEM);
2945 		}
2946 		if (la->bufsize < LA_BUFSIZE)
2947 			error = ENOBUFS;
2948 
2949 		if (!error)
2950 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2951 					      &la->bufsize, buf);
2952 		if (!error)
2953 			error = copyout(buf, la->data, la->bufsize);
2954 
2955 		free(buf, M_DEVBUF);
2956 		break;
2957 	}
2958 	case CHELSIO_GET_UP_IOQS: {
2959 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
2960 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
2961 		uint32_t *v;
2962 
2963 		if (buf == NULL) {
2964 			return (ENOMEM);
2965 		}
2966 		if (ioqs->bufsize < IOQS_BUFSIZE)
2967 			error = ENOBUFS;
2968 
2969 		if (!error)
2970 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
2971 
2972 		if (!error) {
2973 			v = (uint32_t *)buf;
2974 
2975 			ioqs->ioq_rx_enable = *v++;
2976 			ioqs->ioq_tx_enable = *v++;
2977 			ioqs->ioq_rx_status = *v++;
2978 			ioqs->ioq_tx_status = *v++;
2979 
2980 			error = copyout(v, ioqs->data, ioqs->bufsize);
2981 		}
2982 
2983 		free(buf, M_DEVBUF);
2984 		break;
2985 	}
2986 	case CHELSIO_SET_FILTER: {
2987 		struct ch_filter *f = (struct ch_filter *)data;
2988 		struct filter_info *p;
2989 		unsigned int nfilters = sc->params.mc5.nfilters;
2990 
2991 		if (!is_offload(sc))
2992 			return (EOPNOTSUPP);	/* No TCAM */
2993 		if (!(sc->flags & FULL_INIT_DONE))
2994 			return (EAGAIN);	/* mc5 not setup yet */
2995 		if (nfilters == 0)
2996 			return (EBUSY);		/* TOE will use TCAM */
2997 
2998 		/* sanity checks */
2999 		if (f->filter_id >= nfilters ||
3000 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3001 		    (f->val.sport && f->mask.sport != 0xffff) ||
3002 		    (f->val.dport && f->mask.dport != 0xffff) ||
3003 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3004 		    (f->val.vlan_prio &&
3005 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3006 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3007 		    f->qset >= SGE_QSETS ||
3008 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3009 			return (EINVAL);
3010 
3011 		/* Was allocated with M_WAITOK */
3012 		KASSERT(sc->filters, ("filter table NULL\n"));
3013 
3014 		p = &sc->filters[f->filter_id];
3015 		if (p->locked)
3016 			return (EPERM);
3017 
3018 		bzero(p, sizeof(*p));
3019 		p->sip = f->val.sip;
3020 		p->sip_mask = f->mask.sip;
3021 		p->dip = f->val.dip;
3022 		p->sport = f->val.sport;
3023 		p->dport = f->val.dport;
3024 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3025 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3026 		    FILTER_NO_VLAN_PRI;
3027 		p->mac_hit = f->mac_hit;
3028 		p->mac_vld = f->mac_addr_idx != 0xffff;
3029 		p->mac_idx = f->mac_addr_idx;
3030 		p->pkt_type = f->proto;
3031 		p->report_filter_id = f->want_filter_id;
3032 		p->pass = f->pass;
3033 		p->rss = f->rss;
3034 		p->qset = f->qset;
3035 
3036 		error = set_filter(sc, f->filter_id, p);
3037 		if (error == 0)
3038 			p->valid = 1;
3039 		break;
3040 	}
3041 	case CHELSIO_DEL_FILTER: {
3042 		struct ch_filter *f = (struct ch_filter *)data;
3043 		struct filter_info *p;
3044 		unsigned int nfilters = sc->params.mc5.nfilters;
3045 
3046 		if (!is_offload(sc))
3047 			return (EOPNOTSUPP);
3048 		if (!(sc->flags & FULL_INIT_DONE))
3049 			return (EAGAIN);
3050 		if (nfilters == 0 || sc->filters == NULL)
3051 			return (EINVAL);
3052 		if (f->filter_id >= nfilters)
3053 		       return (EINVAL);
3054 
3055 		p = &sc->filters[f->filter_id];
3056 		if (p->locked)
3057 			return (EPERM);
3058 		if (!p->valid)
3059 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3060 
3061 		bzero(p, sizeof(*p));
3062 		p->sip = p->sip_mask = 0xffffffff;
3063 		p->vlan = 0xfff;
3064 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3065 		p->pkt_type = 1;
3066 		error = set_filter(sc, f->filter_id, p);
3067 		break;
3068 	}
3069 	case CHELSIO_GET_FILTER: {
3070 		struct ch_filter *f = (struct ch_filter *)data;
3071 		struct filter_info *p;
3072 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3073 
3074 		if (!is_offload(sc))
3075 			return (EOPNOTSUPP);
3076 		if (!(sc->flags & FULL_INIT_DONE))
3077 			return (EAGAIN);
3078 		if (nfilters == 0 || sc->filters == NULL)
3079 			return (EINVAL);
3080 
3081 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3082 		for (; i < nfilters; i++) {
3083 			p = &sc->filters[i];
3084 			if (!p->valid)
3085 				continue;
3086 
3087 			bzero(f, sizeof(*f));
3088 
3089 			f->filter_id = i;
3090 			f->val.sip = p->sip;
3091 			f->mask.sip = p->sip_mask;
3092 			f->val.dip = p->dip;
3093 			f->mask.dip = p->dip ? 0xffffffff : 0;
3094 			f->val.sport = p->sport;
3095 			f->mask.sport = p->sport ? 0xffff : 0;
3096 			f->val.dport = p->dport;
3097 			f->mask.dport = p->dport ? 0xffff : 0;
3098 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3099 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3100 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3101 			    0 : p->vlan_prio;
3102 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3103 			    0 : FILTER_NO_VLAN_PRI;
3104 			f->mac_hit = p->mac_hit;
3105 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3106 			f->proto = p->pkt_type;
3107 			f->want_filter_id = p->report_filter_id;
3108 			f->pass = p->pass;
3109 			f->rss = p->rss;
3110 			f->qset = p->qset;
3111 
3112 			break;
3113 		}
3114 
3115 		if (i == nfilters)
3116 			f->filter_id = 0xffffffff;
3117 		break;
3118 	}
3119 	default:
3120 		return (EOPNOTSUPP);
3121 		break;
3122 	}
3123 
3124 	return (error);
3125 }
3126 
/*
 * Reads the 32-bit registers in [start, end] (inclusive) and stores each
 * value in 'buf' at the byte offset equal to its register address.
 */
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *dst = (uint32_t *)(buf + start);
	unsigned int reg;

	for (reg = start; reg <= end; reg += sizeof(uint32_t)) {
		*dst = t3_read_reg(ap, reg);
		dst++;
	}
}
3136 
#define T3_REGMAP_SIZE (3 * 1024)
/*
 * Returns the size, in bytes, of the buffer filled in by a register dump.
 */
static int
cxgb_get_regs_len(void)
{
	const int len = T3_REGMAP_SIZE;

	return (len);
}
3144 static void
3145 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3146 {
3147 
3148 	/*
3149 	 * Version scheme:
3150 	 * bits 0..9: chip version
3151 	 * bits 10..15: chip revision
3152 	 * bit 31: set for PCIe cards
3153 	 */
3154 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3155 
3156 	/*
3157 	 * We skip the MAC statistics registers because they are clear-on-read.
3158 	 * Also reading multi-register stats would need to synchronize with the
3159 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3160 	 */
3161 	memset(buf, 0, cxgb_get_regs_len());
3162 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3163 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3164 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3165 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3166 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3167 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3168 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3169 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3170 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3171 }
3172 
3173 static int
3174 alloc_filters(struct adapter *sc)
3175 {
3176 	struct filter_info *p;
3177 	unsigned int nfilters = sc->params.mc5.nfilters;
3178 
3179 	if (nfilters == 0)
3180 		return (0);
3181 
3182 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3183 	sc->filters = p;
3184 
3185 	p = &sc->filters[nfilters - 1];
3186 	p->vlan = 0xfff;
3187 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3188 	p->pass = p->rss = p->valid = p->locked = 1;
3189 
3190 	return (0);
3191 }
3192 
3193 static int
3194 setup_hw_filters(struct adapter *sc)
3195 {
3196 	int i, rc;
3197 	unsigned int nfilters = sc->params.mc5.nfilters;
3198 
3199 	if (!sc->filters)
3200 		return (0);
3201 
3202 	t3_enable_filters(sc);
3203 
3204 	for (i = rc = 0; i < nfilters && !rc; i++) {
3205 		if (sc->filters[i].locked)
3206 			rc = set_filter(sc, i, &sc->filters[i]);
3207 	}
3208 
3209 	return (rc);
3210 }
3211 
3212 static int
3213 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3214 {
3215 	int len;
3216 	struct mbuf *m;
3217 	struct ulp_txpkt *txpkt;
3218 	struct work_request_hdr *wr;
3219 	struct cpl_pass_open_req *oreq;
3220 	struct cpl_set_tcb_field *sreq;
3221 
3222 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3223 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3224 
3225 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3226 	      sc->params.mc5.nfilters;
3227 
3228 	m = m_gethdr(M_WAITOK, MT_DATA);
3229 	m->m_len = m->m_pkthdr.len = len;
3230 	bzero(mtod(m, char *), len);
3231 
3232 	wr = mtod(m, struct work_request_hdr *);
3233 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3234 
3235 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3236 	txpkt = (struct ulp_txpkt *)oreq;
3237 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3238 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3239 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3240 	oreq->local_port = htons(f->dport);
3241 	oreq->peer_port = htons(f->sport);
3242 	oreq->local_ip = htonl(f->dip);
3243 	oreq->peer_ip = htonl(f->sip);
3244 	oreq->peer_netmask = htonl(f->sip_mask);
3245 	oreq->opt0h = 0;
3246 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3247 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3248 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3249 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3250 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3251 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3252 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3253 
3254 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3255 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3256 			  (f->report_filter_id << 15) | (1 << 23) |
3257 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3258 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3259 	t3_mgmt_tx(sc, m);
3260 
3261 	if (f->pass && !f->rss) {
3262 		len = sizeof(*sreq);
3263 		m = m_gethdr(M_WAITOK, MT_DATA);
3264 		m->m_len = m->m_pkthdr.len = len;
3265 		bzero(mtod(m, char *), len);
3266 		sreq = mtod(m, struct cpl_set_tcb_field *);
3267 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3268 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3269 				 (u64)sc->rrss_map[f->qset] << 19);
3270 		t3_mgmt_tx(sc, m);
3271 	}
3272 	return 0;
3273 }
3274 
3275 static inline void
3276 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3277     unsigned int word, u64 mask, u64 val)
3278 {
3279 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3280 	req->reply = V_NO_REPLY(1);
3281 	req->cpu_idx = 0;
3282 	req->word = htons(word);
3283 	req->mask = htobe64(mask);
3284 	req->val = htobe64(val);
3285 }
3286 
3287 static inline void
3288 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3289     unsigned int word, u64 mask, u64 val)
3290 {
3291 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3292 
3293 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3294 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3295 	mk_set_tcb_field(req, tid, word, mask, val);
3296 }
3297 
3298 void
3299 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3300 {
3301 	struct adapter *sc;
3302 
3303 	mtx_lock(&t3_list_lock);
3304 	SLIST_FOREACH(sc, &t3_list, link) {
3305 		/*
3306 		 * func should not make any assumptions about what state sc is
3307 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3308 		 */
3309 		func(sc, arg);
3310 	}
3311 	mtx_unlock(&t3_list_lock);
3312 }
3313 
3314 #ifdef TCP_OFFLOAD
3315 static int
3316 toe_capability(struct port_info *pi, int enable)
3317 {
3318 	int rc;
3319 	struct adapter *sc = pi->adapter;
3320 
3321 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3322 
3323 	if (!is_offload(sc))
3324 		return (ENODEV);
3325 
3326 	if (enable) {
3327 		if (!(sc->flags & FULL_INIT_DONE)) {
3328 			log(LOG_WARNING,
3329 			    "You must enable a cxgb interface first\n");
3330 			return (EAGAIN);
3331 		}
3332 
3333 		if (isset(&sc->offload_map, pi->port_id))
3334 			return (0);
3335 
3336 		if (!(sc->flags & TOM_INIT_DONE)) {
3337 			rc = t3_activate_uld(sc, ULD_TOM);
3338 			if (rc == EAGAIN) {
3339 				log(LOG_WARNING,
3340 				    "You must kldload t3_tom.ko before trying "
3341 				    "to enable TOE on a cxgb interface.\n");
3342 			}
3343 			if (rc != 0)
3344 				return (rc);
3345 			KASSERT(sc->tom_softc != NULL,
3346 			    ("%s: TOM activated but softc NULL", __func__));
3347 			KASSERT(sc->flags & TOM_INIT_DONE,
3348 			    ("%s: TOM activated but flag not set", __func__));
3349 		}
3350 
3351 		setbit(&sc->offload_map, pi->port_id);
3352 
3353 		/*
3354 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3355 		 * enabled on any port.  Need to figure out how to enable,
3356 		 * disable, load, and unload iWARP cleanly.
3357 		 */
3358 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3359 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3360 			setbit(&sc->offload_map, MAX_NPORTS);
3361 	} else {
3362 		if (!isset(&sc->offload_map, pi->port_id))
3363 			return (0);
3364 
3365 		KASSERT(sc->flags & TOM_INIT_DONE,
3366 		    ("%s: TOM never initialized?", __func__));
3367 		clrbit(&sc->offload_map, pi->port_id);
3368 	}
3369 
3370 	return (0);
3371 }
3372 
3373 /*
3374  * Add an upper layer driver to the global list.
3375  */
3376 int
3377 t3_register_uld(struct uld_info *ui)
3378 {
3379 	int rc = 0;
3380 	struct uld_info *u;
3381 
3382 	mtx_lock(&t3_uld_list_lock);
3383 	SLIST_FOREACH(u, &t3_uld_list, link) {
3384 	    if (u->uld_id == ui->uld_id) {
3385 		    rc = EEXIST;
3386 		    goto done;
3387 	    }
3388 	}
3389 
3390 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3391 	ui->refcount = 0;
3392 done:
3393 	mtx_unlock(&t3_uld_list_lock);
3394 	return (rc);
3395 }
3396 
3397 int
3398 t3_unregister_uld(struct uld_info *ui)
3399 {
3400 	int rc = EINVAL;
3401 	struct uld_info *u;
3402 
3403 	mtx_lock(&t3_uld_list_lock);
3404 
3405 	SLIST_FOREACH(u, &t3_uld_list, link) {
3406 	    if (u == ui) {
3407 		    if (ui->refcount > 0) {
3408 			    rc = EBUSY;
3409 			    goto done;
3410 		    }
3411 
3412 		    SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3413 		    rc = 0;
3414 		    goto done;
3415 	    }
3416 	}
3417 done:
3418 	mtx_unlock(&t3_uld_list_lock);
3419 	return (rc);
3420 }
3421 
3422 int
3423 t3_activate_uld(struct adapter *sc, int id)
3424 {
3425 	int rc = EAGAIN;
3426 	struct uld_info *ui;
3427 
3428 	mtx_lock(&t3_uld_list_lock);
3429 
3430 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3431 		if (ui->uld_id == id) {
3432 			rc = ui->activate(sc);
3433 			if (rc == 0)
3434 				ui->refcount++;
3435 			goto done;
3436 		}
3437 	}
3438 done:
3439 	mtx_unlock(&t3_uld_list_lock);
3440 
3441 	return (rc);
3442 }
3443 
3444 int
3445 t3_deactivate_uld(struct adapter *sc, int id)
3446 {
3447 	int rc = EINVAL;
3448 	struct uld_info *ui;
3449 
3450 	mtx_lock(&t3_uld_list_lock);
3451 
3452 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3453 		if (ui->uld_id == id) {
3454 			rc = ui->deactivate(sc);
3455 			if (rc == 0)
3456 				ui->refcount--;
3457 			goto done;
3458 		}
3459 	}
3460 done:
3461 	mtx_unlock(&t3_uld_list_lock);
3462 
3463 	return (rc);
3464 }
3465 
3466 static int
3467 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3468     struct mbuf *m)
3469 {
3470 	m_freem(m);
3471 	return (EDOOFUS);
3472 }
3473 
3474 int
3475 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3476 {
3477 	uintptr_t *loc, new;
3478 
3479 	if (opcode >= NUM_CPL_HANDLERS)
3480 		return (EINVAL);
3481 
3482 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3483 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3484 	atomic_store_rel_ptr(loc, new);
3485 
3486 	return (0);
3487 }
3488 #endif
3489 
3490 static int
3491 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3492 {
3493 	int rc = 0;
3494 
3495 	switch (cmd) {
3496 	case MOD_LOAD:
3497 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3498 		SLIST_INIT(&t3_list);
3499 #ifdef TCP_OFFLOAD
3500 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3501 		SLIST_INIT(&t3_uld_list);
3502 #endif
3503 		break;
3504 
3505 	case MOD_UNLOAD:
3506 #ifdef TCP_OFFLOAD
3507 		mtx_lock(&t3_uld_list_lock);
3508 		if (!SLIST_EMPTY(&t3_uld_list)) {
3509 			rc = EBUSY;
3510 			mtx_unlock(&t3_uld_list_lock);
3511 			break;
3512 		}
3513 		mtx_unlock(&t3_uld_list_lock);
3514 		mtx_destroy(&t3_uld_list_lock);
3515 #endif
3516 		mtx_lock(&t3_list_lock);
3517 		if (!SLIST_EMPTY(&t3_list)) {
3518 			rc = EBUSY;
3519 			mtx_unlock(&t3_list_lock);
3520 			break;
3521 		}
3522 		mtx_unlock(&t3_list_lock);
3523 		mtx_destroy(&t3_list_lock);
3524 		break;
3525 	}
3526 
3527 	return (rc);
3528 }
3529