xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 13de33a5dc2304b13d595d75d48c51793958474f)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_inet.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/bus.h>
39 #include <sys/module.h>
40 #include <sys/pciio.h>
41 #include <sys/conf.h>
42 #include <machine/bus.h>
43 #include <machine/resource.h>
44 #include <sys/bus_dma.h>
45 #include <sys/ktr.h>
46 #include <sys/rman.h>
47 #include <sys/ioccom.h>
48 #include <sys/mbuf.h>
49 #include <sys/linker.h>
50 #include <sys/firmware.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/queue.h>
57 #include <sys/taskqueue.h>
58 #include <sys/proc.h>
59 
60 #include <net/bpf.h>
61 #include <net/ethernet.h>
62 #include <net/if.h>
63 #include <net/if_var.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_media.h>
67 #include <net/if_types.h>
68 #include <net/if_vlan_var.h>
69 
70 #include <netinet/in_systm.h>
71 #include <netinet/in.h>
72 #include <netinet/if_ether.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip.h>
75 #include <netinet/tcp.h>
76 #include <netinet/udp.h>
77 
78 #include <dev/pci/pcireg.h>
79 #include <dev/pci/pcivar.h>
80 #include <dev/pci/pci_private.h>
81 
82 #include <cxgb_include.h>
83 
84 #ifdef PRIV_SUPPORTED
85 #include <sys/priv.h>
86 #endif
87 
88 static int cxgb_setup_interrupts(adapter_t *);
89 static void cxgb_teardown_interrupts(adapter_t *);
90 static void cxgb_init(void *);
91 static int cxgb_init_locked(struct port_info *);
92 static int cxgb_uninit_locked(struct port_info *);
93 static int cxgb_uninit_synchronized(struct port_info *);
94 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
95 static int cxgb_media_change(struct ifnet *);
96 static int cxgb_ifm_type(int);
97 static void cxgb_build_medialist(struct port_info *);
98 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
99 static int setup_sge_qsets(adapter_t *);
100 static void cxgb_async_intr(void *);
101 static void cxgb_tick_handler(void *, int);
102 static void cxgb_tick(void *);
103 static void link_check_callout(void *);
104 static void check_link_status(void *, int);
105 static void setup_rss(adapter_t *sc);
106 static int alloc_filters(struct adapter *);
107 static int setup_hw_filters(struct adapter *);
108 static int set_filter(struct adapter *, int, const struct filter_info *);
109 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
110     unsigned int, u64, u64);
111 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
112     unsigned int, u64, u64);
113 #ifdef TCP_OFFLOAD
114 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
115 #endif
116 
117 /* Attachment glue for the PCI controller end of the device.  Each port of
118  * the device is attached separately, as defined later.
119  */
120 static int cxgb_controller_probe(device_t);
121 static int cxgb_controller_attach(device_t);
122 static int cxgb_controller_detach(device_t);
123 static void cxgb_free(struct adapter *);
124 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
125     unsigned int end);
126 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
127 static int cxgb_get_regs_len(void);
128 static void touch_bars(device_t dev);
129 static void cxgb_update_mac_settings(struct port_info *p);
130 #ifdef TCP_OFFLOAD
131 static int toe_capability(struct port_info *, int);
132 #endif
133 
134 static device_method_t cxgb_controller_methods[] = {
135 	DEVMETHOD(device_probe,		cxgb_controller_probe),
136 	DEVMETHOD(device_attach,	cxgb_controller_attach),
137 	DEVMETHOD(device_detach,	cxgb_controller_detach),
138 
139 	DEVMETHOD_END
140 };
141 
142 static driver_t cxgb_controller_driver = {
143 	"cxgbc",
144 	cxgb_controller_methods,
145 	sizeof(struct adapter)
146 };
147 
148 static int cxgbc_mod_event(module_t, int, void *);
149 static devclass_t	cxgb_controller_devclass;
150 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
151     cxgbc_mod_event, 0);
152 MODULE_VERSION(cxgbc, 1);
153 MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
154 
155 /*
156  * Attachment glue for the ports.  Attachment is done directly to the
157  * controller device.
158  */
159 static int cxgb_port_probe(device_t);
160 static int cxgb_port_attach(device_t);
161 static int cxgb_port_detach(device_t);
162 
163 static device_method_t cxgb_port_methods[] = {
164 	DEVMETHOD(device_probe,		cxgb_port_probe),
165 	DEVMETHOD(device_attach,	cxgb_port_attach),
166 	DEVMETHOD(device_detach,	cxgb_port_detach),
167 	{ 0, 0 }
168 };
169 
170 static driver_t cxgb_port_driver = {
171 	"cxgb",
172 	cxgb_port_methods,
173 	0
174 };
175 
176 static d_ioctl_t cxgb_extension_ioctl;
177 static d_open_t cxgb_extension_open;
178 static d_close_t cxgb_extension_close;
179 
180 static struct cdevsw cxgb_cdevsw = {
181        .d_version =    D_VERSION,
182        .d_flags =      0,
183        .d_open =       cxgb_extension_open,
184        .d_close =      cxgb_extension_close,
185        .d_ioctl =      cxgb_extension_ioctl,
186        .d_name =       "cxgb",
187 };
188 
189 static devclass_t	cxgb_port_devclass;
190 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
191 MODULE_VERSION(cxgb, 1);
192 
193 static struct mtx t3_list_lock;
194 static SLIST_HEAD(, adapter) t3_list;
195 #ifdef TCP_OFFLOAD
196 static struct mtx t3_uld_list_lock;
197 static SLIST_HEAD(, uld_info) t3_uld_list;
198 #endif
199 
200 /*
201  * The driver uses the best interrupt scheme available on a platform in the
202  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
203  * of these schemes the driver may consider as follows:
204  *
205  * msi = 2: choose from among all three options
206  * msi = 1: only consider MSI and pin interrupts
207  * msi = 0: force pin interrupts
208  */
209 static int msi_allowed = 2;
210 
211 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
212 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
213 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
214     "MSI-X, MSI, INTx selector");
215 
216 /*
217  * The driver uses an auto-queue algorithm by default.
218  * To disable it and force a single queue-set per port, use multiq = 0
219  */
220 static int multiq = 1;
221 TUNABLE_INT("hw.cxgb.multiq", &multiq);
222 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
223     "use min(ncpus/ports, 8) queue-sets per port");
224 
225 /*
226  * By default the driver will not update the firmware unless
227  * it was compiled against a newer version.
228  *
229  */
230 static int force_fw_update = 0;
231 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
232 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
233     "update firmware even if up to date");
234 
235 int cxgb_use_16k_clusters = -1;
236 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
237 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
238     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
239 
240 static int nfilters = -1;
241 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
242 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
243     &nfilters, 0, "max number of entries in the filter table");
244 
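/*
 * Upper and lower bounds on the sizes of the SGE Tx, control, and response
 * queues and the Rx free lists.
 */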
245 enum {
246 	MAX_TXQ_ENTRIES      = 16384,
247 	MAX_CTRL_TXQ_ENTRIES = 1024,
248 	MAX_RSPQ_ENTRIES     = 16384,
249 	MAX_RX_BUFFERS       = 16384,
250 	MAX_RX_JUMBO_BUFFERS = 16384,
251 	MIN_TXQ_ENTRIES      = 4,
252 	MIN_CTRL_TXQ_ENTRIES = 4,
253 	MIN_RSPQ_ENTRIES     = 32,
254 	MIN_FL_ENTRIES       = 32,
255 	MIN_FL_JUMBO_ENTRIES = 32
256 };
257 
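/*
 * Software state for one entry in the adapter's hardware packet-filter
 * table; see alloc_filters(), setup_hw_filters(), and set_filter().
 */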
258 struct filter_info {
259 	u32 sip;
260 	u32 sip_mask;
261 	u32 dip;
262 	u16 sport;
263 	u16 dport;
264 	u32 vlan:12;
265 	u32 vlan_prio:3;
266 	u32 mac_hit:1;
267 	u32 mac_idx:4;
268 	u32 mac_vld:1;
269 	u32 pkt_type:2;
270 	u32 report_filter_id:1;
271 	u32 pass:1;
272 	u32 rss:1;
273 	u32 qset:3;
274 	u32 locked:1;
275 	u32 valid:1;
276 };
277 
278 enum { FILTER_NO_VLAN_PRI = 7 };
279 
280 #define EEPROM_MAGIC 0x38E2F10C
281 
282 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
283 
284 /* Table for probing the cards.  The desc field isn't actually used */
285 struct cxgb_ident {
286 	uint16_t	vendor;
287 	uint16_t	device;
288 	int		index;
289 	char		*desc;
290 } cxgb_identifiers[] = {
291 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
303 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
304 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
305 	{0, 0, 0, NULL}
306 };
307 
308 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
309 
310 
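/*
 * Map the adapter's hardware revision to the character used in the names of
 * the TP EEPROM and protocol SRAM firmware images.
 */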
311 static __inline char
312 t3rev2char(struct adapter *adapter)
313 {
314 	char rev = 'z';
315 
316 	switch(adapter->params.rev) {
317 	case T3_REV_A:
318 		rev = 'a';
319 		break;
320 	case T3_REV_B:
321 	case T3_REV_B2:
322 		rev = 'b';
323 		break;
324 	case T3_REV_C:
325 		rev = 'c';
326 		break;
327 	}
328 	return rev;
329 }
330 
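/*
 * Look up the attaching device in the cxgb_identifiers table by PCI vendor
 * and device ID.
 */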
331 static struct cxgb_ident *
332 cxgb_get_ident(device_t dev)
333 {
334 	struct cxgb_ident *id;
335 
336 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
337 		if ((id->vendor == pci_get_vendor(dev)) &&
338 		    (id->device == pci_get_device(dev))) {
339 			return (id);
340 		}
341 	}
342 	return (NULL);
343 }
344 
345 static const struct adapter_info *
346 cxgb_get_adapter_info(device_t dev)
347 {
348 	struct cxgb_ident *id;
349 	const struct adapter_info *ai;
350 
351 	id = cxgb_get_ident(dev);
352 	if (id == NULL)
353 		return (NULL);
354 
355 	ai = t3_get_adapter_info(id->index);
356 
357 	return (ai);
358 }
359 
360 static int
361 cxgb_controller_probe(device_t dev)
362 {
363 	const struct adapter_info *ai;
364 	char *ports, buf[80];
365 	int nports;
366 
367 	ai = cxgb_get_adapter_info(dev);
368 	if (ai == NULL)
369 		return (ENXIO);
370 
371 	nports = ai->nports0 + ai->nports1;
372 	if (nports == 1)
373 		ports = "port";
374 	else
375 		ports = "ports";
376 
377 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
378 	device_set_desc_copy(dev, buf);
379 	return (BUS_PROBE_DEFAULT);
380 }
381 
382 #define FW_FNAME "cxgb_t3fw"
383 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
384 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
385 
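/*
 * Load the cxgb_t3fw firmware module and install its contents on the card.
 */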
386 static int
387 upgrade_fw(adapter_t *sc)
388 {
389 	const struct firmware *fw;
390 	int status;
391 	u32 vers;
392 
393 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
394 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
395 		return (ENOENT);
396 	} else
397 		device_printf(sc->dev, "installing firmware on card\n");
398 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
399 
400 	if (status != 0) {
401 		device_printf(sc->dev, "failed to install firmware: %d\n",
402 		    status);
403 	} else {
404 		t3_get_fw_version(sc, &vers);
405 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
406 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
407 		    G_FW_VERSION_MICRO(vers));
408 	}
409 
410 	firmware_put(fw, FIRMWARE_UNLOAD);
411 
412 	return (status);
413 }
414 
415 /*
416  * The cxgb_controller_attach function is responsible for the initial
417  * bringup of the device.  Its responsibilities include:
418  *
419  *  1. Determine if the device supports MSI or MSI-X.
420  *  2. Allocate bus resources so that we can access the Base Address Register.
421  *  3. Create and initialize mutexes for the controller and its control
422  *     logic such as SGE and MDIO.
423  *  4. Call the hardware-specific setup routine for the adapter as a whole.
424  *  5. Allocate the BAR for doing MSI-X.
425  *  6. Set up the line interrupt iff MSI-X is not supported.
426  *  7. Create the driver's taskq.
427  *  8. Start one task queue service thread.
428  *  9. Check if the firmware and SRAM are up-to-date.  They will be
429  *     auto-updated later (before FULL_INIT_DONE), if required.
430  * 10. Create a child device for each MAC (port).
431  * 11. Initialize T3 private state.
432  * 12. Trigger the LED.
433  * 13. Set up offload iff supported.
434  * 14. Reset/restart the tick callout.
435  * 15. Attach sysctls.
436  *
437  * NOTE: Any modification to, or deviation from, this sequence MUST be
438  * reflected in the list above.  Failure to do so will result in problems under various
439  * error conditions including link flapping.
440  */
441 static int
442 cxgb_controller_attach(device_t dev)
443 {
444 	device_t child;
445 	const struct adapter_info *ai;
446 	struct adapter *sc;
447 	int i, error = 0;
448 	uint32_t vers;
449 	int port_qsets = 1;
450 	int msi_needed, reg;
451 	char buf[80];
452 
453 	sc = device_get_softc(dev);
454 	sc->dev = dev;
455 	sc->msi_count = 0;
456 	ai = cxgb_get_adapter_info(dev);
457 
458 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
459 	    device_get_unit(dev));
460 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
461 
462 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
463 	    device_get_unit(dev));
464 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
465 	    device_get_unit(dev));
466 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
467 	    device_get_unit(dev));
468 
469 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
470 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
471 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
472 
473 	mtx_lock(&t3_list_lock);
474 	SLIST_INSERT_HEAD(&t3_list, sc, link);
475 	mtx_unlock(&t3_list_lock);
476 
477 	/* find the PCIe link width and set max read request to 4KB */
478 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
479 		uint16_t lnk;
480 
481 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
482 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
483 		if (sc->link_width < 8 &&
484 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
485 			device_printf(sc->dev,
486 			    "PCIe x%d Link, expect reduced performance\n",
487 			    sc->link_width);
488 		}
489 
490 		pci_set_max_read_req(dev, 4096);
491 	}
492 
493 	touch_bars(dev);
494 	pci_enable_busmaster(dev);
495 	/*
496 	 * Allocate the registers and make them available to the driver.
497 	 * The registers that we care about for NIC mode are in BAR 0
498 	 */
499 	sc->regs_rid = PCIR_BAR(0);
500 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
501 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
502 		device_printf(dev, "Cannot allocate BAR region 0\n");
503 		error = ENXIO;
504 		goto out;
505 	}
506 
507 	sc->bt = rman_get_bustag(sc->regs_res);
508 	sc->bh = rman_get_bushandle(sc->regs_res);
509 	sc->mmio_len = rman_get_size(sc->regs_res);
510 
511 	for (i = 0; i < MAX_NPORTS; i++)
512 		sc->port[i].adapter = sc;
513 
514 	if (t3_prep_adapter(sc, ai, 1) < 0) {
515 		printf("prep adapter failed\n");
516 		error = ENODEV;
517 		goto out;
518 	}
519 
520 	sc->udbs_rid = PCIR_BAR(2);
521 	sc->udbs_res = NULL;
522 	if (is_offload(sc) &&
523 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
524 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
525 		device_printf(dev, "Cannot allocate BAR region 1\n");
526 		error = ENXIO;
527 		goto out;
528 	}
529 
530         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
531 	 * enough messages for the queue sets.  If that fails, try falling
532 	 * back to MSI.  If that fails, then try falling back to the legacy
533 	 * interrupt pin model.
534 	 */
535 	sc->msix_regs_rid = 0x20;
536 	if ((msi_allowed >= 2) &&
537 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
538 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
539 
540 		if (multiq)
541 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
542 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
543 
544 		if (pci_msix_count(dev) == 0 ||
545 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
546 		    sc->msi_count != msi_needed) {
547 			device_printf(dev, "alloc msix failed - "
548 				      "msi_count=%d, msi_needed=%d, err=%d; "
549 				      "will try MSI\n", sc->msi_count,
550 				      msi_needed, error);
551 			sc->msi_count = 0;
552 			port_qsets = 1;
553 			pci_release_msi(dev);
554 			bus_release_resource(dev, SYS_RES_MEMORY,
555 			    sc->msix_regs_rid, sc->msix_regs_res);
556 			sc->msix_regs_res = NULL;
557 		} else {
558 			sc->flags |= USING_MSIX;
559 			sc->cxgb_intr = cxgb_async_intr;
560 			device_printf(dev,
561 				      "using MSI-X interrupts (%u vectors)\n",
562 				      sc->msi_count);
563 		}
564 	}
565 
566 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
567 		sc->msi_count = 1;
568 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
569 			device_printf(dev, "alloc msi failed - "
570 				      "err=%d; will try INTx\n", error);
571 			sc->msi_count = 0;
572 			port_qsets = 1;
573 			pci_release_msi(dev);
574 		} else {
575 			sc->flags |= USING_MSI;
576 			sc->cxgb_intr = t3_intr_msi;
577 			device_printf(dev, "using MSI interrupts\n");
578 		}
579 	}
580 	if (sc->msi_count == 0) {
581 		device_printf(dev, "using line interrupts\n");
582 		sc->cxgb_intr = t3b_intr;
583 	}
584 
585 	/* Create a private taskqueue thread for handling driver events */
586 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
587 	    taskqueue_thread_enqueue, &sc->tq);
588 	if (sc->tq == NULL) {
589 		device_printf(dev, "failed to allocate controller task queue\n");
590 		goto out;
591 	}
592 
593 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
594 	    device_get_nameunit(dev));
595 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
596 
597 
598 	/* Create a periodic callout for checking adapter status */
599 	callout_init(&sc->cxgb_tick_ch, TRUE);
600 
601 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
602 		/*
603 		 * Warn user that a firmware update will be attempted in init.
604 		 */
605 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
606 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
607 		sc->flags &= ~FW_UPTODATE;
608 	} else {
609 		sc->flags |= FW_UPTODATE;
610 	}
611 
612 	if (t3_check_tpsram_version(sc) < 0) {
613 		/*
614 		 * Warn user that a firmware update will be attempted in init.
615 		 */
616 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
617 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
618 		sc->flags &= ~TPS_UPTODATE;
619 	} else {
620 		sc->flags |= TPS_UPTODATE;
621 	}
622 
623 	/*
624 	 * Create a child device for each MAC.  The ethernet attachment
625 	 * will be done in these children.
626 	 */
627 	for (i = 0; i < (sc)->params.nports; i++) {
628 		struct port_info *pi;
629 
630 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
631 			device_printf(dev, "failed to add child port\n");
632 			error = EINVAL;
633 			goto out;
634 		}
635 		pi = &sc->port[i];
636 		pi->adapter = sc;
637 		pi->nqsets = port_qsets;
638 		pi->first_qset = i*port_qsets;
639 		pi->port_id = i;
640 		pi->tx_chan = i >= ai->nports0;
641 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
642 		sc->rxpkt_map[pi->txpkt_intf] = i;
643 		sc->port[i].tx_chan = i >= ai->nports0;
644 		sc->portdev[i] = child;
645 		device_set_softc(child, pi);
646 	}
647 	if ((error = bus_generic_attach(dev)) != 0)
648 		goto out;
649 
650 	/* initialize sge private state */
651 	t3_sge_init_adapter(sc);
652 
653 	t3_led_ready(sc);
654 
655 	error = t3_get_fw_version(sc, &vers);
656 	if (error)
657 		goto out;
658 
659 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
660 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
661 	    G_FW_VERSION_MICRO(vers));
662 
663 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
664 		 ai->desc, is_offload(sc) ? "R" : "",
665 		 sc->params.vpd.ec, sc->params.vpd.sn);
666 	device_set_desc_copy(dev, buf);
667 
668 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
669 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
670 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
671 
672 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
673 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
674 	t3_add_attach_sysctls(sc);
675 
676 #ifdef TCP_OFFLOAD
677 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
678 		sc->cpl_handler[i] = cpl_not_handled;
679 #endif
680 
681 	t3_intr_clear(sc);
682 	error = cxgb_setup_interrupts(sc);
683 out:
684 	if (error)
685 		cxgb_free(sc);
686 
687 	return (error);
688 }
689 
690 /*
691  * The cxgb_controller_detach routine is called when the device is
692  * unloaded from the system.
693  */
694 
695 static int
696 cxgb_controller_detach(device_t dev)
697 {
698 	struct adapter *sc;
699 
700 	sc = device_get_softc(dev);
701 
702 	cxgb_free(sc);
703 
704 	return (0);
705 }
706 
707 /*
708  * cxgb_free() is called by the cxgb_controller_detach() routine
709  * to tear down the structures that were built up in
710  * cxgb_controller_attach(), and should be the final piece of work
711  * done when fully unloading the driver.
712  *
713  * Its responsibilities include:
714  *  1. Shutting down the threads started by the cxgb_controller_attach()
715  *     routine.
716  *  2. Stopping the lower-level device and all callouts (cxgb_down()).
717  *  3. Detaching all of the port devices created during the
718  *     cxgb_controller_attach() routine.
719  *  4. Removing the device children created via cxgb_controller_attach().
720  *  5. Releasing PCI resources associated with the device.
721  *  6. Turning off the offload support, iff it was turned on.
722  *  7. Destroying the mutexes created in cxgb_controller_attach().
723  *
724  */
725 static void
726 cxgb_free(struct adapter *sc)
727 {
728 	int i, nqsets = 0;
729 
730 	ADAPTER_LOCK(sc);
731 	sc->flags |= CXGB_SHUTDOWN;
732 	ADAPTER_UNLOCK(sc);
733 
734 	/*
735 	 * Make sure all child devices are gone.
736 	 */
737 	bus_generic_detach(sc->dev);
738 	for (i = 0; i < (sc)->params.nports; i++) {
739 		if (sc->portdev[i] &&
740 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
741 			device_printf(sc->dev, "failed to delete child port\n");
742 		nqsets += sc->port[i].nqsets;
743 	}
744 
745 	/*
746 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
747 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
748 	 * all open devices have been closed.
749 	 */
750 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
751 					   __func__, sc->open_device_map));
752 	for (i = 0; i < sc->params.nports; i++) {
753 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
754 						  __func__, i));
755 	}
756 
757 	/*
758 	 * Finish off the adapter's callouts.
759 	 */
760 	callout_drain(&sc->cxgb_tick_ch);
761 	callout_drain(&sc->sge_timer_ch);
762 
763 	/*
764 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
765 	 * sysctls are cleaned up by the kernel linker.
766 	 */
767 	if (sc->flags & FULL_INIT_DONE) {
768  		t3_free_sge_resources(sc, nqsets);
769  		sc->flags &= ~FULL_INIT_DONE;
770  	}
771 
772 	/*
773 	 * Release all interrupt resources.
774 	 */
775 	cxgb_teardown_interrupts(sc);
776 	if (sc->flags & (USING_MSI | USING_MSIX)) {
777 		device_printf(sc->dev, "releasing msi message(s)\n");
778 		pci_release_msi(sc->dev);
779 	} else {
780 		device_printf(sc->dev, "no msi message to release\n");
781 	}
782 
783 	if (sc->msix_regs_res != NULL) {
784 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
785 		    sc->msix_regs_res);
786 	}
787 
788 	/*
789 	 * Free the adapter's taskqueue.
790 	 */
791 	if (sc->tq != NULL) {
792 		taskqueue_free(sc->tq);
793 		sc->tq = NULL;
794 	}
795 
796 	free(sc->filters, M_DEVBUF);
797 	t3_sge_free(sc);
798 
799 	if (sc->udbs_res != NULL)
800 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
801 		    sc->udbs_res);
802 
803 	if (sc->regs_res != NULL)
804 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
805 		    sc->regs_res);
806 
807 	MTX_DESTROY(&sc->mdio_lock);
808 	MTX_DESTROY(&sc->sge.reg_lock);
809 	MTX_DESTROY(&sc->elmer_lock);
810 	mtx_lock(&t3_list_lock);
811 	SLIST_REMOVE(&t3_list, sc, adapter, link);
812 	mtx_unlock(&t3_list_lock);
813 	ADAPTER_LOCK_DEINIT(sc);
814 }
815 
816 /**
817  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
818  *	@sc: the controller softc
819  *
820  *	Determines how many sets of SGE queues to use and initializes them.
821  *	We support multiple queue sets per port if we have MSI-X, otherwise
822  *	just one queue set per port.
823  */
824 static int
825 setup_sge_qsets(adapter_t *sc)
826 {
827 	int i, j, err, irq_idx = 0, qset_idx = 0;
828 	u_int ntxq = SGE_TXQ_PER_SET;
829 
830 	if ((err = t3_sge_alloc(sc)) != 0) {
831 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
832 		return (err);
833 	}
834 
835 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
836 		irq_idx = -1;
837 
838 	for (i = 0; i < (sc)->params.nports; i++) {
839 		struct port_info *pi = &sc->port[i];
840 
841 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
842 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
843 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
844 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
845 			if (err) {
846 				t3_free_sge_resources(sc, qset_idx);
847 				device_printf(sc->dev,
848 				    "t3_sge_alloc_qset failed with %d\n", err);
849 				return (err);
850 			}
851 		}
852 	}
853 
854 	return (0);
855 }
856 
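/*
 * Release the per-queue-set MSI-X interrupt handlers as well as the common
 * (INTx, MSI, or MSI-X error) interrupt, if they were set up.
 */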
857 static void
858 cxgb_teardown_interrupts(adapter_t *sc)
859 {
860 	int i;
861 
862 	for (i = 0; i < SGE_QSETS; i++) {
863 		if (sc->msix_intr_tag[i] == NULL) {
864 
865 			/* Should have been set up fully or not at all */
866 			KASSERT(sc->msix_irq_res[i] == NULL &&
867 				sc->msix_irq_rid[i] == 0,
868 				("%s: half-done interrupt (%d).", __func__, i));
869 
870 			continue;
871 		}
872 
873 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
874 				  sc->msix_intr_tag[i]);
875 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
876 				     sc->msix_irq_res[i]);
877 
878 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
879 		sc->msix_irq_rid[i] = 0;
880 	}
881 
882 	if (sc->intr_tag) {
883 		KASSERT(sc->irq_res != NULL,
884 			("%s: half-done interrupt.", __func__));
885 
886 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
887 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
888 				     sc->irq_res);
889 
890 		sc->irq_res = sc->intr_tag = NULL;
891 		sc->irq_rid = 0;
892 	}
893 }
894 
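/*
 * Set up the adapter's interrupt handlers: the common interrupt first (INTx,
 * MSI, or the MSI-X "error" vector), followed by one MSI-X vector per queue
 * set when MSI-X is in use.
 */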
895 static int
896 cxgb_setup_interrupts(adapter_t *sc)
897 {
898 	struct resource *res;
899 	void *tag;
900 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
901 
902 	sc->irq_rid = intr_flag ? 1 : 0;
903 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
904 					     RF_SHAREABLE | RF_ACTIVE);
905 	if (sc->irq_res == NULL) {
906 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
907 			      intr_flag, sc->irq_rid);
908 		err = EINVAL;
909 		sc->irq_rid = 0;
910 	} else {
911 		err = bus_setup_intr(sc->dev, sc->irq_res,
912 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
913 		    sc->cxgb_intr, sc, &sc->intr_tag);
914 
915 		if (err) {
916 			device_printf(sc->dev,
917 				      "Cannot set up interrupt (%x, %u, %d)\n",
918 				      intr_flag, sc->irq_rid, err);
919 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
920 					     sc->irq_res);
921 			sc->irq_res = sc->intr_tag = NULL;
922 			sc->irq_rid = 0;
923 		}
924 	}
925 
926 	/* That's all for INTx or MSI */
927 	if (!(intr_flag & USING_MSIX) || err)
928 		return (err);
929 
930 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
931 	for (i = 0; i < sc->msi_count - 1; i++) {
932 		rid = i + 2;
933 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
934 					     RF_SHAREABLE | RF_ACTIVE);
935 		if (res == NULL) {
936 			device_printf(sc->dev, "Cannot allocate interrupt "
937 				      "for message %d\n", rid);
938 			err = EINVAL;
939 			break;
940 		}
941 
942 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
943 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
944 		if (err) {
945 			device_printf(sc->dev, "Cannot set up interrupt "
946 				      "for message %d (%d)\n", rid, err);
947 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
948 			break;
949 		}
950 
951 		sc->msix_irq_rid[i] = rid;
952 		sc->msix_irq_res[i] = res;
953 		sc->msix_intr_tag[i] = tag;
954 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
955 	}
956 
957 	if (err)
958 		cxgb_teardown_interrupts(sc);
959 
960 	return (err);
961 }
962 
963 
964 static int
965 cxgb_port_probe(device_t dev)
966 {
967 	struct port_info *p;
968 	char buf[80];
969 	const char *desc;
970 
971 	p = device_get_softc(dev);
972 	desc = p->phy.desc;
973 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
974 	device_set_desc_copy(dev, buf);
975 	return (0);
976 }
977 
978 
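/*
 * Create the character device (named after the port's ifnet) through which
 * cxgb_extension_ioctl() is reached.
 */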
979 static int
980 cxgb_makedev(struct port_info *pi)
981 {
982 
983 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
984 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
985 
986 	if (pi->port_cdev == NULL)
987 		return (ENOMEM);
988 
989 	pi->port_cdev->si_drv1 = (void *)pi;
990 
991 	return (0);
992 }
993 
994 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
995     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
996     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
997 #define CXGB_CAP_ENABLE CXGB_CAP
998 
999 static int
1000 cxgb_port_attach(device_t dev)
1001 {
1002 	struct port_info *p;
1003 	struct ifnet *ifp;
1004 	int err;
1005 	struct adapter *sc;
1006 
1007 	p = device_get_softc(dev);
1008 	sc = p->adapter;
1009 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1010 	    device_get_unit(device_get_parent(dev)), p->port_id);
1011 	PORT_LOCK_INIT(p, p->lockbuf);
1012 
1013 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1014 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1015 
1016 	/* Allocate an ifnet object and set it up */
1017 	ifp = p->ifp = if_alloc(IFT_ETHER);
1018 	if (ifp == NULL) {
1019 		device_printf(dev, "Cannot allocate ifnet\n");
1020 		return (ENOMEM);
1021 	}
1022 
1023 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1024 	ifp->if_init = cxgb_init;
1025 	ifp->if_softc = p;
1026 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1027 	ifp->if_ioctl = cxgb_ioctl;
1028 	ifp->if_transmit = cxgb_transmit;
1029 	ifp->if_qflush = cxgb_qflush;
1030 
1031 	ifp->if_capabilities = CXGB_CAP;
1032 #ifdef TCP_OFFLOAD
1033 	if (is_offload(sc))
1034 		ifp->if_capabilities |= IFCAP_TOE4;
1035 #endif
1036 	ifp->if_capenable = CXGB_CAP_ENABLE;
1037 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1038 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1039 
1040 	/*
1041 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1042 	 */
1043 	if (sc->params.nports > 2) {
1044 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1045 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1046 		ifp->if_hwassist &= ~CSUM_TSO;
1047 	}
1048 
1049 	ether_ifattach(ifp, p->hw_addr);
1050 
1051 #ifdef DEFAULT_JUMBO
1052 	if (sc->params.nports <= 2)
1053 		ifp->if_mtu = ETHERMTU_JUMBO;
1054 #endif
1055 	if ((err = cxgb_makedev(p)) != 0) {
1056 		printf("makedev failed %d\n", err);
1057 		return (err);
1058 	}
1059 
1060 	/* Create a list of media supported by this port */
1061 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1062 	    cxgb_media_status);
1063 	cxgb_build_medialist(p);
1064 
1065 	t3_sge_init_port(p);
1066 
1067 	return (err);
1068 }
1069 
1070 /*
1071  * cxgb_port_detach() is called via the device_detach method when
1072  * cxgb_free() calls bus_generic_detach().  It is responsible for
1073  * removing the device from the view of the kernel, i.e. from all
1074  * interface lists etc.  This routine is only called when the driver is
1075  * being unloaded, not when the link goes down.
1076  */
1077 static int
1078 cxgb_port_detach(device_t dev)
1079 {
1080 	struct port_info *p;
1081 	struct adapter *sc;
1082 	int i;
1083 
1084 	p = device_get_softc(dev);
1085 	sc = p->adapter;
1086 
1087 	/* Tell cxgb_ioctl and if_init that the port is going away */
1088 	ADAPTER_LOCK(sc);
1089 	SET_DOOMED(p);
1090 	wakeup(&sc->flags);
1091 	while (IS_BUSY(sc))
1092 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1093 	SET_BUSY(sc);
1094 	ADAPTER_UNLOCK(sc);
1095 
1096 	if (p->port_cdev != NULL)
1097 		destroy_dev(p->port_cdev);
1098 
1099 	cxgb_uninit_synchronized(p);
1100 	ether_ifdetach(p->ifp);
1101 
1102 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1103 		struct sge_qset *qs = &sc->sge.qs[i];
1104 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1105 
1106 		callout_drain(&txq->txq_watchdog);
1107 		callout_drain(&txq->txq_timer);
1108 	}
1109 
1110 	PORT_LOCK_DEINIT(p);
1111 	if_free(p->ifp);
1112 	p->ifp = NULL;
1113 
1114 	ADAPTER_LOCK(sc);
1115 	CLR_BUSY(sc);
1116 	wakeup_one(&sc->flags);
1117 	ADAPTER_UNLOCK(sc);
1118 	return (0);
1119 }
1120 
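/*
 * Handle a fatal hardware error: quiesce the SGE and MACs and disable
 * interrupts if the adapter was fully initialized, then log the firmware's
 * status words.
 */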
1121 void
1122 t3_fatal_err(struct adapter *sc)
1123 {
1124 	u_int fw_status[4];
1125 
1126 	if (sc->flags & FULL_INIT_DONE) {
1127 		t3_sge_stop(sc);
1128 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1129 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1130 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1131 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1132 		t3_intr_disable(sc);
1133 	}
1134 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1135 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1136 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1137 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1138 }
1139 
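/*
 * Walk the device's PCI capability list and return the config-space offset
 * of the requested capability, or 0 if it is not present.
 */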
1140 int
1141 t3_os_find_pci_capability(adapter_t *sc, int cap)
1142 {
1143 	device_t dev;
1144 	struct pci_devinfo *dinfo;
1145 	pcicfgregs *cfg;
1146 	uint32_t status;
1147 	uint8_t ptr;
1148 
1149 	dev = sc->dev;
1150 	dinfo = device_get_ivars(dev);
1151 	cfg = &dinfo->cfg;
1152 
1153 	status = pci_read_config(dev, PCIR_STATUS, 2);
1154 	if (!(status & PCIM_STATUS_CAPPRESENT))
1155 		return (0);
1156 
1157 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1158 	case 0:
1159 	case 1:
1160 		ptr = PCIR_CAP_PTR;
1161 		break;
1162 	case 2:
1163 		ptr = PCIR_CAP_PTR_2;
1164 		break;
1165 	default:
1166 		return (0);
1167 		break;
1168 	}
1169 	ptr = pci_read_config(dev, ptr, 1);
1170 
1171 	while (ptr != 0) {
1172 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1173 			return (ptr);
1174 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1175 	}
1176 
1177 	return (0);
1178 }
1179 
1180 int
1181 t3_os_pci_save_state(struct adapter *sc)
1182 {
1183 	device_t dev;
1184 	struct pci_devinfo *dinfo;
1185 
1186 	dev = sc->dev;
1187 	dinfo = device_get_ivars(dev);
1188 
1189 	pci_cfg_save(dev, dinfo, 0);
1190 	return (0);
1191 }
1192 
1193 int
1194 t3_os_pci_restore_state(struct adapter *sc)
1195 {
1196 	device_t dev;
1197 	struct pci_devinfo *dinfo;
1198 
1199 	dev = sc->dev;
1200 	dinfo = device_get_ivars(dev);
1201 
1202 	pci_cfg_restore(dev, dinfo);
1203 	return (0);
1204 }
1205 
1206 /**
1207  *	t3_os_link_changed - handle link status changes
1208  *	@adapter: the adapter associated with the link change
1209  *	@port_id: the port index whose link status has changed
1210  *	@link_status: the new status of the link
1211  *	@speed: the new speed setting
1212  *	@duplex: the new duplex setting
1213  *	@fc: the new flow-control setting
1214  *
1215  *	This is the OS-dependent handler for link status changes.  The OS
1216  *	neutral handler takes care of most of the processing for these events,
1217  *	then calls this handler for any OS-specific processing.
1218  */
1219 void
1220 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1221      int duplex, int fc, int mac_was_reset)
1222 {
1223 	struct port_info *pi = &adapter->port[port_id];
1224 	struct ifnet *ifp = pi->ifp;
1225 
1226 	/* no race with detach, so ifp should always be good */
1227 	KASSERT(ifp, ("%s: if detached.", __func__));
1228 
1229 	/* Reapply mac settings if they were lost due to a reset */
1230 	if (mac_was_reset) {
1231 		PORT_LOCK(pi);
1232 		cxgb_update_mac_settings(pi);
1233 		PORT_UNLOCK(pi);
1234 	}
1235 
1236 	if (link_status) {
1237 		ifp->if_baudrate = IF_Mbps(speed);
1238 		if_link_state_change(ifp, LINK_STATE_UP);
1239 	} else
1240 		if_link_state_change(ifp, LINK_STATE_DOWN);
1241 }
1242 
1243 /**
1244  *	t3_os_phymod_changed - handle PHY module changes
1245  *	@adap: the adapter whose PHY module changed
1246  *	@port_id: the port index whose PHY module changed
1247  *
1248  *	This is the OS-dependent handler for PHY module changes.  It is
1249  *	invoked when a PHY module is removed or inserted for any OS-specific
1250  *	processing.
1251  */
1252 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1253 {
1254 	static const char *mod_str[] = {
1255 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1256 	};
1257 	struct port_info *pi = &adap->port[port_id];
1258 	int mod = pi->phy.modtype;
1259 
1260 	if (mod != pi->media.ifm_cur->ifm_data)
1261 		cxgb_build_medialist(pi);
1262 
1263 	if (mod == phy_modtype_none)
1264 		if_printf(pi->ifp, "PHY module unplugged\n");
1265 	else {
1266 		KASSERT(mod < ARRAY_SIZE(mod_str),
1267 			("invalid PHY module type %d", mod));
1268 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1269 	}
1270 }
1271 
1272 void
1273 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1274 {
1275 
1276 	/*
1277  * The ifnet might not be allocated before this gets called, as this is
1278  * called early on in attach by t3_prep_adapter, so just save the address
1279  * off in the port structure.
1280 	 */
1281 	if (cxgb_debug)
1282 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1283 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1284 }
1285 
1286 /*
1287  * Programs the XGMAC based on the settings in the ifnet.  These settings
1288  * include MTU, MAC address, mcast addresses, etc.
1289  */
1290 static void
1291 cxgb_update_mac_settings(struct port_info *p)
1292 {
1293 	struct ifnet *ifp = p->ifp;
1294 	struct t3_rx_mode rm;
1295 	struct cmac *mac = &p->mac;
1296 	int mtu, hwtagging;
1297 
1298 	PORT_LOCK_ASSERT_OWNED(p);
1299 
1300 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1301 
1302 	mtu = ifp->if_mtu;
1303 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1304 		mtu += ETHER_VLAN_ENCAP_LEN;
1305 
1306 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1307 
1308 	t3_mac_set_mtu(mac, mtu);
1309 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1310 	t3_mac_set_address(mac, 0, p->hw_addr);
1311 	t3_init_rx_mode(&rm, p);
1312 	t3_mac_set_rx_mode(mac, &rm);
1313 }
1314 
1315 
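/*
 * Wait for n replies to management-queue commands to show up on response
 * queue 0 (counted via its offload_pkts counter), polling every 10ms for up
 * to five attempts.
 */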
1316 static int
1317 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1318 			      unsigned long n)
1319 {
1320 	int attempts = 5;
1321 
1322 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1323 		if (!--attempts)
1324 			return (ETIMEDOUT);
1325 		t3_os_sleep(10);
1326 	}
1327 	return 0;
1328 }
1329 
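/*
 * Write every SMT, L2T, and routing-table entry (plus one TCB field) via the
 * management Tx queue so that these parity-protected TP memories start out
 * with valid parity.  Run once on T3C adapters before the TP parity
 * interrupts are enabled in cxgb_up().
 */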
1330 static int
1331 init_tp_parity(struct adapter *adap)
1332 {
1333 	int i;
1334 	struct mbuf *m;
1335 	struct cpl_set_tcb_field *greq;
1336 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1337 
1338 	t3_tp_set_offload_mode(adap, 1);
1339 
1340 	for (i = 0; i < 16; i++) {
1341 		struct cpl_smt_write_req *req;
1342 
1343 		m = m_gethdr(M_WAITOK, MT_DATA);
1344 		req = mtod(m, struct cpl_smt_write_req *);
1345 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1346 		memset(req, 0, sizeof(*req));
1347 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1348 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1349 		req->iff = i;
1350 		t3_mgmt_tx(adap, m);
1351 	}
1352 
1353 	for (i = 0; i < 2048; i++) {
1354 		struct cpl_l2t_write_req *req;
1355 
1356 		m = m_gethdr(M_WAITOK, MT_DATA);
1357 		req = mtod(m, struct cpl_l2t_write_req *);
1358 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1359 		memset(req, 0, sizeof(*req));
1360 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1361 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1362 		req->params = htonl(V_L2T_W_IDX(i));
1363 		t3_mgmt_tx(adap, m);
1364 	}
1365 
1366 	for (i = 0; i < 2048; i++) {
1367 		struct cpl_rte_write_req *req;
1368 
1369 		m = m_gethdr(M_WAITOK, MT_DATA);
1370 		req = mtod(m, struct cpl_rte_write_req *);
1371 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1372 		memset(req, 0, sizeof(*req));
1373 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1374 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1375 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1376 		t3_mgmt_tx(adap, m);
1377 	}
1378 
1379 	m = m_gethdr(M_WAITOK, MT_DATA);
1380 	greq = mtod(m, struct cpl_set_tcb_field *);
1381 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1382 	memset(greq, 0, sizeof(*greq));
1383 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1384 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1385 	greq->mask = htobe64(1);
1386 	t3_mgmt_tx(adap, m);
1387 
1388 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1389 	t3_tp_set_offload_mode(adap, 0);
1390 	return (i);
1391 }
1392 
1393 /**
1394  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1395  *	@adap: the adapter
1396  *
1397  *	Sets up RSS to distribute packets to multiple receive queues.  We
1398  *	configure the RSS CPU lookup table to distribute to the number of HW
1399  *	receive queues, and the response queue lookup table to narrow that
1400  *	down to the response queues actually configured for each port.
1401  *	We always configure the RSS mapping for two ports since the mapping
1402  *	table has plenty of entries.
1403  */
1404 static void
1405 setup_rss(adapter_t *adap)
1406 {
1407 	int i;
1408 	u_int nq[2];
1409 	uint8_t cpus[SGE_QSETS + 1];
1410 	uint16_t rspq_map[RSS_TABLE_SIZE];
1411 
1412 	for (i = 0; i < SGE_QSETS; ++i)
1413 		cpus[i] = i;
1414 	cpus[SGE_QSETS] = 0xff;
1415 
1416 	nq[0] = nq[1] = 0;
1417 	for_each_port(adap, i) {
1418 		const struct port_info *pi = adap2pinfo(adap, i);
1419 
1420 		nq[pi->tx_chan] += pi->nqsets;
1421 	}
1422 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1423 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1424 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1425 	}
1426 
1427 	/* Calculate the reverse RSS map table */
1428 	for (i = 0; i < SGE_QSETS; ++i)
1429 		adap->rrss_map[i] = 0xff;
1430 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1431 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1432 			adap->rrss_map[rspq_map[i]] = i;
1433 
1434 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1435 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1436 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1437 	              cpus, rspq_map);
1438 
1439 }
1440 static void
1441 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1442 			      int hi, int port)
1443 {
1444 	struct mbuf *m;
1445 	struct mngt_pktsched_wr *req;
1446 
1447 	m = m_gethdr(M_NOWAIT, MT_DATA);
1448 	if (m) {
1449 		req = mtod(m, struct mngt_pktsched_wr *);
1450 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1451 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1452 		req->sched = sched;
1453 		req->idx = qidx;
1454 		req->min = lo;
1455 		req->max = hi;
1456 		req->binding = port;
1457 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1458 		t3_mgmt_tx(adap, m);
1459 	}
1460 }
1461 
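/*
 * Bind each port's queue sets to that port's Tx channel using the firmware's
 * packet-scheduler management command.
 */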
1462 static void
1463 bind_qsets(adapter_t *sc)
1464 {
1465 	int i, j;
1466 
1467 	for (i = 0; i < (sc)->params.nports; ++i) {
1468 		const struct port_info *pi = adap2pinfo(sc, i);
1469 
1470 		for (j = 0; j < pi->nqsets; ++j) {
1471 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1472 					  -1, pi->tx_chan);
1473 
1474 		}
1475 	}
1476 }
1477 
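/*
 * If the protocol SRAM image stored in the adapter's EEPROM is older than the
 * version this driver was built against, replace it with the contents of the
 * TP EEPROM firmware module (TPEEPROM_NAME).
 */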
1478 static void
1479 update_tpeeprom(struct adapter *adap)
1480 {
1481 	const struct firmware *tpeeprom;
1482 
1483 	uint32_t version;
1484 	unsigned int major, minor;
1485 	int ret, len;
1486 	char rev, name[32];
1487 
1488 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1489 
1490 	major = G_TP_VERSION_MAJOR(version);
1491 	minor = G_TP_VERSION_MINOR(version);
1492 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1493 		return;
1494 
1495 	rev = t3rev2char(adap);
1496 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1497 
1498 	tpeeprom = firmware_get(name);
1499 	if (tpeeprom == NULL) {
1500 		device_printf(adap->dev,
1501 			      "could not load TP EEPROM: unable to load %s\n",
1502 			      name);
1503 		return;
1504 	}
1505 
1506 	len = tpeeprom->datasize - 4;
1507 
1508 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1509 	if (ret)
1510 		goto release_tpeeprom;
1511 
1512 	if (len != TP_SRAM_LEN) {
1513 		device_printf(adap->dev,
1514 			      "%s length is wrong len=%d expected=%d\n", name,
1515 			      len, TP_SRAM_LEN);
1516 		return;
1517 	}
1518 
1519 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1520 	    TP_SRAM_OFFSET);
1521 
1522 	if (!ret) {
1523 		device_printf(adap->dev,
1524 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1525 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1526 	} else
1527 		device_printf(adap->dev,
1528 			      "Protocol SRAM image update in EEPROM failed\n");
1529 
1530 release_tpeeprom:
1531 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1532 
1533 	return;
1534 }
1535 
1536 static int
1537 update_tpsram(struct adapter *adap)
1538 {
1539 	const struct firmware *tpsram;
1540 	int ret;
1541 	char rev, name[32];
1542 
1543 	rev = t3rev2char(adap);
1544 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1545 
1546 	update_tpeeprom(adap);
1547 
1548 	tpsram = firmware_get(name);
1549 	if (tpsram == NULL){
1550 		device_printf(adap->dev, "could not load TP SRAM\n");
1551 		return (EINVAL);
1552 	} else
1553 		device_printf(adap->dev, "updating TP SRAM\n");
1554 
1555 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1556 	if (ret)
1557 		goto release_tpsram;
1558 
1559 	ret = t3_set_proto_sram(adap, tpsram->data);
1560 	if (ret)
1561 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1562 
1563 release_tpsram:
1564 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1565 
1566 	return ret;
1567 }
1568 
1569 /**
1570  *	cxgb_up - enable the adapter
1571  *	@adap: adapter being enabled
1572  *
1573  *	Called when the first port is enabled, this function performs the
1574  *	actions necessary to make an adapter operational, such as completing
1575  *	the initialization of HW modules, and enabling interrupts.
1576  */
1577 static int
1578 cxgb_up(struct adapter *sc)
1579 {
1580 	int err = 0;
1581 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1582 
1583 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1584 					   __func__, sc->open_device_map));
1585 
1586 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1587 
1588 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1589 
1590 		if ((sc->flags & FW_UPTODATE) == 0)
1591 			if ((err = upgrade_fw(sc)))
1592 				goto out;
1593 
1594 		if ((sc->flags & TPS_UPTODATE) == 0)
1595 			if ((err = update_tpsram(sc)))
1596 				goto out;
1597 
1598 		if (is_offload(sc) && nfilters != 0) {
1599 			sc->params.mc5.nservers = 0;
1600 
1601 			if (nfilters < 0)
1602 				sc->params.mc5.nfilters = mxf;
1603 			else
1604 				sc->params.mc5.nfilters = min(nfilters, mxf);
1605 		}
1606 
1607 		err = t3_init_hw(sc, 0);
1608 		if (err)
1609 			goto out;
1610 
1611 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1612 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1613 
1614 		err = setup_sge_qsets(sc);
1615 		if (err)
1616 			goto out;
1617 
1618 		alloc_filters(sc);
1619 		setup_rss(sc);
1620 
1621 		t3_add_configured_sysctls(sc);
1622 		sc->flags |= FULL_INIT_DONE;
1623 	}
1624 
1625 	t3_intr_clear(sc);
1626 	t3_sge_start(sc);
1627 	t3_intr_enable(sc);
1628 
1629 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1630 	    is_offload(sc) && init_tp_parity(sc) == 0)
1631 		sc->flags |= TP_PARITY_INIT;
1632 
1633 	if (sc->flags & TP_PARITY_INIT) {
1634 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1635 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1636 	}
1637 
1638 	if (!(sc->flags & QUEUES_BOUND)) {
1639 		bind_qsets(sc);
1640 		setup_hw_filters(sc);
1641 		sc->flags |= QUEUES_BOUND;
1642 	}
1643 
1644 	t3_sge_reset_adapter(sc);
1645 out:
1646 	return (err);
1647 }
1648 
1649 /*
1650  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1651  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1652  * during controller_detach, not here.
1653  */
1654 static void
1655 cxgb_down(struct adapter *sc)
1656 {
1657 	t3_sge_stop(sc);
1658 	t3_intr_disable(sc);
1659 }
1660 
1661 /*
1662  * if_init for cxgb ports.
1663  */
1664 static void
1665 cxgb_init(void *arg)
1666 {
1667 	struct port_info *p = arg;
1668 	struct adapter *sc = p->adapter;
1669 
1670 	ADAPTER_LOCK(sc);
1671 	cxgb_init_locked(p); /* releases adapter lock */
1672 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1673 }
1674 
1675 static int
1676 cxgb_init_locked(struct port_info *p)
1677 {
1678 	struct adapter *sc = p->adapter;
1679 	struct ifnet *ifp = p->ifp;
1680 	struct cmac *mac = &p->mac;
1681 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1682 
1683 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1684 
1685 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1686 		gave_up_lock = 1;
1687 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1688 			rc = EINTR;
1689 			goto done;
1690 		}
1691 	}
1692 	if (IS_DOOMED(p)) {
1693 		rc = ENXIO;
1694 		goto done;
1695 	}
1696 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1697 
1698 	/*
1699 	 * The code that runs during one-time adapter initialization can sleep
1700 	 * so it's important not to hold any locks across it.
1701 	 */
1702 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1703 
1704 	if (may_sleep) {
1705 		SET_BUSY(sc);
1706 		gave_up_lock = 1;
1707 		ADAPTER_UNLOCK(sc);
1708 	}
1709 
1710 	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1711 			goto done;
1712 
1713 	PORT_LOCK(p);
1714 	if (isset(&sc->open_device_map, p->port_id) &&
1715 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1716 		PORT_UNLOCK(p);
1717 		goto done;
1718 	}
1719 	t3_port_intr_enable(sc, p->port_id);
1720 	if (!mac->multiport)
1721 		t3_mac_init(mac);
1722 	cxgb_update_mac_settings(p);
1723 	t3_link_start(&p->phy, mac, &p->link_config);
1724 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1725 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1726 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1727 	PORT_UNLOCK(p);
1728 
1729 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1730 		struct sge_qset *qs = &sc->sge.qs[i];
1731 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1732 
1733 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1734 				 txq->txq_watchdog.c_cpu);
1735 	}
1736 
1737 	/* all ok */
1738 	setbit(&sc->open_device_map, p->port_id);
1739 	callout_reset(&p->link_check_ch,
1740 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1741 	    link_check_callout, p);
1742 
1743 done:
1744 	if (may_sleep) {
1745 		ADAPTER_LOCK(sc);
1746 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1747 		CLR_BUSY(sc);
1748 	}
1749 	if (gave_up_lock)
1750 		wakeup_one(&sc->flags);
1751 	ADAPTER_UNLOCK(sc);
1752 	return (rc);
1753 }
1754 
1755 static int
1756 cxgb_uninit_locked(struct port_info *p)
1757 {
1758 	struct adapter *sc = p->adapter;
1759 	int rc;
1760 
1761 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1762 
1763 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1764 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1765 			rc = EINTR;
1766 			goto done;
1767 		}
1768 	}
1769 	if (IS_DOOMED(p)) {
1770 		rc = ENXIO;
1771 		goto done;
1772 	}
1773 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1774 	SET_BUSY(sc);
1775 	ADAPTER_UNLOCK(sc);
1776 
1777 	rc = cxgb_uninit_synchronized(p);
1778 
1779 	ADAPTER_LOCK(sc);
1780 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1781 	CLR_BUSY(sc);
1782 	wakeup_one(&sc->flags);
1783 done:
1784 	ADAPTER_UNLOCK(sc);
1785 	return (rc);
1786 }
1787 
1788 /*
1789  * Called on "ifconfig down", and from cxgb_port_detach().
1790  */
1791 static int
1792 cxgb_uninit_synchronized(struct port_info *pi)
1793 {
1794 	struct adapter *sc = pi->adapter;
1795 	struct ifnet *ifp = pi->ifp;
1796 
1797 	/*
1798 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1799 	 */
1800 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1801 
1802 	/*
1803 	 * Clear this port's bit from the open device map, and then drain all
1804 	 * the tasks that can access/manipulate this port's port_info or ifp.
1805 	 * We disable this port's interrupts here and so the slow/ext
1806 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1807 	 * be enqueued every second but the runs after this drain will not see
1808 	 * this port in the open device map.
1809 	 *
1810 	 * A well behaved task must take open_device_map into account and ignore
1811 	 * A well-behaved task must take open_device_map into account and ignore
1812 	 */
1813 	clrbit(&sc->open_device_map, pi->port_id);
1814 	t3_port_intr_disable(sc, pi->port_id);
1815 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1816 	taskqueue_drain(sc->tq, &sc->tick_task);
1817 
1818 	callout_drain(&pi->link_check_ch);
1819 	taskqueue_drain(sc->tq, &pi->link_check_task);
1820 
1821 	PORT_LOCK(pi);
1822 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1823 
1824 	/* disable pause frames */
1825 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1826 
1827 	/* Reset RX FIFO HWM */
1828 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1829 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1830 
1831 	DELAY(100 * 1000);
1832 
1833 	/* Wait for TXFIFO empty */
1834 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1835 			F_TXFIFO_EMPTY, 1, 20, 5);
1836 
1837 	DELAY(100 * 1000);
1838 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1839 
1840 	pi->phy.ops->power_down(&pi->phy, 1);
1841 
1842 	PORT_UNLOCK(pi);
1843 
1844 	pi->link_config.link_ok = 0;
1845 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1846 
1847 	if (sc->open_device_map == 0)
1848 		cxgb_down(pi->adapter);
1849 
1850 	return (0);
1851 }
1852 
1853 /*
1854  * Mark lro enabled or disabled in all qsets for this port
1855  */
1856 static int
1857 cxgb_set_lro(struct port_info *p, int enabled)
1858 {
1859 	int i;
1860 	struct adapter *adp = p->adapter;
1861 	struct sge_qset *q;
1862 
1863 	for (i = 0; i < p->nqsets; i++) {
1864 		q = &adp->sge.qs[p->first_qset + i];
1865 		q->lro.enabled = (enabled != 0);
1866 	}
1867 	return (0);
1868 }
1869 
1870 static int
1871 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1872 {
1873 	struct port_info *p = ifp->if_softc;
1874 	struct adapter *sc = p->adapter;
1875 	struct ifreq *ifr = (struct ifreq *)data;
1876 	int flags, error = 0, mtu;
1877 	uint32_t mask;
1878 
1879 	switch (command) {
1880 	case SIOCSIFMTU:
1881 		ADAPTER_LOCK(sc);
1882 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1883 		if (error) {
1884 fail:
1885 			ADAPTER_UNLOCK(sc);
1886 			return (error);
1887 		}
1888 
1889 		mtu = ifr->ifr_mtu;
1890 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1891 			error = EINVAL;
1892 		} else {
1893 			ifp->if_mtu = mtu;
1894 			PORT_LOCK(p);
1895 			cxgb_update_mac_settings(p);
1896 			PORT_UNLOCK(p);
1897 		}
1898 		ADAPTER_UNLOCK(sc);
1899 		break;
1900 	case SIOCSIFFLAGS:
1901 		ADAPTER_LOCK(sc);
1902 		if (IS_DOOMED(p)) {
1903 			error = ENXIO;
1904 			goto fail;
1905 		}
1906 		if (ifp->if_flags & IFF_UP) {
1907 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1908 				flags = p->if_flags;
1909 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1910 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1911 					if (IS_BUSY(sc)) {
1912 						error = EBUSY;
1913 						goto fail;
1914 					}
1915 					PORT_LOCK(p);
1916 					cxgb_update_mac_settings(p);
1917 					PORT_UNLOCK(p);
1918 				}
1919 				ADAPTER_UNLOCK(sc);
1920 			} else
1921 				error = cxgb_init_locked(p);
1922 			p->if_flags = ifp->if_flags;
1923 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1924 			error = cxgb_uninit_locked(p);
1925 		else
1926 			ADAPTER_UNLOCK(sc);
1927 
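		/*
		 * Every path above drops the adapter lock: either explicitly,
		 * or inside cxgb_init_locked/cxgb_uninit_locked.
		 */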
1928 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1929 		break;
1930 	case SIOCADDMULTI:
1931 	case SIOCDELMULTI:
1932 		ADAPTER_LOCK(sc);
1933 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1934 		if (error)
1935 			goto fail;
1936 
1937 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1938 			PORT_LOCK(p);
1939 			cxgb_update_mac_settings(p);
1940 			PORT_UNLOCK(p);
1941 		}
1942 		ADAPTER_UNLOCK(sc);
1943 
1944 		break;
1945 	case SIOCSIFCAP:
1946 		ADAPTER_LOCK(sc);
1947 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1948 		if (error)
1949 			goto fail;
1950 
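		/* mask holds the capability bits the caller wants toggled. */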
1951 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1952 		if (mask & IFCAP_TXCSUM) {
1953 			ifp->if_capenable ^= IFCAP_TXCSUM;
1954 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1955 
1956 			if (IFCAP_TSO4 & ifp->if_capenable &&
1957 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1958 				ifp->if_capenable &= ~IFCAP_TSO4;
1959 				if_printf(ifp,
1960 				    "tso4 disabled due to -txcsum.\n");
1961 			}
1962 		}
1963 		if (mask & IFCAP_TXCSUM_IPV6) {
1964 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1965 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1966 
1967 			if (IFCAP_TSO6 & ifp->if_capenable &&
1968 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1969 				ifp->if_capenable &= ~IFCAP_TSO6;
1970 				if_printf(ifp,
1971 				    "tso6 disabled due to -txcsum6.\n");
1972 			}
1973 		}
1974 		if (mask & IFCAP_RXCSUM)
1975 			ifp->if_capenable ^= IFCAP_RXCSUM;
1976 		if (mask & IFCAP_RXCSUM_IPV6)
1977 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1978 
1979 		/*
1980 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1981 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1982 		 * sending a TSO request our way, so it's sufficient to toggle
1983 		 * IFCAP_TSOx only.
1984 		 */
1985 		if (mask & IFCAP_TSO4) {
1986 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1987 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1988 				if_printf(ifp, "enable txcsum first.\n");
1989 				error = EAGAIN;
1990 				goto fail;
1991 			}
1992 			ifp->if_capenable ^= IFCAP_TSO4;
1993 		}
1994 		if (mask & IFCAP_TSO6) {
1995 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1996 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1997 				if_printf(ifp, "enable txcsum6 first.\n");
1998 				error = EAGAIN;
1999 				goto fail;
2000 			}
2001 			ifp->if_capenable ^= IFCAP_TSO6;
2002 		}
2003 		if (mask & IFCAP_LRO) {
2004 			ifp->if_capenable ^= IFCAP_LRO;
2005 
2006 			/* Safe to do this even if cxgb_up not called yet */
2007 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2008 		}
2009 #ifdef TCP_OFFLOAD
2010 		if (mask & IFCAP_TOE4) {
2011 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2012 
2013 			error = toe_capability(p, enable);
2014 			if (error == 0)
2015 				ifp->if_capenable ^= mask;
2016 		}
2017 #endif
2018 		if (mask & IFCAP_VLAN_HWTAGGING) {
2019 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2020 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2021 				PORT_LOCK(p);
2022 				cxgb_update_mac_settings(p);
2023 				PORT_UNLOCK(p);
2024 			}
2025 		}
2026 		if (mask & IFCAP_VLAN_MTU) {
2027 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2028 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2029 				PORT_LOCK(p);
2030 				cxgb_update_mac_settings(p);
2031 				PORT_UNLOCK(p);
2032 			}
2033 		}
2034 		if (mask & IFCAP_VLAN_HWTSO)
2035 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2036 		if (mask & IFCAP_VLAN_HWCSUM)
2037 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2038 
2039 #ifdef VLAN_CAPABILITIES
2040 		VLAN_CAPABILITIES(ifp);
2041 #endif
2042 		ADAPTER_UNLOCK(sc);
2043 		break;
2044 	case SIOCSIFMEDIA:
2045 	case SIOCGIFMEDIA:
2046 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2047 		break;
2048 	default:
2049 		error = ether_ioctl(ifp, command, data);
2050 	}
2051 
2052 	return (error);
2053 }
2054 
2055 static int
2056 cxgb_media_change(struct ifnet *ifp)
2057 {
2058 	return (EOPNOTSUPP);
2059 }
2060 
2061 /*
2062  * Translates phy->modtype to the correct Ethernet media subtype.
2063  */
2064 static int
2065 cxgb_ifm_type(int mod)
2066 {
2067 	switch (mod) {
2068 	case phy_modtype_sr:
2069 		return (IFM_10G_SR);
2070 	case phy_modtype_lr:
2071 		return (IFM_10G_LR);
2072 	case phy_modtype_lrm:
2073 		return (IFM_10G_LRM);
2074 	case phy_modtype_twinax:
2075 		return (IFM_10G_TWINAX);
2076 	case phy_modtype_twinax_long:
2077 		return (IFM_10G_TWINAX_LONG);
2078 	case phy_modtype_none:
2079 		return (IFM_NONE);
2080 	case phy_modtype_unknown:
2081 		return (IFM_UNKNOWN);
2082 	}
2083 
2084 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2085 	return (IFM_UNKNOWN);
2086 }
2087 
2088 /*
2089  * Rebuilds the ifmedia list for this port, and sets the current media.
2090  */
2091 static void
2092 cxgb_build_medialist(struct port_info *p)
2093 {
2094 	struct cphy *phy = &p->phy;
2095 	struct ifmedia *media = &p->media;
2096 	int mod = phy->modtype;
2097 	int m = IFM_ETHER | IFM_FDX;
2098 
2099 	PORT_LOCK(p);
2100 
2101 	ifmedia_removeall(media);
2102 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2103 		/* Copper (RJ45) */
2104 
2105 		if (phy->caps & SUPPORTED_10000baseT_Full)
2106 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2107 
2108 		if (phy->caps & SUPPORTED_1000baseT_Full)
2109 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2110 
2111 		if (phy->caps & SUPPORTED_100baseT_Full)
2112 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2113 
2114 		if (phy->caps & SUPPORTED_10baseT_Full)
2115 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2116 
2117 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2118 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2119 
2120 	} else if (phy->caps & SUPPORTED_TP) {
2121 		/* Copper (CX4) */
2122 
2123 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2124 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2125 
2126 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2127 		ifmedia_set(media, m | IFM_10G_CX4);
2128 
2129 	} else if (phy->caps & SUPPORTED_FIBRE &&
2130 		   phy->caps & SUPPORTED_10000baseT_Full) {
2131 		/* 10G optical (but includes SFP+ twinax) */
2132 
2133 		m |= cxgb_ifm_type(mod);
2134 		if (IFM_SUBTYPE(m) == IFM_NONE)
2135 			m &= ~IFM_FDX;
2136 
2137 		ifmedia_add(media, m, mod, NULL);
2138 		ifmedia_set(media, m);
2139 
2140 	} else if (phy->caps & SUPPORTED_FIBRE &&
2141 		   phy->caps & SUPPORTED_1000baseT_Full) {
2142 		/* 1G optical */
2143 
2144 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2145 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2146 		ifmedia_set(media, m | IFM_1000_SX);
2147 
2148 	} else {
2149 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2150 			    phy->caps));
2151 	}
2152 
2153 	PORT_UNLOCK(p);
2154 }
2155 
2156 static void
2157 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2158 {
2159 	struct port_info *p = ifp->if_softc;
2160 	struct ifmedia_entry *cur = p->media.ifm_cur;
2161 	int speed = p->link_config.speed;
2162 
2163 	if (cur->ifm_data != p->phy.modtype) {
2164 		cxgb_build_medialist(p);
2165 		cur = p->media.ifm_cur;
2166 	}
2167 
2168 	ifmr->ifm_status = IFM_AVALID;
2169 	if (!p->link_config.link_ok)
2170 		return;
2171 
2172 	ifmr->ifm_status |= IFM_ACTIVE;
2173 
2174 	/*
2175 	 * active and current will differ iff current media is autoselect.  That
2176 	 * can happen only for copper RJ45.
2177 	 */
2178 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2179 		return;
2180 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2181 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2182 
2183 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2184 	if (speed == SPEED_10000)
2185 		ifmr->ifm_active |= IFM_10G_T;
2186 	else if (speed == SPEED_1000)
2187 		ifmr->ifm_active |= IFM_1000_T;
2188 	else if (speed == SPEED_100)
2189 		ifmr->ifm_active |= IFM_100_TX;
2190 	else if (speed == SPEED_10)
2191 		ifmr->ifm_active |= IFM_10_T;
2192 	else
2193 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2194 			    speed));
2195 }
2196 
2197 static void
2198 cxgb_async_intr(void *data)
2199 {
2200 	adapter_t *sc = data;
2201 
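	/*
	 * Mask further PL interrupts (the read-back flushes the write) and
	 * defer the actual servicing to the slow interrupt task.
	 */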
2202 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2203 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2204 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2205 }
2206 
2207 static void
2208 link_check_callout(void *arg)
2209 {
2210 	struct port_info *pi = arg;
2211 	struct adapter *sc = pi->adapter;
2212 
2213 	if (!isset(&sc->open_device_map, pi->port_id))
2214 		return;
2215 
2216 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2217 }
2218 
2219 static void
2220 check_link_status(void *arg, int pending)
2221 {
2222 	struct port_info *pi = arg;
2223 	struct adapter *sc = pi->adapter;
2224 
2225 	if (!isset(&sc->open_device_map, pi->port_id))
2226 		return;
2227 
2228 	t3_link_changed(sc, pi->port_id);
2229 
2230 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2231 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2232 }
2233 
2234 void
2235 t3_os_link_intr(struct port_info *pi)
2236 {
2237 	/*
2238 	 * Schedule a link check in the near future.  If the link is flapping
2239 	 * rapidly we'll keep resetting the callout and delaying the check until
2240 	 * things stabilize a bit.
2241 	 */
2242 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2243 }
2244 
2245 static void
2246 check_t3b2_mac(struct adapter *sc)
2247 {
2248 	int i;
2249 
2250 	if (sc->flags & CXGB_SHUTDOWN)
2251 		return;
2252 
2253 	for_each_port(sc, i) {
2254 		struct port_info *p = &sc->port[i];
2255 		int status;
2256 #ifdef INVARIANTS
2257 		struct ifnet *ifp = p->ifp;
2258 #endif
2259 
2260 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2261 		    !p->link_config.link_ok)
2262 			continue;
2263 
2264 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2265 			("%s: state mismatch (drv_flags %x, device_map %x)",
2266 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2267 
2268 		PORT_LOCK(p);
2269 		status = t3b2_mac_watchdog_task(&p->mac);
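		/*
		 * A return of 1 means the watchdog toggled the MAC; 2 means
		 * the MAC needs to be reinitialized, which is done below.
		 */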
2270 		if (status == 1)
2271 			p->mac.stats.num_toggled++;
2272 		else if (status == 2) {
2273 			struct cmac *mac = &p->mac;
2274 
2275 			cxgb_update_mac_settings(p);
2276 			t3_link_start(&p->phy, mac, &p->link_config);
2277 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2278 			t3_port_intr_enable(sc, p->port_id);
2279 			p->mac.stats.num_resets++;
2280 		}
2281 		PORT_UNLOCK(p);
2282 	}
2283 }
2284 
2285 static void
2286 cxgb_tick(void *arg)
2287 {
2288 	adapter_t *sc = (adapter_t *)arg;
2289 
2290 	if (sc->flags & CXGB_SHUTDOWN)
2291 		return;
2292 
2293 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2294 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2295 }
2296 
2297 static void
2298 cxgb_tick_handler(void *arg, int count)
2299 {
2300 	adapter_t *sc = (adapter_t *)arg;
2301 	const struct adapter_params *p = &sc->params;
2302 	int i;
2303 	uint32_t cause, reset;
2304 
2305 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2306 		return;
2307 
2308 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2309 		check_t3b2_mac(sc);
2310 
2311 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2312 	if (cause) {
2313 		struct sge_qset *qs = &sc->sge.qs[0];
2314 		uint32_t mask, v;
2315 
2316 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2317 
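		/*
		 * The low SGE_QSETS bits flag starved response queues; after
		 * skipping the RSPQXDISABLED bits there are two free-list
		 * "empty" bits per qset.
		 */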
2318 		mask = 1;
2319 		for (i = 0; i < SGE_QSETS; i++) {
2320 			if (v & mask)
2321 				qs[i].rspq.starved++;
2322 			mask <<= 1;
2323 		}
2324 
2325 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2326 
2327 		for (i = 0; i < SGE_QSETS * 2; i++) {
2328 			if (v & mask) {
2329 				qs[i / 2].fl[i % 2].empty++;
2330 			}
2331 			mask <<= 1;
2332 		}
2333 
2334 		/* clear */
2335 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2336 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2337 	}
2338 
2339 	for (i = 0; i < sc->params.nports; i++) {
2340 		struct port_info *pi = &sc->port[i];
2341 		struct ifnet *ifp = pi->ifp;
2342 		struct cmac *mac = &pi->mac;
2343 		struct mac_stats *mstats = &mac->stats;
2344 		int drops, j;
2345 
2346 		if (!isset(&sc->open_device_map, pi->port_id))
2347 			continue;
2348 
2349 		PORT_LOCK(pi);
2350 		t3_mac_update_stats(mac);
2351 		PORT_UNLOCK(pi);
2352 
2353 		ifp->if_opackets = mstats->tx_frames;
2354 		ifp->if_ipackets = mstats->rx_frames;
2355 		ifp->if_obytes = mstats->tx_octets;
2356 		ifp->if_ibytes = mstats->rx_octets;
2357 		ifp->if_omcasts = mstats->tx_mcast_frames;
2358 		ifp->if_imcasts = mstats->rx_mcast_frames;
2359 		ifp->if_collisions = mstats->tx_total_collisions;
2360 		ifp->if_iqdrops = mstats->rx_cong_drops;
2361 
2362 		drops = 0;
2363 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2364 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2365 		ifp->if_snd.ifq_drops = drops;
2366 
2367 		ifp->if_oerrors =
2368 		    mstats->tx_excess_collisions +
2369 		    mstats->tx_underrun +
2370 		    mstats->tx_len_errs +
2371 		    mstats->tx_mac_internal_errs +
2372 		    mstats->tx_excess_deferral +
2373 		    mstats->tx_fcs_errs;
2374 		ifp->if_ierrors =
2375 		    mstats->rx_jabber +
2376 		    mstats->rx_data_errs +
2377 		    mstats->rx_sequence_errs +
2378 		    mstats->rx_runt +
2379 		    mstats->rx_too_long +
2380 		    mstats->rx_mac_internal_errs +
2381 		    mstats->rx_short +
2382 		    mstats->rx_fcs_errs;
2383 
2384 		if (mac->multiport)
2385 			continue;
2386 
2387 		/* Count rx fifo overflows, once per second */
2388 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2389 		reset = 0;
2390 		if (cause & F_RXFIFO_OVERFLOW) {
2391 			mac->stats.rx_fifo_ovfl++;
2392 			reset |= F_RXFIFO_OVERFLOW;
2393 		}
2394 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2395 	}
2396 }
2397 
2398 static void
2399 touch_bars(device_t dev)
2400 {
2401 	/*
2402 	 * Don't enable yet
2403 	 */
2404 #if !defined(__LP64__) && 0
2405 	u32 v;
2406 
2407 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2408 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2409 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2410 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2411 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2412 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2413 #endif
2414 }
2415 
2416 static int
2417 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2418 {
2419 	uint8_t *buf;
2420 	int err = 0;
2421 	u32 aligned_offset, aligned_len, *p;
2422 	struct adapter *adapter = pi->adapter;
2423 
2424 
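	/*
	 * The EEPROM is written one 32-bit word at a time, so an unaligned
	 * request needs the partial first and last words read back and
	 * merged with the new data before writing.
	 */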
2425 	aligned_offset = offset & ~3;
2426 	aligned_len = (len + (offset & 3) + 3) & ~3;
2427 
2428 	if (aligned_offset != offset || aligned_len != len) {
2429 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2430 		if (!buf)
2431 			return (ENOMEM);
2432 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2433 		if (!err && aligned_len > 4)
2434 			err = t3_seeprom_read(adapter,
2435 					      aligned_offset + aligned_len - 4,
2436 					      (u32 *)&buf[aligned_len - 4]);
2437 		if (err)
2438 			goto out;
2439 		memcpy(buf + (offset & 3), data, len);
2440 	} else
2441 		buf = (uint8_t *)(uintptr_t)data;
2442 
2443 	err = t3_seeprom_wp(adapter, 0);
2444 	if (err)
2445 		goto out;
2446 
2447 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2448 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2449 		aligned_offset += 4;
2450 	}
2451 
2452 	if (!err)
2453 		err = t3_seeprom_wp(adapter, 1);
2454 out:
2455 	if (buf != data)
2456 		free(buf, M_DEVBUF);
2457 	return (err);
2458 }
2459 
2460 
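/*
 * A negative value means "leave this parameter unchanged" and is always
 * considered in range; callers check for >= 0 before applying it.
 */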
2461 static int
2462 in_range(int val, int lo, int hi)
2463 {
2464 	return (val < 0 || (val <= hi && val >= lo));
2465 }
2466 
2467 static int
2468 cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2469 {
2470 	return (0);
2471 }
2472 
2473 static int
2474 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2475 {
2476 	return (0);
2477 }
2478 
2479 static int
2480 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2481     int fflag, struct thread *td)
2482 {
2483 	int mmd, error = 0;
2484 	struct port_info *pi = dev->si_drv1;
2485 	adapter_t *sc = pi->adapter;
2486 
2487 #ifdef PRIV_SUPPORTED
2488 	if (priv_check(td, PRIV_DRIVER)) {
2489 		if (cxgb_debug)
2490 			printf("user does not have access to privileged ioctls\n");
2491 		return (EPERM);
2492 	}
2493 #else
2494 	if (suser(td)) {
2495 		if (cxgb_debug)
2496 			printf("user does not have access to privileged ioctls\n");
2497 		return (EPERM);
2498 	}
2499 #endif
2500 
2501 	switch (cmd) {
2502 	case CHELSIO_GET_MIIREG: {
2503 		uint32_t val;
2504 		struct cphy *phy = &pi->phy;
2505 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2506 
2507 		if (!phy->mdio_read)
2508 			return (EOPNOTSUPP);
2509 		if (is_10G(sc)) {
2510 			mmd = mid->phy_id >> 8;
2511 			if (!mmd)
2512 				mmd = MDIO_DEV_PCS;
2513 			else if (mmd > MDIO_DEV_VEND2)
2514 				return (EINVAL);
2515 
2516 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2517 					     mid->reg_num, &val);
2518 		} else
2519 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2520 					     mid->reg_num & 0x1f, &val);
2521 		if (error == 0)
2522 			mid->val_out = val;
2523 		break;
2524 	}
2525 	case CHELSIO_SET_MIIREG: {
2526 		struct cphy *phy = &pi->phy;
2527 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2528 
2529 		if (!phy->mdio_write)
2530 			return (EOPNOTSUPP);
2531 		if (is_10G(sc)) {
2532 			mmd = mid->phy_id >> 8;
2533 			if (!mmd)
2534 				mmd = MDIO_DEV_PCS;
2535 			else if (mmd > MDIO_DEV_VEND2)
2536 				return (EINVAL);
2537 
2538 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2539 					      mmd, mid->reg_num, mid->val_in);
2540 		} else
2541 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2542 					      mid->reg_num & 0x1f,
2543 					      mid->val_in);
2544 		break;
2545 	}
2546 	case CHELSIO_SETREG: {
2547 		struct ch_reg *edata = (struct ch_reg *)data;
2548 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2549 			return (EFAULT);
2550 		t3_write_reg(sc, edata->addr, edata->val);
2551 		break;
2552 	}
2553 	case CHELSIO_GETREG: {
2554 		struct ch_reg *edata = (struct ch_reg *)data;
2555 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2556 			return (EFAULT);
2557 		edata->val = t3_read_reg(sc, edata->addr);
2558 		break;
2559 	}
2560 	case CHELSIO_GET_SGE_CONTEXT: {
2561 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2562 		mtx_lock_spin(&sc->sge.reg_lock);
2563 		switch (ecntxt->cntxt_type) {
2564 		case CNTXT_TYPE_EGRESS:
2565 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2566 			    ecntxt->data);
2567 			break;
2568 		case CNTXT_TYPE_FL:
2569 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2570 			    ecntxt->data);
2571 			break;
2572 		case CNTXT_TYPE_RSP:
2573 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2574 			    ecntxt->data);
2575 			break;
2576 		case CNTXT_TYPE_CQ:
2577 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2578 			    ecntxt->data);
2579 			break;
2580 		default:
2581 			error = EINVAL;
2582 			break;
2583 		}
2584 		mtx_unlock_spin(&sc->sge.reg_lock);
2585 		break;
2586 	}
2587 	case CHELSIO_GET_SGE_DESC: {
2588 		struct ch_desc *edesc = (struct ch_desc *)data;
2589 		int ret;
2590 		if (edesc->queue_num >= SGE_QSETS * 6)
2591 			return (EINVAL);
2592 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2593 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2594 		if (ret < 0)
2595 			return (EINVAL);
2596 		edesc->size = ret;
2597 		break;
2598 	}
2599 	case CHELSIO_GET_QSET_PARAMS: {
2600 		struct qset_params *q;
2601 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2602 		int q1 = pi->first_qset;
2603 		int nqsets = pi->nqsets;
2604 		int i;
2605 
2606 		if (t->qset_idx >= nqsets)
2607 			return (EINVAL);
2608 
2609 		i = q1 + t->qset_idx;
2610 		q = &sc->params.sge.qset[i];
2611 		t->rspq_size   = q->rspq_size;
2612 		t->txq_size[0] = q->txq_size[0];
2613 		t->txq_size[1] = q->txq_size[1];
2614 		t->txq_size[2] = q->txq_size[2];
2615 		t->fl_size[0]  = q->fl_size;
2616 		t->fl_size[1]  = q->jumbo_size;
2617 		t->polling     = q->polling;
2618 		t->lro         = q->lro;
2619 		t->intr_lat    = q->coalesce_usecs;
2620 		t->cong_thres  = q->cong_thres;
2621 		t->qnum        = i;
2622 
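		/*
		 * Report the interrupt vector only once interrupts have been
		 * allocated; with MSI-X each qset has its own vector.
		 */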
2623 		if ((sc->flags & FULL_INIT_DONE) == 0)
2624 			t->vector = 0;
2625 		else if (sc->flags & USING_MSIX)
2626 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2627 		else
2628 			t->vector = rman_get_start(sc->irq_res);
2629 
2630 		break;
2631 	}
2632 	case CHELSIO_GET_QSET_NUM: {
2633 		struct ch_reg *edata = (struct ch_reg *)data;
2634 		edata->val = pi->nqsets;
2635 		break;
2636 	}
2637 	case CHELSIO_LOAD_FW: {
2638 		uint8_t *fw_data;
2639 		uint32_t vers;
2640 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2641 
2642 		/*
2643 		 * You're allowed to load firmware only before FULL_INIT_DONE.
2644 		 *
2645 		 * FW_UPTODATE is also set so the rest of the initialization
2646 		 * will not overwrite what was loaded here.  This gives you the
2647 		 * flexibility to load any firmware (and maybe shoot yourself in
2648 		 * the foot).
2649 		 */
2650 
2651 		ADAPTER_LOCK(sc);
2652 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2653 			ADAPTER_UNLOCK(sc);
2654 			return (EBUSY);
2655 		}
2656 
2657 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2658 		if (!fw_data)
2659 			error = ENOMEM;
2660 		else
2661 			error = copyin(t->buf, fw_data, t->len);
2662 
2663 		if (!error)
2664 			error = -t3_load_fw(sc, fw_data, t->len);
2665 
2666 		if (t3_get_fw_version(sc, &vers) == 0) {
2667 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2668 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2669 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2670 		}
2671 
2672 		if (!error)
2673 			sc->flags |= FW_UPTODATE;
2674 
2675 		free(fw_data, M_DEVBUF);
2676 		ADAPTER_UNLOCK(sc);
2677 		break;
2678 	}
2679 	case CHELSIO_LOAD_BOOT: {
2680 		uint8_t *boot_data;
2681 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2682 
2683 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2684 		if (!boot_data)
2685 			return (ENOMEM);
2686 
2687 		error = copyin(t->buf, boot_data, t->len);
2688 		if (!error)
2689 			error = -t3_load_boot(sc, boot_data, t->len);
2690 
2691 		free(boot_data, M_DEVBUF);
2692 		break;
2693 	}
2694 	case CHELSIO_GET_PM: {
2695 		struct ch_pm *m = (struct ch_pm *)data;
2696 		struct tp_params *p = &sc->params.tp;
2697 
2698 		if (!is_offload(sc))
2699 			return (EOPNOTSUPP);
2700 
2701 		m->tx_pg_sz = p->tx_pg_size;
2702 		m->tx_num_pg = p->tx_num_pgs;
2703 		m->rx_pg_sz  = p->rx_pg_size;
2704 		m->rx_num_pg = p->rx_num_pgs;
2705 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2706 
2707 		break;
2708 	}
2709 	case CHELSIO_SET_PM: {
2710 		struct ch_pm *m = (struct ch_pm *)data;
2711 		struct tp_params *p = &sc->params.tp;
2712 
2713 		if (!is_offload(sc))
2714 			return (EOPNOTSUPP);
2715 		if (sc->flags & FULL_INIT_DONE)
2716 			return (EBUSY);
2717 
2718 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2719 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2720 			return (EINVAL);	/* not power of 2 */
2721 		if (!(m->rx_pg_sz & 0x14000))
2722 			return (EINVAL);	/* not 16KB or 64KB */
2723 		if (!(m->tx_pg_sz & 0x1554000))
2724 			return (EINVAL);	/* not a supported tx pg size */
2725 		if (m->tx_num_pg == -1)
2726 			m->tx_num_pg = p->tx_num_pgs;
2727 		if (m->rx_num_pg == -1)
2728 			m->rx_num_pg = p->rx_num_pgs;
2729 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2730 			return (EINVAL);
2731 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2732 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2733 			return (EINVAL);
2734 
2735 		p->rx_pg_size = m->rx_pg_sz;
2736 		p->tx_pg_size = m->tx_pg_sz;
2737 		p->rx_num_pgs = m->rx_num_pg;
2738 		p->tx_num_pgs = m->tx_num_pg;
2739 		break;
2740 	}
2741 	case CHELSIO_SETMTUTAB: {
2742 		struct ch_mtus *m = (struct ch_mtus *)data;
2743 		int i;
2744 
2745 		if (!is_offload(sc))
2746 			return (EOPNOTSUPP);
2747 		if (offload_running(sc))
2748 			return (EBUSY);
2749 		if (m->nmtus != NMTUS)
2750 			return (EINVAL);
2751 		if (m->mtus[0] < 81)         /* accommodate SACK */
2752 			return (EINVAL);
2753 
2754 		/*
2755 		 * MTUs must be in ascending order
2756 		 */
2757 		for (i = 1; i < NMTUS; ++i)
2758 			if (m->mtus[i] < m->mtus[i - 1])
2759 				return (EINVAL);
2760 
2761 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2762 		break;
2763 	}
2764 	case CHELSIO_GETMTUTAB: {
2765 		struct ch_mtus *m = (struct ch_mtus *)data;
2766 
2767 		if (!is_offload(sc))
2768 			return (EOPNOTSUPP);
2769 
2770 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2771 		m->nmtus = NMTUS;
2772 		break;
2773 	}
2774 	case CHELSIO_GET_MEM: {
2775 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2776 		struct mc7 *mem;
2777 		uint8_t *useraddr;
2778 		u64 buf[32];
2779 
2780 		/*
2781 		 * Use these to avoid modifying len/addr in the return
2782 		 * struct
2783 		 */
2784 		uint32_t len = t->len, addr = t->addr;
2785 
2786 		if (!is_offload(sc))
2787 			return (EOPNOTSUPP);
2788 		if (!(sc->flags & FULL_INIT_DONE))
2789 			return (EIO);         /* need the memory controllers */
2790 		if ((addr & 0x7) || (len & 0x7))
2791 			return (EINVAL);
2792 		if (t->mem_id == MEM_CM)
2793 			mem = &sc->cm;
2794 		else if (t->mem_id == MEM_PMRX)
2795 			mem = &sc->pmrx;
2796 		else if (t->mem_id == MEM_PMTX)
2797 			mem = &sc->pmtx;
2798 		else
2799 			return (EINVAL);
2800 
2801 		/*
2802 		 * Version scheme:
2803 		 * bits 0..9: chip version
2804 		 * bits 10..15: chip revision
2805 		 */
2806 		t->version = 3 | (sc->params.rev << 10);
2807 
2808 		/*
2809 		 * Read 256 bytes at a time as len can be large and we don't
2810 		 * want to use huge intermediate buffers.
2811 		 */
2812 		useraddr = (uint8_t *)t->buf;
2813 		while (len) {
2814 			unsigned int chunk = min(len, sizeof(buf));
2815 
2816 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2817 			if (error)
2818 				return (-error);
2819 			if (copyout(buf, useraddr, chunk))
2820 				return (EFAULT);
2821 			useraddr += chunk;
2822 			addr += chunk;
2823 			len -= chunk;
2824 		}
2825 		break;
2826 	}
2827 	case CHELSIO_READ_TCAM_WORD: {
2828 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2829 
2830 		if (!is_offload(sc))
2831 			return (EOPNOTSUPP);
2832 		if (!(sc->flags & FULL_INIT_DONE))
2833 			return (EIO);         /* need MC5 */
2834 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2835 		break;
2836 	}
2837 	case CHELSIO_SET_TRACE_FILTER: {
2838 		struct ch_trace *t = (struct ch_trace *)data;
2839 		const struct trace_params *tp;
2840 
2841 		tp = (const struct trace_params *)&t->sip;
2842 		if (t->config_tx)
2843 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2844 					       t->trace_tx);
2845 		if (t->config_rx)
2846 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2847 					       t->trace_rx);
2848 		break;
2849 	}
2850 	case CHELSIO_SET_PKTSCHED: {
2851 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2852 		if (sc->open_device_map == 0)
2853 			return (EAGAIN);
2854 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2855 		    p->binding);
2856 		break;
2857 	}
2858 	case CHELSIO_IFCONF_GETREGS: {
2859 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2860 		int reglen = cxgb_get_regs_len();
2861 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2862 		if (buf == NULL) {
2863 			return (ENOMEM);
2864 		}
2865 		if (regs->len > reglen)
2866 			regs->len = reglen;
2867 		else if (regs->len < reglen)
2868 			error = ENOBUFS;
2869 
2870 		if (!error) {
2871 			cxgb_get_regs(sc, regs, buf);
2872 			error = copyout(buf, regs->data, reglen);
2873 		}
2874 		free(buf, M_DEVBUF);
2875 
2876 		break;
2877 	}
2878 	case CHELSIO_SET_HW_SCHED: {
2879 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2880 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2881 
2882 		if ((sc->flags & FULL_INIT_DONE) == 0)
2883 			return (EAGAIN);       /* need TP to be initialized */
2884 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2885 		    !in_range(t->channel, 0, 1) ||
2886 		    !in_range(t->kbps, 0, 10000000) ||
2887 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2888 		    !in_range(t->flow_ipg, 0,
2889 			      dack_ticks_to_usec(sc, 0x7ff)))
2890 			return (EINVAL);
2891 
2892 		if (t->kbps >= 0) {
2893 			error = t3_config_sched(sc, t->kbps, t->sched);
2894 			if (error < 0)
2895 				return (-error);
2896 		}
2897 		if (t->class_ipg >= 0)
2898 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2899 		if (t->flow_ipg >= 0) {
2900 			t->flow_ipg *= 1000;     /* us -> ns */
2901 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2902 		}
2903 		if (t->mode >= 0) {
2904 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2905 
2906 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2907 					 bit, t->mode ? bit : 0);
2908 		}
2909 		if (t->channel >= 0)
2910 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2911 					 1 << t->sched, t->channel << t->sched);
2912 		break;
2913 	}
2914 	case CHELSIO_GET_EEPROM: {
2915 		int i;
2916 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2917 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2918 
2919 		if (buf == NULL) {
2920 			return (ENOMEM);
2921 		}
2922 		e->magic = EEPROM_MAGIC;
2923 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2924 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2925 
2926 		if (!error)
2927 			error = copyout(buf + e->offset, e->data, e->len);
2928 
2929 		free(buf, M_DEVBUF);
2930 		break;
2931 	}
2932 	case CHELSIO_CLEAR_STATS: {
2933 		if (!(sc->flags & FULL_INIT_DONE))
2934 			return (EAGAIN);
2935 
2936 		PORT_LOCK(pi);
2937 		t3_mac_update_stats(&pi->mac);
2938 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2939 		PORT_UNLOCK(pi);
2940 		break;
2941 	}
2942 	case CHELSIO_GET_UP_LA: {
2943 		struct ch_up_la *la = (struct ch_up_la *)data;
2944 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2945 		if (buf == NULL) {
2946 			return (ENOMEM);
2947 		}
2948 		if (la->bufsize < LA_BUFSIZE)
2949 			error = ENOBUFS;
2950 
2951 		if (!error)
2952 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2953 					      &la->bufsize, buf);
2954 		if (!error)
2955 			error = copyout(buf, la->data, la->bufsize);
2956 
2957 		free(buf, M_DEVBUF);
2958 		break;
2959 	}
2960 	case CHELSIO_GET_UP_IOQS: {
2961 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
2962 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
2963 		uint32_t *v;
2964 
2965 		if (buf == NULL) {
2966 			return (ENOMEM);
2967 		}
2968 		if (ioqs->bufsize < IOQS_BUFSIZE)
2969 			error = ENOBUFS;
2970 
2971 		if (!error)
2972 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
2973 
2974 		if (!error) {
2975 			v = (uint32_t *)buf;
2976 
2977 			ioqs->ioq_rx_enable = *v++;
2978 			ioqs->ioq_tx_enable = *v++;
2979 			ioqs->ioq_rx_status = *v++;
2980 			ioqs->ioq_tx_status = *v++;
2981 
2982 			error = copyout(v, ioqs->data, ioqs->bufsize);
2983 		}
2984 
2985 		free(buf, M_DEVBUF);
2986 		break;
2987 	}
2988 	case CHELSIO_SET_FILTER: {
2989 		struct ch_filter *f = (struct ch_filter *)data;
2990 		struct filter_info *p;
2991 		unsigned int nfilters = sc->params.mc5.nfilters;
2992 
2993 		if (!is_offload(sc))
2994 			return (EOPNOTSUPP);	/* No TCAM */
2995 		if (!(sc->flags & FULL_INIT_DONE))
2996 			return (EAGAIN);	/* mc5 not setup yet */
2997 		if (nfilters == 0)
2998 			return (EBUSY);		/* TOE will use TCAM */
2999 
3000 		/* sanity checks */
3001 		if (f->filter_id >= nfilters ||
3002 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3003 		    (f->val.sport && f->mask.sport != 0xffff) ||
3004 		    (f->val.dport && f->mask.dport != 0xffff) ||
3005 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3006 		    (f->val.vlan_prio &&
3007 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3008 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3009 		    f->qset >= SGE_QSETS ||
3010 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3011 			return (EINVAL);
3012 
3013 		/* Was allocated with M_WAITOK */
3014 		KASSERT(sc->filters, ("filter table NULL\n"));
3015 
3016 		p = &sc->filters[f->filter_id];
3017 		if (p->locked)
3018 			return (EPERM);
3019 
3020 		bzero(p, sizeof(*p));
3021 		p->sip = f->val.sip;
3022 		p->sip_mask = f->mask.sip;
3023 		p->dip = f->val.dip;
3024 		p->sport = f->val.sport;
3025 		p->dport = f->val.dport;
3026 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3027 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3028 		    FILTER_NO_VLAN_PRI;
3029 		p->mac_hit = f->mac_hit;
3030 		p->mac_vld = f->mac_addr_idx != 0xffff;
3031 		p->mac_idx = f->mac_addr_idx;
3032 		p->pkt_type = f->proto;
3033 		p->report_filter_id = f->want_filter_id;
3034 		p->pass = f->pass;
3035 		p->rss = f->rss;
3036 		p->qset = f->qset;
3037 
3038 		error = set_filter(sc, f->filter_id, p);
3039 		if (error == 0)
3040 			p->valid = 1;
3041 		break;
3042 	}
3043 	case CHELSIO_DEL_FILTER: {
3044 		struct ch_filter *f = (struct ch_filter *)data;
3045 		struct filter_info *p;
3046 		unsigned int nfilters = sc->params.mc5.nfilters;
3047 
3048 		if (!is_offload(sc))
3049 			return (EOPNOTSUPP);
3050 		if (!(sc->flags & FULL_INIT_DONE))
3051 			return (EAGAIN);
3052 		if (nfilters == 0 || sc->filters == NULL)
3053 			return (EINVAL);
3054 		if (f->filter_id >= nfilters)
3055 			return (EINVAL);
3056 
3057 		p = &sc->filters[f->filter_id];
3058 		if (p->locked)
3059 			return (EPERM);
3060 		if (!p->valid)
3061 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3062 
3063 		bzero(p, sizeof(*p));
3064 		p->sip = p->sip_mask = 0xffffffff;
3065 		p->vlan = 0xfff;
3066 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3067 		p->pkt_type = 1;
3068 		error = set_filter(sc, f->filter_id, p);
3069 		break;
3070 	}
3071 	case CHELSIO_GET_FILTER: {
3072 		struct ch_filter *f = (struct ch_filter *)data;
3073 		struct filter_info *p;
3074 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3075 
3076 		if (!is_offload(sc))
3077 			return (EOPNOTSUPP);
3078 		if (!(sc->flags & FULL_INIT_DONE))
3079 			return (EAGAIN);
3080 		if (nfilters == 0 || sc->filters == NULL)
3081 			return (EINVAL);
3082 
3083 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3084 		for (; i < nfilters; i++) {
3085 			p = &sc->filters[i];
3086 			if (!p->valid)
3087 				continue;
3088 
3089 			bzero(f, sizeof(*f));
3090 
3091 			f->filter_id = i;
3092 			f->val.sip = p->sip;
3093 			f->mask.sip = p->sip_mask;
3094 			f->val.dip = p->dip;
3095 			f->mask.dip = p->dip ? 0xffffffff : 0;
3096 			f->val.sport = p->sport;
3097 			f->mask.sport = p->sport ? 0xffff : 0;
3098 			f->val.dport = p->dport;
3099 			f->mask.dport = p->dport ? 0xffff : 0;
3100 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3101 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3102 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3103 			    0 : p->vlan_prio;
3104 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3105 			    0 : FILTER_NO_VLAN_PRI;
3106 			f->mac_hit = p->mac_hit;
3107 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3108 			f->proto = p->pkt_type;
3109 			f->want_filter_id = p->report_filter_id;
3110 			f->pass = p->pass;
3111 			f->rss = p->rss;
3112 			f->qset = p->qset;
3113 
3114 			break;
3115 		}
3116 
3117 		if (i == nfilters)
3118 			f->filter_id = 0xffffffff;
3119 		break;
3120 	}
3121 	default:
3122 		return (EOPNOTSUPP);
3123 		break;
3124 	}
3125 
3126 	return (error);
3127 }
3128 
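/*
 * Read the registers in [start, end] and store each one at its own register
 * offset within buf.
 */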
3129 static __inline void
3130 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3131     unsigned int end)
3132 {
3133 	uint32_t *p = (uint32_t *)(buf + start);
3134 
3135 	for ( ; start <= end; start += sizeof(uint32_t))
3136 		*p++ = t3_read_reg(ap, start);
3137 }
3138 
3139 #define T3_REGMAP_SIZE (3 * 1024)
3140 static int
3141 cxgb_get_regs_len(void)
3142 {
3143 	return (T3_REGMAP_SIZE);
3144 }
3145 
3146 static void
3147 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3148 {
3149 
3150 	/*
3151 	 * Version scheme:
3152 	 * bits 0..9: chip version
3153 	 * bits 10..15: chip revision
3154 	 * bit 31: set for PCIe cards
3155 	 */
3156 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3157 
3158 	/*
3159 	 * We skip the MAC statistics registers because they are clear-on-read.
3160 	 * Also reading multi-register stats would need to synchronize with the
3161 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3162 	 */
3163 	memset(buf, 0, cxgb_get_regs_len());
3164 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3165 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3166 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3167 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3168 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3169 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3170 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3171 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3172 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3173 }
3174 
3175 static int
3176 alloc_filters(struct adapter *sc)
3177 {
3178 	struct filter_info *p;
3179 	unsigned int nfilters = sc->params.mc5.nfilters;
3180 
3181 	if (nfilters == 0)
3182 		return (0);
3183 
3184 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3185 	sc->filters = p;
3186 
3187 	p = &sc->filters[nfilters - 1];
3188 	p->vlan = 0xfff;
3189 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3190 	p->pass = p->rss = p->valid = p->locked = 1;
3191 
3192 	return (0);
3193 }
3194 
3195 static int
3196 setup_hw_filters(struct adapter *sc)
3197 {
3198 	int i, rc;
3199 	unsigned int nfilters = sc->params.mc5.nfilters;
3200 
3201 	if (!sc->filters)
3202 		return (0);
3203 
3204 	t3_enable_filters(sc);
3205 
3206 	for (i = rc = 0; i < nfilters && !rc; i++) {
3207 		if (sc->filters[i].locked)
3208 			rc = set_filter(sc, i, &sc->filters[i]);
3209 	}
3210 
3211 	return (rc);
3212 }
3213 
3214 static int
3215 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3216 {
3217 	int len;
3218 	struct mbuf *m;
3219 	struct ulp_txpkt *txpkt;
3220 	struct work_request_hdr *wr;
3221 	struct cpl_pass_open_req *oreq;
3222 	struct cpl_set_tcb_field *sreq;
3223 
3224 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3225 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3226 
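	/*
	 * Convert the relative filter index into an absolute TCAM index;
	 * the filter entries sit just below the routing region at the top
	 * of the TCAM.
	 */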
3227 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3228 	      sc->params.mc5.nfilters;
3229 
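	/*
	 * A single atomic BYPASS work request carries the PASS_OPEN_REQ and
	 * two SET_TCB_FIELD sub-commands that program the filter's TCB.
	 */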
3230 	m = m_gethdr(M_WAITOK, MT_DATA);
3231 	m->m_len = m->m_pkthdr.len = len;
3232 	bzero(mtod(m, char *), len);
3233 
3234 	wr = mtod(m, struct work_request_hdr *);
3235 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3236 
3237 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3238 	txpkt = (struct ulp_txpkt *)oreq;
3239 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3240 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3241 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3242 	oreq->local_port = htons(f->dport);
3243 	oreq->peer_port = htons(f->sport);
3244 	oreq->local_ip = htonl(f->dip);
3245 	oreq->peer_ip = htonl(f->sip);
3246 	oreq->peer_netmask = htonl(f->sip_mask);
3247 	oreq->opt0h = 0;
3248 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3249 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3250 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3251 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3252 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3253 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3254 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3255 
3256 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3257 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3258 			  (f->report_filter_id << 15) | (1 << 23) |
3259 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3260 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3261 	t3_mgmt_tx(sc, m);
3262 
3263 	if (f->pass && !f->rss) {
3264 		len = sizeof(*sreq);
3265 		m = m_gethdr(M_WAITOK, MT_DATA);
3266 		m->m_len = m->m_pkthdr.len = len;
3267 		bzero(mtod(m, char *), len);
3268 		sreq = mtod(m, struct cpl_set_tcb_field *);
3269 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3270 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3271 				 (u64)sc->rrss_map[f->qset] << 19);
3272 		t3_mgmt_tx(sc, m);
3273 	}
3274 	return 0;
3275 	return (0);
3276 
3277 static inline void
3278 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3279     unsigned int word, u64 mask, u64 val)
3280 {
3281 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3282 	req->reply = V_NO_REPLY(1);
3283 	req->cpu_idx = 0;
3284 	req->word = htons(word);
3285 	req->mask = htobe64(mask);
3286 	req->val = htobe64(val);
3287 }
3288 
3289 static inline void
3290 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3291     unsigned int word, u64 mask, u64 val)
3292 {
3293 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3294 
3295 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3296 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3297 	mk_set_tcb_field(req, tid, word, mask, val);
3298 }
3299 
3300 void
3301 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3302 {
3303 	struct adapter *sc;
3304 
3305 	mtx_lock(&t3_list_lock);
3306 	SLIST_FOREACH(sc, &t3_list, link) {
3307 		/*
3308 		 * func should not make any assumptions about what state sc is
3309 		 * in - the only guarantee is that sc->lock is a valid lock.
3310 		 */
3311 		func(sc, arg);
3312 	}
3313 	mtx_unlock(&t3_list_lock);
3314 }
3315 
3316 #ifdef TCP_OFFLOAD
3317 static int
3318 toe_capability(struct port_info *pi, int enable)
3319 {
3320 	int rc;
3321 	struct adapter *sc = pi->adapter;
3322 
3323 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3324 
3325 	if (!is_offload(sc))
3326 		return (ENODEV);
3327 
3328 	if (enable) {
3329 		if (!(sc->flags & FULL_INIT_DONE)) {
3330 			log(LOG_WARNING,
3331 			    "You must enable a cxgb interface first\n");
3332 			return (EAGAIN);
3333 		}
3334 
3335 		if (isset(&sc->offload_map, pi->port_id))
3336 			return (0);
3337 
3338 		if (!(sc->flags & TOM_INIT_DONE)) {
3339 			rc = t3_activate_uld(sc, ULD_TOM);
3340 			if (rc == EAGAIN) {
3341 				log(LOG_WARNING,
3342 				    "You must kldload t3_tom.ko before trying "
3343 				    "to enable TOE on a cxgb interface.\n");
3344 			}
3345 			if (rc != 0)
3346 				return (rc);
3347 			KASSERT(sc->tom_softc != NULL,
3348 			    ("%s: TOM activated but softc NULL", __func__));
3349 			KASSERT(sc->flags & TOM_INIT_DONE,
3350 			    ("%s: TOM activated but flag not set", __func__));
3351 		}
3352 
3353 		setbit(&sc->offload_map, pi->port_id);
3354 
3355 		/*
3356 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3357 		 * enabled on any port.  Need to figure out how to enable,
3358 		 * disable, load, and unload iWARP cleanly.
3359 		 */
3360 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3361 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3362 			setbit(&sc->offload_map, MAX_NPORTS);
3363 	} else {
3364 		if (!isset(&sc->offload_map, pi->port_id))
3365 			return (0);
3366 
3367 		KASSERT(sc->flags & TOM_INIT_DONE,
3368 		    ("%s: TOM never initialized?", __func__));
3369 		clrbit(&sc->offload_map, pi->port_id);
3370 	}
3371 
3372 	return (0);
3373 }
3374 
3375 /*
3376  * Add an upper layer driver to the global list.
3377  */
3378 int
3379 t3_register_uld(struct uld_info *ui)
3380 {
3381 	int rc = 0;
3382 	struct uld_info *u;
3383 
3384 	mtx_lock(&t3_uld_list_lock);
3385 	SLIST_FOREACH(u, &t3_uld_list, link) {
3386 	    if (u->uld_id == ui->uld_id) {
3387 		    rc = EEXIST;
3388 		    goto done;
3389 	    }
3390 	}
3391 
3392 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3393 	ui->refcount = 0;
3394 done:
3395 	mtx_unlock(&t3_uld_list_lock);
3396 	return (rc);
3397 }
3398 
3399 int
3400 t3_unregister_uld(struct uld_info *ui)
3401 {
3402 	int rc = EINVAL;
3403 	struct uld_info *u;
3404 
3405 	mtx_lock(&t3_uld_list_lock);
3406 
3407 	SLIST_FOREACH(u, &t3_uld_list, link) {
3408 	    if (u == ui) {
3409 		    if (ui->refcount > 0) {
3410 			    rc = EBUSY;
3411 			    goto done;
3412 		    }
3413 
3414 		    SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3415 		    rc = 0;
3416 		    goto done;
3417 	    }
3418 	}
3419 done:
3420 	mtx_unlock(&t3_uld_list_lock);
3421 	return (rc);
3422 }
3423 
3424 int
3425 t3_activate_uld(struct adapter *sc, int id)
3426 {
3427 	int rc = EAGAIN;
3428 	struct uld_info *ui;
3429 
3430 	mtx_lock(&t3_uld_list_lock);
3431 
3432 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3433 		if (ui->uld_id == id) {
3434 			rc = ui->activate(sc);
3435 			if (rc == 0)
3436 				ui->refcount++;
3437 			goto done;
3438 		}
3439 	}
3440 done:
3441 	mtx_unlock(&t3_uld_list_lock);
3442 
3443 	return (rc);
3444 }
3445 
3446 int
3447 t3_deactivate_uld(struct adapter *sc, int id)
3448 {
3449 	int rc = EINVAL;
3450 	struct uld_info *ui;
3451 
3452 	mtx_lock(&t3_uld_list_lock);
3453 
3454 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3455 		if (ui->uld_id == id) {
3456 			rc = ui->deactivate(sc);
3457 			if (rc == 0)
3458 				ui->refcount--;
3459 			goto done;
3460 		}
3461 	}
3462 done:
3463 	mtx_unlock(&t3_uld_list_lock);
3464 
3465 	return (rc);
3466 }
3467 
3468 static int
3469 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3470     struct mbuf *m)
3471 {
3472 	m_freem(m);
3473 	return (EDOOFUS);
3474 }
3475 
3476 int
3477 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3478 {
3479 	uintptr_t *loc, new;
3480 
3481 	if (opcode >= NUM_CPL_HANDLERS)
3482 		return (EINVAL);
3483 
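	/* A NULL handler reinstalls the default, which just drops the message. */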
3484 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3485 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3486 	atomic_store_rel_ptr(loc, new);
3487 
3488 	return (0);
3489 }
3490 #endif
3491 
3492 static int
3493 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3494 {
3495 	int rc = 0;
3496 
3497 	switch (cmd) {
3498 	case MOD_LOAD:
3499 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3500 		SLIST_INIT(&t3_list);
3501 #ifdef TCP_OFFLOAD
3502 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3503 		SLIST_INIT(&t3_uld_list);
3504 #endif
3505 		break;
3506 
3507 	case MOD_UNLOAD:
3508 #ifdef TCP_OFFLOAD
3509 		mtx_lock(&t3_uld_list_lock);
3510 		if (!SLIST_EMPTY(&t3_uld_list)) {
3511 			rc = EBUSY;
3512 			mtx_unlock(&t3_uld_list_lock);
3513 			break;
3514 		}
3515 		mtx_unlock(&t3_uld_list_lock);
3516 		mtx_destroy(&t3_uld_list_lock);
3517 #endif
3518 		mtx_lock(&t3_list_lock);
3519 		if (!SLIST_EMPTY(&t3_list)) {
3520 			rc = EBUSY;
3521 			mtx_unlock(&t3_list_lock);
3522 			break;
3523 		}
3524 		mtx_unlock(&t3_list_lock);
3525 		mtx_destroy(&t3_list_lock);
3526 		break;
3527 	}
3528 
3529 	return (rc);
3530 }
3531