xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision ee7b0571c2c18bdec848ed2044223cc88db29bd8)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_inet.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/bus.h>
39 #include <sys/module.h>
40 #include <sys/pciio.h>
41 #include <sys/conf.h>
42 #include <machine/bus.h>
43 #include <machine/resource.h>
44 #include <sys/bus_dma.h>
45 #include <sys/ktr.h>
46 #include <sys/rman.h>
47 #include <sys/ioccom.h>
48 #include <sys/mbuf.h>
49 #include <sys/linker.h>
50 #include <sys/firmware.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/queue.h>
57 #include <sys/taskqueue.h>
58 #include <sys/proc.h>
59 
60 #include <net/bpf.h>
61 #include <net/ethernet.h>
62 #include <net/if.h>
63 #include <net/if_var.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_media.h>
67 #include <net/if_types.h>
68 #include <net/if_vlan_var.h>
69 
70 #include <netinet/in_systm.h>
71 #include <netinet/in.h>
72 #include <netinet/if_ether.h>
73 #include <netinet/ip.h>
75 #include <netinet/tcp.h>
76 #include <netinet/udp.h>
77 
78 #include <dev/pci/pcireg.h>
79 #include <dev/pci/pcivar.h>
80 #include <dev/pci/pci_private.h>
81 
82 #include <cxgb_include.h>
83 
84 #ifdef PRIV_SUPPORTED
85 #include <sys/priv.h>
86 #endif
87 
88 static int cxgb_setup_interrupts(adapter_t *);
89 static void cxgb_teardown_interrupts(adapter_t *);
90 static void cxgb_init(void *);
91 static int cxgb_init_locked(struct port_info *);
92 static int cxgb_uninit_locked(struct port_info *);
93 static int cxgb_uninit_synchronized(struct port_info *);
94 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
95 static int cxgb_media_change(struct ifnet *);
96 static int cxgb_ifm_type(int);
97 static void cxgb_build_medialist(struct port_info *);
98 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
99 static int setup_sge_qsets(adapter_t *);
100 static void cxgb_async_intr(void *);
101 static void cxgb_tick_handler(void *, int);
102 static void cxgb_tick(void *);
103 static void link_check_callout(void *);
104 static void check_link_status(void *, int);
105 static void setup_rss(adapter_t *sc);
106 static int alloc_filters(struct adapter *);
107 static int setup_hw_filters(struct adapter *);
108 static int set_filter(struct adapter *, int, const struct filter_info *);
109 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
110     unsigned int, u64, u64);
111 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
112     unsigned int, u64, u64);
113 #ifdef TCP_OFFLOAD
114 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
115 #endif
116 
117 /* Attachment glue for the PCI controller end of the device.  Each port of
118  * the device is attached separately, as defined later.
119  */
120 static int cxgb_controller_probe(device_t);
121 static int cxgb_controller_attach(device_t);
122 static int cxgb_controller_detach(device_t);
123 static void cxgb_free(struct adapter *);
124 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
125     unsigned int end);
126 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
127 static int cxgb_get_regs_len(void);
128 static void touch_bars(device_t dev);
129 static void cxgb_update_mac_settings(struct port_info *p);
130 #ifdef TCP_OFFLOAD
131 static int toe_capability(struct port_info *, int);
132 #endif
133 
134 static device_method_t cxgb_controller_methods[] = {
135 	DEVMETHOD(device_probe,		cxgb_controller_probe),
136 	DEVMETHOD(device_attach,	cxgb_controller_attach),
137 	DEVMETHOD(device_detach,	cxgb_controller_detach),
138 
139 	DEVMETHOD_END
140 };
141 
142 static driver_t cxgb_controller_driver = {
143 	"cxgbc",
144 	cxgb_controller_methods,
145 	sizeof(struct adapter)
146 };
147 
148 static int cxgbc_mod_event(module_t, int, void *);
149 static devclass_t	cxgb_controller_devclass;
150 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
151     cxgbc_mod_event, 0);
152 MODULE_VERSION(cxgbc, 1);
153 MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
154 
155 /*
156  * Attachment glue for the ports.  Attachment is done directly to the
157  * controller device.
158  */
159 static int cxgb_port_probe(device_t);
160 static int cxgb_port_attach(device_t);
161 static int cxgb_port_detach(device_t);
162 
163 static device_method_t cxgb_port_methods[] = {
164 	DEVMETHOD(device_probe,		cxgb_port_probe),
165 	DEVMETHOD(device_attach,	cxgb_port_attach),
166 	DEVMETHOD(device_detach,	cxgb_port_detach),
167 	{ 0, 0 }
168 };
169 
170 static driver_t cxgb_port_driver = {
171 	"cxgb",
172 	cxgb_port_methods,
173 	0
174 };
175 
176 static d_ioctl_t cxgb_extension_ioctl;
177 static d_open_t cxgb_extension_open;
178 static d_close_t cxgb_extension_close;
179 
180 static struct cdevsw cxgb_cdevsw = {
181        .d_version =    D_VERSION,
182        .d_flags =      0,
183        .d_open =       cxgb_extension_open,
184        .d_close =      cxgb_extension_close,
185        .d_ioctl =      cxgb_extension_ioctl,
186        .d_name =       "cxgb",
187 };
188 
189 static devclass_t	cxgb_port_devclass;
190 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
191 MODULE_VERSION(cxgb, 1);
192 
193 static struct mtx t3_list_lock;
194 static SLIST_HEAD(, adapter) t3_list;
195 #ifdef TCP_OFFLOAD
196 static struct mtx t3_uld_list_lock;
197 static SLIST_HEAD(, uld_info) t3_uld_list;
198 #endif
199 
200 /*
201  * The driver uses the best interrupt scheme available on a platform in the
202  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
203  * of these schemes the driver may consider as follows:
204  *
205  * msi_allowed = 2: choose from among all three options
206  * msi_allowed = 1: only consider MSI and pin interrupts
207  * msi_allowed = 0: force pin interrupts
208  */
209 static int msi_allowed = 2;
210 
211 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
212 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
213     "MSI-X, MSI, INTx selector");
214 
215 /*
216  * The driver uses an auto-queue algorithm by default.
217  * To disable it and force a single queue-set per port, set multiq = 0.
218  */
219 static int multiq = 1;
220 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
221     "use min(ncpus/ports, 8) queue-sets per port");
222 
223 /*
224  * By default the driver will not update the firmware unless
225  * it was compiled against a newer version.  Setting the force_fw_update
226  * tunable to a non-zero value overrides this.
227  */
228 static int force_fw_update = 0;
229 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
230     "update firmware even if up to date");
231 
232 int cxgb_use_16k_clusters = -1;
233 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
234     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
235 
236 static int nfilters = -1;
237 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
238     &nfilters, 0, "max number of entries in the filter table");
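/*
 * All of the knobs above are CTLFLAG_RDTUN sysctls: they are read-only at run
 * time and are normally set as loader tunables.  As an illustrative example
 * (values chosen here, not defaults), a /boot/loader.conf that restricts the
 * driver to MSI/INTx, forces a single queue-set per port, and caps the filter
 * table might contain:
 *
 *	hw.cxgb.msi_allowed=1
 *	hw.cxgb.multiq=0
 *	hw.cxgb.nfilters=256
 */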
239 
240 enum {
241 	MAX_TXQ_ENTRIES      = 16384,
242 	MAX_CTRL_TXQ_ENTRIES = 1024,
243 	MAX_RSPQ_ENTRIES     = 16384,
244 	MAX_RX_BUFFERS       = 16384,
245 	MAX_RX_JUMBO_BUFFERS = 16384,
246 	MIN_TXQ_ENTRIES      = 4,
247 	MIN_CTRL_TXQ_ENTRIES = 4,
248 	MIN_RSPQ_ENTRIES     = 32,
249 	MIN_FL_ENTRIES       = 32,
250 	MIN_FL_JUMBO_ENTRIES = 32
251 };
252 
253 struct filter_info {
254 	u32 sip;
255 	u32 sip_mask;
256 	u32 dip;
257 	u16 sport;
258 	u16 dport;
259 	u32 vlan:12;
260 	u32 vlan_prio:3;
261 	u32 mac_hit:1;
262 	u32 mac_idx:4;
263 	u32 mac_vld:1;
264 	u32 pkt_type:2;
265 	u32 report_filter_id:1;
266 	u32 pass:1;
267 	u32 rss:1;
268 	u32 qset:3;
269 	u32 locked:1;
270 	u32 valid:1;
271 };
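/*
 * Illustrative example (field values invented for this comment, not taken
 * from hardware documentation): a filter that steers every packet destined
 * to TCP port 80 into queue set 2 would leave sip/dip and their masks
 * wildcarded, set dport = 80, qset = 2, rss = 0, and pass = 1.  The valid bit
 * marks the software entry as in use; set_filter() is what actually programs
 * an entry into the hardware.
 */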
272 
273 enum { FILTER_NO_VLAN_PRI = 7 };
274 
275 #define EEPROM_MAGIC 0x38E2F10C
276 
277 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
278 
279 /* Table for probing the cards.  The desc field isn't actually used */
280 struct cxgb_ident {
281 	uint16_t	vendor;
282 	uint16_t	device;
283 	int		index;
284 	char		*desc;
285 } cxgb_identifiers[] = {
286 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
287 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
288 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
289 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
290 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
291 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
300 	{0, 0, 0, NULL}
301 };
302 
303 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
304 
305 
306 static __inline char
307 t3rev2char(struct adapter *adapter)
308 {
309 	char rev = 'z';
310 
311 	switch(adapter->params.rev) {
312 	case T3_REV_A:
313 		rev = 'a';
314 		break;
315 	case T3_REV_B:
316 	case T3_REV_B2:
317 		rev = 'b';
318 		break;
319 	case T3_REV_C:
320 		rev = 'c';
321 		break;
322 	}
323 	return rev;
324 }
325 
326 static struct cxgb_ident *
327 cxgb_get_ident(device_t dev)
328 {
329 	struct cxgb_ident *id;
330 
331 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
332 		if ((id->vendor == pci_get_vendor(dev)) &&
333 		    (id->device == pci_get_device(dev))) {
334 			return (id);
335 		}
336 	}
337 	return (NULL);
338 }
339 
340 static const struct adapter_info *
341 cxgb_get_adapter_info(device_t dev)
342 {
343 	struct cxgb_ident *id;
344 	const struct adapter_info *ai;
345 
346 	id = cxgb_get_ident(dev);
347 	if (id == NULL)
348 		return (NULL);
349 
350 	ai = t3_get_adapter_info(id->index);
351 
352 	return (ai);
353 }
354 
355 static int
356 cxgb_controller_probe(device_t dev)
357 {
358 	const struct adapter_info *ai;
359 	char *ports, buf[80];
360 	int nports;
361 
362 	ai = cxgb_get_adapter_info(dev);
363 	if (ai == NULL)
364 		return (ENXIO);
365 
366 	nports = ai->nports0 + ai->nports1;
367 	if (nports == 1)
368 		ports = "port";
369 	else
370 		ports = "ports";
371 
372 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
373 	device_set_desc_copy(dev, buf);
374 	return (BUS_PROBE_DEFAULT);
375 }
376 
377 #define FW_FNAME "cxgb_t3fw"
378 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
379 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
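/*
 * These names are looked up via firmware(9)/firmware_get().  The "%c" in the
 * TP EEPROM/SRAM names is filled in with the chip revision from t3rev2char()
 * (e.g. "cxgb_t3c_protocol_sram" on a rev C adapter), and the images are
 * typically provided by the cxgb_t3fw firmware module or compiled into the
 * kernel.
 */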
380 
381 static int
382 upgrade_fw(adapter_t *sc)
383 {
384 	const struct firmware *fw;
385 	int status;
386 	u32 vers;
387 
388 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
389 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
390 		return (ENOENT);
391 	} else
392 		device_printf(sc->dev, "installing firmware on card\n");
393 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
394 
395 	if (status != 0) {
396 		device_printf(sc->dev, "failed to install firmware: %d\n",
397 		    status);
398 	} else {
399 		t3_get_fw_version(sc, &vers);
400 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
401 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
402 		    G_FW_VERSION_MICRO(vers));
403 	}
404 
405 	firmware_put(fw, FIRMWARE_UNLOAD);
406 
407 	return (status);
408 }
409 
410 /*
411  * The cxgb_controller_attach function is responsible for the initial
412  * bringup of the device.  Its responsibilities include:
413  *
414  *  1. Determine if the device supports MSI or MSI-X.
415  *  2. Allocate bus resources so that we can access the Base Address Register.
416  *  3. Create and initialize mutexes for the controller and its control
417  *     logic such as SGE and MDIO.
418  *  4. Call the hardware-specific setup routine for the adapter as a whole.
419  *  5. Allocate the BAR for doing MSI-X.
420  *  6. Set up the line interrupt iff MSI-X is not supported.
421  *  7. Create the driver's taskq.
422  *  8. Start one task queue service thread.
423  *  9. Check if the firmware and SRAM are up-to-date.  They will be
424  *     auto-updated later (before FULL_INIT_DONE), if required.
425  * 10. Create a child device for each MAC (port).
426  * 11. Initialize T3 private state.
427  * 12. Trigger the LED.
428  * 13. Set up offload iff supported.
429  * 14. Reset/restart the tick callout.
430  * 15. Attach sysctls.
431  *
432  * NOTE: Any change to this sequence in the code MUST be reflected in this
433  * comment.  Failure to do so will result in problems on various error
434  * conditions, including link flapping.
435  */
436 static int
437 cxgb_controller_attach(device_t dev)
438 {
439 	device_t child;
440 	const struct adapter_info *ai;
441 	struct adapter *sc;
442 	int i, error = 0;
443 	uint32_t vers;
444 	int port_qsets = 1;
445 	int msi_needed, reg;
446 	char buf[80];
447 
448 	sc = device_get_softc(dev);
449 	sc->dev = dev;
450 	sc->msi_count = 0;
451 	ai = cxgb_get_adapter_info(dev);
452 
453 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
454 	    device_get_unit(dev));
455 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
456 
457 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
458 	    device_get_unit(dev));
459 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
460 	    device_get_unit(dev));
461 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
462 	    device_get_unit(dev));
463 
464 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
465 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
466 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
467 
468 	mtx_lock(&t3_list_lock);
469 	SLIST_INSERT_HEAD(&t3_list, sc, link);
470 	mtx_unlock(&t3_list_lock);
471 
472 	/* Find the PCIe link width and set the max read request size to 4KB. */
473 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
474 		uint16_t lnk;
475 
476 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
477 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
478 		if (sc->link_width < 8 &&
479 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
480 			device_printf(sc->dev,
481 			    "PCIe x%d Link, expect reduced performance\n",
482 			    sc->link_width);
483 		}
484 
485 		pci_set_max_read_req(dev, 4096);
486 	}
487 
488 	touch_bars(dev);
489 	pci_enable_busmaster(dev);
490 	/*
491 	 * Allocate the registers and make them available to the driver.
492 	 * The registers that we care about for NIC mode are in BAR 0
493 	 */
494 	sc->regs_rid = PCIR_BAR(0);
495 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
496 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
497 		device_printf(dev, "Cannot allocate BAR region 0\n");
498 		error = ENXIO;
499 		goto out;
500 	}
501 
502 	sc->bt = rman_get_bustag(sc->regs_res);
503 	sc->bh = rman_get_bushandle(sc->regs_res);
504 	sc->mmio_len = rman_get_size(sc->regs_res);
505 
506 	for (i = 0; i < MAX_NPORTS; i++)
507 		sc->port[i].adapter = sc;
508 
509 	if (t3_prep_adapter(sc, ai, 1) < 0) {
510 		printf("prep adapter failed\n");
511 		error = ENODEV;
512 		goto out;
513 	}
514 
515 	sc->udbs_rid = PCIR_BAR(2);
516 	sc->udbs_res = NULL;
517 	if (is_offload(sc) &&
518 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
519 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
520 		device_printf(dev, "Cannot allocate BAR region 1\n");
521 		error = ENXIO;
522 		goto out;
523 	}
524 
525 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
526 	 * enough messages for the queue sets.  If that fails, try falling
527 	 * back to MSI.  If that fails, then try falling back to the legacy
528 	 * interrupt pin model.
529 	 */
530 	sc->msix_regs_rid = 0x20;
531 	if ((msi_allowed >= 2) &&
532 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
533 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
534 
535 		if (multiq)
536 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
537 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
538 
539 		if (pci_msix_count(dev) == 0 ||
540 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
541 		    sc->msi_count != msi_needed) {
542 			device_printf(dev, "alloc msix failed - "
543 				      "msi_count=%d, msi_needed=%d, err=%d; "
544 				      "will try MSI\n", sc->msi_count,
545 				      msi_needed, error);
546 			sc->msi_count = 0;
547 			port_qsets = 1;
548 			pci_release_msi(dev);
549 			bus_release_resource(dev, SYS_RES_MEMORY,
550 			    sc->msix_regs_rid, sc->msix_regs_res);
551 			sc->msix_regs_res = NULL;
552 		} else {
553 			sc->flags |= USING_MSIX;
554 			sc->cxgb_intr = cxgb_async_intr;
555 			device_printf(dev,
556 				      "using MSI-X interrupts (%u vectors)\n",
557 				      sc->msi_count);
558 		}
559 	}
560 
561 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
562 		sc->msi_count = 1;
563 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
564 			device_printf(dev, "alloc msi failed - "
565 				      "err=%d; will try INTx\n", error);
566 			sc->msi_count = 0;
567 			port_qsets = 1;
568 			pci_release_msi(dev);
569 		} else {
570 			sc->flags |= USING_MSI;
571 			sc->cxgb_intr = t3_intr_msi;
572 			device_printf(dev, "using MSI interrupts\n");
573 		}
574 	}
575 	if (sc->msi_count == 0) {
576 		device_printf(dev, "using line interrupts\n");
577 		sc->cxgb_intr = t3b_intr;
578 	}
579 
580 	/* Create a private taskqueue thread for handling driver events */
581 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
582 	    taskqueue_thread_enqueue, &sc->tq);
583 	if (sc->tq == NULL) {
584 		device_printf(dev, "failed to allocate controller task queue\n");
585 		goto out;
586 	}
587 
588 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
589 	    device_get_nameunit(dev));
590 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
591 
592 
593 	/* Create a periodic callout for checking adapter status */
594 	callout_init(&sc->cxgb_tick_ch, TRUE);
595 
596 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
597 		/*
598 		 * Warn user that a firmware update will be attempted in init.
599 		 */
600 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
601 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
602 		sc->flags &= ~FW_UPTODATE;
603 	} else {
604 		sc->flags |= FW_UPTODATE;
605 	}
606 
607 	if (t3_check_tpsram_version(sc) < 0) {
608 		/*
609 		 * Warn user that a firmware update will be attempted in init.
610 		 */
611 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
612 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
613 		sc->flags &= ~TPS_UPTODATE;
614 	} else {
615 		sc->flags |= TPS_UPTODATE;
616 	}
617 
618 	/*
619 	 * Create a child device for each MAC.  The ethernet attachment
620 	 * will be done in these children.
621 	 */
622 	for (i = 0; i < (sc)->params.nports; i++) {
623 		struct port_info *pi;
624 
625 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
626 			device_printf(dev, "failed to add child port\n");
627 			error = EINVAL;
628 			goto out;
629 		}
630 		pi = &sc->port[i];
631 		pi->adapter = sc;
632 		pi->nqsets = port_qsets;
633 		pi->first_qset = i*port_qsets;
634 		pi->port_id = i;
635 		pi->tx_chan = i >= ai->nports0;
636 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
637 		sc->rxpkt_map[pi->txpkt_intf] = i;
638 		sc->port[i].tx_chan = i >= ai->nports0;
639 		sc->portdev[i] = child;
640 		device_set_softc(child, pi);
641 	}
642 	if ((error = bus_generic_attach(dev)) != 0)
643 		goto out;
644 
645 	/* initialize sge private state */
646 	t3_sge_init_adapter(sc);
647 
648 	t3_led_ready(sc);
649 
650 	error = t3_get_fw_version(sc, &vers);
651 	if (error)
652 		goto out;
653 
654 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
655 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
656 	    G_FW_VERSION_MICRO(vers));
657 
658 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
659 		 ai->desc, is_offload(sc) ? "R" : "",
660 		 sc->params.vpd.ec, sc->params.vpd.sn);
661 	device_set_desc_copy(dev, buf);
662 
663 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
664 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
665 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
666 
667 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
668 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
669 	t3_add_attach_sysctls(sc);
670 
671 #ifdef TCP_OFFLOAD
672 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
673 		sc->cpl_handler[i] = cpl_not_handled;
674 #endif
675 
676 	t3_intr_clear(sc);
677 	error = cxgb_setup_interrupts(sc);
678 out:
679 	if (error)
680 		cxgb_free(sc);
681 
682 	return (error);
683 }
684 
685 /*
686  * The cxgb_controller_detach routine is called when the device is
687  * unloaded from the system.
688  */
689 
690 static int
691 cxgb_controller_detach(device_t dev)
692 {
693 	struct adapter *sc;
694 
695 	sc = device_get_softc(dev);
696 
697 	cxgb_free(sc);
698 
699 	return (0);
700 }
701 
702 /*
703  * cxgb_free() is called by the cxgb_controller_detach() routine
704  * to tear down the structures that were built up in
705  * cxgb_controller_attach(), and should be the final piece of work
706  * done when fully unloading the driver.
707  *
708  * Its tasks include:
709  *  1. Shutting down the threads started by the cxgb_controller_attach()
710  *     routine.
711  *  2. Stopping the lower-level device and all callouts (cxgb_down()).
712  *  3. Detaching all of the port devices created during the
713  *     cxgb_controller_attach() routine.
714  *  4. Removing the device children created via cxgb_controller_attach().
715  *  5. Releasing PCI resources associated with the device.
716  *  6. Turning off the offload support, iff it was turned on.
717  *  7. Destroying the mutexes created in cxgb_controller_attach().
718  *
719  */
720 static void
721 cxgb_free(struct adapter *sc)
722 {
723 	int i, nqsets = 0;
724 
725 	ADAPTER_LOCK(sc);
726 	sc->flags |= CXGB_SHUTDOWN;
727 	ADAPTER_UNLOCK(sc);
728 
729 	/*
730 	 * Make sure all child devices are gone.
731 	 */
732 	bus_generic_detach(sc->dev);
733 	for (i = 0; i < (sc)->params.nports; i++) {
734 		if (sc->portdev[i] &&
735 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
736 			device_printf(sc->dev, "failed to delete child port\n");
737 		nqsets += sc->port[i].nqsets;
738 	}
739 
740 	/*
741 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
742 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
743 	 * all open devices have been closed.
744 	 */
745 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
746 					   __func__, sc->open_device_map));
747 	for (i = 0; i < sc->params.nports; i++) {
748 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
749 						  __func__, i));
750 	}
751 
752 	/*
753 	 * Finish off the adapter's callouts.
754 	 */
755 	callout_drain(&sc->cxgb_tick_ch);
756 	callout_drain(&sc->sge_timer_ch);
757 
758 	/*
759 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
760 	 * sysctls are cleaned up by the kernel linker.
761 	 */
762 	if (sc->flags & FULL_INIT_DONE) {
763 		t3_free_sge_resources(sc, nqsets);
764 		sc->flags &= ~FULL_INIT_DONE;
765 	}
766 
767 	/*
768 	 * Release all interrupt resources.
769 	 */
770 	cxgb_teardown_interrupts(sc);
771 	if (sc->flags & (USING_MSI | USING_MSIX)) {
772 		device_printf(sc->dev, "releasing msi message(s)\n");
773 		pci_release_msi(sc->dev);
774 	} else {
775 		device_printf(sc->dev, "no msi message to release\n");
776 	}
777 
778 	if (sc->msix_regs_res != NULL) {
779 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
780 		    sc->msix_regs_res);
781 	}
782 
783 	/*
784 	 * Free the adapter's taskqueue.
785 	 */
786 	if (sc->tq != NULL) {
787 		taskqueue_free(sc->tq);
788 		sc->tq = NULL;
789 	}
790 
791 	free(sc->filters, M_DEVBUF);
792 	t3_sge_free(sc);
793 
794 	if (sc->udbs_res != NULL)
795 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
796 		    sc->udbs_res);
797 
798 	if (sc->regs_res != NULL)
799 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
800 		    sc->regs_res);
801 
802 	MTX_DESTROY(&sc->mdio_lock);
803 	MTX_DESTROY(&sc->sge.reg_lock);
804 	MTX_DESTROY(&sc->elmer_lock);
805 	mtx_lock(&t3_list_lock);
806 	SLIST_REMOVE(&t3_list, sc, adapter, link);
807 	mtx_unlock(&t3_list_lock);
808 	ADAPTER_LOCK_DEINIT(sc);
809 }
810 
811 /**
812  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
813  *	@sc: the controller softc
814  *
815  *	Determines how many sets of SGE queues to use and initializes them.
816  *	We support multiple queue sets per port if we have MSI-X, otherwise
817  *	just one queue set per port.
818  */
819 static int
820 setup_sge_qsets(adapter_t *sc)
821 {
822 	int i, j, err, irq_idx = 0, qset_idx = 0;
823 	u_int ntxq = SGE_TXQ_PER_SET;
824 
825 	if ((err = t3_sge_alloc(sc)) != 0) {
826 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
827 		return (err);
828 	}
829 
830 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
831 		irq_idx = -1;
832 
833 	for (i = 0; i < (sc)->params.nports; i++) {
834 		struct port_info *pi = &sc->port[i];
835 
836 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
837 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
838 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
839 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
840 			if (err) {
841 				t3_free_sge_resources(sc, qset_idx);
842 				device_printf(sc->dev,
843 				    "t3_sge_alloc_qset failed with %d\n", err);
844 				return (err);
845 			}
846 		}
847 	}
848 
849 	return (0);
850 }
851 
852 static void
853 cxgb_teardown_interrupts(adapter_t *sc)
854 {
855 	int i;
856 
857 	for (i = 0; i < SGE_QSETS; i++) {
858 		if (sc->msix_intr_tag[i] == NULL) {
859 
860 			/* Should have been set up fully or not at all */
861 			KASSERT(sc->msix_irq_res[i] == NULL &&
862 				sc->msix_irq_rid[i] == 0,
863 				("%s: half-done interrupt (%d).", __func__, i));
864 
865 			continue;
866 		}
867 
868 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
869 				  sc->msix_intr_tag[i]);
870 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
871 				     sc->msix_irq_res[i]);
872 
873 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
874 		sc->msix_irq_rid[i] = 0;
875 	}
876 
877 	if (sc->intr_tag) {
878 		KASSERT(sc->irq_res != NULL,
879 			("%s: half-done interrupt.", __func__));
880 
881 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
882 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
883 				     sc->irq_res);
884 
885 		sc->irq_res = sc->intr_tag = NULL;
886 		sc->irq_rid = 0;
887 	}
888 }
889 
890 static int
891 cxgb_setup_interrupts(adapter_t *sc)
892 {
893 	struct resource *res;
894 	void *tag;
895 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
896 
897 	sc->irq_rid = intr_flag ? 1 : 0;
898 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
899 					     RF_SHAREABLE | RF_ACTIVE);
900 	if (sc->irq_res == NULL) {
901 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
902 			      intr_flag, sc->irq_rid);
903 		err = EINVAL;
904 		sc->irq_rid = 0;
905 	} else {
906 		err = bus_setup_intr(sc->dev, sc->irq_res,
907 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
908 		    sc->cxgb_intr, sc, &sc->intr_tag);
909 
910 		if (err) {
911 			device_printf(sc->dev,
912 				      "Cannot set up interrupt (%x, %u, %d)\n",
913 				      intr_flag, sc->irq_rid, err);
914 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
915 					     sc->irq_res);
916 			sc->irq_res = sc->intr_tag = NULL;
917 			sc->irq_rid = 0;
918 		}
919 	}
920 
921 	/* That's all for INTx or MSI */
922 	if (!(intr_flag & USING_MSIX) || err)
923 		return (err);
924 
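	/*
	 * MSI-X vector layout set up here: vector 1 (sc->irq_res, described
	 * as "err" below) handles the slow-path/async interrupt, and each
	 * additional vector, allocated with rid i + 2, is bound to queue set
	 * i via t3_intr_msix().
	 */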
925 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
926 	for (i = 0; i < sc->msi_count - 1; i++) {
927 		rid = i + 2;
928 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
929 					     RF_SHAREABLE | RF_ACTIVE);
930 		if (res == NULL) {
931 			device_printf(sc->dev, "Cannot allocate interrupt "
932 				      "for message %d\n", rid);
933 			err = EINVAL;
934 			break;
935 		}
936 
937 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
938 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
939 		if (err) {
940 			device_printf(sc->dev, "Cannot set up interrupt "
941 				      "for message %d (%d)\n", rid, err);
942 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
943 			break;
944 		}
945 
946 		sc->msix_irq_rid[i] = rid;
947 		sc->msix_irq_res[i] = res;
948 		sc->msix_intr_tag[i] = tag;
949 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
950 	}
951 
952 	if (err)
953 		cxgb_teardown_interrupts(sc);
954 
955 	return (err);
956 }
957 
958 
959 static int
960 cxgb_port_probe(device_t dev)
961 {
962 	struct port_info *p;
963 	char buf[80];
964 	const char *desc;
965 
966 	p = device_get_softc(dev);
967 	desc = p->phy.desc;
968 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
969 	device_set_desc_copy(dev, buf);
970 	return (0);
971 }
972 
973 
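/*
 * Create the per-port character device, named after the ifnet (e.g.
 * /dev/cxgb0), through which userland management tools such as cxgbtool
 * issue the ioctls handled by cxgb_extension_ioctl().
 */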
974 static int
975 cxgb_makedev(struct port_info *pi)
976 {
977 
978 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
979 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
980 
981 	if (pi->port_cdev == NULL)
982 		return (ENOMEM);
983 
984 	pi->port_cdev->si_drv1 = (void *)pi;
985 
986 	return (0);
987 }
988 
989 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
990     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
991     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
992 #define CXGB_CAP_ENABLE CXGB_CAP
993 
994 static int
995 cxgb_port_attach(device_t dev)
996 {
997 	struct port_info *p;
998 	struct ifnet *ifp;
999 	int err;
1000 	struct adapter *sc;
1001 
1002 	p = device_get_softc(dev);
1003 	sc = p->adapter;
1004 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1005 	    device_get_unit(device_get_parent(dev)), p->port_id);
1006 	PORT_LOCK_INIT(p, p->lockbuf);
1007 
1008 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1009 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1010 
1011 	/* Allocate an ifnet object and set it up */
1012 	ifp = p->ifp = if_alloc(IFT_ETHER);
1013 	if (ifp == NULL) {
1014 		device_printf(dev, "Cannot allocate ifnet\n");
1015 		return (ENOMEM);
1016 	}
1017 
1018 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1019 	ifp->if_init = cxgb_init;
1020 	ifp->if_softc = p;
1021 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1022 	ifp->if_ioctl = cxgb_ioctl;
1023 	ifp->if_transmit = cxgb_transmit;
1024 	ifp->if_qflush = cxgb_qflush;
1025 
1026 	ifp->if_capabilities = CXGB_CAP;
1027 #ifdef TCP_OFFLOAD
1028 	if (is_offload(sc))
1029 		ifp->if_capabilities |= IFCAP_TOE4;
1030 #endif
1031 	ifp->if_capenable = CXGB_CAP_ENABLE;
1032 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1033 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1034 
1035 	/*
1036 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1037 	 */
1038 	if (sc->params.nports > 2) {
1039 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1041 		ifp->if_hwassist &= ~CSUM_TSO;
1042 	}
1043 
1044 	ether_ifattach(ifp, p->hw_addr);
1045 
1046 #ifdef DEFAULT_JUMBO
1047 	if (sc->params.nports <= 2)
1048 		ifp->if_mtu = ETHERMTU_JUMBO;
1049 #endif
1050 	if ((err = cxgb_makedev(p)) != 0) {
1051 		printf("makedev failed %d\n", err);
1052 		return (err);
1053 	}
1054 
1055 	/* Create a list of media supported by this port */
1056 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1057 	    cxgb_media_status);
1058 	cxgb_build_medialist(p);
1059 
1060 	t3_sge_init_port(p);
1061 
1062 	return (err);
1063 }
1064 
1065 /*
1066  * cxgb_port_detach() is called via the device_detach method when
1067  * cxgb_free() calls bus_generic_detach().  It is responsible for
1068  * removing the device from the view of the kernel, i.e. from all
1069  * interface lists, etc.  This routine is only called when the driver is
1070  * being unloaded, not when the link goes down.
1071  */
1072 static int
1073 cxgb_port_detach(device_t dev)
1074 {
1075 	struct port_info *p;
1076 	struct adapter *sc;
1077 	int i;
1078 
1079 	p = device_get_softc(dev);
1080 	sc = p->adapter;
1081 
1082 	/* Tell cxgb_ioctl and if_init that the port is going away */
1083 	ADAPTER_LOCK(sc);
1084 	SET_DOOMED(p);
1085 	wakeup(&sc->flags);
1086 	while (IS_BUSY(sc))
1087 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1088 	SET_BUSY(sc);
1089 	ADAPTER_UNLOCK(sc);
1090 
1091 	if (p->port_cdev != NULL)
1092 		destroy_dev(p->port_cdev);
1093 
1094 	cxgb_uninit_synchronized(p);
1095 	ether_ifdetach(p->ifp);
1096 
1097 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1098 		struct sge_qset *qs = &sc->sge.qs[i];
1099 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1100 
1101 		callout_drain(&txq->txq_watchdog);
1102 		callout_drain(&txq->txq_timer);
1103 	}
1104 
1105 	PORT_LOCK_DEINIT(p);
1106 	if_free(p->ifp);
1107 	p->ifp = NULL;
1108 
1109 	ADAPTER_LOCK(sc);
1110 	CLR_BUSY(sc);
1111 	wakeup_one(&sc->flags);
1112 	ADAPTER_UNLOCK(sc);
1113 	return (0);
1114 }
1115 
1116 void
1117 t3_fatal_err(struct adapter *sc)
1118 {
1119 	u_int fw_status[4];
1120 
1121 	if (sc->flags & FULL_INIT_DONE) {
1122 		t3_sge_stop(sc);
1123 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1124 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1125 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1126 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1127 		t3_intr_disable(sc);
1128 	}
1129 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1130 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1131 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1132 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1133 }
1134 
1135 int
1136 t3_os_find_pci_capability(adapter_t *sc, int cap)
1137 {
1138 	device_t dev;
1139 	struct pci_devinfo *dinfo;
1140 	pcicfgregs *cfg;
1141 	uint32_t status;
1142 	uint8_t ptr;
1143 
1144 	dev = sc->dev;
1145 	dinfo = device_get_ivars(dev);
1146 	cfg = &dinfo->cfg;
1147 
1148 	status = pci_read_config(dev, PCIR_STATUS, 2);
1149 	if (!(status & PCIM_STATUS_CAPPRESENT))
1150 		return (0);
1151 
1152 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1153 	case 0:
1154 	case 1:
1155 		ptr = PCIR_CAP_PTR;
1156 		break;
1157 	case 2:
1158 		ptr = PCIR_CAP_PTR_2;
1159 		break;
1160 	default:
1161 		return (0);
1162 		break;
1163 	}
1164 	ptr = pci_read_config(dev, ptr, 1);
1165 
1166 	while (ptr != 0) {
1167 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1168 			return (ptr);
1169 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1170 	}
1171 
1172 	return (0);
1173 }
1174 
1175 int
1176 t3_os_pci_save_state(struct adapter *sc)
1177 {
1178 	device_t dev;
1179 	struct pci_devinfo *dinfo;
1180 
1181 	dev = sc->dev;
1182 	dinfo = device_get_ivars(dev);
1183 
1184 	pci_cfg_save(dev, dinfo, 0);
1185 	return (0);
1186 }
1187 
1188 int
1189 t3_os_pci_restore_state(struct adapter *sc)
1190 {
1191 	device_t dev;
1192 	struct pci_devinfo *dinfo;
1193 
1194 	dev = sc->dev;
1195 	dinfo = device_get_ivars(dev);
1196 
1197 	pci_cfg_restore(dev, dinfo);
1198 	return (0);
1199 }
1200 
1201 /**
1202  *	t3_os_link_changed - handle link status changes
1203  *	@adapter: the adapter associated with the link change
1204  *	@port_id: the port index whose link status has changed
1205  *	@link_status: the new status of the link
1206  *	@speed: the new speed setting
1207  *	@duplex: the new duplex setting
1208  *	@fc: the new flow-control setting
1209  *
1210  *	This is the OS-dependent handler for link status changes.  The OS
1211  *	neutral handler takes care of most of the processing for these events,
1212  *	then calls this handler for any OS-specific processing.
1213  */
1214 void
1215 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1216      int duplex, int fc, int mac_was_reset)
1217 {
1218 	struct port_info *pi = &adapter->port[port_id];
1219 	struct ifnet *ifp = pi->ifp;
1220 
1221 	/* no race with detach, so ifp should always be good */
1222 	KASSERT(ifp, ("%s: if detached.", __func__));
1223 
1224 	/* Reapply mac settings if they were lost due to a reset */
1225 	if (mac_was_reset) {
1226 		PORT_LOCK(pi);
1227 		cxgb_update_mac_settings(pi);
1228 		PORT_UNLOCK(pi);
1229 	}
1230 
1231 	if (link_status) {
1232 		ifp->if_baudrate = IF_Mbps(speed);
1233 		if_link_state_change(ifp, LINK_STATE_UP);
1234 	} else
1235 		if_link_state_change(ifp, LINK_STATE_DOWN);
1236 }
1237 
1238 /**
1239  *	t3_os_phymod_changed - handle PHY module changes
1240  *	@adap: the adapter whose PHY reported the module change
1241  *	@port_id: the index of the port whose PHY module changed
1242  *
1243  *	This is the OS-dependent handler for PHY module changes.  It is
1244  *	invoked when a PHY module is removed or inserted and performs any
1245  *	OS-specific processing.
1246  */
1247 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1248 {
1249 	static const char *mod_str[] = {
1250 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1251 	};
1252 	struct port_info *pi = &adap->port[port_id];
1253 	int mod = pi->phy.modtype;
1254 
1255 	if (mod != pi->media.ifm_cur->ifm_data)
1256 		cxgb_build_medialist(pi);
1257 
1258 	if (mod == phy_modtype_none)
1259 		if_printf(pi->ifp, "PHY module unplugged\n");
1260 	else {
1261 		KASSERT(mod < ARRAY_SIZE(mod_str),
1262 			("invalid PHY module type %d", mod));
1263 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1264 	}
1265 }
1266 
1267 void
1268 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1269 {
1270 
1271 	/*
1272 	 * The ifnet might not be allocated when this gets called, as this
1273 	 * is called early in attach by t3_prep_adapter, so just save the
1274 	 * address off in the port structure.
1275 	 */
1276 	if (cxgb_debug)
1277 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1278 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1279 }
1280 
1281 /*
1282  * Programs the XGMAC based on the settings in the ifnet.  These settings
1283  * include MTU, MAC address, mcast addresses, etc.
1284  */
1285 static void
1286 cxgb_update_mac_settings(struct port_info *p)
1287 {
1288 	struct ifnet *ifp = p->ifp;
1289 	struct t3_rx_mode rm;
1290 	struct cmac *mac = &p->mac;
1291 	int mtu, hwtagging;
1292 
1293 	PORT_LOCK_ASSERT_OWNED(p);
1294 
1295 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1296 
1297 	mtu = ifp->if_mtu;
1298 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1299 		mtu += ETHER_VLAN_ENCAP_LEN;
1300 
1301 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1302 
1303 	t3_mac_set_mtu(mac, mtu);
1304 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1305 	t3_mac_set_address(mac, 0, p->hw_addr);
1306 	t3_init_rx_mode(&rm, p);
1307 	t3_mac_set_rx_mode(mac, &rm);
1308 }
1309 
1310 
1311 static int
1312 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1313 			      unsigned long n)
1314 {
1315 	int attempts = 5;
1316 
1317 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1318 		if (!--attempts)
1319 			return (ETIMEDOUT);
1320 		t3_os_sleep(10);
1321 	}
1322 	return (0);
1323 }
1324 
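/*
 * Presumed purpose (inferred from the writes below): send a dummy write to
 * every SMT, L2T, and routing table entry, plus one TCB field write, so that
 * those on-chip memories start out with valid parity before cxgb_up() enables
 * the TP parity interrupt on T3 rev C adapters.
 */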
1325 static int
1326 init_tp_parity(struct adapter *adap)
1327 {
1328 	int i;
1329 	struct mbuf *m;
1330 	struct cpl_set_tcb_field *greq;
1331 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1332 
1333 	t3_tp_set_offload_mode(adap, 1);
1334 
1335 	for (i = 0; i < 16; i++) {
1336 		struct cpl_smt_write_req *req;
1337 
1338 		m = m_gethdr(M_WAITOK, MT_DATA);
1339 		req = mtod(m, struct cpl_smt_write_req *);
1340 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1341 		memset(req, 0, sizeof(*req));
1342 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1343 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1344 		req->iff = i;
1345 		t3_mgmt_tx(adap, m);
1346 	}
1347 
1348 	for (i = 0; i < 2048; i++) {
1349 		struct cpl_l2t_write_req *req;
1350 
1351 		m = m_gethdr(M_WAITOK, MT_DATA);
1352 		req = mtod(m, struct cpl_l2t_write_req *);
1353 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1354 		memset(req, 0, sizeof(*req));
1355 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1356 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1357 		req->params = htonl(V_L2T_W_IDX(i));
1358 		t3_mgmt_tx(adap, m);
1359 	}
1360 
1361 	for (i = 0; i < 2048; i++) {
1362 		struct cpl_rte_write_req *req;
1363 
1364 		m = m_gethdr(M_WAITOK, MT_DATA);
1365 		req = mtod(m, struct cpl_rte_write_req *);
1366 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1367 		memset(req, 0, sizeof(*req));
1368 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1369 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1370 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1371 		t3_mgmt_tx(adap, m);
1372 	}
1373 
1374 	m = m_gethdr(M_WAITOK, MT_DATA);
1375 	greq = mtod(m, struct cpl_set_tcb_field *);
1376 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1377 	memset(greq, 0, sizeof(*greq));
1378 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1379 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1380 	greq->mask = htobe64(1);
1381 	t3_mgmt_tx(adap, m);
1382 
1383 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1384 	t3_tp_set_offload_mode(adap, 0);
1385 	return (i);
1386 }
1387 
1388 /**
1389  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1390  *	@adap: the adapter
1391  *
1392  *	Sets up RSS to distribute packets to multiple receive queues.  We
1393  *	configure the RSS CPU lookup table to distribute to the number of HW
1394  *	receive queues, and the response queue lookup table to narrow that
1395  *	down to the response queues actually configured for each port.
1396  *	We always configure the RSS mapping for two ports since the mapping
1397  *	table has plenty of entries.
1398  */
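/*
 * Worked example (illustrative): with two ports of four queue sets each,
 * nq[0] = nq[1] = 4, so the first half of rspq_map cycles over qsets 0-3
 * (i % nq[0]) and the second half cycles over qsets 4-7 (i % nq[1] + nq[0]).
 * The reverse map then records, for each qset, the first table index that
 * points at it.
 */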
1399 static void
1400 setup_rss(adapter_t *adap)
1401 {
1402 	int i;
1403 	u_int nq[2];
1404 	uint8_t cpus[SGE_QSETS + 1];
1405 	uint16_t rspq_map[RSS_TABLE_SIZE];
1406 
1407 	for (i = 0; i < SGE_QSETS; ++i)
1408 		cpus[i] = i;
1409 	cpus[SGE_QSETS] = 0xff;
1410 
1411 	nq[0] = nq[1] = 0;
1412 	for_each_port(adap, i) {
1413 		const struct port_info *pi = adap2pinfo(adap, i);
1414 
1415 		nq[pi->tx_chan] += pi->nqsets;
1416 	}
1417 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1418 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1419 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1420 	}
1421 
1422 	/* Calculate the reverse RSS map table */
1423 	for (i = 0; i < SGE_QSETS; ++i)
1424 		adap->rrss_map[i] = 0xff;
1425 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1426 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1427 			adap->rrss_map[rspq_map[i]] = i;
1428 
1429 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1430 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1431 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1432 	              cpus, rspq_map);
1433 }
1434 
1435 static void
1436 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1437 			      int hi, int port)
1438 {
1439 	struct mbuf *m;
1440 	struct mngt_pktsched_wr *req;
1441 
1442 	m = m_gethdr(M_NOWAIT, MT_DATA);
1443 	if (m) {
1444 		req = mtod(m, struct mngt_pktsched_wr *);
1445 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1446 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1447 		req->sched = sched;
1448 		req->idx = qidx;
1449 		req->min = lo;
1450 		req->max = hi;
1451 		req->binding = port;
1452 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1453 		t3_mgmt_tx(adap, m);
1454 	}
1455 }
1456 
1457 static void
1458 bind_qsets(adapter_t *sc)
1459 {
1460 	int i, j;
1461 
1462 	for (i = 0; i < (sc)->params.nports; ++i) {
1463 		const struct port_info *pi = adap2pinfo(sc, i);
1464 
1465 		for (j = 0; j < pi->nqsets; ++j) {
1466 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1467 					  -1, pi->tx_chan);
1468 
1469 		}
1470 	}
1471 }
1472 
1473 static void
1474 update_tpeeprom(struct adapter *adap)
1475 {
1476 	const struct firmware *tpeeprom;
1477 
1478 	uint32_t version;
1479 	unsigned int major, minor;
1480 	int ret, len;
1481 	char rev, name[32];
1482 
1483 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1484 
1485 	major = G_TP_VERSION_MAJOR(version);
1486 	minor = G_TP_VERSION_MINOR(version);
1487 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1488 		return;
1489 
1490 	rev = t3rev2char(adap);
1491 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1492 
1493 	tpeeprom = firmware_get(name);
1494 	if (tpeeprom == NULL) {
1495 		device_printf(adap->dev,
1496 			      "could not load TP EEPROM: unable to load %s\n",
1497 			      name);
1498 		return;
1499 	}
1500 
1501 	len = tpeeprom->datasize - 4;
1502 
1503 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1504 	if (ret)
1505 		goto release_tpeeprom;
1506 
1507 	if (len != TP_SRAM_LEN) {
1508 		device_printf(adap->dev,
1509 			      "%s length is wrong len=%d expected=%d\n", name,
1510 			      len, TP_SRAM_LEN);
1511 		return;
1512 	}
1513 
1514 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1515 	    TP_SRAM_OFFSET);
1516 
1517 	if (!ret) {
1518 		device_printf(adap->dev,
1519 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1520 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1521 	} else
1522 		device_printf(adap->dev,
1523 			      "Protocol SRAM image update in EEPROM failed\n");
1524 
1525 release_tpeeprom:
1526 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1527 
1528 	return;
1529 }
1530 
1531 static int
1532 update_tpsram(struct adapter *adap)
1533 {
1534 	const struct firmware *tpsram;
1535 	int ret;
1536 	char rev, name[32];
1537 
1538 	rev = t3rev2char(adap);
1539 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1540 
1541 	update_tpeeprom(adap);
1542 
1543 	tpsram = firmware_get(name);
1544 	if (tpsram == NULL){
1545 	if (tpsram == NULL) {
1546 		return (EINVAL);
1547 	} else
1548 		device_printf(adap->dev, "updating TP SRAM\n");
1549 
1550 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1551 	if (ret)
1552 		goto release_tpsram;
1553 
1554 	ret = t3_set_proto_sram(adap, tpsram->data);
1555 	if (ret)
1556 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1557 
1558 release_tpsram:
1559 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1560 
1561 	return (ret);
1562 }
1563 
1564 /**
1565  *	cxgb_up - enable the adapter
1566  *	@adap: adapter being enabled
1567  *
1568  *	Called when the first port is enabled, this function performs the
1569  *	actions necessary to make an adapter operational, such as completing
1570  *	the initialization of HW modules, and enabling interrupts.
1571  */
1572 static int
1573 cxgb_up(struct adapter *sc)
1574 {
1575 	int err = 0;
1576 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1577 
1578 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1579 					   __func__, sc->open_device_map));
1580 
1581 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1582 
1583 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1584 
1585 		if ((sc->flags & FW_UPTODATE) == 0)
1586 			if ((err = upgrade_fw(sc)))
1587 				goto out;
1588 
1589 		if ((sc->flags & TPS_UPTODATE) == 0)
1590 			if ((err = update_tpsram(sc)))
1591 				goto out;
1592 
1593 		if (is_offload(sc) && nfilters != 0) {
1594 			sc->params.mc5.nservers = 0;
1595 
1596 			if (nfilters < 0)
1597 				sc->params.mc5.nfilters = mxf;
1598 			else
1599 				sc->params.mc5.nfilters = min(nfilters, mxf);
1600 		}
1601 
1602 		err = t3_init_hw(sc, 0);
1603 		if (err)
1604 			goto out;
1605 
1606 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1607 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1608 
1609 		err = setup_sge_qsets(sc);
1610 		if (err)
1611 			goto out;
1612 
1613 		alloc_filters(sc);
1614 		setup_rss(sc);
1615 
1616 		t3_add_configured_sysctls(sc);
1617 		sc->flags |= FULL_INIT_DONE;
1618 	}
1619 
1620 	t3_intr_clear(sc);
1621 	t3_sge_start(sc);
1622 	t3_intr_enable(sc);
1623 
1624 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1625 	    is_offload(sc) && init_tp_parity(sc) == 0)
1626 		sc->flags |= TP_PARITY_INIT;
1627 
1628 	if (sc->flags & TP_PARITY_INIT) {
1629 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1630 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1631 	}
1632 
1633 	if (!(sc->flags & QUEUES_BOUND)) {
1634 		bind_qsets(sc);
1635 		setup_hw_filters(sc);
1636 		sc->flags |= QUEUES_BOUND;
1637 	}
1638 
1639 	t3_sge_reset_adapter(sc);
1640 out:
1641 	return (err);
1642 }
1643 
1644 /*
1645  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1646  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1647  * during controller_detach, not here.
1648  */
1649 static void
1650 cxgb_down(struct adapter *sc)
1651 {
1652 	t3_sge_stop(sc);
1653 	t3_intr_disable(sc);
1654 }
1655 
1656 /*
1657  * if_init for cxgb ports.
1658  */
1659 static void
1660 cxgb_init(void *arg)
1661 {
1662 	struct port_info *p = arg;
1663 	struct adapter *sc = p->adapter;
1664 
1665 	ADAPTER_LOCK(sc);
1666 	cxgb_init_locked(p); /* releases adapter lock */
1667 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1668 }
1669 
1670 static int
1671 cxgb_init_locked(struct port_info *p)
1672 {
1673 	struct adapter *sc = p->adapter;
1674 	struct ifnet *ifp = p->ifp;
1675 	struct cmac *mac = &p->mac;
1676 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1677 
1678 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1679 
1680 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1681 		gave_up_lock = 1;
1682 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1683 			rc = EINTR;
1684 			goto done;
1685 		}
1686 	}
1687 	if (IS_DOOMED(p)) {
1688 		rc = ENXIO;
1689 		goto done;
1690 	}
1691 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1692 
1693 	/*
1694 	 * The code that runs during one-time adapter initialization can sleep
1695 	 * so it's important not to hold any locks across it.
1696 	 */
1697 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1698 
1699 	if (may_sleep) {
1700 		SET_BUSY(sc);
1701 		gave_up_lock = 1;
1702 		ADAPTER_UNLOCK(sc);
1703 	}
1704 
1705 	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1706 			goto done;
1707 
1708 	PORT_LOCK(p);
1709 	if (isset(&sc->open_device_map, p->port_id) &&
1710 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1711 		PORT_UNLOCK(p);
1712 		goto done;
1713 	}
1714 	t3_port_intr_enable(sc, p->port_id);
1715 	if (!mac->multiport)
1716 		t3_mac_init(mac);
1717 	cxgb_update_mac_settings(p);
1718 	t3_link_start(&p->phy, mac, &p->link_config);
1719 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1720 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1721 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1722 	PORT_UNLOCK(p);
1723 
1724 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1725 		struct sge_qset *qs = &sc->sge.qs[i];
1726 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1727 
1728 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1729 				 txq->txq_watchdog.c_cpu);
1730 	}
1731 
1732 	/* all ok */
1733 	setbit(&sc->open_device_map, p->port_id);
1734 	callout_reset(&p->link_check_ch,
1735 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1736 	    link_check_callout, p);
1737 
1738 done:
1739 	if (may_sleep) {
1740 		ADAPTER_LOCK(sc);
1741 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1742 		CLR_BUSY(sc);
1743 	}
1744 	if (gave_up_lock)
1745 		wakeup_one(&sc->flags);
1746 	ADAPTER_UNLOCK(sc);
1747 	return (rc);
1748 }
1749 
1750 static int
1751 cxgb_uninit_locked(struct port_info *p)
1752 {
1753 	struct adapter *sc = p->adapter;
1754 	int rc;
1755 
1756 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1757 
1758 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1759 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1760 			rc = EINTR;
1761 			goto done;
1762 		}
1763 	}
1764 	if (IS_DOOMED(p)) {
1765 		rc = ENXIO;
1766 		goto done;
1767 	}
1768 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1769 	SET_BUSY(sc);
1770 	ADAPTER_UNLOCK(sc);
1771 
1772 	rc = cxgb_uninit_synchronized(p);
1773 
1774 	ADAPTER_LOCK(sc);
1775 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1776 	CLR_BUSY(sc);
1777 	wakeup_one(&sc->flags);
1778 done:
1779 	ADAPTER_UNLOCK(sc);
1780 	return (rc);
1781 }
1782 
1783 /*
1784  * Called on "ifconfig down", and from port_detach
1785  */
1786 static int
1787 cxgb_uninit_synchronized(struct port_info *pi)
1788 {
1789 	struct adapter *sc = pi->adapter;
1790 	struct ifnet *ifp = pi->ifp;
1791 
1792 	/*
1793 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1794 	 */
1795 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1796 
1797 	/*
1798 	 * Clear this port's bit from the open device map, and then drain all
1799 	 * the tasks that can access/manipulate this port's port_info or ifp.
1800 	 * We disable this port's interrupts here and so the slow/ext
1801 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1802 	 * be enqueued every second but the runs after this drain will not see
1803 	 * this port in the open device map.
1804 	 *
1805 	 * A well-behaved task must take open_device_map into account and ignore
1806 	 * ports that are not open.
1807 	 */
1808 	clrbit(&sc->open_device_map, pi->port_id);
1809 	t3_port_intr_disable(sc, pi->port_id);
1810 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1811 	taskqueue_drain(sc->tq, &sc->tick_task);
1812 
1813 	callout_drain(&pi->link_check_ch);
1814 	taskqueue_drain(sc->tq, &pi->link_check_task);
1815 
1816 	PORT_LOCK(pi);
1817 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1818 
1819 	/* disable pause frames */
1820 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1821 
1822 	/* Reset RX FIFO HWM */
1823 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1824 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1825 
1826 	DELAY(100 * 1000);
1827 
1828 	/* Wait for TXFIFO empty */
1829 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1830 			F_TXFIFO_EMPTY, 1, 20, 5);
1831 
1832 	DELAY(100 * 1000);
1833 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1834 
1835 	pi->phy.ops->power_down(&pi->phy, 1);
1836 
1837 	PORT_UNLOCK(pi);
1838 
1839 	pi->link_config.link_ok = 0;
1840 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1841 
1842 	if (sc->open_device_map == 0)
1843 		cxgb_down(pi->adapter);
1844 
1845 	return (0);
1846 }
1847 
1848 /*
1849  * Mark LRO enabled or disabled in all qsets for this port.
1850  */
1851 static int
1852 cxgb_set_lro(struct port_info *p, int enabled)
1853 {
1854 	int i;
1855 	struct adapter *adp = p->adapter;
1856 	struct sge_qset *q;
1857 
1858 	for (i = 0; i < p->nqsets; i++) {
1859 		q = &adp->sge.qs[p->first_qset + i];
1860 		q->lro.enabled = (enabled != 0);
1861 	}
1862 	return (0);
1863 }
1864 
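/*
 * Ifnet ioctl handler.  Requests that reconfigure the port take the adapter
 * lock first and fail with ENXIO or EBUSY if the port is being detached or
 * another configuration operation is in flight; changes that affect the MAC
 * (MTU, promisc/allmulti, multicast lists, VLAN capabilities) are then
 * applied under the port lock via cxgb_update_mac_settings().
 */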
1865 static int
1866 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1867 {
1868 	struct port_info *p = ifp->if_softc;
1869 	struct adapter *sc = p->adapter;
1870 	struct ifreq *ifr = (struct ifreq *)data;
1871 	int flags, error = 0, mtu;
1872 	uint32_t mask;
1873 
1874 	switch (command) {
1875 	case SIOCSIFMTU:
1876 		ADAPTER_LOCK(sc);
1877 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1878 		if (error) {
1879 fail:
1880 			ADAPTER_UNLOCK(sc);
1881 			return (error);
1882 		}
1883 
1884 		mtu = ifr->ifr_mtu;
1885 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1886 			error = EINVAL;
1887 		} else {
1888 			ifp->if_mtu = mtu;
1889 			PORT_LOCK(p);
1890 			cxgb_update_mac_settings(p);
1891 			PORT_UNLOCK(p);
1892 		}
1893 		ADAPTER_UNLOCK(sc);
1894 		break;
1895 	case SIOCSIFFLAGS:
1896 		ADAPTER_LOCK(sc);
1897 		if (IS_DOOMED(p)) {
1898 			error = ENXIO;
1899 			goto fail;
1900 		}
1901 		if (ifp->if_flags & IFF_UP) {
1902 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1903 				flags = p->if_flags;
1904 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1905 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1906 					if (IS_BUSY(sc)) {
1907 						error = EBUSY;
1908 						goto fail;
1909 					}
1910 					PORT_LOCK(p);
1911 					cxgb_update_mac_settings(p);
1912 					PORT_UNLOCK(p);
1913 				}
1914 				ADAPTER_UNLOCK(sc);
1915 			} else
1916 				error = cxgb_init_locked(p);
1917 			p->if_flags = ifp->if_flags;
1918 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1919 			error = cxgb_uninit_locked(p);
1920 		else
1921 			ADAPTER_UNLOCK(sc);
1922 
1923 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1924 		break;
1925 	case SIOCADDMULTI:
1926 	case SIOCDELMULTI:
1927 		ADAPTER_LOCK(sc);
1928 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1929 		if (error)
1930 			goto fail;
1931 
1932 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1933 			PORT_LOCK(p);
1934 			cxgb_update_mac_settings(p);
1935 			PORT_UNLOCK(p);
1936 		}
1937 		ADAPTER_UNLOCK(sc);
1938 
1939 		break;
1940 	case SIOCSIFCAP:
1941 		ADAPTER_LOCK(sc);
1942 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1943 		if (error)
1944 			goto fail;
1945 
1946 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1947 		if (mask & IFCAP_TXCSUM) {
1948 			ifp->if_capenable ^= IFCAP_TXCSUM;
1949 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1950 
1951 			if (IFCAP_TSO4 & ifp->if_capenable &&
1952 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1953 				ifp->if_capenable &= ~IFCAP_TSO4;
1954 				if_printf(ifp,
1955 				    "tso4 disabled due to -txcsum.\n");
1956 			}
1957 		}
1958 		if (mask & IFCAP_TXCSUM_IPV6) {
1959 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1960 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1961 
1962 			if (IFCAP_TSO6 & ifp->if_capenable &&
1963 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1964 				ifp->if_capenable &= ~IFCAP_TSO6;
1965 				if_printf(ifp,
1966 				    "tso6 disabled due to -txcsum6.\n");
1967 			}
1968 		}
1969 		if (mask & IFCAP_RXCSUM)
1970 			ifp->if_capenable ^= IFCAP_RXCSUM;
1971 		if (mask & IFCAP_RXCSUM_IPV6)
1972 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1973 
1974 		/*
1975 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1976 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1977 		 * sending a TSO request our way, so it's sufficient to toggle
1978 		 * IFCAP_TSOx only.
1979 		 */
1980 		if (mask & IFCAP_TSO4) {
1981 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1982 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1983 				if_printf(ifp, "enable txcsum first.\n");
1984 				error = EAGAIN;
1985 				goto fail;
1986 			}
1987 			ifp->if_capenable ^= IFCAP_TSO4;
1988 		}
1989 		if (mask & IFCAP_TSO6) {
1990 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1991 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1992 				if_printf(ifp, "enable txcsum6 first.\n");
1993 				error = EAGAIN;
1994 				goto fail;
1995 			}
1996 			ifp->if_capenable ^= IFCAP_TSO6;
1997 		}
1998 		if (mask & IFCAP_LRO) {
1999 			ifp->if_capenable ^= IFCAP_LRO;
2000 
2001 			/* Safe to do this even if cxgb_up has not been called yet */
2002 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2003 		}
2004 #ifdef TCP_OFFLOAD
2005 		if (mask & IFCAP_TOE4) {
2006 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2007 
2008 			error = toe_capability(p, enable);
2009 			if (error == 0)
2010 				ifp->if_capenable ^= mask;
2011 		}
2012 #endif
2013 		if (mask & IFCAP_VLAN_HWTAGGING) {
2014 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2015 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2016 				PORT_LOCK(p);
2017 				cxgb_update_mac_settings(p);
2018 				PORT_UNLOCK(p);
2019 			}
2020 		}
2021 		if (mask & IFCAP_VLAN_MTU) {
2022 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2023 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2024 				PORT_LOCK(p);
2025 				cxgb_update_mac_settings(p);
2026 				PORT_UNLOCK(p);
2027 			}
2028 		}
2029 		if (mask & IFCAP_VLAN_HWTSO)
2030 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2031 		if (mask & IFCAP_VLAN_HWCSUM)
2032 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2033 
2034 #ifdef VLAN_CAPABILITIES
2035 		VLAN_CAPABILITIES(ifp);
2036 #endif
2037 		ADAPTER_UNLOCK(sc);
2038 		break;
2039 	case SIOCSIFMEDIA:
2040 	case SIOCGIFMEDIA:
2041 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2042 		break;
2043 	default:
2044 		error = ether_ioctl(ifp, command, data);
2045 	}
2046 
2047 	return (error);
2048 }
2049 
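/*
 * Selecting a specific media via ifconfig is not supported; the media list
 * only reports what the PHY/module provides.
 */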
2050 static int
2051 cxgb_media_change(struct ifnet *ifp)
2052 {
2053 	return (EOPNOTSUPP);
2054 }
2055 
2056 /*
2057  * Translates phy->modtype to the correct Ethernet media subtype.
2058  */
2059 static int
2060 cxgb_ifm_type(int mod)
2061 {
2062 	switch (mod) {
2063 	case phy_modtype_sr:
2064 		return (IFM_10G_SR);
2065 	case phy_modtype_lr:
2066 		return (IFM_10G_LR);
2067 	case phy_modtype_lrm:
2068 		return (IFM_10G_LRM);
2069 	case phy_modtype_twinax:
2070 		return (IFM_10G_TWINAX);
2071 	case phy_modtype_twinax_long:
2072 		return (IFM_10G_TWINAX_LONG);
2073 	case phy_modtype_none:
2074 		return (IFM_NONE);
2075 	case phy_modtype_unknown:
2076 		return (IFM_UNKNOWN);
2077 	}
2078 
2079 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2080 	return (IFM_UNKNOWN);
2081 }
2082 
2083 /*
2084  * Rebuilds the ifmedia list for this port, and sets the current media.
2085  */
2086 static void
2087 cxgb_build_medialist(struct port_info *p)
2088 {
2089 	struct cphy *phy = &p->phy;
2090 	struct ifmedia *media = &p->media;
2091 	int mod = phy->modtype;
2092 	int m = IFM_ETHER | IFM_FDX;
2093 
2094 	PORT_LOCK(p);
2095 
2096 	ifmedia_removeall(media);
2097 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2098 		/* Copper (RJ45) */
2099 
2100 		if (phy->caps & SUPPORTED_10000baseT_Full)
2101 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2102 
2103 		if (phy->caps & SUPPORTED_1000baseT_Full)
2104 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2105 
2106 		if (phy->caps & SUPPORTED_100baseT_Full)
2107 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2108 
2109 		if (phy->caps & SUPPORTED_10baseT_Full)
2110 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2111 
2112 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2113 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2114 
2115 	} else if (phy->caps & SUPPORTED_TP) {
2116 		/* Copper (CX4) */
2117 
2118 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2119 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2120 
2121 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2122 		ifmedia_set(media, m | IFM_10G_CX4);
2123 
2124 	} else if (phy->caps & SUPPORTED_FIBRE &&
2125 		   phy->caps & SUPPORTED_10000baseT_Full) {
2126 		/* 10G optical (but includes SFP+ twinax) */
2127 
2128 		m |= cxgb_ifm_type(mod);
2129 		if (IFM_SUBTYPE(m) == IFM_NONE)
2130 			m &= ~IFM_FDX;
2131 
2132 		ifmedia_add(media, m, mod, NULL);
2133 		ifmedia_set(media, m);
2134 
2135 	} else if (phy->caps & SUPPORTED_FIBRE &&
2136 		   phy->caps & SUPPORTED_1000baseT_Full) {
2137 		/* 1G optical */
2138 
2139 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2140 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2141 		ifmedia_set(media, m | IFM_1000_SX);
2142 
2143 	} else {
2144 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2145 			    phy->caps));
2146 	}
2147 
2148 	PORT_UNLOCK(p);
2149 }
2150 
2151 static void
2152 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2153 {
2154 	struct port_info *p = ifp->if_softc;
2155 	struct ifmedia_entry *cur = p->media.ifm_cur;
2156 	int speed = p->link_config.speed;
2157 
2158 	if (cur->ifm_data != p->phy.modtype) {
2159 		cxgb_build_medialist(p);
2160 		cur = p->media.ifm_cur;
2161 	}
2162 
2163 	ifmr->ifm_status = IFM_AVALID;
2164 	if (!p->link_config.link_ok)
2165 		return;
2166 
2167 	ifmr->ifm_status |= IFM_ACTIVE;
2168 
2169 	/*
2170 	 * active and current will differ iff current media is autoselect.  That
2171 	 * can happen only for copper RJ45.
2172 	 */
2173 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2174 		return;
2175 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2176 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2177 
2178 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2179 	if (speed == SPEED_10000)
2180 		ifmr->ifm_active |= IFM_10G_T;
2181 	else if (speed == SPEED_1000)
2182 		ifmr->ifm_active |= IFM_1000_T;
2183 	else if (speed == SPEED_100)
2184 		ifmr->ifm_active |= IFM_100_TX;
2185 	else if (speed == SPEED_10)
2186 		ifmr->ifm_active |= IFM_10_T;
2187 	else
2188 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2189 			    speed));
2190 }
2191 
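/*
 * Interrupt handler for slow (non data-path) events: mask further PL
 * interrupts (the read-back flushes the write) and defer the actual
 * processing to the slow interrupt task on the adapter's taskqueue.
 */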
2192 static void
2193 cxgb_async_intr(void *data)
2194 {
2195 	adapter_t *sc = data;
2196 
2197 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2198 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2199 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2200 }
2201 
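/*
 * Callout that requests a link check for an open port; the actual work is
 * deferred to the link check task on the adapter's taskqueue.
 */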
2202 static void
2203 link_check_callout(void *arg)
2204 {
2205 	struct port_info *pi = arg;
2206 	struct adapter *sc = pi->adapter;
2207 
2208 	if (!isset(&sc->open_device_map, pi->port_id))
2209 		return;
2210 
2211 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2212 }
2213 
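/*
 * Re-evaluate the port's link state.  If a link fault is pending, or the PHY
 * cannot raise a link interrupt, keep polling by rearming the callout once a
 * second.
 */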
2214 static void
2215 check_link_status(void *arg, int pending)
2216 {
2217 	struct port_info *pi = arg;
2218 	struct adapter *sc = pi->adapter;
2219 
2220 	if (!isset(&sc->open_device_map, pi->port_id))
2221 		return;
2222 
2223 	t3_link_changed(sc, pi->port_id);
2224 
2225 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2226 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2227 }
2228 
2229 void
2230 t3_os_link_intr(struct port_info *pi)
2231 {
2232 	/*
2233 	 * Schedule a link check in the near future.  If the link is flapping
2234 	 * rapidly we'll keep resetting the callout and delaying the check until
2235 	 * things stabilize a bit.
2236 	 */
2237 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2238 }
2239 
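/*
 * Per-tick MAC watchdog for T3B2 adapters.  For every open port with a good
 * link, run the MAC watchdog: a result of 1 is counted as a MAC toggle, and a
 * result of 2 means the MAC needs to be reprogrammed, so redo the MAC
 * settings, restart the link, re-enable RX/TX and the port interrupt, and
 * count it as a reset.
 */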
2240 static void
2241 check_t3b2_mac(struct adapter *sc)
2242 {
2243 	int i;
2244 
2245 	if (sc->flags & CXGB_SHUTDOWN)
2246 		return;
2247 
2248 	for_each_port(sc, i) {
2249 		struct port_info *p = &sc->port[i];
2250 		int status;
2251 #ifdef INVARIANTS
2252 		struct ifnet *ifp = p->ifp;
2253 #endif
2254 
2255 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2256 		    !p->link_config.link_ok)
2257 			continue;
2258 
2259 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2260 			("%s: state mismatch (drv_flags %x, device_map %x)",
2261 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2262 
2263 		PORT_LOCK(p);
2264 		status = t3b2_mac_watchdog_task(&p->mac);
2265 		if (status == 1)
2266 			p->mac.stats.num_toggled++;
2267 		else if (status == 2) {
2268 			struct cmac *mac = &p->mac;
2269 
2270 			cxgb_update_mac_settings(p);
2271 			t3_link_start(&p->phy, mac, &p->link_config);
2272 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2273 			t3_port_intr_enable(sc, p->port_id);
2274 			p->mac.stats.num_resets++;
2275 		}
2276 		PORT_UNLOCK(p);
2277 	}
2278 }
2279 
2280 static void
2281 cxgb_tick(void *arg)
2282 {
2283 	adapter_t *sc = (adapter_t *)arg;
2284 
2285 	if (sc->flags & CXGB_SHUTDOWN)
2286 		return;
2287 
2288 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2289 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2290 }
2291 
2292 static void
2293 cxgb_tick_handler(void *arg, int count)
2294 {
2295 	adapter_t *sc = (adapter_t *)arg;
2296 	const struct adapter_params *p = &sc->params;
2297 	int i;
2298 	uint32_t cause, reset;
2299 
2300 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2301 		return;
2302 
2303 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2304 		check_t3b2_mac(sc);
2305 
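	/*
	 * Check for response queue starvation and free list empty events.
	 * Walk the per-qset status bits, bump the matching counters, and
	 * write the bits back to clear them.
	 */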
2306 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2307 	if (cause) {
2308 		struct sge_qset *qs = &sc->sge.qs[0];
2309 		uint32_t mask, v;
2310 
2311 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2312 
2313 		mask = 1;
2314 		for (i = 0; i < SGE_QSETS; i++) {
2315 			if (v & mask)
2316 				qs[i].rspq.starved++;
2317 			mask <<= 1;
2318 		}
2319 
2320 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2321 
2322 		for (i = 0; i < SGE_QSETS * 2; i++) {
2323 			if (v & mask) {
2324 				qs[i / 2].fl[i % 2].empty++;
2325 			}
2326 			mask <<= 1;
2327 		}
2328 
2329 		/* clear */
2330 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2331 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2332 	}
2333 
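	/*
	 * Fold the hardware MAC statistics into the ifnet counters for each
	 * open port, accumulate software transmit queue drops, and count RX
	 * FIFO overflows (cleared by writing the cause bit back).
	 */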
2334 	for (i = 0; i < sc->params.nports; i++) {
2335 		struct port_info *pi = &sc->port[i];
2336 		struct ifnet *ifp = pi->ifp;
2337 		struct cmac *mac = &pi->mac;
2338 		struct mac_stats *mstats = &mac->stats;
2339 		int drops, j;
2340 
2341 		if (!isset(&sc->open_device_map, pi->port_id))
2342 			continue;
2343 
2344 		PORT_LOCK(pi);
2345 		t3_mac_update_stats(mac);
2346 		PORT_UNLOCK(pi);
2347 
2348 		ifp->if_opackets = mstats->tx_frames;
2349 		ifp->if_ipackets = mstats->rx_frames;
2350 		ifp->if_obytes = mstats->tx_octets;
2351 		ifp->if_ibytes = mstats->rx_octets;
2352 		ifp->if_omcasts = mstats->tx_mcast_frames;
2353 		ifp->if_imcasts = mstats->rx_mcast_frames;
2354 		ifp->if_collisions = mstats->tx_total_collisions;
2355 		ifp->if_iqdrops = mstats->rx_cong_drops;
2356 
2357 		drops = 0;
2358 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2359 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2360 		ifp->if_snd.ifq_drops = drops;
2361 
2362 		ifp->if_oerrors =
2363 		    mstats->tx_excess_collisions +
2364 		    mstats->tx_underrun +
2365 		    mstats->tx_len_errs +
2366 		    mstats->tx_mac_internal_errs +
2367 		    mstats->tx_excess_deferral +
2368 		    mstats->tx_fcs_errs;
2369 		ifp->if_ierrors =
2370 		    mstats->rx_jabber +
2371 		    mstats->rx_data_errs +
2372 		    mstats->rx_sequence_errs +
2373 		    mstats->rx_runt +
2374 		    mstats->rx_too_long +
2375 		    mstats->rx_mac_internal_errs +
2376 		    mstats->rx_short +
2377 		    mstats->rx_fcs_errs;
2378 
2379 		if (mac->multiport)
2380 			continue;
2381 
2382 		/* Count rx fifo overflows, once per second */
2383 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2384 		reset = 0;
2385 		if (cause & F_RXFIFO_OVERFLOW) {
2386 			mac->stats.rx_fifo_ovfl++;
2387 			reset |= F_RXFIFO_OVERFLOW;
2388 		}
2389 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2390 	}
2391 }
2392 
2393 static void
2394 touch_bars(device_t dev)
2395 {
2396 	/*
2397 	 * Don't enable yet
2398 	 */
2399 #if !defined(__LP64__) && 0
2400 	u32 v;
2401 
2402 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2403 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2404 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2405 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2406 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2407 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2408 #endif
2409 }
2410 
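/*
 * Write 'len' bytes at 'offset' into the serial EEPROM.  The EEPROM is
 * accessed in 4-byte words, so an unaligned write first reads the first and
 * last words, merges in the new bytes, and then writes back whole words with
 * the write protection temporarily disabled.
 */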
2411 static int
2412 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2413 {
2414 	uint8_t *buf;
2415 	int err = 0;
2416 	u32 aligned_offset, aligned_len, *p;
2417 	struct adapter *adapter = pi->adapter;
2418 
2419 
2420 	aligned_offset = offset & ~3;
2421 	aligned_len = (len + (offset & 3) + 3) & ~3;
2422 
2423 	if (aligned_offset != offset || aligned_len != len) {
2424 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2425 		if (!buf)
2426 			return (ENOMEM);
2427 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2428 		if (!err && aligned_len > 4)
2429 			err = t3_seeprom_read(adapter,
2430 					      aligned_offset + aligned_len - 4,
2431 					      (u32 *)&buf[aligned_len - 4]);
2432 		if (err)
2433 			goto out;
2434 		memcpy(buf + (offset & 3), data, len);
2435 	} else
2436 		buf = (uint8_t *)(uintptr_t)data;
2437 
2438 	err = t3_seeprom_wp(adapter, 0);
2439 	if (err)
2440 		goto out;
2441 
2442 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2443 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2444 		aligned_offset += 4;
2445 	}
2446 
2447 	if (!err)
2448 		err = t3_seeprom_wp(adapter, 1);
2449 out:
2450 	if (buf != data)
2451 		free(buf, M_DEVBUF);
2452 	return (err);
2453 }
2454 
2455 
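/*
 * Range check for ioctl parameters.  A negative value means "not specified"
 * and is always accepted; otherwise the value must lie within [lo, hi].
 */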
2456 static int
2457 in_range(int val, int lo, int hi)
2458 {
2459 	return (val < 0 || (val <= hi && val >= lo));
2460 }
2461 
2462 static int
2463 cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2464 {
2465 	return (0);
2466 }
2467 
2468 static int
2469 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2470 {
2471 	return (0);
2472 }
2473 
2474 static int
2475 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2476     int fflag, struct thread *td)
2477 {
2478 	int mmd, error = 0;
2479 	struct port_info *pi = dev->si_drv1;
2480 	adapter_t *sc = pi->adapter;
2481 
2482 #ifdef PRIV_SUPPORTED
2483 	if (priv_check(td, PRIV_DRIVER)) {
2484 		if (cxgb_debug)
2485 			printf("user does not have access to privileged ioctls\n");
2486 		return (EPERM);
2487 	}
2488 #else
2489 	if (suser(td)) {
2490 		if (cxgb_debug)
2491 			printf("user does not have access to privileged ioctls\n");
2492 		return (EPERM);
2493 	}
2494 #endif
2495 
2496 	switch (cmd) {
2497 	case CHELSIO_GET_MIIREG: {
2498 		uint32_t val;
2499 		struct cphy *phy = &pi->phy;
2500 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2501 
2502 		if (!phy->mdio_read)
2503 			return (EOPNOTSUPP);
2504 		if (is_10G(sc)) {
2505 			mmd = mid->phy_id >> 8;
2506 			if (!mmd)
2507 				mmd = MDIO_DEV_PCS;
2508 			else if (mmd > MDIO_DEV_VEND2)
2509 				return (EINVAL);
2510 
2511 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2512 					     mid->reg_num, &val);
2513 		} else
2514 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2515 					     mid->reg_num & 0x1f, &val);
2516 		if (error == 0)
2517 			mid->val_out = val;
2518 		break;
2519 	}
2520 	case CHELSIO_SET_MIIREG: {
2521 		struct cphy *phy = &pi->phy;
2522 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2523 
2524 		if (!phy->mdio_write)
2525 			return (EOPNOTSUPP);
2526 		if (is_10G(sc)) {
2527 			mmd = mid->phy_id >> 8;
2528 			if (!mmd)
2529 				mmd = MDIO_DEV_PCS;
2530 			else if (mmd > MDIO_DEV_VEND2)
2531 				return (EINVAL);
2532 
2533 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2534 					      mmd, mid->reg_num, mid->val_in);
2535 		} else
2536 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2537 					      mid->reg_num & 0x1f,
2538 					      mid->val_in);
2539 		break;
2540 	}
2541 	case CHELSIO_SETREG: {
2542 		struct ch_reg *edata = (struct ch_reg *)data;
2543 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2544 			return (EFAULT);
2545 		t3_write_reg(sc, edata->addr, edata->val);
2546 		break;
2547 	}
2548 	case CHELSIO_GETREG: {
2549 		struct ch_reg *edata = (struct ch_reg *)data;
2550 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2551 			return (EFAULT);
2552 		edata->val = t3_read_reg(sc, edata->addr);
2553 		break;
2554 	}
2555 	case CHELSIO_GET_SGE_CONTEXT: {
2556 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2557 		mtx_lock_spin(&sc->sge.reg_lock);
2558 		switch (ecntxt->cntxt_type) {
2559 		case CNTXT_TYPE_EGRESS:
2560 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2561 			    ecntxt->data);
2562 			break;
2563 		case CNTXT_TYPE_FL:
2564 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2565 			    ecntxt->data);
2566 			break;
2567 		case CNTXT_TYPE_RSP:
2568 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2569 			    ecntxt->data);
2570 			break;
2571 		case CNTXT_TYPE_CQ:
2572 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2573 			    ecntxt->data);
2574 			break;
2575 		default:
2576 			error = EINVAL;
2577 			break;
2578 		}
2579 		mtx_unlock_spin(&sc->sge.reg_lock);
2580 		break;
2581 	}
2582 	case CHELSIO_GET_SGE_DESC: {
2583 		struct ch_desc *edesc = (struct ch_desc *)data;
2584 		int ret;
2585 		if (edesc->queue_num >= SGE_QSETS * 6)
2586 			return (EINVAL);
2587 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2588 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2589 		if (ret < 0)
2590 			return (EINVAL);
2591 		edesc->size = ret;
2592 		break;
2593 	}
2594 	case CHELSIO_GET_QSET_PARAMS: {
2595 		struct qset_params *q;
2596 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2597 		int q1 = pi->first_qset;
2598 		int nqsets = pi->nqsets;
2599 		int i;
2600 
2601 		if (t->qset_idx >= nqsets)
2602 			return (EINVAL);
2603 
2604 		i = q1 + t->qset_idx;
2605 		q = &sc->params.sge.qset[i];
2606 		t->rspq_size   = q->rspq_size;
2607 		t->txq_size[0] = q->txq_size[0];
2608 		t->txq_size[1] = q->txq_size[1];
2609 		t->txq_size[2] = q->txq_size[2];
2610 		t->fl_size[0]  = q->fl_size;
2611 		t->fl_size[1]  = q->jumbo_size;
2612 		t->polling     = q->polling;
2613 		t->lro         = q->lro;
2614 		t->intr_lat    = q->coalesce_usecs;
2615 		t->cong_thres  = q->cong_thres;
2616 		t->qnum        = i;
2617 
2618 		if ((sc->flags & FULL_INIT_DONE) == 0)
2619 			t->vector = 0;
2620 		else if (sc->flags & USING_MSIX)
2621 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2622 		else
2623 			t->vector = rman_get_start(sc->irq_res);
2624 
2625 		break;
2626 	}
2627 	case CHELSIO_GET_QSET_NUM: {
2628 		struct ch_reg *edata = (struct ch_reg *)data;
2629 		edata->val = pi->nqsets;
2630 		break;
2631 	}
2632 	case CHELSIO_LOAD_FW: {
2633 		uint8_t *fw_data;
2634 		uint32_t vers;
2635 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2636 
2637 		/*
2638 		 * You're allowed to load firmware only before FULL_INIT_DONE is set.
2639 		 *
2640 		 * FW_UPTODATE is also set so the rest of the initialization
2641 		 * will not overwrite what was loaded here.  This gives you the
2642 		 * flexibility to load any firmware (and maybe shoot yourself in
2643 		 * the foot).
2644 		 */
2645 
2646 		ADAPTER_LOCK(sc);
2647 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2648 			ADAPTER_UNLOCK(sc);
2649 			return (EBUSY);
2650 		}
2651 
2652 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2653 		if (!fw_data)
2654 			error = ENOMEM;
2655 		else
2656 			error = copyin(t->buf, fw_data, t->len);
2657 
2658 		if (!error)
2659 			error = -t3_load_fw(sc, fw_data, t->len);
2660 
2661 		if (t3_get_fw_version(sc, &vers) == 0) {
2662 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2663 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2664 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2665 		}
2666 
2667 		if (!error)
2668 			sc->flags |= FW_UPTODATE;
2669 
2670 		free(fw_data, M_DEVBUF);
2671 		ADAPTER_UNLOCK(sc);
2672 		break;
2673 	}
2674 	case CHELSIO_LOAD_BOOT: {
2675 		uint8_t *boot_data;
2676 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2677 
2678 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2679 		if (!boot_data)
2680 			return (ENOMEM);
2681 
2682 		error = copyin(t->buf, boot_data, t->len);
2683 		if (!error)
2684 			error = -t3_load_boot(sc, boot_data, t->len);
2685 
2686 		free(boot_data, M_DEVBUF);
2687 		break;
2688 	}
2689 	case CHELSIO_GET_PM: {
2690 		struct ch_pm *m = (struct ch_pm *)data;
2691 		struct tp_params *p = &sc->params.tp;
2692 
2693 		if (!is_offload(sc))
2694 			return (EOPNOTSUPP);
2695 
2696 		m->tx_pg_sz = p->tx_pg_size;
2697 		m->tx_num_pg = p->tx_num_pgs;
2698 		m->rx_pg_sz  = p->rx_pg_size;
2699 		m->rx_num_pg = p->rx_num_pgs;
2700 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2701 
2702 		break;
2703 	}
2704 	case CHELSIO_SET_PM: {
2705 		struct ch_pm *m = (struct ch_pm *)data;
2706 		struct tp_params *p = &sc->params.tp;
2707 
2708 		if (!is_offload(sc))
2709 			return (EOPNOTSUPP);
2710 		if (sc->flags & FULL_INIT_DONE)
2711 			return (EBUSY);
2712 
2713 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2714 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2715 			return (EINVAL);	/* not power of 2 */
2716 		if (!(m->rx_pg_sz & 0x14000))
2717 			return (EINVAL);	/* not 16KB or 64KB */
2718 		if (!(m->tx_pg_sz & 0x1554000))
2719 			return (EINVAL);
2720 		if (m->tx_num_pg == -1)
2721 			m->tx_num_pg = p->tx_num_pgs;
2722 		if (m->rx_num_pg == -1)
2723 			m->rx_num_pg = p->rx_num_pgs;
2724 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2725 			return (EINVAL);
2726 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2727 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2728 			return (EINVAL);
2729 
2730 		p->rx_pg_size = m->rx_pg_sz;
2731 		p->tx_pg_size = m->tx_pg_sz;
2732 		p->rx_num_pgs = m->rx_num_pg;
2733 		p->tx_num_pgs = m->tx_num_pg;
2734 		break;
2735 	}
2736 	case CHELSIO_SETMTUTAB: {
2737 		struct ch_mtus *m = (struct ch_mtus *)data;
2738 		int i;
2739 
2740 		if (!is_offload(sc))
2741 			return (EOPNOTSUPP);
2742 		if (offload_running(sc))
2743 			return (EBUSY);
2744 		if (m->nmtus != NMTUS)
2745 			return (EINVAL);
2746 		if (m->mtus[0] < 81)         /* accommodate SACK */
2747 			return (EINVAL);
2748 
2749 		/*
2750 		 * MTUs must be in ascending order
2751 		 */
2752 		for (i = 1; i < NMTUS; ++i)
2753 			if (m->mtus[i] < m->mtus[i - 1])
2754 				return (EINVAL);
2755 
2756 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2757 		break;
2758 	}
2759 	case CHELSIO_GETMTUTAB: {
2760 		struct ch_mtus *m = (struct ch_mtus *)data;
2761 
2762 		if (!is_offload(sc))
2763 			return (EOPNOTSUPP);
2764 
2765 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2766 		m->nmtus = NMTUS;
2767 		break;
2768 	}
2769 	case CHELSIO_GET_MEM: {
2770 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2771 		struct mc7 *mem;
2772 		uint8_t *useraddr;
2773 		u64 buf[32];
2774 
2775 		/*
2776 		 * Use these to avoid modifying len/addr in the return
2777 		 * struct
2778 		 */
2779 		uint32_t len = t->len, addr = t->addr;
2780 
2781 		if (!is_offload(sc))
2782 			return (EOPNOTSUPP);
2783 		if (!(sc->flags & FULL_INIT_DONE))
2784 			return (EIO);         /* need the memory controllers */
2785 		if ((addr & 0x7) || (len & 0x7))
2786 			return (EINVAL);
2787 		if (t->mem_id == MEM_CM)
2788 			mem = &sc->cm;
2789 		else if (t->mem_id == MEM_PMRX)
2790 			mem = &sc->pmrx;
2791 		else if (t->mem_id == MEM_PMTX)
2792 			mem = &sc->pmtx;
2793 		else
2794 			return (EINVAL);
2795 
2796 		/*
2797 		 * Version scheme:
2798 		 * bits 0..9: chip version
2799 		 * bits 10..15: chip revision
2800 		 */
2801 		t->version = 3 | (sc->params.rev << 10);
2802 
2803 		/*
2804 		 * Read 256 bytes at a time as len can be large and we don't
2805 		 * want to use huge intermediate buffers.
2806 		 */
2807 		useraddr = (uint8_t *)t->buf;
2808 		while (len) {
2809 			unsigned int chunk = min(len, sizeof(buf));
2810 
2811 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2812 			if (error)
2813 				return (-error);
2814 			if (copyout(buf, useraddr, chunk))
2815 				return (EFAULT);
2816 			useraddr += chunk;
2817 			addr += chunk;
2818 			len -= chunk;
2819 		}
2820 		break;
2821 	}
2822 	case CHELSIO_READ_TCAM_WORD: {
2823 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2824 
2825 		if (!is_offload(sc))
2826 			return (EOPNOTSUPP);
2827 		if (!(sc->flags & FULL_INIT_DONE))
2828 			return (EIO);         /* need MC5 */
2829 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2830 		break;
2831 	}
2832 	case CHELSIO_SET_TRACE_FILTER: {
2833 		struct ch_trace *t = (struct ch_trace *)data;
2834 		const struct trace_params *tp;
2835 
2836 		tp = (const struct trace_params *)&t->sip;
2837 		if (t->config_tx)
2838 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2839 					       t->trace_tx);
2840 		if (t->config_rx)
2841 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2842 					       t->trace_rx);
2843 		break;
2844 	}
2845 	case CHELSIO_SET_PKTSCHED: {
2846 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2847 		if (sc->open_device_map == 0)
2848 			return (EAGAIN);
2849 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2850 		    p->binding);
2851 		break;
2852 	}
2853 	case CHELSIO_IFCONF_GETREGS: {
2854 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2855 		int reglen = cxgb_get_regs_len();
2856 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2857 		if (buf == NULL) {
2858 			return (ENOMEM);
2859 		}
2860 		if (regs->len > reglen)
2861 			regs->len = reglen;
2862 		else if (regs->len < reglen)
2863 			error = ENOBUFS;
2864 
2865 		if (!error) {
2866 			cxgb_get_regs(sc, regs, buf);
2867 			error = copyout(buf, regs->data, reglen);
2868 		}
2869 		free(buf, M_DEVBUF);
2870 
2871 		break;
2872 	}
2873 	case CHELSIO_SET_HW_SCHED: {
2874 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2875 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2876 
2877 		if ((sc->flags & FULL_INIT_DONE) == 0)
2878 			return (EAGAIN);       /* need TP to be initialized */
2879 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2880 		    !in_range(t->channel, 0, 1) ||
2881 		    !in_range(t->kbps, 0, 10000000) ||
2882 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2883 		    !in_range(t->flow_ipg, 0,
2884 			      dack_ticks_to_usec(sc, 0x7ff)))
2885 			return (EINVAL);
2886 
2887 		if (t->kbps >= 0) {
2888 			error = t3_config_sched(sc, t->kbps, t->sched);
2889 			if (error < 0)
2890 				return (-error);
2891 		}
2892 		if (t->class_ipg >= 0)
2893 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2894 		if (t->flow_ipg >= 0) {
2895 			t->flow_ipg *= 1000;     /* us -> ns */
2896 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2897 		}
2898 		if (t->mode >= 0) {
2899 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2900 
2901 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2902 					 bit, t->mode ? bit : 0);
2903 		}
2904 		if (t->channel >= 0)
2905 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2906 					 1 << t->sched, t->channel << t->sched);
2907 		break;
2908 	}
2909 	case CHELSIO_GET_EEPROM: {
2910 		int i;
2911 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2912 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2913 
2914 		if (buf == NULL) {
2915 			return (ENOMEM);
2916 		}
2917 		e->magic = EEPROM_MAGIC;
2918 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2919 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2920 
2921 		if (!error)
2922 			error = copyout(buf + e->offset, e->data, e->len);
2923 
2924 		free(buf, M_DEVBUF);
2925 		break;
2926 	}
2927 	case CHELSIO_CLEAR_STATS: {
2928 		if (!(sc->flags & FULL_INIT_DONE))
2929 			return (EAGAIN);
2930 
2931 		PORT_LOCK(pi);
2932 		t3_mac_update_stats(&pi->mac);
2933 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2934 		PORT_UNLOCK(pi);
2935 		break;
2936 	}
2937 	case CHELSIO_GET_UP_LA: {
2938 		struct ch_up_la *la = (struct ch_up_la *)data;
2939 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2940 		if (buf == NULL) {
2941 			return (ENOMEM);
2942 		}
2943 		if (la->bufsize < LA_BUFSIZE)
2944 			error = ENOBUFS;
2945 
2946 		if (!error)
2947 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2948 					      &la->bufsize, buf);
2949 		if (!error)
2950 			error = copyout(buf, la->data, la->bufsize);
2951 
2952 		free(buf, M_DEVBUF);
2953 		break;
2954 	}
2955 	case CHELSIO_GET_UP_IOQS: {
2956 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
2957 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
2958 		uint32_t *v;
2959 
2960 		if (buf == NULL) {
2961 			return (ENOMEM);
2962 		}
2963 		if (ioqs->bufsize < IOQS_BUFSIZE)
2964 			error = ENOBUFS;
2965 
2966 		if (!error)
2967 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
2968 
2969 		if (!error) {
2970 			v = (uint32_t *)buf;
2971 
2972 			ioqs->ioq_rx_enable = *v++;
2973 			ioqs->ioq_tx_enable = *v++;
2974 			ioqs->ioq_rx_status = *v++;
2975 			ioqs->ioq_tx_status = *v++;
2976 
2977 			error = copyout(v, ioqs->data, ioqs->bufsize);
2978 		}
2979 
2980 		free(buf, M_DEVBUF);
2981 		break;
2982 	}
2983 	case CHELSIO_SET_FILTER: {
2984 		struct ch_filter *f = (struct ch_filter *)data;
2985 		struct filter_info *p;
2986 		unsigned int nfilters = sc->params.mc5.nfilters;
2987 
2988 		if (!is_offload(sc))
2989 			return (EOPNOTSUPP);	/* No TCAM */
2990 		if (!(sc->flags & FULL_INIT_DONE))
2991 			return (EAGAIN);	/* mc5 not setup yet */
2992 		if (nfilters == 0)
2993 			return (EBUSY);		/* TOE will use TCAM */
2994 
2995 		/* sanity checks */
2996 		if (f->filter_id >= nfilters ||
2997 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
2998 		    (f->val.sport && f->mask.sport != 0xffff) ||
2999 		    (f->val.dport && f->mask.dport != 0xffff) ||
3000 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3001 		    (f->val.vlan_prio &&
3002 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3003 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3004 		    f->qset >= SGE_QSETS ||
3005 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3006 			return (EINVAL);
3007 
3008 		/* Was allocated with M_WAITOK */
3009 		KASSERT(sc->filters, ("filter table NULL\n"));
3010 
3011 		p = &sc->filters[f->filter_id];
3012 		if (p->locked)
3013 			return (EPERM);
3014 
3015 		bzero(p, sizeof(*p));
3016 		p->sip = f->val.sip;
3017 		p->sip_mask = f->mask.sip;
3018 		p->dip = f->val.dip;
3019 		p->sport = f->val.sport;
3020 		p->dport = f->val.dport;
3021 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3022 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3023 		    FILTER_NO_VLAN_PRI;
3024 		p->mac_hit = f->mac_hit;
3025 		p->mac_vld = f->mac_addr_idx != 0xffff;
3026 		p->mac_idx = f->mac_addr_idx;
3027 		p->pkt_type = f->proto;
3028 		p->report_filter_id = f->want_filter_id;
3029 		p->pass = f->pass;
3030 		p->rss = f->rss;
3031 		p->qset = f->qset;
3032 
3033 		error = set_filter(sc, f->filter_id, p);
3034 		if (error == 0)
3035 			p->valid = 1;
3036 		break;
3037 	}
3038 	case CHELSIO_DEL_FILTER: {
3039 		struct ch_filter *f = (struct ch_filter *)data;
3040 		struct filter_info *p;
3041 		unsigned int nfilters = sc->params.mc5.nfilters;
3042 
3043 		if (!is_offload(sc))
3044 			return (EOPNOTSUPP);
3045 		if (!(sc->flags & FULL_INIT_DONE))
3046 			return (EAGAIN);
3047 		if (nfilters == 0 || sc->filters == NULL)
3048 			return (EINVAL);
3049 		if (f->filter_id >= nfilters)
3050 		       return (EINVAL);
3051 
3052 		p = &sc->filters[f->filter_id];
3053 		if (p->locked)
3054 			return (EPERM);
3055 		if (!p->valid)
3056 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3057 
3058 		bzero(p, sizeof(*p));
3059 		p->sip = p->sip_mask = 0xffffffff;
3060 		p->vlan = 0xfff;
3061 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3062 		p->pkt_type = 1;
3063 		error = set_filter(sc, f->filter_id, p);
3064 		break;
3065 	}
3066 	case CHELSIO_GET_FILTER: {
3067 		struct ch_filter *f = (struct ch_filter *)data;
3068 		struct filter_info *p;
3069 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3070 
3071 		if (!is_offload(sc))
3072 			return (EOPNOTSUPP);
3073 		if (!(sc->flags & FULL_INIT_DONE))
3074 			return (EAGAIN);
3075 		if (nfilters == 0 || sc->filters == NULL)
3076 			return (EINVAL);
3077 
3078 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3079 		for (; i < nfilters; i++) {
3080 			p = &sc->filters[i];
3081 			if (!p->valid)
3082 				continue;
3083 
3084 			bzero(f, sizeof(*f));
3085 
3086 			f->filter_id = i;
3087 			f->val.sip = p->sip;
3088 			f->mask.sip = p->sip_mask;
3089 			f->val.dip = p->dip;
3090 			f->mask.dip = p->dip ? 0xffffffff : 0;
3091 			f->val.sport = p->sport;
3092 			f->mask.sport = p->sport ? 0xffff : 0;
3093 			f->val.dport = p->dport;
3094 			f->mask.dport = p->dport ? 0xffff : 0;
3095 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3096 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3097 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3098 			    0 : p->vlan_prio;
3099 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3100 			    0 : FILTER_NO_VLAN_PRI;
3101 			f->mac_hit = p->mac_hit;
3102 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3103 			f->proto = p->pkt_type;
3104 			f->want_filter_id = p->report_filter_id;
3105 			f->pass = p->pass;
3106 			f->rss = p->rss;
3107 			f->qset = p->qset;
3108 
3109 			break;
3110 		}
3111 
3112 		if (i == nfilters)
3113 			f->filter_id = 0xffffffff;
3114 		break;
3115 	}
3116 	default:
3117 		return (EOPNOTSUPP);
3118 		break;
3119 	}
3120 
3121 	return (error);
3122 }
3123 
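/*
 * Copy the register block [start, end] into the dump buffer at the same byte
 * offsets.
 */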
3124 static __inline void
3125 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3126     unsigned int end)
3127 {
3128 	uint32_t *p = (uint32_t *)(buf + start);
3129 
3130 	for ( ; start <= end; start += sizeof(uint32_t))
3131 		*p++ = t3_read_reg(ap, start);
3132 }
3133 
3134 #define T3_REGMAP_SIZE (3 * 1024)
3135 static int
3136 cxgb_get_regs_len(void)
3137 {
3138 	return T3_REGMAP_SIZE;
3139 }
3140 
3141 static void
3142 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3143 {
3144 
3145 	/*
3146 	 * Version scheme:
3147 	 * bits 0..9: chip version
3148 	 * bits 10..15: chip revision
3149 	 * bit 31: set for PCIe cards
3150 	 */
3151 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3152 
3153 	/*
3154 	 * We skip the MAC statistics registers because they are clear-on-read.
3155 	 * Also reading multi-register stats would need to synchronize with the
3156 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3157 	 */
3158 	memset(buf, 0, cxgb_get_regs_len());
3159 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3160 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3161 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3162 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3163 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3164 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3165 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3166 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3167 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3168 }
3169 
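/*
 * Allocate the software filter table.  The last entry is preset as a default
 * pass-with-RSS filter and locked, so it cannot be modified or deleted
 * through the ioctl interface.
 */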
3170 static int
3171 alloc_filters(struct adapter *sc)
3172 {
3173 	struct filter_info *p;
3174 	unsigned int nfilters = sc->params.mc5.nfilters;
3175 
3176 	if (nfilters == 0)
3177 		return (0);
3178 
3179 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3180 	sc->filters = p;
3181 
3182 	p = &sc->filters[nfilters - 1];
3183 	p->vlan = 0xfff;
3184 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3185 	p->pass = p->rss = p->valid = p->locked = 1;
3186 
3187 	return (0);
3188 }
3189 
3190 static int
3191 setup_hw_filters(struct adapter *sc)
3192 {
3193 	int i, rc;
3194 	unsigned int nfilters = sc->params.mc5.nfilters;
3195 
3196 	if (!sc->filters)
3197 		return (0);
3198 
3199 	t3_enable_filters(sc);
3200 
3201 	for (i = rc = 0; i < nfilters && !rc; i++) {
3202 		if (sc->filters[i].locked)
3203 			rc = set_filter(sc, i, &sc->filters[i]);
3204 	}
3205 
3206 	return (rc);
3207 }
3208 
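/*
 * Program one hardware filter.  The filter id is first offset into the filter
 * region of the MC5 TCAM, then a management request carrying a
 * CPL_PASS_OPEN_REQ (the match tuple) and two CPL_SET_TCB_FIELDs (the filter
 * action) is sent on the management queue.  Filters that pass traffic without
 * RSS get a follow-up CPL_SET_TCB_FIELD that steers matches to the selected
 * qset.
 */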
3209 static int
3210 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3211 {
3212 	int len;
3213 	struct mbuf *m;
3214 	struct ulp_txpkt *txpkt;
3215 	struct work_request_hdr *wr;
3216 	struct cpl_pass_open_req *oreq;
3217 	struct cpl_set_tcb_field *sreq;
3218 
3219 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3220 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3221 
3222 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3223 	      sc->params.mc5.nfilters;
3224 
3225 	m = m_gethdr(M_WAITOK, MT_DATA);
3226 	m->m_len = m->m_pkthdr.len = len;
3227 	bzero(mtod(m, char *), len);
3228 
3229 	wr = mtod(m, struct work_request_hdr *);
3230 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3231 
3232 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3233 	txpkt = (struct ulp_txpkt *)oreq;
3234 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3235 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3236 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3237 	oreq->local_port = htons(f->dport);
3238 	oreq->peer_port = htons(f->sport);
3239 	oreq->local_ip = htonl(f->dip);
3240 	oreq->peer_ip = htonl(f->sip);
3241 	oreq->peer_netmask = htonl(f->sip_mask);
3242 	oreq->opt0h = 0;
3243 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3244 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3245 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3246 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3247 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3248 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3249 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3250 
3251 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3252 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3253 			  (f->report_filter_id << 15) | (1 << 23) |
3254 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3255 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3256 	t3_mgmt_tx(sc, m);
3257 
3258 	if (f->pass && !f->rss) {
3259 		len = sizeof(*sreq);
3260 		m = m_gethdr(M_WAITOK, MT_DATA);
3261 		m->m_len = m->m_pkthdr.len = len;
3262 		bzero(mtod(m, char *), len);
3263 		sreq = mtod(m, struct cpl_set_tcb_field *);
3264 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3265 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3266 				 (u64)sc->rrss_map[f->qset] << 19);
3267 		t3_mgmt_tx(sc, m);
3268 	}
3269 	return (0);
3270 }
3271 
3272 static inline void
3273 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3274     unsigned int word, u64 mask, u64 val)
3275 {
3276 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3277 	req->reply = V_NO_REPLY(1);
3278 	req->cpu_idx = 0;
3279 	req->word = htons(word);
3280 	req->mask = htobe64(mask);
3281 	req->val = htobe64(val);
3282 }
3283 
3284 static inline void
3285 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3286     unsigned int word, u64 mask, u64 val)
3287 {
3288 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3289 
3290 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3291 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3292 	mk_set_tcb_field(req, tid, word, mask, val);
3293 }
3294 
3295 void
3296 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3297 {
3298 	struct adapter *sc;
3299 
3300 	mtx_lock(&t3_list_lock);
3301 	SLIST_FOREACH(sc, &t3_list, link) {
3302 		/*
3303 		 * func should not make any assumptions about what state sc is
3304 		 * in; the only guarantee is that sc->lock is a valid lock.
3305 		 */
3306 		func(sc, arg);
3307 	}
3308 	mtx_unlock(&t3_list_lock);
3309 }
3310 
3311 #ifdef TCP_OFFLOAD
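/*
 * Enable or disable TOE on a port.  Enabling requires a fully initialized
 * adapter and activates the TOM (and, for now, iWARP) upper layer drivers on
 * first use; disabling simply clears the port's bit in the offload map.
 */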
3312 static int
3313 toe_capability(struct port_info *pi, int enable)
3314 {
3315 	int rc;
3316 	struct adapter *sc = pi->adapter;
3317 
3318 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3319 
3320 	if (!is_offload(sc))
3321 		return (ENODEV);
3322 
3323 	if (enable) {
3324 		if (!(sc->flags & FULL_INIT_DONE)) {
3325 			log(LOG_WARNING,
3326 			    "You must enable a cxgb interface first\n");
3327 			return (EAGAIN);
3328 		}
3329 
3330 		if (isset(&sc->offload_map, pi->port_id))
3331 			return (0);
3332 
3333 		if (!(sc->flags & TOM_INIT_DONE)) {
3334 			rc = t3_activate_uld(sc, ULD_TOM);
3335 			if (rc == EAGAIN) {
3336 				log(LOG_WARNING,
3337 				    "You must kldload t3_tom.ko before trying "
3338 				    "to enable TOE on a cxgb interface.\n");
3339 			}
3340 			if (rc != 0)
3341 				return (rc);
3342 			KASSERT(sc->tom_softc != NULL,
3343 			    ("%s: TOM activated but softc NULL", __func__));
3344 			KASSERT(sc->flags & TOM_INIT_DONE,
3345 			    ("%s: TOM activated but flag not set", __func__));
3346 		}
3347 
3348 		setbit(&sc->offload_map, pi->port_id);
3349 
3350 		/*
3351 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3352 		 * enabled on any port.  Need to figure out how to enable,
3353 		 * disable, load, and unload iWARP cleanly.
3354 		 */
3355 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3356 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3357 			setbit(&sc->offload_map, MAX_NPORTS);
3358 	} else {
3359 		if (!isset(&sc->offload_map, pi->port_id))
3360 			return (0);
3361 
3362 		KASSERT(sc->flags & TOM_INIT_DONE,
3363 		    ("%s: TOM never initialized?", __func__));
3364 		clrbit(&sc->offload_map, pi->port_id);
3365 	}
3366 
3367 	return (0);
3368 }
3369 
3370 /*
3371  * Add an upper layer driver to the global list.
3372  */
3373 int
3374 t3_register_uld(struct uld_info *ui)
3375 {
3376 	int rc = 0;
3377 	struct uld_info *u;
3378 
3379 	mtx_lock(&t3_uld_list_lock);
3380 	SLIST_FOREACH(u, &t3_uld_list, link) {
3381 	    if (u->uld_id == ui->uld_id) {
3382 		if (u->uld_id == ui->uld_id) {
3383 			rc = EEXIST;
3384 			goto done;
3385 		}
3386 
3387 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3388 	ui->refcount = 0;
3389 done:
3390 	mtx_unlock(&t3_uld_list_lock);
3391 	return (rc);
3392 }
3393 
3394 int
3395 t3_unregister_uld(struct uld_info *ui)
3396 {
3397 	int rc = EINVAL;
3398 	struct uld_info *u;
3399 
3400 	mtx_lock(&t3_uld_list_lock);
3401 
3402 	SLIST_FOREACH(u, &t3_uld_list, link) {
3403 		if (u == ui) {
3404 			if (ui->refcount > 0) {
3405 				rc = EBUSY;
3406 				goto done;
3407 			}
3408 
3409 			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3410 			rc = 0;
3411 			goto done;
3412 		}
3413 	}
3414 done:
3415 	mtx_unlock(&t3_uld_list_lock);
3416 	return (rc);
3417 }
3418 
3419 int
3420 t3_activate_uld(struct adapter *sc, int id)
3421 {
3422 	int rc = EAGAIN;
3423 	struct uld_info *ui;
3424 
3425 	mtx_lock(&t3_uld_list_lock);
3426 
3427 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3428 		if (ui->uld_id == id) {
3429 			rc = ui->activate(sc);
3430 			if (rc == 0)
3431 				ui->refcount++;
3432 			goto done;
3433 		}
3434 	}
3435 done:
3436 	mtx_unlock(&t3_uld_list_lock);
3437 
3438 	return (rc);
3439 }
3440 
3441 int
3442 t3_deactivate_uld(struct adapter *sc, int id)
3443 {
3444 	int rc = EINVAL;
3445 	struct uld_info *ui;
3446 
3447 	mtx_lock(&t3_uld_list_lock);
3448 
3449 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3450 		if (ui->uld_id == id) {
3451 			rc = ui->deactivate(sc);
3452 			if (rc == 0)
3453 				ui->refcount--;
3454 			goto done;
3455 		}
3456 	}
3457 done:
3458 	mtx_unlock(&t3_uld_list_lock);
3459 
3460 	return (rc);
3461 }
3462 
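/*
 * Default handler for CPL messages that have no registered consumer: drop the
 * mbuf.  t3_register_cpl_handler() installs (or, when h is NULL, removes) a
 * handler with an atomic pointer store, so the dispatch path needs no lock.
 */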
3463 static int
3464 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3465     struct mbuf *m)
3466 {
3467 	m_freem(m);
3468 	return (EDOOFUS);
3469 }
3470 
3471 int
3472 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3473 {
3474 	uintptr_t *loc, new;
3475 
3476 	if (opcode >= NUM_CPL_HANDLERS)
3477 		return (EINVAL);
3478 
3479 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3480 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3481 	atomic_store_rel_ptr(loc, new);
3482 
3483 	return (0);
3484 }
3485 #endif
3486 
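/*
 * Module event handler: set up the global adapter (and ULD) lists on load and
 * tear them down on unload; unloading is refused while any adapter or ULD is
 * still registered.
 */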
3487 static int
3488 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3489 {
3490 	int rc = 0;
3491 
3492 	switch (cmd) {
3493 	case MOD_LOAD:
3494 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3495 		SLIST_INIT(&t3_list);
3496 #ifdef TCP_OFFLOAD
3497 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3498 		SLIST_INIT(&t3_uld_list);
3499 #endif
3500 		break;
3501 
3502 	case MOD_UNLOAD:
3503 #ifdef TCP_OFFLOAD
3504 		mtx_lock(&t3_uld_list_lock);
3505 		if (!SLIST_EMPTY(&t3_uld_list)) {
3506 			rc = EBUSY;
3507 			mtx_unlock(&t3_uld_list_lock);
3508 			break;
3509 		}
3510 		mtx_unlock(&t3_uld_list_lock);
3511 		mtx_destroy(&t3_uld_list_lock);
3512 #endif
3513 		mtx_lock(&t3_list_lock);
3514 		if (!SLIST_EMPTY(&t3_list)) {
3515 			rc = EBUSY;
3516 			mtx_unlock(&t3_list_lock);
3517 			break;
3518 		}
3519 		mtx_unlock(&t3_list_lock);
3520 		mtx_destroy(&t3_list_lock);
3521 		break;
3522 	}
3523 
3524 	return (rc);
3525 }
3526