xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 675be9115aae86ad6b3d877155d4fd7822892105)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110 
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126 
127 static device_method_t cxgb_controller_methods[] = {
128 	DEVMETHOD(device_probe,		cxgb_controller_probe),
129 	DEVMETHOD(device_attach,	cxgb_controller_attach),
130 	DEVMETHOD(device_detach,	cxgb_controller_detach),
131 
132 	DEVMETHOD_END
133 };
134 
135 static driver_t cxgb_controller_driver = {
136 	"cxgbc",
137 	cxgb_controller_methods,
138 	sizeof(struct adapter)
139 };
140 
141 static devclass_t	cxgb_controller_devclass;
142 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
143 
144 /*
145  * Attachment glue for the ports.  Attachment is done directly to the
146  * controller device.
147  */
148 static int cxgb_port_probe(device_t);
149 static int cxgb_port_attach(device_t);
150 static int cxgb_port_detach(device_t);
151 
152 static device_method_t cxgb_port_methods[] = {
153 	DEVMETHOD(device_probe,		cxgb_port_probe),
154 	DEVMETHOD(device_attach,	cxgb_port_attach),
155 	DEVMETHOD(device_detach,	cxgb_port_detach),
156 	{ 0, 0 }
157 };
158 
159 static driver_t cxgb_port_driver = {
160 	"cxgb",
161 	cxgb_port_methods,
162 	0
163 };
164 
165 static d_ioctl_t cxgb_extension_ioctl;
166 static d_open_t cxgb_extension_open;
167 static d_close_t cxgb_extension_close;
168 
169 static struct cdevsw cxgb_cdevsw = {
170        .d_version =    D_VERSION,
171        .d_flags =      0,
172        .d_open =       cxgb_extension_open,
173        .d_close =      cxgb_extension_close,
174        .d_ioctl =      cxgb_extension_ioctl,
175        .d_name =       "cxgb",
176 };
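/*
 * A per-port character device using this cdevsw is created in cxgb_makedev();
 * userland management utilities (for example Chelsio's cxgbtool) reach the
 * CHELSIO_* ioctls handled by cxgb_extension_ioctl() through it.
 */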
177 
178 static devclass_t	cxgb_port_devclass;
179 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
180 
181 /*
182  * The driver uses the best interrupt scheme available on a platform in the
183  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
184  * of these schemes the driver may consider as follows:
185  *
186  * msi = 2: choose from among all three options
187  * msi = 1: only consider MSI and pin interrupts
188  * msi = 0: force pin interrupts
189  */
190 static int msi_allowed = 2;
191 
192 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
193 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
194 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
195     "MSI-X, MSI, INTx selector");
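/*
 * Example (typical usage): like any CTLFLAG_RDTUN knob, this is normally set
 * as a loader tunable in /boot/loader.conf before the module loads, e.g.
 *	hw.cxgb.msi_allowed="1"
 */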
196 
197 /*
197  * The driver enables offload by default.
199  * To disable it, use ofld_disable = 1.
200  */
201 static int ofld_disable = 0;
202 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
203 SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
204     "disable ULP offload");
205 
206 /*
207  * The driver uses an auto-queue algorithm by default.
208  * To disable it and force a single queue-set per port, use multiq = 0
209  */
210 static int multiq = 1;
211 TUNABLE_INT("hw.cxgb.multiq", &multiq);
212 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
213     "use min(ncpus/ports, 8) queue-sets per port");
214 
215 /*
216  * By default the driver will not update the firmware unless
217  * it was compiled against a newer version
218  *
219  */
220 static int force_fw_update = 0;
221 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
222 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
223     "update firmware even if up to date");
224 
225 int cxgb_use_16k_clusters = -1;
226 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
227 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
228     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
229 
230 /*
231  * Tune the size of the output queue.
232  */
233 int cxgb_snd_queue_len = IFQ_MAXLEN;
234 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
235 SYSCTL_INT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
236     &cxgb_snd_queue_len, 0, "send queue size");
237 
238 static int nfilters = -1;
239 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
240 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
241     &nfilters, 0, "max number of entries in the filter table");
242 
243 enum {
244 	MAX_TXQ_ENTRIES      = 16384,
245 	MAX_CTRL_TXQ_ENTRIES = 1024,
246 	MAX_RSPQ_ENTRIES     = 16384,
247 	MAX_RX_BUFFERS       = 16384,
248 	MAX_RX_JUMBO_BUFFERS = 16384,
249 	MIN_TXQ_ENTRIES      = 4,
250 	MIN_CTRL_TXQ_ENTRIES = 4,
251 	MIN_RSPQ_ENTRIES     = 32,
252 	MIN_FL_ENTRIES       = 32,
253 	MIN_FL_JUMBO_ENTRIES = 32
254 };
255 
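/*
 * Software shadow of one hardware (MC5/TCAM) filter entry; the set_filter()
 * routine declared above packs these fields into CPL_SET_TCB_FIELD writes
 * that program the actual filter.
 */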
256 struct filter_info {
257 	u32 sip;
258 	u32 sip_mask;
259 	u32 dip;
260 	u16 sport;
261 	u16 dport;
262 	u32 vlan:12;
263 	u32 vlan_prio:3;
264 	u32 mac_hit:1;
265 	u32 mac_idx:4;
266 	u32 mac_vld:1;
267 	u32 pkt_type:2;
268 	u32 report_filter_id:1;
269 	u32 pass:1;
270 	u32 rss:1;
271 	u32 qset:3;
272 	u32 locked:1;
273 	u32 valid:1;
274 };
275 
276 enum { FILTER_NO_VLAN_PRI = 7 };
277 
278 #define EEPROM_MAGIC 0x38E2F10C
279 
280 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
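/* Selects the per-port bits of open_device_map; OFFLOAD_DEVMAP_BIT sits above them. */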
281 
282 /* Table for probing the cards.  The desc field isn't actually used */
283 struct cxgb_ident {
284 	uint16_t	vendor;
285 	uint16_t	device;
286 	int		index;
287 	char		*desc;
288 } cxgb_identifiers[] = {
289 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
290 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
291 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
303 	{0, 0, 0, NULL}
304 };
305 
306 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
307 
308 
309 static __inline char
310 t3rev2char(struct adapter *adapter)
311 {
312 	char rev = 'z';
313 
314 	switch(adapter->params.rev) {
315 	case T3_REV_A:
316 		rev = 'a';
317 		break;
318 	case T3_REV_B:
319 	case T3_REV_B2:
320 		rev = 'b';
321 		break;
322 	case T3_REV_C:
323 		rev = 'c';
324 		break;
325 	}
326 	return rev;
327 }
328 
329 static struct cxgb_ident *
330 cxgb_get_ident(device_t dev)
331 {
332 	struct cxgb_ident *id;
333 
334 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
335 		if ((id->vendor == pci_get_vendor(dev)) &&
336 		    (id->device == pci_get_device(dev))) {
337 			return (id);
338 		}
339 	}
340 	return (NULL);
341 }
342 
343 static const struct adapter_info *
344 cxgb_get_adapter_info(device_t dev)
345 {
346 	struct cxgb_ident *id;
347 	const struct adapter_info *ai;
348 
349 	id = cxgb_get_ident(dev);
350 	if (id == NULL)
351 		return (NULL);
352 
353 	ai = t3_get_adapter_info(id->index);
354 
355 	return (ai);
356 }
357 
358 static int
359 cxgb_controller_probe(device_t dev)
360 {
361 	const struct adapter_info *ai;
362 	char *ports, buf[80];
363 	int nports;
364 
365 	ai = cxgb_get_adapter_info(dev);
366 	if (ai == NULL)
367 		return (ENXIO);
368 
369 	nports = ai->nports0 + ai->nports1;
370 	if (nports == 1)
371 		ports = "port";
372 	else
373 		ports = "ports";
374 
375 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
376 	device_set_desc_copy(dev, buf);
377 	return (BUS_PROBE_DEFAULT);
378 }
379 
380 #define FW_FNAME "cxgb_t3fw"
381 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
382 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
383 
384 static int
385 upgrade_fw(adapter_t *sc)
386 {
387 	const struct firmware *fw;
388 	int status;
389 	u32 vers;
390 
391 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
392 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
393 		return (ENOENT);
394 	} else
395 		device_printf(sc->dev, "installing firmware on card\n");
396 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
397 
398 	if (status != 0) {
399 		device_printf(sc->dev, "failed to install firmware: %d\n",
400 		    status);
401 	} else {
402 		t3_get_fw_version(sc, &vers);
403 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
404 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
405 		    G_FW_VERSION_MICRO(vers));
406 	}
407 
408 	firmware_put(fw, FIRMWARE_UNLOAD);
409 
410 	return (status);
411 }
412 
413 /*
414  * The cxgb_controller_attach function is responsible for the initial
415  * bringup of the device.  Its responsibilities include:
416  *
417  *  1. Determine if the device supports MSI or MSI-X.
418  *  2. Allocate bus resources so that we can access the Base Address Register
419  *  3. Create and initialize mutexes for the controller and its control
420  *     logic such as SGE and MDIO.
421  *  4. Call hardware specific setup routine for the adapter as a whole.
422  *  5. Allocate the BAR for doing MSI-X.
423  *  6. Setup the line interrupt iff MSI-X is not supported.
424  *  7. Create the driver's taskq.
425  *  8. Start one task queue service thread.
426  *  9. Check if the firmware and SRAM are up-to-date.  They will be
427  *     auto-updated later (before FULL_INIT_DONE), if required.
428  * 10. Create a child device for each MAC (port)
429  * 11. Initialize T3 private state.
430  * 12. Trigger the LED
431  * 13. Setup offload iff supported.
432  * 14. Reset/restart the tick callout.
433  * 15. Attach sysctls
434  *
435  * NOTE: Any modification or deviation from this list MUST be reflected in
436  * the above comment.  Failure to do so will result in problems on various
437  * error conditions including link flapping.
438  */
439 static int
440 cxgb_controller_attach(device_t dev)
441 {
442 	device_t child;
443 	const struct adapter_info *ai;
444 	struct adapter *sc;
445 	int i, error = 0;
446 	uint32_t vers;
447 	int port_qsets = 1;
448 	int msi_needed, reg;
449 	char buf[80];
450 
451 	sc = device_get_softc(dev);
452 	sc->dev = dev;
453 	sc->msi_count = 0;
454 	ai = cxgb_get_adapter_info(dev);
455 
456 	/* find the PCIe link width and set max read request to 4KB */
457 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
458 		uint16_t lnk;
459 
460 		lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
461 		sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
462 		if (sc->link_width < 8 &&
463 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
464 			device_printf(sc->dev,
465 			    "PCIe x%d Link, expect reduced performance\n",
466 			    sc->link_width);
467 		}
468 
469 		pci_set_max_read_req(dev, 4096);
470 	}
471 
472 	touch_bars(dev);
473 	pci_enable_busmaster(dev);
474 	/*
475 	 * Allocate the registers and make them available to the driver.
476 	 * The registers that we care about for NIC mode are in BAR 0
477 	 */
478 	sc->regs_rid = PCIR_BAR(0);
479 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
480 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
481 		device_printf(dev, "Cannot allocate BAR region 0\n");
482 		return (ENXIO);
483 	}
484 	sc->udbs_rid = PCIR_BAR(2);
485 	sc->udbs_res = NULL;
486 	if (is_offload(sc) &&
487 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
488 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
489 		device_printf(dev, "Cannot allocate BAR region 2\n");
490 		error = ENXIO;
491 		goto out;
492 	}
493 
494 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
495 	    device_get_unit(dev));
496 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
497 
498 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
499 	    device_get_unit(dev));
500 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
501 	    device_get_unit(dev));
502 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
503 	    device_get_unit(dev));
504 
505 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
506 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
507 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
508 
509 	sc->bt = rman_get_bustag(sc->regs_res);
510 	sc->bh = rman_get_bushandle(sc->regs_res);
511 	sc->mmio_len = rman_get_size(sc->regs_res);
512 
513 	for (i = 0; i < MAX_NPORTS; i++)
514 		sc->port[i].adapter = sc;
515 
516 	if (t3_prep_adapter(sc, ai, 1) < 0) {
517 		printf("prep adapter failed\n");
518 		error = ENODEV;
519 		goto out;
520 	}
521 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
522 	 * enough messages for the queue sets.  If that fails, try falling
523 	 * back to MSI.  If that fails, then try falling back to the legacy
524 	 * interrupt pin model.
525 	 */
526 	sc->msix_regs_rid = 0x20;
527 	if ((msi_allowed >= 2) &&
528 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
529 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
530 
531 		if (multiq)
532 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
533 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
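		/*
		 * One MSI-X vector per queue set, plus one that the driver
		 * dedicates to firmware/error (async) notifications.
		 */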
534 
535 		if (pci_msix_count(dev) == 0 ||
536 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
537 		    sc->msi_count != msi_needed) {
538 			device_printf(dev, "alloc msix failed - "
539 				      "msi_count=%d, msi_needed=%d, err=%d; "
540 				      "will try MSI\n", sc->msi_count,
541 				      msi_needed, error);
542 			sc->msi_count = 0;
543 			port_qsets = 1;
544 			pci_release_msi(dev);
545 			bus_release_resource(dev, SYS_RES_MEMORY,
546 			    sc->msix_regs_rid, sc->msix_regs_res);
547 			sc->msix_regs_res = NULL;
548 		} else {
549 			sc->flags |= USING_MSIX;
550 			sc->cxgb_intr = cxgb_async_intr;
551 			device_printf(dev,
552 				      "using MSI-X interrupts (%u vectors)\n",
553 				      sc->msi_count);
554 		}
555 	}
556 
557 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
558 		sc->msi_count = 1;
559 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
560 			device_printf(dev, "alloc msi failed - "
561 				      "err=%d; will try INTx\n", error);
562 			sc->msi_count = 0;
563 			port_qsets = 1;
564 			pci_release_msi(dev);
565 		} else {
566 			sc->flags |= USING_MSI;
567 			sc->cxgb_intr = t3_intr_msi;
568 			device_printf(dev, "using MSI interrupts\n");
569 		}
570 	}
571 	if (sc->msi_count == 0) {
572 		device_printf(dev, "using line interrupts\n");
573 		sc->cxgb_intr = t3b_intr;
574 	}
575 
576 	/* Create a private taskqueue thread for handling driver events */
577 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
578 	    taskqueue_thread_enqueue, &sc->tq);
579 	if (sc->tq == NULL) {
580 		device_printf(dev, "failed to allocate controller task queue\n");
581 		goto out;
582 	}
583 
584 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
585 	    device_get_nameunit(dev));
586 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
587 
588 
589 	/* Create a periodic callout for checking adapter status */
590 	callout_init(&sc->cxgb_tick_ch, TRUE);
591 
592 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
593 		/*
594 		 * Warn user that a firmware update will be attempted in init.
595 		 */
596 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
597 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
598 		sc->flags &= ~FW_UPTODATE;
599 	} else {
600 		sc->flags |= FW_UPTODATE;
601 	}
602 
603 	if (t3_check_tpsram_version(sc) < 0) {
604 		/*
605 		 * Warn user that an SRAM update will be attempted in init.
606 		 */
607 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
608 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
609 		sc->flags &= ~TPS_UPTODATE;
610 	} else {
611 		sc->flags |= TPS_UPTODATE;
612 	}
613 
614 	/*
615 	 * Create a child device for each MAC.  The ethernet attachment
616 	 * will be done in these children.
617 	 */
618 	for (i = 0; i < (sc)->params.nports; i++) {
619 		struct port_info *pi;
620 
621 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
622 			device_printf(dev, "failed to add child port\n");
623 			error = EINVAL;
624 			goto out;
625 		}
626 		pi = &sc->port[i];
627 		pi->adapter = sc;
628 		pi->nqsets = port_qsets;
629 		pi->first_qset = i*port_qsets;
630 		pi->port_id = i;
631 		pi->tx_chan = i >= ai->nports0;
632 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
633 		sc->rxpkt_map[pi->txpkt_intf] = i;
634 		sc->port[i].tx_chan = i >= ai->nports0;
635 		sc->portdev[i] = child;
636 		device_set_softc(child, pi);
637 	}
638 	if ((error = bus_generic_attach(dev)) != 0)
639 		goto out;
640 
641 	/* initialize sge private state */
642 	t3_sge_init_adapter(sc);
643 
644 	t3_led_ready(sc);
645 
646 	cxgb_offload_init();
647 	if (is_offload(sc)) {
648 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
649 		cxgb_adapter_ofld(sc);
650 	}
651 	error = t3_get_fw_version(sc, &vers);
652 	if (error)
653 		goto out;
654 
655 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
656 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
657 	    G_FW_VERSION_MICRO(vers));
658 
659 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
660 		 ai->desc, is_offload(sc) ? "R" : "",
661 		 sc->params.vpd.ec, sc->params.vpd.sn);
662 	device_set_desc_copy(dev, buf);
663 
664 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
665 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
666 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
667 
668 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
669 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
670 	t3_add_attach_sysctls(sc);
671 out:
672 	if (error)
673 		cxgb_free(sc);
674 
675 	return (error);
676 }
677 
678 /*
679  * The cxgb_controller_detach routine is called when the device is
680  * unloaded from the system.
681  */
682 
683 static int
684 cxgb_controller_detach(device_t dev)
685 {
686 	struct adapter *sc;
687 
688 	sc = device_get_softc(dev);
689 
690 	cxgb_free(sc);
691 
692 	return (0);
693 }
694 
695 /*
696  * cxgb_free() is called by the cxgb_controller_detach() routine
697  * to tear down the structures that were built up in
698  * cxgb_controller_attach(), and should be the final piece of work
699  * done when fully unloading the driver.
700  *
701  *
702  *  1. Shutting down the threads started by the cxgb_controller_attach()
703  *     routine.
704  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
705  *  3. Detaching all of the port devices created during the
706  *     cxgb_controller_attach() routine.
707  *  4. Removing the device children created via cxgb_controller_attach().
708  *  5. Releasing PCI resources associated with the device.
709  *  6. Turning off the offload support, iff it was turned on.
710  *  7. Destroying the mutexes created in cxgb_controller_attach().
711  *
712  */
713 static void
714 cxgb_free(struct adapter *sc)
715 {
716 	int i, nqsets = 0;
717 
718 	ADAPTER_LOCK(sc);
719 	sc->flags |= CXGB_SHUTDOWN;
720 	ADAPTER_UNLOCK(sc);
721 
722 	/*
723 	 * Make sure all child devices are gone.
724 	 */
725 	bus_generic_detach(sc->dev);
726 	for (i = 0; i < (sc)->params.nports; i++) {
727 		if (sc->portdev[i] &&
728 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
729 			device_printf(sc->dev, "failed to delete child port\n");
730 		nqsets += sc->port[i].nqsets;
731 	}
732 
733 	/*
734 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
735 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
736 	 * all open devices have been closed.
737 	 */
738 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
739 					   __func__, sc->open_device_map));
740 	for (i = 0; i < sc->params.nports; i++) {
741 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
742 						  __func__, i));
743 	}
744 
745 	/*
746 	 * Finish off the adapter's callouts.
747 	 */
748 	callout_drain(&sc->cxgb_tick_ch);
749 	callout_drain(&sc->sge_timer_ch);
750 
751 	/*
752 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
753 	 * sysctls are cleaned up by the kernel linker.
754 	 */
755 	if (sc->flags & FULL_INIT_DONE) {
756  		t3_free_sge_resources(sc, nqsets);
757  		sc->flags &= ~FULL_INIT_DONE;
758  	}
759 
760 	/*
761 	 * Release all interrupt resources.
762 	 */
763 	cxgb_teardown_interrupts(sc);
764 	if (sc->flags & (USING_MSI | USING_MSIX)) {
765 		device_printf(sc->dev, "releasing msi message(s)\n");
766 		pci_release_msi(sc->dev);
767 	} else {
768 		device_printf(sc->dev, "no msi message to release\n");
769 	}
770 
771 	if (sc->msix_regs_res != NULL) {
772 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
773 		    sc->msix_regs_res);
774 	}
775 
776 	/*
777 	 * Free the adapter's taskqueue.
778 	 */
779 	if (sc->tq != NULL) {
780 		taskqueue_free(sc->tq);
781 		sc->tq = NULL;
782 	}
783 
784 	if (is_offload(sc)) {
785 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
786 		cxgb_adapter_unofld(sc);
787 	}
788 
789 #ifdef notyet
790 	if (sc->flags & CXGB_OFLD_INIT)
791 		cxgb_offload_deactivate(sc);
792 #endif
793 	free(sc->filters, M_DEVBUF);
794 	t3_sge_free(sc);
795 
796 	cxgb_offload_exit();
797 
798 	if (sc->udbs_res != NULL)
799 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
800 		    sc->udbs_res);
801 
802 	if (sc->regs_res != NULL)
803 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
804 		    sc->regs_res);
805 
806 	MTX_DESTROY(&sc->mdio_lock);
807 	MTX_DESTROY(&sc->sge.reg_lock);
808 	MTX_DESTROY(&sc->elmer_lock);
809 	ADAPTER_LOCK_DEINIT(sc);
810 }
811 
812 /**
813  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
814  *	@sc: the controller softc
815  *
816  *	Determines how many sets of SGE queues to use and initializes them.
817  *	We support multiple queue sets per port if we have MSI-X, otherwise
818  *	just one queue set per port.
819  */
820 static int
821 setup_sge_qsets(adapter_t *sc)
822 {
823 	int i, j, err, irq_idx = 0, qset_idx = 0;
824 	u_int ntxq = SGE_TXQ_PER_SET;
825 
826 	if ((err = t3_sge_alloc(sc)) != 0) {
827 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
828 		return (err);
829 	}
830 
831 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
832 		irq_idx = -1;
833 
834 	for (i = 0; i < (sc)->params.nports; i++) {
835 		struct port_info *pi = &sc->port[i];
836 
837 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
838 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
839 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
840 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
841 			if (err) {
842 				t3_free_sge_resources(sc, qset_idx);
843 				device_printf(sc->dev,
844 				    "t3_sge_alloc_qset failed with %d\n", err);
845 				return (err);
846 			}
847 		}
848 	}
849 
850 	return (0);
851 }
852 
853 static void
854 cxgb_teardown_interrupts(adapter_t *sc)
855 {
856 	int i;
857 
858 	for (i = 0; i < SGE_QSETS; i++) {
859 		if (sc->msix_intr_tag[i] == NULL) {
860 
861 			/* Should have been setup fully or not at all */
862 			KASSERT(sc->msix_irq_res[i] == NULL &&
863 				sc->msix_irq_rid[i] == 0,
864 				("%s: half-done interrupt (%d).", __func__, i));
865 
866 			continue;
867 		}
868 
869 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
870 				  sc->msix_intr_tag[i]);
871 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
872 				     sc->msix_irq_res[i]);
873 
874 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
875 		sc->msix_irq_rid[i] = 0;
876 	}
877 
878 	if (sc->intr_tag) {
879 		KASSERT(sc->irq_res != NULL,
880 			("%s: half-done interrupt.", __func__));
881 
882 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
883 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
884 				     sc->irq_res);
885 
886 		sc->irq_res = sc->intr_tag = NULL;
887 		sc->irq_rid = 0;
888 	}
889 }
890 
891 static int
892 cxgb_setup_interrupts(adapter_t *sc)
893 {
894 	struct resource *res;
895 	void *tag;
896 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
897 
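	/* rid 0 is the legacy INTx interrupt; MSI and MSI-X messages start at rid 1. */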
898 	sc->irq_rid = intr_flag ? 1 : 0;
899 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
900 					     RF_SHAREABLE | RF_ACTIVE);
901 	if (sc->irq_res == NULL) {
902 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
903 			      intr_flag, sc->irq_rid);
904 		err = EINVAL;
905 		sc->irq_rid = 0;
906 	} else {
907 		err = bus_setup_intr(sc->dev, sc->irq_res,
908 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
909 		    sc->cxgb_intr, sc, &sc->intr_tag);
910 
911 		if (err) {
912 			device_printf(sc->dev,
913 				      "Cannot set up interrupt (%x, %u, %d)\n",
914 				      intr_flag, sc->irq_rid, err);
915 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
916 					     sc->irq_res);
917 			sc->irq_res = sc->intr_tag = NULL;
918 			sc->irq_rid = 0;
919 		}
920 	}
921 
922 	/* That's all for INTx or MSI */
923 	if (!(intr_flag & USING_MSIX) || err)
924 		return (err);
925 
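	/*
	 * The first MSI-X message (rid 1) was wired to the async/error handler
	 * above; the remaining messages, rid 2 onward, service one queue set each.
	 */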
926 	for (i = 0; i < sc->msi_count - 1; i++) {
927 		rid = i + 2;
928 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
929 					     RF_SHAREABLE | RF_ACTIVE);
930 		if (res == NULL) {
931 			device_printf(sc->dev, "Cannot allocate interrupt "
932 				      "for message %d\n", rid);
933 			err = EINVAL;
934 			break;
935 		}
936 
937 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
938 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
939 		if (err) {
940 			device_printf(sc->dev, "Cannot set up interrupt "
941 				      "for message %d (%d)\n", rid, err);
942 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
943 			break;
944 		}
945 
946 		sc->msix_irq_rid[i] = rid;
947 		sc->msix_irq_res[i] = res;
948 		sc->msix_intr_tag[i] = tag;
949 	}
950 
951 	if (err)
952 		cxgb_teardown_interrupts(sc);
953 
954 	return (err);
955 }
956 
957 
958 static int
959 cxgb_port_probe(device_t dev)
960 {
961 	struct port_info *p;
962 	char buf[80];
963 	const char *desc;
964 
965 	p = device_get_softc(dev);
966 	desc = p->phy.desc;
967 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
968 	device_set_desc_copy(dev, buf);
969 	return (0);
970 }
971 
972 
973 static int
974 cxgb_makedev(struct port_info *pi)
975 {
976 
977 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
978 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
979 
980 	if (pi->port_cdev == NULL)
981 		return (ENOMEM);
982 
983 	pi->port_cdev->si_drv1 = (void *)pi;
984 
985 	return (0);
986 }
987 
988 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
989     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
990     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
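/* Everything above is advertised, but IPv6 TSO (part of IFCAP_TSO) starts out disabled. */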
991 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
992 
993 static int
994 cxgb_port_attach(device_t dev)
995 {
996 	struct port_info *p;
997 	struct ifnet *ifp;
998 	int err;
999 	struct adapter *sc;
1000 
1001 	p = device_get_softc(dev);
1002 	sc = p->adapter;
1003 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1004 	    device_get_unit(device_get_parent(dev)), p->port_id);
1005 	PORT_LOCK_INIT(p, p->lockbuf);
1006 
1007 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1008 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1009 
1010 	/* Allocate an ifnet object and set it up */
1011 	ifp = p->ifp = if_alloc(IFT_ETHER);
1012 	if (ifp == NULL) {
1013 		device_printf(dev, "Cannot allocate ifnet\n");
1014 		return (ENOMEM);
1015 	}
1016 
1017 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1018 	ifp->if_init = cxgb_init;
1019 	ifp->if_softc = p;
1020 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1021 	ifp->if_ioctl = cxgb_ioctl;
1022 	ifp->if_start = cxgb_start;
1023 
1024 	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1025 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1026 	IFQ_SET_READY(&ifp->if_snd);
1027 
1028 	ifp->if_capabilities = CXGB_CAP;
1029 	ifp->if_capenable = CXGB_CAP_ENABLE;
1030 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1031 
1032 	/*
1033 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1034 	 */
1035 	if (sc->params.nports > 2) {
1036 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1037 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1038 		ifp->if_hwassist &= ~CSUM_TSO;
1039 	}
1040 
1041 	ether_ifattach(ifp, p->hw_addr);
1042 	ifp->if_transmit = cxgb_transmit;
1043 	ifp->if_qflush = cxgb_qflush;
1044 
1045 #ifdef DEFAULT_JUMBO
1046 	if (sc->params.nports <= 2)
1047 		ifp->if_mtu = ETHERMTU_JUMBO;
1048 #endif
1049 	if ((err = cxgb_makedev(p)) != 0) {
1050 		printf("makedev failed %d\n", err);
1051 		return (err);
1052 	}
1053 
1054 	/* Create a list of media supported by this port */
1055 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1056 	    cxgb_media_status);
1057 	cxgb_build_medialist(p);
1058 
1059 	t3_sge_init_port(p);
1060 
1061 	return (err);
1062 }
1063 
1064 /*
1065  * cxgb_port_detach() is called via the device_detach methods when
1066  * cxgb_free() calls the bus_generic_detach.  It is responsible for
1067  * removing the device from the view of the kernel, i.e. from all
1068  * interfaces lists etc.  This routine is only called when the driver is
1069  * being unloaded, not when the link goes down.
1070  */
1071 static int
1072 cxgb_port_detach(device_t dev)
1073 {
1074 	struct port_info *p;
1075 	struct adapter *sc;
1076 	int i;
1077 
1078 	p = device_get_softc(dev);
1079 	sc = p->adapter;
1080 
1081 	/* Tell cxgb_ioctl and if_init that the port is going away */
1082 	ADAPTER_LOCK(sc);
1083 	SET_DOOMED(p);
1084 	wakeup(&sc->flags);
1085 	while (IS_BUSY(sc))
1086 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1087 	SET_BUSY(sc);
1088 	ADAPTER_UNLOCK(sc);
1089 
1090 	if (p->port_cdev != NULL)
1091 		destroy_dev(p->port_cdev);
1092 
1093 	cxgb_uninit_synchronized(p);
1094 	ether_ifdetach(p->ifp);
1095 
1096 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1097 		struct sge_qset *qs = &sc->sge.qs[i];
1098 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1099 
1100 		callout_drain(&txq->txq_watchdog);
1101 		callout_drain(&txq->txq_timer);
1102 	}
1103 
1104 	PORT_LOCK_DEINIT(p);
1105 	if_free(p->ifp);
1106 	p->ifp = NULL;
1107 
1108 	ADAPTER_LOCK(sc);
1109 	CLR_BUSY(sc);
1110 	wakeup_one(&sc->flags);
1111 	ADAPTER_UNLOCK(sc);
1112 	return (0);
1113 }
1114 
1115 void
1116 t3_fatal_err(struct adapter *sc)
1117 {
1118 	u_int fw_status[4];
1119 
1120 	if (sc->flags & FULL_INIT_DONE) {
1121 		t3_sge_stop(sc);
1122 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1123 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1124 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1125 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1126 		t3_intr_disable(sc);
1127 	}
1128 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1129 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1130 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1131 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1132 }
1133 
1134 int
1135 t3_os_find_pci_capability(adapter_t *sc, int cap)
1136 {
1137 	device_t dev;
1138 	struct pci_devinfo *dinfo;
1139 	pcicfgregs *cfg;
1140 	uint32_t status;
1141 	uint8_t ptr;
1142 
1143 	dev = sc->dev;
1144 	dinfo = device_get_ivars(dev);
1145 	cfg = &dinfo->cfg;
1146 
1147 	status = pci_read_config(dev, PCIR_STATUS, 2);
1148 	if (!(status & PCIM_STATUS_CAPPRESENT))
1149 		return (0);
1150 
1151 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1152 	case 0:
1153 	case 1:
1154 		ptr = PCIR_CAP_PTR;
1155 		break;
1156 	case 2:
1157 		ptr = PCIR_CAP_PTR_2;
1158 		break;
1159 	default:
1160 		return (0);
1161 		break;
1162 	}
1163 	ptr = pci_read_config(dev, ptr, 1);
1164 
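	/* Walk the capability linked list until a matching ID or the 0 terminator. */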
1165 	while (ptr != 0) {
1166 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1167 			return (ptr);
1168 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1169 	}
1170 
1171 	return (0);
1172 }
1173 
1174 int
1175 t3_os_pci_save_state(struct adapter *sc)
1176 {
1177 	device_t dev;
1178 	struct pci_devinfo *dinfo;
1179 
1180 	dev = sc->dev;
1181 	dinfo = device_get_ivars(dev);
1182 
1183 	pci_cfg_save(dev, dinfo, 0);
1184 	return (0);
1185 }
1186 
1187 int
1188 t3_os_pci_restore_state(struct adapter *sc)
1189 {
1190 	device_t dev;
1191 	struct pci_devinfo *dinfo;
1192 
1193 	dev = sc->dev;
1194 	dinfo = device_get_ivars(dev);
1195 
1196 	pci_cfg_restore(dev, dinfo);
1197 	return (0);
1198 }
1199 
1200 /**
1201  *	t3_os_link_changed - handle link status changes
1202  *	@adapter: the adapter associated with the link change
1203  *	@port_id: the port index whose link status has changed
1204  *	@link_status: the new status of the link
1205  *	@speed: the new speed setting
1206  *	@duplex: the new duplex setting
1207  *	@fc: the new flow-control setting
1208  *
1209  *	This is the OS-dependent handler for link status changes.  The OS
1210  *	neutral handler takes care of most of the processing for these events,
1211  *	then calls this handler for any OS-specific processing.
1212  */
1213 void
1214 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1215      int duplex, int fc, int mac_was_reset)
1216 {
1217 	struct port_info *pi = &adapter->port[port_id];
1218 	struct ifnet *ifp = pi->ifp;
1219 
1220 	/* no race with detach, so ifp should always be good */
1221 	KASSERT(ifp, ("%s: if detached.", __func__));
1222 
1223 	/* Reapply mac settings if they were lost due to a reset */
1224 	if (mac_was_reset) {
1225 		PORT_LOCK(pi);
1226 		cxgb_update_mac_settings(pi);
1227 		PORT_UNLOCK(pi);
1228 	}
1229 
1230 	if (link_status) {
1231 		ifp->if_baudrate = IF_Mbps(speed);
1232 		if_link_state_change(ifp, LINK_STATE_UP);
1233 	} else
1234 		if_link_state_change(ifp, LINK_STATE_DOWN);
1235 }
1236 
1237 /**
1238  *	t3_os_phymod_changed - handle PHY module changes
1239  *	@adap: the adapter whose PHY module changed
1240  *	@port_id: the index of the port whose PHY module changed
1241  *
1242  *	This is the OS-dependent handler for PHY module changes.  It is
1243  *	invoked when a PHY module is removed or inserted for any OS-specific
1244  *	processing.
1245  */
1246 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1247 {
1248 	static const char *mod_str[] = {
1249 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1250 	};
1251 	struct port_info *pi = &adap->port[port_id];
1252 	int mod = pi->phy.modtype;
1253 
1254 	if (mod != pi->media.ifm_cur->ifm_data)
1255 		cxgb_build_medialist(pi);
1256 
1257 	if (mod == phy_modtype_none)
1258 		if_printf(pi->ifp, "PHY module unplugged\n");
1259 	else {
1260 		KASSERT(mod < ARRAY_SIZE(mod_str),
1261 			("invalid PHY module type %d", mod));
1262 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1263 	}
1264 }
1265 
1266 void
1267 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1268 {
1269 
1270 	/*
1271 	 * The ifnet might not be allocated before this gets called,
1272 	 * as this is called early on in attach by t3_prep_adapter
1273 	 * as this is called early on in attach by t3_prep_adapter;
1274 	 * save the address off in the port structure.
1275 	if (cxgb_debug)
1276 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1277 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1278 }
1279 
1280 /*
1281  * Programs the XGMAC based on the settings in the ifnet.  These settings
1282  * include MTU, MAC address, mcast addresses, etc.
1283  */
1284 static void
1285 cxgb_update_mac_settings(struct port_info *p)
1286 {
1287 	struct ifnet *ifp = p->ifp;
1288 	struct t3_rx_mode rm;
1289 	struct cmac *mac = &p->mac;
1290 	int mtu, hwtagging;
1291 
1292 	PORT_LOCK_ASSERT_OWNED(p);
1293 
1294 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1295 
1296 	mtu = ifp->if_mtu;
1297 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1298 		mtu += ETHER_VLAN_ENCAP_LEN;
1299 
1300 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1301 
1302 	t3_mac_set_mtu(mac, mtu);
1303 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1304 	t3_mac_set_address(mac, 0, p->hw_addr);
1305 	t3_init_rx_mode(&rm, p);
1306 	t3_mac_set_rx_mode(mac, &rm);
1307 }
1308 
1309 
1310 static int
1311 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1312 			      unsigned long n)
1313 {
1314 	int attempts = 5;
1315 
1316 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1317 		if (!--attempts)
1318 			return (ETIMEDOUT);
1319 		t3_os_sleep(10);
1320 	}
1321 	return 0;
1322 	return (0);
1323 
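/*
 * init_tp_parity() appears to seed every SMT, L2T, and routing-table entry
 * (plus one TCB field) via management-queue writes so that the TP memories
 * carry valid parity before parity-error reporting is enabled in cxgb_up().
 */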
1324 static int
1325 init_tp_parity(struct adapter *adap)
1326 {
1327 	int i;
1328 	struct mbuf *m;
1329 	struct cpl_set_tcb_field *greq;
1330 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1331 
1332 	t3_tp_set_offload_mode(adap, 1);
1333 
1334 	for (i = 0; i < 16; i++) {
1335 		struct cpl_smt_write_req *req;
1336 
1337 		m = m_gethdr(M_WAITOK, MT_DATA);
1338 		req = mtod(m, struct cpl_smt_write_req *);
1339 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1340 		memset(req, 0, sizeof(*req));
1341 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1342 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1343 		req->iff = i;
1344 		t3_mgmt_tx(adap, m);
1345 	}
1346 
1347 	for (i = 0; i < 2048; i++) {
1348 		struct cpl_l2t_write_req *req;
1349 
1350 		m = m_gethdr(M_WAITOK, MT_DATA);
1351 		req = mtod(m, struct cpl_l2t_write_req *);
1352 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1353 		memset(req, 0, sizeof(*req));
1354 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1355 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1356 		req->params = htonl(V_L2T_W_IDX(i));
1357 		t3_mgmt_tx(adap, m);
1358 	}
1359 
1360 	for (i = 0; i < 2048; i++) {
1361 		struct cpl_rte_write_req *req;
1362 
1363 		m = m_gethdr(M_WAITOK, MT_DATA);
1364 		req = mtod(m, struct cpl_rte_write_req *);
1365 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1366 		memset(req, 0, sizeof(*req));
1367 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1368 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1369 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1370 		t3_mgmt_tx(adap, m);
1371 	}
1372 
1373 	m = m_gethdr(M_WAITOK, MT_DATA);
1374 	greq = mtod(m, struct cpl_set_tcb_field *);
1375 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1376 	memset(greq, 0, sizeof(*greq));
1377 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1378 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1379 	greq->mask = htobe64(1);
1380 	t3_mgmt_tx(adap, m);
1381 
1382 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1383 	t3_tp_set_offload_mode(adap, 0);
1384 	return (i);
1385 }
1386 
1387 /**
1388  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1389  *	@adap: the adapter
1390  *
1391  *	Sets up RSS to distribute packets to multiple receive queues.  We
1392  *	configure the RSS CPU lookup table to distribute to the number of HW
1393  *	receive queues, and the response queue lookup table to narrow that
1394  *	down to the response queues actually configured for each port.
1395  *	We always configure the RSS mapping for two ports since the mapping
1396  *	table has plenty of entries.
1397  */
1398 static void
1399 setup_rss(adapter_t *adap)
1400 {
1401 	int i;
1402 	u_int nq[2];
1403 	uint8_t cpus[SGE_QSETS + 1];
1404 	uint16_t rspq_map[RSS_TABLE_SIZE];
1405 
1406 	for (i = 0; i < SGE_QSETS; ++i)
1407 		cpus[i] = i;
1408 	cpus[SGE_QSETS] = 0xff;
1409 
1410 	nq[0] = nq[1] = 0;
1411 	for_each_port(adap, i) {
1412 		const struct port_info *pi = adap2pinfo(adap, i);
1413 
1414 		nq[pi->tx_chan] += pi->nqsets;
1415 	}
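	/*
	 * Worked example (hypothetical counts): with two ports of two queue
	 * sets each, nq[0] = nq[1] = 2, so the first half of the table cycles
	 * through 0,1,0,1,... and the second half through 2,3,2,3,... (i.e.
	 * port 1's queue sets, offset by nq[0]).
	 */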
1416 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1417 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1418 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1419 	}
1420 
1421 	/* Calculate the reverse RSS map table */
1422 	for (i = 0; i < SGE_QSETS; ++i)
1423 		adap->rrss_map[i] = 0xff;
1424 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1425 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1426 			adap->rrss_map[rspq_map[i]] = i;
1427 
1428 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1429 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1430 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1431 	              cpus, rspq_map);
1432 
1433 }
1434 
1435 /*
1436  * Sends an mbuf to the offload queue driver; this is just a thin
1437  * wrapper around t3_offload_tx().
1438  */
1439 static inline int
1440 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1441 {
1442 	int ret;
1443 
1444 	ret = t3_offload_tx(tdev, m);
1445 	return (ret);
1446 }
1447 
1448 static int
1449 write_smt_entry(struct adapter *adapter, int idx)
1450 {
1451 	struct port_info *pi = &adapter->port[idx];
1452 	struct cpl_smt_write_req *req;
1453 	struct mbuf *m;
1454 
1455 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1456 		return (ENOMEM);
1457 
1458 	req = mtod(m, struct cpl_smt_write_req *);
1459 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1460 
1461 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1462 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1463 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1464 	req->iff = idx;
1465 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1466 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1467 
1468 	m_set_priority(m, 1);
1469 
1470 	offload_tx(&adapter->tdev, m);
1471 
1472 	return (0);
1473 }
1474 
1475 static int
1476 init_smt(struct adapter *adapter)
1477 {
1478 	int i;
1479 
1480 	for_each_port(adapter, i)
1481 		write_smt_entry(adapter, i);
1482 	return 0;
1483 	return (0);
1484 
1485 static void
1486 init_port_mtus(adapter_t *adapter)
1487 {
1488 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1489 
1490 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1491 }
1492 
1493 static void
1494 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1495 			      int hi, int port)
1496 {
1497 	struct mbuf *m;
1498 	struct mngt_pktsched_wr *req;
1499 
1500 	m = m_gethdr(M_NOWAIT, MT_DATA);
1501 	if (m) {
1502 		req = mtod(m, struct mngt_pktsched_wr *);
1503 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1504 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1505 		req->sched = sched;
1506 		req->idx = qidx;
1507 		req->min = lo;
1508 		req->max = hi;
1509 		req->binding = port;
1510 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1511 		t3_mgmt_tx(adap, m);
1512 	}
1513 }
1514 
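/*
 * Bind each port's queue sets to that port's TX channel in the firmware
 * packet scheduler; the -1 min/max arguments presumably leave the rate
 * limits at their defaults.
 */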
1515 static void
1516 bind_qsets(adapter_t *sc)
1517 {
1518 	int i, j;
1519 
1520 	for (i = 0; i < (sc)->params.nports; ++i) {
1521 		const struct port_info *pi = adap2pinfo(sc, i);
1522 
1523 		for (j = 0; j < pi->nqsets; ++j) {
1524 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1525 					  -1, pi->tx_chan);
1526 
1527 		}
1528 	}
1529 }
1530 
1531 static void
1532 update_tpeeprom(struct adapter *adap)
1533 {
1534 	const struct firmware *tpeeprom;
1535 
1536 	uint32_t version;
1537 	unsigned int major, minor;
1538 	int ret, len;
1539 	char rev, name[32];
1540 
1541 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1542 
1543 	major = G_TP_VERSION_MAJOR(version);
1544 	minor = G_TP_VERSION_MINOR(version);
1545 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1546 		return;
1547 
1548 	rev = t3rev2char(adap);
1549 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1550 
1551 	tpeeprom = firmware_get(name);
1552 	if (tpeeprom == NULL) {
1553 		device_printf(adap->dev,
1554 			      "could not load TP EEPROM: unable to load %s\n",
1555 			      name);
1556 		return;
1557 	}
1558 
1559 	len = tpeeprom->datasize - 4;
1560 
1561 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1562 	if (ret)
1563 		goto release_tpeeprom;
1564 
1565 	if (len != TP_SRAM_LEN) {
1566 		device_printf(adap->dev,
1567 			      "%s length is wrong len=%d expected=%d\n", name,
1568 			      len, TP_SRAM_LEN);
1569 		goto release_tpeeprom;
1570 	}
1571 
1572 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1573 	    TP_SRAM_OFFSET);
1574 
1575 	if (!ret) {
1576 		device_printf(adap->dev,
1577 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1578 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1579 	} else
1580 		device_printf(adap->dev,
1581 			      "Protocol SRAM image update in EEPROM failed\n");
1582 
1583 release_tpeeprom:
1584 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1585 
1586 	return;
1587 }
1588 
1589 static int
1590 update_tpsram(struct adapter *adap)
1591 {
1592 	const struct firmware *tpsram;
1593 	int ret;
1594 	char rev, name[32];
1595 
1596 	rev = t3rev2char(adap);
1597 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1598 
1599 	update_tpeeprom(adap);
1600 
1601 	tpsram = firmware_get(name);
1602 	if (tpsram == NULL) {
1603 		device_printf(adap->dev, "could not load TP SRAM\n");
1604 		return (EINVAL);
1605 	} else
1606 		device_printf(adap->dev, "updating TP SRAM\n");
1607 
1608 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1609 	if (ret)
1610 		goto release_tpsram;
1611 
1612 	ret = t3_set_proto_sram(adap, tpsram->data);
1613 	if (ret)
1614 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1615 
1616 release_tpsram:
1617 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1618 
1619 	return ret;
1620 	return (ret);
1621 
1622 /**
1623  *	cxgb_up - enable the adapter
1624  *	@adap: adapter being enabled
1625  *
1626  *	Called when the first port is enabled, this function performs the
1627  *	actions necessary to make an adapter operational, such as completing
1628  *	the initialization of HW modules, and enabling interrupts.
1629  */
1630 static int
1631 cxgb_up(struct adapter *sc)
1632 {
1633 	int err = 0;
1634 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1635 
1636 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1637 					   __func__, sc->open_device_map));
1638 
1639 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1640 
1641 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1642 
1643 		if ((sc->flags & FW_UPTODATE) == 0)
1644 			if ((err = upgrade_fw(sc)))
1645 				goto out;
1646 
1647 		if ((sc->flags & TPS_UPTODATE) == 0)
1648 			if ((err = update_tpsram(sc)))
1649 				goto out;
1650 
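		/*
		 * hw.cxgb.nfilters < 0 (the default) sizes the filter region
		 * to all the MC5 TIDs beyond the minimum reservation; an
		 * explicit value is clamped to that maximum.  The server
		 * region is given up (nservers = 0) to make room for filters.
		 */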
1651 		if (is_offload(sc) && nfilters != 0) {
1652 			sc->params.mc5.nservers = 0;
1653 
1654 			if (nfilters < 0)
1655 				sc->params.mc5.nfilters = mxf;
1656 			else
1657 				sc->params.mc5.nfilters = min(nfilters, mxf);
1658 		}
1659 
1660 		err = t3_init_hw(sc, 0);
1661 		if (err)
1662 			goto out;
1663 
1664 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1665 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1666 
1667 		err = setup_sge_qsets(sc);
1668 		if (err)
1669 			goto out;
1670 
1671 		alloc_filters(sc);
1672 		setup_rss(sc);
1673 
1674 		t3_intr_clear(sc);
1675 		err = cxgb_setup_interrupts(sc);
1676 		if (err)
1677 			goto out;
1678 
1679 		t3_add_configured_sysctls(sc);
1680 		sc->flags |= FULL_INIT_DONE;
1681 	}
1682 
1683 	t3_intr_clear(sc);
1684 	t3_sge_start(sc);
1685 	t3_intr_enable(sc);
1686 
1687 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1688 	    is_offload(sc) && init_tp_parity(sc) == 0)
1689 		sc->flags |= TP_PARITY_INIT;
1690 
1691 	if (sc->flags & TP_PARITY_INIT) {
1692 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1693 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1694 	}
1695 
1696 	if (!(sc->flags & QUEUES_BOUND)) {
1697 		bind_qsets(sc);
1698 		setup_hw_filters(sc);
1699 		sc->flags |= QUEUES_BOUND;
1700 	}
1701 
1702 	t3_sge_reset_adapter(sc);
1703 out:
1704 	return (err);
1705 }
1706 
1707 /*
1708  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1709  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1710  * during controller_detach, not here.
1711  */
1712 static void
1713 cxgb_down(struct adapter *sc)
1714 {
1715 	t3_sge_stop(sc);
1716 	t3_intr_disable(sc);
1717 }
1718 
1719 static int
1720 offload_open(struct port_info *pi)
1721 {
1722 	struct adapter *sc = pi->adapter;
1723 	struct t3cdev *tdev = &sc->tdev;
1724 
1725 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1726 
1727 	t3_tp_set_offload_mode(sc, 1);
1728 	tdev->lldev = pi->ifp;
1729 	init_port_mtus(sc);
1730 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1731 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1732 	init_smt(sc);
1733 	cxgb_add_clients(tdev);
1734 
1735 	return (0);
1736 }
1737 
1738 static int
1739 offload_close(struct t3cdev *tdev)
1740 {
1741 	struct adapter *adapter = tdev2adap(tdev);
1742 
1743 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1744 		return (0);
1745 
1746 	/* Call back all registered clients */
1747 	cxgb_remove_clients(tdev);
1748 
1749 	tdev->lldev = NULL;
1750 	cxgb_set_dummy_ops(tdev);
1751 	t3_tp_set_offload_mode(adapter, 0);
1752 
1753 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1754 
1755 	return (0);
1756 }
1757 
1758 /*
1759  * if_init for cxgb ports.
1760  */
1761 static void
1762 cxgb_init(void *arg)
1763 {
1764 	struct port_info *p = arg;
1765 	struct adapter *sc = p->adapter;
1766 
1767 	ADAPTER_LOCK(sc);
1768 	cxgb_init_locked(p); /* releases adapter lock */
1769 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1770 }
1771 
1772 static int
1773 cxgb_init_locked(struct port_info *p)
1774 {
1775 	struct adapter *sc = p->adapter;
1776 	struct ifnet *ifp = p->ifp;
1777 	struct cmac *mac = &p->mac;
1778 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1779 
1780 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1781 
1782 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1783 		gave_up_lock = 1;
1784 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1785 			rc = EINTR;
1786 			goto done;
1787 		}
1788 	}
1789 	if (IS_DOOMED(p)) {
1790 		rc = ENXIO;
1791 		goto done;
1792 	}
1793 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1794 
1795 	/*
1796 	 * The code that runs during one-time adapter initialization can sleep
1797 	 * so it's important not to hold any locks across it.
1798 	 */
1799 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1800 
1801 	if (may_sleep) {
1802 		SET_BUSY(sc);
1803 		gave_up_lock = 1;
1804 		ADAPTER_UNLOCK(sc);
1805 	}
1806 
1807 	if (sc->open_device_map == 0) {
1808 		if ((rc = cxgb_up(sc)) != 0)
1809 			goto done;
1810 
1811 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1812 			log(LOG_WARNING,
1813 			    "Could not initialize offload capabilities\n");
1814 	}
1815 
1816 	PORT_LOCK(p);
1817 	if (isset(&sc->open_device_map, p->port_id) &&
1818 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1819 		PORT_UNLOCK(p);
1820 		goto done;
1821 	}
1822 	t3_port_intr_enable(sc, p->port_id);
1823 	if (!mac->multiport)
1824 		t3_mac_init(mac);
1825 	cxgb_update_mac_settings(p);
1826 	t3_link_start(&p->phy, mac, &p->link_config);
1827 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1828 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1829 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1830 	PORT_UNLOCK(p);
1831 
1832 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1833 		struct sge_qset *qs = &sc->sge.qs[i];
1834 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1835 
1836 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1837 				 txq->txq_watchdog.c_cpu);
1838 	}
1839 
1840 	/* all ok */
1841 	setbit(&sc->open_device_map, p->port_id);
1842 	callout_reset(&p->link_check_ch,
1843 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1844 	    link_check_callout, p);
1845 
1846 done:
1847 	if (may_sleep) {
1848 		ADAPTER_LOCK(sc);
1849 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1850 		CLR_BUSY(sc);
1851 	}
1852 	if (gave_up_lock)
1853 		wakeup_one(&sc->flags);
1854 	ADAPTER_UNLOCK(sc);
1855 	return (rc);
1856 }
1857 
1858 static int
1859 cxgb_uninit_locked(struct port_info *p)
1860 {
1861 	struct adapter *sc = p->adapter;
1862 	int rc;
1863 
1864 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1865 
1866 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1867 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1868 			rc = EINTR;
1869 			goto done;
1870 		}
1871 	}
1872 	if (IS_DOOMED(p)) {
1873 		rc = ENXIO;
1874 		goto done;
1875 	}
1876 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1877 	SET_BUSY(sc);
1878 	ADAPTER_UNLOCK(sc);
1879 
1880 	rc = cxgb_uninit_synchronized(p);
1881 
1882 	ADAPTER_LOCK(sc);
1883 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1884 	CLR_BUSY(sc);
1885 	wakeup_one(&sc->flags);
1886 done:
1887 	ADAPTER_UNLOCK(sc);
1888 	return (rc);
1889 }
1890 
1891 /*
1892  * Called on "ifconfig down" and from port_detach.
1893  */
1894 static int
1895 cxgb_uninit_synchronized(struct port_info *pi)
1896 {
1897 	struct adapter *sc = pi->adapter;
1898 	struct ifnet *ifp = pi->ifp;
1899 
1900 	/*
1901 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1902 	 */
1903 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1904 
1905 	/*
1906 	 * Clear this port's bit from the open device map, and then drain all
1907 	 * the tasks that can access/manipulate this port's port_info or ifp.
1908 	 * We disable this port's interrupts here, so the slow/ext
1909 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1910 	 * be enqueued every second but the runs after this drain will not see
1911 	 * this port in the open device map.
1912 	 *
1913 	 * A well-behaved task must take open_device_map into account and ignore
1914 	 * ports that are not open.
1915 	 */
1916 	clrbit(&sc->open_device_map, pi->port_id);
1917 	t3_port_intr_disable(sc, pi->port_id);
1918 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1919 	taskqueue_drain(sc->tq, &sc->tick_task);
1920 
1921 	callout_drain(&pi->link_check_ch);
1922 	taskqueue_drain(sc->tq, &pi->link_check_task);
1923 
1924 	PORT_LOCK(pi);
1925 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1926 
1927 	/* disable pause frames */
1928 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1929 
1930 	/* Reset RX FIFO HWM */
1931 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1932 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1933 
1934 	DELAY(100 * 1000);
1935 
1936 	/* Wait for TXFIFO empty */
1937 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1938 			F_TXFIFO_EMPTY, 1, 20, 5);
1939 
1940 	DELAY(100 * 1000);
1941 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1942 
1943 
1944 	pi->phy.ops->power_down(&pi->phy, 1);
1945 
1946 	PORT_UNLOCK(pi);
1947 
1948 	pi->link_config.link_ok = 0;
1949 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1950 
1951 	if ((sc->open_device_map & PORT_MASK) == 0)
1952 		offload_close(&sc->tdev);
1953 
1954 	if (sc->open_device_map == 0)
1955 		cxgb_down(pi->adapter);
1956 
1957 	return (0);
1958 }
1959 
1960 /*
1961  * Mark lro enabled or disabled in all qsets for this port
1962  */
1963 static int
1964 cxgb_set_lro(struct port_info *p, int enabled)
1965 {
1966 	int i;
1967 	struct adapter *adp = p->adapter;
1968 	struct sge_qset *q;
1969 
1970 	for (i = 0; i < p->nqsets; i++) {
1971 		q = &adp->sge.qs[p->first_qset + i];
1972 		q->lro.enabled = (enabled != 0);
1973 	}
1974 	return (0);
1975 }
1976 
1977 static int
1978 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1979 {
1980 	struct port_info *p = ifp->if_softc;
1981 	struct adapter *sc = p->adapter;
1982 	struct ifreq *ifr = (struct ifreq *)data;
1983 	int flags, error = 0, mtu;
1984 	uint32_t mask;
1985 
1986 	switch (command) {
1987 	case SIOCSIFMTU:
1988 		ADAPTER_LOCK(sc);
1989 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1990 		if (error) {
1991 fail:
1992 			ADAPTER_UNLOCK(sc);
1993 			return (error);
1994 		}
1995 
1996 		mtu = ifr->ifr_mtu;
1997 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1998 			error = EINVAL;
1999 		} else {
2000 			ifp->if_mtu = mtu;
2001 			PORT_LOCK(p);
2002 			cxgb_update_mac_settings(p);
2003 			PORT_UNLOCK(p);
2004 		}
2005 		ADAPTER_UNLOCK(sc);
2006 		break;
2007 	case SIOCSIFFLAGS:
2008 		ADAPTER_LOCK(sc);
2009 		if (IS_DOOMED(p)) {
2010 			error = ENXIO;
2011 			goto fail;
2012 		}
2013 		if (ifp->if_flags & IFF_UP) {
2014 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2015 				flags = p->if_flags;
2016 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2017 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2018 					if (IS_BUSY(sc)) {
2019 						error = EBUSY;
2020 						goto fail;
2021 					}
2022 					PORT_LOCK(p);
2023 					cxgb_update_mac_settings(p);
2024 					PORT_UNLOCK(p);
2025 				}
2026 				ADAPTER_UNLOCK(sc);
2027 			} else
2028 				error = cxgb_init_locked(p);
2029 			p->if_flags = ifp->if_flags;
2030 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2031 			error = cxgb_uninit_locked(p);
2032 		else
2033 			ADAPTER_UNLOCK(sc);
2034 
2035 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2036 		break;
2037 	case SIOCADDMULTI:
2038 	case SIOCDELMULTI:
2039 		ADAPTER_LOCK(sc);
2040 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2041 		if (error)
2042 			goto fail;
2043 
2044 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2045 			PORT_LOCK(p);
2046 			cxgb_update_mac_settings(p);
2047 			PORT_UNLOCK(p);
2048 		}
2049 		ADAPTER_UNLOCK(sc);
2050 
2051 		break;
2052 	case SIOCSIFCAP:
2053 		ADAPTER_LOCK(sc);
2054 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2055 		if (error)
2056 			goto fail;
2057 
2058 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2059 		if (mask & IFCAP_TXCSUM) {
2060 			ifp->if_capenable ^= IFCAP_TXCSUM;
2061 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2062 
2063 			if (IFCAP_TSO & ifp->if_capenable &&
2064 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2065 				ifp->if_capenable &= ~IFCAP_TSO;
2066 				ifp->if_hwassist &= ~CSUM_TSO;
2067 				if_printf(ifp,
2068 				    "tso disabled due to -txcsum.\n");
2069 			}
2070 		}
2071 		if (mask & IFCAP_RXCSUM)
2072 			ifp->if_capenable ^= IFCAP_RXCSUM;
2073 		if (mask & IFCAP_TSO4) {
2074 			ifp->if_capenable ^= IFCAP_TSO4;
2075 
2076 			if (IFCAP_TSO & ifp->if_capenable) {
2077 				if (IFCAP_TXCSUM & ifp->if_capenable)
2078 					ifp->if_hwassist |= CSUM_TSO;
2079 				else {
2080 					ifp->if_capenable &= ~IFCAP_TSO;
2081 					ifp->if_hwassist &= ~CSUM_TSO;
2082 					if_printf(ifp,
2083 					    "enable txcsum first.\n");
2084 					error = EAGAIN;
2085 				}
2086 			} else
2087 				ifp->if_hwassist &= ~CSUM_TSO;
2088 		}
2089 		if (mask & IFCAP_LRO) {
2090 			ifp->if_capenable ^= IFCAP_LRO;
2091 
2092 			/* Safe to do this even if cxgb_up has not been called yet */
2093 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2094 		}
2095 		if (mask & IFCAP_VLAN_HWTAGGING) {
2096 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2097 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2098 				PORT_LOCK(p);
2099 				cxgb_update_mac_settings(p);
2100 				PORT_UNLOCK(p);
2101 			}
2102 		}
2103 		if (mask & IFCAP_VLAN_MTU) {
2104 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2105 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2106 				PORT_LOCK(p);
2107 				cxgb_update_mac_settings(p);
2108 				PORT_UNLOCK(p);
2109 			}
2110 		}
2111 		if (mask & IFCAP_VLAN_HWTSO)
2112 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2113 		if (mask & IFCAP_VLAN_HWCSUM)
2114 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2115 
2116 #ifdef VLAN_CAPABILITIES
2117 		VLAN_CAPABILITIES(ifp);
2118 #endif
2119 		ADAPTER_UNLOCK(sc);
2120 		break;
2121 	case SIOCSIFMEDIA:
2122 	case SIOCGIFMEDIA:
2123 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2124 		break;
2125 	default:
2126 		error = ether_ioctl(ifp, command, data);
2127 	}
2128 
2129 	return (error);
2130 }
2131 
2132 static int
2133 cxgb_media_change(struct ifnet *ifp)
2134 {
2135 	return (EOPNOTSUPP);
2136 }
2137 
2138 /*
2139  * Translates phy->modtype to the correct Ethernet media subtype.
2140  */
2141 static int
2142 cxgb_ifm_type(int mod)
2143 {
2144 	switch (mod) {
2145 	case phy_modtype_sr:
2146 		return (IFM_10G_SR);
2147 	case phy_modtype_lr:
2148 		return (IFM_10G_LR);
2149 	case phy_modtype_lrm:
2150 		return (IFM_10G_LRM);
2151 	case phy_modtype_twinax:
2152 		return (IFM_10G_TWINAX);
2153 	case phy_modtype_twinax_long:
2154 		return (IFM_10G_TWINAX_LONG);
2155 	case phy_modtype_none:
2156 		return (IFM_NONE);
2157 	case phy_modtype_unknown:
2158 		return (IFM_UNKNOWN);
2159 	}
2160 
2161 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2162 	return (IFM_UNKNOWN);
2163 }
2164 
2165 /*
2166  * Rebuilds the ifmedia list for this port, and sets the current media.
2167  */
2168 static void
2169 cxgb_build_medialist(struct port_info *p)
2170 {
2171 	struct cphy *phy = &p->phy;
2172 	struct ifmedia *media = &p->media;
2173 	int mod = phy->modtype;
2174 	int m = IFM_ETHER | IFM_FDX;
2175 
2176 	PORT_LOCK(p);
2177 
2178 	ifmedia_removeall(media);
2179 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2180 		/* Copper (RJ45) */
2181 
2182 		if (phy->caps & SUPPORTED_10000baseT_Full)
2183 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2184 
2185 		if (phy->caps & SUPPORTED_1000baseT_Full)
2186 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2187 
2188 		if (phy->caps & SUPPORTED_100baseT_Full)
2189 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2190 
2191 		if (phy->caps & SUPPORTED_10baseT_Full)
2192 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2193 
2194 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2195 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2196 
2197 	} else if (phy->caps & SUPPORTED_TP) {
2198 		/* Copper (CX4) */
2199 
2200 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2201 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2202 
2203 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2204 		ifmedia_set(media, m | IFM_10G_CX4);
2205 
2206 	} else if (phy->caps & SUPPORTED_FIBRE &&
2207 		   phy->caps & SUPPORTED_10000baseT_Full) {
2208 		/* 10G optical (but includes SFP+ twinax) */
2209 
2210 		m |= cxgb_ifm_type(mod);
2211 		if (IFM_SUBTYPE(m) == IFM_NONE)
2212 			m &= ~IFM_FDX;
2213 
2214 		ifmedia_add(media, m, mod, NULL);
2215 		ifmedia_set(media, m);
2216 
2217 	} else if (phy->caps & SUPPORTED_FIBRE &&
2218 		   phy->caps & SUPPORTED_1000baseT_Full) {
2219 		/* 1G optical */
2220 
2221 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2222 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2223 		ifmedia_set(media, m | IFM_1000_SX);
2224 
2225 	} else {
2226 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2227 			    phy->caps));
2228 	}
2229 
2230 	PORT_UNLOCK(p);
2231 }
2232 
2233 static void
2234 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2235 {
2236 	struct port_info *p = ifp->if_softc;
2237 	struct ifmedia_entry *cur = p->media.ifm_cur;
2238 	int speed = p->link_config.speed;
2239 
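	/*
	 * ifm_data holds the module type the media list was built with;
	 * rebuild the list if the transceiver module has changed since then.
	 */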
2240 	if (cur->ifm_data != p->phy.modtype) {
2241 		cxgb_build_medialist(p);
2242 		cur = p->media.ifm_cur;
2243 	}
2244 
2245 	ifmr->ifm_status = IFM_AVALID;
2246 	if (!p->link_config.link_ok)
2247 		return;
2248 
2249 	ifmr->ifm_status |= IFM_ACTIVE;
2250 
2251 	/*
2252 	 * active and current will differ iff current media is autoselect.  That
2253 	 * can happen only for copper RJ45.
2254 	 */
2255 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2256 		return;
2257 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2258 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2259 
2260 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2261 	if (speed == SPEED_10000)
2262 		ifmr->ifm_active |= IFM_10G_T;
2263 	else if (speed == SPEED_1000)
2264 		ifmr->ifm_active |= IFM_1000_T;
2265 	else if (speed == SPEED_100)
2266 		ifmr->ifm_active |= IFM_100_TX;
2267 	else if (speed == SPEED_10)
2268 		ifmr->ifm_active |= IFM_10_T;
2269 	else
2270 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2271 			    speed));
2272 }
2273 
2274 static void
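/*
 * Interrupt handler for asynchronous (slow path) events: mask further PL
 * interrupts, flush the write with a read-back, and defer the work to the
 * slow interrupt task.
 */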
2275 cxgb_async_intr(void *data)
2276 {
2277 	adapter_t *sc = data;
2278 
2279 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2280 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2281 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2282 }
2283 
2284 static void
2285 link_check_callout(void *arg)
2286 {
2287 	struct port_info *pi = arg;
2288 	struct adapter *sc = pi->adapter;
2289 
2290 	if (!isset(&sc->open_device_map, pi->port_id))
2291 		return;
2292 
2293 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2294 }
2295 
2296 static void
2297 check_link_status(void *arg, int pending)
2298 {
2299 	struct port_info *pi = arg;
2300 	struct adapter *sc = pi->adapter;
2301 
2302 	if (!isset(&sc->open_device_map, pi->port_id))
2303 		return;
2304 
2305 	t3_link_changed(sc, pi->port_id);
2306 
2307 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2308 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2309 }
2310 
2311 void
2312 t3_os_link_intr(struct port_info *pi)
2313 {
2314 	/*
2315 	 * Schedule a link check in the near future.  If the link is flapping
2316 	 * rapidly we'll keep resetting the callout and delaying the check until
2317 	 * things stabilize a bit.
2318 	 */
2319 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2320 }
2321 
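/*
 * Periodic MAC watchdog for T3B2 adapters.  A return of 1 from
 * t3b2_mac_watchdog_task() is counted as a toggle; a return of 2 causes the
 * MAC settings, link, and port interrupts to be reprogrammed and is counted
 * as a reset.
 */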
2322 static void
2323 check_t3b2_mac(struct adapter *sc)
2324 {
2325 	int i;
2326 
2327 	if (sc->flags & CXGB_SHUTDOWN)
2328 		return;
2329 
2330 	for_each_port(sc, i) {
2331 		struct port_info *p = &sc->port[i];
2332 		int status;
2333 #ifdef INVARIANTS
2334 		struct ifnet *ifp = p->ifp;
2335 #endif
2336 
2337 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2338 		    !p->link_config.link_ok)
2339 			continue;
2340 
2341 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2342 			("%s: state mismatch (drv_flags %x, device_map %x)",
2343 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2344 
2345 		PORT_LOCK(p);
2346 		status = t3b2_mac_watchdog_task(&p->mac);
2347 		if (status == 1)
2348 			p->mac.stats.num_toggled++;
2349 		else if (status == 2) {
2350 			struct cmac *mac = &p->mac;
2351 
2352 			cxgb_update_mac_settings(p);
2353 			t3_link_start(&p->phy, mac, &p->link_config);
2354 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2355 			t3_port_intr_enable(sc, p->port_id);
2356 			p->mac.stats.num_resets++;
2357 		}
2358 		PORT_UNLOCK(p);
2359 	}
2360 }
2361 
2362 static void
2363 cxgb_tick(void *arg)
2364 {
2365 	adapter_t *sc = (adapter_t *)arg;
2366 
2367 	if (sc->flags & CXGB_SHUTDOWN)
2368 		return;
2369 
2370 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2371 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2372 }
2373 
2374 static void
2375 cxgb_tick_handler(void *arg, int count)
2376 {
2377 	adapter_t *sc = (adapter_t *)arg;
2378 	const struct adapter_params *p = &sc->params;
2379 	int i;
2380 	uint32_t cause, reset;
2381 
2382 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2383 		return;
2384 
2385 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2386 		check_t3b2_mac(sc);
2387 
2388 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2389 	if (cause) {
2390 		struct sge_qset *qs = &sc->sge.qs[0];
2391 		uint32_t mask, v;
2392 
2393 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2394 
2395 		mask = 1;
2396 		for (i = 0; i < SGE_QSETS; i++) {
2397 			if (v & mask)
2398 				qs[i].rspq.starved++;
2399 			mask <<= 1;
2400 		}
2401 
2402 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2403 
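		/* Each qset has two free lists, each with its own empty bit. */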
2404 		for (i = 0; i < SGE_QSETS * 2; i++) {
2405 			if (v & mask) {
2406 				qs[i / 2].fl[i % 2].empty++;
2407 			}
2408 			mask <<= 1;
2409 		}
2410 
2411 		/* clear */
2412 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2413 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2414 	}
2415 
2416 	for (i = 0; i < sc->params.nports; i++) {
2417 		struct port_info *pi = &sc->port[i];
2418 		struct ifnet *ifp = pi->ifp;
2419 		struct cmac *mac = &pi->mac;
2420 		struct mac_stats *mstats = &mac->stats;
2421 		int drops, j;
2422 
2423 		if (!isset(&sc->open_device_map, pi->port_id))
2424 			continue;
2425 
2426 		PORT_LOCK(pi);
2427 		t3_mac_update_stats(mac);
2428 		PORT_UNLOCK(pi);
2429 
2430 		ifp->if_opackets = mstats->tx_frames;
2431 		ifp->if_ipackets = mstats->rx_frames;
2432 		ifp->if_obytes = mstats->tx_octets;
2433 		ifp->if_ibytes = mstats->rx_octets;
2434 		ifp->if_omcasts = mstats->tx_mcast_frames;
2435 		ifp->if_imcasts = mstats->rx_mcast_frames;
2436 		ifp->if_collisions = mstats->tx_total_collisions;
2437 		ifp->if_iqdrops = mstats->rx_cong_drops;
2438 
2439 		drops = 0;
2440 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2441 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2442 		ifp->if_snd.ifq_drops = drops;
2443 
2444 		ifp->if_oerrors =
2445 		    mstats->tx_excess_collisions +
2446 		    mstats->tx_underrun +
2447 		    mstats->tx_len_errs +
2448 		    mstats->tx_mac_internal_errs +
2449 		    mstats->tx_excess_deferral +
2450 		    mstats->tx_fcs_errs;
2451 		ifp->if_ierrors =
2452 		    mstats->rx_jabber +
2453 		    mstats->rx_data_errs +
2454 		    mstats->rx_sequence_errs +
2455 		    mstats->rx_runt +
2456 		    mstats->rx_too_long +
2457 		    mstats->rx_mac_internal_errs +
2458 		    mstats->rx_short +
2459 		    mstats->rx_fcs_errs;
2460 
2461 		if (mac->multiport)
2462 			continue;
2463 
2464 		/* Count rx fifo overflows, once per second */
2465 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2466 		reset = 0;
2467 		if (cause & F_RXFIFO_OVERFLOW) {
2468 			mac->stats.rx_fifo_ovfl++;
2469 			reset |= F_RXFIFO_OVERFLOW;
2470 		}
2471 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2472 	}
2473 }
2474 
2475 static void
2476 touch_bars(device_t dev)
2477 {
2478 	/*
2479 	 * Don't enable yet
2480 	 */
2481 #if !defined(__LP64__) && 0
2482 	u32 v;
2483 
2484 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2485 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2486 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2487 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2488 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2489 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2490 #endif
2491 }
2492 
2493 static int
2494 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2495 {
2496 	uint8_t *buf;
2497 	int err = 0;
2498 	u32 aligned_offset, aligned_len, *p;
2499 	struct adapter *adapter = pi->adapter;
2500 
2501 
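	/*
	 * The EEPROM is written one 32-bit word at a time.  Round the request
	 * out to word boundaries; partial words at either end are read first
	 * and merged with the caller's data.
	 */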
2502 	aligned_offset = offset & ~3;
2503 	aligned_len = (len + (offset & 3) + 3) & ~3;
2504 
2505 	if (aligned_offset != offset || aligned_len != len) {
2506 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2507 		if (!buf)
2508 			return (ENOMEM);
2509 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2510 		if (!err && aligned_len > 4)
2511 			err = t3_seeprom_read(adapter,
2512 					      aligned_offset + aligned_len - 4,
2513 					      (u32 *)&buf[aligned_len - 4]);
2514 		if (err)
2515 			goto out;
2516 		memcpy(buf + (offset & 3), data, len);
2517 	} else
2518 		buf = (uint8_t *)(uintptr_t)data;
2519 
2520 	err = t3_seeprom_wp(adapter, 0);
2521 	if (err)
2522 		goto out;
2523 
2524 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2525 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2526 		aligned_offset += 4;
2527 	}
2528 
2529 	if (!err)
2530 		err = t3_seeprom_wp(adapter, 1);
2531 out:
2532 	if (buf != data)
2533 		free(buf, M_DEVBUF);
2534 	return (err);
2535 }
2536 
2537 
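/*
 * Range check used by the ioctl handlers below; a negative value means "not
 * specified" and always passes.
 */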
2538 static int
2539 in_range(int val, int lo, int hi)
2540 {
2541 	return (val < 0 || (val <= hi && val >= lo));
2542 }
2543 
2544 static int
2545 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2546 {
2547 	return (0);
2548 }
2549 
2550 static int
2551 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2552 {
2553 	return (0);
2554 }
2555 
2556 static int
2557 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2558     int fflag, struct thread *td)
2559 {
2560 	int mmd, error = 0;
2561 	struct port_info *pi = dev->si_drv1;
2562 	adapter_t *sc = pi->adapter;
2563 
2564 #ifdef PRIV_SUPPORTED
2565 	if (priv_check(td, PRIV_DRIVER)) {
2566 		if (cxgb_debug)
2567 			printf("user does not have access to privileged ioctls\n");
2568 		return (EPERM);
2569 	}
2570 #else
2571 	if (suser(td)) {
2572 		if (cxgb_debug)
2573 			printf("user does not have access to privileged ioctls\n");
2574 		return (EPERM);
2575 	}
2576 #endif
2577 
2578 	switch (cmd) {
2579 	case CHELSIO_GET_MIIREG: {
2580 		uint32_t val;
2581 		struct cphy *phy = &pi->phy;
2582 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2583 
2584 		if (!phy->mdio_read)
2585 			return (EOPNOTSUPP);
2586 		if (is_10G(sc)) {
2587 			mmd = mid->phy_id >> 8;
2588 			if (!mmd)
2589 				mmd = MDIO_DEV_PCS;
2590 			else if (mmd > MDIO_DEV_VEND2)
2591 				return (EINVAL);
2592 
2593 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2594 					     mid->reg_num, &val);
2595 		} else
2596 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2597 					     mid->reg_num & 0x1f, &val);
2598 		if (error == 0)
2599 			mid->val_out = val;
2600 		break;
2601 	}
2602 	case CHELSIO_SET_MIIREG: {
2603 		struct cphy *phy = &pi->phy;
2604 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2605 
2606 		if (!phy->mdio_write)
2607 			return (EOPNOTSUPP);
2608 		if (is_10G(sc)) {
2609 			mmd = mid->phy_id >> 8;
2610 			if (!mmd)
2611 				mmd = MDIO_DEV_PCS;
2612 			else if (mmd > MDIO_DEV_VEND2)
2613 				return (EINVAL);
2614 
2615 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2616 					      mmd, mid->reg_num, mid->val_in);
2617 		} else
2618 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2619 					      mid->reg_num & 0x1f,
2620 					      mid->val_in);
2621 		break;
2622 	}
2623 	case CHELSIO_SETREG: {
2624 		struct ch_reg *edata = (struct ch_reg *)data;
2625 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2626 			return (EFAULT);
2627 		t3_write_reg(sc, edata->addr, edata->val);
2628 		break;
2629 	}
2630 	case CHELSIO_GETREG: {
2631 		struct ch_reg *edata = (struct ch_reg *)data;
2632 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2633 			return (EFAULT);
2634 		edata->val = t3_read_reg(sc, edata->addr);
2635 		break;
2636 	}
2637 	case CHELSIO_GET_SGE_CONTEXT: {
2638 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2639 		mtx_lock_spin(&sc->sge.reg_lock);
2640 		switch (ecntxt->cntxt_type) {
2641 		case CNTXT_TYPE_EGRESS:
2642 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2643 			    ecntxt->data);
2644 			break;
2645 		case CNTXT_TYPE_FL:
2646 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2647 			    ecntxt->data);
2648 			break;
2649 		case CNTXT_TYPE_RSP:
2650 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2651 			    ecntxt->data);
2652 			break;
2653 		case CNTXT_TYPE_CQ:
2654 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2655 			    ecntxt->data);
2656 			break;
2657 		default:
2658 			error = EINVAL;
2659 			break;
2660 		}
2661 		mtx_unlock_spin(&sc->sge.reg_lock);
2662 		break;
2663 	}
2664 	case CHELSIO_GET_SGE_DESC: {
2665 		struct ch_desc *edesc = (struct ch_desc *)data;
2666 		int ret;
2667 		if (edesc->queue_num >= SGE_QSETS * 6)
2668 			return (EINVAL);
2669 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2670 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2671 		if (ret < 0)
2672 			return (EINVAL);
2673 		edesc->size = ret;
2674 		break;
2675 	}
2676 	case CHELSIO_GET_QSET_PARAMS: {
2677 		struct qset_params *q;
2678 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2679 		int q1 = pi->first_qset;
2680 		int nqsets = pi->nqsets;
2681 		int i;
2682 
2683 		if (t->qset_idx >= nqsets)
2684 			return (EINVAL);
2685 
2686 		i = q1 + t->qset_idx;
2687 		q = &sc->params.sge.qset[i];
2688 		t->rspq_size   = q->rspq_size;
2689 		t->txq_size[0] = q->txq_size[0];
2690 		t->txq_size[1] = q->txq_size[1];
2691 		t->txq_size[2] = q->txq_size[2];
2692 		t->fl_size[0]  = q->fl_size;
2693 		t->fl_size[1]  = q->jumbo_size;
2694 		t->polling     = q->polling;
2695 		t->lro         = q->lro;
2696 		t->intr_lat    = q->coalesce_usecs;
2697 		t->cong_thres  = q->cong_thres;
2698 		t->qnum        = i;
2699 
2700 		if ((sc->flags & FULL_INIT_DONE) == 0)
2701 			t->vector = 0;
2702 		else if (sc->flags & USING_MSIX)
2703 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2704 		else
2705 			t->vector = rman_get_start(sc->irq_res);
2706 
2707 		break;
2708 	}
2709 	case CHELSIO_GET_QSET_NUM: {
2710 		struct ch_reg *edata = (struct ch_reg *)data;
2711 		edata->val = pi->nqsets;
2712 		break;
2713 	}
2714 	case CHELSIO_LOAD_FW: {
2715 		uint8_t *fw_data;
2716 		uint32_t vers;
2717 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2718 
2719 		/*
2720 		 * You're allowed to load firmware only before FULL_INIT_DONE is set.
2721 		 *
2722 		 * FW_UPTODATE is also set so the rest of the initialization
2723 		 * will not overwrite what was loaded here.  This gives you the
2724 		 * flexibility to load any firmware (and maybe shoot yourself in
2725 		 * the foot).
2726 		 */
2727 
2728 		ADAPTER_LOCK(sc);
2729 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2730 			ADAPTER_UNLOCK(sc);
2731 			return (EBUSY);
2732 		}
2733 
2734 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2735 		if (!fw_data)
2736 			error = ENOMEM;
2737 		else
2738 			error = copyin(t->buf, fw_data, t->len);
2739 
2740 		if (!error)
2741 			error = -t3_load_fw(sc, fw_data, t->len);
2742 
2743 		if (t3_get_fw_version(sc, &vers) == 0) {
2744 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2745 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2746 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2747 		}
2748 
2749 		if (!error)
2750 			sc->flags |= FW_UPTODATE;
2751 
2752 		free(fw_data, M_DEVBUF);
2753 		ADAPTER_UNLOCK(sc);
2754 		break;
2755 	}
2756 	case CHELSIO_LOAD_BOOT: {
2757 		uint8_t *boot_data;
2758 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2759 
2760 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2761 		if (!boot_data)
2762 			return (ENOMEM);
2763 
2764 		error = copyin(t->buf, boot_data, t->len);
2765 		if (!error)
2766 			error = -t3_load_boot(sc, boot_data, t->len);
2767 
2768 		free(boot_data, M_DEVBUF);
2769 		break;
2770 	}
2771 	case CHELSIO_GET_PM: {
2772 		struct ch_pm *m = (struct ch_pm *)data;
2773 		struct tp_params *p = &sc->params.tp;
2774 
2775 		if (!is_offload(sc))
2776 			return (EOPNOTSUPP);
2777 
2778 		m->tx_pg_sz = p->tx_pg_size;
2779 		m->tx_num_pg = p->tx_num_pgs;
2780 		m->rx_pg_sz  = p->rx_pg_size;
2781 		m->rx_num_pg = p->rx_num_pgs;
2782 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2783 
2784 		break;
2785 	}
2786 	case CHELSIO_SET_PM: {
2787 		struct ch_pm *m = (struct ch_pm *)data;
2788 		struct tp_params *p = &sc->params.tp;
2789 
2790 		if (!is_offload(sc))
2791 			return (EOPNOTSUPP);
2792 		if (sc->flags & FULL_INIT_DONE)
2793 			return (EBUSY);
2794 
2795 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2796 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2797 			return (EINVAL);	/* not power of 2 */
2798 		if (!(m->rx_pg_sz & 0x14000))
2799 			return (EINVAL);	/* not 16KB or 64KB */
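		/* tx page size must be 16KB, 64KB, 256KB, 1MB, 4MB, or 16MB */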
2800 		if (!(m->tx_pg_sz & 0x1554000))
2801 			return (EINVAL);
2802 		if (m->tx_num_pg == -1)
2803 			m->tx_num_pg = p->tx_num_pgs;
2804 		if (m->rx_num_pg == -1)
2805 			m->rx_num_pg = p->rx_num_pgs;
2806 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2807 			return (EINVAL);
2808 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2809 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2810 			return (EINVAL);
2811 
2812 		p->rx_pg_size = m->rx_pg_sz;
2813 		p->tx_pg_size = m->tx_pg_sz;
2814 		p->rx_num_pgs = m->rx_num_pg;
2815 		p->tx_num_pgs = m->tx_num_pg;
2816 		break;
2817 	}
2818 	case CHELSIO_SETMTUTAB: {
2819 		struct ch_mtus *m = (struct ch_mtus *)data;
2820 		int i;
2821 
2822 		if (!is_offload(sc))
2823 			return (EOPNOTSUPP);
2824 		if (offload_running(sc))
2825 			return (EBUSY);
2826 		if (m->nmtus != NMTUS)
2827 			return (EINVAL);
2828 		if (m->mtus[0] < 81)         /* accommodate SACK */
2829 			return (EINVAL);
2830 
2831 		/*
2832 		 * MTUs must be in ascending order
2833 		 */
2834 		for (i = 1; i < NMTUS; ++i)
2835 			if (m->mtus[i] < m->mtus[i - 1])
2836 				return (EINVAL);
2837 
2838 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2839 		break;
2840 	}
2841 	case CHELSIO_GETMTUTAB: {
2842 		struct ch_mtus *m = (struct ch_mtus *)data;
2843 
2844 		if (!is_offload(sc))
2845 			return (EOPNOTSUPP);
2846 
2847 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2848 		m->nmtus = NMTUS;
2849 		break;
2850 	}
2851 	case CHELSIO_GET_MEM: {
2852 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2853 		struct mc7 *mem;
2854 		uint8_t *useraddr;
2855 		u64 buf[32];
2856 
2857 		/*
2858 		 * Use these to avoid modifying len/addr in the return
2859 		 * struct
2860 		 */
2861 		uint32_t len = t->len, addr = t->addr;
2862 
2863 		if (!is_offload(sc))
2864 			return (EOPNOTSUPP);
2865 		if (!(sc->flags & FULL_INIT_DONE))
2866 			return (EIO);         /* need the memory controllers */
2867 		if ((addr & 0x7) || (len & 0x7))
2868 			return (EINVAL);
2869 		if (t->mem_id == MEM_CM)
2870 			mem = &sc->cm;
2871 		else if (t->mem_id == MEM_PMRX)
2872 			mem = &sc->pmrx;
2873 		else if (t->mem_id == MEM_PMTX)
2874 			mem = &sc->pmtx;
2875 		else
2876 			return (EINVAL);
2877 
2878 		/*
2879 		 * Version scheme:
2880 		 * bits 0..9: chip version
2881 		 * bits 10..15: chip revision
2882 		 */
2883 		t->version = 3 | (sc->params.rev << 10);
2884 
2885 		/*
2886 		 * Read 256 bytes at a time as len can be large and we don't
2887 		 * want to use huge intermediate buffers.
2888 		 */
2889 		useraddr = (uint8_t *)t->buf;
2890 		while (len) {
2891 			unsigned int chunk = min(len, sizeof(buf));
2892 
2893 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2894 			if (error)
2895 				return (-error);
2896 			if (copyout(buf, useraddr, chunk))
2897 				return (EFAULT);
2898 			useraddr += chunk;
2899 			addr += chunk;
2900 			len -= chunk;
2901 		}
2902 		break;
2903 	}
2904 	case CHELSIO_READ_TCAM_WORD: {
2905 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2906 
2907 		if (!is_offload(sc))
2908 			return (EOPNOTSUPP);
2909 		if (!(sc->flags & FULL_INIT_DONE))
2910 			return (EIO);         /* need MC5 */
2911 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2912 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2913 	}
2914 	case CHELSIO_SET_TRACE_FILTER: {
2915 		struct ch_trace *t = (struct ch_trace *)data;
2916 		const struct trace_params *tp;
2917 
2918 		tp = (const struct trace_params *)&t->sip;
2919 		if (t->config_tx)
2920 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2921 					       t->trace_tx);
2922 		if (t->config_rx)
2923 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2924 					       t->trace_rx);
2925 		break;
2926 	}
2927 	case CHELSIO_SET_PKTSCHED: {
2928 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2929 		if (sc->open_device_map == 0)
2930 			return (EAGAIN);
2931 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2932 		    p->binding);
2933 		break;
2934 	}
2935 	case CHELSIO_IFCONF_GETREGS: {
2936 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2937 		int reglen = cxgb_get_regs_len();
2938 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2939 		if (buf == NULL) {
2940 			return (ENOMEM);
2941 		}
2942 		if (regs->len > reglen)
2943 			regs->len = reglen;
2944 		else if (regs->len < reglen)
2945 			error = ENOBUFS;
2946 
2947 		if (!error) {
2948 			cxgb_get_regs(sc, regs, buf);
2949 			error = copyout(buf, regs->data, reglen);
2950 		}
2951 		free(buf, M_DEVBUF);
2952 
2953 		break;
2954 	}
2955 	case CHELSIO_SET_HW_SCHED: {
2956 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2957 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2958 
2959 		if ((sc->flags & FULL_INIT_DONE) == 0)
2960 			return (EAGAIN);       /* need TP to be initialized */
2961 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2962 		    !in_range(t->channel, 0, 1) ||
2963 		    !in_range(t->kbps, 0, 10000000) ||
2964 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2965 		    !in_range(t->flow_ipg, 0,
2966 			      dack_ticks_to_usec(sc, 0x7ff)))
2967 			return (EINVAL);
2968 
2969 		if (t->kbps >= 0) {
2970 			error = t3_config_sched(sc, t->kbps, t->sched);
2971 			if (error < 0)
2972 				return (-error);
2973 		}
2974 		if (t->class_ipg >= 0)
2975 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2976 		if (t->flow_ipg >= 0) {
2977 			t->flow_ipg *= 1000;     /* us -> ns */
2978 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2979 		}
2980 		if (t->mode >= 0) {
2981 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2982 
2983 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2984 					 bit, t->mode ? bit : 0);
2985 		}
2986 		if (t->channel >= 0)
2987 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2988 					 1 << t->sched, t->channel << t->sched);
2989 		break;
2990 	}
2991 	case CHELSIO_GET_EEPROM: {
2992 		int i;
2993 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2994 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2995 
2996 		if (buf == NULL) {
2997 			return (ENOMEM);
2998 		}
2999 		e->magic = EEPROM_MAGIC;
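		/*
		 * Read whole 32-bit words covering the requested range and
		 * copy out only the bytes the caller asked for.
		 */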
3000 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3001 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3002 
3003 		if (!error)
3004 			error = copyout(buf + e->offset, e->data, e->len);
3005 
3006 		free(buf, M_DEVBUF);
3007 		break;
3008 	}
3009 	case CHELSIO_CLEAR_STATS: {
3010 		if (!(sc->flags & FULL_INIT_DONE))
3011 			return (EAGAIN);
3012 
3013 		PORT_LOCK(pi);
3014 		t3_mac_update_stats(&pi->mac);
3015 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3016 		PORT_UNLOCK(pi);
3017 		break;
3018 	}
3019 	case CHELSIO_GET_UP_LA: {
3020 		struct ch_up_la *la = (struct ch_up_la *)data;
3021 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3022 		if (buf == NULL) {
3023 			return (ENOMEM);
3024 		}
3025 		if (la->bufsize < LA_BUFSIZE)
3026 			error = ENOBUFS;
3027 
3028 		if (!error)
3029 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3030 					      &la->bufsize, buf);
3031 		if (!error)
3032 			error = copyout(buf, la->data, la->bufsize);
3033 
3034 		free(buf, M_DEVBUF);
3035 		break;
3036 	}
3037 	case CHELSIO_GET_UP_IOQS: {
3038 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3039 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3040 		uint32_t *v;
3041 
3042 		if (buf == NULL) {
3043 			return (ENOMEM);
3044 		}
3045 		if (ioqs->bufsize < IOQS_BUFSIZE)
3046 			error = ENOBUFS;
3047 
3048 		if (!error)
3049 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3050 
3051 		if (!error) {
3052 			v = (uint32_t *)buf;
3053 
3054 			ioqs->ioq_rx_enable = *v++;
3055 			ioqs->ioq_tx_enable = *v++;
3056 			ioqs->ioq_rx_status = *v++;
3057 			ioqs->ioq_tx_status = *v++;
3058 
3059 			error = copyout(v, ioqs->data, ioqs->bufsize);
3060 		}
3061 
3062 		free(buf, M_DEVBUF);
3063 		break;
3064 	}
3065 	case CHELSIO_SET_FILTER: {
3066 		struct ch_filter *f = (struct ch_filter *)data;
3067 		struct filter_info *p;
3068 		unsigned int nfilters = sc->params.mc5.nfilters;
3069 
3070 		if (!is_offload(sc))
3071 			return (EOPNOTSUPP);	/* No TCAM */
3072 		if (!(sc->flags & FULL_INIT_DONE))
3073 			return (EAGAIN);	/* mc5 not setup yet */
3074 		if (nfilters == 0)
3075 			return (EBUSY);		/* TOE will use TCAM */
3076 
3077 		/* sanity checks */
3078 		if (f->filter_id >= nfilters ||
3079 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3080 		    (f->val.sport && f->mask.sport != 0xffff) ||
3081 		    (f->val.dport && f->mask.dport != 0xffff) ||
3082 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3083 		    (f->val.vlan_prio &&
3084 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3085 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3086 		    f->qset >= SGE_QSETS ||
3087 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3088 			return (EINVAL);
3089 
3090 		/* Was allocated with M_WAITOK */
3091 		KASSERT(sc->filters, ("filter table NULL\n"));
3092 
3093 		p = &sc->filters[f->filter_id];
3094 		if (p->locked)
3095 			return (EPERM);
3096 
3097 		bzero(p, sizeof(*p));
3098 		p->sip = f->val.sip;
3099 		p->sip_mask = f->mask.sip;
3100 		p->dip = f->val.dip;
3101 		p->sport = f->val.sport;
3102 		p->dport = f->val.dport;
3103 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3104 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3105 		    FILTER_NO_VLAN_PRI;
3106 		p->mac_hit = f->mac_hit;
3107 		p->mac_vld = f->mac_addr_idx != 0xffff;
3108 		p->mac_idx = f->mac_addr_idx;
3109 		p->pkt_type = f->proto;
3110 		p->report_filter_id = f->want_filter_id;
3111 		p->pass = f->pass;
3112 		p->rss = f->rss;
3113 		p->qset = f->qset;
3114 
3115 		error = set_filter(sc, f->filter_id, p);
3116 		if (error == 0)
3117 			p->valid = 1;
3118 		break;
3119 	}
3120 	case CHELSIO_DEL_FILTER: {
3121 		struct ch_filter *f = (struct ch_filter *)data;
3122 		struct filter_info *p;
3123 		unsigned int nfilters = sc->params.mc5.nfilters;
3124 
3125 		if (!is_offload(sc))
3126 			return (EOPNOTSUPP);
3127 		if (!(sc->flags & FULL_INIT_DONE))
3128 			return (EAGAIN);
3129 		if (nfilters == 0 || sc->filters == NULL)
3130 			return (EINVAL);
3131 		if (f->filter_id >= nfilters)
3132 			return (EINVAL);
3133 
3134 		p = &sc->filters[f->filter_id];
3135 		if (p->locked)
3136 			return (EPERM);
3137 		if (!p->valid)
3138 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3139 
3140 		bzero(p, sizeof(*p));
3141 		p->sip = p->sip_mask = 0xffffffff;
3142 		p->vlan = 0xfff;
3143 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3144 		p->pkt_type = 1;
3145 		error = set_filter(sc, f->filter_id, p);
3146 		break;
3147 	}
3148 	case CHELSIO_GET_FILTER: {
3149 		struct ch_filter *f = (struct ch_filter *)data;
3150 		struct filter_info *p;
3151 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3152 
3153 		if (!is_offload(sc))
3154 			return (EOPNOTSUPP);
3155 		if (!(sc->flags & FULL_INIT_DONE))
3156 			return (EAGAIN);
3157 		if (nfilters == 0 || sc->filters == NULL)
3158 			return (EINVAL);
3159 
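		/*
		 * Return the first valid filter after filter_id.  An id of
		 * 0xffffffff starts the scan at the beginning and is also
		 * returned when no more valid filters exist.
		 */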
3160 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3161 		for (; i < nfilters; i++) {
3162 			p = &sc->filters[i];
3163 			if (!p->valid)
3164 				continue;
3165 
3166 			bzero(f, sizeof(*f));
3167 
3168 			f->filter_id = i;
3169 			f->val.sip = p->sip;
3170 			f->mask.sip = p->sip_mask;
3171 			f->val.dip = p->dip;
3172 			f->mask.dip = p->dip ? 0xffffffff : 0;
3173 			f->val.sport = p->sport;
3174 			f->mask.sport = p->sport ? 0xffff : 0;
3175 			f->val.dport = p->dport;
3176 			f->mask.dport = p->dport ? 0xffff : 0;
3177 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3178 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3179 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3180 			    0 : p->vlan_prio;
3181 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3182 			    0 : FILTER_NO_VLAN_PRI;
3183 			f->mac_hit = p->mac_hit;
3184 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3185 			f->proto = p->pkt_type;
3186 			f->want_filter_id = p->report_filter_id;
3187 			f->pass = p->pass;
3188 			f->rss = p->rss;
3189 			f->qset = p->qset;
3190 
3191 			break;
3192 		}
3193 
3194 		if (i == nfilters)
3195 			f->filter_id = 0xffffffff;
3196 		break;
3197 	}
3198 	default:
3199 		return (EOPNOTSUPP);
3200 		break;
3201 	}
3202 
3203 	return (error);
3204 }
3205 
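/*
 * Dump the register range [start, end] into buf at offset start, preserving
 * the register file layout in the output buffer.
 */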
3206 static __inline void
3207 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3208     unsigned int end)
3209 {
3210 	uint32_t *p = (uint32_t *)(buf + start);
3211 
3212 	for ( ; start <= end; start += sizeof(uint32_t))
3213 		*p++ = t3_read_reg(ap, start);
3214 }
3215 
3216 #define T3_REGMAP_SIZE (3 * 1024)
3217 static int
3218 cxgb_get_regs_len(void)
3219 {
3220 	return (T3_REGMAP_SIZE);
3221 }
3222 
3223 static void
3224 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3225 {
3226 
3227 	/*
3228 	 * Version scheme:
3229 	 * bits 0..9: chip version
3230 	 * bits 10..15: chip revision
3231 	 * bit 31: set for PCIe cards
3232 	 */
3233 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3234 
3235 	/*
3236 	 * We skip the MAC statistics registers because they are clear-on-read.
3237 	 * Also reading multi-register stats would need to synchronize with the
3238 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3239 	 */
3240 	memset(buf, 0, cxgb_get_regs_len());
3241 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3242 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3243 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3244 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3245 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3246 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3247 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3248 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3249 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3250 }
3251 
3252 static int
3253 alloc_filters(struct adapter *sc)
3254 {
3255 	struct filter_info *p;
3256 	unsigned int nfilters = sc->params.mc5.nfilters;
3257 
3258 	if (nfilters == 0)
3259 		return (0);
3260 
3261 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3262 	sc->filters = p;
3263 
3264 	p = &sc->filters[nfilters - 1];
3265 	p->vlan = 0xfff;
3266 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3267 	p->pass = p->rss = p->valid = p->locked = 1;
3268 
3269 	return (0);
3270 }
3271 
3272 static int
3273 setup_hw_filters(struct adapter *sc)
3274 {
3275 	int i, rc;
3276 	unsigned int nfilters = sc->params.mc5.nfilters;
3277 
3278 	if (!sc->filters)
3279 		return (0);
3280 
3281 	t3_enable_filters(sc);
3282 
3283 	for (i = rc = 0; i < nfilters && !rc; i++) {
3284 		if (sc->filters[i].locked)
3285 			rc = set_filter(sc, i, &sc->filters[i]);
3286 	}
3287 
3288 	return (rc);
3289 }
3290 
3291 static int
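/*
 * Program one TCAM filter entry: a management work request carrying a
 * CPL_PASS_OPEN_REQ plus two CPL_SET_TCB_FIELD commands sets up the entry,
 * and a third CPL_SET_TCB_FIELD steers traffic to the requested qset when
 * the filter passes packets without RSS.
 */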
3292 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3293 {
3294 	int len;
3295 	struct mbuf *m;
3296 	struct ulp_txpkt *txpkt;
3297 	struct work_request_hdr *wr;
3298 	struct cpl_pass_open_req *oreq;
3299 	struct cpl_set_tcb_field *sreq;
3300 
3301 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3302 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3303 
3304 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3305 	      sc->params.mc5.nfilters;
3306 
3307 	m = m_gethdr(M_WAITOK, MT_DATA);
3308 	m->m_len = m->m_pkthdr.len = len;
3309 	bzero(mtod(m, char *), len);
3310 
3311 	wr = mtod(m, struct work_request_hdr *);
3312 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3313 
3314 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3315 	txpkt = (struct ulp_txpkt *)oreq;
3316 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3317 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3318 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3319 	oreq->local_port = htons(f->dport);
3320 	oreq->peer_port = htons(f->sport);
3321 	oreq->local_ip = htonl(f->dip);
3322 	oreq->peer_ip = htonl(f->sip);
3323 	oreq->peer_netmask = htonl(f->sip_mask);
3324 	oreq->opt0h = 0;
3325 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3326 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3327 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3328 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3329 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3330 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3331 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3332 
3333 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3334 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3335 			  (f->report_filter_id << 15) | (1 << 23) |
3336 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3337 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3338 	t3_mgmt_tx(sc, m);
3339 
3340 	if (f->pass && !f->rss) {
3341 		len = sizeof(*sreq);
3342 		m = m_gethdr(M_WAITOK, MT_DATA);
3343 		m->m_len = m->m_pkthdr.len = len;
3344 		bzero(mtod(m, char *), len);
3345 		sreq = mtod(m, struct cpl_set_tcb_field *);
3346 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3347 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3348 				 (u64)sc->rrss_map[f->qset] << 19);
3349 		t3_mgmt_tx(sc, m);
3350 	}
3351 	return (0);
3352 }
3353 
3354 static inline void
3355 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3356     unsigned int word, u64 mask, u64 val)
3357 {
3358 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3359 	req->reply = V_NO_REPLY(1);
3360 	req->cpu_idx = 0;
3361 	req->word = htons(word);
3362 	req->mask = htobe64(mask);
3363 	req->val = htobe64(val);
3364 }
3365 
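/*
 * Like mk_set_tcb_field() but also fills in the ULP_TXPKT header used when
 * the command is embedded in a larger management work request.
 */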
3366 static inline void
3367 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3368     unsigned int word, u64 mask, u64 val)
3369 {
3370 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3371 
3372 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3373 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3374 	mk_set_tcb_field(req, tid, word, mask, val);
3375 }
3376