xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 35a04710d7286aa9538917fd7f8e417dbee95b82)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12 2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/rman.h>
44 #include <sys/ioccom.h>
45 #include <sys/mbuf.h>
46 #include <sys/linker.h>
47 #include <sys/firmware.h>
48 #include <sys/socket.h>
49 #include <sys/sockio.h>
50 #include <sys/smp.h>
51 #include <sys/sysctl.h>
52 #include <sys/queue.h>
53 #include <sys/taskqueue.h>
54 
55 #include <net/bpf.h>
56 #include <net/ethernet.h>
57 #include <net/if.h>
58 #include <net/if_arp.h>
59 #include <net/if_dl.h>
60 #include <net/if_media.h>
61 #include <net/if_types.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/if_ether.h>
66 #include <netinet/ip.h>
68 #include <netinet/tcp.h>
69 #include <netinet/udp.h>
70 
71 #include <dev/pci/pcireg.h>
72 #include <dev/pci/pcivar.h>
73 #include <dev/pci/pci_private.h>
74 
75 #ifdef CONFIG_DEFINED
76 #include <cxgb_include.h>
77 #else
78 #include <dev/cxgb/cxgb_include.h>
79 #endif
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_msix(adapter_t *, int);
86 static void cxgb_teardown_msix(adapter_t *);
87 static void cxgb_init(void *);
88 static void cxgb_init_locked(struct port_info *);
89 static void cxgb_stop_locked(struct port_info *);
90 static void cxgb_set_rxmode(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static void cxgb_start(struct ifnet *);
93 static void cxgb_start_proc(void *, int ncount);
94 static int cxgb_media_change(struct ifnet *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_ext_intr_handler(void *, int);
99 static void cxgb_tick_handler(void *, int);
100 static void cxgb_down_locked(struct adapter *sc);
101 static void cxgb_tick(void *);
102 static void setup_rss(adapter_t *sc);
103 
104 /* Attachment glue for the PCI controller end of the device.  Each port of
105  * the device is attached separately, as defined later.
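 * The controller itself attaches as "cxgbc" and creates one "cxgb" child
 * device per MAC, so a two-port adapter shows up as cxgbc0 with children
 * cxgb0 and cxgb1.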
106  */
107 static int cxgb_controller_probe(device_t);
108 static int cxgb_controller_attach(device_t);
109 static int cxgb_controller_detach(device_t);
110 static void cxgb_free(struct adapter *);
111 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
112     unsigned int end);
113 static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
114 static int cxgb_get_regs_len(void);
115 static int offload_open(struct port_info *pi);
116 static void touch_bars(device_t dev);
117 
118 #ifdef notyet
119 static int offload_close(struct toedev *tdev);
120 #endif
121 
122 
123 static device_method_t cxgb_controller_methods[] = {
124 	DEVMETHOD(device_probe,		cxgb_controller_probe),
125 	DEVMETHOD(device_attach,	cxgb_controller_attach),
126 	DEVMETHOD(device_detach,	cxgb_controller_detach),
127 
128 	/* bus interface */
129 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
130 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
131 
132 	{ 0, 0 }
133 };
134 
135 static driver_t cxgb_controller_driver = {
136 	"cxgbc",
137 	cxgb_controller_methods,
138 	sizeof(struct adapter)
139 };
140 
141 static devclass_t	cxgb_controller_devclass;
142 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
143 
144 /*
145  * Attachment glue for the ports.  Attachment is done directly to the
146  * controller device.
147  */
148 static int cxgb_port_probe(device_t);
149 static int cxgb_port_attach(device_t);
150 static int cxgb_port_detach(device_t);
151 
152 static device_method_t cxgb_port_methods[] = {
153 	DEVMETHOD(device_probe,		cxgb_port_probe),
154 	DEVMETHOD(device_attach,	cxgb_port_attach),
155 	DEVMETHOD(device_detach,	cxgb_port_detach),
156 	{ 0, 0 }
157 };
158 
159 static driver_t cxgb_port_driver = {
160 	"cxgb",
161 	cxgb_port_methods,
162 	0
163 };
164 
165 static d_ioctl_t cxgb_extension_ioctl;
166 static d_open_t cxgb_extension_open;
167 static d_close_t cxgb_extension_close;
168 
169 static struct cdevsw cxgb_cdevsw = {
170        .d_version =    D_VERSION,
171        .d_flags =      0,
172        .d_open =       cxgb_extension_open,
173        .d_close =      cxgb_extension_close,
174        .d_ioctl =      cxgb_extension_ioctl,
175        .d_name =       "cxgb",
176 };
177 
178 static devclass_t	cxgb_port_devclass;
179 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
180 
181 #define SGE_MSIX_COUNT (SGE_QSETS + 1)
182 
183 extern int collapse_mbufs;
184 /*
185  * The driver uses the best interrupt scheme available on a platform in the
186  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
187  * of these schemes the driver may consider as follows:
188  *
189  * msi_allowed = 2: choose from among all three options
190  * msi_allowed = 1: only consider MSI and pin interrupts
191  * msi_allowed = 0: force pin interrupts
192  */
193 static int msi_allowed = 2;
194 
195 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
196 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
197 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
198     "MSI-X, MSI, INTx selector");
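
/*
 * Like the other hw.cxgb.* knobs below, msi_allowed is a loader tunable and
 * can be set before the driver loads, e.g. in /boot/loader.conf:
 *     hw.cxgb.msi_allowed="1"
 */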
199 
200 /*
201  * The driver enables offload as a default.
202  * To disable it, use ofld_disable = 1.
203  */
204 static int ofld_disable = 0;
205 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
206 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
207     "disable ULP offload");
208 
209 /*
210  * singleq = 1 (the current default) forces a single queue-set per port;
211  * set it to 0 to use multiple queue-sets per port when MSI-X is enabled.
212  */
213 static int singleq = 1;
214 TUNABLE_INT("hw.cxgb.singleq", &singleq);
215 SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
216     "use a single queue-set per port");
217 
218 enum {
219 	MAX_TXQ_ENTRIES      = 16384,
220 	MAX_CTRL_TXQ_ENTRIES = 1024,
221 	MAX_RSPQ_ENTRIES     = 16384,
222 	MAX_RX_BUFFERS       = 16384,
223 	MAX_RX_JUMBO_BUFFERS = 16384,
224 	MIN_TXQ_ENTRIES      = 4,
225 	MIN_CTRL_TXQ_ENTRIES = 4,
226 	MIN_RSPQ_ENTRIES     = 32,
227 	MIN_FL_ENTRIES       = 32,
228 	MIN_FL_JUMBO_ENTRIES = 32
229 };
230 
231 struct filter_info {
232 	u32 sip;
233 	u32 sip_mask;
234 	u32 dip;
235 	u16 sport;
236 	u16 dport;
237 	u32 vlan:12;
238 	u32 vlan_prio:3;
239 	u32 mac_hit:1;
240 	u32 mac_idx:4;
241 	u32 mac_vld:1;
242 	u32 pkt_type:2;
243 	u32 report_filter_id:1;
244 	u32 pass:1;
245 	u32 rss:1;
246 	u32 qset:3;
247 	u32 locked:1;
248 	u32 valid:1;
249 };
250 
251 enum { FILTER_NO_VLAN_PRI = 7 };
252 
253 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
254 
255 /* Table for probing the cards.  The desc field isn't actually used */
256 struct cxgb_ident {
257 	uint16_t	vendor;
258 	uint16_t	device;
259 	int		index;
260 	char		*desc;
261 } cxgb_identifiers[] = {
262 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
263 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
264 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
265 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
266 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
267 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
268 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
269 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
270 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
271 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
272 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
273 	{0, 0, 0, NULL}
274 };
275 
276 
277 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
278 
279 static inline char
280 t3rev2char(struct adapter *adapter)
281 {
282 	char rev = 'z';
283 
284 	switch(adapter->params.rev) {
285 	case T3_REV_A:
286 		rev = 'a';
287 		break;
288 	case T3_REV_B:
289 	case T3_REV_B2:
290 		rev = 'b';
291 		break;
292 	case T3_REV_C:
293 		rev = 'c';
294 		break;
295 	}
296 	return rev;
297 }
298 
299 static struct cxgb_ident *
300 cxgb_get_ident(device_t dev)
301 {
302 	struct cxgb_ident *id;
303 
304 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
305 		if ((id->vendor == pci_get_vendor(dev)) &&
306 		    (id->device == pci_get_device(dev))) {
307 			return (id);
308 		}
309 	}
310 	return (NULL);
311 }
312 
313 static const struct adapter_info *
314 cxgb_get_adapter_info(device_t dev)
315 {
316 	struct cxgb_ident *id;
317 	const struct adapter_info *ai;
318 
319 	id = cxgb_get_ident(dev);
320 	if (id == NULL)
321 		return (NULL);
322 
323 	ai = t3_get_adapter_info(id->index);
324 
325 	return (ai);
326 }
327 
328 static int
329 cxgb_controller_probe(device_t dev)
330 {
331 	const struct adapter_info *ai;
332 	char *ports, buf[80];
333 	int nports;
334 
335 	ai = cxgb_get_adapter_info(dev);
336 	if (ai == NULL)
337 		return (ENXIO);
338 
339 	nports = ai->nports0 + ai->nports1;
340 	if (nports == 1)
341 		ports = "port";
342 	else
343 		ports = "ports";
344 
345 	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
346 	device_set_desc_copy(dev, buf);
347 	return (BUS_PROBE_DEFAULT);
348 }
349 
350 #define FW_FNAME "t3fw%d%d%d"
351 #define TPEEPROM_NAME "t3%ctpe%d%d%d"
352 #define TPSRAM_NAME "t3%cps%d%d%d"
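
/*
 * These expand to firmware(9) image names; for example, a rev-B adapter
 * with a hypothetical 4.7.0 firmware/protocol version would look for
 * "t3fw470", "t3btpe470" and "t3bps470".  The actual numbers come from the
 * FW_VERSION_* and TP_VERSION_* constants compiled into the driver.
 */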
353 
354 static int
355 upgrade_fw(adapter_t *sc)
356 {
357 	char buf[32];
358 #ifdef FIRMWARE_LATEST
359 	const struct firmware *fw;
360 #else
361 	struct firmware *fw;
362 #endif
363 	int status;
364 
365 	snprintf(&buf[0], sizeof(buf), FW_FNAME,  FW_VERSION_MAJOR,
366 	    FW_VERSION_MINOR, FW_VERSION_MICRO);
367 
368 	fw = firmware_get(buf);
369 
370 	if (fw == NULL) {
371 		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
372 		return (ENOENT);
373 	} else
374 		device_printf(sc->dev, "updating firmware on card with %s\n", buf);
375 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
376 
377 	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
378 
379 	firmware_put(fw, FIRMWARE_UNLOAD);
380 
381 	return (status);
382 }
383 
384 static int
385 cxgb_controller_attach(device_t dev)
386 {
387 	device_t child;
388 	const struct adapter_info *ai;
389 	struct adapter *sc;
390 	int i, error = 0;
391 	uint32_t vers;
392 	int port_qsets = 1;
393 #ifdef MSI_SUPPORTED
394 	int msi_needed, reg;
395 #endif
396 	sc = device_get_softc(dev);
397 	sc->dev = dev;
398 	sc->msi_count = 0;
399 	ai = cxgb_get_adapter_info(dev);
400 
401 	/*
402 	 * XXX not really related but a recent addition
403 	 */
404 #ifdef MSI_SUPPORTED
405 	/* find the PCIe link width and set max read request to 4KB */
406 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
407 		uint16_t lnk, pectl;
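		/*
		 * In the PCIe capability, offset 0x12 is the Link Status
		 * register (negotiated link width in bits 9:4) and offset
		 * 0x8 is Device Control, whose bits 14:12 hold the Max Read
		 * Request Size encoding (5 selects 4096 bytes).
		 */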
408 		lnk = pci_read_config(dev, reg + 0x12, 2);
409 		sc->link_width = (lnk >> 4) & 0x3f;
410 
411 		pectl = pci_read_config(dev, reg + 0x8, 2);
412 		pectl = (pectl & ~0x7000) | (5 << 12);
413 		pci_write_config(dev, reg + 0x8, pectl, 2);
414 	}
415 
416 	if (sc->link_width != 0 && sc->link_width <= 4 &&
417 	    (ai->nports0 + ai->nports1) <= 2) {
418 		device_printf(sc->dev,
419 		    "PCIe x%d Link, expect reduced performance\n",
420 		    sc->link_width);
421 	}
422 #endif
423 	touch_bars(dev);
424 	pci_enable_busmaster(dev);
425 	/*
426 	 * Allocate the registers and make them available to the driver.
427 	 * The registers that we care about for NIC mode are in BAR 0
428 	 */
429 	sc->regs_rid = PCIR_BAR(0);
430 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
431 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
432 		device_printf(dev, "Cannot allocate BAR\n");
433 		return (ENXIO);
434 	}
435 
436 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
437 	    device_get_unit(dev));
438 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
439 
440 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
441 	    device_get_unit(dev));
442 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
443 	    device_get_unit(dev));
444 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
445 	    device_get_unit(dev));
446 
447 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_DEF);
448 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
449 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
450 
451 	sc->bt = rman_get_bustag(sc->regs_res);
452 	sc->bh = rman_get_bushandle(sc->regs_res);
453 	sc->mmio_len = rman_get_size(sc->regs_res);
454 
455 	if (t3_prep_adapter(sc, ai, 1) < 0) {
456 		printf("prep adapter failed\n");
457 		error = ENODEV;
458 		goto out;
459 	}
460 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
461 	 * enough messages for the queue sets.  If that fails, try falling
462 	 * back to MSI.  If that fails, then try falling back to the legacy
463 	 * interrupt pin model.
464 	 */
465 #ifdef MSI_SUPPORTED
466 
467 	sc->msix_regs_rid = 0x20;
468 	if ((msi_allowed >= 2) &&
469 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
470 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
471 
472 		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
473 
474 		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
475 		    (sc->msi_count != msi_needed)) {
476 			device_printf(dev, "msix allocation failed - msi_count = %d"
477 			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
478 			    msi_needed, error);
479 			sc->msi_count = 0;
480 			pci_release_msi(dev);
481 			bus_release_resource(dev, SYS_RES_MEMORY,
482 			    sc->msix_regs_rid, sc->msix_regs_res);
483 			sc->msix_regs_res = NULL;
484 		} else {
485 			sc->flags |= USING_MSIX;
486 			sc->cxgb_intr = t3_intr_msix;
487 		}
488 	}
489 
490 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
491 		sc->msi_count = 1;
492 		if (pci_alloc_msi(dev, &sc->msi_count)) {
493 			device_printf(dev, "alloc msi failed - will try INTx\n");
494 			sc->msi_count = 0;
495 			pci_release_msi(dev);
496 		} else {
497 			sc->flags |= USING_MSI;
498 			sc->irq_rid = 1;
499 			sc->cxgb_intr = t3_intr_msi;
500 		}
501 	}
502 #endif
503 	if (sc->msi_count == 0) {
504 		device_printf(dev, "using line interrupts\n");
505 		sc->irq_rid = 0;
506 		sc->cxgb_intr = t3b_intr;
507 	}
508 
509 
510 	/* Create a private taskqueue thread for handling driver events */
511 #ifdef TASKQUEUE_CURRENT
512 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
513 	    taskqueue_thread_enqueue, &sc->tq);
514 #else
515 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
516 	    taskqueue_thread_enqueue, &sc->tq);
517 #endif
518 	if (sc->tq == NULL) {
519 		device_printf(dev, "failed to allocate controller task queue\n");
520 		goto out;
521 	}
522 
523 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
524 	    device_get_nameunit(dev));
525 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
526 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
527 
528 
529 	/* Create a periodic callout for checking adapter status */
530 	callout_init(&sc->cxgb_tick_ch, TRUE);
531 
532 	if (t3_check_fw_version(sc) != 0) {
533 		/*
534 		 * Warn user that a firmware update will be attempted in init.
535 		 */
536 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
537 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
538 		sc->flags &= ~FW_UPTODATE;
539 	} else {
540 		sc->flags |= FW_UPTODATE;
541 	}
542 
543 	if (t3_check_tpsram_version(sc) != 0) {
544 		/*
545 		 * Warn user that a TP SRAM update will be attempted in init.
546 		 */
547 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
548 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
549 		sc->flags &= ~TPS_UPTODATE;
550 	} else {
551 		sc->flags |= TPS_UPTODATE;
552 	}
553 
554 	if ((sc->flags & USING_MSIX) && !singleq)
555 		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
556 
557 	/*
558 	 * Create a child device for each MAC.  The ethernet attachment
559 	 * will be done in these children.
560 	 */
561 	for (i = 0; i < (sc)->params.nports; i++) {
562 		struct port_info *pi;
563 
564 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
565 			device_printf(dev, "failed to add child port\n");
566 			error = EINVAL;
567 			goto out;
568 		}
569 		pi = &sc->port[i];
570 		pi->adapter = sc;
571 		pi->nqsets = port_qsets;
572 		pi->first_qset = i*port_qsets;
573 		pi->port_id = i;
574 		pi->tx_chan = i >= ai->nports0;
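		/* Channel-0 ports use even TX interface numbers, channel-1 ports odd. */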
575 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
576 		sc->rxpkt_map[pi->txpkt_intf] = i;
577 		sc->portdev[i] = child;
578 		device_set_softc(child, pi);
579 	}
580 	if ((error = bus_generic_attach(dev)) != 0)
581 		goto out;
582 
583 	/*
584 	 * XXX need to poll for link status
585 	 */
586 	sc->params.stats_update_period = 1;
587 
588 	/* initialize sge private state */
589 	t3_sge_init_adapter(sc);
590 
591 	t3_led_ready(sc);
592 
593 	cxgb_offload_init();
594 	if (is_offload(sc)) {
595 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
596 		cxgb_adapter_ofld(sc);
597         }
598 	error = t3_get_fw_version(sc, &vers);
599 	if (error)
600 		goto out;
601 
602 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
603 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
604 	    G_FW_VERSION_MICRO(vers));
605 
606 	t3_add_sysctls(sc);
607 out:
608 	if (error)
609 		cxgb_free(sc);
610 
611 	return (error);
612 }
613 
614 static int
615 cxgb_controller_detach(device_t dev)
616 {
617 	struct adapter *sc;
618 
619 	sc = device_get_softc(dev);
620 
621 	cxgb_free(sc);
622 
623 	return (0);
624 }
625 
626 static void
627 cxgb_free(struct adapter *sc)
628 {
629 	int i;
630 
631 	ADAPTER_LOCK(sc);
632 	/*
633 	 * drops the lock
634 	 */
635 	cxgb_down_locked(sc);
636 
637 #ifdef MSI_SUPPORTED
638 	if (sc->flags & (USING_MSI | USING_MSIX)) {
639 		device_printf(sc->dev, "releasing msi message(s)\n");
640 		pci_release_msi(sc->dev);
641 	} else {
642 		device_printf(sc->dev, "no msi message to release\n");
643 	}
644 #endif
645 	if (sc->msix_regs_res != NULL) {
646 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
647 		    sc->msix_regs_res);
648 	}
649 
650 	if (sc->tq != NULL) {
651 		taskqueue_drain(sc->tq, &sc->ext_intr_task);
652 		taskqueue_drain(sc->tq, &sc->tick_task);
653 	}
654 	t3_sge_deinit_sw(sc);
655 	/*
656 	 * Wait for last callout
657 	 */
658 
659 	tsleep(&sc, 0, "cxgb unload", 3*hz);
660 
661 	for (i = 0; i < (sc)->params.nports; ++i) {
662 		if (sc->portdev[i] != NULL)
663 			device_delete_child(sc->dev, sc->portdev[i]);
664 	}
665 
666 	bus_generic_detach(sc->dev);
667 	if (sc->tq != NULL)
668 		taskqueue_free(sc->tq);
669 #ifdef notyet
670 	if (is_offload(sc)) {
671 		cxgb_adapter_unofld(sc);
672 		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
673 			offload_close(&sc->tdev);
674 	}
675 #endif
676 
677 	t3_free_sge_resources(sc);
678 	free(sc->filters, M_DEVBUF);
679 	t3_sge_free(sc);
680 
681 	cxgb_offload_exit();
682 
683 	if (sc->regs_res != NULL)
684 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
685 		    sc->regs_res);
686 
687 	MTX_DESTROY(&sc->mdio_lock);
688 	MTX_DESTROY(&sc->sge.reg_lock);
689 	MTX_DESTROY(&sc->elmer_lock);
690 	ADAPTER_LOCK_DEINIT(sc);
691 
692 	return;
693 }
694 
695 /**
696  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
697  *	@sc: the controller softc
698  *
699  *	Determines how many sets of SGE queues to use and initializes them.
700  *	We support multiple queue sets per port if we have MSI-X, otherwise
701  *	just one queue set per port.
702  */
703 static int
704 setup_sge_qsets(adapter_t *sc)
705 {
706 	int i, j, err, irq_idx = 0, qset_idx = 0;
707 	u_int ntxq = SGE_TXQ_PER_SET;
708 
709 	if ((err = t3_sge_alloc(sc)) != 0) {
710 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
711 		return (err);
712 	}
713 
714 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
715 		irq_idx = -1;
716 
717 	for (i = 0; i < (sc)->params.nports; i++) {
718 		struct port_info *pi = &sc->port[i];
719 
720 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
721 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
722 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
723 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
724 			if (err) {
725 				t3_free_sge_resources(sc);
726 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
727 				    err);
728 				return (err);
729 			}
730 		}
731 	}
732 
733 	return (0);
734 }
735 
736 static void
737 cxgb_teardown_msix(adapter_t *sc)
738 {
739 	int i, nqsets;
740 
741 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
742 		nqsets += sc->port[i].nqsets;
743 
744 	for (i = 0; i < nqsets; i++) {
745 		if (sc->msix_intr_tag[i] != NULL) {
746 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
747 			    sc->msix_intr_tag[i]);
748 			sc->msix_intr_tag[i] = NULL;
749 		}
750 		if (sc->msix_irq_res[i] != NULL) {
751 			bus_release_resource(sc->dev, SYS_RES_IRQ,
752 			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
753 			sc->msix_irq_res[i] = NULL;
754 		}
755 	}
756 }
757 
758 static int
759 cxgb_setup_msix(adapter_t *sc, int msix_count)
760 {
761 	int i, j, k, nqsets, rid;
762 
763 	/* The first message indicates link changes and error conditions */
764 	sc->irq_rid = 1;
765 	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
766 	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
767 		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
768 		return (EINVAL);
769 	}
770 
771 	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
772 #ifdef INTR_FILTERS
773 		NULL,
774 #endif
775 		cxgb_async_intr, sc, &sc->intr_tag)) {
776 		device_printf(sc->dev, "Cannot set up interrupt\n");
777 		return (EINVAL);
778 	}
779 	for (i = k = 0; i < (sc)->params.nports; i++) {
780 		nqsets = sc->port[i].nqsets;
781 		for (j = 0; j < nqsets; j++, k++) {
782 			struct sge_qset *qs = &sc->sge.qs[k];
783 
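			/* MSI-X message 1 is the async interrupt; queue sets use 2..n. */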
784 			rid = k + 2;
785 			if (cxgb_debug)
786 				printf("rid=%d ", rid);
787 			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
788 			    sc->dev, SYS_RES_IRQ, &rid,
789 			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
790 				device_printf(sc->dev, "Cannot allocate "
791 				    "interrupt for message %d\n", rid);
792 				return (EINVAL);
793 			}
794 			sc->msix_irq_rid[k] = rid;
795 			printf("setting up interrupt for port=%d\n",
796 			    qs->port->port_id);
797 			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
798 			    INTR_MPSAFE|INTR_TYPE_NET,
799 #ifdef INTR_FILTERS
800 				NULL,
801 #endif
802 				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
803 				device_printf(sc->dev, "Cannot set up "
804 				    "interrupt for message %d\n", rid);
805 				return (EINVAL);
806 			}
807 		}
808 	}
809 
810 
811 	return (0);
812 }
813 
814 static int
815 cxgb_port_probe(device_t dev)
816 {
817 	struct port_info *p;
818 	char buf[80];
819 
820 	p = device_get_softc(dev);
821 
822 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, p->port_type->desc);
823 	device_set_desc_copy(dev, buf);
824 	return (0);
825 }
826 
827 
828 static int
829 cxgb_makedev(struct port_info *pi)
830 {
831 
832 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
833 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
834 
835 	if (pi->port_cdev == NULL)
836 		return (ENOMEM);
837 
838 	pi->port_cdev->si_drv1 = (void *)pi;
839 
840 	return (0);
841 }
842 
843 
844 #ifdef TSO_SUPPORTED
845 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
846 /* Don't enable TSO6 yet */
847 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
848 #else
849 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
850 /* Don't enable TSO6 yet */
851 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
852 #define IFCAP_TSO4 0x0
853 #define IFCAP_TSO6 0x0
854 #define CSUM_TSO   0x0
855 #endif
856 
857 
858 static int
859 cxgb_port_attach(device_t dev)
860 {
861 	struct port_info *p;
862 	struct ifnet *ifp;
863 	int err, media_flags;
864 
865 	p = device_get_softc(dev);
866 
867 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
868 	    device_get_unit(device_get_parent(dev)), p->port_id);
869 	PORT_LOCK_INIT(p, p->lockbuf);
870 
871 	/* Allocate an ifnet object and set it up */
872 	ifp = p->ifp = if_alloc(IFT_ETHER);
873 	if (ifp == NULL) {
874 		device_printf(dev, "Cannot allocate ifnet\n");
875 		return (ENOMEM);
876 	}
877 
878 	/*
879 	 * Note that there is currently no watchdog timer.
880 	 */
881 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
882 	ifp->if_init = cxgb_init;
883 	ifp->if_softc = p;
884 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
885 	ifp->if_ioctl = cxgb_ioctl;
886 	ifp->if_start = cxgb_start;
887 	ifp->if_timer = 0;	/* Disable ifnet watchdog */
888 	ifp->if_watchdog = NULL;
889 
890 	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
891 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
892 	IFQ_SET_READY(&ifp->if_snd);
893 
894 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
895 	ifp->if_capabilities |= CXGB_CAP;
896 	ifp->if_capenable |= CXGB_CAP_ENABLE;
897 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
898 	/*
899 	 * disable TSO on 4-port - it isn't supported by the firmware yet
900 	 */
901 	if (p->adapter->params.nports > 2) {
902 		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
903 		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
904 		ifp->if_hwassist &= ~CSUM_TSO;
905 	}
906 
907 	ether_ifattach(ifp, p->hw_addr);
908 	/*
909 	 * Only default to jumbo frames on 10GigE
910 	 */
911 	if (p->adapter->params.nports <= 2)
912 		ifp->if_mtu = 9000;
913 	if ((err = cxgb_makedev(p)) != 0) {
914 		printf("makedev failed %d\n", err);
915 		return (err);
916 	}
917 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
918 	    cxgb_media_status);
919 
920 	if (!strcmp(p->port_type->desc, "10GBASE-CX4")) {
921 		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
922 	} else if (!strcmp(p->port_type->desc, "10GBASE-SR")) {
923 		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
924 	} else if (!strcmp(p->port_type->desc, "10GBASE-XR")) {
925 		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
926 	} else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) {
927 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
928 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
929 			    0, NULL);
930 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
931 			    0, NULL);
932 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
933 			    0, NULL);
934 		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
935 			    0, NULL);
936 		media_flags = 0;
937 	} else {
938 	        printf("unsupported media type %s\n", p->port_type->desc);
939 		return (ENXIO);
940 	}
941 	if (media_flags) {
942 		ifmedia_add(&p->media, media_flags, 0, NULL);
943 		ifmedia_set(&p->media, media_flags);
944 	} else {
945 		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
946 		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
947 	}
948 
949 
950 	snprintf(p->taskqbuf, TASKQ_NAME_LEN, "cxgb_port_taskq%d", p->port_id);
951 #ifdef TASKQUEUE_CURRENT
952 	/* Create a taskqueue for handling TX without starvation */
953 	p->tq = taskqueue_create(p->taskqbuf, M_NOWAIT,
954 	    taskqueue_thread_enqueue, &p->tq);
955 #else
956 	/* Create a taskqueue for handling TX without starvation */
957 	p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT,
958 	    taskqueue_thread_enqueue, &p->tq);
959 #endif
960 
961 	if (p->tq == NULL) {
962 		device_printf(dev, "failed to allocate port task queue\n");
963 		return (ENOMEM);
964 	}
965 	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
966 	    device_get_nameunit(dev));
967 
968 	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
969 
970 	t3_sge_init_port(p);
971 
972 	return (0);
973 }
974 
975 static int
976 cxgb_port_detach(device_t dev)
977 {
978 	struct port_info *p;
979 
980 	p = device_get_softc(dev);
981 
982 	PORT_LOCK(p);
983 	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
984 		cxgb_stop_locked(p);
985 	PORT_UNLOCK(p);
986 
987 	if (p->tq != NULL) {
988 		taskqueue_drain(p->tq, &p->start_task);
989 		taskqueue_free(p->tq);
990 		p->tq = NULL;
991 	}
992 
993 	ether_ifdetach(p->ifp);
994 	/*
995 	 * the lock may be acquired in ifdetach
996 	 */
997 	PORT_LOCK_DEINIT(p);
998 	if_free(p->ifp);
999 
1000 	if (p->port_cdev != NULL)
1001 		destroy_dev(p->port_cdev);
1002 
1003 	return (0);
1004 }
1005 
1006 void
1007 t3_fatal_err(struct adapter *sc)
1008 {
1009 	u_int fw_status[4];
1010 
1011 	if (sc->flags & FULL_INIT_DONE) {
1012 		t3_sge_stop(sc);
1013 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1014 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1015 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1016 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1017 		t3_intr_disable(sc);
1018 	}
1019 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1020 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1021 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1022 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1023 }
1024 
1025 int
1026 t3_os_find_pci_capability(adapter_t *sc, int cap)
1027 {
1028 	device_t dev;
1029 	struct pci_devinfo *dinfo;
1030 	pcicfgregs *cfg;
1031 	uint32_t status;
1032 	uint8_t ptr;
1033 
1034 	dev = sc->dev;
1035 	dinfo = device_get_ivars(dev);
1036 	cfg = &dinfo->cfg;
1037 
1038 	status = pci_read_config(dev, PCIR_STATUS, 2);
1039 	if (!(status & PCIM_STATUS_CAPPRESENT))
1040 		return (0);
1041 
1042 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1043 	case 0:
1044 	case 1:
1045 		ptr = PCIR_CAP_PTR;
1046 		break;
1047 	case 2:
1048 		ptr = PCIR_CAP_PTR_2;
1049 		break;
1050 	default:
1051 		return (0);
1052 		break;
1053 	}
1054 	ptr = pci_read_config(dev, ptr, 1);
1055 
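	/*
	 * Walk the singly-linked capability list until the requested
	 * capability ID is found or the list terminates.
	 */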
1056 	while (ptr != 0) {
1057 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1058 			return (ptr);
1059 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1060 	}
1061 
1062 	return (0);
1063 }
1064 
1065 int
1066 t3_os_pci_save_state(struct adapter *sc)
1067 {
1068 	device_t dev;
1069 	struct pci_devinfo *dinfo;
1070 
1071 	dev = sc->dev;
1072 	dinfo = device_get_ivars(dev);
1073 
1074 	pci_cfg_save(dev, dinfo, 0);
1075 	return (0);
1076 }
1077 
1078 int
1079 t3_os_pci_restore_state(struct adapter *sc)
1080 {
1081 	device_t dev;
1082 	struct pci_devinfo *dinfo;
1083 
1084 	dev = sc->dev;
1085 	dinfo = device_get_ivars(dev);
1086 
1087 	pci_cfg_restore(dev, dinfo);
1088 	return (0);
1089 }
1090 
1091 /**
1092  *	t3_os_link_changed - handle link status changes
1093  *	@adapter: the adapter associated with the link change
1094  *	@port_id: the port index whose link status has changed
1095  *	@link_stat: the new status of the link
1096  *	@speed: the new speed setting
1097  *	@duplex: the new duplex setting
1098  *	@fc: the new flow-control setting
1099  *
1100  *	This is the OS-dependent handler for link status changes.  The OS
1101  *	neutral handler takes care of most of the processing for these events,
1102  *	then calls this handler for any OS-specific processing.
1103  */
1104 void
1105 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1106      int duplex, int fc)
1107 {
1108 	struct port_info *pi = &adapter->port[port_id];
1109 	struct cmac *mac = &adapter->port[port_id].mac;
1110 
1111 	if ((pi->ifp->if_flags & IFF_UP) == 0)
1112 		return;
1113 
1114 	if (link_status) {
1115 		t3_mac_enable(mac, MAC_DIRECTION_RX);
1116 		if_link_state_change(pi->ifp, LINK_STATE_UP);
1117 	} else {
1118 		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1119 		pi->phy.ops->power_down(&pi->phy, 1);
1120 		t3_mac_disable(mac, MAC_DIRECTION_RX);
1121 		t3_link_start(&pi->phy, mac, &pi->link_config);
1122 	}
1123 }
1124 
1125 /*
1126  * Interrupt-context handler for external (PHY) interrupts.
1127  */
1128 void
1129 t3_os_ext_intr_handler(adapter_t *sc)
1130 {
1131 	if (cxgb_debug)
1132 		printf("t3_os_ext_intr_handler\n");
1133 	/*
1134 	 * Schedule a task to handle external interrupts as they may be slow
1135 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1136 	 * interrupts in the meantime and let the task reenable them when
1137 	 * it's done.
1138 	 */
1139 	ADAPTER_LOCK(sc);
1140 	if (sc->slow_intr_mask) {
1141 		sc->slow_intr_mask &= ~F_T3DBG;
1142 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1143 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1144 	}
1145 	ADAPTER_UNLOCK(sc);
1146 }
1147 
1148 void
1149 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1150 {
1151 
1152 	/*
1153 	 * The ifnet might not be allocated yet when this gets called, as
1154 	 * it happens early in attach via t3_prep_adapter, so save the
1155 	 * address in the port structure for later use.
1156 	 */
1157 	if (cxgb_debug)
1158 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1159 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1160 }
1161 
1162 /**
1163  *	cxgb_link_start - enable a port
1164  *	@p: the port to enable
1165  *
1166  *	Performs the MAC and PHY actions needed to enable a port.
1167  */
1168 static void
1169 cxgb_link_start(struct port_info *p)
1170 {
1171 	struct ifnet *ifp;
1172 	struct t3_rx_mode rm;
1173 	struct cmac *mac = &p->mac;
1174 
1175 	ifp = p->ifp;
1176 
1177 	t3_init_rx_mode(&rm, p);
1178 	if (!mac->multiport)
1179 		t3_mac_reset(mac);
1180 	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1181 	t3_mac_set_address(mac, 0, p->hw_addr);
1182 	t3_mac_set_rx_mode(mac, &rm);
1183 	t3_link_start(&p->phy, mac, &p->link_config);
1184 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1185 }
1186 
1187 /**
1188  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1189  *	@adap: the adapter
1190  *
1191  *	Sets up RSS to distribute packets to multiple receive queues.  We
1192  *	configure the RSS CPU lookup table to distribute to the number of HW
1193  *	receive queues, and the response queue lookup table to narrow that
1194  *	down to the response queues actually configured for each port.
1195  *	We always configure the RSS mapping for two ports since the mapping
1196  *	table has plenty of entries.
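 *
 *	For example, with two ports of two queue-sets each, nq[0] = nq[1] = 2:
 *	the first half of the map cycles over response queues 0-1 (channel 0)
 *	and the second half over queues 2-3 (channel 1).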
1197  */
1198 static void
1199 setup_rss(adapter_t *adap)
1200 {
1201 	int i;
1202 	u_int nq[2];
1203 	uint8_t cpus[SGE_QSETS + 1];
1204 	uint16_t rspq_map[RSS_TABLE_SIZE];
1205 
1206 	for (i = 0; i < SGE_QSETS; ++i)
1207 		cpus[i] = i;
1208 	cpus[SGE_QSETS] = 0xff;
1209 
1210 	nq[0] = nq[1] = 0;
1211 	for_each_port(adap, i) {
1212 		const struct port_info *pi = adap2pinfo(adap, i);
1213 
1214 		nq[pi->tx_chan] += pi->nqsets;
1215 	}
1216 	nq[0] = max(nq[0], 1U);
1217 	nq[1] = max(nq[1], 1U);
1218 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1219 		rspq_map[i] = i % nq[0];
1220 		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq[1]) + nq[0];
1221 	}
1222 	/* Calculate the reverse RSS map table */
1223 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1224 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1225 			adap->rrss_map[rspq_map[i]] = i;
1226 
1227 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1228 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1229 		      V_RRCPLCPUSIZE(6), cpus, rspq_map);
1230 
1231 }
1232 
1233 /*
1234  * Sends an mbuf to an offload queue driver.  The transmit is done
1235  * inside a critical section so it is not preempted mid-send.
1236  */
1237 static inline int
1238 offload_tx(struct toedev *tdev, struct mbuf *m)
1239 {
1240 	int ret;
1241 
1242 	critical_enter();
1243 	ret = t3_offload_tx(tdev, m);
1244 	critical_exit();
1245 	return (ret);
1246 }
1247 
1248 static int
1249 write_smt_entry(struct adapter *adapter, int idx)
1250 {
1251 	struct port_info *pi = &adapter->port[idx];
1252 	struct cpl_smt_write_req *req;
1253 	struct mbuf *m;
1254 
1255 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1256 		return (ENOMEM);
1257 
1258 	req = mtod(m, struct cpl_smt_write_req *);
1259 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1260 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1261 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1262 	req->iff = idx;
1263 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1264 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1265 
1266 	m_set_priority(m, 1);
1267 
1268 	offload_tx(&adapter->tdev, m);
1269 
1270 	return (0);
1271 }
1272 
1273 static int
1274 init_smt(struct adapter *adapter)
1275 {
1276 	int i;
1277 
1278 	for_each_port(adapter, i)
1279 		write_smt_entry(adapter, i);
1280 	return 0;
1281 }
1282 
1283 static void
1284 init_port_mtus(adapter_t *adapter)
1285 {
1286 	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1287 
1288 	if (adapter->port[1].ifp)
1289 		mtus |= adapter->port[1].ifp->if_mtu << 16;
1290 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1291 }
1292 
1293 static void
1294 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1295 			      int hi, int port)
1296 {
1297 	struct mbuf *m;
1298 	struct mngt_pktsched_wr *req;
1299 
1300 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1301 	if (m) {
1302 		req = mtod(m, struct mngt_pktsched_wr *);
1303 		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1304 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1305 		req->sched = sched;
1306 		req->idx = qidx;
1307 		req->min = lo;
1308 		req->max = hi;
1309 		req->binding = port;
1310 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1311 		t3_mgmt_tx(adap, m);
1312 	}
1313 }
1314 
1315 static void
1316 bind_qsets(adapter_t *sc)
1317 {
1318 	int i, j;
1319 
1320 	for (i = 0; i < (sc)->params.nports; ++i) {
1321 		const struct port_info *pi = adap2pinfo(sc, i);
1322 
1323 		for (j = 0; j < pi->nqsets; ++j) {
1324 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1325 					  -1, pi->tx_chan);
1326 
1327 		}
1328 	}
1329 }
1330 
1331 static void
1332 update_tpeeprom(struct adapter *adap)
1333 {
1334 #ifdef FIRMWARE_LATEST
1335 	const struct firmware *tpeeprom;
1336 #else
1337 	struct firmware *tpeeprom;
1338 #endif
1339 
1340 	char buf[64];
1341 	uint32_t version;
1342 	unsigned int major, minor;
1343 	int ret, len;
1344 	char rev;
1345 
1346 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1347 
1348 	major = G_TP_VERSION_MAJOR(version);
1349 	minor = G_TP_VERSION_MINOR(version);
1350 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1351 		return;
1352 
1353 	rev = t3rev2char(adap);
1354 
1355 	snprintf(buf, sizeof(buf), TPEEPROM_NAME, rev,
1356 		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1357 
1358 	tpeeprom = firmware_get(buf);
1359 	if (tpeeprom == NULL) {
1360 		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1361 			buf);
1362 		return;
1363 	}
1364 
1365 	len = tpeeprom->datasize - 4;
1366 
1367 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1368 	if (ret)
1369 		goto release_tpeeprom;
1370 
1371 	if (len != TP_SRAM_LEN) {
1372 		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", buf, len, TP_SRAM_LEN);
1373 		goto release_tpeeprom;
1374 	}
1375 
1376 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1377 	    TP_SRAM_OFFSET);
1378 
1379 	if (!ret) {
1380 		device_printf(adap->dev,
1381 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1382 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1383 	} else
1384 		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1385 
1386 release_tpeeprom:
1387 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1388 
1389 	return;
1390 }
1391 
1392 static int
1393 update_tpsram(struct adapter *adap)
1394 {
1395 #ifdef FIRMWARE_LATEST
1396 	const struct firmware *tpsram;
1397 #else
1398 	struct firmware *tpsram;
1399 #endif
1400 	char buf[64];
1401 	int ret;
1402 	char rev;
1403 
1404 	rev = t3rev2char(adap);
1405 	if (!rev)
1406 		return 0;
1407 
1408 	update_tpeeprom(adap);
1409 
1410 	snprintf(buf, sizeof(buf), TPSRAM_NAME, rev,
1411 		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1412 
1413 	tpsram = firmware_get(buf);
1414 	if (tpsram == NULL){
1415 		device_printf(adap->dev, "could not load TP SRAM: unable to load %s\n",
1416 			buf);
1417 		return (EINVAL);
1418 	} else
1419 		device_printf(adap->dev, "updating TP SRAM with %s\n", buf);
1420 
1421 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1422 	if (ret)
1423 		goto release_tpsram;
1424 
1425 	ret = t3_set_proto_sram(adap, tpsram->data);
1426 	if (ret)
1427 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1428 
1429 release_tpsram:
1430 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1431 
1432 	return ret;
1433 }
1434 
1435 /**
1436  *	cxgb_up - enable the adapter
1437  *	@adap: adapter being enabled
1438  *
1439  *	Called when the first port is enabled, this function performs the
1440  *	actions necessary to make an adapter operational, such as completing
1441  *	the initialization of HW modules, and enabling interrupts.
1442  *
1443  */
1444 static int
1445 cxgb_up(struct adapter *sc)
1446 {
1447 	int err = 0;
1448 
1449 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1450 
1451 		if ((sc->flags & FW_UPTODATE) == 0)
1452 			if ((err = upgrade_fw(sc)))
1453 				goto out;
1454 		if ((sc->flags & TPS_UPTODATE) == 0)
1455 			if ((err = update_tpsram(sc)))
1456 				goto out;
1457 		err = t3_init_hw(sc, 0);
1458 		if (err)
1459 			goto out;
1460 
1461 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1462 
1463 		err = setup_sge_qsets(sc);
1464 		if (err)
1465 			goto out;
1466 
1467 		setup_rss(sc);
1468 		sc->flags |= FULL_INIT_DONE;
1469 	}
1470 
1471 	t3_intr_clear(sc);
1472 
1473 	/* If it's MSI or INTx, allocate a single interrupt for everything */
1474 	if ((sc->flags & USING_MSIX) == 0) {
1475 		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1476 		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1477 			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
1478 			    sc->irq_rid);
1479 			err = EINVAL;
1480 			goto out;
1481 		}
1482 		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1483 
1484 		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1485 #ifdef INTR_FILTERS
1486 			NULL,
1487 #endif
1488 			sc->cxgb_intr, sc, &sc->intr_tag)) {
1489 			device_printf(sc->dev, "Cannot set up interrupt\n");
1490 			err = EINVAL;
1491 			goto irq_err;
1492 		}
1493 	} else {
1494 		cxgb_setup_msix(sc, sc->msi_count);
1495 	}
1496 
1497 	t3_sge_start(sc);
1498 	t3_intr_enable(sc);
1499 
1500 	if (!(sc->flags & QUEUES_BOUND)) {
1501 		printf("bind qsets\n");
1502 		bind_qsets(sc);
1503 		sc->flags |= QUEUES_BOUND;
1504 	}
1505 out:
1506 	return (err);
1507 irq_err:
1508 	CH_ERR(sc, "request_irq failed, err %d\n", err);
1509 	goto out;
1510 }
1511 
1512 
1513 /*
1514  * Release resources when all the ports and offloading have been stopped.
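 * Called with the adapter lock held; the lock is dropped before returning.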
1515  */
1516 static void
1517 cxgb_down_locked(struct adapter *sc)
1518 {
1519 	int i;
1520 
1521 	t3_sge_stop(sc);
1522 	t3_intr_disable(sc);
1523 
1524 	if (sc->intr_tag != NULL) {
1525 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1526 		sc->intr_tag = NULL;
1527 	}
1528 	if (sc->irq_res != NULL) {
1529 		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1530 		    sc->irq_rid, sc->irq_res);
1531 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1532 		    sc->irq_res);
1533 		sc->irq_res = NULL;
1534 	}
1535 
1536 	if (sc->flags & USING_MSIX)
1537 		cxgb_teardown_msix(sc);
1538 	ADAPTER_UNLOCK(sc);
1539 
1540 	callout_drain(&sc->cxgb_tick_ch);
1541 	callout_drain(&sc->sge_timer_ch);
1542 
1543 	if (sc->tq != NULL) {
1544 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1545 		for (i = 0; i < sc->params.nports; i++)
1546 			taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task);
1547 	}
1548 #ifdef notyet
1549 
1550 		if (sc->port[i].tq != NULL)
1551 #endif
1552 
1553 }
1554 
1555 static int
1556 offload_open(struct port_info *pi)
1557 {
1558 	struct adapter *adapter = pi->adapter;
1559 	struct toedev *tdev = TOEDEV(pi->ifp);
1560 	int adap_up = adapter->open_device_map & PORT_MASK;
1561 	int err = 0;
1562 
1563 	if (atomic_cmpset_int(&adapter->open_device_map,
1564 		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
1565 		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
1566 		return (0);
1567 
1568 	ADAPTER_LOCK(pi->adapter);
1569 	if (!adap_up)
1570 		err = cxgb_up(adapter);
1571 	ADAPTER_UNLOCK(pi->adapter);
1572 	if (err)
1573 		return (err);
1574 
1575 	t3_tp_set_offload_mode(adapter, 1);
1576 	tdev->lldev = adapter->port[0].ifp;
1577 	err = cxgb_offload_activate(adapter);
1578 	if (err)
1579 		goto out;
1580 
1581 	init_port_mtus(adapter);
1582 	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1583 		     adapter->params.b_wnd,
1584 		     adapter->params.rev == 0 ?
1585 		       adapter->port[0].ifp->if_mtu : 0xffff);
1586 	init_smt(adapter);
1587 
1588 	/* Call back all registered clients */
1589 	cxgb_add_clients(tdev);
1590 
1591 out:
1592 	/* On error, back out: disable offload mode and restore the dummy ops */
1593 	if (err) {
1594 		t3_tp_set_offload_mode(adapter, 0);
1595 		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1596 		cxgb_set_dummy_ops(tdev);
1597 	}
1598 	return (err);
1599 }
1600 #ifdef notyet
1601 static int
1602 offload_close(struct toedev *tdev)
1603 {
1604 	struct adapter *adapter = tdev2adap(tdev);
1605 
1606 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1607 		return (0);
1608 
1609 	/* Call back all registered clients */
1610 	cxgb_remove_clients(tdev);
1611 	tdev->lldev = NULL;
1612 	cxgb_set_dummy_ops(tdev);
1613 	t3_tp_set_offload_mode(adapter, 0);
1614 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1615 
1616 	if (!adapter->open_device_map)
1617 		cxgb_down(adapter);
1618 
1619 	cxgb_offload_deactivate(adapter);
1620 	return (0);
1621 }
1622 #endif
1623 
1624 static void
1625 cxgb_init(void *arg)
1626 {
1627 	struct port_info *p = arg;
1628 
1629 	PORT_LOCK(p);
1630 	cxgb_init_locked(p);
1631 	PORT_UNLOCK(p);
1632 }
1633 
1634 static void
1635 cxgb_init_locked(struct port_info *p)
1636 {
1637 	struct ifnet *ifp;
1638 	adapter_t *sc = p->adapter;
1639 	int err;
1640 
1641 	PORT_LOCK_ASSERT_OWNED(p);
1642 	ifp = p->ifp;
1643 
1644 	ADAPTER_LOCK(p->adapter);
1645 	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1646 		ADAPTER_UNLOCK(p->adapter);
1647 		cxgb_stop_locked(p);
1648 		return;
1649 	}
1650 	if (p->adapter->open_device_map == 0) {
1651 		t3_intr_clear(sc);
1652 		t3_sge_init_adapter(sc);
1653 	}
1654 	setbit(&p->adapter->open_device_map, p->port_id);
1655 	ADAPTER_UNLOCK(p->adapter);
1656 
1657 	if (is_offload(sc) && !ofld_disable) {
1658 		err = offload_open(p);
1659 		if (err)
1660 			log(LOG_WARNING,
1661 			    "Could not initialize offload capabilities\n");
1662 	}
1663 	cxgb_link_start(p);
1664 	t3_link_changed(sc, p->port_id);
1665 	ifp->if_baudrate = p->link_config.speed * 1000000;
1666 
1667 	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1668 	t3_port_intr_enable(sc, p->port_id);
1669 
1670 	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1671 	    cxgb_tick, sc);
1672 
1673 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1674 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1675 }
1676 
1677 static void
1678 cxgb_set_rxmode(struct port_info *p)
1679 {
1680 	struct t3_rx_mode rm;
1681 	struct cmac *mac = &p->mac;
1682 
1683 	PORT_LOCK_ASSERT_OWNED(p);
1684 
1685 	t3_init_rx_mode(&rm, p);
1686 	t3_mac_set_rx_mode(mac, &rm);
1687 }
1688 
1689 static void
1690 cxgb_stop_locked(struct port_info *p)
1691 {
1692 	struct ifnet *ifp;
1693 
1694 	PORT_LOCK_ASSERT_OWNED(p);
1695 	ADAPTER_LOCK_ASSERT_NOTOWNED(p->adapter);
1696 
1697 	ifp = p->ifp;
1698 
1699 	t3_port_intr_disable(p->adapter, p->port_id);
1700 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1701 	p->phy.ops->power_down(&p->phy, 1);
1702 	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1703 
1704 	ADAPTER_LOCK(p->adapter);
1705 	clrbit(&p->adapter->open_device_map, p->port_id);
1706 
1707 
1708 	if (p->adapter->open_device_map == 0) {
1709 		cxgb_down_locked(p->adapter);
1710 	} else
1711 		ADAPTER_UNLOCK(p->adapter);
1712 
1713 }
1714 
1715 static int
1716 cxgb_set_mtu(struct port_info *p, int mtu)
1717 {
1718 	struct ifnet *ifp = p->ifp;
1719 	int error = 0;
1720 
1721 	if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
1722 		error = EINVAL;
1723 	else if (ifp->if_mtu != mtu) {
1724 		PORT_LOCK(p);
1725 		ifp->if_mtu = mtu;
1726 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1727 			callout_stop(&p->adapter->cxgb_tick_ch);
1728 			cxgb_stop_locked(p);
1729 			cxgb_init_locked(p);
1730 		}
1731 		PORT_UNLOCK(p);
1732 	}
1733 	return (error);
1734 }
1735 
1736 static int
1737 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1738 {
1739 	struct port_info *p = ifp->if_softc;
1740 	struct ifaddr *ifa = (struct ifaddr *)data;
1741 	struct ifreq *ifr = (struct ifreq *)data;
1742 	int flags, error = 0;
1743 	uint32_t mask;
1744 
1745 	/*
1746 	 * XXX need to check that we aren't in the middle of an unload
1747 	 */
1748 	switch (command) {
1749 	case SIOCSIFMTU:
1750 		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1751 		break;
1752 	case SIOCSIFADDR:
1753 	case SIOCGIFADDR:
1754 		PORT_LOCK(p);
1755 		if (ifa->ifa_addr->sa_family == AF_INET) {
1756 			ifp->if_flags |= IFF_UP;
1757 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1758 				cxgb_init_locked(p);
1759 			arp_ifinit(ifp, ifa);
1760 		} else
1761 			error = ether_ioctl(ifp, command, data);
1762 		PORT_UNLOCK(p);
1763 		break;
1764 	case SIOCSIFFLAGS:
1765 		callout_drain(&p->adapter->cxgb_tick_ch);
1766 		PORT_LOCK(p);
1767 		if (ifp->if_flags & IFF_UP) {
1768 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1769 				flags = p->if_flags;
1770 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1771 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1772 					cxgb_set_rxmode(p);
1773 			} else
1774 				cxgb_init_locked(p);
1775 			p->if_flags = ifp->if_flags;
1776 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1777 			cxgb_stop_locked(p);
1778 
1779 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1780 			adapter_t *sc = p->adapter;
1781 			callout_reset(&sc->cxgb_tick_ch,
1782 			    sc->params.stats_update_period * hz,
1783 			    cxgb_tick, sc);
1784 		}
1785 		PORT_UNLOCK(p);
1786 		break;
1787 	case SIOCSIFMEDIA:
1788 	case SIOCGIFMEDIA:
1789 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1790 		break;
1791 	case SIOCSIFCAP:
1792 		PORT_LOCK(p);
1793 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1794 		if (mask & IFCAP_TXCSUM) {
1795 			if (IFCAP_TXCSUM & ifp->if_capenable) {
1796 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1797 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1798 				    | CSUM_TSO);
1799 			} else {
1800 				ifp->if_capenable |= IFCAP_TXCSUM;
1801 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1802 			}
1803 		}
		if (mask & IFCAP_RXCSUM) {
1804 			if (IFCAP_RXCSUM & ifp->if_capenable) {
1805 				ifp->if_capenable &= ~IFCAP_RXCSUM;
1806 			} else {
1807 				ifp->if_capenable |= IFCAP_RXCSUM;
1808 			}
1809 		}
1810 		if (mask & IFCAP_TSO4) {
1811 			if (IFCAP_TSO4 & ifp->if_capenable) {
1812 				ifp->if_capenable &= ~IFCAP_TSO4;
1813 				ifp->if_hwassist &= ~CSUM_TSO;
1814 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1815 				ifp->if_capenable |= IFCAP_TSO4;
1816 				ifp->if_hwassist |= CSUM_TSO;
1817 			} else {
1818 				if (cxgb_debug)
1819 					printf("cxgb requires tx checksum offload"
1820 					    " to be enabled to use TSO\n");
1821 				error = EINVAL;
1822 			}
1823 		}
1824 		PORT_UNLOCK(p);
1825 		break;
1826 	default:
1827 		error = ether_ioctl(ifp, command, data);
1828 		break;
1829 	}
1830 	return (error);
1831 }
1832 
1833 static int
1834 cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
1835 {
1836 	struct sge_qset *qs;
1837 	struct sge_txq *txq;
1838 	struct port_info *p = ifp->if_softc;
1839 	struct mbuf *m = NULL;
1840 	int err, in_use_init, free;
1841 
1842 	if (!p->link_config.link_ok)
1843 		return (ENXIO);
1844 
1845 	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1846 		return (ENOBUFS);
1847 
1848 	qs = &p->adapter->sge.qs[p->first_qset];
1849 	txq = &qs->txq[TXQ_ETH];
1850 	err = 0;
1851 
1852 	if (txq->flags & TXQ_TRANSMITTING)
1853 		return (EINPROGRESS);
1854 
1855 	mtx_lock(&txq->lock);
1856 	txq->flags |= TXQ_TRANSMITTING;
1857 	in_use_init = txq->in_use;
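	/*
	 * Drain the interface send queue, stopping once txmax descriptors have
	 * been consumed or the ring is left with at most TX_MAX_DESC free slots.
	 */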
1858 	while ((txq->in_use - in_use_init < txmax) &&
1859 	    (txq->size > txq->in_use + TX_MAX_DESC)) {
1860 		free = 0;
1861 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1862 		if (m == NULL)
1863 			break;
1864 		/*
1865 		 * Convert chain to M_IOVEC
1866 		 */
1867 		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
1868 #ifdef notyet
1869 		m0 = m;
1870 		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
1871 		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
1872 			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
1873 				m = m0;
1874 				m_collapse(m, TX_MAX_SEGS, &m0);
1875 			} else
1876 				break;
1877 		}
1878 		m = m0;
1879 #endif
1880 		if ((err = t3_encap(p, &m, &free)) != 0)
1881 			break;
1882 		BPF_MTAP(ifp, m);
1883 		if (free)
1884 			m_freem(m);
1885 	}
1886 	txq->flags &= ~TXQ_TRANSMITTING;
1887 	mtx_unlock(&txq->lock);
1888 
1889 	if (__predict_false(err)) {
1890 		if (err == ENOMEM) {
1891 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1892 			IFQ_LOCK(&ifp->if_snd);
1893 			IFQ_DRV_PREPEND(&ifp->if_snd, m);
1894 			IFQ_UNLOCK(&ifp->if_snd);
1895 		}
1896 	}
1897 	if (err == 0 && m == NULL)
1898 		err = ENOBUFS;
1899 	else if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC) &&
1900 	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1901 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1902 		err = ENOSPC;
1903 	}
1904 	return (err);
1905 }
1906 
1907 static void
1908 cxgb_start_proc(void *arg, int ncount)
1909 {
1910 	struct ifnet *ifp = arg;
1911 	struct port_info *pi = ifp->if_softc;
1912 	struct sge_qset *qs;
1913 	struct sge_txq *txq;
1914 	int error;
1915 
1916 	qs = &pi->adapter->sge.qs[pi->first_qset];
1917 	txq = &qs->txq[TXQ_ETH];
1918 
1919 	do {
1920 		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2)
1921 			taskqueue_enqueue(pi->tq, &txq->qreclaim_task);
1922 
1923 		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1924 	} while (error == 0);
1925 }
1926 
1927 static void
1928 cxgb_start(struct ifnet *ifp)
1929 {
1930 	struct port_info *pi = ifp->if_softc;
1931 	struct sge_qset *qs;
1932 	struct sge_txq *txq;
1933 	int err;
1934 
1935 	qs = &pi->adapter->sge.qs[pi->first_qset];
1936 	txq = &qs->txq[TXQ_ETH];
1937 
1938 	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2)
1939 		taskqueue_enqueue(pi->tq,
1940 		    &txq->qreclaim_task);
1941 
1942 	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1943 
1944 	if (err == 0)
1945 		taskqueue_enqueue(pi->tq, &pi->start_task);
1946 }
1947 
1948 
1949 static int
1950 cxgb_media_change(struct ifnet *ifp)
1951 {
1952 	if_printf(ifp, "media change not supported\n");
1953 	return (ENXIO);
1954 }
1955 
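/*
 * Report link state, speed, and duplex through the ifmedia framework.
 */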
1956 static void
1957 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1958 {
1959 	struct port_info *p = ifp->if_softc;
1960 
1961 	ifmr->ifm_status = IFM_AVALID;
1962 	ifmr->ifm_active = IFM_ETHER;
1963 
1964 	if (!p->link_config.link_ok)
1965 		return;
1966 
1967 	ifmr->ifm_status |= IFM_ACTIVE;
1968 
1969 	switch (p->link_config.speed) {
1970 	case 10:
1971 		ifmr->ifm_active |= IFM_10_T;
1972 		break;
1973 	case 100:
1974 		ifmr->ifm_active |= IFM_100_TX;
1975 		break;
1976 	case 1000:
1977 		ifmr->ifm_active |= IFM_1000_T;
1978 		break;
1979 	}
1980 
1981 	if (p->link_config.duplex)
1982 		ifmr->ifm_active |= IFM_FDX;
1983 	else
1984 		ifmr->ifm_active |= IFM_HDX;
1985 }
1986 
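/*
 * Interrupt handler for slow-path (asynchronous) events.  The work may
 * sleep, so it is deferred to the adapter taskqueue.
 */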
1987 static void
1988 cxgb_async_intr(void *data)
1989 {
1990 	adapter_t *sc = data;
1991 
1992 	if (cxgb_debug)
1993 		device_printf(sc->dev, "cxgb_async_intr\n");
1994 	/*
1995 	 * May need to sleep - defer to taskqueue
1996 	 */
1997 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
1998 }
1999 
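/*
 * Taskqueue handler for external (PHY) interrupts: let the common code
 * service the PHY and then re-enable the T3DBG interrupt.
 */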
2000 static void
2001 cxgb_ext_intr_handler(void *arg, int count)
2002 {
2003 	adapter_t *sc = (adapter_t *)arg;
2004 
2005 	if (cxgb_debug)
2006 		printf("cxgb_ext_intr_handler\n");
2007 
2008 	t3_phy_intr_handler(sc);
2009 
2010 	/* Now reenable external interrupts */
2011 	ADAPTER_LOCK(sc);
2012 	if (sc->slow_intr_mask) {
2013 		sc->slow_intr_mask |= F_T3DBG;
2014 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2015 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2016 	}
2017 	ADAPTER_UNLOCK(sc);
2018 }
2019 
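/*
 * Poll link state on ports whose PHY cannot interrupt on link changes and
 * refresh each interface's reported baudrate.
 */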
2020 static void
2021 check_link_status(adapter_t *sc)
2022 {
2023 	int i;
2024 
2025 	for (i = 0; i < sc->params.nports; ++i) {
2026 		struct port_info *p = &sc->port[i];
2027 
2028 		if (!(p->port_type->caps & SUPPORTED_IRQ))
2029 			t3_link_changed(sc, i);
2030 		p->ifp->if_baudrate = p->link_config.speed * 1000000;
2031 	}
2032 }
2033 
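/*
 * T3B2 MAC watchdog: run the MAC watchdog on every running port, counting
 * MACs the watchdog toggled and, when it reports a wedged MAC, reprogram
 * and restart that port's MAC and link.
 */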
2034 static void
2035 check_t3b2_mac(struct adapter *adapter)
2036 {
2037 	int i;
2038 
2039 	for_each_port(adapter, i) {
2040 		struct port_info *p = &adapter->port[i];
2041 		struct ifnet *ifp = p->ifp;
2042 		int status;
2043 
2044 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2045 			continue;
2046 
2047 		status = 0;
2048 		PORT_LOCK(p);
2049 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2050 			status = t3b2_mac_watchdog_task(&p->mac);
2051 		if (status == 1)
2052 			p->mac.stats.num_toggled++;
2053 		else if (status == 2) {
2054 			struct cmac *mac = &p->mac;
2055 
2056 			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
2057 			    + ETHER_VLAN_ENCAP_LEN);
2058 			t3_mac_set_address(mac, 0, p->hw_addr);
2059 			cxgb_set_rxmode(p);
2060 			t3_link_start(&p->phy, mac, &p->link_config);
2061 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2062 			t3_port_intr_enable(adapter, p->port_id);
2063 			p->mac.stats.num_resets++;
2064 		}
2065 		PORT_UNLOCK(p);
2066 	}
2067 }
2068 
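/*
 * Periodic callout: defer the actual work to cxgb_tick_handler on the
 * taskqueue and re-arm the callout while any port is still up.
 */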
2069 static void
2070 cxgb_tick(void *arg)
2071 {
2072 	adapter_t *sc = (adapter_t *)arg;
2073 
2074 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2075 
2076 	if (sc->open_device_map != 0)
2077 		callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
2078 		    cxgb_tick, sc);
2079 }
2080 
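/*
 * Periodic housekeeping: poll link state when link polling is configured
 * and run the T3B2 MAC watchdog on affected adapters.
 */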
2081 static void
2082 cxgb_tick_handler(void *arg, int count)
2083 {
2084 	adapter_t *sc = (adapter_t *)arg;
2085 	const struct adapter_params *p = &sc->params;
2086 
2087 	ADAPTER_LOCK(sc);
2088 	if (p->linkpoll_period)
2089 		check_link_status(sc);
2090 
2091 	/*
2092 	 * The adapter lock can currently only be acquired after the
2093 	 * port lock.
2094 	 */
2095 	ADAPTER_UNLOCK(sc);
2096 
2097 	if (p->rev == T3_REV_B2 && p->nports < 4)
2098 		check_t3b2_mac(sc);
2099 }
2100 
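/*
 * Apparently intended to rewrite the upper halves of the 64-bit BARs on
 * 32-bit systems; the Linux-style body is compiled out for now, so this is
 * a no-op.
 */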
2101 static void
2102 touch_bars(device_t dev)
2103 {
2104 	/*
2105 	 * Don't enable yet
2106 	 */
2107 #if !defined(__LP64__) && 0
2108 	u32 v;
2109 
2110 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2111 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2112 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2113 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2114 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2115 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2116 #endif
2117 }
2118 
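/*
 * Write a byte range to the serial EEPROM.  The EEPROM is accessed in
 * 32-bit words, so an unaligned request is widened to word boundaries by
 * reading back the first and last words, merging in the new bytes, and
 * writing the result with write protection temporarily lifted.
 */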
2119 static int
2120 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2121 {
2122 	uint8_t *buf;
2123 	int err = 0;
2124 	u32 aligned_offset, aligned_len, *p;
2125 	struct adapter *adapter = pi->adapter;
2126 
2127 
2128 	aligned_offset = offset & ~3;
2129 	aligned_len = (len + (offset & 3) + 3) & ~3;
2130 
2131 	if (aligned_offset != offset || aligned_len != len) {
2132 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2133 		if (!buf)
2134 			return (ENOMEM);
2135 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2136 		if (!err && aligned_len > 4)
2137 			err = t3_seeprom_read(adapter,
2138 					      aligned_offset + aligned_len - 4,
2139 					      (u32 *)&buf[aligned_len - 4]);
2140 		if (err)
2141 			goto out;
2142 		memcpy(buf + (offset & 3), data, len);
2143 	} else
2144 		buf = (uint8_t *)(uintptr_t)data;
2145 
2146 	err = t3_seeprom_wp(adapter, 0);
2147 	if (err)
2148 		goto out;
2149 
2150 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2151 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2152 		aligned_offset += 4;
2153 	}
2154 
2155 	if (!err)
2156 		err = t3_seeprom_wp(adapter, 1);
2157 out:
2158 	if (buf != data)
2159 		free(buf, M_DEVBUF);
2160 	return (err);
2161 }
2162 
2163 
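/*
 * Range check for ioctl parameters: a negative value means "not specified"
 * and is accepted, otherwise the value must lie within [lo, hi].
 */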
2164 static int
2165 in_range(int val, int lo, int hi)
2166 {
2167 	return (val < 0 || (val <= hi && val >= lo));
2168 }
2169 
2170 static int
2171 cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2172 {
2173 	return (0);
2174 }
2175 
2176 static int
2177 cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2178 {
2179 	return (0);
2180 }
2181 
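/*
 * ioctl handler for the cxgb control device node.  Callers must hold driver
 * privilege; the commands expose MDIO access, raw register and SGE context
 * reads, queue set and hardware scheduler tuning, MTU table updates, memory
 * and TCAM reads, trace filters, and register dumps to userland management
 * tools.
 */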
2182 static int
2183 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2184     int fflag, struct thread *td)
2185 {
2186 	int mmd, error = 0;
2187 	struct port_info *pi = dev->si_drv1;
2188 	adapter_t *sc = pi->adapter;
2189 
2190 #ifdef PRIV_SUPPORTED
2191 	if (priv_check(td, PRIV_DRIVER)) {
2192 		if (cxgb_debug)
2193 			printf("user does not have access to privileged ioctls\n");
2194 		return (EPERM);
2195 	}
2196 #else
2197 	if (suser(td)) {
2198 		if (cxgb_debug)
2199 			printf("user does not have access to privileged ioctls\n");
2200 		return (EPERM);
2201 	}
2202 #endif
2203 
2204 	switch (cmd) {
2205 	case SIOCGMIIREG: {
2206 		uint32_t val;
2207 		struct cphy *phy = &pi->phy;
2208 		struct mii_data *mid = (struct mii_data *)data;
2209 
2210 		if (!phy->mdio_read)
2211 			return (EOPNOTSUPP);
2212 		if (is_10G(sc)) {
2213 			mmd = mid->phy_id >> 8;
2214 			if (!mmd)
2215 				mmd = MDIO_DEV_PCS;
2216 			else if (mmd > MDIO_DEV_XGXS)
2217 				return (EINVAL);
2218 
2219 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2220 					     mid->reg_num, &val);
2221 		} else
2222 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2223 					     mid->reg_num & 0x1f, &val);
2224 		if (error == 0)
2225 			mid->val_out = val;
2226 		break;
2227 	}
2228 	case SIOCSMIIREG: {
2229 		struct cphy *phy = &pi->phy;
2230 		struct mii_data *mid = (struct mii_data *)data;
2231 
2232 		if (!phy->mdio_write)
2233 			return (EOPNOTSUPP);
2234 		if (is_10G(sc)) {
2235 			mmd = mid->phy_id >> 8;
2236 			if (!mmd)
2237 				mmd = MDIO_DEV_PCS;
2238 			else if (mmd > MDIO_DEV_XGXS)
2239 				return (EINVAL);
2240 
2241 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2242 					      mmd, mid->reg_num, mid->val_in);
2243 		} else
2244 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2245 					      mid->reg_num & 0x1f,
2246 					      mid->val_in);
2247 		break;
2248 	}
2249 	case CHELSIO_SETREG: {
2250 		struct ch_reg *edata = (struct ch_reg *)data;
2251 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2252 			return (EFAULT);
2253 		t3_write_reg(sc, edata->addr, edata->val);
2254 		break;
2255 	}
2256 	case CHELSIO_GETREG: {
2257 		struct ch_reg *edata = (struct ch_reg *)data;
2258 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2259 			return (EFAULT);
2260 		edata->val = t3_read_reg(sc, edata->addr);
2261 		break;
2262 	}
2263 	case CHELSIO_GET_SGE_CONTEXT: {
2264 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2265 		mtx_lock(&sc->sge.reg_lock);
2266 		switch (ecntxt->cntxt_type) {
2267 		case CNTXT_TYPE_EGRESS:
2268 			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2269 			    ecntxt->data);
2270 			break;
2271 		case CNTXT_TYPE_FL:
2272 			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
2273 			    ecntxt->data);
2274 			break;
2275 		case CNTXT_TYPE_RSP:
2276 			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2277 			    ecntxt->data);
2278 			break;
2279 		case CNTXT_TYPE_CQ:
2280 			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
2281 			    ecntxt->data);
2282 			break;
2283 		default:
2284 			error = EINVAL;
2285 			break;
2286 		}
2287 		mtx_unlock(&sc->sge.reg_lock);
2288 		break;
2289 	}
2290 	case CHELSIO_GET_SGE_DESC: {
2291 		struct ch_desc *edesc = (struct ch_desc *)data;
2292 		int ret;
2293 		if (edesc->queue_num >= SGE_QSETS * 6)
2294 			return (EINVAL);
2295 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2296 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2297 		if (ret < 0)
2298 			return (EINVAL);
2299 		edesc->size = ret;
2300 		break;
2301 	}
2302 	case CHELSIO_SET_QSET_PARAMS: {
2303 		struct qset_params *q;
2304 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2305 
2306 		if (t->qset_idx >= SGE_QSETS)
2307 			return (EINVAL);
2308 		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
2309 		    !in_range(t->cong_thres, 0, 255) ||
2310 		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
2311 			      MAX_TXQ_ENTRIES) ||
2312 		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
2313 			      MAX_TXQ_ENTRIES) ||
2314 		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
2315 			      MAX_CTRL_TXQ_ENTRIES) ||
2316 		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
2317 		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
2318 			      MAX_RX_JUMBO_BUFFERS) ||
2319 		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
2320 			return (EINVAL);
2321 		if ((sc->flags & FULL_INIT_DONE) &&
2322 		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2323 		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2324 		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2325 		     t->polling >= 0 || t->cong_thres >= 0))
2326 			return (EBUSY);
2327 
2328 		q = &sc->params.sge.qset[t->qset_idx];
2329 
2330 		if (t->rspq_size >= 0)
2331 			q->rspq_size = t->rspq_size;
2332 		if (t->fl_size[0] >= 0)
2333 			q->fl_size = t->fl_size[0];
2334 		if (t->fl_size[1] >= 0)
2335 			q->jumbo_size = t->fl_size[1];
2336 		if (t->txq_size[0] >= 0)
2337 			q->txq_size[0] = t->txq_size[0];
2338 		if (t->txq_size[1] >= 0)
2339 			q->txq_size[1] = t->txq_size[1];
2340 		if (t->txq_size[2] >= 0)
2341 			q->txq_size[2] = t->txq_size[2];
2342 		if (t->cong_thres >= 0)
2343 			q->cong_thres = t->cong_thres;
2344 		if (t->intr_lat >= 0) {
2345 			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2346 
2347 			q->coalesce_nsecs = t->intr_lat*1000;
2348 			t3_update_qset_coalesce(qs, q);
2349 		}
2350 		break;
2351 	}
2352 	case CHELSIO_GET_QSET_PARAMS: {
2353 		struct qset_params *q;
2354 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2355 
2356 		if (t->qset_idx >= SGE_QSETS)
2357 			return (EINVAL);
2358 
2359 		q = &sc->params.sge.qset[t->qset_idx];
2360 		t->rspq_size   = q->rspq_size;
2361 		t->txq_size[0] = q->txq_size[0];
2362 		t->txq_size[1] = q->txq_size[1];
2363 		t->txq_size[2] = q->txq_size[2];
2364 		t->fl_size[0]  = q->fl_size;
2365 		t->fl_size[1]  = q->jumbo_size;
2366 		t->polling     = q->polling;
2367 		t->intr_lat    = q->coalesce_nsecs / 1000;
2368 		t->cong_thres  = q->cong_thres;
2369 		break;
2370 	}
2371 	case CHELSIO_SET_QSET_NUM: {
2372 		struct ch_reg *edata = (struct ch_reg *)data;
2373 		unsigned int port_idx = pi->port_id;
2374 
2375 		if (sc->flags & FULL_INIT_DONE)
2376 			return (EBUSY);
2377 		if (edata->val < 1 ||
2378 		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2379 			return (EINVAL);
2380 		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2381 			return (EINVAL);
2382 		sc->port[port_idx].nqsets = edata->val;
2383 		sc->port[0].first_qset = 0;
2384 		/*
2385 		 * XXX hardcode ourselves to 2 ports just like LEEENUX
2386 		 */
2387 		sc->port[1].first_qset = sc->port[0].nqsets;
2388 		break;
2389 	}
2390 	case CHELSIO_GET_QSET_NUM: {
2391 		struct ch_reg *edata = (struct ch_reg *)data;
2392 		edata->val = pi->nqsets;
2393 		break;
2394 	}
2395 #ifdef notyet
2396 	case CHELSIO_LOAD_FW:
2397 	case CHELSIO_GET_PM:
2398 	case CHELSIO_SET_PM:
2399 		return (EOPNOTSUPP);
2400 		break;
2401 #endif
2402 	case CHELSIO_SETMTUTAB: {
2403 		struct ch_mtus *m = (struct ch_mtus *)data;
2404 		int i;
2405 
2406 		if (!is_offload(sc))
2407 			return (EOPNOTSUPP);
2408 		if (offload_running(sc))
2409 			return (EBUSY);
2410 		if (m->nmtus != NMTUS)
2411 			return (EINVAL);
2412 		if (m->mtus[0] < 81)         /* accommodate SACK */
2413 			return (EINVAL);
2414 
2415 		/*
2416 		 * MTUs must be in ascending order
2417 		 */
2418 		for (i = 1; i < NMTUS; ++i)
2419 			if (m->mtus[i] < m->mtus[i - 1])
2420 				return (EINVAL);
2421 
2422 		memcpy(sc->params.mtus, m->mtus,
2423 		       sizeof(sc->params.mtus));
2424 		break;
2425 	}
2426 	case CHELSIO_GETMTUTAB: {
2427 		struct ch_mtus *m = (struct ch_mtus *)data;
2428 
2429 		if (!is_offload(sc))
2430 			return (EOPNOTSUPP);
2431 
2432 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2433 		m->nmtus = NMTUS;
2434 		break;
2435 	}
2436 	case CHELSIO_DEVUP:
2437 		if (!is_offload(sc))
2438 			return (EOPNOTSUPP);
2439 		return offload_open(pi);
2440 		break;
2441 	case CHELSIO_GET_MEM: {
2442 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2443 		struct mc7 *mem;
2444 		uint8_t *useraddr;
2445 		u64 buf[32];
2446 
2447 		if (!is_offload(sc))
2448 			return (EOPNOTSUPP);
2449 		if (!(sc->flags & FULL_INIT_DONE))
2450 			return (EIO);         /* need the memory controllers */
2451 		if ((t->addr & 0x7) || (t->len & 0x7))
2452 			return (EINVAL);
2453 		if (t->mem_id == MEM_CM)
2454 			mem = &sc->cm;
2455 		else if (t->mem_id == MEM_PMRX)
2456 			mem = &sc->pmrx;
2457 		else if (t->mem_id == MEM_PMTX)
2458 			mem = &sc->pmtx;
2459 		else
2460 			return (EINVAL);
2461 
2462 		/*
2463 		 * Version scheme:
2464 		 * bits 0..9: chip version
2465 		 * bits 10..15: chip revision
2466 		 */
2467 		t->version = 3 | (sc->params.rev << 10);
2468 
2469 		/*
2470 		 * Read 256 bytes at a time as len can be large and we don't
2471 		 * want to use huge intermediate buffers.
2472 		 */
2473 		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
2474 		while (t->len) {
2475 			unsigned int chunk = min(t->len, sizeof(buf));
2476 
2477 			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2478 			if (error)
2479 				return (-error);
2480 			if (copyout(buf, useraddr, chunk))
2481 				return (EFAULT);
2482 			useraddr += chunk;
2483 			t->addr += chunk;
2484 			t->len -= chunk;
2485 		}
2486 		break;
2487 	}
2488 	case CHELSIO_READ_TCAM_WORD: {
2489 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2490 
2491 		if (!is_offload(sc))
2492 			return (EOPNOTSUPP);
2493 		if (!(sc->flags & FULL_INIT_DONE))
2494 			return (EIO);         /* need MC5 */
2495 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2496 		break;
2497 	}
2498 	case CHELSIO_SET_TRACE_FILTER: {
2499 		struct ch_trace *t = (struct ch_trace *)data;
2500 		const struct trace_params *tp;
2501 
2502 		tp = (const struct trace_params *)&t->sip;
2503 		if (t->config_tx)
2504 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2505 					       t->trace_tx);
2506 		if (t->config_rx)
2507 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2508 					       t->trace_rx);
2509 		break;
2510 	}
2511 	case CHELSIO_SET_PKTSCHED: {
2512 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2513 		if (sc->open_device_map == 0)
2514 			return (EAGAIN);
2515 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2516 		    p->binding);
2517 		break;
2518 	}
2519 	case CHELSIO_IFCONF_GETREGS: {
2520 		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2521 		int reglen = cxgb_get_regs_len();
2522 		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2523 		if (buf == NULL)
2524 			return (ENOMEM);
2525 		if (regs->len > reglen)
2526 			regs->len = reglen;
2527 		else if (regs->len < reglen) {
2528 			error = E2BIG;
2529 			goto done;
2530 		}
2531 		cxgb_get_regs(sc, regs, buf);
2532 		error = copyout(buf, regs->data, reglen);
2533 
2534 		done:
2535 		free(buf, M_DEVBUF);
2536 
2537 		break;
2538 	}
2539 	case CHELSIO_SET_HW_SCHED: {
2540 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2541 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2542 
2543 		if ((sc->flags & FULL_INIT_DONE) == 0)
2544 			return (EAGAIN);       /* need TP to be initialized */
2545 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2546 		    !in_range(t->channel, 0, 1) ||
2547 		    !in_range(t->kbps, 0, 10000000) ||
2548 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2549 		    !in_range(t->flow_ipg, 0,
2550 			      dack_ticks_to_usec(sc, 0x7ff)))
2551 			return (EINVAL);
2552 
2553 		if (t->kbps >= 0) {
2554 			error = t3_config_sched(sc, t->kbps, t->sched);
2555 			if (error < 0)
2556 				return (-error);
2557 		}
2558 		if (t->class_ipg >= 0)
2559 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2560 		if (t->flow_ipg >= 0) {
2561 			t->flow_ipg *= 1000;     /* us -> ns */
2562 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2563 		}
2564 		if (t->mode >= 0) {
2565 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2566 
2567 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2568 					 bit, t->mode ? bit : 0);
2569 		}
2570 		if (t->channel >= 0)
2571 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2572 					 1 << t->sched, t->channel << t->sched);
2573 		break;
2574 	}
2575 	default:
2576 		return (EOPNOTSUPP);
2577 		break;
2578 	}
2579 
2580 	return (error);
2581 }
2582 
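/*
 * Copy a contiguous block of adapter registers, identified by the byte
 * offsets [start, end], into the register dump buffer at the matching
 * offset.
 */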
2583 static __inline void
2584 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2585     unsigned int end)
2586 {
2587 	uint32_t *p = (uint32_t *)(buf + start);	/* start is a byte offset */
2588 
2589 	for ( ; start <= end; start += sizeof(uint32_t))
2590 		*p++ = t3_read_reg(ap, start);
2591 }
2592 
2593 #define T3_REGMAP_SIZE (3 * 1024)
2594 static int
2595 cxgb_get_regs_len(void)
2596 {
2597 	return (T3_REGMAP_SIZE);
2598 }
2599 #undef T3_REGMAP_SIZE
2600 
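/*
 * Produce the register dump returned by CHELSIO_IFCONF_GETREGS: a version
 * word followed by the interesting register blocks, with the clear-on-read
 * MAC statistics left zeroed.
 */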
2601 static void
2602 cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2603 {
2604 
2605 	/*
2606 	 * Version scheme:
2607 	 * bits 0..9: chip version
2608 	 * bits 10..15: chip revision
2609 	 * bit 31: set for PCIe cards
2610 	 */
2611 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2612 
2613 	/*
2614 	 * We skip the MAC statistics registers because they are clear-on-read.
2615 	 * Also reading multi-register stats would need to synchronize with the
2616 	 * periodic mac stats accumulation.  Hard to justify the complexity.
2617 	 */
2618 	memset(buf, 0, REGDUMP_SIZE);
2619 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2620 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2621 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2622 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2623 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2624 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2625 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2626 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2627 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2628 }
2629