xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 0bb263df82e129f5f8c82da6deb55dfe10daa677)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12 2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/rman.h>
44 #include <sys/ioccom.h>
45 #include <sys/mbuf.h>
46 #include <sys/linker.h>
47 #include <sys/firmware.h>
48 #include <sys/socket.h>
49 #include <sys/sockio.h>
50 #include <sys/smp.h>
51 #include <sys/sysctl.h>
52 #include <sys/queue.h>
53 #include <sys/taskqueue.h>
54 
55 #include <net/bpf.h>
56 #include <net/ethernet.h>
57 #include <net/if.h>
58 #include <net/if_arp.h>
59 #include <net/if_dl.h>
60 #include <net/if_media.h>
61 #include <net/if_types.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/if_ether.h>
66 #include <netinet/ip.h>
68 #include <netinet/tcp.h>
69 #include <netinet/udp.h>
70 
71 #include <dev/pci/pcireg.h>
72 #include <dev/pci/pcivar.h>
73 #include <dev/pci/pci_private.h>
74 
75 #ifdef CONFIG_DEFINED
76 #include <cxgb_include.h>
77 #else
78 #include <dev/cxgb/cxgb_include.h>
79 #endif
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_msix(adapter_t *, int);
86 static void cxgb_teardown_msix(adapter_t *);
87 static void cxgb_init(void *);
88 static void cxgb_init_locked(struct port_info *);
89 static void cxgb_stop_locked(struct port_info *);
90 static void cxgb_set_rxmode(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static void cxgb_start(struct ifnet *);
93 static void cxgb_start_proc(void *, int ncount);
94 static int cxgb_media_change(struct ifnet *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_ext_intr_handler(void *, int);
99 static void cxgb_down(struct adapter *sc);
100 static void cxgb_tick(void *);
101 static void setup_rss(adapter_t *sc);
102 
103 /* Attachment glue for the PCI controller end of the device.  Each port of
104  * the device is attached separately, as defined later.
105  */
106 static int cxgb_controller_probe(device_t);
107 static int cxgb_controller_attach(device_t);
108 static int cxgb_controller_detach(device_t);
109 static void cxgb_free(struct adapter *);
110 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
111     unsigned int end);
112 static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
113 static int cxgb_get_regs_len(void);
114 static int offload_open(struct port_info *pi);
115 static int offload_close(struct toedev *tdev);
116 
117 
118 
119 static device_method_t cxgb_controller_methods[] = {
120 	DEVMETHOD(device_probe,		cxgb_controller_probe),
121 	DEVMETHOD(device_attach,	cxgb_controller_attach),
122 	DEVMETHOD(device_detach,	cxgb_controller_detach),
123 
124 	/* bus interface */
125 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
126 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
127 
128 	{ 0, 0 }
129 };
130 
131 static driver_t cxgb_controller_driver = {
132 	"cxgbc",
133 	cxgb_controller_methods,
134 	sizeof(struct adapter)
135 };
136 
137 static devclass_t	cxgb_controller_devclass;
138 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
139 
140 /*
141  * Attachment glue for the ports.  Attachment is done directly to the
142  * controller device.
143  */
144 static int cxgb_port_probe(device_t);
145 static int cxgb_port_attach(device_t);
146 static int cxgb_port_detach(device_t);
147 
148 static device_method_t cxgb_port_methods[] = {
149 	DEVMETHOD(device_probe,		cxgb_port_probe),
150 	DEVMETHOD(device_attach,	cxgb_port_attach),
151 	DEVMETHOD(device_detach,	cxgb_port_detach),
152 	{ 0, 0 }
153 };
154 
155 static driver_t cxgb_port_driver = {
156 	"cxgb",
157 	cxgb_port_methods,
158 	0
159 };
160 
161 static d_ioctl_t cxgb_extension_ioctl;
162 static d_open_t cxgb_extension_open;
163 static d_close_t cxgb_extension_close;
164 
165 static struct cdevsw cxgb_cdevsw = {
166        .d_version =    D_VERSION,
167        .d_flags =      0,
168        .d_open =       cxgb_extension_open,
169        .d_close =      cxgb_extension_close,
170        .d_ioctl =      cxgb_extension_ioctl,
171        .d_name =       "cxgb",
172 };
173 
174 static devclass_t	cxgb_port_devclass;
175 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
176 
177 #define SGE_MSIX_COUNT (SGE_QSETS + 1)
178 
179 extern int collapse_mbufs;
180 /*
181  * The driver uses the best interrupt scheme available on a platform in the
182  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
183  * of these schemes the driver may consider as follows:
184  *
185  * msi = 2: choose from among all three options
186  * msi = 1: only consider MSI and pin interrupts
187  * msi = 0: force pin interrupts
188  */
189 static int msi_allowed = 2;
190 
191 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
192 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
193 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
194     "MSI-X, MSI, INTx selector");
195 
196 /*
197  * The driver enables offload as a default.
198  * To disable it, use ofld_disable = 1.
199  */
200 static int ofld_disable = 0;
201 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
202 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
203     "disable ULP offload");
204 
205 /*
206  * The driver defaults to a single queue-set per port (singleq = 1).
207  * Set singleq = 0 to allow multiple queue-sets per port when MSI-X is in use.
208  */
209 static int singleq = 1;
210 TUNABLE_INT("hw.cxgb.singleq", &singleq);
211 SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
212     "use a single queue-set per port");
213 
214 enum {
215 	MAX_TXQ_ENTRIES      = 16384,
216 	MAX_CTRL_TXQ_ENTRIES = 1024,
217 	MAX_RSPQ_ENTRIES     = 16384,
218 	MAX_RX_BUFFERS       = 16384,
219 	MAX_RX_JUMBO_BUFFERS = 16384,
220 	MIN_TXQ_ENTRIES      = 4,
221 	MIN_CTRL_TXQ_ENTRIES = 4,
222 	MIN_RSPQ_ENTRIES     = 32,
223 	MIN_FL_ENTRIES       = 32
224 };
225 
226 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
227 
228 /* Table for probing the cards.  The desc field isn't actually used */
229 struct cxgb_ident {
230 	uint16_t	vendor;
231 	uint16_t	device;
232 	int		index;
233 	char		*desc;
234 } cxgb_identifiers[] = {
235 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
236 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
237 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
238 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
239 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
240 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
241 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
242 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
243 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
244 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
245 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
246 	{0, 0, 0, NULL}
247 };
248 
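/*
 * Look up this device's PCI vendor/device ID in the cxgb_identifiers
 * table; returns the matching entry or NULL if the card is unknown.
 */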
249 static struct cxgb_ident *
250 cxgb_get_ident(device_t dev)
251 {
252 	struct cxgb_ident *id;
253 
254 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
255 		if ((id->vendor == pci_get_vendor(dev)) &&
256 		    (id->device == pci_get_device(dev))) {
257 			return (id);
258 		}
259 	}
260 	return (NULL);
261 }
262 
263 static const struct adapter_info *
264 cxgb_get_adapter_info(device_t dev)
265 {
266 	struct cxgb_ident *id;
267 	const struct adapter_info *ai;
268 
269 	id = cxgb_get_ident(dev);
270 	if (id == NULL)
271 		return (NULL);
272 
273 	ai = t3_get_adapter_info(id->index);
274 
275 	return (ai);
276 }
277 
278 static int
279 cxgb_controller_probe(device_t dev)
280 {
281 	const struct adapter_info *ai;
282 	char *ports, buf[80];
283 	int nports;
284 
285 	ai = cxgb_get_adapter_info(dev);
286 	if (ai == NULL)
287 		return (ENXIO);
288 
289 	nports = ai->nports0 + ai->nports1;
290 	if (nports == 1)
291 		ports = "port";
292 	else
293 		ports = "ports";
294 
295 	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
296 	device_set_desc_copy(dev, buf);
297 	return (BUS_PROBE_DEFAULT);
298 }
299 
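/*
 * Fetch the firmware image matching the driver's expected version
 * (t3fw<major><minor><micro>) via firmware(9) and load it onto the
 * adapter.
 */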
300 static int
301 upgrade_fw(adapter_t *sc)
302 {
303 	char buf[32];
304 #ifdef FIRMWARE_LATEST
305 	const struct firmware *fw;
306 #else
307 	struct firmware *fw;
308 #endif
309 	int status;
310 
311 	snprintf(&buf[0], sizeof(buf), "t3fw%d%d%d", FW_VERSION_MAJOR,
312 	    FW_VERSION_MINOR, FW_VERSION_MICRO);
313 
314 	fw = firmware_get(buf);
315 
316 	if (fw == NULL) {
317 		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
318 		return (ENOENT);
319 	}
320 
321 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
322 
323 	firmware_put(fw, FIRMWARE_UNLOAD);
324 
325 	return (status);
326 }
327 
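/*
 * Controller attach: map BAR 0, note the PCIe link width, select an
 * interrupt scheme (MSI-X, MSI, or INTx), create the driver taskqueue
 * and tick callout, check the firmware version, and add a child device
 * for each port.
 */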
328 static int
329 cxgb_controller_attach(device_t dev)
330 {
331 	device_t child;
332 	const struct adapter_info *ai;
333 	struct adapter *sc;
334 	int i, reg, msi_needed, error = 0;
335 	uint32_t vers;
336 	int port_qsets = 1;
337 
338 	sc = device_get_softc(dev);
339 	sc->dev = dev;
340 	sc->msi_count = 0;
341 
342 	/* find the PCIe link width and set max read request to 4KB */
343 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
344 		uint16_t lnk, pectl;
345 		lnk = pci_read_config(dev, reg + 0x12, 2);
346 		sc->link_width = (lnk >> 4) & 0x3f;
347 
348 		pectl = pci_read_config(dev, reg + 0x8, 2);
349 		pectl = (pectl & ~0x7000) | (5 << 12);
350 		pci_write_config(dev, reg + 0x8, pectl, 2);
351 	}
352 	if (sc->link_width != 0 && sc->link_width <= 4) {
353 		device_printf(sc->dev,
354 		    "PCIe x%d Link, expect reduced performance\n",
355 		    sc->link_width);
356 	}
357 
358 	pci_enable_busmaster(dev);
359 	/*
360 	 * Allocate the registers and make them available to the driver.
361 	 * The registers that we care about for NIC mode are in BAR 0
362 	 */
363 	sc->regs_rid = PCIR_BAR(0);
364 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
365 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
366 		device_printf(dev, "Cannot allocate BAR\n");
367 		return (ENXIO);
368 	}
369 
370 	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
371 	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
372 	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);
373 	mtx_init(&sc->elmer_lock, "cxgb elmer", NULL, MTX_DEF);
374 
375 	sc->bt = rman_get_bustag(sc->regs_res);
376 	sc->bh = rman_get_bushandle(sc->regs_res);
377 	sc->mmio_len = rman_get_size(sc->regs_res);
378 
379 	ai = cxgb_get_adapter_info(dev);
380 	if (t3_prep_adapter(sc, ai, 1) < 0) {
381 		printf("prep adapter failed\n");
382 		error = ENODEV;
383 		goto out;
384 	}
385 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
386 	 * enough messages for the queue sets.  If that fails, try falling
387 	 * back to MSI.  If that fails, then try falling back to the legacy
388 	 * interrupt pin model.
389 	 */
390 #ifdef MSI_SUPPORTED
391 
392 	sc->msix_regs_rid = 0x20;
393 	if ((msi_allowed >= 2) &&
394 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
395 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
396 
397 		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
398 
399 		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
400 		    (sc->msi_count != msi_needed)) {
401 			device_printf(dev, "msix allocation failed - msi_count = %d"
402 			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
403 			    msi_needed, error);
404 			sc->msi_count = 0;
405 			pci_release_msi(dev);
406 			bus_release_resource(dev, SYS_RES_MEMORY,
407 			    sc->msix_regs_rid, sc->msix_regs_res);
408 			sc->msix_regs_res = NULL;
409 		} else {
410 			sc->flags |= USING_MSIX;
411 			sc->cxgb_intr = t3_intr_msix;
412 		}
413 	}
414 
415 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
416 		sc->msi_count = 1;
417 		if (pci_alloc_msi(dev, &sc->msi_count)) {
418 			device_printf(dev, "alloc msi failed - will try INTx\n");
419 			sc->msi_count = 0;
420 			pci_release_msi(dev);
421 		} else {
422 			sc->flags |= USING_MSI;
423 			sc->irq_rid = 1;
424 			sc->cxgb_intr = t3_intr_msi;
425 		}
426 	}
427 #endif
428 	if (sc->msi_count == 0) {
429 		device_printf(dev, "using line interrupts\n");
430 		sc->irq_rid = 0;
431 		sc->cxgb_intr = t3b_intr;
432 	}
433 
434 
435 	/* Create a private taskqueue thread for handling driver events */
436 #ifdef TASKQUEUE_CURRENT
437 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
438 	    taskqueue_thread_enqueue, &sc->tq);
439 #else
440 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
441 	    taskqueue_thread_enqueue, &sc->tq);
442 #endif
443 	if (sc->tq == NULL) {
444 		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
445 		goto out;
446 	}
447 
448 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
449 	    device_get_nameunit(dev));
450 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
451 
452 
453 	/* Create a periodic callout for checking adapter status */
454 	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);
455 
456 	if (t3_check_fw_version(sc) != 0) {
457 		/*
458 		 * Warn user that a firmware update will be attempted in init.
459 		 */
460 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
461 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
462 		sc->flags &= ~FW_UPTODATE;
463 	} else {
464 		sc->flags |= FW_UPTODATE;
465 	}
466 
467 	if ((sc->flags & USING_MSIX) && !singleq)
468 		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
469 
470 	/*
471 	 * Create a child device for each MAC.  The ethernet attachment
472 	 * will be done in these children.
473 	 */
474 	for (i = 0; i < (sc)->params.nports; i++) {
475 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
476 			device_printf(dev, "failed to add child port\n");
477 			error = EINVAL;
478 			goto out;
479 		}
480 		sc->portdev[i] = child;
481 		sc->port[i].adapter = sc;
482 		sc->port[i].nqsets = port_qsets;
483 		sc->port[i].first_qset = i*port_qsets;
484 		sc->port[i].port = i;
485 		device_set_softc(child, &sc->port[i]);
486 	}
487 	if ((error = bus_generic_attach(dev)) != 0)
488 		goto out;
489 
490 	/*
491 	 * XXX need to poll for link status
492 	 */
493 	sc->params.stats_update_period = 1;
494 
495 	/* initialize sge private state */
496 	t3_sge_init_adapter(sc);
497 
498 	t3_led_ready(sc);
499 
500 	cxgb_offload_init();
501 	if (is_offload(sc)) {
502 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
503 		cxgb_adapter_ofld(sc);
504 	}
505 	error = t3_get_fw_version(sc, &vers);
506 	if (error)
507 		goto out;
508 
509 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
510 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
511 	    G_FW_VERSION_MICRO(vers));
512 
513 	t3_add_sysctls(sc);
514 out:
515 	if (error)
516 		cxgb_free(sc);
517 
518 	return (error);
519 }
520 
521 static int
522 cxgb_controller_detach(device_t dev)
523 {
524 	struct adapter *sc;
525 
526 	sc = device_get_softc(dev);
527 
528 	cxgb_free(sc);
529 
530 	return (0);
531 }
532 
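/*
 * Undo everything done by cxgb_controller_attach: interrupts, the
 * taskqueue, child port devices, SGE resources, BARs, and mutexes.
 */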
533 static void
534 cxgb_free(struct adapter *sc)
535 {
536 	int i;
537 
538 	cxgb_down(sc);
539 
540 #ifdef MSI_SUPPORTED
541 	if (sc->flags & (USING_MSI | USING_MSIX)) {
542 		device_printf(sc->dev, "releasing msi message(s)\n");
543 		pci_release_msi(sc->dev);
544 	} else {
545 		device_printf(sc->dev, "no msi message to release\n");
546 	}
547 #endif
548 	if (sc->msix_regs_res != NULL) {
549 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
550 		    sc->msix_regs_res);
551 	}
552 
553 	/*
554 	 * XXX need to drain the ifq by hand until
555 	 * it is taught about mbuf iovecs
556 	 */
557 	callout_drain(&sc->cxgb_tick_ch);
558 
559 	t3_sge_deinit_sw(sc);
560 
561 	if (sc->tq != NULL) {
562 		taskqueue_drain(sc->tq, &sc->ext_intr_task);
563 		taskqueue_free(sc->tq);
564 	}
565 
566 	for (i = 0; i < (sc)->params.nports; ++i) {
567 		if (sc->portdev[i] != NULL)
568 			device_delete_child(sc->dev, sc->portdev[i]);
569 	}
570 
571 	bus_generic_detach(sc->dev);
572 #ifdef notyet
573 	if (is_offload(sc)) {
574 		cxgb_adapter_unofld(sc);
575 		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
576 			offload_close(&sc->tdev);
577 	}
578 #endif
579 	t3_free_sge_resources(sc);
580 	t3_sge_free(sc);
581 
582 	if (sc->regs_res != NULL)
583 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
584 		    sc->regs_res);
585 
586 	mtx_destroy(&sc->mdio_lock);
587 	mtx_destroy(&sc->sge.reg_lock);
588 	mtx_destroy(&sc->lock);
589 
590 	return;
591 }
592 
593 /**
594  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
595  *	@sc: the controller softc
596  *
597  *	Determines how many sets of SGE queues to use and initializes them.
598  *	We support multiple queue sets per port if we have MSI-X, otherwise
599  *	just one queue set per port.
600  */
601 static int
602 setup_sge_qsets(adapter_t *sc)
603 {
604 	int i, j, err, irq_idx, qset_idx;
605 	u_int ntxq = SGE_TXQ_PER_SET;
606 
607 	if ((err = t3_sge_alloc(sc)) != 0) {
608 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
609 		return (err);
610 	}
611 
612 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
613 		irq_idx = -1;
614 	else
615 		irq_idx = 0;
616 
617 	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
618 		struct port_info *pi = &sc->port[i];
619 
620 		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
621 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
622 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
623 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
624 			if (err) {
625 				t3_free_sge_resources(sc);
626 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
627 				return (err);
628 			}
629 		}
630 	}
631 
632 	return (0);
633 }
634 
635 static void
636 cxgb_teardown_msix(adapter_t *sc)
637 {
638 	int i, nqsets;
639 
640 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
641 		nqsets += sc->port[i].nqsets;
642 
643 	for (i = 0; i < nqsets; i++) {
644 		if (sc->msix_intr_tag[i] != NULL) {
645 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
646 			    sc->msix_intr_tag[i]);
647 			sc->msix_intr_tag[i] = NULL;
648 		}
649 		if (sc->msix_irq_res[i] != NULL) {
650 			bus_release_resource(sc->dev, SYS_RES_IRQ,
651 			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
652 			sc->msix_irq_res[i] = NULL;
653 		}
654 	}
655 }
656 
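/*
 * Hook up the MSI-X vectors: message 0 (rid 1) is the async/error
 * interrupt handled by cxgb_async_intr, and each queue set gets its own
 * message (rid k + 2) routed to t3_intr_msix.
 */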
657 static int
658 cxgb_setup_msix(adapter_t *sc, int msix_count)
659 {
660 	int i, j, k, nqsets, rid;
661 
662 	/* The first message indicates link changes and error conditions */
663 	sc->irq_rid = 1;
664 	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
665 	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
666 		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
667 		return (EINVAL);
668 	}
669 
670 	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
671 #ifdef INTR_FILTERS
672 			NULL,
673 #endif
674 		cxgb_async_intr, sc, &sc->intr_tag)) {
675 		device_printf(sc->dev, "Cannot set up interrupt\n");
676 		return (EINVAL);
677 	}
678 	for (i = k = 0; i < (sc)->params.nports; i++) {
679 		nqsets = sc->port[i].nqsets;
680 		for (j = 0; j < nqsets; j++, k++) {
681 			struct sge_qset *qs = &sc->sge.qs[k];
682 
683 			rid = k + 2;
684 			if (cxgb_debug)
685 				printf("rid=%d ", rid);
686 			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
687 			    sc->dev, SYS_RES_IRQ, &rid,
688 			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
689 				device_printf(sc->dev, "Cannot allocate "
690 				    "interrupt for message %d\n", rid);
691 				return (EINVAL);
692 			}
693 			sc->msix_irq_rid[k] = rid;
694 			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
695 			    INTR_MPSAFE|INTR_TYPE_NET,
696 #ifdef INTR_FILTERS
697 			NULL,
698 #endif
699 				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
700 				device_printf(sc->dev, "Cannot set up "
701 				    "interrupt for message %d\n", rid);
702 				return (EINVAL);
703 			}
704 		}
705 	}
706 
707 
708 	return (0);
709 }
710 
711 static int
712 cxgb_port_probe(device_t dev)
713 {
714 	struct port_info *p;
715 	char buf[80];
716 
717 	p = device_get_softc(dev);
718 
719 	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
720 	device_set_desc_copy(dev, buf);
721 	return (0);
722 }
723 
724 
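/*
 * Create the per-port character device (e.g. /dev/cxgb0) used for the
 * extension ioctls below.
 */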
725 static int
726 cxgb_makedev(struct port_info *pi)
727 {
728 
729 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
730 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
731 
732 	if (pi->port_cdev == NULL)
733 		return (ENOMEM);
734 
735 	pi->port_cdev->si_drv1 = (void *)pi;
736 
737 	return (0);
738 }
739 
740 
741 #ifdef TSO_SUPPORTED
742 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
743 /* Don't enable TSO6 yet */
744 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
745 #else
746 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
747 /* Don't enable TSO6 yet */
748 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
749 #define IFCAP_TSO4 0x0
750 #define CSUM_TSO   0x0
751 #endif
752 
753 
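/*
 * Port attach: set up the ifnet and its capabilities, register the
 * supported media types, create the port's character device and TX
 * taskqueue, and initialize per-port SGE state.
 */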
754 static int
755 cxgb_port_attach(device_t dev)
756 {
757 	struct port_info *p;
758 	struct ifnet *ifp;
759 	int err, media_flags;
760 	char buf[64];
761 
762 	p = device_get_softc(dev);
763 
764 	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
765 	mtx_init(&p->lock, buf, 0, MTX_DEF);
766 
767 	/* Allocate an ifnet object and set it up */
768 	ifp = p->ifp = if_alloc(IFT_ETHER);
769 	if (ifp == NULL) {
770 		device_printf(dev, "Cannot allocate ifnet\n");
771 		return (ENOMEM);
772 	}
773 
774 	/*
775 	 * Note that there is currently no watchdog timer.
776 	 */
777 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
778 	ifp->if_init = cxgb_init;
779 	ifp->if_softc = p;
780 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
781 	ifp->if_ioctl = cxgb_ioctl;
782 	ifp->if_start = cxgb_start;
783 	ifp->if_timer = 0;	/* Disable ifnet watchdog */
784 	ifp->if_watchdog = NULL;
785 
786 	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
787 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
788 	IFQ_SET_READY(&ifp->if_snd);
789 
790 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
791 	ifp->if_capabilities |= CXGB_CAP;
792 	ifp->if_capenable |= CXGB_CAP_ENABLE;
793 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
794 
795 	ether_ifattach(ifp, p->hw_addr);
796 #ifdef DEFAULT_JUMBO
797 	ifp->if_mtu = 9000;
798 #endif
799 	if ((err = cxgb_makedev(p)) != 0) {
800 		printf("makedev failed %d\n", err);
801 		return (err);
802 	}
803 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
804 	    cxgb_media_status);
805 
806 	if (!strcmp(p->port_type->desc, "10GBASE-CX4")) {
807 		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
808 	} else if (!strcmp(p->port_type->desc, "10GBASE-SR")) {
809 		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
810 	} else if (!strcmp(p->port_type->desc, "10GBASE-XR")) {
811 		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
812 	} else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) {
813 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
814 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
815 			    0, NULL);
816 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
817 			    0, NULL);
818 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
819 			    0, NULL);
820 		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
821 			    0, NULL);
822 		media_flags = 0;
823 	} else {
824 	        printf("unsupported media type %s\n", p->port_type->desc);
825 		return (ENXIO);
826 	}
827 	if (media_flags) {
828 		ifmedia_add(&p->media, media_flags, 0, NULL);
829 		ifmedia_set(&p->media, media_flags);
830 	} else {
831 		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
832 		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
833 	}
834 
835 
836 	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
837 #ifdef TASKQUEUE_CURRENT
838 	/* Create a taskqueue for handling TX without starvation */
839 	p->tq = taskqueue_create(buf, M_NOWAIT,
840 	    taskqueue_thread_enqueue, &p->tq);
841 #else
842 	/* Create a taskqueue for handling TX without starvation */
843 	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
844 	    taskqueue_thread_enqueue, &p->tq);
845 #endif
846 
847 	if (p->tq == NULL) {
848 		device_printf(dev, "failed to allocate port task queue\n");
849 		return (ENOMEM);
850 	}
851 	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
852 	    device_get_nameunit(dev));
853 	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
854 
855 	t3_sge_init_port(p);
856 
857 	return (0);
858 }
859 
860 static int
861 cxgb_port_detach(device_t dev)
862 {
863 	struct port_info *p;
864 
865 	p = device_get_softc(dev);
866 
867 	PORT_LOCK(p);
868 	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
869 		cxgb_stop_locked(p);
870 	PORT_UNLOCK(p);
871 
872 	mtx_destroy(&p->lock);
873 	if (p->tq != NULL) {
874 		taskqueue_drain(p->tq, &p->start_task);
875 		taskqueue_free(p->tq);
876 		p->tq = NULL;
877 	}
878 
879 	ether_ifdetach(p->ifp);
880 	if_free(p->ifp);
881 
882 	if (p->port_cdev != NULL)
883 		destroy_dev(p->port_cdev);
884 
885 	return (0);
886 }
887 
888 void
889 t3_fatal_err(struct adapter *sc)
890 {
891 	u_int fw_status[4];
892 
893 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
894 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
895 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
896 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
897 }
898 
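/*
 * Walk the device's PCI capability list and return the config-space
 * offset of capability 'cap', or 0 if it is not present.
 */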
899 int
900 t3_os_find_pci_capability(adapter_t *sc, int cap)
901 {
902 	device_t dev;
903 	struct pci_devinfo *dinfo;
904 	pcicfgregs *cfg;
905 	uint32_t status;
906 	uint8_t ptr;
907 
908 	dev = sc->dev;
909 	dinfo = device_get_ivars(dev);
910 	cfg = &dinfo->cfg;
911 
912 	status = pci_read_config(dev, PCIR_STATUS, 2);
913 	if (!(status & PCIM_STATUS_CAPPRESENT))
914 		return (0);
915 
916 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
917 	case 0:
918 	case 1:
919 		ptr = PCIR_CAP_PTR;
920 		break;
921 	case 2:
922 		ptr = PCIR_CAP_PTR_2;
923 		break;
924 	default:
925 		return (0);
926 		break;
927 	}
928 	ptr = pci_read_config(dev, ptr, 1);
929 
930 	while (ptr != 0) {
931 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
932 			return (ptr);
933 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
934 	}
935 
936 	return (0);
937 }
938 
939 int
940 t3_os_pci_save_state(struct adapter *sc)
941 {
942 	device_t dev;
943 	struct pci_devinfo *dinfo;
944 
945 	dev = sc->dev;
946 	dinfo = device_get_ivars(dev);
947 
948 	pci_cfg_save(dev, dinfo, 0);
949 	return (0);
950 }
951 
952 int
953 t3_os_pci_restore_state(struct adapter *sc)
954 {
955 	device_t dev;
956 	struct pci_devinfo *dinfo;
957 
958 	dev = sc->dev;
959 	dinfo = device_get_ivars(dev);
960 
961 	pci_cfg_restore(dev, dinfo);
962 	return (0);
963 }
964 
965 /**
966  *	t3_os_link_changed - handle link status changes
967  *	@adapter: the adapter associated with the link change
968  *	@port_id: the port index whose link status has changed
969  *	@link_stat: the new status of the link
970  *	@speed: the new speed setting
971  *	@duplex: the new duplex setting
972  *	@fc: the new flow-control setting
973  *
974  *	This is the OS-dependent handler for link status changes.  The OS
975  *	neutral handler takes care of most of the processing for these events,
976  *	then calls this handler for any OS-specific processing.
977  */
978 void
979 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
980      int duplex, int fc)
981 {
982 	struct port_info *pi = &adapter->port[port_id];
983 	struct cmac *mac = &adapter->port[port_id].mac;
984 
985 	if ((pi->ifp->if_flags & IFF_UP) == 0)
986 		return;
987 
988 	if (link_status) {
989 		t3_mac_enable(mac, MAC_DIRECTION_RX);
990 		if_link_state_change(pi->ifp, LINK_STATE_UP);
991 	} else {
992 		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
993 		pi->phy.ops->power_down(&pi->phy, 1);
994 		t3_mac_disable(mac, MAC_DIRECTION_RX);
995 		t3_link_start(&pi->phy, mac, &pi->link_config);
996 	}
997 }
998 
999 
1000 /*
1001  * Interrupt-context handler for external (PHY) interrupts.
1002  */
1003 void
1004 t3_os_ext_intr_handler(adapter_t *sc)
1005 {
1006 	if (cxgb_debug)
1007 		printf("t3_os_ext_intr_handler\n");
1008 	/*
1009 	 * Schedule a task to handle external interrupts as they may be slow
1010 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1011 	 * interrupts in the meantime and let the task reenable them when
1012 	 * it's done.
1013 	 */
1014 	ADAPTER_LOCK(sc);
1015 	if (sc->slow_intr_mask) {
1016 		sc->slow_intr_mask &= ~F_T3DBG;
1017 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1018 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1019 	}
1020 	ADAPTER_UNLOCK(sc);
1021 }
1022 
1023 void
1024 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1025 {
1026 
1027 	/*
1028 	 * The ifnet might not be allocated when this gets called, as
1029 	 * this is called early on in attach by t3_prep_adapter, so just
1030 	 * save the address off in the port structure.
1031 	 */
1032 	if (cxgb_debug)
1033 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1034 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1035 }
1036 
1037 /**
1038  *	cxgb_link_start - enable a port
1039  *	@p: the port to enable
1040  *
1041  *	Performs the MAC and PHY actions needed to enable a port.
1042  */
1043 static void
1044 cxgb_link_start(struct port_info *p)
1045 {
1046 	struct ifnet *ifp;
1047 	struct t3_rx_mode rm;
1048 	struct cmac *mac = &p->mac;
1049 
1050 	ifp = p->ifp;
1051 
1052 	t3_init_rx_mode(&rm, p);
1053 	t3_mac_reset(mac);
1054 	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1055 	t3_mac_set_address(mac, 0, p->hw_addr);
1056 	t3_mac_set_rx_mode(mac, &rm);
1057 	t3_link_start(&p->phy, mac, &p->link_config);
1058 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1059 }
1060 
1061 /**
1062  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1063  *	@adap: the adapter
1064  *
1065  *	Sets up RSS to distribute packets to multiple receive queues.  We
1066  *	configure the RSS CPU lookup table to distribute to the number of HW
1067  *	receive queues, and the response queue lookup table to narrow that
1068  *	down to the response queues actually configured for each port.
1069  *	We always configure the RSS mapping for two ports since the mapping
1070  *	table has plenty of entries.
1071  */
1072 static void
1073 setup_rss(adapter_t *adap)
1074 {
1075 	int i;
1076 	u_int nq0 = adap->port[0].nqsets;
1077 	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
1078 	uint8_t cpus[SGE_QSETS + 1];
1079 	uint16_t rspq_map[RSS_TABLE_SIZE];
1080 
1081 	for (i = 0; i < SGE_QSETS; ++i)
1082 		cpus[i] = i;
1083 	cpus[SGE_QSETS] = 0xff;
1084 
1085 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1086 		rspq_map[i] = i % nq0;
1087 		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
1088 	}
1089 
1090 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1091 	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
1092 	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
1093 }
1094 
1095 /*
1096  * Sends an mbuf to an offload queue driver
1097  * after dealing with any active network taps.
1098  */
1099 static inline int
1100 offload_tx(struct toedev *tdev, struct mbuf *m)
1101 {
1102 	int ret;
1103 
1104 	critical_enter();
1105 	ret = t3_offload_tx(tdev, m);
1106 	critical_exit();
1107 	return (ret);
1108 }
1109 
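/*
 * Program the source MAC table (SMT) entry for a port so that offloaded
 * traffic uses the port's MAC address.
 */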
1110 static int
1111 write_smt_entry(struct adapter *adapter, int idx)
1112 {
1113 	struct port_info *pi = &adapter->port[idx];
1114 	struct cpl_smt_write_req *req;
1115 	struct mbuf *m;
1116 
1117 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1118 		return (ENOMEM);
1119 
1120 	req = mtod(m, struct cpl_smt_write_req *);
1121 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1122 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1123 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1124 	req->iff = idx;
1125 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1126 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1127 
1128 	m_set_priority(m, 1);
1129 
1130 	offload_tx(&adapter->tdev, m);
1131 
1132 	return (0);
1133 }
1134 
1135 static int
1136 init_smt(struct adapter *adapter)
1137 {
1138 	int i;
1139 
1140 	for_each_port(adapter, i)
1141 		write_smt_entry(adapter, i);
1142 	return (0);
1143 }
1144 
1145 static void
1146 init_port_mtus(adapter_t *adapter)
1147 {
1148 	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1149 
1150 	if (adapter->port[1].ifp)
1151 		mtus |= adapter->port[1].ifp->if_mtu << 16;
1152 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1153 }
1154 
1155 static void
1156 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1157 			      int hi, int port)
1158 {
1159 	struct mbuf *m;
1160 	struct mngt_pktsched_wr *req;
1161 
1162 	m = m_gethdr(M_NOWAIT, MT_DATA);
1163 	if (m) {
1164 		req = mtod(m, struct mngt_pktsched_wr *);
1165 		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1166 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1167 		req->sched = sched;
1168 		req->idx = qidx;
1169 		req->min = lo;
1170 		req->max = hi;
1171 		req->binding = port;
1172 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1173 		t3_mgmt_tx(adap, m);
1174 	}
1175 }
1176 
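/*
 * Bind each port's queue sets to that port with packet-scheduler
 * management commands.
 */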
1177 static void
1178 bind_qsets(adapter_t *sc)
1179 {
1180 	int i, j;
1181 
1182 	for (i = 0; i < (sc)->params.nports; ++i) {
1183 		const struct port_info *pi = adap2pinfo(sc, i);
1184 
1185 		for (j = 0; j < pi->nqsets; ++j)
1186 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1187 					  -1, i);
1188 	}
1189 }
1190 
1191 /**
1192  *	cxgb_up - enable the adapter
1193  *	@adap: adapter being enabled
1194  *
1195  *	Called when the first port is enabled, this function performs the
1196  *	actions necessary to make an adapter operational, such as completing
1197  *	the initialization of HW modules, and enabling interrupts.
1198  *
1199  */
1200 static int
1201 cxgb_up(struct adapter *sc)
1202 {
1203 	int err = 0;
1204 
1205 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1206 
1207 		if ((sc->flags & FW_UPTODATE) == 0)
1208 			err = upgrade_fw(sc);
1209 
1210 		if (err)
1211 			goto out;
1212 
1213 		err = t3_init_hw(sc, 0);
1214 		if (err)
1215 			goto out;
1216 
1217 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1218 
1219 		err = setup_sge_qsets(sc);
1220 		if (err)
1221 			goto out;
1222 
1223 		setup_rss(sc);
1224 		sc->flags |= FULL_INIT_DONE;
1225 	}
1226 
1227 	t3_intr_clear(sc);
1228 
1229 	/* If it's MSI or INTx, allocate a single interrupt for everything */
1230 	if ((sc->flags & USING_MSIX) == 0) {
1231 		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1232 		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1233 			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
1234 			err = EINVAL;
1235 			goto out;
1236 		}
1237 		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1238 
1239 		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1240 #ifdef INTR_FILTERS
1241 			NULL,
1242 #endif
1243 			sc->cxgb_intr, sc, &sc->intr_tag)) {
1244 			device_printf(sc->dev, "Cannot set up interrupt\n");
1245 			err = EINVAL;
1246 			goto irq_err;
1247 		}
1248 	} else {
1249 		cxgb_setup_msix(sc, sc->msi_count);
1250 	}
1251 
1252 	t3_sge_start(sc);
1253 	t3_intr_enable(sc);
1254 
1255 	if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
1256 		bind_qsets(sc);
1257 	sc->flags |= QUEUES_BOUND;
1258 out:
1259 	return (err);
1260 irq_err:
1261 	CH_ERR(sc, "request_irq failed, err %d\n", err);
1262 	goto out;
1263 }
1264 
1265 
1266 /*
1267  * Release resources when all the ports and offloading have been stopped.
1268  */
1269 static void
1270 cxgb_down(struct adapter *sc)
1271 {
1272 	int i;
1273 
1274 	t3_sge_stop(sc);
1275 	ADAPTER_LOCK(sc);
1276 	t3_intr_disable(sc);
1277 	ADAPTER_UNLOCK(sc);
1278 
1279 
1280 	if (sc->intr_tag != NULL) {
1281 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1282 		sc->intr_tag = NULL;
1283 	}
1284 	if (sc->irq_res != NULL) {
1285 		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1286 		    sc->irq_rid, sc->irq_res);
1287 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1288 		    sc->irq_res);
1289 		sc->irq_res = NULL;
1290 	}
1291 
1292 	if (sc->flags & USING_MSIX)
1293 		cxgb_teardown_msix(sc);
1294 
1295 	callout_drain(&sc->sge_timer_ch);
1296 	if (sc->tq != NULL)
1297 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1298 	for (i = 0; i < sc->params.nports; i++)
1299 		if (sc->port[i].tq != NULL)
1300 			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
1301 
1302 }
1303 
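/*
 * Bring up the offload (TOE) side of the adapter: mark it open, make
 * sure the adapter itself is up, enable offload mode, program the MTU
 * tables and SMT entries, and notify registered offload clients.
 */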
1304 static int
1305 offload_open(struct port_info *pi)
1306 {
1307 	struct adapter *adapter = pi->adapter;
1308 	struct toedev *tdev = TOEDEV(pi->ifp);
1309 	int adap_up = adapter->open_device_map & PORT_MASK;
1310 	int err = 0;
1311 
1312 	if (atomic_cmpset_int(&adapter->open_device_map,
1313 		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
1314 		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
1315 		return (0);
1316 
1317 	ADAPTER_LOCK(pi->adapter);
1318 	if (!adap_up)
1319 		err = cxgb_up(adapter);
1320 	ADAPTER_UNLOCK(pi->adapter);
1321 	if (err < 0)
1322 		return (err);
1323 
1324 	t3_tp_set_offload_mode(adapter, 1);
1325 	tdev->lldev = adapter->port[0].ifp;
1326 	err = cxgb_offload_activate(adapter);
1327 	if (err)
1328 		goto out;
1329 
1330 	init_port_mtus(adapter);
1331 	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1332 		     adapter->params.b_wnd,
1333 		     adapter->params.rev == 0 ?
1334 		       adapter->port[0].ifp->if_mtu : 0xffff);
1335 	init_smt(adapter);
1336 
1337 	/* Call back all registered clients */
1338 	cxgb_add_clients(tdev);
1339 
1340 out:
1341 	/* restore them in case the offload module has changed them */
1342 	if (err) {
1343 		t3_tp_set_offload_mode(adapter, 0);
1344 		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1345 		cxgb_set_dummy_ops(tdev);
1346 	}
1347 	return (err);
1348 }
1349 
1350 static int
1351 offload_close(struct toedev *tdev)
1352 {
1353 	struct adapter *adapter = tdev2adap(tdev);
1354 
1355 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1356 		return (0);
1357 
1358 	/* Call back all registered clients */
1359 	cxgb_remove_clients(tdev);
1360 	tdev->lldev = NULL;
1361 	cxgb_set_dummy_ops(tdev);
1362 	t3_tp_set_offload_mode(adapter, 0);
1363 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1364 
1365 	ADAPTER_LOCK(adapter);
1366 	if (!adapter->open_device_map)
1367 		cxgb_down(adapter);
1368 	ADAPTER_UNLOCK(adapter);
1369 
1370 	cxgb_offload_deactivate(adapter);
1371 	return (0);
1372 }
1373 
1374 static void
1375 cxgb_init(void *arg)
1376 {
1377 	struct port_info *p = arg;
1378 
1379 	PORT_LOCK(p);
1380 	cxgb_init_locked(p);
1381 	PORT_UNLOCK(p);
1382 }
1383 
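/*
 * Bring a port up: bring the adapter up if this is the first open port,
 * optionally enable offload, start the MAC/PHY, and kick off the tick
 * callout.  Called with the port lock held.
 */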
1384 static void
1385 cxgb_init_locked(struct port_info *p)
1386 {
1387 	struct ifnet *ifp;
1388 	adapter_t *sc = p->adapter;
1389 	int err;
1390 
1391 	mtx_assert(&p->lock, MA_OWNED);
1392 	ifp = p->ifp;
1393 
1394 	ADAPTER_LOCK(p->adapter);
1395 	if ((sc->open_device_map == 0) && ((err = cxgb_up(sc)) < 0)) {
1396 		ADAPTER_UNLOCK(p->adapter);
1397 		cxgb_stop_locked(p);
1398 		return;
1399 	}
1400 	if (p->adapter->open_device_map == 0)
1401 		t3_intr_clear(sc);
1402 
1403 	setbit(&p->adapter->open_device_map, p->port);
1404 	ADAPTER_UNLOCK(p->adapter);
1405 
1406 	if (is_offload(sc) && !ofld_disable) {
1407 		err = offload_open(p);
1408 		if (err)
1409 			log(LOG_WARNING,
1410 			    "Could not initialize offload capabilities\n");
1411 	}
1412 	cxgb_link_start(p);
1413 	t3_link_changed(sc, p->port);
1414 	ifp->if_baudrate = p->link_config.speed * 1000000;
1415 
1416 	t3_port_intr_enable(sc, p->port);
1417 
1418 	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1419 	    cxgb_tick, sc);
1420 
1421 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1422 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1423 }
1424 
1425 static void
1426 cxgb_set_rxmode(struct port_info *p)
1427 {
1428 	struct t3_rx_mode rm;
1429 	struct cmac *mac = &p->mac;
1430 
1431 	mtx_assert(&p->lock, MA_OWNED);
1432 
1433 	t3_init_rx_mode(&rm, p);
1434 	t3_mac_set_rx_mode(mac, &rm);
1435 }
1436 
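/*
 * Take a port down: disable its interrupts, MAC, and PHY, clear it from
 * the open device map, and bring the adapter down if it was the last
 * open port.  Called with the port lock held.
 */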
1437 static void
1438 cxgb_stop_locked(struct port_info *p)
1439 {
1440 	struct ifnet *ifp;
1441 
1442 	mtx_assert(&p->lock, MA_OWNED);
1443 	mtx_assert(&p->adapter->lock, MA_NOTOWNED);
1444 
1445 	ifp = p->ifp;
1446 
1447 	t3_port_intr_disable(p->adapter, p->port);
1448 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1449 	p->phy.ops->power_down(&p->phy, 1);
1450 	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1451 
1452 	ADAPTER_LOCK(p->adapter);
1453 	clrbit(&p->adapter->open_device_map, p->port);
1454 	/*
1455 	 * XXX cancel check_task
1456 	 */
1457 	if (p->adapter->open_device_map == 0)
1458 		cxgb_down(p->adapter);
1459 	ADAPTER_UNLOCK(p->adapter);
1460 }
1461 
1462 static int
1463 cxgb_set_mtu(struct port_info *p, int mtu)
1464 {
1465 	struct ifnet *ifp = p->ifp;
1466 	int error = 0;
1467 
1468 	if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
1469 		error = EINVAL;
1470 	else if (ifp->if_mtu != mtu) {
1471 		PORT_LOCK(p);
1472 		ifp->if_mtu = mtu;
1473 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1474 			callout_stop(&p->adapter->cxgb_tick_ch);
1475 			cxgb_stop_locked(p);
1476 			cxgb_init_locked(p);
1477 		}
1478 		PORT_UNLOCK(p);
1479 	}
1480 	return (error);
1481 }
1482 
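/*
 * ifnet ioctl handler: MTU, address, flags, media, and capability
 * changes; everything else is passed to ether_ioctl().
 */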
1483 static int
1484 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1485 {
1486 	struct port_info *p = ifp->if_softc;
1487 	struct ifaddr *ifa = (struct ifaddr *)data;
1488 	struct ifreq *ifr = (struct ifreq *)data;
1489 	int flags, error = 0;
1490 	uint32_t mask;
1491 
1492 	/*
1493 	 * XXX need to check that we aren't in the middle of an unload
1494 	 */
1495 	switch (command) {
1496 	case SIOCSIFMTU:
1497 		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1498 		break;
1499 	case SIOCSIFADDR:
1500 	case SIOCGIFADDR:
1501 		PORT_LOCK(p);
1502 		if (ifa->ifa_addr->sa_family == AF_INET) {
1503 			ifp->if_flags |= IFF_UP;
1504 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1505 				cxgb_init_locked(p);
1506 			arp_ifinit(ifp, ifa);
1507 		} else
1508 			error = ether_ioctl(ifp, command, data);
1509 		PORT_UNLOCK(p);
1510 		break;
1511 	case SIOCSIFFLAGS:
1512 		PORT_LOCK(p);
1513 		if (ifp->if_flags & IFF_UP) {
1514 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1515 				flags = p->if_flags;
1516 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1517 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1518 					cxgb_set_rxmode(p);
1519 
1520 			} else
1521 				cxgb_init_locked(p);
1522 			p->if_flags = ifp->if_flags;
1523 		} else {
1524 			callout_stop(&p->adapter->cxgb_tick_ch);
1525 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1526 				cxgb_stop_locked(p);
1527 			} else {
1528 				adapter_t *sc = p->adapter;
1529 				callout_reset(&sc->cxgb_tick_ch,
1530 				    sc->params.stats_update_period * hz,
1531 				    cxgb_tick, sc);
1532 			}
1533 		}
1534 		PORT_UNLOCK(p);
1535 		break;
1536 	case SIOCSIFMEDIA:
1537 	case SIOCGIFMEDIA:
1538 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1539 		break;
1540 	case SIOCSIFCAP:
1541 		PORT_LOCK(p);
1542 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1543 		if (mask & IFCAP_TXCSUM) {
1544 			if (IFCAP_TXCSUM & ifp->if_capenable) {
1545 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1546 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1547 				    | CSUM_TSO);
1548 			} else {
1549 				ifp->if_capenable |= IFCAP_TXCSUM;
1550 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1551 			}
1552 		}
		if (mask & IFCAP_RXCSUM) {
1553 			if (IFCAP_RXCSUM & ifp->if_capenable) {
1554 				ifp->if_capenable &= ~IFCAP_RXCSUM;
1555 			} else {
1556 				ifp->if_capenable |= IFCAP_RXCSUM;
1557 			}
1558 		}
1559 		if (mask & IFCAP_TSO4) {
1560 			if (IFCAP_TSO4 & ifp->if_capenable) {
1561 				ifp->if_capenable &= ~IFCAP_TSO4;
1562 				ifp->if_hwassist &= ~CSUM_TSO;
1563 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1564 				ifp->if_capenable |= IFCAP_TSO4;
1565 				ifp->if_hwassist |= CSUM_TSO;
1566 			} else {
1567 				if (cxgb_debug)
1568 					printf("cxgb requires tx checksum offload"
1569 					    " be enabled to use TSO\n");
1570 				error = EINVAL;
1571 			}
1572 		}
1573 		PORT_UNLOCK(p);
1574 		break;
1575 	default:
1576 		error = ether_ioctl(ifp, command, data);
1577 		break;
1578 	}
1579 	return (error);
1580 }
1581 
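/*
 * Dequeue packets from the interface send queue, collapsing or
 * defragmenting long mbuf chains as needed, and hand them to t3_encap()
 * until 'txmax' descriptors have been used or the queue fills up.
 */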
1582 static int
1583 cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
1584 {
1585 	struct sge_qset *qs;
1586 	struct sge_txq *txq;
1587 	struct port_info *p = ifp->if_softc;
1588 	struct mbuf *m0, *m = NULL;
1589 	int err, in_use_init;
1590 
1591 	if (!p->link_config.link_ok)
1592 		return (ENXIO);
1593 
1594 	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1595 		return (ENOBUFS);
1596 
1597 	qs = &p->adapter->sge.qs[p->first_qset];
1598 	txq = &qs->txq[TXQ_ETH];
1599 	err = 0;
1600 
1601 	mtx_lock(&txq->lock);
1602 	in_use_init = txq->in_use;
1603 	while ((txq->in_use - in_use_init < txmax) &&
1604 	    (txq->size > txq->in_use + TX_MAX_DESC)) {
1605 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1606 		if (m == NULL)
1607 			break;
1608 		/*
1609 		 * Convert chain to M_IOVEC
1610 		 */
1611 		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
1612 		m0 = m;
1613 #ifdef INVARIANTS
1614 		/*
1615 		 * Clean up after net stack sloppiness
1616 		 * before calling m_sanity
1617 		 */
1618 		m0 = m->m_next;
1619 		while (m0) {
1620 			m0->m_flags &= ~M_PKTHDR;
1621 			m0 = m0->m_next;
1622 		}
1623 		m_sanity(m, 0);
1624 		m0 = m;
1625 #endif
1626 		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
1627 		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
1628 			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
1629 				m = m0;
1630 				m_collapse(m, TX_MAX_SEGS, &m0);
1631 			} else
1632 				break;
1633 		}
1634 		m = m0;
1635 		if ((err = t3_encap(p, &m)) != 0)
1636 			break;
1637 		BPF_MTAP(ifp, m);
1638 	}
1639 	mtx_unlock(&txq->lock);
1640 
1641 	if (__predict_false(err)) {
1642 		if (err == ENOMEM) {
1643 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1644 			IFQ_LOCK(&ifp->if_snd);
1645 			IFQ_DRV_PREPEND(&ifp->if_snd, m);
1646 			IFQ_UNLOCK(&ifp->if_snd);
1647 		}
1648 	}
1649 	if (err == 0 && m == NULL)
1650 		err = ENOBUFS;
1651 	else if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC) &&
1652 	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1653 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1654 		err = ENOSPC;
1655 	}
1656 	return (err);
1657 }
1658 
1659 static void
1660 cxgb_start_proc(void *arg, int ncount)
1661 {
1662 	struct ifnet *ifp = arg;
1663 	struct port_info *pi = ifp->if_softc;
1664 	struct sge_qset *qs;
1665 	struct sge_txq *txq;
1666 	int error;
1667 
1668 	qs = &pi->adapter->sge.qs[pi->first_qset];
1669 	txq = &qs->txq[TXQ_ETH];
1670 
1671 	do {
1672 		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
1673 			taskqueue_enqueue(pi->adapter->tq,
1674 			    &pi->timer_reclaim_task);
1675 
1676 		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1677 	} while (error == 0);
1678 }
1679 
1680 static void
1681 cxgb_start(struct ifnet *ifp)
1682 {
1683 	struct port_info *pi = ifp->if_softc;
1684 	struct sge_qset *qs;
1685 	struct sge_txq *txq;
1686 	int err;
1687 
1688 	qs = &pi->adapter->sge.qs[pi->first_qset];
1689 	txq = &qs->txq[TXQ_ETH];
1690 
1691 	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
1692 		taskqueue_enqueue(pi->adapter->tq,
1693 		    &pi->timer_reclaim_task);
1694 
1695 	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1696 
1697 	if (err == 0)
1698 		taskqueue_enqueue(pi->tq, &pi->start_task);
1699 }
1700 
1701 
1702 static int
1703 cxgb_media_change(struct ifnet *ifp)
1704 {
1705 	if_printf(ifp, "media change not supported\n");
1706 	return (ENXIO);
1707 }
1708 
1709 static void
1710 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1711 {
1712 	struct port_info *p = ifp->if_softc;
1713 
1714 	ifmr->ifm_status = IFM_AVALID;
1715 	ifmr->ifm_active = IFM_ETHER;
1716 
1717 	if (!p->link_config.link_ok)
1718 		return;
1719 
1720 	ifmr->ifm_status |= IFM_ACTIVE;
1721 
1722 	switch (p->link_config.speed) {
1723 	case 10:
1724 		ifmr->ifm_active |= IFM_10_T;
1725 		break;
1726 	case 100:
1727 		ifmr->ifm_active |= IFM_100_TX;
1728 		break;
1729 	case 1000:
1730 		ifmr->ifm_active |= IFM_1000_T;
1731 		break;
1732 	}
1733 
1734 	if (p->link_config.duplex)
1735 		ifmr->ifm_active |= IFM_FDX;
1736 	else
1737 		ifmr->ifm_active |= IFM_HDX;
1738 }
1739 
1740 static void
1741 cxgb_async_intr(void *data)
1742 {
1743 	adapter_t *sc = data;
1744 
1745 	if (cxgb_debug)
1746 		device_printf(sc->dev, "cxgb_async_intr\n");
1747 
1748 	t3_slow_intr_handler(sc);
1749 
1750 }
1751 
1752 static void
1753 cxgb_ext_intr_handler(void *arg, int count)
1754 {
1755 	adapter_t *sc = (adapter_t *)arg;
1756 
1757 	if (cxgb_debug)
1758 		printf("cxgb_ext_intr_handler\n");
1759 
1760 	t3_phy_intr_handler(sc);
1761 
1762 	/* Now reenable external interrupts */
1763 	ADAPTER_LOCK(sc);
1764 	if (sc->slow_intr_mask) {
1765 		sc->slow_intr_mask |= F_T3DBG;
1766 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
1767 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1768 	}
1769 	ADAPTER_UNLOCK(sc);
1770 }
1771 
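/*
 * Poll link state on ports whose PHYs cannot signal changes with an
 * interrupt and refresh each port's if_baudrate.
 */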
1772 static void
1773 check_link_status(adapter_t *sc)
1774 {
1775 	int i;
1776 
1777 	for (i = 0; i < (sc)->params.nports; ++i) {
1778 		struct port_info *p = &sc->port[i];
1779 
1780 		if (!(p->port_type->caps & SUPPORTED_IRQ))
1781 			t3_link_changed(sc, i);
1782 		p->ifp->if_baudrate = p->link_config.speed * 1000000;
1783 	}
1784 }
1785 
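/*
 * Run the T3B2 MAC watchdog on every running port and reset the MAC
 * when the watchdog reports that it is necessary.
 */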
1786 static void
1787 check_t3b2_mac(struct adapter *adapter)
1788 {
1789 	int i;
1790 
1791 	for_each_port(adapter, i) {
1792 		struct port_info *p = &adapter->port[i];
1793 		struct ifnet *ifp = p->ifp;
1794 		int status;
1795 
1796 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1797 			continue;
1798 
1799 		status = 0;
1800 		PORT_LOCK(p);
1801 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
1802 			status = t3b2_mac_watchdog_task(&p->mac);
1803 		if (status == 1)
1804 			p->mac.stats.num_toggled++;
1805 		else if (status == 2) {
1806 			struct cmac *mac = &p->mac;
1807 
1808 			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
1809 			    + ETHER_VLAN_ENCAP_LEN);
1810 			t3_mac_set_address(mac, 0, p->hw_addr);
1811 			cxgb_set_rxmode(p);
1812 			t3_link_start(&p->phy, mac, &p->link_config);
1813 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1814 			t3_port_intr_enable(adapter, p->port);
1815 			p->mac.stats.num_resets++;
1816 		}
1817 		PORT_UNLOCK(p);
1818 	}
1819 }
1820 
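/*
 * Periodic adapter timer: polls link state when link polling is
 * enabled, reschedules itself, and runs the T3B2 MAC watchdog on
 * rev B2 adapters.
 */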
1821 static void
1822 cxgb_tick(void *arg)
1823 {
1824 	adapter_t *sc = (adapter_t *)arg;
1825 	const struct adapter_params *p = &sc->params;
1826 
1827 	if (p->linkpoll_period)
1828 		check_link_status(sc);
1829 	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1830 	    cxgb_tick, sc);
1831 
1832 	/*
1833 	 * The adapter lock can currently only be acquired after the
1834 	 * port lock.
1835 	 */
1836 	ADAPTER_UNLOCK(sc);
1837 
1838 	if (p->rev == T3_REV_B2)
1839 		check_t3b2_mac(sc);
1840 }
1841 
1842 static int
1843 in_range(int val, int lo, int hi)
1844 {
1845 	return val < 0 || (val <= hi && val >= lo);
1846 }
1847 
1848 static int
1849 cxgb_extension_open(struct cdev *dev, int flags, int fmp, d_thread_t *td)
1850 cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
1851        return (0);
1852 }
1853 
1854 static int
1855 cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
1856 {
1857        return (0);
1858 }
1859 
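/*
 * Privileged ioctls on the per-port character device: MDIO register
 * access, raw register reads/writes, and SGE context, descriptor, and
 * queue-set inspection and tuning.
 */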
1860 static int
1861 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
1862     int fflag, struct thread *td)
1863 {
1864 	int mmd, error = 0;
1865 	struct port_info *pi = dev->si_drv1;
1866 	adapter_t *sc = pi->adapter;
1867 
1868 #ifdef PRIV_SUPPORTED
1869 	if (priv_check(td, PRIV_DRIVER)) {
1870 		if (cxgb_debug)
1871 			printf("user does not have access to privileged ioctls\n");
1872 		return (EPERM);
1873 	}
1874 #else
1875 	if (suser(td)) {
1876 		if (cxgb_debug)
1877 			printf("user does not have access to privileged ioctls\n");
1878 		return (EPERM);
1879 	}
1880 #endif
1881 
1882 	switch (cmd) {
1883 	case SIOCGMIIREG: {
1884 		uint32_t val;
1885 		struct cphy *phy = &pi->phy;
1886 		struct mii_data *mid = (struct mii_data *)data;
1887 
1888 		if (!phy->mdio_read)
1889 			return (EOPNOTSUPP);
1890 		if (is_10G(sc)) {
1891 			mmd = mid->phy_id >> 8;
1892 			if (!mmd)
1893 				mmd = MDIO_DEV_PCS;
1894 			else if (mmd > MDIO_DEV_XGXS)
1895 				return (EINVAL);
1896 
1897 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
1898 					     mid->reg_num, &val);
1899 		} else
1900 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
1901 					     mid->reg_num & 0x1f, &val);
1902 		if (error == 0)
1903 			mid->val_out = val;
1904 		break;
1905 	}
1906 	case SIOCSMIIREG: {
1907 		struct cphy *phy = &pi->phy;
1908 		struct mii_data *mid = (struct mii_data *)data;
1909 
1910 		if (!phy->mdio_write)
1911 			return (EOPNOTSUPP);
1912 		if (is_10G(sc)) {
1913 			mmd = mid->phy_id >> 8;
1914 			if (!mmd)
1915 				mmd = MDIO_DEV_PCS;
1916 			else if (mmd > MDIO_DEV_XGXS)
1917 				return (EINVAL);
1918 
1919 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
1920 					      mmd, mid->reg_num, mid->val_in);
1921 		} else
1922 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
1923 					      mid->reg_num & 0x1f,
1924 					      mid->val_in);
1925 		break;
1926 	}
1927 	case CHELSIO_SETREG: {
1928 		struct ch_reg *edata = (struct ch_reg *)data;
1929 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
1930 			return (EFAULT);
1931 		t3_write_reg(sc, edata->addr, edata->val);
1932 		break;
1933 	}
1934 	case CHELSIO_GETREG: {
1935 		struct ch_reg *edata = (struct ch_reg *)data;
1936 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
1937 			return (EFAULT);
1938 		edata->val = t3_read_reg(sc, edata->addr);
1939 		break;
1940 	}
1941 	case CHELSIO_GET_SGE_CONTEXT: {
1942 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
1943 		mtx_lock(&sc->sge.reg_lock);
1944 		switch (ecntxt->cntxt_type) {
1945 		case CNTXT_TYPE_EGRESS:
1946 			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
1947 			    ecntxt->data);
1948 			break;
1949 		case CNTXT_TYPE_FL:
1950 			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
1951 			    ecntxt->data);
1952 			break;
1953 		case CNTXT_TYPE_RSP:
1954 			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
1955 			    ecntxt->data);
1956 			break;
1957 		case CNTXT_TYPE_CQ:
1958 			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
1959 			    ecntxt->data);
1960 			break;
1961 		default:
1962 			error = EINVAL;
1963 			break;
1964 		}
1965 		mtx_unlock(&sc->sge.reg_lock);
1966 		break;
1967 	}
1968 	case CHELSIO_GET_SGE_DESC: {
1969 		struct ch_desc *edesc = (struct ch_desc *)data;
1970 		int ret;
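		/*
		 * queue_num encodes qset * 6 + ring: each qset exposes six
		 * rings through t3_get_desc() (its TX queues, response
		 * queue and free lists), hence the divide/modulo by 6.
		 */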
1971 		if (edesc->queue_num >= SGE_QSETS * 6)
1972 			return (EINVAL);
1973 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
1974 		    edesc->queue_num % 6, edesc->idx, edesc->data);
1975 		if (ret < 0)
1976 			return (EINVAL);
1977 		edesc->size = ret;
1978 		break;
1979 	}
1980 	case CHELSIO_SET_QSET_PARAMS: {
1981 		struct qset_params *q;
1982 		struct ch_qset_params *t = (struct ch_qset_params *)data;
1983 
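		/*
		 * Negative members mean "leave unchanged", which is why
		 * in_range() passes negatives.  Once FULL_INIT_DONE is set
		 * the rings have been sized and allocated, so only the
		 * interrupt latency may still be updated.
		 */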
1984 		if (t->qset_idx >= SGE_QSETS)
1985 			return (EINVAL);
1986 		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
1987 		    !in_range(t->cong_thres, 0, 255) ||
1988 		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
1989 			      MAX_TXQ_ENTRIES) ||
1990 		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
1991 			      MAX_TXQ_ENTRIES) ||
1992 		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
1993 			      MAX_CTRL_TXQ_ENTRIES) ||
1994 		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
1995 		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
1996 			      MAX_RX_JUMBO_BUFFERS) ||
1997 		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
1998 			return (EINVAL);
1999 		if ((sc->flags & FULL_INIT_DONE) &&
2000 		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2001 		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2002 		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2003 		     t->polling >= 0 || t->cong_thres >= 0))
2004 			return (EBUSY);
2005 
2006 		q = &sc->params.sge.qset[t->qset_idx];
2007 
2008 		if (t->rspq_size >= 0)
2009 			q->rspq_size = t->rspq_size;
2010 		if (t->fl_size[0] >= 0)
2011 			q->fl_size = t->fl_size[0];
2012 		if (t->fl_size[1] >= 0)
2013 			q->jumbo_size = t->fl_size[1];
2014 		if (t->txq_size[0] >= 0)
2015 			q->txq_size[0] = t->txq_size[0];
2016 		if (t->txq_size[1] >= 0)
2017 			q->txq_size[1] = t->txq_size[1];
2018 		if (t->txq_size[2] >= 0)
2019 			q->txq_size[2] = t->txq_size[2];
2020 		if (t->cong_thres >= 0)
2021 			q->cong_thres = t->cong_thres;
2022 		if (t->intr_lat >= 0) {
2023 			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2024 
2025 			q->coalesce_nsecs = t->intr_lat*1000;
2026 			t3_update_qset_coalesce(qs, q);
2027 		}
2028 		break;
2029 	}
2030 	case CHELSIO_GET_QSET_PARAMS: {
2031 		struct qset_params *q;
2032 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2033 
2034 		if (t->qset_idx >= SGE_QSETS)
2035 			return (EINVAL);
2036 
2037 		q = &sc->params.sge.qset[t->qset_idx];
2038 		t->rspq_size   = q->rspq_size;
2039 		t->txq_size[0] = q->txq_size[0];
2040 		t->txq_size[1] = q->txq_size[1];
2041 		t->txq_size[2] = q->txq_size[2];
2042 		t->fl_size[0]  = q->fl_size;
2043 		t->fl_size[1]  = q->jumbo_size;
2044 		t->polling     = q->polling;
2045 		t->intr_lat    = q->coalesce_nsecs / 1000;
2046 		t->cong_thres  = q->cong_thres;
2047 		break;
2048 	}
2049 	case CHELSIO_SET_QSET_NUM: {
2050 		struct ch_reg *edata = (struct ch_reg *)data;
2051 		unsigned int port_idx = pi->port;
2052 
2053 		if (sc->flags & FULL_INIT_DONE)
2054 			return (EBUSY);
2055 		if (edata->val < 1 ||
2056 		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2057 			return (EINVAL);
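		/*
		 * Both ports carve their qsets out of the same SGE_QSETS
		 * pool; "!port_idx" assumes exactly two ports (see the XXX
		 * below), so the other port's allocation bounds this one.
		 */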
2058 		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2059 			return (EINVAL);
2060 		sc->port[port_idx].nqsets = edata->val;
2061 		sc->port[0].first_qset = 0;
2062 		/*
2063 		 * XXX hardcode ourselves to 2 ports just like LEEENUX
2064 		 */
2065 		sc->port[1].first_qset = sc->port[0].nqsets;
2066 		break;
2067 	}
2068 	case CHELSIO_GET_QSET_NUM: {
2069 		struct ch_reg *edata = (struct ch_reg *)data;
2070 		edata->val = pi->nqsets;
2071 		break;
2072 	}
2073 #ifdef notyet
2074 	case CHELSIO_LOAD_FW:
2075 	case CHELSIO_GET_PM:
2076 	case CHELSIO_SET_PM:
2077 		return (EOPNOTSUPP);
2078 		break;
2079 #endif
2080 	case CHELSIO_SETMTUTAB: {
2081 		struct ch_mtus *m = (struct ch_mtus *)data;
2082 		int i;
2083 
2084 		if (!is_offload(sc))
2085 			return (EOPNOTSUPP);
2086 		if (offload_running(sc))
2087 			return (EBUSY);
2088 		if (m->nmtus != NMTUS)
2089 			return (EINVAL);
2090 		if (m->mtus[0] < 81)         /* accommodate SACK */
2091 			return (EINVAL);
2092 
2093 		/*
2094 		 * MTUs must be in ascending order
2095 		 */
2096 		for (i = 1; i < NMTUS; ++i)
2097 			if (m->mtus[i] < m->mtus[i - 1])
2098 				return (EINVAL);
2099 
2100 		memcpy(sc->params.mtus, m->mtus,
2101 		       sizeof(sc->params.mtus));
2102 		break;
2103 	}
2104 	case CHELSIO_GETMTUTAB: {
2105 		struct ch_mtus *m = (struct ch_mtus *)data;
2106 
2107 		if (!is_offload(sc))
2108 			return (EOPNOTSUPP);
2109 
2110 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2111 		m->nmtus = NMTUS;
2112 		break;
2113 	}
2114 	case CHELSIO_DEVUP:
2115 		if (!is_offload(sc))
2116 			return (EOPNOTSUPP);
2117 		return offload_open(pi);
2118 		break;
2119 	case CHELSIO_GET_MEM: {
2120 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2121 		struct mc7 *mem;
2122 		uint8_t *useraddr;
2123 		u64 buf[32];
2124 
2125 		if (!is_offload(sc))
2126 			return (EOPNOTSUPP);
2127 		if (!(sc->flags & FULL_INIT_DONE))
2128 			return (EIO);         /* need the memory controllers */
2129 		if ((t->addr & 0x7) || (t->len & 0x7))
2130 			return (EINVAL);
2131 		if (t->mem_id == MEM_CM)
2132 			mem = &sc->cm;
2133 		else if (t->mem_id == MEM_PMRX)
2134 			mem = &sc->pmrx;
2135 		else if (t->mem_id == MEM_PMTX)
2136 			mem = &sc->pmtx;
2137 		else
2138 			return (EINVAL);
2139 
2140 		/*
2141 		 * Version scheme:
2142 		 * bits 0..9: chip version
2143 		 * bits 10..15: chip revision
2144 		 */
2145 		t->version = 3 | (sc->params.rev << 10);
2146 
2147 		/*
2148 		 * Read 256 bytes at a time as len can be large and we don't
2149 		 * want to use huge intermediate buffers.
2150 		 */
2151 		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
2152 		while (t->len) {
2153 			unsigned int chunk = min(t->len, sizeof(buf));
2154 
2155 			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2156 			if (error)
2157 				return (-error);
2158 			if (copyout(buf, useraddr, chunk))
2159 				return (EFAULT);
2160 			useraddr += chunk;
2161 			t->addr += chunk;
2162 			t->len -= chunk;
2163 		}
2164 		break;
2165 	}
2166 	case CHELSIO_READ_TCAM_WORD: {
2167 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2168 
2169 		if (!is_offload(sc))
2170 			return (EOPNOTSUPP);
2171 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2172 		break;
2173 	}
2174 	case CHELSIO_SET_TRACE_FILTER: {
2175 		struct ch_trace *t = (struct ch_trace *)data;
2176 		const struct trace_params *tp;
2177 
2178 		tp = (const struct trace_params *)&t->sip;
2179 		if (t->config_tx)
2180 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2181 					       t->trace_tx);
2182 		if (t->config_rx)
2183 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2184 					       t->trace_rx);
2185 		break;
2186 	}
2187 	case CHELSIO_SET_PKTSCHED: {
2188 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2189 		if (sc->open_device_map == 0)
2190 			return (EAGAIN);
2191 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2192 		    p->binding);
2193 		break;
2194 	}
2195 	case CHELSIO_IFCONF_GETREGS: {
2196 		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2197 		int reglen = cxgb_get_regs_len();
2198 		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2199 		if (buf == NULL)
2200 			return (ENOMEM);
2201 		if (regs->len > reglen)
2202 			regs->len = reglen;
2203 		else if (regs->len < reglen) {
2204 			error = E2BIG;
2205 			goto done;
2206 		}
2207 		cxgb_get_regs(sc, regs, buf);
2208 		error = copyout(buf, regs->data, reglen);
2209 
2210 		done:
2211 		free(buf, M_DEVBUF);
2212 
2213 		break;
2214 	}
2215 	case CHELSIO_SET_HW_SCHED: {
2216 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2217 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2218 
2219 		if ((sc->flags & FULL_INIT_DONE) == 0)
2220 			return (EAGAIN);       /* need TP to be initialized */
2221 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2222 		    !in_range(t->channel, 0, 1) ||
2223 		    !in_range(t->kbps, 0, 10000000) ||
2224 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2225 		    !in_range(t->flow_ipg, 0,
2226 			      dack_ticks_to_usec(sc, 0x7ff)))
2227 			return (EINVAL);
2228 
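		/*
		 * As with the qset parameters above, a negative member
		 * means "leave unchanged"; only the fields that are >= 0
		 * are applied below.
		 */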
2229 		if (t->kbps >= 0) {
2230 			error = t3_config_sched(sc, t->kbps, t->sched);
2231 			if (error < 0)
2232 				return (-error);
2233 		}
2234 		if (t->class_ipg >= 0)
2235 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2236 		if (t->flow_ipg >= 0) {
2237 			t->flow_ipg *= 1000;     /* us -> ns */
2238 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2239 		}
2240 		if (t->mode >= 0) {
2241 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2242 
2243 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2244 					 bit, t->mode ? bit : 0);
2245 		}
2246 		if (t->channel >= 0)
2247 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2248 					 1 << t->sched, t->channel << t->sched);
2249 		break;
2250 	}
2251 	default:
2252 		return (EOPNOTSUPP);
2253 		break;
2254 	}
2255 
2256 	return (error);
2257 }
2258 
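/*
 * Read registers start..end (byte offsets) and store each value at the same
 * byte offset within buf, so the dump mirrors the chip's register layout.
 */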
2259 static __inline void
2260 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2261     unsigned int end)
2262 {
2263 	uint32_t *p = (uint32_t *)(buf + start);
2264 
2265 	for ( ; start <= end; start += sizeof(uint32_t))
2266 		*p++ = t3_read_reg(ap, start);
2267 }
2268 
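/* Size of the register dump produced by cxgb_get_regs() below. */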
2269 #define T3_REGMAP_SIZE (3 * 1024)
2270 static int
2271 cxgb_get_regs_len(void)
2272 {
2273 	return T3_REGMAP_SIZE;
2274 }
2275 #undef T3_REGMAP_SIZE
2276 
2277 static void
2278 cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2279 {
2280 
2281 	/*
2282 	 * Version scheme:
2283 	 * bits 0..9: chip version
2284 	 * bits 10..15: chip revision
2285 	 * bit 31: set for PCIe cards
2286 	 */
2287 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2288 
2289 	/*
2290 	 * We skip the MAC statistics registers because they are clear-on-read.
2291 	 * Also reading multi-register stats would need to synchronize with the
2292 	 * periodic mac stats accumulation.  Hard to justify the complexity.
2293 	 */
2294 	memset(buf, 0, REGDUMP_SIZE);
2295 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2296 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2297 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2298 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2299 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2300 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2301 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2302 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2303 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2304 }
2305