1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
29 * Use is subject to license terms.
30 */
31
32 /*
33 * Copyright (c) 2014, Joyent, Inc.
34 */
35
36 #ifndef lint
37 static const char __idstring[] =
38 "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
39 #endif
40
41 #define MXGEFW_NDIS
42 #include "myri10ge_var.h"
43 #include "rss_eth_z8e.h"
44 #include "rss_ethp_z8e.h"
45 #include "mcp_gen_header.h"
46
/* MTU limits (bytes) for the hardware and for what we advertise to GLD */
#define MYRI10GE_MAX_ETHER_MTU 9014
#define MYRI10GE_MAX_GLD_MTU	9000
#define MYRI10GE_MIN_GLD_MTU	1500

/* interface run-state values */
#define MYRI10GE_ETH_STOPPED 0
#define MYRI10GE_ETH_STOPPING 1
#define MYRI10GE_ETH_STARTING 2
#define MYRI10GE_ETH_RUNNING 3
#define MYRI10GE_ETH_OPEN_FAILED 4
#define MYRI10GE_ETH_SUSPENDED_RUNNING 5
57
/*
 * Driver tunables.  The non-static ones have external linkage so
 * they can be reached from other compilation units or patched.
 */
static int myri10ge_small_bytes = 510;	/* payload bytes per "small" rx slice */
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;	/* x86-only workaround knob */
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;	/* gates informational printf()s */
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
int myri10ge_tx_handles_initial = 128;	/* tx DMA handles pre-allocated at ring prep */

static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;
85
/* DDI entry points, defined later in this file */
static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);


/* Loadable kernel module linkage */
static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};


static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

/* Ethernet broadcast address */
unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
107
/* DMA attributes for miscellaneous (command/status) allocations */
static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
122
123 /*
124 * The Myri10GE NIC has the following constraints on receive buffers:
125 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
126 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
127 */
128
/* Jumbo rx buffers: 4KB-aligned, so the 4KB-crossing rule is satisfied */
static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
143
/*
 * Standard-MTU rx buffers: on non-sparc, use a relaxed 0x80 alignment
 * with a 4KB segment limit so two buffers fit per page while still
 * honoring the no-4KB-crossing constraint; myri10ge_add_jbuf() falls
 * back to 4KB alignment if a buffer is seen to cross a boundary.
 */
static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
166
/* Transmit mappings: byte-aligned with an effectively unlimited sglist */
static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
181
/* WC != 0 selects merging-OK ("write combining") device access on non-sparc */
#if defined sparc64 || defined __sparcv9
#define	WC 0
#else
#define	WC 1
#endif

struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,		/* version */
	DDI_NEVERSWAP_ACC,		/* endian flash */
#if WC
	DDI_MERGING_OK_ACC		/* data order */
#else
	DDI_STRICTORDER_ACC
#endif
};
197
static void myri10ge_watchdog(void *arg);

/*
 * Default MTU: the MYRICOM_PRIV build enables jumbo frames by
 * default, otherwise standard Ethernet sizing is used.
 */
#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#define MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MAX_GLD_MTU
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#define MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MIN_GLD_MTU
#endif
/* initial and maximum counts of jumbo ("big") rx buffers */
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;
209
210
/*
 * Allocate, map, and DMA-bind a single physically contiguous region
 * of "len" bytes described by "attr"/"accattr".  On success, returns
 * the kernel virtual address and fills dma->low/dma->high with the
 * big-endian halves of the bus address (ready to hand to the NIC).
 * Returns NULL on failure.  "warn" controls diagnostics; "wait" is
 * the DDI callback (DDI_DMA_SLEEP/DDI_DMA_DONTWAIT) used for all
 * three DDI allocation steps.  Free with myri10ge_dma_free().
 */
caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	/* step 1: the DMA handle */
	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	/* step 2: the memory itself (may round len up to real_length) */
	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	/* step 3: bind; only the requested "len" bytes are mapped */
	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	/* a single cookie is required; callers assume one contiguous region */
	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n ");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void*) dip, len, (void*) attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void*) accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void*) dma);
	}
	return (NULL);

}
284
/*
 * Tear down a region created by myri10ge_dma_alloc(): unbind the
 * mapping, then free the memory and finally the DMA handle.
 */
void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}
292
/*
 * Copy "size" bytes to the (PIO-mapped) destination "to" one 32-bit
 * word at a time.  "size" must be a multiple of 4 and both pointers
 * must be 4-byte aligned.  The destination is written through a
 * volatile pointer so every store is actually issued.
 */
static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	volatile uint32_t *dst32 = (volatile uint32_t *)to;
	size_t nwords;

	for (nwords = size / 4; nwords != 0; nwords--)
		*dst32++ = *from32++;
}
306
#if defined(_LP64)
/*
 * Copy "size" bytes to the (PIO-mapped) destination "to" one 64-bit
 * word at a time.  "size" must be a multiple of 8 and both pointers
 * must be 8-byte aligned.  Only built on LP64 kernels.
 */
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	volatile uint64_t *dst64 = (volatile uint64_t *)to;
	size_t nwords;

	for (nwords = size / 8; nwords != 0; nwords--)
		*dst64++ = *from64++;
}
#endif
322
323 /*
324 * This routine copies memory from the host to the NIC.
325 * The "size" argument must always be a multiple of
326 * the size of long (4 or 8 bytes), and to/from must also
327 * be naturally aligned.
328 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if !defined(_LP64)
	/* 32-bit kernel: copy in 4-byte words */
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#else
	/* 64-bit kernel: copy in 8-byte words */
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#endif
}
340
341
342 /*
343 * Due to various bugs in Solaris (especially bug 6186772 where the
344 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
345 * than two elements), and the design bug where hardware checksums are
346 * ignored on mblk chains with more than 2 elements, we need to
347 * allocate private pool of physically contiguous receive buffers.
348 */
349
/*
 * Initialize a slice's jumbo buffer pool: zero the bookkeeping
 * structure and create its mutex using the interrupt cookie so the
 * lock is usable from interrupt context.
 */
static void
myri10ge_jpool_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	bzero(jpool, sizeof (*jpool));
	mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
	    ss->mgp->icookie);
	jpool->head = NULL;
}
360
/*
 * Destroy a slice's jumbo pool mutex.  The pool must already be
 * empty; a non-empty pool indicates a driver bug and is logged.
 */
static void
myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	if (jpool->head != NULL) {
		cmn_err(CE_WARN,
		    "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
		    ss->mgp->name);
	}
	mutex_destroy(&jpool->mtx);
}
373
374
375 /*
376 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
377 * at most 32 bytes at a time, so as to avoid involving the software
378 * pio handler in the nic. We re-write the first segment's low
379 * DMA address to mark it valid only after we write the entire chunk
380 * in a burst
381 */
static inline void
myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
{
	/* poison the first entry so the NIC ignores the chunk while copying */
	src->addr_low |= BE_32(1);
	myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	/* restore the real address; rewriting dst->addr_low arms the chunk */
	src->addr_low &= ~(BE_32(1));
	dst->addr_low = src->addr_low;
	mb();
}
394
/*
 * Drain every per-CPU lockless freelist into the main jpool list.
 * Each per-CPU head is detached with an atomic swap so concurrent
 * myri10ge_jfree_rtn() callers never block.  Rewrites jpool->head,
 * so callers hold jpool->mtx (as myri10ge_remove_jbufs() does and
 * myri10ge_restock_jumbos()'s callers are documented to).
 */
static void
myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *jtail, *j, *jfree;
	volatile uintptr_t *putp;
	uintptr_t put;
	int i;

	/* find tail */
	jtail = NULL;
	if (jpool->head != NULL) {
		j = jpool->head;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}

	/*
	 * iterate over all per-CPU caches, and add contents into
	 * jpool
	 */
	for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
		/* take per-CPU free list */
		putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
		if (*putp == NULL)
			continue;
		/* atomically detach the whole per-CPU chain */
		put = atomic_swap_ulong(putp, 0);
		jfree = (struct myri10ge_jpool_entry *)put;

		/* append to pool */
		if (jtail == NULL) {
			jpool->head = jfree;
		} else {
			jtail->next = jfree;
		}
		/* walk to the new tail for the next append */
		j = jfree;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}
}
437
438 /*
439 * Transfers buffers from the free pool to the nic
440 * Must be called holding the jpool mutex.
441 */
442
/*
 * Refill the big rx ring from the jumbo free pool, posting
 * descriptors to the NIC in groups of 8 (two 32-byte PIO bursts via
 * myri10ge_submit_8rx()).  Stops early if the pool runs dry.
 */
static inline void
myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i, idx, limit;

	rx = &ss->rx_big;
	/* refill at most one full ring beyond what is already consumed */
	limit = ss->j_rx_cnt + (rx->mask + 1);

	for (i = rx->cnt; i != limit; i++) {
		idx = i & (rx->mask);
		j = jpool->head;
		if (j == NULL) {
			/* pool empty: harvest per-CPU freelists and retry */
			myri10ge_pull_jpool(ss);
			j = jpool->head;
			if (j == NULL) {
				break;
			}
		}
		jpool->head = j->next;
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;
		/* every 8th slot, post the previous 8 descriptors to the mcp */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
			    &rx->shadow[idx - 7]);
		}
	}
	rx->cnt = i;
}
476
477 /*
478 * Transfer buffers from the nic to the free pool.
479 * Should be called holding the jpool mutex
480 */
481
static inline void
myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i;

	/* note: takes the jpool mutex itself */
	mutex_enter(&jpool->mtx);
	rx = &ss->rx_big;

	/* pull every posted buffer off the ring and back onto the pool */
	for (i = 0; i < rx->mask + 1; i++) {
		j = rx->info[i].j;
		rx->info[i].j = NULL;
		if (j == NULL)
			continue;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);

}
504
505
506 /*
507 * Free routine which is called when the mblk allocated via
508 * esballoc() is freed. Here we return the jumbo buffer
509 * to the free pool, and possibly pass some jumbo buffers
510 * to the nic
511 */
512
/*
 * esballoc() free routine: return a jumbo buffer to its slice's
 * pool.  The buffer is pushed onto the current CPU's freelist with
 * a lockless compare-and-swap; myri10ge_pull_jpool() migrates such
 * buffers back to the main pool later.
 */
static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}
531
/*
 * Free one jumbo buffer: unbind its DMA mapping, release the memory
 * and DMA handle, then free the pool entry itself.
 */
static void
myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
{
	(void) ddi_dma_unbind_handle(j->dma_handle);
	ddi_dma_mem_free(&j->acc_handle);
	ddi_dma_free_handle(&j->dma_handle);
	kmem_free(j, sizeof (*j));
}
540
541
542 /*
543 * Allocates one physically contiguous descriptor
544 * and add it to the jumbo buffer pool.
545 */
546
/*
 * Allocate one physically contiguous jumbo buffer of myri10ge_mtu
 * bytes, bind it for DMA, and push it onto the slice's jumbo pool.
 * Returns 0 on success or a DDI error code.
 */
static int
myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_entry *j;
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	ddi_dma_attr_t *rx_dma_attr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	/* small MTUs can use the relaxed-alignment rx attributes */
	if (myri10ge_mtu < 2048)
		rx_dma_attr = &myri10ge_rx_std_dma_attr;
	else
		rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;

again:
	j = (struct myri10ge_jpool_entry *)
	    kmem_alloc(sizeof (*j), KM_SLEEP);
	err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_j;

	err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &j->buf, &real_length, &j->acc_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_handle;

	err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
	    real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &cookie, &count);
	if (err != DDI_SUCCESS)
		goto abort_with_mem;

	/*
	 * Make certain std MTU buffers do not cross a 4KB boundary:
	 *
	 * Setting dma_attr_align=4096 will do this, but the system
	 * will only allocate 1 RX buffer per 4KB page, rather than 2.
	 * Setting dma_attr_granular=4096 *seems* to work around this,
	 * but I'm paranoid about future systems no longer honoring
	 * this, so fall back to the safe, but memory wasting way if a
	 * buffer crosses a 4KB boundary.
	 */

	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		uint32_t start, end;

		start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
		end = start + myri10ge_mtu;
		if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
			printf("std buffer crossed a 4KB boundary!\n");
			myri10ge_remove_jbuf(j);
			/* tighten the attributes and retry from scratch */
			rx_dma_attr->dma_attr_align = 4096;
			rx_dma_attr->dma_attr_seg = UINT64_MAX;
			goto again;
		}
	}

	/* record the bus address halves in the NIC's (big-endian) order */
	j->dma.low =
	    htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	j->dma.high =
	    htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	j->ss = ss;


	j->free_func.free_func = myri10ge_jfree_rtn;
	j->free_func.free_arg = (char *)j;
	mutex_enter(&jpool->mtx);
	j->next = jpool->head;
	jpool->head = j;
	jpool->num_alloc++;
	mutex_exit(&jpool->mtx);
	return (0);

abort_with_mem:
	ddi_dma_mem_free(&j->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&j->dma_handle);

abort_with_j:
	kmem_free(j, sizeof (*j));

	/*
	 * If an allocation failed, perhaps it failed because it could
	 * not satisfy granularity requirement.  Disable that, and
	 * try again.
	 */
	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		cmn_err(CE_NOTE,
		    "!alloc failed, reverting to gran=1\n");
		rx_dma_attr->dma_attr_align = 4096;
		rx_dma_attr->dma_attr_seg = UINT64_MAX;
		goto again;
	}
	return (err);
}
649
650 static int
myri10ge_jfree_cnt(struct myri10ge_jpool_stuff * jpool)651 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
652 {
653 int i;
654 struct myri10ge_jpool_entry *j;
655
656 mutex_enter(&jpool->mtx);
657 j = jpool->head;
658 i = 0;
659 while (j != NULL) {
660 i++;
661 j = j->next;
662 }
663 mutex_exit(&jpool->mtx);
664 return (i);
665 }
666
667 static int
myri10ge_add_jbufs(struct myri10ge_slice_state * ss,int num,int total)668 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
669 {
670 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
671 int allocated = 0;
672 int err;
673 int needed;
674
675 /*
676 * if total is set, user wants "num" jbufs in the pool,
677 * otherwise the user wants to "num" additional jbufs
678 * added to the pool
679 */
680 if (total && jpool->num_alloc) {
681 allocated = myri10ge_jfree_cnt(jpool);
682 needed = num - allocated;
683 } else {
684 needed = num;
685 }
686
687 while (needed > 0) {
688 needed--;
689 err = myri10ge_add_jbuf(ss);
690 if (err == 0) {
691 allocated++;
692 }
693 }
694 return (allocated);
695 }
696
/*
 * Drain and free the entire jumbo pool, first migrating any buffers
 * parked on the per-CPU freelists back via myri10ge_pull_jpool().
 */
static void
myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	myri10ge_pull_jpool(ss);
	while (jpool->head != NULL) {
		jpool->num_alloc--;
		j = jpool->head;
		jpool->head = j->next;
		myri10ge_remove_jbuf(j);
	}
	mutex_exit(&jpool->mtx);
}
713
/*
 * Populate the small rx ring by carving jumbo buffers into
 * (myri10ge_small_bytes + MXGEFW_PAD)-sized slices.  Jumbos used
 * here are moved from the free pool onto ss->small_jpool so they
 * can be returned by myri10ge_release_small_jbufs().
 * NOTE(review): jpool->head is popped without a NULL check, so the
 * free pool must already hold enough jbufs for the whole ring —
 * presumably guaranteed by the caller; confirm.
 */
static void
myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;
	unsigned int alloc_size;

	dma_low = dma_high = len = 0;
	alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
	ptr = NULL;
	for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < alloc_size) {
			mutex_enter(&jpool->mtx);
			/* remove jumbo from freelist */
			j = jpool->head;
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}
751
752 /*
753 * Return the jumbo bufs we carved up for small to the jumbo pool
754 */
755
756 static void
myri10ge_release_small_jbufs(struct myri10ge_slice_state * ss)757 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
758 {
759 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
760 struct myri10ge_jpool_entry *j = NULL;
761
762 mutex_enter(&jpool->mtx);
763 while (ss->small_jpool != NULL) {
764 j = ss->small_jpool;
765 ss->small_jpool = j->next;
766 j->next = jpool->head;
767 jpool->head = j;
768 }
769 mutex_exit(&jpool->mtx);
770 ss->jbufs_for_smalls = 0;
771 }
772
773 static int
myri10ge_add_tx_handle(struct myri10ge_slice_state * ss)774 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
775 {
776 myri10ge_tx_ring_t *tx = &ss->tx;
777 struct myri10ge_priv *mgp = ss->mgp;
778 struct myri10ge_tx_dma_handle *handle;
779 int err;
780
781 handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
782 err = ddi_dma_alloc_handle(mgp->dip,
783 &myri10ge_tx_dma_attr,
784 DDI_DMA_SLEEP, NULL,
785 &handle->h);
786 if (err) {
787 static int limit = 0;
788 if (limit == 0)
789 cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n",
790 mgp->name);
791 limit++;
792 kmem_free(handle, sizeof (*handle));
793 return (err);
794 }
795 mutex_enter(&tx->handle_lock);
796 MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
797 handle->next = tx->free_tx_handles;
798 tx->free_tx_handles = handle;
799 mutex_exit(&tx->handle_lock);
800 return (DDI_SUCCESS);
801 }
802
/*
 * Destroy every tx DMA handle on the free list.  Warns if the
 * allocation stat is non-zero afterwards, i.e. handles were still
 * outstanding (not returned to the free list) at close time.
 */
static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}
825
/*
 * Splice a pre-linked chain of tx handles back onto the free list.
 * list->tail must be the last element reachable from list->head.
 */
static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}
835
836 static void
myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t * tx,struct myri10ge_tx_dma_handle * handle)837 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
838 struct myri10ge_tx_dma_handle *handle)
839 {
840 struct myri10ge_tx_dma_handle_head list;
841
842 if (handle == NULL)
843 return;
844 list.head = handle;
845 list.tail = handle;
846 while (handle != NULL) {
847 list.tail = handle;
848 handle = handle->next;
849 }
850 myri10ge_free_tx_handles(tx, &list);
851 }
852
853 static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state * ss,int count,struct myri10ge_tx_dma_handle ** ret)854 myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
855 struct myri10ge_tx_dma_handle **ret)
856 {
857 myri10ge_tx_ring_t *tx = &ss->tx;
858 struct myri10ge_tx_dma_handle *handle;
859 int err, i;
860
861 mutex_enter(&tx->handle_lock);
862 for (i = 0; i < count; i++) {
863 handle = tx->free_tx_handles;
864 while (handle == NULL) {
865 mutex_exit(&tx->handle_lock);
866 err = myri10ge_add_tx_handle(ss);
867 if (err != DDI_SUCCESS) {
868 goto abort_with_handles;
869 }
870 mutex_enter(&tx->handle_lock);
871 handle = tx->free_tx_handles;
872 }
873 tx->free_tx_handles = handle->next;
874 handle->next = *ret;
875 *ret = handle;
876 }
877 mutex_exit(&tx->handle_lock);
878 return (DDI_SUCCESS);
879
880 abort_with_handles:
881 myri10ge_free_tx_handle_slist(tx, *ret);
882 return (err);
883 }
884
885
886 /*
887 * Frees DMA resources associated with the send ring
888 */
/*
 * Frees DMA resources associated with the send ring: unbind and
 * collect any in-flight handles, free pending mblks, release the
 * per-entry copyblocks, then destroy the handle free list.
 */
static void
myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	size_t bytes;
	int idx;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	for (idx = 0; idx < ss->tx.mask + 1; idx++) {
		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			/*
			 * NOTE(review): head is overwritten each
			 * iteration with no explicit relinking, so this
			 * appears to rely on ring handles already being
			 * chained via handle->next by the send path —
			 * confirm against the tx submit code.
			 */
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		tx->cp[idx].va = NULL;
		myri10ge_dma_free(&tx->cp[idx].dma);
	}
	bytes = sizeof (*tx->cp) * (tx->mask + 1);
	kmem_free(tx->cp, bytes);
	tx->cp = NULL;
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
	myri10ge_remove_tx_handles(ss);
}
920
921 /*
922 * Allocates DMA handles associated with the send ring
923 */
/*
 * Allocates DMA resources for the send ring: one 4KB copyblock per
 * ring entry plus an initial batch of tx DMA handles.  Returns
 * DDI_SUCCESS or DDI_FAILURE (with everything cleaned up).
 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	/* KM_SLEEP allocations do not fail; this check is belt-and-suspenders */
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}


	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles, then park them on the free list */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	/* free the copyblocks allocated so far, newest first */
	while (h > 0)  {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}
974
975 /*
976 * The eeprom strings on the lanaiX have the format
977 * SN=x\0
978 * MAC=x:x:x:x:x:x\0
979 * PT:ddd mmm xx xx:xx:xx xx\0
980 * PV:ddd mmm xx xx:xx:xx xx\0
981 */
982 static int
myri10ge_read_mac_addr(struct myri10ge_priv * mgp)983 myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
984 {
985 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
986 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
987 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
988 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
989
990 char *ptr, *limit;
991 int i, hv, lv;
992
993 ptr = mgp->eeprom_strings;
994 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
995
996 while (*ptr != '\0' && ptr < limit) {
997 if (memcmp(ptr, "MAC=", 4) == 0) {
998 ptr += 4;
999 if (myri10ge_verbose)
1000 printf("%s: mac address = %s\n", mgp->name,
1001 ptr);
1002 mgp->mac_addr_string = ptr;
1003 for (i = 0; i < 6; i++) {
1004 if ((ptr + 2) > limit)
1005 goto abort;
1006
1007 if (*(ptr+1) == ':') {
1008 hv = 0;
1009 lv = myri10ge_digit(*ptr); ptr++;
1010 } else {
1011 hv = myri10ge_digit(*ptr); ptr++;
1012 lv = myri10ge_digit(*ptr); ptr++;
1013 }
1014 mgp->mac_addr[i] = (hv << 4) | lv;
1015 ptr++;
1016 }
1017 }
1018 if (memcmp((const void *)ptr, "SN=", 3) == 0) {
1019 ptr += 3;
1020 mgp->sn_str = (char *)ptr;
1021 }
1022 if (memcmp((const void *)ptr, "PC=", 3) == 0) {
1023 ptr += 3;
1024 mgp->pc_str = (char *)ptr;
1025 }
1026 MYRI10GE_NEXT_STRING(ptr);
1027 }
1028
1029 return (0);
1030
1031 abort:
1032 cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
1033 return (ENXIO);
1034 }
1035
1036
1037 /*
1038 * Determine the register set containing the PCI resource we
1039 * want to map: the memory-mappable part of the interface. We do
1040 * this by scanning the DDI "reg" property of the interface,
1041 * which is an array of mx_ddi_reg_set structures.
1042 */
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

/*
 * Each "reg" entry is a 5-word tuple; word 0 packs the PCI
 * bus/dev/func/register and address-space type, words 1-4 are the
 * 64-bit PCI address and span.  See IEEE 1275 PCI binding.
 */
#define	REGISTER_NUMBER(ip)	(ip[0] >>  0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >>  8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip)	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip)	(ip[4])

#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3

	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
					"I/O Space",
					"32-bit Memory Space",
					"64-bit Memory Space"
	};
#endif

	/* Fetch the "reg" property; DDI allocates data, we must free it. */
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

	/* Scan for the register number. */
	rs = &data[0];
	/* bus/dev/func come from the first tuple (the config-space entry) */
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number   = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number      = %d.\n", BUS_NUMBER(rs));
		printf("  Address space   = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			/*
			 * NOTE(review): only the low 32 bits of the span are
			 * kept; assumes the BAR is < 4GB — looks safe for
			 * this hardware, but worth confirming.
			 */
			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}
1140
1141
1142 static int
myri10ge_load_firmware_from_zlib(struct myri10ge_priv * mgp,uint32_t * limit)1143 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
1144 {
1145 void *inflate_buffer;
1146 int rv, status;
1147 size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
1148 size_t destlen;
1149 mcp_gen_header_t *hdr;
1150 unsigned hdr_offset, i;
1151
1152
1153 *limit = 0; /* -Wuninitialized */
1154 status = 0;
1155
1156 inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
1157 if (!inflate_buffer) {
1158 cmn_err(CE_WARN,
1159 "%s: Could not allocate buffer to inflate mcp\n",
1160 mgp->name);
1161 return (ENOMEM);
1162 }
1163
1164 destlen = sram_size;
1165 rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
1166 mgp->eth_z8e_length);
1167
1168 if (rv != Z_OK) {
1169 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
1170 mgp->name, z_strerror(rv));
1171 status = ENXIO;
1172 goto abort;
1173 }
1174
1175 *limit = (uint32_t)destlen;
1176
1177 hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
1178 MCP_HEADER_PTR_OFFSET));
1179 hdr = (void *)((char *)inflate_buffer + hdr_offset);
1180 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
1181 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
1182 ntohl(hdr->mcp_type));
1183 status = EIO;
1184 goto abort;
1185 }
1186
1187 /* save firmware version for kstat */
1188 (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
1189 if (myri10ge_verbose)
1190 printf("%s: firmware id: %s\n", mgp->name, hdr->version);
1191
1192 /* Copy the inflated firmware to NIC SRAM. */
1193 for (i = 0; i < *limit; i += 256) {
1194 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
1195 (char *)inflate_buffer + i,
1196 min(256U, (unsigned)(*limit - i)));
1197 mb();
1198 (void) *(int *)(void *)mgp->sram;
1199 mb();
1200 }
1201
1202 abort:
1203 kmem_free(inflate_buffer, sram_size);
1204
1205 return (status);
1206
1207 }
1208
1209
/*
 * Issue a command to the running MCP firmware and wait (up to 20ms,
 * busy-polling) for its DMA'ed response.  The command block is copied
 * by PIO to the MXGEFW_ETH_CMD window in SRAM; the firmware writes its
 * result into mgp->cmd (host DMA memory).  On success the response
 * payload is returned in data->data0.  Serialized by mgp->cmd_lock.
 * Returns 0, ENOSYS, E2BIG, ENXIO or EAGAIN (timeout).
 */
int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	/* sentinel: firmware overwrites this when the reply lands */
	response->result = 0xffffffff;
	mb();

	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}
1267
1268 /*
1269 * Enable or disable periodic RDMAs from the host to make certain
1270 * chipsets resend dropped PCIe messages
1271 */
1272
/*
 * Enable or disable the firmware's periodic dummy RDMA reads, which
 * coax certain chipsets into resending dropped PCIe messages.  The
 * request is handed to the boot code via the MXGEFW_BOOT_DUMMY_RDMA
 * SRAM window; the firmware acknowledges by DMA'ing 0xffffffff into
 * the confirmation word (mgp->cmd).  Failure is logged but not fatal.
 */
static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	/* align the request block to 8 bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high);	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low);	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */


	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	/* poll up to ~20ms for the firmware's acknowledgment */
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void*) confirm, *confirm);
	}
}
1320
/*
 * Load and hand off the ethernet MCP firmware: inflate the image into
 * SRAM, then ask the bootstrap MCP (via MXGEFW_BOOT_HANDOFF) to jump
 * to it, confirming liveness through the DMA'ed confirmation word.
 * Also derives max_intr_slots from the RX ring size and turns on the
 * dummy-RDMA workaround.  Returns 0 or an errno.
 */
static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	/* align the handoff request block to 8 bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htonl(size - 8); 	/* length of code */
	buf[5] = htonl(8);		/* where to copy to */
	buf[6] = htonl(0);		/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	/* handoff can be slow: poll up to ~1 second */
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *) confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	/* 2 interrupt-queue slots per receive descriptor */
	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}
1392
1393 static int
myri10ge_m_unicst(void * arg,const uint8_t * addr)1394 myri10ge_m_unicst(void *arg, const uint8_t *addr)
1395 {
1396 struct myri10ge_priv *mgp = arg;
1397 myri10ge_cmd_t cmd;
1398 int status;
1399
1400 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1401 | (addr[2] << 8) | addr[3]);
1402
1403 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1404
1405 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
1406 if (status == 0 && (addr != mgp->mac_addr))
1407 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));
1408
1409 return (status);
1410 }
1411
1412 static int
myri10ge_change_pause(struct myri10ge_priv * mgp,int pause)1413 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
1414 {
1415 myri10ge_cmd_t cmd;
1416 int status;
1417
1418 if (pause)
1419 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
1420 &cmd);
1421 else
1422 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
1423 &cmd);
1424
1425 if (status) {
1426 cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
1427 mgp->name);
1428 return (ENXIO);
1429 }
1430 mgp->pause = pause;
1431 return (0);
1432 }
1433
1434 static void
myri10ge_change_promisc(struct myri10ge_priv * mgp,int promisc)1435 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
1436 {
1437 myri10ge_cmd_t cmd;
1438 int status;
1439
1440 if (promisc)
1441 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
1442 else
1443 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);
1444
1445 if (status) {
1446 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
1447 mgp->name);
1448 }
1449 }
1450
/*
 * Run the firmware's DMA benchmark (read, write, and read+write) over
 * a tx_boundary-sized DMA buffer and record the measured bandwidths in
 * mgp->read_dma / write_dma / read_write_dma (MB/s).  Returns 0 or an
 * errno; failures of the unaligned-test variant are deliberately quiet.
 */
static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete
	 */

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING,  DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	/* DMA read test: multiplier 0x10000 */
	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	/* DMA write test: multiplier 0x1 */
	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	/* combined read+write test: multiplier 0x10001 */
	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);


abort:
	myri10ge_dma_free(&dmabench_dma);
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}
1522
/*
 * Reset the firmware and re-establish all host/firmware shared state:
 * interrupt queues (per slice), IRQ claim/deassert pointers, interrupt
 * coalescing, DMA benchmark, cached ring counters, MAC address, and
 * promisc/pause settings.  Command ordering below is mandated by the
 * firmware (see the block comment about MXGEFW_CMD_GET_MAX_RSS_QUEUES).
 * Returns 0 or an errno.
 */
static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts  */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	/*
	 * Give the firmware the DMA address of each slice's interrupt
	 * (rx_done) queue; errors are OR'ed and reported once below.
	 */
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	};

	/* per-slice IRQ-claim registers live 8 bytes apart in SRAM */
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	/* legacy (INTx) interrupts need an explicit deassert register */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	/* restore the settings the reset clobbered */
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}
1651
1652 static int
myri10ge_init_toeplitz(struct myri10ge_priv * mgp)1653 myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
1654 {
1655 myri10ge_cmd_t cmd;
1656 int i, b, s, t, j;
1657 int status;
1658 uint32_t k[8];
1659 uint32_t tmp;
1660 uint8_t *key;
1661
1662 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
1663 &cmd);
1664 if (status != 0) {
1665 cmn_err(CE_WARN, "%s: failed to get rss key\n",
1666 mgp->name);
1667 return (EIO);
1668 }
1669 myri10ge_pio_copy32(mgp->rss_key,
1670 (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
1671 sizeof (mgp->rss_key));
1672
1673 mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
1674 KM_SLEEP);
1675 key = (uint8_t *)mgp->rss_key;
1676 t = 0;
1677 for (b = 0; b < 12; b++) {
1678 for (s = 0; s < 8; s++) {
1679 /* Bits: b*8+s, ..., b*8+s+31 */
1680 k[s] = 0;
1681 for (j = 0; j < 32; j++) {
1682 int bit = b*8+s+j;
1683 bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
1684 k[s] |= bit << (31 - j);
1685 }
1686 }
1687
1688 for (i = 0; i <= 0xff; i++) {
1689 tmp = 0;
1690 if (i & (1 << 7)) { tmp ^= k[0]; }
1691 if (i & (1 << 6)) { tmp ^= k[1]; }
1692 if (i & (1 << 5)) { tmp ^= k[2]; }
1693 if (i & (1 << 4)) { tmp ^= k[3]; }
1694 if (i & (1 << 3)) { tmp ^= k[4]; }
1695 if (i & (1 << 2)) { tmp ^= k[5]; }
1696 if (i & (1 << 1)) { tmp ^= k[6]; }
1697 if (i & (1 << 0)) { tmp ^= k[7]; }
1698 mgp->toeplitz_hash_table[t++] = tmp;
1699 }
1700 }
1701 return (0);
1702 }
1703
1704 static inline struct myri10ge_slice_state *
myri10ge_toeplitz_send_hash(struct myri10ge_priv * mgp,struct ip * ip)1705 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1706 {
1707 struct tcphdr *hdr;
1708 uint32_t saddr, daddr;
1709 uint32_t hash, slice;
1710 uint32_t *table = mgp->toeplitz_hash_table;
1711 uint16_t src, dst;
1712
1713 /*
1714 * Note hashing order is reversed from how it is done
1715 * in the NIC, so as to generate the same hash value
1716 * for the connection to try to keep connections CPU local
1717 */
1718
1719 /* hash on IPv4 src/dst address */
1720 saddr = ntohl(ip->ip_src.s_addr);
1721 daddr = ntohl(ip->ip_dst.s_addr);
1722 hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
1723 hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
1724 hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
1725 hash ^= table[(256 * 3) + ((daddr) & 0xff)];
1726 hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
1727 hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
1728 hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
1729 hash ^= table[(256 * 7) + ((saddr) & 0xff)];
1730 /* hash on TCP port, if required */
1731 if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
1732 ip->ip_p == IPPROTO_TCP) {
1733 hdr = (struct tcphdr *)(void *)
1734 (((uint8_t *)ip) + (ip->ip_hl << 2));
1735 src = ntohs(hdr->th_sport);
1736 dst = ntohs(hdr->th_dport);
1737
1738 hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
1739 hash ^= table[(256 * 9) + ((dst) & 0xff)];
1740 hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
1741 hash ^= table[(256 * 11) + ((src) & 0xff)];
1742 }
1743 slice = (mgp->num_slices - 1) & hash;
1744 return (&mgp->ss[slice]);
1745
1746 }
1747
1748 static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv * mgp,struct ip * ip)1749 myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1750 {
1751 struct tcphdr *hdr;
1752 uint32_t slice, hash_val;
1753
1754
1755 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
1756 return (&mgp->ss[0]);
1757 }
1758 hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));
1759
1760 /*
1761 * Use the second byte of the *destination* address for
1762 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
1763 */
1764 hash_val = ntohs(hdr->th_dport) & 0xff;
1765 if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
1766 hash_val += ntohs(hdr->th_sport) & 0xff;
1767
1768 slice = (mgp->num_slices - 1) & hash_val;
1769 return (&mgp->ss[slice]);
1770 }
1771
/*
 * Choose the TX slice for an outgoing mblk.  With hashing disabled the
 * current CPU id picks the slice; otherwise parse far enough into the
 * (first mblk of the) packet to find an IPv4 header and dispatch to the
 * Toeplitz or simple port hash per myri10ge_rss_hash.  Anything that
 * can't be parsed falls back to slice 0.
 */
static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	/* no hashing: bind the flow to the submitting CPU's slice */
	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	if ((MBLKL(mp)) <  (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		/* allow one (and only one) VLAN tag */
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	/* re-check length against the actual IP header length + 8 */
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) <  (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}
1827
/*
 * Bring one slice to an operational state: query the firmware for ring
 * sizes and SRAM ring locations, allocate the shadow and host-info
 * rings, fill the small and big receive rings with jumbo buffers, set
 * up the TX copy-buffer ring, and point the firmware's per-slice stats
 * DMA at ss->fw_stats.  On failure, unwinds everything it allocated
 * (goto-chain cleanup) and returns an errno.
 */
static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	/* slice index = offset of this state within the array */
	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	/* ring entry counts are powers of two; masks used for wrapping */
	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	/* multi-slice firmware has per-slice TX go/stop doorbells */
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	/*
	 * NOTE(review): kmem_zalloc(KM_SLEEP) cannot return NULL on
	 * illumos, so the NULL checks and follow-up memset()s below are
	 * belt-and-suspenders; kept as-is.
	 */
	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	/* number of jumbo buffers to carve up for the small ring */
	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	/* low_water controls when the jpool is replenished */
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	/* descriptors are handed to the firmware 8 at a time */
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}


	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
	ss->rx_small.cnt = ss->rx_small.mask + 1;

	mutex_exit(&ss->jpool.mtx);

	status = myri10ge_prepare_tx_ring(ss);

	if (status != 0)
		goto abort_with_small_jbufs;

	/* tell firmware where to DMA this slice's irq stats block */
	cmd.data0 = ntohl(ss->fw_stats_dma.low);
	cmd.data1 = ntohl(ss->fw_stats_dma.high);
	cmd.data2 = sizeof (mcp_irq_data_t);
	cmd.data2 |= (slice << 16);
	bzero(ss->fw_stats, sizeof (*ss->fw_stats));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	if (status == ENOSYS) {
		/* older firmware: fall back to the obsolete stats layout */
		cmd.data0 = ntohl(ss->fw_stats_dma.low) +
		    offsetof(mcp_irq_data_t, send_done_count);
		cmd.data1 = ntohl(ss->fw_stats_dma.high);
		status = myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
	}
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
		goto abort_with_tx;
	}

	return (0);

abort_with_tx:
	myri10ge_unprepare_tx_ring(ss);

abort_with_small_jbufs:
	myri10ge_release_small_jbufs(ss);

abort_with_jumbos:
	if (allocated != 0) {
		mutex_enter(&ss->jpool.mtx);
		ss->jpool.low_water = 0;
		mutex_exit(&ss->jpool.mtx);
		myri10ge_unstock_jumbos(ss);
		myri10ge_remove_jbufs(ss);
	}

	/* falls through: free rx_big.info allocated above */
	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	kmem_free(ss->rx_big.info, bytes);

abort_with_rx_small_info:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	kmem_free(ss->rx_small.info, bytes);

abort_with_tx_info:
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	kmem_free(ss->tx.info, bytes);

abort_with_rx_big_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	kmem_free(ss->rx_big.shadow, bytes);

abort_with_rx_small_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	kmem_free(ss->rx_small.shadow, bytes);
abort:
	return (status);

}
2028
2029 static void
myri10ge_teardown_slice(struct myri10ge_slice_state * ss)2030 myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
2031 {
2032 int tx_ring_entries, rx_ring_entries;
2033 size_t bytes;
2034
2035 /* ignore slices that have not been fully setup */
2036 if (ss->tx.cp == NULL)
2037 return;
2038 /* Free the TX copy buffers */
2039 myri10ge_unprepare_tx_ring(ss);
2040
2041 /* stop passing returned buffers to firmware */
2042
2043 mutex_enter(&ss->jpool.mtx);
2044 ss->jpool.low_water = 0;
2045 mutex_exit(&ss->jpool.mtx);
2046 myri10ge_release_small_jbufs(ss);
2047
2048 /* Release the free jumbo frame pool */
2049 myri10ge_unstock_jumbos(ss);
2050 myri10ge_remove_jbufs(ss);
2051
2052 rx_ring_entries = ss->rx_big.mask + 1;
2053 tx_ring_entries = ss->tx.mask + 1;
2054
2055 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2056 kmem_free(ss->rx_big.info, bytes);
2057
2058 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2059 kmem_free(ss->rx_small.info, bytes);
2060
2061 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2062 kmem_free(ss->tx.info, bytes);
2063
2064 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2065 kmem_free(ss->rx_big.shadow, bytes);
2066
2067 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2068 kmem_free(ss->rx_small.shadow, bytes);
2069
2070 }
2071 static int
myri10ge_start_locked(struct myri10ge_priv * mgp)2072 myri10ge_start_locked(struct myri10ge_priv *mgp)
2073 {
2074 myri10ge_cmd_t cmd;
2075 int status, big_pow2, i;
2076 volatile uint8_t *itable;
2077
2078 status = DDI_SUCCESS;
2079 /* Allocate DMA resources and receive buffers */
2080
2081 status = myri10ge_reset(mgp);
2082 if (status != 0) {
2083 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
2084 return (DDI_FAILURE);
2085 }
2086
2087 if (mgp->num_slices > 1) {
2088 cmd.data0 = mgp->num_slices;
2089 cmd.data1 = 1; /* use MSI-X */
2090 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2091 &cmd);
2092 if (status != 0) {
2093 cmn_err(CE_WARN,
2094 "%s: failed to set number of slices\n",
2095 mgp->name);
2096 goto abort_with_nothing;
2097 }
2098 /* setup the indirection table */
2099 cmd.data0 = mgp->num_slices;
2100 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2101 &cmd);
2102
2103 status |= myri10ge_send_cmd(mgp,
2104 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
2105 if (status != 0) {
2106 cmn_err(CE_WARN,
2107 "%s: failed to setup rss tables\n", mgp->name);
2108 }
2109
2110 /* just enable an identity mapping */
2111 itable = mgp->sram + cmd.data0;
2112 for (i = 0; i < mgp->num_slices; i++)
2113 itable[i] = (uint8_t)i;
2114
2115 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
2116 status = myri10ge_init_toeplitz(mgp);
2117 if (status != 0) {
2118 cmn_err(CE_WARN, "%s: failed to setup "
2119 "toeplitz tx hash table", mgp->name);
2120 goto abort_with_nothing;
2121 }
2122 }
2123 cmd.data0 = 1;
2124 cmd.data1 = myri10ge_rss_hash;
2125 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2126 &cmd);
2127 if (status != 0) {
2128 cmn_err(CE_WARN,
2129 "%s: failed to enable slices\n", mgp->name);
2130 goto abort_with_toeplitz;
2131 }
2132 }
2133
2134 for (i = 0; i < mgp->num_slices; i++) {
2135 status = myri10ge_setup_slice(&mgp->ss[i]);
2136 if (status != 0)
2137 goto abort_with_slices;
2138 }
2139
2140 /*
2141 * Tell the MCP how many buffers he has, and to
2142 * bring the ethernet interface up
2143 *
2144 * Firmware needs the big buff size as a power of 2. Lie and
2145 * tell him the buffer is larger, because we only use 1
2146 * buffer/pkt, and the mtu will prevent overruns
2147 */
2148 big_pow2 = myri10ge_mtu + MXGEFW_PAD;
2149 while (!ISP2(big_pow2))
2150 big_pow2++;
2151
2152 /* now give firmware buffers sizes, and MTU */
2153 cmd.data0 = myri10ge_mtu;
2154 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
2155 cmd.data0 = myri10ge_small_bytes;
2156 status |=
2157 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
2158 cmd.data0 = big_pow2;
2159 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
2160 if (status) {
2161 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
2162 goto abort_with_slices;
2163 }
2164
2165
2166 cmd.data0 = 1;
2167 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
2168 if (status) {
2169 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
2170 mgp->name, status);
2171 } else {
2172 mgp->features |= MYRI10GE_TSO;
2173 }
2174
2175 mgp->link_state = -1;
2176 mgp->rdma_tags_available = 15;
2177 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
2178 if (status) {
2179 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
2180 goto abort_with_slices;
2181 }
2182 mgp->running = MYRI10GE_ETH_RUNNING;
2183 return (DDI_SUCCESS);
2184
2185 abort_with_slices:
2186 for (i = 0; i < mgp->num_slices; i++)
2187 myri10ge_teardown_slice(&mgp->ss[i]);
2188
2189 mgp->running = MYRI10GE_ETH_STOPPED;
2190
2191 abort_with_toeplitz:
2192 if (mgp->toeplitz_hash_table != NULL) {
2193 kmem_free(mgp->toeplitz_hash_table,
2194 sizeof (uint32_t) * 12 * 256);
2195 mgp->toeplitz_hash_table = NULL;
2196 }
2197
2198 abort_with_nothing:
2199 return (DDI_FAILURE);
2200 }
2201
2202 static void
myri10ge_stop_locked(struct myri10ge_priv * mgp)2203 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2204 {
2205 int status, old_down_cnt;
2206 myri10ge_cmd_t cmd;
2207 int wait_time = 10;
2208 int i, polling;
2209
2210 old_down_cnt = mgp->down_cnt;
2211 mb();
2212 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2213 if (status) {
2214 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2215 }
2216
2217 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2218 delay(1 * drv_usectohz(1000000));
2219 wait_time--;
2220 if (wait_time == 0)
2221 break;
2222 }
2223 again:
2224 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2225 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2226 for (i = 0; i < mgp->num_slices; i++) {
2227 /*
2228 * take and release the rx lock to ensure
2229 * that no interrupt thread is blocked
2230 * elsewhere in the stack, preventing
2231 * completion
2232 */
2233
2234 mutex_enter(&mgp->ss[i].rx_lock);
2235 printf("%s: slice %d rx irq idle\n",
2236 mgp->name, i);
2237 mutex_exit(&mgp->ss[i].rx_lock);
2238
2239 /* verify that the poll handler is inactive */
2240 mutex_enter(&mgp->ss->poll_lock);
2241 polling = mgp->ss->rx_polling;
2242 mutex_exit(&mgp->ss->poll_lock);
2243 if (polling) {
2244 printf("%s: slice %d is polling\n",
2245 mgp->name, i);
2246 delay(1 * drv_usectohz(1000000));
2247 goto again;
2248 }
2249 }
2250 delay(1 * drv_usectohz(1000000));
2251 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2252 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2253 }
2254 }
2255
2256 for (i = 0; i < mgp->num_slices; i++)
2257 myri10ge_teardown_slice(&mgp->ss[i]);
2258
2259 if (mgp->toeplitz_hash_table != NULL) {
2260 kmem_free(mgp->toeplitz_hash_table,
2261 sizeof (uint32_t) * 12 * 256);
2262 mgp->toeplitz_hash_table = NULL;
2263 }
2264 mgp->running = MYRI10GE_ETH_STOPPED;
2265 }
2266
2267 static int
myri10ge_m_start(void * arg)2268 myri10ge_m_start(void *arg)
2269 {
2270 struct myri10ge_priv *mgp = arg;
2271 int status;
2272
2273 mutex_enter(&mgp->intrlock);
2274
2275 if (mgp->running != MYRI10GE_ETH_STOPPED) {
2276 mutex_exit(&mgp->intrlock);
2277 return (DDI_FAILURE);
2278 }
2279 status = myri10ge_start_locked(mgp);
2280 mutex_exit(&mgp->intrlock);
2281
2282 if (status != DDI_SUCCESS)
2283 return (status);
2284
2285 /* start the watchdog timer */
2286 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2287 mgp->timer_ticks);
2288 return (DDI_SUCCESS);
2289
2290 }
2291
2292 static void
myri10ge_m_stop(void * arg)2293 myri10ge_m_stop(void *arg)
2294 {
2295 struct myri10ge_priv *mgp = arg;
2296
2297 mutex_enter(&mgp->intrlock);
2298 /* if the device not running give up */
2299 if (mgp->running != MYRI10GE_ETH_RUNNING) {
2300 mutex_exit(&mgp->intrlock);
2301 return;
2302 }
2303
2304 mgp->running = MYRI10GE_ETH_STOPPING;
2305 mutex_exit(&mgp->intrlock);
2306 (void) untimeout(mgp->timer_id);
2307 mutex_enter(&mgp->intrlock);
2308 myri10ge_stop_locked(mgp);
2309 mutex_exit(&mgp->intrlock);
2310
2311 }
2312
/*
 * Fix up the hardware partial checksum on a received frame and attach
 * it to the mblk via mac_hcksum_set(), also counting broadcast and
 * multicast receives.  'csum' is the 16-bit ones-complement partial
 * sum the firmware computed starting just past the ethernet header;
 * VLAN tags and IPv6 headers must be subtracted back out of it.
 * Only IPv4/IPv6 TCP and UDP frames get a checksum attached; any
 * other frame returns with the mblk untouched.
 */
static inline void
myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
{
	struct ether_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint32_t start, stuff, end, partial, hdrlen;


	csum = ntohs((uint16_t)csum);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	hdrlen = sizeof (*eh);
	/* low bit of the first dest-address octet marks a group address */
	if (eh->ether_dhost.ether_addr_octet[0] & 1) {
		if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
		    myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
			s->brdcstrcv++;
		else
			s->multircv++;
	}

	if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
		/*
		 * fix checksum by subtracting 4 bytes after what the
		 * firmware thought was the end of the ether hdr
		 */
		partial = *(uint32_t *)
		    (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
		csum += ~partial;
		csum += (csum < ~partial);	/* propagate the carry */
		/* fold twice back into 16 bits */
		csum = (csum >> 16) + (csum & 0xFFFF);
		csum = (csum >> 16) + (csum & 0xFFFF);
		hdrlen += VLAN_TAGSZ;
	}

	if (eh->ether_type == BE_16(ETHERTYPE_IP)) {
		ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
		/* start/stuff/end are relative to the IP header */
		start = ip->ip_hl << 2;

		if (ip->ip_p == IPPROTO_TCP)
			stuff = start + offsetof(struct tcphdr, th_sum);
		else if (ip->ip_p == IPPROTO_UDP)
			stuff = start + offsetof(struct udphdr, uh_sum);
		else
			return;
		end = ntohs(ip->ip_len);
	} else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) {
		ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
		start = sizeof (*ip6);
		/* only the first next-header is inspected (no ext hdrs) */
		if (ip6->ip6_nxt == IPPROTO_TCP) {
			stuff = start + offsetof(struct tcphdr, th_sum);
		} else if (ip6->ip6_nxt == IPPROTO_UDP)
			stuff = start + offsetof(struct udphdr, uh_sum);
		else
			return;
		end = start + ntohs(ip6->ip6_plen);
		/*
		 * IPv6 headers do not contain a checksum, and hence
		 * do not checksum to zero, so they don't "fall out"
		 * of the partial checksum calculation like IPv4
		 * headers do. We need to fix the partial checksum by
		 * subtracting the checksum of the IPv6 header.
		 */

		partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
		csum += ~partial;
		csum += (csum < ~partial);	/* propagate the carry */
		csum = (csum >> 16) + (csum & 0xFFFF);
		csum = (csum >> 16) + (csum & 0xFFFF);
	} else {
		return;
	}

	if (MBLKL(mp) > hdrlen + end) {
		/* padded frame, so hw csum may be invalid */
		return;
	}

	mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
}
2392
/*
 * Receive completion for a frame that arrived in a small rx buffer.
 * The data is always copied into a freshly allocated mblk.  Every
 * 8th slot the whole group of 8 descriptors is handed back to the
 * firmware in one write -- this happens even when allocb() fails
 * (via the abort label) so the ring never runs dry.  Returns the new
 * mblk, or NULL on allocation failure.
 */
static mblk_t *
myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
    uint32_t csum)
{
	mblk_t *mp;
	myri10ge_rx_ring_t *rx;
	int idx;

	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	ss->rx_small.cnt++;

	/* allocate a new buffer to pass up the stack */
	mp = allocb(len + MXGEFW_PAD, 0);
	if (mp == NULL) {
		MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
		goto abort;
	}
	/* copy frame plus the firmware pad, then strip the pad */
	bcopy(ss->rx_small.info[idx].ptr,
	    (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
	mp->b_wptr += len + MXGEFW_PAD;
	mp->b_rptr += MXGEFW_PAD;

	ss->rx_stats.ibytes += len;
	ss->rx_stats.ipackets += 1;
	myri10ge_rx_csum(mp, &ss->rx_stats, csum);

abort:
	/* restock the NIC in batches of 8 descriptors */
	if ((idx & 7) == 7) {
		myri10ge_submit_8rx(&rx->lanai[idx - 7],
		    &rx->shadow[idx - 7]);
	}

	return (mp);
}
2428
2429
/*
 * Receive completion for a frame that arrived in a jumbo buffer.
 * Normally the jumbo buffer itself is loaned to the stack with
 * desballoc(); when the pool of buffers owned by the firmware is
 * running low, the frame is copied into a fresh mblk instead so the
 * jumbo buffer can be returned to the NIC immediately.  Returns the
 * mblk, or NULL on failure.
 */
static mblk_t *
myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
    uint32_t csum)
{
	struct myri10ge_jpool_stuff *jpool;
	struct myri10ge_jpool_entry *j;
	mblk_t *mp;
	int idx, num_owned_by_mcp;

	jpool = &ss->jpool;
	idx = ss->j_rx_cnt & ss->rx_big.mask;
	j = ss->rx_big.info[idx].j;

	if (j == NULL) {
		/* should never happen: ring accounting is inconsistent */
		printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
		    ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
		return (NULL);
	}


	/* take ownership of the buffer away from the ring slot */
	ss->rx_big.info[idx].j = NULL;
	ss->j_rx_cnt++;


	/*
	 * Check to see if we are low on rx buffers.
	 * Note that we must leave at least 8 free so there are
	 * enough to free in a single 64-byte write.
	 */
	num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
	if (num_owned_by_mcp < jpool->low_water) {
		mutex_enter(&jpool->mtx);
		myri10ge_restock_jumbos(ss);
		mutex_exit(&jpool->mtx);
		num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
		/* if we are still low, then we have to copy */
		if (num_owned_by_mcp < 16) {
			MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
			/* allocate a new buffer to pass up the stack */
			mp = allocb(len + MXGEFW_PAD, 0);
			if (mp == NULL) {
				goto abort;
			}
			bcopy(j->buf,
			    (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
			myri10ge_jfree_rtn(j);
			/* push buffer back to NIC */
			mutex_enter(&jpool->mtx);
			myri10ge_restock_jumbos(ss);
			mutex_exit(&jpool->mtx);
			goto set_len;
		}
	}

	/* loan our buffer to the stack */
	mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
	if (mp == NULL) {
		goto abort;
	}

set_len:
	/* strip the firmware pad and set the frame length */
	mp->b_rptr += MXGEFW_PAD;
	mp->b_wptr = ((unsigned char *) mp->b_rptr + len);

	ss->rx_stats.ibytes += len;
	ss->rx_stats.ipackets += 1;
	myri10ge_rx_csum(mp, &ss->rx_stats, csum);

	return (mp);

abort:
	/* return the jumbo buffer to the pool and count the drop */
	myri10ge_jfree_rtn(j);
	MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
	return (NULL);
}
2505
2506 /*
2507 * Free all transmit buffers up until the specified index
2508 */
/*
 * Reap completed tx descriptors until tx->pkt_done catches up with
 * the firmware's completion index 'mcp_index'.  Frees mblks, chains
 * DMA handles for bulk release, folds per-packet stats into the ring
 * totals, and wakes a stalled mac tx ring once half the slots are
 * free.
 */
static inline void
myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;	/* handles to recycle */
	int idx;
	int limit = 0;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	while (tx->pkt_done != (int)mcp_index) {
		idx = tx->done & tx->mask;

		/*
		 * mblk & DMA handle attached only to first slot
		 * per buffer in the packet
		 */

		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			/* prepend the handle to the recycle chain */
			tx->info[idx].handle->next = handles.head;
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		/*
		 * a non-zero opackets marks the slot carrying the
		 * packet's stats; clearing stat.un.all resets it
		 * (NOTE(review): ostat presumably aliases stat.un --
		 * confirm in myri10ge_var.h)
		 */
		if (tx->info[idx].ostat.opackets != 0) {
			tx->stats.multixmt += tx->info[idx].ostat.multixmt;
			tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
			tx->stats.obytes += tx->info[idx].ostat.obytes;
			tx->stats.opackets += tx->info[idx].ostat.opackets;
			tx->info[idx].stat.un.all = 0;
			tx->pkt_done++;
		}

		tx->done++;
		/*
		 * if we stalled the queue, wake it. But Wait until
		 * we have at least 1/2 our slots free.
		 */
		if ((tx->req - tx->done) < (tx->mask >> 1) &&
		    tx->stall != tx->sched) {
			mutex_enter(&ss->tx.lock);
			tx->sched = tx->stall;
			mutex_exit(&ss->tx.lock);
			mac_tx_ring_update(ss->mgp->mh, tx->rh);
		}

		/* limit potential for livelock */
		if (unlikely(++limit > 2 * tx->mask))
			break;
	}
	if (tx->req == tx->done && tx->stop != NULL) {
		/*
		 * Nic has sent all pending requests, allow him
		 * to stop polling this queue
		 */
		mutex_enter(&tx->lock);
		if (tx->req == tx->done && tx->active) {
			*(int *)(void *)tx->stop = 1;
			tx->active = 0;
			mb();
		}
		mutex_exit(&tx->lock);
	}
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
}
2580
2581 static void
myri10ge_mbl_init(struct myri10ge_mblk_list * mbl)2582 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2583 {
2584 mbl->head = NULL;
2585 mbl->tail = &mbl->head;
2586 mbl->cnt = 0;
2587 }
2588
2589 /*ARGSUSED*/
2590 void
myri10ge_mbl_append(struct myri10ge_slice_state * ss,struct myri10ge_mblk_list * mbl,mblk_t * mp)2591 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2592 struct myri10ge_mblk_list *mbl, mblk_t *mp)
2593 {
2594 *(mbl->tail) = mp;
2595 mbl->tail = &mp->b_next;
2596 mp->b_next = NULL;
2597 mbl->cnt++;
2598 }
2599
2600
/*
 * Drain the receive completion ring, dispatching each completed
 * frame to the small- or big-buffer handler and collecting the
 * resulting mblks on 'mbl'.  'limit' bounds the payload bytes
 * processed per call (livelock protection) and '*stop' lets another
 * thread abort the loop.  Any active LRO sessions are flushed before
 * returning.  Caller holds the slice's rx_lock.
 */
static inline void
myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
    struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
{
	myri10ge_rx_done_t *rx_done = &ss->rx_done;
	struct myri10ge_priv *mgp = ss->mgp;
	mblk_t *mp;
	struct lro_entry *lro;
	uint16_t length;
	uint16_t checksum;


	/* a zero length means the firmware has not written the entry yet */
	while (rx_done->entry[rx_done->idx].length != 0) {
		if (unlikely (*stop)) {
			break;
		}
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* low bits carry the RSS hash type; mask them off */
		length &= (~MXGEFW_RSS_HASH_MASK);

		/* limit potential for livelock */
		limit -= length;
		if (unlikely(limit < 0))
			break;

		/* mark the entry free for the firmware to reuse */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
		if (length <= myri10ge_small_bytes)
			mp = myri10ge_rx_done_small(ss, length, checksum);
		else
			mp = myri10ge_rx_done_big(ss, length, checksum);
		if (mp != NULL) {
			/* try LRO first; append directly if not merged */
			if (!myri10ge_lro ||
			    0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
				myri10ge_mbl_append(ss, mbl, mp);
		}
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
	}
	/* flush any LRO sessions still accumulating segments */
	while (ss->lro_active != NULL) {
		lro = ss->lro_active;
		ss->lro_active = lro->next;
		myri10ge_lro_flush(ss, lro, mbl);
	}
}
2645
2646 static void
myri10ge_intr_rx(struct myri10ge_slice_state * ss)2647 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2648 {
2649 uint64_t gen;
2650 struct myri10ge_mblk_list mbl;
2651
2652 myri10ge_mbl_init(&mbl);
2653 if (mutex_tryenter(&ss->rx_lock) == 0)
2654 return;
2655 gen = ss->rx_gen_num;
2656 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2657 &ss->rx_polling);
2658 if (mbl.head != NULL)
2659 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2660 mutex_exit(&ss->rx_lock);
2661
2662 }
2663
2664 static mblk_t *
myri10ge_poll_rx(void * arg,int bytes)2665 myri10ge_poll_rx(void *arg, int bytes)
2666 {
2667 struct myri10ge_slice_state *ss = arg;
2668 struct myri10ge_mblk_list mbl;
2669 boolean_t dummy = B_FALSE;
2670
2671 if (bytes == 0)
2672 return (NULL);
2673
2674 myri10ge_mbl_init(&mbl);
2675 mutex_enter(&ss->rx_lock);
2676 if (ss->rx_polling)
2677 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2678 else
2679 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2680 ss->mgp->ss), ss->rx_token, ss->rx_polling);
2681 mutex_exit(&ss->rx_lock);
2682 return (mbl.head);
2683 }
2684
/*
 * Per-slice interrupt handler.  The firmware DMAs a mcp_irq_data_t
 * status block into host memory; stats->valid becoming non-zero
 * marks a complete DMA.  Handles rx completions, tx completions,
 * link-state changes, and the rx-polling token handshake with the
 * mac layer.  arg0 is the slice; arg1 is unused.
 */
/*ARGSUSED*/
static uint_t
myri10ge_intr(caddr_t arg0, caddr_t arg1)
{
	struct myri10ge_slice_state *ss =
	    (struct myri10ge_slice_state *)(void *)arg0;
	struct myri10ge_priv *mgp = ss->mgp;
	mcp_irq_data_t *stats = ss->fw_stats;
	myri10ge_tx_ring_t *tx = &ss->tx;
	uint32_t send_done_count;
	uint8_t valid;


	/* make sure the DMA has finished */
	if (!stats->valid) {
		return (DDI_INTR_UNCLAIMED);
	}
	valid = stats->valid;

	/* low bit indicates receives are present */
	if (valid & 1)
		myri10ge_intr_rx(ss);

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		if (!myri10ge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
		mb();
	} else {
		/* no need to wait for conf. that irq is low */
		stats->valid = 0;
	}

	/* reap tx completions until the firmware stops re-raising valid */
	do {
		/* check for transmit completes and receives */
		send_done_count = ntohl(stats->send_done_count);
		if (send_done_count != tx->pkt_done)
			myri10ge_tx_done(ss, (int)send_done_count);
	} while (*((volatile uint8_t *) &stats->valid));

	if (stats->stats_updated) {
		/* link state change or RDMA tag update from firmware */
		if (mgp->link_state != stats->link_up || stats->link_down) {
			mgp->link_state = stats->link_up;
			if (stats->link_down) {
				mgp->down_cnt += stats->link_down;
				mgp->link_state = 0;
			}
			if (mgp->link_state) {
				if (myri10ge_verbose)
					printf("%s: link up\n", mgp->name);
				mac_link_update(mgp->mh, LINK_STATE_UP);
			} else {
				if (myri10ge_verbose)
					printf("%s: link down\n", mgp->name);
				mac_link_update(mgp->mh, LINK_STATE_DOWN);
			}
			MYRI10GE_NIC_STAT_INC(link_changes);
		}
		if (mgp->rdma_tags_available !=
		    ntohl(ss->fw_stats->rdma_tags_available)) {
			mgp->rdma_tags_available =
			    ntohl(ss->fw_stats->rdma_tags_available);
			cmn_err(CE_NOTE, "%s: RDMA timed out! "
			    "%d tags left\n", mgp->name,
			    mgp->rdma_tags_available);
		}
	}

	mb();
	/* check to see if we have rx token to pass back */
	if (valid & 0x1) {
		mutex_enter(&ss->poll_lock);
		if (ss->rx_polling) {
			/* mac layer is polling; hold the token for it */
			ss->rx_token = 1;
		} else {
			*ss->irq_claim = BE_32(3);
			ss->rx_token = 0;
		}
		mutex_exit(&ss->poll_lock);
	}
	/* claim the tx half of the interrupt */
	*(ss->irq_claim + 1) = BE_32(3);
	return (DDI_INTR_CLAIMED);
}
2770
2771 /*
2772 * Add or remove a multicast address. This is called with our
2773 * macinfo's lock held by GLD, so we do not need to worry about
2774 * our own locking here.
2775 */
2776 static int
myri10ge_m_multicst(void * arg,boolean_t add,const uint8_t * multicastaddr)2777 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2778 {
2779 myri10ge_cmd_t cmd;
2780 struct myri10ge_priv *mgp = arg;
2781 int status, join_leave;
2782
2783 if (add)
2784 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2785 else
2786 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2787 (void) memcpy(&cmd.data0, multicastaddr, 4);
2788 (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2789 cmd.data0 = htonl(cmd.data0);
2790 cmd.data1 = htonl(cmd.data1);
2791 status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2792 if (status == 0)
2793 return (0);
2794
2795 cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2796 mgp->name);
2797 return (status);
2798 }
2799
2800
2801 static int
myri10ge_m_promisc(void * arg,boolean_t on)2802 myri10ge_m_promisc(void *arg, boolean_t on)
2803 {
2804 struct myri10ge_priv *mgp = arg;
2805
2806 myri10ge_change_promisc(mgp, on);
2807 return (0);
2808 }
2809
2810 /*
2811 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2812 * backwards one at a time and handle ring wraps
2813 */
2814
2815 static inline void
myri10ge_submit_req_backwards(myri10ge_tx_ring_t * tx,mcp_kreq_ether_send_t * src,int cnt)2816 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
2817 mcp_kreq_ether_send_t *src, int cnt)
2818 {
2819 int idx, starting_slot;
2820 starting_slot = tx->req;
2821 while (cnt > 1) {
2822 cnt--;
2823 idx = (starting_slot + cnt) & tx->mask;
2824 myri10ge_pio_copy(&tx->lanai[idx],
2825 &src[cnt], sizeof (*src));
2826 mb();
2827 }
2828 }
2829
2830 /*
2831 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2832 * at most 32 bytes at a time, so as to avoid involving the software
2833 * pio handler in the nic. We re-write the first segment's flags
2834 * to mark them valid only after writing the entire chain
2835 */
2836
static inline void
myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
    int cnt)
{
	int idx, i;
	uint32_t *src_ints, *dst_ints;
	mcp_kreq_ether_send_t *srcp, *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/*
	 * clear the first request's flags so the NIC ignores the
	 * chain until the final flag store below makes it valid
	 */
	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy forwards, two requests at a time */
		for (i = 0; i < (cnt - 1); i += 2) {
			myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
			mb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * submit all but the first request, and ensure
		 * that it is submitted below
		 */
		myri10ge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		myri10ge_pio_copy(dstp, srcp, sizeof (*src));
		mb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags |= last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	mb();
	/* notify NIC to poll this tx ring */
	if (!tx->active && tx->go != NULL) {
		*(int *)(void *)tx->go = 1;
		tx->active = 1;
		tx->activate++;
		mb();
	}
}
2892
2893 /* ARGSUSED */
2894 static inline void
myri10ge_lso_info_get(mblk_t * mp,uint32_t * mss,uint32_t * flags)2895 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2896 {
2897 uint32_t lso_flag;
2898 mac_lso_get(mp, mss, &lso_flag);
2899 (*flags) |= lso_flag;
2900 }
2901
2902
2903 /* like pullupmsg, except preserve hcksum/LSO attributes */
2904 static int
myri10ge_pullup(struct myri10ge_slice_state * ss,mblk_t * mp)2905 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2906 {
2907 uint32_t start, stuff, tx_offload_flags, mss;
2908 int ok;
2909
2910 mss = 0;
2911 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2912 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2913
2914 ok = pullupmsg(mp, -1);
2915 if (!ok) {
2916 printf("pullupmsg failed");
2917 return (DDI_FAILURE);
2918 }
2919 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2920 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags);
2921 if (tx_offload_flags & HW_LSO)
2922 DB_LSOMSS(mp) = (uint16_t)mss;
2923 lso_info_set(mp, mss, tx_offload_flags);
2924 return (DDI_SUCCESS);
2925 }
2926
2927 static inline void
myri10ge_tx_stat(struct myri10ge_tx_pkt_stats * s,struct ether_header * eh,int opackets,int obytes)2928 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2929 int opackets, int obytes)
2930 {
2931 s->un.all = 0;
2932 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2933 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2934 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2935 s->un.s.brdcstxmt = 1;
2936 else
2937 s->un.s.multixmt = 1;
2938 }
2939 s->un.s.opackets = (uint16_t)opackets;
2940 s->un.s.obytes = obytes;
2941 }
2942
/*
 * Transmit a small frame by gathering the whole mblk chain into the
 * slot's preallocated copy buffer and submitting a single descriptor.
 * Runts are zero-padded to the 60-byte minimum.  Consumes 'mp' on
 * success; returns EBUSY (leaving mp to the caller) when fewer than
 * two descriptor slots are free.
 */
static int
myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
    mcp_kreq_ether_send_t *req)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	caddr_t ptr;
	struct myri10ge_tx_copybuf *cp;
	mblk_t *bp;
	int idx, mblen, avail;
	uint16_t len;

	mutex_enter(&tx->lock);
	avail = tx->mask - (tx->req - tx->done);
	if (avail <= 1) {
		mutex_exit(&tx->lock);
		return (EBUSY);
	}
	idx = tx->req & tx->mask;
	cp = &tx->cp[idx];
	ptr = cp->va;
	/* gather the whole chain into the copy buffer */
	for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = MBLKL(bp);
		bcopy(bp->b_rptr, ptr, mblen);
		ptr += mblen;
		len += mblen;
	}
	/* ensure runts are padded to 60 bytes */
	if (len < 60) {
		bzero(ptr, 64 - len);	/* zeroes a few extra tail bytes */
		len = 60;
	}
	/* fill in the single send descriptor */
	req->addr_low = cp->dma.low;
	req->addr_high = cp->dma.high;
	req->length = htons(len);
	req->pad = 0;
	req->rdma_count = 1;
	myri10ge_tx_stat(&tx->info[idx].stat,
	    (struct ether_header *)(void *)cp->va, 1, len);
	(void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
	myri10ge_submit_req(&ss->tx, req, 1);
	mutex_exit(&tx->lock);
	freemsg(mp);
	return (DDI_SUCCESS);
}
2987
2988
2989 static void
myri10ge_send_locked(myri10ge_tx_ring_t * tx,mcp_kreq_ether_send_t * req_list,struct myri10ge_tx_buffer_state * tx_info,int count)2990 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2991 struct myri10ge_tx_buffer_state *tx_info,
2992 int count)
2993 {
2994 int i, idx;
2995
2996 idx = 0; /* gcc -Wuninitialized */
2997 /* store unmapping and bp info for tx irq handler */
2998 for (i = 0; i < count; i++) {
2999 idx = (tx->req + i) & tx->mask;
3000 tx->info[idx].m = tx_info[i].m;
3001 tx->info[idx].handle = tx_info[i].handle;
3002 }
3003 tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
3004
3005 /* submit the frame to the nic */
3006 myri10ge_submit_req(tx, req_list, count);
3007
3008
3009 }
3010
3011
3012
3013 static void
myri10ge_copydata(mblk_t * mp,int off,int len,caddr_t buf)3014 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3015 {
3016 mblk_t *bp;
3017 int seglen;
3018 uint_t count;
3019
3020 bp = mp;
3021
3022 while (off > 0) {
3023 seglen = MBLKL(bp);
3024 if (off < seglen)
3025 break;
3026 off -= seglen;
3027 bp = bp->b_cont;
3028 }
3029 while (len > 0) {
3030 seglen = MBLKL(bp);
3031 count = min(seglen - off, len);
3032 bcopy(bp->b_rptr + off, buf, count);
3033 len -= count;
3034 buf += count;
3035 off = 0;
3036 bp = bp->b_cont;
3037 }
3038 }
3039
3040 static int
myri10ge_ether_parse_header(mblk_t * mp)3041 myri10ge_ether_parse_header(mblk_t *mp)
3042 {
3043 struct ether_header eh_copy;
3044 struct ether_header *eh;
3045 int eth_hdr_len, seglen;
3046
3047 seglen = MBLKL(mp);
3048 eth_hdr_len = sizeof (*eh);
3049 if (seglen < eth_hdr_len) {
3050 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3051 eh = &eh_copy;
3052 } else {
3053 eh = (struct ether_header *)(void *)mp->b_rptr;
3054 }
3055 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3056 eth_hdr_len += 4;
3057 }
3058
3059 return (eth_hdr_len);
3060 }
3061
/*
 * For an LSO packet whose ethernet header is 'off' bytes long, locate
 * the IP and TCP headers (staging them in a local buffer when they
 * span mblks), zero the IP header checksum as the NIC requires, and
 * return the total ether+IP+TCP header length in bytes.
 */
static int
myri10ge_lso_parse_header(mblk_t *mp, int off)
{
	char buf[128];
	int seglen, sum_off;
	struct ip *ip;
	struct tcphdr *tcp;

	seglen = MBLKL(mp);
	/* stage the IP header in buf[] if it is not contiguous */
	if (seglen < off + sizeof (*ip)) {
		myri10ge_copydata(mp, off, sizeof (*ip), buf);
		ip = (struct ip *)(void *)buf;
	} else {
		ip = (struct ip *)(void *)(mp->b_rptr + off);
	}
	/* likewise for IP options plus the TCP header */
	if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
		myri10ge_copydata(mp, off,
		    (ip->ip_hl << 2) + sizeof (*tcp), buf);
		ip = (struct ip *)(void *)buf;
	}
	tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));

	/*
	 * NIC expects ip_sum to be zero. Recent changes to
	 * OpenSolaris leave the correct ip checksum there, rather
	 * than the required zero, so we need to zero it. Otherwise,
	 * the NIC will produce bad checksums when sending LSO packets.
	 */
	if (ip->ip_sum != 0) {
		if (((char *)ip) != buf) {
			/* ip points into mblk, so just zero it */
			ip->ip_sum = 0;
		} else {
			/*
			 * ip points into a copy, so walk the chain
			 * to find the ip_csum, then zero it
			 */
			sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
			/* find the mblk holding the first checksum byte */
			while (sum_off > (int)(MBLKL(mp) - 1)) {
				sum_off -= MBLKL(mp);
				mp = mp->b_cont;
			}
			mp->b_rptr[sum_off] = 0;
			sum_off++;
			/* the second byte may live in the next mblk */
			while (sum_off > MBLKL(mp) - 1) {
				sum_off -= MBLKL(mp);
				mp = mp->b_cont;
			}
			mp->b_rptr[sum_off] = 0;
		}
	}
	return (off + ((ip->ip_hl + tcp->th_off) << 2));
}
3115
/*
 * Transmit an LSO frame by copying it into the ring's pre-mapped
 * bounce buffers (tx->cp[]) instead of DMA-binding the caller's
 * mblks.  Pass 1 lays the data out so that no TSO segment straddles
 * a tx_boundary-sized buffer; pass 2 builds the firmware
 * send-descriptor list over the bounce buffers and submits it.
 *
 * hdr_size is the MAC+IP+TCP header length the firmware replicates
 * per segment, pkt_size the total frame length, mss the TCP maximum
 * segment size.  Returns EBUSY when the ring is too full (caller
 * keeps mp); otherwise consumes mp and returns DDI_SUCCESS.
 */
static int
myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
    mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
    uint16_t mss, uint8_t cksum_offset)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	mblk_t *bp;
	mcp_kreq_ether_send_t *req;
	struct myri10ge_tx_copybuf *cp;
	caddr_t rptr, ptr;
	int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
	int resid, avail, idx, hdr_size_tmp, tx_boundary;
	int rdma_count;
	uint32_t seglen, len, boundary, low, high_swapped;
	/* for TSO descriptors, the pseudo_hdr_offset field carries the mss */
	uint16_t pseudo_hdr_offset = htons(mss);
	uint8_t flags;

	tx_boundary = mgp->tx_boundary;
	hdr_size_tmp = hdr_size;
	resid = tx_boundary;	/* bytes left in the current bounce buffer */
	count = 1;
	mutex_enter(&tx->lock);

	/* check to see if the slots are really there */
	avail = tx->mask - (tx->req - tx->done);
	if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) {
		atomic_inc_32(&tx->stall);
		mutex_exit(&tx->lock);
		return (EBUSY);
	}

	/* copy */
	cum_len = -hdr_size;	/* negative until the header is consumed */
	count = 0;
	req = req_list;
	idx = tx->mask & tx->req;
	cp = &tx->cp[idx];
	low = ntohl(cp->dma.low);
	ptr = cp->va;
	cp->len = 0;
	if (mss) {
		/*
		 * Pre-compute the per-frame tx stats: the firmware
		 * replicates the header for each segment, so the wire
		 * byte count is pkt_size plus one extra header per
		 * additional segment.
		 */
		int payload = pkt_size - hdr_size;
		uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
		tx->info[idx].ostat.opackets = opackets;
		tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
		    + pkt_size;
	}
	hdr_size_tmp = hdr_size;
	mss_resid = mss;	/* bytes left in the current TSO segment */
	flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
	tx_req = tx->req;
	/*
	 * Pass 1: copy the mblk chain into bounce buffers, advancing
	 * to the next buffer whenever the space remaining could not
	 * hold a full mss, so segments never cross a tx_boundary.
	 */
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = MBLKL(bp);
		rptr = (caddr_t)bp->b_rptr;
		len = min(hdr_size_tmp, mblen);
		if (len) {
			/* copy (a piece of) the protocol headers */
			bcopy(rptr, ptr, len);
			rptr += len;
			ptr += len;
			resid -= len;
			mblen -= len;
			hdr_size_tmp -= len;
			cp->len += len;
			if (hdr_size_tmp)
				continue;	/* header continues in next mblk */
			if (resid < mss) {
				/* no room for a full segment; next buffer */
				tx_req++;
				idx = tx->mask & tx_req;
				cp = &tx->cp[idx];
				low = ntohl(cp->dma.low);
				ptr = cp->va;
				resid = tx_boundary;
			}
		}
		while (mblen) {
			/* copy payload one mss-sized chunk at a time */
			len = min(mss_resid, mblen);
			bcopy(rptr, ptr, len);
			mss_resid -= len;
			resid -= len;
			mblen -= len;
			rptr += len;
			ptr += len;
			cp->len += len;
			if (mss_resid == 0) {
				/* segment complete */
				mss_resid = mss;
				if (resid < mss) {
					/* move to the next bounce buffer */
					tx_req++;
					idx = tx->mask & tx_req;
					cp = &tx->cp[idx];
					cp->len = 0;
					low = ntohl(cp->dma.low);
					ptr = cp->va;
					resid = tx_boundary;
				}
			}
		}
	}

	/*
	 * Pass 2: walk the filled bounce buffers and build the
	 * firmware descriptor list, splitting at tx_boundary
	 * crossings and marking TSO CHOP/FIRST transitions.
	 */
	req = req_list;
	pkt_size_tmp = pkt_size;
	count = 0;
	rdma_count = 0;
	tx_req = tx->req;
	while (pkt_size_tmp) {
		idx = tx->mask & tx_req;
		cp = &tx->cp[idx];
		high_swapped = cp->dma.high;
		low = ntohl(cp->dma.low);
		len = cp->len;
		if (len == 0) {
			/*
			 * Should be impossible: pass 1 accounted for
			 * fewer bytes than pkt_size.  Dump debug state
			 * and drop the frame rather than hand the
			 * firmware a zero-length descriptor.
			 */
			printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
			    pkt_size_tmp, pkt_size);
			for (bp = mp; bp != NULL; bp = bp->b_cont) {
				mblen = MBLKL(bp);
				printf("mblen:%d\n", mblen);
			}
			pkt_size_tmp = pkt_size;
			tx_req = tx->req;
			while (pkt_size_tmp > 0) {
				idx = tx->mask & tx_req;
				cp = &tx->cp[idx];
				printf("cp->len = %d\n", cp->len);
				pkt_size_tmp -= cp->len;
				tx_req++;
			}
			printf("dropped\n");
			MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
			goto done;
		}
		pkt_size_tmp -= len;
		while (len) {
			while (len) {
				uint8_t flags_next;
				int cum_len_next;

				/* split at the next tx_boundary crossing */
				boundary = (low + mgp->tx_boundary) &
				    ~(mgp->tx_boundary - 1);
				seglen = boundary - low;
				if (seglen > len)
					seglen = len;

				flags_next = flags & ~MXGEFW_FLAGS_FIRST;
				cum_len_next = cum_len + seglen;
				/* back-fill count of the current rdma run */
				(req-rdma_count)->rdma_count = rdma_count + 1;
				if (likely(cum_len >= 0)) {
					/* payload */
					int next_is_first, chop;

					chop = (cum_len_next > mss);
					cum_len_next = cum_len_next % mss;
					next_is_first = (cum_len_next == 0);
					flags |= chop *
					    MXGEFW_FLAGS_TSO_CHOP;
					flags_next |= next_is_first *
					    MXGEFW_FLAGS_FIRST;
					/* branch-free run-length reset */
					rdma_count |= -(chop | next_is_first);
					rdma_count += chop & !next_is_first;
				} else if (likely(cum_len_next >= 0)) {
					/* header ends */
					int small;

					rdma_count = -1;
					cum_len_next = 0;
					seglen = -cum_len;
					small = (mss <= MXGEFW_SEND_SMALL_SIZE);
					flags_next = MXGEFW_FLAGS_TSO_PLD |
					    MXGEFW_FLAGS_FIRST |
					    (small * MXGEFW_FLAGS_SMALL);
				}
				req->addr_high = high_swapped;
				req->addr_low = htonl(low);
				req->pseudo_hdr_offset = pseudo_hdr_offset;
				req->pad = 0; /* complete solid 16-byte block */
				req->rdma_count = 1;
				req->cksum_offset = cksum_offset;
				req->length = htons(seglen);
				req->flags = flags | ((cum_len & 1) *
				    MXGEFW_FLAGS_ALIGN_ODD);
				if (cksum_offset > seglen)
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
				low += seglen;
				len -= seglen;
				cum_len = cum_len_next;
				req++;
				req->flags = 0;
				flags = flags_next;
				count++;
				rdma_count++;
			}
		}
		tx_req++;
	}
	/* close out the final rdma run */
	(req-rdma_count)->rdma_count = (uint8_t)rdma_count;
	/* mark the trailing descriptors of the final segment */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
	    MXGEFW_FLAGS_FIRST)));

	myri10ge_submit_req(tx, req_list, count);
done:
	mutex_exit(&tx->lock);
	freemsg(mp);	/* data has been copied out; release the chain */
	return (DDI_SUCCESS);
}
3324
3325 /*
3326 * Try to send the chain of buffers described by the mp. We must not
3327 * encapsulate more than eth->tx.req - eth->tx.done, or
3328 * MXGEFW_MAX_SEND_DESC, whichever is more.
3329 */
3330
/*
 * Main transmit routine: DMA-bind each mblk in the chain and build
 * a firmware send-descriptor list (req_list), diverting small
 * frames to myri10ge_tx_copy() and copy-friendly LSO frames to
 * myri10ge_tx_tso_copy().  Over-long chains are pulled up into a
 * single mblk and retried via the "again" label.
 *
 * Returns DDI_SUCCESS (or 0) when mp was consumed, EBUSY when the
 * ring is full and the caller should retry later, or another errno.
 */
static int
myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
    mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_tx_ring_t *tx = &ss->tx;
	mcp_kreq_ether_send_t *req;
	struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
	mblk_t *bp;
	ddi_dma_cookie_t cookie;
	int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
	    rdma_count, cum_len, lso_hdr_size;
	uint32_t start, stuff, tx_offload_flags;
	uint32_t seglen, len, mss, boundary, low, high_swapped;
	uint_t ncookies;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset, odd_flag;
	int pkt_size;
	int lso_copy = myri10ge_lso_copy;
	try_pullup = 1;

again:
	/* Setup checksum offloading, if needed */
	mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
	myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
	if (tx_offload_flags & HW_LSO) {
		max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
		if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
			/* LSO without partial checksum is unsupported; drop */
			MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
			freemsg(mp);
			return (DDI_SUCCESS);
		}
	} else {
		max_segs = MXGEFW_MAX_SEND_DESC;
		mss = 0;
	}
	req = req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;

	/* leave an extra slot keep the ring from wrapping */
	avail = tx->mask - (tx->req - tx->done);

	/*
	 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
	 * message will need to be pulled up in order to fit.
	 * Otherwise, we are low on transmit descriptors, it is
	 * probably better to stall and try again rather than pullup a
	 * message to fit.
	 */

	if (avail < max_segs) {
		err = EBUSY;
		atomic_inc_32(&tx->stall_early);
		goto stall;
	}

	/* find out how long the frame is and how many segments it is */
	count = 0;
	odd_flag = 0;
	pkt_size = 0;
	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		dblk_t *dbp;
		mblen = MBLKL(bp);
		if (mblen == 0) {
			/*
			 * we can't simply skip over 0-length mblks
			 * because the hardware can't deal with them,
			 * and we could leak them.
			 */
			MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
			err = EIO;
			goto pullup;
		}
		/*
		 * There's no advantage to copying most gesballoc
		 * attached blocks, so disable lso copy in that case
		 */
		if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
			if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
				lso_copy = 0;
			}
		}
		pkt_size += mblen;
		count++;
	}

	/* Try to pull up excessively long chains */
	if (count >= max_segs) {
		err = myri10ge_pullup(ss, mp);
		if (likely(err == DDI_SUCCESS)) {
			count = 1;
		} else {
			if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
				/*
				 * just let the h/w send it, it will be
				 * inefficient, but is better than dropping
				 */
				max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
			} else {
				/* drop it */
				MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
				freemsg(mp);
				return (0);
			}
		}
	}

	cum_len = 0;
	maclen = myri10ge_ether_parse_header(mp);

	if (tx_offload_flags & HCK_PARTIALCKSUM) {

		/* MAC gives offsets relative to the L3 header; add maclen */
		cksum_offset = start + maclen;
		pseudo_hdr_offset = htons(stuff + maclen);
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
		flags |= MXGEFW_FLAGS_CKSUM;
	}

	lso_hdr_size = 0; /* -Wuninitialized */
	if (mss) { /* LSO */
		/* this removes any CKSUM flag from before */
		flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
		/*
		 * parse the headers and set cum_len to a negative
		 * value to reflect the offset of the TCP payload
		 */
		lso_hdr_size = myri10ge_lso_parse_header(mp, maclen);
		cum_len = -lso_hdr_size;
		if ((mss < mgp->tx_boundary) && lso_copy) {
			err = myri10ge_tx_tso_copy(ss, mp, req_list,
			    lso_hdr_size, pkt_size, mss, cksum_offset);
			return (err);
		}

		/*
		 * for TSO, pseudo_hdr_offset holds mss. The firmware
		 * figures out where to put the checksum by parsing
		 * the header.
		 */

		pseudo_hdr_offset = htons(mss);
	} else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
		flags |= MXGEFW_FLAGS_SMALL;
		if (pkt_size < myri10ge_tx_copylen) {
			/* tiny frame: cheaper to copy than to DMA-bind */
			req->cksum_offset = cksum_offset;
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->flags = flags;
			err = myri10ge_tx_copy(ss, mp, req);
			return (err);
		}
		cum_len = 0;
	}

	/* pull one DMA handle for each bp from our freelist */
	handles = NULL;
	err = myri10ge_alloc_tx_handles(ss, count, &handles);
	if (err != DDI_SUCCESS) {
		err = DDI_FAILURE;
		goto stall;
	}
	count = 0;
	rdma_count = 0;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = MBLKL(bp);
		dma_handle = handles;
		handles = handles->next;

		rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
		    (caddr_t)bp->b_rptr, mblen,
		    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
		    &cookie, &ncookies);
		if (unlikely(rv != DDI_DMA_MAPPED)) {
			err = EIO;
			try_pullup = 0;
			/* push the unused handle back before bailing out */
			dma_handle->next = handles;
			handles = dma_handle;
			goto abort_with_handles;
		}

		/* reserve the slot */
		tx_info[count].m = bp;
		tx_info[count].handle = dma_handle;

		/* walk every DMA cookie of this mblk's binding */
		for (; ; ) {
			low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
			high_swapped =
			    htonl(MYRI10GE_HIGHPART_TO_U32(
			    cookie.dmac_laddress));
			len = (uint32_t)cookie.dmac_size;
			while (len) {
				uint8_t flags_next;
				int cum_len_next;

				/* split at the next tx_boundary crossing */
				boundary = (low + mgp->tx_boundary) &
				    ~(mgp->tx_boundary - 1);
				seglen = boundary - low;
				if (seglen > len)
					seglen = len;

				flags_next = flags & ~MXGEFW_FLAGS_FIRST;
				cum_len_next = cum_len + seglen;
				if (mss) {
					/* back-fill current rdma run count */
					(req-rdma_count)->rdma_count =
					    rdma_count + 1;
					if (likely(cum_len >= 0)) {
						/* payload */
						int next_is_first, chop;

						chop = (cum_len_next > mss);
						cum_len_next =
						    cum_len_next % mss;
						next_is_first =
						    (cum_len_next == 0);
						flags |= chop *
						    MXGEFW_FLAGS_TSO_CHOP;
						flags_next |= next_is_first *
						    MXGEFW_FLAGS_FIRST;
						/* branch-free run reset */
						rdma_count |=
						    -(chop | next_is_first);
						rdma_count +=
						    chop & !next_is_first;
					} else if (likely(cum_len_next >= 0)) {
						/* header ends */
						int small;

						rdma_count = -1;
						cum_len_next = 0;
						seglen = -cum_len;
						small = (mss <=
						    MXGEFW_SEND_SMALL_SIZE);
						flags_next =
						    MXGEFW_FLAGS_TSO_PLD
						    | MXGEFW_FLAGS_FIRST
						    | (small *
						    MXGEFW_FLAGS_SMALL);
					}
				}
				req->addr_high = high_swapped;
				req->addr_low = htonl(low);
				req->pseudo_hdr_offset = pseudo_hdr_offset;
				req->pad = 0; /* complete solid 16-byte block */
				req->rdma_count = 1;
				req->cksum_offset = cksum_offset;
				req->length = htons(seglen);
				req->flags = flags | ((cum_len & 1) * odd_flag);
				if (cksum_offset > seglen)
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
				low += seglen;
				len -= seglen;
				cum_len = cum_len_next;
				count++;
				rdma_count++;
				/* make sure all the segments will fit */
				if (unlikely(count >= max_segs)) {
					MYRI10GE_ATOMIC_SLICE_STAT_INC(
					    xmit_lowbuf);
					/* may try a pullup */
					err = EBUSY;
					if (try_pullup)
						try_pullup = 2;
					goto abort_with_handles;
				}
				req++;
				req->flags = 0;
				flags = flags_next;
				tx_info[count].m = 0;
			}
			ncookies--;
			if (ncookies == 0)
				break;
			ddi_dma_nextcookie(dma_handle->h, &cookie);
		}
	}
	/* close out the final rdma run */
	(req-rdma_count)->rdma_count = (uint8_t)rdma_count;

	if (mss) {
		/* mark the trailing descriptors of the final TSO segment */
		do {
			req--;
			req->flags |= MXGEFW_FLAGS_TSO_LAST;
		} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
		    MXGEFW_FLAGS_FIRST)));
	}

	/* calculate tx stats */
	if (mss) {
		uint16_t opackets;
		int payload;

		/* firmware replicates the header once per segment */
		payload = pkt_size - lso_hdr_size;
		opackets = (payload / mss) + ((payload % mss) != 0);
		tx_info[0].stat.un.all = 0;
		tx_info[0].ostat.opackets = opackets;
		tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
		    + pkt_size;
	} else {
		myri10ge_tx_stat(&tx_info[0].stat,
		    (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
	}
	mutex_enter(&tx->lock);

	/* check to see if the slots are really there */
	avail = tx->mask - (tx->req - tx->done);
	if (unlikely(avail <= count)) {
		mutex_exit(&tx->lock);
		err = 0;
		goto late_stall;
	}

	myri10ge_send_locked(tx, req_list, tx_info, count);
	mutex_exit(&tx->lock);
	return (DDI_SUCCESS);

late_stall:
	try_pullup = 0;
	atomic_inc_32(&tx->stall_late);

abort_with_handles:
	/* unbind and free handles from previous mblks */
	for (i = 0; i < count; i++) {
		bp = tx_info[i].m;
		tx_info[i].m = 0;
		if (bp) {
			dma_handle = tx_info[i].handle;
			(void) ddi_dma_unbind_handle(dma_handle->h);
			dma_handle->next = handles;
			handles = dma_handle;
			tx_info[i].handle = NULL;
			tx_info[i].m = NULL;
		}
	}
	myri10ge_free_tx_handle_slist(tx, handles);
pullup:
	if (try_pullup) {
		err = myri10ge_pullup(ss, mp);
		if (err != DDI_SUCCESS && try_pullup == 2) {
			/* drop */
			MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
			freemsg(mp);
			return (0);
		}
		try_pullup = 0;
		goto again;
	}

stall:
	if (err != 0) {
		if (err == EBUSY) {
			atomic_inc_32(&tx->stall);
		} else {
			MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
		}
	}
	return (err);
}
3689
3690 static mblk_t *
myri10ge_send_wrapper(void * arg,mblk_t * mp)3691 myri10ge_send_wrapper(void *arg, mblk_t *mp)
3692 {
3693 struct myri10ge_slice_state *ss = arg;
3694 int err = 0;
3695 mcp_kreq_ether_send_t *req_list;
3696 #if defined(__i386)
3697 /*
3698 * We need about 2.5KB of scratch space to handle transmits.
3699 * i86pc has only 8KB of kernel stack space, so we malloc the
3700 * scratch space there rather than keeping it on the stack.
3701 */
3702 size_t req_size, tx_info_size;
3703 struct myri10ge_tx_buffer_state *tx_info;
3704 caddr_t req_bytes;
3705
3706 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3707 + 8;
3708 req_bytes = kmem_alloc(req_size, KM_SLEEP);
3709 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1);
3710 tx_info = kmem_alloc(tx_info_size, KM_SLEEP);
3711 #else
3712 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3713 + 8];
3714 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
3715 #endif
3716
3717 /* ensure req_list entries are aligned to 8 bytes */
3718 req_list = (struct mcp_kreq_ether_send *)
3719 (((unsigned long)req_bytes + 7UL) & ~7UL);
3720
3721 err = myri10ge_send(ss, mp, req_list, tx_info);
3722
3723 #if defined(__i386)
3724 kmem_free(tx_info, tx_info_size);
3725 kmem_free(req_bytes, req_size);
3726 #endif
3727 if (err)
3728 return (mp);
3729 else
3730 return (NULL);
3731 }
3732
3733 static int
myri10ge_addmac(void * arg,const uint8_t * mac_addr)3734 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3735 {
3736 struct myri10ge_priv *mgp = arg;
3737 int err;
3738
3739 if (mac_addr == NULL)
3740 return (EINVAL);
3741
3742 mutex_enter(&mgp->intrlock);
3743 if (mgp->macaddr_cnt) {
3744 mutex_exit(&mgp->intrlock);
3745 return (ENOSPC);
3746 }
3747 err = myri10ge_m_unicst(mgp, mac_addr);
3748 if (!err)
3749 mgp->macaddr_cnt++;
3750
3751 mutex_exit(&mgp->intrlock);
3752 if (err)
3753 return (err);
3754
3755 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3756 return (0);
3757 }
3758
3759 /*ARGSUSED*/
3760 static int
myri10ge_remmac(void * arg,const uint8_t * mac_addr)3761 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3762 {
3763 struct myri10ge_priv *mgp = arg;
3764
3765 mutex_enter(&mgp->intrlock);
3766 mgp->macaddr_cnt--;
3767 mutex_exit(&mgp->intrlock);
3768
3769 return (0);
3770 }
3771
3772 /*ARGSUSED*/
3773 static void
myri10ge_fill_group(void * arg,mac_ring_type_t rtype,const int index,mac_group_info_t * infop,mac_group_handle_t gh)3774 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3775 mac_group_info_t *infop, mac_group_handle_t gh)
3776 {
3777 struct myri10ge_priv *mgp = arg;
3778
3779 if (rtype != MAC_RING_TYPE_RX)
3780 return;
3781
3782 infop->mgi_driver = (mac_group_driver_t)mgp;
3783 infop->mgi_start = NULL;
3784 infop->mgi_stop = NULL;
3785 infop->mgi_addmac = myri10ge_addmac;
3786 infop->mgi_remmac = myri10ge_remmac;
3787 infop->mgi_count = mgp->num_slices;
3788 }
3789
3790 static int
myri10ge_ring_start(mac_ring_driver_t rh,uint64_t mr_gen_num)3791 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3792 {
3793 struct myri10ge_slice_state *ss;
3794
3795 ss = (struct myri10ge_slice_state *)rh;
3796 mutex_enter(&ss->rx_lock);
3797 ss->rx_gen_num = mr_gen_num;
3798 mutex_exit(&ss->rx_lock);
3799 return (0);
3800 }
3801
3802 /*
3803 * Retrieve a value for one of the statistics for a particular rx ring
3804 */
3805 int
myri10ge_rx_ring_stat(mac_ring_driver_t rh,uint_t stat,uint64_t * val)3806 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3807 {
3808 struct myri10ge_slice_state *ss;
3809
3810 ss = (struct myri10ge_slice_state *)rh;
3811 switch (stat) {
3812 case MAC_STAT_RBYTES:
3813 *val = ss->rx_stats.ibytes;
3814 break;
3815
3816 case MAC_STAT_IPACKETS:
3817 *val = ss->rx_stats.ipackets;
3818 break;
3819
3820 default:
3821 *val = 0;
3822 return (ENOTSUP);
3823 }
3824
3825 return (0);
3826 }
3827
3828 /*
3829 * Retrieve a value for one of the statistics for a particular tx ring
3830 */
3831 int
myri10ge_tx_ring_stat(mac_ring_driver_t rh,uint_t stat,uint64_t * val)3832 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3833 {
3834 struct myri10ge_slice_state *ss;
3835
3836 ss = (struct myri10ge_slice_state *)rh;
3837 switch (stat) {
3838 case MAC_STAT_OBYTES:
3839 *val = ss->tx.stats.obytes;
3840 break;
3841
3842 case MAC_STAT_OPACKETS:
3843 *val = ss->tx.stats.opackets;
3844 break;
3845
3846 default:
3847 *val = 0;
3848 return (ENOTSUP);
3849 }
3850
3851 return (0);
3852 }
3853
3854 static int
myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)3855 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
3856 {
3857 struct myri10ge_slice_state *ss;
3858
3859 ss = (struct myri10ge_slice_state *)intrh;
3860 mutex_enter(&ss->poll_lock);
3861 ss->rx_polling = B_TRUE;
3862 mutex_exit(&ss->poll_lock);
3863 return (0);
3864 }
3865
3866 static int
myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)3867 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
3868 {
3869 struct myri10ge_slice_state *ss;
3870
3871 ss = (struct myri10ge_slice_state *)intrh;
3872 mutex_enter(&ss->poll_lock);
3873 ss->rx_polling = B_FALSE;
3874 if (ss->rx_token) {
3875 *ss->irq_claim = BE_32(3);
3876 ss->rx_token = 0;
3877 }
3878 mutex_exit(&ss->poll_lock);
3879 return (0);
3880 }
3881
3882 /*ARGSUSED*/
3883 static void
myri10ge_fill_ring(void * arg,mac_ring_type_t rtype,const int rg_index,const int ring_index,mac_ring_info_t * infop,mac_ring_handle_t rh)3884 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
3885 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
3886 {
3887 struct myri10ge_priv *mgp = arg;
3888 struct myri10ge_slice_state *ss;
3889 mac_intr_t *mintr = &infop->mri_intr;
3890
3891 ASSERT((unsigned int)ring_index < mgp->num_slices);
3892
3893 ss = &mgp->ss[ring_index];
3894 switch (rtype) {
3895 case MAC_RING_TYPE_RX:
3896 ss->rx_rh = rh;
3897 infop->mri_driver = (mac_ring_driver_t)ss;
3898 infop->mri_start = myri10ge_ring_start;
3899 infop->mri_stop = NULL;
3900 infop->mri_poll = myri10ge_poll_rx;
3901 infop->mri_stat = myri10ge_rx_ring_stat;
3902 mintr->mi_handle = (mac_intr_handle_t)ss;
3903 mintr->mi_enable = myri10ge_rx_ring_intr_enable;
3904 mintr->mi_disable = myri10ge_rx_ring_intr_disable;
3905 break;
3906 case MAC_RING_TYPE_TX:
3907 ss->tx.rh = rh;
3908 infop->mri_driver = (mac_ring_driver_t)ss;
3909 infop->mri_start = NULL;
3910 infop->mri_stop = NULL;
3911 infop->mri_tx = myri10ge_send_wrapper;
3912 infop->mri_stat = myri10ge_tx_ring_stat;
3913 break;
3914 default:
3915 break;
3916 }
3917 }
3918
3919 static void
myri10ge_nic_stat_destroy(struct myri10ge_priv * mgp)3920 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
3921 {
3922 if (mgp->ksp_stat == NULL)
3923 return;
3924
3925 kstat_delete(mgp->ksp_stat);
3926 mgp->ksp_stat = NULL;
3927 }
3928
3929 static void
myri10ge_slice_stat_destroy(struct myri10ge_slice_state * ss)3930 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
3931 {
3932 if (ss->ksp_stat == NULL)
3933 return;
3934
3935 kstat_delete(ss->ksp_stat);
3936 ss->ksp_stat = NULL;
3937 }
3938
3939 static void
myri10ge_info_destroy(struct myri10ge_priv * mgp)3940 myri10ge_info_destroy(struct myri10ge_priv *mgp)
3941 {
3942 if (mgp->ksp_info == NULL)
3943 return;
3944
3945 kstat_delete(mgp->ksp_info);
3946 mgp->ksp_info = NULL;
3947 }
3948
3949 static int
myri10ge_nic_stat_kstat_update(kstat_t * ksp,int rw)3950 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
3951 {
3952 struct myri10ge_nic_stat *ethstat;
3953 struct myri10ge_priv *mgp;
3954 mcp_irq_data_t *fw_stats;
3955
3956
3957 if (rw == KSTAT_WRITE)
3958 return (EACCES);
3959
3960 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
3961 mgp = (struct myri10ge_priv *)ksp->ks_private;
3962 fw_stats = mgp->ss[0].fw_stats;
3963
3964 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
3965 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
3966 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
3967 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
3968 ethstat->dma_force_physical.value.ul = 1;
3969 else
3970 ethstat->dma_force_physical.value.ul = 0;
3971 ethstat->lanes.value.ul = mgp->pcie_link_width;
3972 ethstat->dropped_bad_crc32.value.ul =
3973 ntohl(fw_stats->dropped_bad_crc32);
3974 ethstat->dropped_bad_phy.value.ul =
3975 ntohl(fw_stats->dropped_bad_phy);
3976 ethstat->dropped_link_error_or_filtered.value.ul =
3977 ntohl(fw_stats->dropped_link_error_or_filtered);
3978 ethstat->dropped_link_overflow.value.ul =
3979 ntohl(fw_stats->dropped_link_overflow);
3980 ethstat->dropped_multicast_filtered.value.ul =
3981 ntohl(fw_stats->dropped_multicast_filtered);
3982 ethstat->dropped_no_big_buffer.value.ul =
3983 ntohl(fw_stats->dropped_no_big_buffer);
3984 ethstat->dropped_no_small_buffer.value.ul =
3985 ntohl(fw_stats->dropped_no_small_buffer);
3986 ethstat->dropped_overrun.value.ul =
3987 ntohl(fw_stats->dropped_overrun);
3988 ethstat->dropped_pause.value.ul =
3989 ntohl(fw_stats->dropped_pause);
3990 ethstat->dropped_runt.value.ul =
3991 ntohl(fw_stats->dropped_runt);
3992 ethstat->link_up.value.ul =
3993 ntohl(fw_stats->link_up);
3994 ethstat->dropped_unicast_filtered.value.ul =
3995 ntohl(fw_stats->dropped_unicast_filtered);
3996 return (0);
3997 }
3998
3999 static int
myri10ge_slice_stat_kstat_update(kstat_t * ksp,int rw)4000 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
4001 {
4002 struct myri10ge_slice_stat *ethstat;
4003 struct myri10ge_slice_state *ss;
4004
4005 if (rw == KSTAT_WRITE)
4006 return (EACCES);
4007
4008 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
4009 ss = (struct myri10ge_slice_state *)ksp->ks_private;
4010
4011 ethstat->rx_big.value.ul = ss->j_rx_cnt;
4012 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
4013 ethstat->rx_bigbuf_pool.value.ul =
4014 ss->jpool.num_alloc - ss->jbufs_for_smalls;
4015 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
4016 ethstat->rx_small.value.ul = ss->rx_small.cnt -
4017 (ss->rx_small.mask + 1);
4018 ethstat->tx_done.value.ul = ss->tx.done;
4019 ethstat->tx_req.value.ul = ss->tx.req;
4020 ethstat->tx_activate.value.ul = ss->tx.activate;
4021 ethstat->xmit_sched.value.ul = ss->tx.sched;
4022 ethstat->xmit_stall.value.ul = ss->tx.stall;
4023 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
4024 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
4025 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err);
4026 return (0);
4027 }
4028
4029 static int
myri10ge_info_kstat_update(kstat_t * ksp,int rw)4030 myri10ge_info_kstat_update(kstat_t *ksp, int rw)
4031 {
4032 struct myri10ge_info *info;
4033 struct myri10ge_priv *mgp;
4034
4035
4036 if (rw == KSTAT_WRITE)
4037 return (EACCES);
4038
4039 info = (struct myri10ge_info *)ksp->ks_data;
4040 mgp = (struct myri10ge_priv *)ksp->ks_private;
4041 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
4042 kstat_named_setstr(&info->firmware_version, mgp->fw_version);
4043 kstat_named_setstr(&info->firmware_name, mgp->fw_name);
4044 kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
4045 kstat_named_setstr(&info->product_code, mgp->pc_str);
4046 kstat_named_setstr(&info->serial_number, mgp->sn_str);
4047 return (0);
4048 }
4049
/*
 * Template for the virtual "myri10ge_info" kstat: static
 * identification strings filled in by myri10ge_info_kstat_update().
 */
static struct myri10ge_info myri10ge_info_template = {
	{ "driver_version", KSTAT_DATA_STRING },
	{ "firmware_version", KSTAT_DATA_STRING },
	{ "firmware_name", KSTAT_DATA_STRING },
	{ "interrupt_type", KSTAT_DATA_STRING },
	{ "product_code", KSTAT_DATA_STRING },
	{ "serial_number", KSTAT_DATA_STRING },
};
/* serializes access to the shared template across driver instances */
static kmutex_t myri10ge_info_template_lock;
4059
4060
4061 static int
myri10ge_info_init(struct myri10ge_priv * mgp)4062 myri10ge_info_init(struct myri10ge_priv *mgp)
4063 {
4064 struct kstat *ksp;
4065
4066 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4067 "myri10ge_info", "net", KSTAT_TYPE_NAMED,
4068 sizeof (myri10ge_info_template) /
4069 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
4070 if (ksp == NULL) {
4071 cmn_err(CE_WARN,
4072 "%s: myri10ge_info_init: kstat_create failed", mgp->name);
4073 return (DDI_FAILURE);
4074 }
4075 mgp->ksp_info = ksp;
4076 ksp->ks_update = myri10ge_info_kstat_update;
4077 ksp->ks_private = (void *) mgp;
4078 ksp->ks_data = &myri10ge_info_template;
4079 ksp->ks_lock = &myri10ge_info_template_lock;
4080 if (MYRI10GE_VERSION_STR != NULL)
4081 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
4082 if (mgp->fw_version != NULL)
4083 ksp->ks_data_size += strlen(mgp->fw_version) + 1;
4084 ksp->ks_data_size += strlen(mgp->fw_name) + 1;
4085 ksp->ks_data_size += strlen(mgp->intr_type) + 1;
4086 if (mgp->pc_str != NULL)
4087 ksp->ks_data_size += strlen(mgp->pc_str) + 1;
4088 if (mgp->sn_str != NULL)
4089 ksp->ks_data_size += strlen(mgp->sn_str) + 1;
4090
4091 kstat_install(ksp);
4092 return (DDI_SUCCESS);
4093 }
4094
4095
4096 static int
myri10ge_nic_stat_init(struct myri10ge_priv * mgp)4097 myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
4098 {
4099 struct kstat *ksp;
4100 struct myri10ge_nic_stat *ethstat;
4101
4102 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4103 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
4104 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4105 if (ksp == NULL) {
4106 cmn_err(CE_WARN,
4107 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4108 return (DDI_FAILURE);
4109 }
4110 mgp->ksp_stat = ksp;
4111 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);
4112
4113 kstat_named_init(ðstat->dma_read_bw_MBs,
4114 "dma_read_bw_MBs", KSTAT_DATA_ULONG);
4115 kstat_named_init(ðstat->dma_write_bw_MBs,
4116 "dma_write_bw_MBs", KSTAT_DATA_ULONG);
4117 kstat_named_init(ðstat->dma_read_write_bw_MBs,
4118 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
4119 kstat_named_init(ðstat->dma_force_physical,
4120 "dma_force_physical", KSTAT_DATA_ULONG);
4121 kstat_named_init(ðstat->lanes,
4122 "lanes", KSTAT_DATA_ULONG);
4123 kstat_named_init(ðstat->dropped_bad_crc32,
4124 "dropped_bad_crc32", KSTAT_DATA_ULONG);
4125 kstat_named_init(ðstat->dropped_bad_phy,
4126 "dropped_bad_phy", KSTAT_DATA_ULONG);
4127 kstat_named_init(ðstat->dropped_link_error_or_filtered,
4128 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
4129 kstat_named_init(ðstat->dropped_link_overflow,
4130 "dropped_link_overflow", KSTAT_DATA_ULONG);
4131 kstat_named_init(ðstat->dropped_multicast_filtered,
4132 "dropped_multicast_filtered", KSTAT_DATA_ULONG);
4133 kstat_named_init(ðstat->dropped_no_big_buffer,
4134 "dropped_no_big_buffer", KSTAT_DATA_ULONG);
4135 kstat_named_init(ðstat->dropped_no_small_buffer,
4136 "dropped_no_small_buffer", KSTAT_DATA_ULONG);
4137 kstat_named_init(ðstat->dropped_overrun,
4138 "dropped_overrun", KSTAT_DATA_ULONG);
4139 kstat_named_init(ðstat->dropped_pause,
4140 "dropped_pause", KSTAT_DATA_ULONG);
4141 kstat_named_init(ðstat->dropped_runt,
4142 "dropped_runt", KSTAT_DATA_ULONG);
4143 kstat_named_init(ðstat->dropped_unicast_filtered,
4144 "dropped_unicast_filtered", KSTAT_DATA_ULONG);
4145 kstat_named_init(ðstat->dropped_runt, "dropped_runt",
4146 KSTAT_DATA_ULONG);
4147 kstat_named_init(ðstat->link_up, "link_up", KSTAT_DATA_ULONG);
4148 kstat_named_init(ðstat->link_changes, "link_changes",
4149 KSTAT_DATA_ULONG);
4150 ksp->ks_update = myri10ge_nic_stat_kstat_update;
4151 ksp->ks_private = (void *) mgp;
4152 kstat_install(ksp);
4153 return (DDI_SUCCESS);
4154 }
4155
4156 static int
myri10ge_slice_stat_init(struct myri10ge_slice_state * ss)4157 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
4158 {
4159 struct myri10ge_priv *mgp = ss->mgp;
4160 struct kstat *ksp;
4161 struct myri10ge_slice_stat *ethstat;
4162 int instance;
4163
4164 /*
4165 * fake an instance so that the same slice numbers from
4166 * different instances do not collide
4167 */
4168 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss);
4169 ksp = kstat_create("myri10ge", instance,
4170 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
4171 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4172 if (ksp == NULL) {
4173 cmn_err(CE_WARN,
4174 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4175 return (DDI_FAILURE);
4176 }
4177 ss->ksp_stat = ksp;
4178 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
4179 kstat_named_init(ðstat->lro_bad_csum, "lro_bad_csum",
4180 KSTAT_DATA_ULONG);
4181 kstat_named_init(ðstat->lro_flushed, "lro_flushed",
4182 KSTAT_DATA_ULONG);
4183 kstat_named_init(ðstat->lro_queued, "lro_queued",
4184 KSTAT_DATA_ULONG);
4185 kstat_named_init(ðstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
4186 KSTAT_DATA_ULONG);
4187 kstat_named_init(ðstat->rx_bigbuf_pool, "rx_bigbuf_pool",
4188 KSTAT_DATA_ULONG);
4189 kstat_named_init(ðstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
4190 KSTAT_DATA_ULONG);
4191 kstat_named_init(ðstat->rx_copy, "rx_copy",
4192 KSTAT_DATA_ULONG);
4193 kstat_named_init(ðstat->rx_big_nobuf, "rx_big_nobuf",
4194 KSTAT_DATA_ULONG);
4195 kstat_named_init(ðstat->rx_small_nobuf, "rx_small_nobuf",
4196 KSTAT_DATA_ULONG);
4197 kstat_named_init(ðstat->xmit_zero_len, "xmit_zero_len",
4198 KSTAT_DATA_ULONG);
4199 kstat_named_init(ðstat->xmit_pullup, "xmit_pullup",
4200 KSTAT_DATA_ULONG);
4201 kstat_named_init(ðstat->xmit_pullup_first, "xmit_pullup_first",
4202 KSTAT_DATA_ULONG);
4203 kstat_named_init(ðstat->xmit_lowbuf, "xmit_lowbuf",
4204 KSTAT_DATA_ULONG);
4205 kstat_named_init(ðstat->xmit_lsobadflags, "xmit_lsobadflags",
4206 KSTAT_DATA_ULONG);
4207 kstat_named_init(ðstat->xmit_sched, "xmit_sched",
4208 KSTAT_DATA_ULONG);
4209 kstat_named_init(ðstat->xmit_stall, "xmit_stall",
4210 KSTAT_DATA_ULONG);
4211 kstat_named_init(ðstat->xmit_stall_early, "xmit_stall_early",
4212 KSTAT_DATA_ULONG);
4213 kstat_named_init(ðstat->xmit_stall_late, "xmit_stall_late",
4214 KSTAT_DATA_ULONG);
4215 kstat_named_init(ðstat->xmit_err, "xmit_err",
4216 KSTAT_DATA_ULONG);
4217 kstat_named_init(ðstat->tx_req, "tx_req",
4218 KSTAT_DATA_ULONG);
4219 kstat_named_init(ðstat->tx_activate, "tx_activate",
4220 KSTAT_DATA_ULONG);
4221 kstat_named_init(ðstat->tx_done, "tx_done",
4222 KSTAT_DATA_ULONG);
4223 kstat_named_init(ðstat->tx_handles_alloced, "tx_handles_alloced",
4224 KSTAT_DATA_ULONG);
4225 kstat_named_init(ðstat->rx_big, "rx_big",
4226 KSTAT_DATA_ULONG);
4227 kstat_named_init(ðstat->rx_small, "rx_small",
4228 KSTAT_DATA_ULONG);
4229 ksp->ks_update = myri10ge_slice_stat_kstat_update;
4230 ksp->ks_private = (void *) ss;
4231 kstat_install(ksp);
4232 return (DDI_SUCCESS);
4233 }
4234
4235
4236
4237 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4238
4239 #include <vm/hat.h>
4240 #include <sys/ddi_isa.h>
4241 void *device_arena_alloc(size_t size, int vm_flag);
4242 void device_arena_free(void *vaddr, size_t size);
4243
4244 static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv * mgp)4245 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4246 {
4247 dev_info_t *parent_dip;
4248 ddi_acc_handle_t handle;
4249 unsigned long bus_number, dev_number, func_number;
4250 unsigned long cfg_pa, paddr, base, pgoffset;
4251 char *cvaddr, *ptr;
4252 uint32_t *ptr32;
4253 int retval = DDI_FAILURE;
4254 int dontcare;
4255 uint16_t read_vid, read_did, vendor_id, device_id;
4256
4257 if (!myri10ge_nvidia_ecrc_enable)
4258 return;
4259
4260 parent_dip = ddi_get_parent(mgp->dip);
4261 if (parent_dip == NULL) {
4262 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
4263 return;
4264 }
4265
4266 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
4267 cmn_err(CE_WARN,
4268 "%s: Could not access my parent's registers", mgp->name);
4269 return;
4270 }
4271
4272 vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
4273 device_id = pci_config_get16(handle, PCI_CONF_DEVID);
4274 pci_config_teardown(&handle);
4275
4276 if (myri10ge_verbose) {
4277 unsigned long bus_number, dev_number, func_number;
4278 int reg_set, span;
4279 (void) myri10ge_reg_set(parent_dip, ®_set, &span,
4280 &bus_number, &dev_number, &func_number);
4281 if (myri10ge_verbose)
4282 printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
4283 bus_number, dev_number, func_number);
4284 }
4285
4286 if (vendor_id != 0x10de)
4287 return;
4288
4289 if (device_id != 0x005d /* CK804 */ &&
4290 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
4291 return;
4292 }
4293 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
4294 &bus_number, &dev_number, &func_number);
4295
4296 for (cfg_pa = 0xf0000000UL;
4297 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
4298 cfg_pa -= 0x10000000UL) {
4299 /* find the config space address for the nvidia bridge */
4300 paddr = (cfg_pa + bus_number * 0x00100000UL +
4301 (dev_number * 8 + func_number) * 0x00001000UL);
4302
4303 base = paddr & (~MMU_PAGEOFFSET);
4304 pgoffset = paddr & MMU_PAGEOFFSET;
4305
4306 /* map it into the kernel */
4307 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP);
4308 if (cvaddr == NULL)
4309 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
4310 mgp->name);
4311
4312 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
4313 i_ddi_paddr_to_pfn(base),
4314 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);
4315
4316 ptr = cvaddr + pgoffset;
4317 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
4318 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
4319 if (vendor_id == read_did || device_id == read_did) {
4320 ptr32 = (uint32_t *)(void *)(ptr + 0x178);
4321 if (myri10ge_verbose)
4322 printf("%s: Enabling ECRC on upstream "
4323 "Nvidia bridge (0x%x:0x%x) "
4324 "at %ld:%ld:%ld\n", mgp->name,
4325 read_vid, read_did, bus_number,
4326 dev_number, func_number);
4327 *ptr32 |= 0x40;
4328 retval = DDI_SUCCESS;
4329 }
4330 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
4331 device_arena_free(cvaddr, ptob(1));
4332 }
4333 }
4334
4335 #else
/*
 * No-op on non-x86 platforms: the ECRC workaround above targets the
 * Nvidia CK804/MCP55 PCIe bridges found only in x86 systems.
 */
/*ARGSUSED*/
static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
}
4341 #endif /* i386 */
4342
4343
4344 /*
4345 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4346 * when the PCI-E Completion packets are aligned on an 8-byte
4347 * boundary. Some PCI-E chip sets always align Completion packets; on
4348 * the ones that do not, the alignment can be enforced by enabling
4349 * ECRC generation (if supported).
4350 *
4351 * When PCI-E Completion packets are not aligned, it is actually more
4352 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4353 *
4354 * If the driver can neither enable ECRC nor verify that it has
4355 * already been enabled, then it must use a firmware image which works
4356 * around unaligned completion packets (ethp_z8e.dat), and it should
4357 * also ensure that it never gives the device a Read-DMA which is
4358 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
4359 * enabled, then the driver should use the aligned (eth_z8e.dat)
4360 * firmware image, and set tx.boundary to 4KB.
4361 */
4362
4363
4364 static int
myri10ge_firmware_probe(struct myri10ge_priv * mgp)4365 myri10ge_firmware_probe(struct myri10ge_priv *mgp)
4366 {
4367 int status;
4368
4369 mgp->tx_boundary = 4096;
4370 /*
4371 * Verify the max read request size was set to 4KB
4372 * before trying the test with 4KB.
4373 */
4374 if (mgp->max_read_request_4k == 0)
4375 mgp->tx_boundary = 2048;
4376 /*
4377 * load the optimized firmware which assumes aligned PCIe
4378 * completions in order to see if it works on this host.
4379 */
4380
4381 mgp->fw_name = "rss_eth_z8e";
4382 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4383 mgp->eth_z8e_length = rss_eth_z8e_length;
4384
4385 status = myri10ge_load_firmware(mgp);
4386 if (status != 0) {
4387 return (status);
4388 }
4389 /*
4390 * Enable ECRC if possible
4391 */
4392 myri10ge_enable_nvidia_ecrc(mgp);
4393
4394 /*
4395 * Run a DMA test which watches for unaligned completions and
4396 * aborts on the first one seen.
4397 */
4398 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
4399 if (status == 0)
4400 return (0); /* keep the aligned firmware */
4401
4402 if (status != E2BIG)
4403 cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
4404 mgp->name, status);
4405 if (status == ENOSYS)
4406 cmn_err(CE_WARN, "%s: Falling back to ethp! "
4407 "Please install up to date fw\n", mgp->name);
4408 return (status);
4409 }
4410
4411 static int
myri10ge_select_firmware(struct myri10ge_priv * mgp)4412 myri10ge_select_firmware(struct myri10ge_priv *mgp)
4413 {
4414 int aligned;
4415
4416 aligned = 0;
4417
4418 if (myri10ge_force_firmware == 1) {
4419 if (myri10ge_verbose)
4420 printf("%s: Assuming aligned completions (forced)\n",
4421 mgp->name);
4422 aligned = 1;
4423 goto done;
4424 }
4425
4426 if (myri10ge_force_firmware == 2) {
4427 if (myri10ge_verbose)
4428 printf("%s: Assuming unaligned completions (forced)\n",
4429 mgp->name);
4430 aligned = 0;
4431 goto done;
4432 }
4433
4434 /* If the width is less than 8, we may used the aligned firmware */
4435 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
4436 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
4437 mgp->name, mgp->pcie_link_width);
4438 aligned = 1;
4439 goto done;
4440 }
4441
4442 if (0 == myri10ge_firmware_probe(mgp))
4443 return (0); /* keep optimized firmware */
4444
4445 done:
4446 if (aligned) {
4447 mgp->fw_name = "rss_eth_z8e";
4448 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4449 mgp->eth_z8e_length = rss_eth_z8e_length;
4450 mgp->tx_boundary = 4096;
4451 } else {
4452 mgp->fw_name = "rss_ethp_z8e";
4453 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
4454 mgp->eth_z8e_length = rss_ethp_z8e_length;
4455 mgp->tx_boundary = 2048;
4456 }
4457
4458 return (myri10ge_load_firmware(mgp));
4459 }
4460
4461 static int
myri10ge_add_intrs(struct myri10ge_priv * mgp,int add_handler)4462 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
4463 {
4464 dev_info_t *devinfo = mgp->dip;
4465 int count, avail, actual, intr_types;
4466 int x, y, rc, inum = 0;
4467
4468
4469 rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4470 if (rc != DDI_SUCCESS) {
4471 cmn_err(CE_WARN,
4472 "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name,
4473 rc);
4474 return (DDI_FAILURE);
4475 }
4476
4477 if (!myri10ge_use_msi)
4478 intr_types &= ~DDI_INTR_TYPE_MSI;
4479 if (!myri10ge_use_msix)
4480 intr_types &= ~DDI_INTR_TYPE_MSIX;
4481
4482 if (intr_types & DDI_INTR_TYPE_MSIX) {
4483 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
4484 mgp->intr_type = "MSI-X";
4485 } else if (intr_types & DDI_INTR_TYPE_MSI) {
4486 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
4487 mgp->intr_type = "MSI";
4488 } else {
4489 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
4490 mgp->intr_type = "Legacy";
4491 }
4492 /* Get number of interrupts */
4493 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
4494 if ((rc != DDI_SUCCESS) || (count == 0)) {
4495 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4496 "count: %d", mgp->name, rc, count);
4497
4498 return (DDI_FAILURE);
4499 }
4500
4501 /* Get number of available interrupts */
4502 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
4503 if ((rc != DDI_SUCCESS) || (avail == 0)) {
4504 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
4505 "rc: %d, avail: %d\n", mgp->name, rc, avail);
4506 return (DDI_FAILURE);
4507 }
4508 if (avail < count) {
4509 cmn_err(CE_NOTE,
4510 "!%s: nintrs() returned %d, navail returned %d",
4511 mgp->name, count, avail);
4512 count = avail;
4513 }
4514
4515 if (count < mgp->num_slices)
4516 return (DDI_FAILURE);
4517
4518 if (count > mgp->num_slices)
4519 count = mgp->num_slices;
4520
4521 /* Allocate memory for MSI interrupts */
4522 mgp->intr_size = count * sizeof (ddi_intr_handle_t);
4523 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);
4524
4525 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
4526 count, &actual, DDI_INTR_ALLOC_NORMAL);
4527
4528 if ((rc != DDI_SUCCESS) || (actual == 0)) {
4529 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
4530 mgp->name, rc);
4531
4532 kmem_free(mgp->htable, mgp->intr_size);
4533 mgp->htable = NULL;
4534 return (DDI_FAILURE);
4535 }
4536
4537 if ((actual < count) && myri10ge_verbose) {
4538 cmn_err(CE_NOTE, "%s: got %d/%d slices",
4539 mgp->name, actual, count);
4540 }
4541
4542 mgp->intr_cnt = actual;
4543
4544 /*
4545 * Get priority for first irq, assume remaining are all the same
4546 */
4547 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
4548 != DDI_SUCCESS) {
4549 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);
4550
4551 /* Free already allocated intr */
4552 for (y = 0; y < actual; y++) {
4553 (void) ddi_intr_free(mgp->htable[y]);
4554 }
4555
4556 kmem_free(mgp->htable, mgp->intr_size);
4557 mgp->htable = NULL;
4558 return (DDI_FAILURE);
4559 }
4560
4561 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;
4562
4563 if (!add_handler)
4564 return (DDI_SUCCESS);
4565
4566 /* Call ddi_intr_add_handler() */
4567 for (x = 0; x < actual; x++) {
4568 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
4569 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
4570 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
4571 mgp->name);
4572
4573 /* Free already allocated intr */
4574 for (y = 0; y < actual; y++) {
4575 (void) ddi_intr_free(mgp->htable[y]);
4576 }
4577
4578 kmem_free(mgp->htable, mgp->intr_size);
4579 mgp->htable = NULL;
4580 return (DDI_FAILURE);
4581 }
4582 }
4583
4584 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
4585 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4586 /* Call ddi_intr_block_enable() for MSI */
4587 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
4588 } else {
4589 /* Call ddi_intr_enable() for MSI non block enable */
4590 for (x = 0; x < mgp->intr_cnt; x++) {
4591 (void) ddi_intr_enable(mgp->htable[x]);
4592 }
4593 }
4594
4595 return (DDI_SUCCESS);
4596 }
4597
4598 static void
myri10ge_rem_intrs(struct myri10ge_priv * mgp,int handler_installed)4599 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
4600 {
4601 int x, err;
4602
4603 /* Disable all interrupts */
4604 if (handler_installed) {
4605 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4606 /* Call ddi_intr_block_disable() */
4607 (void) ddi_intr_block_disable(mgp->htable,
4608 mgp->intr_cnt);
4609 } else {
4610 for (x = 0; x < mgp->intr_cnt; x++) {
4611 (void) ddi_intr_disable(mgp->htable[x]);
4612 }
4613 }
4614 }
4615
4616 for (x = 0; x < mgp->intr_cnt; x++) {
4617 if (handler_installed) {
4618 /* Call ddi_intr_remove_handler() */
4619 err = ddi_intr_remove_handler(mgp->htable[x]);
4620 if (err != DDI_SUCCESS) {
4621 cmn_err(CE_WARN,
4622 "%s: ddi_intr_remove_handler for"
4623 "vec %d returned %d\n", mgp->name,
4624 x, err);
4625 }
4626 }
4627 err = ddi_intr_free(mgp->htable[x]);
4628 if (err != DDI_SUCCESS) {
4629 cmn_err(CE_WARN,
4630 "%s: ddi_intr_free for vec %d returned %d\n",
4631 mgp->name, x, err);
4632 }
4633 }
4634 kmem_free(mgp->htable, mgp->intr_size);
4635 mgp->htable = NULL;
4636 }
4637
4638 static void
myri10ge_test_physical(dev_info_t * dip)4639 myri10ge_test_physical(dev_info_t *dip)
4640 {
4641 ddi_dma_handle_t handle;
4642 struct myri10ge_dma_stuff dma;
4643 void *addr;
4644 int err;
4645
4646 /* test #1, sufficient for older sparc systems */
4647 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
4648 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
4649 DDI_DMA_DONTWAIT, NULL, &handle);
4650 if (err == DDI_DMA_BADATTR)
4651 goto fail;
4652 ddi_dma_free_handle(&handle);
4653
4654 /* test #2, required on Olympis where the bind is what fails */
4655 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
4656 &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
4657 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
4658 if (addr == NULL)
4659 goto fail;
4660 myri10ge_dma_free(&dma);
4661 return;
4662
4663 fail:
4664 if (myri10ge_verbose)
4665 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4666 "using IOMMU\n", ddi_get_instance(dip));
4667
4668 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
4669 }
4670
4671 static void
myri10ge_get_props(dev_info_t * dip)4672 myri10ge_get_props(dev_info_t *dip)
4673 {
4674
4675 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4676 "myri10ge_flow_control", myri10ge_flow_control);
4677
4678 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4679 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
4680
4681 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4682 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4683 "myri10ge_nvidia_ecrc_enable", 1);
4684 #endif
4685
4686
4687 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4688 "myri10ge_use_msi", myri10ge_use_msi);
4689
4690 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4691 "myri10ge_deassert_wait", myri10ge_deassert_wait);
4692
4693 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4694 "myri10ge_verbose", myri10ge_verbose);
4695
4696 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4697 "myri10ge_tx_copylen", myri10ge_tx_copylen);
4698
4699 if (myri10ge_tx_copylen < 60) {
4700 cmn_err(CE_WARN,
4701 "myri10ge_tx_copylen must be >= 60 bytes\n");
4702 myri10ge_tx_copylen = 60;
4703 }
4704
4705 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4706 "myri10ge_mtu_override", myri10ge_mtu_override);
4707
4708 if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU &&
4709 myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU)
4710 myri10ge_mtu = myri10ge_mtu_override +
4711 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
4712 else if (myri10ge_mtu_override != 0) {
4713 cmn_err(CE_WARN,
4714 "myri10ge_mtu_override must be between 1500 and "
4715 "9000 bytes\n");
4716 }
4717
4718 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4719 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
4720 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4721 "myri10ge_bigbufs_max", myri10ge_bigbufs_max);
4722
4723 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4724 "myri10ge_watchdog_reset", myri10ge_watchdog_reset);
4725
4726 if (myri10ge_bigbufs_initial < 128) {
4727 cmn_err(CE_WARN,
4728 "myri10ge_bigbufs_initial be at least 128\n");
4729 myri10ge_bigbufs_initial = 128;
4730 }
4731 if (myri10ge_bigbufs_max < 128) {
4732 cmn_err(CE_WARN,
4733 "myri10ge_bigbufs_max be at least 128\n");
4734 myri10ge_bigbufs_max = 128;
4735 }
4736
4737 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
4738 cmn_err(CE_WARN,
4739 "myri10ge_bigbufs_max must be >= "
4740 "myri10ge_bigbufs_initial\n");
4741 myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
4742 }
4743
4744 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4745 "myri10ge_force_firmware", myri10ge_force_firmware);
4746
4747 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4748 "myri10ge_max_slices", myri10ge_max_slices);
4749
4750 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4751 "myri10ge_use_msix", myri10ge_use_msix);
4752
4753 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4754 "myri10ge_rss_hash", myri10ge_rss_hash);
4755
4756 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
4757 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
4758 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n",
4759 myri10ge_rss_hash);
4760 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4761 }
4762 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4763 "myri10ge_lro", myri10ge_lro);
4764 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4765 "myri10ge_lro_cnt", myri10ge_lro_cnt);
4766 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4767 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
4768 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4769 "myri10ge_tx_hash", myri10ge_tx_hash);
4770 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4771 "myri10ge_use_lso", myri10ge_use_lso);
4772 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4773 "myri10ge_lso_copy", myri10ge_lso_copy);
4774 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4775 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
4776 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4777 "myri10ge_small_bytes", myri10ge_small_bytes);
4778 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) {
4779 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n",
4780 myri10ge_small_bytes);
4781 cmn_err(CE_WARN, "must be aligned on 128b bndry -2\n");
4782 myri10ge_small_bytes += 128;
4783 myri10ge_small_bytes &= ~(128 -1);
4784 myri10ge_small_bytes -= MXGEFW_PAD;
4785 cmn_err(CE_WARN, "rounded up to %d\n",
4786 myri10ge_small_bytes);
4787
4788 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4789 }
4790 }
4791
4792 #ifndef PCI_EXP_LNKSTA
4793 #define PCI_EXP_LNKSTA 18
4794 #endif
4795
4796 static int
myri10ge_find_cap(ddi_acc_handle_t handle,uint8_t * capptr,uint8_t capid)4797 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
4798 {
4799 uint16_t status;
4800 uint8_t ptr;
4801
4802 /* check to see if we have capabilities */
4803 status = pci_config_get16(handle, PCI_CONF_STAT);
4804 if (!(status & PCI_STAT_CAP)) {
4805 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
4806 return (ENXIO);
4807 }
4808
4809 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);
4810
4811 /* Walk the capabilities list, looking for a PCI Express cap */
4812 while (ptr != PCI_CAP_NEXT_PTR_NULL) {
4813 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
4814 break;
4815 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
4816 }
4817 if (ptr < 64) {
4818 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
4819 return (ENXIO);
4820 }
4821 *capptr = ptr;
4822 return (0);
4823 }
4824
/*
 * Raise the PCIe Max Read Request Size of the device to 4096 bytes.
 * Returns 0 on success, ENXIO when no PCIe capability is found, or
 * EINVAL when the device did not latch the new value.
 */
static int
myri10ge_set_max_readreq(ddi_acc_handle_t handle)
{
	int err;
	uint16_t val;
	uint8_t ptr;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
	if (err != 0) {
		cmn_err(CE_WARN, "could not find PCIe cap\n");
		return (ENXIO);
	}

	/* set max read req to 4096 (read-modify-write of Device Control) */
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
	    PCIE_DEVCTL_MAX_READ_REQ_4096;
	pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
	/*
	 * Read back to verify.  NOTE(review): this checks only the bits
	 * set in _4096, not the full field mask — a reserved encoding
	 * with those bits set would pass; confirm acceptable.
	 */
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
	    PCIE_DEVCTL_MAX_READ_REQ_4096) {
		cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
		return (EINVAL);
	}
	return (0);
}
4851
4852 static int
myri10ge_read_pcie_link_width(ddi_acc_handle_t handle,int * link)4853 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
4854 {
4855 int err;
4856 uint16_t val;
4857 uint8_t ptr;
4858
4859 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4860 if (err != 0) {
4861 cmn_err(CE_WARN, "could not set max read req\n");
4862 return (ENXIO);
4863 }
4864
4865 /* read link width */
4866 val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
4867 val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
4868 *link = (val >> 4);
4869 return (0);
4870 }
4871
/*
 * Attempt to revive a NIC whose transmit path has stalled.  Returns
 * 1 when the NIC is (believed) usable again and the watchdog should
 * rearm, 0 when the NIC is dead or the restart failed.
 */
static int
myri10ge_reset_nic(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	uint32_t reboot;
	uint16_t cmd;
	int err;

	cmd = pci_config_get16(handle, PCI_CONF_COMM);
	if ((cmd & PCI_COMM_ME) == 0) {
		/*
		 * Bus master DMA disabled? Check to see if the card
		 * rebooted due to a parity error For now, just report
		 * it
		 */

		/* enter read32 mode */
		pci_config_put8(handle, mgp->vso + 0x10, 0x3);
		/* read REBOOT_STATUS (0xfffffff0) */
		pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
		/*
		 * NOTE(review): only 16 bits are read into the 32-bit
		 * `reboot' status — confirm this is intended.
		 */
		reboot = pci_config_get16(handle, mgp->vso + 0x14);
		cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
		return (0);
	}
	/* resets are disabled by default; only report the stall */
	if (!myri10ge_watchdog_reset) {
		cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
		return (1);
	}

	/* full stop/start cycle reloads firmware and rings */
	myri10ge_stop_locked(mgp);
	err = myri10ge_start_locked(mgp);
	if (err == DDI_FAILURE) {
		return (0);
	}
	/* tell MAC that transmits may flow again */
	mac_tx_update(mgp->mh);
	return (1);
}
4909
4910 static inline int
myri10ge_ring_stalled(myri10ge_tx_ring_t * tx)4911 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
4912 {
4913 if (tx->sched != tx->stall &&
4914 tx->done == tx->watchdog_done &&
4915 tx->watchdog_req != tx->watchdog_done)
4916 return (1);
4917 return (0);
4918 }
4919
4920 static void
myri10ge_watchdog(void * arg)4921 myri10ge_watchdog(void *arg)
4922 {
4923 struct myri10ge_priv *mgp;
4924 struct myri10ge_slice_state *ss;
4925 myri10ge_tx_ring_t *tx;
4926 int nic_ok = 1;
4927 int slices_stalled, rx_pause, i;
4928 int add_rx;
4929
4930 mgp = arg;
4931 mutex_enter(&mgp->intrlock);
4932 if (mgp->running != MYRI10GE_ETH_RUNNING) {
4933 cmn_err(CE_WARN,
4934 "%s not running, not rearming watchdog (%d)\n",
4935 mgp->name, mgp->running);
4936 mutex_exit(&mgp->intrlock);
4937 return;
4938 }
4939
4940 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);
4941
4942 /*
4943 * make sure nic is stalled before we reset the nic, so as to
4944 * ensure we don't rip the transmit data structures out from
4945 * under a pending transmit
4946 */
4947
4948 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
4949 tx = &mgp->ss[i].tx;
4950 slices_stalled = myri10ge_ring_stalled(tx);
4951 if (slices_stalled)
4952 break;
4953 }
4954
4955 if (slices_stalled) {
4956 if (mgp->watchdog_rx_pause == rx_pause) {
4957 cmn_err(CE_WARN,
4958 "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
4959 mgp->name, i, tx->sched, tx->stall,
4960 tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
4961 (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
4962 nic_ok = myri10ge_reset_nic(mgp);
4963 } else {
4964 cmn_err(CE_WARN,
4965 "%s Flow controlled, check link partner\n",
4966 mgp->name);
4967 }
4968 }
4969
4970 if (!nic_ok) {
4971 cmn_err(CE_WARN,
4972 "%s Nic dead, not rearming watchdog\n", mgp->name);
4973 mutex_exit(&mgp->intrlock);
4974 return;
4975 }
4976 for (i = 0; i < mgp->num_slices; i++) {
4977 ss = &mgp->ss[i];
4978 tx = &ss->tx;
4979 tx->watchdog_done = tx->done;
4980 tx->watchdog_req = tx->req;
4981 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
4982 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
4983 add_rx =
4984 min(ss->jpool.num_alloc,
4985 myri10ge_bigbufs_max -
4986 (ss->jpool.num_alloc -
4987 ss->jbufs_for_smalls));
4988 if (add_rx != 0) {
4989 (void) myri10ge_add_jbufs(ss, add_rx, 0);
4990 /* now feed them to the firmware */
4991 mutex_enter(&ss->jpool.mtx);
4992 myri10ge_restock_jumbos(ss);
4993 mutex_exit(&ss->jpool.mtx);
4994 }
4995 }
4996 }
4997 mgp->watchdog_rx_pause = rx_pause;
4998
4999 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
5000 mgp->timer_ticks);
5001 mutex_exit(&mgp->intrlock);
5002 }
5003
/*ARGSUSED*/
/*
 * ndd get handler: print the instance's current interrupt coalescing
 * delay into the reply mblk.
 */
static int
myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)

{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	(void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
	return (0);
}
5013
/*ARGSUSED*/
/*
 * ndd set handler: parse a new interrupt coalescing delay and push it
 * to the firmware through the shared intr_coal_delay_ptr location.
 * Returns EINVAL when the value string contains no digits.
 */
static int
myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
    caddr_t cp, cred_t *credp)

{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	char *end;
	size_t new_value;

	/*
	 * NOTE(review): no range validation — negative or very large
	 * inputs are truncated to int and handed to the firmware
	 * as-is; confirm that is acceptable.
	 */
	new_value = mi_strtol(value, &end, 10);
	if (end == value)
		return (EINVAL);

	mutex_enter(&myri10ge_param_lock);
	mgp->intr_coal_delay = (int)new_value;
	/* the firmware expects the delay in network byte order */
	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	mutex_exit(&myri10ge_param_lock);
	return (0);
}
5034
/*ARGSUSED*/
/*
 * ndd get handler: print the current flow-control (pause) setting
 * into the reply mblk.
 */
static int
myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)

{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	(void) mi_mpprintf(mp, "%d", mgp->pause);
	return (0);
}
5044
5045 /*ARGSUSED*/
5046 static int
myri10ge_set_pauseparam(queue_t * q,mblk_t * mp,char * value,caddr_t cp,cred_t * credp)5047 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
5048 caddr_t cp, cred_t *credp)
5049
5050 {
5051 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5052 char *end;
5053 size_t new_value;
5054 int err = 0;
5055
5056 new_value = mi_strtol(value, &end, 10);
5057 if (end == value)
5058 return (EINVAL);
5059 if (new_value != 0)
5060 new_value = 1;
5061
5062 mutex_enter(&myri10ge_param_lock);
5063 if (new_value != mgp->pause)
5064 err = myri10ge_change_pause(mgp, new_value);
5065 mutex_exit(&myri10ge_param_lock);
5066 return (err);
5067 }
5068
/*ARGSUSED*/
/*
 * Generic ndd get handler: cp points directly at an int tunable
 * whose current value is printed into the reply mblk.
 */
static int
myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)

{
	(void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
	return (0);
}
5077
5078 /*ARGSUSED*/
5079 static int
myri10ge_set_int(queue_t * q,mblk_t * mp,char * value,caddr_t cp,cred_t * credp)5080 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
5081 caddr_t cp, cred_t *credp)
5082
5083 {
5084 char *end;
5085 size_t new_value;
5086
5087 new_value = mi_strtol(value, &end, 10);
5088 if (end == value)
5089 return (EINVAL);
5090 *(int *)(void *)cp = new_value;
5091
5092 return (0);
5093 }
5094
5095 static void
myri10ge_ndd_init(struct myri10ge_priv * mgp)5096 myri10ge_ndd_init(struct myri10ge_priv *mgp)
5097 {
5098 mgp->nd_head = NULL;
5099
5100 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
5101 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
5102 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
5103 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
5104 (void) nd_load(&mgp->nd_head, "myri10ge_verbose",
5105 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
5106 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
5107 myri10ge_get_int, myri10ge_set_int,
5108 (caddr_t)&myri10ge_deassert_wait);
5109 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
5110 myri10ge_get_int, myri10ge_set_int,
5111 (caddr_t)&myri10ge_bigbufs_max);
5112 (void) nd_load(&mgp->nd_head, "myri10ge_lro",
5113 myri10ge_get_int, myri10ge_set_int,
5114 (caddr_t)&myri10ge_lro);
5115 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
5116 myri10ge_get_int, myri10ge_set_int,
5117 (caddr_t)&myri10ge_lro_max_aggr);
5118 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
5119 myri10ge_get_int, myri10ge_set_int,
5120 (caddr_t)&myri10ge_tx_hash);
5121 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
5122 myri10ge_get_int, myri10ge_set_int,
5123 (caddr_t)&myri10ge_lso_copy);
5124 }
5125
/*
 * Free every ndd parameter node registered by myri10ge_ndd_init().
 */
static void
myri10ge_ndd_fini(struct myri10ge_priv *mgp)
{
	nd_free(&mgp->nd_head);
}
5131
5132 static void
myri10ge_m_ioctl(void * arg,queue_t * wq,mblk_t * mp)5133 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
5134 {
5135 struct iocblk *iocp;
5136 struct myri10ge_priv *mgp = arg;
5137 int cmd, ok, err;
5138
5139 iocp = (struct iocblk *)(void *)mp->b_rptr;
5140 cmd = iocp->ioc_cmd;
5141
5142 ok = 0;
5143 err = 0;
5144
5145 switch (cmd) {
5146 case ND_GET:
5147 case ND_SET:
5148 ok = nd_getset(wq, mgp->nd_head, mp);
5149 break;
5150 default:
5151 break;
5152 }
5153 if (!ok)
5154 err = EINVAL;
5155 else
5156 err = iocp->ioc_error;
5157
5158 if (!err)
5159 miocack(wq, mp, iocp->ioc_count, err);
5160 else
5161 miocnak(wq, mp, 0, err);
5162 }
5163
5164 static struct myri10ge_priv *mgp_list;
5165
5166 struct myri10ge_priv *
myri10ge_get_instance(uint_t unit)5167 myri10ge_get_instance(uint_t unit)
5168 {
5169 struct myri10ge_priv *mgp;
5170
5171 mutex_enter(&myri10ge_param_lock);
5172 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
5173 if (unit == ddi_get_instance(mgp->dip)) {
5174 mgp->refcnt++;
5175 break;
5176 }
5177 }
5178 mutex_exit(&myri10ge_param_lock);
5179 return (mgp);
5180 }
5181
/*
 * Drop a reference previously taken by myri10ge_get_instance().
 */
void
myri10ge_put_instance(struct myri10ge_priv *mgp)
{
	mutex_enter(&myri10ge_param_lock);
	mgp->refcnt--;
	mutex_exit(&myri10ge_param_lock);
}
5189
/*
 * GLDv3 capability query entry point.  Advertises partial hardware
 * checksum, per-slice RX/TX rings, and basic TCP/IPv4 LSO (when the
 * tunable is on and the firmware reports TSO support).  Returns
 * B_TRUE when the capability is supported and cap_data was filled in.
 */
static boolean_t
myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	struct myri10ge_priv *mgp = arg;
	uint32_t *cap_hcksum;
	mac_capab_lso_t *cap_lso;
	mac_capab_rings_t *cap_rings;

	switch (cap) {
	case MAC_CAPAB_HCKSUM:
		cap_hcksum = cap_data;
		/* hardware provides a partial (one's complement) csum */
		*cap_hcksum = HCKSUM_INET_PARTIAL;
		break;
	case MAC_CAPAB_RINGS:
		cap_rings = cap_data;
		switch (cap_rings->mr_type) {
		case MAC_RING_TYPE_RX:
			/* one RX ring per slice, in a single group */
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 1;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = myri10ge_fill_group;
			break;
		case MAC_RING_TYPE_TX:
			/* one TX ring per slice, no groups */
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 0;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = NULL;
			break;
		default:
			return (B_FALSE);
		}
		break;
	case MAC_CAPAB_LSO:
		cap_lso = cap_data;
		/* LSO needs both the tunable and firmware TSO support */
		if (!myri10ge_use_lso)
			return (B_FALSE);
		if (!(mgp->features & MYRI10GE_TSO))
			return (B_FALSE);
		cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
		cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
		break;

	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}
5239
5240
/*
 * GLDv3 statistics callback.  Depending on the statistic requested,
 * either sums the per-slice software counters (rx/tx ring stats) or
 * reads the firmware-maintained counters from the slice 0 stats
 * block, storing the result in *val.  Returns ENOTSUP for stats
 * this driver does not maintain.
 */
static int
myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	struct myri10ge_priv *mgp = arg;
	struct myri10ge_rx_ring_stats *rstat;
	struct myri10ge_tx_ring_stats *tstat;
	/* firmware DMAs link/drop counters into slice 0's stats block */
	mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
	struct myri10ge_slice_state *ss;
	uint64_t tmp = 0;
	int i;

	switch (stat) {
	case MAC_STAT_IFSPEED:
		/* fixed 10Gb/s link, reported in bits per second */
		*val = 10ull * 1000ull * 1000000ull;
		break;

	case MAC_STAT_MULTIRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->multircv;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->brdcstrcv;
		}
		*val = tmp;
		break;

	case MAC_STAT_MULTIXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->multixmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->brdcstxmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_NORCVBUF:
		/* firmware counters are big-endian; combine with the
		 * per-slice software "no buffer" counters */
		tmp = ntohl(fw_stats->dropped_no_big_buffer);
		tmp += ntohl(fw_stats->dropped_no_small_buffer);
		tmp += ntohl(fw_stats->dropped_link_overflow);
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
			tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
		}
		*val = tmp;
		break;

	case MAC_STAT_IERRORS:
		tmp += ntohl(fw_stats->dropped_bad_crc32);
		tmp += ntohl(fw_stats->dropped_bad_phy);
		tmp += ntohl(fw_stats->dropped_runt);
		tmp += ntohl(fw_stats->dropped_overrun);
		*val = tmp;
		break;

	case MAC_STAT_OERRORS:
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
			tmp += MYRI10GE_SLICE_STAT(xmit_err);
		}
		*val = tmp;
		break;

	case MAC_STAT_RBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ibytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_IPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ipackets;
		}
		*val = tmp;
		break;

	case MAC_STAT_OBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->obytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_OPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->opackets;
		}
		*val = tmp;
		break;

	case ETHER_STAT_TOOLONG_ERRORS:
		*val = ntohl(fw_stats->dropped_overrun);
		break;

#ifdef SOLARIS_S11
	case ETHER_STAT_TOOSHORT_ERRORS:
		*val = ntohl(fw_stats->dropped_runt);
		break;
#endif

	case ETHER_STAT_LINK_PAUSE:
		*val = mgp->pause;
		break;

	case ETHER_STAT_LINK_AUTONEG:
		/* always reported as autonegotiated */
		*val = 1;
		break;

	case ETHER_STAT_LINK_DUPLEX:
		/* 10GbE is always full duplex */
		*val = LINK_DUPLEX_FULL;
		break;

	default:
		return (ENOTSUP);
	}

	return (0);
}
5378
5379 /* ARGSUSED */
5380 static void
myri10ge_m_propinfo(void * arg,const char * pr_name,mac_prop_id_t pr_num,mac_prop_info_handle_t prh)5381 myri10ge_m_propinfo(void *arg, const char *pr_name,
5382 mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
5383 {
5384 switch (pr_num) {
5385 case MAC_PROP_MTU:
5386 mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU);
5387 mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU,
5388 MYRI10GE_MAX_GLD_MTU);
5389 break;
5390 default:
5391 break;
5392 }
5393 }
5394
5395 /*ARGSUSED*/
5396 static int
myri10ge_m_setprop(void * arg,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,const void * pr_val)5397 myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
5398 uint_t pr_valsize, const void *pr_val)
5399 {
5400 int err = 0;
5401 struct myri10ge_priv *mgp = arg;
5402
5403 switch (pr_num) {
5404 case MAC_PROP_MTU: {
5405 uint32_t mtu;
5406 if (pr_valsize < sizeof (mtu)) {
5407 err = EINVAL;
5408 break;
5409 }
5410 bcopy(pr_val, &mtu, sizeof (mtu));
5411 if (mtu > MYRI10GE_MAX_GLD_MTU ||
5412 mtu < MYRI10GE_MIN_GLD_MTU) {
5413 err = EINVAL;
5414 break;
5415 }
5416
5417 mutex_enter(&mgp->intrlock);
5418 if (mgp->running != MYRI10GE_ETH_STOPPED) {
5419 err = EBUSY;
5420 mutex_exit(&mgp->intrlock);
5421 break;
5422 }
5423
5424 myri10ge_mtu = mtu + sizeof (struct ether_header) +
5425 MXGEFW_PAD + VLAN_TAGSZ;
5426 mutex_exit(&mgp->intrlock);
5427 break;
5428 }
5429 default:
5430 err = ENOTSUP;
5431 break;
5432 }
5433
5434 return (err);
5435 }
5436
/*
 * GLDv3 callback vector, registered via mac_register() in
 * myri10ge_attach().  Entries are positional (mac_callbacks_t);
 * the NULLs fill optional callbacks this driver does not provide.
 * NOTE(review): trailing field names below follow the illumos
 * mac_callbacks_t layout -- confirm against the mac_provider.h in
 * use if fields are added or reordered.
 */
static mac_callbacks_t myri10ge_m_callbacks = {
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO),
	myri10ge_m_stat,	/* mc_getstat */
	myri10ge_m_start,	/* mc_start */
	myri10ge_m_stop,	/* mc_stop */
	myri10ge_m_promisc,	/* mc_setpromisc */
	myri10ge_m_multicst,	/* mc_multicst */
	NULL,			/* mc_unicst (rings provide addresses) */
	NULL,			/* mc_tx (ring-based transmit) */
	NULL,			/* reserved */
	myri10ge_m_ioctl,	/* mc_ioctl */
	myri10ge_m_getcapab,	/* mc_getcapab */
	NULL,			/* mc_open */
	NULL,			/* mc_close */
	myri10ge_m_setprop,	/* mc_setprop */
	NULL,			/* mc_getprop */
	myri10ge_m_propinfo	/* mc_propinfo */
};
5455
5456
/*
 * Determine how many slices (rx/tx ring pairs) to use.  Defaults to
 * a single slice; when MSI-X is enabled, queries the firmware for
 * the maximum RSS queue count, caps it by the admin tunable and the
 * online CPU count, and then shrinks it until a matching number of
 * MSI-X vectors can actually be allocated.  Returns 0 on success
 * (possibly with num_slices reduced), ENXIO on firmware failure.
 */
static int
myri10ge_probe_slices(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int status;

	mgp->num_slices = 1;

	/* hit the board with a reset to ensure it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* without MSI-X there is no point in multiple slices */
	if (myri10ge_use_msix == 0)
		return (0);

	/* tell it the size of the interrupt queues */
	cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	/* ask the maximum number of slices it supports */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
	    &cmd);
	if (status != 0)
		/* firmware without RSS support: stay at one slice */
		return (0);

	mgp->num_slices = cmd.data0;

	/*
	 * if the admin did not specify a limit to how many
	 * slices we should use, cap it automatically to the
	 * number of CPUs currently online
	 */
	if (myri10ge_max_slices == -1)
		myri10ge_max_slices = ncpus;

	if (mgp->num_slices > myri10ge_max_slices)
		mgp->num_slices = myri10ge_max_slices;


	/*
	 * Now try to allocate as many MSI-X vectors as we have
	 * slices. We give up on MSI-X if we can only get a single
	 * vector.
	 */
	while (mgp->num_slices > 1) {
		/* make sure it is a power of two */
		while (!ISP2(mgp->num_slices))
			mgp->num_slices--;
		if (mgp->num_slices == 1)
			return (0);

		/* trial allocation, released immediately below */
		status = myri10ge_add_intrs(mgp, 0);
		if (status == 0) {
			myri10ge_rem_intrs(mgp, 0);
			if (mgp->intr_cnt == mgp->num_slices) {
				if (myri10ge_verbose)
					printf("Got %d slices!\n",
					    mgp->num_slices);
				return (0);
			}
			/* got fewer vectors than asked; retry with that */
			mgp->num_slices = mgp->intr_cnt;
		} else {
			/* allocation failed outright; halve and retry */
			mgp->num_slices = mgp->num_slices / 2;
		}
	}

	if (myri10ge_verbose)
		printf("Got %d slices\n", mgp->num_slices);
	return (0);
}
5536
5537 static void
myri10ge_lro_free(struct myri10ge_slice_state * ss)5538 myri10ge_lro_free(struct myri10ge_slice_state *ss)
5539 {
5540 struct lro_entry *lro;
5541
5542 while (ss->lro_free != NULL) {
5543 lro = ss->lro_free;
5544 ss->lro_free = lro->next;
5545 kmem_free(lro, sizeof (*lro));
5546 }
5547 }
5548
5549 static void
myri10ge_lro_alloc(struct myri10ge_slice_state * ss)5550 myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
5551 {
5552 struct lro_entry *lro;
5553 int idx;
5554
5555 ss->lro_free = NULL;
5556 ss->lro_active = NULL;
5557
5558 for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
5559 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
5560 if (lro == NULL)
5561 continue;
5562 lro->next = ss->lro_free;
5563 ss->lro_free = lro;
5564 }
5565 }
5566
5567 static void
myri10ge_free_slices(struct myri10ge_priv * mgp)5568 myri10ge_free_slices(struct myri10ge_priv *mgp)
5569 {
5570 struct myri10ge_slice_state *ss;
5571 size_t bytes;
5572 int i;
5573
5574 if (mgp->ss == NULL)
5575 return;
5576
5577 for (i = 0; i < mgp->num_slices; i++) {
5578 ss = &mgp->ss[i];
5579 if (ss->rx_done.entry == NULL)
5580 continue;
5581 myri10ge_dma_free(&ss->rx_done.dma);
5582 ss->rx_done.entry = NULL;
5583 if (ss->fw_stats == NULL)
5584 continue;
5585 myri10ge_dma_free(&ss->fw_stats_dma);
5586 ss->fw_stats = NULL;
5587 mutex_destroy(&ss->rx_lock);
5588 mutex_destroy(&ss->tx.lock);
5589 mutex_destroy(&ss->tx.handle_lock);
5590 mutex_destroy(&ss->poll_lock);
5591 myri10ge_jpool_fini(ss);
5592 myri10ge_slice_stat_destroy(ss);
5593 myri10ge_lro_free(ss);
5594 }
5595 bytes = sizeof (*mgp->ss) * mgp->num_slices;
5596 kmem_free(mgp->ss, bytes);
5597 mgp->ss = NULL;
5598 }
5599
5600
/*
 * Allocate and initialize per-slice state: the firmware stats DMA
 * block, the rx completion ("rx done") ring, the slice locks, the
 * jumbo buffer pool, kstats and the LRO free list.  On any DMA
 * allocation failure, everything allocated so far is torn down via
 * myri10ge_free_slices() and ENOMEM is returned.
 */
static int
myri10ge_alloc_slices(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss;
	size_t bytes;
	int i;

	bytes = sizeof (*mgp->ss) * mgp->num_slices;
	mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
	/* NOTE(review): KM_SLEEP cannot return NULL; this check is dead */
	if (mgp->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];

		ss->mgp = mgp;

		/* allocate the per-slice firmware stats */
		bytes = sizeof (*ss->fw_stats);
		ss->fw_stats = (mcp_irq_data_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
		if (ss->fw_stats == NULL)
			goto abort;
		(void) memset(ss->fw_stats, 0, bytes);

		/* allocate rx done ring */
		bytes = mgp->max_intr_slots *
		    sizeof (*ss->rx_done.entry);
		ss->rx_done.entry = (mcp_slot_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
		if (ss->rx_done.entry == NULL) {
			goto abort;
		}
		(void) memset(ss->rx_done.entry, 0, bytes);
		/* rx_lock is taken from interrupt context; use icookie */
		mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie);
		mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL);
		myri10ge_jpool_init(ss);
		(void) myri10ge_slice_stat_init(ss);
		myri10ge_lro_alloc(ss);
	}

	return (0);

abort:
	/* frees only what the pointers show was allocated */
	myri10ge_free_slices(mgp);
	return (ENOMEM);
}
5655
/*
 * Save the MSI capability registers (control, address and both data
 * registers) from PCI config space into pci_saved_state so they can
 * be restored after a suspend/resume cycle.  Returns DDI_FAILURE if
 * the MSI capability cannot be located.
 */
static int
myri10ge_save_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
	uint8_t ptr;		/* config-space offset of the MSI cap */
	int err;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
	if (err != 0) {
		cmn_err(CE_WARN, "%s: could not find MSI cap\n",
		    mgp->name);
		return (DDI_FAILURE);
	}
	mgp->pci_saved_state.msi_ctrl =
	    pci_config_get16(handle, ptr + PCI_MSI_CTRL);
	mgp->pci_saved_state.msi_addr_low =
	    pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
	mgp->pci_saved_state.msi_addr_high =
	    pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
	/* both data registers are saved; only one is live depending
	 * on whether the device uses 32- or 64-bit MSI addressing */
	mgp->pci_saved_state.msi_data_32 =
	    pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
	mgp->pci_saved_state.msi_data_64 =
	    pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
	return (DDI_SUCCESS);
}
5681
/*
 * Restore the MSI capability registers previously captured by
 * myri10ge_save_msi_state().  Returns DDI_FAILURE if the MSI
 * capability cannot be located.
 */
static int
myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
	uint8_t ptr;		/* config-space offset of the MSI cap */
	int err;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
	if (err != 0) {
		cmn_err(CE_WARN, "%s: could not find MSI cap\n",
		    mgp->name);
		return (DDI_FAILURE);
	}

	pci_config_put16(handle, ptr + PCI_MSI_CTRL,
	    mgp->pci_saved_state.msi_ctrl);
	pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
	    mgp->pci_saved_state.msi_addr_low);
	pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
	    mgp->pci_saved_state.msi_addr_high);
	pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
	    mgp->pci_saved_state.msi_data_32);
	pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
	    mgp->pci_saved_state.msi_data_64);

	return (DDI_SUCCESS);
}
5709
5710 static int
myri10ge_save_pci_state(struct myri10ge_priv * mgp)5711 myri10ge_save_pci_state(struct myri10ge_priv *mgp)
5712 {
5713 ddi_acc_handle_t handle = mgp->cfg_hdl;
5714 int i;
5715 int err = DDI_SUCCESS;
5716
5717
5718 /* Save the non-extended PCI config space 32-bits at a time */
5719 for (i = 0; i < 16; i++)
5720 mgp->pci_saved_state.base[i] =
5721 pci_config_get32(handle, i*4);
5722
5723 /* now save MSI interrupt state *, if needed */
5724 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5725 err = myri10ge_save_msi_state(mgp, handle);
5726
5727 return (err);
5728 }
5729
5730 static int
myri10ge_restore_pci_state(struct myri10ge_priv * mgp)5731 myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
5732 {
5733 ddi_acc_handle_t handle = mgp->cfg_hdl;
5734 int i;
5735 int err = DDI_SUCCESS;
5736
5737
5738 /* Restore the non-extended PCI config space 32-bits at a time */
5739 for (i = 15; i >= 0; i--)
5740 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);
5741
5742 /* now restore MSI interrupt state *, if needed */
5743 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5744 err = myri10ge_restore_msi_state(mgp, handle);
5745
5746 if (mgp->max_read_request_4k)
5747 (void) myri10ge_set_max_readreq(handle);
5748 return (err);
5749 }
5750
5751
/*
 * DDI_SUSPEND handler.  If the NIC is running, stop it (remembering
 * that it was running so resume can restart it) and then save PCI
 * config state so it can be restored on DDI_RESUME.
 */
static int
myri10ge_suspend(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	mutex_enter(&mgp->intrlock);
	if (mgp->running == MYRI10GE_ETH_RUNNING) {
		/* mark STOPPING so other paths see the transition */
		mgp->running = MYRI10GE_ETH_STOPPING;
		/*
		 * intrlock is dropped around untimeout(); NOTE(review):
		 * presumably so a concurrently-running watchdog (which
		 * may take intrlock) can finish -- confirm against
		 * myri10ge_watchdog's locking.
		 */
		mutex_exit(&mgp->intrlock);
		(void) untimeout(mgp->timer_id);
		mutex_enter(&mgp->intrlock);
		myri10ge_stop_locked(mgp);
		/* remember it was running so resume restarts it */
		mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
	}
	status = myri10ge_save_pci_state(mgp);
	mutex_exit(&mgp->intrlock);
	return (status);
}
5779
/*
 * DDI_RESUME handler.  Restores PCI config state and, if the NIC was
 * running when it was suspended, restarts it and re-arms the
 * watchdog timer.
 */
static int
myri10ge_resume(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status = DDI_SUCCESS;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}

	mutex_enter(&mgp->intrlock);
	status = myri10ge_restore_pci_state(mgp);
	/* restart only if it was running at suspend time */
	if (status == DDI_SUCCESS &&
	    mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
		status = myri10ge_start_locked(mgp);
	}
	mutex_exit(&mgp->intrlock);
	if (status != DDI_SUCCESS)
		return (status);

	/* start the watchdog timer */
	mgp->timer_id = timeout(myri10ge_watchdog, mgp,
	    mgp->timer_ticks);
	return (DDI_SUCCESS);
}
5810
/*
 * DDI attach entry point.  For DDI_RESUME, delegates to
 * myri10ge_resume().  For DDI_ATTACH, performs the full bring-up:
 * maps PCI config and device memory, reads the MAC address and
 * EEPROM strings from SRAM, loads firmware, probes and allocates
 * slices, installs interrupts, creates kstats/ndd nodes, and
 * registers with the GLDv3 MAC layer.  Errors unwind through the
 * labeled abort path in reverse order of acquisition.
 */
static int
myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{

	struct myri10ge_priv *mgp;
	mac_register_t *macp, *omacp;
	ddi_acc_handle_t handle;
	uint32_t csr, hdr_offset;
	int status, span, link_width, max_read_request_4k;
	unsigned long bus_number, dev_number, func_number;
	size_t bytes;
	offset_t ss_offset;
	uint8_t vso;

	if (cmd == DDI_RESUME) {
		return (myri10ge_resume(dip));
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* enable busmaster and io space access */
	csr = pci_config_get32(handle, PCI_CONF_COMM);
	pci_config_put32(handle, PCI_CONF_COMM,
	    (csr |PCI_COMM_ME|PCI_COMM_MAE));
	status = myri10ge_read_pcie_link_width(handle, &link_width);
	if (status != 0) {
		cmn_err(CE_WARN, "could not read link width!\n");
		link_width = 0;
	}
	/* nonzero iff the max read request could be raised to 4KB */
	max_read_request_4k = !myri10ge_set_max_readreq(handle);
	status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
	if (status != 0)
		goto abort_with_cfg_hdl;
	if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
		goto abort_with_cfg_hdl;
	/*
	 * XXXX Hack: mac_register_t grows in newer kernels. To be
	 * able to write newer fields, such as m_margin, without
	 * writing outside allocated memory, we allocate our own macp
	 * and pass that to mac_register()
	 */
	macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
	macp->m_version = omacp->m_version;

	/* NOTE(review): KM_SLEEP cannot return NULL; check is dead */
	if ((mgp = (struct myri10ge_priv *)
	    kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
		goto abort_with_macinfo;
	}
	ddi_set_driver_private(dip, mgp);

	/* setup device name for log messages */
	(void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));

	mutex_enter(&myri10ge_param_lock);
	myri10ge_get_props(dip);
	mgp->intr_coal_delay = myri10ge_intr_coal_delay;
	mgp->pause = myri10ge_flow_control;
	mutex_exit(&myri10ge_param_lock);

	mgp->max_read_request_4k = max_read_request_4k;
	mgp->pcie_link_width = link_width;
	mgp->running = MYRI10GE_ETH_STOPPED;
	mgp->vso = vso;
	mgp->dip = dip;
	mgp->cfg_hdl = handle;

	mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
	myri10ge_test_physical(dip);

	/* allocate command page */
	bytes = sizeof (*mgp->cmd);
	mgp->cmd = (mcp_cmd_response_t *)
	    (void *)myri10ge_dma_alloc(dip, bytes,
	    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
	    &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
	if (mgp->cmd == NULL)
		goto abort_with_mgp;

	(void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
	    &dev_number, &func_number);
	if (myri10ge_verbose)
		printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
		    bus_number, dev_number, func_number);
	/* map the NIC's SRAM window */
	status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
	    (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr,
	    &mgp->io_handle);
	if (status != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
		printf("%s: reg_set = %d, span = %d, status = %d",
		    mgp->name, mgp->reg_set, span, status);
		goto abort_with_mgp;
	}

	/* locate the firmware header, then the EEPROM strings which
	 * sit at the end of SRAM (size comes from string_specs) */
	hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET);
	hdr_offset = ntohl(hdr_offset) & 0xffffc;
	ss_offset = hdr_offset +
	    offsetof(struct mcp_gen_header, string_specs);
	mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
	myri10ge_pio_copy32(mgp->eeprom_strings,
	    (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
	    MYRI10GE_EEPROM_STRINGS_SIZE);
	/* guarantee double NUL termination of the string block */
	(void) memset(mgp->eeprom_strings +
	    MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);

	status = myri10ge_read_mac_addr(mgp);
	if (status) {
		goto abort_with_mapped;
	}

	status = myri10ge_select_firmware(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
		goto abort_with_mapped;
	}

	status = myri10ge_probe_slices(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
		goto abort_with_dummy_rdma;
	}

	status = myri10ge_alloc_slices(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
		goto abort_with_dummy_rdma;
	}

	/* add the interrupt handler */
	status = myri10ge_add_intrs(mgp, 1);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
		    mgp->name);
		goto abort_with_slices;
	}

	/* now that we have an iblock_cookie, init the mutexes */
	mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
	mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);


	status = myri10ge_nic_stat_init(mgp);
	if (status != DDI_SUCCESS)
		goto abort_with_interrupts;
	status = myri10ge_info_init(mgp);
	if (status != DDI_SUCCESS)
		goto abort_with_stats;

	/*
	 * Initialize GLD state
	 */

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = mgp;
	macp->m_dip = dip;
	macp->m_src_addr = mgp->mac_addr;
	macp->m_callbacks = &myri10ge_m_callbacks;
	macp->m_min_sdu = 0;
	/* advertise payload MTU (strip L2 header, pad and VLAN tag) */
	macp->m_max_sdu = myri10ge_mtu -
	    (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
#ifdef SOLARIS_S11
	macp->m_margin = VLAN_TAGSZ;
#endif
	macp->m_v12n = MAC_VIRT_LEVEL1;
	status = mac_register(macp, &mgp->mh);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
		    mgp->name, status);
		goto abort_with_info;
	}
	myri10ge_ndd_init(mgp);
	if (myri10ge_verbose)
		printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
		    mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
	/* link the new instance onto the global list */
	mutex_enter(&myri10ge_param_lock);
	mgp->next = mgp_list;
	mgp_list = mgp;
	mutex_exit(&myri10ge_param_lock);
	kmem_free(macp, sizeof (*macp) * 8);
	mac_free(omacp);
	return (DDI_SUCCESS);

abort_with_info:
	myri10ge_info_destroy(mgp);

abort_with_stats:
	myri10ge_nic_stat_destroy(mgp);

abort_with_interrupts:
	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);
	myri10ge_rem_intrs(mgp, 1);

abort_with_slices:
	myri10ge_free_slices(mgp);

abort_with_dummy_rdma:
	myri10ge_dummy_rdma(mgp, 0);

abort_with_mapped:
	ddi_regs_map_free(&mgp->io_handle);

	myri10ge_dma_free(&mgp->cmd_dma);

abort_with_mgp:
	kmem_free(mgp, sizeof (*mgp));

abort_with_macinfo:
	kmem_free(macp, sizeof (*macp) * 8);
	mac_free(omacp);

abort_with_cfg_hdl:
	pci_config_teardown(&handle);
	return (DDI_FAILURE);

}
6030
6031
/*
 * DDI detach entry point.  For DDI_SUSPEND, delegates to
 * myri10ge_suspend().  For DDI_DETACH, refuses to detach while rx
 * buffers are still loaned upstream or external references exist,
 * then unregisters from the MAC layer and tears everything down in
 * reverse order of myri10ge_attach().
 */
static int
myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct myri10ge_priv *mgp, *tmp;
	int status, i, jbufs_alloced;

	if (cmd == DDI_SUSPEND) {
		status = myri10ge_suspend(dip);
		return (status);
	}

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	/* Get the driver private (gld_mac_info_t) structure */
	mgp = ddi_get_driver_private(dip);

	/* refuse to detach while rx buffers are loaned upstream */
	mutex_enter(&mgp->intrlock);
	jbufs_alloced = 0;
	for (i = 0; i < mgp->num_slices; i++) {
		myri10ge_remove_jbufs(&mgp->ss[i]);
		jbufs_alloced += mgp->ss[i].jpool.num_alloc;
	}
	mutex_exit(&mgp->intrlock);
	if (jbufs_alloced != 0) {
		cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
		    mgp->name, jbufs_alloced);
		return (DDI_FAILURE);
	}

	/* refuse to detach while myri10ge_get_instance() refs exist */
	mutex_enter(&myri10ge_param_lock);
	if (mgp->refcnt != 0) {
		mutex_exit(&myri10ge_param_lock);
		cmn_err(CE_NOTE, "%s: %d external refs remain\n",
		    mgp->name, mgp->refcnt);
		return (DDI_FAILURE);
	}
	mutex_exit(&myri10ge_param_lock);

	status = mac_unregister(mgp->mh);
	if (status != DDI_SUCCESS)
		return (status);

	/* teardown mirrors the attach order, reversed */
	myri10ge_ndd_fini(mgp);
	myri10ge_dummy_rdma(mgp, 0);
	myri10ge_nic_stat_destroy(mgp);
	myri10ge_info_destroy(mgp);

	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);

	myri10ge_rem_intrs(mgp, 1);

	myri10ge_free_slices(mgp);
	ddi_regs_map_free(&mgp->io_handle);
	myri10ge_dma_free(&mgp->cmd_dma);
	pci_config_teardown(&mgp->cfg_hdl);

	/* unlink this instance from the global list and free it */
	mutex_enter(&myri10ge_param_lock);
	if (mgp_list == mgp) {
		mgp_list = mgp->next;
	} else {
		tmp = mgp_list;
		while (tmp->next != mgp && tmp->next != NULL)
			tmp = tmp->next;
		if (tmp->next != NULL)
			tmp->next = tmp->next->next;
	}
	kmem_free(mgp, sizeof (*mgp));
	mutex_exit(&myri10ge_param_lock);
	return (DDI_SUCCESS);
}
6104
/*
 * Helper for quiesce entry point: Interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We're essentially running
 * the guts of the interrupt handler, and just cherry picking the
 * confirmation that the NIC is quiesced (stats->link_down)
 */
6112
/*
 * Poll slice 0's firmware stats block for confirmation that the NIC
 * has brought the link down (see comment above).  Runs with
 * interrupts not being serviced, so it claims/deasserts the IRQ by
 * hand.  Returns 1 once stats report link down, 0 otherwise.
 */
static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss = mgp->ss;
	mcp_irq_data_t *stats = ss->fw_stats;
	int valid;
	int found_down = 0;


	/* check for a pending IRQ */

	/* volatile read: the firmware DMAs into this byte */
	if (! *((volatile uint8_t *)& stats->valid))
		return (0);
	valid = stats->valid;

	/*
	 * Make sure to tell the NIC to lower a legacy IRQ, else
	 * it may have corrupt state after restarting
	 */

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		mb();
		/* wait for irq conf DMA */
		while (*((volatile uint8_t *)& stats->valid))
			;
	}
	if (stats->stats_updated && stats->link_down)
		found_down = 1;

	/* acknowledge the interrupt to the NIC */
	if (valid & 0x1)
		*ss->irq_claim = BE_32(3);
	*(ss->irq_claim + 1) = BE_32(3);

	return (found_down);
}
6150
/*
 * DDI quiesce entry point (fast-reboot path).  Sends an ETHERNET_DOWN
 * command to the firmware and then polls, without interrupts, for
 * confirmation that the link went down.  Gives up after ~2 seconds
 * (20 x 100ms).
 */
static int
myri10ge_quiesce(dev_info_t *dip)
{
	struct myri10ge_priv *mgp;
	myri10ge_cmd_t cmd;
	int status, down, i;

	mgp = ddi_get_driver_private(dip);
	if (mgp == NULL)
		return (DDI_FAILURE);

	/* if device was unplumbed, it is guaranteed to be quiescent */
	if (mgp->running == MYRI10GE_ETH_STOPPED)
		return (DDI_SUCCESS);

	/* send a down CMD to quiesce NIC */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
		return (DDI_FAILURE);
	}

	/* poll for the link-down confirmation, 100ms per attempt */
	for (i = 0; i < 20; i++) {
		down = myri10ge_poll_down(mgp);
		if (down)
			break;
		delay(drv_usectohz(100000));
		mb();
	}
	if (down)
		return (DDI_SUCCESS);
	return (DDI_FAILURE);
}
6184
/*
 * Distinguish between allocb'ed blocks, and esballoc'ed attached
 * storage.
 */
6189 static void
myri10ge_find_lastfree(void)6190 myri10ge_find_lastfree(void)
6191 {
6192 mblk_t *mp = allocb(1024, 0);
6193 dblk_t *dbp;
6194
6195 if (mp == NULL) {
6196 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
6197 return;
6198 }
6199 dbp = mp->b_datap;
6200 myri10ge_db_lastfree = (void *)dbp->db_lastfree;
6201 }
6202
/*
 * Module load entry point: register the driver ops and install the
 * module, undoing both if mod_install() fails.
 */
int
_init(void)
{
	int i;

	if (myri10ge_verbose)
		cmn_err(CE_NOTE,
		    "Myricom 10G driver (10GbE) version %s loading\n",
		    MYRI10GE_VERSION_STR);
	/* capture the allocb dblk free routine before any rx traffic */
	myri10ge_find_lastfree();
	mac_init_ops(&myri10ge_ops, "myri10ge");
	mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
	if ((i = mod_install(&modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_install returned %d\n", i);
		/* undo the registration on failure */
		mac_fini_ops(&myri10ge_ops);
		mutex_destroy(&myri10ge_param_lock);
	}
	return (i);
}
6222
6223 int
_fini(void)6224 _fini(void)
6225 {
6226 int i;
6227 i = mod_remove(&modlinkage);
6228 if (i != 0) {
6229 return (i);
6230 }
6231 mac_fini_ops(&myri10ge_ops);
6232 mutex_destroy(&myri10ge_param_lock);
6233 return (0);
6234 }
6235
/*
 * Module info entry point: report module information via modlinkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
6241
6242
6243 /*
6244 * This file uses MyriGE driver indentation.
6245 *
6246 * Local Variables:
6247 * c-file-style:"sun"
6248 * tab-width:8
6249 * End:
6250 */
6251