1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
29 * Use is subject to license terms.
30 */
31
32 /*
33 * Copyright (c) 2014, Joyent, Inc.
34 * Copyright (c) 2016 by Delphix. All rights reserved.
35 */
36
37 #define MXGEFW_NDIS
38 #include "myri10ge_var.h"
39 #include "rss_eth_z8e.h"
40 #include "rss_ethp_z8e.h"
41 #include "mcp_gen_header.h"
42
43 #define MYRI10GE_MAX_ETHER_MTU 9014
44 #define MYRI10GE_MAX_GLD_MTU 9000
45 #define MYRI10GE_MIN_GLD_MTU 1500
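/*
 * Note on the constants above: 9014 = 9000-byte max GLD (jumbo) MTU
 * plus the 14-byte Ethernet header.
 */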
46
47 #define MYRI10GE_ETH_STOPPED 0
48 #define MYRI10GE_ETH_STOPPING 1
49 #define MYRI10GE_ETH_STARTING 2
50 #define MYRI10GE_ETH_RUNNING 3
51 #define MYRI10GE_ETH_OPEN_FAILED 4
52 #define MYRI10GE_ETH_SUSPENDED_RUNNING 5
53
54 static int myri10ge_small_bytes = 510;
55 static int myri10ge_intr_coal_delay = 125;
56 static int myri10ge_flow_control = 1;
57 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
58 static int myri10ge_nvidia_ecrc_enable = 1;
59 #endif
60 static int myri10ge_mtu_override = 0;
61 static int myri10ge_tx_copylen = 512;
62 static int myri10ge_deassert_wait = 1;
63 static int myri10ge_verbose = 0;
64 static int myri10ge_watchdog_reset = 0;
65 static int myri10ge_use_msix = 1;
66 static int myri10ge_max_slices = -1;
67 static int myri10ge_use_msi = 1;
68 int myri10ge_force_firmware = 0;
69 static boolean_t myri10ge_use_lso = B_TRUE;
70 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
71 static int myri10ge_tx_hash = 1;
72 static int myri10ge_lro = 0;
73 static int myri10ge_lro_cnt = 8;
74 int myri10ge_lro_max_aggr = 2;
75 static int myri10ge_lso_copy = 0;
76 static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
77 int myri10ge_tx_handles_initial = 128;
78
79 static kmutex_t myri10ge_param_lock;
80 static void* myri10ge_db_lastfree;
81
82 static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
83 static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
84 static int myri10ge_quiesce(dev_info_t *dip);
85
86 DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
87 myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);
88
89
90 static struct modldrv modldrv = {
91 &mod_driverops,
92 "Myricom 10G driver (10GbE)",
93 &myri10ge_ops,
94 };
95
96
97 static struct modlinkage modlinkage = {
98 MODREV_1,
99 {&modldrv, NULL},
100 };
101
102 unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
103
104 static ddi_dma_attr_t myri10ge_misc_dma_attr = {
105 DMA_ATTR_V0, /* version number. */
106 (uint64_t)0, /* low address */
107 (uint64_t)0xffffffffffffffffULL, /* high address */
108 (uint64_t)0x7ffffff, /* address counter max */
109 (uint64_t)4096, /* alignment */
110 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
111 (uint32_t)0x1, /* minimum transfer size */
112 (uint64_t)0x7fffffff, /* maximum transfer size */
113 (uint64_t)0x7fffffff, /* maximum segment size */
114 1, /* scatter/gather list length */
115 1, /* granularity */
116 0 /* attribute flags */
117 };
118
119 /*
120 * The Myri10GE NIC has the following constraints on receive buffers:
121 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
122 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
123 */
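/*
 * A worked example of the rules above (hypothetical offsets, for
 * illustration only): a 2KB buffer at physical offset 0x3800 ends at
 * 0x3fff and stays within one 4KB page, so it is acceptable; the same
 * buffer at 0x3c00 would span 0x3c00-0x43ff, crossing the 0x4000
 * boundary without being 4KB-aligned, violating constraint 2.
 */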
124
125 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
126 DMA_ATTR_V0, /* version number. */
127 (uint64_t)0, /* low address */
128 (uint64_t)0xffffffffffffffffULL, /* high address */
129 (uint64_t)0x7ffffff, /* address counter max */
130 (uint64_t)4096, /* alignment */
131 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
132 (uint32_t)0x1, /* minimum transfer size */
133 (uint64_t)0x7fffffff, /* maximum transfer size */
134 UINT64_MAX, /* maximum segment size */
135 1, /* scatter/gather list length */
136 1, /* granularity */
137 0 /* attribute flags */
138 };
139
140 static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
141 DMA_ATTR_V0, /* version number. */
142 (uint64_t)0, /* low address */
143 (uint64_t)0xffffffffffffffffULL, /* high address */
144 (uint64_t)0x7ffffff, /* address counter max */
145 #if defined sparc64 || defined __sparcv9
146 (uint64_t)4096, /* alignment */
147 #else
148 (uint64_t)0x80, /* alignment */
149 #endif
150 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
151 (uint32_t)0x1, /* minimum transfer size */
152 (uint64_t)0x7fffffff, /* maximum transfer size */
153 #if defined sparc64 || defined __sparcv9
154 UINT64_MAX, /* maximum segment size */
155 #else
156 (uint64_t)0xfff, /* maximum segment size */
157 #endif
158 1, /* scatter/gather list length */
159 1, /* granularity */
160 0 /* attribute flags */
161 };
162
163 static ddi_dma_attr_t myri10ge_tx_dma_attr = {
164 DMA_ATTR_V0, /* version number. */
165 (uint64_t)0, /* low address */
166 (uint64_t)0xffffffffffffffffULL, /* high address */
167 (uint64_t)0x7ffffff, /* address counter max */
168 (uint64_t)1, /* alignment */
169 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
170 (uint32_t)0x1, /* minimum transfer size */
171 (uint64_t)0x7fffffff, /* maximum transfer size */
172 UINT64_MAX, /* maximum segment size */
173 INT32_MAX, /* scatter/gather list length */
174 1, /* granularity */
175 0 /* attribute flags */
176 };
177
178 #if defined sparc64 || defined __sparcv9
179 #define WC 0
180 #else
181 #define WC 1
182 #endif
183
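/*
 * WC selects the access ordering used below: on x86, DDI_MERGING_OK_ACC
 * permits write merging (write-combining) for the burst PIO copies to
 * NIC SRAM, while SPARC falls back to strict ordering.
 */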
184 struct ddi_device_acc_attr myri10ge_dev_access_attr = {
185 DDI_DEVICE_ATTR_V0, /* version */
186 DDI_NEVERSWAP_ACC, /* endian flags */
187 #if WC
188 DDI_MERGING_OK_ACC /* data order */
189 #else
190 DDI_STRICTORDER_ACC
191 #endif
192 };
193
194 static void myri10ge_watchdog(void *arg);
195
196 #ifdef MYRICOM_PRIV
197 int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
198 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MAX_GLD_MTU
199 #else
200 int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
201 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MIN_GLD_MTU
202 #endif
203 int myri10ge_bigbufs_initial = 1024;
204 int myri10ge_bigbufs_max = 4096;
205
206
207 caddr_t
208 myri10ge_dma_alloc(dev_info_t *dip, size_t len,
209 ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
210 uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
211 int warn, int (*wait)(caddr_t))
212 {
213 caddr_t kaddr;
214 size_t real_length;
215 ddi_dma_cookie_t cookie;
216 uint_t count;
217 int err;
218
219 err = ddi_dma_alloc_handle(dip, attr, wait,
220 NULL, &dma->handle);
221 if (err != DDI_SUCCESS) {
222 if (warn)
223 cmn_err(CE_WARN,
224 "myri10ge: ddi_dma_alloc_handle failed\n");
225 goto abort_with_nothing;
226 }
227
228 err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
229 wait, NULL, &kaddr, &real_length,
230 &dma->acc_handle);
231 if (err != DDI_SUCCESS) {
232 if (warn)
233 cmn_err(CE_WARN,
234 "myri10ge: ddi_dma_mem_alloc failed\n");
235 goto abort_with_handle;
236 }
237
238 err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
239 bind_flags, wait, NULL, &cookie, &count);
240
241 if (err != DDI_SUCCESS) {
242 if (warn)
243 cmn_err(CE_WARN,
244 "myri10ge: ddi_dma_addr_bind_handle failed\n");
245 goto abort_with_mem;
246 }
247
248 if (count != 1) {
249 if (warn)
250 cmn_err(CE_WARN,
251 "myri10ge: got too many dma segments ");
252 goto abort_with_bind;
253 }
254 dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
255 dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
256 return (kaddr);
257
258 abort_with_bind:
259 (void) ddi_dma_unbind_handle(dma->handle);
260
261 abort_with_mem:
262 ddi_dma_mem_free(&dma->acc_handle);
263
264 abort_with_handle:
265 ddi_dma_free_handle(&dma->handle);
266 abort_with_nothing:
267 if (warn) {
268 cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n ");
269 cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
270 (void*) dip, len, (void*) attr);
271 cmn_err(CE_WARN,
272 "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
273 (void*) accattr, alloc_flags);
274 cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
275 bind_flags, (void*) dma);
276 }
277 return (NULL);
278
279 }
280
281 void
282 myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
283 {
284 (void) ddi_dma_unbind_handle(dma->handle);
285 ddi_dma_mem_free(&dma->acc_handle);
286 ddi_dma_free_handle(&dma->handle);
287 }
288
289 static inline void
290 myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
291 {
292 register volatile uint32_t *to32;
293 size_t i;
294
295 to32 = (volatile uint32_t *) to;
296 for (i = (size / 4); i; i--) {
297 *to32 = *from32;
298 to32++;
299 from32++;
300 }
301 }
302
303 #if defined(_LP64)
304 static inline void
305 myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
306 {
307 register volatile uint64_t *to64;
308 size_t i;
309
310 to64 = (volatile uint64_t *) to;
311 for (i = (size / 8); i; i--) {
312 *to64 = *from64;
313 to64++;
314 from64++;
315 }
316 }
317 #endif
318
319 /*
320 * This routine copies memory from the host to the NIC.
321 * The "size" argument must always be a multiple of
322 * the size of long (4 or 8 bytes), and to/from must also
323 * be naturally aligned.
324 */
325 static inline void
326 myri10ge_pio_copy(void *to, void *from, size_t size)
327 {
328 #if !defined(_LP64)
329 ASSERT((size % 4) == 0);
330 myri10ge_pio_copy32(to, (uint32_t *)from, size);
331 #else
332 ASSERT((size % 8) == 0);
333 myri10ge_pio_copy64(to, (uint64_t *)from, size);
334 #endif
335 }
336
337
338 /*
339 * Due to various bugs in Solaris (especially bug 6186772 where the
340 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
341 * than two elements), and the design bug where hardware checksums are
342 * ignored on mblk chains with more than 2 elements, we need to
343 * allocate a private pool of physically contiguous receive buffers.
344 */
345
346 static void
347 myri10ge_jpool_init(struct myri10ge_slice_state *ss)
348 {
349 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
350
351 bzero(jpool, sizeof (*jpool));
352 mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
353 ss->mgp->icookie);
354 jpool->head = NULL;
355 }
356
357 static void
358 myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
359 {
360 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
361
362 if (jpool->head != NULL) {
363 cmn_err(CE_WARN,
364 "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
365 ss->mgp->name);
366 }
367 mutex_destroy(&jpool->mtx);
368 }
369
370
371 /*
372 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
373 * at most 32 bytes at a time, so as to avoid involving the software
374 * pio handler in the nic. We rewrite the first segment's low
375 * DMA address to mark it valid only after we write the entire chunk
376 * in a burst.
377 */
378 static inline void
379 myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
380 {
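/*
 * Per the comment above: first set the low address bit of the leading
 * descriptor so the NIC does not yet treat the group as valid,
 * burst-copy all eight descriptors in two 32-byte PIO writes, then
 * clear the bit and rewrite only the first low address to validate
 * the whole group at once.
 */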
381 src->addr_low |= BE_32(1);
382 myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
383 mb();
384 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
385 mb();
386 src->addr_low &= ~(BE_32(1));
387 dst->addr_low = src->addr_low;
388 mb();
389 }
390
391 static void
392 myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
393 {
394 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
395 struct myri10ge_jpool_entry *jtail, *j, *jfree;
396 volatile void *putp;
397 int i;
398
399 /* find tail */
400 jtail = NULL;
401 if (jpool->head != NULL) {
402 j = jpool->head;
403 while (j->next != NULL)
404 j = j->next;
405 jtail = j;
406 }
407
408 /*
409 * iterate over all per-CPU caches, and add contents into
410 * jpool
411 */
412 for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
413 /* take per-CPU free list */
414 putp = &jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
415 jfree = atomic_swap_ptr(putp, NULL);
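		/*
		 * atomic_swap_ptr() detaches the entire per-CPU list in
		 * one step; a concurrent myri10ge_jfree_rtn() simply
		 * starts a new list at this slot, so no lock is needed.
		 */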
416 if (jfree == NULL)
417 continue;
418
419 /* append to pool */
420 if (jtail == NULL) {
421 jpool->head = jfree;
422 } else {
423 jtail->next = jfree;
424 }
425 j = jfree;
426 while (j->next != NULL)
427 j = j->next;
428 jtail = j;
429 }
430 }
431
432 /*
433 * Transfers buffers from the free pool to the nic
434 * Must be called holding the jpool mutex.
435 */
436
437 static inline void
438 myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
439 {
440 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
441 struct myri10ge_jpool_entry *j;
442 myri10ge_rx_ring_t *rx;
443 int i, idx, limit;
444
445 rx = &ss->rx_big;
446 limit = ss->j_rx_cnt + (rx->mask + 1);
447
448 for (i = rx->cnt; i != limit; i++) {
449 idx = i & (rx->mask);
450 j = jpool->head;
451 if (j == NULL) {
452 myri10ge_pull_jpool(ss);
453 j = jpool->head;
454 if (j == NULL) {
455 break;
456 }
457 }
458 jpool->head = j->next;
459 rx->info[idx].j = j;
460 rx->shadow[idx].addr_low = j->dma.low;
461 rx->shadow[idx].addr_high = j->dma.high;
462 /* copy 4 descriptors (32-bytes) to the mcp at a time */
463 if ((idx & 7) == 7) {
464 myri10ge_submit_8rx(&rx->lanai[idx - 7],
465 &rx->shadow[idx - 7]);
466 }
467 }
468 rx->cnt = i;
469 }
470
471 /*
472 * Transfer buffers from the nic to the free pool.
473 * The jpool mutex is acquired internally, so callers must not hold it.
474 */
475
476 static inline void
477 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
478 {
479 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
480 struct myri10ge_jpool_entry *j;
481 myri10ge_rx_ring_t *rx;
482 int i;
483
484 mutex_enter(&jpool->mtx);
485 rx = &ss->rx_big;
486
487 for (i = 0; i < rx->mask + 1; i++) {
488 j = rx->info[i].j;
489 rx->info[i].j = NULL;
490 if (j == NULL)
491 continue;
492 j->next = jpool->head;
493 jpool->head = j;
494 }
495 mutex_exit(&jpool->mtx);
496
497 }
498
499
500 /*
501 * Free routine which is called when the mblk allocated via
502 * esballoc() is freed. Here we return the jumbo buffer
503 * to a per-CPU free list; myri10ge_pull_jpool() later folds
504 * these lists back into the main pool
505 */
506
507 static void
508 myri10ge_jfree_rtn(void *arg)
509 {
510 struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
511 struct myri10ge_jpool_stuff *jpool;
512 volatile uintptr_t *putp;
513 uintptr_t old, new;
514
515 jpool = &j->ss->jpool;
516
517 /* prepend buffer locklessly to per-CPU freelist */
518 putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
519 new = (uintptr_t)j;
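	/*
	 * Classic compare-and-swap push: link j to the current head and
	 * retry if another thread changed the head between the read and
	 * the atomic_cas_ulong().
	 */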
520 do {
521 old = *putp;
522 j->next = (void *)old;
523 } while (atomic_cas_ulong(putp, old, new) != old);
524 }
525
526 static void
527 myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
528 {
529 (void) ddi_dma_unbind_handle(j->dma_handle);
530 ddi_dma_mem_free(&j->acc_handle);
531 ddi_dma_free_handle(&j->dma_handle);
532 kmem_free(j, sizeof (*j));
533 }
534
535
536 /*
537 * Allocates one physically contiguous descriptor
538 * and adds it to the jumbo buffer pool.
539 */
540
541 static int
542 myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
543 {
544 struct myri10ge_jpool_entry *j;
545 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
546 ddi_dma_attr_t *rx_dma_attr;
547 size_t real_length;
548 ddi_dma_cookie_t cookie;
549 uint_t count;
550 int err;
551
552 if (myri10ge_mtu < 2048)
553 rx_dma_attr = &myri10ge_rx_std_dma_attr;
554 else
555 rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;
556
557 again:
558 j = (struct myri10ge_jpool_entry *)
559 kmem_alloc(sizeof (*j), KM_SLEEP);
560 err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
561 DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
562 if (err != DDI_SUCCESS)
563 goto abort_with_j;
564
565 err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
566 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
567 NULL, &j->buf, &real_length, &j->acc_handle);
568 if (err != DDI_SUCCESS)
569 goto abort_with_handle;
570
571 err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
572 real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
573 NULL, &cookie, &count);
574 if (err != DDI_SUCCESS)
575 goto abort_with_mem;
576
577 /*
578 * Make certain std MTU buffers do not cross a 4KB boundary:
579 *
580 * Setting dma_attr_align=4096 will do this, but the system
581 * will only allocate 1 RX buffer per 4KB page, rather than 2.
582 * Setting dma_attr_granular=4096 *seems* to work around this,
583 * but I'm paranoid about future systems no longer honoring
584 * this, so fall back to the safe, but memory wasting way if a
585 * buffer crosses a 4KB boundary.
586 */
587
588 if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
589 rx_dma_attr->dma_attr_align != 4096) {
590 uint32_t start, end;
591
592 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
593 end = start + myri10ge_mtu;
594 if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
595 printf("std buffer crossed a 4KB boundary!\n");
596 myri10ge_remove_jbuf(j);
597 rx_dma_attr->dma_attr_align = 4096;
598 rx_dma_attr->dma_attr_seg = UINT64_MAX;
599 goto again;
600 }
601 }
602
603 j->dma.low =
604 htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
605 j->dma.high =
606 htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
607 j->ss = ss;
608
609
610 j->free_func.free_func = myri10ge_jfree_rtn;
611 j->free_func.free_arg = (char *)j;
612 mutex_enter(&jpool->mtx);
613 j->next = jpool->head;
614 jpool->head = j;
615 jpool->num_alloc++;
616 mutex_exit(&jpool->mtx);
617 return (0);
618
619 abort_with_mem:
620 ddi_dma_mem_free(&j->acc_handle);
621
622 abort_with_handle:
623 ddi_dma_free_handle(&j->dma_handle);
624
625 abort_with_j:
626 kmem_free(j, sizeof (*j));
627
628 /*
629 * If an allocation failed, perhaps it failed because it could
630 * not satisfy the granularity requirement. Disable that, and
631 * try again.
632 */
633 if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
634 rx_dma_attr->dma_attr_align != 4096) {
635 cmn_err(CE_NOTE,
636 "!alloc failed, reverting to gran=1\n");
637 rx_dma_attr->dma_attr_align = 4096;
638 rx_dma_attr->dma_attr_seg = UINT64_MAX;
639 goto again;
640 }
641 return (err);
642 }
643
644 static int
645 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
646 {
647 int i;
648 struct myri10ge_jpool_entry *j;
649
650 mutex_enter(&jpool->mtx);
651 j = jpool->head;
652 i = 0;
653 while (j != NULL) {
654 i++;
655 j = j->next;
656 }
657 mutex_exit(&jpool->mtx);
658 return (i);
659 }
660
661 static int
662 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
663 {
664 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
665 int allocated = 0;
666 int err;
667 int needed;
668
669 /*
670 * if total is set, the user wants "num" jbufs in the pool;
671 * otherwise the user wants "num" additional jbufs
672 * added to the pool
673 */
674 if (total && jpool->num_alloc) {
675 allocated = myri10ge_jfree_cnt(jpool);
676 needed = num - allocated;
677 } else {
678 needed = num;
679 }
680
681 while (needed > 0) {
682 needed--;
683 err = myri10ge_add_jbuf(ss);
684 if (err == 0) {
685 allocated++;
686 }
687 }
688 return (allocated);
689 }
690
691 static void
692 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
693 {
694 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
695 struct myri10ge_jpool_entry *j;
696
697 mutex_enter(&jpool->mtx);
698 myri10ge_pull_jpool(ss);
699 while (jpool->head != NULL) {
700 jpool->num_alloc--;
701 j = jpool->head;
702 jpool->head = j->next;
703 myri10ge_remove_jbuf(j);
704 }
705 mutex_exit(&jpool->mtx);
706 }
707
708 static void
709 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
710 {
711 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
712 struct myri10ge_jpool_entry *j = NULL;
713 caddr_t ptr;
714 uint32_t dma_low, dma_high;
715 int idx, len;
716 unsigned int alloc_size;
717
718 dma_low = dma_high = len = 0;
719 alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
720 ptr = NULL;
721 for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
722 /* Allocate a jumbo frame and carve it into small frames */
723 if (len < alloc_size) {
724 mutex_enter(&jpool->mtx);
725 /* remove jumbo from freelist */
726 j = jpool->head;
727 jpool->head = j->next;
728 /* place it onto small list */
729 j->next = ss->small_jpool;
730 ss->small_jpool = j;
731 mutex_exit(&jpool->mtx);
732 len = myri10ge_mtu;
733 dma_low = ntohl(j->dma.low);
734 dma_high = ntohl(j->dma.high);
735 ptr = j->buf;
736 }
737 ss->rx_small.info[idx].ptr = ptr;
738 ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
739 ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
740 len -= alloc_size;
741 ptr += alloc_size;
742 dma_low += alloc_size;
743 }
744 }
745
746 /*
747 * Return the jumbo bufs we carved up for the small ring to the jumbo pool
748 */
749
750 static void
751 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
752 {
753 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
754 struct myri10ge_jpool_entry *j = NULL;
755
756 mutex_enter(&jpool->mtx);
757 while (ss->small_jpool != NULL) {
758 j = ss->small_jpool;
759 ss->small_jpool = j->next;
760 j->next = jpool->head;
761 jpool->head = j;
762 }
763 mutex_exit(&jpool->mtx);
764 ss->jbufs_for_smalls = 0;
765 }
766
767 static int
768 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
769 {
770 myri10ge_tx_ring_t *tx = &ss->tx;
771 struct myri10ge_priv *mgp = ss->mgp;
772 struct myri10ge_tx_dma_handle *handle;
773 int err;
774
775 handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
776 err = ddi_dma_alloc_handle(mgp->dip,
777 &myri10ge_tx_dma_attr,
778 DDI_DMA_SLEEP, NULL,
779 &handle->h);
780 if (err) {
781 static int limit = 0;
782 if (limit == 0)
783 cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n",
784 mgp->name);
785 limit++;
786 kmem_free(handle, sizeof (*handle));
787 return (err);
788 }
789 mutex_enter(&tx->handle_lock);
790 MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
791 handle->next = tx->free_tx_handles;
792 tx->free_tx_handles = handle;
793 mutex_exit(&tx->handle_lock);
794 return (DDI_SUCCESS);
795 }
796
797 static void
798 myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
799 {
800 myri10ge_tx_ring_t *tx = &ss->tx;
801 struct myri10ge_tx_dma_handle *handle;
802 mutex_enter(&tx->handle_lock);
803
804 handle = tx->free_tx_handles;
805 while (handle != NULL) {
806 tx->free_tx_handles = handle->next;
807 ddi_dma_free_handle(&handle->h);
808 kmem_free(handle, sizeof (*handle));
809 handle = tx->free_tx_handles;
810 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
811 }
812 mutex_exit(&tx->handle_lock);
813 if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
814 cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
815 ss->mgp->name,
816 (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
817 }
818 }
819
820 static void
821 myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
822 struct myri10ge_tx_dma_handle_head *list)
823 {
824 mutex_enter(&tx->handle_lock);
825 list->tail->next = tx->free_tx_handles;
826 tx->free_tx_handles = list->head;
827 mutex_exit(&tx->handle_lock);
828 }
829
830 static void
831 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
832 struct myri10ge_tx_dma_handle *handle)
833 {
834 struct myri10ge_tx_dma_handle_head list;
835
836 if (handle == NULL)
837 return;
838 list.head = handle;
839 list.tail = handle;
840 while (handle != NULL) {
841 list.tail = handle;
842 handle = handle->next;
843 }
844 myri10ge_free_tx_handles(tx, &list);
845 }
846
847 static int
848 myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
849 struct myri10ge_tx_dma_handle **ret)
850 {
851 myri10ge_tx_ring_t *tx = &ss->tx;
852 struct myri10ge_tx_dma_handle *handle;
853 int err, i;
854
855 mutex_enter(&tx->handle_lock);
856 for (i = 0; i < count; i++) {
857 handle = tx->free_tx_handles;
858 while (handle == NULL) {
859 mutex_exit(&tx->handle_lock);
860 err = myri10ge_add_tx_handle(ss);
861 if (err != DDI_SUCCESS) {
862 goto abort_with_handles;
863 }
864 mutex_enter(&tx->handle_lock);
865 handle = tx->free_tx_handles;
866 }
867 tx->free_tx_handles = handle->next;
868 handle->next = *ret;
869 *ret = handle;
870 }
871 mutex_exit(&tx->handle_lock);
872 return (DDI_SUCCESS);
873
874 abort_with_handles:
875 myri10ge_free_tx_handle_slist(tx, *ret);
876 return (err);
877 }
878
879
880 /*
881 * Frees DMA resources associated with the send ring
882 */
883 static void
884 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
885 {
886 myri10ge_tx_ring_t *tx;
887 struct myri10ge_tx_dma_handle_head handles;
888 size_t bytes;
889 int idx;
890
891 tx = &ss->tx;
892 handles.head = NULL;
893 handles.tail = NULL;
894 for (idx = 0; idx < ss->tx.mask + 1; idx++) {
895 if (tx->info[idx].m) {
896 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
897 handles.head = tx->info[idx].handle;
898 if (handles.tail == NULL)
899 handles.tail = tx->info[idx].handle;
900 freeb(tx->info[idx].m);
901 tx->info[idx].m = 0;
902 tx->info[idx].handle = 0;
903 }
904 tx->cp[idx].va = NULL;
905 myri10ge_dma_free(&tx->cp[idx].dma);
906 }
907 bytes = sizeof (*tx->cp) * (tx->mask + 1);
908 kmem_free(tx->cp, bytes);
909 tx->cp = NULL;
910 if (handles.head != NULL)
911 myri10ge_free_tx_handles(tx, &handles);
912 myri10ge_remove_tx_handles(ss);
913 }
914
915 /*
916 * Allocates DMA handles associated with the send ring
917 */
918 static inline int
919 myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
920 {
921 struct myri10ge_tx_dma_handle *handles;
922 int h;
923 size_t bytes;
924
925 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
926 ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
927 if (ss->tx.cp == NULL) {
928 cmn_err(CE_WARN,
929 "%s: Failed to allocate tx copyblock storage\n",
930 ss->mgp->name);
931 return (DDI_FAILURE);
932 }
933
934
935 /* allocate the TX copyblocks */
936 for (h = 0; h < ss->tx.mask + 1; h++) {
937 ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
938 4096, &myri10ge_rx_jumbo_dma_attr,
939 &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
940 DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
941 DDI_DMA_DONTWAIT);
942 if (ss->tx.cp[h].va == NULL) {
943 cmn_err(CE_WARN, "%s: Failed to allocate tx "
944 "copyblock %d\n", ss->mgp->name, h);
945 goto abort_with_copyblocks;
946 }
947 }
948 /* pre-allocate transmit handles */
949 handles = NULL;
950 (void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
951 &handles);
952 if (handles != NULL)
953 myri10ge_free_tx_handle_slist(&ss->tx, handles);
954
955 return (DDI_SUCCESS);
956
957 abort_with_copyblocks:
958 while (h > 0) {
959 h--;
960 myri10ge_dma_free(&ss->tx.cp[h].dma);
961 }
962
963 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
964 kmem_free(ss->tx.cp, bytes);
965 ss->tx.cp = NULL;
966 return (DDI_FAILURE);
967 }
968
969 /*
970 * The eeprom strings on the lanaiX have the format
971 * SN=x\0
972 * MAC=x:x:x:x:x:x\0
973 * PT:ddd mmm xx xx:xx:xx xx\0
974 * PV:ddd mmm xx xx:xx:xx xx\0
975 */
976 static int
977 myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
978 {
979 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
980 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
981 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
982 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
983
984 char *ptr, *limit;
985 int i, hv, lv;
986
987 ptr = mgp->eeprom_strings;
988 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
989
990 while (*ptr != '\0' && ptr < limit) {
991 if (memcmp(ptr, "MAC=", 4) == 0) {
992 ptr += 4;
993 if (myri10ge_verbose)
994 printf("%s: mac address = %s\n", mgp->name,
995 ptr);
996 mgp->mac_addr_string = ptr;
997 for (i = 0; i < 6; i++) {
998 if ((ptr + 2) > limit)
999 goto abort;
1000
1001 if (*(ptr+1) == ':') {
1002 hv = 0;
1003 lv = myri10ge_digit(*ptr); ptr++;
1004 } else {
1005 hv = myri10ge_digit(*ptr); ptr++;
1006 lv = myri10ge_digit(*ptr); ptr++;
1007 }
1008 mgp->mac_addr[i] = (hv << 4) | lv;
1009 ptr++;
1010 }
1011 }
1012 if (memcmp((const void *)ptr, "SN=", 3) == 0) {
1013 ptr += 3;
1014 mgp->sn_str = (char *)ptr;
1015 }
1016 if (memcmp((const void *)ptr, "PC=", 3) == 0) {
1017 ptr += 3;
1018 mgp->pc_str = (char *)ptr;
1019 }
1020 MYRI10GE_NEXT_STRING(ptr);
1021 }
1022
1023 return (0);
1024
1025 abort:
1026 cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
1027 return (ENXIO);
1028 }
1029
1030
1031 /*
1032 * Determine the register set containing the PCI resource we
1033 * want to map: the memory-mappable part of the interface. We do
1034 * this by scanning the DDI "reg" property of the interface,
1035 * which is an array of mx_ddi_reg_set structures.
1036 */
1037 static int
1038 myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
1039 unsigned long *busno, unsigned long *devno,
1040 unsigned long *funcno)
1041 {
1042
1043 #define REGISTER_NUMBER(ip) (ip[0] >> 0 & 0xff)
1044 #define FUNCTION_NUMBER(ip) (ip[0] >> 8 & 0x07)
1045 #define DEVICE_NUMBER(ip) (ip[0] >> 11 & 0x1f)
1046 #define BUS_NUMBER(ip) (ip[0] >> 16 & 0xff)
1047 #define ADDRESS_SPACE(ip) (ip[0] >> 24 & 0x03)
1048 #define PCI_ADDR_HIGH(ip) (ip[1])
1049 #define PCI_ADDR_LOW(ip) (ip[2])
1050 #define PCI_SPAN_HIGH(ip) (ip[3])
1051 #define PCI_SPAN_LOW(ip) (ip[4])
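/*
 * These macros decode the standard PCI "reg" property binding: each
 * register set is five integers -- phys.hi (encoding the bus, device,
 * function and address-space type), phys.mid and phys.lo (the 64-bit
 * PCI address), and two size cells (the span) -- which is why the scan
 * below steps through "data" five words at a time.
 */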
1052
1053 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
1054 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
1055
1056 int *data, i, *rs;
1057 uint32_t nelementsp;
1058
1059 #ifdef MYRI10GE_REGSET_VERBOSE
1060 char *address_space_name[] = { "Configuration Space",
1061 "I/O Space",
1062 "32-bit Memory Space",
1063 "64-bit Memory Space"
1064 };
1065 #endif
1066
1067 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1068 "reg", &data, &nelementsp) != DDI_SUCCESS) {
1069 printf("Could not determine register set.\n");
1070 return (ENXIO);
1071 }
1072
1073 #ifdef MYRI10GE_REGSET_VERBOSE
1074 printf("There are %d register sets.\n", nelementsp / 5);
1075 #endif
1076 if (!nelementsp) {
1077 printf("Didn't find any \"reg\" properties.\n");
1078 ddi_prop_free(data);
1079 return (ENODEV);
1080 }
1081
1082 /* Scan for the register number. */
1083 rs = &data[0];
1084 *busno = BUS_NUMBER(rs);
1085 *devno = DEVICE_NUMBER(rs);
1086 *funcno = FUNCTION_NUMBER(rs);
1087
1088 #ifdef MYRI10GE_REGSET_VERBOSE
1089 printf("*** Scanning for register number.\n");
1090 #endif
1091 for (i = 0; i < nelementsp / 5; i++) {
1092 rs = &data[5 * i];
1093 #ifdef MYRI10GE_REGSET_VERBOSE
1094 printf("Examining register set %d:\n", i);
1095 printf(" Register number = %d.\n", REGISTER_NUMBER(rs));
1096 printf(" Function number = %d.\n", FUNCTION_NUMBER(rs));
1097 printf(" Device number = %d.\n", DEVICE_NUMBER(rs));
1098 printf(" Bus number = %d.\n", BUS_NUMBER(rs));
1099 printf(" Address space = %d (%s ).\n", ADDRESS_SPACE(rs),
1100 address_space_name[ADDRESS_SPACE(rs)]);
1101 printf(" pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
1102 PCI_ADDR_LOW(rs));
1103 printf(" pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
1104 PCI_SPAN_LOW(rs));
1105 #endif
1106 /* We are looking for a memory property. */
1107
1108 if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
1109 ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
1110 *reg_set = i;
1111
1112 #ifdef MYRI10GE_REGSET_VERBOSE
1113 printf("%s uses register set %d.\n",
1114 address_space_name[ADDRESS_SPACE(rs)], *reg_set);
1115 #endif
1116
1117 *span = (PCI_SPAN_LOW(rs));
1118 #ifdef MYRI10GE_REGSET_VERBOSE
1119 printf("Board span is 0x%x\n", *span);
1120 #endif
1121 break;
1122 }
1123 }
1124
1125 ddi_prop_free(data);
1126
1127 /* If no match, fail. */
1128 if (i >= nelementsp / 5) {
1129 return (EIO);
1130 }
1131
1132 return (0);
1133 }
1134
1135
1136 static int
1137 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
1138 {
1139 void *inflate_buffer;
1140 int rv, status;
1141 size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
1142 size_t destlen;
1143 mcp_gen_header_t *hdr;
1144 unsigned hdr_offset, i;
1145
1146
1147 *limit = 0; /* -Wuninitialized */
1148 status = 0;
1149
1150 inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
1151 if (!inflate_buffer) {
1152 cmn_err(CE_WARN,
1153 "%s: Could not allocate buffer to inflate mcp\n",
1154 mgp->name);
1155 return (ENOMEM);
1156 }
1157
1158 destlen = sram_size;
1159 rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
1160 mgp->eth_z8e_length);
1161
1162 if (rv != Z_OK) {
1163 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
1164 mgp->name, z_strerror(rv));
1165 status = ENXIO;
1166 goto abort;
1167 }
1168
1169 *limit = (uint32_t)destlen;
1170
1171 hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
1172 MCP_HEADER_PTR_OFFSET));
1173 hdr = (void *)((char *)inflate_buffer + hdr_offset);
1174 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
1175 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
1176 ntohl(hdr->mcp_type));
1177 status = EIO;
1178 goto abort;
1179 }
1180
1181 /* save firmware version for kstat */
1182 (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
1183 if (myri10ge_verbose)
1184 printf("%s: firmware id: %s\n", mgp->name, hdr->version);
1185
1186 /* Copy the inflated firmware to NIC SRAM. */
1187 for (i = 0; i < *limit; i += 256) {
1188 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
1189 (char *)inflate_buffer + i,
1190 min(256U, (unsigned)(*limit - i)));
1191 mb();
1192 (void) *(int *)(void *)mgp->sram;
1193 mb();
1194 }
1195
1196 abort:
1197 kmem_free(inflate_buffer, sram_size);
1198
1199 return (status);
1200
1201 }
1202
1203
1204 int
1205 myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
1206 myri10ge_cmd_t *data)
1207 {
1208 mcp_cmd_t *buf;
1209 char buf_bytes[sizeof (*buf) + 8];
1210 volatile mcp_cmd_response_t *response = mgp->cmd;
1211 volatile char *cmd_addr =
1212 (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
1213 int sleep_total = 0;
1214
1215 /* ensure buf is aligned to 8 bytes */
1216 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
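	/*
	 * For example (hypothetical address): if buf_bytes sits at 0x1005,
	 * adding 7 gives 0x100c and masking with ~7 yields 0x1008, the
	 * first 8-byte-aligned address within the oversized buf_bytes
	 * array.
	 */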
1217
1218 buf->data0 = htonl(data->data0);
1219 buf->data1 = htonl(data->data1);
1220 buf->data2 = htonl(data->data2);
1221 buf->cmd = htonl(cmd);
1222 buf->response_addr.low = mgp->cmd_dma.low;
1223 buf->response_addr.high = mgp->cmd_dma.high;
1224 mutex_enter(&mgp->cmd_lock);
1225 response->result = 0xffffffff;
1226 mb();
1227
1228 myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));
1229
1230 /* wait up to 20ms */
1231 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
1232 mb();
1233 if (response->result != 0xffffffff) {
1234 if (response->result == 0) {
1235 data->data0 = ntohl(response->data);
1236 mutex_exit(&mgp->cmd_lock);
1237 return (0);
1238 } else if (ntohl(response->result)
1239 == MXGEFW_CMD_UNKNOWN) {
1240 mutex_exit(&mgp->cmd_lock);
1241 return (ENOSYS);
1242 } else if (ntohl(response->result)
1243 == MXGEFW_CMD_ERROR_UNALIGNED) {
1244 mutex_exit(&mgp->cmd_lock);
1245 return (E2BIG);
1246 } else {
1247 cmn_err(CE_WARN,
1248 "%s: command %d failed, result = %d\n",
1249 mgp->name, cmd, ntohl(response->result));
1250 mutex_exit(&mgp->cmd_lock);
1251 return (ENXIO);
1252 }
1253 }
1254 drv_usecwait(1000);
1255 }
1256 mutex_exit(&mgp->cmd_lock);
1257 cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
1258 mgp->name, cmd, ntohl(response->result));
1259 return (EAGAIN);
1260 }
1261
1262 /*
1263 * Enable or disable periodic RDMAs from the host to make certain
1264 * chipsets resend dropped PCIe messages
1265 */
1266
1267 static void
1268 myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
1269 {
1270 char buf_bytes[72];
1271 volatile uint32_t *confirm;
1272 volatile char *submit;
1273 uint32_t *buf;
1274 int i;
1275
1276 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1277
1278 /* clear confirmation addr */
1279 confirm = (volatile uint32_t *)mgp->cmd;
1280 *confirm = 0;
1281 mb();
1282
1283 /*
1284 * send an rdma command to the PCIe engine, and wait for the
1285 * response in the confirmation address. The firmware should
1286 * write a -1 there to indicate it is alive and well
1287 */
1288
1289 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */
1290 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */
1291 buf[2] = htonl(0xffffffff); /* confirm data */
1292 buf[3] = htonl(mgp->cmd_dma.high); /* dummy addr MSW */
1293 buf[4] = htonl(mgp->cmd_dma.low); /* dummy addr LSW */
1294 buf[5] = htonl(enable); /* enable? */
1295
1296
1297 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);
1298
1299 myri10ge_pio_copy((char *)submit, buf, 64);
1300 mb();
1301 drv_usecwait(1000);
1302 mb();
1303 i = 0;
1304 while (*confirm != 0xffffffff && i < 20) {
1305 drv_usecwait(1000);
1306 i++;
1307 }
1308 if (*confirm != 0xffffffff) {
1309 cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
1310 mgp->name,
1311 (enable ? "enable" : "disable"), (void*) confirm, *confirm);
1312 }
1313 }
1314
1315 static int
1316 myri10ge_load_firmware(struct myri10ge_priv *mgp)
1317 {
1318 myri10ge_cmd_t cmd;
1319 volatile uint32_t *confirm;
1320 volatile char *submit;
1321 char buf_bytes[72];
1322 uint32_t *buf, size;
1323 int status, i;
1324
1325 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1326
1327 status = myri10ge_load_firmware_from_zlib(mgp, &size);
1328 if (status) {
1329 cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
1330 return (status);
1331 }
1332
1333 /* clear confirmation addr */
1334 confirm = (volatile uint32_t *)mgp->cmd;
1335 *confirm = 0;
1336 mb();
1337
1338 /*
1339 * send a reload command to the bootstrap MCP, and wait for the
1340 * response in the confirmation address. The firmware should
1341 * write a -1 there to indicate it is alive and well
1342 */
1343
1344 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */
1345 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */
1346 buf[2] = htonl(0xffffffff); /* confirm data */
1347
1348 /*
1349 * FIX: All newest firmware should un-protect the bottom of
1350 * the sram before handoff. However, the very first interfaces
1351 * do not. Therefore the handoff copy must skip the first 8 bytes
1352 */
1353 buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
1354 buf[4] = htonl(size - 8); /* length of code */
1355 buf[5] = htonl(8); /* where to copy to */
1356 buf[6] = htonl(0); /* where to jump to */
1357
1358 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);
1359
1360 myri10ge_pio_copy((char *)submit, buf, 64);
1361 mb();
1362 drv_usecwait(1000);
1363 mb();
1364 i = 0;
1365 while (*confirm != 0xffffffff && i < 1000) {
1366 drv_usecwait(1000);
1367 i++;
1368 }
1369 if (*confirm != 0xffffffff) {
1370 cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
1371 mgp->name, (void *) confirm, *confirm);
1372
1373 return (ENXIO);
1374 }
1375 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1376 if (status != 0) {
1377 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
1378 mgp->name);
1379 return (ENXIO);
1380 }
1381
1382 mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
1383 myri10ge_dummy_rdma(mgp, 1);
1384 return (0);
1385 }
1386
1387 static int
1388 myri10ge_m_unicst(void *arg, const uint8_t *addr)
1389 {
1390 struct myri10ge_priv *mgp = arg;
1391 myri10ge_cmd_t cmd;
1392 int status;
1393
1394 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1395 | (addr[2] << 8) | addr[3]);
1396
1397 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1398
1399 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
1400 if (status == 0 && (addr != mgp->mac_addr))
1401 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));
1402
1403 return (status);
1404 }
1405
1406 static int
1407 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
1408 {
1409 myri10ge_cmd_t cmd;
1410 int status;
1411
1412 if (pause)
1413 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
1414 &cmd);
1415 else
1416 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
1417 &cmd);
1418
1419 if (status) {
1420 cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
1421 mgp->name);
1422 return (ENXIO);
1423 }
1424 mgp->pause = pause;
1425 return (0);
1426 }
1427
1428 static void
1429 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
1430 {
1431 myri10ge_cmd_t cmd;
1432 int status;
1433
1434 if (promisc)
1435 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
1436 else
1437 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);
1438
1439 if (status) {
1440 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
1441 mgp->name);
1442 }
1443 }
1444
1445 static int
1446 myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
1447 {
1448 myri10ge_cmd_t cmd;
1449 int status;
1450 uint32_t len;
1451 void *dmabench;
1452 struct myri10ge_dma_stuff dmabench_dma;
1453 char *test = " ";
1454
1455 /*
1456 * Run a small DMA test.
1457 * The magic multipliers to the length tell the firmware
1458 * to do DMA read, write, or read+write tests. The
1459 * results are returned in cmd.data0. The upper 16
1460 * bits of the return are the number of transfers completed.
1461 * The lower 16 bits are the time in 0.5us ticks that the
1462 * transfers took to complete.
1463 */
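/*
 * Bandwidth math used below (derived from the comment above): with
 * N = (cmd.data0 >> 16) transfers of len bytes and T = (cmd.data0 &
 * 0xffff) half-microsecond ticks, the rate is N*len bytes per T/2 us,
 * i.e. 2*N*len/T bytes/us, which equals MB/s -- exactly the
 * expressions assigned to read_dma, write_dma and read_write_dma
 * (the latter doubled again since data moves in both directions).
 */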
1464
1465 len = mgp->tx_boundary;
1466
1467 dmabench = myri10ge_dma_alloc(mgp->dip, len,
1468 &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
1469 DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
1470 &dmabench_dma, 1, DDI_DMA_DONTWAIT);
1471 mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
1472 if (dmabench == NULL) {
1473 cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
1474 return (ENOMEM);
1475 }
1476
1477 cmd.data0 = ntohl(dmabench_dma.low);
1478 cmd.data1 = ntohl(dmabench_dma.high);
1479 cmd.data2 = len * 0x10000;
1480 status = myri10ge_send_cmd(mgp, test_type, &cmd);
1481 if (status != 0) {
1482 test = "read";
1483 goto abort;
1484 }
1485 mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1486
1487 cmd.data0 = ntohl(dmabench_dma.low);
1488 cmd.data1 = ntohl(dmabench_dma.high);
1489 cmd.data2 = len * 0x1;
1490 status = myri10ge_send_cmd(mgp, test_type, &cmd);
1491 if (status != 0) {
1492 test = "write";
1493 goto abort;
1494 }
1495 mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1496
1497 cmd.data0 = ntohl(dmabench_dma.low);
1498 cmd.data1 = ntohl(dmabench_dma.high);
1499 cmd.data2 = len * 0x10001;
1500 status = myri10ge_send_cmd(mgp, test_type, &cmd);
1501 if (status != 0) {
1502 test = "read/write";
1503 goto abort;
1504 }
1505 mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
1506 (cmd.data0 & 0xffff);
1507
1508
1509 abort:
1510 myri10ge_dma_free(&dmabench_dma);
1511 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
1512 cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
1513 test);
1514 return (status);
1515 }
1516
1517 static int
1518 myri10ge_reset(struct myri10ge_priv *mgp)
1519 {
1520 myri10ge_cmd_t cmd;
1521 struct myri10ge_nic_stat *ethstat;
1522 struct myri10ge_slice_state *ss;
1523 int i, status;
1524 size_t bytes;
1525
1526 /* send a reset command to the card to see if it is alive */
1527 (void) memset(&cmd, 0, sizeof (cmd));
1528 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
1529 if (status != 0) {
1530 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
1531 return (ENXIO);
1532 }
1533
1534 /* Now exchange information about interrupts */
1535
1536 bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
1537 cmd.data0 = (uint32_t)bytes;
1538 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1539
1540 /*
1541 * Even though we already know how many slices are supported
1542 * via myri10ge_probe_slices(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1543 * has magic side effects, and must be called after a reset.
1544 * It must be called prior to calling any RSS related cmds,
1545 * including assigning an interrupt queue for anything but
1546 * slice 0. It must also be called *after*
1547 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1548 * the firmware to compute offsets.
1549 */
1550
1551 if (mgp->num_slices > 1) {
1552
1553 /* ask for the maximum number of slices supported */
1554 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1555 &cmd);
1556 if (status != 0) {
1557 cmn_err(CE_WARN,
1558 "%s: failed to get number of slices\n",
1559 mgp->name);
1560 return (status);
1561 }
1562
1563 /*
1564 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1565 * to setting up the interrupt queue DMA
1566 */
1567
1568 cmd.data0 = mgp->num_slices;
1569 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
1570 MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1571 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1572 &cmd);
1573 if (status != 0) {
1574 cmn_err(CE_WARN,
1575 "%s: failed to set number of slices\n",
1576 mgp->name);
1577 return (status);
1578 }
1579 }
1580 for (i = 0; i < mgp->num_slices; i++) {
1581 ss = &mgp->ss[i];
1582 cmd.data0 = ntohl(ss->rx_done.dma.low);
1583 cmd.data1 = ntohl(ss->rx_done.dma.high);
1584 cmd.data2 = i;
1585 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
1586 &cmd);
1587 }
1588
1589 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1590 for (i = 0; i < mgp->num_slices; i++) {
1591 ss = &mgp->ss[i];
1592 ss->irq_claim = (volatile unsigned int *)
1593 (void *)(mgp->sram + cmd.data0 + 8 * i);
1594 }
1595
1596 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
1597 status |= myri10ge_send_cmd(mgp,
1598 MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1599 mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1600 }
1601
1602 status |= myri10ge_send_cmd(mgp,
1603 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1604 mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1605
1606 if (status != 0) {
1607 cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
1608 mgp->name);
1609 return (status);
1610 }
1611
1612 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
1613 (void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);
1614
1615 /* reset mcp/driver shared state back to 0 */
1616
1617 for (i = 0; i < mgp->num_slices; i++) {
1618 ss = &mgp->ss[i];
1619 bytes = mgp->max_intr_slots *
1620 sizeof (*mgp->ss[0].rx_done.entry);
1621 (void) memset(ss->rx_done.entry, 0, bytes);
1622 ss->tx.req = 0;
1623 ss->tx.done = 0;
1624 ss->tx.pkt_done = 0;
1625 ss->rx_big.cnt = 0;
1626 ss->rx_small.cnt = 0;
1627 ss->rx_done.idx = 0;
1628 ss->rx_done.cnt = 0;
1629 ss->rx_token = 0;
1630 ss->tx.watchdog_done = 0;
1631 ss->tx.watchdog_req = 0;
1632 ss->tx.active = 0;
1633 ss->tx.activate = 0;
1634 }
1635 mgp->watchdog_rx_pause = 0;
1636 if (mgp->ksp_stat != NULL) {
1637 ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
1638 ethstat->link_changes.value.ul = 0;
1639 }
1640 status = myri10ge_m_unicst(mgp, mgp->mac_addr);
1641 myri10ge_change_promisc(mgp, 0);
1642 (void) myri10ge_change_pause(mgp, mgp->pause);
1643 return (status);
1644 }
1645
1646 static int
1647 myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
1648 {
1649 myri10ge_cmd_t cmd;
1650 int i, b, s, t, j;
1651 int status;
1652 uint32_t k[8];
1653 uint32_t tmp;
1654 uint8_t *key;
1655
1656 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
1657 &cmd);
1658 if (status != 0) {
1659 cmn_err(CE_WARN, "%s: failed to get rss key\n",
1660 mgp->name);
1661 return (EIO);
1662 }
1663 myri10ge_pio_copy32(mgp->rss_key,
1664 (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
1665 sizeof (mgp->rss_key));
1666
1667 mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
1668 KM_SLEEP);
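	/*
	 * Layout of the table built below: 12 consecutive 256-entry
	 * tables, one per input byte position b. Entry [b*256 + v] is
	 * the XOR of the 32-bit key windows k[] selected by the bits set
	 * in byte value v (MSB first), i.e. the Toeplitz hash
	 * contribution of value v at position b; hashing a 12-byte tuple
	 * then reduces to 12 lookups XORed together.
	 */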
1669 key = (uint8_t *)mgp->rss_key;
1670 t = 0;
1671 for (b = 0; b < 12; b++) {
1672 for (s = 0; s < 8; s++) {
1673 /* Bits: b*8+s, ..., b*8+s+31 */
1674 k[s] = 0;
1675 for (j = 0; j < 32; j++) {
1676 int bit = b*8+s+j;
1677 bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
1678 k[s] |= bit << (31 - j);
1679 }
1680 }
1681
1682 for (i = 0; i <= 0xff; i++) {
1683 tmp = 0;
1684 if (i & (1 << 7)) { tmp ^= k[0]; }
1685 if (i & (1 << 6)) { tmp ^= k[1]; }
1686 if (i & (1 << 5)) { tmp ^= k[2]; }
1687 if (i & (1 << 4)) { tmp ^= k[3]; }
1688 if (i & (1 << 3)) { tmp ^= k[4]; }
1689 if (i & (1 << 2)) { tmp ^= k[5]; }
1690 if (i & (1 << 1)) { tmp ^= k[6]; }
1691 if (i & (1 << 0)) { tmp ^= k[7]; }
1692 mgp->toeplitz_hash_table[t++] = tmp;
1693 }
1694 }
1695 return (0);
1696 }
1697
1698 static inline struct myri10ge_slice_state *
1699 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1700 {
1701 struct tcphdr *hdr;
1702 uint32_t saddr, daddr;
1703 uint32_t hash, slice;
1704 uint32_t *table = mgp->toeplitz_hash_table;
1705 uint16_t src, dst;
1706
1707 /*
1708 * Note the hashing order is reversed from how it is done
1709 * in the NIC, so as to generate the same hash value
1710 * for the connection and try to keep connections CPU-local
1711 */
1712
1713 /* hash on IPv4 src/dst address */
1714 saddr = ntohl(ip->ip_src.s_addr);
1715 daddr = ntohl(ip->ip_dst.s_addr);
1716 hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
1717 hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
1718 hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
1719 hash ^= table[(256 * 3) + ((daddr) & 0xff)];
1720 hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
1721 hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
1722 hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
1723 hash ^= table[(256 * 7) + ((saddr) & 0xff)];
1724 /* hash on TCP port, if required */
1725 if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
1726 ip->ip_p == IPPROTO_TCP) {
1727 hdr = (struct tcphdr *)(void *)
1728 (((uint8_t *)ip) + (ip->ip_hl << 2));
1729 src = ntohs(hdr->th_sport);
1730 dst = ntohs(hdr->th_dport);
1731
1732 hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
1733 hash ^= table[(256 * 9) + ((dst) & 0xff)];
1734 hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
1735 hash ^= table[(256 * 11) + ((src) & 0xff)];
1736 }
1737 slice = (mgp->num_slices - 1) & hash;
1738 return (&mgp->ss[slice]);
1739
1740 }
1741
1742 static inline struct myri10ge_slice_state *
1743 myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1744 {
1745 struct tcphdr *hdr;
1746 uint32_t slice, hash_val;
1747
1748
1749 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
1750 return (&mgp->ss[0]);
1751 }
1752 hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));
1753
1754 /*
1755 * Use the second byte of the *destination* port for
1756 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
1757 */
1758 hash_val = ntohs(hdr->th_dport) & 0xff;
1759 if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
1760 hash_val += ntohs(hdr->th_sport) & 0xff;
1761
1762 slice = (mgp->num_slices - 1) & hash_val;
1763 return (&mgp->ss[slice]);
1764 }
1765
1766 static inline struct myri10ge_slice_state *
1767 myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
1768 {
1769 unsigned int slice = 0;
1770 struct ether_header *eh;
1771 struct ether_vlan_header *vh;
1772 struct ip *ip;
1773 int ehl, ihl;
1774
1775 if (mgp->num_slices == 1)
1776 return (&mgp->ss[0]);
1777
1778 if (myri10ge_tx_hash == 0) {
1779 slice = CPU->cpu_id & (mgp->num_slices - 1);
1780 return (&mgp->ss[slice]);
1781 }
1782
1783 /*
1784 * ensure it is a TCP or UDP over IPv4 packet, and that the
1785 * headers are in the 1st mblk. Otherwise, punt
1786 */
1787 ehl = sizeof (*eh);
1788 ihl = sizeof (*ip);
1789 if ((MBLKL(mp)) < (ehl + ihl + 8))
1790 return (&mgp->ss[0]);
1791 eh = (struct ether_header *)(void *)mp->b_rptr;
1792 ip = (struct ip *)(void *)(eh + 1);
1793 if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
1794 if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
1795 return (&mgp->ss[0]);
1796 vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
1797 if (vh->ether_type != BE_16(ETHERTYPE_IP))
1798 return (&mgp->ss[0]);
1799 ehl += 4;
1800 ip = (struct ip *)(void *)(vh + 1);
1801 }
1802 ihl = ip->ip_hl << 2;
1803 if (MBLKL(mp) < (ehl + ihl + 8))
1804 return (&mgp->ss[0]);
1805 switch (myri10ge_rss_hash) {
1806 case MXGEFW_RSS_HASH_TYPE_IPV4:
1807 /* fallthru */
1808 case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
1809 /* fallthru */
1810 case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
1811 return (myri10ge_toeplitz_send_hash(mgp, ip));
1812 case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
1813 /* fallthru */
1814 case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
1815 return (myri10ge_simple_send_hash(mgp, ip));
1816 default:
1817 break;
1818 }
1819 return (&mgp->ss[0]);
1820 }
1821
1822 static int
1823 myri10ge_setup_slice(struct myri10ge_slice_state *ss)
1824 {
1825 struct myri10ge_priv *mgp = ss->mgp;
1826 myri10ge_cmd_t cmd;
1827 int tx_ring_size, rx_ring_size;
1828 int tx_ring_entries, rx_ring_entries;
1829 int slice, status;
1830 int allocated, idx;
1831 size_t bytes;
1832
1833 slice = ss - mgp->ss;
1834 cmd.data0 = slice;
1835 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
1836 tx_ring_size = cmd.data0;
1837 cmd.data0 = slice;
1838 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1839 if (status != 0)
1840 return (status);
1841 rx_ring_size = cmd.data0;
1842
1843 tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
1844 rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
1845 ss->tx.mask = tx_ring_entries - 1;
1846 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
1847
1848 /* get the lanai pointers to the send and receive rings */
1849
1850 cmd.data0 = slice;
1851 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
1852 ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
1853 if (mgp->num_slices > 1) {
1854 ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
1855 ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
1856 64 * slice;
1857 } else {
1858 ss->tx.go = NULL;
1859 ss->tx.stop = NULL;
1860 }
1861
1862 cmd.data0 = slice;
1863 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
1864 ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
1865 (void *)(mgp->sram + cmd.data0);
1866
1867 cmd.data0 = slice;
1868 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
1869 ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
1870 (mgp->sram + cmd.data0);
1871
1872 if (status != 0) {
1873 cmn_err(CE_WARN,
1874 "%s: failed to get ring sizes or locations\n", mgp->name);
1875 return (status);
1876 }
1877
1878 status = ENOMEM;
1879 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
1880 ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
1881 if (ss->rx_small.shadow == NULL)
1882 goto abort;
1883 (void) memset(ss->rx_small.shadow, 0, bytes);
1884
1885 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
1886 ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
1887 if (ss->rx_big.shadow == NULL)
1888 goto abort_with_rx_small_shadow;
1889 (void) memset(ss->rx_big.shadow, 0, bytes);
1890
1891 /* allocate the host info rings */
1892
1893 bytes = tx_ring_entries * sizeof (*ss->tx.info);
1894 ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
1895 if (ss->tx.info == NULL)
1896 goto abort_with_rx_big_shadow;
1897 (void) memset(ss->tx.info, 0, bytes);
1898
1899 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
1900 ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
1901 if (ss->rx_small.info == NULL)
1902 goto abort_with_tx_info;
1903 (void) memset(ss->rx_small.info, 0, bytes);
1904
1905 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
1906 ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
1907 if (ss->rx_big.info == NULL)
1908 goto abort_with_rx_small_info;
1909 (void) memset(ss->rx_big.info, 0, bytes);
1910
1911 ss->tx.stall = ss->tx.sched = 0;
1912 ss->tx.stall_early = ss->tx.stall_late = 0;
1913
1914 ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
1915 (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));
1916
1917 allocated = myri10ge_add_jbufs(ss,
1918 myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
1919 if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
1920 cmn_err(CE_WARN,
1921 "%s: Could not allocate enough receive buffers (%d/%d)\n",
1922 mgp->name, allocated,
1923 myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
1924 goto abort_with_jumbos;
1925 }
1926
1927 myri10ge_carve_up_jbufs_into_small_ring(ss);
1928 ss->j_rx_cnt = 0;
1929
1930 mutex_enter(&ss->jpool.mtx);
1931 if (allocated < rx_ring_entries)
1932 ss->jpool.low_water = allocated / 4;
1933 else
1934 ss->jpool.low_water = rx_ring_entries / 2;
1935
1936 /*
1937 * invalidate the big receive ring in case we do not
1938 * allocate sufficient jumbos to fill it
1939 */
1940 (void) memset(ss->rx_big.shadow, 1,
1941 (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
1942 for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
1943 myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
1944 &ss->rx_big.shadow[idx - 7]);
1945 mb();
1946 }
1947
1948
1949 myri10ge_restock_jumbos(ss);
1950
1951 for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
1952 myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
1953 &ss->rx_small.shadow[idx - 7]);
1954 mb();
1955 }
1956 ss->rx_small.cnt = ss->rx_small.mask + 1;
1957
1958 mutex_exit(&ss->jpool.mtx);
1959
1960 status = myri10ge_prepare_tx_ring(ss);
1961
1962 if (status != 0)
1963 goto abort_with_small_jbufs;
1964
1965 cmd.data0 = ntohl(ss->fw_stats_dma.low);
1966 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1967 cmd.data2 = sizeof (mcp_irq_data_t);
1968 cmd.data2 |= (slice << 16);
1969 bzero(ss->fw_stats, sizeof (*ss->fw_stats));
1970 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
1971 if (status == ENOSYS) {
1972 cmd.data0 = ntohl(ss->fw_stats_dma.low) +
1973 offsetof(mcp_irq_data_t, send_done_count);
1974 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1975 status = myri10ge_send_cmd(mgp,
1976 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
1977 }
1978 if (status) {
1979 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
1980 goto abort_with_tx;
1981 }
1982
1983 return (0);
1984
1985 abort_with_tx:
1986 myri10ge_unprepare_tx_ring(ss);
1987
1988 abort_with_small_jbufs:
1989 myri10ge_release_small_jbufs(ss);
1990
1991 abort_with_jumbos:
1992 if (allocated != 0) {
1993 mutex_enter(&ss->jpool.mtx);
1994 ss->jpool.low_water = 0;
1995 mutex_exit(&ss->jpool.mtx);
1996 myri10ge_unstock_jumbos(ss);
1997 myri10ge_remove_jbufs(ss);
1998 }
1999
2000 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2001 kmem_free(ss->rx_big.info, bytes);
2002
2003 abort_with_rx_small_info:
2004 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2005 kmem_free(ss->rx_small.info, bytes);
2006
2007 abort_with_tx_info:
2008 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2009 kmem_free(ss->tx.info, bytes);
2010
2011 abort_with_rx_big_shadow:
2012 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2013 kmem_free(ss->rx_big.shadow, bytes);
2014
2015 abort_with_rx_small_shadow:
2016 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2017 kmem_free(ss->rx_small.shadow, bytes);
2018 abort:
2019 return (status);
2020
2021 }
2022
2023 static void
2024 myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
2025 {
2026 int tx_ring_entries, rx_ring_entries;
2027 size_t bytes;
2028
2029 /* ignore slices that have not been fully setup */
2030 if (ss->tx.cp == NULL)
2031 return;
2032 /* Free the TX copy buffers */
2033 myri10ge_unprepare_tx_ring(ss);
2034
2035 /* stop passing returned buffers to firmware */
2036
2037 mutex_enter(&ss->jpool.mtx);
2038 ss->jpool.low_water = 0;
2039 mutex_exit(&ss->jpool.mtx);
2040 myri10ge_release_small_jbufs(ss);
2041
2042 /* Release the free jumbo frame pool */
2043 myri10ge_unstock_jumbos(ss);
2044 myri10ge_remove_jbufs(ss);
2045
2046 rx_ring_entries = ss->rx_big.mask + 1;
2047 tx_ring_entries = ss->tx.mask + 1;
2048
2049 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2050 kmem_free(ss->rx_big.info, bytes);
2051
2052 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2053 kmem_free(ss->rx_small.info, bytes);
2054
2055 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2056 kmem_free(ss->tx.info, bytes);
2057
2058 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2059 kmem_free(ss->rx_big.shadow, bytes);
2060
2061 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2062 kmem_free(ss->rx_small.shadow, bytes);
2063
2064 }
2065 static int
2066 myri10ge_start_locked(struct myri10ge_priv *mgp)
2067 {
2068 myri10ge_cmd_t cmd;
2069 int status, big_pow2, i;
2070 volatile uint8_t *itable;
2071
2072 status = DDI_SUCCESS;
2073 /* Allocate DMA resources and receive buffers */
2074
2075 status = myri10ge_reset(mgp);
2076 if (status != 0) {
2077 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
2078 return (DDI_FAILURE);
2079 }
2080
2081 if (mgp->num_slices > 1) {
2082 cmd.data0 = mgp->num_slices;
2083 cmd.data1 = 1; /* use MSI-X */
2084 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2085 &cmd);
2086 if (status != 0) {
2087 cmn_err(CE_WARN,
2088 "%s: failed to set number of slices\n",
2089 mgp->name);
2090 goto abort_with_nothing;
2091 }
2092 /* setup the indirection table */
2093 cmd.data0 = mgp->num_slices;
2094 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2095 &cmd);
2096
2097 status |= myri10ge_send_cmd(mgp,
2098 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
2099 if (status != 0) {
2100 cmn_err(CE_WARN,
2101 "%s: failed to setup rss tables\n", mgp->name);
2102 }
2103
2104 /* just enable an identity mapping */
2105 itable = mgp->sram + cmd.data0;
2106 for (i = 0; i < mgp->num_slices; i++)
2107 itable[i] = (uint8_t)i;
2108
2109 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
2110 status = myri10ge_init_toeplitz(mgp);
2111 if (status != 0) {
2112 cmn_err(CE_WARN, "%s: failed to setup "
2113 "toeplitz tx hash table", mgp->name);
2114 goto abort_with_nothing;
2115 }
2116 }
2117 cmd.data0 = 1;
2118 cmd.data1 = myri10ge_rss_hash;
2119 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2120 &cmd);
2121 if (status != 0) {
2122 cmn_err(CE_WARN,
2123 "%s: failed to enable slices\n", mgp->name);
2124 goto abort_with_toeplitz;
2125 }
2126 }
2127
2128 for (i = 0; i < mgp->num_slices; i++) {
2129 status = myri10ge_setup_slice(&mgp->ss[i]);
2130 if (status != 0)
2131 goto abort_with_slices;
2132 }
2133
2134 /*
2135 * Tell the MCP how many buffers it has, and to
2136 * bring the ethernet interface up
2137 *
2138 * Firmware needs the big buff size as a power of 2. Lie and
2139 * tell it the buffer is larger, because we only use 1
2140 * buffer/pkt, and the mtu will prevent overruns
2141 */
2142 big_pow2 = myri10ge_mtu + MXGEFW_PAD;
2143 while (!ISP2(big_pow2))
2144 big_pow2++;
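
/*
 * Illustration: the loop above rounds big_pow2 up to the next power of
 * two by linear search; e.g. 1520 becomes 2048 and 9016 becomes 16384.
 */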
2145
2146 /* now give firmware buffers sizes, and MTU */
2147 cmd.data0 = myri10ge_mtu;
2148 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
2149 cmd.data0 = myri10ge_small_bytes;
2150 status |=
2151 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
2152 cmd.data0 = big_pow2;
2153 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
2154 if (status) {
2155 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
2156 goto abort_with_slices;
2157 }
2158
2159
2160 cmd.data0 = 1;
2161 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
2162 if (status) {
2163 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
2164 mgp->name, status);
2165 } else {
2166 mgp->features |= MYRI10GE_TSO;
2167 }
2168
2169 mgp->link_state = -1;
2170 mgp->rdma_tags_available = 15;
2171 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
2172 if (status) {
2173 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
2174 goto abort_with_slices;
2175 }
2176 mgp->running = MYRI10GE_ETH_RUNNING;
2177 return (DDI_SUCCESS);
2178
2179 abort_with_slices:
2180 for (i = 0; i < mgp->num_slices; i++)
2181 myri10ge_teardown_slice(&mgp->ss[i]);
2182
2183 mgp->running = MYRI10GE_ETH_STOPPED;
2184
2185 abort_with_toeplitz:
2186 if (mgp->toeplitz_hash_table != NULL) {
2187 kmem_free(mgp->toeplitz_hash_table,
2188 sizeof (uint32_t) * 12 * 256);
2189 mgp->toeplitz_hash_table = NULL;
2190 }
2191
2192 abort_with_nothing:
2193 return (DDI_FAILURE);
2194 }
2195
2196 static void
2197 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2198 {
2199 int status, old_down_cnt;
2200 myri10ge_cmd_t cmd;
2201 int wait_time = 10;
2202 int i, polling;
2203
2204 old_down_cnt = mgp->down_cnt;
2205 mb();
2206 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2207 if (status) {
2208 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2209 }
2210
2211 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2212 delay(1 * drv_usectohz(1000000));
2213 wait_time--;
2214 if (wait_time == 0)
2215 break;
2216 }
2217 again:
2218 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2219 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2220 for (i = 0; i < mgp->num_slices; i++) {
2221 /*
2222 * take and release the rx lock to ensure
2223 * that no interrupt thread is blocked
2224 * elsewhere in the stack, preventing
2225 * completion
2226 */
2227
2228 mutex_enter(&mgp->ss[i].rx_lock);
2229 printf("%s: slice %d rx irq idle\n",
2230 mgp->name, i);
2231 mutex_exit(&mgp->ss[i].rx_lock);
2232
2233 /* verify that the poll handler is inactive */
2234 			mutex_enter(&mgp->ss[i].poll_lock);
2235 			polling = mgp->ss[i].rx_polling;
2236 			mutex_exit(&mgp->ss[i].poll_lock);
2237 if (polling) {
2238 printf("%s: slice %d is polling\n",
2239 mgp->name, i);
2240 delay(1 * drv_usectohz(1000000));
2241 goto again;
2242 }
2243 }
2244 delay(1 * drv_usectohz(1000000));
2245 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2246 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2247 }
2248 }
2249
2250 for (i = 0; i < mgp->num_slices; i++)
2251 myri10ge_teardown_slice(&mgp->ss[i]);
2252
2253 if (mgp->toeplitz_hash_table != NULL) {
2254 kmem_free(mgp->toeplitz_hash_table,
2255 sizeof (uint32_t) * 12 * 256);
2256 mgp->toeplitz_hash_table = NULL;
2257 }
2258 mgp->running = MYRI10GE_ETH_STOPPED;
2259 }
2260
2261 static int
2262 myri10ge_m_start(void *arg)
2263 {
2264 struct myri10ge_priv *mgp = arg;
2265 int status;
2266
2267 mutex_enter(&mgp->intrlock);
2268
2269 if (mgp->running != MYRI10GE_ETH_STOPPED) {
2270 mutex_exit(&mgp->intrlock);
2271 return (DDI_FAILURE);
2272 }
2273 status = myri10ge_start_locked(mgp);
2274 mutex_exit(&mgp->intrlock);
2275
2276 if (status != DDI_SUCCESS)
2277 return (status);
2278
2279 /* start the watchdog timer */
2280 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2281 mgp->timer_ticks);
2282 return (DDI_SUCCESS);
2283
2284 }
2285
2286 static void
2287 myri10ge_m_stop(void *arg)
2288 {
2289 struct myri10ge_priv *mgp = arg;
2290
2291 mutex_enter(&mgp->intrlock);
2292 	/* if the device is not running, give up */
2293 if (mgp->running != MYRI10GE_ETH_RUNNING) {
2294 mutex_exit(&mgp->intrlock);
2295 return;
2296 }
2297
2298 mgp->running = MYRI10GE_ETH_STOPPING;
2299 mutex_exit(&mgp->intrlock);
2300 (void) untimeout(mgp->timer_id);
2301 mutex_enter(&mgp->intrlock);
2302 myri10ge_stop_locked(mgp);
2303 mutex_exit(&mgp->intrlock);
2304
2305 }
2306
2307 static inline void
2308 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
2309 {
2310 struct ether_header *eh;
2311 struct ip *ip;
2312 struct ip6_hdr *ip6;
2313 uint32_t start, stuff, end, partial, hdrlen;
2314
2315
2316 csum = ntohs((uint16_t)csum);
2317 eh = (struct ether_header *)(void *)mp->b_rptr;
2318 hdrlen = sizeof (*eh);
2319 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2320 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2321 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2322 s->brdcstrcv++;
2323 else
2324 s->multircv++;
2325 }
2326
2327 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
2328 /*
2329 		 * fix the checksum by subtracting the 4 VLAN-tag bytes that
2330 		 * follow what the firmware thought was the end of the ether hdr
2331 */
2332 partial = *(uint32_t *)
2333 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
2334 csum += ~partial;
2335 csum += (csum < ~partial);
2336 csum = (csum >> 16) + (csum & 0xFFFF);
2337 csum = (csum >> 16) + (csum & 0xFFFF);
2338 hdrlen += VLAN_TAGSZ;
2339 }
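
/*
 * Illustration: the update above is one's-complement subtraction, using
 * the identity x - y == x + ~y with end-around carry.  E.g. removing
 * partial = 0x1 from csum = 0x3 in 32-bit arithmetic: 0x3 + 0xfffffffe
 * wraps to 0x1 with a carry out (added back by the "csum < ~partial"
 * test), giving 0x2; the two fold steps then reduce the accumulator
 * back to 16 bits.
 */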
2340
2341 if (eh->ether_type == BE_16(ETHERTYPE_IP)) {
2342 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
2343 start = ip->ip_hl << 2;
2344
2345 if (ip->ip_p == IPPROTO_TCP)
2346 stuff = start + offsetof(struct tcphdr, th_sum);
2347 else if (ip->ip_p == IPPROTO_UDP)
2348 stuff = start + offsetof(struct udphdr, uh_sum);
2349 else
2350 return;
2351 end = ntohs(ip->ip_len);
2352 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) {
2353 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
2354 start = sizeof (*ip6);
2355 if (ip6->ip6_nxt == IPPROTO_TCP) {
2356 stuff = start + offsetof(struct tcphdr, th_sum);
2357 } else if (ip6->ip6_nxt == IPPROTO_UDP)
2358 stuff = start + offsetof(struct udphdr, uh_sum);
2359 else
2360 return;
2361 end = start + ntohs(ip6->ip6_plen);
2362 /*
2363 * IPv6 headers do not contain a checksum, and hence
2364 * do not checksum to zero, so they don't "fall out"
2365 * of the partial checksum calculation like IPv4
2366 * headers do. We need to fix the partial checksum by
2367 * subtracting the checksum of the IPv6 header.
2368 */
2369
2370 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
2371 csum += ~partial;
2372 csum += (csum < ~partial);
2373 csum = (csum >> 16) + (csum & 0xFFFF);
2374 csum = (csum >> 16) + (csum & 0xFFFF);
2375 } else {
2376 return;
2377 }
2378
2379 if (MBLKL(mp) > hdrlen + end) {
2380 /* padded frame, so hw csum may be invalid */
2381 return;
2382 }
2383
2384 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
2385 }
2386
2387 static mblk_t *
2388 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
2389 uint32_t csum)
2390 {
2391 mblk_t *mp;
2392 myri10ge_rx_ring_t *rx;
2393 int idx;
2394
2395 rx = &ss->rx_small;
2396 idx = rx->cnt & rx->mask;
2397 ss->rx_small.cnt++;
2398
2399 /* allocate a new buffer to pass up the stack */
2400 mp = allocb(len + MXGEFW_PAD, 0);
2401 if (mp == NULL) {
2402 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
2403 goto abort;
2404 }
2405 bcopy(ss->rx_small.info[idx].ptr,
2406 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2407 mp->b_wptr += len + MXGEFW_PAD;
2408 mp->b_rptr += MXGEFW_PAD;
2409
2410 ss->rx_stats.ibytes += len;
2411 ss->rx_stats.ipackets += 1;
2412 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2413
2414 abort:
2415 if ((idx & 7) == 7) {
2416 myri10ge_submit_8rx(&rx->lanai[idx - 7],
2417 &rx->shadow[idx - 7]);
2418 }
2419
2420 return (mp);
2421 }
2422
2423
2424 static mblk_t *
2425 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
2426 uint32_t csum)
2427 {
2428 struct myri10ge_jpool_stuff *jpool;
2429 struct myri10ge_jpool_entry *j;
2430 mblk_t *mp;
2431 int idx, num_owned_by_mcp;
2432
2433 jpool = &ss->jpool;
2434 idx = ss->j_rx_cnt & ss->rx_big.mask;
2435 j = ss->rx_big.info[idx].j;
2436
2437 if (j == NULL) {
2438 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
2439 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
2440 return (NULL);
2441 }
2442
2443
2444 ss->rx_big.info[idx].j = NULL;
2445 ss->j_rx_cnt++;
2446
2447
2448 /*
2449 * Check to see if we are low on rx buffers.
2450 * Note that we must leave at least 8 free so there are
2451 * enough to free in a single 64-byte write.
2452 */
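	/*
	 * (Assumption, for illustration: each receive descriptor appears
	 * to be an 8-byte address block, so a group of 8 fills exactly
	 * one 64-byte burst write in myri10ge_submit_8rx.)
	 */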
2453 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2454 if (num_owned_by_mcp < jpool->low_water) {
2455 mutex_enter(&jpool->mtx);
2456 myri10ge_restock_jumbos(ss);
2457 mutex_exit(&jpool->mtx);
2458 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2459 /* if we are still low, then we have to copy */
2460 if (num_owned_by_mcp < 16) {
2461 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
2462 /* allocate a new buffer to pass up the stack */
2463 mp = allocb(len + MXGEFW_PAD, 0);
2464 if (mp == NULL) {
2465 goto abort;
2466 }
2467 bcopy(j->buf,
2468 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2469 myri10ge_jfree_rtn(j);
2470 /* push buffer back to NIC */
2471 mutex_enter(&jpool->mtx);
2472 myri10ge_restock_jumbos(ss);
2473 mutex_exit(&jpool->mtx);
2474 goto set_len;
2475 }
2476 }
2477
2478 /* loan our buffer to the stack */
2479 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
2480 if (mp == NULL) {
2481 goto abort;
2482 }
2483
2484 set_len:
2485 mp->b_rptr += MXGEFW_PAD;
2486 mp->b_wptr = ((unsigned char *) mp->b_rptr + len);
2487
2488 ss->rx_stats.ibytes += len;
2489 ss->rx_stats.ipackets += 1;
2490 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2491
2492 return (mp);
2493
2494 abort:
2495 myri10ge_jfree_rtn(j);
2496 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
2497 return (NULL);
2498 }
2499
2500 /*
2501 * Free all transmit buffers up until the specified index
2502 */
2503 static inline void
2504 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
2505 {
2506 myri10ge_tx_ring_t *tx;
2507 struct myri10ge_tx_dma_handle_head handles;
2508 int idx;
2509 int limit = 0;
2510
2511 tx = &ss->tx;
2512 handles.head = NULL;
2513 handles.tail = NULL;
2514 while (tx->pkt_done != (int)mcp_index) {
2515 idx = tx->done & tx->mask;
2516
2517 /*
2518 * mblk & DMA handle attached only to first slot
2519 * per buffer in the packet
2520 */
2521
2522 if (tx->info[idx].m) {
2523 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
2524 tx->info[idx].handle->next = handles.head;
2525 handles.head = tx->info[idx].handle;
2526 if (handles.tail == NULL)
2527 handles.tail = tx->info[idx].handle;
2528 freeb(tx->info[idx].m);
2529 tx->info[idx].m = 0;
2530 tx->info[idx].handle = 0;
2531 }
2532 if (tx->info[idx].ostat.opackets != 0) {
2533 tx->stats.multixmt += tx->info[idx].ostat.multixmt;
2534 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
2535 tx->stats.obytes += tx->info[idx].ostat.obytes;
2536 tx->stats.opackets += tx->info[idx].ostat.opackets;
2537 tx->info[idx].stat.un.all = 0;
2538 tx->pkt_done++;
2539 }
2540
2541 tx->done++;
2542 /*
2543 		 * if we stalled the queue, wake it, but wait until
2544 		 * we have at least 1/2 of our slots free.
2545 */
2546 if ((tx->req - tx->done) < (tx->mask >> 1) &&
2547 tx->stall != tx->sched) {
2548 mutex_enter(&ss->tx.lock);
2549 tx->sched = tx->stall;
2550 mutex_exit(&ss->tx.lock);
2551 mac_tx_ring_update(ss->mgp->mh, tx->rh);
2552 }
2553
2554 /* limit potential for livelock */
2555 if (unlikely(++limit > 2 * tx->mask))
2556 break;
2557 }
2558 if (tx->req == tx->done && tx->stop != NULL) {
2559 /*
2560 		 * The NIC has sent all pending requests; allow it
2561 		 * to stop polling this queue
2562 */
2563 mutex_enter(&tx->lock);
2564 if (tx->req == tx->done && tx->active) {
2565 *(int *)(void *)tx->stop = 1;
2566 tx->active = 0;
2567 mb();
2568 }
2569 mutex_exit(&tx->lock);
2570 }
2571 if (handles.head != NULL)
2572 myri10ge_free_tx_handles(tx, &handles);
2573 }
2574
2575 static void
2576 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2577 {
2578 mbl->head = NULL;
2579 mbl->tail = &mbl->head;
2580 mbl->cnt = 0;
2581 }
2582
2583 /*ARGSUSED*/
2584 void
2585 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2586 struct myri10ge_mblk_list *mbl, mblk_t *mp)
2587 {
2588 *(mbl->tail) = mp;
2589 mbl->tail = &mp->b_next;
2590 mp->b_next = NULL;
2591 mbl->cnt++;
2592 }
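
/*
 * Design note: keeping "tail" as a pointer to the last b_next field
 * makes append O(1) with no empty-list special case, since
 * myri10ge_mbl_init points tail at &mbl->head.
 */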
2593
2594
2595 static inline void
2596 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
2597 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
2598 {
2599 myri10ge_rx_done_t *rx_done = &ss->rx_done;
2600 struct myri10ge_priv *mgp = ss->mgp;
2601 mblk_t *mp;
2602 struct lro_entry *lro;
2603 uint16_t length;
2604 uint16_t checksum;
2605
2606
2607 while (rx_done->entry[rx_done->idx].length != 0) {
2608 if (unlikely (*stop)) {
2609 break;
2610 }
2611 length = ntohs(rx_done->entry[rx_done->idx].length);
2612 length &= (~MXGEFW_RSS_HASH_MASK);
2613
2614 /* limit potential for livelock */
2615 limit -= length;
2616 if (unlikely(limit < 0))
2617 break;
2618
2619 rx_done->entry[rx_done->idx].length = 0;
2620 checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
2621 if (length <= myri10ge_small_bytes)
2622 mp = myri10ge_rx_done_small(ss, length, checksum);
2623 else
2624 mp = myri10ge_rx_done_big(ss, length, checksum);
2625 if (mp != NULL) {
2626 if (!myri10ge_lro ||
2627 0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
2628 myri10ge_mbl_append(ss, mbl, mp);
2629 }
2630 rx_done->cnt++;
2631 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
2632 }
2633 while (ss->lro_active != NULL) {
2634 lro = ss->lro_active;
2635 ss->lro_active = lro->next;
2636 myri10ge_lro_flush(ss, lro, mbl);
2637 }
2638 }
2639
2640 static void
2641 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2642 {
2643 uint64_t gen;
2644 struct myri10ge_mblk_list mbl;
2645
2646 myri10ge_mbl_init(&mbl);
2647 if (mutex_tryenter(&ss->rx_lock) == 0)
2648 return;
2649 gen = ss->rx_gen_num;
2650 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2651 &ss->rx_polling);
2652 if (mbl.head != NULL)
2653 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2654 mutex_exit(&ss->rx_lock);
2655
2656 }
2657
2658 static mblk_t *
2659 myri10ge_poll_rx(void *arg, int bytes)
2660 {
2661 struct myri10ge_slice_state *ss = arg;
2662 struct myri10ge_mblk_list mbl;
2663 boolean_t dummy = B_FALSE;
2664
2665 if (bytes == 0)
2666 return (NULL);
2667
2668 myri10ge_mbl_init(&mbl);
2669 mutex_enter(&ss->rx_lock);
2670 if (ss->rx_polling)
2671 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2672 else
2673 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2674 ss->mgp->ss), ss->rx_token, ss->rx_polling);
2675 mutex_exit(&ss->rx_lock);
2676 return (mbl.head);
2677 }
2678
2679 /*ARGSUSED*/
2680 static uint_t
2681 myri10ge_intr(caddr_t arg0, caddr_t arg1)
2682 {
2683 struct myri10ge_slice_state *ss =
2684 (struct myri10ge_slice_state *)(void *)arg0;
2685 struct myri10ge_priv *mgp = ss->mgp;
2686 mcp_irq_data_t *stats = ss->fw_stats;
2687 myri10ge_tx_ring_t *tx = &ss->tx;
2688 uint32_t send_done_count;
2689 uint8_t valid;
2690
2691
2692 /* make sure the DMA has finished */
2693 if (!stats->valid) {
2694 return (DDI_INTR_UNCLAIMED);
2695 }
2696 valid = stats->valid;
2697
2698 /* low bit indicates receives are present */
2699 if (valid & 1)
2700 myri10ge_intr_rx(ss);
2701
2702 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
2703 /* lower legacy IRQ */
2704 *mgp->irq_deassert = 0;
2705 if (!myri10ge_deassert_wait)
2706 /* don't wait for conf. that irq is low */
2707 stats->valid = 0;
2708 mb();
2709 } else {
2710 /* no need to wait for conf. that irq is low */
2711 stats->valid = 0;
2712 }
2713
2714 do {
2715 /* check for transmit completes and receives */
2716 send_done_count = ntohl(stats->send_done_count);
2717 if (send_done_count != tx->pkt_done)
2718 myri10ge_tx_done(ss, (int)send_done_count);
2719 } while (*((volatile uint8_t *) &stats->valid));
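	/*
	 * Note: re-reading stats->valid after each pass closes the window
	 * where the NIC DMAs a new send_done_count while the previous
	 * batch is still being processed.
	 */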
2720
2721 if (stats->stats_updated) {
2722 if (mgp->link_state != stats->link_up || stats->link_down) {
2723 mgp->link_state = stats->link_up;
2724 if (stats->link_down) {
2725 mgp->down_cnt += stats->link_down;
2726 mgp->link_state = 0;
2727 }
2728 if (mgp->link_state) {
2729 if (myri10ge_verbose)
2730 printf("%s: link up\n", mgp->name);
2731 mac_link_update(mgp->mh, LINK_STATE_UP);
2732 } else {
2733 if (myri10ge_verbose)
2734 printf("%s: link down\n", mgp->name);
2735 mac_link_update(mgp->mh, LINK_STATE_DOWN);
2736 }
2737 MYRI10GE_NIC_STAT_INC(link_changes);
2738 }
2739 if (mgp->rdma_tags_available !=
2740 ntohl(ss->fw_stats->rdma_tags_available)) {
2741 mgp->rdma_tags_available =
2742 ntohl(ss->fw_stats->rdma_tags_available);
2743 cmn_err(CE_NOTE, "%s: RDMA timed out! "
2744 "%d tags left\n", mgp->name,
2745 mgp->rdma_tags_available);
2746 }
2747 }
2748
2749 mb();
2750 /* check to see if we have rx token to pass back */
2751 if (valid & 0x1) {
2752 mutex_enter(&ss->poll_lock);
2753 if (ss->rx_polling) {
2754 ss->rx_token = 1;
2755 } else {
2756 *ss->irq_claim = BE_32(3);
2757 ss->rx_token = 0;
2758 }
2759 mutex_exit(&ss->poll_lock);
2760 }
2761 *(ss->irq_claim + 1) = BE_32(3);
2762 return (DDI_INTR_CLAIMED);
2763 }
2764
2765 /*
2766 * Add or remove a multicast address. This is called with our
2767 * macinfo's lock held by GLD, so we do not need to worry about
2768 * our own locking here.
2769 */
2770 static int
2771 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2772 {
2773 myri10ge_cmd_t cmd;
2774 struct myri10ge_priv *mgp = arg;
2775 int status, join_leave;
2776
2777 if (add)
2778 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2779 else
2780 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2781 (void) memcpy(&cmd.data0, multicastaddr, 4);
2782 (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2783 cmd.data0 = htonl(cmd.data0);
2784 cmd.data1 = htonl(cmd.data1);
2785 status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2786 if (status == 0)
2787 return (0);
2788
2789 cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2790 mgp->name);
2791 return (status);
2792 }
2793
2794
2795 static int
2796 myri10ge_m_promisc(void *arg, boolean_t on)
2797 {
2798 struct myri10ge_priv *mgp = arg;
2799
2800 myri10ge_change_promisc(mgp, on);
2801 return (0);
2802 }
2803
2804 /*
2805 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2806 * backwards one at a time and handle ring wraps
2807 */
2808
2809 static inline void
2810 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
2811 mcp_kreq_ether_send_t *src, int cnt)
2812 {
2813 int idx, starting_slot;
2814 starting_slot = tx->req;
2815 while (cnt > 1) {
2816 cnt--;
2817 idx = (starting_slot + cnt) & tx->mask;
2818 myri10ge_pio_copy(&tx->lanai[idx],
2819 &src[cnt], sizeof (*src));
2820 mb();
2821 }
2822 }
2823
2824 /*
2825 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2826 * at most 32 bytes at a time, so as to avoid involving the software
2827 * pio handler in the nic. We re-write the first segment's flags
2828 * to mark them valid only after writing the entire chain
2829 */
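/*
 * Illustration: each send descriptor is a 16-byte block, so copying two
 * at a time keeps every PIO write at 32 bytes; and because the first
 * descriptor is written with flags = 0 and patched in last, the NIC
 * never observes a partially written chain as valid.
 */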
2830
2831 static inline void
2832 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
2833 int cnt)
2834 {
2835 int idx, i;
2836 uint32_t *src_ints, *dst_ints;
2837 mcp_kreq_ether_send_t *srcp, *dstp, *dst;
2838 uint8_t last_flags;
2839
2840 idx = tx->req & tx->mask;
2841
2842 last_flags = src->flags;
2843 src->flags = 0;
2844 mb();
2845 dst = dstp = &tx->lanai[idx];
2846 srcp = src;
2847
2848 if ((idx + cnt) < tx->mask) {
2849 for (i = 0; i < (cnt - 1); i += 2) {
2850 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
2851 mb(); /* force write every 32 bytes */
2852 srcp += 2;
2853 dstp += 2;
2854 }
2855 } else {
2856 /*
2857 * submit all but the first request, and ensure
2858 * that it is submitted below
2859 */
2860 myri10ge_submit_req_backwards(tx, src, cnt);
2861 i = 0;
2862 }
2863 if (i < cnt) {
2864 /* submit the first request */
2865 myri10ge_pio_copy(dstp, srcp, sizeof (*src));
2866 mb(); /* barrier before setting valid flag */
2867 }
2868
2869 /* re-write the last 32-bits with the valid flags */
2870 src->flags |= last_flags;
2871 src_ints = (uint32_t *)src;
2872 src_ints += 3;
2873 dst_ints = (uint32_t *)dst;
2874 dst_ints += 3;
2875 *dst_ints = *src_ints;
2876 tx->req += cnt;
2877 mb();
2878 /* notify NIC to poll this tx ring */
2879 if (!tx->active && tx->go != NULL) {
2880 *(int *)(void *)tx->go = 1;
2881 tx->active = 1;
2882 tx->activate++;
2883 mb();
2884 }
2885 }
2886
2887 /* ARGSUSED */
2888 static inline void
2889 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2890 {
2891 uint32_t lso_flag;
2892 mac_lso_get(mp, mss, &lso_flag);
2893 (*flags) |= lso_flag;
2894 }
2895
2896
2897 /* like pullupmsg, except preserve hcksum/LSO attributes */
2898 static int
2899 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2900 {
2901 uint32_t start, stuff, tx_offload_flags, mss;
2902 int ok;
2903
2904 mss = 0;
2905 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2906 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2907
2908 ok = pullupmsg(mp, -1);
2909 if (!ok) {
2910 printf("pullupmsg failed");
2911 return (DDI_FAILURE);
2912 }
2913 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2914 mac_hcksum_set(mp, start, stuff, 0, 0, tx_offload_flags);
2915 if (tx_offload_flags & HW_LSO)
2916 DB_LSOMSS(mp) = (uint16_t)mss;
2917 lso_info_set(mp, mss, tx_offload_flags);
2918 return (DDI_SUCCESS);
2919 }
2920
2921 static inline void
2922 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2923 int opackets, int obytes)
2924 {
2925 s->un.all = 0;
2926 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2927 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2928 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2929 s->un.s.brdcstxmt = 1;
2930 else
2931 s->un.s.multixmt = 1;
2932 }
2933 s->un.s.opackets = (uint16_t)opackets;
2934 s->un.s.obytes = obytes;
2935 }
2936
2937 static int
2938 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
2939 mcp_kreq_ether_send_t *req)
2940 {
2941 myri10ge_tx_ring_t *tx = &ss->tx;
2942 caddr_t ptr;
2943 struct myri10ge_tx_copybuf *cp;
2944 mblk_t *bp;
2945 int idx, mblen, avail;
2946 uint16_t len;
2947
2948 mutex_enter(&tx->lock);
2949 avail = tx->mask - (tx->req - tx->done);
2950 if (avail <= 1) {
2951 mutex_exit(&tx->lock);
2952 return (EBUSY);
2953 }
2954 idx = tx->req & tx->mask;
2955 cp = &tx->cp[idx];
2956 ptr = cp->va;
2957 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
2958 mblen = MBLKL(bp);
2959 bcopy(bp->b_rptr, ptr, mblen);
2960 ptr += mblen;
2961 len += mblen;
2962 }
2963 /* ensure runts are padded to 60 bytes */
2964 if (len < 60) {
2965 bzero(ptr, 64 - len);
2966 len = 60;
2967 }
2968 req->addr_low = cp->dma.low;
2969 req->addr_high = cp->dma.high;
2970 req->length = htons(len);
2971 req->pad = 0;
2972 req->rdma_count = 1;
2973 myri10ge_tx_stat(&tx->info[idx].stat,
2974 (struct ether_header *)(void *)cp->va, 1, len);
2975 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
2976 myri10ge_submit_req(&ss->tx, req, 1);
2977 mutex_exit(&tx->lock);
2978 freemsg(mp);
2979 return (DDI_SUCCESS);
2980 }
2981
2982
2983 static void
2984 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2985 struct myri10ge_tx_buffer_state *tx_info,
2986 int count)
2987 {
2988 int i, idx;
2989
2990 idx = 0; /* gcc -Wuninitialized */
2991 /* store unmapping and bp info for tx irq handler */
2992 for (i = 0; i < count; i++) {
2993 idx = (tx->req + i) & tx->mask;
2994 tx->info[idx].m = tx_info[i].m;
2995 tx->info[idx].handle = tx_info[i].handle;
2996 }
2997 tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
2998
2999 /* submit the frame to the nic */
3000 myri10ge_submit_req(tx, req_list, count);
3001
3002
3003 }
3004
3005
3006
3007 static void
3008 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3009 {
3010 mblk_t *bp;
3011 int seglen;
3012 uint_t count;
3013
3014 bp = mp;
3015
3016 while (off > 0) {
3017 seglen = MBLKL(bp);
3018 if (off < seglen)
3019 break;
3020 off -= seglen;
3021 bp = bp->b_cont;
3022 }
3023 while (len > 0) {
3024 seglen = MBLKL(bp);
3025 count = min(seglen - off, len);
3026 bcopy(bp->b_rptr + off, buf, count);
3027 len -= count;
3028 buf += count;
3029 off = 0;
3030 bp = bp->b_cont;
3031 }
3032 }
3033
3034 static int
3035 myri10ge_ether_parse_header(mblk_t *mp)
3036 {
3037 struct ether_header eh_copy;
3038 struct ether_header *eh;
3039 int eth_hdr_len, seglen;
3040
3041 seglen = MBLKL(mp);
3042 eth_hdr_len = sizeof (*eh);
3043 if (seglen < eth_hdr_len) {
3044 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3045 eh = &eh_copy;
3046 } else {
3047 eh = (struct ether_header *)(void *)mp->b_rptr;
3048 }
3049 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3050 eth_hdr_len += 4;
3051 }
3052
3053 return (eth_hdr_len);
3054 }
3055
3056 static int
3057 myri10ge_lso_parse_header(mblk_t *mp, int off)
3058 {
3059 char buf[128];
3060 int seglen, sum_off;
3061 struct ip *ip;
3062 struct tcphdr *tcp;
3063
3064 seglen = MBLKL(mp);
3065 if (seglen < off + sizeof (*ip)) {
3066 myri10ge_copydata(mp, off, sizeof (*ip), buf);
3067 ip = (struct ip *)(void *)buf;
3068 } else {
3069 ip = (struct ip *)(void *)(mp->b_rptr + off);
3070 }
3071 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
3072 myri10ge_copydata(mp, off,
3073 (ip->ip_hl << 2) + sizeof (*tcp), buf);
3074 ip = (struct ip *)(void *)buf;
3075 }
3076 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));
3077
3078 /*
3079 * NIC expects ip_sum to be zero. Recent changes to
3080 * OpenSolaris leave the correct ip checksum there, rather
3081 * than the required zero, so we need to zero it. Otherwise,
3082 * the NIC will produce bad checksums when sending LSO packets.
3083 */
3084 if (ip->ip_sum != 0) {
3085 if (((char *)ip) != buf) {
3086 /* ip points into mblk, so just zero it */
3087 ip->ip_sum = 0;
3088 } else {
3089 /*
3090 * ip points into a copy, so walk the chain
3091 * to find the ip_csum, then zero it
3092 */
3093 sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
3094 while (sum_off > (int)(MBLKL(mp) - 1)) {
3095 sum_off -= MBLKL(mp);
3096 mp = mp->b_cont;
3097 }
3098 mp->b_rptr[sum_off] = 0;
3099 sum_off++;
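			/*
			 * Note: ip_sum is 2 bytes, and the second byte
			 * may start in a later mblk, so walk the chain
			 * again before zeroing it.
			 */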
3100 while (sum_off > MBLKL(mp) - 1) {
3101 sum_off -= MBLKL(mp);
3102 mp = mp->b_cont;
3103 }
3104 mp->b_rptr[sum_off] = 0;
3105 }
3106 }
3107 return (off + ((ip->ip_hl + tcp->th_off) << 2));
3108 }
3109
3110 static int
3111 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
3112 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
3113 uint16_t mss, uint8_t cksum_offset)
3114 {
3115 myri10ge_tx_ring_t *tx = &ss->tx;
3116 struct myri10ge_priv *mgp = ss->mgp;
3117 mblk_t *bp;
3118 mcp_kreq_ether_send_t *req;
3119 struct myri10ge_tx_copybuf *cp;
3120 caddr_t rptr, ptr;
3121 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
3122 int resid, avail, idx, hdr_size_tmp, tx_boundary;
3123 int rdma_count;
3124 uint32_t seglen, len, boundary, low, high_swapped;
3125 uint16_t pseudo_hdr_offset = htons(mss);
3126 uint8_t flags;
3127
3128 tx_boundary = mgp->tx_boundary;
3129 hdr_size_tmp = hdr_size;
3130 resid = tx_boundary;
3131 count = 1;
3132 mutex_enter(&tx->lock);
3133
3134 /* check to see if the slots are really there */
3135 avail = tx->mask - (tx->req - tx->done);
3136 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) {
3137 atomic_inc_32(&tx->stall);
3138 mutex_exit(&tx->lock);
3139 return (EBUSY);
3140 }
3141
3142 /* copy */
3143 cum_len = -hdr_size;
3144 count = 0;
3145 req = req_list;
3146 idx = tx->mask & tx->req;
3147 cp = &tx->cp[idx];
3148 low = ntohl(cp->dma.low);
3149 ptr = cp->va;
3150 cp->len = 0;
3151 if (mss) {
3152 int payload = pkt_size - hdr_size;
3153 uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
3154 tx->info[idx].ostat.opackets = opackets;
3155 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
3156 + pkt_size;
3157 }
3158 hdr_size_tmp = hdr_size;
3159 mss_resid = mss;
3160 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3161 tx_req = tx->req;
3162 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3163 mblen = MBLKL(bp);
3164 rptr = (caddr_t)bp->b_rptr;
3165 len = min(hdr_size_tmp, mblen);
3166 if (len) {
3167 bcopy(rptr, ptr, len);
3168 rptr += len;
3169 ptr += len;
3170 resid -= len;
3171 mblen -= len;
3172 hdr_size_tmp -= len;
3173 cp->len += len;
3174 if (hdr_size_tmp)
3175 continue;
3176 if (resid < mss) {
3177 tx_req++;
3178 idx = tx->mask & tx_req;
3179 cp = &tx->cp[idx];
3180 low = ntohl(cp->dma.low);
3181 ptr = cp->va;
3182 resid = tx_boundary;
3183 }
3184 }
3185 while (mblen) {
3186 len = min(mss_resid, mblen);
3187 bcopy(rptr, ptr, len);
3188 mss_resid -= len;
3189 resid -= len;
3190 mblen -= len;
3191 rptr += len;
3192 ptr += len;
3193 cp->len += len;
3194 if (mss_resid == 0) {
3195 mss_resid = mss;
3196 if (resid < mss) {
3197 tx_req++;
3198 idx = tx->mask & tx_req;
3199 cp = &tx->cp[idx];
3200 cp->len = 0;
3201 low = ntohl(cp->dma.low);
3202 ptr = cp->va;
3203 resid = tx_boundary;
3204 }
3205 }
3206 }
3207 }
3208
3209 req = req_list;
3210 pkt_size_tmp = pkt_size;
3211 count = 0;
3212 rdma_count = 0;
3213 tx_req = tx->req;
3214 while (pkt_size_tmp) {
3215 idx = tx->mask & tx_req;
3216 cp = &tx->cp[idx];
3217 high_swapped = cp->dma.high;
3218 low = ntohl(cp->dma.low);
3219 len = cp->len;
3220 if (len == 0) {
3221 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
3222 pkt_size_tmp, pkt_size);
3223 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3224 mblen = MBLKL(bp);
3225 printf("mblen:%d\n", mblen);
3226 }
3227 pkt_size_tmp = pkt_size;
3228 tx_req = tx->req;
3229 while (pkt_size_tmp > 0) {
3230 idx = tx->mask & tx_req;
3231 cp = &tx->cp[idx];
3232 printf("cp->len = %d\n", cp->len);
3233 pkt_size_tmp -= cp->len;
3234 tx_req++;
3235 }
3236 printf("dropped\n");
3237 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3238 goto done;
3239 }
3240 pkt_size_tmp -= len;
3241 while (len) {
3242 while (len) {
3243 uint8_t flags_next;
3244 int cum_len_next;
3245
3246 boundary = (low + mgp->tx_boundary) &
3247 ~(mgp->tx_boundary - 1);
3248 seglen = boundary - low;
3249 if (seglen > len)
3250 seglen = len;
3251
3252 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3253 cum_len_next = cum_len + seglen;
3254 (req-rdma_count)->rdma_count = rdma_count + 1;
3255 if (likely(cum_len >= 0)) {
3256 /* payload */
3257 int next_is_first, chop;
3258
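				/*
				 * Branchless bookkeeping: "chop" is 1 when
				 * this segment crosses an mss boundary,
				 * "next_is_first" is 1 when the next segment
				 * starts a fresh TSO packet; multiplying by
				 * the flag bits sets them without branches,
				 * and -(chop | next_is_first) yields an
				 * all-ones mask that resets rdma_count when
				 * either condition holds.
				 */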
3259 chop = (cum_len_next > mss);
3260 cum_len_next = cum_len_next % mss;
3261 next_is_first = (cum_len_next == 0);
3262 flags |= chop *
3263 MXGEFW_FLAGS_TSO_CHOP;
3264 flags_next |= next_is_first *
3265 MXGEFW_FLAGS_FIRST;
3266 rdma_count |= -(chop | next_is_first);
3267 rdma_count += chop & !next_is_first;
3268 } else if (likely(cum_len_next >= 0)) {
3269 /* header ends */
3270 int small;
3271
3272 rdma_count = -1;
3273 cum_len_next = 0;
3274 seglen = -cum_len;
3275 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
3276 flags_next = MXGEFW_FLAGS_TSO_PLD |
3277 MXGEFW_FLAGS_FIRST |
3278 (small * MXGEFW_FLAGS_SMALL);
3279 }
3280 req->addr_high = high_swapped;
3281 req->addr_low = htonl(low);
3282 req->pseudo_hdr_offset = pseudo_hdr_offset;
3283 req->pad = 0; /* complete solid 16-byte block */
3284 req->rdma_count = 1;
3285 req->cksum_offset = cksum_offset;
3286 req->length = htons(seglen);
3287 req->flags = flags | ((cum_len & 1) *
3288 MXGEFW_FLAGS_ALIGN_ODD);
3289 if (cksum_offset > seglen)
3290 cksum_offset -= seglen;
3291 else
3292 cksum_offset = 0;
3293 low += seglen;
3294 len -= seglen;
3295 cum_len = cum_len_next;
3296 req++;
3297 req->flags = 0;
3298 flags = flags_next;
3299 count++;
3300 rdma_count++;
3301 }
3302 }
3303 tx_req++;
3304 }
3305 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3306 do {
3307 req--;
3308 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3309 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3310 MXGEFW_FLAGS_FIRST)));
3311
3312 myri10ge_submit_req(tx, req_list, count);
3313 done:
3314 mutex_exit(&tx->lock);
3315 freemsg(mp);
3316 return (DDI_SUCCESS);
3317 }
3318
3319 /*
3320  * Try to send the chain of buffers described by the mp. We must not
3321  * use more send descriptors than the ring has free
3322  * (tx->mask - (tx->req - tx->done)), nor more than MXGEFW_MAX_SEND_DESC.
3323 */
3324
3325 static int
3326 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3327 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3328 {
3329 struct myri10ge_priv *mgp = ss->mgp;
3330 myri10ge_tx_ring_t *tx = &ss->tx;
3331 mcp_kreq_ether_send_t *req;
3332 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3333 mblk_t *bp;
3334 ddi_dma_cookie_t cookie;
3335 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3336 rdma_count, cum_len, lso_hdr_size;
3337 uint32_t start, stuff, tx_offload_flags;
3338 uint32_t seglen, len, mss, boundary, low, high_swapped;
3339 uint_t ncookies;
3340 uint16_t pseudo_hdr_offset;
3341 uint8_t flags, cksum_offset, odd_flag;
3342 int pkt_size;
3343 int lso_copy = myri10ge_lso_copy;
3344 try_pullup = 1;
3345
3346 again:
3347 /* Setup checksum offloading, if needed */
3348 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3349 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3350 if (tx_offload_flags & HW_LSO) {
3351 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3352 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3353 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3354 freemsg(mp);
3355 return (DDI_SUCCESS);
3356 }
3357 } else {
3358 max_segs = MXGEFW_MAX_SEND_DESC;
3359 mss = 0;
3360 }
3361 req = req_list;
3362 cksum_offset = 0;
3363 pseudo_hdr_offset = 0;
3364
3365 	/* leave an extra slot to keep the ring from wrapping */
3366 avail = tx->mask - (tx->req - tx->done);
3367
3368 /*
3369 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3370 * message will need to be pulled up in order to fit.
3371 	 * Otherwise, we are low on transmit descriptors, and it is
3372 	 * probably better to stall and try again than to pull up a
3373 	 * message to fit.
3374 */
3375
3376 if (avail < max_segs) {
3377 err = EBUSY;
3378 atomic_inc_32(&tx->stall_early);
3379 goto stall;
3380 }
3381
3382 /* find out how long the frame is and how many segments it is */
3383 count = 0;
3384 odd_flag = 0;
3385 pkt_size = 0;
3386 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3387 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3388 dblk_t *dbp;
3389 mblen = MBLKL(bp);
3390 if (mblen == 0) {
3391 /*
3392 * we can't simply skip over 0-length mblks
3393 * because the hardware can't deal with them,
3394 * and we could leak them.
3395 */
3396 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3397 err = EIO;
3398 goto pullup;
3399 }
3400 /*
3401 		 * There's no advantage to copying most esballoc-
3402 		 * attached blocks, so disable lso copy in that case
3403 */
3404 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3405 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3406 lso_copy = 0;
3407 }
3408 }
3409 pkt_size += mblen;
3410 count++;
3411 }
3412
3413 	/* Try to pull up excessively long chains */
3414 if (count >= max_segs) {
3415 err = myri10ge_pullup(ss, mp);
3416 if (likely(err == DDI_SUCCESS)) {
3417 count = 1;
3418 } else {
3419 if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
3420 /*
3421 				 * just let the h/w send it; it will be
3422 				 * inefficient, but better than dropping
3423 */
3424 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3425 } else {
3426 /* drop it */
3427 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3428 freemsg(mp);
3429 return (0);
3430 }
3431 }
3432 }
3433
3434 cum_len = 0;
3435 maclen = myri10ge_ether_parse_header(mp);
3436
3437 if (tx_offload_flags & HCK_PARTIALCKSUM) {
3438
3439 cksum_offset = start + maclen;
3440 pseudo_hdr_offset = htons(stuff + maclen);
3441 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3442 flags |= MXGEFW_FLAGS_CKSUM;
3443 }
3444
3445 	lso_hdr_size = 0; /* -Wuninitialized */
3446 if (mss) { /* LSO */
3447 /* this removes any CKSUM flag from before */
3448 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3449 /*
3450 * parse the headers and set cum_len to a negative
3451 * value to reflect the offset of the TCP payload
3452 */
3453 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen);
3454 cum_len = -lso_hdr_size;
3455 if ((mss < mgp->tx_boundary) && lso_copy) {
3456 err = myri10ge_tx_tso_copy(ss, mp, req_list,
3457 lso_hdr_size, pkt_size, mss, cksum_offset);
3458 return (err);
3459 }
3460
3461 /*
3462 * for TSO, pseudo_hdr_offset holds mss. The firmware
3463 * figures out where to put the checksum by parsing
3464 * the header.
3465 */
3466
3467 pseudo_hdr_offset = htons(mss);
3468 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
3469 flags |= MXGEFW_FLAGS_SMALL;
3470 if (pkt_size < myri10ge_tx_copylen) {
3471 req->cksum_offset = cksum_offset;
3472 req->pseudo_hdr_offset = pseudo_hdr_offset;
3473 req->flags = flags;
3474 err = myri10ge_tx_copy(ss, mp, req);
3475 return (err);
3476 }
3477 cum_len = 0;
3478 }
3479
3480 /* pull one DMA handle for each bp from our freelist */
3481 handles = NULL;
3482 err = myri10ge_alloc_tx_handles(ss, count, &handles);
3483 if (err != DDI_SUCCESS) {
3484 err = DDI_FAILURE;
3485 goto stall;
3486 }
3487 count = 0;
3488 rdma_count = 0;
3489 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3490 mblen = MBLKL(bp);
3491 dma_handle = handles;
3492 handles = handles->next;
3493
3494 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
3495 (caddr_t)bp->b_rptr, mblen,
3496 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
3497 &cookie, &ncookies);
3498 if (unlikely(rv != DDI_DMA_MAPPED)) {
3499 err = EIO;
3500 try_pullup = 0;
3501 dma_handle->next = handles;
3502 handles = dma_handle;
3503 goto abort_with_handles;
3504 }
3505
3506 /* reserve the slot */
3507 tx_info[count].m = bp;
3508 tx_info[count].handle = dma_handle;
3509
3510 for (; ; ) {
3511 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
3512 high_swapped =
3513 htonl(MYRI10GE_HIGHPART_TO_U32(
3514 cookie.dmac_laddress));
3515 len = (uint32_t)cookie.dmac_size;
3516 while (len) {
3517 uint8_t flags_next;
3518 int cum_len_next;
3519
3520 boundary = (low + mgp->tx_boundary) &
3521 ~(mgp->tx_boundary - 1);
3522 seglen = boundary - low;
3523 if (seglen > len)
3524 seglen = len;
3525
3526 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3527 cum_len_next = cum_len + seglen;
3528 if (mss) {
3529 (req-rdma_count)->rdma_count =
3530 rdma_count + 1;
3531 if (likely(cum_len >= 0)) {
3532 /* payload */
3533 int next_is_first, chop;
3534
3535 chop = (cum_len_next > mss);
3536 cum_len_next =
3537 cum_len_next % mss;
3538 next_is_first =
3539 (cum_len_next == 0);
3540 flags |= chop *
3541 MXGEFW_FLAGS_TSO_CHOP;
3542 flags_next |= next_is_first *
3543 MXGEFW_FLAGS_FIRST;
3544 rdma_count |=
3545 -(chop | next_is_first);
3546 rdma_count +=
3547 chop & !next_is_first;
3548 } else if (likely(cum_len_next >= 0)) {
3549 /* header ends */
3550 int small;
3551
3552 rdma_count = -1;
3553 cum_len_next = 0;
3554 seglen = -cum_len;
3555 small = (mss <=
3556 MXGEFW_SEND_SMALL_SIZE);
3557 flags_next =
3558 MXGEFW_FLAGS_TSO_PLD
3559 | MXGEFW_FLAGS_FIRST
3560 | (small *
3561 MXGEFW_FLAGS_SMALL);
3562 }
3563 }
3564 req->addr_high = high_swapped;
3565 req->addr_low = htonl(low);
3566 req->pseudo_hdr_offset = pseudo_hdr_offset;
3567 req->pad = 0; /* complete solid 16-byte block */
3568 req->rdma_count = 1;
3569 req->cksum_offset = cksum_offset;
3570 req->length = htons(seglen);
3571 req->flags = flags | ((cum_len & 1) * odd_flag);
3572 if (cksum_offset > seglen)
3573 cksum_offset -= seglen;
3574 else
3575 cksum_offset = 0;
3576 low += seglen;
3577 len -= seglen;
3578 cum_len = cum_len_next;
3579 count++;
3580 rdma_count++;
3581 /* make sure all the segments will fit */
3582 if (unlikely(count >= max_segs)) {
3583 MYRI10GE_ATOMIC_SLICE_STAT_INC(
3584 xmit_lowbuf);
3585 /* may try a pullup */
3586 err = EBUSY;
3587 if (try_pullup)
3588 try_pullup = 2;
3589 goto abort_with_handles;
3590 }
3591 req++;
3592 req->flags = 0;
3593 flags = flags_next;
3594 tx_info[count].m = 0;
3595 }
3596 ncookies--;
3597 if (ncookies == 0)
3598 break;
3599 ddi_dma_nextcookie(dma_handle->h, &cookie);
3600 }
3601 }
3602 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3603
3604 if (mss) {
3605 do {
3606 req--;
3607 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3608 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3609 MXGEFW_FLAGS_FIRST)));
3610 }
3611
3612 /* calculate tx stats */
3613 if (mss) {
3614 uint16_t opackets;
3615 int payload;
3616
3617 payload = pkt_size - lso_hdr_size;
3618 opackets = (payload / mss) + ((payload % mss) != 0);
3619 tx_info[0].stat.un.all = 0;
3620 tx_info[0].ostat.opackets = opackets;
3621 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
3622 + pkt_size;
3623 } else {
3624 myri10ge_tx_stat(&tx_info[0].stat,
3625 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
3626 }
3627 mutex_enter(&tx->lock);
3628
3629 /* check to see if the slots are really there */
3630 avail = tx->mask - (tx->req - tx->done);
3631 if (unlikely(avail <= count)) {
3632 mutex_exit(&tx->lock);
3633 err = 0;
3634 goto late_stall;
3635 }
3636
3637 myri10ge_send_locked(tx, req_list, tx_info, count);
3638 mutex_exit(&tx->lock);
3639 return (DDI_SUCCESS);
3640
3641 late_stall:
3642 try_pullup = 0;
3643 atomic_inc_32(&tx->stall_late);
3644
3645 abort_with_handles:
3646 /* unbind and free handles from previous mblks */
3647 for (i = 0; i < count; i++) {
3648 bp = tx_info[i].m;
3649 tx_info[i].m = 0;
3650 if (bp) {
3651 dma_handle = tx_info[i].handle;
3652 (void) ddi_dma_unbind_handle(dma_handle->h);
3653 dma_handle->next = handles;
3654 handles = dma_handle;
3655 tx_info[i].handle = NULL;
3656 tx_info[i].m = NULL;
3657 }
3658 }
3659 myri10ge_free_tx_handle_slist(tx, handles);
3660 pullup:
3661 if (try_pullup) {
3662 err = myri10ge_pullup(ss, mp);
3663 if (err != DDI_SUCCESS && try_pullup == 2) {
3664 /* drop */
3665 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3666 freemsg(mp);
3667 return (0);
3668 }
3669 try_pullup = 0;
3670 goto again;
3671 }
3672
3673 stall:
3674 if (err != 0) {
3675 if (err == EBUSY) {
3676 atomic_inc_32(&tx->stall);
3677 } else {
3678 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3679 }
3680 }
3681 return (err);
3682 }
3683
3684 static mblk_t *
3685 myri10ge_send_wrapper(void *arg, mblk_t *mp)
3686 {
3687 struct myri10ge_slice_state *ss = arg;
3688 int err = 0;
3689 mcp_kreq_ether_send_t *req_list;
3690 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3691 + 8];
3692 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
3693
3694 /* ensure req_list entries are aligned to 8 bytes */
3695 req_list = (struct mcp_kreq_ether_send *)
3696 (((unsigned long)req_bytes + 7UL) & ~7UL);
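
	/*
	 * Illustration: (p + 7) & ~7 rounds an address up to the next
	 * multiple of 8; e.g. 0x1003 -> 0x1008, while 0x1008 is unchanged.
	 */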
3697
3698 err = myri10ge_send(ss, mp, req_list, tx_info);
3699
3700 if (err)
3701 return (mp);
3702 else
3703 return (NULL);
3704 }
3705
3706 static int
3707 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3708 {
3709 struct myri10ge_priv *mgp = arg;
3710 int err;
3711
3712 if (mac_addr == NULL)
3713 return (EINVAL);
3714
3715 mutex_enter(&mgp->intrlock);
3716 if (mgp->macaddr_cnt) {
3717 mutex_exit(&mgp->intrlock);
3718 return (ENOSPC);
3719 }
3720 err = myri10ge_m_unicst(mgp, mac_addr);
3721 if (!err)
3722 mgp->macaddr_cnt++;
3723
3724 mutex_exit(&mgp->intrlock);
3725 if (err)
3726 return (err);
3727
3728 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3729 return (0);
3730 }
3731
3732 /*ARGSUSED*/
3733 static int
3734 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3735 {
3736 struct myri10ge_priv *mgp = arg;
3737
3738 mutex_enter(&mgp->intrlock);
3739 mgp->macaddr_cnt--;
3740 mutex_exit(&mgp->intrlock);
3741
3742 return (0);
3743 }
3744
3745 /*ARGSUSED*/
3746 static void
3747 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3748 mac_group_info_t *infop, mac_group_handle_t gh)
3749 {
3750 struct myri10ge_priv *mgp = arg;
3751
3752 if (rtype != MAC_RING_TYPE_RX)
3753 return;
3754
3755 infop->mgi_driver = (mac_group_driver_t)mgp;
3756 infop->mgi_start = NULL;
3757 infop->mgi_stop = NULL;
3758 infop->mgi_addmac = myri10ge_addmac;
3759 infop->mgi_remmac = myri10ge_remmac;
3760 infop->mgi_count = mgp->num_slices;
3761 }
3762
3763 static int
3764 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3765 {
3766 struct myri10ge_slice_state *ss;
3767
3768 ss = (struct myri10ge_slice_state *)rh;
3769 mutex_enter(&ss->rx_lock);
3770 ss->rx_gen_num = mr_gen_num;
3771 mutex_exit(&ss->rx_lock);
3772 return (0);
3773 }
3774
3775 /*
3776 * Retrieve a value for one of the statistics for a particular rx ring
3777 */
3778 int
3779 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3780 {
3781 struct myri10ge_slice_state *ss;
3782
3783 ss = (struct myri10ge_slice_state *)rh;
3784 switch (stat) {
3785 case MAC_STAT_RBYTES:
3786 *val = ss->rx_stats.ibytes;
3787 break;
3788
3789 case MAC_STAT_IPACKETS:
3790 *val = ss->rx_stats.ipackets;
3791 break;
3792
3793 default:
3794 *val = 0;
3795 return (ENOTSUP);
3796 }
3797
3798 return (0);
3799 }
3800
3801 /*
3802 * Retrieve a value for one of the statistics for a particular tx ring
3803 */
3804 int
3805 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3806 {
3807 struct myri10ge_slice_state *ss;
3808
3809 ss = (struct myri10ge_slice_state *)rh;
3810 switch (stat) {
3811 case MAC_STAT_OBYTES:
3812 *val = ss->tx.stats.obytes;
3813 break;
3814
3815 case MAC_STAT_OPACKETS:
3816 *val = ss->tx.stats.opackets;
3817 break;
3818
3819 default:
3820 *val = 0;
3821 return (ENOTSUP);
3822 }
3823
3824 return (0);
3825 }
3826
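/*
 * Polling handoff: when the MAC layer wants to poll a ring it calls
 * the mi_disable entry point, which just sets rx_polling so the
 * interrupt handler leaves receive events to myri10ge_poll_rx().
 * mi_enable clears the flag and, if the handler deferred an interrupt
 * acknowledgement in the meantime (rx_token set), writes irq_claim
 * now so the slice starts interrupting again.
 */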
static int
myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
{
	struct myri10ge_slice_state *ss;

	ss = (struct myri10ge_slice_state *)intrh;
	mutex_enter(&ss->poll_lock);
	ss->rx_polling = B_TRUE;
	mutex_exit(&ss->poll_lock);
	return (0);
}

static int
myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
{
	struct myri10ge_slice_state *ss;

	ss = (struct myri10ge_slice_state *)intrh;
	mutex_enter(&ss->poll_lock);
	ss->rx_polling = B_FALSE;
	if (ss->rx_token) {
		*ss->irq_claim = BE_32(3);
		ss->rx_token = 0;
	}
	mutex_exit(&ss->poll_lock);
	return (0);
}

/*ARGSUSED*/
static void
myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
    const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
{
	struct myri10ge_priv *mgp = arg;
	struct myri10ge_slice_state *ss;
	mac_intr_t *mintr = &infop->mri_intr;

	ASSERT((unsigned int)ring_index < mgp->num_slices);

	ss = &mgp->ss[ring_index];
	switch (rtype) {
	case MAC_RING_TYPE_RX:
		ss->rx_rh = rh;
		infop->mri_driver = (mac_ring_driver_t)ss;
		infop->mri_start = myri10ge_ring_start;
		infop->mri_stop = NULL;
		infop->mri_poll = myri10ge_poll_rx;
		infop->mri_stat = myri10ge_rx_ring_stat;
		mintr->mi_handle = (mac_intr_handle_t)ss;
		mintr->mi_enable = myri10ge_rx_ring_intr_enable;
		mintr->mi_disable = myri10ge_rx_ring_intr_disable;
		break;
	case MAC_RING_TYPE_TX:
		ss->tx.rh = rh;
		infop->mri_driver = (mac_ring_driver_t)ss;
		infop->mri_start = NULL;
		infop->mri_stop = NULL;
		infop->mri_tx = myri10ge_send_wrapper;
		infop->mri_stat = myri10ge_tx_ring_stat;
		break;
	default:
		break;
	}
}

static void
myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
{
	if (mgp->ksp_stat == NULL)
		return;

	kstat_delete(mgp->ksp_stat);
	mgp->ksp_stat = NULL;
}

static void
myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
{
	if (ss->ksp_stat == NULL)
		return;

	kstat_delete(ss->ksp_stat);
	ss->ksp_stat = NULL;
}

static void
myri10ge_info_destroy(struct myri10ge_priv *mgp)
{
	if (mgp->ksp_info == NULL)
		return;

	kstat_delete(mgp->ksp_info);
	mgp->ksp_info = NULL;
}

static int
myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
{
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_priv *mgp;
	mcp_irq_data_t *fw_stats;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
	mgp = (struct myri10ge_priv *)ksp->ks_private;
	fw_stats = mgp->ss[0].fw_stats;

	ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
	ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
	ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
	if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
		ethstat->dma_force_physical.value.ul = 1;
	else
		ethstat->dma_force_physical.value.ul = 0;
	ethstat->lanes.value.ul = mgp->pcie_link_width;
	ethstat->dropped_bad_crc32.value.ul =
	    ntohl(fw_stats->dropped_bad_crc32);
	ethstat->dropped_bad_phy.value.ul =
	    ntohl(fw_stats->dropped_bad_phy);
	ethstat->dropped_link_error_or_filtered.value.ul =
	    ntohl(fw_stats->dropped_link_error_or_filtered);
	ethstat->dropped_link_overflow.value.ul =
	    ntohl(fw_stats->dropped_link_overflow);
	ethstat->dropped_multicast_filtered.value.ul =
	    ntohl(fw_stats->dropped_multicast_filtered);
	ethstat->dropped_no_big_buffer.value.ul =
	    ntohl(fw_stats->dropped_no_big_buffer);
	ethstat->dropped_no_small_buffer.value.ul =
	    ntohl(fw_stats->dropped_no_small_buffer);
	ethstat->dropped_overrun.value.ul =
	    ntohl(fw_stats->dropped_overrun);
	ethstat->dropped_pause.value.ul =
	    ntohl(fw_stats->dropped_pause);
	ethstat->dropped_runt.value.ul =
	    ntohl(fw_stats->dropped_runt);
	ethstat->link_up.value.ul =
	    ntohl(fw_stats->link_up);
	ethstat->dropped_unicast_filtered.value.ul =
	    ntohl(fw_stats->dropped_unicast_filtered);
	return (0);
}

static int
myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
{
	struct myri10ge_slice_stat *ethstat;
	struct myri10ge_slice_state *ss;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
	ss = (struct myri10ge_slice_state *)ksp->ks_private;

	ethstat->rx_big.value.ul = ss->j_rx_cnt;
	ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
	ethstat->rx_bigbuf_pool.value.ul =
	    ss->jpool.num_alloc - ss->jbufs_for_smalls;
	ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
	ethstat->rx_small.value.ul = ss->rx_small.cnt -
	    (ss->rx_small.mask + 1);
	ethstat->tx_done.value.ul = ss->tx.done;
	ethstat->tx_req.value.ul = ss->tx.req;
	ethstat->tx_activate.value.ul = ss->tx.activate;
	ethstat->xmit_sched.value.ul = ss->tx.sched;
	ethstat->xmit_stall.value.ul = ss->tx.stall;
	ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
	ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
	ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err);
	return (0);
}

static int
myri10ge_info_kstat_update(kstat_t *ksp, int rw)
{
	struct myri10ge_info *info;
	struct myri10ge_priv *mgp;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	info = (struct myri10ge_info *)ksp->ks_data;
	mgp = (struct myri10ge_priv *)ksp->ks_private;
	kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
	kstat_named_setstr(&info->firmware_version, mgp->fw_version);
	kstat_named_setstr(&info->firmware_name, mgp->fw_name);
	kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
	kstat_named_setstr(&info->product_code, mgp->pc_str);
	kstat_named_setstr(&info->serial_number, mgp->sn_str);
	return (0);
}

static struct myri10ge_info myri10ge_info_template = {
	{ "driver_version", KSTAT_DATA_STRING },
	{ "firmware_version", KSTAT_DATA_STRING },
	{ "firmware_name", KSTAT_DATA_STRING },
	{ "interrupt_type", KSTAT_DATA_STRING },
	{ "product_code", KSTAT_DATA_STRING },
	{ "serial_number", KSTAT_DATA_STRING },
};
static kmutex_t myri10ge_info_template_lock;

static int
myri10ge_info_init(struct myri10ge_priv *mgp)
{
	struct kstat *ksp;

	ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
	    "myri10ge_info", "net", KSTAT_TYPE_NAMED,
	    sizeof (myri10ge_info_template) /
	    sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
	if (ksp == NULL) {
		cmn_err(CE_WARN,
		    "%s: myri10ge_info_init: kstat_create failed", mgp->name);
		return (DDI_FAILURE);
	}
	mgp->ksp_info = ksp;
	ksp->ks_update = myri10ge_info_kstat_update;
	ksp->ks_private = (void *) mgp;
	ksp->ks_data = &myri10ge_info_template;
	ksp->ks_lock = &myri10ge_info_template_lock;
	if (MYRI10GE_VERSION_STR != NULL)
		ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
	ksp->ks_data_size += strlen(mgp->fw_version) + 1;
	ksp->ks_data_size += strlen(mgp->fw_name) + 1;
	ksp->ks_data_size += strlen(mgp->intr_type) + 1;
	if (mgp->pc_str != NULL)
		ksp->ks_data_size += strlen(mgp->pc_str) + 1;
	if (mgp->sn_str != NULL)
		ksp->ks_data_size += strlen(mgp->sn_str) + 1;

	kstat_install(ksp);
	return (DDI_SUCCESS);
}

static int
myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
{
	struct kstat *ksp;
	struct myri10ge_nic_stat *ethstat;

	ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
	    "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
	    sizeof (*ethstat) / sizeof (kstat_named_t), 0);
	if (ksp == NULL) {
		cmn_err(CE_WARN,
		    "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
		return (DDI_FAILURE);
	}
	mgp->ksp_stat = ksp;
	ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);

	kstat_named_init(&ethstat->dma_read_bw_MBs,
	    "dma_read_bw_MBs", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dma_write_bw_MBs,
	    "dma_write_bw_MBs", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dma_read_write_bw_MBs,
	    "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dma_force_physical,
	    "dma_force_physical", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lanes,
	    "lanes", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_bad_crc32,
	    "dropped_bad_crc32", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_bad_phy,
	    "dropped_bad_phy", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_link_error_or_filtered,
	    "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_link_overflow,
	    "dropped_link_overflow", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_multicast_filtered,
	    "dropped_multicast_filtered", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_no_big_buffer,
	    "dropped_no_big_buffer", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_no_small_buffer,
	    "dropped_no_small_buffer", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_overrun,
	    "dropped_overrun", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_pause,
	    "dropped_pause", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_runt,
	    "dropped_runt", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_unicast_filtered,
	    "dropped_unicast_filtered", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->link_changes, "link_changes",
	    KSTAT_DATA_ULONG);
	ksp->ks_update = myri10ge_nic_stat_kstat_update;
	ksp->ks_private = (void *) mgp;
	kstat_install(ksp);
	return (DDI_SUCCESS);
}

static int
myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct kstat *ksp;
	struct myri10ge_slice_stat *ethstat;
	int instance;

	/*
	 * fake an instance so that the same slice numbers from
	 * different instances do not collide
	 */
	instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss);
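	/* e.g. driver instance 2, slice 3 yields kstat instance 2003 */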
	ksp = kstat_create("myri10ge", instance,
	    "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
	    sizeof (*ethstat) / sizeof (kstat_named_t), 0);
	if (ksp == NULL) {
		cmn_err(CE_WARN,
		    "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
		return (DDI_FAILURE);
	}
	ss->ksp_stat = ksp;
	ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
	kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lro_flushed, "lro_flushed",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lro_queued, "lro_queued",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_copy, "rx_copy",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_sched, "xmit_sched",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall, "xmit_stall",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_err, "xmit_err",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_req, "tx_req",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_activate, "tx_activate",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_done, "tx_done",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_big, "rx_big",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_small, "rx_small",
	    KSTAT_DATA_ULONG);
	ksp->ks_update = myri10ge_slice_stat_kstat_update;
	ksp->ks_private = (void *) ss;
	kstat_install(ksp);
	return (DDI_SUCCESS);
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__

#include <vm/hat.h>
#include <sys/ddi_isa.h>
void *device_arena_alloc(size_t size, int vm_flag);
void device_arena_free(void *vaddr, size_t size);

static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
	dev_info_t *parent_dip;
	ddi_acc_handle_t handle;
	unsigned long bus_number, dev_number, func_number;
	unsigned long cfg_pa, paddr, base, pgoffset;
	char *cvaddr, *ptr;
	uint32_t *ptr32;
	int retval = DDI_FAILURE;
	int dontcare;
	uint16_t read_vid, read_did, vendor_id, device_id;

	if (!myri10ge_nvidia_ecrc_enable)
		return;

	parent_dip = ddi_get_parent(mgp->dip);
	if (parent_dip == NULL) {
		cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
		return;
	}

	if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "%s: Could not access my parent's registers", mgp->name);
		return;
	}

	vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
	device_id = pci_config_get16(handle, PCI_CONF_DEVID);
	pci_config_teardown(&handle);

	if (myri10ge_verbose) {
		unsigned long bus_number, dev_number, func_number;
		int reg_set, span;
		(void) myri10ge_reg_set(parent_dip, &reg_set, &span,
		    &bus_number, &dev_number, &func_number);
		printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
		    bus_number, dev_number, func_number);
	}

	if (vendor_id != 0x10de)
		return;

	if (device_id != 0x005d /* CK804 */ &&
	    (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
		return;
	}
	(void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
	    &bus_number, &dev_number, &func_number);

	for (cfg_pa = 0xf0000000UL;
	    retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
	    cfg_pa -= 0x10000000UL) {
		/* find the config space address for the nvidia bridge */
		paddr = (cfg_pa + bus_number * 0x00100000UL +
		    (dev_number * 8 + func_number) * 0x00001000UL);
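		/*
		 * This mirrors the ECAM-style config space layout,
		 * bus << 20 | device << 15 | function << 12, since
		 * (dev * 8 + func) scaled by 0x1000 is dev * 0x8000 +
		 * func * 0x1000.
		 */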

		base = paddr & (~MMU_PAGEOFFSET);
		pgoffset = paddr & MMU_PAGEOFFSET;

		/* map it into the kernel */
		cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP);
		if (cvaddr == NULL) {
			cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
			    mgp->name);
			return;
		}

		hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
		    i_ddi_paddr_to_pfn(base),
		    PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);

		ptr = cvaddr + pgoffset;
		read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
		read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
		if (vendor_id == read_vid && device_id == read_did) {
			ptr32 = (uint32_t *)(void *)(ptr + 0x178);
			if (myri10ge_verbose)
				printf("%s: Enabling ECRC on upstream "
				    "Nvidia bridge (0x%x:0x%x) "
				    "at %ld:%ld:%ld\n", mgp->name,
				    read_vid, read_did, bus_number,
				    dev_number, func_number);
			*ptr32 |= 0x40;
			retval = DDI_SUCCESS;
		}
		hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
		device_arena_free(cvaddr, ptob(1));
	}
}

#else
/*ARGSUSED*/
static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
}
#endif /* i386 */

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx.boundary to 4KB.
 */

static int
myri10ge_firmware_probe(struct myri10ge_priv *mgp)
{
	int status;

	mgp->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (mgp->max_read_request_4k == 0)
		mgp->tx_boundary = 2048;
	/*
	 * load the optimized firmware which assumes aligned PCIe
	 * completions in order to see if it works on this host.
	 */
	mgp->fw_name = "rss_eth_z8e";
	mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
	mgp->eth_z8e_length = rss_eth_z8e_length;

	status = myri10ge_load_firmware(mgp);
	if (status != 0) {
		return (status);
	}
	/*
	 * Enable ECRC if possible
	 */
	myri10ge_enable_nvidia_ecrc(mgp);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */
	status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return (0);	/* keep the aligned firmware */

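	/*
	 * The surrounding logic suggests E2BIG is the status returned
	 * when the test aborts on the first unaligned completion;
	 * that is the expected fallback path, so only other failures
	 * warrant a warning.
	 */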
	if (status != E2BIG)
		cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
		    mgp->name, status);
	if (status == ENOSYS)
		cmn_err(CE_WARN, "%s: Falling back to ethp! "
		    "Please install up to date fw\n", mgp->name);
	return (status);
}

static int
myri10ge_select_firmware(struct myri10ge_priv *mgp)
{
	int aligned;

	aligned = 0;

	if (myri10ge_force_firmware == 1) {
		if (myri10ge_verbose)
			printf("%s: Assuming aligned completions (forced)\n",
			    mgp->name);
		aligned = 1;
		goto done;
	}

	if (myri10ge_force_firmware == 2) {
		if (myri10ge_verbose)
			printf("%s: Assuming unaligned completions (forced)\n",
			    mgp->name);
		aligned = 0;
		goto done;
	}

	/* if the link width is less than 8, we may use the aligned firmware */
	if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
		cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
		    mgp->name, mgp->pcie_link_width);
		aligned = 1;
		goto done;
	}

	if (myri10ge_firmware_probe(mgp) == 0)
		return (0);	/* keep the optimized firmware */

done:
	if (aligned) {
		mgp->fw_name = "rss_eth_z8e";
		mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
		mgp->eth_z8e_length = rss_eth_z8e_length;
		mgp->tx_boundary = 4096;
	} else {
		mgp->fw_name = "rss_ethp_z8e";
		mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
		mgp->eth_z8e_length = rss_ethp_z8e_length;
		mgp->tx_boundary = 2048;
	}

	return (myri10ge_load_firmware(mgp));
}

static int
myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
{
	dev_info_t *devinfo = mgp->dip;
	int count, avail, actual, intr_types;
	int x, y, rc, inum = 0;

	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
	if (rc != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "!%s: ddi_intr_get_supported_types() failure, rc = %d\n",
		    mgp->name, rc);
		return (DDI_FAILURE);
	}

	if (!myri10ge_use_msi)
		intr_types &= ~DDI_INTR_TYPE_MSI;
	if (!myri10ge_use_msix)
		intr_types &= ~DDI_INTR_TYPE_MSIX;

	if (intr_types & DDI_INTR_TYPE_MSIX) {
		mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
		mgp->intr_type = "MSI-X";
	} else if (intr_types & DDI_INTR_TYPE_MSI) {
		mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
		mgp->intr_type = "MSI";
	} else {
		mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
		mgp->intr_type = "Legacy";
	}
	/* Get number of interrupts */
	rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
	if ((rc != DDI_SUCCESS) || (count == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
		    "count: %d", mgp->name, rc, count);
		return (DDI_FAILURE);
	}

	/* Get number of available interrupts */
	rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
	if ((rc != DDI_SUCCESS) || (avail == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
		    "rc: %d, avail: %d\n", mgp->name, rc, avail);
		return (DDI_FAILURE);
	}
	if (avail < count) {
		cmn_err(CE_NOTE,
		    "!%s: nintrs() returned %d, navail returned %d",
		    mgp->name, count, avail);
		count = avail;
	}

	if (count < mgp->num_slices)
		return (DDI_FAILURE);

	if (count > mgp->num_slices)
		count = mgp->num_slices;

	/* Allocate memory for the interrupt handle table */
	mgp->intr_size = count * sizeof (ddi_intr_handle_t);
	mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);

	rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
	    count, &actual, DDI_INTR_ALLOC_NORMAL);

	if ((rc != DDI_SUCCESS) || (actual == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
		    mgp->name, rc);
		kmem_free(mgp->htable, mgp->intr_size);
		mgp->htable = NULL;
		return (DDI_FAILURE);
	}

	if ((actual < count) && myri10ge_verbose) {
		cmn_err(CE_NOTE, "%s: got %d/%d slices",
		    mgp->name, actual, count);
	}

	mgp->intr_cnt = actual;

	/*
	 * Get priority for first irq, assume remaining are all the same
	 */
	if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);

		/* Free already allocated intr */
		for (y = 0; y < actual; y++) {
			(void) ddi_intr_free(mgp->htable[y]);
		}

		kmem_free(mgp->htable, mgp->intr_size);
		mgp->htable = NULL;
		return (DDI_FAILURE);
	}

	mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;

	if (!add_handler)
		return (DDI_SUCCESS);

	/* Call ddi_intr_add_handler() */
	for (x = 0; x < actual; x++) {
		if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
		    (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
			    mgp->name);

			/* Free already allocated intr */
			for (y = 0; y < actual; y++) {
				(void) ddi_intr_free(mgp->htable[y]);
			}

			kmem_free(mgp->htable, mgp->intr_size);
			mgp->htable = NULL;
			return (DDI_FAILURE);
		}
	}

	(void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
	if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
		/* Call ddi_intr_block_enable() for MSI */
		(void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
	} else {
		/* Call ddi_intr_enable() for MSI non block enable */
		for (x = 0; x < mgp->intr_cnt; x++) {
			(void) ddi_intr_enable(mgp->htable[x]);
		}
	}

	return (DDI_SUCCESS);
}

static void
myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
{
	int x, err;

	/* Disable all interrupts */
	if (handler_installed) {
		if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
			/* Call ddi_intr_block_disable() */
			(void) ddi_intr_block_disable(mgp->htable,
			    mgp->intr_cnt);
		} else {
			for (x = 0; x < mgp->intr_cnt; x++) {
				(void) ddi_intr_disable(mgp->htable[x]);
			}
		}
	}

	for (x = 0; x < mgp->intr_cnt; x++) {
		if (handler_installed) {
			/* Call ddi_intr_remove_handler() */
			err = ddi_intr_remove_handler(mgp->htable[x]);
			if (err != DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: ddi_intr_remove_handler for "
				    "vec %d returned %d\n", mgp->name,
				    x, err);
			}
		}
		err = ddi_intr_free(mgp->htable[x]);
		if (err != DDI_SUCCESS) {
			cmn_err(CE_WARN,
			    "%s: ddi_intr_free for vec %d returned %d\n",
			    mgp->name, x, err);
		}
	}
	kmem_free(mgp->htable, mgp->intr_size);
	mgp->htable = NULL;
}

static void
myri10ge_test_physical(dev_info_t *dip)
{
	ddi_dma_handle_t handle;
	struct myri10ge_dma_stuff dma;
	void *addr;
	int err;

	/* test #1, sufficient for older sparc systems */
	myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
	err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &handle);
	if (err == DDI_DMA_BADATTR)
		goto fail;
	ddi_dma_free_handle(&handle);

	/* test #2, required on Olympus where the bind is what fails */
	addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
	    DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
	if (addr == NULL)
		goto fail;
	myri10ge_dma_free(&dma);
	return;

fail:
	if (myri10ge_verbose)
		printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
		    "using IOMMU\n", ddi_get_instance(dip));

	myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
}

static void
myri10ge_get_props(dev_info_t *dip)
{
	myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_flow_control", myri10ge_flow_control);

	myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
	myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_nvidia_ecrc_enable", 1);
#endif

	myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_use_msi", myri10ge_use_msi);

	myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_deassert_wait", myri10ge_deassert_wait);

	myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_verbose", myri10ge_verbose);

	myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_tx_copylen", myri10ge_tx_copylen);

	if (myri10ge_tx_copylen < 60) {
		cmn_err(CE_WARN,
		    "myri10ge_tx_copylen must be >= 60 bytes\n");
		myri10ge_tx_copylen = 60;
	}

	myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_mtu_override", myri10ge_mtu_override);

	if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU &&
	    myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU)
		myri10ge_mtu = myri10ge_mtu_override +
		    sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
	else if (myri10ge_mtu_override != 0) {
		cmn_err(CE_WARN,
		    "myri10ge_mtu_override must be between 1500 and "
		    "9000 bytes\n");
	}

	myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
	myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_bigbufs_max", myri10ge_bigbufs_max);

	myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_watchdog_reset", myri10ge_watchdog_reset);

	if (myri10ge_bigbufs_initial < 128) {
		cmn_err(CE_WARN,
		    "myri10ge_bigbufs_initial must be at least 128\n");
		myri10ge_bigbufs_initial = 128;
	}
	if (myri10ge_bigbufs_max < 128) {
		cmn_err(CE_WARN,
		    "myri10ge_bigbufs_max must be at least 128\n");
		myri10ge_bigbufs_max = 128;
	}

	if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "myri10ge_bigbufs_max must be >= "
		    "myri10ge_bigbufs_initial\n");
		myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
	}

	myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_force_firmware", myri10ge_force_firmware);

	myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_max_slices", myri10ge_max_slices);

	myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_use_msix", myri10ge_use_msix);

	myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_rss_hash", myri10ge_rss_hash);

	if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
	    myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
		cmn_err(CE_WARN, "myri10ge: Illegal rss hash type %d\n",
		    myri10ge_rss_hash);
		myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lro", myri10ge_lro);
	myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lro_cnt", myri10ge_lro_cnt);
	myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
	myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_tx_hash", myri10ge_tx_hash);
	myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_use_lso", myri10ge_use_lso);
	myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lso_copy", myri10ge_lso_copy);
	myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
	myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_small_bytes", myri10ge_small_bytes);
	if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 - 1)) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n",
		    myri10ge_small_bytes);
		cmn_err(CE_WARN,
		    "must be a 128-byte boundary minus MXGEFW_PAD\n");
		myri10ge_small_bytes += 128;
		myri10ge_small_bytes &= ~(128 - 1);
		myri10ge_small_bytes -= MXGEFW_PAD;
		cmn_err(CE_WARN, "rounded up to %d\n",
		    myri10ge_small_bytes);

		myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
}

#ifndef PCI_EXP_LNKSTA
#define	PCI_EXP_LNKSTA 18
#endif

static int
myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
{
	uint16_t status;
	uint8_t ptr;

	/* check to see if we have capabilities */
	status = pci_config_get16(handle, PCI_CONF_STAT);
	if (!(status & PCI_STAT_CAP)) {
		cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
		return (ENXIO);
	}

	ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);

	/* walk the capabilities list, looking for the requested cap */
	while (ptr != PCI_CAP_NEXT_PTR_NULL) {
		if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
			break;
		ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
	}
	if (ptr < 64) {
		cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
		return (ENXIO);
	}
	*capptr = ptr;
	return (0);
}

static int
myri10ge_set_max_readreq(ddi_acc_handle_t handle)
{
	int err;
	uint16_t val;
	uint8_t ptr;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
	if (err != 0) {
		cmn_err(CE_WARN, "could not find PCIe cap\n");
		return (ENXIO);
	}

	/* set max read req to 4096 */
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
	    PCIE_DEVCTL_MAX_READ_REQ_4096;
	pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
	    PCIE_DEVCTL_MAX_READ_REQ_4096) {
		cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
		return (EINVAL);
	}
	return (0);
}

static int
myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
{
	int err;
	uint16_t val;
	uint8_t ptr;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
	if (err != 0) {
		cmn_err(CE_WARN, "could not find PCIe cap\n");
		return (ENXIO);
	}

	/* read the negotiated link width */
	val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
	val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
	*link = (val >> 4);
	return (0);
}

static int
myri10ge_reset_nic(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	uint32_t reboot;
	uint16_t cmd;
	int err;

	cmd = pci_config_get16(handle, PCI_CONF_COMM);
	if ((cmd & PCI_COMM_ME) == 0) {
		/*
		 * Bus master DMA disabled?  Check to see if the card
		 * rebooted due to a parity error.  For now, just
		 * report it.
		 */

		/* enter read32 mode */
		pci_config_put8(handle, mgp->vso + 0x10, 0x3);
		/* read REBOOT_STATUS (0xfffffff0) */
		pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
		reboot = pci_config_get16(handle, mgp->vso + 0x14);
		cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
		return (0);
	}
	if (!myri10ge_watchdog_reset) {
		cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
		return (1);
	}

	myri10ge_stop_locked(mgp);
	err = myri10ge_start_locked(mgp);
	if (err == DDI_FAILURE) {
		return (0);
	}
	mac_tx_update(mgp->mh);
	return (1);
}

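/*
 * A ring is considered stalled when transmits have been scheduled
 * since the last reported stall, nothing has completed since the
 * previous watchdog tick, and requests were still outstanding as of
 * that tick.
 */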
static inline int
myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
{
	if (tx->sched != tx->stall &&
	    tx->done == tx->watchdog_done &&
	    tx->watchdog_req != tx->watchdog_done)
		return (1);
	return (0);
}

static void
myri10ge_watchdog(void *arg)
{
	struct myri10ge_priv *mgp;
	struct myri10ge_slice_state *ss;
	myri10ge_tx_ring_t *tx;
	int nic_ok = 1;
	int slices_stalled, rx_pause, i;
	int add_rx;

	mgp = arg;
	mutex_enter(&mgp->intrlock);
	if (mgp->running != MYRI10GE_ETH_RUNNING) {
		cmn_err(CE_WARN,
		    "%s not running, not rearming watchdog (%d)\n",
		    mgp->name, mgp->running);
		mutex_exit(&mgp->intrlock);
		return;
	}

	rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);

	/*
	 * make sure nic is stalled before we reset the nic, so as to
	 * ensure we don't rip the transmit data structures out from
	 * under a pending transmit
	 */
	for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
		tx = &mgp->ss[i].tx;
		slices_stalled = myri10ge_ring_stalled(tx);
		if (slices_stalled)
			break;
	}

	if (slices_stalled) {
		if (mgp->watchdog_rx_pause == rx_pause) {
			cmn_err(CE_WARN,
			    "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d)\n",
			    mgp->name, i, tx->sched, tx->stall,
			    tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
			    (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
			nic_ok = myri10ge_reset_nic(mgp);
		} else {
			cmn_err(CE_WARN,
			    "%s Flow controlled, check link partner\n",
			    mgp->name);
		}
	}

	if (!nic_ok) {
		cmn_err(CE_WARN,
		    "%s Nic dead, not rearming watchdog\n", mgp->name);
		mutex_exit(&mgp->intrlock);
		return;
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		tx = &ss->tx;
		tx->watchdog_done = tx->done;
		tx->watchdog_req = tx->req;
		if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
			ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
			add_rx =
			    min(ss->jpool.num_alloc,
			    myri10ge_bigbufs_max -
			    (ss->jpool.num_alloc -
			    ss->jbufs_for_smalls));
			if (add_rx != 0) {
				(void) myri10ge_add_jbufs(ss, add_rx, 0);
				/* now feed them to the firmware */
				mutex_enter(&ss->jpool.mtx);
				myri10ge_restock_jumbos(ss);
				mutex_exit(&ss->jpool.mtx);
			}
		}
	}
	mgp->watchdog_rx_pause = rx_pause;

	mgp->timer_id = timeout(myri10ge_watchdog, mgp,
	    mgp->timer_ticks);
	mutex_exit(&mgp->intrlock);
}

/*ARGSUSED*/
static int
myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	(void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
    caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	char *end;
	size_t new_value;

	new_value = mi_strtol(value, &end, 10);
	if (end == value)
		return (EINVAL);

	mutex_enter(&myri10ge_param_lock);
	mgp->intr_coal_delay = (int)new_value;
	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	mutex_exit(&myri10ge_param_lock);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	(void) mi_mpprintf(mp, "%d", mgp->pause);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
    caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	char *end;
	size_t new_value;
	int err = 0;

	new_value = mi_strtol(value, &end, 10);
	if (end == value)
		return (EINVAL);
	if (new_value != 0)
		new_value = 1;

	mutex_enter(&myri10ge_param_lock);
	if (new_value != mgp->pause)
		err = myri10ge_change_pause(mgp, new_value);
	mutex_exit(&myri10ge_param_lock);
	return (err);
}

/*ARGSUSED*/
static int
myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
{
	(void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
    caddr_t cp, cred_t *credp)
{
	char *end;
	size_t new_value;

	new_value = mi_strtol(value, &end, 10);
	if (end == value)
		return (EINVAL);
	*(int *)(void *)cp = new_value;

	return (0);
}

static void
myri10ge_ndd_init(struct myri10ge_priv *mgp)
{
	mgp->nd_head = NULL;

	(void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
	    myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
	(void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
	    myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
	(void) nd_load(&mgp->nd_head, "myri10ge_verbose",
	    myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
	(void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_deassert_wait);
	(void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_bigbufs_max);
	(void) nd_load(&mgp->nd_head, "myri10ge_lro",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lro);
	(void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lro_max_aggr);
	(void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_tx_hash);
	(void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lso_copy);
}

static void
myri10ge_ndd_fini(struct myri10ge_priv *mgp)
{
	nd_free(&mgp->nd_head);
}

static void
myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	struct iocblk *iocp;
	struct myri10ge_priv *mgp = arg;
	int cmd, ok, err;

	iocp = (struct iocblk *)(void *)mp->b_rptr;
	cmd = iocp->ioc_cmd;

	ok = 0;
	err = 0;

	switch (cmd) {
	case ND_GET:
	case ND_SET:
		ok = nd_getset(wq, mgp->nd_head, mp);
		break;
	default:
		break;
	}
	if (!ok)
		err = EINVAL;
	else
		err = iocp->ioc_error;

	if (!err)
		miocack(wq, mp, iocp->ioc_count, err);
	else
		miocnak(wq, mp, 0, err);
}

static struct myri10ge_priv *mgp_list;

struct myri10ge_priv *
myri10ge_get_instance(uint_t unit)
{
	struct myri10ge_priv *mgp;

	mutex_enter(&myri10ge_param_lock);
	for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
		if (unit == ddi_get_instance(mgp->dip)) {
			mgp->refcnt++;
			break;
		}
	}
	mutex_exit(&myri10ge_param_lock);
	return (mgp);
}

void
myri10ge_put_instance(struct myri10ge_priv *mgp)
{
	mutex_enter(&myri10ge_param_lock);
	mgp->refcnt--;
	mutex_exit(&myri10ge_param_lock);
}

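/*
 * Ring capability summary: RX advertises num_slices rings collected
 * in a single static group (whose one unicast slot is handled by
 * myri10ge_addmac()/myri10ge_remmac() above), while TX advertises
 * num_slices rings with no groups.
 */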
static boolean_t
myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	struct myri10ge_priv *mgp = arg;
	uint32_t *cap_hcksum;
	mac_capab_lso_t *cap_lso;
	mac_capab_rings_t *cap_rings;

	switch (cap) {
	case MAC_CAPAB_HCKSUM:
		cap_hcksum = cap_data;
		*cap_hcksum = HCKSUM_INET_PARTIAL;
		break;
	case MAC_CAPAB_RINGS:
		cap_rings = cap_data;
		switch (cap_rings->mr_type) {
		case MAC_RING_TYPE_RX:
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 1;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = myri10ge_fill_group;
			break;
		case MAC_RING_TYPE_TX:
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 0;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = NULL;
			break;
		default:
			return (B_FALSE);
		}
		break;
	case MAC_CAPAB_LSO:
		cap_lso = cap_data;
		if (!myri10ge_use_lso)
			return (B_FALSE);
		if (!(mgp->features & MYRI10GE_TSO))
			return (B_FALSE);
		cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
		cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
		break;

	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}

static int
myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	struct myri10ge_priv *mgp = arg;
	struct myri10ge_rx_ring_stats *rstat;
	struct myri10ge_tx_ring_stats *tstat;
	mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
	struct myri10ge_slice_state *ss;
	uint64_t tmp = 0;
	int i;

	switch (stat) {
	case MAC_STAT_IFSPEED:
		*val = 10ull * 1000ull * 1000000ull;
		break;

	case MAC_STAT_MULTIRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->multircv;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->brdcstrcv;
		}
		*val = tmp;
		break;

	case MAC_STAT_MULTIXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->multixmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->brdcstxmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_NORCVBUF:
		tmp = ntohl(fw_stats->dropped_no_big_buffer);
		tmp += ntohl(fw_stats->dropped_no_small_buffer);
		tmp += ntohl(fw_stats->dropped_link_overflow);
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
			tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
		}
		*val = tmp;
		break;

	case MAC_STAT_IERRORS:
		tmp += ntohl(fw_stats->dropped_bad_crc32);
		tmp += ntohl(fw_stats->dropped_bad_phy);
		tmp += ntohl(fw_stats->dropped_runt);
		tmp += ntohl(fw_stats->dropped_overrun);
		*val = tmp;
		break;

	case MAC_STAT_OERRORS:
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
			tmp += MYRI10GE_SLICE_STAT(xmit_err);
		}
		*val = tmp;
		break;

	case MAC_STAT_RBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ibytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_IPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ipackets;
		}
		*val = tmp;
		break;

	case MAC_STAT_OBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->obytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_OPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->opackets;
		}
		*val = tmp;
		break;

	case ETHER_STAT_TOOLONG_ERRORS:
		*val = ntohl(fw_stats->dropped_overrun);
		break;

#ifdef SOLARIS_S11
	case ETHER_STAT_TOOSHORT_ERRORS:
		*val = ntohl(fw_stats->dropped_runt);
		break;
#endif

	case ETHER_STAT_LINK_PAUSE:
		*val = mgp->pause;
		break;

	case ETHER_STAT_LINK_AUTONEG:
		*val = 1;
		break;

	case ETHER_STAT_LINK_DUPLEX:
		*val = LINK_DUPLEX_FULL;
		break;

	default:
		return (ENOTSUP);
	}

	return (0);
}

/* ARGSUSED */
static void
myri10ge_m_propinfo(void *arg, const char *pr_name,
    mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
{
	switch (pr_num) {
	case MAC_PROP_MTU:
		mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU);
		mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU,
		    MYRI10GE_MAX_GLD_MTU);
		break;
	default:
		break;
	}
}

/*ARGSUSED*/
static int
myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	int err = 0;
	struct myri10ge_priv *mgp = arg;

	switch (pr_num) {
	case MAC_PROP_MTU: {
		uint32_t mtu;
		if (pr_valsize < sizeof (mtu)) {
			err = EINVAL;
			break;
		}
		bcopy(pr_val, &mtu, sizeof (mtu));
		if (mtu > MYRI10GE_MAX_GLD_MTU ||
		    mtu < MYRI10GE_MIN_GLD_MTU) {
			err = EINVAL;
			break;
		}

		mutex_enter(&mgp->intrlock);
		if (mgp->running != MYRI10GE_ETH_STOPPED) {
			err = EBUSY;
			mutex_exit(&mgp->intrlock);
			break;
		}

		myri10ge_mtu = mtu + sizeof (struct ether_header) +
		    MXGEFW_PAD + VLAN_TAGSZ;
		mutex_exit(&mgp->intrlock);
		break;
	}
	default:
		err = ENOTSUP;
		break;
	}

	return (err);
}

static mac_callbacks_t myri10ge_m_callbacks = {
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO),
	myri10ge_m_stat,
	myri10ge_m_start,
	myri10ge_m_stop,
	myri10ge_m_promisc,
	myri10ge_m_multicst,
	NULL,
	NULL,
	NULL,
	myri10ge_m_ioctl,
	myri10ge_m_getcapab,
	NULL,
	NULL,
	myri10ge_m_setprop,
	NULL,
	myri10ge_m_propinfo
};


static int
myri10ge_probe_slices(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int status;

	mgp->num_slices = 1;

	/* hit the board with a reset to ensure it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	if (myri10ge_use_msix == 0)
		return (0);

	/* tell it the size of the interrupt queues */
	cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	/* ask the maximum number of slices it supports */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
	    &cmd);
	if (status != 0)
		return (0);

	mgp->num_slices = cmd.data0;

	/*
	 * if the admin did not specify a limit to how many
	 * slices we should use, cap it automatically to the
	 * number of CPUs currently online
	 */
	if (myri10ge_max_slices == -1)
		myri10ge_max_slices = ncpus;

	if (mgp->num_slices > myri10ge_max_slices)
		mgp->num_slices = myri10ge_max_slices;

	/*
	 * Now try to allocate as many MSI-X vectors as we have
	 * slices. We give up on MSI-X if we can only get a single
	 * vector.
	 */
	while (mgp->num_slices > 1) {
		/* make sure it is a power of two */
		while (!ISP2(mgp->num_slices))
			mgp->num_slices--;
		if (mgp->num_slices == 1)
			return (0);

		status = myri10ge_add_intrs(mgp, 0);
		if (status == 0) {
			myri10ge_rem_intrs(mgp, 0);
			if (mgp->intr_cnt == mgp->num_slices) {
				if (myri10ge_verbose)
					printf("Got %d slices!\n",
					    mgp->num_slices);
				return (0);
			}
			mgp->num_slices = mgp->intr_cnt;
		} else {
			mgp->num_slices = mgp->num_slices / 2;
		}
	}
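	/*
	 * Example of the descent above: with 16 slices wanted but
	 * only 10 MSI-X vectors granted, the slice count is clamped
	 * to 10, rounded down to the power of two 8 on the next pass,
	 * and the allocation is retried with 8 vectors.
	 */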

	if (myri10ge_verbose)
		printf("Got %d slices\n", mgp->num_slices);
	return (0);
}

static void
myri10ge_lro_free(struct myri10ge_slice_state *ss)
{
	struct lro_entry *lro;

	while (ss->lro_free != NULL) {
		lro = ss->lro_free;
		ss->lro_free = lro->next;
		kmem_free(lro, sizeof (*lro));
	}
}

static void
myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
{
	struct lro_entry *lro;
	int idx;

	ss->lro_free = NULL;
	ss->lro_active = NULL;

	for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
		lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
		if (lro == NULL)
			continue;
		lro->next = ss->lro_free;
		ss->lro_free = lro;
	}
}

static void
myri10ge_free_slices(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss;
	size_t bytes;
	int i;

	if (mgp->ss == NULL)
		return;

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		if (ss->rx_done.entry == NULL)
			continue;
		myri10ge_dma_free(&ss->rx_done.dma);
		ss->rx_done.entry = NULL;
		if (ss->fw_stats == NULL)
			continue;
		myri10ge_dma_free(&ss->fw_stats_dma);
		ss->fw_stats = NULL;
		mutex_destroy(&ss->rx_lock);
		mutex_destroy(&ss->tx.lock);
		mutex_destroy(&ss->tx.handle_lock);
		mutex_destroy(&ss->poll_lock);
		myri10ge_jpool_fini(ss);
		myri10ge_slice_stat_destroy(ss);
		myri10ge_lro_free(ss);
	}
	bytes = sizeof (*mgp->ss) * mgp->num_slices;
	kmem_free(mgp->ss, bytes);
	mgp->ss = NULL;
}

static int
myri10ge_alloc_slices(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss;
	size_t bytes;
	int i;

	bytes = sizeof (*mgp->ss) * mgp->num_slices;
	mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
	if (mgp->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];

		ss->mgp = mgp;

		/* allocate the per-slice firmware stats */
		bytes = sizeof (*ss->fw_stats);
		ss->fw_stats = (mcp_irq_data_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
		if (ss->fw_stats == NULL)
			goto abort;
		(void) memset(ss->fw_stats, 0, bytes);

		/* allocate rx done ring */
		bytes = mgp->max_intr_slots *
		    sizeof (*ss->rx_done.entry);
		ss->rx_done.entry = (mcp_slot_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
		if (ss->rx_done.entry == NULL) {
			goto abort;
		}
		(void) memset(ss->rx_done.entry, 0, bytes);
		mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie);
		mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL);
		myri10ge_jpool_init(ss);
		(void) myri10ge_slice_stat_init(ss);
		myri10ge_lro_alloc(ss);
	}

	return (0);

abort:
	myri10ge_free_slices(mgp);
	return (ENOMEM);
}

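/*
 * myri10ge_save_pci_state() only saves the first 64 bytes (16 dwords)
 * of config space, i.e. the standard header.  The MSI capability
 * registers live in the capability list beyond offset 0x40, so they
 * get an explicit save/restore pair when MSI is in use.
 */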
static int
myri10ge_save_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
        uint8_t ptr;
        int err;

        err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
        if (err != 0) {
                cmn_err(CE_WARN, "%s: could not find MSI cap\n",
                    mgp->name);
                return (DDI_FAILURE);
        }
        mgp->pci_saved_state.msi_ctrl =
            pci_config_get16(handle, ptr + PCI_MSI_CTRL);
        mgp->pci_saved_state.msi_addr_low =
            pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
        mgp->pci_saved_state.msi_addr_high =
            pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
        mgp->pci_saved_state.msi_data_32 =
            pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
        mgp->pci_saved_state.msi_data_64 =
            pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
        return (DDI_SUCCESS);
}
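/*
 * Write the MSI control, address and data registers saved by
 * myri10ge_save_msi_state() back into the device's MSI capability.
 */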
static int
myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
        uint8_t ptr;
        int err;

        err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
        if (err != 0) {
                cmn_err(CE_WARN, "%s: could not find MSI cap\n",
                    mgp->name);
                return (DDI_FAILURE);
        }

        pci_config_put16(handle, ptr + PCI_MSI_CTRL,
            mgp->pci_saved_state.msi_ctrl);
        pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
            mgp->pci_saved_state.msi_addr_low);
        pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
            mgp->pci_saved_state.msi_addr_high);
        pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
            mgp->pci_saved_state.msi_data_32);
        pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
            mgp->pci_saved_state.msi_data_64);

        return (DDI_SUCCESS);
}
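/*
 * Save the first 64 bytes of PCI config space (16 dwords), plus the
 * MSI state when MSI interrupts are in use, ahead of a suspend.
 */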
static int
myri10ge_save_pci_state(struct myri10ge_priv *mgp)
{
        ddi_acc_handle_t handle = mgp->cfg_hdl;
        int i;
        int err = DDI_SUCCESS;

        /* Save the non-extended PCI config space 32 bits at a time */
        for (i = 0; i < 16; i++)
                mgp->pci_saved_state.base[i] =
                    pci_config_get32(handle, i*4);

        /* now save MSI interrupt state, if needed */
        if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
                err = myri10ge_save_msi_state(mgp, handle);

        return (err);
}
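/*
 * Restore the config space saved by myri10ge_save_pci_state(), writing
 * the registers back in reverse order so the command register (dword 1)
 * is restored after the BARs, then re-apply the max read request
 * setting if it was in effect before the suspend.
 */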
static int
myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
{
        ddi_acc_handle_t handle = mgp->cfg_hdl;
        int i;
        int err = DDI_SUCCESS;

        /* Restore the non-extended PCI config space 32 bits at a time */
        for (i = 15; i >= 0; i--)
                pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);

        /* now restore MSI interrupt state, if needed */
        if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
                err = myri10ge_restore_msi_state(mgp, handle);

        if (mgp->max_read_request_4k)
                (void) myri10ge_set_max_readreq(handle);
        return (err);
}
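/*
 * DDI_SUSPEND handler: stop the watchdog and bring the interface down
 * if it is running, then save PCI config state for the later resume.
 */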
static int
myri10ge_suspend(dev_info_t *dip)
{
        struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
        int status;

        if (mgp == NULL) {
                cmn_err(CE_WARN, "null private data in myri10ge_suspend\n");
                return (DDI_FAILURE);
        }
        if (mgp->dip != dip) {
                cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
                return (DDI_FAILURE);
        }
        mutex_enter(&mgp->intrlock);
        if (mgp->running == MYRI10GE_ETH_RUNNING) {
                mgp->running = MYRI10GE_ETH_STOPPING;
                mutex_exit(&mgp->intrlock);
                (void) untimeout(mgp->timer_id);
                mutex_enter(&mgp->intrlock);
                myri10ge_stop_locked(mgp);
                mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
        }
        status = myri10ge_save_pci_state(mgp);
        mutex_exit(&mgp->intrlock);
        return (status);
}
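/*
 * DDI_RESUME handler: restore PCI config state, restart the interface
 * if it was running at suspend time, and re-arm the watchdog timer.
 */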
static int
myri10ge_resume(dev_info_t *dip)
{
        struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
        int status = DDI_SUCCESS;

        if (mgp == NULL) {
                cmn_err(CE_WARN, "null private data in myri10ge_resume\n");
                return (DDI_FAILURE);
        }
        if (mgp->dip != dip) {
                cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
                return (DDI_FAILURE);
        }

        mutex_enter(&mgp->intrlock);
        status = myri10ge_restore_pci_state(mgp);
        if (status == DDI_SUCCESS &&
            mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
                status = myri10ge_start_locked(mgp);
        }
        mutex_exit(&mgp->intrlock);
        if (status != DDI_SUCCESS)
                return (status);

        /* start the watchdog timer */
        mgp->timer_id = timeout(myri10ge_watchdog, mgp,
            mgp->timer_ticks);
        return (DDI_SUCCESS);
}
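/*
 * DDI_ATTACH entry point: set up PCI access, map the NIC's SRAM, read
 * the MAC address and select firmware, allocate slices and interrupts,
 * and finally register with the GLDv3 MAC layer.  DDI_RESUME is
 * forwarded to myri10ge_resume().
 */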
static int
myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        struct myri10ge_priv *mgp;
        mac_register_t *macp, *omacp;
        ddi_acc_handle_t handle;
        uint32_t csr, hdr_offset;
        int status, span, link_width, max_read_request_4k;
        unsigned long bus_number, dev_number, func_number;
        size_t bytes;
        offset_t ss_offset;
        uint8_t vso;

        if (cmd == DDI_RESUME) {
                return (myri10ge_resume(dip));
        }

        if (cmd != DDI_ATTACH)
                return (DDI_FAILURE);
        if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
                return (DDI_FAILURE);
        /* enable bus master and memory space access */
        csr = pci_config_get32(handle, PCI_CONF_COMM);
        pci_config_put32(handle, PCI_CONF_COMM,
            (csr | PCI_COMM_ME | PCI_COMM_MAE));
        status = myri10ge_read_pcie_link_width(handle, &link_width);
        if (status != 0) {
                cmn_err(CE_WARN, "could not read link width!\n");
                link_width = 0;
        }
        max_read_request_4k = !myri10ge_set_max_readreq(handle);
        status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
        if (status != 0)
                goto abort_with_cfg_hdl;
        if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
                goto abort_with_cfg_hdl;
        /*
         * XXXX Hack: mac_register_t grows in newer kernels.  To be
         * able to write newer fields, such as m_margin, without
         * writing outside allocated memory, we allocate our own macp
         * and pass that to mac_register()
         */
        macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
        macp->m_version = omacp->m_version;

        if ((mgp = (struct myri10ge_priv *)
            kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
                goto abort_with_macinfo;
        }
        ddi_set_driver_private(dip, mgp);

        /* setup device name for log messages */
        (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));

        mutex_enter(&myri10ge_param_lock);
        myri10ge_get_props(dip);
        mgp->intr_coal_delay = myri10ge_intr_coal_delay;
        mgp->pause = myri10ge_flow_control;
        mutex_exit(&myri10ge_param_lock);

        mgp->max_read_request_4k = max_read_request_4k;
        mgp->pcie_link_width = link_width;
        mgp->running = MYRI10GE_ETH_STOPPED;
        mgp->vso = vso;
        mgp->dip = dip;
        mgp->cfg_hdl = handle;

        mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
        myri10ge_test_physical(dip);

        /* allocate command page */
        bytes = sizeof (*mgp->cmd);
        mgp->cmd = (mcp_cmd_response_t *)
            (void *)myri10ge_dma_alloc(dip, bytes,
            &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
            DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
            &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
        if (mgp->cmd == NULL)
                goto abort_with_mgp;

        (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
            &dev_number, &func_number);
        if (myri10ge_verbose)
                printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
                    bus_number, dev_number, func_number);
        status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
            (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr,
            &mgp->io_handle);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
                printf("%s: reg_set = %d, span = %d, status = %d",
                    mgp->name, mgp->reg_set, span, status);
                goto abort_with_mgp;
        }

        hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET);
        hdr_offset = ntohl(hdr_offset) & 0xffffc;
        ss_offset = hdr_offset +
            offsetof(struct mcp_gen_header, string_specs);
        mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
        myri10ge_pio_copy32(mgp->eeprom_strings,
            (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
            MYRI10GE_EEPROM_STRINGS_SIZE);
        (void) memset(mgp->eeprom_strings +
            MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);

        status = myri10ge_read_mac_addr(mgp);
        if (status) {
                goto abort_with_mapped;
        }

        status = myri10ge_select_firmware(mgp);
        if (status != 0) {
                cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
                goto abort_with_mapped;
        }

        status = myri10ge_probe_slices(mgp);
        if (status != 0) {
                cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
                goto abort_with_dummy_rdma;
        }

        status = myri10ge_alloc_slices(mgp);
        if (status != 0) {
                cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
                goto abort_with_dummy_rdma;
        }

        /* add the interrupt handler */
        status = myri10ge_add_intrs(mgp, 1);
        if (status != 0) {
                cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
                    mgp->name);
                goto abort_with_slices;
        }

        /* now that we have an iblock_cookie, init the mutexes */
        mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
        mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);

        status = myri10ge_nic_stat_init(mgp);
        if (status != DDI_SUCCESS)
                goto abort_with_interrupts;
        status = myri10ge_info_init(mgp);
        if (status != DDI_SUCCESS)
                goto abort_with_stats;

        /*
         * Initialize GLD state
         */
        macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
        macp->m_driver = mgp;
        macp->m_dip = dip;
        macp->m_src_addr = mgp->mac_addr;
        macp->m_callbacks = &myri10ge_m_callbacks;
        macp->m_min_sdu = 0;
        macp->m_max_sdu = myri10ge_mtu -
            (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
#ifdef SOLARIS_S11
        macp->m_margin = VLAN_TAGSZ;
#endif
        macp->m_v12n = MAC_VIRT_LEVEL1;
        status = mac_register(macp, &mgp->mh);
        if (status != 0) {
                cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
                    mgp->name, status);
                goto abort_with_info;
        }
        myri10ge_ndd_init(mgp);
        if (myri10ge_verbose)
                printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
                    mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
        mutex_enter(&myri10ge_param_lock);
        mgp->next = mgp_list;
        mgp_list = mgp;
        mutex_exit(&myri10ge_param_lock);
        kmem_free(macp, sizeof (*macp) * 8);
        mac_free(omacp);
        return (DDI_SUCCESS);

abort_with_info:
        myri10ge_info_destroy(mgp);

abort_with_stats:
        myri10ge_nic_stat_destroy(mgp);

abort_with_interrupts:
        mutex_destroy(&mgp->cmd_lock);
        mutex_destroy(&mgp->intrlock);
        myri10ge_rem_intrs(mgp, 1);

abort_with_slices:
        myri10ge_free_slices(mgp);

abort_with_dummy_rdma:
        myri10ge_dummy_rdma(mgp, 0);

abort_with_mapped:
        ddi_regs_map_free(&mgp->io_handle);

        myri10ge_dma_free(&mgp->cmd_dma);

abort_with_mgp:
        kmem_free(mgp, sizeof (*mgp));

abort_with_macinfo:
        kmem_free(macp, sizeof (*macp) * 8);
        mac_free(omacp);

abort_with_cfg_hdl:
        pci_config_teardown(&handle);
        return (DDI_FAILURE);
}
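/*
 * DDI_DETACH entry point: refuse to detach while loaned-up receive
 * buffers or external references remain, otherwise unregister from
 * the MAC layer and unwind everything set up by attach.
 */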
static int
myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        struct myri10ge_priv *mgp, *tmp;
        int status, i, jbufs_alloced;

        if (cmd == DDI_SUSPEND) {
                status = myri10ge_suspend(dip);
                return (status);
        }

        if (cmd != DDI_DETACH) {
                return (DDI_FAILURE);
        }

        /* Get the driver private (struct myri10ge_priv) structure */
        mgp = ddi_get_driver_private(dip);

        mutex_enter(&mgp->intrlock);
        jbufs_alloced = 0;
        for (i = 0; i < mgp->num_slices; i++) {
                myri10ge_remove_jbufs(&mgp->ss[i]);
                jbufs_alloced += mgp->ss[i].jpool.num_alloc;
        }
        mutex_exit(&mgp->intrlock);
        if (jbufs_alloced != 0) {
                cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
                    mgp->name, jbufs_alloced);
                return (DDI_FAILURE);
        }

        mutex_enter(&myri10ge_param_lock);
        if (mgp->refcnt != 0) {
                mutex_exit(&myri10ge_param_lock);
                cmn_err(CE_NOTE, "%s: %d external refs remain\n",
                    mgp->name, mgp->refcnt);
                return (DDI_FAILURE);
        }
        mutex_exit(&myri10ge_param_lock);

        status = mac_unregister(mgp->mh);
        if (status != DDI_SUCCESS)
                return (status);

        myri10ge_ndd_fini(mgp);
        myri10ge_dummy_rdma(mgp, 0);
        myri10ge_nic_stat_destroy(mgp);
        myri10ge_info_destroy(mgp);

        mutex_destroy(&mgp->cmd_lock);
        mutex_destroy(&mgp->intrlock);

        myri10ge_rem_intrs(mgp, 1);

        myri10ge_free_slices(mgp);
        ddi_regs_map_free(&mgp->io_handle);
        myri10ge_dma_free(&mgp->cmd_dma);
        pci_config_teardown(&mgp->cfg_hdl);

        /* unlink this instance from the global mgp_list */
        mutex_enter(&myri10ge_param_lock);
        if (mgp_list == mgp) {
                mgp_list = mgp->next;
        } else {
                tmp = mgp_list;
                while (tmp->next != mgp && tmp->next != NULL)
                        tmp = tmp->next;
                if (tmp->next != NULL)
                        tmp->next = tmp->next->next;
        }
        kmem_free(mgp, sizeof (*mgp));
        mutex_exit(&myri10ge_param_lock);
        return (DDI_SUCCESS);
}
/*
 * Helper for the quiesce entry point: interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We're essentially running
 * the guts of the interrupt handler, and just cherry-picking the
 * confirmation that the NIC is quiesced (stats->link_down).
 */
static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
        struct myri10ge_slice_state *ss = mgp->ss;
        mcp_irq_data_t *stats = ss->fw_stats;
        int valid;
        int found_down = 0;

        /* check for a pending IRQ */
        if (!*((volatile uint8_t *)&stats->valid))
                return (0);
        valid = stats->valid;

        /*
         * Make sure to tell the NIC to lower a legacy IRQ, else
         * it may have corrupt state after restarting
         */
        if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
                /* lower legacy IRQ */
                *mgp->irq_deassert = 0;
                mb();
                /* wait for irq conf DMA */
                while (*((volatile uint8_t *)&stats->valid))
                        ;
        }
        if (stats->stats_updated && stats->link_down)
                found_down = 1;

        if (valid & 0x1)
                *ss->irq_claim = BE_32(3);
        *(ss->irq_claim + 1) = BE_32(3);

        return (found_down);
}
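/*
 * Quiesce entry point (used for fast reboot): bring the NIC down with
 * a firmware command, then poll without interrupts until the firmware
 * confirms the link is down or a two-second budget expires.
 */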
static int
myri10ge_quiesce(dev_info_t *dip)
{
        struct myri10ge_priv *mgp;
        myri10ge_cmd_t cmd;
        int status, down, i;

        mgp = ddi_get_driver_private(dip);
        if (mgp == NULL)
                return (DDI_FAILURE);

        /* if the device was unplumbed, it is guaranteed to be quiescent */
        if (mgp->running == MYRI10GE_ETH_STOPPED)
                return (DDI_SUCCESS);

        /* send a down CMD to quiesce the NIC */
        status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
        if (status) {
                cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
                return (DDI_FAILURE);
        }

        for (i = 0; i < 20; i++) {
                down = myri10ge_poll_down(mgp);
                if (down)
                        break;
                delay(drv_usectohz(100000));
                mb();
        }
        if (down)
                return (DDI_SUCCESS);
        return (DDI_FAILURE);
}
/*
 * Distinguish between allocb'ed blocks, and esballoc'ed attached
 * storage, by recording the dblk free routine that ordinary
 * allocb'ed blocks use.
 */
static void
myri10ge_find_lastfree(void)
{
        mblk_t *mp = allocb(1024, 0);
        dblk_t *dbp;

        if (mp == NULL) {
                cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
                return;
        }
        dbp = mp->b_datap;
        myri10ge_db_lastfree = (void *)dbp->db_lastfree;
        freemsg(mp);    /* done probing; free the scratch mblk */
}
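
/*
 * Illustrative use (a sketch, not a verbatim call site): an mblk whose
 * dblk free routine differs from the value recorded above must carry
 * esballoc'ed attached storage, e.g.
 *
 *      if ((void *)mp->b_datap->db_lastfree != myri10ge_db_lastfree)
 *              ... mp has attached storage, such as a loaned rx buffer ...
 */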
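/*
 * Loadable-module entry points: _init() registers the driver with the
 * MAC framework and installs the module, _fini() removes it, and
 * _info() reports module information.
 */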
int
_init(void)
{
        int i;

        if (myri10ge_verbose)
                cmn_err(CE_NOTE,
                    "Myricom 10G driver (10GbE) version %s loading\n",
                    MYRI10GE_VERSION_STR);
        myri10ge_find_lastfree();
        mac_init_ops(&myri10ge_ops, "myri10ge");
        mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
        if ((i = mod_install(&modlinkage)) != 0) {
                cmn_err(CE_WARN, "mod_install returned %d\n", i);
                mac_fini_ops(&myri10ge_ops);
                mutex_destroy(&myri10ge_param_lock);
        }
        return (i);
}

int
_fini(void)
{
        int i;

        i = mod_remove(&modlinkage);
        if (i != 0) {
                return (i);
        }
        mac_fini_ops(&myri10ge_ops);
        mutex_destroy(&myri10ge_param_lock);
        return (0);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

/*
 * This file uses MyriGE driver indentation.
 *
 * Local Variables:
 * c-file-style:"sun"
 * tab-width:8
 * End:
 */