1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2007-2009 Myricom, Inc. All rights reserved. 29 * Use is subject to license terms. 30 */ 31 32 /* 33 * Copyright (c) 2014, Joyent, Inc. 34 * Copyright (c) 2016 by Delphix. All rights reserved. 35 */ 36 37 #ifndef lint 38 static const char __idstring[] = 39 "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $"; 40 #endif 41 42 #define MXGEFW_NDIS 43 #include "myri10ge_var.h" 44 #include "rss_eth_z8e.h" 45 #include "rss_ethp_z8e.h" 46 #include "mcp_gen_header.h" 47 48 #define MYRI10GE_MAX_ETHER_MTU 9014 49 #define MYRI10GE_MAX_GLD_MTU 9000 50 #define MYRI10GE_MIN_GLD_MTU 1500 51 52 #define MYRI10GE_ETH_STOPPED 0 53 #define MYRI10GE_ETH_STOPPING 1 54 #define MYRI10GE_ETH_STARTING 2 55 #define MYRI10GE_ETH_RUNNING 3 56 #define MYRI10GE_ETH_OPEN_FAILED 4 57 #define MYRI10GE_ETH_SUSPENDED_RUNNING 5 58 59 static int myri10ge_small_bytes = 510; 60 static int myri10ge_intr_coal_delay = 125; 61 static int myri10ge_flow_control = 1; 62 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 63 static int myri10ge_nvidia_ecrc_enable = 1; 64 #endif 65 static int myri10ge_mtu_override = 0; 66 static int myri10ge_tx_copylen = 512; 67 static int myri10ge_deassert_wait = 1; 68 static int myri10ge_verbose = 0; 69 static int myri10ge_watchdog_reset = 0; 70 static int myri10ge_use_msix = 1; 71 static int myri10ge_max_slices = -1; 72 static int myri10ge_use_msi = 1; 73 int myri10ge_force_firmware = 0; 74 static boolean_t myri10ge_use_lso = B_TRUE; 75 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 76 static int myri10ge_tx_hash = 1; 77 static int myri10ge_lro = 0; 78 static int myri10ge_lro_cnt = 8; 79 int myri10ge_lro_max_aggr = 2; 80 static int myri10ge_lso_copy = 0; 81 static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp); 82 int myri10ge_tx_handles_initial = 128; 83 84 static kmutex_t myri10ge_param_lock; 85 static void* myri10ge_db_lastfree; 86 87 static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 88 static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 89 static int myri10ge_quiesce(dev_info_t *dip); 90 91 DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach, 92 myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce); 93 94 95 static struct modldrv modldrv = { 96 &mod_driverops, 97 "Myricom 10G driver (10GbE)", 98 &myri10ge_ops, 99 }; 100 101 102 static struct modlinkage modlinkage = { 103 MODREV_1, 104 {&modldrv, NULL}, 105 }; 106 107 unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 108 109 static ddi_dma_attr_t myri10ge_misc_dma_attr = { 110 DMA_ATTR_V0, /* version number. */ 111 (uint64_t)0, /* low address */ 112 (uint64_t)0xffffffffffffffffULL, /* high address */ 113 (uint64_t)0x7ffffff, /* address counter max */ 114 (uint64_t)4096, /* alignment */ 115 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 116 (uint32_t)0x1, /* minimum transfer size */ 117 (uint64_t)0x7fffffff, /* maximum transfer size */ 118 (uint64_t)0x7fffffff, /* maximum segment size */ 119 1, /* scatter/gather list length */ 120 1, /* granularity */ 121 0 /* attribute flags */ 122 }; 123 124 /* 125 * The Myri10GE NIC has the following constraints on receive buffers: 126 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB 127 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary 128 */ 129 130 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = { 131 DMA_ATTR_V0, /* version number. */ 132 (uint64_t)0, /* low address */ 133 (uint64_t)0xffffffffffffffffULL, /* high address */ 134 (uint64_t)0x7ffffff, /* address counter max */ 135 (uint64_t)4096, /* alignment */ 136 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 137 (uint32_t)0x1, /* minimum transfer size */ 138 (uint64_t)0x7fffffff, /* maximum transfer size */ 139 UINT64_MAX, /* maximum segment size */ 140 1, /* scatter/gather list length */ 141 1, /* granularity */ 142 0 /* attribute flags */ 143 }; 144 145 static ddi_dma_attr_t myri10ge_rx_std_dma_attr = { 146 DMA_ATTR_V0, /* version number. */ 147 (uint64_t)0, /* low address */ 148 (uint64_t)0xffffffffffffffffULL, /* high address */ 149 (uint64_t)0x7ffffff, /* address counter max */ 150 #if defined sparc64 || defined __sparcv9 151 (uint64_t)4096, /* alignment */ 152 #else 153 (uint64_t)0x80, /* alignment */ 154 #endif 155 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 156 (uint32_t)0x1, /* minimum transfer size */ 157 (uint64_t)0x7fffffff, /* maximum transfer size */ 158 #if defined sparc64 || defined __sparcv9 159 UINT64_MAX, /* maximum segment size */ 160 #else 161 (uint64_t)0xfff, /* maximum segment size */ 162 #endif 163 1, /* scatter/gather list length */ 164 1, /* granularity */ 165 0 /* attribute flags */ 166 }; 167 168 static ddi_dma_attr_t myri10ge_tx_dma_attr = { 169 DMA_ATTR_V0, /* version number. */ 170 (uint64_t)0, /* low address */ 171 (uint64_t)0xffffffffffffffffULL, /* high address */ 172 (uint64_t)0x7ffffff, /* address counter max */ 173 (uint64_t)1, /* alignment */ 174 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 175 (uint32_t)0x1, /* minimum transfer size */ 176 (uint64_t)0x7fffffff, /* maximum transfer size */ 177 UINT64_MAX, /* maximum segment size */ 178 INT32_MAX, /* scatter/gather list length */ 179 1, /* granularity */ 180 0 /* attribute flags */ 181 }; 182 183 #if defined sparc64 || defined __sparcv9 184 #define WC 0 185 #else 186 #define WC 1 187 #endif 188 189 struct ddi_device_acc_attr myri10ge_dev_access_attr = { 190 DDI_DEVICE_ATTR_V0, /* version */ 191 DDI_NEVERSWAP_ACC, /* endian flash */ 192 #if WC 193 DDI_MERGING_OK_ACC /* data order */ 194 #else 195 DDI_STRICTORDER_ACC 196 #endif 197 }; 198 199 static void myri10ge_watchdog(void *arg); 200 201 #ifdef MYRICOM_PRIV 202 int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ; 203 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MAX_GLD_MTU 204 #else 205 int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ; 206 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MIN_GLD_MTU 207 #endif 208 int myri10ge_bigbufs_initial = 1024; 209 int myri10ge_bigbufs_max = 4096; 210 211 212 caddr_t 213 myri10ge_dma_alloc(dev_info_t *dip, size_t len, 214 ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr, 215 uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma, 216 int warn, int (*wait)(caddr_t)) 217 { 218 caddr_t kaddr; 219 size_t real_length; 220 ddi_dma_cookie_t cookie; 221 uint_t count; 222 int err; 223 224 err = ddi_dma_alloc_handle(dip, attr, wait, 225 NULL, &dma->handle); 226 if (err != DDI_SUCCESS) { 227 if (warn) 228 cmn_err(CE_WARN, 229 "myri10ge: ddi_dma_alloc_handle failed\n"); 230 goto abort_with_nothing; 231 } 232 233 err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags, 234 wait, NULL, &kaddr, &real_length, 235 &dma->acc_handle); 236 if (err != DDI_SUCCESS) { 237 if (warn) 238 cmn_err(CE_WARN, 239 "myri10ge: ddi_dma_mem_alloc failed\n"); 240 goto abort_with_handle; 241 } 242 243 err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len, 244 bind_flags, wait, NULL, &cookie, &count); 245 246 if (err != DDI_SUCCESS) { 247 if (warn) 248 cmn_err(CE_WARN, 249 "myri10ge: ddi_dma_addr_bind_handle failed\n"); 250 goto abort_with_mem; 251 } 252 253 if (count != 1) { 254 if (warn) 255 cmn_err(CE_WARN, 256 "myri10ge: got too many dma segments "); 257 goto abort_with_bind; 258 } 259 dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress)); 260 dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress)); 261 return (kaddr); 262 263 abort_with_bind: 264 (void) ddi_dma_unbind_handle(dma->handle); 265 266 abort_with_mem: 267 ddi_dma_mem_free(&dma->acc_handle); 268 269 abort_with_handle: 270 ddi_dma_free_handle(&dma->handle); 271 abort_with_nothing: 272 if (warn) { 273 cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n "); 274 cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n", 275 (void*) dip, len, (void*) attr); 276 cmn_err(CE_WARN, 277 "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n", 278 (void*) accattr, alloc_flags); 279 cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p", 280 bind_flags, (void*) dma); 281 } 282 return (NULL); 283 284 } 285 286 void 287 myri10ge_dma_free(struct myri10ge_dma_stuff *dma) 288 { 289 (void) ddi_dma_unbind_handle(dma->handle); 290 ddi_dma_mem_free(&dma->acc_handle); 291 ddi_dma_free_handle(&dma->handle); 292 } 293 294 static inline void 295 myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size) 296 { 297 register volatile uint32_t *to32; 298 size_t i; 299 300 to32 = (volatile uint32_t *) to; 301 for (i = (size / 4); i; i--) { 302 *to32 = *from32; 303 to32++; 304 from32++; 305 } 306 } 307 308 #if defined(_LP64) 309 static inline void 310 myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size) 311 { 312 register volatile uint64_t *to64; 313 size_t i; 314 315 to64 = (volatile uint64_t *) to; 316 for (i = (size / 8); i; i--) { 317 *to64 = *from64; 318 to64++; 319 from64++; 320 } 321 } 322 #endif 323 324 /* 325 * This routine copies memory from the host to the NIC. 326 * The "size" argument must always be a multiple of 327 * the size of long (4 or 8 bytes), and to/from must also 328 * be naturally aligned. 329 */ 330 static inline void 331 myri10ge_pio_copy(void *to, void *from, size_t size) 332 { 333 #if !defined(_LP64) 334 ASSERT((size % 4) == 0); 335 myri10ge_pio_copy32(to, (uint32_t *)from, size); 336 #else 337 ASSERT((size % 8) == 0); 338 myri10ge_pio_copy64(to, (uint64_t *)from, size); 339 #endif 340 } 341 342 343 /* 344 * Due to various bugs in Solaris (especially bug 6186772 where the 345 * TCP/UDP checksum is calculated incorrectly on mblk chains with more 346 * than two elements), and the design bug where hardware checksums are 347 * ignored on mblk chains with more than 2 elements, we need to 348 * allocate private pool of physically contiguous receive buffers. 349 */ 350 351 static void 352 myri10ge_jpool_init(struct myri10ge_slice_state *ss) 353 { 354 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 355 356 bzero(jpool, sizeof (*jpool)); 357 mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER, 358 ss->mgp->icookie); 359 jpool->head = NULL; 360 } 361 362 static void 363 myri10ge_jpool_fini(struct myri10ge_slice_state *ss) 364 { 365 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 366 367 if (jpool->head != NULL) { 368 cmn_err(CE_WARN, 369 "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n", 370 ss->mgp->name); 371 } 372 mutex_destroy(&jpool->mtx); 373 } 374 375 376 /* 377 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 378 * at most 32 bytes at a time, so as to avoid involving the software 379 * pio handler in the nic. We re-write the first segment's low 380 * DMA address to mark it valid only after we write the entire chunk 381 * in a burst 382 */ 383 static inline void 384 myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src) 385 { 386 src->addr_low |= BE_32(1); 387 myri10ge_pio_copy(dst, src, 4 * sizeof (*src)); 388 mb(); 389 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 390 mb(); 391 src->addr_low &= ~(BE_32(1)); 392 dst->addr_low = src->addr_low; 393 mb(); 394 } 395 396 static void 397 myri10ge_pull_jpool(struct myri10ge_slice_state *ss) 398 { 399 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 400 struct myri10ge_jpool_entry *jtail, *j, *jfree; 401 volatile uintptr_t *putp; 402 uintptr_t put; 403 int i; 404 405 /* find tail */ 406 jtail = NULL; 407 if (jpool->head != NULL) { 408 j = jpool->head; 409 while (j->next != NULL) 410 j = j->next; 411 jtail = j; 412 } 413 414 /* 415 * iterate over all per-CPU caches, and add contents into 416 * jpool 417 */ 418 for (i = 0; i < MYRI10GE_MAX_CPUS; i++) { 419 /* take per-CPU free list */ 420 putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head; 421 if (*putp == NULL) 422 continue; 423 put = atomic_swap_ulong(putp, 0); 424 jfree = (struct myri10ge_jpool_entry *)put; 425 426 /* append to pool */ 427 if (jtail == NULL) { 428 jpool->head = jfree; 429 } else { 430 jtail->next = jfree; 431 } 432 j = jfree; 433 while (j->next != NULL) 434 j = j->next; 435 jtail = j; 436 } 437 } 438 439 /* 440 * Transfers buffers from the free pool to the nic 441 * Must be called holding the jpool mutex. 442 */ 443 444 static inline void 445 myri10ge_restock_jumbos(struct myri10ge_slice_state *ss) 446 { 447 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 448 struct myri10ge_jpool_entry *j; 449 myri10ge_rx_ring_t *rx; 450 int i, idx, limit; 451 452 rx = &ss->rx_big; 453 limit = ss->j_rx_cnt + (rx->mask + 1); 454 455 for (i = rx->cnt; i != limit; i++) { 456 idx = i & (rx->mask); 457 j = jpool->head; 458 if (j == NULL) { 459 myri10ge_pull_jpool(ss); 460 j = jpool->head; 461 if (j == NULL) { 462 break; 463 } 464 } 465 jpool->head = j->next; 466 rx->info[idx].j = j; 467 rx->shadow[idx].addr_low = j->dma.low; 468 rx->shadow[idx].addr_high = j->dma.high; 469 /* copy 4 descriptors (32-bytes) to the mcp at a time */ 470 if ((idx & 7) == 7) { 471 myri10ge_submit_8rx(&rx->lanai[idx - 7], 472 &rx->shadow[idx - 7]); 473 } 474 } 475 rx->cnt = i; 476 } 477 478 /* 479 * Transfer buffers from the nic to the free pool. 480 * Should be called holding the jpool mutex 481 */ 482 483 static inline void 484 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss) 485 { 486 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 487 struct myri10ge_jpool_entry *j; 488 myri10ge_rx_ring_t *rx; 489 int i; 490 491 mutex_enter(&jpool->mtx); 492 rx = &ss->rx_big; 493 494 for (i = 0; i < rx->mask + 1; i++) { 495 j = rx->info[i].j; 496 rx->info[i].j = NULL; 497 if (j == NULL) 498 continue; 499 j->next = jpool->head; 500 jpool->head = j; 501 } 502 mutex_exit(&jpool->mtx); 503 504 } 505 506 507 /* 508 * Free routine which is called when the mblk allocated via 509 * esballoc() is freed. Here we return the jumbo buffer 510 * to the free pool, and possibly pass some jumbo buffers 511 * to the nic 512 */ 513 514 static void 515 myri10ge_jfree_rtn(void *arg) 516 { 517 struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg; 518 struct myri10ge_jpool_stuff *jpool; 519 volatile uintptr_t *putp; 520 uintptr_t old, new; 521 522 jpool = &j->ss->jpool; 523 524 /* prepend buffer locklessly to per-CPU freelist */ 525 putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head; 526 new = (uintptr_t)j; 527 do { 528 old = *putp; 529 j->next = (void *)old; 530 } while (atomic_cas_ulong(putp, old, new) != old); 531 } 532 533 static void 534 myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j) 535 { 536 (void) ddi_dma_unbind_handle(j->dma_handle); 537 ddi_dma_mem_free(&j->acc_handle); 538 ddi_dma_free_handle(&j->dma_handle); 539 kmem_free(j, sizeof (*j)); 540 } 541 542 543 /* 544 * Allocates one physically contiguous descriptor 545 * and add it to the jumbo buffer pool. 546 */ 547 548 static int 549 myri10ge_add_jbuf(struct myri10ge_slice_state *ss) 550 { 551 struct myri10ge_jpool_entry *j; 552 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 553 ddi_dma_attr_t *rx_dma_attr; 554 size_t real_length; 555 ddi_dma_cookie_t cookie; 556 uint_t count; 557 int err; 558 559 if (myri10ge_mtu < 2048) 560 rx_dma_attr = &myri10ge_rx_std_dma_attr; 561 else 562 rx_dma_attr = &myri10ge_rx_jumbo_dma_attr; 563 564 again: 565 j = (struct myri10ge_jpool_entry *) 566 kmem_alloc(sizeof (*j), KM_SLEEP); 567 err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr, 568 DDI_DMA_DONTWAIT, NULL, &j->dma_handle); 569 if (err != DDI_SUCCESS) 570 goto abort_with_j; 571 572 err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu, 573 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 574 NULL, &j->buf, &real_length, &j->acc_handle); 575 if (err != DDI_SUCCESS) 576 goto abort_with_handle; 577 578 err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf, 579 real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 580 NULL, &cookie, &count); 581 if (err != DDI_SUCCESS) 582 goto abort_with_mem; 583 584 /* 585 * Make certain std MTU buffers do not cross a 4KB boundary: 586 * 587 * Setting dma_attr_align=4096 will do this, but the system 588 * will only allocate 1 RX buffer per 4KB page, rather than 2. 589 * Setting dma_attr_granular=4096 *seems* to work around this, 590 * but I'm paranoid about future systems no longer honoring 591 * this, so fall back to the safe, but memory wasting way if a 592 * buffer crosses a 4KB boundary. 593 */ 594 595 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 596 rx_dma_attr->dma_attr_align != 4096) { 597 uint32_t start, end; 598 599 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 600 end = start + myri10ge_mtu; 601 if (((end >> 12) != (start >> 12)) && (start & 4095U)) { 602 printf("std buffer crossed a 4KB boundary!\n"); 603 myri10ge_remove_jbuf(j); 604 rx_dma_attr->dma_attr_align = 4096; 605 rx_dma_attr->dma_attr_seg = UINT64_MAX; 606 goto again; 607 } 608 } 609 610 j->dma.low = 611 htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress)); 612 j->dma.high = 613 htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress)); 614 j->ss = ss; 615 616 617 j->free_func.free_func = myri10ge_jfree_rtn; 618 j->free_func.free_arg = (char *)j; 619 mutex_enter(&jpool->mtx); 620 j->next = jpool->head; 621 jpool->head = j; 622 jpool->num_alloc++; 623 mutex_exit(&jpool->mtx); 624 return (0); 625 626 abort_with_mem: 627 ddi_dma_mem_free(&j->acc_handle); 628 629 abort_with_handle: 630 ddi_dma_free_handle(&j->dma_handle); 631 632 abort_with_j: 633 kmem_free(j, sizeof (*j)); 634 635 /* 636 * If an allocation failed, perhaps it failed because it could 637 * not satisfy granularity requirement. Disable that, and 638 * try agin. 639 */ 640 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 641 rx_dma_attr->dma_attr_align != 4096) { 642 cmn_err(CE_NOTE, 643 "!alloc failed, reverting to gran=1\n"); 644 rx_dma_attr->dma_attr_align = 4096; 645 rx_dma_attr->dma_attr_seg = UINT64_MAX; 646 goto again; 647 } 648 return (err); 649 } 650 651 static int 652 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool) 653 { 654 int i; 655 struct myri10ge_jpool_entry *j; 656 657 mutex_enter(&jpool->mtx); 658 j = jpool->head; 659 i = 0; 660 while (j != NULL) { 661 i++; 662 j = j->next; 663 } 664 mutex_exit(&jpool->mtx); 665 return (i); 666 } 667 668 static int 669 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total) 670 { 671 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 672 int allocated = 0; 673 int err; 674 int needed; 675 676 /* 677 * if total is set, user wants "num" jbufs in the pool, 678 * otherwise the user wants to "num" additional jbufs 679 * added to the pool 680 */ 681 if (total && jpool->num_alloc) { 682 allocated = myri10ge_jfree_cnt(jpool); 683 needed = num - allocated; 684 } else { 685 needed = num; 686 } 687 688 while (needed > 0) { 689 needed--; 690 err = myri10ge_add_jbuf(ss); 691 if (err == 0) { 692 allocated++; 693 } 694 } 695 return (allocated); 696 } 697 698 static void 699 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss) 700 { 701 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 702 struct myri10ge_jpool_entry *j; 703 704 mutex_enter(&jpool->mtx); 705 myri10ge_pull_jpool(ss); 706 while (jpool->head != NULL) { 707 jpool->num_alloc--; 708 j = jpool->head; 709 jpool->head = j->next; 710 myri10ge_remove_jbuf(j); 711 } 712 mutex_exit(&jpool->mtx); 713 } 714 715 static void 716 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss) 717 { 718 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 719 struct myri10ge_jpool_entry *j = NULL; 720 caddr_t ptr; 721 uint32_t dma_low, dma_high; 722 int idx, len; 723 unsigned int alloc_size; 724 725 dma_low = dma_high = len = 0; 726 alloc_size = myri10ge_small_bytes + MXGEFW_PAD; 727 ptr = NULL; 728 for (idx = 0; idx < ss->rx_small.mask + 1; idx++) { 729 /* Allocate a jumbo frame and carve it into small frames */ 730 if (len < alloc_size) { 731 mutex_enter(&jpool->mtx); 732 /* remove jumbo from freelist */ 733 j = jpool->head; 734 jpool->head = j->next; 735 /* place it onto small list */ 736 j->next = ss->small_jpool; 737 ss->small_jpool = j; 738 mutex_exit(&jpool->mtx); 739 len = myri10ge_mtu; 740 dma_low = ntohl(j->dma.low); 741 dma_high = ntohl(j->dma.high); 742 ptr = j->buf; 743 } 744 ss->rx_small.info[idx].ptr = ptr; 745 ss->rx_small.shadow[idx].addr_low = htonl(dma_low); 746 ss->rx_small.shadow[idx].addr_high = htonl(dma_high); 747 len -= alloc_size; 748 ptr += alloc_size; 749 dma_low += alloc_size; 750 } 751 } 752 753 /* 754 * Return the jumbo bufs we carved up for small to the jumbo pool 755 */ 756 757 static void 758 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss) 759 { 760 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 761 struct myri10ge_jpool_entry *j = NULL; 762 763 mutex_enter(&jpool->mtx); 764 while (ss->small_jpool != NULL) { 765 j = ss->small_jpool; 766 ss->small_jpool = j->next; 767 j->next = jpool->head; 768 jpool->head = j; 769 } 770 mutex_exit(&jpool->mtx); 771 ss->jbufs_for_smalls = 0; 772 } 773 774 static int 775 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss) 776 { 777 myri10ge_tx_ring_t *tx = &ss->tx; 778 struct myri10ge_priv *mgp = ss->mgp; 779 struct myri10ge_tx_dma_handle *handle; 780 int err; 781 782 handle = kmem_zalloc(sizeof (*handle), KM_SLEEP); 783 err = ddi_dma_alloc_handle(mgp->dip, 784 &myri10ge_tx_dma_attr, 785 DDI_DMA_SLEEP, NULL, 786 &handle->h); 787 if (err) { 788 static int limit = 0; 789 if (limit == 0) 790 cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n", 791 mgp->name); 792 limit++; 793 kmem_free(handle, sizeof (*handle)); 794 return (err); 795 } 796 mutex_enter(&tx->handle_lock); 797 MYRI10GE_SLICE_STAT_INC(tx_handles_alloced); 798 handle->next = tx->free_tx_handles; 799 tx->free_tx_handles = handle; 800 mutex_exit(&tx->handle_lock); 801 return (DDI_SUCCESS); 802 } 803 804 static void 805 myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss) 806 { 807 myri10ge_tx_ring_t *tx = &ss->tx; 808 struct myri10ge_tx_dma_handle *handle; 809 mutex_enter(&tx->handle_lock); 810 811 handle = tx->free_tx_handles; 812 while (handle != NULL) { 813 tx->free_tx_handles = handle->next; 814 ddi_dma_free_handle(&handle->h); 815 kmem_free(handle, sizeof (*handle)); 816 handle = tx->free_tx_handles; 817 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced); 818 } 819 mutex_exit(&tx->handle_lock); 820 if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) { 821 cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n", 822 ss->mgp->name, 823 (int)MYRI10GE_SLICE_STAT(tx_handles_alloced)); 824 } 825 } 826 827 static void 828 myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx, 829 struct myri10ge_tx_dma_handle_head *list) 830 { 831 mutex_enter(&tx->handle_lock); 832 list->tail->next = tx->free_tx_handles; 833 tx->free_tx_handles = list->head; 834 mutex_exit(&tx->handle_lock); 835 } 836 837 static void 838 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx, 839 struct myri10ge_tx_dma_handle *handle) 840 { 841 struct myri10ge_tx_dma_handle_head list; 842 843 if (handle == NULL) 844 return; 845 list.head = handle; 846 list.tail = handle; 847 while (handle != NULL) { 848 list.tail = handle; 849 handle = handle->next; 850 } 851 myri10ge_free_tx_handles(tx, &list); 852 } 853 854 static int 855 myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count, 856 struct myri10ge_tx_dma_handle **ret) 857 { 858 myri10ge_tx_ring_t *tx = &ss->tx; 859 struct myri10ge_tx_dma_handle *handle; 860 int err, i; 861 862 mutex_enter(&tx->handle_lock); 863 for (i = 0; i < count; i++) { 864 handle = tx->free_tx_handles; 865 while (handle == NULL) { 866 mutex_exit(&tx->handle_lock); 867 err = myri10ge_add_tx_handle(ss); 868 if (err != DDI_SUCCESS) { 869 goto abort_with_handles; 870 } 871 mutex_enter(&tx->handle_lock); 872 handle = tx->free_tx_handles; 873 } 874 tx->free_tx_handles = handle->next; 875 handle->next = *ret; 876 *ret = handle; 877 } 878 mutex_exit(&tx->handle_lock); 879 return (DDI_SUCCESS); 880 881 abort_with_handles: 882 myri10ge_free_tx_handle_slist(tx, *ret); 883 return (err); 884 } 885 886 887 /* 888 * Frees DMA resources associated with the send ring 889 */ 890 static void 891 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss) 892 { 893 myri10ge_tx_ring_t *tx; 894 struct myri10ge_tx_dma_handle_head handles; 895 size_t bytes; 896 int idx; 897 898 tx = &ss->tx; 899 handles.head = NULL; 900 handles.tail = NULL; 901 for (idx = 0; idx < ss->tx.mask + 1; idx++) { 902 if (tx->info[idx].m) { 903 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h); 904 handles.head = tx->info[idx].handle; 905 if (handles.tail == NULL) 906 handles.tail = tx->info[idx].handle; 907 freeb(tx->info[idx].m); 908 tx->info[idx].m = 0; 909 tx->info[idx].handle = 0; 910 } 911 tx->cp[idx].va = NULL; 912 myri10ge_dma_free(&tx->cp[idx].dma); 913 } 914 bytes = sizeof (*tx->cp) * (tx->mask + 1); 915 kmem_free(tx->cp, bytes); 916 tx->cp = NULL; 917 if (handles.head != NULL) 918 myri10ge_free_tx_handles(tx, &handles); 919 myri10ge_remove_tx_handles(ss); 920 } 921 922 /* 923 * Allocates DMA handles associated with the send ring 924 */ 925 static inline int 926 myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss) 927 { 928 struct myri10ge_tx_dma_handle *handles; 929 int h; 930 size_t bytes; 931 932 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1); 933 ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP); 934 if (ss->tx.cp == NULL) { 935 cmn_err(CE_WARN, 936 "%s: Failed to allocate tx copyblock storage\n", 937 ss->mgp->name); 938 return (DDI_FAILURE); 939 } 940 941 942 /* allocate the TX copyblocks */ 943 for (h = 0; h < ss->tx.mask + 1; h++) { 944 ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip, 945 4096, &myri10ge_rx_jumbo_dma_attr, 946 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, 947 DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1, 948 DDI_DMA_DONTWAIT); 949 if (ss->tx.cp[h].va == NULL) { 950 cmn_err(CE_WARN, "%s: Failed to allocate tx " 951 "copyblock %d\n", ss->mgp->name, h); 952 goto abort_with_copyblocks; 953 } 954 } 955 /* pre-allocate transmit handles */ 956 handles = NULL; 957 (void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial, 958 &handles); 959 if (handles != NULL) 960 myri10ge_free_tx_handle_slist(&ss->tx, handles); 961 962 return (DDI_SUCCESS); 963 964 abort_with_copyblocks: 965 while (h > 0) { 966 h--; 967 myri10ge_dma_free(&ss->tx.cp[h].dma); 968 } 969 970 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1); 971 kmem_free(ss->tx.cp, bytes); 972 ss->tx.cp = NULL; 973 return (DDI_FAILURE); 974 } 975 976 /* 977 * The eeprom strings on the lanaiX have the format 978 * SN=x\0 979 * MAC=x:x:x:x:x:x\0 980 * PT:ddd mmm xx xx:xx:xx xx\0 981 * PV:ddd mmm xx xx:xx:xx xx\0 982 */ 983 static int 984 myri10ge_read_mac_addr(struct myri10ge_priv *mgp) 985 { 986 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++) 987 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \ 988 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \ 989 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1))) 990 991 char *ptr, *limit; 992 int i, hv, lv; 993 994 ptr = mgp->eeprom_strings; 995 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE; 996 997 while (*ptr != '\0' && ptr < limit) { 998 if (memcmp(ptr, "MAC=", 4) == 0) { 999 ptr += 4; 1000 if (myri10ge_verbose) 1001 printf("%s: mac address = %s\n", mgp->name, 1002 ptr); 1003 mgp->mac_addr_string = ptr; 1004 for (i = 0; i < 6; i++) { 1005 if ((ptr + 2) > limit) 1006 goto abort; 1007 1008 if (*(ptr+1) == ':') { 1009 hv = 0; 1010 lv = myri10ge_digit(*ptr); ptr++; 1011 } else { 1012 hv = myri10ge_digit(*ptr); ptr++; 1013 lv = myri10ge_digit(*ptr); ptr++; 1014 } 1015 mgp->mac_addr[i] = (hv << 4) | lv; 1016 ptr++; 1017 } 1018 } 1019 if (memcmp((const void *)ptr, "SN=", 3) == 0) { 1020 ptr += 3; 1021 mgp->sn_str = (char *)ptr; 1022 } 1023 if (memcmp((const void *)ptr, "PC=", 3) == 0) { 1024 ptr += 3; 1025 mgp->pc_str = (char *)ptr; 1026 } 1027 MYRI10GE_NEXT_STRING(ptr); 1028 } 1029 1030 return (0); 1031 1032 abort: 1033 cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name); 1034 return (ENXIO); 1035 } 1036 1037 1038 /* 1039 * Determine the register set containing the PCI resource we 1040 * want to map: the memory-mappable part of the interface. We do 1041 * this by scanning the DDI "reg" property of the interface, 1042 * which is an array of mx_ddi_reg_set structures. 1043 */ 1044 static int 1045 myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span, 1046 unsigned long *busno, unsigned long *devno, 1047 unsigned long *funcno) 1048 { 1049 1050 #define REGISTER_NUMBER(ip) (ip[0] >> 0 & 0xff) 1051 #define FUNCTION_NUMBER(ip) (ip[0] >> 8 & 0x07) 1052 #define DEVICE_NUMBER(ip) (ip[0] >> 11 & 0x1f) 1053 #define BUS_NUMBER(ip) (ip[0] >> 16 & 0xff) 1054 #define ADDRESS_SPACE(ip) (ip[0] >> 24 & 0x03) 1055 #define PCI_ADDR_HIGH(ip) (ip[1]) 1056 #define PCI_ADDR_LOW(ip) (ip[2]) 1057 #define PCI_SPAN_HIGH(ip) (ip[3]) 1058 #define PCI_SPAN_LOW(ip) (ip[4]) 1059 1060 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2 1061 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3 1062 1063 int *data, i, *rs; 1064 uint32_t nelementsp; 1065 1066 #ifdef MYRI10GE_REGSET_VERBOSE 1067 char *address_space_name[] = { "Configuration Space", 1068 "I/O Space", 1069 "32-bit Memory Space", 1070 "64-bit Memory Space" 1071 }; 1072 #endif 1073 1074 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1075 "reg", &data, &nelementsp) != DDI_SUCCESS) { 1076 printf("Could not determine register set.\n"); 1077 return (ENXIO); 1078 } 1079 1080 #ifdef MYRI10GE_REGSET_VERBOSE 1081 printf("There are %d register sets.\n", nelementsp / 5); 1082 #endif 1083 if (!nelementsp) { 1084 printf("Didn't find any \"reg\" properties.\n"); 1085 ddi_prop_free(data); 1086 return (ENODEV); 1087 } 1088 1089 /* Scan for the register number. */ 1090 rs = &data[0]; 1091 *busno = BUS_NUMBER(rs); 1092 *devno = DEVICE_NUMBER(rs); 1093 *funcno = FUNCTION_NUMBER(rs); 1094 1095 #ifdef MYRI10GE_REGSET_VERBOSE 1096 printf("*** Scanning for register number.\n"); 1097 #endif 1098 for (i = 0; i < nelementsp / 5; i++) { 1099 rs = &data[5 * i]; 1100 #ifdef MYRI10GE_REGSET_VERBOSE 1101 printf("Examining register set %d:\n", i); 1102 printf(" Register number = %d.\n", REGISTER_NUMBER(rs)); 1103 printf(" Function number = %d.\n", FUNCTION_NUMBER(rs)); 1104 printf(" Device number = %d.\n", DEVICE_NUMBER(rs)); 1105 printf(" Bus number = %d.\n", BUS_NUMBER(rs)); 1106 printf(" Address space = %d (%s ).\n", ADDRESS_SPACE(rs), 1107 address_space_name[ADDRESS_SPACE(rs)]); 1108 printf(" pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs), 1109 PCI_ADDR_LOW(rs)); 1110 printf(" pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs), 1111 PCI_SPAN_LOW(rs)); 1112 #endif 1113 /* We are looking for a memory property. */ 1114 1115 if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE || 1116 ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) { 1117 *reg_set = i; 1118 1119 #ifdef MYRI10GE_REGSET_VERBOSE 1120 printf("%s uses register set %d.\n", 1121 address_space_name[ADDRESS_SPACE(rs)], *reg_set); 1122 #endif 1123 1124 *span = (PCI_SPAN_LOW(rs)); 1125 #ifdef MYRI10GE_REGSET_VERBOSE 1126 printf("Board span is 0x%x\n", *span); 1127 #endif 1128 break; 1129 } 1130 } 1131 1132 ddi_prop_free(data); 1133 1134 /* If no match, fail. */ 1135 if (i >= nelementsp / 5) { 1136 return (EIO); 1137 } 1138 1139 return (0); 1140 } 1141 1142 1143 static int 1144 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit) 1145 { 1146 void *inflate_buffer; 1147 int rv, status; 1148 size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE; 1149 size_t destlen; 1150 mcp_gen_header_t *hdr; 1151 unsigned hdr_offset, i; 1152 1153 1154 *limit = 0; /* -Wuninitialized */ 1155 status = 0; 1156 1157 inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP); 1158 if (!inflate_buffer) { 1159 cmn_err(CE_WARN, 1160 "%s: Could not allocate buffer to inflate mcp\n", 1161 mgp->name); 1162 return (ENOMEM); 1163 } 1164 1165 destlen = sram_size; 1166 rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e, 1167 mgp->eth_z8e_length); 1168 1169 if (rv != Z_OK) { 1170 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n", 1171 mgp->name, z_strerror(rv)); 1172 status = ENXIO; 1173 goto abort; 1174 } 1175 1176 *limit = (uint32_t)destlen; 1177 1178 hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer + 1179 MCP_HEADER_PTR_OFFSET)); 1180 hdr = (void *)((char *)inflate_buffer + hdr_offset); 1181 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) { 1182 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name, 1183 ntohl(hdr->mcp_type)); 1184 status = EIO; 1185 goto abort; 1186 } 1187 1188 /* save firmware version for kstat */ 1189 (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version)); 1190 if (myri10ge_verbose) 1191 printf("%s: firmware id: %s\n", mgp->name, hdr->version); 1192 1193 /* Copy the inflated firmware to NIC SRAM. */ 1194 for (i = 0; i < *limit; i += 256) { 1195 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i, 1196 (char *)inflate_buffer + i, 1197 min(256U, (unsigned)(*limit - i))); 1198 mb(); 1199 (void) *(int *)(void *)mgp->sram; 1200 mb(); 1201 } 1202 1203 abort: 1204 kmem_free(inflate_buffer, sram_size); 1205 1206 return (status); 1207 1208 } 1209 1210 1211 int 1212 myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd, 1213 myri10ge_cmd_t *data) 1214 { 1215 mcp_cmd_t *buf; 1216 char buf_bytes[sizeof (*buf) + 8]; 1217 volatile mcp_cmd_response_t *response = mgp->cmd; 1218 volatile char *cmd_addr = 1219 (volatile char *)mgp->sram + MXGEFW_ETH_CMD; 1220 int sleep_total = 0; 1221 1222 /* ensure buf is aligned to 8 bytes */ 1223 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 1224 1225 buf->data0 = htonl(data->data0); 1226 buf->data1 = htonl(data->data1); 1227 buf->data2 = htonl(data->data2); 1228 buf->cmd = htonl(cmd); 1229 buf->response_addr.low = mgp->cmd_dma.low; 1230 buf->response_addr.high = mgp->cmd_dma.high; 1231 mutex_enter(&mgp->cmd_lock); 1232 response->result = 0xffffffff; 1233 mb(); 1234 1235 myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf)); 1236 1237 /* wait up to 20ms */ 1238 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 1239 mb(); 1240 if (response->result != 0xffffffff) { 1241 if (response->result == 0) { 1242 data->data0 = ntohl(response->data); 1243 mutex_exit(&mgp->cmd_lock); 1244 return (0); 1245 } else if (ntohl(response->result) 1246 == MXGEFW_CMD_UNKNOWN) { 1247 mutex_exit(&mgp->cmd_lock); 1248 return (ENOSYS); 1249 } else if (ntohl(response->result) 1250 == MXGEFW_CMD_ERROR_UNALIGNED) { 1251 mutex_exit(&mgp->cmd_lock); 1252 return (E2BIG); 1253 } else { 1254 cmn_err(CE_WARN, 1255 "%s: command %d failed, result = %d\n", 1256 mgp->name, cmd, ntohl(response->result)); 1257 mutex_exit(&mgp->cmd_lock); 1258 return (ENXIO); 1259 } 1260 } 1261 drv_usecwait(1000); 1262 } 1263 mutex_exit(&mgp->cmd_lock); 1264 cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n", 1265 mgp->name, cmd, ntohl(response->result)); 1266 return (EAGAIN); 1267 } 1268 1269 /* 1270 * Enable or disable periodic RDMAs from the host to make certain 1271 * chipsets resend dropped PCIe messages 1272 */ 1273 1274 static void 1275 myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable) 1276 { 1277 char buf_bytes[72]; 1278 volatile uint32_t *confirm; 1279 volatile char *submit; 1280 uint32_t *buf; 1281 int i; 1282 1283 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 1284 1285 /* clear confirmation addr */ 1286 confirm = (volatile uint32_t *)mgp->cmd; 1287 *confirm = 0; 1288 mb(); 1289 1290 /* 1291 * send an rdma command to the PCIe engine, and wait for the 1292 * response in the confirmation address. The firmware should 1293 * write a -1 there to indicate it is alive and well 1294 */ 1295 1296 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */ 1297 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */ 1298 buf[2] = htonl(0xffffffff); /* confirm data */ 1299 buf[3] = htonl(mgp->cmd_dma.high); /* dummy addr MSW */ 1300 buf[4] = htonl(mgp->cmd_dma.low); /* dummy addr LSW */ 1301 buf[5] = htonl(enable); /* enable? */ 1302 1303 1304 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA); 1305 1306 myri10ge_pio_copy((char *)submit, buf, 64); 1307 mb(); 1308 drv_usecwait(1000); 1309 mb(); 1310 i = 0; 1311 while (*confirm != 0xffffffff && i < 20) { 1312 drv_usecwait(1000); 1313 i++; 1314 } 1315 if (*confirm != 0xffffffff) { 1316 cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)", 1317 mgp->name, 1318 (enable ? "enable" : "disable"), (void*) confirm, *confirm); 1319 } 1320 } 1321 1322 static int 1323 myri10ge_load_firmware(struct myri10ge_priv *mgp) 1324 { 1325 myri10ge_cmd_t cmd; 1326 volatile uint32_t *confirm; 1327 volatile char *submit; 1328 char buf_bytes[72]; 1329 uint32_t *buf, size; 1330 int status, i; 1331 1332 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 1333 1334 status = myri10ge_load_firmware_from_zlib(mgp, &size); 1335 if (status) { 1336 cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name); 1337 return (status); 1338 } 1339 1340 /* clear confirmation addr */ 1341 confirm = (volatile uint32_t *)mgp->cmd; 1342 *confirm = 0; 1343 mb(); 1344 1345 /* 1346 * send a reload command to the bootstrap MCP, and wait for the 1347 * response in the confirmation address. The firmware should 1348 * write a -1 there to indicate it is alive and well 1349 */ 1350 1351 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */ 1352 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */ 1353 buf[2] = htonl(0xffffffff); /* confirm data */ 1354 1355 /* 1356 * FIX: All newest firmware should un-protect the bottom of 1357 * the sram before handoff. However, the very first interfaces 1358 * do not. Therefore the handoff copy must skip the first 8 bytes 1359 */ 1360 buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */ 1361 buf[4] = htonl(size - 8); /* length of code */ 1362 buf[5] = htonl(8); /* where to copy to */ 1363 buf[6] = htonl(0); /* where to jump to */ 1364 1365 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF); 1366 1367 myri10ge_pio_copy((char *)submit, buf, 64); 1368 mb(); 1369 drv_usecwait(1000); 1370 mb(); 1371 i = 0; 1372 while (*confirm != 0xffffffff && i < 1000) { 1373 drv_usecwait(1000); 1374 i++; 1375 } 1376 if (*confirm != 0xffffffff) { 1377 cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)", 1378 mgp->name, (void *) confirm, *confirm); 1379 1380 return (ENXIO); 1381 } 1382 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 1383 if (status != 0) { 1384 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n", 1385 mgp->name); 1386 return (ENXIO); 1387 } 1388 1389 mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 1390 myri10ge_dummy_rdma(mgp, 1); 1391 return (0); 1392 } 1393 1394 static int 1395 myri10ge_m_unicst(void *arg, const uint8_t *addr) 1396 { 1397 struct myri10ge_priv *mgp = arg; 1398 myri10ge_cmd_t cmd; 1399 int status; 1400 1401 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1402 | (addr[2] << 8) | addr[3]); 1403 1404 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1405 1406 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd); 1407 if (status == 0 && (addr != mgp->mac_addr)) 1408 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr)); 1409 1410 return (status); 1411 } 1412 1413 static int 1414 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause) 1415 { 1416 myri10ge_cmd_t cmd; 1417 int status; 1418 1419 if (pause) 1420 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL, 1421 &cmd); 1422 else 1423 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL, 1424 &cmd); 1425 1426 if (status) { 1427 cmn_err(CE_WARN, "%s: Failed to set flow control mode\n", 1428 mgp->name); 1429 return (ENXIO); 1430 } 1431 mgp->pause = pause; 1432 return (0); 1433 } 1434 1435 static void 1436 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc) 1437 { 1438 myri10ge_cmd_t cmd; 1439 int status; 1440 1441 if (promisc) 1442 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd); 1443 else 1444 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd); 1445 1446 if (status) { 1447 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n", 1448 mgp->name); 1449 } 1450 } 1451 1452 static int 1453 myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type) 1454 { 1455 myri10ge_cmd_t cmd; 1456 int status; 1457 uint32_t len; 1458 void *dmabench; 1459 struct myri10ge_dma_stuff dmabench_dma; 1460 char *test = " "; 1461 1462 /* 1463 * Run a small DMA test. 1464 * The magic multipliers to the length tell the firmware 1465 * tp do DMA read, write, or read+write tests. The 1466 * results are returned in cmd.data0. The upper 16 1467 * bits or the return is the number of transfers completed. 1468 * The lower 16 bits is the time in 0.5us ticks that the 1469 * transfers took to complete 1470 */ 1471 1472 len = mgp->tx_boundary; 1473 1474 dmabench = myri10ge_dma_alloc(mgp->dip, len, 1475 &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr, 1476 DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING, 1477 &dmabench_dma, 1, DDI_DMA_DONTWAIT); 1478 mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0; 1479 if (dmabench == NULL) { 1480 cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name); 1481 return (ENOMEM); 1482 } 1483 1484 cmd.data0 = ntohl(dmabench_dma.low); 1485 cmd.data1 = ntohl(dmabench_dma.high); 1486 cmd.data2 = len * 0x10000; 1487 status = myri10ge_send_cmd(mgp, test_type, &cmd); 1488 if (status != 0) { 1489 test = "read"; 1490 goto abort; 1491 } 1492 mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 1493 1494 cmd.data0 = ntohl(dmabench_dma.low); 1495 cmd.data1 = ntohl(dmabench_dma.high); 1496 cmd.data2 = len * 0x1; 1497 status = myri10ge_send_cmd(mgp, test_type, &cmd); 1498 if (status != 0) { 1499 test = "write"; 1500 goto abort; 1501 } 1502 mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 1503 1504 cmd.data0 = ntohl(dmabench_dma.low); 1505 cmd.data1 = ntohl(dmabench_dma.high); 1506 cmd.data2 = len * 0x10001; 1507 status = myri10ge_send_cmd(mgp, test_type, &cmd); 1508 if (status != 0) { 1509 test = "read/write"; 1510 goto abort; 1511 } 1512 mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 1513 (cmd.data0 & 0xffff); 1514 1515 1516 abort: 1517 myri10ge_dma_free(&dmabench_dma); 1518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 1519 cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name, 1520 test); 1521 return (status); 1522 } 1523 1524 static int 1525 myri10ge_reset(struct myri10ge_priv *mgp) 1526 { 1527 myri10ge_cmd_t cmd; 1528 struct myri10ge_nic_stat *ethstat; 1529 struct myri10ge_slice_state *ss; 1530 int i, status; 1531 size_t bytes; 1532 1533 /* send a reset command to the card to see if it is alive */ 1534 (void) memset(&cmd, 0, sizeof (cmd)); 1535 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd); 1536 if (status != 0) { 1537 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 1538 return (ENXIO); 1539 } 1540 1541 /* Now exchange information about interrupts */ 1542 1543 bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry); 1544 cmd.data0 = (uint32_t)bytes; 1545 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1546 1547 /* 1548 * Even though we already know how many slices are supported 1549 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES 1550 * has magic side effects, and must be called after a reset. 1551 * It must be called prior to calling any RSS related cmds, 1552 * including assigning an interrupt queue for anything but 1553 * slice 0. It must also be called *after* 1554 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1555 * the firmware to compute offsets. 1556 */ 1557 1558 if (mgp->num_slices > 1) { 1559 1560 /* ask the maximum number of slices it supports */ 1561 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1562 &cmd); 1563 if (status != 0) { 1564 cmn_err(CE_WARN, 1565 "%s: failed to get number of slices\n", 1566 mgp->name); 1567 return (status); 1568 } 1569 1570 /* 1571 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1572 * to setting up the interrupt queue DMA 1573 */ 1574 1575 cmd.data0 = mgp->num_slices; 1576 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE | 1577 MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1578 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1579 &cmd); 1580 if (status != 0) { 1581 cmn_err(CE_WARN, 1582 "%s: failed to set number of slices\n", 1583 mgp->name); 1584 return (status); 1585 } 1586 } 1587 for (i = 0; i < mgp->num_slices; i++) { 1588 ss = &mgp->ss[i]; 1589 cmd.data0 = ntohl(ss->rx_done.dma.low); 1590 cmd.data1 = ntohl(ss->rx_done.dma.high); 1591 cmd.data2 = i; 1592 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, 1593 &cmd); 1594 }; 1595 1596 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1597 for (i = 0; i < mgp->num_slices; i++) { 1598 ss = &mgp->ss[i]; 1599 ss->irq_claim = (volatile unsigned int *) 1600 (void *)(mgp->sram + cmd.data0 + 8 * i); 1601 } 1602 1603 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 1604 status |= myri10ge_send_cmd(mgp, 1605 MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); 1606 mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0); 1607 } 1608 1609 status |= myri10ge_send_cmd(mgp, 1610 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1611 mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0); 1612 1613 if (status != 0) { 1614 cmn_err(CE_WARN, "%s: failed set interrupt parameters\n", 1615 mgp->name); 1616 return (status); 1617 } 1618 1619 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay); 1620 (void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST); 1621 1622 /* reset mcp/driver shared state back to 0 */ 1623 1624 for (i = 0; i < mgp->num_slices; i++) { 1625 ss = &mgp->ss[i]; 1626 bytes = mgp->max_intr_slots * 1627 sizeof (*mgp->ss[0].rx_done.entry); 1628 (void) memset(ss->rx_done.entry, 0, bytes); 1629 ss->tx.req = 0; 1630 ss->tx.done = 0; 1631 ss->tx.pkt_done = 0; 1632 ss->rx_big.cnt = 0; 1633 ss->rx_small.cnt = 0; 1634 ss->rx_done.idx = 0; 1635 ss->rx_done.cnt = 0; 1636 ss->rx_token = 0; 1637 ss->tx.watchdog_done = 0; 1638 ss->tx.watchdog_req = 0; 1639 ss->tx.active = 0; 1640 ss->tx.activate = 0; 1641 } 1642 mgp->watchdog_rx_pause = 0; 1643 if (mgp->ksp_stat != NULL) { 1644 ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data; 1645 ethstat->link_changes.value.ul = 0; 1646 } 1647 status = myri10ge_m_unicst(mgp, mgp->mac_addr); 1648 myri10ge_change_promisc(mgp, 0); 1649 (void) myri10ge_change_pause(mgp, mgp->pause); 1650 return (status); 1651 } 1652 1653 static int 1654 myri10ge_init_toeplitz(struct myri10ge_priv *mgp) 1655 { 1656 myri10ge_cmd_t cmd; 1657 int i, b, s, t, j; 1658 int status; 1659 uint32_t k[8]; 1660 uint32_t tmp; 1661 uint8_t *key; 1662 1663 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 1664 &cmd); 1665 if (status != 0) { 1666 cmn_err(CE_WARN, "%s: failed to get rss key\n", 1667 mgp->name); 1668 return (EIO); 1669 } 1670 myri10ge_pio_copy32(mgp->rss_key, 1671 (uint32_t *)(void*)((char *)mgp->sram + cmd.data0), 1672 sizeof (mgp->rss_key)); 1673 1674 mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256, 1675 KM_SLEEP); 1676 key = (uint8_t *)mgp->rss_key; 1677 t = 0; 1678 for (b = 0; b < 12; b++) { 1679 for (s = 0; s < 8; s++) { 1680 /* Bits: b*8+s, ..., b*8+s+31 */ 1681 k[s] = 0; 1682 for (j = 0; j < 32; j++) { 1683 int bit = b*8+s+j; 1684 bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7))); 1685 k[s] |= bit << (31 - j); 1686 } 1687 } 1688 1689 for (i = 0; i <= 0xff; i++) { 1690 tmp = 0; 1691 if (i & (1 << 7)) { tmp ^= k[0]; } 1692 if (i & (1 << 6)) { tmp ^= k[1]; } 1693 if (i & (1 << 5)) { tmp ^= k[2]; } 1694 if (i & (1 << 4)) { tmp ^= k[3]; } 1695 if (i & (1 << 3)) { tmp ^= k[4]; } 1696 if (i & (1 << 2)) { tmp ^= k[5]; } 1697 if (i & (1 << 1)) { tmp ^= k[6]; } 1698 if (i & (1 << 0)) { tmp ^= k[7]; } 1699 mgp->toeplitz_hash_table[t++] = tmp; 1700 } 1701 } 1702 return (0); 1703 } 1704 1705 static inline struct myri10ge_slice_state * 1706 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip) 1707 { 1708 struct tcphdr *hdr; 1709 uint32_t saddr, daddr; 1710 uint32_t hash, slice; 1711 uint32_t *table = mgp->toeplitz_hash_table; 1712 uint16_t src, dst; 1713 1714 /* 1715 * Note hashing order is reversed from how it is done 1716 * in the NIC, so as to generate the same hash value 1717 * for the connection to try to keep connections CPU local 1718 */ 1719 1720 /* hash on IPv4 src/dst address */ 1721 saddr = ntohl(ip->ip_src.s_addr); 1722 daddr = ntohl(ip->ip_dst.s_addr); 1723 hash = table[(256 * 0) + ((daddr >> 24) & 0xff)]; 1724 hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)]; 1725 hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)]; 1726 hash ^= table[(256 * 3) + ((daddr) & 0xff)]; 1727 hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)]; 1728 hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)]; 1729 hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)]; 1730 hash ^= table[(256 * 7) + ((saddr) & 0xff)]; 1731 /* hash on TCP port, if required */ 1732 if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) && 1733 ip->ip_p == IPPROTO_TCP) { 1734 hdr = (struct tcphdr *)(void *) 1735 (((uint8_t *)ip) + (ip->ip_hl << 2)); 1736 src = ntohs(hdr->th_sport); 1737 dst = ntohs(hdr->th_dport); 1738 1739 hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)]; 1740 hash ^= table[(256 * 9) + ((dst) & 0xff)]; 1741 hash ^= table[(256 * 10) + ((src >> 8) & 0xff)]; 1742 hash ^= table[(256 * 11) + ((src) & 0xff)]; 1743 } 1744 slice = (mgp->num_slices - 1) & hash; 1745 return (&mgp->ss[slice]); 1746 1747 } 1748 1749 static inline struct myri10ge_slice_state * 1750 myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip) 1751 { 1752 struct tcphdr *hdr; 1753 uint32_t slice, hash_val; 1754 1755 1756 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) { 1757 return (&mgp->ss[0]); 1758 } 1759 hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2)); 1760 1761 /* 1762 * Use the second byte of the *destination* address for 1763 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing 1764 */ 1765 hash_val = ntohs(hdr->th_dport) & 0xff; 1766 if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT) 1767 hash_val += ntohs(hdr->th_sport) & 0xff; 1768 1769 slice = (mgp->num_slices - 1) & hash_val; 1770 return (&mgp->ss[slice]); 1771 } 1772 1773 static inline struct myri10ge_slice_state * 1774 myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp) 1775 { 1776 unsigned int slice = 0; 1777 struct ether_header *eh; 1778 struct ether_vlan_header *vh; 1779 struct ip *ip; 1780 int ehl, ihl; 1781 1782 if (mgp->num_slices == 1) 1783 return (&mgp->ss[0]); 1784 1785 if (myri10ge_tx_hash == 0) { 1786 slice = CPU->cpu_id & (mgp->num_slices - 1); 1787 return (&mgp->ss[slice]); 1788 } 1789 1790 /* 1791 * ensure it is a TCP or UDP over IPv4 packet, and that the 1792 * headers are in the 1st mblk. Otherwise, punt 1793 */ 1794 ehl = sizeof (*eh); 1795 ihl = sizeof (*ip); 1796 if ((MBLKL(mp)) < (ehl + ihl + 8)) 1797 return (&mgp->ss[0]); 1798 eh = (struct ether_header *)(void *)mp->b_rptr; 1799 ip = (struct ip *)(void *)(eh + 1); 1800 if (eh->ether_type != BE_16(ETHERTYPE_IP)) { 1801 if (eh->ether_type != BE_16(ETHERTYPE_VLAN)) 1802 return (&mgp->ss[0]); 1803 vh = (struct ether_vlan_header *)(void *)mp->b_rptr; 1804 if (vh->ether_type != BE_16(ETHERTYPE_IP)) 1805 return (&mgp->ss[0]); 1806 ehl += 4; 1807 ip = (struct ip *)(void *)(vh + 1); 1808 } 1809 ihl = ip->ip_hl << 2; 1810 if (MBLKL(mp) < (ehl + ihl + 8)) 1811 return (&mgp->ss[0]); 1812 switch (myri10ge_rss_hash) { 1813 case MXGEFW_RSS_HASH_TYPE_IPV4: 1814 /* fallthru */ 1815 case MXGEFW_RSS_HASH_TYPE_TCP_IPV4: 1816 /* fallthru */ 1817 case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4): 1818 return (myri10ge_toeplitz_send_hash(mgp, ip)); 1819 case MXGEFW_RSS_HASH_TYPE_SRC_PORT: 1820 /* fallthru */ 1821 case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT: 1822 return (myri10ge_simple_send_hash(mgp, ip)); 1823 default: 1824 break; 1825 } 1826 return (&mgp->ss[0]); 1827 } 1828 1829 static int 1830 myri10ge_setup_slice(struct myri10ge_slice_state *ss) 1831 { 1832 struct myri10ge_priv *mgp = ss->mgp; 1833 myri10ge_cmd_t cmd; 1834 int tx_ring_size, rx_ring_size; 1835 int tx_ring_entries, rx_ring_entries; 1836 int slice, status; 1837 int allocated, idx; 1838 size_t bytes; 1839 1840 slice = ss - mgp->ss; 1841 cmd.data0 = slice; 1842 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 1843 tx_ring_size = cmd.data0; 1844 cmd.data0 = slice; 1845 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 1846 if (status != 0) 1847 return (status); 1848 rx_ring_size = cmd.data0; 1849 1850 tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send); 1851 rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr); 1852 ss->tx.mask = tx_ring_entries - 1; 1853 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 1854 1855 /* get the lanai pointers to the send and receive rings */ 1856 1857 cmd.data0 = slice; 1858 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 1859 ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0); 1860 if (mgp->num_slices > 1) { 1861 ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice; 1862 ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP + 1863 64 * slice; 1864 } else { 1865 ss->tx.go = NULL; 1866 ss->tx.stop = NULL; 1867 } 1868 1869 cmd.data0 = slice; 1870 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 1871 ss->rx_small.lanai = (mcp_kreq_ether_recv_t *) 1872 (void *)(mgp->sram + cmd.data0); 1873 1874 cmd.data0 = slice; 1875 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 1876 ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *) 1877 (mgp->sram + cmd.data0); 1878 1879 if (status != 0) { 1880 cmn_err(CE_WARN, 1881 "%s: failed to get ring sizes or locations\n", mgp->name); 1882 return (status); 1883 } 1884 1885 status = ENOMEM; 1886 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 1887 ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP); 1888 if (ss->rx_small.shadow == NULL) 1889 goto abort; 1890 (void) memset(ss->rx_small.shadow, 0, bytes); 1891 1892 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 1893 ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP); 1894 if (ss->rx_big.shadow == NULL) 1895 goto abort_with_rx_small_shadow; 1896 (void) memset(ss->rx_big.shadow, 0, bytes); 1897 1898 /* allocate the host info rings */ 1899 1900 bytes = tx_ring_entries * sizeof (*ss->tx.info); 1901 ss->tx.info = kmem_zalloc(bytes, KM_SLEEP); 1902 if (ss->tx.info == NULL) 1903 goto abort_with_rx_big_shadow; 1904 (void) memset(ss->tx.info, 0, bytes); 1905 1906 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 1907 ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP); 1908 if (ss->rx_small.info == NULL) 1909 goto abort_with_tx_info; 1910 (void) memset(ss->rx_small.info, 0, bytes); 1911 1912 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 1913 ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP); 1914 if (ss->rx_big.info == NULL) 1915 goto abort_with_rx_small_info; 1916 (void) memset(ss->rx_big.info, 0, bytes); 1917 1918 ss->tx.stall = ss->tx.sched = 0; 1919 ss->tx.stall_early = ss->tx.stall_late = 0; 1920 1921 ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) / 1922 (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD)); 1923 1924 allocated = myri10ge_add_jbufs(ss, 1925 myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1); 1926 if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) { 1927 cmn_err(CE_WARN, 1928 "%s: Could not allocate enough receive buffers (%d/%d)\n", 1929 mgp->name, allocated, 1930 myri10ge_bigbufs_initial + ss->jbufs_for_smalls); 1931 goto abort_with_jumbos; 1932 } 1933 1934 myri10ge_carve_up_jbufs_into_small_ring(ss); 1935 ss->j_rx_cnt = 0; 1936 1937 mutex_enter(&ss->jpool.mtx); 1938 if (allocated < rx_ring_entries) 1939 ss->jpool.low_water = allocated / 4; 1940 else 1941 ss->jpool.low_water = rx_ring_entries / 2; 1942 1943 /* 1944 * invalidate the big receive ring in case we do not 1945 * allocate sufficient jumbos to fill it 1946 */ 1947 (void) memset(ss->rx_big.shadow, 1, 1948 (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0])); 1949 for (idx = 7; idx <= ss->rx_big.mask; idx += 8) { 1950 myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7], 1951 &ss->rx_big.shadow[idx - 7]); 1952 mb(); 1953 } 1954 1955 1956 myri10ge_restock_jumbos(ss); 1957 1958 for (idx = 7; idx <= ss->rx_small.mask; idx += 8) { 1959 myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7], 1960 &ss->rx_small.shadow[idx - 7]); 1961 mb(); 1962 } 1963 ss->rx_small.cnt = ss->rx_small.mask + 1; 1964 1965 mutex_exit(&ss->jpool.mtx); 1966 1967 status = myri10ge_prepare_tx_ring(ss); 1968 1969 if (status != 0) 1970 goto abort_with_small_jbufs; 1971 1972 cmd.data0 = ntohl(ss->fw_stats_dma.low); 1973 cmd.data1 = ntohl(ss->fw_stats_dma.high); 1974 cmd.data2 = sizeof (mcp_irq_data_t); 1975 cmd.data2 |= (slice << 16); 1976 bzero(ss->fw_stats, sizeof (*ss->fw_stats)); 1977 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 1978 if (status == ENOSYS) { 1979 cmd.data0 = ntohl(ss->fw_stats_dma.low) + 1980 offsetof(mcp_irq_data_t, send_done_count); 1981 cmd.data1 = ntohl(ss->fw_stats_dma.high); 1982 status = myri10ge_send_cmd(mgp, 1983 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd); 1984 } 1985 if (status) { 1986 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name); 1987 goto abort_with_tx; 1988 } 1989 1990 return (0); 1991 1992 abort_with_tx: 1993 myri10ge_unprepare_tx_ring(ss); 1994 1995 abort_with_small_jbufs: 1996 myri10ge_release_small_jbufs(ss); 1997 1998 abort_with_jumbos: 1999 if (allocated != 0) { 2000 mutex_enter(&ss->jpool.mtx); 2001 ss->jpool.low_water = 0; 2002 mutex_exit(&ss->jpool.mtx); 2003 myri10ge_unstock_jumbos(ss); 2004 myri10ge_remove_jbufs(ss); 2005 } 2006 2007 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2008 kmem_free(ss->rx_big.info, bytes); 2009 2010 abort_with_rx_small_info: 2011 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2012 kmem_free(ss->rx_small.info, bytes); 2013 2014 abort_with_tx_info: 2015 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2016 kmem_free(ss->tx.info, bytes); 2017 2018 abort_with_rx_big_shadow: 2019 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2020 kmem_free(ss->rx_big.shadow, bytes); 2021 2022 abort_with_rx_small_shadow: 2023 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2024 kmem_free(ss->rx_small.shadow, bytes); 2025 abort: 2026 return (status); 2027 2028 } 2029 2030 static void 2031 myri10ge_teardown_slice(struct myri10ge_slice_state *ss) 2032 { 2033 int tx_ring_entries, rx_ring_entries; 2034 size_t bytes; 2035 2036 /* ignore slices that have not been fully setup */ 2037 if (ss->tx.cp == NULL) 2038 return; 2039 /* Free the TX copy buffers */ 2040 myri10ge_unprepare_tx_ring(ss); 2041 2042 /* stop passing returned buffers to firmware */ 2043 2044 mutex_enter(&ss->jpool.mtx); 2045 ss->jpool.low_water = 0; 2046 mutex_exit(&ss->jpool.mtx); 2047 myri10ge_release_small_jbufs(ss); 2048 2049 /* Release the free jumbo frame pool */ 2050 myri10ge_unstock_jumbos(ss); 2051 myri10ge_remove_jbufs(ss); 2052 2053 rx_ring_entries = ss->rx_big.mask + 1; 2054 tx_ring_entries = ss->tx.mask + 1; 2055 2056 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2057 kmem_free(ss->rx_big.info, bytes); 2058 2059 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2060 kmem_free(ss->rx_small.info, bytes); 2061 2062 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2063 kmem_free(ss->tx.info, bytes); 2064 2065 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2066 kmem_free(ss->rx_big.shadow, bytes); 2067 2068 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2069 kmem_free(ss->rx_small.shadow, bytes); 2070 2071 } 2072 static int 2073 myri10ge_start_locked(struct myri10ge_priv *mgp) 2074 { 2075 myri10ge_cmd_t cmd; 2076 int status, big_pow2, i; 2077 volatile uint8_t *itable; 2078 2079 status = DDI_SUCCESS; 2080 /* Allocate DMA resources and receive buffers */ 2081 2082 status = myri10ge_reset(mgp); 2083 if (status != 0) { 2084 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 2085 return (DDI_FAILURE); 2086 } 2087 2088 if (mgp->num_slices > 1) { 2089 cmd.data0 = mgp->num_slices; 2090 cmd.data1 = 1; /* use MSI-X */ 2091 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2092 &cmd); 2093 if (status != 0) { 2094 cmn_err(CE_WARN, 2095 "%s: failed to set number of slices\n", 2096 mgp->name); 2097 goto abort_with_nothing; 2098 } 2099 /* setup the indirection table */ 2100 cmd.data0 = mgp->num_slices; 2101 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2102 &cmd); 2103 2104 status |= myri10ge_send_cmd(mgp, 2105 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 2106 if (status != 0) { 2107 cmn_err(CE_WARN, 2108 "%s: failed to setup rss tables\n", mgp->name); 2109 } 2110 2111 /* just enable an identity mapping */ 2112 itable = mgp->sram + cmd.data0; 2113 for (i = 0; i < mgp->num_slices; i++) 2114 itable[i] = (uint8_t)i; 2115 2116 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) { 2117 status = myri10ge_init_toeplitz(mgp); 2118 if (status != 0) { 2119 cmn_err(CE_WARN, "%s: failed to setup " 2120 "toeplitz tx hash table", mgp->name); 2121 goto abort_with_nothing; 2122 } 2123 } 2124 cmd.data0 = 1; 2125 cmd.data1 = myri10ge_rss_hash; 2126 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2127 &cmd); 2128 if (status != 0) { 2129 cmn_err(CE_WARN, 2130 "%s: failed to enable slices\n", mgp->name); 2131 goto abort_with_toeplitz; 2132 } 2133 } 2134 2135 for (i = 0; i < mgp->num_slices; i++) { 2136 status = myri10ge_setup_slice(&mgp->ss[i]); 2137 if (status != 0) 2138 goto abort_with_slices; 2139 } 2140 2141 /* 2142 * Tell the MCP how many buffers it has, and to 2143 * bring the ethernet interface up 2144 * 2145 * Firmware needs the big buff size as a power of 2. Lie and 2146 * tell it the buffer is larger, because we only use 1 2147 * buffer/pkt, and the mtu will prevent overruns 2148 */ 2149 big_pow2 = myri10ge_mtu + MXGEFW_PAD; 2150 while (!ISP2(big_pow2)) 2151 big_pow2++; 2152 2153 /* now give firmware buffers sizes, and MTU */ 2154 cmd.data0 = myri10ge_mtu; 2155 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd); 2156 cmd.data0 = myri10ge_small_bytes; 2157 status |= 2158 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 2159 cmd.data0 = big_pow2; 2160 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2161 if (status) { 2162 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name); 2163 goto abort_with_slices; 2164 } 2165 2166 2167 cmd.data0 = 1; 2168 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd); 2169 if (status) { 2170 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n", 2171 mgp->name, status); 2172 } else { 2173 mgp->features |= MYRI10GE_TSO; 2174 } 2175 2176 mgp->link_state = -1; 2177 mgp->rdma_tags_available = 15; 2178 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd); 2179 if (status) { 2180 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name); 2181 goto abort_with_slices; 2182 } 2183 mgp->running = MYRI10GE_ETH_RUNNING; 2184 return (DDI_SUCCESS); 2185 2186 abort_with_slices: 2187 for (i = 0; i < mgp->num_slices; i++) 2188 myri10ge_teardown_slice(&mgp->ss[i]); 2189 2190 mgp->running = MYRI10GE_ETH_STOPPED; 2191 2192 abort_with_toeplitz: 2193 if (mgp->toeplitz_hash_table != NULL) { 2194 kmem_free(mgp->toeplitz_hash_table, 2195 sizeof (uint32_t) * 12 * 256); 2196 mgp->toeplitz_hash_table = NULL; 2197 } 2198 2199 abort_with_nothing: 2200 return (DDI_FAILURE); 2201 } 2202 2203 static void 2204 myri10ge_stop_locked(struct myri10ge_priv *mgp) 2205 { 2206 int status, old_down_cnt; 2207 myri10ge_cmd_t cmd; 2208 int wait_time = 10; 2209 int i, polling; 2210 2211 old_down_cnt = mgp->down_cnt; 2212 mb(); 2213 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2214 if (status) { 2215 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 2216 } 2217 2218 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2219 delay(1 * drv_usectohz(1000000)); 2220 wait_time--; 2221 if (wait_time == 0) 2222 break; 2223 } 2224 again: 2225 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2226 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name); 2227 for (i = 0; i < mgp->num_slices; i++) { 2228 /* 2229 * take and release the rx lock to ensure 2230 * that no interrupt thread is blocked 2231 * elsewhere in the stack, preventing 2232 * completion 2233 */ 2234 2235 mutex_enter(&mgp->ss[i].rx_lock); 2236 printf("%s: slice %d rx irq idle\n", 2237 mgp->name, i); 2238 mutex_exit(&mgp->ss[i].rx_lock); 2239 2240 /* verify that the poll handler is inactive */ 2241 mutex_enter(&mgp->ss->poll_lock); 2242 polling = mgp->ss->rx_polling; 2243 mutex_exit(&mgp->ss->poll_lock); 2244 if (polling) { 2245 printf("%s: slice %d is polling\n", 2246 mgp->name, i); 2247 delay(1 * drv_usectohz(1000000)); 2248 goto again; 2249 } 2250 } 2251 delay(1 * drv_usectohz(1000000)); 2252 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2253 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name); 2254 } 2255 } 2256 2257 for (i = 0; i < mgp->num_slices; i++) 2258 myri10ge_teardown_slice(&mgp->ss[i]); 2259 2260 if (mgp->toeplitz_hash_table != NULL) { 2261 kmem_free(mgp->toeplitz_hash_table, 2262 sizeof (uint32_t) * 12 * 256); 2263 mgp->toeplitz_hash_table = NULL; 2264 } 2265 mgp->running = MYRI10GE_ETH_STOPPED; 2266 } 2267 2268 static int 2269 myri10ge_m_start(void *arg) 2270 { 2271 struct myri10ge_priv *mgp = arg; 2272 int status; 2273 2274 mutex_enter(&mgp->intrlock); 2275 2276 if (mgp->running != MYRI10GE_ETH_STOPPED) { 2277 mutex_exit(&mgp->intrlock); 2278 return (DDI_FAILURE); 2279 } 2280 status = myri10ge_start_locked(mgp); 2281 mutex_exit(&mgp->intrlock); 2282 2283 if (status != DDI_SUCCESS) 2284 return (status); 2285 2286 /* start the watchdog timer */ 2287 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 2288 mgp->timer_ticks); 2289 return (DDI_SUCCESS); 2290 2291 } 2292 2293 static void 2294 myri10ge_m_stop(void *arg) 2295 { 2296 struct myri10ge_priv *mgp = arg; 2297 2298 mutex_enter(&mgp->intrlock); 2299 /* if the device not running give up */ 2300 if (mgp->running != MYRI10GE_ETH_RUNNING) { 2301 mutex_exit(&mgp->intrlock); 2302 return; 2303 } 2304 2305 mgp->running = MYRI10GE_ETH_STOPPING; 2306 mutex_exit(&mgp->intrlock); 2307 (void) untimeout(mgp->timer_id); 2308 mutex_enter(&mgp->intrlock); 2309 myri10ge_stop_locked(mgp); 2310 mutex_exit(&mgp->intrlock); 2311 2312 } 2313 2314 static inline void 2315 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) 2316 { 2317 struct ether_header *eh; 2318 struct ip *ip; 2319 struct ip6_hdr *ip6; 2320 uint32_t start, stuff, end, partial, hdrlen; 2321 2322 2323 csum = ntohs((uint16_t)csum); 2324 eh = (struct ether_header *)(void *)mp->b_rptr; 2325 hdrlen = sizeof (*eh); 2326 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2327 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2328 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2329 s->brdcstrcv++; 2330 else 2331 s->multircv++; 2332 } 2333 2334 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 2335 /* 2336 * fix checksum by subtracting 4 bytes after what the 2337 * firmware thought was the end of the ether hdr 2338 */ 2339 partial = *(uint32_t *) 2340 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE); 2341 csum += ~partial; 2342 csum += (csum < ~partial); 2343 csum = (csum >> 16) + (csum & 0xFFFF); 2344 csum = (csum >> 16) + (csum & 0xFFFF); 2345 hdrlen += VLAN_TAGSZ; 2346 } 2347 2348 if (eh->ether_type == BE_16(ETHERTYPE_IP)) { 2349 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen); 2350 start = ip->ip_hl << 2; 2351 2352 if (ip->ip_p == IPPROTO_TCP) 2353 stuff = start + offsetof(struct tcphdr, th_sum); 2354 else if (ip->ip_p == IPPROTO_UDP) 2355 stuff = start + offsetof(struct udphdr, uh_sum); 2356 else 2357 return; 2358 end = ntohs(ip->ip_len); 2359 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) { 2360 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen); 2361 start = sizeof (*ip6); 2362 if (ip6->ip6_nxt == IPPROTO_TCP) { 2363 stuff = start + offsetof(struct tcphdr, th_sum); 2364 } else if (ip6->ip6_nxt == IPPROTO_UDP) 2365 stuff = start + offsetof(struct udphdr, uh_sum); 2366 else 2367 return; 2368 end = start + ntohs(ip6->ip6_plen); 2369 /* 2370 * IPv6 headers do not contain a checksum, and hence 2371 * do not checksum to zero, so they don't "fall out" 2372 * of the partial checksum calculation like IPv4 2373 * headers do. We need to fix the partial checksum by 2374 * subtracting the checksum of the IPv6 header. 2375 */ 2376 2377 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6)); 2378 csum += ~partial; 2379 csum += (csum < ~partial); 2380 csum = (csum >> 16) + (csum & 0xFFFF); 2381 csum = (csum >> 16) + (csum & 0xFFFF); 2382 } else { 2383 return; 2384 } 2385 2386 if (MBLKL(mp) > hdrlen + end) { 2387 /* padded frame, so hw csum may be invalid */ 2388 return; 2389 } 2390 2391 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); 2392 } 2393 2394 static mblk_t * 2395 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len, 2396 uint32_t csum) 2397 { 2398 mblk_t *mp; 2399 myri10ge_rx_ring_t *rx; 2400 int idx; 2401 2402 rx = &ss->rx_small; 2403 idx = rx->cnt & rx->mask; 2404 ss->rx_small.cnt++; 2405 2406 /* allocate a new buffer to pass up the stack */ 2407 mp = allocb(len + MXGEFW_PAD, 0); 2408 if (mp == NULL) { 2409 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf); 2410 goto abort; 2411 } 2412 bcopy(ss->rx_small.info[idx].ptr, 2413 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2414 mp->b_wptr += len + MXGEFW_PAD; 2415 mp->b_rptr += MXGEFW_PAD; 2416 2417 ss->rx_stats.ibytes += len; 2418 ss->rx_stats.ipackets += 1; 2419 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2420 2421 abort: 2422 if ((idx & 7) == 7) { 2423 myri10ge_submit_8rx(&rx->lanai[idx - 7], 2424 &rx->shadow[idx - 7]); 2425 } 2426 2427 return (mp); 2428 } 2429 2430 2431 static mblk_t * 2432 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len, 2433 uint32_t csum) 2434 { 2435 struct myri10ge_jpool_stuff *jpool; 2436 struct myri10ge_jpool_entry *j; 2437 mblk_t *mp; 2438 int idx, num_owned_by_mcp; 2439 2440 jpool = &ss->jpool; 2441 idx = ss->j_rx_cnt & ss->rx_big.mask; 2442 j = ss->rx_big.info[idx].j; 2443 2444 if (j == NULL) { 2445 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n", 2446 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt); 2447 return (NULL); 2448 } 2449 2450 2451 ss->rx_big.info[idx].j = NULL; 2452 ss->j_rx_cnt++; 2453 2454 2455 /* 2456 * Check to see if we are low on rx buffers. 2457 * Note that we must leave at least 8 free so there are 2458 * enough to free in a single 64-byte write. 2459 */ 2460 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2461 if (num_owned_by_mcp < jpool->low_water) { 2462 mutex_enter(&jpool->mtx); 2463 myri10ge_restock_jumbos(ss); 2464 mutex_exit(&jpool->mtx); 2465 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2466 /* if we are still low, then we have to copy */ 2467 if (num_owned_by_mcp < 16) { 2468 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy); 2469 /* allocate a new buffer to pass up the stack */ 2470 mp = allocb(len + MXGEFW_PAD, 0); 2471 if (mp == NULL) { 2472 goto abort; 2473 } 2474 bcopy(j->buf, 2475 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2476 myri10ge_jfree_rtn(j); 2477 /* push buffer back to NIC */ 2478 mutex_enter(&jpool->mtx); 2479 myri10ge_restock_jumbos(ss); 2480 mutex_exit(&jpool->mtx); 2481 goto set_len; 2482 } 2483 } 2484 2485 /* loan our buffer to the stack */ 2486 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func); 2487 if (mp == NULL) { 2488 goto abort; 2489 } 2490 2491 set_len: 2492 mp->b_rptr += MXGEFW_PAD; 2493 mp->b_wptr = ((unsigned char *) mp->b_rptr + len); 2494 2495 ss->rx_stats.ibytes += len; 2496 ss->rx_stats.ipackets += 1; 2497 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2498 2499 return (mp); 2500 2501 abort: 2502 myri10ge_jfree_rtn(j); 2503 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf); 2504 return (NULL); 2505 } 2506 2507 /* 2508 * Free all transmit buffers up until the specified index 2509 */ 2510 static inline void 2511 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index) 2512 { 2513 myri10ge_tx_ring_t *tx; 2514 struct myri10ge_tx_dma_handle_head handles; 2515 int idx; 2516 int limit = 0; 2517 2518 tx = &ss->tx; 2519 handles.head = NULL; 2520 handles.tail = NULL; 2521 while (tx->pkt_done != (int)mcp_index) { 2522 idx = tx->done & tx->mask; 2523 2524 /* 2525 * mblk & DMA handle attached only to first slot 2526 * per buffer in the packet 2527 */ 2528 2529 if (tx->info[idx].m) { 2530 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h); 2531 tx->info[idx].handle->next = handles.head; 2532 handles.head = tx->info[idx].handle; 2533 if (handles.tail == NULL) 2534 handles.tail = tx->info[idx].handle; 2535 freeb(tx->info[idx].m); 2536 tx->info[idx].m = 0; 2537 tx->info[idx].handle = 0; 2538 } 2539 if (tx->info[idx].ostat.opackets != 0) { 2540 tx->stats.multixmt += tx->info[idx].ostat.multixmt; 2541 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt; 2542 tx->stats.obytes += tx->info[idx].ostat.obytes; 2543 tx->stats.opackets += tx->info[idx].ostat.opackets; 2544 tx->info[idx].stat.un.all = 0; 2545 tx->pkt_done++; 2546 } 2547 2548 tx->done++; 2549 /* 2550 * if we stalled the queue, wake it. But Wait until 2551 * we have at least 1/2 our slots free. 2552 */ 2553 if ((tx->req - tx->done) < (tx->mask >> 1) && 2554 tx->stall != tx->sched) { 2555 mutex_enter(&ss->tx.lock); 2556 tx->sched = tx->stall; 2557 mutex_exit(&ss->tx.lock); 2558 mac_tx_ring_update(ss->mgp->mh, tx->rh); 2559 } 2560 2561 /* limit potential for livelock */ 2562 if (unlikely(++limit > 2 * tx->mask)) 2563 break; 2564 } 2565 if (tx->req == tx->done && tx->stop != NULL) { 2566 /* 2567 * Nic has sent all pending requests, allow it 2568 * to stop polling this queue 2569 */ 2570 mutex_enter(&tx->lock); 2571 if (tx->req == tx->done && tx->active) { 2572 *(int *)(void *)tx->stop = 1; 2573 tx->active = 0; 2574 mb(); 2575 } 2576 mutex_exit(&tx->lock); 2577 } 2578 if (handles.head != NULL) 2579 myri10ge_free_tx_handles(tx, &handles); 2580 } 2581 2582 static void 2583 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl) 2584 { 2585 mbl->head = NULL; 2586 mbl->tail = &mbl->head; 2587 mbl->cnt = 0; 2588 } 2589 2590 /*ARGSUSED*/ 2591 void 2592 myri10ge_mbl_append(struct myri10ge_slice_state *ss, 2593 struct myri10ge_mblk_list *mbl, mblk_t *mp) 2594 { 2595 *(mbl->tail) = mp; 2596 mbl->tail = &mp->b_next; 2597 mp->b_next = NULL; 2598 mbl->cnt++; 2599 } 2600 2601 2602 static inline void 2603 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, 2604 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop) 2605 { 2606 myri10ge_rx_done_t *rx_done = &ss->rx_done; 2607 struct myri10ge_priv *mgp = ss->mgp; 2608 mblk_t *mp; 2609 struct lro_entry *lro; 2610 uint16_t length; 2611 uint16_t checksum; 2612 2613 2614 while (rx_done->entry[rx_done->idx].length != 0) { 2615 if (unlikely (*stop)) { 2616 break; 2617 } 2618 length = ntohs(rx_done->entry[rx_done->idx].length); 2619 length &= (~MXGEFW_RSS_HASH_MASK); 2620 2621 /* limit potential for livelock */ 2622 limit -= length; 2623 if (unlikely(limit < 0)) 2624 break; 2625 2626 rx_done->entry[rx_done->idx].length = 0; 2627 checksum = ntohs(rx_done->entry[rx_done->idx].checksum); 2628 if (length <= myri10ge_small_bytes) 2629 mp = myri10ge_rx_done_small(ss, length, checksum); 2630 else 2631 mp = myri10ge_rx_done_big(ss, length, checksum); 2632 if (mp != NULL) { 2633 if (!myri10ge_lro || 2634 0 != myri10ge_lro_rx(ss, mp, checksum, mbl)) 2635 myri10ge_mbl_append(ss, mbl, mp); 2636 } 2637 rx_done->cnt++; 2638 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1); 2639 } 2640 while (ss->lro_active != NULL) { 2641 lro = ss->lro_active; 2642 ss->lro_active = lro->next; 2643 myri10ge_lro_flush(ss, lro, mbl); 2644 } 2645 } 2646 2647 static void 2648 myri10ge_intr_rx(struct myri10ge_slice_state *ss) 2649 { 2650 uint64_t gen; 2651 struct myri10ge_mblk_list mbl; 2652 2653 myri10ge_mbl_init(&mbl); 2654 if (mutex_tryenter(&ss->rx_lock) == 0) 2655 return; 2656 gen = ss->rx_gen_num; 2657 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL, 2658 &ss->rx_polling); 2659 if (mbl.head != NULL) 2660 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen); 2661 mutex_exit(&ss->rx_lock); 2662 2663 } 2664 2665 static mblk_t * 2666 myri10ge_poll_rx(void *arg, int bytes) 2667 { 2668 struct myri10ge_slice_state *ss = arg; 2669 struct myri10ge_mblk_list mbl; 2670 boolean_t dummy = B_FALSE; 2671 2672 if (bytes == 0) 2673 return (NULL); 2674 2675 myri10ge_mbl_init(&mbl); 2676 mutex_enter(&ss->rx_lock); 2677 if (ss->rx_polling) 2678 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy); 2679 else 2680 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss - 2681 ss->mgp->ss), ss->rx_token, ss->rx_polling); 2682 mutex_exit(&ss->rx_lock); 2683 return (mbl.head); 2684 } 2685 2686 /*ARGSUSED*/ 2687 static uint_t 2688 myri10ge_intr(caddr_t arg0, caddr_t arg1) 2689 { 2690 struct myri10ge_slice_state *ss = 2691 (struct myri10ge_slice_state *)(void *)arg0; 2692 struct myri10ge_priv *mgp = ss->mgp; 2693 mcp_irq_data_t *stats = ss->fw_stats; 2694 myri10ge_tx_ring_t *tx = &ss->tx; 2695 uint32_t send_done_count; 2696 uint8_t valid; 2697 2698 2699 /* make sure the DMA has finished */ 2700 if (!stats->valid) { 2701 return (DDI_INTR_UNCLAIMED); 2702 } 2703 valid = stats->valid; 2704 2705 /* low bit indicates receives are present */ 2706 if (valid & 1) 2707 myri10ge_intr_rx(ss); 2708 2709 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 2710 /* lower legacy IRQ */ 2711 *mgp->irq_deassert = 0; 2712 if (!myri10ge_deassert_wait) 2713 /* don't wait for conf. that irq is low */ 2714 stats->valid = 0; 2715 mb(); 2716 } else { 2717 /* no need to wait for conf. that irq is low */ 2718 stats->valid = 0; 2719 } 2720 2721 do { 2722 /* check for transmit completes and receives */ 2723 send_done_count = ntohl(stats->send_done_count); 2724 if (send_done_count != tx->pkt_done) 2725 myri10ge_tx_done(ss, (int)send_done_count); 2726 } while (*((volatile uint8_t *) &stats->valid)); 2727 2728 if (stats->stats_updated) { 2729 if (mgp->link_state != stats->link_up || stats->link_down) { 2730 mgp->link_state = stats->link_up; 2731 if (stats->link_down) { 2732 mgp->down_cnt += stats->link_down; 2733 mgp->link_state = 0; 2734 } 2735 if (mgp->link_state) { 2736 if (myri10ge_verbose) 2737 printf("%s: link up\n", mgp->name); 2738 mac_link_update(mgp->mh, LINK_STATE_UP); 2739 } else { 2740 if (myri10ge_verbose) 2741 printf("%s: link down\n", mgp->name); 2742 mac_link_update(mgp->mh, LINK_STATE_DOWN); 2743 } 2744 MYRI10GE_NIC_STAT_INC(link_changes); 2745 } 2746 if (mgp->rdma_tags_available != 2747 ntohl(ss->fw_stats->rdma_tags_available)) { 2748 mgp->rdma_tags_available = 2749 ntohl(ss->fw_stats->rdma_tags_available); 2750 cmn_err(CE_NOTE, "%s: RDMA timed out! " 2751 "%d tags left\n", mgp->name, 2752 mgp->rdma_tags_available); 2753 } 2754 } 2755 2756 mb(); 2757 /* check to see if we have rx token to pass back */ 2758 if (valid & 0x1) { 2759 mutex_enter(&ss->poll_lock); 2760 if (ss->rx_polling) { 2761 ss->rx_token = 1; 2762 } else { 2763 *ss->irq_claim = BE_32(3); 2764 ss->rx_token = 0; 2765 } 2766 mutex_exit(&ss->poll_lock); 2767 } 2768 *(ss->irq_claim + 1) = BE_32(3); 2769 return (DDI_INTR_CLAIMED); 2770 } 2771 2772 /* 2773 * Add or remove a multicast address. This is called with our 2774 * macinfo's lock held by GLD, so we do not need to worry about 2775 * our own locking here. 2776 */ 2777 static int 2778 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr) 2779 { 2780 myri10ge_cmd_t cmd; 2781 struct myri10ge_priv *mgp = arg; 2782 int status, join_leave; 2783 2784 if (add) 2785 join_leave = MXGEFW_JOIN_MULTICAST_GROUP; 2786 else 2787 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP; 2788 (void) memcpy(&cmd.data0, multicastaddr, 4); 2789 (void) memcpy(&cmd.data1, multicastaddr + 4, 2); 2790 cmd.data0 = htonl(cmd.data0); 2791 cmd.data1 = htonl(cmd.data1); 2792 status = myri10ge_send_cmd(mgp, join_leave, &cmd); 2793 if (status == 0) 2794 return (0); 2795 2796 cmn_err(CE_WARN, "%s: failed to set multicast address\n", 2797 mgp->name); 2798 return (status); 2799 } 2800 2801 2802 static int 2803 myri10ge_m_promisc(void *arg, boolean_t on) 2804 { 2805 struct myri10ge_priv *mgp = arg; 2806 2807 myri10ge_change_promisc(mgp, on); 2808 return (0); 2809 } 2810 2811 /* 2812 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2813 * backwards one at a time and handle ring wraps 2814 */ 2815 2816 static inline void 2817 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx, 2818 mcp_kreq_ether_send_t *src, int cnt) 2819 { 2820 int idx, starting_slot; 2821 starting_slot = tx->req; 2822 while (cnt > 1) { 2823 cnt--; 2824 idx = (starting_slot + cnt) & tx->mask; 2825 myri10ge_pio_copy(&tx->lanai[idx], 2826 &src[cnt], sizeof (*src)); 2827 mb(); 2828 } 2829 } 2830 2831 /* 2832 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2833 * at most 32 bytes at a time, so as to avoid involving the software 2834 * pio handler in the nic. We re-write the first segment's flags 2835 * to mark them valid only after writing the entire chain 2836 */ 2837 2838 static inline void 2839 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 2840 int cnt) 2841 { 2842 int idx, i; 2843 uint32_t *src_ints, *dst_ints; 2844 mcp_kreq_ether_send_t *srcp, *dstp, *dst; 2845 uint8_t last_flags; 2846 2847 idx = tx->req & tx->mask; 2848 2849 last_flags = src->flags; 2850 src->flags = 0; 2851 mb(); 2852 dst = dstp = &tx->lanai[idx]; 2853 srcp = src; 2854 2855 if ((idx + cnt) < tx->mask) { 2856 for (i = 0; i < (cnt - 1); i += 2) { 2857 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src)); 2858 mb(); /* force write every 32 bytes */ 2859 srcp += 2; 2860 dstp += 2; 2861 } 2862 } else { 2863 /* 2864 * submit all but the first request, and ensure 2865 * that it is submitted below 2866 */ 2867 myri10ge_submit_req_backwards(tx, src, cnt); 2868 i = 0; 2869 } 2870 if (i < cnt) { 2871 /* submit the first request */ 2872 myri10ge_pio_copy(dstp, srcp, sizeof (*src)); 2873 mb(); /* barrier before setting valid flag */ 2874 } 2875 2876 /* re-write the last 32-bits with the valid flags */ 2877 src->flags |= last_flags; 2878 src_ints = (uint32_t *)src; 2879 src_ints += 3; 2880 dst_ints = (uint32_t *)dst; 2881 dst_ints += 3; 2882 *dst_ints = *src_ints; 2883 tx->req += cnt; 2884 mb(); 2885 /* notify NIC to poll this tx ring */ 2886 if (!tx->active && tx->go != NULL) { 2887 *(int *)(void *)tx->go = 1; 2888 tx->active = 1; 2889 tx->activate++; 2890 mb(); 2891 } 2892 } 2893 2894 /* ARGSUSED */ 2895 static inline void 2896 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 2897 { 2898 uint32_t lso_flag; 2899 mac_lso_get(mp, mss, &lso_flag); 2900 (*flags) |= lso_flag; 2901 } 2902 2903 2904 /* like pullupmsg, except preserve hcksum/LSO attributes */ 2905 static int 2906 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) 2907 { 2908 uint32_t start, stuff, tx_offload_flags, mss; 2909 int ok; 2910 2911 mss = 0; 2912 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 2913 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 2914 2915 ok = pullupmsg(mp, -1); 2916 if (!ok) { 2917 printf("pullupmsg failed"); 2918 return (DDI_FAILURE); 2919 } 2920 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); 2921 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); 2922 if (tx_offload_flags & HW_LSO) 2923 DB_LSOMSS(mp) = (uint16_t)mss; 2924 lso_info_set(mp, mss, tx_offload_flags); 2925 return (DDI_SUCCESS); 2926 } 2927 2928 static inline void 2929 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh, 2930 int opackets, int obytes) 2931 { 2932 s->un.all = 0; 2933 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2934 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2935 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2936 s->un.s.brdcstxmt = 1; 2937 else 2938 s->un.s.multixmt = 1; 2939 } 2940 s->un.s.opackets = (uint16_t)opackets; 2941 s->un.s.obytes = obytes; 2942 } 2943 2944 static int 2945 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 2946 mcp_kreq_ether_send_t *req) 2947 { 2948 myri10ge_tx_ring_t *tx = &ss->tx; 2949 caddr_t ptr; 2950 struct myri10ge_tx_copybuf *cp; 2951 mblk_t *bp; 2952 int idx, mblen, avail; 2953 uint16_t len; 2954 2955 mutex_enter(&tx->lock); 2956 avail = tx->mask - (tx->req - tx->done); 2957 if (avail <= 1) { 2958 mutex_exit(&tx->lock); 2959 return (EBUSY); 2960 } 2961 idx = tx->req & tx->mask; 2962 cp = &tx->cp[idx]; 2963 ptr = cp->va; 2964 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) { 2965 mblen = MBLKL(bp); 2966 bcopy(bp->b_rptr, ptr, mblen); 2967 ptr += mblen; 2968 len += mblen; 2969 } 2970 /* ensure runts are padded to 60 bytes */ 2971 if (len < 60) { 2972 bzero(ptr, 64 - len); 2973 len = 60; 2974 } 2975 req->addr_low = cp->dma.low; 2976 req->addr_high = cp->dma.high; 2977 req->length = htons(len); 2978 req->pad = 0; 2979 req->rdma_count = 1; 2980 myri10ge_tx_stat(&tx->info[idx].stat, 2981 (struct ether_header *)(void *)cp->va, 1, len); 2982 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV); 2983 myri10ge_submit_req(&ss->tx, req, 1); 2984 mutex_exit(&tx->lock); 2985 freemsg(mp); 2986 return (DDI_SUCCESS); 2987 } 2988 2989 2990 static void 2991 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list, 2992 struct myri10ge_tx_buffer_state *tx_info, 2993 int count) 2994 { 2995 int i, idx; 2996 2997 idx = 0; /* gcc -Wuninitialized */ 2998 /* store unmapping and bp info for tx irq handler */ 2999 for (i = 0; i < count; i++) { 3000 idx = (tx->req + i) & tx->mask; 3001 tx->info[idx].m = tx_info[i].m; 3002 tx->info[idx].handle = tx_info[i].handle; 3003 } 3004 tx->info[idx].stat.un.all = tx_info[0].stat.un.all; 3005 3006 /* submit the frame to the nic */ 3007 myri10ge_submit_req(tx, req_list, count); 3008 3009 3010 } 3011 3012 3013 3014 static void 3015 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf) 3016 { 3017 mblk_t *bp; 3018 int seglen; 3019 uint_t count; 3020 3021 bp = mp; 3022 3023 while (off > 0) { 3024 seglen = MBLKL(bp); 3025 if (off < seglen) 3026 break; 3027 off -= seglen; 3028 bp = bp->b_cont; 3029 } 3030 while (len > 0) { 3031 seglen = MBLKL(bp); 3032 count = min(seglen - off, len); 3033 bcopy(bp->b_rptr + off, buf, count); 3034 len -= count; 3035 buf += count; 3036 off = 0; 3037 bp = bp->b_cont; 3038 } 3039 } 3040 3041 static int 3042 myri10ge_ether_parse_header(mblk_t *mp) 3043 { 3044 struct ether_header eh_copy; 3045 struct ether_header *eh; 3046 int eth_hdr_len, seglen; 3047 3048 seglen = MBLKL(mp); 3049 eth_hdr_len = sizeof (*eh); 3050 if (seglen < eth_hdr_len) { 3051 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy); 3052 eh = &eh_copy; 3053 } else { 3054 eh = (struct ether_header *)(void *)mp->b_rptr; 3055 } 3056 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 3057 eth_hdr_len += 4; 3058 } 3059 3060 return (eth_hdr_len); 3061 } 3062 3063 static int 3064 myri10ge_lso_parse_header(mblk_t *mp, int off) 3065 { 3066 char buf[128]; 3067 int seglen, sum_off; 3068 struct ip *ip; 3069 struct tcphdr *tcp; 3070 3071 seglen = MBLKL(mp); 3072 if (seglen < off + sizeof (*ip)) { 3073 myri10ge_copydata(mp, off, sizeof (*ip), buf); 3074 ip = (struct ip *)(void *)buf; 3075 } else { 3076 ip = (struct ip *)(void *)(mp->b_rptr + off); 3077 } 3078 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) { 3079 myri10ge_copydata(mp, off, 3080 (ip->ip_hl << 2) + sizeof (*tcp), buf); 3081 ip = (struct ip *)(void *)buf; 3082 } 3083 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2)); 3084 3085 /* 3086 * NIC expects ip_sum to be zero. Recent changes to 3087 * OpenSolaris leave the correct ip checksum there, rather 3088 * than the required zero, so we need to zero it. Otherwise, 3089 * the NIC will produce bad checksums when sending LSO packets. 3090 */ 3091 if (ip->ip_sum != 0) { 3092 if (((char *)ip) != buf) { 3093 /* ip points into mblk, so just zero it */ 3094 ip->ip_sum = 0; 3095 } else { 3096 /* 3097 * ip points into a copy, so walk the chain 3098 * to find the ip_csum, then zero it 3099 */ 3100 sum_off = off + _PTRDIFF(&ip->ip_sum, buf); 3101 while (sum_off > (int)(MBLKL(mp) - 1)) { 3102 sum_off -= MBLKL(mp); 3103 mp = mp->b_cont; 3104 } 3105 mp->b_rptr[sum_off] = 0; 3106 sum_off++; 3107 while (sum_off > MBLKL(mp) - 1) { 3108 sum_off -= MBLKL(mp); 3109 mp = mp->b_cont; 3110 } 3111 mp->b_rptr[sum_off] = 0; 3112 } 3113 } 3114 return (off + ((ip->ip_hl + tcp->th_off) << 2)); 3115 } 3116 3117 static int 3118 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 3119 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size, 3120 uint16_t mss, uint8_t cksum_offset) 3121 { 3122 myri10ge_tx_ring_t *tx = &ss->tx; 3123 struct myri10ge_priv *mgp = ss->mgp; 3124 mblk_t *bp; 3125 mcp_kreq_ether_send_t *req; 3126 struct myri10ge_tx_copybuf *cp; 3127 caddr_t rptr, ptr; 3128 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp; 3129 int resid, avail, idx, hdr_size_tmp, tx_boundary; 3130 int rdma_count; 3131 uint32_t seglen, len, boundary, low, high_swapped; 3132 uint16_t pseudo_hdr_offset = htons(mss); 3133 uint8_t flags; 3134 3135 tx_boundary = mgp->tx_boundary; 3136 hdr_size_tmp = hdr_size; 3137 resid = tx_boundary; 3138 count = 1; 3139 mutex_enter(&tx->lock); 3140 3141 /* check to see if the slots are really there */ 3142 avail = tx->mask - (tx->req - tx->done); 3143 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) { 3144 atomic_inc_32(&tx->stall); 3145 mutex_exit(&tx->lock); 3146 return (EBUSY); 3147 } 3148 3149 /* copy */ 3150 cum_len = -hdr_size; 3151 count = 0; 3152 req = req_list; 3153 idx = tx->mask & tx->req; 3154 cp = &tx->cp[idx]; 3155 low = ntohl(cp->dma.low); 3156 ptr = cp->va; 3157 cp->len = 0; 3158 if (mss) { 3159 int payload = pkt_size - hdr_size; 3160 uint16_t opackets = (payload / mss) + ((payload % mss) != 0); 3161 tx->info[idx].ostat.opackets = opackets; 3162 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size 3163 + pkt_size; 3164 } 3165 hdr_size_tmp = hdr_size; 3166 mss_resid = mss; 3167 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3168 tx_req = tx->req; 3169 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3170 mblen = MBLKL(bp); 3171 rptr = (caddr_t)bp->b_rptr; 3172 len = min(hdr_size_tmp, mblen); 3173 if (len) { 3174 bcopy(rptr, ptr, len); 3175 rptr += len; 3176 ptr += len; 3177 resid -= len; 3178 mblen -= len; 3179 hdr_size_tmp -= len; 3180 cp->len += len; 3181 if (hdr_size_tmp) 3182 continue; 3183 if (resid < mss) { 3184 tx_req++; 3185 idx = tx->mask & tx_req; 3186 cp = &tx->cp[idx]; 3187 low = ntohl(cp->dma.low); 3188 ptr = cp->va; 3189 resid = tx_boundary; 3190 } 3191 } 3192 while (mblen) { 3193 len = min(mss_resid, mblen); 3194 bcopy(rptr, ptr, len); 3195 mss_resid -= len; 3196 resid -= len; 3197 mblen -= len; 3198 rptr += len; 3199 ptr += len; 3200 cp->len += len; 3201 if (mss_resid == 0) { 3202 mss_resid = mss; 3203 if (resid < mss) { 3204 tx_req++; 3205 idx = tx->mask & tx_req; 3206 cp = &tx->cp[idx]; 3207 cp->len = 0; 3208 low = ntohl(cp->dma.low); 3209 ptr = cp->va; 3210 resid = tx_boundary; 3211 } 3212 } 3213 } 3214 } 3215 3216 req = req_list; 3217 pkt_size_tmp = pkt_size; 3218 count = 0; 3219 rdma_count = 0; 3220 tx_req = tx->req; 3221 while (pkt_size_tmp) { 3222 idx = tx->mask & tx_req; 3223 cp = &tx->cp[idx]; 3224 high_swapped = cp->dma.high; 3225 low = ntohl(cp->dma.low); 3226 len = cp->len; 3227 if (len == 0) { 3228 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n", 3229 pkt_size_tmp, pkt_size); 3230 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3231 mblen = MBLKL(bp); 3232 printf("mblen:%d\n", mblen); 3233 } 3234 pkt_size_tmp = pkt_size; 3235 tx_req = tx->req; 3236 while (pkt_size_tmp > 0) { 3237 idx = tx->mask & tx_req; 3238 cp = &tx->cp[idx]; 3239 printf("cp->len = %d\n", cp->len); 3240 pkt_size_tmp -= cp->len; 3241 tx_req++; 3242 } 3243 printf("dropped\n"); 3244 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3245 goto done; 3246 } 3247 pkt_size_tmp -= len; 3248 while (len) { 3249 while (len) { 3250 uint8_t flags_next; 3251 int cum_len_next; 3252 3253 boundary = (low + mgp->tx_boundary) & 3254 ~(mgp->tx_boundary - 1); 3255 seglen = boundary - low; 3256 if (seglen > len) 3257 seglen = len; 3258 3259 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3260 cum_len_next = cum_len + seglen; 3261 (req-rdma_count)->rdma_count = rdma_count + 1; 3262 if (likely(cum_len >= 0)) { 3263 /* payload */ 3264 int next_is_first, chop; 3265 3266 chop = (cum_len_next > mss); 3267 cum_len_next = cum_len_next % mss; 3268 next_is_first = (cum_len_next == 0); 3269 flags |= chop * 3270 MXGEFW_FLAGS_TSO_CHOP; 3271 flags_next |= next_is_first * 3272 MXGEFW_FLAGS_FIRST; 3273 rdma_count |= -(chop | next_is_first); 3274 rdma_count += chop & !next_is_first; 3275 } else if (likely(cum_len_next >= 0)) { 3276 /* header ends */ 3277 int small; 3278 3279 rdma_count = -1; 3280 cum_len_next = 0; 3281 seglen = -cum_len; 3282 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 3283 flags_next = MXGEFW_FLAGS_TSO_PLD | 3284 MXGEFW_FLAGS_FIRST | 3285 (small * MXGEFW_FLAGS_SMALL); 3286 } 3287 req->addr_high = high_swapped; 3288 req->addr_low = htonl(low); 3289 req->pseudo_hdr_offset = pseudo_hdr_offset; 3290 req->pad = 0; /* complete solid 16-byte block */ 3291 req->rdma_count = 1; 3292 req->cksum_offset = cksum_offset; 3293 req->length = htons(seglen); 3294 req->flags = flags | ((cum_len & 1) * 3295 MXGEFW_FLAGS_ALIGN_ODD); 3296 if (cksum_offset > seglen) 3297 cksum_offset -= seglen; 3298 else 3299 cksum_offset = 0; 3300 low += seglen; 3301 len -= seglen; 3302 cum_len = cum_len_next; 3303 req++; 3304 req->flags = 0; 3305 flags = flags_next; 3306 count++; 3307 rdma_count++; 3308 } 3309 } 3310 tx_req++; 3311 } 3312 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3313 do { 3314 req--; 3315 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3316 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3317 MXGEFW_FLAGS_FIRST))); 3318 3319 myri10ge_submit_req(tx, req_list, count); 3320 done: 3321 mutex_exit(&tx->lock); 3322 freemsg(mp); 3323 return (DDI_SUCCESS); 3324 } 3325 3326 /* 3327 * Try to send the chain of buffers described by the mp. We must not 3328 * encapsulate more than eth->tx.req - eth->tx.done, or 3329 * MXGEFW_MAX_SEND_DESC, whichever is more. 3330 */ 3331 3332 static int 3333 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, 3334 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info) 3335 { 3336 struct myri10ge_priv *mgp = ss->mgp; 3337 myri10ge_tx_ring_t *tx = &ss->tx; 3338 mcp_kreq_ether_send_t *req; 3339 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL; 3340 mblk_t *bp; 3341 ddi_dma_cookie_t cookie; 3342 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen, 3343 rdma_count, cum_len, lso_hdr_size; 3344 uint32_t start, stuff, tx_offload_flags; 3345 uint32_t seglen, len, mss, boundary, low, high_swapped; 3346 uint_t ncookies; 3347 uint16_t pseudo_hdr_offset; 3348 uint8_t flags, cksum_offset, odd_flag; 3349 int pkt_size; 3350 int lso_copy = myri10ge_lso_copy; 3351 try_pullup = 1; 3352 3353 again: 3354 /* Setup checksum offloading, if needed */ 3355 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 3356 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 3357 if (tx_offload_flags & HW_LSO) { 3358 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3359 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) { 3360 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags); 3361 freemsg(mp); 3362 return (DDI_SUCCESS); 3363 } 3364 } else { 3365 max_segs = MXGEFW_MAX_SEND_DESC; 3366 mss = 0; 3367 } 3368 req = req_list; 3369 cksum_offset = 0; 3370 pseudo_hdr_offset = 0; 3371 3372 /* leave an extra slot keep the ring from wrapping */ 3373 avail = tx->mask - (tx->req - tx->done); 3374 3375 /* 3376 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length 3377 * message will need to be pulled up in order to fit. 3378 * Otherwise, we are low on transmit descriptors, it is 3379 * probably better to stall and try again rather than pullup a 3380 * message to fit. 3381 */ 3382 3383 if (avail < max_segs) { 3384 err = EBUSY; 3385 atomic_inc_32(&tx->stall_early); 3386 goto stall; 3387 } 3388 3389 /* find out how long the frame is and how many segments it is */ 3390 count = 0; 3391 odd_flag = 0; 3392 pkt_size = 0; 3393 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 3394 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3395 dblk_t *dbp; 3396 mblen = MBLKL(bp); 3397 if (mblen == 0) { 3398 /* 3399 * we can't simply skip over 0-length mblks 3400 * because the hardware can't deal with them, 3401 * and we could leak them. 3402 */ 3403 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len); 3404 err = EIO; 3405 goto pullup; 3406 } 3407 /* 3408 * There's no advantage to copying most gesballoc 3409 * attached blocks, so disable lso copy in that case 3410 */ 3411 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) { 3412 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) { 3413 lso_copy = 0; 3414 } 3415 } 3416 pkt_size += mblen; 3417 count++; 3418 } 3419 3420 /* Try to pull up excessivly long chains */ 3421 if (count >= max_segs) { 3422 err = myri10ge_pullup(ss, mp); 3423 if (likely(err == DDI_SUCCESS)) { 3424 count = 1; 3425 } else { 3426 if (count < MYRI10GE_MAX_SEND_DESC_TSO) { 3427 /* 3428 * just let the h/w send it, it will be 3429 * inefficient, but us better than dropping 3430 */ 3431 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3432 } else { 3433 /* drop it */ 3434 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3435 freemsg(mp); 3436 return (0); 3437 } 3438 } 3439 } 3440 3441 cum_len = 0; 3442 maclen = myri10ge_ether_parse_header(mp); 3443 3444 if (tx_offload_flags & HCK_PARTIALCKSUM) { 3445 3446 cksum_offset = start + maclen; 3447 pseudo_hdr_offset = htons(stuff + maclen); 3448 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 3449 flags |= MXGEFW_FLAGS_CKSUM; 3450 } 3451 3452 lso_hdr_size = 0; /* -Wunitinialized */ 3453 if (mss) { /* LSO */ 3454 /* this removes any CKSUM flag from before */ 3455 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3456 /* 3457 * parse the headers and set cum_len to a negative 3458 * value to reflect the offset of the TCP payload 3459 */ 3460 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen); 3461 cum_len = -lso_hdr_size; 3462 if ((mss < mgp->tx_boundary) && lso_copy) { 3463 err = myri10ge_tx_tso_copy(ss, mp, req_list, 3464 lso_hdr_size, pkt_size, mss, cksum_offset); 3465 return (err); 3466 } 3467 3468 /* 3469 * for TSO, pseudo_hdr_offset holds mss. The firmware 3470 * figures out where to put the checksum by parsing 3471 * the header. 3472 */ 3473 3474 pseudo_hdr_offset = htons(mss); 3475 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) { 3476 flags |= MXGEFW_FLAGS_SMALL; 3477 if (pkt_size < myri10ge_tx_copylen) { 3478 req->cksum_offset = cksum_offset; 3479 req->pseudo_hdr_offset = pseudo_hdr_offset; 3480 req->flags = flags; 3481 err = myri10ge_tx_copy(ss, mp, req); 3482 return (err); 3483 } 3484 cum_len = 0; 3485 } 3486 3487 /* pull one DMA handle for each bp from our freelist */ 3488 handles = NULL; 3489 err = myri10ge_alloc_tx_handles(ss, count, &handles); 3490 if (err != DDI_SUCCESS) { 3491 err = DDI_FAILURE; 3492 goto stall; 3493 } 3494 count = 0; 3495 rdma_count = 0; 3496 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3497 mblen = MBLKL(bp); 3498 dma_handle = handles; 3499 handles = handles->next; 3500 3501 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL, 3502 (caddr_t)bp->b_rptr, mblen, 3503 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL, 3504 &cookie, &ncookies); 3505 if (unlikely(rv != DDI_DMA_MAPPED)) { 3506 err = EIO; 3507 try_pullup = 0; 3508 dma_handle->next = handles; 3509 handles = dma_handle; 3510 goto abort_with_handles; 3511 } 3512 3513 /* reserve the slot */ 3514 tx_info[count].m = bp; 3515 tx_info[count].handle = dma_handle; 3516 3517 for (; ; ) { 3518 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 3519 high_swapped = 3520 htonl(MYRI10GE_HIGHPART_TO_U32( 3521 cookie.dmac_laddress)); 3522 len = (uint32_t)cookie.dmac_size; 3523 while (len) { 3524 uint8_t flags_next; 3525 int cum_len_next; 3526 3527 boundary = (low + mgp->tx_boundary) & 3528 ~(mgp->tx_boundary - 1); 3529 seglen = boundary - low; 3530 if (seglen > len) 3531 seglen = len; 3532 3533 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3534 cum_len_next = cum_len + seglen; 3535 if (mss) { 3536 (req-rdma_count)->rdma_count = 3537 rdma_count + 1; 3538 if (likely(cum_len >= 0)) { 3539 /* payload */ 3540 int next_is_first, chop; 3541 3542 chop = (cum_len_next > mss); 3543 cum_len_next = 3544 cum_len_next % mss; 3545 next_is_first = 3546 (cum_len_next == 0); 3547 flags |= chop * 3548 MXGEFW_FLAGS_TSO_CHOP; 3549 flags_next |= next_is_first * 3550 MXGEFW_FLAGS_FIRST; 3551 rdma_count |= 3552 -(chop | next_is_first); 3553 rdma_count += 3554 chop & !next_is_first; 3555 } else if (likely(cum_len_next >= 0)) { 3556 /* header ends */ 3557 int small; 3558 3559 rdma_count = -1; 3560 cum_len_next = 0; 3561 seglen = -cum_len; 3562 small = (mss <= 3563 MXGEFW_SEND_SMALL_SIZE); 3564 flags_next = 3565 MXGEFW_FLAGS_TSO_PLD 3566 | MXGEFW_FLAGS_FIRST 3567 | (small * 3568 MXGEFW_FLAGS_SMALL); 3569 } 3570 } 3571 req->addr_high = high_swapped; 3572 req->addr_low = htonl(low); 3573 req->pseudo_hdr_offset = pseudo_hdr_offset; 3574 req->pad = 0; /* complete solid 16-byte block */ 3575 req->rdma_count = 1; 3576 req->cksum_offset = cksum_offset; 3577 req->length = htons(seglen); 3578 req->flags = flags | ((cum_len & 1) * odd_flag); 3579 if (cksum_offset > seglen) 3580 cksum_offset -= seglen; 3581 else 3582 cksum_offset = 0; 3583 low += seglen; 3584 len -= seglen; 3585 cum_len = cum_len_next; 3586 count++; 3587 rdma_count++; 3588 /* make sure all the segments will fit */ 3589 if (unlikely(count >= max_segs)) { 3590 MYRI10GE_ATOMIC_SLICE_STAT_INC( 3591 xmit_lowbuf); 3592 /* may try a pullup */ 3593 err = EBUSY; 3594 if (try_pullup) 3595 try_pullup = 2; 3596 goto abort_with_handles; 3597 } 3598 req++; 3599 req->flags = 0; 3600 flags = flags_next; 3601 tx_info[count].m = 0; 3602 } 3603 ncookies--; 3604 if (ncookies == 0) 3605 break; 3606 ddi_dma_nextcookie(dma_handle->h, &cookie); 3607 } 3608 } 3609 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3610 3611 if (mss) { 3612 do { 3613 req--; 3614 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3615 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3616 MXGEFW_FLAGS_FIRST))); 3617 } 3618 3619 /* calculate tx stats */ 3620 if (mss) { 3621 uint16_t opackets; 3622 int payload; 3623 3624 payload = pkt_size - lso_hdr_size; 3625 opackets = (payload / mss) + ((payload % mss) != 0); 3626 tx_info[0].stat.un.all = 0; 3627 tx_info[0].ostat.opackets = opackets; 3628 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size 3629 + pkt_size; 3630 } else { 3631 myri10ge_tx_stat(&tx_info[0].stat, 3632 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size); 3633 } 3634 mutex_enter(&tx->lock); 3635 3636 /* check to see if the slots are really there */ 3637 avail = tx->mask - (tx->req - tx->done); 3638 if (unlikely(avail <= count)) { 3639 mutex_exit(&tx->lock); 3640 err = 0; 3641 goto late_stall; 3642 } 3643 3644 myri10ge_send_locked(tx, req_list, tx_info, count); 3645 mutex_exit(&tx->lock); 3646 return (DDI_SUCCESS); 3647 3648 late_stall: 3649 try_pullup = 0; 3650 atomic_inc_32(&tx->stall_late); 3651 3652 abort_with_handles: 3653 /* unbind and free handles from previous mblks */ 3654 for (i = 0; i < count; i++) { 3655 bp = tx_info[i].m; 3656 tx_info[i].m = 0; 3657 if (bp) { 3658 dma_handle = tx_info[i].handle; 3659 (void) ddi_dma_unbind_handle(dma_handle->h); 3660 dma_handle->next = handles; 3661 handles = dma_handle; 3662 tx_info[i].handle = NULL; 3663 tx_info[i].m = NULL; 3664 } 3665 } 3666 myri10ge_free_tx_handle_slist(tx, handles); 3667 pullup: 3668 if (try_pullup) { 3669 err = myri10ge_pullup(ss, mp); 3670 if (err != DDI_SUCCESS && try_pullup == 2) { 3671 /* drop */ 3672 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3673 freemsg(mp); 3674 return (0); 3675 } 3676 try_pullup = 0; 3677 goto again; 3678 } 3679 3680 stall: 3681 if (err != 0) { 3682 if (err == EBUSY) { 3683 atomic_inc_32(&tx->stall); 3684 } else { 3685 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3686 } 3687 } 3688 return (err); 3689 } 3690 3691 static mblk_t * 3692 myri10ge_send_wrapper(void *arg, mblk_t *mp) 3693 { 3694 struct myri10ge_slice_state *ss = arg; 3695 int err = 0; 3696 mcp_kreq_ether_send_t *req_list; 3697 #if defined(__i386) 3698 /* 3699 * We need about 2.5KB of scratch space to handle transmits. 3700 * i86pc has only 8KB of kernel stack space, so we malloc the 3701 * scratch space there rather than keeping it on the stack. 3702 */ 3703 size_t req_size, tx_info_size; 3704 struct myri10ge_tx_buffer_state *tx_info; 3705 caddr_t req_bytes; 3706 3707 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3708 + 8; 3709 req_bytes = kmem_alloc(req_size, KM_SLEEP); 3710 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1); 3711 tx_info = kmem_alloc(tx_info_size, KM_SLEEP); 3712 #else 3713 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3714 + 8]; 3715 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1]; 3716 #endif 3717 3718 /* ensure req_list entries are aligned to 8 bytes */ 3719 req_list = (struct mcp_kreq_ether_send *) 3720 (((unsigned long)req_bytes + 7UL) & ~7UL); 3721 3722 err = myri10ge_send(ss, mp, req_list, tx_info); 3723 3724 #if defined(__i386) 3725 kmem_free(tx_info, tx_info_size); 3726 kmem_free(req_bytes, req_size); 3727 #endif 3728 if (err) 3729 return (mp); 3730 else 3731 return (NULL); 3732 } 3733 3734 static int 3735 myri10ge_addmac(void *arg, const uint8_t *mac_addr) 3736 { 3737 struct myri10ge_priv *mgp = arg; 3738 int err; 3739 3740 if (mac_addr == NULL) 3741 return (EINVAL); 3742 3743 mutex_enter(&mgp->intrlock); 3744 if (mgp->macaddr_cnt) { 3745 mutex_exit(&mgp->intrlock); 3746 return (ENOSPC); 3747 } 3748 err = myri10ge_m_unicst(mgp, mac_addr); 3749 if (!err) 3750 mgp->macaddr_cnt++; 3751 3752 mutex_exit(&mgp->intrlock); 3753 if (err) 3754 return (err); 3755 3756 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr)); 3757 return (0); 3758 } 3759 3760 /*ARGSUSED*/ 3761 static int 3762 myri10ge_remmac(void *arg, const uint8_t *mac_addr) 3763 { 3764 struct myri10ge_priv *mgp = arg; 3765 3766 mutex_enter(&mgp->intrlock); 3767 mgp->macaddr_cnt--; 3768 mutex_exit(&mgp->intrlock); 3769 3770 return (0); 3771 } 3772 3773 /*ARGSUSED*/ 3774 static void 3775 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index, 3776 mac_group_info_t *infop, mac_group_handle_t gh) 3777 { 3778 struct myri10ge_priv *mgp = arg; 3779 3780 if (rtype != MAC_RING_TYPE_RX) 3781 return; 3782 3783 infop->mgi_driver = (mac_group_driver_t)mgp; 3784 infop->mgi_start = NULL; 3785 infop->mgi_stop = NULL; 3786 infop->mgi_addmac = myri10ge_addmac; 3787 infop->mgi_remmac = myri10ge_remmac; 3788 infop->mgi_count = mgp->num_slices; 3789 } 3790 3791 static int 3792 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) 3793 { 3794 struct myri10ge_slice_state *ss; 3795 3796 ss = (struct myri10ge_slice_state *)rh; 3797 mutex_enter(&ss->rx_lock); 3798 ss->rx_gen_num = mr_gen_num; 3799 mutex_exit(&ss->rx_lock); 3800 return (0); 3801 } 3802 3803 /* 3804 * Retrieve a value for one of the statistics for a particular rx ring 3805 */ 3806 int 3807 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3808 { 3809 struct myri10ge_slice_state *ss; 3810 3811 ss = (struct myri10ge_slice_state *)rh; 3812 switch (stat) { 3813 case MAC_STAT_RBYTES: 3814 *val = ss->rx_stats.ibytes; 3815 break; 3816 3817 case MAC_STAT_IPACKETS: 3818 *val = ss->rx_stats.ipackets; 3819 break; 3820 3821 default: 3822 *val = 0; 3823 return (ENOTSUP); 3824 } 3825 3826 return (0); 3827 } 3828 3829 /* 3830 * Retrieve a value for one of the statistics for a particular tx ring 3831 */ 3832 int 3833 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3834 { 3835 struct myri10ge_slice_state *ss; 3836 3837 ss = (struct myri10ge_slice_state *)rh; 3838 switch (stat) { 3839 case MAC_STAT_OBYTES: 3840 *val = ss->tx.stats.obytes; 3841 break; 3842 3843 case MAC_STAT_OPACKETS: 3844 *val = ss->tx.stats.opackets; 3845 break; 3846 3847 default: 3848 *val = 0; 3849 return (ENOTSUP); 3850 } 3851 3852 return (0); 3853 } 3854 3855 static int 3856 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) 3857 { 3858 struct myri10ge_slice_state *ss; 3859 3860 ss = (struct myri10ge_slice_state *)intrh; 3861 mutex_enter(&ss->poll_lock); 3862 ss->rx_polling = B_TRUE; 3863 mutex_exit(&ss->poll_lock); 3864 return (0); 3865 } 3866 3867 static int 3868 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh) 3869 { 3870 struct myri10ge_slice_state *ss; 3871 3872 ss = (struct myri10ge_slice_state *)intrh; 3873 mutex_enter(&ss->poll_lock); 3874 ss->rx_polling = B_FALSE; 3875 if (ss->rx_token) { 3876 *ss->irq_claim = BE_32(3); 3877 ss->rx_token = 0; 3878 } 3879 mutex_exit(&ss->poll_lock); 3880 return (0); 3881 } 3882 3883 /*ARGSUSED*/ 3884 static void 3885 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 3886 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 3887 { 3888 struct myri10ge_priv *mgp = arg; 3889 struct myri10ge_slice_state *ss; 3890 mac_intr_t *mintr = &infop->mri_intr; 3891 3892 ASSERT((unsigned int)ring_index < mgp->num_slices); 3893 3894 ss = &mgp->ss[ring_index]; 3895 switch (rtype) { 3896 case MAC_RING_TYPE_RX: 3897 ss->rx_rh = rh; 3898 infop->mri_driver = (mac_ring_driver_t)ss; 3899 infop->mri_start = myri10ge_ring_start; 3900 infop->mri_stop = NULL; 3901 infop->mri_poll = myri10ge_poll_rx; 3902 infop->mri_stat = myri10ge_rx_ring_stat; 3903 mintr->mi_handle = (mac_intr_handle_t)ss; 3904 mintr->mi_enable = myri10ge_rx_ring_intr_enable; 3905 mintr->mi_disable = myri10ge_rx_ring_intr_disable; 3906 break; 3907 case MAC_RING_TYPE_TX: 3908 ss->tx.rh = rh; 3909 infop->mri_driver = (mac_ring_driver_t)ss; 3910 infop->mri_start = NULL; 3911 infop->mri_stop = NULL; 3912 infop->mri_tx = myri10ge_send_wrapper; 3913 infop->mri_stat = myri10ge_tx_ring_stat; 3914 break; 3915 default: 3916 break; 3917 } 3918 } 3919 3920 static void 3921 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp) 3922 { 3923 if (mgp->ksp_stat == NULL) 3924 return; 3925 3926 kstat_delete(mgp->ksp_stat); 3927 mgp->ksp_stat = NULL; 3928 } 3929 3930 static void 3931 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss) 3932 { 3933 if (ss->ksp_stat == NULL) 3934 return; 3935 3936 kstat_delete(ss->ksp_stat); 3937 ss->ksp_stat = NULL; 3938 } 3939 3940 static void 3941 myri10ge_info_destroy(struct myri10ge_priv *mgp) 3942 { 3943 if (mgp->ksp_info == NULL) 3944 return; 3945 3946 kstat_delete(mgp->ksp_info); 3947 mgp->ksp_info = NULL; 3948 } 3949 3950 static int 3951 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw) 3952 { 3953 struct myri10ge_nic_stat *ethstat; 3954 struct myri10ge_priv *mgp; 3955 mcp_irq_data_t *fw_stats; 3956 3957 3958 if (rw == KSTAT_WRITE) 3959 return (EACCES); 3960 3961 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data; 3962 mgp = (struct myri10ge_priv *)ksp->ks_private; 3963 fw_stats = mgp->ss[0].fw_stats; 3964 3965 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma; 3966 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma; 3967 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma; 3968 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) 3969 ethstat->dma_force_physical.value.ul = 1; 3970 else 3971 ethstat->dma_force_physical.value.ul = 0; 3972 ethstat->lanes.value.ul = mgp->pcie_link_width; 3973 ethstat->dropped_bad_crc32.value.ul = 3974 ntohl(fw_stats->dropped_bad_crc32); 3975 ethstat->dropped_bad_phy.value.ul = 3976 ntohl(fw_stats->dropped_bad_phy); 3977 ethstat->dropped_link_error_or_filtered.value.ul = 3978 ntohl(fw_stats->dropped_link_error_or_filtered); 3979 ethstat->dropped_link_overflow.value.ul = 3980 ntohl(fw_stats->dropped_link_overflow); 3981 ethstat->dropped_multicast_filtered.value.ul = 3982 ntohl(fw_stats->dropped_multicast_filtered); 3983 ethstat->dropped_no_big_buffer.value.ul = 3984 ntohl(fw_stats->dropped_no_big_buffer); 3985 ethstat->dropped_no_small_buffer.value.ul = 3986 ntohl(fw_stats->dropped_no_small_buffer); 3987 ethstat->dropped_overrun.value.ul = 3988 ntohl(fw_stats->dropped_overrun); 3989 ethstat->dropped_pause.value.ul = 3990 ntohl(fw_stats->dropped_pause); 3991 ethstat->dropped_runt.value.ul = 3992 ntohl(fw_stats->dropped_runt); 3993 ethstat->link_up.value.ul = 3994 ntohl(fw_stats->link_up); 3995 ethstat->dropped_unicast_filtered.value.ul = 3996 ntohl(fw_stats->dropped_unicast_filtered); 3997 return (0); 3998 } 3999 4000 static int 4001 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw) 4002 { 4003 struct myri10ge_slice_stat *ethstat; 4004 struct myri10ge_slice_state *ss; 4005 4006 if (rw == KSTAT_WRITE) 4007 return (EACCES); 4008 4009 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data; 4010 ss = (struct myri10ge_slice_state *)ksp->ks_private; 4011 4012 ethstat->rx_big.value.ul = ss->j_rx_cnt; 4013 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt; 4014 ethstat->rx_bigbuf_pool.value.ul = 4015 ss->jpool.num_alloc - ss->jbufs_for_smalls; 4016 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls; 4017 ethstat->rx_small.value.ul = ss->rx_small.cnt - 4018 (ss->rx_small.mask + 1); 4019 ethstat->tx_done.value.ul = ss->tx.done; 4020 ethstat->tx_req.value.ul = ss->tx.req; 4021 ethstat->tx_activate.value.ul = ss->tx.activate; 4022 ethstat->xmit_sched.value.ul = ss->tx.sched; 4023 ethstat->xmit_stall.value.ul = ss->tx.stall; 4024 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early; 4025 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late; 4026 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err); 4027 return (0); 4028 } 4029 4030 static int 4031 myri10ge_info_kstat_update(kstat_t *ksp, int rw) 4032 { 4033 struct myri10ge_info *info; 4034 struct myri10ge_priv *mgp; 4035 4036 4037 if (rw == KSTAT_WRITE) 4038 return (EACCES); 4039 4040 info = (struct myri10ge_info *)ksp->ks_data; 4041 mgp = (struct myri10ge_priv *)ksp->ks_private; 4042 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR); 4043 kstat_named_setstr(&info->firmware_version, mgp->fw_version); 4044 kstat_named_setstr(&info->firmware_name, mgp->fw_name); 4045 kstat_named_setstr(&info->interrupt_type, mgp->intr_type); 4046 kstat_named_setstr(&info->product_code, mgp->pc_str); 4047 kstat_named_setstr(&info->serial_number, mgp->sn_str); 4048 return (0); 4049 } 4050 4051 static struct myri10ge_info myri10ge_info_template = { 4052 { "driver_version", KSTAT_DATA_STRING }, 4053 { "firmware_version", KSTAT_DATA_STRING }, 4054 { "firmware_name", KSTAT_DATA_STRING }, 4055 { "interrupt_type", KSTAT_DATA_STRING }, 4056 { "product_code", KSTAT_DATA_STRING }, 4057 { "serial_number", KSTAT_DATA_STRING }, 4058 }; 4059 static kmutex_t myri10ge_info_template_lock; 4060 4061 4062 static int 4063 myri10ge_info_init(struct myri10ge_priv *mgp) 4064 { 4065 struct kstat *ksp; 4066 4067 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4068 "myri10ge_info", "net", KSTAT_TYPE_NAMED, 4069 sizeof (myri10ge_info_template) / 4070 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); 4071 if (ksp == NULL) { 4072 cmn_err(CE_WARN, 4073 "%s: myri10ge_info_init: kstat_create failed", mgp->name); 4074 return (DDI_FAILURE); 4075 } 4076 mgp->ksp_info = ksp; 4077 ksp->ks_update = myri10ge_info_kstat_update; 4078 ksp->ks_private = (void *) mgp; 4079 ksp->ks_data = &myri10ge_info_template; 4080 ksp->ks_lock = &myri10ge_info_template_lock; 4081 if (MYRI10GE_VERSION_STR != NULL) 4082 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1; 4083 if (mgp->fw_version != NULL) 4084 ksp->ks_data_size += strlen(mgp->fw_version) + 1; 4085 ksp->ks_data_size += strlen(mgp->fw_name) + 1; 4086 ksp->ks_data_size += strlen(mgp->intr_type) + 1; 4087 if (mgp->pc_str != NULL) 4088 ksp->ks_data_size += strlen(mgp->pc_str) + 1; 4089 if (mgp->sn_str != NULL) 4090 ksp->ks_data_size += strlen(mgp->sn_str) + 1; 4091 4092 kstat_install(ksp); 4093 return (DDI_SUCCESS); 4094 } 4095 4096 4097 static int 4098 myri10ge_nic_stat_init(struct myri10ge_priv *mgp) 4099 { 4100 struct kstat *ksp; 4101 struct myri10ge_nic_stat *ethstat; 4102 4103 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4104 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED, 4105 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4106 if (ksp == NULL) { 4107 cmn_err(CE_WARN, 4108 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4109 return (DDI_FAILURE); 4110 } 4111 mgp->ksp_stat = ksp; 4112 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data); 4113 4114 kstat_named_init(ðstat->dma_read_bw_MBs, 4115 "dma_read_bw_MBs", KSTAT_DATA_ULONG); 4116 kstat_named_init(ðstat->dma_write_bw_MBs, 4117 "dma_write_bw_MBs", KSTAT_DATA_ULONG); 4118 kstat_named_init(ðstat->dma_read_write_bw_MBs, 4119 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG); 4120 kstat_named_init(ðstat->dma_force_physical, 4121 "dma_force_physical", KSTAT_DATA_ULONG); 4122 kstat_named_init(ðstat->lanes, 4123 "lanes", KSTAT_DATA_ULONG); 4124 kstat_named_init(ðstat->dropped_bad_crc32, 4125 "dropped_bad_crc32", KSTAT_DATA_ULONG); 4126 kstat_named_init(ðstat->dropped_bad_phy, 4127 "dropped_bad_phy", KSTAT_DATA_ULONG); 4128 kstat_named_init(ðstat->dropped_link_error_or_filtered, 4129 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG); 4130 kstat_named_init(ðstat->dropped_link_overflow, 4131 "dropped_link_overflow", KSTAT_DATA_ULONG); 4132 kstat_named_init(ðstat->dropped_multicast_filtered, 4133 "dropped_multicast_filtered", KSTAT_DATA_ULONG); 4134 kstat_named_init(ðstat->dropped_no_big_buffer, 4135 "dropped_no_big_buffer", KSTAT_DATA_ULONG); 4136 kstat_named_init(ðstat->dropped_no_small_buffer, 4137 "dropped_no_small_buffer", KSTAT_DATA_ULONG); 4138 kstat_named_init(ðstat->dropped_overrun, 4139 "dropped_overrun", KSTAT_DATA_ULONG); 4140 kstat_named_init(ðstat->dropped_pause, 4141 "dropped_pause", KSTAT_DATA_ULONG); 4142 kstat_named_init(ðstat->dropped_runt, 4143 "dropped_runt", KSTAT_DATA_ULONG); 4144 kstat_named_init(ðstat->dropped_unicast_filtered, 4145 "dropped_unicast_filtered", KSTAT_DATA_ULONG); 4146 kstat_named_init(ðstat->dropped_runt, "dropped_runt", 4147 KSTAT_DATA_ULONG); 4148 kstat_named_init(ðstat->link_up, "link_up", KSTAT_DATA_ULONG); 4149 kstat_named_init(ðstat->link_changes, "link_changes", 4150 KSTAT_DATA_ULONG); 4151 ksp->ks_update = myri10ge_nic_stat_kstat_update; 4152 ksp->ks_private = (void *) mgp; 4153 kstat_install(ksp); 4154 return (DDI_SUCCESS); 4155 } 4156 4157 static int 4158 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss) 4159 { 4160 struct myri10ge_priv *mgp = ss->mgp; 4161 struct kstat *ksp; 4162 struct myri10ge_slice_stat *ethstat; 4163 int instance; 4164 4165 /* 4166 * fake an instance so that the same slice numbers from 4167 * different instances do not collide 4168 */ 4169 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss); 4170 ksp = kstat_create("myri10ge", instance, 4171 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED, 4172 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4173 if (ksp == NULL) { 4174 cmn_err(CE_WARN, 4175 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4176 return (DDI_FAILURE); 4177 } 4178 ss->ksp_stat = ksp; 4179 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data); 4180 kstat_named_init(ðstat->lro_bad_csum, "lro_bad_csum", 4181 KSTAT_DATA_ULONG); 4182 kstat_named_init(ðstat->lro_flushed, "lro_flushed", 4183 KSTAT_DATA_ULONG); 4184 kstat_named_init(ðstat->lro_queued, "lro_queued", 4185 KSTAT_DATA_ULONG); 4186 kstat_named_init(ðstat->rx_bigbuf_firmware, "rx_bigbuf_firmware", 4187 KSTAT_DATA_ULONG); 4188 kstat_named_init(ðstat->rx_bigbuf_pool, "rx_bigbuf_pool", 4189 KSTAT_DATA_ULONG); 4190 kstat_named_init(ðstat->rx_bigbuf_smalls, "rx_bigbuf_smalls", 4191 KSTAT_DATA_ULONG); 4192 kstat_named_init(ðstat->rx_copy, "rx_copy", 4193 KSTAT_DATA_ULONG); 4194 kstat_named_init(ðstat->rx_big_nobuf, "rx_big_nobuf", 4195 KSTAT_DATA_ULONG); 4196 kstat_named_init(ðstat->rx_small_nobuf, "rx_small_nobuf", 4197 KSTAT_DATA_ULONG); 4198 kstat_named_init(ðstat->xmit_zero_len, "xmit_zero_len", 4199 KSTAT_DATA_ULONG); 4200 kstat_named_init(ðstat->xmit_pullup, "xmit_pullup", 4201 KSTAT_DATA_ULONG); 4202 kstat_named_init(ðstat->xmit_pullup_first, "xmit_pullup_first", 4203 KSTAT_DATA_ULONG); 4204 kstat_named_init(ðstat->xmit_lowbuf, "xmit_lowbuf", 4205 KSTAT_DATA_ULONG); 4206 kstat_named_init(ðstat->xmit_lsobadflags, "xmit_lsobadflags", 4207 KSTAT_DATA_ULONG); 4208 kstat_named_init(ðstat->xmit_sched, "xmit_sched", 4209 KSTAT_DATA_ULONG); 4210 kstat_named_init(ðstat->xmit_stall, "xmit_stall", 4211 KSTAT_DATA_ULONG); 4212 kstat_named_init(ðstat->xmit_stall_early, "xmit_stall_early", 4213 KSTAT_DATA_ULONG); 4214 kstat_named_init(ðstat->xmit_stall_late, "xmit_stall_late", 4215 KSTAT_DATA_ULONG); 4216 kstat_named_init(ðstat->xmit_err, "xmit_err", 4217 KSTAT_DATA_ULONG); 4218 kstat_named_init(ðstat->tx_req, "tx_req", 4219 KSTAT_DATA_ULONG); 4220 kstat_named_init(ðstat->tx_activate, "tx_activate", 4221 KSTAT_DATA_ULONG); 4222 kstat_named_init(ðstat->tx_done, "tx_done", 4223 KSTAT_DATA_ULONG); 4224 kstat_named_init(ðstat->tx_handles_alloced, "tx_handles_alloced", 4225 KSTAT_DATA_ULONG); 4226 kstat_named_init(ðstat->rx_big, "rx_big", 4227 KSTAT_DATA_ULONG); 4228 kstat_named_init(ðstat->rx_small, "rx_small", 4229 KSTAT_DATA_ULONG); 4230 ksp->ks_update = myri10ge_slice_stat_kstat_update; 4231 ksp->ks_private = (void *) ss; 4232 kstat_install(ksp); 4233 return (DDI_SUCCESS); 4234 } 4235 4236 4237 4238 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4239 4240 #include <vm/hat.h> 4241 #include <sys/ddi_isa.h> 4242 void *device_arena_alloc(size_t size, int vm_flag); 4243 void device_arena_free(void *vaddr, size_t size); 4244 4245 static void 4246 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4247 { 4248 dev_info_t *parent_dip; 4249 ddi_acc_handle_t handle; 4250 unsigned long bus_number, dev_number, func_number; 4251 unsigned long cfg_pa, paddr, base, pgoffset; 4252 char *cvaddr, *ptr; 4253 uint32_t *ptr32; 4254 int retval = DDI_FAILURE; 4255 int dontcare; 4256 uint16_t read_vid, read_did, vendor_id, device_id; 4257 4258 if (!myri10ge_nvidia_ecrc_enable) 4259 return; 4260 4261 parent_dip = ddi_get_parent(mgp->dip); 4262 if (parent_dip == NULL) { 4263 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name); 4264 return; 4265 } 4266 4267 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) { 4268 cmn_err(CE_WARN, 4269 "%s: Could not access my parent's registers", mgp->name); 4270 return; 4271 } 4272 4273 vendor_id = pci_config_get16(handle, PCI_CONF_VENID); 4274 device_id = pci_config_get16(handle, PCI_CONF_DEVID); 4275 pci_config_teardown(&handle); 4276 4277 if (myri10ge_verbose) { 4278 unsigned long bus_number, dev_number, func_number; 4279 int reg_set, span; 4280 (void) myri10ge_reg_set(parent_dip, ®_set, &span, 4281 &bus_number, &dev_number, &func_number); 4282 if (myri10ge_verbose) 4283 printf("%s: parent at %ld:%ld:%ld\n", mgp->name, 4284 bus_number, dev_number, func_number); 4285 } 4286 4287 if (vendor_id != 0x10de) 4288 return; 4289 4290 if (device_id != 0x005d /* CK804 */ && 4291 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) { 4292 return; 4293 } 4294 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare, 4295 &bus_number, &dev_number, &func_number); 4296 4297 for (cfg_pa = 0xf0000000UL; 4298 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL; 4299 cfg_pa -= 0x10000000UL) { 4300 /* find the config space address for the nvidia bridge */ 4301 paddr = (cfg_pa + bus_number * 0x00100000UL + 4302 (dev_number * 8 + func_number) * 0x00001000UL); 4303 4304 base = paddr & (~MMU_PAGEOFFSET); 4305 pgoffset = paddr & MMU_PAGEOFFSET; 4306 4307 /* map it into the kernel */ 4308 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP); 4309 if (cvaddr == NULL) 4310 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n", 4311 mgp->name); 4312 4313 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1), 4314 i_ddi_paddr_to_pfn(base), 4315 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK); 4316 4317 ptr = cvaddr + pgoffset; 4318 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID); 4319 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID); 4320 if (vendor_id == read_did || device_id == read_did) { 4321 ptr32 = (uint32_t *)(void *)(ptr + 0x178); 4322 if (myri10ge_verbose) 4323 printf("%s: Enabling ECRC on upstream " 4324 "Nvidia bridge (0x%x:0x%x) " 4325 "at %ld:%ld:%ld\n", mgp->name, 4326 read_vid, read_did, bus_number, 4327 dev_number, func_number); 4328 *ptr32 |= 0x40; 4329 retval = DDI_SUCCESS; 4330 } 4331 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK); 4332 device_arena_free(cvaddr, ptob(1)); 4333 } 4334 } 4335 4336 #else 4337 /*ARGSUSED*/ 4338 static void 4339 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4340 { 4341 } 4342 #endif /* i386 */ 4343 4344 4345 /* 4346 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 4347 * when the PCI-E Completion packets are aligned on an 8-byte 4348 * boundary. Some PCI-E chip sets always align Completion packets; on 4349 * the ones that do not, the alignment can be enforced by enabling 4350 * ECRC generation (if supported). 4351 * 4352 * When PCI-E Completion packets are not aligned, it is actually more 4353 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 4354 * 4355 * If the driver can neither enable ECRC nor verify that it has 4356 * already been enabled, then it must use a firmware image which works 4357 * around unaligned completion packets (ethp_z8e.dat), and it should 4358 * also ensure that it never gives the device a Read-DMA which is 4359 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 4360 * enabled, then the driver should use the aligned (eth_z8e.dat) 4361 * firmware image, and set tx.boundary to 4KB. 4362 */ 4363 4364 4365 static int 4366 myri10ge_firmware_probe(struct myri10ge_priv *mgp) 4367 { 4368 int status; 4369 4370 mgp->tx_boundary = 4096; 4371 /* 4372 * Verify the max read request size was set to 4KB 4373 * before trying the test with 4KB. 4374 */ 4375 if (mgp->max_read_request_4k == 0) 4376 mgp->tx_boundary = 2048; 4377 /* 4378 * load the optimized firmware which assumes aligned PCIe 4379 * completions in order to see if it works on this host. 4380 */ 4381 4382 mgp->fw_name = "rss_eth_z8e"; 4383 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4384 mgp->eth_z8e_length = rss_eth_z8e_length; 4385 4386 status = myri10ge_load_firmware(mgp); 4387 if (status != 0) { 4388 return (status); 4389 } 4390 /* 4391 * Enable ECRC if possible 4392 */ 4393 myri10ge_enable_nvidia_ecrc(mgp); 4394 4395 /* 4396 * Run a DMA test which watches for unaligned completions and 4397 * aborts on the first one seen. 4398 */ 4399 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 4400 if (status == 0) 4401 return (0); /* keep the aligned firmware */ 4402 4403 if (status != E2BIG) 4404 cmn_err(CE_WARN, "%s: DMA test failed: %d\n", 4405 mgp->name, status); 4406 if (status == ENOSYS) 4407 cmn_err(CE_WARN, "%s: Falling back to ethp! " 4408 "Please install up to date fw\n", mgp->name); 4409 return (status); 4410 } 4411 4412 static int 4413 myri10ge_select_firmware(struct myri10ge_priv *mgp) 4414 { 4415 int aligned; 4416 4417 aligned = 0; 4418 4419 if (myri10ge_force_firmware == 1) { 4420 if (myri10ge_verbose) 4421 printf("%s: Assuming aligned completions (forced)\n", 4422 mgp->name); 4423 aligned = 1; 4424 goto done; 4425 } 4426 4427 if (myri10ge_force_firmware == 2) { 4428 if (myri10ge_verbose) 4429 printf("%s: Assuming unaligned completions (forced)\n", 4430 mgp->name); 4431 aligned = 0; 4432 goto done; 4433 } 4434 4435 /* If the width is less than 8, we may used the aligned firmware */ 4436 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) { 4437 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n", 4438 mgp->name, mgp->pcie_link_width); 4439 aligned = 1; 4440 goto done; 4441 } 4442 4443 if (0 == myri10ge_firmware_probe(mgp)) 4444 return (0); /* keep optimized firmware */ 4445 4446 done: 4447 if (aligned) { 4448 mgp->fw_name = "rss_eth_z8e"; 4449 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4450 mgp->eth_z8e_length = rss_eth_z8e_length; 4451 mgp->tx_boundary = 4096; 4452 } else { 4453 mgp->fw_name = "rss_ethp_z8e"; 4454 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e; 4455 mgp->eth_z8e_length = rss_ethp_z8e_length; 4456 mgp->tx_boundary = 2048; 4457 } 4458 4459 return (myri10ge_load_firmware(mgp)); 4460 } 4461 4462 static int 4463 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler) 4464 { 4465 dev_info_t *devinfo = mgp->dip; 4466 int count, avail, actual, intr_types; 4467 int x, y, rc, inum = 0; 4468 4469 4470 rc = ddi_intr_get_supported_types(devinfo, &intr_types); 4471 if (rc != DDI_SUCCESS) { 4472 cmn_err(CE_WARN, 4473 "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name, 4474 rc); 4475 return (DDI_FAILURE); 4476 } 4477 4478 if (!myri10ge_use_msi) 4479 intr_types &= ~DDI_INTR_TYPE_MSI; 4480 if (!myri10ge_use_msix) 4481 intr_types &= ~DDI_INTR_TYPE_MSIX; 4482 4483 if (intr_types & DDI_INTR_TYPE_MSIX) { 4484 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX; 4485 mgp->intr_type = "MSI-X"; 4486 } else if (intr_types & DDI_INTR_TYPE_MSI) { 4487 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI; 4488 mgp->intr_type = "MSI"; 4489 } else { 4490 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED; 4491 mgp->intr_type = "Legacy"; 4492 } 4493 /* Get number of interrupts */ 4494 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count); 4495 if ((rc != DDI_SUCCESS) || (count == 0)) { 4496 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, " 4497 "count: %d", mgp->name, rc, count); 4498 4499 return (DDI_FAILURE); 4500 } 4501 4502 /* Get number of available interrupts */ 4503 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail); 4504 if ((rc != DDI_SUCCESS) || (avail == 0)) { 4505 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, " 4506 "rc: %d, avail: %d\n", mgp->name, rc, avail); 4507 return (DDI_FAILURE); 4508 } 4509 if (avail < count) { 4510 cmn_err(CE_NOTE, 4511 "!%s: nintrs() returned %d, navail returned %d", 4512 mgp->name, count, avail); 4513 count = avail; 4514 } 4515 4516 if (count < mgp->num_slices) 4517 return (DDI_FAILURE); 4518 4519 if (count > mgp->num_slices) 4520 count = mgp->num_slices; 4521 4522 /* Allocate memory for MSI interrupts */ 4523 mgp->intr_size = count * sizeof (ddi_intr_handle_t); 4524 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP); 4525 4526 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum, 4527 count, &actual, DDI_INTR_ALLOC_NORMAL); 4528 4529 if ((rc != DDI_SUCCESS) || (actual == 0)) { 4530 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d", 4531 mgp->name, rc); 4532 4533 kmem_free(mgp->htable, mgp->intr_size); 4534 mgp->htable = NULL; 4535 return (DDI_FAILURE); 4536 } 4537 4538 if ((actual < count) && myri10ge_verbose) { 4539 cmn_err(CE_NOTE, "%s: got %d/%d slices", 4540 mgp->name, actual, count); 4541 } 4542 4543 mgp->intr_cnt = actual; 4544 4545 /* 4546 * Get priority for first irq, assume remaining are all the same 4547 */ 4548 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri) 4549 != DDI_SUCCESS) { 4550 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name); 4551 4552 /* Free already allocated intr */ 4553 for (y = 0; y < actual; y++) { 4554 (void) ddi_intr_free(mgp->htable[y]); 4555 } 4556 4557 kmem_free(mgp->htable, mgp->intr_size); 4558 mgp->htable = NULL; 4559 return (DDI_FAILURE); 4560 } 4561 4562 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri; 4563 4564 if (!add_handler) 4565 return (DDI_SUCCESS); 4566 4567 /* Call ddi_intr_add_handler() */ 4568 for (x = 0; x < actual; x++) { 4569 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr, 4570 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) { 4571 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed", 4572 mgp->name); 4573 4574 /* Free already allocated intr */ 4575 for (y = 0; y < actual; y++) { 4576 (void) ddi_intr_free(mgp->htable[y]); 4577 } 4578 4579 kmem_free(mgp->htable, mgp->intr_size); 4580 mgp->htable = NULL; 4581 return (DDI_FAILURE); 4582 } 4583 } 4584 4585 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap); 4586 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4587 /* Call ddi_intr_block_enable() for MSI */ 4588 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt); 4589 } else { 4590 /* Call ddi_intr_enable() for MSI non block enable */ 4591 for (x = 0; x < mgp->intr_cnt; x++) { 4592 (void) ddi_intr_enable(mgp->htable[x]); 4593 } 4594 } 4595 4596 return (DDI_SUCCESS); 4597 } 4598 4599 static void 4600 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed) 4601 { 4602 int x, err; 4603 4604 /* Disable all interrupts */ 4605 if (handler_installed) { 4606 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4607 /* Call ddi_intr_block_disable() */ 4608 (void) ddi_intr_block_disable(mgp->htable, 4609 mgp->intr_cnt); 4610 } else { 4611 for (x = 0; x < mgp->intr_cnt; x++) { 4612 (void) ddi_intr_disable(mgp->htable[x]); 4613 } 4614 } 4615 } 4616 4617 for (x = 0; x < mgp->intr_cnt; x++) { 4618 if (handler_installed) { 4619 /* Call ddi_intr_remove_handler() */ 4620 err = ddi_intr_remove_handler(mgp->htable[x]); 4621 if (err != DDI_SUCCESS) { 4622 cmn_err(CE_WARN, 4623 "%s: ddi_intr_remove_handler for" 4624 "vec %d returned %d\n", mgp->name, 4625 x, err); 4626 } 4627 } 4628 err = ddi_intr_free(mgp->htable[x]); 4629 if (err != DDI_SUCCESS) { 4630 cmn_err(CE_WARN, 4631 "%s: ddi_intr_free for vec %d returned %d\n", 4632 mgp->name, x, err); 4633 } 4634 } 4635 kmem_free(mgp->htable, mgp->intr_size); 4636 mgp->htable = NULL; 4637 } 4638 4639 static void 4640 myri10ge_test_physical(dev_info_t *dip) 4641 { 4642 ddi_dma_handle_t handle; 4643 struct myri10ge_dma_stuff dma; 4644 void *addr; 4645 int err; 4646 4647 /* test #1, sufficient for older sparc systems */ 4648 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 4649 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr, 4650 DDI_DMA_DONTWAIT, NULL, &handle); 4651 if (err == DDI_DMA_BADATTR) 4652 goto fail; 4653 ddi_dma_free_handle(&handle); 4654 4655 /* test #2, required on Olympis where the bind is what fails */ 4656 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr, 4657 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, 4658 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT); 4659 if (addr == NULL) 4660 goto fail; 4661 myri10ge_dma_free(&dma); 4662 return; 4663 4664 fail: 4665 if (myri10ge_verbose) 4666 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, " 4667 "using IOMMU\n", ddi_get_instance(dip)); 4668 4669 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL; 4670 } 4671 4672 static void 4673 myri10ge_get_props(dev_info_t *dip) 4674 { 4675 4676 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4677 "myri10ge_flow_control", myri10ge_flow_control); 4678 4679 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4680 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay); 4681 4682 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4683 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4684 "myri10ge_nvidia_ecrc_enable", 1); 4685 #endif 4686 4687 4688 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4689 "myri10ge_use_msi", myri10ge_use_msi); 4690 4691 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4692 "myri10ge_deassert_wait", myri10ge_deassert_wait); 4693 4694 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4695 "myri10ge_verbose", myri10ge_verbose); 4696 4697 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4698 "myri10ge_tx_copylen", myri10ge_tx_copylen); 4699 4700 if (myri10ge_tx_copylen < 60) { 4701 cmn_err(CE_WARN, 4702 "myri10ge_tx_copylen must be >= 60 bytes\n"); 4703 myri10ge_tx_copylen = 60; 4704 } 4705 4706 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4707 "myri10ge_mtu_override", myri10ge_mtu_override); 4708 4709 if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU && 4710 myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU) 4711 myri10ge_mtu = myri10ge_mtu_override + 4712 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ; 4713 else if (myri10ge_mtu_override != 0) { 4714 cmn_err(CE_WARN, 4715 "myri10ge_mtu_override must be between 1500 and " 4716 "9000 bytes\n"); 4717 } 4718 4719 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4720 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial); 4721 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4722 "myri10ge_bigbufs_max", myri10ge_bigbufs_max); 4723 4724 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4725 "myri10ge_watchdog_reset", myri10ge_watchdog_reset); 4726 4727 if (myri10ge_bigbufs_initial < 128) { 4728 cmn_err(CE_WARN, 4729 "myri10ge_bigbufs_initial be at least 128\n"); 4730 myri10ge_bigbufs_initial = 128; 4731 } 4732 if (myri10ge_bigbufs_max < 128) { 4733 cmn_err(CE_WARN, 4734 "myri10ge_bigbufs_max be at least 128\n"); 4735 myri10ge_bigbufs_max = 128; 4736 } 4737 4738 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) { 4739 cmn_err(CE_WARN, 4740 "myri10ge_bigbufs_max must be >= " 4741 "myri10ge_bigbufs_initial\n"); 4742 myri10ge_bigbufs_max = myri10ge_bigbufs_initial; 4743 } 4744 4745 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4746 "myri10ge_force_firmware", myri10ge_force_firmware); 4747 4748 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4749 "myri10ge_max_slices", myri10ge_max_slices); 4750 4751 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4752 "myri10ge_use_msix", myri10ge_use_msix); 4753 4754 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4755 "myri10ge_rss_hash", myri10ge_rss_hash); 4756 4757 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX || 4758 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) { 4759 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n", 4760 myri10ge_rss_hash); 4761 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4762 } 4763 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4764 "myri10ge_lro", myri10ge_lro); 4765 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4766 "myri10ge_lro_cnt", myri10ge_lro_cnt); 4767 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4768 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr); 4769 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4770 "myri10ge_tx_hash", myri10ge_tx_hash); 4771 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4772 "myri10ge_use_lso", myri10ge_use_lso); 4773 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4774 "myri10ge_lso_copy", myri10ge_lso_copy); 4775 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4776 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial); 4777 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4778 "myri10ge_small_bytes", myri10ge_small_bytes); 4779 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) { 4780 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n", 4781 myri10ge_small_bytes); 4782 cmn_err(CE_WARN, "must be aligned on 128b bndry -2\n"); 4783 myri10ge_small_bytes += 128; 4784 myri10ge_small_bytes &= ~(128 -1); 4785 myri10ge_small_bytes -= MXGEFW_PAD; 4786 cmn_err(CE_WARN, "rounded up to %d\n", 4787 myri10ge_small_bytes); 4788 4789 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4790 } 4791 } 4792 4793 #ifndef PCI_EXP_LNKSTA 4794 #define PCI_EXP_LNKSTA 18 4795 #endif 4796 4797 static int 4798 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid) 4799 { 4800 uint16_t status; 4801 uint8_t ptr; 4802 4803 /* check to see if we have capabilities */ 4804 status = pci_config_get16(handle, PCI_CONF_STAT); 4805 if (!(status & PCI_STAT_CAP)) { 4806 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n"); 4807 return (ENXIO); 4808 } 4809 4810 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR); 4811 4812 /* Walk the capabilities list, looking for a PCI Express cap */ 4813 while (ptr != PCI_CAP_NEXT_PTR_NULL) { 4814 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid) 4815 break; 4816 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR); 4817 } 4818 if (ptr < 64) { 4819 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr); 4820 return (ENXIO); 4821 } 4822 *capptr = ptr; 4823 return (0); 4824 } 4825 4826 static int 4827 myri10ge_set_max_readreq(ddi_acc_handle_t handle) 4828 { 4829 int err; 4830 uint16_t val; 4831 uint8_t ptr; 4832 4833 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4834 if (err != 0) { 4835 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4836 return (ENXIO); 4837 } 4838 4839 /* set max read req to 4096 */ 4840 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4841 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) | 4842 PCIE_DEVCTL_MAX_READ_REQ_4096; 4843 pci_config_put16(handle, ptr + PCIE_DEVCTL, val); 4844 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4845 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) != 4846 PCIE_DEVCTL_MAX_READ_REQ_4096) { 4847 cmn_err(CE_WARN, "could not set max read req (%x)\n", val); 4848 return (EINVAL); 4849 } 4850 return (0); 4851 } 4852 4853 static int 4854 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link) 4855 { 4856 int err; 4857 uint16_t val; 4858 uint8_t ptr; 4859 4860 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4861 if (err != 0) { 4862 cmn_err(CE_WARN, "could not set max read req\n"); 4863 return (ENXIO); 4864 } 4865 4866 /* read link width */ 4867 val = pci_config_get16(handle, ptr + PCIE_LINKSTS); 4868 val &= PCIE_LINKSTS_NEG_WIDTH_MASK; 4869 *link = (val >> 4); 4870 return (0); 4871 } 4872 4873 static int 4874 myri10ge_reset_nic(struct myri10ge_priv *mgp) 4875 { 4876 ddi_acc_handle_t handle = mgp->cfg_hdl; 4877 uint32_t reboot; 4878 uint16_t cmd; 4879 int err; 4880 4881 cmd = pci_config_get16(handle, PCI_CONF_COMM); 4882 if ((cmd & PCI_COMM_ME) == 0) { 4883 /* 4884 * Bus master DMA disabled? Check to see if the card 4885 * rebooted due to a parity error For now, just report 4886 * it 4887 */ 4888 4889 /* enter read32 mode */ 4890 pci_config_put8(handle, mgp->vso + 0x10, 0x3); 4891 /* read REBOOT_STATUS (0xfffffff0) */ 4892 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0); 4893 reboot = pci_config_get16(handle, mgp->vso + 0x14); 4894 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot); 4895 return (0); 4896 } 4897 if (!myri10ge_watchdog_reset) { 4898 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name); 4899 return (1); 4900 } 4901 4902 myri10ge_stop_locked(mgp); 4903 err = myri10ge_start_locked(mgp); 4904 if (err == DDI_FAILURE) { 4905 return (0); 4906 } 4907 mac_tx_update(mgp->mh); 4908 return (1); 4909 } 4910 4911 static inline int 4912 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx) 4913 { 4914 if (tx->sched != tx->stall && 4915 tx->done == tx->watchdog_done && 4916 tx->watchdog_req != tx->watchdog_done) 4917 return (1); 4918 return (0); 4919 } 4920 4921 static void 4922 myri10ge_watchdog(void *arg) 4923 { 4924 struct myri10ge_priv *mgp; 4925 struct myri10ge_slice_state *ss; 4926 myri10ge_tx_ring_t *tx; 4927 int nic_ok = 1; 4928 int slices_stalled, rx_pause, i; 4929 int add_rx; 4930 4931 mgp = arg; 4932 mutex_enter(&mgp->intrlock); 4933 if (mgp->running != MYRI10GE_ETH_RUNNING) { 4934 cmn_err(CE_WARN, 4935 "%s not running, not rearming watchdog (%d)\n", 4936 mgp->name, mgp->running); 4937 mutex_exit(&mgp->intrlock); 4938 return; 4939 } 4940 4941 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause); 4942 4943 /* 4944 * make sure nic is stalled before we reset the nic, so as to 4945 * ensure we don't rip the transmit data structures out from 4946 * under a pending transmit 4947 */ 4948 4949 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) { 4950 tx = &mgp->ss[i].tx; 4951 slices_stalled = myri10ge_ring_stalled(tx); 4952 if (slices_stalled) 4953 break; 4954 } 4955 4956 if (slices_stalled) { 4957 if (mgp->watchdog_rx_pause == rx_pause) { 4958 cmn_err(CE_WARN, 4959 "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)", 4960 mgp->name, i, tx->sched, tx->stall, 4961 tx->done, tx->watchdog_done, tx->req, tx->pkt_done, 4962 (int)ntohl(mgp->ss[i].fw_stats->send_done_count)); 4963 nic_ok = myri10ge_reset_nic(mgp); 4964 } else { 4965 cmn_err(CE_WARN, 4966 "%s Flow controlled, check link partner\n", 4967 mgp->name); 4968 } 4969 } 4970 4971 if (!nic_ok) { 4972 cmn_err(CE_WARN, 4973 "%s Nic dead, not rearming watchdog\n", mgp->name); 4974 mutex_exit(&mgp->intrlock); 4975 return; 4976 } 4977 for (i = 0; i < mgp->num_slices; i++) { 4978 ss = &mgp->ss[i]; 4979 tx = &ss->tx; 4980 tx->watchdog_done = tx->done; 4981 tx->watchdog_req = tx->req; 4982 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) { 4983 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy); 4984 add_rx = 4985 min(ss->jpool.num_alloc, 4986 myri10ge_bigbufs_max - 4987 (ss->jpool.num_alloc - 4988 ss->jbufs_for_smalls)); 4989 if (add_rx != 0) { 4990 (void) myri10ge_add_jbufs(ss, add_rx, 0); 4991 /* now feed them to the firmware */ 4992 mutex_enter(&ss->jpool.mtx); 4993 myri10ge_restock_jumbos(ss); 4994 mutex_exit(&ss->jpool.mtx); 4995 } 4996 } 4997 } 4998 mgp->watchdog_rx_pause = rx_pause; 4999 5000 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 5001 mgp->timer_ticks); 5002 mutex_exit(&mgp->intrlock); 5003 } 5004 5005 /*ARGSUSED*/ 5006 static int 5007 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5008 5009 { 5010 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5011 (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay); 5012 return (0); 5013 } 5014 5015 /*ARGSUSED*/ 5016 static int 5017 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value, 5018 caddr_t cp, cred_t *credp) 5019 5020 { 5021 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5022 char *end; 5023 size_t new_value; 5024 5025 new_value = mi_strtol(value, &end, 10); 5026 if (end == value) 5027 return (EINVAL); 5028 5029 mutex_enter(&myri10ge_param_lock); 5030 mgp->intr_coal_delay = (int)new_value; 5031 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay); 5032 mutex_exit(&myri10ge_param_lock); 5033 return (0); 5034 } 5035 5036 /*ARGSUSED*/ 5037 static int 5038 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5039 5040 { 5041 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5042 (void) mi_mpprintf(mp, "%d", mgp->pause); 5043 return (0); 5044 } 5045 5046 /*ARGSUSED*/ 5047 static int 5048 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value, 5049 caddr_t cp, cred_t *credp) 5050 5051 { 5052 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5053 char *end; 5054 size_t new_value; 5055 int err = 0; 5056 5057 new_value = mi_strtol(value, &end, 10); 5058 if (end == value) 5059 return (EINVAL); 5060 if (new_value != 0) 5061 new_value = 1; 5062 5063 mutex_enter(&myri10ge_param_lock); 5064 if (new_value != mgp->pause) 5065 err = myri10ge_change_pause(mgp, new_value); 5066 mutex_exit(&myri10ge_param_lock); 5067 return (err); 5068 } 5069 5070 /*ARGSUSED*/ 5071 static int 5072 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5073 5074 { 5075 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp); 5076 return (0); 5077 } 5078 5079 /*ARGSUSED*/ 5080 static int 5081 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value, 5082 caddr_t cp, cred_t *credp) 5083 5084 { 5085 char *end; 5086 size_t new_value; 5087 5088 new_value = mi_strtol(value, &end, 10); 5089 if (end == value) 5090 return (EINVAL); 5091 *(int *)(void *)cp = new_value; 5092 5093 return (0); 5094 } 5095 5096 static void 5097 myri10ge_ndd_init(struct myri10ge_priv *mgp) 5098 { 5099 mgp->nd_head = NULL; 5100 5101 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay", 5102 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp); 5103 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control", 5104 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp); 5105 (void) nd_load(&mgp->nd_head, "myri10ge_verbose", 5106 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose); 5107 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait", 5108 myri10ge_get_int, myri10ge_set_int, 5109 (caddr_t)&myri10ge_deassert_wait); 5110 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max", 5111 myri10ge_get_int, myri10ge_set_int, 5112 (caddr_t)&myri10ge_bigbufs_max); 5113 (void) nd_load(&mgp->nd_head, "myri10ge_lro", 5114 myri10ge_get_int, myri10ge_set_int, 5115 (caddr_t)&myri10ge_lro); 5116 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr", 5117 myri10ge_get_int, myri10ge_set_int, 5118 (caddr_t)&myri10ge_lro_max_aggr); 5119 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash", 5120 myri10ge_get_int, myri10ge_set_int, 5121 (caddr_t)&myri10ge_tx_hash); 5122 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy", 5123 myri10ge_get_int, myri10ge_set_int, 5124 (caddr_t)&myri10ge_lso_copy); 5125 } 5126 5127 static void 5128 myri10ge_ndd_fini(struct myri10ge_priv *mgp) 5129 { 5130 nd_free(&mgp->nd_head); 5131 } 5132 5133 static void 5134 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) 5135 { 5136 struct iocblk *iocp; 5137 struct myri10ge_priv *mgp = arg; 5138 int cmd, ok, err; 5139 5140 iocp = (struct iocblk *)(void *)mp->b_rptr; 5141 cmd = iocp->ioc_cmd; 5142 5143 ok = 0; 5144 err = 0; 5145 5146 switch (cmd) { 5147 case ND_GET: 5148 case ND_SET: 5149 ok = nd_getset(wq, mgp->nd_head, mp); 5150 break; 5151 default: 5152 break; 5153 } 5154 if (!ok) 5155 err = EINVAL; 5156 else 5157 err = iocp->ioc_error; 5158 5159 if (!err) 5160 miocack(wq, mp, iocp->ioc_count, err); 5161 else 5162 miocnak(wq, mp, 0, err); 5163 } 5164 5165 static struct myri10ge_priv *mgp_list; 5166 5167 struct myri10ge_priv * 5168 myri10ge_get_instance(uint_t unit) 5169 { 5170 struct myri10ge_priv *mgp; 5171 5172 mutex_enter(&myri10ge_param_lock); 5173 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) { 5174 if (unit == ddi_get_instance(mgp->dip)) { 5175 mgp->refcnt++; 5176 break; 5177 } 5178 } 5179 mutex_exit(&myri10ge_param_lock); 5180 return (mgp); 5181 } 5182 5183 void 5184 myri10ge_put_instance(struct myri10ge_priv *mgp) 5185 { 5186 mutex_enter(&myri10ge_param_lock); 5187 mgp->refcnt--; 5188 mutex_exit(&myri10ge_param_lock); 5189 } 5190 5191 static boolean_t 5192 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) 5193 { 5194 struct myri10ge_priv *mgp = arg; 5195 uint32_t *cap_hcksum; 5196 mac_capab_lso_t *cap_lso; 5197 mac_capab_rings_t *cap_rings; 5198 5199 switch (cap) { 5200 case MAC_CAPAB_HCKSUM: 5201 cap_hcksum = cap_data; 5202 *cap_hcksum = HCKSUM_INET_PARTIAL; 5203 break; 5204 case MAC_CAPAB_RINGS: 5205 cap_rings = cap_data; 5206 switch (cap_rings->mr_type) { 5207 case MAC_RING_TYPE_RX: 5208 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5209 cap_rings->mr_rnum = mgp->num_slices; 5210 cap_rings->mr_gnum = 1; 5211 cap_rings->mr_rget = myri10ge_fill_ring; 5212 cap_rings->mr_gget = myri10ge_fill_group; 5213 break; 5214 case MAC_RING_TYPE_TX: 5215 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5216 cap_rings->mr_rnum = mgp->num_slices; 5217 cap_rings->mr_gnum = 0; 5218 cap_rings->mr_rget = myri10ge_fill_ring; 5219 cap_rings->mr_gget = NULL; 5220 break; 5221 default: 5222 return (B_FALSE); 5223 } 5224 break; 5225 case MAC_CAPAB_LSO: 5226 cap_lso = cap_data; 5227 if (!myri10ge_use_lso) 5228 return (B_FALSE); 5229 if (!(mgp->features & MYRI10GE_TSO)) 5230 return (B_FALSE); 5231 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; 5232 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1; 5233 break; 5234 5235 default: 5236 return (B_FALSE); 5237 } 5238 return (B_TRUE); 5239 } 5240 5241 5242 static int 5243 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val) 5244 { 5245 struct myri10ge_priv *mgp = arg; 5246 struct myri10ge_rx_ring_stats *rstat; 5247 struct myri10ge_tx_ring_stats *tstat; 5248 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats; 5249 struct myri10ge_slice_state *ss; 5250 uint64_t tmp = 0; 5251 int i; 5252 5253 switch (stat) { 5254 case MAC_STAT_IFSPEED: 5255 *val = 10ull * 1000ull * 1000000ull; 5256 break; 5257 5258 case MAC_STAT_MULTIRCV: 5259 for (i = 0; i < mgp->num_slices; i++) { 5260 rstat = &mgp->ss[i].rx_stats; 5261 tmp += rstat->multircv; 5262 } 5263 *val = tmp; 5264 break; 5265 5266 case MAC_STAT_BRDCSTRCV: 5267 for (i = 0; i < mgp->num_slices; i++) { 5268 rstat = &mgp->ss[i].rx_stats; 5269 tmp += rstat->brdcstrcv; 5270 } 5271 *val = tmp; 5272 break; 5273 5274 case MAC_STAT_MULTIXMT: 5275 for (i = 0; i < mgp->num_slices; i++) { 5276 tstat = &mgp->ss[i].tx.stats; 5277 tmp += tstat->multixmt; 5278 } 5279 *val = tmp; 5280 break; 5281 5282 case MAC_STAT_BRDCSTXMT: 5283 for (i = 0; i < mgp->num_slices; i++) { 5284 tstat = &mgp->ss[i].tx.stats; 5285 tmp += tstat->brdcstxmt; 5286 } 5287 *val = tmp; 5288 break; 5289 5290 case MAC_STAT_NORCVBUF: 5291 tmp = ntohl(fw_stats->dropped_no_big_buffer); 5292 tmp += ntohl(fw_stats->dropped_no_small_buffer); 5293 tmp += ntohl(fw_stats->dropped_link_overflow); 5294 for (i = 0; i < mgp->num_slices; i++) { 5295 ss = &mgp->ss[i]; 5296 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf); 5297 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf); 5298 } 5299 *val = tmp; 5300 break; 5301 5302 case MAC_STAT_IERRORS: 5303 tmp += ntohl(fw_stats->dropped_bad_crc32); 5304 tmp += ntohl(fw_stats->dropped_bad_phy); 5305 tmp += ntohl(fw_stats->dropped_runt); 5306 tmp += ntohl(fw_stats->dropped_overrun); 5307 *val = tmp; 5308 break; 5309 5310 case MAC_STAT_OERRORS: 5311 for (i = 0; i < mgp->num_slices; i++) { 5312 ss = &mgp->ss[i]; 5313 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags); 5314 tmp += MYRI10GE_SLICE_STAT(xmit_err); 5315 } 5316 *val = tmp; 5317 break; 5318 5319 case MAC_STAT_RBYTES: 5320 for (i = 0; i < mgp->num_slices; i++) { 5321 rstat = &mgp->ss[i].rx_stats; 5322 tmp += rstat->ibytes; 5323 } 5324 *val = tmp; 5325 break; 5326 5327 case MAC_STAT_IPACKETS: 5328 for (i = 0; i < mgp->num_slices; i++) { 5329 rstat = &mgp->ss[i].rx_stats; 5330 tmp += rstat->ipackets; 5331 } 5332 *val = tmp; 5333 break; 5334 5335 case MAC_STAT_OBYTES: 5336 for (i = 0; i < mgp->num_slices; i++) { 5337 tstat = &mgp->ss[i].tx.stats; 5338 tmp += tstat->obytes; 5339 } 5340 *val = tmp; 5341 break; 5342 5343 case MAC_STAT_OPACKETS: 5344 for (i = 0; i < mgp->num_slices; i++) { 5345 tstat = &mgp->ss[i].tx.stats; 5346 tmp += tstat->opackets; 5347 } 5348 *val = tmp; 5349 break; 5350 5351 case ETHER_STAT_TOOLONG_ERRORS: 5352 *val = ntohl(fw_stats->dropped_overrun); 5353 break; 5354 5355 #ifdef SOLARIS_S11 5356 case ETHER_STAT_TOOSHORT_ERRORS: 5357 *val = ntohl(fw_stats->dropped_runt); 5358 break; 5359 #endif 5360 5361 case ETHER_STAT_LINK_PAUSE: 5362 *val = mgp->pause; 5363 break; 5364 5365 case ETHER_STAT_LINK_AUTONEG: 5366 *val = 1; 5367 break; 5368 5369 case ETHER_STAT_LINK_DUPLEX: 5370 *val = LINK_DUPLEX_FULL; 5371 break; 5372 5373 default: 5374 return (ENOTSUP); 5375 } 5376 5377 return (0); 5378 } 5379 5380 /* ARGSUSED */ 5381 static void 5382 myri10ge_m_propinfo(void *arg, const char *pr_name, 5383 mac_prop_id_t pr_num, mac_prop_info_handle_t prh) 5384 { 5385 switch (pr_num) { 5386 case MAC_PROP_MTU: 5387 mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU); 5388 mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU, 5389 MYRI10GE_MAX_GLD_MTU); 5390 break; 5391 default: 5392 break; 5393 } 5394 } 5395 5396 /*ARGSUSED*/ 5397 static int 5398 myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 5399 uint_t pr_valsize, const void *pr_val) 5400 { 5401 int err = 0; 5402 struct myri10ge_priv *mgp = arg; 5403 5404 switch (pr_num) { 5405 case MAC_PROP_MTU: { 5406 uint32_t mtu; 5407 if (pr_valsize < sizeof (mtu)) { 5408 err = EINVAL; 5409 break; 5410 } 5411 bcopy(pr_val, &mtu, sizeof (mtu)); 5412 if (mtu > MYRI10GE_MAX_GLD_MTU || 5413 mtu < MYRI10GE_MIN_GLD_MTU) { 5414 err = EINVAL; 5415 break; 5416 } 5417 5418 mutex_enter(&mgp->intrlock); 5419 if (mgp->running != MYRI10GE_ETH_STOPPED) { 5420 err = EBUSY; 5421 mutex_exit(&mgp->intrlock); 5422 break; 5423 } 5424 5425 myri10ge_mtu = mtu + sizeof (struct ether_header) + 5426 MXGEFW_PAD + VLAN_TAGSZ; 5427 mutex_exit(&mgp->intrlock); 5428 break; 5429 } 5430 default: 5431 err = ENOTSUP; 5432 break; 5433 } 5434 5435 return (err); 5436 } 5437 5438 static mac_callbacks_t myri10ge_m_callbacks = { 5439 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO), 5440 myri10ge_m_stat, 5441 myri10ge_m_start, 5442 myri10ge_m_stop, 5443 myri10ge_m_promisc, 5444 myri10ge_m_multicst, 5445 NULL, 5446 NULL, 5447 NULL, 5448 myri10ge_m_ioctl, 5449 myri10ge_m_getcapab, 5450 NULL, 5451 NULL, 5452 myri10ge_m_setprop, 5453 NULL, 5454 myri10ge_m_propinfo 5455 }; 5456 5457 5458 static int 5459 myri10ge_probe_slices(struct myri10ge_priv *mgp) 5460 { 5461 myri10ge_cmd_t cmd; 5462 int status; 5463 5464 mgp->num_slices = 1; 5465 5466 /* hit the board with a reset to ensure it is alive */ 5467 (void) memset(&cmd, 0, sizeof (cmd)); 5468 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd); 5469 if (status != 0) { 5470 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 5471 return (ENXIO); 5472 } 5473 5474 if (myri10ge_use_msix == 0) 5475 return (0); 5476 5477 /* tell it the size of the interrupt queues */ 5478 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot); 5479 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 5480 if (status != 0) { 5481 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n", 5482 mgp->name); 5483 return (ENXIO); 5484 } 5485 5486 /* ask the maximum number of slices it supports */ 5487 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 5488 &cmd); 5489 if (status != 0) 5490 return (0); 5491 5492 mgp->num_slices = cmd.data0; 5493 5494 /* 5495 * if the admin did not specify a limit to how many 5496 * slices we should use, cap it automatically to the 5497 * number of CPUs currently online 5498 */ 5499 if (myri10ge_max_slices == -1) 5500 myri10ge_max_slices = ncpus; 5501 5502 if (mgp->num_slices > myri10ge_max_slices) 5503 mgp->num_slices = myri10ge_max_slices; 5504 5505 5506 /* 5507 * Now try to allocate as many MSI-X vectors as we have 5508 * slices. We give up on MSI-X if we can only get a single 5509 * vector. 5510 */ 5511 while (mgp->num_slices > 1) { 5512 /* make sure it is a power of two */ 5513 while (!ISP2(mgp->num_slices)) 5514 mgp->num_slices--; 5515 if (mgp->num_slices == 1) 5516 return (0); 5517 5518 status = myri10ge_add_intrs(mgp, 0); 5519 if (status == 0) { 5520 myri10ge_rem_intrs(mgp, 0); 5521 if (mgp->intr_cnt == mgp->num_slices) { 5522 if (myri10ge_verbose) 5523 printf("Got %d slices!\n", 5524 mgp->num_slices); 5525 return (0); 5526 } 5527 mgp->num_slices = mgp->intr_cnt; 5528 } else { 5529 mgp->num_slices = mgp->num_slices / 2; 5530 } 5531 } 5532 5533 if (myri10ge_verbose) 5534 printf("Got %d slices\n", mgp->num_slices); 5535 return (0); 5536 } 5537 5538 static void 5539 myri10ge_lro_free(struct myri10ge_slice_state *ss) 5540 { 5541 struct lro_entry *lro; 5542 5543 while (ss->lro_free != NULL) { 5544 lro = ss->lro_free; 5545 ss->lro_free = lro->next; 5546 kmem_free(lro, sizeof (*lro)); 5547 } 5548 } 5549 5550 static void 5551 myri10ge_lro_alloc(struct myri10ge_slice_state *ss) 5552 { 5553 struct lro_entry *lro; 5554 int idx; 5555 5556 ss->lro_free = NULL; 5557 ss->lro_active = NULL; 5558 5559 for (idx = 0; idx < myri10ge_lro_cnt; idx++) { 5560 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP); 5561 if (lro == NULL) 5562 continue; 5563 lro->next = ss->lro_free; 5564 ss->lro_free = lro; 5565 } 5566 } 5567 5568 static void 5569 myri10ge_free_slices(struct myri10ge_priv *mgp) 5570 { 5571 struct myri10ge_slice_state *ss; 5572 size_t bytes; 5573 int i; 5574 5575 if (mgp->ss == NULL) 5576 return; 5577 5578 for (i = 0; i < mgp->num_slices; i++) { 5579 ss = &mgp->ss[i]; 5580 if (ss->rx_done.entry == NULL) 5581 continue; 5582 myri10ge_dma_free(&ss->rx_done.dma); 5583 ss->rx_done.entry = NULL; 5584 if (ss->fw_stats == NULL) 5585 continue; 5586 myri10ge_dma_free(&ss->fw_stats_dma); 5587 ss->fw_stats = NULL; 5588 mutex_destroy(&ss->rx_lock); 5589 mutex_destroy(&ss->tx.lock); 5590 mutex_destroy(&ss->tx.handle_lock); 5591 mutex_destroy(&ss->poll_lock); 5592 myri10ge_jpool_fini(ss); 5593 myri10ge_slice_stat_destroy(ss); 5594 myri10ge_lro_free(ss); 5595 } 5596 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5597 kmem_free(mgp->ss, bytes); 5598 mgp->ss = NULL; 5599 } 5600 5601 5602 static int 5603 myri10ge_alloc_slices(struct myri10ge_priv *mgp) 5604 { 5605 struct myri10ge_slice_state *ss; 5606 size_t bytes; 5607 int i; 5608 5609 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5610 mgp->ss = kmem_zalloc(bytes, KM_SLEEP); 5611 if (mgp->ss == NULL) 5612 return (ENOMEM); 5613 for (i = 0; i < mgp->num_slices; i++) { 5614 ss = &mgp->ss[i]; 5615 5616 ss->mgp = mgp; 5617 5618 /* allocate the per-slice firmware stats */ 5619 bytes = sizeof (*ss->fw_stats); 5620 ss->fw_stats = (mcp_irq_data_t *)(void *) 5621 myri10ge_dma_alloc(mgp->dip, bytes, 5622 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5623 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5624 &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT); 5625 if (ss->fw_stats == NULL) 5626 goto abort; 5627 (void) memset(ss->fw_stats, 0, bytes); 5628 5629 /* allocate rx done ring */ 5630 bytes = mgp->max_intr_slots * 5631 sizeof (*ss->rx_done.entry); 5632 ss->rx_done.entry = (mcp_slot_t *)(void *) 5633 myri10ge_dma_alloc(mgp->dip, bytes, 5634 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5635 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5636 &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT); 5637 if (ss->rx_done.entry == NULL) { 5638 goto abort; 5639 } 5640 (void) memset(ss->rx_done.entry, 0, bytes); 5641 mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie); 5642 mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL); 5643 mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL); 5644 mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL); 5645 myri10ge_jpool_init(ss); 5646 (void) myri10ge_slice_stat_init(ss); 5647 myri10ge_lro_alloc(ss); 5648 } 5649 5650 return (0); 5651 5652 abort: 5653 myri10ge_free_slices(mgp); 5654 return (ENOMEM); 5655 } 5656 5657 static int 5658 myri10ge_save_msi_state(struct myri10ge_priv *mgp, 5659 ddi_acc_handle_t handle) 5660 { 5661 uint8_t ptr; 5662 int err; 5663 5664 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5665 if (err != 0) { 5666 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5667 mgp->name); 5668 return (DDI_FAILURE); 5669 } 5670 mgp->pci_saved_state.msi_ctrl = 5671 pci_config_get16(handle, ptr + PCI_MSI_CTRL); 5672 mgp->pci_saved_state.msi_addr_low = 5673 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET); 5674 mgp->pci_saved_state.msi_addr_high = 5675 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4); 5676 mgp->pci_saved_state.msi_data_32 = 5677 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA); 5678 mgp->pci_saved_state.msi_data_64 = 5679 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA); 5680 return (DDI_SUCCESS); 5681 } 5682 5683 static int 5684 myri10ge_restore_msi_state(struct myri10ge_priv *mgp, 5685 ddi_acc_handle_t handle) 5686 { 5687 uint8_t ptr; 5688 int err; 5689 5690 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5691 if (err != 0) { 5692 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5693 mgp->name); 5694 return (DDI_FAILURE); 5695 } 5696 5697 pci_config_put16(handle, ptr + PCI_MSI_CTRL, 5698 mgp->pci_saved_state.msi_ctrl); 5699 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET, 5700 mgp->pci_saved_state.msi_addr_low); 5701 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4, 5702 mgp->pci_saved_state.msi_addr_high); 5703 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA, 5704 mgp->pci_saved_state.msi_data_32); 5705 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA, 5706 mgp->pci_saved_state.msi_data_64); 5707 5708 return (DDI_SUCCESS); 5709 } 5710 5711 static int 5712 myri10ge_save_pci_state(struct myri10ge_priv *mgp) 5713 { 5714 ddi_acc_handle_t handle = mgp->cfg_hdl; 5715 int i; 5716 int err = DDI_SUCCESS; 5717 5718 5719 /* Save the non-extended PCI config space 32-bits at a time */ 5720 for (i = 0; i < 16; i++) 5721 mgp->pci_saved_state.base[i] = 5722 pci_config_get32(handle, i*4); 5723 5724 /* now save MSI interrupt state *, if needed */ 5725 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5726 err = myri10ge_save_msi_state(mgp, handle); 5727 5728 return (err); 5729 } 5730 5731 static int 5732 myri10ge_restore_pci_state(struct myri10ge_priv *mgp) 5733 { 5734 ddi_acc_handle_t handle = mgp->cfg_hdl; 5735 int i; 5736 int err = DDI_SUCCESS; 5737 5738 5739 /* Restore the non-extended PCI config space 32-bits at a time */ 5740 for (i = 15; i >= 0; i--) 5741 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]); 5742 5743 /* now restore MSI interrupt state *, if needed */ 5744 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5745 err = myri10ge_restore_msi_state(mgp, handle); 5746 5747 if (mgp->max_read_request_4k) 5748 (void) myri10ge_set_max_readreq(handle); 5749 return (err); 5750 } 5751 5752 5753 static int 5754 myri10ge_suspend(dev_info_t *dip) 5755 { 5756 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5757 int status; 5758 5759 if (mgp == NULL) { 5760 cmn_err(CE_WARN, "null dip in myri10ge_suspend\n"); 5761 return (DDI_FAILURE); 5762 } 5763 if (mgp->dip != dip) { 5764 cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n"); 5765 return (DDI_FAILURE); 5766 } 5767 mutex_enter(&mgp->intrlock); 5768 if (mgp->running == MYRI10GE_ETH_RUNNING) { 5769 mgp->running = MYRI10GE_ETH_STOPPING; 5770 mutex_exit(&mgp->intrlock); 5771 (void) untimeout(mgp->timer_id); 5772 mutex_enter(&mgp->intrlock); 5773 myri10ge_stop_locked(mgp); 5774 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING; 5775 } 5776 status = myri10ge_save_pci_state(mgp); 5777 mutex_exit(&mgp->intrlock); 5778 return (status); 5779 } 5780 5781 static int 5782 myri10ge_resume(dev_info_t *dip) 5783 { 5784 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5785 int status = DDI_SUCCESS; 5786 5787 if (mgp == NULL) { 5788 cmn_err(CE_WARN, "null dip in myri10ge_resume\n"); 5789 return (DDI_FAILURE); 5790 } 5791 if (mgp->dip != dip) { 5792 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n"); 5793 return (DDI_FAILURE); 5794 } 5795 5796 mutex_enter(&mgp->intrlock); 5797 status = myri10ge_restore_pci_state(mgp); 5798 if (status == DDI_SUCCESS && 5799 mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) { 5800 status = myri10ge_start_locked(mgp); 5801 } 5802 mutex_exit(&mgp->intrlock); 5803 if (status != DDI_SUCCESS) 5804 return (status); 5805 5806 /* start the watchdog timer */ 5807 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 5808 mgp->timer_ticks); 5809 return (DDI_SUCCESS); 5810 } 5811 5812 static int 5813 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5814 { 5815 5816 struct myri10ge_priv *mgp; 5817 mac_register_t *macp, *omacp; 5818 ddi_acc_handle_t handle; 5819 uint32_t csr, hdr_offset; 5820 int status, span, link_width, max_read_request_4k; 5821 unsigned long bus_number, dev_number, func_number; 5822 size_t bytes; 5823 offset_t ss_offset; 5824 uint8_t vso; 5825 5826 if (cmd == DDI_RESUME) { 5827 return (myri10ge_resume(dip)); 5828 } 5829 5830 if (cmd != DDI_ATTACH) 5831 return (DDI_FAILURE); 5832 if (pci_config_setup(dip, &handle) != DDI_SUCCESS) 5833 return (DDI_FAILURE); 5834 5835 /* enable busmater and io space access */ 5836 csr = pci_config_get32(handle, PCI_CONF_COMM); 5837 pci_config_put32(handle, PCI_CONF_COMM, 5838 (csr |PCI_COMM_ME|PCI_COMM_MAE)); 5839 status = myri10ge_read_pcie_link_width(handle, &link_width); 5840 if (status != 0) { 5841 cmn_err(CE_WARN, "could not read link width!\n"); 5842 link_width = 0; 5843 } 5844 max_read_request_4k = !myri10ge_set_max_readreq(handle); 5845 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS); 5846 if (status != 0) 5847 goto abort_with_cfg_hdl; 5848 if ((omacp = mac_alloc(MAC_VERSION)) == NULL) 5849 goto abort_with_cfg_hdl; 5850 /* 5851 * XXXX Hack: mac_register_t grows in newer kernels. To be 5852 * able to write newer fields, such as m_margin, without 5853 * writing outside allocated memory, we allocate our own macp 5854 * and pass that to mac_register() 5855 */ 5856 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP); 5857 macp->m_version = omacp->m_version; 5858 5859 if ((mgp = (struct myri10ge_priv *) 5860 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) { 5861 goto abort_with_macinfo; 5862 } 5863 ddi_set_driver_private(dip, mgp); 5864 5865 /* setup device name for log messages */ 5866 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip)); 5867 5868 mutex_enter(&myri10ge_param_lock); 5869 myri10ge_get_props(dip); 5870 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 5871 mgp->pause = myri10ge_flow_control; 5872 mutex_exit(&myri10ge_param_lock); 5873 5874 mgp->max_read_request_4k = max_read_request_4k; 5875 mgp->pcie_link_width = link_width; 5876 mgp->running = MYRI10GE_ETH_STOPPED; 5877 mgp->vso = vso; 5878 mgp->dip = dip; 5879 mgp->cfg_hdl = handle; 5880 5881 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */ 5882 myri10ge_test_physical(dip); 5883 5884 /* allocate command page */ 5885 bytes = sizeof (*mgp->cmd); 5886 mgp->cmd = (mcp_cmd_response_t *) 5887 (void *)myri10ge_dma_alloc(dip, bytes, 5888 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5889 DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT, 5890 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT); 5891 if (mgp->cmd == NULL) 5892 goto abort_with_mgp; 5893 5894 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number, 5895 &dev_number, &func_number); 5896 if (myri10ge_verbose) 5897 printf("%s at %ld:%ld:%ld attaching\n", mgp->name, 5898 bus_number, dev_number, func_number); 5899 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram, 5900 (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr, 5901 &mgp->io_handle); 5902 if (status != DDI_SUCCESS) { 5903 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name); 5904 printf("%s: reg_set = %d, span = %d, status = %d", 5905 mgp->name, mgp->reg_set, span, status); 5906 goto abort_with_mgp; 5907 } 5908 5909 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET); 5910 hdr_offset = ntohl(hdr_offset) & 0xffffc; 5911 ss_offset = hdr_offset + 5912 offsetof(struct mcp_gen_header, string_specs); 5913 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset)); 5914 myri10ge_pio_copy32(mgp->eeprom_strings, 5915 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size), 5916 MYRI10GE_EEPROM_STRINGS_SIZE); 5917 (void) memset(mgp->eeprom_strings + 5918 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 5919 5920 status = myri10ge_read_mac_addr(mgp); 5921 if (status) { 5922 goto abort_with_mapped; 5923 } 5924 5925 status = myri10ge_select_firmware(mgp); 5926 if (status != 0) { 5927 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name); 5928 goto abort_with_mapped; 5929 } 5930 5931 status = myri10ge_probe_slices(mgp); 5932 if (status != 0) { 5933 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name); 5934 goto abort_with_dummy_rdma; 5935 } 5936 5937 status = myri10ge_alloc_slices(mgp); 5938 if (status != 0) { 5939 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name); 5940 goto abort_with_dummy_rdma; 5941 } 5942 5943 /* add the interrupt handler */ 5944 status = myri10ge_add_intrs(mgp, 1); 5945 if (status != 0) { 5946 cmn_err(CE_WARN, "%s: Failed to add interrupt\n", 5947 mgp->name); 5948 goto abort_with_slices; 5949 } 5950 5951 /* now that we have an iblock_cookie, init the mutexes */ 5952 mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie); 5953 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie); 5954 5955 5956 status = myri10ge_nic_stat_init(mgp); 5957 if (status != DDI_SUCCESS) 5958 goto abort_with_interrupts; 5959 status = myri10ge_info_init(mgp); 5960 if (status != DDI_SUCCESS) 5961 goto abort_with_stats; 5962 5963 /* 5964 * Initialize GLD state 5965 */ 5966 5967 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 5968 macp->m_driver = mgp; 5969 macp->m_dip = dip; 5970 macp->m_src_addr = mgp->mac_addr; 5971 macp->m_callbacks = &myri10ge_m_callbacks; 5972 macp->m_min_sdu = 0; 5973 macp->m_max_sdu = myri10ge_mtu - 5974 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ); 5975 #ifdef SOLARIS_S11 5976 macp->m_margin = VLAN_TAGSZ; 5977 #endif 5978 macp->m_v12n = MAC_VIRT_LEVEL1; 5979 status = mac_register(macp, &mgp->mh); 5980 if (status != 0) { 5981 cmn_err(CE_WARN, "%s: mac_register failed with %d\n", 5982 mgp->name, status); 5983 goto abort_with_info; 5984 } 5985 myri10ge_ndd_init(mgp); 5986 if (myri10ge_verbose) 5987 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name, 5988 mgp->intr_type, mgp->tx_boundary, mgp->fw_name); 5989 mutex_enter(&myri10ge_param_lock); 5990 mgp->next = mgp_list; 5991 mgp_list = mgp; 5992 mutex_exit(&myri10ge_param_lock); 5993 kmem_free(macp, sizeof (*macp) * 8); 5994 mac_free(omacp); 5995 return (DDI_SUCCESS); 5996 5997 abort_with_info: 5998 myri10ge_info_destroy(mgp); 5999 6000 abort_with_stats: 6001 myri10ge_nic_stat_destroy(mgp); 6002 6003 abort_with_interrupts: 6004 mutex_destroy(&mgp->cmd_lock); 6005 mutex_destroy(&mgp->intrlock); 6006 myri10ge_rem_intrs(mgp, 1); 6007 6008 abort_with_slices: 6009 myri10ge_free_slices(mgp); 6010 6011 abort_with_dummy_rdma: 6012 myri10ge_dummy_rdma(mgp, 0); 6013 6014 abort_with_mapped: 6015 ddi_regs_map_free(&mgp->io_handle); 6016 6017 myri10ge_dma_free(&mgp->cmd_dma); 6018 6019 abort_with_mgp: 6020 kmem_free(mgp, sizeof (*mgp)); 6021 6022 abort_with_macinfo: 6023 kmem_free(macp, sizeof (*macp) * 8); 6024 mac_free(omacp); 6025 6026 abort_with_cfg_hdl: 6027 pci_config_teardown(&handle); 6028 return (DDI_FAILURE); 6029 6030 } 6031 6032 6033 static int 6034 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 6035 { 6036 struct myri10ge_priv *mgp, *tmp; 6037 int status, i, jbufs_alloced; 6038 6039 if (cmd == DDI_SUSPEND) { 6040 status = myri10ge_suspend(dip); 6041 return (status); 6042 } 6043 6044 if (cmd != DDI_DETACH) { 6045 return (DDI_FAILURE); 6046 } 6047 /* Get the driver private (gld_mac_info_t) structure */ 6048 mgp = ddi_get_driver_private(dip); 6049 6050 mutex_enter(&mgp->intrlock); 6051 jbufs_alloced = 0; 6052 for (i = 0; i < mgp->num_slices; i++) { 6053 myri10ge_remove_jbufs(&mgp->ss[i]); 6054 jbufs_alloced += mgp->ss[i].jpool.num_alloc; 6055 } 6056 mutex_exit(&mgp->intrlock); 6057 if (jbufs_alloced != 0) { 6058 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n", 6059 mgp->name, jbufs_alloced); 6060 return (DDI_FAILURE); 6061 } 6062 6063 mutex_enter(&myri10ge_param_lock); 6064 if (mgp->refcnt != 0) { 6065 mutex_exit(&myri10ge_param_lock); 6066 cmn_err(CE_NOTE, "%s: %d external refs remain\n", 6067 mgp->name, mgp->refcnt); 6068 return (DDI_FAILURE); 6069 } 6070 mutex_exit(&myri10ge_param_lock); 6071 6072 status = mac_unregister(mgp->mh); 6073 if (status != DDI_SUCCESS) 6074 return (status); 6075 6076 myri10ge_ndd_fini(mgp); 6077 myri10ge_dummy_rdma(mgp, 0); 6078 myri10ge_nic_stat_destroy(mgp); 6079 myri10ge_info_destroy(mgp); 6080 6081 mutex_destroy(&mgp->cmd_lock); 6082 mutex_destroy(&mgp->intrlock); 6083 6084 myri10ge_rem_intrs(mgp, 1); 6085 6086 myri10ge_free_slices(mgp); 6087 ddi_regs_map_free(&mgp->io_handle); 6088 myri10ge_dma_free(&mgp->cmd_dma); 6089 pci_config_teardown(&mgp->cfg_hdl); 6090 6091 mutex_enter(&myri10ge_param_lock); 6092 if (mgp_list == mgp) { 6093 mgp_list = mgp->next; 6094 } else { 6095 tmp = mgp_list; 6096 while (tmp->next != mgp && tmp->next != NULL) 6097 tmp = tmp->next; 6098 if (tmp->next != NULL) 6099 tmp->next = tmp->next->next; 6100 } 6101 kmem_free(mgp, sizeof (*mgp)); 6102 mutex_exit(&myri10ge_param_lock); 6103 return (DDI_SUCCESS); 6104 } 6105 6106 /* 6107 * Helper for quiesce entry point: Interrupt threads are not being 6108 * scheduled, so we must poll for the confirmation DMA to arrive in 6109 * the firmware stats block for slice 0. We're essentially running 6110 * the guts of the interrupt handler, and just cherry picking the 6111 * confirmation that the NIC is queuesced (stats->link_down) 6112 */ 6113 6114 static int 6115 myri10ge_poll_down(struct myri10ge_priv *mgp) 6116 { 6117 struct myri10ge_slice_state *ss = mgp->ss; 6118 mcp_irq_data_t *stats = ss->fw_stats; 6119 int valid; 6120 int found_down = 0; 6121 6122 6123 /* check for a pending IRQ */ 6124 6125 if (! *((volatile uint8_t *)& stats->valid)) 6126 return (0); 6127 valid = stats->valid; 6128 6129 /* 6130 * Make sure to tell the NIC to lower a legacy IRQ, else 6131 * it may have corrupt state after restarting 6132 */ 6133 6134 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 6135 /* lower legacy IRQ */ 6136 *mgp->irq_deassert = 0; 6137 mb(); 6138 /* wait for irq conf DMA */ 6139 while (*((volatile uint8_t *)& stats->valid)) 6140 ; 6141 } 6142 if (stats->stats_updated && stats->link_down) 6143 found_down = 1; 6144 6145 if (valid & 0x1) 6146 *ss->irq_claim = BE_32(3); 6147 *(ss->irq_claim + 1) = BE_32(3); 6148 6149 return (found_down); 6150 } 6151 6152 static int 6153 myri10ge_quiesce(dev_info_t *dip) 6154 { 6155 struct myri10ge_priv *mgp; 6156 myri10ge_cmd_t cmd; 6157 int status, down, i; 6158 6159 mgp = ddi_get_driver_private(dip); 6160 if (mgp == NULL) 6161 return (DDI_FAILURE); 6162 6163 /* if devices was unplumbed, it is guaranteed to be quiescent */ 6164 if (mgp->running == MYRI10GE_ETH_STOPPED) 6165 return (DDI_SUCCESS); 6166 6167 /* send a down CMD to queuesce NIC */ 6168 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 6169 if (status) { 6170 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 6171 return (DDI_FAILURE); 6172 } 6173 6174 for (i = 0; i < 20; i++) { 6175 down = myri10ge_poll_down(mgp); 6176 if (down) 6177 break; 6178 delay(drv_usectohz(100000)); 6179 mb(); 6180 } 6181 if (down) 6182 return (DDI_SUCCESS); 6183 return (DDI_FAILURE); 6184 } 6185 6186 /* 6187 * Distinguish between allocb'ed blocks, and gesballoc'ed attached 6188 * storage. 6189 */ 6190 static void 6191 myri10ge_find_lastfree(void) 6192 { 6193 mblk_t *mp = allocb(1024, 0); 6194 dblk_t *dbp; 6195 6196 if (mp == NULL) { 6197 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n"); 6198 return; 6199 } 6200 dbp = mp->b_datap; 6201 myri10ge_db_lastfree = (void *)dbp->db_lastfree; 6202 } 6203 6204 int 6205 _init(void) 6206 { 6207 int i; 6208 6209 if (myri10ge_verbose) 6210 cmn_err(CE_NOTE, 6211 "Myricom 10G driver (10GbE) version %s loading\n", 6212 MYRI10GE_VERSION_STR); 6213 myri10ge_find_lastfree(); 6214 mac_init_ops(&myri10ge_ops, "myri10ge"); 6215 mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL); 6216 if ((i = mod_install(&modlinkage)) != 0) { 6217 cmn_err(CE_WARN, "mod_install returned %d\n", i); 6218 mac_fini_ops(&myri10ge_ops); 6219 mutex_destroy(&myri10ge_param_lock); 6220 } 6221 return (i); 6222 } 6223 6224 int 6225 _fini(void) 6226 { 6227 int i; 6228 i = mod_remove(&modlinkage); 6229 if (i != 0) { 6230 return (i); 6231 } 6232 mac_fini_ops(&myri10ge_ops); 6233 mutex_destroy(&myri10ge_param_lock); 6234 return (0); 6235 } 6236 6237 int 6238 _info(struct modinfo *modinfop) 6239 { 6240 return (mod_info(&modlinkage, modinfop)); 6241 } 6242 6243 6244 /* 6245 * This file uses MyriGE driver indentation. 6246 * 6247 * Local Variables: 6248 * c-file-style:"sun" 6249 * tab-width:8 6250 * End: 6251 */ 6252