/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2007-2009 Myricom, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, Joyent, Inc.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#define	MXGEFW_NDIS
#include "myri10ge_var.h"
#include "rss_eth_z8e.h"
#include "rss_ethp_z8e.h"
#include "mcp_gen_header.h"

#define	MYRI10GE_MAX_ETHER_MTU 9014
#define	MYRI10GE_MAX_GLD_MTU	9000
#define	MYRI10GE_MIN_GLD_MTU	1500

#define	MYRI10GE_ETH_STOPPED 0
#define	MYRI10GE_ETH_STOPPING 1
#define	MYRI10GE_ETH_STARTING 2
#define	MYRI10GE_ETH_RUNNING 3
#define	MYRI10GE_ETH_OPEN_FAILED 4
#define	MYRI10GE_ETH_SUSPENDED_RUNNING 5

static int myri10ge_small_bytes = 510;
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
int myri10ge_tx_handles_initial = 128;

static kmutex_t myri10ge_param_lock;
static void *myri10ge_db_lastfree;

static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);


static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};


static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/*
 * The Myri10GE NIC has the following constraints on receive buffers:
 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 */

static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
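
/*
 * Example of how the attributes above satisfy those constraints: on
 * non-sparc systems the std rx attributes use dma_attr_align = 0x80
 * with dma_attr_seg = 0xfff, so a standard buffer may land anywhere
 * within a page but can never span a 4KB boundary (constraint 2),
 * while the jumbo attributes use dma_attr_align = 4096, so a buffer
 * that does cross a 4KB boundary starts on one (constraint 1).
 */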

static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

#if defined sparc64 || defined __sparcv9
#define	WC 0
#else
#define	WC 1
#endif

struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,		/* version */
	DDI_NEVERSWAP_ACC,		/* endian flags */
#if WC
	DDI_MERGING_OK_ACC		/* data order */
#else
	DDI_STRICTORDER_ACC
#endif
};

static void myri10ge_watchdog(void *arg);

#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MAX_GLD_MTU
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MIN_GLD_MTU
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;


caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void *)dip, len, (void *)attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void *)accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void *)dma);
	}
	return (NULL);

}

void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}

static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	register volatile uint32_t *to32;
	size_t i;

	to32 = (volatile uint32_t *)to;
	for (i = (size / 4); i; i--) {
		*to32 = *from32;
		to32++;
		from32++;
	}
}

#if defined(_LP64)
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	register volatile uint64_t *to64;
	size_t i;

	to64 = (volatile uint64_t *)to;
	for (i = (size / 8); i; i--) {
		*to64 = *from64;
		to64++;
		from64++;
	}
}
#endif

/*
 * This routine copies memory from the host to the NIC.
 * The "size" argument must always be a multiple of
 * the size of long (4 or 8 bytes), and to/from must also
 * be naturally aligned.
 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if !defined(_LP64)
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#else
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#endif
}
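
/*
 * Note that on non-sparc systems the NIC is mapped with
 * DDI_MERGING_OK_ACC (see myri10ge_dev_access_attr above), so the
 * consecutive aligned stores issued by these copy loops may be merged
 * by the CPU into larger write-combined PCIe bursts; callers use mb()
 * where the device must observe a complete burst before any
 * subsequent store.
 */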

/*
 * Due to various bugs in Solaris (especially bug 6186772 where the
 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 * than two elements), and the design bug where hardware checksums are
 * ignored on mblk chains with more than 2 elements, we need to
 * allocate a private pool of physically contiguous receive buffers.
 */

static void
myri10ge_jpool_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	bzero(jpool, sizeof (*jpool));
	mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
	    ss->mgp->icookie);
	jpool->head = NULL;
}

static void
myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	if (jpool->head != NULL) {
		cmn_err(CE_WARN,
		    "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
		    ss->mgp->name);
	}
	mutex_destroy(&jpool->mtx);
}


/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
{
	src->addr_low |= BE_32(1);
	myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low &= ~(BE_32(1));
	dst->addr_low = src->addr_low;
	mb();
}
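
/*
 * The ordering above matters: bit 0 of the first descriptor's low DMA
 * address serves as a "not yet valid" marker while the two 32-byte
 * bursts are written, and only the final store of addr_low, after the
 * intervening mb() calls, exposes the whole chunk of 8 descriptors to
 * the firmware.
 */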

static void
myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *jtail, *j, *jfree;
	volatile uintptr_t *putp;
	uintptr_t put;
	int i;

	/* find tail */
	jtail = NULL;
	if (jpool->head != NULL) {
		j = jpool->head;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}

	/*
	 * iterate over all per-CPU caches, and add contents into
	 * jpool
	 */
	for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
		/* take per-CPU free list */
		putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
		if (*putp == NULL)
			continue;
		put = atomic_swap_ulong(putp, 0);
		jfree = (struct myri10ge_jpool_entry *)put;

		/* append to pool */
		if (jtail == NULL) {
			jpool->head = jfree;
		} else {
			jtail->next = jfree;
		}
		j = jfree;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}
}

/*
 * Transfers buffers from the free pool to the nic
 * Must be called holding the jpool mutex.
 */

static inline void
myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i, idx, limit;

	rx = &ss->rx_big;
	limit = ss->j_rx_cnt + (rx->mask + 1);

	for (i = rx->cnt; i != limit; i++) {
		idx = i & (rx->mask);
		j = jpool->head;
		if (j == NULL) {
			myri10ge_pull_jpool(ss);
			j = jpool->head;
			if (j == NULL) {
				break;
			}
		}
		jpool->head = j->next;
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;
		/* copy 4 descriptors (32-bytes) to the mcp at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
			    &rx->shadow[idx - 7]);
		}
	}
	rx->cnt = i;
}

/*
 * Transfer buffers from the nic to the free pool.
 * Should be called holding the jpool mutex
 */

static inline void
myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i;

	mutex_enter(&jpool->mtx);
	rx = &ss->rx_big;

	for (i = 0; i < rx->mask + 1; i++) {
		j = rx->info[i].j;
		rx->info[i].j = NULL;
		if (j == NULL)
			continue;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);

}


/*
 * Free routine which is called when the mblk allocated via
 * esballoc() is freed.  Here we return the jumbo buffer
 * to the free pool, and possibly pass some jumbo buffers
 * to the nic
 */

static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}
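
/*
 * The per-CPU freelists above are Treiber-style lock-free stacks:
 * myri10ge_jfree_rtn() pushes one buffer with a compare-and-swap
 * loop, and myri10ge_pull_jpool() detaches an entire list at once via
 * atomic_swap_ulong().  This keeps the mblk free path off the jpool
 * mutex entirely.
 */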
588 */ 589 590 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 591 rx_dma_attr->dma_attr_align != 4096) { 592 uint32_t start, end; 593 594 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 595 end = start + myri10ge_mtu; 596 if (((end >> 12) != (start >> 12)) && (start & 4095U)) { 597 printf("std buffer crossed a 4KB boundary!\n"); 598 myri10ge_remove_jbuf(j); 599 rx_dma_attr->dma_attr_align = 4096; 600 rx_dma_attr->dma_attr_seg = UINT64_MAX; 601 goto again; 602 } 603 } 604 605 j->dma.low = 606 htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress)); 607 j->dma.high = 608 htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress)); 609 j->ss = ss; 610 611 612 j->free_func.free_func = myri10ge_jfree_rtn; 613 j->free_func.free_arg = (char *)j; 614 mutex_enter(&jpool->mtx); 615 j->next = jpool->head; 616 jpool->head = j; 617 jpool->num_alloc++; 618 mutex_exit(&jpool->mtx); 619 return (0); 620 621 abort_with_mem: 622 ddi_dma_mem_free(&j->acc_handle); 623 624 abort_with_handle: 625 ddi_dma_free_handle(&j->dma_handle); 626 627 abort_with_j: 628 kmem_free(j, sizeof (*j)); 629 630 /* 631 * If an allocation failed, perhaps it failed because it could 632 * not satisfy granularity requirement. Disable that, and 633 * try agin. 634 */ 635 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 636 rx_dma_attr->dma_attr_align != 4096) { 637 cmn_err(CE_NOTE, 638 "!alloc failed, reverting to gran=1\n"); 639 rx_dma_attr->dma_attr_align = 4096; 640 rx_dma_attr->dma_attr_seg = UINT64_MAX; 641 goto again; 642 } 643 return (err); 644 } 645 646 static int 647 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool) 648 { 649 int i; 650 struct myri10ge_jpool_entry *j; 651 652 mutex_enter(&jpool->mtx); 653 j = jpool->head; 654 i = 0; 655 while (j != NULL) { 656 i++; 657 j = j->next; 658 } 659 mutex_exit(&jpool->mtx); 660 return (i); 661 } 662 663 static int 664 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total) 665 { 666 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 667 int allocated = 0; 668 int err; 669 int needed; 670 671 /* 672 * if total is set, user wants "num" jbufs in the pool, 673 * otherwise the user wants to "num" additional jbufs 674 * added to the pool 675 */ 676 if (total && jpool->num_alloc) { 677 allocated = myri10ge_jfree_cnt(jpool); 678 needed = num - allocated; 679 } else { 680 needed = num; 681 } 682 683 while (needed > 0) { 684 needed--; 685 err = myri10ge_add_jbuf(ss); 686 if (err == 0) { 687 allocated++; 688 } 689 } 690 return (allocated); 691 } 692 693 static void 694 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss) 695 { 696 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 697 struct myri10ge_jpool_entry *j; 698 699 mutex_enter(&jpool->mtx); 700 myri10ge_pull_jpool(ss); 701 while (jpool->head != NULL) { 702 jpool->num_alloc--; 703 j = jpool->head; 704 jpool->head = j->next; 705 myri10ge_remove_jbuf(j); 706 } 707 mutex_exit(&jpool->mtx); 708 } 709 710 static void 711 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss) 712 { 713 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 714 struct myri10ge_jpool_entry *j = NULL; 715 caddr_t ptr; 716 uint32_t dma_low, dma_high; 717 int idx, len; 718 unsigned int alloc_size; 719 720 dma_low = dma_high = len = 0; 721 alloc_size = myri10ge_small_bytes + MXGEFW_PAD; 722 ptr = NULL; 723 for (idx = 0; idx < ss->rx_small.mask + 1; idx++) { 724 /* Allocate a jumbo frame and carve it into small frames */ 725 if (len < alloc_size) { 726 mutex_enter(&jpool->mtx); 727 /* remove jumbo from freelist */ 728 j = jpool->head; 729 

static void
myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;
	unsigned int alloc_size;

	dma_low = dma_high = len = 0;
	alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
	ptr = NULL;
	for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < alloc_size) {
			mutex_enter(&jpool->mtx);
			/* remove jumbo from freelist */
			j = jpool->head;
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}

/*
 * Return the jumbo bufs we carved up for small to the jumbo pool
 */

static void
myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;

	mutex_enter(&jpool->mtx);
	while (ss->small_jpool != NULL) {
		j = ss->small_jpool;
		ss->small_jpool = j->next;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);
	ss->jbufs_for_smalls = 0;
}

static int
myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	struct myri10ge_tx_dma_handle *handle;
	int err;

	handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
	err = ddi_dma_alloc_handle(mgp->dip,
	    &myri10ge_tx_dma_attr,
	    DDI_DMA_SLEEP, NULL,
	    &handle->h);
	if (err) {
		static int limit = 0;
		if (limit == 0)
			cmn_err(CE_WARN, "%s: Failed to alloc tx dma handle\n",
			    mgp->name);
		limit++;
		kmem_free(handle, sizeof (*handle));
		return (err);
	}
	mutex_enter(&tx->handle_lock);
	MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
	handle->next = tx->free_tx_handles;
	tx->free_tx_handles = handle;
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);
}

static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;

	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}

static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}

static void
myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle *handle)
{
	struct myri10ge_tx_dma_handle_head list;

	if (handle == NULL)
		return;
	list.head = handle;
	list.tail = handle;
	while (handle != NULL) {
		list.tail = handle;
		handle = handle->next;
	}
	myri10ge_free_tx_handles(tx, &list);
}

static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
    struct myri10ge_tx_dma_handle **ret)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	int err, i;

	mutex_enter(&tx->handle_lock);
	for (i = 0; i < count; i++) {
		handle = tx->free_tx_handles;
		while (handle == NULL) {
			mutex_exit(&tx->handle_lock);
			err = myri10ge_add_tx_handle(ss);
			if (err != DDI_SUCCESS) {
				goto abort_with_handles;
			}
			mutex_enter(&tx->handle_lock);
			handle = tx->free_tx_handles;
		}
		tx->free_tx_handles = handle->next;
		handle->next = *ret;
		*ret = handle;
	}
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);

abort_with_handles:
	myri10ge_free_tx_handle_slist(tx, *ret);
	return (err);
}


/*
 * Frees DMA resources associated with the send ring
 */
static void
myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	size_t bytes;
	int idx;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	for (idx = 0; idx < ss->tx.mask + 1; idx++) {
		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		tx->cp[idx].va = NULL;
		myri10ge_dma_free(&tx->cp[idx].dma);
	}
	bytes = sizeof (*tx->cp) * (tx->mask + 1);
	kmem_free(tx->cp, bytes);
	tx->cp = NULL;
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
	myri10ge_remove_tx_handles(ss);
}

/*
 * Allocates DMA handles associated with the send ring
 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}


	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	while (h > 0) {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
static int
myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
#define	MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
#define	myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
	(((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
	(((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))

	char *ptr, *limit;
	int i, hv, lv;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			if (myri10ge_verbose)
				printf("%s: mac address = %s\n", mgp->name,
				    ptr);
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;

				if (*(ptr+1) == ':') {
					hv = 0;
					lv = myri10ge_digit(*ptr); ptr++;
				} else {
					hv = myri10ge_digit(*ptr); ptr++;
					lv = myri10ge_digit(*ptr); ptr++;
				}
				mgp->mac_addr[i] = (hv << 4) | lv;
				ptr++;
			}
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->sn_str = (char *)ptr;
		}
		if (memcmp((const void *)ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->pc_str = (char *)ptr;
		}
		MYRI10GE_NEXT_STRING(ptr);
	}

	return (0);

abort:
	cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
	return (ENXIO);
}
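
/*
 * Example: "MAC=0:1a:2b:3c:4d:5e" parses to 00:1a:2b:3c:4d:5e; when a
 * single hex digit precedes the ':', the high nibble of that octet is
 * implied to be zero.
 */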

/*
 * Determine the register set containing the PCI resource we
 * want to map: the memory-mappable part of the interface.  We do
 * this by scanning the DDI "reg" property of the interface,
 * which is an array of mx_ddi_reg_set structures.
 */
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

#define	REGISTER_NUMBER(ip)	(ip[0] >> 0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >> 8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip)	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip)	(ip[4])

#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3

	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
					"I/O Space",
					"32-bit Memory Space",
					"64-bit Memory Space"
	};
#endif

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

	/* Scan for the register number. */
	rs = &data[0];
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number = %d.\n", BUS_NUMBER(rs));
		printf("  Address space = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}
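
/*
 * Example "reg" entry, decoded with the macros above: the 5-tuple
 * { 0x03000010, 0x0, 0x0, 0x0, 0x1000000 } names bus 0, device 0,
 * function 0, register 0x10, in 64-bit memory space, with a 16MB
 * span; that is the sort of entry the scan loop selects.
 */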

static int
myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
{
	void *inflate_buffer;
	int rv, status;
	size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
	size_t destlen;
	mcp_gen_header_t *hdr;
	unsigned hdr_offset, i;


	*limit = 0; /* -Wuninitialized */
	status = 0;

	inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
	if (!inflate_buffer) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate buffer to inflate mcp\n",
		    mgp->name);
		return (ENOMEM);
	}

	destlen = sram_size;
	rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
	    mgp->eth_z8e_length);

	if (rv != Z_OK) {
		cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
		    mgp->name, z_strerror(rv));
		status = ENXIO;
		goto abort;
	}

	*limit = (uint32_t)destlen;

	hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
	    MCP_HEADER_PTR_OFFSET));
	hdr = (void *)((char *)inflate_buffer + hdr_offset);
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
		    ntohl(hdr->mcp_type));
		status = EIO;
		goto abort;
	}

	/* save firmware version for kstat */
	(void) strncpy(mgp->fw_version, hdr->version,
	    sizeof (mgp->fw_version));
	if (myri10ge_verbose)
		printf("%s: firmware id: %s\n", mgp->name, hdr->version);

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
		    (char *)inflate_buffer + i,
		    min(256U, (unsigned)(*limit - i)));
		mb();
		(void) *(int *)(void *)mgp->sram;
		mb();
	}

abort:
	kmem_free(inflate_buffer, sram_size);

	return (status);

}


int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	response->result = 0xffffffff;
	mb();

	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}
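
/*
 * Command protocol summary: the host seeds response->result with
 * 0xffffffff, PIOs the 8-byte-aligned mcp_cmd_t into the command
 * window, then polls up to 20 x 1ms for the firmware to DMA its
 * result back into the response block at mgp->cmd_dma.
 */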

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high);	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low);	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */


	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void *)confirm,
		    *confirm);
	}
}

static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff.  However, the very first interfaces
	 * do not.  Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8);	/* where the code starts */
	buf[4] = htonl(size - 8);		/* length of code */
	buf[5] = htonl(8);			/* where to copy to */
	buf[6] = htonl(0);			/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *)confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}

static int
myri10ge_m_unicst(void *arg, const uint8_t *addr)
{
	struct myri10ge_priv *mgp = arg;
	myri10ge_cmd_t cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
	    | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
	if (status == 0 && (addr != mgp->mac_addr))
		(void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));

	return (status);
}

static int
myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	myri10ge_cmd_t cmd;
	int status;

	if (pause)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
		    &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
		    &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
		    mgp->name);
		return (ENXIO);
	}
	mgp->pause = pause;
	return (0);
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
{
	myri10ge_cmd_t cmd;
	int status;

	if (promisc)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
		    mgp->name);
	}
}

static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return are the number of transfers completed.
	 * The lower 16 bits are the time in 0.5us ticks that the
	 * transfers took to complete
	 */

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);


abort:
	myri10ge_dma_free(&dmabench_dma);
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}
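
/*
 * Bandwidth arithmetic used above: with transfers in the upper 16
 * bits of cmd.data0 and elapsed 0.5us ticks in the lower 16 bits,
 * (transfers * len * 2) / ticks is bytes per microsecond, i.e. MB/s;
 * the read/write test moves each buffer twice, hence the additional
 * factor of 2.
 */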

static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	}

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}

static int
myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int i, b, s, t, j;
	int status;
	uint32_t k[8];
	uint32_t tmp;
	uint8_t *key;

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
	    &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to get rss key\n",
		    mgp->name);
		return (EIO);
	}
	myri10ge_pio_copy32(mgp->rss_key,
	    (uint32_t *)(void *)((char *)mgp->sram + cmd.data0),
	    sizeof (mgp->rss_key));

	mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
	    KM_SLEEP);
	key = (uint8_t *)mgp->rss_key;
	t = 0;
	for (b = 0; b < 12; b++) {
		for (s = 0; s < 8; s++) {
			/* Bits: b*8+s, ..., b*8+s+31 */
			k[s] = 0;
			for (j = 0; j < 32; j++) {
				int bit = b*8+s+j;
				bit = 0x1 & (key[bit / 8] >> (7 - (bit & 0x7)));
				k[s] |= bit << (31 - j);
			}
		}

		for (i = 0; i <= 0xff; i++) {
			tmp = 0;
			if (i & (1 << 7)) { tmp ^= k[0]; }
			if (i & (1 << 6)) { tmp ^= k[1]; }
			if (i & (1 << 5)) { tmp ^= k[2]; }
			if (i & (1 << 4)) { tmp ^= k[3]; }
			if (i & (1 << 3)) { tmp ^= k[4]; }
			if (i & (1 << 2)) { tmp ^= k[5]; }
			if (i & (1 << 1)) { tmp ^= k[6]; }
			if (i & (1 << 0)) { tmp ^= k[7]; }
			mgp->toeplitz_hash_table[t++] = tmp;
		}
	}
	return (0);
}
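
/*
 * The table built above is the usual byte-at-a-time Toeplitz
 * precomputation: one 256-entry table of 32-bit partial hashes for
 * each of the 12 hashed input bytes (4 of destination IP, 4 of
 * source IP, 2 of destination port, 2 of source port), so computing
 * a hash becomes 12 table lookups XORed together rather than 96
 * bit-serial steps.
 */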

static inline struct myri10ge_slice_state *
myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t saddr, daddr;
	uint32_t hash, slice;
	uint32_t *table = mgp->toeplitz_hash_table;
	uint16_t src, dst;

	/*
	 * Note hashing order is reversed from how it is done
	 * in the NIC, so as to generate the same hash value
	 * for the connection to try to keep connections CPU local
	 */

	/* hash on IPv4 src/dst address */
	saddr = ntohl(ip->ip_src.s_addr);
	daddr = ntohl(ip->ip_dst.s_addr);
	hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
	hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
	hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
	hash ^= table[(256 * 3) + ((daddr) & 0xff)];
	hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
	hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
	hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
	hash ^= table[(256 * 7) + ((saddr) & 0xff)];
	/* hash on TCP port, if required */
	if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
	    ip->ip_p == IPPROTO_TCP) {
		hdr = (struct tcphdr *)(void *)
		    (((uint8_t *)ip) + (ip->ip_hl << 2));
		src = ntohs(hdr->th_sport);
		dst = ntohs(hdr->th_dport);

		hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
		hash ^= table[(256 * 9) + ((dst) & 0xff)];
		hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
		hash ^= table[(256 * 11) + ((src) & 0xff)];
	}
	slice = (mgp->num_slices - 1) & hash;
	return (&mgp->ss[slice]);
}

static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t slice, hash_val;


	if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
		return (&mgp->ss[0]);
	}
	hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));

	/*
	 * Use the second byte of the *destination* port for
	 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
	 */
	hash_val = ntohs(hdr->th_dport) & 0xff;
	if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
		hash_val += ntohs(hdr->th_sport) & 0xff;

	slice = (mgp->num_slices - 1) & hash_val;
	return (&mgp->ss[slice]);
}

static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	if ((MBLKL(mp)) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}

static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}


	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
	ss->rx_small.cnt = ss->rx_small.mask + 1;

	mutex_exit(&ss->jpool.mtx);

	status = myri10ge_prepare_tx_ring(ss);

	if (status != 0)
		goto abort_with_small_jbufs;

	cmd.data0 = ntohl(ss->fw_stats_dma.low);
	cmd.data1 = ntohl(ss->fw_stats_dma.high);
	cmd.data2 = sizeof (mcp_irq_data_t);
	cmd.data2 |= (slice << 16);
	bzero(ss->fw_stats, sizeof (*ss->fw_stats));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	if (status == ENOSYS) {
		cmd.data0 = ntohl(ss->fw_stats_dma.low) +
		    offsetof(mcp_irq_data_t, send_done_count);
		cmd.data1 = ntohl(ss->fw_stats_dma.high);
		status = myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
	}
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
		goto abort_with_tx;
	}

	return (0);

abort_with_tx:
	myri10ge_unprepare_tx_ring(ss);

abort_with_small_jbufs:
	myri10ge_release_small_jbufs(ss);

abort_with_jumbos:
	if (allocated != 0) {
		mutex_enter(&ss->jpool.mtx);
		ss->jpool.low_water = 0;
		mutex_exit(&ss->jpool.mtx);
		myri10ge_unstock_jumbos(ss);
		myri10ge_remove_jbufs(ss);
	}

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	kmem_free(ss->rx_big.info, bytes);

abort_with_rx_small_info:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	kmem_free(ss->rx_small.info, bytes);

abort_with_tx_info:
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	kmem_free(ss->tx.info, bytes);

abort_with_rx_big_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	kmem_free(ss->rx_big.shadow, bytes);

abort_with_rx_small_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	kmem_free(ss->rx_small.shadow, bytes);
abort:
	return (status);
}
status = myri10ge_send_cmd(mgp, 1978 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd); 1979 } 1980 if (status) { 1981 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name); 1982 goto abort_with_tx; 1983 } 1984 1985 return (0); 1986 1987 abort_with_tx: 1988 myri10ge_unprepare_tx_ring(ss); 1989 1990 abort_with_small_jbufs: 1991 myri10ge_release_small_jbufs(ss); 1992 1993 abort_with_jumbos: 1994 if (allocated != 0) { 1995 mutex_enter(&ss->jpool.mtx); 1996 ss->jpool.low_water = 0; 1997 mutex_exit(&ss->jpool.mtx); 1998 myri10ge_unstock_jumbos(ss); 1999 myri10ge_remove_jbufs(ss); 2000 } 2001 2002 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2003 kmem_free(ss->rx_big.info, bytes); 2004 2005 abort_with_rx_small_info: 2006 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2007 kmem_free(ss->rx_small.info, bytes); 2008 2009 abort_with_tx_info: 2010 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2011 kmem_free(ss->tx.info, bytes); 2012 2013 abort_with_rx_big_shadow: 2014 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2015 kmem_free(ss->rx_big.shadow, bytes); 2016 2017 abort_with_rx_small_shadow: 2018 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2019 kmem_free(ss->rx_small.shadow, bytes); 2020 abort: 2021 return (status); 2022 2023 } 2024 2025 static void 2026 myri10ge_teardown_slice(struct myri10ge_slice_state *ss) 2027 { 2028 int tx_ring_entries, rx_ring_entries; 2029 size_t bytes; 2030 2031 /* ignore slices that have not been fully setup */ 2032 if (ss->tx.cp == NULL) 2033 return; 2034 /* Free the TX copy buffers */ 2035 myri10ge_unprepare_tx_ring(ss); 2036 2037 /* stop passing returned buffers to firmware */ 2038 2039 mutex_enter(&ss->jpool.mtx); 2040 ss->jpool.low_water = 0; 2041 mutex_exit(&ss->jpool.mtx); 2042 myri10ge_release_small_jbufs(ss); 2043 2044 /* Release the free jumbo frame pool */ 2045 myri10ge_unstock_jumbos(ss); 2046 myri10ge_remove_jbufs(ss); 2047 2048 rx_ring_entries = ss->rx_big.mask + 1; 2049 tx_ring_entries = ss->tx.mask + 1; 2050 2051 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2052 kmem_free(ss->rx_big.info, bytes); 2053 2054 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2055 kmem_free(ss->rx_small.info, bytes); 2056 2057 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2058 kmem_free(ss->tx.info, bytes); 2059 2060 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2061 kmem_free(ss->rx_big.shadow, bytes); 2062 2063 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2064 kmem_free(ss->rx_small.shadow, bytes); 2065 2066 } 2067 static int 2068 myri10ge_start_locked(struct myri10ge_priv *mgp) 2069 { 2070 myri10ge_cmd_t cmd; 2071 int status, big_pow2, i; 2072 volatile uint8_t *itable; 2073 2074 status = DDI_SUCCESS; 2075 /* Allocate DMA resources and receive buffers */ 2076 2077 status = myri10ge_reset(mgp); 2078 if (status != 0) { 2079 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 2080 return (DDI_FAILURE); 2081 } 2082 2083 if (mgp->num_slices > 1) { 2084 cmd.data0 = mgp->num_slices; 2085 cmd.data1 = 1; /* use MSI-X */ 2086 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2087 &cmd); 2088 if (status != 0) { 2089 cmn_err(CE_WARN, 2090 "%s: failed to set number of slices\n", 2091 mgp->name); 2092 goto abort_with_nothing; 2093 } 2094 /* setup the indirection table */ 2095 cmd.data0 = mgp->num_slices; 2096 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2097 &cmd); 2098 2099 status |= myri10ge_send_cmd(mgp, 2100 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 2101 if (status != 0) { 2102 
cmn_err(CE_WARN, 2103 "%s: failed to setup rss tables\n", mgp->name); 2104 } 2105 2106 /* just enable an identity mapping */ 2107 itable = mgp->sram + cmd.data0; 2108 for (i = 0; i < mgp->num_slices; i++) 2109 itable[i] = (uint8_t)i; 2110 2111 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) { 2112 status = myri10ge_init_toeplitz(mgp); 2113 if (status != 0) { 2114 cmn_err(CE_WARN, "%s: failed to setup " 2115 "toeplitz tx hash table", mgp->name); 2116 goto abort_with_nothing; 2117 } 2118 } 2119 cmd.data0 = 1; 2120 cmd.data1 = myri10ge_rss_hash; 2121 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2122 &cmd); 2123 if (status != 0) { 2124 cmn_err(CE_WARN, 2125 "%s: failed to enable slices\n", mgp->name); 2126 goto abort_with_toeplitz; 2127 } 2128 } 2129 2130 for (i = 0; i < mgp->num_slices; i++) { 2131 status = myri10ge_setup_slice(&mgp->ss[i]); 2132 if (status != 0) 2133 goto abort_with_slices; 2134 } 2135 2136 /* 2137 * Tell the MCP how many buffers it has, and to 2138 * bring the ethernet interface up 2139 * 2140 * Firmware needs the big buff size as a power of 2. Lie and 2141 * tell it the buffer is larger, because we only use 1 2142 * buffer/pkt, and the mtu will prevent overruns 2143 */ 2144 big_pow2 = myri10ge_mtu + MXGEFW_PAD; 2145 while (!ISP2(big_pow2)) 2146 big_pow2++; 2147 2148 /* now give firmware buffers sizes, and MTU */ 2149 cmd.data0 = myri10ge_mtu; 2150 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd); 2151 cmd.data0 = myri10ge_small_bytes; 2152 status |= 2153 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 2154 cmd.data0 = big_pow2; 2155 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2156 if (status) { 2157 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name); 2158 goto abort_with_slices; 2159 } 2160 2161 2162 cmd.data0 = 1; 2163 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd); 2164 if (status) { 2165 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n", 2166 mgp->name, status); 2167 } else { 2168 mgp->features |= MYRI10GE_TSO; 2169 } 2170 2171 mgp->link_state = -1; 2172 mgp->rdma_tags_available = 15; 2173 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd); 2174 if (status) { 2175 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name); 2176 goto abort_with_slices; 2177 } 2178 mgp->running = MYRI10GE_ETH_RUNNING; 2179 return (DDI_SUCCESS); 2180 2181 abort_with_slices: 2182 for (i = 0; i < mgp->num_slices; i++) 2183 myri10ge_teardown_slice(&mgp->ss[i]); 2184 2185 mgp->running = MYRI10GE_ETH_STOPPED; 2186 2187 abort_with_toeplitz: 2188 if (mgp->toeplitz_hash_table != NULL) { 2189 kmem_free(mgp->toeplitz_hash_table, 2190 sizeof (uint32_t) * 12 * 256); 2191 mgp->toeplitz_hash_table = NULL; 2192 } 2193 2194 abort_with_nothing: 2195 return (DDI_FAILURE); 2196 } 2197 2198 static void 2199 myri10ge_stop_locked(struct myri10ge_priv *mgp) 2200 { 2201 int status, old_down_cnt; 2202 myri10ge_cmd_t cmd; 2203 int wait_time = 10; 2204 int i, polling; 2205 2206 old_down_cnt = mgp->down_cnt; 2207 mb(); 2208 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2209 if (status) { 2210 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 2211 } 2212 2213 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2214 delay(1 * drv_usectohz(1000000)); 2215 wait_time--; 2216 if (wait_time == 0) 2217 break; 2218 } 2219 again: 2220 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2221 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name); 2222 for (i = 0; i < 
mgp->num_slices; i++) { 2223 /* 2224 * take and release the rx lock to ensure 2225 * that no interrupt thread is blocked 2226 * elsewhere in the stack, preventing 2227 * completion 2228 */ 2229 2230 mutex_enter(&mgp->ss[i].rx_lock); 2231 printf("%s: slice %d rx irq idle\n", 2232 mgp->name, i); 2233 mutex_exit(&mgp->ss[i].rx_lock); 2234 2235 /* verify that this slice's poll handler is inactive */ 2236 mutex_enter(&mgp->ss[i].poll_lock); 2237 polling = mgp->ss[i].rx_polling; 2238 mutex_exit(&mgp->ss[i].poll_lock); 2239 if (polling) { 2240 printf("%s: slice %d is polling\n", 2241 mgp->name, i); 2242 delay(1 * drv_usectohz(1000000)); 2243 goto again; 2244 } 2245 } 2246 delay(1 * drv_usectohz(1000000)); 2247 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2248 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name); 2249 } 2250 } 2251 2252 for (i = 0; i < mgp->num_slices; i++) 2253 myri10ge_teardown_slice(&mgp->ss[i]); 2254 2255 if (mgp->toeplitz_hash_table != NULL) { 2256 kmem_free(mgp->toeplitz_hash_table, 2257 sizeof (uint32_t) * 12 * 256); 2258 mgp->toeplitz_hash_table = NULL; 2259 } 2260 mgp->running = MYRI10GE_ETH_STOPPED; 2261 } 2262 2263 static int 2264 myri10ge_m_start(void *arg) 2265 { 2266 struct myri10ge_priv *mgp = arg; 2267 int status; 2268 2269 mutex_enter(&mgp->intrlock); 2270 2271 if (mgp->running != MYRI10GE_ETH_STOPPED) { 2272 mutex_exit(&mgp->intrlock); 2273 return (DDI_FAILURE); 2274 } 2275 status = myri10ge_start_locked(mgp); 2276 mutex_exit(&mgp->intrlock); 2277 2278 if (status != DDI_SUCCESS) 2279 return (status); 2280 2281 /* start the watchdog timer */ 2282 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 2283 mgp->timer_ticks); 2284 return (DDI_SUCCESS); 2285 2286 } 2287 2288 static void 2289 myri10ge_m_stop(void *arg) 2290 { 2291 struct myri10ge_priv *mgp = arg; 2292 2293 mutex_enter(&mgp->intrlock); 2294 /* if the device is not running, give up */ 2295 if (mgp->running != MYRI10GE_ETH_RUNNING) { 2296 mutex_exit(&mgp->intrlock); 2297 return; 2298 } 2299 2300 mgp->running = MYRI10GE_ETH_STOPPING; 2301 mutex_exit(&mgp->intrlock); 2302 (void) untimeout(mgp->timer_id); 2303 mutex_enter(&mgp->intrlock); 2304 myri10ge_stop_locked(mgp); 2305 mutex_exit(&mgp->intrlock); 2306 2307 } 2308 2309 static inline void 2310 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) 2311 { 2312 struct ether_header *eh; 2313 struct ip *ip; 2314 struct ip6_hdr *ip6; 2315 uint32_t start, stuff, end, partial, hdrlen; 2316 2317 2318 csum = ntohs((uint16_t)csum); 2319 eh = (struct ether_header *)(void *)mp->b_rptr; 2320 hdrlen = sizeof (*eh); 2321 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2322 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2323 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2324 s->brdcstrcv++; 2325 else 2326 s->multircv++; 2327 } 2328 2329 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 2330 /* 2331 * fix checksum by subtracting 4 bytes after what the 2332 * firmware thought was the end of the ether hdr 2333 */ 2334 partial = *(uint32_t *) 2335 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE); 2336 csum += ~partial; 2337 csum += (csum < ~partial); 2338 csum = (csum >> 16) + (csum & 0xFFFF); 2339 csum = (csum >> 16) + (csum & 0xFFFF); 2340 hdrlen += VLAN_TAGSZ; 2341 } 2342 2343 if (eh->ether_type == BE_16(ETHERTYPE_IP)) { 2344 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen); 2345 start = ip->ip_hl << 2; 2346 2347 if (ip->ip_p == IPPROTO_TCP) 2348 stuff = start + offsetof(struct tcphdr, th_sum); 2349 else if (ip->ip_p == IPPROTO_UDP) 2350 stuff = start +
offsetof(struct udphdr, uh_sum); 2351 else 2352 return; 2353 end = ntohs(ip->ip_len); 2354 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) { 2355 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen); 2356 start = sizeof (*ip6); 2357 if (ip6->ip6_nxt == IPPROTO_TCP) { 2358 stuff = start + offsetof(struct tcphdr, th_sum); 2359 } else if (ip6->ip6_nxt == IPPROTO_UDP) 2360 stuff = start + offsetof(struct udphdr, uh_sum); 2361 else 2362 return; 2363 end = start + ntohs(ip6->ip6_plen); 2364 /* 2365 * IPv6 headers do not contain a checksum, and hence 2366 * do not checksum to zero, so they don't "fall out" 2367 * of the partial checksum calculation like IPv4 2368 * headers do. We need to fix the partial checksum by 2369 * subtracting the checksum of the IPv6 header. 2370 */ 2371 2372 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6)); 2373 csum += ~partial; 2374 csum += (csum < ~partial); 2375 csum = (csum >> 16) + (csum & 0xFFFF); 2376 csum = (csum >> 16) + (csum & 0xFFFF); 2377 } else { 2378 return; 2379 } 2380 2381 if (MBLKL(mp) > hdrlen + end) { 2382 /* padded frame, so hw csum may be invalid */ 2383 return; 2384 } 2385 2386 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); 2387 } 2388 2389 static mblk_t * 2390 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len, 2391 uint32_t csum) 2392 { 2393 mblk_t *mp; 2394 myri10ge_rx_ring_t *rx; 2395 int idx; 2396 2397 rx = &ss->rx_small; 2398 idx = rx->cnt & rx->mask; 2399 ss->rx_small.cnt++; 2400 2401 /* allocate a new buffer to pass up the stack */ 2402 mp = allocb(len + MXGEFW_PAD, 0); 2403 if (mp == NULL) { 2404 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf); 2405 goto abort; 2406 } 2407 bcopy(ss->rx_small.info[idx].ptr, 2408 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2409 mp->b_wptr += len + MXGEFW_PAD; 2410 mp->b_rptr += MXGEFW_PAD; 2411 2412 ss->rx_stats.ibytes += len; 2413 ss->rx_stats.ipackets += 1; 2414 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2415 2416 abort: 2417 if ((idx & 7) == 7) { 2418 myri10ge_submit_8rx(&rx->lanai[idx - 7], 2419 &rx->shadow[idx - 7]); 2420 } 2421 2422 return (mp); 2423 } 2424 2425 2426 static mblk_t * 2427 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len, 2428 uint32_t csum) 2429 { 2430 struct myri10ge_jpool_stuff *jpool; 2431 struct myri10ge_jpool_entry *j; 2432 mblk_t *mp; 2433 int idx, num_owned_by_mcp; 2434 2435 jpool = &ss->jpool; 2436 idx = ss->j_rx_cnt & ss->rx_big.mask; 2437 j = ss->rx_big.info[idx].j; 2438 2439 if (j == NULL) { 2440 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n", 2441 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt); 2442 return (NULL); 2443 } 2444 2445 2446 ss->rx_big.info[idx].j = NULL; 2447 ss->j_rx_cnt++; 2448 2449 2450 /* 2451 * Check to see if we are low on rx buffers. 2452 * Note that we must leave at least 8 free so there are 2453 * enough to free in a single 64-byte write. 
2454 */ 2455 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2456 if (num_owned_by_mcp < jpool->low_water) { 2457 mutex_enter(&jpool->mtx); 2458 myri10ge_restock_jumbos(ss); 2459 mutex_exit(&jpool->mtx); 2460 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2461 /* if we are still low, then we have to copy */ 2462 if (num_owned_by_mcp < 16) { 2463 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy); 2464 /* allocate a new buffer to pass up the stack */ 2465 mp = allocb(len + MXGEFW_PAD, 0); 2466 if (mp == NULL) { 2467 goto abort; 2468 } 2469 bcopy(j->buf, 2470 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2471 myri10ge_jfree_rtn(j); 2472 /* push buffer back to NIC */ 2473 mutex_enter(&jpool->mtx); 2474 myri10ge_restock_jumbos(ss); 2475 mutex_exit(&jpool->mtx); 2476 goto set_len; 2477 } 2478 } 2479 2480 /* loan our buffer to the stack */ 2481 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func); 2482 if (mp == NULL) { 2483 goto abort; 2484 } 2485 2486 set_len: 2487 mp->b_rptr += MXGEFW_PAD; 2488 mp->b_wptr = ((unsigned char *) mp->b_rptr + len); 2489 2490 ss->rx_stats.ibytes += len; 2491 ss->rx_stats.ipackets += 1; 2492 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2493 2494 return (mp); 2495 2496 abort: 2497 myri10ge_jfree_rtn(j); 2498 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf); 2499 return (NULL); 2500 } 2501 2502 /* 2503 * Free all transmit buffers up until the specified index 2504 */ 2505 static inline void 2506 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index) 2507 { 2508 myri10ge_tx_ring_t *tx; 2509 struct myri10ge_tx_dma_handle_head handles; 2510 int idx; 2511 int limit = 0; 2512 2513 tx = &ss->tx; 2514 handles.head = NULL; 2515 handles.tail = NULL; 2516 while (tx->pkt_done != (int)mcp_index) { 2517 idx = tx->done & tx->mask; 2518 2519 /* 2520 * mblk & DMA handle attached only to first slot 2521 * per buffer in the packet 2522 */ 2523 2524 if (tx->info[idx].m) { 2525 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h); 2526 tx->info[idx].handle->next = handles.head; 2527 handles.head = tx->info[idx].handle; 2528 if (handles.tail == NULL) 2529 handles.tail = tx->info[idx].handle; 2530 freeb(tx->info[idx].m); 2531 tx->info[idx].m = 0; 2532 tx->info[idx].handle = 0; 2533 } 2534 if (tx->info[idx].ostat.opackets != 0) { 2535 tx->stats.multixmt += tx->info[idx].ostat.multixmt; 2536 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt; 2537 tx->stats.obytes += tx->info[idx].ostat.obytes; 2538 tx->stats.opackets += tx->info[idx].ostat.opackets; 2539 tx->info[idx].stat.un.all = 0; 2540 tx->pkt_done++; 2541 } 2542 2543 tx->done++; 2544 /* 2545 * if we stalled the queue, wake it. But Wait until 2546 * we have at least 1/2 our slots free. 
2547 */ 2548 if ((tx->req - tx->done) < (tx->mask >> 1) && 2549 tx->stall != tx->sched) { 2550 mutex_enter(&ss->tx.lock); 2551 tx->sched = tx->stall; 2552 mutex_exit(&ss->tx.lock); 2553 mac_tx_ring_update(ss->mgp->mh, tx->rh); 2554 } 2555 2556 /* limit potential for livelock */ 2557 if (unlikely(++limit > 2 * tx->mask)) 2558 break; 2559 } 2560 if (tx->req == tx->done && tx->stop != NULL) { 2561 /* 2562 * Nic has sent all pending requests, allow it 2563 * to stop polling this queue 2564 */ 2565 mutex_enter(&tx->lock); 2566 if (tx->req == tx->done && tx->active) { 2567 *(int *)(void *)tx->stop = 1; 2568 tx->active = 0; 2569 mb(); 2570 } 2571 mutex_exit(&tx->lock); 2572 } 2573 if (handles.head != NULL) 2574 myri10ge_free_tx_handles(tx, &handles); 2575 } 2576 2577 static void 2578 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl) 2579 { 2580 mbl->head = NULL; 2581 mbl->tail = &mbl->head; 2582 mbl->cnt = 0; 2583 } 2584 2585 /*ARGSUSED*/ 2586 void 2587 myri10ge_mbl_append(struct myri10ge_slice_state *ss, 2588 struct myri10ge_mblk_list *mbl, mblk_t *mp) 2589 { 2590 *(mbl->tail) = mp; 2591 mbl->tail = &mp->b_next; 2592 mp->b_next = NULL; 2593 mbl->cnt++; 2594 } 2595 2596 2597 static inline void 2598 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, 2599 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop) 2600 { 2601 myri10ge_rx_done_t *rx_done = &ss->rx_done; 2602 struct myri10ge_priv *mgp = ss->mgp; 2603 mblk_t *mp; 2604 struct lro_entry *lro; 2605 uint16_t length; 2606 uint16_t checksum; 2607 2608 2609 while (rx_done->entry[rx_done->idx].length != 0) { 2610 if (unlikely (*stop)) { 2611 break; 2612 } 2613 length = ntohs(rx_done->entry[rx_done->idx].length); 2614 length &= (~MXGEFW_RSS_HASH_MASK); 2615 2616 /* limit potential for livelock */ 2617 limit -= length; 2618 if (unlikely(limit < 0)) 2619 break; 2620 2621 rx_done->entry[rx_done->idx].length = 0; 2622 checksum = ntohs(rx_done->entry[rx_done->idx].checksum); 2623 if (length <= myri10ge_small_bytes) 2624 mp = myri10ge_rx_done_small(ss, length, checksum); 2625 else 2626 mp = myri10ge_rx_done_big(ss, length, checksum); 2627 if (mp != NULL) { 2628 if (!myri10ge_lro || 2629 0 != myri10ge_lro_rx(ss, mp, checksum, mbl)) 2630 myri10ge_mbl_append(ss, mbl, mp); 2631 } 2632 rx_done->cnt++; 2633 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1); 2634 } 2635 while (ss->lro_active != NULL) { 2636 lro = ss->lro_active; 2637 ss->lro_active = lro->next; 2638 myri10ge_lro_flush(ss, lro, mbl); 2639 } 2640 } 2641 2642 static void 2643 myri10ge_intr_rx(struct myri10ge_slice_state *ss) 2644 { 2645 uint64_t gen; 2646 struct myri10ge_mblk_list mbl; 2647 2648 myri10ge_mbl_init(&mbl); 2649 if (mutex_tryenter(&ss->rx_lock) == 0) 2650 return; 2651 gen = ss->rx_gen_num; 2652 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL, 2653 &ss->rx_polling); 2654 if (mbl.head != NULL) 2655 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen); 2656 mutex_exit(&ss->rx_lock); 2657 2658 } 2659 2660 static mblk_t * 2661 myri10ge_poll_rx(void *arg, int bytes) 2662 { 2663 struct myri10ge_slice_state *ss = arg; 2664 struct myri10ge_mblk_list mbl; 2665 boolean_t dummy = B_FALSE; 2666 2667 if (bytes == 0) 2668 return (NULL); 2669 2670 myri10ge_mbl_init(&mbl); 2671 mutex_enter(&ss->rx_lock); 2672 if (ss->rx_polling) 2673 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy); 2674 else 2675 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss - 2676 ss->mgp->ss), ss->rx_token, ss->rx_polling); 2677 mutex_exit(&ss->rx_lock); 2678 return (mbl.head); 2679 } 2680 2681 
/*ARGSUSED*/ 2682 static uint_t 2683 myri10ge_intr(caddr_t arg0, caddr_t arg1) 2684 { 2685 struct myri10ge_slice_state *ss = 2686 (struct myri10ge_slice_state *)(void *)arg0; 2687 struct myri10ge_priv *mgp = ss->mgp; 2688 mcp_irq_data_t *stats = ss->fw_stats; 2689 myri10ge_tx_ring_t *tx = &ss->tx; 2690 uint32_t send_done_count; 2691 uint8_t valid; 2692 2693 2694 /* make sure the DMA has finished */ 2695 if (!stats->valid) { 2696 return (DDI_INTR_UNCLAIMED); 2697 } 2698 valid = stats->valid; 2699 2700 /* low bit indicates receives are present */ 2701 if (valid & 1) 2702 myri10ge_intr_rx(ss); 2703 2704 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 2705 /* lower legacy IRQ */ 2706 *mgp->irq_deassert = 0; 2707 if (!myri10ge_deassert_wait) 2708 /* don't wait for conf. that irq is low */ 2709 stats->valid = 0; 2710 mb(); 2711 } else { 2712 /* no need to wait for conf. that irq is low */ 2713 stats->valid = 0; 2714 } 2715 2716 do { 2717 /* check for transmit completes and receives */ 2718 send_done_count = ntohl(stats->send_done_count); 2719 if (send_done_count != tx->pkt_done) 2720 myri10ge_tx_done(ss, (int)send_done_count); 2721 } while (*((volatile uint8_t *) &stats->valid)); 2722 2723 if (stats->stats_updated) { 2724 if (mgp->link_state != stats->link_up || stats->link_down) { 2725 mgp->link_state = stats->link_up; 2726 if (stats->link_down) { 2727 mgp->down_cnt += stats->link_down; 2728 mgp->link_state = 0; 2729 } 2730 if (mgp->link_state) { 2731 if (myri10ge_verbose) 2732 printf("%s: link up\n", mgp->name); 2733 mac_link_update(mgp->mh, LINK_STATE_UP); 2734 } else { 2735 if (myri10ge_verbose) 2736 printf("%s: link down\n", mgp->name); 2737 mac_link_update(mgp->mh, LINK_STATE_DOWN); 2738 } 2739 MYRI10GE_NIC_STAT_INC(link_changes); 2740 } 2741 if (mgp->rdma_tags_available != 2742 ntohl(ss->fw_stats->rdma_tags_available)) { 2743 mgp->rdma_tags_available = 2744 ntohl(ss->fw_stats->rdma_tags_available); 2745 cmn_err(CE_NOTE, "%s: RDMA timed out! " 2746 "%d tags left\n", mgp->name, 2747 mgp->rdma_tags_available); 2748 } 2749 } 2750 2751 mb(); 2752 /* check to see if we have rx token to pass back */ 2753 if (valid & 0x1) { 2754 mutex_enter(&ss->poll_lock); 2755 if (ss->rx_polling) { 2756 ss->rx_token = 1; 2757 } else { 2758 *ss->irq_claim = BE_32(3); 2759 ss->rx_token = 0; 2760 } 2761 mutex_exit(&ss->poll_lock); 2762 } 2763 *(ss->irq_claim + 1) = BE_32(3); 2764 return (DDI_INTR_CLAIMED); 2765 } 2766 2767 /* 2768 * Add or remove a multicast address. This is called with our 2769 * macinfo's lock held by GLD, so we do not need to worry about 2770 * our own locking here. 
2771 */ 2772 static int 2773 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr) 2774 { 2775 myri10ge_cmd_t cmd; 2776 struct myri10ge_priv *mgp = arg; 2777 int status, join_leave; 2778 2779 if (add) 2780 join_leave = MXGEFW_JOIN_MULTICAST_GROUP; 2781 else 2782 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP; 2783 (void) memcpy(&cmd.data0, multicastaddr, 4); 2784 (void) memcpy(&cmd.data1, multicastaddr + 4, 2); 2785 cmd.data0 = htonl(cmd.data0); 2786 cmd.data1 = htonl(cmd.data1); 2787 status = myri10ge_send_cmd(mgp, join_leave, &cmd); 2788 if (status == 0) 2789 return (0); 2790 2791 cmn_err(CE_WARN, "%s: failed to set multicast address\n", 2792 mgp->name); 2793 return (status); 2794 } 2795 2796 2797 static int 2798 myri10ge_m_promisc(void *arg, boolean_t on) 2799 { 2800 struct myri10ge_priv *mgp = arg; 2801 2802 myri10ge_change_promisc(mgp, on); 2803 return (0); 2804 } 2805 2806 /* 2807 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2808 * backwards one at a time and handle ring wraps 2809 */ 2810 2811 static inline void 2812 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx, 2813 mcp_kreq_ether_send_t *src, int cnt) 2814 { 2815 int idx, starting_slot; 2816 starting_slot = tx->req; 2817 while (cnt > 1) { 2818 cnt--; 2819 idx = (starting_slot + cnt) & tx->mask; 2820 myri10ge_pio_copy(&tx->lanai[idx], 2821 &src[cnt], sizeof (*src)); 2822 mb(); 2823 } 2824 } 2825 2826 /* 2827 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2828 * at most 32 bytes at a time, so as to avoid involving the software 2829 * pio handler in the nic. We re-write the first segment's flags 2830 * to mark them valid only after writing the entire chain 2831 */ 2832 2833 static inline void 2834 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 2835 int cnt) 2836 { 2837 int idx, i; 2838 uint32_t *src_ints, *dst_ints; 2839 mcp_kreq_ether_send_t *srcp, *dstp, *dst; 2840 uint8_t last_flags; 2841 2842 idx = tx->req & tx->mask; 2843 2844 last_flags = src->flags; 2845 src->flags = 0; 2846 mb(); 2847 dst = dstp = &tx->lanai[idx]; 2848 srcp = src; 2849 2850 if ((idx + cnt) < tx->mask) { 2851 for (i = 0; i < (cnt - 1); i += 2) { 2852 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src)); 2853 mb(); /* force write every 32 bytes */ 2854 srcp += 2; 2855 dstp += 2; 2856 } 2857 } else { 2858 /* 2859 * submit all but the first request, and ensure 2860 * that it is submitted below 2861 */ 2862 myri10ge_submit_req_backwards(tx, src, cnt); 2863 i = 0; 2864 } 2865 if (i < cnt) { 2866 /* submit the first request */ 2867 myri10ge_pio_copy(dstp, srcp, sizeof (*src)); 2868 mb(); /* barrier before setting valid flag */ 2869 } 2870 2871 /* re-write the last 32-bits with the valid flags */ 2872 src->flags |= last_flags; 2873 src_ints = (uint32_t *)src; 2874 src_ints += 3; 2875 dst_ints = (uint32_t *)dst; 2876 dst_ints += 3; 2877 *dst_ints = *src_ints; 2878 tx->req += cnt; 2879 mb(); 2880 /* notify NIC to poll this tx ring */ 2881 if (!tx->active && tx->go != NULL) { 2882 *(int *)(void *)tx->go = 1; 2883 tx->active = 1; 2884 tx->activate++; 2885 mb(); 2886 } 2887 } 2888 2889 /* ARGSUSED */ 2890 static inline void 2891 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 2892 { 2893 uint32_t lso_flag; 2894 mac_lso_get(mp, mss, &lso_flag); 2895 (*flags) |= lso_flag; 2896 } 2897 2898 2899 /* like pullupmsg, except preserve hcksum/LSO attributes */ 2900 static int 2901 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) 2902 { 2903 uint32_t start, stuff, 
tx_offload_flags, mss; 2904 int ok; 2905 2906 mss = 0; 2907 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 2908 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 2909 2910 ok = pullupmsg(mp, -1); 2911 if (!ok) { 2912 printf("pullupmsg failed"); 2913 return (DDI_FAILURE); 2914 } 2915 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); 2916 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); 2917 if (tx_offload_flags & HW_LSO) 2918 DB_LSOMSS(mp) = (uint16_t)mss; 2919 lso_info_set(mp, mss, tx_offload_flags); 2920 return (DDI_SUCCESS); 2921 } 2922 2923 static inline void 2924 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh, 2925 int opackets, int obytes) 2926 { 2927 s->un.all = 0; 2928 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2929 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2930 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2931 s->un.s.brdcstxmt = 1; 2932 else 2933 s->un.s.multixmt = 1; 2934 } 2935 s->un.s.opackets = (uint16_t)opackets; 2936 s->un.s.obytes = obytes; 2937 } 2938 2939 static int 2940 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 2941 mcp_kreq_ether_send_t *req) 2942 { 2943 myri10ge_tx_ring_t *tx = &ss->tx; 2944 caddr_t ptr; 2945 struct myri10ge_tx_copybuf *cp; 2946 mblk_t *bp; 2947 int idx, mblen, avail; 2948 uint16_t len; 2949 2950 mutex_enter(&tx->lock); 2951 avail = tx->mask - (tx->req - tx->done); 2952 if (avail <= 1) { 2953 mutex_exit(&tx->lock); 2954 return (EBUSY); 2955 } 2956 idx = tx->req & tx->mask; 2957 cp = &tx->cp[idx]; 2958 ptr = cp->va; 2959 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) { 2960 mblen = MBLKL(bp); 2961 bcopy(bp->b_rptr, ptr, mblen); 2962 ptr += mblen; 2963 len += mblen; 2964 } 2965 /* ensure runts are padded to 60 bytes */ 2966 if (len < 60) { 2967 bzero(ptr, 64 - len); 2968 len = 60; 2969 } 2970 req->addr_low = cp->dma.low; 2971 req->addr_high = cp->dma.high; 2972 req->length = htons(len); 2973 req->pad = 0; 2974 req->rdma_count = 1; 2975 myri10ge_tx_stat(&tx->info[idx].stat, 2976 (struct ether_header *)(void *)cp->va, 1, len); 2977 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV); 2978 myri10ge_submit_req(&ss->tx, req, 1); 2979 mutex_exit(&tx->lock); 2980 freemsg(mp); 2981 return (DDI_SUCCESS); 2982 } 2983 2984 2985 static void 2986 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list, 2987 struct myri10ge_tx_buffer_state *tx_info, 2988 int count) 2989 { 2990 int i, idx; 2991 2992 idx = 0; /* gcc -Wuninitialized */ 2993 /* store unmapping and bp info for tx irq handler */ 2994 for (i = 0; i < count; i++) { 2995 idx = (tx->req + i) & tx->mask; 2996 tx->info[idx].m = tx_info[i].m; 2997 tx->info[idx].handle = tx_info[i].handle; 2998 } 2999 tx->info[idx].stat.un.all = tx_info[0].stat.un.all; 3000 3001 /* submit the frame to the nic */ 3002 myri10ge_submit_req(tx, req_list, count); 3003 3004 3005 } 3006 3007 3008 3009 static void 3010 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf) 3011 { 3012 mblk_t *bp; 3013 int seglen; 3014 uint_t count; 3015 3016 bp = mp; 3017 3018 while (off > 0) { 3019 seglen = MBLKL(bp); 3020 if (off < seglen) 3021 break; 3022 off -= seglen; 3023 bp = bp->b_cont; 3024 } 3025 while (len > 0) { 3026 seglen = MBLKL(bp); 3027 count = min(seglen - off, len); 3028 bcopy(bp->b_rptr + off, buf, count); 3029 len -= count; 3030 buf += count; 3031 off = 0; 3032 bp = bp->b_cont; 3033 } 3034 } 3035 3036 static int 3037 myri10ge_ether_parse_header(mblk_t *mp) 3038 { 3039 struct ether_header eh_copy; 
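	/* stack copy, used only when the ether header straddles mblks */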
3040 struct ether_header *eh; 3041 int eth_hdr_len, seglen; 3042 3043 seglen = MBLKL(mp); 3044 eth_hdr_len = sizeof (*eh); 3045 if (seglen < eth_hdr_len) { 3046 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy); 3047 eh = &eh_copy; 3048 } else { 3049 eh = (struct ether_header *)(void *)mp->b_rptr; 3050 } 3051 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 3052 eth_hdr_len += 4; 3053 } 3054 3055 return (eth_hdr_len); 3056 } 3057 3058 static int 3059 myri10ge_lso_parse_header(mblk_t *mp, int off) 3060 { 3061 char buf[128]; 3062 int seglen, sum_off; 3063 struct ip *ip; 3064 struct tcphdr *tcp; 3065 3066 seglen = MBLKL(mp); 3067 if (seglen < off + sizeof (*ip)) { 3068 myri10ge_copydata(mp, off, sizeof (*ip), buf); 3069 ip = (struct ip *)(void *)buf; 3070 } else { 3071 ip = (struct ip *)(void *)(mp->b_rptr + off); 3072 } 3073 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) { 3074 myri10ge_copydata(mp, off, 3075 (ip->ip_hl << 2) + sizeof (*tcp), buf); 3076 ip = (struct ip *)(void *)buf; 3077 } 3078 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2)); 3079 3080 /* 3081 * NIC expects ip_sum to be zero. Recent changes to 3082 * OpenSolaris leave the correct ip checksum there, rather 3083 * than the required zero, so we need to zero it. Otherwise, 3084 * the NIC will produce bad checksums when sending LSO packets. 3085 */ 3086 if (ip->ip_sum != 0) { 3087 if (((char *)ip) != buf) { 3088 /* ip points into mblk, so just zero it */ 3089 ip->ip_sum = 0; 3090 } else { 3091 /* 3092 * ip points into a copy, so walk the chain 3093 * to find the ip_csum, then zero it 3094 */ 3095 sum_off = off + _PTRDIFF(&ip->ip_sum, buf); 3096 while (sum_off > (int)(MBLKL(mp) - 1)) { 3097 sum_off -= MBLKL(mp); 3098 mp = mp->b_cont; 3099 } 3100 mp->b_rptr[sum_off] = 0; 3101 sum_off++; 3102 while (sum_off > MBLKL(mp) - 1) { 3103 sum_off -= MBLKL(mp); 3104 mp = mp->b_cont; 3105 } 3106 mp->b_rptr[sum_off] = 0; 3107 } 3108 } 3109 return (off + ((ip->ip_hl + tcp->th_off) << 2)); 3110 } 3111 3112 static int 3113 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 3114 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size, 3115 uint16_t mss, uint8_t cksum_offset) 3116 { 3117 myri10ge_tx_ring_t *tx = &ss->tx; 3118 struct myri10ge_priv *mgp = ss->mgp; 3119 mblk_t *bp; 3120 mcp_kreq_ether_send_t *req; 3121 struct myri10ge_tx_copybuf *cp; 3122 caddr_t rptr, ptr; 3123 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp; 3124 int resid, avail, idx, hdr_size_tmp, tx_boundary; 3125 int rdma_count; 3126 uint32_t seglen, len, boundary, low, high_swapped; 3127 uint16_t pseudo_hdr_offset = htons(mss); 3128 uint8_t flags; 3129 3130 tx_boundary = mgp->tx_boundary; 3131 hdr_size_tmp = hdr_size; 3132 resid = tx_boundary; 3133 count = 1; 3134 mutex_enter(&tx->lock); 3135 3136 /* check to see if the slots are really there */ 3137 avail = tx->mask - (tx->req - tx->done); 3138 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) { 3139 atomic_inc_32(&tx->stall); 3140 mutex_exit(&tx->lock); 3141 return (EBUSY); 3142 } 3143 3144 /* copy */ 3145 cum_len = -hdr_size; 3146 count = 0; 3147 req = req_list; 3148 idx = tx->mask & tx->req; 3149 cp = &tx->cp[idx]; 3150 low = ntohl(cp->dma.low); 3151 ptr = cp->va; 3152 cp->len = 0; 3153 if (mss) { 3154 int payload = pkt_size - hdr_size; 3155 uint16_t opackets = (payload / mss) + ((payload % mss) != 0); 3156 tx->info[idx].ostat.opackets = opackets; 3157 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size 3158 + pkt_size; 3159 } 3160 hdr_size_tmp = hdr_size; 3161 
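	/*
	 * Copy phase: stage the protocol headers first, then copy the
	 * payload in at-most-mss sized pieces, moving to the next copy
	 * buffer whenever fewer than mss bytes remain before tx_boundary.
	 */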
mss_resid = mss; 3162 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3163 tx_req = tx->req; 3164 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3165 mblen = MBLKL(bp); 3166 rptr = (caddr_t)bp->b_rptr; 3167 len = min(hdr_size_tmp, mblen); 3168 if (len) { 3169 bcopy(rptr, ptr, len); 3170 rptr += len; 3171 ptr += len; 3172 resid -= len; 3173 mblen -= len; 3174 hdr_size_tmp -= len; 3175 cp->len += len; 3176 if (hdr_size_tmp) 3177 continue; 3178 if (resid < mss) { 3179 tx_req++; 3180 idx = tx->mask & tx_req; 3181 cp = &tx->cp[idx]; 3182 low = ntohl(cp->dma.low); 3183 ptr = cp->va; 3184 resid = tx_boundary; 3185 } 3186 } 3187 while (mblen) { 3188 len = min(mss_resid, mblen); 3189 bcopy(rptr, ptr, len); 3190 mss_resid -= len; 3191 resid -= len; 3192 mblen -= len; 3193 rptr += len; 3194 ptr += len; 3195 cp->len += len; 3196 if (mss_resid == 0) { 3197 mss_resid = mss; 3198 if (resid < mss) { 3199 tx_req++; 3200 idx = tx->mask & tx_req; 3201 cp = &tx->cp[idx]; 3202 cp->len = 0; 3203 low = ntohl(cp->dma.low); 3204 ptr = cp->va; 3205 resid = tx_boundary; 3206 } 3207 } 3208 } 3209 } 3210 3211 req = req_list; 3212 pkt_size_tmp = pkt_size; 3213 count = 0; 3214 rdma_count = 0; 3215 tx_req = tx->req; 3216 while (pkt_size_tmp) { 3217 idx = tx->mask & tx_req; 3218 cp = &tx->cp[idx]; 3219 high_swapped = cp->dma.high; 3220 low = ntohl(cp->dma.low); 3221 len = cp->len; 3222 if (len == 0) { 3223 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n", 3224 pkt_size_tmp, pkt_size); 3225 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3226 mblen = MBLKL(bp); 3227 printf("mblen:%d\n", mblen); 3228 } 3229 pkt_size_tmp = pkt_size; 3230 tx_req = tx->req; 3231 while (pkt_size_tmp > 0) { 3232 idx = tx->mask & tx_req; 3233 cp = &tx->cp[idx]; 3234 printf("cp->len = %d\n", cp->len); 3235 pkt_size_tmp -= cp->len; 3236 tx_req++; 3237 } 3238 printf("dropped\n"); 3239 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3240 goto done; 3241 } 3242 pkt_size_tmp -= len; 3243 while (len) { 3244 while (len) { 3245 uint8_t flags_next; 3246 int cum_len_next; 3247 3248 boundary = (low + mgp->tx_boundary) & 3249 ~(mgp->tx_boundary - 1); 3250 seglen = boundary - low; 3251 if (seglen > len) 3252 seglen = len; 3253 3254 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3255 cum_len_next = cum_len + seglen; 3256 (req-rdma_count)->rdma_count = rdma_count + 1; 3257 if (likely(cum_len >= 0)) { 3258 /* payload */ 3259 int next_is_first, chop; 3260 3261 chop = (cum_len_next > mss); 3262 cum_len_next = cum_len_next % mss; 3263 next_is_first = (cum_len_next == 0); 3264 flags |= chop * 3265 MXGEFW_FLAGS_TSO_CHOP; 3266 flags_next |= next_is_first * 3267 MXGEFW_FLAGS_FIRST; 3268 rdma_count |= -(chop | next_is_first); 3269 rdma_count += chop & !next_is_first; 3270 } else if (likely(cum_len_next >= 0)) { 3271 /* header ends */ 3272 int small; 3273 3274 rdma_count = -1; 3275 cum_len_next = 0; 3276 seglen = -cum_len; 3277 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 3278 flags_next = MXGEFW_FLAGS_TSO_PLD | 3279 MXGEFW_FLAGS_FIRST | 3280 (small * MXGEFW_FLAGS_SMALL); 3281 } 3282 req->addr_high = high_swapped; 3283 req->addr_low = htonl(low); 3284 req->pseudo_hdr_offset = pseudo_hdr_offset; 3285 req->pad = 0; /* complete solid 16-byte block */ 3286 req->rdma_count = 1; 3287 req->cksum_offset = cksum_offset; 3288 req->length = htons(seglen); 3289 req->flags = flags | ((cum_len & 1) * 3290 MXGEFW_FLAGS_ALIGN_ODD); 3291 if (cksum_offset > seglen) 3292 cksum_offset -= seglen; 3293 else 3294 cksum_offset = 0; 3295 low += seglen; 3296 len -= seglen; 3297 cum_len = cum_len_next; 3298 req++; 
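			/* clear the next, not-yet-valid descriptor's flags */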
3299 req->flags = 0; 3300 flags = flags_next; 3301 count++; 3302 rdma_count++; 3303 } 3304 } 3305 tx_req++; 3306 } 3307 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3308 do { 3309 req--; 3310 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3311 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3312 MXGEFW_FLAGS_FIRST))); 3313 3314 myri10ge_submit_req(tx, req_list, count); 3315 done: 3316 mutex_exit(&tx->lock); 3317 freemsg(mp); 3318 return (DDI_SUCCESS); 3319 } 3320 3321 /* 3322 * Try to send the chain of buffers described by the mp. We must not 3323 * encapsulate more than eth->tx.req - eth->tx.done, or 3324 * MXGEFW_MAX_SEND_DESC, whichever is less. 3325 */ 3326 3327 static int 3328 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, 3329 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info) 3330 { 3331 struct myri10ge_priv *mgp = ss->mgp; 3332 myri10ge_tx_ring_t *tx = &ss->tx; 3333 mcp_kreq_ether_send_t *req; 3334 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL; 3335 mblk_t *bp; 3336 ddi_dma_cookie_t cookie; 3337 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen, 3338 rdma_count, cum_len, lso_hdr_size; 3339 uint32_t start, stuff, tx_offload_flags; 3340 uint32_t seglen, len, mss, boundary, low, high_swapped; 3341 uint_t ncookies; 3342 uint16_t pseudo_hdr_offset; 3343 uint8_t flags, cksum_offset, odd_flag; 3344 int pkt_size; 3345 int lso_copy = myri10ge_lso_copy; 3346 try_pullup = 1; 3347 3348 again: 3349 /* Setup checksum offloading, if needed */ 3350 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 3351 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 3352 if (tx_offload_flags & HW_LSO) { 3353 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3354 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) { 3355 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags); 3356 freemsg(mp); 3357 return (DDI_SUCCESS); 3358 } 3359 } else { 3360 max_segs = MXGEFW_MAX_SEND_DESC; 3361 mss = 0; 3362 } 3363 req = req_list; 3364 cksum_offset = 0; 3365 pseudo_hdr_offset = 0; 3366 3367 /* leave an extra slot to keep the ring from wrapping */ 3368 avail = tx->mask - (tx->req - tx->done); 3369 3370 /* 3371 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length 3372 * message will need to be pulled up in order to fit. 3373 * Otherwise, we are low on transmit descriptors, and it is 3374 * probably better to stall and try again rather than pull up a 3375 * message to fit. 3376 */ 3377 3378 if (avail < max_segs) { 3379 err = EBUSY; 3380 atomic_inc_32(&tx->stall_early); 3381 goto stall; 3382 } 3383 3384 /* find out how long the frame is and how many segments it is */ 3385 count = 0; 3386 odd_flag = 0; 3387 pkt_size = 0; 3388 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 3389 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3390 dblk_t *dbp; 3391 mblen = MBLKL(bp); 3392 if (mblen == 0) { 3393 /* 3394 * we can't simply skip over 0-length mblks 3395 * because the hardware can't deal with them, 3396 * and we could leak them.
3397 */ 3398 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len); 3399 err = EIO; 3400 goto pullup; 3401 } 3402 /* 3403 * There's no advantage to copying most desballoc 3404 * attached blocks, so disable lso copy in that case 3405 */ 3406 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) { 3407 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) { 3408 lso_copy = 0; 3409 } 3410 } 3411 pkt_size += mblen; 3412 count++; 3413 } 3414 3415 /* Try to pull up excessively long chains */ 3416 if (count >= max_segs) { 3417 err = myri10ge_pullup(ss, mp); 3418 if (likely(err == DDI_SUCCESS)) { 3419 count = 1; 3420 } else { 3421 if (count < MYRI10GE_MAX_SEND_DESC_TSO) { 3422 /* 3423 * just let the h/w send it; it will be 3424 * inefficient, but better than dropping 3425 */ 3426 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3427 } else { 3428 /* drop it */ 3429 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3430 freemsg(mp); 3431 return (0); 3432 } 3433 } 3434 } 3435 3436 cum_len = 0; 3437 maclen = myri10ge_ether_parse_header(mp); 3438 3439 if (tx_offload_flags & HCK_PARTIALCKSUM) { 3440 3441 cksum_offset = start + maclen; 3442 pseudo_hdr_offset = htons(stuff + maclen); 3443 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 3444 flags |= MXGEFW_FLAGS_CKSUM; 3445 } 3446 3447 lso_hdr_size = 0; /* -Wuninitialized */ 3448 if (mss) { /* LSO */ 3449 /* this removes any CKSUM flag from before */ 3450 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3451 /* 3452 * parse the headers and set cum_len to a negative 3453 * value to reflect the offset of the TCP payload 3454 */ 3455 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen); 3456 cum_len = -lso_hdr_size; 3457 if ((mss < mgp->tx_boundary) && lso_copy) { 3458 err = myri10ge_tx_tso_copy(ss, mp, req_list, 3459 lso_hdr_size, pkt_size, mss, cksum_offset); 3460 return (err); 3461 } 3462 3463 /* 3464 * for TSO, pseudo_hdr_offset holds mss. The firmware 3465 * figures out where to put the checksum by parsing 3466 * the header.
3467 */ 3468 3469 pseudo_hdr_offset = htons(mss); 3470 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) { 3471 flags |= MXGEFW_FLAGS_SMALL; 3472 if (pkt_size < myri10ge_tx_copylen) { 3473 req->cksum_offset = cksum_offset; 3474 req->pseudo_hdr_offset = pseudo_hdr_offset; 3475 req->flags = flags; 3476 err = myri10ge_tx_copy(ss, mp, req); 3477 return (err); 3478 } 3479 cum_len = 0; 3480 } 3481 3482 /* pull one DMA handle for each bp from our freelist */ 3483 handles = NULL; 3484 err = myri10ge_alloc_tx_handles(ss, count, &handles); 3485 if (err != DDI_SUCCESS) { 3486 err = DDI_FAILURE; 3487 goto stall; 3488 } 3489 count = 0; 3490 rdma_count = 0; 3491 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3492 mblen = MBLKL(bp); 3493 dma_handle = handles; 3494 handles = handles->next; 3495 3496 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL, 3497 (caddr_t)bp->b_rptr, mblen, 3498 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL, 3499 &cookie, &ncookies); 3500 if (unlikely(rv != DDI_DMA_MAPPED)) { 3501 err = EIO; 3502 try_pullup = 0; 3503 dma_handle->next = handles; 3504 handles = dma_handle; 3505 goto abort_with_handles; 3506 } 3507 3508 /* reserve the slot */ 3509 tx_info[count].m = bp; 3510 tx_info[count].handle = dma_handle; 3511 3512 for (; ; ) { 3513 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 3514 high_swapped = 3515 htonl(MYRI10GE_HIGHPART_TO_U32( 3516 cookie.dmac_laddress)); 3517 len = (uint32_t)cookie.dmac_size; 3518 while (len) { 3519 uint8_t flags_next; 3520 int cum_len_next; 3521 3522 boundary = (low + mgp->tx_boundary) & 3523 ~(mgp->tx_boundary - 1); 3524 seglen = boundary - low; 3525 if (seglen > len) 3526 seglen = len; 3527 3528 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3529 cum_len_next = cum_len + seglen; 3530 if (mss) { 3531 (req-rdma_count)->rdma_count = 3532 rdma_count + 1; 3533 if (likely(cum_len >= 0)) { 3534 /* payload */ 3535 int next_is_first, chop; 3536 3537 chop = (cum_len_next > mss); 3538 cum_len_next = 3539 cum_len_next % mss; 3540 next_is_first = 3541 (cum_len_next == 0); 3542 flags |= chop * 3543 MXGEFW_FLAGS_TSO_CHOP; 3544 flags_next |= next_is_first * 3545 MXGEFW_FLAGS_FIRST; 3546 rdma_count |= 3547 -(chop | next_is_first); 3548 rdma_count += 3549 chop & !next_is_first; 3550 } else if (likely(cum_len_next >= 0)) { 3551 /* header ends */ 3552 int small; 3553 3554 rdma_count = -1; 3555 cum_len_next = 0; 3556 seglen = -cum_len; 3557 small = (mss <= 3558 MXGEFW_SEND_SMALL_SIZE); 3559 flags_next = 3560 MXGEFW_FLAGS_TSO_PLD 3561 | MXGEFW_FLAGS_FIRST 3562 | (small * 3563 MXGEFW_FLAGS_SMALL); 3564 } 3565 } 3566 req->addr_high = high_swapped; 3567 req->addr_low = htonl(low); 3568 req->pseudo_hdr_offset = pseudo_hdr_offset; 3569 req->pad = 0; /* complete solid 16-byte block */ 3570 req->rdma_count = 1; 3571 req->cksum_offset = cksum_offset; 3572 req->length = htons(seglen); 3573 req->flags = flags | ((cum_len & 1) * odd_flag); 3574 if (cksum_offset > seglen) 3575 cksum_offset -= seglen; 3576 else 3577 cksum_offset = 0; 3578 low += seglen; 3579 len -= seglen; 3580 cum_len = cum_len_next; 3581 count++; 3582 rdma_count++; 3583 /* make sure all the segments will fit */ 3584 if (unlikely(count >= max_segs)) { 3585 MYRI10GE_ATOMIC_SLICE_STAT_INC( 3586 xmit_lowbuf); 3587 /* may try a pullup */ 3588 err = EBUSY; 3589 if (try_pullup) 3590 try_pullup = 2; 3591 goto abort_with_handles; 3592 } 3593 req++; 3594 req->flags = 0; 3595 flags = flags_next; 3596 tx_info[count].m = 0; 3597 } 3598 ncookies--; 3599 if (ncookies == 0) 3600 break; 3601 ddi_dma_nextcookie(dma_handle->h, 
&cookie); 3602 } 3603 } 3604 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3605 3606 if (mss) { 3607 do { 3608 req--; 3609 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3610 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3611 MXGEFW_FLAGS_FIRST))); 3612 } 3613 3614 /* calculate tx stats */ 3615 if (mss) { 3616 uint16_t opackets; 3617 int payload; 3618 3619 payload = pkt_size - lso_hdr_size; 3620 opackets = (payload / mss) + ((payload % mss) != 0); 3621 tx_info[0].stat.un.all = 0; 3622 tx_info[0].ostat.opackets = opackets; 3623 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size 3624 + pkt_size; 3625 } else { 3626 myri10ge_tx_stat(&tx_info[0].stat, 3627 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size); 3628 } 3629 mutex_enter(&tx->lock); 3630 3631 /* check to see if the slots are really there */ 3632 avail = tx->mask - (tx->req - tx->done); 3633 if (unlikely(avail <= count)) { 3634 mutex_exit(&tx->lock); 3635 err = 0; 3636 goto late_stall; 3637 } 3638 3639 myri10ge_send_locked(tx, req_list, tx_info, count); 3640 mutex_exit(&tx->lock); 3641 return (DDI_SUCCESS); 3642 3643 late_stall: 3644 try_pullup = 0; 3645 atomic_inc_32(&tx->stall_late); 3646 3647 abort_with_handles: 3648 /* unbind and free handles from previous mblks */ 3649 for (i = 0; i < count; i++) { 3650 bp = tx_info[i].m; 3651 tx_info[i].m = 0; 3652 if (bp) { 3653 dma_handle = tx_info[i].handle; 3654 (void) ddi_dma_unbind_handle(dma_handle->h); 3655 dma_handle->next = handles; 3656 handles = dma_handle; 3657 tx_info[i].handle = NULL; 3658 tx_info[i].m = NULL; 3659 } 3660 } 3661 myri10ge_free_tx_handle_slist(tx, handles); 3662 pullup: 3663 if (try_pullup) { 3664 err = myri10ge_pullup(ss, mp); 3665 if (err != DDI_SUCCESS && try_pullup == 2) { 3666 /* drop */ 3667 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3668 freemsg(mp); 3669 return (0); 3670 } 3671 try_pullup = 0; 3672 goto again; 3673 } 3674 3675 stall: 3676 if (err != 0) { 3677 if (err == EBUSY) { 3678 atomic_inc_32(&tx->stall); 3679 } else { 3680 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3681 } 3682 } 3683 return (err); 3684 } 3685 3686 static mblk_t * 3687 myri10ge_send_wrapper(void *arg, mblk_t *mp) 3688 { 3689 struct myri10ge_slice_state *ss = arg; 3690 int err = 0; 3691 mcp_kreq_ether_send_t *req_list; 3692 #if defined(__i386) 3693 /* 3694 * We need about 2.5KB of scratch space to handle transmits. 3695 * i86pc has only 8KB of kernel stack space, so we malloc the 3696 * scratch space there rather than keeping it on the stack. 
3697 */ 3698 size_t req_size, tx_info_size; 3699 struct myri10ge_tx_buffer_state *tx_info; 3700 caddr_t req_bytes; 3701 3702 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3703 + 8; 3704 req_bytes = kmem_alloc(req_size, KM_SLEEP); 3705 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1); 3706 tx_info = kmem_alloc(tx_info_size, KM_SLEEP); 3707 #else 3708 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3709 + 8]; 3710 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1]; 3711 #endif 3712 3713 /* ensure req_list entries are aligned to 8 bytes */ 3714 req_list = (struct mcp_kreq_ether_send *) 3715 (((unsigned long)req_bytes + 7UL) & ~7UL); 3716 3717 err = myri10ge_send(ss, mp, req_list, tx_info); 3718 3719 #if defined(__i386) 3720 kmem_free(tx_info, tx_info_size); 3721 kmem_free(req_bytes, req_size); 3722 #endif 3723 if (err) 3724 return (mp); 3725 else 3726 return (NULL); 3727 } 3728 3729 static int 3730 myri10ge_addmac(void *arg, const uint8_t *mac_addr) 3731 { 3732 struct myri10ge_priv *mgp = arg; 3733 int err; 3734 3735 if (mac_addr == NULL) 3736 return (EINVAL); 3737 3738 mutex_enter(&mgp->intrlock); 3739 if (mgp->macaddr_cnt) { 3740 mutex_exit(&mgp->intrlock); 3741 return (ENOSPC); 3742 } 3743 err = myri10ge_m_unicst(mgp, mac_addr); 3744 if (!err) 3745 mgp->macaddr_cnt++; 3746 3747 mutex_exit(&mgp->intrlock); 3748 if (err) 3749 return (err); 3750 3751 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr)); 3752 return (0); 3753 } 3754 3755 /*ARGSUSED*/ 3756 static int 3757 myri10ge_remmac(void *arg, const uint8_t *mac_addr) 3758 { 3759 struct myri10ge_priv *mgp = arg; 3760 3761 mutex_enter(&mgp->intrlock); 3762 mgp->macaddr_cnt--; 3763 mutex_exit(&mgp->intrlock); 3764 3765 return (0); 3766 } 3767 3768 /*ARGSUSED*/ 3769 static void 3770 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index, 3771 mac_group_info_t *infop, mac_group_handle_t gh) 3772 { 3773 struct myri10ge_priv *mgp = arg; 3774 3775 if (rtype != MAC_RING_TYPE_RX) 3776 return; 3777 3778 infop->mgi_driver = (mac_group_driver_t)mgp; 3779 infop->mgi_start = NULL; 3780 infop->mgi_stop = NULL; 3781 infop->mgi_addmac = myri10ge_addmac; 3782 infop->mgi_remmac = myri10ge_remmac; 3783 infop->mgi_count = mgp->num_slices; 3784 } 3785 3786 static int 3787 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) 3788 { 3789 struct myri10ge_slice_state *ss; 3790 3791 ss = (struct myri10ge_slice_state *)rh; 3792 mutex_enter(&ss->rx_lock); 3793 ss->rx_gen_num = mr_gen_num; 3794 mutex_exit(&ss->rx_lock); 3795 return (0); 3796 } 3797 3798 /* 3799 * Retrieve a value for one of the statistics for a particular rx ring 3800 */ 3801 int 3802 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3803 { 3804 struct myri10ge_slice_state *ss; 3805 3806 ss = (struct myri10ge_slice_state *)rh; 3807 switch (stat) { 3808 case MAC_STAT_RBYTES: 3809 *val = ss->rx_stats.ibytes; 3810 break; 3811 3812 case MAC_STAT_IPACKETS: 3813 *val = ss->rx_stats.ipackets; 3814 break; 3815 3816 default: 3817 *val = 0; 3818 return (ENOTSUP); 3819 } 3820 3821 return (0); 3822 } 3823 3824 /* 3825 * Retrieve a value for one of the statistics for a particular tx ring 3826 */ 3827 int 3828 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3829 { 3830 struct myri10ge_slice_state *ss; 3831 3832 ss = (struct myri10ge_slice_state *)rh; 3833 switch (stat) { 3834 case MAC_STAT_OBYTES: 3835 *val = ss->tx.stats.obytes; 3836 break; 3837 3838 case 
MAC_STAT_OPACKETS: 3839 *val = ss->tx.stats.opackets; 3840 break; 3841 3842 default: 3843 *val = 0; 3844 return (ENOTSUP); 3845 } 3846 3847 return (0); 3848 } 3849 3850 static int 3851 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) 3852 { 3853 struct myri10ge_slice_state *ss; 3854 3855 ss = (struct myri10ge_slice_state *)intrh; 3856 mutex_enter(&ss->poll_lock); 3857 ss->rx_polling = B_TRUE; 3858 mutex_exit(&ss->poll_lock); 3859 return (0); 3860 } 3861 3862 static int 3863 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh) 3864 { 3865 struct myri10ge_slice_state *ss; 3866 3867 ss = (struct myri10ge_slice_state *)intrh; 3868 mutex_enter(&ss->poll_lock); 3869 ss->rx_polling = B_FALSE; 3870 if (ss->rx_token) { 3871 *ss->irq_claim = BE_32(3); 3872 ss->rx_token = 0; 3873 } 3874 mutex_exit(&ss->poll_lock); 3875 return (0); 3876 } 3877 3878 /*ARGSUSED*/ 3879 static void 3880 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 3881 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 3882 { 3883 struct myri10ge_priv *mgp = arg; 3884 struct myri10ge_slice_state *ss; 3885 mac_intr_t *mintr = &infop->mri_intr; 3886 3887 ASSERT((unsigned int)ring_index < mgp->num_slices); 3888 3889 ss = &mgp->ss[ring_index]; 3890 switch (rtype) { 3891 case MAC_RING_TYPE_RX: 3892 ss->rx_rh = rh; 3893 infop->mri_driver = (mac_ring_driver_t)ss; 3894 infop->mri_start = myri10ge_ring_start; 3895 infop->mri_stop = NULL; 3896 infop->mri_poll = myri10ge_poll_rx; 3897 infop->mri_stat = myri10ge_rx_ring_stat; 3898 mintr->mi_handle = (mac_intr_handle_t)ss; 3899 mintr->mi_enable = myri10ge_rx_ring_intr_enable; 3900 mintr->mi_disable = myri10ge_rx_ring_intr_disable; 3901 break; 3902 case MAC_RING_TYPE_TX: 3903 ss->tx.rh = rh; 3904 infop->mri_driver = (mac_ring_driver_t)ss; 3905 infop->mri_start = NULL; 3906 infop->mri_stop = NULL; 3907 infop->mri_tx = myri10ge_send_wrapper; 3908 infop->mri_stat = myri10ge_tx_ring_stat; 3909 break; 3910 default: 3911 break; 3912 } 3913 } 3914 3915 static void 3916 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp) 3917 { 3918 if (mgp->ksp_stat == NULL) 3919 return; 3920 3921 kstat_delete(mgp->ksp_stat); 3922 mgp->ksp_stat = NULL; 3923 } 3924 3925 static void 3926 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss) 3927 { 3928 if (ss->ksp_stat == NULL) 3929 return; 3930 3931 kstat_delete(ss->ksp_stat); 3932 ss->ksp_stat = NULL; 3933 } 3934 3935 static void 3936 myri10ge_info_destroy(struct myri10ge_priv *mgp) 3937 { 3938 if (mgp->ksp_info == NULL) 3939 return; 3940 3941 kstat_delete(mgp->ksp_info); 3942 mgp->ksp_info = NULL; 3943 } 3944 3945 static int 3946 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw) 3947 { 3948 struct myri10ge_nic_stat *ethstat; 3949 struct myri10ge_priv *mgp; 3950 mcp_irq_data_t *fw_stats; 3951 3952 3953 if (rw == KSTAT_WRITE) 3954 return (EACCES); 3955 3956 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data; 3957 mgp = (struct myri10ge_priv *)ksp->ks_private; 3958 fw_stats = mgp->ss[0].fw_stats; 3959 3960 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma; 3961 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma; 3962 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma; 3963 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) 3964 ethstat->dma_force_physical.value.ul = 1; 3965 else 3966 ethstat->dma_force_physical.value.ul = 0; 3967 ethstat->lanes.value.ul = mgp->pcie_link_width; 3968 ethstat->dropped_bad_crc32.value.ul = 3969 ntohl(fw_stats->dropped_bad_crc32); 3970 
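	/* remaining firmware counters are 32-bit big-endian; swap each */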
ethstat->dropped_bad_phy.value.ul = 3971 ntohl(fw_stats->dropped_bad_phy); 3972 ethstat->dropped_link_error_or_filtered.value.ul = 3973 ntohl(fw_stats->dropped_link_error_or_filtered); 3974 ethstat->dropped_link_overflow.value.ul = 3975 ntohl(fw_stats->dropped_link_overflow); 3976 ethstat->dropped_multicast_filtered.value.ul = 3977 ntohl(fw_stats->dropped_multicast_filtered); 3978 ethstat->dropped_no_big_buffer.value.ul = 3979 ntohl(fw_stats->dropped_no_big_buffer); 3980 ethstat->dropped_no_small_buffer.value.ul = 3981 ntohl(fw_stats->dropped_no_small_buffer); 3982 ethstat->dropped_overrun.value.ul = 3983 ntohl(fw_stats->dropped_overrun); 3984 ethstat->dropped_pause.value.ul = 3985 ntohl(fw_stats->dropped_pause); 3986 ethstat->dropped_runt.value.ul = 3987 ntohl(fw_stats->dropped_runt); 3988 ethstat->link_up.value.ul = 3989 ntohl(fw_stats->link_up); 3990 ethstat->dropped_unicast_filtered.value.ul = 3991 ntohl(fw_stats->dropped_unicast_filtered); 3992 return (0); 3993 } 3994 3995 static int 3996 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw) 3997 { 3998 struct myri10ge_slice_stat *ethstat; 3999 struct myri10ge_slice_state *ss; 4000 4001 if (rw == KSTAT_WRITE) 4002 return (EACCES); 4003 4004 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data; 4005 ss = (struct myri10ge_slice_state *)ksp->ks_private; 4006 4007 ethstat->rx_big.value.ul = ss->j_rx_cnt; 4008 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt; 4009 ethstat->rx_bigbuf_pool.value.ul = 4010 ss->jpool.num_alloc - ss->jbufs_for_smalls; 4011 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls; 4012 ethstat->rx_small.value.ul = ss->rx_small.cnt - 4013 (ss->rx_small.mask + 1); 4014 ethstat->tx_done.value.ul = ss->tx.done; 4015 ethstat->tx_req.value.ul = ss->tx.req; 4016 ethstat->tx_activate.value.ul = ss->tx.activate; 4017 ethstat->xmit_sched.value.ul = ss->tx.sched; 4018 ethstat->xmit_stall.value.ul = ss->tx.stall; 4019 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early; 4020 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late; 4021 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err); 4022 return (0); 4023 } 4024 4025 static int 4026 myri10ge_info_kstat_update(kstat_t *ksp, int rw) 4027 { 4028 struct myri10ge_info *info; 4029 struct myri10ge_priv *mgp; 4030 4031 4032 if (rw == KSTAT_WRITE) 4033 return (EACCES); 4034 4035 info = (struct myri10ge_info *)ksp->ks_data; 4036 mgp = (struct myri10ge_priv *)ksp->ks_private; 4037 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR); 4038 kstat_named_setstr(&info->firmware_version, mgp->fw_version); 4039 kstat_named_setstr(&info->firmware_name, mgp->fw_name); 4040 kstat_named_setstr(&info->interrupt_type, mgp->intr_type); 4041 kstat_named_setstr(&info->product_code, mgp->pc_str); 4042 kstat_named_setstr(&info->serial_number, mgp->sn_str); 4043 return (0); 4044 } 4045 4046 static struct myri10ge_info myri10ge_info_template = { 4047 { "driver_version", KSTAT_DATA_STRING }, 4048 { "firmware_version", KSTAT_DATA_STRING }, 4049 { "firmware_name", KSTAT_DATA_STRING }, 4050 { "interrupt_type", KSTAT_DATA_STRING }, 4051 { "product_code", KSTAT_DATA_STRING }, 4052 { "serial_number", KSTAT_DATA_STRING }, 4053 }; 4054 static kmutex_t myri10ge_info_template_lock; 4055 4056 4057 static int 4058 myri10ge_info_init(struct myri10ge_priv *mgp) 4059 { 4060 struct kstat *ksp; 4061 4062 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4063 "myri10ge_info", "net", KSTAT_TYPE_NAMED, 4064 sizeof (myri10ge_info_template) / 4065 sizeof (kstat_named_t), 
KSTAT_FLAG_VIRTUAL); 4066 if (ksp == NULL) { 4067 cmn_err(CE_WARN, 4068 "%s: myri10ge_info_init: kstat_create failed", mgp->name); 4069 return (DDI_FAILURE); 4070 } 4071 mgp->ksp_info = ksp; 4072 ksp->ks_update = myri10ge_info_kstat_update; 4073 ksp->ks_private = (void *) mgp; 4074 ksp->ks_data = &myri10ge_info_template; 4075 ksp->ks_lock = &myri10ge_info_template_lock; 4076 if (MYRI10GE_VERSION_STR != NULL) 4077 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1; 4078 if (mgp->fw_version != NULL) 4079 ksp->ks_data_size += strlen(mgp->fw_version) + 1; 4080 ksp->ks_data_size += strlen(mgp->fw_name) + 1; 4081 ksp->ks_data_size += strlen(mgp->intr_type) + 1; 4082 if (mgp->pc_str != NULL) 4083 ksp->ks_data_size += strlen(mgp->pc_str) + 1; 4084 if (mgp->sn_str != NULL) 4085 ksp->ks_data_size += strlen(mgp->sn_str) + 1; 4086 4087 kstat_install(ksp); 4088 return (DDI_SUCCESS); 4089 } 4090 4091 4092 static int 4093 myri10ge_nic_stat_init(struct myri10ge_priv *mgp) 4094 { 4095 struct kstat *ksp; 4096 struct myri10ge_nic_stat *ethstat; 4097 4098 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4099 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED, 4100 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4101 if (ksp == NULL) { 4102 cmn_err(CE_WARN, 4103 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4104 return (DDI_FAILURE); 4105 } 4106 mgp->ksp_stat = ksp; 4107 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data); 4108 4109 kstat_named_init(&ethstat->dma_read_bw_MBs, 4110 "dma_read_bw_MBs", KSTAT_DATA_ULONG); 4111 kstat_named_init(&ethstat->dma_write_bw_MBs, 4112 "dma_write_bw_MBs", KSTAT_DATA_ULONG); 4113 kstat_named_init(&ethstat->dma_read_write_bw_MBs, 4114 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG); 4115 kstat_named_init(&ethstat->dma_force_physical, 4116 "dma_force_physical", KSTAT_DATA_ULONG); 4117 kstat_named_init(&ethstat->lanes, 4118 "lanes", KSTAT_DATA_ULONG); 4119 kstat_named_init(&ethstat->dropped_bad_crc32, 4120 "dropped_bad_crc32", KSTAT_DATA_ULONG); 4121 kstat_named_init(&ethstat->dropped_bad_phy, 4122 "dropped_bad_phy", KSTAT_DATA_ULONG); 4123 kstat_named_init(&ethstat->dropped_link_error_or_filtered, 4124 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG); 4125 kstat_named_init(&ethstat->dropped_link_overflow, 4126 "dropped_link_overflow", KSTAT_DATA_ULONG); 4127 kstat_named_init(&ethstat->dropped_multicast_filtered, 4128 "dropped_multicast_filtered", KSTAT_DATA_ULONG); 4129 kstat_named_init(&ethstat->dropped_no_big_buffer, 4130 "dropped_no_big_buffer", KSTAT_DATA_ULONG); 4131 kstat_named_init(&ethstat->dropped_no_small_buffer, 4132 "dropped_no_small_buffer", KSTAT_DATA_ULONG); 4133 kstat_named_init(&ethstat->dropped_overrun, 4134 "dropped_overrun", KSTAT_DATA_ULONG); 4135 kstat_named_init(&ethstat->dropped_pause, 4136 "dropped_pause", KSTAT_DATA_ULONG); 4137 kstat_named_init(&ethstat->dropped_runt, 4138 "dropped_runt", KSTAT_DATA_ULONG); 4139 kstat_named_init(&ethstat->dropped_unicast_filtered, 4140 "dropped_unicast_filtered", KSTAT_DATA_ULONG); 4143 kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG); 4144 kstat_named_init(&ethstat->link_changes, "link_changes", 4145 KSTAT_DATA_ULONG); 4146 ksp->ks_update = myri10ge_nic_stat_kstat_update; 4147 ksp->ks_private = (void *) mgp; 4148 kstat_install(ksp); 4149 return (DDI_SUCCESS); 4150 } 4151 4152 static int 4153 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss) 4154 { 4155 struct myri10ge_priv *mgp = ss->mgp; 4156 struct kstat *ksp; 4157 struct myri10ge_slice_stat
*ethstat; 4158 int instance; 4159 4160 /* 4161 * fake an instance so that the same slice numbers from 4162 * different instances do not collide 4163 */ 4164 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss); 4165 ksp = kstat_create("myri10ge", instance, 4166 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED, 4167 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4168 if (ksp == NULL) { 4169 cmn_err(CE_WARN, 4170 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4171 return (DDI_FAILURE); 4172 } 4173 ss->ksp_stat = ksp; 4174 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data); 4175 kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum", 4176 KSTAT_DATA_ULONG); 4177 kstat_named_init(&ethstat->lro_flushed, "lro_flushed", 4178 KSTAT_DATA_ULONG); 4179 kstat_named_init(&ethstat->lro_queued, "lro_queued", 4180 KSTAT_DATA_ULONG); 4181 kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware", 4182 KSTAT_DATA_ULONG); 4183 kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool", 4184 KSTAT_DATA_ULONG); 4185 kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls", 4186 KSTAT_DATA_ULONG); 4187 kstat_named_init(&ethstat->rx_copy, "rx_copy", 4188 KSTAT_DATA_ULONG); 4189 kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf", 4190 KSTAT_DATA_ULONG); 4191 kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf", 4192 KSTAT_DATA_ULONG); 4193 kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len", 4194 KSTAT_DATA_ULONG); 4195 kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup", 4196 KSTAT_DATA_ULONG); 4197 kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first", 4198 KSTAT_DATA_ULONG); 4199 kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf", 4200 KSTAT_DATA_ULONG); 4201 kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags", 4202 KSTAT_DATA_ULONG); 4203 kstat_named_init(&ethstat->xmit_sched, "xmit_sched", 4204 KSTAT_DATA_ULONG); 4205 kstat_named_init(&ethstat->xmit_stall, "xmit_stall", 4206 KSTAT_DATA_ULONG); 4207 kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early", 4208 KSTAT_DATA_ULONG); 4209 kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late", 4210 KSTAT_DATA_ULONG); 4211 kstat_named_init(&ethstat->xmit_err, "xmit_err", 4212 KSTAT_DATA_ULONG); 4213 kstat_named_init(&ethstat->tx_req, "tx_req", 4214 KSTAT_DATA_ULONG); 4215 kstat_named_init(&ethstat->tx_activate, "tx_activate", 4216 KSTAT_DATA_ULONG); 4217 kstat_named_init(&ethstat->tx_done, "tx_done", 4218 KSTAT_DATA_ULONG); 4219 kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced", 4220 KSTAT_DATA_ULONG); 4221 kstat_named_init(&ethstat->rx_big, "rx_big", 4222 KSTAT_DATA_ULONG); 4223 kstat_named_init(&ethstat->rx_small, "rx_small", 4224 KSTAT_DATA_ULONG); 4225 ksp->ks_update = myri10ge_slice_stat_kstat_update; 4226 ksp->ks_private = (void *) ss; 4227 kstat_install(ksp); 4228 return (DDI_SUCCESS); 4229 } 4230 4231 4232 4233 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4234 4235 #include <vm/hat.h> 4236 #include <sys/ddi_isa.h> 4237 void *device_arena_alloc(size_t size, int vm_flag); 4238 void device_arena_free(void *vaddr, size_t size); 4239 4240 static void 4241 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4242 { 4243 dev_info_t *parent_dip; 4244 ddi_acc_handle_t handle; 4245 unsigned long bus_number, dev_number, func_number; 4246 unsigned long cfg_pa, paddr, base, pgoffset; 4247 char *cvaddr, *ptr; 4248 uint32_t *ptr32; 4249 int retval = DDI_FAILURE; 4250 int dontcare; 4251 uint16_t read_vid, read_did, vendor_id, device_id; 4252 4253 if (!myri10ge_nvidia_ecrc_enable) 4254
return; 4255 4256 parent_dip = ddi_get_parent(mgp->dip); 4257 if (parent_dip == NULL) { 4258 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name); 4259 return; 4260 } 4261 4262 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) { 4263 cmn_err(CE_WARN, 4264 "%s: Could not access my parent's registers", mgp->name); 4265 return; 4266 } 4267 4268 vendor_id = pci_config_get16(handle, PCI_CONF_VENID); 4269 device_id = pci_config_get16(handle, PCI_CONF_DEVID); 4270 pci_config_teardown(&handle); 4271 4272 if (myri10ge_verbose) { 4273 unsigned long bus_number, dev_number, func_number; 4274 int reg_set, span; 4275 (void) myri10ge_reg_set(parent_dip, &reg_set, &span, 4276 &bus_number, &dev_number, &func_number); 4277 printf("%s: parent at %ld:%ld:%ld\n", mgp->name, 4279 bus_number, dev_number, func_number); 4280 } 4281 4282 if (vendor_id != 0x10de) 4283 return; 4284 4285 if (device_id != 0x005d /* CK804 */ && 4286 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) { 4287 return; 4288 } 4289 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare, 4290 &bus_number, &dev_number, &func_number); 4291 4292 for (cfg_pa = 0xf0000000UL; 4293 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL; 4294 cfg_pa -= 0x10000000UL) { 4295 /* find the config space address for the nvidia bridge */ 4296 paddr = (cfg_pa + bus_number * 0x00100000UL + 4297 (dev_number * 8 + func_number) * 0x00001000UL); 4298 4299 base = paddr & (~MMU_PAGEOFFSET); 4300 pgoffset = paddr & MMU_PAGEOFFSET; 4301 4302 /* map it into the kernel */ 4303 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP); 4304 if (cvaddr == NULL) { 4305 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n", 4306 mgp->name); continue; } 4307 4308 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1), 4309 i_ddi_paddr_to_pfn(base), 4310 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK); 4311 4312 ptr = cvaddr + pgoffset; 4313 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID); 4314 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID); 4315 if (vendor_id == read_vid && device_id == read_did) { 4316 ptr32 = (uint32_t *)(void *)(ptr + 0x178); 4317 if (myri10ge_verbose) 4318 printf("%s: Enabling ECRC on upstream " 4319 "Nvidia bridge (0x%x:0x%x) " 4320 "at %ld:%ld:%ld\n", mgp->name, 4321 read_vid, read_did, bus_number, 4322 dev_number, func_number); 4323 *ptr32 |= 0x40; 4324 retval = DDI_SUCCESS; 4325 } 4326 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK); 4327 device_arena_free(cvaddr, ptob(1)); 4328 } 4329 } 4330 4331 #else 4332 /*ARGSUSED*/ 4333 static void 4334 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4335 { 4336 } 4337 #endif /* i386 */ 4338 4339 4340 /* 4341 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 4342 * when the PCI-E Completion packets are aligned on an 8-byte 4343 * boundary. Some PCI-E chip sets always align Completion packets; on 4344 * the ones that do not, the alignment can be enforced by enabling 4345 * ECRC generation (if supported). 4346 * 4347 * When PCI-E Completion packets are not aligned, it is actually more 4348 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 4349 * 4350 * If the driver can neither enable ECRC nor verify that it has 4351 * already been enabled, then it must use a firmware image which works 4352 * around unaligned completion packets (ethp_z8e.dat), and it should 4353 * also ensure that it never gives the device a Read-DMA which is 4354 * larger than 2KB by setting the tx.boundary to 2KB.
If ECRC is 4355 * enabled, then the driver should use the aligned (eth_z8e.dat) 4356 * firmware image, and set tx.boundary to 4KB. 4357 */ 4358 4359 4360 static int 4361 myri10ge_firmware_probe(struct myri10ge_priv *mgp) 4362 { 4363 int status; 4364 4365 mgp->tx_boundary = 4096; 4366 /* 4367 * Verify the max read request size was set to 4KB 4368 * before trying the test with 4KB. 4369 */ 4370 if (mgp->max_read_request_4k == 0) 4371 mgp->tx_boundary = 2048; 4372 /* 4373 * load the optimized firmware which assumes aligned PCIe 4374 * completions in order to see if it works on this host. 4375 */ 4376 4377 mgp->fw_name = "rss_eth_z8e"; 4378 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4379 mgp->eth_z8e_length = rss_eth_z8e_length; 4380 4381 status = myri10ge_load_firmware(mgp); 4382 if (status != 0) { 4383 return (status); 4384 } 4385 /* 4386 * Enable ECRC if possible 4387 */ 4388 myri10ge_enable_nvidia_ecrc(mgp); 4389 4390 /* 4391 * Run a DMA test which watches for unaligned completions and 4392 * aborts on the first one seen. 4393 */ 4394 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 4395 if (status == 0) 4396 return (0); /* keep the aligned firmware */ 4397 4398 if (status != E2BIG) 4399 cmn_err(CE_WARN, "%s: DMA test failed: %d\n", 4400 mgp->name, status); 4401 if (status == ENOSYS) 4402 cmn_err(CE_WARN, "%s: Falling back to ethp! " 4403 "Please install up-to-date fw\n", mgp->name); 4404 return (status); 4405 } 4406 4407 static int 4408 myri10ge_select_firmware(struct myri10ge_priv *mgp) 4409 { 4410 int aligned; 4411 4412 aligned = 0; 4413 4414 if (myri10ge_force_firmware == 1) { 4415 if (myri10ge_verbose) 4416 printf("%s: Assuming aligned completions (forced)\n", 4417 mgp->name); 4418 aligned = 1; 4419 goto done; 4420 } 4421 4422 if (myri10ge_force_firmware == 2) { 4423 if (myri10ge_verbose) 4424 printf("%s: Assuming unaligned completions (forced)\n", 4425 mgp->name); 4426 aligned = 0; 4427 goto done; 4428 } 4429 4430 /* If the width is less than 8, we may use the aligned firmware */ 4431 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) { 4432 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n", 4433 mgp->name, mgp->pcie_link_width); 4434 aligned = 1; 4435 goto done; 4436 } 4437 4438 if (0 == myri10ge_firmware_probe(mgp)) 4439 return (0); /* keep optimized firmware */ 4440 4441 done: 4442 if (aligned) { 4443 mgp->fw_name = "rss_eth_z8e"; 4444 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4445 mgp->eth_z8e_length = rss_eth_z8e_length; 4446 mgp->tx_boundary = 4096; 4447 } else { 4448 mgp->fw_name = "rss_ethp_z8e"; 4449 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e; 4450 mgp->eth_z8e_length = rss_ethp_z8e_length; 4451 mgp->tx_boundary = 2048; 4452 } 4453 4454 return (myri10ge_load_firmware(mgp)); 4455 } 4456 4457 static int 4458 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler) 4459 { 4460 dev_info_t *devinfo = mgp->dip; 4461 int count, avail, actual, intr_types; 4462 int x, y, rc, inum = 0; 4463 4464 4465 rc = ddi_intr_get_supported_types(devinfo, &intr_types); 4466 if (rc != DDI_SUCCESS) { 4467 cmn_err(CE_WARN, 4468 "!%s: ddi_intr_get_supported_types() failure, rc = %d\n", mgp->name, 4469 rc); 4470 return (DDI_FAILURE); 4471 } 4472 4473 if (!myri10ge_use_msi) 4474 intr_types &= ~DDI_INTR_TYPE_MSI; 4475 if (!myri10ge_use_msix) 4476 intr_types &= ~DDI_INTR_TYPE_MSIX; 4477 4478 if (intr_types & DDI_INTR_TYPE_MSIX) { 4479 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX; 4480 mgp->intr_type = "MSI-X"; 4481 } else if (intr_types & DDI_INTR_TYPE_MSI) { 4482 mgp->ddi_intr_type =
DDI_INTR_TYPE_MSI; 4483 mgp->intr_type = "MSI"; 4484 } else { 4485 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED; 4486 mgp->intr_type = "Legacy"; 4487 } 4488 /* Get number of interrupts */ 4489 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count); 4490 if ((rc != DDI_SUCCESS) || (count == 0)) { 4491 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, " 4492 "count: %d", mgp->name, rc, count); 4493 4494 return (DDI_FAILURE); 4495 } 4496 4497 /* Get number of available interrupts */ 4498 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail); 4499 if ((rc != DDI_SUCCESS) || (avail == 0)) { 4500 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, " 4501 "rc: %d, avail: %d\n", mgp->name, rc, avail); 4502 return (DDI_FAILURE); 4503 } 4504 if (avail < count) { 4505 cmn_err(CE_NOTE, 4506 "!%s: nintrs() returned %d, navail returned %d", 4507 mgp->name, count, avail); 4508 count = avail; 4509 } 4510 4511 if (count < mgp->num_slices) 4512 return (DDI_FAILURE); 4513 4514 if (count > mgp->num_slices) 4515 count = mgp->num_slices; 4516 4517 /* Allocate memory for the interrupt handles */ 4518 mgp->intr_size = count * sizeof (ddi_intr_handle_t); 4519 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP); 4520 4521 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum, 4522 count, &actual, DDI_INTR_ALLOC_NORMAL); 4523 4524 if ((rc != DDI_SUCCESS) || (actual == 0)) { 4525 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d", 4526 mgp->name, rc); 4527 4528 kmem_free(mgp->htable, mgp->intr_size); 4529 mgp->htable = NULL; 4530 return (DDI_FAILURE); 4531 } 4532 4533 if ((actual < count) && myri10ge_verbose) { 4534 cmn_err(CE_NOTE, "%s: got %d/%d slices", 4535 mgp->name, actual, count); 4536 } 4537 4538 mgp->intr_cnt = actual; 4539 4540 /* 4541 * Get priority for first irq, assume remaining are all the same 4542 */ 4543 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri) 4544 != DDI_SUCCESS) { 4545 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name); 4546 4547 /* Free already allocated intr */ 4548 for (y = 0; y < actual; y++) { 4549 (void) ddi_intr_free(mgp->htable[y]); 4550 } 4551 4552 kmem_free(mgp->htable, mgp->intr_size); 4553 mgp->htable = NULL; 4554 return (DDI_FAILURE); 4555 } 4556 4557 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri; 4558 4559 if (!add_handler) 4560 return (DDI_SUCCESS); 4561 4562 /* Call ddi_intr_add_handler() */ 4563 for (x = 0; x < actual; x++) { 4564 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr, 4565 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) { 4566 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed", 4567 mgp->name); 4568 4569 /* Free already allocated intr */ 4570 for (y = 0; y < actual; y++) { 4571 (void) ddi_intr_free(mgp->htable[y]); 4572 } 4573 4574 kmem_free(mgp->htable, mgp->intr_size); 4575 mgp->htable = NULL; 4576 return (DDI_FAILURE); 4577 } 4578 } 4579 4580 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap); 4581 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4582 /* Call ddi_intr_block_enable() for MSI */ 4583 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt); 4584 } else { 4585 /* Call ddi_intr_enable() for MSI non block enable */ 4586 for (x = 0; x < mgp->intr_cnt; x++) { 4587 (void) ddi_intr_enable(mgp->htable[x]); 4588 } 4589 } 4590 4591 return (DDI_SUCCESS); 4592 } 4593 4594 static void 4595 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed) 4596 { 4597 int x, err; 4598 4599 /* Disable all interrupts */ 4600 if (handler_installed) { 4601 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4602 /* Call
ddi_intr_block_disable() */ 4603 (void) ddi_intr_block_disable(mgp->htable, 4604 mgp->intr_cnt); 4605 } else { 4606 for (x = 0; x < mgp->intr_cnt; x++) { 4607 (void) ddi_intr_disable(mgp->htable[x]); 4608 } 4609 } 4610 } 4611 4612 for (x = 0; x < mgp->intr_cnt; x++) { 4613 if (handler_installed) { 4614 /* Call ddi_intr_remove_handler() */ 4615 err = ddi_intr_remove_handler(mgp->htable[x]); 4616 if (err != DDI_SUCCESS) { 4617 cmn_err(CE_WARN, 4618 "%s: ddi_intr_remove_handler for " 4619 "vec %d returned %d\n", mgp->name, 4620 x, err); 4621 } 4622 } 4623 err = ddi_intr_free(mgp->htable[x]); 4624 if (err != DDI_SUCCESS) { 4625 cmn_err(CE_WARN, 4626 "%s: ddi_intr_free for vec %d returned %d\n", 4627 mgp->name, x, err); 4628 } 4629 } 4630 kmem_free(mgp->htable, mgp->intr_size); 4631 mgp->htable = NULL; 4632 } 4633 4634 static void 4635 myri10ge_test_physical(dev_info_t *dip) 4636 { 4637 ddi_dma_handle_t handle; 4638 struct myri10ge_dma_stuff dma; 4639 void *addr; 4640 int err; 4641 4642 /* test #1, sufficient for older sparc systems */ 4643 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 4644 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr, 4645 DDI_DMA_DONTWAIT, NULL, &handle); 4646 if (err == DDI_DMA_BADATTR) 4647 goto fail; 4648 ddi_dma_free_handle(&handle); 4649 4650 /* test #2, required on Olympus where the bind is what fails */ 4651 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr, 4652 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, 4653 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT); 4654 if (addr == NULL) 4655 goto fail; 4656 myri10ge_dma_free(&dma); 4657 return; 4658 4659 fail: 4660 if (myri10ge_verbose) 4661 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, " 4662 "using IOMMU\n", ddi_get_instance(dip)); 4663 4664 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL; 4665 } 4666 4667 static void 4668 myri10ge_get_props(dev_info_t *dip) 4669 { 4670 4671 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4672 "myri10ge_flow_control", myri10ge_flow_control); 4673 4674 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4675 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay); 4676 4677 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4678 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4679 "myri10ge_nvidia_ecrc_enable", 1); 4680 #endif 4681 4682 4683 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4684 "myri10ge_use_msi", myri10ge_use_msi); 4685 4686 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4687 "myri10ge_deassert_wait", myri10ge_deassert_wait); 4688 4689 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4690 "myri10ge_verbose", myri10ge_verbose); 4691 4692 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4693 "myri10ge_tx_copylen", myri10ge_tx_copylen); 4694 4695 if (myri10ge_tx_copylen < 60) { 4696 cmn_err(CE_WARN, 4697 "myri10ge_tx_copylen must be >= 60 bytes\n"); 4698 myri10ge_tx_copylen = 60; 4699 } 4700 4701 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4702 "myri10ge_mtu_override", myri10ge_mtu_override); 4703 4704 if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU && 4705 myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU) 4706 myri10ge_mtu = myri10ge_mtu_override + 4707 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ; 4708 else if (myri10ge_mtu_override != 0) { 4709 cmn_err(CE_WARN, 4710 "myri10ge_mtu_override must be between 1500 and " 4711 "9000 bytes\n"); 4712 } 4713 4714
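/*
 * The tunables in this routine are read with ddi_prop_get_int(), so
 * they can be set per-system in myri10ge.conf. A hypothetical
 * fragment (property names match the strings passed here; the values
 * are illustrative, not recommendations):
 *
 *	myri10ge_mtu_override=9000;
 *	myri10ge_intr_coal_delay=25;
 *	myri10ge_flow_control=0;
 *
 * myri10ge_mtu_override is given as a GLD MTU (1500-9000); as
 * computed above, the driver adds sizeof (struct ether_header),
 * MXGEFW_PAD and VLAN_TAGSZ to derive the internal myri10ge_mtu.
 */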
myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4715 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial); 4716 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4717 "myri10ge_bigbufs_max", myri10ge_bigbufs_max); 4718 4719 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4720 "myri10ge_watchdog_reset", myri10ge_watchdog_reset); 4721 4722 if (myri10ge_bigbufs_initial < 128) { 4723 cmn_err(CE_WARN, 4724 "myri10ge_bigbufs_initial must be at least 128\n"); 4725 myri10ge_bigbufs_initial = 128; 4726 } 4727 if (myri10ge_bigbufs_max < 128) { 4728 cmn_err(CE_WARN, 4729 "myri10ge_bigbufs_max must be at least 128\n"); 4730 myri10ge_bigbufs_max = 128; 4731 } 4732 4733 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) { 4734 cmn_err(CE_WARN, 4735 "myri10ge_bigbufs_max must be >= " 4736 "myri10ge_bigbufs_initial\n"); 4737 myri10ge_bigbufs_max = myri10ge_bigbufs_initial; 4738 } 4739 4740 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4741 "myri10ge_force_firmware", myri10ge_force_firmware); 4742 4743 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4744 "myri10ge_max_slices", myri10ge_max_slices); 4745 4746 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4747 "myri10ge_use_msix", myri10ge_use_msix); 4748 4749 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4750 "myri10ge_rss_hash", myri10ge_rss_hash); 4751 4752 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX || 4753 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) { 4754 cmn_err(CE_WARN, "myri10ge: Illegal rss hash type %d\n", 4755 myri10ge_rss_hash); 4756 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4757 } 4758 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4759 "myri10ge_lro", myri10ge_lro); 4760 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4761 "myri10ge_lro_cnt", myri10ge_lro_cnt); 4762 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4763 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr); 4764 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4765 "myri10ge_tx_hash", myri10ge_tx_hash); 4766 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4767 "myri10ge_use_lso", myri10ge_use_lso); 4768 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4769 "myri10ge_lso_copy", myri10ge_lso_copy); 4770 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4771 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial); 4772 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4773 "myri10ge_small_bytes", myri10ge_small_bytes); 4774 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) { 4775 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n", 4776 myri10ge_small_bytes); 4777 cmn_err(CE_WARN, "plus MXGEFW_PAD must be a multiple of 128\n"); 4778 myri10ge_small_bytes += 128; 4779 myri10ge_small_bytes &= ~(128 -1); 4780 myri10ge_small_bytes -= MXGEFW_PAD; 4781 cmn_err(CE_WARN, "rounded up to %d\n", 4782 myri10ge_small_bytes); 4783 4784 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4785 } 4786 } 4787 4788 #ifndef PCI_EXP_LNKSTA 4789 #define PCI_EXP_LNKSTA 18 4790 #endif 4791 4792 static int 4793 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid) 4794 { 4795 uint16_t status; 4796 uint8_t ptr; 4797 4798 /* check to see if we have capabilities */ 4799 status = pci_config_get16(handle, PCI_CONF_STAT); 4800 if (!(status & PCI_STAT_CAP)) { 4801 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n"); 4802 return (ENXIO); 4803 } 4804 4805 ptr =
pci_config_get8(handle, PCI_CONF_CAP_PTR); 4806 4807 /* Walk the capabilities list, looking for the requested capability */ 4808 while (ptr != PCI_CAP_NEXT_PTR_NULL) { 4809 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid) 4810 break; 4811 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR); 4812 } 4813 if (ptr < 64) { 4814 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr); 4815 return (ENXIO); 4816 } 4817 *capptr = ptr; 4818 return (0); 4819 } 4820 4821 static int 4822 myri10ge_set_max_readreq(ddi_acc_handle_t handle) 4823 { 4824 int err; 4825 uint16_t val; 4826 uint8_t ptr; 4827 4828 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4829 if (err != 0) { 4830 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4831 return (ENXIO); 4832 } 4833 4834 /* set max read req to 4096 */ 4835 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4836 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) | 4837 PCIE_DEVCTL_MAX_READ_REQ_4096; 4838 pci_config_put16(handle, ptr + PCIE_DEVCTL, val); 4839 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4840 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) != 4841 PCIE_DEVCTL_MAX_READ_REQ_4096) { 4842 cmn_err(CE_WARN, "could not set max read req (%x)\n", val); 4843 return (EINVAL); 4844 } 4845 return (0); 4846 } 4847 4848 static int 4849 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link) 4850 { 4851 int err; 4852 uint16_t val; 4853 uint8_t ptr; 4854 4855 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4856 if (err != 0) { 4857 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4858 return (ENXIO); 4859 } 4860 4861 /* read link width */ 4862 val = pci_config_get16(handle, ptr + PCIE_LINKSTS); 4863 val &= PCIE_LINKSTS_NEG_WIDTH_MASK; 4864 *link = (val >> 4); 4865 return (0); 4866 } 4867 4868 static int 4869 myri10ge_reset_nic(struct myri10ge_priv *mgp) 4870 { 4871 ddi_acc_handle_t handle = mgp->cfg_hdl; 4872 uint32_t reboot; 4873 uint16_t cmd; 4874 int err; 4875 4876 cmd = pci_config_get16(handle, PCI_CONF_COMM); 4877 if ((cmd & PCI_COMM_ME) == 0) { 4878 /* 4879 * Bus master DMA disabled? 
Check to see if the card 4880 * rebooted due to a parity error. For now, just report 4881 * it 4882 */ 4883 4884 /* enter read32 mode */ 4885 pci_config_put8(handle, mgp->vso + 0x10, 0x3); 4886 /* read REBOOT_STATUS (0xfffffff0) */ 4887 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0); 4888 reboot = pci_config_get16(handle, mgp->vso + 0x14); 4889 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot); 4890 return (0); 4891 } 4892 if (!myri10ge_watchdog_reset) { 4893 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name); 4894 return (1); 4895 } 4896 4897 myri10ge_stop_locked(mgp); 4898 err = myri10ge_start_locked(mgp); 4899 if (err == DDI_FAILURE) { 4900 return (0); 4901 } 4902 mac_tx_update(mgp->mh); 4903 return (1); 4904 } 4905 4906 static inline int 4907 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx) 4908 { 4909 if (tx->sched != tx->stall && 4910 tx->done == tx->watchdog_done && 4911 tx->watchdog_req != tx->watchdog_done) 4912 return (1); 4913 return (0); 4914 } 4915 4916 static void 4917 myri10ge_watchdog(void *arg) 4918 { 4919 struct myri10ge_priv *mgp; 4920 struct myri10ge_slice_state *ss; 4921 myri10ge_tx_ring_t *tx; 4922 int nic_ok = 1; 4923 int slices_stalled, rx_pause, i; 4924 int add_rx; 4925 4926 mgp = arg; 4927 mutex_enter(&mgp->intrlock); 4928 if (mgp->running != MYRI10GE_ETH_RUNNING) { 4929 cmn_err(CE_WARN, 4930 "%s not running, not rearming watchdog (%d)\n", 4931 mgp->name, mgp->running); 4932 mutex_exit(&mgp->intrlock); 4933 return; 4934 } 4935 4936 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause); 4937 4938 /* 4939 * make sure nic is stalled before we reset the nic, so as to 4940 * ensure we don't rip the transmit data structures out from 4941 * under a pending transmit 4942 */ 4943 4944 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) { 4945 tx = &mgp->ss[i].tx; 4946 slices_stalled = myri10ge_ring_stalled(tx); 4947 if (slices_stalled) 4948 break; 4949 } 4950 4951 if (slices_stalled) { 4952 if (mgp->watchdog_rx_pause == rx_pause) { 4953 cmn_err(CE_WARN, 4954 "%s slice %d stalled: (%d, %d, %d, %d, %d, %d, %d)\n", 4955 mgp->name, i, tx->sched, tx->stall, 4956 tx->done, tx->watchdog_done, tx->req, tx->pkt_done, 4957 (int)ntohl(mgp->ss[i].fw_stats->send_done_count)); 4958 nic_ok = myri10ge_reset_nic(mgp); 4959 } else { 4960 cmn_err(CE_WARN, 4961 "%s Flow controlled, check link partner\n", 4962 mgp->name); 4963 } 4964 } 4965 4966 if (!nic_ok) { 4967 cmn_err(CE_WARN, 4968 "%s Nic dead, not rearming watchdog\n", mgp->name); 4969 mutex_exit(&mgp->intrlock); 4970 return; 4971 } 4972 for (i = 0; i < mgp->num_slices; i++) { 4973 ss = &mgp->ss[i]; 4974 tx = &ss->tx; 4975 tx->watchdog_done = tx->done; 4976 tx->watchdog_req = tx->req; 4977 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) { 4978 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy); 4979 add_rx = 4980 min(ss->jpool.num_alloc, 4981 myri10ge_bigbufs_max - 4982 (ss->jpool.num_alloc - 4983 ss->jbufs_for_smalls)); 4984 if (add_rx != 0) { 4985 (void) myri10ge_add_jbufs(ss, add_rx, 0); 4986 /* now feed them to the firmware */ 4987 mutex_enter(&ss->jpool.mtx); 4988 myri10ge_restock_jumbos(ss); 4989 mutex_exit(&ss->jpool.mtx); 4990 } 4991 } 4992 } 4993 mgp->watchdog_rx_pause = rx_pause; 4994 4995 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 4996 mgp->timer_ticks); 4997 mutex_exit(&mgp->intrlock); 4998 } 4999 5000 /*ARGSUSED*/ 5001 static int 5002 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5003 { 5004 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5005
(void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay); 5006 return (0); 5007 } 5008 5009 /*ARGSUSED*/ 5010 static int 5011 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value, 5012 caddr_t cp, cred_t *credp) 5013 { 5014 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5015 char *end; 5016 size_t new_value; 5017 5018 new_value = mi_strtol(value, &end, 10); 5019 if (end == value) 5020 return (EINVAL); 5021 5022 mutex_enter(&myri10ge_param_lock); 5023 mgp->intr_coal_delay = (int)new_value; 5024 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay); 5025 mutex_exit(&myri10ge_param_lock); 5026 return (0); 5027 } 5028 5029 /*ARGSUSED*/ 5030 static int 5031 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5032 { 5033 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5034 (void) mi_mpprintf(mp, "%d", mgp->pause); 5035 return (0); 5036 } 5037 5038 /*ARGSUSED*/ 5039 static int 5040 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value, 5041 caddr_t cp, cred_t *credp) 5042 { 5043 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5044 char *end; 5045 size_t new_value; 5046 int err = 0; 5047 5048 new_value = mi_strtol(value, &end, 10); 5049 if (end == value) 5050 return (EINVAL); 5051 if (new_value != 0) 5052 new_value = 1; 5053 5054 mutex_enter(&myri10ge_param_lock); 5055 if (new_value != mgp->pause) 5056 err = myri10ge_change_pause(mgp, new_value); 5057 mutex_exit(&myri10ge_param_lock); 5058 return (err); 5059 } 5060 5061 /*ARGSUSED*/ 5062 static int 5063 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5064 { 5065 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp); 5066 return (0); 5067 } 5068 5069 /*ARGSUSED*/ 5070 static int 5071 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value, 5072 caddr_t cp, cred_t *credp) 5073 { 5074 char *end; 5075 size_t new_value; 5076 5077 new_value = mi_strtol(value, &end, 10); 5078 if (end == value) 5079 return (EINVAL); 5080 *(int *)(void *)cp = new_value; 5081 5082 return (0); 5083 } 5084 5085 static void 5086 myri10ge_ndd_init(struct myri10ge_priv *mgp) 5087 { 5088 mgp->nd_head = NULL; 5089 5090 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay", 5091 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp); 5092 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control", 5093 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp); 5094 (void) nd_load(&mgp->nd_head, "myri10ge_verbose", 5095 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose); 5096 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait", 5097 myri10ge_get_int, myri10ge_set_int, 5098 (caddr_t)&myri10ge_deassert_wait); 5099 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max", 5100 myri10ge_get_int, myri10ge_set_int, 5101 (caddr_t)&myri10ge_bigbufs_max); 5102 (void) nd_load(&mgp->nd_head, "myri10ge_lro", 5103 myri10ge_get_int, myri10ge_set_int, 5104 (caddr_t)&myri10ge_lro); 5105 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr", 5106 myri10ge_get_int, myri10ge_set_int, 5107 (caddr_t)&myri10ge_lro_max_aggr); 5108 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash", 5109 myri10ge_get_int, myri10ge_set_int, 5110 (caddr_t)&myri10ge_tx_hash); 5111 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy", 5112 myri10ge_get_int, myri10ge_set_int, 5113 (caddr_t)&myri10ge_lso_copy); 5114 } 5115 5116 static void 5117 myri10ge_ndd_fini(struct myri10ge_priv *mgp) 5118 { 5119 nd_free(&mgp->nd_head); 5120 } 5121 5122 static void 5123 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) 5124 { 5125 
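/*
 * ND_GET/ND_SET ioctls are generated by the ndd(1M) utility and are
 * dispatched below to the handlers registered in
 * myri10ge_ndd_init(). A hypothetical session (device path and
 * values illustrative only):
 *
 *	# ndd -get /dev/myri10ge0 myri10ge_intr_coal_delay
 *	125
 *	# ndd -set /dev/myri10ge0 myri10ge_intr_coal_delay 25
 */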
struct iocblk *iocp; 5126 struct myri10ge_priv *mgp = arg; 5127 int cmd, ok, err; 5128 5129 iocp = (struct iocblk *)(void *)mp->b_rptr; 5130 cmd = iocp->ioc_cmd; 5131 5132 ok = 0; 5133 err = 0; 5134 5135 switch (cmd) { 5136 case ND_GET: 5137 case ND_SET: 5138 ok = nd_getset(wq, mgp->nd_head, mp); 5139 break; 5140 default: 5141 break; 5142 } 5143 if (!ok) 5144 err = EINVAL; 5145 else 5146 err = iocp->ioc_error; 5147 5148 if (!err) 5149 miocack(wq, mp, iocp->ioc_count, err); 5150 else 5151 miocnak(wq, mp, 0, err); 5152 } 5153 5154 static struct myri10ge_priv *mgp_list; 5155 5156 struct myri10ge_priv * 5157 myri10ge_get_instance(uint_t unit) 5158 { 5159 struct myri10ge_priv *mgp; 5160 5161 mutex_enter(&myri10ge_param_lock); 5162 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) { 5163 if (unit == ddi_get_instance(mgp->dip)) { 5164 mgp->refcnt++; 5165 break; 5166 } 5167 } 5168 mutex_exit(&myri10ge_param_lock); 5169 return (mgp); 5170 } 5171 5172 void 5173 myri10ge_put_instance(struct myri10ge_priv *mgp) 5174 { 5175 mutex_enter(&myri10ge_param_lock); 5176 mgp->refcnt--; 5177 mutex_exit(&myri10ge_param_lock); 5178 } 5179 5180 static boolean_t 5181 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) 5182 { 5183 struct myri10ge_priv *mgp = arg; 5184 uint32_t *cap_hcksum; 5185 mac_capab_lso_t *cap_lso; 5186 mac_capab_rings_t *cap_rings; 5187 5188 switch (cap) { 5189 case MAC_CAPAB_HCKSUM: 5190 cap_hcksum = cap_data; 5191 *cap_hcksum = HCKSUM_INET_PARTIAL; 5192 break; 5193 case MAC_CAPAB_RINGS: 5194 cap_rings = cap_data; 5195 switch (cap_rings->mr_type) { 5196 case MAC_RING_TYPE_RX: 5197 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5198 cap_rings->mr_rnum = mgp->num_slices; 5199 cap_rings->mr_gnum = 1; 5200 cap_rings->mr_rget = myri10ge_fill_ring; 5201 cap_rings->mr_gget = myri10ge_fill_group; 5202 break; 5203 case MAC_RING_TYPE_TX: 5204 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5205 cap_rings->mr_rnum = mgp->num_slices; 5206 cap_rings->mr_gnum = 0; 5207 cap_rings->mr_rget = myri10ge_fill_ring; 5208 cap_rings->mr_gget = NULL; 5209 break; 5210 default: 5211 return (B_FALSE); 5212 } 5213 break; 5214 case MAC_CAPAB_LSO: 5215 cap_lso = cap_data; 5216 if (!myri10ge_use_lso) 5217 return (B_FALSE); 5218 if (!(mgp->features & MYRI10GE_TSO)) 5219 return (B_FALSE); 5220 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; 5221 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1; 5222 break; 5223 5224 default: 5225 return (B_FALSE); 5226 } 5227 return (B_TRUE); 5228 } 5229 5230 5231 static int 5232 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val) 5233 { 5234 struct myri10ge_priv *mgp = arg; 5235 struct myri10ge_rx_ring_stats *rstat; 5236 struct myri10ge_tx_ring_stats *tstat; 5237 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats; 5238 struct myri10ge_slice_state *ss; 5239 uint64_t tmp = 0; 5240 int i; 5241 5242 switch (stat) { 5243 case MAC_STAT_IFSPEED: 5244 *val = 10ull * 1000ull * 1000000ull; 5245 break; 5246 5247 case MAC_STAT_MULTIRCV: 5248 for (i = 0; i < mgp->num_slices; i++) { 5249 rstat = &mgp->ss[i].rx_stats; 5250 tmp += rstat->multircv; 5251 } 5252 *val = tmp; 5253 break; 5254 5255 case MAC_STAT_BRDCSTRCV: 5256 for (i = 0; i < mgp->num_slices; i++) { 5257 rstat = &mgp->ss[i].rx_stats; 5258 tmp += rstat->brdcstrcv; 5259 } 5260 *val = tmp; 5261 break; 5262 5263 case MAC_STAT_MULTIXMT: 5264 for (i = 0; i < mgp->num_slices; i++) { 5265 tstat = &mgp->ss[i].tx.stats; 5266 tmp += tstat->multixmt; 5267 } 5268 *val = tmp; 5269 break; 5270 5271 case MAC_STAT_BRDCSTXMT: 5272 for (i = 0; i < 
mgp->num_slices; i++) { 5273 tstat = &mgp->ss[i].tx.stats; 5274 tmp += tstat->brdcstxmt; 5275 } 5276 *val = tmp; 5277 break; 5278 5279 case MAC_STAT_NORCVBUF: 5280 tmp = ntohl(fw_stats->dropped_no_big_buffer); 5281 tmp += ntohl(fw_stats->dropped_no_small_buffer); 5282 tmp += ntohl(fw_stats->dropped_link_overflow); 5283 for (i = 0; i < mgp->num_slices; i++) { 5284 ss = &mgp->ss[i]; 5285 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf); 5286 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf); 5287 } 5288 *val = tmp; 5289 break; 5290 5291 case MAC_STAT_IERRORS: 5292 tmp += ntohl(fw_stats->dropped_bad_crc32); 5293 tmp += ntohl(fw_stats->dropped_bad_phy); 5294 tmp += ntohl(fw_stats->dropped_runt); 5295 tmp += ntohl(fw_stats->dropped_overrun); 5296 *val = tmp; 5297 break; 5298 5299 case MAC_STAT_OERRORS: 5300 for (i = 0; i < mgp->num_slices; i++) { 5301 ss = &mgp->ss[i]; 5302 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags); 5303 tmp += MYRI10GE_SLICE_STAT(xmit_err); 5304 } 5305 *val = tmp; 5306 break; 5307 5308 case MAC_STAT_RBYTES: 5309 for (i = 0; i < mgp->num_slices; i++) { 5310 rstat = &mgp->ss[i].rx_stats; 5311 tmp += rstat->ibytes; 5312 } 5313 *val = tmp; 5314 break; 5315 5316 case MAC_STAT_IPACKETS: 5317 for (i = 0; i < mgp->num_slices; i++) { 5318 rstat = &mgp->ss[i].rx_stats; 5319 tmp += rstat->ipackets; 5320 } 5321 *val = tmp; 5322 break; 5323 5324 case MAC_STAT_OBYTES: 5325 for (i = 0; i < mgp->num_slices; i++) { 5326 tstat = &mgp->ss[i].tx.stats; 5327 tmp += tstat->obytes; 5328 } 5329 *val = tmp; 5330 break; 5331 5332 case MAC_STAT_OPACKETS: 5333 for (i = 0; i < mgp->num_slices; i++) { 5334 tstat = &mgp->ss[i].tx.stats; 5335 tmp += tstat->opackets; 5336 } 5337 *val = tmp; 5338 break; 5339 5340 case ETHER_STAT_TOOLONG_ERRORS: 5341 *val = ntohl(fw_stats->dropped_overrun); 5342 break; 5343 5344 #ifdef SOLARIS_S11 5345 case ETHER_STAT_TOOSHORT_ERRORS: 5346 *val = ntohl(fw_stats->dropped_runt); 5347 break; 5348 #endif 5349 5350 case ETHER_STAT_LINK_PAUSE: 5351 *val = mgp->pause; 5352 break; 5353 5354 case ETHER_STAT_LINK_AUTONEG: 5355 *val = 1; 5356 break; 5357 5358 case ETHER_STAT_LINK_DUPLEX: 5359 *val = LINK_DUPLEX_FULL; 5360 break; 5361 5362 default: 5363 return (ENOTSUP); 5364 } 5365 5366 return (0); 5367 } 5368 5369 /* ARGSUSED */ 5370 static void 5371 myri10ge_m_propinfo(void *arg, const char *pr_name, 5372 mac_prop_id_t pr_num, mac_prop_info_handle_t prh) 5373 { 5374 switch (pr_num) { 5375 case MAC_PROP_MTU: 5376 mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU); 5377 mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU, 5378 MYRI10GE_MAX_GLD_MTU); 5379 break; 5380 default: 5381 break; 5382 } 5383 } 5384 5385 /*ARGSUSED*/ 5386 static int 5387 myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 5388 uint_t pr_valsize, const void *pr_val) 5389 { 5390 int err = 0; 5391 struct myri10ge_priv *mgp = arg; 5392 5393 switch (pr_num) { 5394 case MAC_PROP_MTU: { 5395 uint32_t mtu; 5396 if (pr_valsize < sizeof (mtu)) { 5397 err = EINVAL; 5398 break; 5399 } 5400 bcopy(pr_val, &mtu, sizeof (mtu)); 5401 if (mtu > MYRI10GE_MAX_GLD_MTU || 5402 mtu < MYRI10GE_MIN_GLD_MTU) { 5403 err = EINVAL; 5404 break; 5405 } 5406 5407 mutex_enter(&mgp->intrlock); 5408 if (mgp->running != MYRI10GE_ETH_STOPPED) { 5409 err = EBUSY; 5410 mutex_exit(&mgp->intrlock); 5411 break; 5412 } 5413 5414 myri10ge_mtu = mtu + sizeof (struct ether_header) + 5415 MXGEFW_PAD + VLAN_TAGSZ; 5416 mutex_exit(&mgp->intrlock); 5417 break; 5418 } 5419 default: 5420 err = ENOTSUP; 5421 break; 5422 } 5423 5424 return 
(err); 5425 } 5426 5427 static mac_callbacks_t myri10ge_m_callbacks = { 5428 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO), 5429 myri10ge_m_stat, 5430 myri10ge_m_start, 5431 myri10ge_m_stop, 5432 myri10ge_m_promisc, 5433 myri10ge_m_multicst, 5434 NULL, 5435 NULL, 5436 NULL, 5437 myri10ge_m_ioctl, 5438 myri10ge_m_getcapab, 5439 NULL, 5440 NULL, 5441 myri10ge_m_setprop, 5442 NULL, 5443 myri10ge_m_propinfo 5444 }; 5445 5446 5447 static int 5448 myri10ge_probe_slices(struct myri10ge_priv *mgp) 5449 { 5450 myri10ge_cmd_t cmd; 5451 int status; 5452 5453 mgp->num_slices = 1; 5454 5455 /* hit the board with a reset to ensure it is alive */ 5456 (void) memset(&cmd, 0, sizeof (cmd)); 5457 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd); 5458 if (status != 0) { 5459 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 5460 return (ENXIO); 5461 } 5462 5463 if (myri10ge_use_msix == 0) 5464 return (0); 5465 5466 /* tell it the size of the interrupt queues */ 5467 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot); 5468 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 5469 if (status != 0) { 5470 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n", 5471 mgp->name); 5472 return (ENXIO); 5473 } 5474 5475 /* ask the maximum number of slices it supports */ 5476 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 5477 &cmd); 5478 if (status != 0) 5479 return (0); 5480 5481 mgp->num_slices = cmd.data0; 5482 5483 /* 5484 * if the admin did not specify a limit to how many 5485 * slices we should use, cap it automatically to the 5486 * number of CPUs currently online 5487 */ 5488 if (myri10ge_max_slices == -1) 5489 myri10ge_max_slices = ncpus; 5490 5491 if (mgp->num_slices > myri10ge_max_slices) 5492 mgp->num_slices = myri10ge_max_slices; 5493 5494 5495 /* 5496 * Now try to allocate as many MSI-X vectors as we have 5497 * slices. We give up on MSI-X if we can only get a single 5498 * vector. 
5499 */ 5500 while (mgp->num_slices > 1) { 5501 /* make sure it is a power of two */ 5502 while (!ISP2(mgp->num_slices)) 5503 mgp->num_slices--; 5504 if (mgp->num_slices == 1) 5505 return (0); 5506 5507 status = myri10ge_add_intrs(mgp, 0); 5508 if (status == 0) { 5509 myri10ge_rem_intrs(mgp, 0); 5510 if (mgp->intr_cnt == mgp->num_slices) { 5511 if (myri10ge_verbose) 5512 printf("Got %d slices!\n", 5513 mgp->num_slices); 5514 return (0); 5515 } 5516 mgp->num_slices = mgp->intr_cnt; 5517 } else { 5518 mgp->num_slices = mgp->num_slices / 2; 5519 } 5520 } 5521 5522 if (myri10ge_verbose) 5523 printf("Got %d slices\n", mgp->num_slices); 5524 return (0); 5525 } 5526 5527 static void 5528 myri10ge_lro_free(struct myri10ge_slice_state *ss) 5529 { 5530 struct lro_entry *lro; 5531 5532 while (ss->lro_free != NULL) { 5533 lro = ss->lro_free; 5534 ss->lro_free = lro->next; 5535 kmem_free(lro, sizeof (*lro)); 5536 } 5537 } 5538 5539 static void 5540 myri10ge_lro_alloc(struct myri10ge_slice_state *ss) 5541 { 5542 struct lro_entry *lro; 5543 int idx; 5544 5545 ss->lro_free = NULL; 5546 ss->lro_active = NULL; 5547 5548 for (idx = 0; idx < myri10ge_lro_cnt; idx++) { 5549 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP); 5550 if (lro == NULL) 5551 continue; 5552 lro->next = ss->lro_free; 5553 ss->lro_free = lro; 5554 } 5555 } 5556 5557 static void 5558 myri10ge_free_slices(struct myri10ge_priv *mgp) 5559 { 5560 struct myri10ge_slice_state *ss; 5561 size_t bytes; 5562 int i; 5563 5564 if (mgp->ss == NULL) 5565 return; 5566 5567 for (i = 0; i < mgp->num_slices; i++) { 5568 ss = &mgp->ss[i]; 5569 if (ss->rx_done.entry == NULL) 5570 continue; 5571 myri10ge_dma_free(&ss->rx_done.dma); 5572 ss->rx_done.entry = NULL; 5573 if (ss->fw_stats == NULL) 5574 continue; 5575 myri10ge_dma_free(&ss->fw_stats_dma); 5576 ss->fw_stats = NULL; 5577 mutex_destroy(&ss->rx_lock); 5578 mutex_destroy(&ss->tx.lock); 5579 mutex_destroy(&ss->tx.handle_lock); 5580 mutex_destroy(&ss->poll_lock); 5581 myri10ge_jpool_fini(ss); 5582 myri10ge_slice_stat_destroy(ss); 5583 myri10ge_lro_free(ss); 5584 } 5585 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5586 kmem_free(mgp->ss, bytes); 5587 mgp->ss = NULL; 5588 } 5589 5590 5591 static int 5592 myri10ge_alloc_slices(struct myri10ge_priv *mgp) 5593 { 5594 struct myri10ge_slice_state *ss; 5595 size_t bytes; 5596 int i; 5597 5598 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5599 mgp->ss = kmem_zalloc(bytes, KM_SLEEP); 5600 if (mgp->ss == NULL) 5601 return (ENOMEM); 5602 for (i = 0; i < mgp->num_slices; i++) { 5603 ss = &mgp->ss[i]; 5604 5605 ss->mgp = mgp; 5606 5607 /* allocate the per-slice firmware stats */ 5608 bytes = sizeof (*ss->fw_stats); 5609 ss->fw_stats = (mcp_irq_data_t *)(void *) 5610 myri10ge_dma_alloc(mgp->dip, bytes, 5611 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5612 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5613 &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT); 5614 if (ss->fw_stats == NULL) 5615 goto abort; 5616 (void) memset(ss->fw_stats, 0, bytes); 5617 5618 /* allocate rx done ring */ 5619 bytes = mgp->max_intr_slots * 5620 sizeof (*ss->rx_done.entry); 5621 ss->rx_done.entry = (mcp_slot_t *)(void *) 5622 myri10ge_dma_alloc(mgp->dip, bytes, 5623 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5624 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5625 &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT); 5626 if (ss->rx_done.entry == NULL) { 5627 goto abort; 5628 } 5629 (void) memset(ss->rx_done.entry, 0, bytes); 5630 mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie); 5631 
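/*
 * rx_lock above is initialized with mgp->icookie, which
 * myri10ge_add_intrs() derived from the interrupt priority; this is
 * the DDI idiom for a mutex that synchronizes with the driver's
 * interrupt handler. The tx and poll locks below pass NULL instead.
 * A minimal sketch of the idiom (illustrative, not the driver's
 * exact usage):
 *
 *	kmutex_t lock;
 *
 *	mutex_init(&lock, NULL, MUTEX_DEFAULT, mgp->icookie);
 *	mutex_enter(&lock);
 *	(update state shared with the interrupt handler)
 *	mutex_exit(&lock);
 */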
mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL); 5632 mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL); 5633 mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL); 5634 myri10ge_jpool_init(ss); 5635 (void) myri10ge_slice_stat_init(ss); 5636 myri10ge_lro_alloc(ss); 5637 } 5638 5639 return (0); 5640 5641 abort: 5642 myri10ge_free_slices(mgp); 5643 return (ENOMEM); 5644 } 5645 5646 static int 5647 myri10ge_save_msi_state(struct myri10ge_priv *mgp, 5648 ddi_acc_handle_t handle) 5649 { 5650 uint8_t ptr; 5651 int err; 5652 5653 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5654 if (err != 0) { 5655 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5656 mgp->name); 5657 return (DDI_FAILURE); 5658 } 5659 mgp->pci_saved_state.msi_ctrl = 5660 pci_config_get16(handle, ptr + PCI_MSI_CTRL); 5661 mgp->pci_saved_state.msi_addr_low = 5662 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET); 5663 mgp->pci_saved_state.msi_addr_high = 5664 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4); 5665 mgp->pci_saved_state.msi_data_32 = 5666 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA); 5667 mgp->pci_saved_state.msi_data_64 = 5668 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA); 5669 return (DDI_SUCCESS); 5670 } 5671 5672 static int 5673 myri10ge_restore_msi_state(struct myri10ge_priv *mgp, 5674 ddi_acc_handle_t handle) 5675 { 5676 uint8_t ptr; 5677 int err; 5678 5679 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5680 if (err != 0) { 5681 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5682 mgp->name); 5683 return (DDI_FAILURE); 5684 } 5685 5686 pci_config_put16(handle, ptr + PCI_MSI_CTRL, 5687 mgp->pci_saved_state.msi_ctrl); 5688 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET, 5689 mgp->pci_saved_state.msi_addr_low); 5690 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4, 5691 mgp->pci_saved_state.msi_addr_high); 5692 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA, 5693 mgp->pci_saved_state.msi_data_32); 5694 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA, 5695 mgp->pci_saved_state.msi_data_64); 5696 5697 return (DDI_SUCCESS); 5698 } 5699 5700 static int 5701 myri10ge_save_pci_state(struct myri10ge_priv *mgp) 5702 { 5703 ddi_acc_handle_t handle = mgp->cfg_hdl; 5704 int i; 5705 int err = DDI_SUCCESS; 5706 5707 5708 /* Save the non-extended PCI config space 32-bits at a time */ 5709 for (i = 0; i < 16; i++) 5710 mgp->pci_saved_state.base[i] = 5711 pci_config_get32(handle, i*4); 5712 5713 /* now save MSI interrupt state, if needed */ 5714 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5715 err = myri10ge_save_msi_state(mgp, handle); 5716 5717 return (err); 5718 } 5719 5720 static int 5721 myri10ge_restore_pci_state(struct myri10ge_priv *mgp) 5722 { 5723 ddi_acc_handle_t handle = mgp->cfg_hdl; 5724 int i; 5725 int err = DDI_SUCCESS; 5726 5727 5728 /* Restore the non-extended PCI config space 32-bits at a time */ 5729 for (i = 15; i >= 0; i--) 5730 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]); 5731 5732 /* now restore MSI interrupt state, if needed */ 5733 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5734 err = myri10ge_restore_msi_state(mgp, handle); 5735 5736 if (mgp->max_read_request_4k) 5737 (void) myri10ge_set_max_readreq(handle); 5738 return (err); 5739 } 5740 5741 5742 static int 5743 myri10ge_suspend(dev_info_t *dip) 5744 { 5745 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5746 int status; 5747 5748 if (mgp == NULL) { 5749 cmn_err(CE_WARN, "null dip in myri10ge_suspend\n"); 5750 return (DDI_FAILURE); 5751 } 5752 if
(mgp->dip != dip) { 5753 cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n"); 5754 return (DDI_FAILURE); 5755 } 5756 mutex_enter(&mgp->intrlock); 5757 if (mgp->running == MYRI10GE_ETH_RUNNING) { 5758 mgp->running = MYRI10GE_ETH_STOPPING; 5759 mutex_exit(&mgp->intrlock); 5760 (void) untimeout(mgp->timer_id); 5761 mutex_enter(&mgp->intrlock); 5762 myri10ge_stop_locked(mgp); 5763 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING; 5764 } 5765 status = myri10ge_save_pci_state(mgp); 5766 mutex_exit(&mgp->intrlock); 5767 return (status); 5768 } 5769 5770 static int 5771 myri10ge_resume(dev_info_t *dip) 5772 { 5773 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5774 int status = DDI_SUCCESS; 5775 5776 if (mgp == NULL) { 5777 cmn_err(CE_WARN, "null dip in myri10ge_resume\n"); 5778 return (DDI_FAILURE); 5779 } 5780 if (mgp->dip != dip) { 5781 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n"); 5782 return (DDI_FAILURE); 5783 } 5784 5785 mutex_enter(&mgp->intrlock); 5786 status = myri10ge_restore_pci_state(mgp); 5787 if (status == DDI_SUCCESS && 5788 mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) { 5789 status = myri10ge_start_locked(mgp); 5790 } 5791 mutex_exit(&mgp->intrlock); 5792 if (status != DDI_SUCCESS) 5793 return (status); 5794 5795 /* start the watchdog timer */ 5796 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 5797 mgp->timer_ticks); 5798 return (DDI_SUCCESS); 5799 } 5800 5801 static int 5802 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5803 { 5804 5805 struct myri10ge_priv *mgp; 5806 mac_register_t *macp, *omacp; 5807 ddi_acc_handle_t handle; 5808 uint32_t csr, hdr_offset; 5809 int status, span, link_width, max_read_request_4k; 5810 unsigned long bus_number, dev_number, func_number; 5811 size_t bytes; 5812 offset_t ss_offset; 5813 uint8_t vso; 5814 5815 if (cmd == DDI_RESUME) { 5816 return (myri10ge_resume(dip)); 5817 } 5818 5819 if (cmd != DDI_ATTACH) 5820 return (DDI_FAILURE); 5821 if (pci_config_setup(dip, &handle) != DDI_SUCCESS) 5822 return (DDI_FAILURE); 5823 5824 /* enable bus master and io space access */ 5825 csr = pci_config_get32(handle, PCI_CONF_COMM); 5826 pci_config_put32(handle, PCI_CONF_COMM, 5827 (csr |PCI_COMM_ME|PCI_COMM_MAE)); 5828 status = myri10ge_read_pcie_link_width(handle, &link_width); 5829 if (status != 0) { 5830 cmn_err(CE_WARN, "could not read link width!\n"); 5831 link_width = 0; 5832 } 5833 max_read_request_4k = !myri10ge_set_max_readreq(handle); 5834 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS); 5835 if (status != 0) 5836 goto abort_with_cfg_hdl; 5837 if ((omacp = mac_alloc(MAC_VERSION)) == NULL) 5838 goto abort_with_cfg_hdl; 5839 /* 5840 * XXXX Hack: mac_register_t grows in newer kernels. To be
To be 5841 * able to write newer fields, such as m_margin, without 5842 * writing outside allocated memory, we allocate our own macp 5843 * and pass that to mac_register() 5844 */ 5845 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP); 5846 macp->m_version = omacp->m_version; 5847 5848 if ((mgp = (struct myri10ge_priv *) 5849 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) { 5850 goto abort_with_macinfo; 5851 } 5852 ddi_set_driver_private(dip, mgp); 5853 5854 /* setup device name for log messages */ 5855 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip)); 5856 5857 mutex_enter(&myri10ge_param_lock); 5858 myri10ge_get_props(dip); 5859 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 5860 mgp->pause = myri10ge_flow_control; 5861 mutex_exit(&myri10ge_param_lock); 5862 5863 mgp->max_read_request_4k = max_read_request_4k; 5864 mgp->pcie_link_width = link_width; 5865 mgp->running = MYRI10GE_ETH_STOPPED; 5866 mgp->vso = vso; 5867 mgp->dip = dip; 5868 mgp->cfg_hdl = handle; 5869 5870 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */ 5871 myri10ge_test_physical(dip); 5872 5873 /* allocate command page */ 5874 bytes = sizeof (*mgp->cmd); 5875 mgp->cmd = (mcp_cmd_response_t *) 5876 (void *)myri10ge_dma_alloc(dip, bytes, 5877 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5878 DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT, 5879 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT); 5880 if (mgp->cmd == NULL) 5881 goto abort_with_mgp; 5882 5883 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number, 5884 &dev_number, &func_number); 5885 if (myri10ge_verbose) 5886 printf("%s at %ld:%ld:%ld attaching\n", mgp->name, 5887 bus_number, dev_number, func_number); 5888 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram, 5889 (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr, 5890 &mgp->io_handle); 5891 if (status != DDI_SUCCESS) { 5892 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name); 5893 printf("%s: reg_set = %d, span = %d, status = %d", 5894 mgp->name, mgp->reg_set, span, status); 5895 goto abort_with_mgp; 5896 } 5897 5898 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET); 5899 hdr_offset = ntohl(hdr_offset) & 0xffffc; 5900 ss_offset = hdr_offset + 5901 offsetof(struct mcp_gen_header, string_specs); 5902 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset)); 5903 myri10ge_pio_copy32(mgp->eeprom_strings, 5904 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size), 5905 MYRI10GE_EEPROM_STRINGS_SIZE); 5906 (void) memset(mgp->eeprom_strings + 5907 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 5908 5909 status = myri10ge_read_mac_addr(mgp); 5910 if (status) { 5911 goto abort_with_mapped; 5912 } 5913 5914 status = myri10ge_select_firmware(mgp); 5915 if (status != 0) { 5916 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name); 5917 goto abort_with_mapped; 5918 } 5919 5920 status = myri10ge_probe_slices(mgp); 5921 if (status != 0) { 5922 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name); 5923 goto abort_with_dummy_rdma; 5924 } 5925 5926 status = myri10ge_alloc_slices(mgp); 5927 if (status != 0) { 5928 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name); 5929 goto abort_with_dummy_rdma; 5930 } 5931 5932 /* add the interrupt handler */ 5933 status = myri10ge_add_intrs(mgp, 1); 5934 if (status != 0) { 5935 cmn_err(CE_WARN, "%s: Failed to add interrupt\n", 5936 mgp->name); 5937 goto abort_with_slices; 5938 } 5939 5940 /* now that we have an iblock_cookie, init the mutexes */ 5941 mutex_init(&mgp->cmd_lock, 
NULL, MUTEX_DRIVER, mgp->icookie); 5942 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie); 5943 5944 5945 status = myri10ge_nic_stat_init(mgp); 5946 if (status != DDI_SUCCESS) 5947 goto abort_with_interrupts; 5948 status = myri10ge_info_init(mgp); 5949 if (status != DDI_SUCCESS) 5950 goto abort_with_stats; 5951 5952 /* 5953 * Initialize GLD state 5954 */ 5955 5956 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 5957 macp->m_driver = mgp; 5958 macp->m_dip = dip; 5959 macp->m_src_addr = mgp->mac_addr; 5960 macp->m_callbacks = &myri10ge_m_callbacks; 5961 macp->m_min_sdu = 0; 5962 macp->m_max_sdu = myri10ge_mtu - 5963 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ); 5964 #ifdef SOLARIS_S11 5965 macp->m_margin = VLAN_TAGSZ; 5966 #endif 5967 macp->m_v12n = MAC_VIRT_LEVEL1; 5968 status = mac_register(macp, &mgp->mh); 5969 if (status != 0) { 5970 cmn_err(CE_WARN, "%s: mac_register failed with %d\n", 5971 mgp->name, status); 5972 goto abort_with_info; 5973 } 5974 myri10ge_ndd_init(mgp); 5975 if (myri10ge_verbose) 5976 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name, 5977 mgp->intr_type, mgp->tx_boundary, mgp->fw_name); 5978 mutex_enter(&myri10ge_param_lock); 5979 mgp->next = mgp_list; 5980 mgp_list = mgp; 5981 mutex_exit(&myri10ge_param_lock); 5982 kmem_free(macp, sizeof (*macp) * 8); 5983 mac_free(omacp); 5984 return (DDI_SUCCESS); 5985 5986 abort_with_info: 5987 myri10ge_info_destroy(mgp); 5988 5989 abort_with_stats: 5990 myri10ge_nic_stat_destroy(mgp); 5991 5992 abort_with_interrupts: 5993 mutex_destroy(&mgp->cmd_lock); 5994 mutex_destroy(&mgp->intrlock); 5995 myri10ge_rem_intrs(mgp, 1); 5996 5997 abort_with_slices: 5998 myri10ge_free_slices(mgp); 5999 6000 abort_with_dummy_rdma: 6001 myri10ge_dummy_rdma(mgp, 0); 6002 6003 abort_with_mapped: 6004 ddi_regs_map_free(&mgp->io_handle); 6005 6006 myri10ge_dma_free(&mgp->cmd_dma); 6007 6008 abort_with_mgp: 6009 kmem_free(mgp, sizeof (*mgp)); 6010 6011 abort_with_macinfo: 6012 kmem_free(macp, sizeof (*macp) * 8); 6013 mac_free(omacp); 6014 6015 abort_with_cfg_hdl: 6016 pci_config_teardown(&handle); 6017 return (DDI_FAILURE); 6018 6019 } 6020 6021 6022 static int 6023 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 6024 { 6025 struct myri10ge_priv *mgp, *tmp; 6026 int status, i, jbufs_alloced; 6027 6028 if (cmd == DDI_SUSPEND) { 6029 status = myri10ge_suspend(dip); 6030 return (status); 6031 } 6032 6033 if (cmd != DDI_DETACH) { 6034 return (DDI_FAILURE); 6035 } 6036 /* Get the driver private (struct myri10ge_priv) structure */ 6037 mgp = ddi_get_driver_private(dip); 6038 6039 mutex_enter(&mgp->intrlock); 6040 jbufs_alloced = 0; 6041 for (i = 0; i < mgp->num_slices; i++) { 6042 myri10ge_remove_jbufs(&mgp->ss[i]); 6043 jbufs_alloced += mgp->ss[i].jpool.num_alloc; 6044 } 6045 mutex_exit(&mgp->intrlock); 6046 if (jbufs_alloced != 0) { 6047 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n", 6048 mgp->name, jbufs_alloced); 6049 return (DDI_FAILURE); 6050 } 6051 6052 mutex_enter(&myri10ge_param_lock); 6053 if (mgp->refcnt != 0) { 6054 mutex_exit(&myri10ge_param_lock); 6055 cmn_err(CE_NOTE, "%s: %d external refs remain\n", 6056 mgp->name, mgp->refcnt); 6057 return (DDI_FAILURE); 6058 } 6059 mutex_exit(&myri10ge_param_lock); 6060 6061 status = mac_unregister(mgp->mh); 6062 if (status != DDI_SUCCESS) 6063 return (status); 6064 6065 myri10ge_ndd_fini(mgp); 6066 myri10ge_dummy_rdma(mgp, 0); 6067 myri10ge_nic_stat_destroy(mgp); 6068 myri10ge_info_destroy(mgp); 6069 6070 mutex_destroy(&mgp->cmd_lock); 6071 mutex_destroy(&mgp->intrlock); 6072
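/*
 * From here detach releases resources in roughly the reverse of the
 * order myri10ge_attach() acquired them; compare the abort_with_*
 * labels there. A sketch of the pairing (illustrative):
 *
 *	attach:  map regs -> firmware -> slices -> intrs -> kstats
 *	             -> mac_register
 *	detach:  mac_unregister -> kstats -> intrs -> slices
 *	             -> unmap regs -> pci_config_teardown
 */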
	myri10ge_rem_intrs(mgp, 1);

	myri10ge_free_slices(mgp);
	ddi_regs_map_free(&mgp->io_handle);
	myri10ge_dma_free(&mgp->cmd_dma);
	pci_config_teardown(&mgp->cfg_hdl);

	mutex_enter(&myri10ge_param_lock);
	if (mgp_list == mgp) {
		mgp_list = mgp->next;
	} else {
		tmp = mgp_list;
		while (tmp->next != mgp && tmp->next != NULL)
			tmp = tmp->next;
		if (tmp->next != NULL)
			tmp->next = tmp->next->next;
	}
	kmem_free(mgp, sizeof (*mgp));
	mutex_exit(&myri10ge_param_lock);
	return (DDI_SUCCESS);
}

/*
 * Helper for the quiesce entry point: interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We're essentially running
 * the guts of the interrupt handler, and just cherry-picking the
 * confirmation that the NIC is quiesced (stats->link_down).
 */
static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss = mgp->ss;
	mcp_irq_data_t *stats = ss->fw_stats;
	int valid;
	int found_down = 0;

	/* check for a pending IRQ */
	if (!*((volatile uint8_t *)&stats->valid))
		return (0);
	valid = stats->valid;

	/*
	 * Make sure to tell the NIC to lower a legacy IRQ, else
	 * it may have corrupt state after restarting
	 */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		mb();
		/* wait for irq conf DMA */
		while (*((volatile uint8_t *)&stats->valid))
			;
	}
	if (stats->stats_updated && stats->link_down)
		found_down = 1;

	if (valid & 0x1)
		*ss->irq_claim = BE_32(3);
	*(ss->irq_claim + 1) = BE_32(3);

	return (found_down);
}

static int
myri10ge_quiesce(dev_info_t *dip)
{
	struct myri10ge_priv *mgp;
	myri10ge_cmd_t cmd;
	int status, down, i;

	mgp = ddi_get_driver_private(dip);
	if (mgp == NULL)
		return (DDI_FAILURE);

	/* if the device was unplumbed, it is guaranteed to be quiescent */
	if (mgp->running == MYRI10GE_ETH_STOPPED)
		return (DDI_SUCCESS);

	/* send a down CMD to quiesce the NIC */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
		return (DDI_FAILURE);
	}

	for (i = 0; i < 20; i++) {
		down = myri10ge_poll_down(mgp);
		if (down)
			break;
		delay(drv_usectohz(100000));
		mb();
	}
	if (down)
		return (DDI_SUCCESS);
	return (DDI_FAILURE);
}
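
/*
 * Note: the loop in myri10ge_quiesce() above gives the firmware up to
 * two seconds (20 polls, 100ms apart) to confirm the link-down
 * command before giving up and returning DDI_FAILURE.
 */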
6178 */ 6179 static void 6180 myri10ge_find_lastfree(void) 6181 { 6182 mblk_t *mp = allocb(1024, 0); 6183 dblk_t *dbp; 6184 6185 if (mp == NULL) { 6186 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n"); 6187 return; 6188 } 6189 dbp = mp->b_datap; 6190 myri10ge_db_lastfree = (void *)dbp->db_lastfree; 6191 } 6192 6193 int 6194 _init(void) 6195 { 6196 int i; 6197 6198 if (myri10ge_verbose) 6199 cmn_err(CE_NOTE, 6200 "Myricom 10G driver (10GbE) version %s loading\n", 6201 MYRI10GE_VERSION_STR); 6202 myri10ge_find_lastfree(); 6203 mac_init_ops(&myri10ge_ops, "myri10ge"); 6204 mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL); 6205 if ((i = mod_install(&modlinkage)) != 0) { 6206 cmn_err(CE_WARN, "mod_install returned %d\n", i); 6207 mac_fini_ops(&myri10ge_ops); 6208 mutex_destroy(&myri10ge_param_lock); 6209 } 6210 return (i); 6211 } 6212 6213 int 6214 _fini(void) 6215 { 6216 int i; 6217 i = mod_remove(&modlinkage); 6218 if (i != 0) { 6219 return (i); 6220 } 6221 mac_fini_ops(&myri10ge_ops); 6222 mutex_destroy(&myri10ge_param_lock); 6223 return (0); 6224 } 6225 6226 int 6227 _info(struct modinfo *modinfop) 6228 { 6229 return (mod_info(&modlinkage, modinfop)); 6230 } 6231 6232 6233 /* 6234 * This file uses MyriGE driver indentation. 6235 * 6236 * Local Variables: 6237 * c-file-style:"sun" 6238 * tab-width:8 6239 * End: 6240 */ 6241