1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2007-2009 Myricom, Inc. All rights reserved. 29 * Use is subject to license terms. 
30 */ 31 32 #ifndef lint 33 static const char __idstring[] = 34 "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $"; 35 #endif 36 37 #define MXGEFW_NDIS 38 #include "myri10ge_var.h" 39 #include "rss_eth_z8e.h" 40 #include "rss_ethp_z8e.h" 41 #include "mcp_gen_header.h" 42 43 #define MYRI10GE_MAX_ETHER_MTU 9014 44 45 #define MYRI10GE_ETH_STOPPED 0 46 #define MYRI10GE_ETH_STOPPING 1 47 #define MYRI10GE_ETH_STARTING 2 48 #define MYRI10GE_ETH_RUNNING 3 49 #define MYRI10GE_ETH_OPEN_FAILED 4 50 #define MYRI10GE_ETH_SUSPENDED_RUNNING 5 51 52 static int myri10ge_small_bytes = 510; 53 static int myri10ge_intr_coal_delay = 125; 54 static int myri10ge_flow_control = 1; 55 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 56 static int myri10ge_nvidia_ecrc_enable = 1; 57 #endif 58 static int myri10ge_mtu_override = 0; 59 static int myri10ge_tx_copylen = 512; 60 static int myri10ge_deassert_wait = 1; 61 static int myri10ge_verbose = 0; 62 static int myri10ge_watchdog_reset = 0; 63 static int myri10ge_use_msix = 1; 64 static int myri10ge_max_slices = -1; 65 static int myri10ge_use_msi = 1; 66 int myri10ge_force_firmware = 0; 67 static boolean_t myri10ge_use_lso = B_TRUE; 68 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 69 static int myri10ge_tx_hash = 1; 70 static int myri10ge_lro = 0; 71 static int myri10ge_lro_cnt = 8; 72 int myri10ge_lro_max_aggr = 2; 73 static int myri10ge_lso_copy = 0; 74 static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp); 75 int myri10ge_tx_handles_initial = 128; 76 77 static kmutex_t myri10ge_param_lock; 78 static void* myri10ge_db_lastfree; 79 80 static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 81 static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 82 static int myri10ge_quiesce(dev_info_t *dip); 83 84 DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach, 85 myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce); 86 87 88 static 
struct modldrv modldrv = { 89 &mod_driverops, 90 "Myricom 10G driver (10GbE)", 91 &myri10ge_ops, 92 }; 93 94 95 static struct modlinkage modlinkage = { 96 MODREV_1, 97 {&modldrv, NULL}, 98 }; 99 100 unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 101 102 static ddi_dma_attr_t myri10ge_misc_dma_attr = { 103 DMA_ATTR_V0, /* version number. */ 104 (uint64_t)0, /* low address */ 105 (uint64_t)0xffffffffffffffffULL, /* high address */ 106 (uint64_t)0x7ffffff, /* address counter max */ 107 (uint64_t)4096, /* alignment */ 108 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 109 (uint32_t)0x1, /* minimum transfer size */ 110 (uint64_t)0x7fffffff, /* maximum transfer size */ 111 (uint64_t)0x7fffffff, /* maximum segment size */ 112 1, /* scatter/gather list length */ 113 1, /* granularity */ 114 0 /* attribute flags */ 115 }; 116 117 /* 118 * The Myri10GE NIC has the following constraints on receive buffers: 119 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB 120 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary 121 */ 122 123 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = { 124 DMA_ATTR_V0, /* version number. */ 125 (uint64_t)0, /* low address */ 126 (uint64_t)0xffffffffffffffffULL, /* high address */ 127 (uint64_t)0x7ffffff, /* address counter max */ 128 (uint64_t)4096, /* alignment */ 129 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 130 (uint32_t)0x1, /* minimum transfer size */ 131 (uint64_t)0x7fffffff, /* maximum transfer size */ 132 UINT64_MAX, /* maximum segment size */ 133 1, /* scatter/gather list length */ 134 1, /* granularity */ 135 0 /* attribute flags */ 136 }; 137 138 static ddi_dma_attr_t myri10ge_rx_std_dma_attr = { 139 DMA_ATTR_V0, /* version number. 
*/ 140 (uint64_t)0, /* low address */ 141 (uint64_t)0xffffffffffffffffULL, /* high address */ 142 (uint64_t)0x7ffffff, /* address counter max */ 143 #if defined sparc64 || defined __sparcv9 144 (uint64_t)4096, /* alignment */ 145 #else 146 (uint64_t)0x80, /* alignment */ 147 #endif 148 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 149 (uint32_t)0x1, /* minimum transfer size */ 150 (uint64_t)0x7fffffff, /* maximum transfer size */ 151 #if defined sparc64 || defined __sparcv9 152 UINT64_MAX, /* maximum segment size */ 153 #else 154 (uint64_t)0xfff, /* maximum segment size */ 155 #endif 156 1, /* scatter/gather list length */ 157 1, /* granularity */ 158 0 /* attribute flags */ 159 }; 160 161 static ddi_dma_attr_t myri10ge_tx_dma_attr = { 162 DMA_ATTR_V0, /* version number. */ 163 (uint64_t)0, /* low address */ 164 (uint64_t)0xffffffffffffffffULL, /* high address */ 165 (uint64_t)0x7ffffff, /* address counter max */ 166 (uint64_t)1, /* alignment */ 167 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */ 168 (uint32_t)0x1, /* minimum transfer size */ 169 (uint64_t)0x7fffffff, /* maximum transfer size */ 170 UINT64_MAX, /* maximum segment size */ 171 INT32_MAX, /* scatter/gather list length */ 172 1, /* granularity */ 173 0 /* attribute flags */ 174 }; 175 176 #if defined sparc64 || defined __sparcv9 177 #define WC 0 178 #else 179 #define WC 1 180 #endif 181 182 struct ddi_device_acc_attr myri10ge_dev_access_attr = { 183 DDI_DEVICE_ATTR_V0, /* version */ 184 DDI_NEVERSWAP_ACC, /* endian flash */ 185 #if WC 186 DDI_MERGING_OK_ACC /* data order */ 187 #else 188 DDI_STRICTORDER_ACC 189 #endif 190 }; 191 192 static void myri10ge_watchdog(void *arg); 193 194 #ifdef MYRICOM_PRIV 195 int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ; 196 #else 197 int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ; 198 #endif 199 int myri10ge_bigbufs_initial = 1024; 200 int myri10ge_bigbufs_max = 4096; 201 202 203 caddr_t 204 myri10ge_dma_alloc(dev_info_t *dip, 
size_t len, 205 ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr, 206 uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma, 207 int warn, int (*wait)(caddr_t)) 208 { 209 caddr_t kaddr; 210 size_t real_length; 211 ddi_dma_cookie_t cookie; 212 uint_t count; 213 int err; 214 215 err = ddi_dma_alloc_handle(dip, attr, wait, 216 NULL, &dma->handle); 217 if (err != DDI_SUCCESS) { 218 if (warn) 219 cmn_err(CE_WARN, 220 "myri10ge: ddi_dma_alloc_handle failed\n"); 221 goto abort_with_nothing; 222 } 223 224 err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags, 225 wait, NULL, &kaddr, &real_length, 226 &dma->acc_handle); 227 if (err != DDI_SUCCESS) { 228 if (warn) 229 cmn_err(CE_WARN, 230 "myri10ge: ddi_dma_mem_alloc failed\n"); 231 goto abort_with_handle; 232 } 233 234 err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len, 235 bind_flags, wait, NULL, &cookie, &count); 236 237 if (err != DDI_SUCCESS) { 238 if (warn) 239 cmn_err(CE_WARN, 240 "myri10ge: ddi_dma_addr_bind_handle failed\n"); 241 goto abort_with_mem; 242 } 243 244 if (count != 1) { 245 if (warn) 246 cmn_err(CE_WARN, 247 "myri10ge: got too many dma segments "); 248 goto abort_with_bind; 249 } 250 dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress)); 251 dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress)); 252 return (kaddr); 253 254 abort_with_bind: 255 (void) ddi_dma_unbind_handle(dma->handle); 256 257 abort_with_mem: 258 ddi_dma_mem_free(&dma->acc_handle); 259 260 abort_with_handle: 261 ddi_dma_free_handle(&dma->handle); 262 abort_with_nothing: 263 if (warn) { 264 cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n "); 265 cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n", 266 (void*) dip, len, (void*) attr); 267 cmn_err(CE_WARN, 268 "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n", 269 (void*) accattr, alloc_flags); 270 cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p", 271 bind_flags, (void*) dma); 272 } 273 return (NULL); 274 275 } 
276 277 void 278 myri10ge_dma_free(struct myri10ge_dma_stuff *dma) 279 { 280 (void) ddi_dma_unbind_handle(dma->handle); 281 ddi_dma_mem_free(&dma->acc_handle); 282 ddi_dma_free_handle(&dma->handle); 283 } 284 285 static inline void 286 myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size) 287 { 288 register volatile uint32_t *to32; 289 size_t i; 290 291 to32 = (volatile uint32_t *) to; 292 for (i = (size / 4); i; i--) { 293 *to32 = *from32; 294 to32++; 295 from32++; 296 } 297 } 298 299 #if defined(_LP64) 300 static inline void 301 myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size) 302 { 303 register volatile uint64_t *to64; 304 size_t i; 305 306 to64 = (volatile uint64_t *) to; 307 for (i = (size / 8); i; i--) { 308 *to64 = *from64; 309 to64++; 310 from64++; 311 } 312 } 313 #endif 314 315 /* 316 * This routine copies memory from the host to the NIC. 317 * The "size" argument must always be a multiple of 318 * the size of long (4 or 8 bytes), and to/from must also 319 * be naturally aligned. 320 */ 321 static inline void 322 myri10ge_pio_copy(void *to, void *from, size_t size) 323 { 324 #if !defined(_LP64) 325 ASSERT((size % 4) == 0); 326 myri10ge_pio_copy32(to, (uint32_t *)from, size); 327 #else 328 ASSERT((size % 8) == 0); 329 myri10ge_pio_copy64(to, (uint64_t *)from, size); 330 #endif 331 } 332 333 334 /* 335 * Due to various bugs in Solaris (especially bug 6186772 where the 336 * TCP/UDP checksum is calculated incorrectly on mblk chains with more 337 * than two elements), and the design bug where hardware checksums are 338 * ignored on mblk chains with more than 2 elements, we need to 339 * allocate private pool of physically contiguous receive buffers. 
340 */ 341 342 static void 343 myri10ge_jpool_init(struct myri10ge_slice_state *ss) 344 { 345 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 346 347 bzero(jpool, sizeof (*jpool)); 348 mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER, 349 ss->mgp->icookie); 350 jpool->head = NULL; 351 } 352 353 static void 354 myri10ge_jpool_fini(struct myri10ge_slice_state *ss) 355 { 356 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 357 358 if (jpool->head != NULL) { 359 cmn_err(CE_WARN, 360 "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n", 361 ss->mgp->name); 362 } 363 mutex_destroy(&jpool->mtx); 364 } 365 366 367 /* 368 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 369 * at most 32 bytes at a time, so as to avoid involving the software 370 * pio handler in the nic. We re-write the first segment's low 371 * DMA address to mark it valid only after we write the entire chunk 372 * in a burst 373 */ 374 static inline void 375 myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src) 376 { 377 src->addr_low |= BE_32(1); 378 myri10ge_pio_copy(dst, src, 4 * sizeof (*src)); 379 mb(); 380 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 381 mb(); 382 src->addr_low &= ~(BE_32(1)); 383 dst->addr_low = src->addr_low; 384 mb(); 385 } 386 387 static void 388 myri10ge_pull_jpool(struct myri10ge_slice_state *ss) 389 { 390 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 391 struct myri10ge_jpool_entry *jtail, *j, *jfree; 392 volatile uintptr_t *putp; 393 uintptr_t put; 394 int i; 395 396 /* find tail */ 397 jtail = NULL; 398 if (jpool->head != NULL) { 399 j = jpool->head; 400 while (j->next != NULL) 401 j = j->next; 402 jtail = j; 403 } 404 405 /* 406 * iterate over all per-CPU caches, and add contents into 407 * jpool 408 */ 409 for (i = 0; i < MYRI10GE_MAX_CPUS; i++) { 410 /* take per-CPU free list */ 411 putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head; 412 if (*putp == NULL) 413 continue; 414 put = atomic_swap_ulong(putp, 0); 415 jfree 
= (struct myri10ge_jpool_entry *)put; 416 417 /* append to pool */ 418 if (jtail == NULL) { 419 jpool->head = jfree; 420 } else { 421 jtail->next = jfree; 422 } 423 j = jfree; 424 while (j->next != NULL) 425 j = j->next; 426 jtail = j; 427 } 428 } 429 430 /* 431 * Transfers buffers from the free pool to the nic 432 * Must be called holding the jpool mutex. 433 */ 434 435 static inline void 436 myri10ge_restock_jumbos(struct myri10ge_slice_state *ss) 437 { 438 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 439 struct myri10ge_jpool_entry *j; 440 myri10ge_rx_ring_t *rx; 441 int i, idx, limit; 442 443 rx = &ss->rx_big; 444 limit = ss->j_rx_cnt + (rx->mask + 1); 445 446 for (i = rx->cnt; i != limit; i++) { 447 idx = i & (rx->mask); 448 j = jpool->head; 449 if (j == NULL) { 450 myri10ge_pull_jpool(ss); 451 j = jpool->head; 452 if (j == NULL) { 453 break; 454 } 455 } 456 jpool->head = j->next; 457 rx->info[idx].j = j; 458 rx->shadow[idx].addr_low = j->dma.low; 459 rx->shadow[idx].addr_high = j->dma.high; 460 /* copy 4 descriptors (32-bytes) to the mcp at a time */ 461 if ((idx & 7) == 7) { 462 myri10ge_submit_8rx(&rx->lanai[idx - 7], 463 &rx->shadow[idx - 7]); 464 } 465 } 466 rx->cnt = i; 467 } 468 469 /* 470 * Transfer buffers from the nic to the free pool. 471 * Should be called holding the jpool mutex 472 */ 473 474 static inline void 475 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss) 476 { 477 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 478 struct myri10ge_jpool_entry *j; 479 myri10ge_rx_ring_t *rx; 480 int i; 481 482 mutex_enter(&jpool->mtx); 483 rx = &ss->rx_big; 484 485 for (i = 0; i < rx->mask + 1; i++) { 486 j = rx->info[i].j; 487 rx->info[i].j = NULL; 488 if (j == NULL) 489 continue; 490 j->next = jpool->head; 491 jpool->head = j; 492 } 493 mutex_exit(&jpool->mtx); 494 495 } 496 497 498 /* 499 * Free routine which is called when the mblk allocated via 500 * esballoc() is freed. 
Here we return the jumbo buffer 501 * to the free pool, and possibly pass some jumbo buffers 502 * to the nic 503 */ 504 505 static void 506 myri10ge_jfree_rtn(void *arg) 507 { 508 struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg; 509 struct myri10ge_jpool_stuff *jpool; 510 volatile uintptr_t *putp; 511 uintptr_t old, new; 512 513 jpool = &j->ss->jpool; 514 515 /* prepend buffer locklessly to per-CPU freelist */ 516 putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head; 517 new = (uintptr_t)j; 518 do { 519 old = *putp; 520 j->next = (void *)old; 521 } while (atomic_cas_ulong(putp, old, new) != old); 522 } 523 524 static void 525 myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j) 526 { 527 (void) ddi_dma_unbind_handle(j->dma_handle); 528 ddi_dma_mem_free(&j->acc_handle); 529 ddi_dma_free_handle(&j->dma_handle); 530 kmem_free(j, sizeof (*j)); 531 } 532 533 534 /* 535 * Allocates one physically contiguous descriptor 536 * and add it to the jumbo buffer pool. 
537 */ 538 539 static int 540 myri10ge_add_jbuf(struct myri10ge_slice_state *ss) 541 { 542 struct myri10ge_jpool_entry *j; 543 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 544 ddi_dma_attr_t *rx_dma_attr; 545 size_t real_length; 546 ddi_dma_cookie_t cookie; 547 uint_t count; 548 int err; 549 550 if (myri10ge_mtu < 2048) 551 rx_dma_attr = &myri10ge_rx_std_dma_attr; 552 else 553 rx_dma_attr = &myri10ge_rx_jumbo_dma_attr; 554 555 again: 556 j = (struct myri10ge_jpool_entry *) 557 kmem_alloc(sizeof (*j), KM_SLEEP); 558 err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr, 559 DDI_DMA_DONTWAIT, NULL, &j->dma_handle); 560 if (err != DDI_SUCCESS) 561 goto abort_with_j; 562 563 err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu, 564 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 565 NULL, &j->buf, &real_length, &j->acc_handle); 566 if (err != DDI_SUCCESS) 567 goto abort_with_handle; 568 569 err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf, 570 real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 571 NULL, &cookie, &count); 572 if (err != DDI_SUCCESS) 573 goto abort_with_mem; 574 575 /* 576 * Make certain std MTU buffers do not cross a 4KB boundary: 577 * 578 * Setting dma_attr_align=4096 will do this, but the system 579 * will only allocate 1 RX buffer per 4KB page, rather than 2. 580 * Setting dma_attr_granular=4096 *seems* to work around this, 581 * but I'm paranoid about future systems no longer honoring 582 * this, so fall back to the safe, but memory wasting way if a 583 * buffer crosses a 4KB boundary. 
584 */ 585 586 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 587 rx_dma_attr->dma_attr_align != 4096) { 588 uint32_t start, end; 589 590 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 591 end = start + myri10ge_mtu; 592 if (((end >> 12) != (start >> 12)) && (start & 4095U)) { 593 printf("std buffer crossed a 4KB boundary!\n"); 594 myri10ge_remove_jbuf(j); 595 rx_dma_attr->dma_attr_align = 4096; 596 rx_dma_attr->dma_attr_seg = UINT64_MAX; 597 goto again; 598 } 599 } 600 601 j->dma.low = 602 htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress)); 603 j->dma.high = 604 htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress)); 605 j->ss = ss; 606 607 608 j->free_func.free_func = myri10ge_jfree_rtn; 609 j->free_func.free_arg = (char *)j; 610 mutex_enter(&jpool->mtx); 611 j->next = jpool->head; 612 jpool->head = j; 613 jpool->num_alloc++; 614 mutex_exit(&jpool->mtx); 615 return (0); 616 617 abort_with_mem: 618 ddi_dma_mem_free(&j->acc_handle); 619 620 abort_with_handle: 621 ddi_dma_free_handle(&j->dma_handle); 622 623 abort_with_j: 624 kmem_free(j, sizeof (*j)); 625 626 /* 627 * If an allocation failed, perhaps it failed because it could 628 * not satisfy granularity requirement. Disable that, and 629 * try agin. 
630 */ 631 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 632 rx_dma_attr->dma_attr_align != 4096) { 633 cmn_err(CE_NOTE, 634 "!alloc failed, reverting to gran=1\n"); 635 rx_dma_attr->dma_attr_align = 4096; 636 rx_dma_attr->dma_attr_seg = UINT64_MAX; 637 goto again; 638 } 639 return (err); 640 } 641 642 static int 643 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool) 644 { 645 int i; 646 struct myri10ge_jpool_entry *j; 647 648 mutex_enter(&jpool->mtx); 649 j = jpool->head; 650 i = 0; 651 while (j != NULL) { 652 i++; 653 j = j->next; 654 } 655 mutex_exit(&jpool->mtx); 656 return (i); 657 } 658 659 static int 660 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total) 661 { 662 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 663 int allocated = 0; 664 int err; 665 int needed; 666 667 /* 668 * if total is set, user wants "num" jbufs in the pool, 669 * otherwise the user wants to "num" additional jbufs 670 * added to the pool 671 */ 672 if (total && jpool->num_alloc) { 673 allocated = myri10ge_jfree_cnt(jpool); 674 needed = num - allocated; 675 } else { 676 needed = num; 677 } 678 679 while (needed > 0) { 680 needed--; 681 err = myri10ge_add_jbuf(ss); 682 if (err == 0) { 683 allocated++; 684 } 685 } 686 return (allocated); 687 } 688 689 static void 690 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss) 691 { 692 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 693 struct myri10ge_jpool_entry *j; 694 695 mutex_enter(&jpool->mtx); 696 myri10ge_pull_jpool(ss); 697 while (jpool->head != NULL) { 698 jpool->num_alloc--; 699 j = jpool->head; 700 jpool->head = j->next; 701 myri10ge_remove_jbuf(j); 702 } 703 mutex_exit(&jpool->mtx); 704 } 705 706 static void 707 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss) 708 { 709 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 710 struct myri10ge_jpool_entry *j = NULL; 711 caddr_t ptr; 712 uint32_t dma_low, dma_high; 713 int idx, len; 714 unsigned int alloc_size; 715 716 dma_low = 
dma_high = len = 0; 717 alloc_size = myri10ge_small_bytes + MXGEFW_PAD; 718 ptr = NULL; 719 for (idx = 0; idx < ss->rx_small.mask + 1; idx++) { 720 /* Allocate a jumbo frame and carve it into small frames */ 721 if (len < alloc_size) { 722 mutex_enter(&jpool->mtx); 723 /* remove jumbo from freelist */ 724 j = jpool->head; 725 jpool->head = j->next; 726 /* place it onto small list */ 727 j->next = ss->small_jpool; 728 ss->small_jpool = j; 729 mutex_exit(&jpool->mtx); 730 len = myri10ge_mtu; 731 dma_low = ntohl(j->dma.low); 732 dma_high = ntohl(j->dma.high); 733 ptr = j->buf; 734 } 735 ss->rx_small.info[idx].ptr = ptr; 736 ss->rx_small.shadow[idx].addr_low = htonl(dma_low); 737 ss->rx_small.shadow[idx].addr_high = htonl(dma_high); 738 len -= alloc_size; 739 ptr += alloc_size; 740 dma_low += alloc_size; 741 } 742 } 743 744 /* 745 * Return the jumbo bufs we carved up for small to the jumbo pool 746 */ 747 748 static void 749 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss) 750 { 751 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 752 struct myri10ge_jpool_entry *j = NULL; 753 754 mutex_enter(&jpool->mtx); 755 while (ss->small_jpool != NULL) { 756 j = ss->small_jpool; 757 ss->small_jpool = j->next; 758 j->next = jpool->head; 759 jpool->head = j; 760 } 761 mutex_exit(&jpool->mtx); 762 ss->jbufs_for_smalls = 0; 763 } 764 765 static int 766 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss) 767 { 768 myri10ge_tx_ring_t *tx = &ss->tx; 769 struct myri10ge_priv *mgp = ss->mgp; 770 struct myri10ge_tx_dma_handle *handle; 771 int err; 772 773 handle = kmem_zalloc(sizeof (*handle), KM_SLEEP); 774 err = ddi_dma_alloc_handle(mgp->dip, 775 &myri10ge_tx_dma_attr, 776 DDI_DMA_SLEEP, NULL, 777 &handle->h); 778 if (err) { 779 static int limit = 0; 780 if (limit == 0) 781 cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n", 782 mgp->name); 783 limit++; 784 kmem_free(handle, sizeof (*handle)); 785 return (err); 786 } 787 mutex_enter(&tx->handle_lock); 788 
MYRI10GE_SLICE_STAT_INC(tx_handles_alloced); 789 handle->next = tx->free_tx_handles; 790 tx->free_tx_handles = handle; 791 mutex_exit(&tx->handle_lock); 792 return (DDI_SUCCESS); 793 } 794 795 static void 796 myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss) 797 { 798 myri10ge_tx_ring_t *tx = &ss->tx; 799 struct myri10ge_tx_dma_handle *handle; 800 mutex_enter(&tx->handle_lock); 801 802 handle = tx->free_tx_handles; 803 while (handle != NULL) { 804 tx->free_tx_handles = handle->next; 805 ddi_dma_free_handle(&handle->h); 806 kmem_free(handle, sizeof (*handle)); 807 handle = tx->free_tx_handles; 808 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced); 809 } 810 mutex_exit(&tx->handle_lock); 811 if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) { 812 cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n", 813 ss->mgp->name, 814 (int)MYRI10GE_SLICE_STAT(tx_handles_alloced)); 815 } 816 } 817 818 static void 819 myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx, 820 struct myri10ge_tx_dma_handle_head *list) 821 { 822 mutex_enter(&tx->handle_lock); 823 list->tail->next = tx->free_tx_handles; 824 tx->free_tx_handles = list->head; 825 mutex_exit(&tx->handle_lock); 826 } 827 828 static void 829 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx, 830 struct myri10ge_tx_dma_handle *handle) 831 { 832 struct myri10ge_tx_dma_handle_head list; 833 834 if (handle == NULL) 835 return; 836 list.head = handle; 837 list.tail = handle; 838 while (handle != NULL) { 839 list.tail = handle; 840 handle = handle->next; 841 } 842 myri10ge_free_tx_handles(tx, &list); 843 } 844 845 static int 846 myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count, 847 struct myri10ge_tx_dma_handle **ret) 848 { 849 myri10ge_tx_ring_t *tx = &ss->tx; 850 struct myri10ge_tx_dma_handle *handle; 851 int err, i; 852 853 mutex_enter(&tx->handle_lock); 854 for (i = 0; i < count; i++) { 855 handle = tx->free_tx_handles; 856 while (handle == NULL) { 857 mutex_exit(&tx->handle_lock); 858 err = 
myri10ge_add_tx_handle(ss); 859 if (err != DDI_SUCCESS) { 860 goto abort_with_handles; 861 } 862 mutex_enter(&tx->handle_lock); 863 handle = tx->free_tx_handles; 864 } 865 tx->free_tx_handles = handle->next; 866 handle->next = *ret; 867 *ret = handle; 868 } 869 mutex_exit(&tx->handle_lock); 870 return (DDI_SUCCESS); 871 872 abort_with_handles: 873 myri10ge_free_tx_handle_slist(tx, *ret); 874 return (err); 875 } 876 877 878 /* 879 * Frees DMA resources associated with the send ring 880 */ 881 static void 882 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss) 883 { 884 myri10ge_tx_ring_t *tx; 885 struct myri10ge_tx_dma_handle_head handles; 886 size_t bytes; 887 int idx; 888 889 tx = &ss->tx; 890 handles.head = NULL; 891 handles.tail = NULL; 892 for (idx = 0; idx < ss->tx.mask + 1; idx++) { 893 if (tx->info[idx].m) { 894 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h); 895 handles.head = tx->info[idx].handle; 896 if (handles.tail == NULL) 897 handles.tail = tx->info[idx].handle; 898 freeb(tx->info[idx].m); 899 tx->info[idx].m = 0; 900 tx->info[idx].handle = 0; 901 } 902 tx->cp[idx].va = NULL; 903 myri10ge_dma_free(&tx->cp[idx].dma); 904 } 905 bytes = sizeof (*tx->cp) * (tx->mask + 1); 906 kmem_free(tx->cp, bytes); 907 tx->cp = NULL; 908 if (handles.head != NULL) 909 myri10ge_free_tx_handles(tx, &handles); 910 myri10ge_remove_tx_handles(ss); 911 } 912 913 /* 914 * Allocates DMA handles associated with the send ring 915 */ 916 static inline int 917 myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss) 918 { 919 struct myri10ge_tx_dma_handle *handles; 920 int h; 921 size_t bytes; 922 923 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1); 924 ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP); 925 if (ss->tx.cp == NULL) { 926 cmn_err(CE_WARN, 927 "%s: Failed to allocate tx copyblock storage\n", 928 ss->mgp->name); 929 return (DDI_FAILURE); 930 } 931 932 933 /* allocate the TX copyblocks */ 934 for (h = 0; h < ss->tx.mask + 1; h++) { 935 ss->tx.cp[h].va = 
myri10ge_dma_alloc(ss->mgp->dip, 936 4096, &myri10ge_rx_jumbo_dma_attr, 937 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, 938 DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1, 939 DDI_DMA_DONTWAIT); 940 if (ss->tx.cp[h].va == NULL) { 941 cmn_err(CE_WARN, "%s: Failed to allocate tx " 942 "copyblock %d\n", ss->mgp->name, h); 943 goto abort_with_copyblocks; 944 } 945 } 946 /* pre-allocate transmit handles */ 947 handles = NULL; 948 (void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial, 949 &handles); 950 if (handles != NULL) 951 myri10ge_free_tx_handle_slist(&ss->tx, handles); 952 953 return (DDI_SUCCESS); 954 955 abort_with_copyblocks: 956 while (h > 0) { 957 h--; 958 myri10ge_dma_free(&ss->tx.cp[h].dma); 959 } 960 961 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1); 962 kmem_free(ss->tx.cp, bytes); 963 ss->tx.cp = NULL; 964 return (DDI_FAILURE); 965 } 966 967 /* 968 * The eeprom strings on the lanaiX have the format 969 * SN=x\0 970 * MAC=x:x:x:x:x:x\0 971 * PT:ddd mmm xx xx:xx:xx xx\0 972 * PV:ddd mmm xx xx:xx:xx xx\0 973 */ 974 static int 975 myri10ge_read_mac_addr(struct myri10ge_priv *mgp) 976 { 977 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++) 978 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \ 979 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \ 980 (((c) >= 'a' && (c) <= 'f') ? 
(10 + (c) - 'a') : -1))) 981 982 char *ptr, *limit; 983 int i, hv, lv; 984 985 ptr = mgp->eeprom_strings; 986 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE; 987 988 while (*ptr != '\0' && ptr < limit) { 989 if (memcmp(ptr, "MAC=", 4) == 0) { 990 ptr += 4; 991 if (myri10ge_verbose) 992 printf("%s: mac address = %s\n", mgp->name, 993 ptr); 994 mgp->mac_addr_string = ptr; 995 for (i = 0; i < 6; i++) { 996 if ((ptr + 2) > limit) 997 goto abort; 998 999 if (*(ptr+1) == ':') { 1000 hv = 0; 1001 lv = myri10ge_digit(*ptr); ptr++; 1002 } else { 1003 hv = myri10ge_digit(*ptr); ptr++; 1004 lv = myri10ge_digit(*ptr); ptr++; 1005 } 1006 mgp->mac_addr[i] = (hv << 4) | lv; 1007 ptr++; 1008 } 1009 } 1010 if (memcmp((const void *)ptr, "SN=", 3) == 0) { 1011 ptr += 3; 1012 mgp->sn_str = (char *)ptr; 1013 } 1014 if (memcmp((const void *)ptr, "PC=", 3) == 0) { 1015 ptr += 3; 1016 mgp->pc_str = (char *)ptr; 1017 } 1018 MYRI10GE_NEXT_STRING(ptr); 1019 } 1020 1021 return (0); 1022 1023 abort: 1024 cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name); 1025 return (ENXIO); 1026 } 1027 1028 1029 /* 1030 * Determine the register set containing the PCI resource we 1031 * want to map: the memory-mappable part of the interface. We do 1032 * this by scanning the DDI "reg" property of the interface, 1033 * which is an array of mx_ddi_reg_set structures. 
1034 */ 1035 static int 1036 myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span, 1037 unsigned long *busno, unsigned long *devno, 1038 unsigned long *funcno) 1039 { 1040 1041 #define REGISTER_NUMBER(ip) (ip[0] >> 0 & 0xff) 1042 #define FUNCTION_NUMBER(ip) (ip[0] >> 8 & 0x07) 1043 #define DEVICE_NUMBER(ip) (ip[0] >> 11 & 0x1f) 1044 #define BUS_NUMBER(ip) (ip[0] >> 16 & 0xff) 1045 #define ADDRESS_SPACE(ip) (ip[0] >> 24 & 0x03) 1046 #define PCI_ADDR_HIGH(ip) (ip[1]) 1047 #define PCI_ADDR_LOW(ip) (ip[2]) 1048 #define PCI_SPAN_HIGH(ip) (ip[3]) 1049 #define PCI_SPAN_LOW(ip) (ip[4]) 1050 1051 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2 1052 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3 1053 1054 int *data, i, *rs; 1055 uint32_t nelementsp; 1056 1057 #ifdef MYRI10GE_REGSET_VERBOSE 1058 char *address_space_name[] = { "Configuration Space", 1059 "I/O Space", 1060 "32-bit Memory Space", 1061 "64-bit Memory Space" 1062 }; 1063 #endif 1064 1065 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1066 "reg", &data, &nelementsp) != DDI_SUCCESS) { 1067 printf("Could not determine register set.\n"); 1068 return (ENXIO); 1069 } 1070 1071 #ifdef MYRI10GE_REGSET_VERBOSE 1072 printf("There are %d register sets.\n", nelementsp / 5); 1073 #endif 1074 if (!nelementsp) { 1075 printf("Didn't find any \"reg\" properties.\n"); 1076 ddi_prop_free(data); 1077 return (ENODEV); 1078 } 1079 1080 /* Scan for the register number. 
*/ 1081 rs = &data[0]; 1082 *busno = BUS_NUMBER(rs); 1083 *devno = DEVICE_NUMBER(rs); 1084 *funcno = FUNCTION_NUMBER(rs); 1085 1086 #ifdef MYRI10GE_REGSET_VERBOSE 1087 printf("*** Scanning for register number.\n"); 1088 #endif 1089 for (i = 0; i < nelementsp / 5; i++) { 1090 rs = &data[5 * i]; 1091 #ifdef MYRI10GE_REGSET_VERBOSE 1092 printf("Examining register set %d:\n", i); 1093 printf(" Register number = %d.\n", REGISTER_NUMBER(rs)); 1094 printf(" Function number = %d.\n", FUNCTION_NUMBER(rs)); 1095 printf(" Device number = %d.\n", DEVICE_NUMBER(rs)); 1096 printf(" Bus number = %d.\n", BUS_NUMBER(rs)); 1097 printf(" Address space = %d (%s ).\n", ADDRESS_SPACE(rs), 1098 address_space_name[ADDRESS_SPACE(rs)]); 1099 printf(" pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs), 1100 PCI_ADDR_LOW(rs)); 1101 printf(" pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs), 1102 PCI_SPAN_LOW(rs)); 1103 #endif 1104 /* We are looking for a memory property. */ 1105 1106 if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE || 1107 ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) { 1108 *reg_set = i; 1109 1110 #ifdef MYRI10GE_REGSET_VERBOSE 1111 printf("%s uses register set %d.\n", 1112 address_space_name[ADDRESS_SPACE(rs)], *reg_set); 1113 #endif 1114 1115 *span = (PCI_SPAN_LOW(rs)); 1116 #ifdef MYRI10GE_REGSET_VERBOSE 1117 printf("Board span is 0x%x\n", *span); 1118 #endif 1119 break; 1120 } 1121 } 1122 1123 ddi_prop_free(data); 1124 1125 /* If no match, fail. 
*/ 1126 if (i >= nelementsp / 5) { 1127 return (EIO); 1128 } 1129 1130 return (0); 1131 } 1132 1133 1134 static int 1135 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit) 1136 { 1137 void *inflate_buffer; 1138 int rv, status; 1139 size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE; 1140 size_t destlen; 1141 mcp_gen_header_t *hdr; 1142 unsigned hdr_offset, i; 1143 1144 1145 *limit = 0; /* -Wuninitialized */ 1146 status = 0; 1147 1148 inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP); 1149 if (!inflate_buffer) { 1150 cmn_err(CE_WARN, 1151 "%s: Could not allocate buffer to inflate mcp\n", 1152 mgp->name); 1153 return (ENOMEM); 1154 } 1155 1156 destlen = sram_size; 1157 rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e, 1158 mgp->eth_z8e_length); 1159 1160 if (rv != Z_OK) { 1161 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n", 1162 mgp->name, z_strerror(rv)); 1163 status = ENXIO; 1164 goto abort; 1165 } 1166 1167 *limit = (uint32_t)destlen; 1168 1169 hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer + 1170 MCP_HEADER_PTR_OFFSET)); 1171 hdr = (void *)((char *)inflate_buffer + hdr_offset); 1172 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) { 1173 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name, 1174 ntohl(hdr->mcp_type)); 1175 status = EIO; 1176 goto abort; 1177 } 1178 1179 /* save firmware version for kstat */ 1180 (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version)); 1181 if (myri10ge_verbose) 1182 printf("%s: firmware id: %s\n", mgp->name, hdr->version); 1183 1184 /* Copy the inflated firmware to NIC SRAM. 
*/ 1185 for (i = 0; i < *limit; i += 256) { 1186 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i, 1187 (char *)inflate_buffer + i, 1188 min(256U, (unsigned)(*limit - i))); 1189 mb(); 1190 (void) *(int *)(void *)mgp->sram; 1191 mb(); 1192 } 1193 1194 abort: 1195 kmem_free(inflate_buffer, sram_size); 1196 1197 return (status); 1198 1199 } 1200 1201 1202 int 1203 myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd, 1204 myri10ge_cmd_t *data) 1205 { 1206 mcp_cmd_t *buf; 1207 char buf_bytes[sizeof (*buf) + 8]; 1208 volatile mcp_cmd_response_t *response = mgp->cmd; 1209 volatile char *cmd_addr = 1210 (volatile char *)mgp->sram + MXGEFW_ETH_CMD; 1211 int sleep_total = 0; 1212 1213 /* ensure buf is aligned to 8 bytes */ 1214 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 1215 1216 buf->data0 = htonl(data->data0); 1217 buf->data1 = htonl(data->data1); 1218 buf->data2 = htonl(data->data2); 1219 buf->cmd = htonl(cmd); 1220 buf->response_addr.low = mgp->cmd_dma.low; 1221 buf->response_addr.high = mgp->cmd_dma.high; 1222 mutex_enter(&mgp->cmd_lock); 1223 response->result = 0xffffffff; 1224 mb(); 1225 1226 myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf)); 1227 1228 /* wait up to 20ms */ 1229 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 1230 mb(); 1231 if (response->result != 0xffffffff) { 1232 if (response->result == 0) { 1233 data->data0 = ntohl(response->data); 1234 mutex_exit(&mgp->cmd_lock); 1235 return (0); 1236 } else if (ntohl(response->result) 1237 == MXGEFW_CMD_UNKNOWN) { 1238 mutex_exit(&mgp->cmd_lock); 1239 return (ENOSYS); 1240 } else if (ntohl(response->result) 1241 == MXGEFW_CMD_ERROR_UNALIGNED) { 1242 mutex_exit(&mgp->cmd_lock); 1243 return (E2BIG); 1244 } else { 1245 cmn_err(CE_WARN, 1246 "%s: command %d failed, result = %d\n", 1247 mgp->name, cmd, ntohl(response->result)); 1248 mutex_exit(&mgp->cmd_lock); 1249 return (ENXIO); 1250 } 1251 } 1252 drv_usecwait(1000); 1253 } 1254 mutex_exit(&mgp->cmd_lock); 1255 
cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n", 1256 mgp->name, cmd, ntohl(response->result)); 1257 return (EAGAIN); 1258 } 1259 1260 /* 1261 * Enable or disable periodic RDMAs from the host to make certain 1262 * chipsets resend dropped PCIe messages 1263 */ 1264 1265 static void 1266 myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable) 1267 { 1268 char buf_bytes[72]; 1269 volatile uint32_t *confirm; 1270 volatile char *submit; 1271 uint32_t *buf; 1272 int i; 1273 1274 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 1275 1276 /* clear confirmation addr */ 1277 confirm = (volatile uint32_t *)mgp->cmd; 1278 *confirm = 0; 1279 mb(); 1280 1281 /* 1282 * send an rdma command to the PCIe engine, and wait for the 1283 * response in the confirmation address. The firmware should 1284 * write a -1 there to indicate it is alive and well 1285 */ 1286 1287 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */ 1288 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */ 1289 buf[2] = htonl(0xffffffff); /* confirm data */ 1290 buf[3] = htonl(mgp->cmd_dma.high); /* dummy addr MSW */ 1291 buf[4] = htonl(mgp->cmd_dma.low); /* dummy addr LSW */ 1292 buf[5] = htonl(enable); /* enable? */ 1293 1294 1295 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA); 1296 1297 myri10ge_pio_copy((char *)submit, buf, 64); 1298 mb(); 1299 drv_usecwait(1000); 1300 mb(); 1301 i = 0; 1302 while (*confirm != 0xffffffff && i < 20) { 1303 drv_usecwait(1000); 1304 i++; 1305 } 1306 if (*confirm != 0xffffffff) { 1307 cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)", 1308 mgp->name, 1309 (enable ? 
"enable" : "disable"), (void*) confirm, *confirm); 1310 } 1311 } 1312 1313 static int 1314 myri10ge_load_firmware(struct myri10ge_priv *mgp) 1315 { 1316 myri10ge_cmd_t cmd; 1317 volatile uint32_t *confirm; 1318 volatile char *submit; 1319 char buf_bytes[72]; 1320 uint32_t *buf, size; 1321 int status, i; 1322 1323 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 1324 1325 status = myri10ge_load_firmware_from_zlib(mgp, &size); 1326 if (status) { 1327 cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name); 1328 return (status); 1329 } 1330 1331 /* clear confirmation addr */ 1332 confirm = (volatile uint32_t *)mgp->cmd; 1333 *confirm = 0; 1334 mb(); 1335 1336 /* 1337 * send a reload command to the bootstrap MCP, and wait for the 1338 * response in the confirmation address. The firmware should 1339 * write a -1 there to indicate it is alive and well 1340 */ 1341 1342 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */ 1343 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */ 1344 buf[2] = htonl(0xffffffff); /* confirm data */ 1345 1346 /* 1347 * FIX: All newest firmware should un-protect the bottom of 1348 * the sram before handoff. However, the very first interfaces 1349 * do not. 
Therefore the handoff copy must skip the first 8 bytes 1350 */ 1351 buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */ 1352 buf[4] = htonl(size - 8); /* length of code */ 1353 buf[5] = htonl(8); /* where to copy to */ 1354 buf[6] = htonl(0); /* where to jump to */ 1355 1356 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF); 1357 1358 myri10ge_pio_copy((char *)submit, buf, 64); 1359 mb(); 1360 drv_usecwait(1000); 1361 mb(); 1362 i = 0; 1363 while (*confirm != 0xffffffff && i < 1000) { 1364 drv_usecwait(1000); 1365 i++; 1366 } 1367 if (*confirm != 0xffffffff) { 1368 cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)", 1369 mgp->name, (void *) confirm, *confirm); 1370 1371 return (ENXIO); 1372 } 1373 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 1374 if (status != 0) { 1375 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n", 1376 mgp->name); 1377 return (ENXIO); 1378 } 1379 1380 mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 1381 myri10ge_dummy_rdma(mgp, 1); 1382 return (0); 1383 } 1384 1385 static int 1386 myri10ge_m_unicst(void *arg, const uint8_t *addr) 1387 { 1388 struct myri10ge_priv *mgp = arg; 1389 myri10ge_cmd_t cmd; 1390 int status; 1391 1392 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1393 | (addr[2] << 8) | addr[3]); 1394 1395 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1396 1397 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd); 1398 if (status == 0 && (addr != mgp->mac_addr)) 1399 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr)); 1400 1401 return (status); 1402 } 1403 1404 static int 1405 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause) 1406 { 1407 myri10ge_cmd_t cmd; 1408 int status; 1409 1410 if (pause) 1411 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL, 1412 &cmd); 1413 else 1414 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL, 1415 &cmd); 1416 1417 if (status) { 1418 cmn_err(CE_WARN, "%s: Failed to set flow control 
mode\n", 1419 mgp->name); 1420 return (ENXIO); 1421 } 1422 mgp->pause = pause; 1423 return (0); 1424 } 1425 1426 static void 1427 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc) 1428 { 1429 myri10ge_cmd_t cmd; 1430 int status; 1431 1432 if (promisc) 1433 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd); 1434 else 1435 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd); 1436 1437 if (status) { 1438 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n", 1439 mgp->name); 1440 } 1441 } 1442 1443 static int 1444 myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type) 1445 { 1446 myri10ge_cmd_t cmd; 1447 int status; 1448 uint32_t len; 1449 void *dmabench; 1450 struct myri10ge_dma_stuff dmabench_dma; 1451 char *test = " "; 1452 1453 /* 1454 * Run a small DMA test. 1455 * The magic multipliers to the length tell the firmware 1456 * tp do DMA read, write, or read+write tests. The 1457 * results are returned in cmd.data0. The upper 16 1458 * bits or the return is the number of transfers completed. 
1459 * The lower 16 bits is the time in 0.5us ticks that the 1460 * transfers took to complete 1461 */ 1462 1463 len = mgp->tx_boundary; 1464 1465 dmabench = myri10ge_dma_alloc(mgp->dip, len, 1466 &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr, 1467 DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING, 1468 &dmabench_dma, 1, DDI_DMA_DONTWAIT); 1469 mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0; 1470 if (dmabench == NULL) { 1471 cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name); 1472 return (ENOMEM); 1473 } 1474 1475 cmd.data0 = ntohl(dmabench_dma.low); 1476 cmd.data1 = ntohl(dmabench_dma.high); 1477 cmd.data2 = len * 0x10000; 1478 status = myri10ge_send_cmd(mgp, test_type, &cmd); 1479 if (status != 0) { 1480 test = "read"; 1481 goto abort; 1482 } 1483 mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 1484 1485 cmd.data0 = ntohl(dmabench_dma.low); 1486 cmd.data1 = ntohl(dmabench_dma.high); 1487 cmd.data2 = len * 0x1; 1488 status = myri10ge_send_cmd(mgp, test_type, &cmd); 1489 if (status != 0) { 1490 test = "write"; 1491 goto abort; 1492 } 1493 mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 1494 1495 cmd.data0 = ntohl(dmabench_dma.low); 1496 cmd.data1 = ntohl(dmabench_dma.high); 1497 cmd.data2 = len * 0x10001; 1498 status = myri10ge_send_cmd(mgp, test_type, &cmd); 1499 if (status != 0) { 1500 test = "read/write"; 1501 goto abort; 1502 } 1503 mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 1504 (cmd.data0 & 0xffff); 1505 1506 1507 abort: 1508 myri10ge_dma_free(&dmabench_dma); 1509 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 1510 cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name, 1511 test); 1512 return (status); 1513 } 1514 1515 static int 1516 myri10ge_reset(struct myri10ge_priv *mgp) 1517 { 1518 myri10ge_cmd_t cmd; 1519 struct myri10ge_nic_stat *ethstat; 1520 struct myri10ge_slice_state *ss; 1521 int i, status; 1522 size_t bytes; 1523 1524 /* send a reset command to the 
card to see if it is alive */ 1525 (void) memset(&cmd, 0, sizeof (cmd)); 1526 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd); 1527 if (status != 0) { 1528 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 1529 return (ENXIO); 1530 } 1531 1532 /* Now exchange information about interrupts */ 1533 1534 bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry); 1535 cmd.data0 = (uint32_t)bytes; 1536 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1537 1538 /* 1539 * Even though we already know how many slices are supported 1540 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES 1541 * has magic side effects, and must be called after a reset. 1542 * It must be called prior to calling any RSS related cmds, 1543 * including assigning an interrupt queue for anything but 1544 * slice 0. It must also be called *after* 1545 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1546 * the firmware to compute offsets. 1547 */ 1548 1549 if (mgp->num_slices > 1) { 1550 1551 /* ask the maximum number of slices it supports */ 1552 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1553 &cmd); 1554 if (status != 0) { 1555 cmn_err(CE_WARN, 1556 "%s: failed to get number of slices\n", 1557 mgp->name); 1558 return (status); 1559 } 1560 1561 /* 1562 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1563 * to setting up the interrupt queue DMA 1564 */ 1565 1566 cmd.data0 = mgp->num_slices; 1567 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE | 1568 MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1569 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1570 &cmd); 1571 if (status != 0) { 1572 cmn_err(CE_WARN, 1573 "%s: failed to set number of slices\n", 1574 mgp->name); 1575 return (status); 1576 } 1577 } 1578 for (i = 0; i < mgp->num_slices; i++) { 1579 ss = &mgp->ss[i]; 1580 cmd.data0 = ntohl(ss->rx_done.dma.low); 1581 cmd.data1 = ntohl(ss->rx_done.dma.high); 1582 cmd.data2 = i; 1583 status |= myri10ge_send_cmd(mgp, 
MXGEFW_CMD_SET_INTRQ_DMA, 1584 &cmd); 1585 }; 1586 1587 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1588 for (i = 0; i < mgp->num_slices; i++) { 1589 ss = &mgp->ss[i]; 1590 ss->irq_claim = (volatile unsigned int *) 1591 (void *)(mgp->sram + cmd.data0 + 8 * i); 1592 } 1593 1594 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 1595 status |= myri10ge_send_cmd(mgp, 1596 MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); 1597 mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0); 1598 } 1599 1600 status |= myri10ge_send_cmd(mgp, 1601 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1602 mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0); 1603 1604 if (status != 0) { 1605 cmn_err(CE_WARN, "%s: failed set interrupt parameters\n", 1606 mgp->name); 1607 return (status); 1608 } 1609 1610 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay); 1611 (void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST); 1612 1613 /* reset mcp/driver shared state back to 0 */ 1614 1615 for (i = 0; i < mgp->num_slices; i++) { 1616 ss = &mgp->ss[i]; 1617 bytes = mgp->max_intr_slots * 1618 sizeof (*mgp->ss[0].rx_done.entry); 1619 (void) memset(ss->rx_done.entry, 0, bytes); 1620 ss->tx.req = 0; 1621 ss->tx.done = 0; 1622 ss->tx.pkt_done = 0; 1623 ss->rx_big.cnt = 0; 1624 ss->rx_small.cnt = 0; 1625 ss->rx_done.idx = 0; 1626 ss->rx_done.cnt = 0; 1627 ss->rx_token = 0; 1628 ss->tx.watchdog_done = 0; 1629 ss->tx.watchdog_req = 0; 1630 ss->tx.active = 0; 1631 ss->tx.activate = 0; 1632 } 1633 mgp->watchdog_rx_pause = 0; 1634 if (mgp->ksp_stat != NULL) { 1635 ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data; 1636 ethstat->link_changes.value.ul = 0; 1637 } 1638 status = myri10ge_m_unicst(mgp, mgp->mac_addr); 1639 myri10ge_change_promisc(mgp, 0); 1640 (void) myri10ge_change_pause(mgp, mgp->pause); 1641 return (status); 1642 } 1643 1644 static int 1645 myri10ge_init_toeplitz(struct myri10ge_priv *mgp) 1646 { 1647 myri10ge_cmd_t cmd; 1648 int i, b, s, t, j; 
1649 int status; 1650 uint32_t k[8]; 1651 uint32_t tmp; 1652 uint8_t *key; 1653 1654 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 1655 &cmd); 1656 if (status != 0) { 1657 cmn_err(CE_WARN, "%s: failed to get rss key\n", 1658 mgp->name); 1659 return (EIO); 1660 } 1661 myri10ge_pio_copy32(mgp->rss_key, 1662 (uint32_t *)(void*)((char *)mgp->sram + cmd.data0), 1663 sizeof (mgp->rss_key)); 1664 1665 mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256, 1666 KM_SLEEP); 1667 key = (uint8_t *)mgp->rss_key; 1668 t = 0; 1669 for (b = 0; b < 12; b++) { 1670 for (s = 0; s < 8; s++) { 1671 /* Bits: b*8+s, ..., b*8+s+31 */ 1672 k[s] = 0; 1673 for (j = 0; j < 32; j++) { 1674 int bit = b*8+s+j; 1675 bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7))); 1676 k[s] |= bit << (31 - j); 1677 } 1678 } 1679 1680 for (i = 0; i <= 0xff; i++) { 1681 tmp = 0; 1682 if (i & (1 << 7)) { tmp ^= k[0]; } 1683 if (i & (1 << 6)) { tmp ^= k[1]; } 1684 if (i & (1 << 5)) { tmp ^= k[2]; } 1685 if (i & (1 << 4)) { tmp ^= k[3]; } 1686 if (i & (1 << 3)) { tmp ^= k[4]; } 1687 if (i & (1 << 2)) { tmp ^= k[5]; } 1688 if (i & (1 << 1)) { tmp ^= k[6]; } 1689 if (i & (1 << 0)) { tmp ^= k[7]; } 1690 mgp->toeplitz_hash_table[t++] = tmp; 1691 } 1692 } 1693 return (0); 1694 } 1695 1696 static inline struct myri10ge_slice_state * 1697 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip) 1698 { 1699 struct tcphdr *hdr; 1700 uint32_t saddr, daddr; 1701 uint32_t hash, slice; 1702 uint32_t *table = mgp->toeplitz_hash_table; 1703 uint16_t src, dst; 1704 1705 /* 1706 * Note hashing order is reversed from how it is done 1707 * in the NIC, so as to generate the same hash value 1708 * for the connection to try to keep connections CPU local 1709 */ 1710 1711 /* hash on IPv4 src/dst address */ 1712 saddr = ntohl(ip->ip_src.s_addr); 1713 daddr = ntohl(ip->ip_dst.s_addr); 1714 hash = table[(256 * 0) + ((daddr >> 24) & 0xff)]; 1715 hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)]; 1716 
hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
	hash ^= table[(256 * 3) + ((daddr) & 0xff)];
	hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
	hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
	hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
	hash ^= table[(256 * 7) + ((saddr) & 0xff)];
	/* hash on TCP port, if required */
	if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
	    ip->ip_p == IPPROTO_TCP) {
		hdr = (struct tcphdr *)(void *)
		    (((uint8_t *)ip) + (ip->ip_hl << 2));
		src = ntohs(hdr->th_sport);
		dst = ntohs(hdr->th_dport);

		hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
		hash ^= table[(256 * 9) + ((dst) & 0xff)];
		hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
		hash ^= table[(256 * 11) + ((src) & 0xff)];
	}
	/* masking selects a slice; presumably num_slices is a power of 2 */
	slice = (mgp->num_slices - 1) & hash;
	return (&mgp->ss[slice]);

}

/*
 * Pick a transmit slice from the TCP/UDP port numbers of an IPv4 packet.
 * Packets that are neither TCP nor UDP go to slice 0.
 */
static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t slice, hash_val;


	if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
		return (&mgp->ss[0]);
	}
	/*
	 * The port fields are read through a tcphdr for UDP as well;
	 * presumably this relies on both headers starting with the
	 * source and destination ports.
	 */
	hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) +  (ip->ip_hl << 2));

	/*
	 * Use the low-order byte of the *destination* port for
	 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing;
	 * MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT adds the low-order byte of
	 * the source port as well.
	 */
	hash_val = ntohs(hdr->th_dport) & 0xff;
	if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
		hash_val += ntohs(hdr->th_sport) & 0xff;

	slice = (mgp->num_slices - 1) & hash_val;
	return (&mgp->ss[slice]);
}

/*
 * Choose the transmit slice for an outgoing packet.
 *
 * With a single slice, slice 0 is always used.  If myri10ge_tx_hash is
 * 0 the slice is derived from the current CPU id.  Otherwise the packet
 * must be IPv4 (optionally behind one VLAN tag) with its headers in the
 * first mblk, and the slice is chosen by the hash selected through
 * myri10ge_rss_hash; anything else goes to slice 0.
 */
static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 *  ensure it is a TCP or UDP over IPv4 packet, and that the
	 *  headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	/* the extra 8 bytes are the transport header bytes the hash may read */
	if ((MBLKL(mp)) <  (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		/* account for the VLAN tag in front of the IP header */
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	/* re-check the length using the real IP header length */
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) <  (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}

/*
 * Bring up one slice: query the ring sizes and SRAM ring locations,
 * allocate the host shadow and info rings, stock the receive rings,
 * prepare the transmit ring and point the firmware at the slice's
 * statistics block.
 *
 * Returns 0 on success.  On failure everything allocated so far is
 * released and a non-zero status is returned.
 */
static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	/* index of this slice within mgp->ss[] */
	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	/* masks assume the entry counts are powers of 2 -- TODO confirm */
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	/* every failure until the tx ring is prepared reports ENOMEM */
	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	/* jumbo buffers needed to carve out a full ring of small buffers */
	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	/* hand the ring to the NIC eight entries at a time */
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}


	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
ss->rx_small.cnt = ss->rx_small.mask + 1; 1955 1956 mutex_exit(&ss->jpool.mtx); 1957 1958 status = myri10ge_prepare_tx_ring(ss); 1959 1960 if (status != 0) 1961 goto abort_with_small_jbufs; 1962 1963 cmd.data0 = ntohl(ss->fw_stats_dma.low); 1964 cmd.data1 = ntohl(ss->fw_stats_dma.high); 1965 cmd.data2 = sizeof (mcp_irq_data_t); 1966 cmd.data2 |= (slice << 16); 1967 bzero(ss->fw_stats, sizeof (*ss->fw_stats)); 1968 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 1969 if (status == ENOSYS) { 1970 cmd.data0 = ntohl(ss->fw_stats_dma.low) + 1971 offsetof(mcp_irq_data_t, send_done_count); 1972 cmd.data1 = ntohl(ss->fw_stats_dma.high); 1973 status = myri10ge_send_cmd(mgp, 1974 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd); 1975 } 1976 if (status) { 1977 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name); 1978 goto abort_with_tx; 1979 } 1980 1981 return (0); 1982 1983 abort_with_tx: 1984 myri10ge_unprepare_tx_ring(ss); 1985 1986 abort_with_small_jbufs: 1987 myri10ge_release_small_jbufs(ss); 1988 1989 abort_with_jumbos: 1990 if (allocated != 0) { 1991 mutex_enter(&ss->jpool.mtx); 1992 ss->jpool.low_water = 0; 1993 mutex_exit(&ss->jpool.mtx); 1994 myri10ge_unstock_jumbos(ss); 1995 myri10ge_remove_jbufs(ss); 1996 } 1997 1998 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 1999 kmem_free(ss->rx_big.info, bytes); 2000 2001 abort_with_rx_small_info: 2002 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2003 kmem_free(ss->rx_small.info, bytes); 2004 2005 abort_with_tx_info: 2006 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2007 kmem_free(ss->tx.info, bytes); 2008 2009 abort_with_rx_big_shadow: 2010 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2011 kmem_free(ss->rx_big.shadow, bytes); 2012 2013 abort_with_rx_small_shadow: 2014 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2015 kmem_free(ss->rx_small.shadow, bytes); 2016 abort: 2017 return (status); 2018 2019 } 2020 2021 static void 2022 myri10ge_teardown_slice(struct 
myri10ge_slice_state *ss) 2023 { 2024 int tx_ring_entries, rx_ring_entries; 2025 size_t bytes; 2026 2027 /* ignore slices that have not been fully setup */ 2028 if (ss->tx.cp == NULL) 2029 return; 2030 /* Free the TX copy buffers */ 2031 myri10ge_unprepare_tx_ring(ss); 2032 2033 /* stop passing returned buffers to firmware */ 2034 2035 mutex_enter(&ss->jpool.mtx); 2036 ss->jpool.low_water = 0; 2037 mutex_exit(&ss->jpool.mtx); 2038 myri10ge_release_small_jbufs(ss); 2039 2040 /* Release the free jumbo frame pool */ 2041 myri10ge_unstock_jumbos(ss); 2042 myri10ge_remove_jbufs(ss); 2043 2044 rx_ring_entries = ss->rx_big.mask + 1; 2045 tx_ring_entries = ss->tx.mask + 1; 2046 2047 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2048 kmem_free(ss->rx_big.info, bytes); 2049 2050 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2051 kmem_free(ss->rx_small.info, bytes); 2052 2053 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2054 kmem_free(ss->tx.info, bytes); 2055 2056 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2057 kmem_free(ss->rx_big.shadow, bytes); 2058 2059 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2060 kmem_free(ss->rx_small.shadow, bytes); 2061 2062 } 2063 static int 2064 myri10ge_start_locked(struct myri10ge_priv *mgp) 2065 { 2066 myri10ge_cmd_t cmd; 2067 int status, big_pow2, i; 2068 volatile uint8_t *itable; 2069 2070 status = DDI_SUCCESS; 2071 /* Allocate DMA resources and receive buffers */ 2072 2073 status = myri10ge_reset(mgp); 2074 if (status != 0) { 2075 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 2076 return (DDI_FAILURE); 2077 } 2078 2079 if (mgp->num_slices > 1) { 2080 cmd.data0 = mgp->num_slices; 2081 cmd.data1 = 1; /* use MSI-X */ 2082 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2083 &cmd); 2084 if (status != 0) { 2085 cmn_err(CE_WARN, 2086 "%s: failed to set number of slices\n", 2087 mgp->name); 2088 goto abort_with_nothing; 2089 } 2090 /* setup the indirection table */ 2091 cmd.data0 = 
mgp->num_slices; 2092 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2093 &cmd); 2094 2095 status |= myri10ge_send_cmd(mgp, 2096 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 2097 if (status != 0) { 2098 cmn_err(CE_WARN, 2099 "%s: failed to setup rss tables\n", mgp->name); 2100 } 2101 2102 /* just enable an identity mapping */ 2103 itable = mgp->sram + cmd.data0; 2104 for (i = 0; i < mgp->num_slices; i++) 2105 itable[i] = (uint8_t)i; 2106 2107 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) { 2108 status = myri10ge_init_toeplitz(mgp); 2109 if (status != 0) { 2110 cmn_err(CE_WARN, "%s: failed to setup " 2111 "toeplitz tx hash table", mgp->name); 2112 goto abort_with_nothing; 2113 } 2114 } 2115 cmd.data0 = 1; 2116 cmd.data1 = myri10ge_rss_hash; 2117 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2118 &cmd); 2119 if (status != 0) { 2120 cmn_err(CE_WARN, 2121 "%s: failed to enable slices\n", mgp->name); 2122 goto abort_with_toeplitz; 2123 } 2124 } 2125 2126 for (i = 0; i < mgp->num_slices; i++) { 2127 status = myri10ge_setup_slice(&mgp->ss[i]); 2128 if (status != 0) 2129 goto abort_with_slices; 2130 } 2131 2132 /* 2133 * Tell the MCP how many buffers he has, and to 2134 * bring the ethernet interface up 2135 * 2136 * Firmware needs the big buff size as a power of 2. 
Lie and 2137 * tell him the buffer is larger, because we only use 1 2138 * buffer/pkt, and the mtu will prevent overruns 2139 */ 2140 big_pow2 = myri10ge_mtu + MXGEFW_PAD; 2141 while ((big_pow2 & (big_pow2 - 1)) != 0) 2142 big_pow2++; 2143 2144 /* now give firmware buffers sizes, and MTU */ 2145 cmd.data0 = myri10ge_mtu; 2146 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd); 2147 cmd.data0 = myri10ge_small_bytes; 2148 status |= 2149 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 2150 cmd.data0 = big_pow2; 2151 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2152 if (status) { 2153 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name); 2154 goto abort_with_slices; 2155 } 2156 2157 2158 cmd.data0 = 1; 2159 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd); 2160 if (status) { 2161 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n", 2162 mgp->name, status); 2163 } else { 2164 mgp->features |= MYRI10GE_TSO; 2165 } 2166 2167 mgp->link_state = -1; 2168 mgp->rdma_tags_available = 15; 2169 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd); 2170 if (status) { 2171 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name); 2172 goto abort_with_slices; 2173 } 2174 mgp->running = MYRI10GE_ETH_RUNNING; 2175 return (DDI_SUCCESS); 2176 2177 abort_with_slices: 2178 for (i = 0; i < mgp->num_slices; i++) 2179 myri10ge_teardown_slice(&mgp->ss[i]); 2180 2181 mgp->running = MYRI10GE_ETH_STOPPED; 2182 2183 abort_with_toeplitz: 2184 if (mgp->toeplitz_hash_table != NULL) { 2185 kmem_free(mgp->toeplitz_hash_table, 2186 sizeof (uint32_t) * 12 * 256); 2187 mgp->toeplitz_hash_table = NULL; 2188 } 2189 2190 abort_with_nothing: 2191 return (DDI_FAILURE); 2192 } 2193 2194 static void 2195 myri10ge_stop_locked(struct myri10ge_priv *mgp) 2196 { 2197 int status, old_down_cnt; 2198 myri10ge_cmd_t cmd; 2199 int wait_time = 10; 2200 int i, polling; 2201 2202 old_down_cnt = mgp->down_cnt; 2203 mb(); 2204 status = 
myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2205 if (status) { 2206 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 2207 } 2208 2209 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2210 delay(1 * drv_usectohz(1000000)); 2211 wait_time--; 2212 if (wait_time == 0) 2213 break; 2214 } 2215 again: 2216 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2217 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name); 2218 for (i = 0; i < mgp->num_slices; i++) { 2219 /* 2220 * take and release the rx lock to ensure 2221 * that no interrupt thread is blocked 2222 * elsewhere in the stack, preventing 2223 * completion 2224 */ 2225 2226 mutex_enter(&mgp->ss[i].rx_lock); 2227 printf("%s: slice %d rx irq idle\n", 2228 mgp->name, i); 2229 mutex_exit(&mgp->ss[i].rx_lock); 2230 2231 /* verify that the poll handler is inactive */ 2232 mutex_enter(&mgp->ss->poll_lock); 2233 polling = mgp->ss->rx_polling; 2234 mutex_exit(&mgp->ss->poll_lock); 2235 if (polling) { 2236 printf("%s: slice %d is polling\n", 2237 mgp->name, i); 2238 delay(1 * drv_usectohz(1000000)); 2239 goto again; 2240 } 2241 } 2242 delay(1 * drv_usectohz(1000000)); 2243 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2244 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name); 2245 } 2246 } 2247 2248 for (i = 0; i < mgp->num_slices; i++) 2249 myri10ge_teardown_slice(&mgp->ss[i]); 2250 2251 if (mgp->toeplitz_hash_table != NULL) { 2252 kmem_free(mgp->toeplitz_hash_table, 2253 sizeof (uint32_t) * 12 * 256); 2254 mgp->toeplitz_hash_table = NULL; 2255 } 2256 mgp->running = MYRI10GE_ETH_STOPPED; 2257 } 2258 2259 static int 2260 myri10ge_m_start(void *arg) 2261 { 2262 struct myri10ge_priv *mgp = arg; 2263 int status; 2264 2265 mutex_enter(&mgp->intrlock); 2266 2267 if (mgp->running != MYRI10GE_ETH_STOPPED) { 2268 mutex_exit(&mgp->intrlock); 2269 return (DDI_FAILURE); 2270 } 2271 status = myri10ge_start_locked(mgp); 2272 mutex_exit(&mgp->intrlock); 2273 2274 if (status != 
DDI_SUCCESS) 2275 return (status); 2276 2277 /* start the watchdog timer */ 2278 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 2279 mgp->timer_ticks); 2280 return (DDI_SUCCESS); 2281 2282 } 2283 2284 static void 2285 myri10ge_m_stop(void *arg) 2286 { 2287 struct myri10ge_priv *mgp = arg; 2288 2289 mutex_enter(&mgp->intrlock); 2290 /* if the device not running give up */ 2291 if (mgp->running != MYRI10GE_ETH_RUNNING) { 2292 mutex_exit(&mgp->intrlock); 2293 return; 2294 } 2295 2296 mgp->running = MYRI10GE_ETH_STOPPING; 2297 mutex_exit(&mgp->intrlock); 2298 (void) untimeout(mgp->timer_id); 2299 mutex_enter(&mgp->intrlock); 2300 myri10ge_stop_locked(mgp); 2301 mutex_exit(&mgp->intrlock); 2302 2303 } 2304 2305 static inline void 2306 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) 2307 { 2308 struct ether_header *eh; 2309 struct ip *ip; 2310 struct ip6_hdr *ip6; 2311 uint32_t start, stuff, end, partial, hdrlen; 2312 2313 2314 csum = ntohs((uint16_t)csum); 2315 eh = (struct ether_header *)(void *)mp->b_rptr; 2316 hdrlen = sizeof (*eh); 2317 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2318 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2319 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2320 s->brdcstrcv++; 2321 else 2322 s->multircv++; 2323 } 2324 2325 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 2326 /* 2327 * fix checksum by subtracting 4 bytes after what the 2328 * firmware thought was the end of the ether hdr 2329 */ 2330 partial = *(uint32_t *) 2331 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE); 2332 csum += ~partial; 2333 csum += (csum < ~partial); 2334 csum = (csum >> 16) + (csum & 0xFFFF); 2335 csum = (csum >> 16) + (csum & 0xFFFF); 2336 hdrlen += VLAN_TAGSZ; 2337 } 2338 2339 if (eh->ether_type == BE_16(ETHERTYPE_IP)) { 2340 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen); 2341 start = ip->ip_hl << 2; 2342 2343 if (ip->ip_p == IPPROTO_TCP) 2344 stuff = start + offsetof(struct tcphdr, th_sum); 2345 else if (ip->ip_p == 
IPPROTO_UDP) 2346 stuff = start + offsetof(struct udphdr, uh_sum); 2347 else 2348 return; 2349 end = ntohs(ip->ip_len); 2350 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) { 2351 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen); 2352 start = sizeof (*ip6); 2353 if (ip6->ip6_nxt == IPPROTO_TCP) { 2354 stuff = start + offsetof(struct tcphdr, th_sum); 2355 } else if (ip6->ip6_nxt == IPPROTO_UDP) 2356 stuff = start + offsetof(struct udphdr, uh_sum); 2357 else 2358 return; 2359 end = start + ntohs(ip6->ip6_plen); 2360 /* 2361 * IPv6 headers do not contain a checksum, and hence 2362 * do not checksum to zero, so they don't "fall out" 2363 * of the partial checksum calculation like IPv4 2364 * headers do. We need to fix the partial checksum by 2365 * subtracting the checksum of the IPv6 header. 2366 */ 2367 2368 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6)); 2369 csum += ~partial; 2370 csum += (csum < ~partial); 2371 csum = (csum >> 16) + (csum & 0xFFFF); 2372 csum = (csum >> 16) + (csum & 0xFFFF); 2373 } else { 2374 return; 2375 } 2376 2377 if (MBLKL(mp) > hdrlen + end) { 2378 /* padded frame, so hw csum may be invalid */ 2379 return; 2380 } 2381 2382 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); 2383 } 2384 2385 static mblk_t * 2386 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len, 2387 uint32_t csum) 2388 { 2389 mblk_t *mp; 2390 myri10ge_rx_ring_t *rx; 2391 int idx; 2392 2393 rx = &ss->rx_small; 2394 idx = rx->cnt & rx->mask; 2395 ss->rx_small.cnt++; 2396 2397 /* allocate a new buffer to pass up the stack */ 2398 mp = allocb(len + MXGEFW_PAD, 0); 2399 if (mp == NULL) { 2400 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf); 2401 goto abort; 2402 } 2403 bcopy(ss->rx_small.info[idx].ptr, 2404 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2405 mp->b_wptr += len + MXGEFW_PAD; 2406 mp->b_rptr += MXGEFW_PAD; 2407 2408 ss->rx_stats.ibytes += len; 2409 ss->rx_stats.ipackets += 1; 2410 myri10ge_rx_csum(mp, &ss->rx_stats, 
csum); 2411 2412 abort: 2413 if ((idx & 7) == 7) { 2414 myri10ge_submit_8rx(&rx->lanai[idx - 7], 2415 &rx->shadow[idx - 7]); 2416 } 2417 2418 return (mp); 2419 } 2420 2421 2422 static mblk_t * 2423 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len, 2424 uint32_t csum) 2425 { 2426 struct myri10ge_jpool_stuff *jpool; 2427 struct myri10ge_jpool_entry *j; 2428 mblk_t *mp; 2429 int idx, num_owned_by_mcp; 2430 2431 jpool = &ss->jpool; 2432 idx = ss->j_rx_cnt & ss->rx_big.mask; 2433 j = ss->rx_big.info[idx].j; 2434 2435 if (j == NULL) { 2436 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n", 2437 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt); 2438 return (NULL); 2439 } 2440 2441 2442 ss->rx_big.info[idx].j = NULL; 2443 ss->j_rx_cnt++; 2444 2445 2446 /* 2447 * Check to see if we are low on rx buffers. 2448 * Note that we must leave at least 8 free so there are 2449 * enough to free in a single 64-byte write. 2450 */ 2451 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2452 if (num_owned_by_mcp < jpool->low_water) { 2453 mutex_enter(&jpool->mtx); 2454 myri10ge_restock_jumbos(ss); 2455 mutex_exit(&jpool->mtx); 2456 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2457 /* if we are still low, then we have to copy */ 2458 if (num_owned_by_mcp < 16) { 2459 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy); 2460 /* allocate a new buffer to pass up the stack */ 2461 mp = allocb(len + MXGEFW_PAD, 0); 2462 if (mp == NULL) { 2463 goto abort; 2464 } 2465 bcopy(j->buf, 2466 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2467 myri10ge_jfree_rtn(j); 2468 /* push buffer back to NIC */ 2469 mutex_enter(&jpool->mtx); 2470 myri10ge_restock_jumbos(ss); 2471 mutex_exit(&jpool->mtx); 2472 goto set_len; 2473 } 2474 } 2475 2476 /* loan our buffer to the stack */ 2477 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func); 2478 if (mp == NULL) { 2479 goto abort; 2480 } 2481 2482 set_len: 2483 mp->b_rptr += MXGEFW_PAD; 2484 mp->b_wptr = ((unsigned char *) 
mp->b_rptr + len); 2485 2486 ss->rx_stats.ibytes += len; 2487 ss->rx_stats.ipackets += 1; 2488 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2489 2490 return (mp); 2491 2492 abort: 2493 myri10ge_jfree_rtn(j); 2494 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf); 2495 return (NULL); 2496 } 2497 2498 /* 2499 * Free all transmit buffers up until the specified index 2500 */ 2501 static inline void 2502 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index) 2503 { 2504 myri10ge_tx_ring_t *tx; 2505 struct myri10ge_tx_dma_handle_head handles; 2506 int idx; 2507 int limit = 0; 2508 2509 tx = &ss->tx; 2510 handles.head = NULL; 2511 handles.tail = NULL; 2512 while (tx->pkt_done != (int)mcp_index) { 2513 idx = tx->done & tx->mask; 2514 2515 /* 2516 * mblk & DMA handle attached only to first slot 2517 * per buffer in the packet 2518 */ 2519 2520 if (tx->info[idx].m) { 2521 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h); 2522 tx->info[idx].handle->next = handles.head; 2523 handles.head = tx->info[idx].handle; 2524 if (handles.tail == NULL) 2525 handles.tail = tx->info[idx].handle; 2526 freeb(tx->info[idx].m); 2527 tx->info[idx].m = 0; 2528 tx->info[idx].handle = 0; 2529 } 2530 if (tx->info[idx].ostat.opackets != 0) { 2531 tx->stats.multixmt += tx->info[idx].ostat.multixmt; 2532 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt; 2533 tx->stats.obytes += tx->info[idx].ostat.obytes; 2534 tx->stats.opackets += tx->info[idx].ostat.opackets; 2535 tx->info[idx].stat.un.all = 0; 2536 tx->pkt_done++; 2537 } 2538 2539 tx->done++; 2540 /* 2541 * if we stalled the queue, wake it. But Wait until 2542 * we have at least 1/2 our slots free. 
2543 */ 2544 if ((tx->req - tx->done) < (tx->mask >> 1) && 2545 tx->stall != tx->sched) { 2546 mutex_enter(&ss->tx.lock); 2547 tx->sched = tx->stall; 2548 mutex_exit(&ss->tx.lock); 2549 mac_tx_ring_update(ss->mgp->mh, tx->rh); 2550 } 2551 2552 /* limit potential for livelock */ 2553 if (unlikely(++limit > 2 * tx->mask)) 2554 break; 2555 } 2556 if (tx->req == tx->done && tx->stop != NULL) { 2557 /* 2558 * Nic has sent all pending requests, allow him 2559 * to stop polling this queue 2560 */ 2561 mutex_enter(&tx->lock); 2562 if (tx->req == tx->done && tx->active) { 2563 *(int *)(void *)tx->stop = 1; 2564 tx->active = 0; 2565 mb(); 2566 } 2567 mutex_exit(&tx->lock); 2568 } 2569 if (handles.head != NULL) 2570 myri10ge_free_tx_handles(tx, &handles); 2571 } 2572 2573 static void 2574 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl) 2575 { 2576 mbl->head = NULL; 2577 mbl->tail = &mbl->head; 2578 mbl->cnt = 0; 2579 } 2580 2581 /*ARGSUSED*/ 2582 void 2583 myri10ge_mbl_append(struct myri10ge_slice_state *ss, 2584 struct myri10ge_mblk_list *mbl, mblk_t *mp) 2585 { 2586 *(mbl->tail) = mp; 2587 mbl->tail = &mp->b_next; 2588 mp->b_next = NULL; 2589 mbl->cnt++; 2590 } 2591 2592 2593 static inline void 2594 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, 2595 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop) 2596 { 2597 myri10ge_rx_done_t *rx_done = &ss->rx_done; 2598 struct myri10ge_priv *mgp = ss->mgp; 2599 mblk_t *mp; 2600 struct lro_entry *lro; 2601 uint16_t length; 2602 uint16_t checksum; 2603 2604 2605 while (rx_done->entry[rx_done->idx].length != 0) { 2606 if (unlikely (*stop)) { 2607 break; 2608 } 2609 length = ntohs(rx_done->entry[rx_done->idx].length); 2610 length &= (~MXGEFW_RSS_HASH_MASK); 2611 2612 /* limit potential for livelock */ 2613 limit -= length; 2614 if (unlikely(limit < 0)) 2615 break; 2616 2617 rx_done->entry[rx_done->idx].length = 0; 2618 checksum = ntohs(rx_done->entry[rx_done->idx].checksum); 2619 if (length <= myri10ge_small_bytes) 
2620 mp = myri10ge_rx_done_small(ss, length, checksum); 2621 else 2622 mp = myri10ge_rx_done_big(ss, length, checksum); 2623 if (mp != NULL) { 2624 if (!myri10ge_lro || 2625 0 != myri10ge_lro_rx(ss, mp, checksum, mbl)) 2626 myri10ge_mbl_append(ss, mbl, mp); 2627 } 2628 rx_done->cnt++; 2629 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1); 2630 } 2631 while (ss->lro_active != NULL) { 2632 lro = ss->lro_active; 2633 ss->lro_active = lro->next; 2634 myri10ge_lro_flush(ss, lro, mbl); 2635 } 2636 } 2637 2638 static void 2639 myri10ge_intr_rx(struct myri10ge_slice_state *ss) 2640 { 2641 uint64_t gen; 2642 struct myri10ge_mblk_list mbl; 2643 2644 myri10ge_mbl_init(&mbl); 2645 if (mutex_tryenter(&ss->rx_lock) == 0) 2646 return; 2647 gen = ss->rx_gen_num; 2648 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL, 2649 &ss->rx_polling); 2650 if (mbl.head != NULL) 2651 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen); 2652 mutex_exit(&ss->rx_lock); 2653 2654 } 2655 2656 static mblk_t * 2657 myri10ge_poll_rx(void *arg, int bytes) 2658 { 2659 struct myri10ge_slice_state *ss = arg; 2660 struct myri10ge_mblk_list mbl; 2661 boolean_t dummy = B_FALSE; 2662 2663 if (bytes == 0) 2664 return (NULL); 2665 2666 myri10ge_mbl_init(&mbl); 2667 mutex_enter(&ss->rx_lock); 2668 if (ss->rx_polling) 2669 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy); 2670 else 2671 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss - 2672 ss->mgp->ss), ss->rx_token, ss->rx_polling); 2673 mutex_exit(&ss->rx_lock); 2674 return (mbl.head); 2675 } 2676 2677 /*ARGSUSED*/ 2678 static uint_t 2679 myri10ge_intr(caddr_t arg0, caddr_t arg1) 2680 { 2681 struct myri10ge_slice_state *ss = 2682 (struct myri10ge_slice_state *)(void *)arg0; 2683 struct myri10ge_priv *mgp = ss->mgp; 2684 mcp_irq_data_t *stats = ss->fw_stats; 2685 myri10ge_tx_ring_t *tx = &ss->tx; 2686 uint32_t send_done_count; 2687 uint8_t valid; 2688 2689 2690 /* make sure the DMA has finished */ 2691 if (!stats->valid) { 2692 return 
(DDI_INTR_UNCLAIMED); 2693 } 2694 valid = stats->valid; 2695 2696 /* low bit indicates receives are present */ 2697 if (valid & 1) 2698 myri10ge_intr_rx(ss); 2699 2700 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 2701 /* lower legacy IRQ */ 2702 *mgp->irq_deassert = 0; 2703 if (!myri10ge_deassert_wait) 2704 /* don't wait for conf. that irq is low */ 2705 stats->valid = 0; 2706 mb(); 2707 } else { 2708 /* no need to wait for conf. that irq is low */ 2709 stats->valid = 0; 2710 } 2711 2712 do { 2713 /* check for transmit completes and receives */ 2714 send_done_count = ntohl(stats->send_done_count); 2715 if (send_done_count != tx->pkt_done) 2716 myri10ge_tx_done(ss, (int)send_done_count); 2717 } while (*((volatile uint8_t *) &stats->valid)); 2718 2719 if (stats->stats_updated) { 2720 if (mgp->link_state != stats->link_up || stats->link_down) { 2721 mgp->link_state = stats->link_up; 2722 if (stats->link_down) { 2723 mgp->down_cnt += stats->link_down; 2724 mgp->link_state = 0; 2725 } 2726 if (mgp->link_state) { 2727 if (myri10ge_verbose) 2728 printf("%s: link up\n", mgp->name); 2729 mac_link_update(mgp->mh, LINK_STATE_UP); 2730 } else { 2731 if (myri10ge_verbose) 2732 printf("%s: link down\n", mgp->name); 2733 mac_link_update(mgp->mh, LINK_STATE_DOWN); 2734 } 2735 MYRI10GE_NIC_STAT_INC(link_changes); 2736 } 2737 if (mgp->rdma_tags_available != 2738 ntohl(ss->fw_stats->rdma_tags_available)) { 2739 mgp->rdma_tags_available = 2740 ntohl(ss->fw_stats->rdma_tags_available); 2741 cmn_err(CE_NOTE, "%s: RDMA timed out! 
" 2742 "%d tags left\n", mgp->name, 2743 mgp->rdma_tags_available); 2744 } 2745 } 2746 2747 mb(); 2748 /* check to see if we have rx token to pass back */ 2749 if (valid & 0x1) { 2750 mutex_enter(&ss->poll_lock); 2751 if (ss->rx_polling) { 2752 ss->rx_token = 1; 2753 } else { 2754 *ss->irq_claim = BE_32(3); 2755 ss->rx_token = 0; 2756 } 2757 mutex_exit(&ss->poll_lock); 2758 } 2759 *(ss->irq_claim + 1) = BE_32(3); 2760 return (DDI_INTR_CLAIMED); 2761 } 2762 2763 /* 2764 * Add or remove a multicast address. This is called with our 2765 * macinfo's lock held by GLD, so we do not need to worry about 2766 * our own locking here. 2767 */ 2768 static int 2769 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr) 2770 { 2771 myri10ge_cmd_t cmd; 2772 struct myri10ge_priv *mgp = arg; 2773 int status, join_leave; 2774 2775 if (add) 2776 join_leave = MXGEFW_JOIN_MULTICAST_GROUP; 2777 else 2778 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP; 2779 (void) memcpy(&cmd.data0, multicastaddr, 4); 2780 (void) memcpy(&cmd.data1, multicastaddr + 4, 2); 2781 cmd.data0 = htonl(cmd.data0); 2782 cmd.data1 = htonl(cmd.data1); 2783 status = myri10ge_send_cmd(mgp, join_leave, &cmd); 2784 if (status == 0) 2785 return (0); 2786 2787 cmn_err(CE_WARN, "%s: failed to set multicast address\n", 2788 mgp->name); 2789 return (status); 2790 } 2791 2792 2793 static int 2794 myri10ge_m_promisc(void *arg, boolean_t on) 2795 { 2796 struct myri10ge_priv *mgp = arg; 2797 2798 myri10ge_change_promisc(mgp, on); 2799 return (0); 2800 } 2801 2802 /* 2803 * copy an array of mcp_kreq_ether_send_t's to the mcp. 
Copy 2804 * backwards one at a time and handle ring wraps 2805 */ 2806 2807 static inline void 2808 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx, 2809 mcp_kreq_ether_send_t *src, int cnt) 2810 { 2811 int idx, starting_slot; 2812 starting_slot = tx->req; 2813 while (cnt > 1) { 2814 cnt--; 2815 idx = (starting_slot + cnt) & tx->mask; 2816 myri10ge_pio_copy(&tx->lanai[idx], 2817 &src[cnt], sizeof (*src)); 2818 mb(); 2819 } 2820 } 2821 2822 /* 2823 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2824 * at most 32 bytes at a time, so as to avoid involving the software 2825 * pio handler in the nic. We re-write the first segment's flags 2826 * to mark them valid only after writing the entire chain 2827 */ 2828 2829 static inline void 2830 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 2831 int cnt) 2832 { 2833 int idx, i; 2834 uint32_t *src_ints, *dst_ints; 2835 mcp_kreq_ether_send_t *srcp, *dstp, *dst; 2836 uint8_t last_flags; 2837 2838 idx = tx->req & tx->mask; 2839 2840 last_flags = src->flags; 2841 src->flags = 0; 2842 mb(); 2843 dst = dstp = &tx->lanai[idx]; 2844 srcp = src; 2845 2846 if ((idx + cnt) < tx->mask) { 2847 for (i = 0; i < (cnt - 1); i += 2) { 2848 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src)); 2849 mb(); /* force write every 32 bytes */ 2850 srcp += 2; 2851 dstp += 2; 2852 } 2853 } else { 2854 /* 2855 * submit all but the first request, and ensure 2856 * that it is submitted below 2857 */ 2858 myri10ge_submit_req_backwards(tx, src, cnt); 2859 i = 0; 2860 } 2861 if (i < cnt) { 2862 /* submit the first request */ 2863 myri10ge_pio_copy(dstp, srcp, sizeof (*src)); 2864 mb(); /* barrier before setting valid flag */ 2865 } 2866 2867 /* re-write the last 32-bits with the valid flags */ 2868 src->flags |= last_flags; 2869 src_ints = (uint32_t *)src; 2870 src_ints += 3; 2871 dst_ints = (uint32_t *)dst; 2872 dst_ints += 3; 2873 *dst_ints = *src_ints; 2874 tx->req += cnt; 2875 mb(); 2876 /* notify NIC to poll this 
tx ring */ 2877 if (!tx->active && tx->go != NULL) { 2878 *(int *)(void *)tx->go = 1; 2879 tx->active = 1; 2880 tx->activate++; 2881 mb(); 2882 } 2883 } 2884 2885 /* ARGSUSED */ 2886 static inline void 2887 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 2888 { 2889 uint32_t lso_flag; 2890 mac_lso_get(mp, mss, &lso_flag); 2891 (*flags) |= lso_flag; 2892 } 2893 2894 2895 /* like pullupmsg, except preserve hcksum/LSO attributes */ 2896 static int 2897 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) 2898 { 2899 uint32_t start, stuff, tx_offload_flags, mss; 2900 int ok; 2901 2902 mss = 0; 2903 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 2904 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 2905 2906 ok = pullupmsg(mp, -1); 2907 if (!ok) { 2908 printf("pullupmsg failed"); 2909 return (DDI_FAILURE); 2910 } 2911 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); 2912 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); 2913 if (tx_offload_flags & HW_LSO) 2914 DB_LSOMSS(mp) = (uint16_t)mss; 2915 lso_info_set(mp, mss, tx_offload_flags); 2916 return (DDI_SUCCESS); 2917 } 2918 2919 static inline void 2920 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh, 2921 int opackets, int obytes) 2922 { 2923 s->un.all = 0; 2924 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2925 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2926 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2927 s->un.s.brdcstxmt = 1; 2928 else 2929 s->un.s.multixmt = 1; 2930 } 2931 s->un.s.opackets = (uint16_t)opackets; 2932 s->un.s.obytes = obytes; 2933 } 2934 2935 static int 2936 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 2937 mcp_kreq_ether_send_t *req) 2938 { 2939 myri10ge_tx_ring_t *tx = &ss->tx; 2940 caddr_t ptr; 2941 struct myri10ge_tx_copybuf *cp; 2942 mblk_t *bp; 2943 int idx, mblen, avail; 2944 uint16_t len; 2945 2946 mutex_enter(&tx->lock); 2947 avail = tx->mask - (tx->req - tx->done); 2948 if 
(avail <= 1) { 2949 mutex_exit(&tx->lock); 2950 return (EBUSY); 2951 } 2952 idx = tx->req & tx->mask; 2953 cp = &tx->cp[idx]; 2954 ptr = cp->va; 2955 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) { 2956 mblen = MBLKL(bp); 2957 bcopy(bp->b_rptr, ptr, mblen); 2958 ptr += mblen; 2959 len += mblen; 2960 } 2961 /* ensure runts are padded to 60 bytes */ 2962 if (len < 60) { 2963 bzero(ptr, 64 - len); 2964 len = 60; 2965 } 2966 req->addr_low = cp->dma.low; 2967 req->addr_high = cp->dma.high; 2968 req->length = htons(len); 2969 req->pad = 0; 2970 req->rdma_count = 1; 2971 myri10ge_tx_stat(&tx->info[idx].stat, 2972 (struct ether_header *)(void *)cp->va, 1, len); 2973 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV); 2974 myri10ge_submit_req(&ss->tx, req, 1); 2975 mutex_exit(&tx->lock); 2976 freemsg(mp); 2977 return (DDI_SUCCESS); 2978 } 2979 2980 2981 static void 2982 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list, 2983 struct myri10ge_tx_buffer_state *tx_info, 2984 int count) 2985 { 2986 int i, idx; 2987 2988 idx = 0; /* gcc -Wuninitialized */ 2989 /* store unmapping and bp info for tx irq handler */ 2990 for (i = 0; i < count; i++) { 2991 idx = (tx->req + i) & tx->mask; 2992 tx->info[idx].m = tx_info[i].m; 2993 tx->info[idx].handle = tx_info[i].handle; 2994 } 2995 tx->info[idx].stat.un.all = tx_info[0].stat.un.all; 2996 2997 /* submit the frame to the nic */ 2998 myri10ge_submit_req(tx, req_list, count); 2999 3000 3001 } 3002 3003 3004 3005 static void 3006 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf) 3007 { 3008 mblk_t *bp; 3009 int seglen; 3010 uint_t count; 3011 3012 bp = mp; 3013 3014 while (off > 0) { 3015 seglen = MBLKL(bp); 3016 if (off < seglen) 3017 break; 3018 off -= seglen; 3019 bp = bp->b_cont; 3020 } 3021 while (len > 0) { 3022 seglen = MBLKL(bp); 3023 count = min(seglen - off, len); 3024 bcopy(bp->b_rptr + off, buf, count); 3025 len -= count; 3026 buf += count; 3027 off = 0; 3028 bp = 
bp->b_cont; 3029 } 3030 } 3031 3032 static int 3033 myri10ge_ether_parse_header(mblk_t *mp) 3034 { 3035 struct ether_header eh_copy; 3036 struct ether_header *eh; 3037 int eth_hdr_len, seglen; 3038 3039 seglen = MBLKL(mp); 3040 eth_hdr_len = sizeof (*eh); 3041 if (seglen < eth_hdr_len) { 3042 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy); 3043 eh = &eh_copy; 3044 } else { 3045 eh = (struct ether_header *)(void *)mp->b_rptr; 3046 } 3047 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 3048 eth_hdr_len += 4; 3049 } 3050 3051 return (eth_hdr_len); 3052 } 3053 3054 static int 3055 myri10ge_lso_parse_header(mblk_t *mp, int off) 3056 { 3057 char buf[128]; 3058 int seglen, sum_off; 3059 struct ip *ip; 3060 struct tcphdr *tcp; 3061 3062 seglen = MBLKL(mp); 3063 if (seglen < off + sizeof (*ip)) { 3064 myri10ge_copydata(mp, off, sizeof (*ip), buf); 3065 ip = (struct ip *)(void *)buf; 3066 } else { 3067 ip = (struct ip *)(void *)(mp->b_rptr + off); 3068 } 3069 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) { 3070 myri10ge_copydata(mp, off, 3071 (ip->ip_hl << 2) + sizeof (*tcp), buf); 3072 ip = (struct ip *)(void *)buf; 3073 } 3074 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2)); 3075 3076 /* 3077 * NIC expects ip_sum to be zero. Recent changes to 3078 * OpenSolaris leave the correct ip checksum there, rather 3079 * than the required zero, so we need to zero it. Otherwise, 3080 * the NIC will produce bad checksums when sending LSO packets. 
3081 */ 3082 if (ip->ip_sum != 0) { 3083 if (((char *)ip) != buf) { 3084 /* ip points into mblk, so just zero it */ 3085 ip->ip_sum = 0; 3086 } else { 3087 /* 3088 * ip points into a copy, so walk the chain 3089 * to find the ip_csum, then zero it 3090 */ 3091 sum_off = off + _PTRDIFF(&ip->ip_sum, buf); 3092 while (sum_off > (int)(MBLKL(mp) - 1)) { 3093 sum_off -= MBLKL(mp); 3094 mp = mp->b_cont; 3095 } 3096 mp->b_rptr[sum_off] = 0; 3097 sum_off++; 3098 while (sum_off > MBLKL(mp) - 1) { 3099 sum_off -= MBLKL(mp); 3100 mp = mp->b_cont; 3101 } 3102 mp->b_rptr[sum_off] = 0; 3103 } 3104 } 3105 return (off + ((ip->ip_hl + tcp->th_off) << 2)); 3106 } 3107 3108 static int 3109 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 3110 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size, 3111 uint16_t mss, uint8_t cksum_offset) 3112 { 3113 myri10ge_tx_ring_t *tx = &ss->tx; 3114 struct myri10ge_priv *mgp = ss->mgp; 3115 mblk_t *bp; 3116 mcp_kreq_ether_send_t *req; 3117 struct myri10ge_tx_copybuf *cp; 3118 caddr_t rptr, ptr; 3119 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp; 3120 int resid, avail, idx, hdr_size_tmp, tx_boundary; 3121 int rdma_count; 3122 uint32_t seglen, len, boundary, low, high_swapped; 3123 uint16_t pseudo_hdr_offset = htons(mss); 3124 uint8_t flags; 3125 3126 tx_boundary = mgp->tx_boundary; 3127 hdr_size_tmp = hdr_size; 3128 resid = tx_boundary; 3129 count = 1; 3130 mutex_enter(&tx->lock); 3131 3132 /* check to see if the slots are really there */ 3133 avail = tx->mask - (tx->req - tx->done); 3134 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) { 3135 atomic_add_32(&tx->stall, 1); 3136 mutex_exit(&tx->lock); 3137 return (EBUSY); 3138 } 3139 3140 /* copy */ 3141 cum_len = -hdr_size; 3142 count = 0; 3143 req = req_list; 3144 idx = tx->mask & tx->req; 3145 cp = &tx->cp[idx]; 3146 low = ntohl(cp->dma.low); 3147 ptr = cp->va; 3148 cp->len = 0; 3149 if (mss) { 3150 int payload = pkt_size - hdr_size; 3151 uint16_t opackets = 
(payload / mss) + ((payload % mss) != 0); 3152 tx->info[idx].ostat.opackets = opackets; 3153 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size 3154 + pkt_size; 3155 } 3156 hdr_size_tmp = hdr_size; 3157 mss_resid = mss; 3158 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3159 tx_req = tx->req; 3160 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3161 mblen = MBLKL(bp); 3162 rptr = (caddr_t)bp->b_rptr; 3163 len = min(hdr_size_tmp, mblen); 3164 if (len) { 3165 bcopy(rptr, ptr, len); 3166 rptr += len; 3167 ptr += len; 3168 resid -= len; 3169 mblen -= len; 3170 hdr_size_tmp -= len; 3171 cp->len += len; 3172 if (hdr_size_tmp) 3173 continue; 3174 if (resid < mss) { 3175 tx_req++; 3176 idx = tx->mask & tx_req; 3177 cp = &tx->cp[idx]; 3178 low = ntohl(cp->dma.low); 3179 ptr = cp->va; 3180 resid = tx_boundary; 3181 } 3182 } 3183 while (mblen) { 3184 len = min(mss_resid, mblen); 3185 bcopy(rptr, ptr, len); 3186 mss_resid -= len; 3187 resid -= len; 3188 mblen -= len; 3189 rptr += len; 3190 ptr += len; 3191 cp->len += len; 3192 if (mss_resid == 0) { 3193 mss_resid = mss; 3194 if (resid < mss) { 3195 tx_req++; 3196 idx = tx->mask & tx_req; 3197 cp = &tx->cp[idx]; 3198 cp->len = 0; 3199 low = ntohl(cp->dma.low); 3200 ptr = cp->va; 3201 resid = tx_boundary; 3202 } 3203 } 3204 } 3205 } 3206 3207 req = req_list; 3208 pkt_size_tmp = pkt_size; 3209 count = 0; 3210 rdma_count = 0; 3211 tx_req = tx->req; 3212 while (pkt_size_tmp) { 3213 idx = tx->mask & tx_req; 3214 cp = &tx->cp[idx]; 3215 high_swapped = cp->dma.high; 3216 low = ntohl(cp->dma.low); 3217 len = cp->len; 3218 if (len == 0) { 3219 printf("len=0! 
pkt_size_tmp=%d, pkt_size=%d\n", 3220 pkt_size_tmp, pkt_size); 3221 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3222 mblen = MBLKL(bp); 3223 printf("mblen:%d\n", mblen); 3224 } 3225 pkt_size_tmp = pkt_size; 3226 tx_req = tx->req; 3227 while (pkt_size_tmp > 0) { 3228 idx = tx->mask & tx_req; 3229 cp = &tx->cp[idx]; 3230 printf("cp->len = %d\n", cp->len); 3231 pkt_size_tmp -= cp->len; 3232 tx_req++; 3233 } 3234 printf("dropped\n"); 3235 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3236 goto done; 3237 } 3238 pkt_size_tmp -= len; 3239 while (len) { 3240 while (len) { 3241 uint8_t flags_next; 3242 int cum_len_next; 3243 3244 boundary = (low + mgp->tx_boundary) & 3245 ~(mgp->tx_boundary - 1); 3246 seglen = boundary - low; 3247 if (seglen > len) 3248 seglen = len; 3249 3250 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3251 cum_len_next = cum_len + seglen; 3252 (req-rdma_count)->rdma_count = rdma_count + 1; 3253 if (likely(cum_len >= 0)) { 3254 /* payload */ 3255 int next_is_first, chop; 3256 3257 chop = (cum_len_next > mss); 3258 cum_len_next = cum_len_next % mss; 3259 next_is_first = (cum_len_next == 0); 3260 flags |= chop * 3261 MXGEFW_FLAGS_TSO_CHOP; 3262 flags_next |= next_is_first * 3263 MXGEFW_FLAGS_FIRST; 3264 rdma_count |= -(chop | next_is_first); 3265 rdma_count += chop & !next_is_first; 3266 } else if (likely(cum_len_next >= 0)) { 3267 /* header ends */ 3268 int small; 3269 3270 rdma_count = -1; 3271 cum_len_next = 0; 3272 seglen = -cum_len; 3273 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 3274 flags_next = MXGEFW_FLAGS_TSO_PLD | 3275 MXGEFW_FLAGS_FIRST | 3276 (small * MXGEFW_FLAGS_SMALL); 3277 } 3278 req->addr_high = high_swapped; 3279 req->addr_low = htonl(low); 3280 req->pseudo_hdr_offset = pseudo_hdr_offset; 3281 req->pad = 0; /* complete solid 16-byte block */ 3282 req->rdma_count = 1; 3283 req->cksum_offset = cksum_offset; 3284 req->length = htons(seglen); 3285 req->flags = flags | ((cum_len & 1) * 3286 MXGEFW_FLAGS_ALIGN_ODD); 3287 if (cksum_offset > seglen) 
3288 cksum_offset -= seglen; 3289 else 3290 cksum_offset = 0; 3291 low += seglen; 3292 len -= seglen; 3293 cum_len = cum_len_next; 3294 req++; 3295 req->flags = 0; 3296 flags = flags_next; 3297 count++; 3298 rdma_count++; 3299 } 3300 } 3301 tx_req++; 3302 } 3303 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3304 do { 3305 req--; 3306 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3307 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3308 MXGEFW_FLAGS_FIRST))); 3309 3310 myri10ge_submit_req(tx, req_list, count); 3311 done: 3312 mutex_exit(&tx->lock); 3313 freemsg(mp); 3314 return (DDI_SUCCESS); 3315 } 3316 3317 /* 3318 * Try to send the chain of buffers described by the mp. We must not 3319 * encapsulate more than eth->tx.req - eth->tx.done, or 3320 * MXGEFW_MAX_SEND_DESC, whichever is more. 3321 */ 3322 3323 static int 3324 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, 3325 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info) 3326 { 3327 struct myri10ge_priv *mgp = ss->mgp; 3328 myri10ge_tx_ring_t *tx = &ss->tx; 3329 mcp_kreq_ether_send_t *req; 3330 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL; 3331 mblk_t *bp; 3332 ddi_dma_cookie_t cookie; 3333 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen, 3334 rdma_count, cum_len, lso_hdr_size; 3335 uint32_t start, stuff, tx_offload_flags; 3336 uint32_t seglen, len, mss, boundary, low, high_swapped; 3337 uint_t ncookies; 3338 uint16_t pseudo_hdr_offset; 3339 uint8_t flags, cksum_offset, odd_flag; 3340 int pkt_size; 3341 int lso_copy = myri10ge_lso_copy; 3342 try_pullup = 1; 3343 3344 again: 3345 /* Setup checksum offloading, if needed */ 3346 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 3347 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 3348 if (tx_offload_flags & HW_LSO) { 3349 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3350 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) { 3351 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags); 3352 
freemsg(mp); 3353 return (DDI_SUCCESS); 3354 } 3355 } else { 3356 max_segs = MXGEFW_MAX_SEND_DESC; 3357 mss = 0; 3358 } 3359 req = req_list; 3360 cksum_offset = 0; 3361 pseudo_hdr_offset = 0; 3362 3363 /* leave an extra slot keep the ring from wrapping */ 3364 avail = tx->mask - (tx->req - tx->done); 3365 3366 /* 3367 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length 3368 * message will need to be pulled up in order to fit. 3369 * Otherwise, we are low on transmit descriptors, it is 3370 * probably better to stall and try again rather than pullup a 3371 * message to fit. 3372 */ 3373 3374 if (avail < max_segs) { 3375 err = EBUSY; 3376 atomic_add_32(&tx->stall_early, 1); 3377 goto stall; 3378 } 3379 3380 /* find out how long the frame is and how many segments it is */ 3381 count = 0; 3382 odd_flag = 0; 3383 pkt_size = 0; 3384 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 3385 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3386 dblk_t *dbp; 3387 mblen = MBLKL(bp); 3388 if (mblen == 0) { 3389 /* 3390 * we can't simply skip over 0-length mblks 3391 * because the hardware can't deal with them, 3392 * and we could leak them. 
3393 */ 3394 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len); 3395 err = EIO; 3396 goto pullup; 3397 } 3398 /* 3399 * There's no advantage to copying most gesballoc 3400 * attached blocks, so disable lso copy in that case 3401 */ 3402 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) { 3403 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) { 3404 lso_copy = 0; 3405 } 3406 } 3407 pkt_size += mblen; 3408 count++; 3409 } 3410 3411 /* Try to pull up excessivly long chains */ 3412 if (count >= max_segs) { 3413 err = myri10ge_pullup(ss, mp); 3414 if (likely(err == DDI_SUCCESS)) { 3415 count = 1; 3416 } else { 3417 if (count < MYRI10GE_MAX_SEND_DESC_TSO) { 3418 /* 3419 * just let the h/w send it, it will be 3420 * inefficient, but us better than dropping 3421 */ 3422 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3423 } else { 3424 /* drop it */ 3425 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3426 freemsg(mp); 3427 return (0); 3428 } 3429 } 3430 } 3431 3432 cum_len = 0; 3433 maclen = myri10ge_ether_parse_header(mp); 3434 3435 if (tx_offload_flags & HCK_PARTIALCKSUM) { 3436 3437 cksum_offset = start + maclen; 3438 pseudo_hdr_offset = htons(stuff + maclen); 3439 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 3440 flags |= MXGEFW_FLAGS_CKSUM; 3441 } 3442 3443 lso_hdr_size = 0; /* -Wunitinialized */ 3444 if (mss) { /* LSO */ 3445 /* this removes any CKSUM flag from before */ 3446 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3447 /* 3448 * parse the headers and set cum_len to a negative 3449 * value to reflect the offset of the TCP payload 3450 */ 3451 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen); 3452 cum_len = -lso_hdr_size; 3453 if ((mss < mgp->tx_boundary) && lso_copy) { 3454 err = myri10ge_tx_tso_copy(ss, mp, req_list, 3455 lso_hdr_size, pkt_size, mss, cksum_offset); 3456 return (err); 3457 } 3458 3459 /* 3460 * for TSO, pseudo_hdr_offset holds mss. The firmware 3461 * figures out where to put the checksum by parsing 3462 * the header. 
3463 */ 3464 3465 pseudo_hdr_offset = htons(mss); 3466 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) { 3467 flags |= MXGEFW_FLAGS_SMALL; 3468 if (pkt_size < myri10ge_tx_copylen) { 3469 req->cksum_offset = cksum_offset; 3470 req->pseudo_hdr_offset = pseudo_hdr_offset; 3471 req->flags = flags; 3472 err = myri10ge_tx_copy(ss, mp, req); 3473 return (err); 3474 } 3475 cum_len = 0; 3476 } 3477 3478 /* pull one DMA handle for each bp from our freelist */ 3479 handles = NULL; 3480 err = myri10ge_alloc_tx_handles(ss, count, &handles); 3481 if (err != DDI_SUCCESS) { 3482 err = DDI_FAILURE; 3483 goto stall; 3484 } 3485 count = 0; 3486 rdma_count = 0; 3487 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3488 mblen = MBLKL(bp); 3489 dma_handle = handles; 3490 handles = handles->next; 3491 3492 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL, 3493 (caddr_t)bp->b_rptr, mblen, 3494 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL, 3495 &cookie, &ncookies); 3496 if (unlikely(rv != DDI_DMA_MAPPED)) { 3497 err = EIO; 3498 try_pullup = 0; 3499 dma_handle->next = handles; 3500 handles = dma_handle; 3501 goto abort_with_handles; 3502 } 3503 3504 /* reserve the slot */ 3505 tx_info[count].m = bp; 3506 tx_info[count].handle = dma_handle; 3507 3508 for (; ; ) { 3509 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 3510 high_swapped = 3511 htonl(MYRI10GE_HIGHPART_TO_U32( 3512 cookie.dmac_laddress)); 3513 len = (uint32_t)cookie.dmac_size; 3514 while (len) { 3515 uint8_t flags_next; 3516 int cum_len_next; 3517 3518 boundary = (low + mgp->tx_boundary) & 3519 ~(mgp->tx_boundary - 1); 3520 seglen = boundary - low; 3521 if (seglen > len) 3522 seglen = len; 3523 3524 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3525 cum_len_next = cum_len + seglen; 3526 if (mss) { 3527 (req-rdma_count)->rdma_count = 3528 rdma_count + 1; 3529 if (likely(cum_len >= 0)) { 3530 /* payload */ 3531 int next_is_first, chop; 3532 3533 chop = (cum_len_next > mss); 3534 cum_len_next = 3535 cum_len_next % mss; 3536 
next_is_first = 3537 (cum_len_next == 0); 3538 flags |= chop * 3539 MXGEFW_FLAGS_TSO_CHOP; 3540 flags_next |= next_is_first * 3541 MXGEFW_FLAGS_FIRST; 3542 rdma_count |= 3543 -(chop | next_is_first); 3544 rdma_count += 3545 chop & !next_is_first; 3546 } else if (likely(cum_len_next >= 0)) { 3547 /* header ends */ 3548 int small; 3549 3550 rdma_count = -1; 3551 cum_len_next = 0; 3552 seglen = -cum_len; 3553 small = (mss <= 3554 MXGEFW_SEND_SMALL_SIZE); 3555 flags_next = 3556 MXGEFW_FLAGS_TSO_PLD 3557 | MXGEFW_FLAGS_FIRST 3558 | (small * 3559 MXGEFW_FLAGS_SMALL); 3560 } 3561 } 3562 req->addr_high = high_swapped; 3563 req->addr_low = htonl(low); 3564 req->pseudo_hdr_offset = pseudo_hdr_offset; 3565 req->pad = 0; /* complete solid 16-byte block */ 3566 req->rdma_count = 1; 3567 req->cksum_offset = cksum_offset; 3568 req->length = htons(seglen); 3569 req->flags = flags | ((cum_len & 1) * odd_flag); 3570 if (cksum_offset > seglen) 3571 cksum_offset -= seglen; 3572 else 3573 cksum_offset = 0; 3574 low += seglen; 3575 len -= seglen; 3576 cum_len = cum_len_next; 3577 count++; 3578 rdma_count++; 3579 /* make sure all the segments will fit */ 3580 if (unlikely(count >= max_segs)) { 3581 MYRI10GE_ATOMIC_SLICE_STAT_INC( 3582 xmit_lowbuf); 3583 /* may try a pullup */ 3584 err = EBUSY; 3585 if (try_pullup) 3586 try_pullup = 2; 3587 goto abort_with_handles; 3588 } 3589 req++; 3590 req->flags = 0; 3591 flags = flags_next; 3592 tx_info[count].m = 0; 3593 } 3594 ncookies--; 3595 if (ncookies == 0) 3596 break; 3597 ddi_dma_nextcookie(dma_handle->h, &cookie); 3598 } 3599 } 3600 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3601 3602 if (mss) { 3603 do { 3604 req--; 3605 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3606 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3607 MXGEFW_FLAGS_FIRST))); 3608 } 3609 3610 /* calculate tx stats */ 3611 if (mss) { 3612 uint16_t opackets; 3613 int payload; 3614 3615 payload = pkt_size - lso_hdr_size; 3616 opackets = (payload / mss) + ((payload % mss) 
!= 0); 3617 tx_info[0].stat.un.all = 0; 3618 tx_info[0].ostat.opackets = opackets; 3619 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size 3620 + pkt_size; 3621 } else { 3622 myri10ge_tx_stat(&tx_info[0].stat, 3623 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size); 3624 } 3625 mutex_enter(&tx->lock); 3626 3627 /* check to see if the slots are really there */ 3628 avail = tx->mask - (tx->req - tx->done); 3629 if (unlikely(avail <= count)) { 3630 mutex_exit(&tx->lock); 3631 err = 0; 3632 goto late_stall; 3633 } 3634 3635 myri10ge_send_locked(tx, req_list, tx_info, count); 3636 mutex_exit(&tx->lock); 3637 return (DDI_SUCCESS); 3638 3639 late_stall: 3640 try_pullup = 0; 3641 atomic_add_32(&tx->stall_late, 1); 3642 3643 abort_with_handles: 3644 /* unbind and free handles from previous mblks */ 3645 for (i = 0; i < count; i++) { 3646 bp = tx_info[i].m; 3647 tx_info[i].m = 0; 3648 if (bp) { 3649 dma_handle = tx_info[i].handle; 3650 (void) ddi_dma_unbind_handle(dma_handle->h); 3651 dma_handle->next = handles; 3652 handles = dma_handle; 3653 tx_info[i].handle = NULL; 3654 tx_info[i].m = NULL; 3655 } 3656 } 3657 myri10ge_free_tx_handle_slist(tx, handles); 3658 pullup: 3659 if (try_pullup) { 3660 err = myri10ge_pullup(ss, mp); 3661 if (err != DDI_SUCCESS && try_pullup == 2) { 3662 /* drop */ 3663 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3664 freemsg(mp); 3665 return (0); 3666 } 3667 try_pullup = 0; 3668 goto again; 3669 } 3670 3671 stall: 3672 if (err != 0) { 3673 if (err == EBUSY) { 3674 atomic_add_32(&tx->stall, 1); 3675 } else { 3676 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3677 } 3678 } 3679 return (err); 3680 } 3681 3682 static mblk_t * 3683 myri10ge_send_wrapper(void *arg, mblk_t *mp) 3684 { 3685 struct myri10ge_slice_state *ss = arg; 3686 int err = 0; 3687 mcp_kreq_ether_send_t *req_list; 3688 #if defined(__i386) 3689 /* 3690 * We need about 2.5KB of scratch space to handle transmits. 
3691 * i86pc has only 8KB of kernel stack space, so we malloc the 3692 * scratch space there rather than keeping it on the stack. 3693 */ 3694 size_t req_size, tx_info_size; 3695 struct myri10ge_tx_buffer_state *tx_info; 3696 caddr_t req_bytes; 3697 3698 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3699 + 8; 3700 req_bytes = kmem_alloc(req_size, KM_SLEEP); 3701 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1); 3702 tx_info = kmem_alloc(tx_info_size, KM_SLEEP); 3703 #else 3704 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3705 + 8]; 3706 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1]; 3707 #endif 3708 3709 /* ensure req_list entries are aligned to 8 bytes */ 3710 req_list = (struct mcp_kreq_ether_send *) 3711 (((unsigned long)req_bytes + 7UL) & ~7UL); 3712 3713 err = myri10ge_send(ss, mp, req_list, tx_info); 3714 3715 #if defined(__i386) 3716 kmem_free(tx_info, tx_info_size); 3717 kmem_free(req_bytes, req_size); 3718 #endif 3719 if (err) 3720 return (mp); 3721 else 3722 return (NULL); 3723 } 3724 3725 static int 3726 myri10ge_addmac(void *arg, const uint8_t *mac_addr) 3727 { 3728 struct myri10ge_priv *mgp = arg; 3729 int err; 3730 3731 if (mac_addr == NULL) 3732 return (EINVAL); 3733 3734 mutex_enter(&mgp->intrlock); 3735 if (mgp->macaddr_cnt) { 3736 mutex_exit(&mgp->intrlock); 3737 return (ENOSPC); 3738 } 3739 err = myri10ge_m_unicst(mgp, mac_addr); 3740 if (!err) 3741 mgp->macaddr_cnt++; 3742 3743 mutex_exit(&mgp->intrlock); 3744 if (err) 3745 return (err); 3746 3747 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr)); 3748 return (0); 3749 } 3750 3751 /*ARGSUSED*/ 3752 static int 3753 myri10ge_remmac(void *arg, const uint8_t *mac_addr) 3754 { 3755 struct myri10ge_priv *mgp = arg; 3756 3757 mutex_enter(&mgp->intrlock); 3758 mgp->macaddr_cnt--; 3759 mutex_exit(&mgp->intrlock); 3760 3761 return (0); 3762 } 3763 3764 /*ARGSUSED*/ 3765 static void 3766 myri10ge_fill_group(void 
*arg, mac_ring_type_t rtype, const int index, 3767 mac_group_info_t *infop, mac_group_handle_t gh) 3768 { 3769 struct myri10ge_priv *mgp = arg; 3770 3771 if (rtype != MAC_RING_TYPE_RX) 3772 return; 3773 3774 infop->mgi_driver = (mac_group_driver_t)mgp; 3775 infop->mgi_start = NULL; 3776 infop->mgi_stop = NULL; 3777 infop->mgi_addmac = myri10ge_addmac; 3778 infop->mgi_remmac = myri10ge_remmac; 3779 infop->mgi_count = mgp->num_slices; 3780 } 3781 3782 static int 3783 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) 3784 { 3785 struct myri10ge_slice_state *ss; 3786 3787 ss = (struct myri10ge_slice_state *)rh; 3788 mutex_enter(&ss->rx_lock); 3789 ss->rx_gen_num = mr_gen_num; 3790 mutex_exit(&ss->rx_lock); 3791 return (0); 3792 } 3793 3794 /* 3795 * Retrieve a value for one of the statistics for a particular rx ring 3796 */ 3797 int 3798 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3799 { 3800 struct myri10ge_slice_state *ss; 3801 3802 ss = (struct myri10ge_slice_state *)rh; 3803 switch (stat) { 3804 case MAC_STAT_RBYTES: 3805 *val = ss->rx_stats.ibytes; 3806 break; 3807 3808 case MAC_STAT_IPACKETS: 3809 *val = ss->rx_stats.ipackets; 3810 break; 3811 3812 default: 3813 *val = 0; 3814 return (ENOTSUP); 3815 } 3816 3817 return (0); 3818 } 3819 3820 /* 3821 * Retrieve a value for one of the statistics for a particular tx ring 3822 */ 3823 int 3824 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3825 { 3826 struct myri10ge_slice_state *ss; 3827 3828 ss = (struct myri10ge_slice_state *)rh; 3829 switch (stat) { 3830 case MAC_STAT_OBYTES: 3831 *val = ss->tx.stats.obytes; 3832 break; 3833 3834 case MAC_STAT_OPACKETS: 3835 *val = ss->tx.stats.opackets; 3836 break; 3837 3838 default: 3839 *val = 0; 3840 return (ENOTSUP); 3841 } 3842 3843 return (0); 3844 } 3845 3846 static int 3847 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) 3848 { 3849 struct myri10ge_slice_state *ss; 3850 3851 ss = (struct 
myri10ge_slice_state *)intrh; 3852 mutex_enter(&ss->poll_lock); 3853 ss->rx_polling = B_TRUE; 3854 mutex_exit(&ss->poll_lock); 3855 return (0); 3856 } 3857 3858 static int 3859 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh) 3860 { 3861 struct myri10ge_slice_state *ss; 3862 3863 ss = (struct myri10ge_slice_state *)intrh; 3864 mutex_enter(&ss->poll_lock); 3865 ss->rx_polling = B_FALSE; 3866 if (ss->rx_token) { 3867 *ss->irq_claim = BE_32(3); 3868 ss->rx_token = 0; 3869 } 3870 mutex_exit(&ss->poll_lock); 3871 return (0); 3872 } 3873 3874 /*ARGSUSED*/ 3875 static void 3876 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 3877 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 3878 { 3879 struct myri10ge_priv *mgp = arg; 3880 struct myri10ge_slice_state *ss; 3881 mac_intr_t *mintr = &infop->mri_intr; 3882 3883 ASSERT((unsigned int)ring_index < mgp->num_slices); 3884 3885 ss = &mgp->ss[ring_index]; 3886 switch (rtype) { 3887 case MAC_RING_TYPE_RX: 3888 ss->rx_rh = rh; 3889 infop->mri_driver = (mac_ring_driver_t)ss; 3890 infop->mri_start = myri10ge_ring_start; 3891 infop->mri_stop = NULL; 3892 infop->mri_poll = myri10ge_poll_rx; 3893 infop->mri_stat = myri10ge_rx_ring_stat; 3894 mintr->mi_handle = (mac_intr_handle_t)ss; 3895 mintr->mi_enable = myri10ge_rx_ring_intr_enable; 3896 mintr->mi_disable = myri10ge_rx_ring_intr_disable; 3897 break; 3898 case MAC_RING_TYPE_TX: 3899 ss->tx.rh = rh; 3900 infop->mri_driver = (mac_ring_driver_t)ss; 3901 infop->mri_start = NULL; 3902 infop->mri_stop = NULL; 3903 infop->mri_tx = myri10ge_send_wrapper; 3904 infop->mri_stat = myri10ge_tx_ring_stat; 3905 break; 3906 default: 3907 break; 3908 } 3909 } 3910 3911 static void 3912 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp) 3913 { 3914 if (mgp->ksp_stat == NULL) 3915 return; 3916 3917 kstat_delete(mgp->ksp_stat); 3918 mgp->ksp_stat = NULL; 3919 } 3920 3921 static void 3922 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss) 3923 
{ 3924 if (ss->ksp_stat == NULL) 3925 return; 3926 3927 kstat_delete(ss->ksp_stat); 3928 ss->ksp_stat = NULL; 3929 } 3930 3931 static void 3932 myri10ge_info_destroy(struct myri10ge_priv *mgp) 3933 { 3934 if (mgp->ksp_info == NULL) 3935 return; 3936 3937 kstat_delete(mgp->ksp_info); 3938 mgp->ksp_info = NULL; 3939 } 3940 3941 static int 3942 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw) 3943 { 3944 struct myri10ge_nic_stat *ethstat; 3945 struct myri10ge_priv *mgp; 3946 mcp_irq_data_t *fw_stats; 3947 3948 3949 if (rw == KSTAT_WRITE) 3950 return (EACCES); 3951 3952 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data; 3953 mgp = (struct myri10ge_priv *)ksp->ks_private; 3954 fw_stats = mgp->ss[0].fw_stats; 3955 3956 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma; 3957 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma; 3958 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma; 3959 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) 3960 ethstat->dma_force_physical.value.ul = 1; 3961 else 3962 ethstat->dma_force_physical.value.ul = 0; 3963 ethstat->lanes.value.ul = mgp->pcie_link_width; 3964 ethstat->dropped_bad_crc32.value.ul = 3965 ntohl(fw_stats->dropped_bad_crc32); 3966 ethstat->dropped_bad_phy.value.ul = 3967 ntohl(fw_stats->dropped_bad_phy); 3968 ethstat->dropped_link_error_or_filtered.value.ul = 3969 ntohl(fw_stats->dropped_link_error_or_filtered); 3970 ethstat->dropped_link_overflow.value.ul = 3971 ntohl(fw_stats->dropped_link_overflow); 3972 ethstat->dropped_multicast_filtered.value.ul = 3973 ntohl(fw_stats->dropped_multicast_filtered); 3974 ethstat->dropped_no_big_buffer.value.ul = 3975 ntohl(fw_stats->dropped_no_big_buffer); 3976 ethstat->dropped_no_small_buffer.value.ul = 3977 ntohl(fw_stats->dropped_no_small_buffer); 3978 ethstat->dropped_overrun.value.ul = 3979 ntohl(fw_stats->dropped_overrun); 3980 ethstat->dropped_pause.value.ul = 3981 ntohl(fw_stats->dropped_pause); 3982 ethstat->dropped_runt.value.ul = 3983 
ntohl(fw_stats->dropped_runt); 3984 ethstat->link_up.value.ul = 3985 ntohl(fw_stats->link_up); 3986 ethstat->dropped_unicast_filtered.value.ul = 3987 ntohl(fw_stats->dropped_unicast_filtered); 3988 return (0); 3989 } 3990 3991 static int 3992 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw) 3993 { 3994 struct myri10ge_slice_stat *ethstat; 3995 struct myri10ge_slice_state *ss; 3996 3997 if (rw == KSTAT_WRITE) 3998 return (EACCES); 3999 4000 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data; 4001 ss = (struct myri10ge_slice_state *)ksp->ks_private; 4002 4003 ethstat->rx_big.value.ul = ss->j_rx_cnt; 4004 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt; 4005 ethstat->rx_bigbuf_pool.value.ul = 4006 ss->jpool.num_alloc - ss->jbufs_for_smalls; 4007 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls; 4008 ethstat->rx_small.value.ul = ss->rx_small.cnt - 4009 (ss->rx_small.mask + 1); 4010 ethstat->tx_done.value.ul = ss->tx.done; 4011 ethstat->tx_req.value.ul = ss->tx.req; 4012 ethstat->tx_activate.value.ul = ss->tx.activate; 4013 ethstat->xmit_sched.value.ul = ss->tx.sched; 4014 ethstat->xmit_stall.value.ul = ss->tx.stall; 4015 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early; 4016 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late; 4017 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err); 4018 return (0); 4019 } 4020 4021 static int 4022 myri10ge_info_kstat_update(kstat_t *ksp, int rw) 4023 { 4024 struct myri10ge_info *info; 4025 struct myri10ge_priv *mgp; 4026 4027 4028 if (rw == KSTAT_WRITE) 4029 return (EACCES); 4030 4031 info = (struct myri10ge_info *)ksp->ks_data; 4032 mgp = (struct myri10ge_priv *)ksp->ks_private; 4033 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR); 4034 kstat_named_setstr(&info->firmware_version, mgp->fw_version); 4035 kstat_named_setstr(&info->firmware_name, mgp->fw_name); 4036 kstat_named_setstr(&info->interrupt_type, mgp->intr_type); 4037 kstat_named_setstr(&info->product_code, 
mgp->pc_str); 4038 kstat_named_setstr(&info->serial_number, mgp->sn_str); 4039 return (0); 4040 } 4041 4042 static struct myri10ge_info myri10ge_info_template = { 4043 { "driver_version", KSTAT_DATA_STRING }, 4044 { "firmware_version", KSTAT_DATA_STRING }, 4045 { "firmware_name", KSTAT_DATA_STRING }, 4046 { "interrupt_type", KSTAT_DATA_STRING }, 4047 { "product_code", KSTAT_DATA_STRING }, 4048 { "serial_number", KSTAT_DATA_STRING }, 4049 }; 4050 static kmutex_t myri10ge_info_template_lock; 4051 4052 4053 static int 4054 myri10ge_info_init(struct myri10ge_priv *mgp) 4055 { 4056 struct kstat *ksp; 4057 4058 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4059 "myri10ge_info", "net", KSTAT_TYPE_NAMED, 4060 sizeof (myri10ge_info_template) / 4061 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); 4062 if (ksp == NULL) { 4063 cmn_err(CE_WARN, 4064 "%s: myri10ge_info_init: kstat_create failed", mgp->name); 4065 return (DDI_FAILURE); 4066 } 4067 mgp->ksp_info = ksp; 4068 ksp->ks_update = myri10ge_info_kstat_update; 4069 ksp->ks_private = (void *) mgp; 4070 ksp->ks_data = &myri10ge_info_template; 4071 ksp->ks_lock = &myri10ge_info_template_lock; 4072 if (MYRI10GE_VERSION_STR != NULL) 4073 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1; 4074 if (mgp->fw_version != NULL) 4075 ksp->ks_data_size += strlen(mgp->fw_version) + 1; 4076 ksp->ks_data_size += strlen(mgp->fw_name) + 1; 4077 ksp->ks_data_size += strlen(mgp->intr_type) + 1; 4078 if (mgp->pc_str != NULL) 4079 ksp->ks_data_size += strlen(mgp->pc_str) + 1; 4080 if (mgp->sn_str != NULL) 4081 ksp->ks_data_size += strlen(mgp->sn_str) + 1; 4082 4083 kstat_install(ksp); 4084 return (DDI_SUCCESS); 4085 } 4086 4087 4088 static int 4089 myri10ge_nic_stat_init(struct myri10ge_priv *mgp) 4090 { 4091 struct kstat *ksp; 4092 struct myri10ge_nic_stat *ethstat; 4093 4094 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4095 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED, 4096 sizeof (*ethstat) / sizeof 
(kstat_named_t), 0); 4097 if (ksp == NULL) { 4098 cmn_err(CE_WARN, 4099 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4100 return (DDI_FAILURE); 4101 } 4102 mgp->ksp_stat = ksp; 4103 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data); 4104 4105 kstat_named_init(ðstat->dma_read_bw_MBs, 4106 "dma_read_bw_MBs", KSTAT_DATA_ULONG); 4107 kstat_named_init(ðstat->dma_write_bw_MBs, 4108 "dma_write_bw_MBs", KSTAT_DATA_ULONG); 4109 kstat_named_init(ðstat->dma_read_write_bw_MBs, 4110 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG); 4111 kstat_named_init(ðstat->dma_force_physical, 4112 "dma_force_physical", KSTAT_DATA_ULONG); 4113 kstat_named_init(ðstat->lanes, 4114 "lanes", KSTAT_DATA_ULONG); 4115 kstat_named_init(ðstat->dropped_bad_crc32, 4116 "dropped_bad_crc32", KSTAT_DATA_ULONG); 4117 kstat_named_init(ðstat->dropped_bad_phy, 4118 "dropped_bad_phy", KSTAT_DATA_ULONG); 4119 kstat_named_init(ðstat->dropped_link_error_or_filtered, 4120 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG); 4121 kstat_named_init(ðstat->dropped_link_overflow, 4122 "dropped_link_overflow", KSTAT_DATA_ULONG); 4123 kstat_named_init(ðstat->dropped_multicast_filtered, 4124 "dropped_multicast_filtered", KSTAT_DATA_ULONG); 4125 kstat_named_init(ðstat->dropped_no_big_buffer, 4126 "dropped_no_big_buffer", KSTAT_DATA_ULONG); 4127 kstat_named_init(ðstat->dropped_no_small_buffer, 4128 "dropped_no_small_buffer", KSTAT_DATA_ULONG); 4129 kstat_named_init(ðstat->dropped_overrun, 4130 "dropped_overrun", KSTAT_DATA_ULONG); 4131 kstat_named_init(ðstat->dropped_pause, 4132 "dropped_pause", KSTAT_DATA_ULONG); 4133 kstat_named_init(ðstat->dropped_runt, 4134 "dropped_runt", KSTAT_DATA_ULONG); 4135 kstat_named_init(ðstat->dropped_unicast_filtered, 4136 "dropped_unicast_filtered", KSTAT_DATA_ULONG); 4137 kstat_named_init(ðstat->dropped_runt, "dropped_runt", 4138 KSTAT_DATA_ULONG); 4139 kstat_named_init(ðstat->link_up, "link_up", KSTAT_DATA_ULONG); 4140 kstat_named_init(ðstat->link_changes, "link_changes", 4141 
KSTAT_DATA_ULONG); 4142 ksp->ks_update = myri10ge_nic_stat_kstat_update; 4143 ksp->ks_private = (void *) mgp; 4144 kstat_install(ksp); 4145 return (DDI_SUCCESS); 4146 } 4147 4148 static int 4149 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss) 4150 { 4151 struct myri10ge_priv *mgp = ss->mgp; 4152 struct kstat *ksp; 4153 struct myri10ge_slice_stat *ethstat; 4154 int instance; 4155 4156 /* 4157 * fake an instance so that the same slice numbers from 4158 * different instances do not collide 4159 */ 4160 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss); 4161 ksp = kstat_create("myri10ge", instance, 4162 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED, 4163 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4164 if (ksp == NULL) { 4165 cmn_err(CE_WARN, 4166 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4167 return (DDI_FAILURE); 4168 } 4169 ss->ksp_stat = ksp; 4170 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data); 4171 kstat_named_init(ðstat->lro_bad_csum, "lro_bad_csum", 4172 KSTAT_DATA_ULONG); 4173 kstat_named_init(ðstat->lro_flushed, "lro_flushed", 4174 KSTAT_DATA_ULONG); 4175 kstat_named_init(ðstat->lro_queued, "lro_queued", 4176 KSTAT_DATA_ULONG); 4177 kstat_named_init(ðstat->rx_bigbuf_firmware, "rx_bigbuf_firmware", 4178 KSTAT_DATA_ULONG); 4179 kstat_named_init(ðstat->rx_bigbuf_pool, "rx_bigbuf_pool", 4180 KSTAT_DATA_ULONG); 4181 kstat_named_init(ðstat->rx_bigbuf_smalls, "rx_bigbuf_smalls", 4182 KSTAT_DATA_ULONG); 4183 kstat_named_init(ðstat->rx_copy, "rx_copy", 4184 KSTAT_DATA_ULONG); 4185 kstat_named_init(ðstat->rx_big_nobuf, "rx_big_nobuf", 4186 KSTAT_DATA_ULONG); 4187 kstat_named_init(ðstat->rx_small_nobuf, "rx_small_nobuf", 4188 KSTAT_DATA_ULONG); 4189 kstat_named_init(ðstat->xmit_zero_len, "xmit_zero_len", 4190 KSTAT_DATA_ULONG); 4191 kstat_named_init(ðstat->xmit_pullup, "xmit_pullup", 4192 KSTAT_DATA_ULONG); 4193 kstat_named_init(ðstat->xmit_pullup_first, "xmit_pullup_first", 4194 KSTAT_DATA_ULONG); 4195 
kstat_named_init(ðstat->xmit_lowbuf, "xmit_lowbuf", 4196 KSTAT_DATA_ULONG); 4197 kstat_named_init(ðstat->xmit_lsobadflags, "xmit_lsobadflags", 4198 KSTAT_DATA_ULONG); 4199 kstat_named_init(ðstat->xmit_sched, "xmit_sched", 4200 KSTAT_DATA_ULONG); 4201 kstat_named_init(ðstat->xmit_stall, "xmit_stall", 4202 KSTAT_DATA_ULONG); 4203 kstat_named_init(ðstat->xmit_stall_early, "xmit_stall_early", 4204 KSTAT_DATA_ULONG); 4205 kstat_named_init(ðstat->xmit_stall_late, "xmit_stall_late", 4206 KSTAT_DATA_ULONG); 4207 kstat_named_init(ðstat->xmit_err, "xmit_err", 4208 KSTAT_DATA_ULONG); 4209 kstat_named_init(ðstat->tx_req, "tx_req", 4210 KSTAT_DATA_ULONG); 4211 kstat_named_init(ðstat->tx_activate, "tx_activate", 4212 KSTAT_DATA_ULONG); 4213 kstat_named_init(ðstat->tx_done, "tx_done", 4214 KSTAT_DATA_ULONG); 4215 kstat_named_init(ðstat->tx_handles_alloced, "tx_handles_alloced", 4216 KSTAT_DATA_ULONG); 4217 kstat_named_init(ðstat->rx_big, "rx_big", 4218 KSTAT_DATA_ULONG); 4219 kstat_named_init(ðstat->rx_small, "rx_small", 4220 KSTAT_DATA_ULONG); 4221 ksp->ks_update = myri10ge_slice_stat_kstat_update; 4222 ksp->ks_private = (void *) ss; 4223 kstat_install(ksp); 4224 return (DDI_SUCCESS); 4225 } 4226 4227 4228 4229 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4230 4231 #include <vm/hat.h> 4232 #include <sys/ddi_isa.h> 4233 void *device_arena_alloc(size_t size, int vm_flag); 4234 void device_arena_free(void *vaddr, size_t size); 4235 4236 static void 4237 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4238 { 4239 dev_info_t *parent_dip; 4240 ddi_acc_handle_t handle; 4241 unsigned long bus_number, dev_number, func_number; 4242 unsigned long cfg_pa, paddr, base, pgoffset; 4243 char *cvaddr, *ptr; 4244 uint32_t *ptr32; 4245 int retval = DDI_FAILURE; 4246 int dontcare; 4247 uint16_t read_vid, read_did, vendor_id, device_id; 4248 4249 if (!myri10ge_nvidia_ecrc_enable) 4250 return; 4251 4252 parent_dip = ddi_get_parent(mgp->dip); 4253 if (parent_dip 
== NULL) { 4254 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name); 4255 return; 4256 } 4257 4258 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) { 4259 cmn_err(CE_WARN, 4260 "%s: Could not access my parent's registers", mgp->name); 4261 return; 4262 } 4263 4264 vendor_id = pci_config_get16(handle, PCI_CONF_VENID); 4265 device_id = pci_config_get16(handle, PCI_CONF_DEVID); 4266 pci_config_teardown(&handle); 4267 4268 if (myri10ge_verbose) { 4269 unsigned long bus_number, dev_number, func_number; 4270 int reg_set, span; 4271 (void) myri10ge_reg_set(parent_dip, ®_set, &span, 4272 &bus_number, &dev_number, &func_number); 4273 if (myri10ge_verbose) 4274 printf("%s: parent at %ld:%ld:%ld\n", mgp->name, 4275 bus_number, dev_number, func_number); 4276 } 4277 4278 if (vendor_id != 0x10de) 4279 return; 4280 4281 if (device_id != 0x005d /* CK804 */ && 4282 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) { 4283 return; 4284 } 4285 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare, 4286 &bus_number, &dev_number, &func_number); 4287 4288 for (cfg_pa = 0xf0000000UL; 4289 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL; 4290 cfg_pa -= 0x10000000UL) { 4291 /* find the config space address for the nvidia bridge */ 4292 paddr = (cfg_pa + bus_number * 0x00100000UL + 4293 (dev_number * 8 + func_number) * 0x00001000UL); 4294 4295 base = paddr & (~MMU_PAGEOFFSET); 4296 pgoffset = paddr & MMU_PAGEOFFSET; 4297 4298 /* map it into the kernel */ 4299 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP); 4300 if (cvaddr == NULL) 4301 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n", 4302 mgp->name); 4303 4304 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1), 4305 i_ddi_paddr_to_pfn(base), 4306 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK); 4307 4308 ptr = cvaddr + pgoffset; 4309 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID); 4310 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID); 4311 if (vendor_id == read_did || device_id == read_did) { 4312 ptr32 = (uint32_t 
*)(void *)(ptr + 0x178); 4313 if (myri10ge_verbose) 4314 printf("%s: Enabling ECRC on upstream " 4315 "Nvidia bridge (0x%x:0x%x) " 4316 "at %ld:%ld:%ld\n", mgp->name, 4317 read_vid, read_did, bus_number, 4318 dev_number, func_number); 4319 *ptr32 |= 0x40; 4320 retval = DDI_SUCCESS; 4321 } 4322 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK); 4323 device_arena_free(cvaddr, ptob(1)); 4324 } 4325 } 4326 4327 #else 4328 /*ARGSUSED*/ 4329 static void 4330 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4331 { 4332 } 4333 #endif /* i386 */ 4334 4335 4336 /* 4337 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 4338 * when the PCI-E Completion packets are aligned on an 8-byte 4339 * boundary. Some PCI-E chip sets always align Completion packets; on 4340 * the ones that do not, the alignment can be enforced by enabling 4341 * ECRC generation (if supported). 4342 * 4343 * When PCI-E Completion packets are not aligned, it is actually more 4344 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 4345 * 4346 * If the driver can neither enable ECRC nor verify that it has 4347 * already been enabled, then it must use a firmware image which works 4348 * around unaligned completion packets (ethp_z8e.dat), and it should 4349 * also ensure that it never gives the device a Read-DMA which is 4350 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 4351 * enabled, then the driver should use the aligned (eth_z8e.dat) 4352 * firmware image, and set tx.boundary to 4KB. 4353 */ 4354 4355 4356 static int 4357 myri10ge_firmware_probe(struct myri10ge_priv *mgp) 4358 { 4359 int status; 4360 4361 mgp->tx_boundary = 4096; 4362 /* 4363 * Verify the max read request size was set to 4KB 4364 * before trying the test with 4KB. 4365 */ 4366 if (mgp->max_read_request_4k == 0) 4367 mgp->tx_boundary = 2048; 4368 /* 4369 * load the optimized firmware which assumes aligned PCIe 4370 * completions in order to see if it works on this host. 
4371 */ 4372 4373 mgp->fw_name = "rss_eth_z8e"; 4374 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4375 mgp->eth_z8e_length = rss_eth_z8e_length; 4376 4377 status = myri10ge_load_firmware(mgp); 4378 if (status != 0) { 4379 return (status); 4380 } 4381 /* 4382 * Enable ECRC if possible 4383 */ 4384 myri10ge_enable_nvidia_ecrc(mgp); 4385 4386 /* 4387 * Run a DMA test which watches for unaligned completions and 4388 * aborts on the first one seen. 4389 */ 4390 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 4391 if (status == 0) 4392 return (0); /* keep the aligned firmware */ 4393 4394 if (status != E2BIG) 4395 cmn_err(CE_WARN, "%s: DMA test failed: %d\n", 4396 mgp->name, status); 4397 if (status == ENOSYS) 4398 cmn_err(CE_WARN, "%s: Falling back to ethp! " 4399 "Please install up to date fw\n", mgp->name); 4400 return (status); 4401 } 4402 4403 static int 4404 myri10ge_select_firmware(struct myri10ge_priv *mgp) 4405 { 4406 int aligned; 4407 4408 aligned = 0; 4409 4410 if (myri10ge_force_firmware == 1) { 4411 if (myri10ge_verbose) 4412 printf("%s: Assuming aligned completions (forced)\n", 4413 mgp->name); 4414 aligned = 1; 4415 goto done; 4416 } 4417 4418 if (myri10ge_force_firmware == 2) { 4419 if (myri10ge_verbose) 4420 printf("%s: Assuming unaligned completions (forced)\n", 4421 mgp->name); 4422 aligned = 0; 4423 goto done; 4424 } 4425 4426 /* If the width is less than 8, we may used the aligned firmware */ 4427 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) { 4428 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n", 4429 mgp->name, mgp->pcie_link_width); 4430 aligned = 1; 4431 goto done; 4432 } 4433 4434 if (0 == myri10ge_firmware_probe(mgp)) 4435 return (0); /* keep optimized firmware */ 4436 4437 done: 4438 if (aligned) { 4439 mgp->fw_name = "rss_eth_z8e"; 4440 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4441 mgp->eth_z8e_length = rss_eth_z8e_length; 4442 mgp->tx_boundary = 4096; 4443 } else { 4444 mgp->fw_name = "rss_ethp_z8e"; 4445 
mgp->eth_z8e = (unsigned char *)rss_ethp_z8e; 4446 mgp->eth_z8e_length = rss_ethp_z8e_length; 4447 mgp->tx_boundary = 2048; 4448 } 4449 4450 return (myri10ge_load_firmware(mgp)); 4451 } 4452 4453 static int 4454 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler) 4455 { 4456 dev_info_t *devinfo = mgp->dip; 4457 int count, avail, actual, intr_types; 4458 int x, y, rc, inum = 0; 4459 4460 4461 rc = ddi_intr_get_supported_types(devinfo, &intr_types); 4462 if (rc != DDI_SUCCESS) { 4463 cmn_err(CE_WARN, 4464 "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name, 4465 rc); 4466 return (DDI_FAILURE); 4467 } 4468 4469 if (!myri10ge_use_msi) 4470 intr_types &= ~DDI_INTR_TYPE_MSI; 4471 if (!myri10ge_use_msix) 4472 intr_types &= ~DDI_INTR_TYPE_MSIX; 4473 4474 if (intr_types & DDI_INTR_TYPE_MSIX) { 4475 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX; 4476 mgp->intr_type = "MSI-X"; 4477 } else if (intr_types & DDI_INTR_TYPE_MSI) { 4478 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI; 4479 mgp->intr_type = "MSI"; 4480 } else { 4481 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED; 4482 mgp->intr_type = "Legacy"; 4483 } 4484 /* Get number of interrupts */ 4485 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count); 4486 if ((rc != DDI_SUCCESS) || (count == 0)) { 4487 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, " 4488 "count: %d", mgp->name, rc, count); 4489 4490 return (DDI_FAILURE); 4491 } 4492 4493 /* Get number of available interrupts */ 4494 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail); 4495 if ((rc != DDI_SUCCESS) || (avail == 0)) { 4496 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, " 4497 "rc: %d, avail: %d\n", mgp->name, rc, avail); 4498 return (DDI_FAILURE); 4499 } 4500 if (avail < count) { 4501 cmn_err(CE_NOTE, 4502 "!%s: nintrs() returned %d, navail returned %d", 4503 mgp->name, count, avail); 4504 count = avail; 4505 } 4506 4507 if (count < mgp->num_slices) 4508 return (DDI_FAILURE); 4509 4510 if (count > mgp->num_slices) 4511 
count = mgp->num_slices; 4512 4513 /* Allocate memory for MSI interrupts */ 4514 mgp->intr_size = count * sizeof (ddi_intr_handle_t); 4515 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP); 4516 4517 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum, 4518 count, &actual, DDI_INTR_ALLOC_NORMAL); 4519 4520 if ((rc != DDI_SUCCESS) || (actual == 0)) { 4521 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d", 4522 mgp->name, rc); 4523 4524 kmem_free(mgp->htable, mgp->intr_size); 4525 mgp->htable = NULL; 4526 return (DDI_FAILURE); 4527 } 4528 4529 if ((actual < count) && myri10ge_verbose) { 4530 cmn_err(CE_NOTE, "%s: got %d/%d slices", 4531 mgp->name, actual, count); 4532 } 4533 4534 mgp->intr_cnt = actual; 4535 4536 /* 4537 * Get priority for first irq, assume remaining are all the same 4538 */ 4539 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri) 4540 != DDI_SUCCESS) { 4541 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name); 4542 4543 /* Free already allocated intr */ 4544 for (y = 0; y < actual; y++) { 4545 (void) ddi_intr_free(mgp->htable[y]); 4546 } 4547 4548 kmem_free(mgp->htable, mgp->intr_size); 4549 mgp->htable = NULL; 4550 return (DDI_FAILURE); 4551 } 4552 4553 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri; 4554 4555 if (!add_handler) 4556 return (DDI_SUCCESS); 4557 4558 /* Call ddi_intr_add_handler() */ 4559 for (x = 0; x < actual; x++) { 4560 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr, 4561 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) { 4562 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed", 4563 mgp->name); 4564 4565 /* Free already allocated intr */ 4566 for (y = 0; y < actual; y++) { 4567 (void) ddi_intr_free(mgp->htable[y]); 4568 } 4569 4570 kmem_free(mgp->htable, mgp->intr_size); 4571 mgp->htable = NULL; 4572 return (DDI_FAILURE); 4573 } 4574 } 4575 4576 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap); 4577 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4578 /* Call ddi_intr_block_enable() for MSI */ 
4579 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt); 4580 } else { 4581 /* Call ddi_intr_enable() for MSI non block enable */ 4582 for (x = 0; x < mgp->intr_cnt; x++) { 4583 (void) ddi_intr_enable(mgp->htable[x]); 4584 } 4585 } 4586 4587 return (DDI_SUCCESS); 4588 } 4589 4590 static void 4591 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed) 4592 { 4593 int x, err; 4594 4595 /* Disable all interrupts */ 4596 if (handler_installed) { 4597 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4598 /* Call ddi_intr_block_disable() */ 4599 (void) ddi_intr_block_disable(mgp->htable, 4600 mgp->intr_cnt); 4601 } else { 4602 for (x = 0; x < mgp->intr_cnt; x++) { 4603 (void) ddi_intr_disable(mgp->htable[x]); 4604 } 4605 } 4606 } 4607 4608 for (x = 0; x < mgp->intr_cnt; x++) { 4609 if (handler_installed) { 4610 /* Call ddi_intr_remove_handler() */ 4611 err = ddi_intr_remove_handler(mgp->htable[x]); 4612 if (err != DDI_SUCCESS) { 4613 cmn_err(CE_WARN, 4614 "%s: ddi_intr_remove_handler for" 4615 "vec %d returned %d\n", mgp->name, 4616 x, err); 4617 } 4618 } 4619 err = ddi_intr_free(mgp->htable[x]); 4620 if (err != DDI_SUCCESS) { 4621 cmn_err(CE_WARN, 4622 "%s: ddi_intr_free for vec %d returned %d\n", 4623 mgp->name, x, err); 4624 } 4625 } 4626 kmem_free(mgp->htable, mgp->intr_size); 4627 mgp->htable = NULL; 4628 } 4629 4630 static void 4631 myri10ge_test_physical(dev_info_t *dip) 4632 { 4633 ddi_dma_handle_t handle; 4634 struct myri10ge_dma_stuff dma; 4635 void *addr; 4636 int err; 4637 4638 /* test #1, sufficient for older sparc systems */ 4639 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 4640 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr, 4641 DDI_DMA_DONTWAIT, NULL, &handle); 4642 if (err == DDI_DMA_BADATTR) 4643 goto fail; 4644 ddi_dma_free_handle(&handle); 4645 4646 /* test #2, required on Olympis where the bind is what fails */ 4647 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr, 4648 &myri10ge_dev_access_attr, 
DDI_DMA_STREAMING, 4649 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT); 4650 if (addr == NULL) 4651 goto fail; 4652 myri10ge_dma_free(&dma); 4653 return; 4654 4655 fail: 4656 if (myri10ge_verbose) 4657 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, " 4658 "using IOMMU\n", ddi_get_instance(dip)); 4659 4660 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL; 4661 } 4662 4663 static void 4664 myri10ge_get_props(dev_info_t *dip) 4665 { 4666 4667 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4668 "myri10ge_flow_control", myri10ge_flow_control); 4669 4670 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4671 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay); 4672 4673 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4674 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4675 "myri10ge_nvidia_ecrc_enable", 1); 4676 #endif 4677 4678 4679 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4680 "myri10ge_use_msi", myri10ge_use_msi); 4681 4682 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4683 "myri10ge_deassert_wait", myri10ge_deassert_wait); 4684 4685 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4686 "myri10ge_verbose", myri10ge_verbose); 4687 4688 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4689 "myri10ge_tx_copylen", myri10ge_tx_copylen); 4690 4691 if (myri10ge_tx_copylen < 60) { 4692 cmn_err(CE_WARN, 4693 "myri10ge_tx_copylen must be >= 60 bytes\n"); 4694 myri10ge_tx_copylen = 60; 4695 } 4696 4697 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4698 "myri10ge_mtu_override", myri10ge_mtu_override); 4699 4700 if (myri10ge_mtu_override >= 1500 && myri10ge_mtu_override <= 9000) 4701 myri10ge_mtu = myri10ge_mtu_override + 4702 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ; 4703 else if (myri10ge_mtu_override != 0) { 4704 cmn_err(CE_WARN, 4705 "myri10ge_mtu_override must be 
between 1500 and " 4706 "9000 bytes\n"); 4707 } 4708 4709 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4710 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial); 4711 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4712 "myri10ge_bigbufs_max", myri10ge_bigbufs_max); 4713 4714 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4715 "myri10ge_watchdog_reset", myri10ge_watchdog_reset); 4716 4717 if (myri10ge_bigbufs_initial < 128) { 4718 cmn_err(CE_WARN, 4719 "myri10ge_bigbufs_initial be at least 128\n"); 4720 myri10ge_bigbufs_initial = 128; 4721 } 4722 if (myri10ge_bigbufs_max < 128) { 4723 cmn_err(CE_WARN, 4724 "myri10ge_bigbufs_max be at least 128\n"); 4725 myri10ge_bigbufs_max = 128; 4726 } 4727 4728 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) { 4729 cmn_err(CE_WARN, 4730 "myri10ge_bigbufs_max must be >= " 4731 "myri10ge_bigbufs_initial\n"); 4732 myri10ge_bigbufs_max = myri10ge_bigbufs_initial; 4733 } 4734 4735 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4736 "myri10ge_force_firmware", myri10ge_force_firmware); 4737 4738 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4739 "myri10ge_max_slices", myri10ge_max_slices); 4740 4741 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4742 "myri10ge_use_msix", myri10ge_use_msix); 4743 4744 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4745 "myri10ge_rss_hash", myri10ge_rss_hash); 4746 4747 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX || 4748 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) { 4749 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n", 4750 myri10ge_rss_hash); 4751 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4752 } 4753 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4754 "myri10ge_lro", myri10ge_lro); 4755 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4756 "myri10ge_lro_cnt", myri10ge_lro_cnt); 4757 myri10ge_lro_max_aggr = 
ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4758 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr); 4759 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4760 "myri10ge_tx_hash", myri10ge_tx_hash); 4761 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4762 "myri10ge_use_lso", myri10ge_use_lso); 4763 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4764 "myri10ge_lso_copy", myri10ge_lso_copy); 4765 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4766 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial); 4767 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4768 "myri10ge_small_bytes", myri10ge_small_bytes); 4769 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) { 4770 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n", 4771 myri10ge_small_bytes); 4772 cmn_err(CE_WARN, "must be aligned on 128b bndry -2\n"); 4773 myri10ge_small_bytes += 128; 4774 myri10ge_small_bytes &= ~(128 -1); 4775 myri10ge_small_bytes -= MXGEFW_PAD; 4776 cmn_err(CE_WARN, "rounded up to %d\n", 4777 myri10ge_small_bytes); 4778 4779 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4780 } 4781 } 4782 4783 #ifndef PCI_EXP_LNKSTA 4784 #define PCI_EXP_LNKSTA 18 4785 #endif 4786 4787 static int 4788 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid) 4789 { 4790 uint16_t status; 4791 uint8_t ptr; 4792 4793 /* check to see if we have capabilities */ 4794 status = pci_config_get16(handle, PCI_CONF_STAT); 4795 if (!(status & PCI_STAT_CAP)) { 4796 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n"); 4797 return (ENXIO); 4798 } 4799 4800 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR); 4801 4802 /* Walk the capabilities list, looking for a PCI Express cap */ 4803 while (ptr != PCI_CAP_NEXT_PTR_NULL) { 4804 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid) 4805 break; 4806 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR); 4807 } 4808 if (ptr < 64) { 4809 cmn_err(CE_WARN, "Bad capability offset %d\n", 
ptr); 4810 return (ENXIO); 4811 } 4812 *capptr = ptr; 4813 return (0); 4814 } 4815 4816 static int 4817 myri10ge_set_max_readreq(ddi_acc_handle_t handle) 4818 { 4819 int err; 4820 uint16_t val; 4821 uint8_t ptr; 4822 4823 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4824 if (err != 0) { 4825 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4826 return (ENXIO); 4827 } 4828 4829 /* set max read req to 4096 */ 4830 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4831 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) | 4832 PCIE_DEVCTL_MAX_READ_REQ_4096; 4833 pci_config_put16(handle, ptr + PCIE_DEVCTL, val); 4834 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4835 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) != 4836 PCIE_DEVCTL_MAX_READ_REQ_4096) { 4837 cmn_err(CE_WARN, "could not set max read req (%x)\n", val); 4838 return (EINVAL); 4839 } 4840 return (0); 4841 } 4842 4843 static int 4844 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link) 4845 { 4846 int err; 4847 uint16_t val; 4848 uint8_t ptr; 4849 4850 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4851 if (err != 0) { 4852 cmn_err(CE_WARN, "could not set max read req\n"); 4853 return (ENXIO); 4854 } 4855 4856 /* read link width */ 4857 val = pci_config_get16(handle, ptr + PCIE_LINKSTS); 4858 val &= PCIE_LINKSTS_NEG_WIDTH_MASK; 4859 *link = (val >> 4); 4860 return (0); 4861 } 4862 4863 static int 4864 myri10ge_reset_nic(struct myri10ge_priv *mgp) 4865 { 4866 ddi_acc_handle_t handle = mgp->cfg_hdl; 4867 uint32_t reboot; 4868 uint16_t cmd; 4869 int err; 4870 4871 cmd = pci_config_get16(handle, PCI_CONF_COMM); 4872 if ((cmd & PCI_COMM_ME) == 0) { 4873 /* 4874 * Bus master DMA disabled? 
Check to see if the card 4875 * rebooted due to a parity error For now, just report 4876 * it 4877 */ 4878 4879 /* enter read32 mode */ 4880 pci_config_put8(handle, mgp->vso + 0x10, 0x3); 4881 /* read REBOOT_STATUS (0xfffffff0) */ 4882 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0); 4883 reboot = pci_config_get16(handle, mgp->vso + 0x14); 4884 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot); 4885 return (0); 4886 } 4887 if (!myri10ge_watchdog_reset) { 4888 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name); 4889 return (1); 4890 } 4891 4892 myri10ge_stop_locked(mgp); 4893 err = myri10ge_start_locked(mgp); 4894 if (err == DDI_FAILURE) { 4895 return (0); 4896 } 4897 mac_tx_update(mgp->mh); 4898 return (1); 4899 } 4900 4901 static inline int 4902 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx) 4903 { 4904 if (tx->sched != tx->stall && 4905 tx->done == tx->watchdog_done && 4906 tx->watchdog_req != tx->watchdog_done) 4907 return (1); 4908 return (0); 4909 } 4910 4911 static void 4912 myri10ge_watchdog(void *arg) 4913 { 4914 struct myri10ge_priv *mgp; 4915 struct myri10ge_slice_state *ss; 4916 myri10ge_tx_ring_t *tx; 4917 int nic_ok = 1; 4918 int slices_stalled, rx_pause, i; 4919 int add_rx; 4920 4921 mgp = arg; 4922 mutex_enter(&mgp->intrlock); 4923 if (mgp->running != MYRI10GE_ETH_RUNNING) { 4924 cmn_err(CE_WARN, 4925 "%s not running, not rearming watchdog (%d)\n", 4926 mgp->name, mgp->running); 4927 mutex_exit(&mgp->intrlock); 4928 return; 4929 } 4930 4931 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause); 4932 4933 /* 4934 * make sure nic is stalled before we reset the nic, so as to 4935 * ensure we don't rip the transmit data structures out from 4936 * under a pending transmit 4937 */ 4938 4939 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) { 4940 tx = &mgp->ss[i].tx; 4941 slices_stalled = myri10ge_ring_stalled(tx); 4942 if (slices_stalled) 4943 break; 4944 } 4945 4946 if (slices_stalled) { 4947 if (mgp->watchdog_rx_pause == 
rx_pause) { 4948 cmn_err(CE_WARN, 4949 "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)", 4950 mgp->name, i, tx->sched, tx->stall, 4951 tx->done, tx->watchdog_done, tx->req, tx->pkt_done, 4952 (int)ntohl(mgp->ss[i].fw_stats->send_done_count)); 4953 nic_ok = myri10ge_reset_nic(mgp); 4954 } else { 4955 cmn_err(CE_WARN, 4956 "%s Flow controlled, check link partner\n", 4957 mgp->name); 4958 } 4959 } 4960 4961 if (!nic_ok) { 4962 cmn_err(CE_WARN, 4963 "%s Nic dead, not rearming watchdog\n", mgp->name); 4964 mutex_exit(&mgp->intrlock); 4965 return; 4966 } 4967 for (i = 0; i < mgp->num_slices; i++) { 4968 ss = &mgp->ss[i]; 4969 tx = &ss->tx; 4970 tx->watchdog_done = tx->done; 4971 tx->watchdog_req = tx->req; 4972 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) { 4973 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy); 4974 add_rx = 4975 min(ss->jpool.num_alloc, 4976 myri10ge_bigbufs_max - 4977 (ss->jpool.num_alloc - 4978 ss->jbufs_for_smalls)); 4979 if (add_rx != 0) { 4980 (void) myri10ge_add_jbufs(ss, add_rx, 0); 4981 /* now feed them to the firmware */ 4982 mutex_enter(&ss->jpool.mtx); 4983 myri10ge_restock_jumbos(ss); 4984 mutex_exit(&ss->jpool.mtx); 4985 } 4986 } 4987 } 4988 mgp->watchdog_rx_pause = rx_pause; 4989 4990 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 4991 mgp->timer_ticks); 4992 mutex_exit(&mgp->intrlock); 4993 } 4994 4995 /*ARGSUSED*/ 4996 static int 4997 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 4998 4999 { 5000 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5001 (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay); 5002 return (0); 5003 } 5004 5005 /*ARGSUSED*/ 5006 static int 5007 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value, 5008 caddr_t cp, cred_t *credp) 5009 5010 { 5011 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5012 char *end; 5013 size_t new_value; 5014 5015 new_value = mi_strtol(value, &end, 10); 5016 if (end == value) 5017 return (EINVAL); 5018 5019 
mutex_enter(&myri10ge_param_lock); 5020 mgp->intr_coal_delay = (int)new_value; 5021 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay); 5022 mutex_exit(&myri10ge_param_lock); 5023 return (0); 5024 } 5025 5026 /*ARGSUSED*/ 5027 static int 5028 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5029 5030 { 5031 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5032 (void) mi_mpprintf(mp, "%d", mgp->pause); 5033 return (0); 5034 } 5035 5036 /*ARGSUSED*/ 5037 static int 5038 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value, 5039 caddr_t cp, cred_t *credp) 5040 5041 { 5042 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5043 char *end; 5044 size_t new_value; 5045 int err = 0; 5046 5047 new_value = mi_strtol(value, &end, 10); 5048 if (end == value) 5049 return (EINVAL); 5050 if (new_value != 0) 5051 new_value = 1; 5052 5053 mutex_enter(&myri10ge_param_lock); 5054 if (new_value != mgp->pause) 5055 err = myri10ge_change_pause(mgp, new_value); 5056 mutex_exit(&myri10ge_param_lock); 5057 return (err); 5058 } 5059 5060 /*ARGSUSED*/ 5061 static int 5062 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5063 5064 { 5065 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp); 5066 return (0); 5067 } 5068 5069 /*ARGSUSED*/ 5070 static int 5071 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value, 5072 caddr_t cp, cred_t *credp) 5073 5074 { 5075 char *end; 5076 size_t new_value; 5077 5078 new_value = mi_strtol(value, &end, 10); 5079 if (end == value) 5080 return (EINVAL); 5081 *(int *)(void *)cp = new_value; 5082 5083 return (0); 5084 } 5085 5086 static void 5087 myri10ge_ndd_init(struct myri10ge_priv *mgp) 5088 { 5089 mgp->nd_head = NULL; 5090 5091 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay", 5092 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp); 5093 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control", 5094 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp); 
5095 (void) nd_load(&mgp->nd_head, "myri10ge_verbose", 5096 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose); 5097 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait", 5098 myri10ge_get_int, myri10ge_set_int, 5099 (caddr_t)&myri10ge_deassert_wait); 5100 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max", 5101 myri10ge_get_int, myri10ge_set_int, 5102 (caddr_t)&myri10ge_bigbufs_max); 5103 (void) nd_load(&mgp->nd_head, "myri10ge_lro", 5104 myri10ge_get_int, myri10ge_set_int, 5105 (caddr_t)&myri10ge_lro); 5106 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr", 5107 myri10ge_get_int, myri10ge_set_int, 5108 (caddr_t)&myri10ge_lro_max_aggr); 5109 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash", 5110 myri10ge_get_int, myri10ge_set_int, 5111 (caddr_t)&myri10ge_tx_hash); 5112 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy", 5113 myri10ge_get_int, myri10ge_set_int, 5114 (caddr_t)&myri10ge_lso_copy); 5115 } 5116 5117 static void 5118 myri10ge_ndd_fini(struct myri10ge_priv *mgp) 5119 { 5120 nd_free(&mgp->nd_head); 5121 } 5122 5123 static void 5124 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) 5125 { 5126 struct iocblk *iocp; 5127 struct myri10ge_priv *mgp = arg; 5128 int cmd, ok, err; 5129 5130 iocp = (struct iocblk *)(void *)mp->b_rptr; 5131 cmd = iocp->ioc_cmd; 5132 5133 ok = 0; 5134 err = 0; 5135 5136 switch (cmd) { 5137 case ND_GET: 5138 case ND_SET: 5139 ok = nd_getset(wq, mgp->nd_head, mp); 5140 break; 5141 default: 5142 break; 5143 } 5144 if (!ok) 5145 err = EINVAL; 5146 else 5147 err = iocp->ioc_error; 5148 5149 if (!err) 5150 miocack(wq, mp, iocp->ioc_count, err); 5151 else 5152 miocnak(wq, mp, 0, err); 5153 } 5154 5155 static struct myri10ge_priv *mgp_list; 5156 5157 struct myri10ge_priv * 5158 myri10ge_get_instance(uint_t unit) 5159 { 5160 struct myri10ge_priv *mgp; 5161 5162 mutex_enter(&myri10ge_param_lock); 5163 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) { 5164 if (unit == ddi_get_instance(mgp->dip)) { 5165 mgp->refcnt++; 
5166 break; 5167 } 5168 } 5169 mutex_exit(&myri10ge_param_lock); 5170 return (mgp); 5171 } 5172 5173 void 5174 myri10ge_put_instance(struct myri10ge_priv *mgp) 5175 { 5176 mutex_enter(&myri10ge_param_lock); 5177 mgp->refcnt--; 5178 mutex_exit(&myri10ge_param_lock); 5179 } 5180 5181 static boolean_t 5182 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) 5183 { 5184 struct myri10ge_priv *mgp = arg; 5185 uint32_t *cap_hcksum; 5186 mac_capab_lso_t *cap_lso; 5187 mac_capab_rings_t *cap_rings; 5188 5189 switch (cap) { 5190 case MAC_CAPAB_HCKSUM: 5191 cap_hcksum = cap_data; 5192 *cap_hcksum = HCKSUM_INET_PARTIAL; 5193 break; 5194 case MAC_CAPAB_RINGS: 5195 cap_rings = cap_data; 5196 switch (cap_rings->mr_type) { 5197 case MAC_RING_TYPE_RX: 5198 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5199 cap_rings->mr_rnum = mgp->num_slices; 5200 cap_rings->mr_gnum = 1; 5201 cap_rings->mr_rget = myri10ge_fill_ring; 5202 cap_rings->mr_gget = myri10ge_fill_group; 5203 break; 5204 case MAC_RING_TYPE_TX: 5205 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5206 cap_rings->mr_rnum = mgp->num_slices; 5207 cap_rings->mr_gnum = 0; 5208 cap_rings->mr_rget = myri10ge_fill_ring; 5209 cap_rings->mr_gget = NULL; 5210 break; 5211 default: 5212 return (B_FALSE); 5213 } 5214 break; 5215 case MAC_CAPAB_LSO: 5216 cap_lso = cap_data; 5217 if (!myri10ge_use_lso) 5218 return (B_FALSE); 5219 if (!(mgp->features & MYRI10GE_TSO)) 5220 return (B_FALSE); 5221 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; 5222 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1; 5223 break; 5224 5225 default: 5226 return (B_FALSE); 5227 } 5228 return (B_TRUE); 5229 } 5230 5231 5232 static int 5233 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val) 5234 { 5235 struct myri10ge_priv *mgp = arg; 5236 struct myri10ge_rx_ring_stats *rstat; 5237 struct myri10ge_tx_ring_stats *tstat; 5238 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats; 5239 struct myri10ge_slice_state *ss; 5240 uint64_t tmp = 0; 5241 int i; 5242 
5243 switch (stat) { 5244 case MAC_STAT_IFSPEED: 5245 *val = 10ull * 1000ull * 1000000ull; 5246 break; 5247 5248 case MAC_STAT_MULTIRCV: 5249 for (i = 0; i < mgp->num_slices; i++) { 5250 rstat = &mgp->ss[i].rx_stats; 5251 tmp += rstat->multircv; 5252 } 5253 *val = tmp; 5254 break; 5255 5256 case MAC_STAT_BRDCSTRCV: 5257 for (i = 0; i < mgp->num_slices; i++) { 5258 rstat = &mgp->ss[i].rx_stats; 5259 tmp += rstat->brdcstrcv; 5260 } 5261 *val = tmp; 5262 break; 5263 5264 case MAC_STAT_MULTIXMT: 5265 for (i = 0; i < mgp->num_slices; i++) { 5266 tstat = &mgp->ss[i].tx.stats; 5267 tmp += tstat->multixmt; 5268 } 5269 *val = tmp; 5270 break; 5271 5272 case MAC_STAT_BRDCSTXMT: 5273 for (i = 0; i < mgp->num_slices; i++) { 5274 tstat = &mgp->ss[i].tx.stats; 5275 tmp += tstat->brdcstxmt; 5276 } 5277 *val = tmp; 5278 break; 5279 5280 case MAC_STAT_NORCVBUF: 5281 tmp = ntohl(fw_stats->dropped_no_big_buffer); 5282 tmp += ntohl(fw_stats->dropped_no_small_buffer); 5283 tmp += ntohl(fw_stats->dropped_link_overflow); 5284 for (i = 0; i < mgp->num_slices; i++) { 5285 ss = &mgp->ss[i]; 5286 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf); 5287 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf); 5288 } 5289 *val = tmp; 5290 break; 5291 5292 case MAC_STAT_IERRORS: 5293 tmp += ntohl(fw_stats->dropped_bad_crc32); 5294 tmp += ntohl(fw_stats->dropped_bad_phy); 5295 tmp += ntohl(fw_stats->dropped_runt); 5296 tmp += ntohl(fw_stats->dropped_overrun); 5297 *val = tmp; 5298 break; 5299 5300 case MAC_STAT_OERRORS: 5301 for (i = 0; i < mgp->num_slices; i++) { 5302 ss = &mgp->ss[i]; 5303 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags); 5304 tmp += MYRI10GE_SLICE_STAT(xmit_err); 5305 } 5306 *val = tmp; 5307 break; 5308 5309 case MAC_STAT_RBYTES: 5310 for (i = 0; i < mgp->num_slices; i++) { 5311 rstat = &mgp->ss[i].rx_stats; 5312 tmp += rstat->ibytes; 5313 } 5314 *val = tmp; 5315 break; 5316 5317 case MAC_STAT_IPACKETS: 5318 for (i = 0; i < mgp->num_slices; i++) { 5319 rstat = &mgp->ss[i].rx_stats; 5320 tmp += 
rstat->ipackets; 5321 } 5322 *val = tmp; 5323 break; 5324 5325 case MAC_STAT_OBYTES: 5326 for (i = 0; i < mgp->num_slices; i++) { 5327 tstat = &mgp->ss[i].tx.stats; 5328 tmp += tstat->obytes; 5329 } 5330 *val = tmp; 5331 break; 5332 5333 case MAC_STAT_OPACKETS: 5334 for (i = 0; i < mgp->num_slices; i++) { 5335 tstat = &mgp->ss[i].tx.stats; 5336 tmp += tstat->opackets; 5337 } 5338 *val = tmp; 5339 break; 5340 5341 case ETHER_STAT_TOOLONG_ERRORS: 5342 *val = ntohl(fw_stats->dropped_overrun); 5343 break; 5344 5345 #ifdef SOLARIS_S11 5346 case ETHER_STAT_TOOSHORT_ERRORS: 5347 *val = ntohl(fw_stats->dropped_runt); 5348 break; 5349 #endif 5350 5351 case ETHER_STAT_LINK_PAUSE: 5352 *val = mgp->pause; 5353 break; 5354 5355 case ETHER_STAT_LINK_AUTONEG: 5356 *val = 1; 5357 break; 5358 5359 case ETHER_STAT_LINK_DUPLEX: 5360 *val = LINK_DUPLEX_FULL; 5361 break; 5362 5363 default: 5364 return (ENOTSUP); 5365 } 5366 5367 return (0); 5368 } 5369 5370 static mac_callbacks_t myri10ge_m_callbacks = { 5371 (MC_IOCTL | MC_GETCAPAB), 5372 myri10ge_m_stat, 5373 myri10ge_m_start, 5374 myri10ge_m_stop, 5375 myri10ge_m_promisc, 5376 myri10ge_m_multicst, 5377 NULL, 5378 NULL, 5379 NULL, 5380 myri10ge_m_ioctl, 5381 myri10ge_m_getcapab 5382 }; 5383 5384 5385 static int 5386 myri10ge_probe_slices(struct myri10ge_priv *mgp) 5387 { 5388 myri10ge_cmd_t cmd; 5389 int status; 5390 5391 mgp->num_slices = 1; 5392 5393 /* hit the board with a reset to ensure it is alive */ 5394 (void) memset(&cmd, 0, sizeof (cmd)); 5395 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd); 5396 if (status != 0) { 5397 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 5398 return (ENXIO); 5399 } 5400 5401 if (myri10ge_use_msix == 0) 5402 return (0); 5403 5404 /* tell it the size of the interrupt queues */ 5405 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot); 5406 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 5407 if (status != 0) { 5408 cmn_err(CE_WARN, "%s: failed 
MXGEFW_CMD_SET_INTRQ_SIZE\n", 5409 mgp->name); 5410 return (ENXIO); 5411 } 5412 5413 /* ask the maximum number of slices it supports */ 5414 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 5415 &cmd); 5416 if (status != 0) 5417 return (0); 5418 5419 mgp->num_slices = cmd.data0; 5420 5421 /* 5422 * if the admin did not specify a limit to how many 5423 * slices we should use, cap it automatically to the 5424 * number of CPUs currently online 5425 */ 5426 if (myri10ge_max_slices == -1) 5427 myri10ge_max_slices = ncpus; 5428 5429 if (mgp->num_slices > myri10ge_max_slices) 5430 mgp->num_slices = myri10ge_max_slices; 5431 5432 5433 /* 5434 * Now try to allocate as many MSI-X vectors as we have 5435 * slices. We give up on MSI-X if we can only get a single 5436 * vector. 5437 */ 5438 while (mgp->num_slices > 1) { 5439 /* make sure it is a power of two */ 5440 while (mgp->num_slices & (mgp->num_slices - 1)) 5441 mgp->num_slices--; 5442 if (mgp->num_slices == 1) 5443 return (0); 5444 5445 status = myri10ge_add_intrs(mgp, 0); 5446 if (status == 0) { 5447 myri10ge_rem_intrs(mgp, 0); 5448 if (mgp->intr_cnt == mgp->num_slices) { 5449 if (myri10ge_verbose) 5450 printf("Got %d slices!\n", 5451 mgp->num_slices); 5452 return (0); 5453 } 5454 mgp->num_slices = mgp->intr_cnt; 5455 } else { 5456 mgp->num_slices = mgp->num_slices / 2; 5457 } 5458 } 5459 5460 if (myri10ge_verbose) 5461 printf("Got %d slices\n", mgp->num_slices); 5462 return (0); 5463 } 5464 5465 static void 5466 myri10ge_lro_free(struct myri10ge_slice_state *ss) 5467 { 5468 struct lro_entry *lro; 5469 5470 while (ss->lro_free != NULL) { 5471 lro = ss->lro_free; 5472 ss->lro_free = lro->next; 5473 kmem_free(lro, sizeof (*lro)); 5474 } 5475 } 5476 5477 static void 5478 myri10ge_lro_alloc(struct myri10ge_slice_state *ss) 5479 { 5480 struct lro_entry *lro; 5481 int idx; 5482 5483 ss->lro_free = NULL; 5484 ss->lro_active = NULL; 5485 5486 for (idx = 0; idx < myri10ge_lro_cnt; idx++) { 5487 lro = 
kmem_zalloc(sizeof (*lro), KM_SLEEP); 5488 if (lro == NULL) 5489 continue; 5490 lro->next = ss->lro_free; 5491 ss->lro_free = lro; 5492 } 5493 } 5494 5495 static void 5496 myri10ge_free_slices(struct myri10ge_priv *mgp) 5497 { 5498 struct myri10ge_slice_state *ss; 5499 size_t bytes; 5500 int i; 5501 5502 if (mgp->ss == NULL) 5503 return; 5504 5505 for (i = 0; i < mgp->num_slices; i++) { 5506 ss = &mgp->ss[i]; 5507 if (ss->rx_done.entry == NULL) 5508 continue; 5509 myri10ge_dma_free(&ss->rx_done.dma); 5510 ss->rx_done.entry = NULL; 5511 if (ss->fw_stats == NULL) 5512 continue; 5513 myri10ge_dma_free(&ss->fw_stats_dma); 5514 ss->fw_stats = NULL; 5515 mutex_destroy(&ss->rx_lock); 5516 mutex_destroy(&ss->tx.lock); 5517 mutex_destroy(&ss->tx.handle_lock); 5518 mutex_destroy(&ss->poll_lock); 5519 myri10ge_jpool_fini(ss); 5520 myri10ge_slice_stat_destroy(ss); 5521 myri10ge_lro_free(ss); 5522 } 5523 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5524 kmem_free(mgp->ss, bytes); 5525 mgp->ss = NULL; 5526 } 5527 5528 5529 static int 5530 myri10ge_alloc_slices(struct myri10ge_priv *mgp) 5531 { 5532 struct myri10ge_slice_state *ss; 5533 size_t bytes; 5534 int i; 5535 5536 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5537 mgp->ss = kmem_zalloc(bytes, KM_SLEEP); 5538 if (mgp->ss == NULL) 5539 return (ENOMEM); 5540 for (i = 0; i < mgp->num_slices; i++) { 5541 ss = &mgp->ss[i]; 5542 5543 ss->mgp = mgp; 5544 5545 /* allocate the per-slice firmware stats */ 5546 bytes = sizeof (*ss->fw_stats); 5547 ss->fw_stats = (mcp_irq_data_t *)(void *) 5548 myri10ge_dma_alloc(mgp->dip, bytes, 5549 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5550 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5551 &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT); 5552 if (ss->fw_stats == NULL) 5553 goto abort; 5554 (void) memset(ss->fw_stats, 0, bytes); 5555 5556 /* allocate rx done ring */ 5557 bytes = mgp->max_intr_slots * 5558 sizeof (*ss->rx_done.entry); 5559 ss->rx_done.entry = (mcp_slot_t *)(void *) 5560 
myri10ge_dma_alloc(mgp->dip, bytes, 5561 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5562 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5563 &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT); 5564 if (ss->rx_done.entry == NULL) { 5565 goto abort; 5566 } 5567 (void) memset(ss->rx_done.entry, 0, bytes); 5568 mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie); 5569 mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL); 5570 mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL); 5571 mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL); 5572 myri10ge_jpool_init(ss); 5573 (void) myri10ge_slice_stat_init(ss); 5574 myri10ge_lro_alloc(ss); 5575 } 5576 5577 return (0); 5578 5579 abort: 5580 myri10ge_free_slices(mgp); 5581 return (ENOMEM); 5582 } 5583 5584 static int 5585 myri10ge_save_msi_state(struct myri10ge_priv *mgp, 5586 ddi_acc_handle_t handle) 5587 { 5588 uint8_t ptr; 5589 int err; 5590 5591 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5592 if (err != 0) { 5593 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5594 mgp->name); 5595 return (DDI_FAILURE); 5596 } 5597 mgp->pci_saved_state.msi_ctrl = 5598 pci_config_get16(handle, ptr + PCI_MSI_CTRL); 5599 mgp->pci_saved_state.msi_addr_low = 5600 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET); 5601 mgp->pci_saved_state.msi_addr_high = 5602 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4); 5603 mgp->pci_saved_state.msi_data_32 = 5604 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA); 5605 mgp->pci_saved_state.msi_data_64 = 5606 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA); 5607 return (DDI_SUCCESS); 5608 } 5609 5610 static int 5611 myri10ge_restore_msi_state(struct myri10ge_priv *mgp, 5612 ddi_acc_handle_t handle) 5613 { 5614 uint8_t ptr; 5615 int err; 5616 5617 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5618 if (err != 0) { 5619 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5620 mgp->name); 5621 return (DDI_FAILURE); 5622 } 5623 5624 pci_config_put16(handle, ptr 
+ PCI_MSI_CTRL, 5625 mgp->pci_saved_state.msi_ctrl); 5626 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET, 5627 mgp->pci_saved_state.msi_addr_low); 5628 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4, 5629 mgp->pci_saved_state.msi_addr_high); 5630 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA, 5631 mgp->pci_saved_state.msi_data_32); 5632 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA, 5633 mgp->pci_saved_state.msi_data_64); 5634 5635 return (DDI_SUCCESS); 5636 } 5637 5638 static int 5639 myri10ge_save_pci_state(struct myri10ge_priv *mgp) 5640 { 5641 ddi_acc_handle_t handle = mgp->cfg_hdl; 5642 int i; 5643 int err = DDI_SUCCESS; 5644 5645 5646 /* Save the non-extended PCI config space 32-bits at a time */ 5647 for (i = 0; i < 16; i++) 5648 mgp->pci_saved_state.base[i] = 5649 pci_config_get32(handle, i*4); 5650 5651 /* now save MSI interrupt state *, if needed */ 5652 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5653 err = myri10ge_save_msi_state(mgp, handle); 5654 5655 return (err); 5656 } 5657 5658 static int 5659 myri10ge_restore_pci_state(struct myri10ge_priv *mgp) 5660 { 5661 ddi_acc_handle_t handle = mgp->cfg_hdl; 5662 int i; 5663 int err = DDI_SUCCESS; 5664 5665 5666 /* Restore the non-extended PCI config space 32-bits at a time */ 5667 for (i = 15; i >= 0; i--) 5668 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]); 5669 5670 /* now restore MSI interrupt state *, if needed */ 5671 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5672 err = myri10ge_restore_msi_state(mgp, handle); 5673 5674 if (mgp->max_read_request_4k) 5675 (void) myri10ge_set_max_readreq(handle); 5676 return (err); 5677 } 5678 5679 5680 static int 5681 myri10ge_suspend(dev_info_t *dip) 5682 { 5683 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5684 int status; 5685 5686 if (mgp == NULL) { 5687 cmn_err(CE_WARN, "null dip in myri10ge_suspend\n"); 5688 return (DDI_FAILURE); 5689 } 5690 if (mgp->dip != dip) { 5691 cmn_err(CE_WARN, "bad dip in 
myri10ge_suspend\n"); 5692 return (DDI_FAILURE); 5693 } 5694 mutex_enter(&mgp->intrlock); 5695 if (mgp->running == MYRI10GE_ETH_RUNNING) { 5696 mgp->running = MYRI10GE_ETH_STOPPING; 5697 mutex_exit(&mgp->intrlock); 5698 (void) untimeout(mgp->timer_id); 5699 mutex_enter(&mgp->intrlock); 5700 myri10ge_stop_locked(mgp); 5701 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING; 5702 } 5703 status = myri10ge_save_pci_state(mgp); 5704 mutex_exit(&mgp->intrlock); 5705 return (status); 5706 } 5707 5708 static int 5709 myri10ge_resume(dev_info_t *dip) 5710 { 5711 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5712 int status = DDI_SUCCESS; 5713 5714 if (mgp == NULL) { 5715 cmn_err(CE_WARN, "null dip in myri10ge_resume\n"); 5716 return (DDI_FAILURE); 5717 } 5718 if (mgp->dip != dip) { 5719 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n"); 5720 return (DDI_FAILURE); 5721 } 5722 5723 mutex_enter(&mgp->intrlock); 5724 status = myri10ge_restore_pci_state(mgp); 5725 if (status == DDI_SUCCESS && 5726 mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) { 5727 status = myri10ge_start_locked(mgp); 5728 } 5729 mutex_exit(&mgp->intrlock); 5730 if (status != DDI_SUCCESS) 5731 return (status); 5732 5733 /* start the watchdog timer */ 5734 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 5735 mgp->timer_ticks); 5736 return (DDI_SUCCESS); 5737 } 5738 5739 static int 5740 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5741 { 5742 5743 struct myri10ge_priv *mgp; 5744 mac_register_t *macp, *omacp; 5745 ddi_acc_handle_t handle; 5746 uint32_t csr, hdr_offset; 5747 int status, span, link_width, max_read_request_4k; 5748 unsigned long bus_number, dev_number, func_number; 5749 size_t bytes; 5750 offset_t ss_offset; 5751 uint8_t vso; 5752 5753 if (cmd == DDI_RESUME) { 5754 return (myri10ge_resume(dip)); 5755 } 5756 5757 if (cmd != DDI_ATTACH) 5758 return (DDI_FAILURE); 5759 if (pci_config_setup(dip, &handle) != DDI_SUCCESS) 5760 return (DDI_FAILURE); 5761 5762 /* enable busmater and io space 
access */ 5763 csr = pci_config_get32(handle, PCI_CONF_COMM); 5764 pci_config_put32(handle, PCI_CONF_COMM, 5765 (csr |PCI_COMM_ME|PCI_COMM_MAE)); 5766 status = myri10ge_read_pcie_link_width(handle, &link_width); 5767 if (status != 0) { 5768 cmn_err(CE_WARN, "could not read link width!\n"); 5769 link_width = 0; 5770 } 5771 max_read_request_4k = !myri10ge_set_max_readreq(handle); 5772 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS); 5773 if (status != 0) 5774 goto abort_with_cfg_hdl; 5775 if ((omacp = mac_alloc(MAC_VERSION)) == NULL) 5776 goto abort_with_cfg_hdl; 5777 /* 5778 * XXXX Hack: mac_register_t grows in newer kernels. To be 5779 * able to write newer fields, such as m_margin, without 5780 * writing outside allocated memory, we allocate our own macp 5781 * and pass that to mac_register() 5782 */ 5783 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP); 5784 macp->m_version = omacp->m_version; 5785 5786 if ((mgp = (struct myri10ge_priv *) 5787 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) { 5788 goto abort_with_macinfo; 5789 } 5790 ddi_set_driver_private(dip, mgp); 5791 5792 /* setup device name for log messages */ 5793 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip)); 5794 5795 mutex_enter(&myri10ge_param_lock); 5796 myri10ge_get_props(dip); 5797 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 5798 mgp->pause = myri10ge_flow_control; 5799 mutex_exit(&myri10ge_param_lock); 5800 5801 mgp->max_read_request_4k = max_read_request_4k; 5802 mgp->pcie_link_width = link_width; 5803 mgp->running = MYRI10GE_ETH_STOPPED; 5804 mgp->vso = vso; 5805 mgp->dip = dip; 5806 mgp->cfg_hdl = handle; 5807 5808 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */ 5809 myri10ge_test_physical(dip); 5810 5811 /* allocate command page */ 5812 bytes = sizeof (*mgp->cmd); 5813 mgp->cmd = (mcp_cmd_response_t *) 5814 (void *)myri10ge_dma_alloc(dip, bytes, 5815 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5816 DDI_DMA_CONSISTENT, 
DDI_DMA_RDWR|DDI_DMA_CONSISTENT, 5817 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT); 5818 if (mgp->cmd == NULL) 5819 goto abort_with_mgp; 5820 5821 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number, 5822 &dev_number, &func_number); 5823 if (myri10ge_verbose) 5824 printf("%s at %ld:%ld:%ld attaching\n", mgp->name, 5825 bus_number, dev_number, func_number); 5826 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram, 5827 (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr, 5828 &mgp->io_handle); 5829 if (status != DDI_SUCCESS) { 5830 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name); 5831 printf("%s: reg_set = %d, span = %d, status = %d", 5832 mgp->name, mgp->reg_set, span, status); 5833 goto abort_with_mgp; 5834 } 5835 5836 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET); 5837 hdr_offset = ntohl(hdr_offset) & 0xffffc; 5838 ss_offset = hdr_offset + 5839 offsetof(struct mcp_gen_header, string_specs); 5840 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset)); 5841 myri10ge_pio_copy32(mgp->eeprom_strings, 5842 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size), 5843 MYRI10GE_EEPROM_STRINGS_SIZE); 5844 (void) memset(mgp->eeprom_strings + 5845 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 5846 5847 status = myri10ge_read_mac_addr(mgp); 5848 if (status) { 5849 goto abort_with_mapped; 5850 } 5851 5852 status = myri10ge_select_firmware(mgp); 5853 if (status != 0) { 5854 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name); 5855 goto abort_with_mapped; 5856 } 5857 5858 status = myri10ge_probe_slices(mgp); 5859 if (status != 0) { 5860 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name); 5861 goto abort_with_dummy_rdma; 5862 } 5863 5864 status = myri10ge_alloc_slices(mgp); 5865 if (status != 0) { 5866 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name); 5867 goto abort_with_dummy_rdma; 5868 } 5869 5870 /* add the interrupt handler */ 5871 status = myri10ge_add_intrs(mgp, 1); 5872 if 
(status != 0) { 5873 cmn_err(CE_WARN, "%s: Failed to add interrupt\n", 5874 mgp->name); 5875 goto abort_with_slices; 5876 } 5877 5878 /* now that we have an iblock_cookie, init the mutexes */ 5879 mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie); 5880 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie); 5881 5882 5883 status = myri10ge_nic_stat_init(mgp); 5884 if (status != DDI_SUCCESS) 5885 goto abort_with_interrupts; 5886 status = myri10ge_info_init(mgp); 5887 if (status != DDI_SUCCESS) 5888 goto abort_with_stats; 5889 5890 /* 5891 * Initialize GLD state 5892 */ 5893 5894 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 5895 macp->m_driver = mgp; 5896 macp->m_dip = dip; 5897 macp->m_src_addr = mgp->mac_addr; 5898 macp->m_callbacks = &myri10ge_m_callbacks; 5899 macp->m_min_sdu = 0; 5900 macp->m_max_sdu = myri10ge_mtu - 5901 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ); 5902 #ifdef SOLARIS_S11 5903 macp->m_margin = VLAN_TAGSZ; 5904 #endif 5905 macp->m_v12n = MAC_VIRT_LEVEL1; 5906 status = mac_register(macp, &mgp->mh); 5907 if (status != 0) { 5908 cmn_err(CE_WARN, "%s: mac_register failed with %d\n", 5909 mgp->name, status); 5910 goto abort_with_info; 5911 } 5912 myri10ge_ndd_init(mgp); 5913 if (myri10ge_verbose) 5914 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name, 5915 mgp->intr_type, mgp->tx_boundary, mgp->fw_name); 5916 mutex_enter(&myri10ge_param_lock); 5917 mgp->next = mgp_list; 5918 mgp_list = mgp; 5919 mutex_exit(&myri10ge_param_lock); 5920 kmem_free(macp, sizeof (*macp) * 8); 5921 mac_free(omacp); 5922 return (DDI_SUCCESS); 5923 5924 abort_with_info: 5925 myri10ge_info_destroy(mgp); 5926 5927 abort_with_stats: 5928 myri10ge_nic_stat_destroy(mgp); 5929 5930 abort_with_interrupts: 5931 mutex_destroy(&mgp->cmd_lock); 5932 mutex_destroy(&mgp->intrlock); 5933 myri10ge_rem_intrs(mgp, 1); 5934 5935 abort_with_slices: 5936 myri10ge_free_slices(mgp); 5937 5938 abort_with_dummy_rdma: 5939 myri10ge_dummy_rdma(mgp, 0); 5940 5941 
abort_with_mapped: 5942 ddi_regs_map_free(&mgp->io_handle); 5943 5944 myri10ge_dma_free(&mgp->cmd_dma); 5945 5946 abort_with_mgp: 5947 kmem_free(mgp, sizeof (*mgp)); 5948 5949 abort_with_macinfo: 5950 kmem_free(macp, sizeof (*macp) * 8); 5951 mac_free(omacp); 5952 5953 abort_with_cfg_hdl: 5954 pci_config_teardown(&handle); 5955 return (DDI_FAILURE); 5956 5957 } 5958 5959 5960 static int 5961 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5962 { 5963 struct myri10ge_priv *mgp, *tmp; 5964 int status, i, jbufs_alloced; 5965 5966 if (cmd == DDI_SUSPEND) { 5967 status = myri10ge_suspend(dip); 5968 return (status); 5969 } 5970 5971 if (cmd != DDI_DETACH) { 5972 return (DDI_FAILURE); 5973 } 5974 /* Get the driver private (gld_mac_info_t) structure */ 5975 mgp = ddi_get_driver_private(dip); 5976 5977 mutex_enter(&mgp->intrlock); 5978 jbufs_alloced = 0; 5979 for (i = 0; i < mgp->num_slices; i++) { 5980 myri10ge_remove_jbufs(&mgp->ss[i]); 5981 jbufs_alloced += mgp->ss[i].jpool.num_alloc; 5982 } 5983 mutex_exit(&mgp->intrlock); 5984 if (jbufs_alloced != 0) { 5985 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n", 5986 mgp->name, jbufs_alloced); 5987 return (DDI_FAILURE); 5988 } 5989 5990 mutex_enter(&myri10ge_param_lock); 5991 if (mgp->refcnt != 0) { 5992 mutex_exit(&myri10ge_param_lock); 5993 cmn_err(CE_NOTE, "%s: %d external refs remain\n", 5994 mgp->name, mgp->refcnt); 5995 return (DDI_FAILURE); 5996 } 5997 mutex_exit(&myri10ge_param_lock); 5998 5999 status = mac_unregister(mgp->mh); 6000 if (status != DDI_SUCCESS) 6001 return (status); 6002 6003 myri10ge_ndd_fini(mgp); 6004 myri10ge_dummy_rdma(mgp, 0); 6005 myri10ge_nic_stat_destroy(mgp); 6006 myri10ge_info_destroy(mgp); 6007 6008 mutex_destroy(&mgp->cmd_lock); 6009 mutex_destroy(&mgp->intrlock); 6010 6011 myri10ge_rem_intrs(mgp, 1); 6012 6013 myri10ge_free_slices(mgp); 6014 ddi_regs_map_free(&mgp->io_handle); 6015 myri10ge_dma_free(&mgp->cmd_dma); 6016 pci_config_teardown(&mgp->cfg_hdl); 6017 6018 
mutex_enter(&myri10ge_param_lock); 6019 if (mgp_list == mgp) { 6020 mgp_list = mgp->next; 6021 } else { 6022 tmp = mgp_list; 6023 while (tmp->next != mgp && tmp->next != NULL) 6024 tmp = tmp->next; 6025 if (tmp->next != NULL) 6026 tmp->next = tmp->next->next; 6027 } 6028 kmem_free(mgp, sizeof (*mgp)); 6029 mutex_exit(&myri10ge_param_lock); 6030 return (DDI_SUCCESS); 6031 } 6032 6033 /* 6034 * Helper for quiesce entry point: Interrupt threads are not being 6035 * scheduled, so we must poll for the confirmation DMA to arrive in 6036 * the firmware stats block for slice 0. We're essentially running 6037 * the guts of the interrupt handler, and just cherry picking the 6038 * confirmation that the NIC is queuesced (stats->link_down) 6039 */ 6040 6041 static int 6042 myri10ge_poll_down(struct myri10ge_priv *mgp) 6043 { 6044 struct myri10ge_slice_state *ss = mgp->ss; 6045 mcp_irq_data_t *stats = ss->fw_stats; 6046 int valid; 6047 int found_down = 0; 6048 6049 6050 /* check for a pending IRQ */ 6051 6052 if (! 
*((volatile uint8_t *)& stats->valid)) 6053 return (0); 6054 valid = stats->valid; 6055 6056 /* 6057 * Make sure to tell the NIC to lower a legacy IRQ, else 6058 * it may have corrupt state after restarting 6059 */ 6060 6061 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 6062 /* lower legacy IRQ */ 6063 *mgp->irq_deassert = 0; 6064 mb(); 6065 /* wait for irq conf DMA */ 6066 while (*((volatile uint8_t *)& stats->valid)) 6067 ; 6068 } 6069 if (stats->stats_updated && stats->link_down) 6070 found_down = 1; 6071 6072 if (valid & 0x1) 6073 *ss->irq_claim = BE_32(3); 6074 *(ss->irq_claim + 1) = BE_32(3); 6075 6076 return (found_down); 6077 } 6078 6079 static int 6080 myri10ge_quiesce(dev_info_t *dip) 6081 { 6082 struct myri10ge_priv *mgp; 6083 myri10ge_cmd_t cmd; 6084 int status, down, i; 6085 6086 mgp = ddi_get_driver_private(dip); 6087 if (mgp == NULL) 6088 return (DDI_FAILURE); 6089 6090 /* if devices was unplumbed, it is guaranteed to be quiescent */ 6091 if (mgp->running == MYRI10GE_ETH_STOPPED) 6092 return (DDI_SUCCESS); 6093 6094 /* send a down CMD to queuesce NIC */ 6095 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 6096 if (status) { 6097 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 6098 return (DDI_FAILURE); 6099 } 6100 6101 for (i = 0; i < 20; i++) { 6102 down = myri10ge_poll_down(mgp); 6103 if (down) 6104 break; 6105 delay(drv_usectohz(100000)); 6106 mb(); 6107 } 6108 if (down) 6109 return (DDI_SUCCESS); 6110 return (DDI_FAILURE); 6111 } 6112 6113 /* 6114 * Distinguish between allocb'ed blocks, and gesballoc'ed attached 6115 * storage. 
6116 */ 6117 static void 6118 myri10ge_find_lastfree(void) 6119 { 6120 mblk_t *mp = allocb(1024, 0); 6121 dblk_t *dbp; 6122 6123 if (mp == NULL) { 6124 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n"); 6125 return; 6126 } 6127 dbp = mp->b_datap; 6128 myri10ge_db_lastfree = (void *)dbp->db_lastfree; 6129 } 6130 6131 int 6132 _init(void) 6133 { 6134 int i; 6135 6136 if (myri10ge_verbose) 6137 cmn_err(CE_NOTE, 6138 "Myricom 10G driver (10GbE) version %s loading\n", 6139 MYRI10GE_VERSION_STR); 6140 myri10ge_find_lastfree(); 6141 mac_init_ops(&myri10ge_ops, "myri10ge"); 6142 mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL); 6143 if ((i = mod_install(&modlinkage)) != 0) { 6144 cmn_err(CE_WARN, "mod_install returned %d\n", i); 6145 mac_fini_ops(&myri10ge_ops); 6146 mutex_destroy(&myri10ge_param_lock); 6147 } 6148 return (i); 6149 } 6150 6151 int 6152 _fini(void) 6153 { 6154 int i; 6155 i = mod_remove(&modlinkage); 6156 if (i != 0) { 6157 return (i); 6158 } 6159 mac_fini_ops(&myri10ge_ops); 6160 mutex_destroy(&myri10ge_param_lock); 6161 return (0); 6162 } 6163 6164 int 6165 _info(struct modinfo *modinfop) 6166 { 6167 return (mod_info(&modlinkage, modinfop)); 6168 } 6169 6170 6171 /* 6172 * This file uses MyriGE driver indentation. 6173 * 6174 * Local Variables: 6175 * c-file-style:"sun" 6176 * tab-width:8 6177 * End: 6178 */ 6179