/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2007-2009 Myricom, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef lint
static const char __idstring[] =
	"@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
#endif

#define	MXGEFW_NDIS
#include "myri10ge_var.h"
#include "rss_eth_z8e.h"
#include "rss_ethp_z8e.h"
#include "mcp_gen_header.h"

#define	MYRI10GE_MAX_ETHER_MTU 9014

#define	MYRI10GE_ETH_STOPPED 0
#define	MYRI10GE_ETH_STOPPING 1
#define	MYRI10GE_ETH_STARTING 2
#define	MYRI10GE_ETH_RUNNING 3
#define	MYRI10GE_ETH_OPEN_FAILED 4
#define	MYRI10GE_ETH_SUSPENDED_RUNNING 5

static int myri10ge_small_bytes = 510;
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if #cpu(i386) || defined __i386 || defined i386 ||	\
	defined __i386__ || #cpu(x86_64) || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
int myri10ge_tx_handles_initial = 128;

static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;

static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);

static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
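/*
 * The modlinkage above is what the loadable-module entry points
 * (_init(), _fini() and _info(), not shown in this excerpt) hand to
 * mod_install(), mod_remove() and mod_info().  In the
 * DDI_DEFINE_STREAM_OPS() invocation, the two nulldev entries are
 * identify/probe, nodev is reset, D_MP marks the driver MT-safe, and
 * myri10ge_quiesce supports fast reboot.
 */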
static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/*
 * The Myri10GE NIC has the following constraints on receive buffers:
 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 */

static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
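/*
 * Note: the field commented "maximum segment size" is dma_attr_seg,
 * the segment *boundary* constraint: a single DMA segment may not
 * cross an address boundary of (dma_attr_seg + 1).  The std attr's
 * value of 0xfff therefore enforces constraint 2 above (no small
 * buffer crosses 4KB) while needing only 0x80 alignment, whereas the
 * jumbo attr relies on 4KB alignment to satisfy constraint 1.
 */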
static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

#if defined sparc64 || defined __sparcv9
#define	WC 0
#else
#define	WC 1
#endif

struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,		/* version */
	DDI_NEVERSWAP_ACC,		/* endian flags */
#if WC
	DDI_MERGING_OK_ACC		/* data order */
#else
	DDI_STRICTORDER_ACC
#endif
};

static void myri10ge_watchdog(void *arg);

#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;


caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void*) dip, len, (void*) attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void*) accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void*) dma);
	}
	return (NULL);

}

void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}
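/*
 * myri10ge_dma_alloc() bundles the canonical three-step DDI DMA setup
 * (ddi_dma_alloc_handle -> ddi_dma_mem_alloc -> ddi_dma_addr_bind_handle)
 * and unwinds in reverse order on failure.  A minimal caller sketch
 * (illustrative only, not taken from this driver):
 *
 *	struct myri10ge_dma_stuff dma;
 *	caddr_t va = myri10ge_dma_alloc(dip, 4096,
 *	    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
 *	    DDI_DMA_CONSISTENT, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
 *	    &dma, 1, DDI_DMA_DONTWAIT);
 *	if (va != NULL) {
 *		... dma.low and dma.high hold the device-visible
 *		... address, already byte-swapped for the NIC
 *		myri10ge_dma_free(&dma);
 *	}
 */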
static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	register volatile uint32_t *to32;
	size_t i;

	to32 = (volatile uint32_t *) to;
	for (i = (size / 4); i; i--) {
		*to32 = *from32;
		to32++;
		from32++;
	}
}

#if defined(_LP64)
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	register volatile uint64_t *to64;
	size_t i;

	to64 = (volatile uint64_t *) to;
	for (i = (size / 8); i; i--) {
		*to64 = *from64;
		to64++;
		from64++;
	}
}
#endif

/*
 * This routine copies memory from the host to the NIC.
 * The "size" argument must always be a multiple of
 * the size of long (4 or 8 bytes), and to/from must also
 * be naturally aligned.
 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if !defined(_LP64)
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#else
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#endif
}
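/*
 * These loops are the PIO path used throughout this driver for
 * host-to-NIC copies.  Where WC is 1, myri10ge_dev_access_attr
 * requests DDI_MERGING_OK_ACC, so the 4- or 8-byte stores above may
 * be merged (write-combined) into larger PCIe writes; callers use
 * mb() to fence each burst.
 */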
/*
 * Due to various bugs in Solaris (especially bug 6186772 where the
 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 * than two elements), and the design bug where hardware checksums are
 * ignored on mblk chains with more than 2 elements, we need to
 * allocate a private pool of physically contiguous receive buffers.
 */

static void
myri10ge_jpool_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	bzero(jpool, sizeof (*jpool));
	mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
	    ss->mgp->icookie);
	jpool->head = NULL;
}

static void
myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	if (jpool->head != NULL) {
		cmn_err(CE_WARN,
		    "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
		    ss->mgp->name);
	}
	mutex_destroy(&jpool->mtx);
}


/*
 * Copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst.
 */
static inline void
myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
{
	src->addr_low |= BE_32(1);
	myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low &= ~(BE_32(1));
	dst->addr_low = src->addr_low;
	mb();
}

static void
myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *jtail, *j, *jfree;
	volatile uintptr_t *putp;
	uintptr_t put;
	int i;

	/* find tail */
	jtail = NULL;
	if (jpool->head != NULL) {
		j = jpool->head;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}

	/*
	 * iterate over all per-CPU caches, and add contents into
	 * jpool
	 */
	for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
		/* take per-CPU free list */
		putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
		if (*putp == NULL)
			continue;
		put = atomic_swap_ulong(putp, 0);
		jfree = (struct myri10ge_jpool_entry *)put;

		/* append to pool */
		if (jtail == NULL) {
			jpool->head = jfree;
		} else {
			jtail->next = jfree;
		}
		j = jfree;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}
}

/*
 * Transfers buffers from the free pool to the nic.
 * Must be called holding the jpool mutex.
 */

static inline void
myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i, idx, limit;

	rx = &ss->rx_big;
	limit = ss->j_rx_cnt + (rx->mask + 1);

	for (i = rx->cnt; i != limit; i++) {
		idx = i & (rx->mask);
		j = jpool->head;
		if (j == NULL) {
			myri10ge_pull_jpool(ss);
			j = jpool->head;
			if (j == NULL) {
				break;
			}
		}
		jpool->head = j->next;
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;
		/* copy 4 descriptors (32-bytes) to the mcp at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
			    &rx->shadow[idx - 7]);
		}
	}
	rx->cnt = i;
}

/*
 * Transfer buffers from the nic to the free pool.
 * Should be called holding the jpool mutex
 */

static inline void
myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i;

	mutex_enter(&jpool->mtx);
	rx = &ss->rx_big;

	for (i = 0; i < rx->mask + 1; i++) {
		j = rx->info[i].j;
		rx->info[i].j = NULL;
		if (j == NULL)
			continue;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);

}
/*
 * Free routine which is called when the mblk allocated via
 * esballoc() is freed.  Here we return the jumbo buffer
 * to the free pool, and possibly pass some jumbo buffers
 * to the nic
 */

static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}
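/*
 * The per-CPU freelists form simple lock-free LIFO stacks: producers
 * (free routines like the one above) push with a compare-and-swap
 * loop, while the only consumer, myri10ge_pull_jpool(), detaches an
 * entire list at once with atomic_swap_ulong().  Because nodes are
 * never popped individually from the shared head, the classic ABA
 * hazard of CAS-based stacks does not arise here.
 */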
585 */ 586 587 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 588 rx_dma_attr->dma_attr_align != 4096) { 589 uint32_t start, end; 590 591 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 592 end = start + myri10ge_mtu; 593 if (((end >> 12) != (start >> 12)) && (start & 4095U)) { 594 printf("std buffer crossed a 4KB boundary!\n"); 595 myri10ge_remove_jbuf(j); 596 rx_dma_attr->dma_attr_align = 4096; 597 rx_dma_attr->dma_attr_seg = UINT64_MAX; 598 goto again; 599 } 600 } 601 602 j->dma.low = 603 htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress)); 604 j->dma.high = 605 htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress)); 606 j->ss = ss; 607 608 609 j->free_func.free_func = myri10ge_jfree_rtn; 610 j->free_func.free_arg = (char *)j; 611 mutex_enter(&jpool->mtx); 612 j->next = jpool->head; 613 jpool->head = j; 614 jpool->num_alloc++; 615 mutex_exit(&jpool->mtx); 616 return (0); 617 618 abort_with_mem: 619 ddi_dma_mem_free(&j->acc_handle); 620 621 abort_with_handle: 622 ddi_dma_free_handle(&j->dma_handle); 623 624 abort_with_j: 625 kmem_free(j, sizeof (*j)); 626 627 /* 628 * If an allocation failed, perhaps it failed because it could 629 * not satisfy granularity requirement. Disable that, and 630 * try agin. 631 */ 632 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 633 rx_dma_attr->dma_attr_align != 4096) { 634 cmn_err(CE_NOTE, 635 "!alloc failed, reverting to gran=1\n"); 636 rx_dma_attr->dma_attr_align = 4096; 637 rx_dma_attr->dma_attr_seg = UINT64_MAX; 638 goto again; 639 } 640 return (err); 641 } 642 643 static int 644 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool) 645 { 646 int i; 647 struct myri10ge_jpool_entry *j; 648 649 mutex_enter(&jpool->mtx); 650 j = jpool->head; 651 i = 0; 652 while (j != NULL) { 653 i++; 654 j = j->next; 655 } 656 mutex_exit(&jpool->mtx); 657 return (i); 658 } 659 660 static int 661 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total) 662 { 663 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 664 int allocated = 0; 665 int err; 666 int needed; 667 668 /* 669 * if total is set, user wants "num" jbufs in the pool, 670 * otherwise the user wants to "num" additional jbufs 671 * added to the pool 672 */ 673 if (total && jpool->num_alloc) { 674 allocated = myri10ge_jfree_cnt(jpool); 675 needed = num - allocated; 676 } else { 677 needed = num; 678 } 679 680 while (needed > 0) { 681 needed--; 682 err = myri10ge_add_jbuf(ss); 683 if (err == 0) { 684 allocated++; 685 } 686 } 687 return (allocated); 688 } 689 690 static void 691 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss) 692 { 693 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 694 struct myri10ge_jpool_entry *j; 695 696 mutex_enter(&jpool->mtx); 697 myri10ge_pull_jpool(ss); 698 while (jpool->head != NULL) { 699 jpool->num_alloc--; 700 j = jpool->head; 701 jpool->head = j->next; 702 myri10ge_remove_jbuf(j); 703 } 704 mutex_exit(&jpool->mtx); 705 } 706 707 static void 708 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss) 709 { 710 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 711 struct myri10ge_jpool_entry *j = NULL; 712 caddr_t ptr; 713 uint32_t dma_low, dma_high; 714 int idx, len; 715 unsigned int alloc_size; 716 717 dma_low = dma_high = len = 0; 718 alloc_size = myri10ge_small_bytes + MXGEFW_PAD; 719 ptr = NULL; 720 for (idx = 0; idx < ss->rx_small.mask + 1; idx++) { 721 /* Allocate a jumbo frame and carve it into small frames */ 722 if (len < alloc_size) { 723 mutex_enter(&jpool->mtx); 724 /* remove jumbo from freelist */ 725 j = jpool->head; 726 
static void
myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;
	unsigned int alloc_size;

	dma_low = dma_high = len = 0;
	alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
	ptr = NULL;
	for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < alloc_size) {
			mutex_enter(&jpool->mtx);
			/* remove jumbo from freelist */
			j = jpool->head;
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}

/*
 * Return the jumbo bufs we carved up for small to the jumbo pool
 */

static void
myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;

	mutex_enter(&jpool->mtx);
	while (ss->small_jpool != NULL) {
		j = ss->small_jpool;
		ss->small_jpool = j->next;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);
	ss->jbufs_for_smalls = 0;
}

static int
myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	struct myri10ge_tx_dma_handle *handle;
	int err;

	handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
	err = ddi_dma_alloc_handle(mgp->dip,
	    &myri10ge_tx_dma_attr,
	    DDI_DMA_SLEEP, NULL,
	    &handle->h);
	if (err) {
		static int limit = 0;
		if (limit == 0)
			cmn_err(CE_WARN, "%s: Failed to alloc tx dma handle\n",
			    mgp->name);
		limit++;
		kmem_free(handle, sizeof (*handle));
		return (err);
	}
	mutex_enter(&tx->handle_lock);
	MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
	handle->next = tx->free_tx_handles;
	tx->free_tx_handles = handle;
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);
}

static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}

static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}

static void
myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle *handle)
{
	struct myri10ge_tx_dma_handle_head list;

	if (handle == NULL)
		return;
	list.head = handle;
	list.tail = handle;
	while (handle != NULL) {
		list.tail = handle;
		handle = handle->next;
	}
	myri10ge_free_tx_handles(tx, &list);
}
static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
    struct myri10ge_tx_dma_handle **ret)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	int err, i;

	mutex_enter(&tx->handle_lock);
	for (i = 0; i < count; i++) {
		handle = tx->free_tx_handles;
		while (handle == NULL) {
			mutex_exit(&tx->handle_lock);
			err = myri10ge_add_tx_handle(ss);
			if (err != DDI_SUCCESS) {
				goto abort_with_handles;
			}
			mutex_enter(&tx->handle_lock);
			handle = tx->free_tx_handles;
		}
		tx->free_tx_handles = handle->next;
		handle->next = *ret;
		*ret = handle;
	}
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);

abort_with_handles:
	myri10ge_free_tx_handle_slist(tx, *ret);
	return (err);
}


/*
 * Frees DMA resources associated with the send ring
 */
static void
myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	size_t bytes;
	int idx;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	for (idx = 0; idx < ss->tx.mask + 1; idx++) {
		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		tx->cp[idx].va = NULL;
		myri10ge_dma_free(&tx->cp[idx].dma);
	}
	bytes = sizeof (*tx->cp) * (tx->mask + 1);
	kmem_free(tx->cp, bytes);
	tx->cp = NULL;
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
	myri10ge_remove_tx_handles(ss);
}

/*
 * Allocates DMA handles associated with the send ring
 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}


	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	while (h > 0)  {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
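/*
 * For illustration, a (hypothetical) string block might contain
 *	"SN=123456\0MAC=0:60:dd:47:1a:2b\0PC=M3F2-PCIXE-2\0"
 * Note that a MAC octet may be one or two hex digits, which is why
 * the parser below accepts both forms.
 */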
static int
myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
#define	MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
#define	myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
	(((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
	(((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))

	char *ptr, *limit;
	int i, hv, lv;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			if (myri10ge_verbose)
				printf("%s: mac address = %s\n", mgp->name,
				    ptr);
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;

				if (*(ptr+1) == ':') {
					hv = 0;
					lv = myri10ge_digit(*ptr); ptr++;
				} else {
					hv = myri10ge_digit(*ptr); ptr++;
					lv = myri10ge_digit(*ptr); ptr++;
				}
				mgp->mac_addr[i] = (hv << 4) | lv;
				ptr++;
			}
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->sn_str = (char *)ptr;
		}
		if (memcmp((const void *)ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->pc_str = (char *)ptr;
		}
		MYRI10GE_NEXT_STRING(ptr);
	}

	return (0);

abort:
	cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
	return (ENXIO);
}


/*
 * Determine the register set containing the PCI resource we
 * want to map: the memory-mappable part of the interface. We do
 * this by scanning the DDI "reg" property of the interface,
 * which is an array of mx_ddi_reg_set structures.
 */
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

#define	REGISTER_NUMBER(ip)	(ip[0] >> 0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >> 8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip)	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip)	(ip[4])

#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
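	/*
	 * Worked example: each "reg" entry is five 32-bit cells, and
	 * the first cell packs the PCI address.  For a hypothetical
	 * cell ip[0] = 0x03000810: ADDRESS_SPACE = 3 (64-bit memory
	 * space), BUS_NUMBER = 0, DEVICE_NUMBER = 1, FUNCTION_NUMBER
	 * = 0, and REGISTER_NUMBER = 0x10, the BAR's offset in config
	 * space.
	 */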
	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
	    "I/O Space",
	    "32-bit Memory Space",
	    "64-bit Memory Space"
	};
#endif

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

	/* Scan for the register number. */
	rs = &data[0];
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number = %d.\n", BUS_NUMBER(rs));
		printf("  Address space = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}


static int
myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
{
	void *inflate_buffer;
	int rv, status;
	size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
	size_t destlen;
	mcp_gen_header_t *hdr;
	unsigned hdr_offset, i;


	*limit = 0; /* -Wuninitialized */
	status = 0;

	inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
	if (!inflate_buffer) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate buffer to inflate mcp\n",
		    mgp->name);
		return (ENOMEM);
	}

	destlen = sram_size;
	rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
	    mgp->eth_z8e_length);

	if (rv != Z_OK) {
		cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
		    mgp->name, z_strerror(rv));
		status = ENXIO;
		goto abort;
	}

	*limit = (uint32_t)destlen;

	hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
	    MCP_HEADER_PTR_OFFSET));
	hdr = (void *)((char *)inflate_buffer + hdr_offset);
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
		    ntohl(hdr->mcp_type));
		status = EIO;
		goto abort;
	}

	/* save firmware version for kstat */
	(void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
	if (myri10ge_verbose)
		printf("%s: firmware id: %s\n", mgp->name, hdr->version);

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
		    (char *)inflate_buffer + i,
		    min(256U, (unsigned)(*limit - i)));
		mb();
		(void) *(int *)(void *)mgp->sram;
		mb();
	}

abort:
	kmem_free(inflate_buffer, sram_size);

	return (status);

}
int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	response->result = 0xffffffff;
	mb();

	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}
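/*
 * The command handshake above is host-initiated PIO out,
 * firmware-initiated DMA back: the host seeds response->result with
 * the sentinel 0xffffffff, PIO-copies the 8-byte-aligned mcp_cmd_t to
 * MXGEFW_ETH_CMD, then polls the response block in host memory (at
 * cmd_dma) in 1ms steps, for up to 20ms, until the firmware DMAs the
 * real result over the sentinel.
 */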
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high);	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low);	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */


	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void*) confirm, *confirm);
	}
}
static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);		/* where to copy to */
	buf[6] = htonl(0);		/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *) confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}

static int
myri10ge_m_unicst(void *arg, const uint8_t *addr)
{
	struct myri10ge_priv *mgp = arg;
	myri10ge_cmd_t cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
	    | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
	if (status == 0 && (addr != mgp->mac_addr))
		(void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));

	return (status);
}

static int
myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	myri10ge_cmd_t cmd;
	int status;

	if (pause)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
		    &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
		    &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
		    mgp->name);
		return (ENXIO);
	}
	mgp->pause = pause;
	return (0);
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
{
	myri10ge_cmd_t cmd;
	int status;

	if (promisc)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
		    mgp->name);
	}
}

static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete
	 */

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);


abort:
	myri10ge_dma_free(&dmabench_dma);
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}
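/*
 * Arithmetic behind the numbers above: with transfers = data0 >> 16
 * and ticks = data0 & 0xffff, the test moved transfers * len bytes in
 * ticks * 0.5us, so (transfers * len * 2) / ticks is bytes per
 * microsecond, i.e. MB/s.  For example, 1000 transfers of 4096 bytes
 * in 5000 ticks (2.5ms) works out to (1000 * 4096 * 2) / 5000 =
 * 1638 MB/s.
 */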
static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts  */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	}

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}
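/*
 * The Toeplitz hash used below consumes at most 12 input bytes (4
 * bytes each of src/dst IPv4 address, 2 bytes each of src/dst port),
 * so the driver precomputes a 12 x 256 lookup table: entry
 * [b * 256 + v] is the XOR of the key windows k[s] (the 32 key bits
 * starting at bit b*8 + s) for every bit s set in byte value v.
 * Hashing a packet then costs one table lookup and XOR per input
 * byte.
 */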
static int
myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int i, b, s, t, j;
	int status;
	uint32_t k[8];
	uint32_t tmp;
	uint8_t *key;

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
	    &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to get rss key\n",
		    mgp->name);
		return (EIO);
	}
	myri10ge_pio_copy32(mgp->rss_key,
	    (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
	    sizeof (mgp->rss_key));

	mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
	    KM_SLEEP);
	key = (uint8_t *)mgp->rss_key;
	t = 0;
	for (b = 0; b < 12; b++) {
		for (s = 0; s < 8; s++) {
			/* Bits: b*8+s, ..., b*8+s+31 */
			k[s] = 0;
			for (j = 0; j < 32; j++) {
				int bit = b*8+s+j;
				bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
				k[s] |= bit << (31 - j);
			}
		}

		for (i = 0; i <= 0xff; i++) {
			tmp = 0;
			if (i & (1 << 7)) { tmp ^= k[0]; }
			if (i & (1 << 6)) { tmp ^= k[1]; }
			if (i & (1 << 5)) { tmp ^= k[2]; }
			if (i & (1 << 4)) { tmp ^= k[3]; }
			if (i & (1 << 3)) { tmp ^= k[4]; }
			if (i & (1 << 2)) { tmp ^= k[5]; }
			if (i & (1 << 1)) { tmp ^= k[6]; }
			if (i & (1 << 0)) { tmp ^= k[7]; }
			mgp->toeplitz_hash_table[t++] = tmp;
		}
	}
	return (0);
}

static inline struct myri10ge_slice_state *
myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t saddr, daddr;
	uint32_t hash, slice;
	uint32_t *table = mgp->toeplitz_hash_table;
	uint16_t src, dst;

	/*
	 * Note hashing order is reversed from how it is done
	 * in the NIC, so as to generate the same hash value
	 * for the connection to try to keep connections CPU local
	 */

	/* hash on IPv4 src/dst address */
	saddr = ntohl(ip->ip_src.s_addr);
	daddr = ntohl(ip->ip_dst.s_addr);
	hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
	hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
	hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
	hash ^= table[(256 * 3) + ((daddr) & 0xff)];
	hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
	hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
	hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
	hash ^= table[(256 * 7) + ((saddr) & 0xff)];
	/* hash on TCP port, if required */
	if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
	    ip->ip_p == IPPROTO_TCP) {
		hdr = (struct tcphdr *)(void *)
		    (((uint8_t *)ip) + (ip->ip_hl << 2));
		src = ntohs(hdr->th_sport);
		dst = ntohs(hdr->th_dport);

		hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
		hash ^= table[(256 * 9) + ((dst) & 0xff)];
		hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
		hash ^= table[(256 * 11) + ((src) & 0xff)];
	}
	slice = (mgp->num_slices - 1) & hash;
	return (&mgp->ss[slice]);

}
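/*
 * Note that "(mgp->num_slices - 1) & hash" is a power-of-two modulus;
 * it only spreads flows across all slices when num_slices is a power
 * of two, which the slice-probing code is expected to ensure.  The
 * simple hash below uses the same masking.
 */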
static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t slice, hash_val;


	if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
		return (&mgp->ss[0]);
	}
	hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));

	/*
	 * Use the second byte of the *destination* address for
	 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
	 */
	hash_val = ntohs(hdr->th_dport) & 0xff;
	if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
		hash_val += ntohs(hdr->th_sport) & 0xff;

	slice = (mgp->num_slices - 1) & hash_val;
	return (&mgp->ss[slice]);
}

static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	if ((MBLKL(mp)) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}
static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}


	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
	ss->rx_small.cnt = ss->rx_small.mask + 1;

	mutex_exit(&ss->jpool.mtx);

	status = myri10ge_prepare_tx_ring(ss);

	if (status != 0)
		goto abort_with_small_jbufs;

	cmd.data0 = ntohl(ss->fw_stats_dma.low);
	cmd.data1 = ntohl(ss->fw_stats_dma.high);
	cmd.data2 = sizeof (mcp_irq_data_t);
	cmd.data2 |= (slice << 16);
	bzero(ss->fw_stats, sizeof (*ss->fw_stats));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	if (status == ENOSYS) {
		cmd.data0 = ntohl(ss->fw_stats_dma.low) +
		    offsetof(mcp_irq_data_t, send_done_count);
		cmd.data1 = ntohl(ss->fw_stats_dma.high);
		status = myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
	}
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
		goto abort_with_tx;
	}

	return (0);

abort_with_tx:
	myri10ge_unprepare_tx_ring(ss);

abort_with_small_jbufs:
	myri10ge_release_small_jbufs(ss);

abort_with_jumbos:
	if (allocated != 0) {
		mutex_enter(&ss->jpool.mtx);
		ss->jpool.low_water = 0;
		mutex_exit(&ss->jpool.mtx);
		myri10ge_unstock_jumbos(ss);
		myri10ge_remove_jbufs(ss);
	}

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	kmem_free(ss->rx_big.info, bytes);

abort_with_rx_small_info:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	kmem_free(ss->rx_small.info, bytes);

abort_with_tx_info:
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	kmem_free(ss->tx.info, bytes);

abort_with_rx_big_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	kmem_free(ss->rx_big.shadow, bytes);

abort_with_rx_small_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	kmem_free(ss->rx_small.shadow, bytes);
abort:
	return (status);

}
status = myri10ge_send_cmd(mgp, 1975 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd); 1976 } 1977 if (status) { 1978 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name); 1979 goto abort_with_tx; 1980 } 1981 1982 return (0); 1983 1984 abort_with_tx: 1985 myri10ge_unprepare_tx_ring(ss); 1986 1987 abort_with_small_jbufs: 1988 myri10ge_release_small_jbufs(ss); 1989 1990 abort_with_jumbos: 1991 if (allocated != 0) { 1992 mutex_enter(&ss->jpool.mtx); 1993 ss->jpool.low_water = 0; 1994 mutex_exit(&ss->jpool.mtx); 1995 myri10ge_unstock_jumbos(ss); 1996 myri10ge_remove_jbufs(ss); 1997 } 1998 1999 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2000 kmem_free(ss->rx_big.info, bytes); 2001 2002 abort_with_rx_small_info: 2003 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2004 kmem_free(ss->rx_small.info, bytes); 2005 2006 abort_with_tx_info: 2007 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2008 kmem_free(ss->tx.info, bytes); 2009 2010 abort_with_rx_big_shadow: 2011 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2012 kmem_free(ss->rx_big.shadow, bytes); 2013 2014 abort_with_rx_small_shadow: 2015 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2016 kmem_free(ss->rx_small.shadow, bytes); 2017 abort: 2018 return (status); 2019 2020 } 2021 2022 static void 2023 myri10ge_teardown_slice(struct myri10ge_slice_state *ss) 2024 { 2025 int tx_ring_entries, rx_ring_entries; 2026 size_t bytes; 2027 2028 /* ignore slices that have not been fully setup */ 2029 if (ss->tx.cp == NULL) 2030 return; 2031 /* Free the TX copy buffers */ 2032 myri10ge_unprepare_tx_ring(ss); 2033 2034 /* stop passing returned buffers to firmware */ 2035 2036 mutex_enter(&ss->jpool.mtx); 2037 ss->jpool.low_water = 0; 2038 mutex_exit(&ss->jpool.mtx); 2039 myri10ge_release_small_jbufs(ss); 2040 2041 /* Release the free jumbo frame pool */ 2042 myri10ge_unstock_jumbos(ss); 2043 myri10ge_remove_jbufs(ss); 2044 2045 rx_ring_entries = ss->rx_big.mask + 1; 2046 tx_ring_entries = ss->tx.mask + 1; 2047 2048 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2049 kmem_free(ss->rx_big.info, bytes); 2050 2051 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2052 kmem_free(ss->rx_small.info, bytes); 2053 2054 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2055 kmem_free(ss->tx.info, bytes); 2056 2057 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2058 kmem_free(ss->rx_big.shadow, bytes); 2059 2060 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2061 kmem_free(ss->rx_small.shadow, bytes); 2062 2063 } 2064 static int 2065 myri10ge_start_locked(struct myri10ge_priv *mgp) 2066 { 2067 myri10ge_cmd_t cmd; 2068 int status, big_pow2, i; 2069 volatile uint8_t *itable; 2070 2071 status = DDI_SUCCESS; 2072 /* Allocate DMA resources and receive buffers */ 2073 2074 status = myri10ge_reset(mgp); 2075 if (status != 0) { 2076 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 2077 return (DDI_FAILURE); 2078 } 2079 2080 if (mgp->num_slices > 1) { 2081 cmd.data0 = mgp->num_slices; 2082 cmd.data1 = 1; /* use MSI-X */ 2083 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2084 &cmd); 2085 if (status != 0) { 2086 cmn_err(CE_WARN, 2087 "%s: failed to set number of slices\n", 2088 mgp->name); 2089 goto abort_with_nothing; 2090 } 2091 /* setup the indirection table */ 2092 cmd.data0 = mgp->num_slices; 2093 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2094 &cmd); 2095 2096 status |= myri10ge_send_cmd(mgp, 2097 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 2098 if (status != 0) { 2099 
cmn_err(CE_WARN, 2100 "%s: failed to setup rss tables\n", mgp->name); 2101 } 2102 2103 /* just enable an identity mapping */ 2104 itable = mgp->sram + cmd.data0; 2105 for (i = 0; i < mgp->num_slices; i++) 2106 itable[i] = (uint8_t)i; 2107 2108 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) { 2109 status = myri10ge_init_toeplitz(mgp); 2110 if (status != 0) { 2111 cmn_err(CE_WARN, "%s: failed to setup " 2112 "toeplitz tx hash table", mgp->name); 2113 goto abort_with_nothing; 2114 } 2115 } 2116 cmd.data0 = 1; 2117 cmd.data1 = myri10ge_rss_hash; 2118 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2119 &cmd); 2120 if (status != 0) { 2121 cmn_err(CE_WARN, 2122 "%s: failed to enable slices\n", mgp->name); 2123 goto abort_with_toeplitz; 2124 } 2125 } 2126 2127 for (i = 0; i < mgp->num_slices; i++) { 2128 status = myri10ge_setup_slice(&mgp->ss[i]); 2129 if (status != 0) 2130 goto abort_with_slices; 2131 } 2132 2133 /* 2134 * Tell the MCP how many buffers he has, and to 2135 * bring the ethernet interface up 2136 * 2137 * Firmware needs the big buff size as a power of 2. Lie and 2138 * tell him the buffer is larger, because we only use 1 2139 * buffer/pkt, and the mtu will prevent overruns 2140 */ 2141 big_pow2 = myri10ge_mtu + MXGEFW_PAD; 2142 while ((big_pow2 & (big_pow2 - 1)) != 0) 2143 big_pow2++; 2144 2145 /* now give firmware buffers sizes, and MTU */ 2146 cmd.data0 = myri10ge_mtu; 2147 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd); 2148 cmd.data0 = myri10ge_small_bytes; 2149 status |= 2150 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 2151 cmd.data0 = big_pow2; 2152 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2153 if (status) { 2154 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name); 2155 goto abort_with_slices; 2156 } 2157 2158 2159 cmd.data0 = 1; 2160 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd); 2161 if (status) { 2162 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n", 2163 mgp->name, status); 2164 } else { 2165 mgp->features |= MYRI10GE_TSO; 2166 } 2167 2168 mgp->link_state = -1; 2169 mgp->rdma_tags_available = 15; 2170 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd); 2171 if (status) { 2172 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name); 2173 goto abort_with_slices; 2174 } 2175 mgp->running = MYRI10GE_ETH_RUNNING; 2176 return (DDI_SUCCESS); 2177 2178 abort_with_slices: 2179 for (i = 0; i < mgp->num_slices; i++) 2180 myri10ge_teardown_slice(&mgp->ss[i]); 2181 2182 mgp->running = MYRI10GE_ETH_STOPPED; 2183 2184 abort_with_toeplitz: 2185 if (mgp->toeplitz_hash_table != NULL) { 2186 kmem_free(mgp->toeplitz_hash_table, 2187 sizeof (uint32_t) * 12 * 256); 2188 mgp->toeplitz_hash_table = NULL; 2189 } 2190 2191 abort_with_nothing: 2192 return (DDI_FAILURE); 2193 } 2194 2195 static void 2196 myri10ge_stop_locked(struct myri10ge_priv *mgp) 2197 { 2198 int status, old_down_cnt; 2199 myri10ge_cmd_t cmd; 2200 int wait_time = 10; 2201 int i, polling; 2202 2203 old_down_cnt = mgp->down_cnt; 2204 mb(); 2205 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2206 if (status) { 2207 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 2208 } 2209 2210 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2211 delay(1 * drv_usectohz(1000000)); 2212 wait_time--; 2213 if (wait_time == 0) 2214 break; 2215 } 2216 again: 2217 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2218 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name); 2219 for 
(i = 0; i < mgp->num_slices; i++) { 2220 /* 2221 * take and release the rx lock to ensure 2222 * that no interrupt thread is blocked 2223 * elsewhere in the stack, preventing 2224 * completion 2225 */ 2226 2227 mutex_enter(&mgp->ss[i].rx_lock); 2228 printf("%s: slice %d rx irq idle\n", 2229 mgp->name, i); 2230 mutex_exit(&mgp->ss[i].rx_lock); 2231 2232 /* verify that this slice's poll handler is inactive */ 2233 mutex_enter(&mgp->ss[i].poll_lock); 2234 polling = mgp->ss[i].rx_polling; 2235 mutex_exit(&mgp->ss[i].poll_lock); 2236 if (polling) { 2237 printf("%s: slice %d is polling\n", 2238 mgp->name, i); 2239 delay(1 * drv_usectohz(1000000)); 2240 goto again; 2241 } 2242 } 2243 delay(1 * drv_usectohz(1000000)); 2244 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2245 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name); 2246 } 2247 } 2248 2249 for (i = 0; i < mgp->num_slices; i++) 2250 myri10ge_teardown_slice(&mgp->ss[i]); 2251 2252 if (mgp->toeplitz_hash_table != NULL) { 2253 kmem_free(mgp->toeplitz_hash_table, 2254 sizeof (uint32_t) * 12 * 256); 2255 mgp->toeplitz_hash_table = NULL; 2256 } 2257 mgp->running = MYRI10GE_ETH_STOPPED; 2258 } 2259 2260 static int 2261 myri10ge_m_start(void *arg) 2262 { 2263 struct myri10ge_priv *mgp = arg; 2264 int status; 2265 2266 mutex_enter(&mgp->intrlock); 2267 2268 if (mgp->running != MYRI10GE_ETH_STOPPED) { 2269 mutex_exit(&mgp->intrlock); 2270 return (DDI_FAILURE); 2271 } 2272 status = myri10ge_start_locked(mgp); 2273 mutex_exit(&mgp->intrlock); 2274 2275 if (status != DDI_SUCCESS) 2276 return (status); 2277 2278 /* start the watchdog timer */ 2279 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 2280 mgp->timer_ticks); 2281 return (DDI_SUCCESS); 2282 2283 } 2284 2285 static void 2286 myri10ge_m_stop(void *arg) 2287 { 2288 struct myri10ge_priv *mgp = arg; 2289 2290 mutex_enter(&mgp->intrlock); 2291 /* if the device is not running, give up */ 2292 if (mgp->running != MYRI10GE_ETH_RUNNING) { 2293 mutex_exit(&mgp->intrlock); 2294 return; 2295 } 2296 2297 mgp->running = MYRI10GE_ETH_STOPPING; 2298 mutex_exit(&mgp->intrlock); 2299 (void) untimeout(mgp->timer_id); 2300 mutex_enter(&mgp->intrlock); 2301 myri10ge_stop_locked(mgp); 2302 mutex_exit(&mgp->intrlock); 2303 2304 } 2305 2306 static inline void 2307 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) 2308 { 2309 struct ether_header *eh; 2310 struct ip *ip; 2311 struct ip6_hdr *ip6; 2312 uint32_t start, stuff, end, partial, hdrlen; 2313 2314 2315 csum = ntohs((uint16_t)csum); 2316 eh = (struct ether_header *)(void *)mp->b_rptr; 2317 hdrlen = sizeof (*eh); 2318 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2319 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2320 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2321 s->brdcstrcv++; 2322 else 2323 s->multircv++; 2324 } 2325 2326 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 2327 /* 2328 * fix the checksum by subtracting the 4 bytes after what 2329 * the firmware thought was the end of the ether hdr 2330 */ 2331 partial = *(uint32_t *) 2332 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE); 2333 csum += ~partial; 2334 csum += (csum < ~partial); 2335 csum = (csum >> 16) + (csum & 0xFFFF); 2336 csum = (csum >> 16) + (csum & 0xFFFF); 2337 hdrlen += VLAN_TAGSZ; 2338 } 2339 2340 if (eh->ether_type == BE_16(ETHERTYPE_IP)) { 2341 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen); 2342 start = ip->ip_hl << 2; 2343 2344 if (ip->ip_p == IPPROTO_TCP) 2345 stuff = start + offsetof(struct tcphdr, th_sum); 2346 else if (ip->ip_p == IPPROTO_UDP) 2347 stuff =
start + offsetof(struct udphdr, uh_sum); 2348 else 2349 return; 2350 end = ntohs(ip->ip_len); 2351 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) { 2352 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen); 2353 start = sizeof (*ip6); 2354 if (ip6->ip6_nxt == IPPROTO_TCP) { 2355 stuff = start + offsetof(struct tcphdr, th_sum); 2356 } else if (ip6->ip6_nxt == IPPROTO_UDP) 2357 stuff = start + offsetof(struct udphdr, uh_sum); 2358 else 2359 return; 2360 end = start + ntohs(ip6->ip6_plen); 2361 /* 2362 * IPv6 headers do not contain a checksum, and hence 2363 * do not checksum to zero, so they don't "fall out" 2364 * of the partial checksum calculation like IPv4 2365 * headers do. We need to fix the partial checksum by 2366 * subtracting the checksum of the IPv6 header. 2367 */ 2368 2369 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6)); 2370 csum += ~partial; 2371 csum += (csum < ~partial); 2372 csum = (csum >> 16) + (csum & 0xFFFF); 2373 csum = (csum >> 16) + (csum & 0xFFFF); 2374 } else { 2375 return; 2376 } 2377 2378 if (MBLKL(mp) > hdrlen + end) { 2379 /* padded frame, so hw csum may be invalid */ 2380 return; 2381 } 2382 2383 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); 2384 } 2385 2386 static mblk_t * 2387 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len, 2388 uint32_t csum) 2389 { 2390 mblk_t *mp; 2391 myri10ge_rx_ring_t *rx; 2392 int idx; 2393 2394 rx = &ss->rx_small; 2395 idx = rx->cnt & rx->mask; 2396 ss->rx_small.cnt++; 2397 2398 /* allocate a new buffer to pass up the stack */ 2399 mp = allocb(len + MXGEFW_PAD, 0); 2400 if (mp == NULL) { 2401 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf); 2402 goto abort; 2403 } 2404 bcopy(ss->rx_small.info[idx].ptr, 2405 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2406 mp->b_wptr += len + MXGEFW_PAD; 2407 mp->b_rptr += MXGEFW_PAD; 2408 2409 ss->rx_stats.ibytes += len; 2410 ss->rx_stats.ipackets += 1; 2411 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2412 2413 abort: 2414 if ((idx & 7) == 7) { 2415 myri10ge_submit_8rx(&rx->lanai[idx - 7], 2416 &rx->shadow[idx - 7]); 2417 } 2418 2419 return (mp); 2420 } 2421 2422 2423 static mblk_t * 2424 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len, 2425 uint32_t csum) 2426 { 2427 struct myri10ge_jpool_stuff *jpool; 2428 struct myri10ge_jpool_entry *j; 2429 mblk_t *mp; 2430 int idx, num_owned_by_mcp; 2431 2432 jpool = &ss->jpool; 2433 idx = ss->j_rx_cnt & ss->rx_big.mask; 2434 j = ss->rx_big.info[idx].j; 2435 2436 if (j == NULL) { 2437 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n", 2438 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt); 2439 return (NULL); 2440 } 2441 2442 2443 ss->rx_big.info[idx].j = NULL; 2444 ss->j_rx_cnt++; 2445 2446 2447 /* 2448 * Check to see if we are low on rx buffers. 2449 * Note that we must leave at least 8 free so there are 2450 * enough to free in a single 64-byte write. 
2451 */ 2452 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2453 if (num_owned_by_mcp < jpool->low_water) { 2454 mutex_enter(&jpool->mtx); 2455 myri10ge_restock_jumbos(ss); 2456 mutex_exit(&jpool->mtx); 2457 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2458 /* if we are still low, then we have to copy */ 2459 if (num_owned_by_mcp < 16) { 2460 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy); 2461 /* allocate a new buffer to pass up the stack */ 2462 mp = allocb(len + MXGEFW_PAD, 0); 2463 if (mp == NULL) { 2464 goto abort; 2465 } 2466 bcopy(j->buf, 2467 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2468 myri10ge_jfree_rtn(j); 2469 /* push buffer back to NIC */ 2470 mutex_enter(&jpool->mtx); 2471 myri10ge_restock_jumbos(ss); 2472 mutex_exit(&jpool->mtx); 2473 goto set_len; 2474 } 2475 } 2476 2477 /* loan our buffer to the stack */ 2478 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func); 2479 if (mp == NULL) { 2480 goto abort; 2481 } 2482 2483 set_len: 2484 mp->b_rptr += MXGEFW_PAD; 2485 mp->b_wptr = ((unsigned char *) mp->b_rptr + len); 2486 2487 ss->rx_stats.ibytes += len; 2488 ss->rx_stats.ipackets += 1; 2489 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2490 2491 return (mp); 2492 2493 abort: 2494 myri10ge_jfree_rtn(j); 2495 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf); 2496 return (NULL); 2497 } 2498 2499 /* 2500 * Free all transmit buffers up to the specified index 2501 */ 2502 static inline void 2503 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index) 2504 { 2505 myri10ge_tx_ring_t *tx; 2506 struct myri10ge_tx_dma_handle_head handles; 2507 int idx; 2508 int limit = 0; 2509 2510 tx = &ss->tx; 2511 handles.head = NULL; 2512 handles.tail = NULL; 2513 while (tx->pkt_done != (int)mcp_index) { 2514 idx = tx->done & tx->mask; 2515 2516 /* 2517 * the mblk and DMA handle are attached only to 2518 * the first slot used by each packet 2519 */ 2520 2521 if (tx->info[idx].m) { 2522 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h); 2523 tx->info[idx].handle->next = handles.head; 2524 handles.head = tx->info[idx].handle; 2525 if (handles.tail == NULL) 2526 handles.tail = tx->info[idx].handle; 2527 freeb(tx->info[idx].m); 2528 tx->info[idx].m = 0; 2529 tx->info[idx].handle = 0; 2530 } 2531 if (tx->info[idx].ostat.opackets != 0) { 2532 tx->stats.multixmt += tx->info[idx].ostat.multixmt; 2533 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt; 2534 tx->stats.obytes += tx->info[idx].ostat.obytes; 2535 tx->stats.opackets += tx->info[idx].ostat.opackets; 2536 tx->info[idx].stat.un.all = 0; 2537 tx->pkt_done++; 2538 } 2539 2540 tx->done++; 2541 /* 2542 * if we stalled the queue, wake it, but wait until 2543 * we have at least 1/2 our slots free.
2544 */ 2545 if ((tx->req - tx->done) < (tx->mask >> 1) && 2546 tx->stall != tx->sched) { 2547 mutex_enter(&ss->tx.lock); 2548 tx->sched = tx->stall; 2549 mutex_exit(&ss->tx.lock); 2550 mac_tx_ring_update(ss->mgp->mh, tx->rh); 2551 } 2552 2553 /* limit potential for livelock */ 2554 if (unlikely(++limit > 2 * tx->mask)) 2555 break; 2556 } 2557 if (tx->req == tx->done && tx->stop != NULL) { 2558 /* 2559 * Nic has sent all pending requests, allow him 2560 * to stop polling this queue 2561 */ 2562 mutex_enter(&tx->lock); 2563 if (tx->req == tx->done && tx->active) { 2564 *(int *)(void *)tx->stop = 1; 2565 tx->active = 0; 2566 mb(); 2567 } 2568 mutex_exit(&tx->lock); 2569 } 2570 if (handles.head != NULL) 2571 myri10ge_free_tx_handles(tx, &handles); 2572 } 2573 2574 static void 2575 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl) 2576 { 2577 mbl->head = NULL; 2578 mbl->tail = &mbl->head; 2579 mbl->cnt = 0; 2580 } 2581 2582 /*ARGSUSED*/ 2583 void 2584 myri10ge_mbl_append(struct myri10ge_slice_state *ss, 2585 struct myri10ge_mblk_list *mbl, mblk_t *mp) 2586 { 2587 *(mbl->tail) = mp; 2588 mbl->tail = &mp->b_next; 2589 mp->b_next = NULL; 2590 mbl->cnt++; 2591 } 2592 2593 2594 static inline void 2595 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, 2596 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop) 2597 { 2598 myri10ge_rx_done_t *rx_done = &ss->rx_done; 2599 struct myri10ge_priv *mgp = ss->mgp; 2600 mblk_t *mp; 2601 struct lro_entry *lro; 2602 uint16_t length; 2603 uint16_t checksum; 2604 2605 2606 while (rx_done->entry[rx_done->idx].length != 0) { 2607 if (unlikely (*stop)) { 2608 break; 2609 } 2610 length = ntohs(rx_done->entry[rx_done->idx].length); 2611 length &= (~MXGEFW_RSS_HASH_MASK); 2612 2613 /* limit potential for livelock */ 2614 limit -= length; 2615 if (unlikely(limit < 0)) 2616 break; 2617 2618 rx_done->entry[rx_done->idx].length = 0; 2619 checksum = ntohs(rx_done->entry[rx_done->idx].checksum); 2620 if (length <= myri10ge_small_bytes) 2621 mp = myri10ge_rx_done_small(ss, length, checksum); 2622 else 2623 mp = myri10ge_rx_done_big(ss, length, checksum); 2624 if (mp != NULL) { 2625 if (!myri10ge_lro || 2626 0 != myri10ge_lro_rx(ss, mp, checksum, mbl)) 2627 myri10ge_mbl_append(ss, mbl, mp); 2628 } 2629 rx_done->cnt++; 2630 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1); 2631 } 2632 while (ss->lro_active != NULL) { 2633 lro = ss->lro_active; 2634 ss->lro_active = lro->next; 2635 myri10ge_lro_flush(ss, lro, mbl); 2636 } 2637 } 2638 2639 static void 2640 myri10ge_intr_rx(struct myri10ge_slice_state *ss) 2641 { 2642 uint64_t gen; 2643 struct myri10ge_mblk_list mbl; 2644 2645 myri10ge_mbl_init(&mbl); 2646 if (mutex_tryenter(&ss->rx_lock) == 0) 2647 return; 2648 gen = ss->rx_gen_num; 2649 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL, 2650 &ss->rx_polling); 2651 if (mbl.head != NULL) 2652 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen); 2653 mutex_exit(&ss->rx_lock); 2654 2655 } 2656 2657 static mblk_t * 2658 myri10ge_poll_rx(void *arg, int bytes) 2659 { 2660 struct myri10ge_slice_state *ss = arg; 2661 struct myri10ge_mblk_list mbl; 2662 boolean_t dummy = B_FALSE; 2663 2664 if (bytes == 0) 2665 return (NULL); 2666 2667 myri10ge_mbl_init(&mbl); 2668 mutex_enter(&ss->rx_lock); 2669 if (ss->rx_polling) 2670 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy); 2671 else 2672 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss - 2673 ss->mgp->ss), ss->rx_token, ss->rx_polling); 2674 mutex_exit(&ss->rx_lock); 2675 return (mbl.head); 2676 } 2677 2678 
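/*
 * Interrupt claim protocol, as used by myri10ge_intr() below: the low
 * bit of stats->valid indicates that receive work is pending. Writing
 * BE_32(3) to ss->irq_claim[0] returns the receive token to the NIC,
 * and writing BE_32(3) to ss->irq_claim[1] acknowledges the interrupt
 * itself. When the mac layer has put the ring into polling mode
 * (ss->rx_polling), the receive claim is deferred by setting
 * ss->rx_token, and myri10ge_rx_ring_intr_enable() performs the
 * deferred claim when interrupts are re-enabled.
 */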
/*ARGSUSED*/ 2679 static uint_t 2680 myri10ge_intr(caddr_t arg0, caddr_t arg1) 2681 { 2682 struct myri10ge_slice_state *ss = 2683 (struct myri10ge_slice_state *)(void *)arg0; 2684 struct myri10ge_priv *mgp = ss->mgp; 2685 mcp_irq_data_t *stats = ss->fw_stats; 2686 myri10ge_tx_ring_t *tx = &ss->tx; 2687 uint32_t send_done_count; 2688 uint8_t valid; 2689 2690 2691 /* make sure the DMA has finished */ 2692 if (!stats->valid) { 2693 return (DDI_INTR_UNCLAIMED); 2694 } 2695 valid = stats->valid; 2696 2697 /* low bit indicates receives are present */ 2698 if (valid & 1) 2699 myri10ge_intr_rx(ss); 2700 2701 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 2702 /* lower legacy IRQ */ 2703 *mgp->irq_deassert = 0; 2704 if (!myri10ge_deassert_wait) 2705 /* don't wait for conf. that irq is low */ 2706 stats->valid = 0; 2707 mb(); 2708 } else { 2709 /* no need to wait for conf. that irq is low */ 2710 stats->valid = 0; 2711 } 2712 2713 do { 2714 /* check for transmit completes and receives */ 2715 send_done_count = ntohl(stats->send_done_count); 2716 if (send_done_count != tx->pkt_done) 2717 myri10ge_tx_done(ss, (int)send_done_count); 2718 } while (*((volatile uint8_t *) &stats->valid)); 2719 2720 if (stats->stats_updated) { 2721 if (mgp->link_state != stats->link_up || stats->link_down) { 2722 mgp->link_state = stats->link_up; 2723 if (stats->link_down) { 2724 mgp->down_cnt += stats->link_down; 2725 mgp->link_state = 0; 2726 } 2727 if (mgp->link_state) { 2728 if (myri10ge_verbose) 2729 printf("%s: link up\n", mgp->name); 2730 mac_link_update(mgp->mh, LINK_STATE_UP); 2731 } else { 2732 if (myri10ge_verbose) 2733 printf("%s: link down\n", mgp->name); 2734 mac_link_update(mgp->mh, LINK_STATE_DOWN); 2735 } 2736 MYRI10GE_NIC_STAT_INC(link_changes); 2737 } 2738 if (mgp->rdma_tags_available != 2739 ntohl(ss->fw_stats->rdma_tags_available)) { 2740 mgp->rdma_tags_available = 2741 ntohl(ss->fw_stats->rdma_tags_available); 2742 cmn_err(CE_NOTE, "%s: RDMA timed out! " 2743 "%d tags left\n", mgp->name, 2744 mgp->rdma_tags_available); 2745 } 2746 } 2747 2748 mb(); 2749 /* check to see if we have rx token to pass back */ 2750 if (valid & 0x1) { 2751 mutex_enter(&ss->poll_lock); 2752 if (ss->rx_polling) { 2753 ss->rx_token = 1; 2754 } else { 2755 *ss->irq_claim = BE_32(3); 2756 ss->rx_token = 0; 2757 } 2758 mutex_exit(&ss->poll_lock); 2759 } 2760 *(ss->irq_claim + 1) = BE_32(3); 2761 return (DDI_INTR_CLAIMED); 2762 } 2763 2764 /* 2765 * Add or remove a multicast address. This is called with our 2766 * macinfo's lock held by GLD, so we do not need to worry about 2767 * our own locking here. 
2768 */ 2769 static int 2770 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr) 2771 { 2772 myri10ge_cmd_t cmd; 2773 struct myri10ge_priv *mgp = arg; 2774 int status, join_leave; 2775 2776 if (add) 2777 join_leave = MXGEFW_JOIN_MULTICAST_GROUP; 2778 else 2779 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP; 2780 (void) memcpy(&cmd.data0, multicastaddr, 4); 2781 (void) memcpy(&cmd.data1, multicastaddr + 4, 2); 2782 cmd.data0 = htonl(cmd.data0); 2783 cmd.data1 = htonl(cmd.data1); 2784 status = myri10ge_send_cmd(mgp, join_leave, &cmd); 2785 if (status == 0) 2786 return (0); 2787 2788 cmn_err(CE_WARN, "%s: failed to set multicast address\n", 2789 mgp->name); 2790 return (status); 2791 } 2792 2793 2794 static int 2795 myri10ge_m_promisc(void *arg, boolean_t on) 2796 { 2797 struct myri10ge_priv *mgp = arg; 2798 2799 myri10ge_change_promisc(mgp, on); 2800 return (0); 2801 } 2802 2803 /* 2804 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2805 * backwards one at a time and handle ring wraps 2806 */ 2807 2808 static inline void 2809 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx, 2810 mcp_kreq_ether_send_t *src, int cnt) 2811 { 2812 int idx, starting_slot; 2813 starting_slot = tx->req; 2814 while (cnt > 1) { 2815 cnt--; 2816 idx = (starting_slot + cnt) & tx->mask; 2817 myri10ge_pio_copy(&tx->lanai[idx], 2818 &src[cnt], sizeof (*src)); 2819 mb(); 2820 } 2821 } 2822 2823 /* 2824 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2825 * at most 32 bytes at a time, so as to avoid involving the software 2826 * pio handler in the nic. We re-write the first segment's flags 2827 * to mark them valid only after writing the entire chain 2828 */ 2829 2830 static inline void 2831 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 2832 int cnt) 2833 { 2834 int idx, i; 2835 uint32_t *src_ints, *dst_ints; 2836 mcp_kreq_ether_send_t *srcp, *dstp, *dst; 2837 uint8_t last_flags; 2838 2839 idx = tx->req & tx->mask; 2840 2841 last_flags = src->flags; 2842 src->flags = 0; 2843 mb(); 2844 dst = dstp = &tx->lanai[idx]; 2845 srcp = src; 2846 2847 if ((idx + cnt) < tx->mask) { 2848 for (i = 0; i < (cnt - 1); i += 2) { 2849 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src)); 2850 mb(); /* force write every 32 bytes */ 2851 srcp += 2; 2852 dstp += 2; 2853 } 2854 } else { 2855 /* 2856 * submit all but the first request, and ensure 2857 * that it is submitted below 2858 */ 2859 myri10ge_submit_req_backwards(tx, src, cnt); 2860 i = 0; 2861 } 2862 if (i < cnt) { 2863 /* submit the first request */ 2864 myri10ge_pio_copy(dstp, srcp, sizeof (*src)); 2865 mb(); /* barrier before setting valid flag */ 2866 } 2867 2868 /* re-write the last 32-bits with the valid flags */ 2869 src->flags |= last_flags; 2870 src_ints = (uint32_t *)src; 2871 src_ints += 3; 2872 dst_ints = (uint32_t *)dst; 2873 dst_ints += 3; 2874 *dst_ints = *src_ints; 2875 tx->req += cnt; 2876 mb(); 2877 /* notify NIC to poll this tx ring */ 2878 if (!tx->active && tx->go != NULL) { 2879 *(int *)(void *)tx->go = 1; 2880 tx->active = 1; 2881 tx->activate++; 2882 mb(); 2883 } 2884 } 2885 2886 /* ARGSUSED */ 2887 static inline void 2888 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 2889 { 2890 uint32_t lso_flag; 2891 mac_lso_get(mp, mss, &lso_flag); 2892 (*flags) |= lso_flag; 2893 } 2894 2895 2896 /* like pullupmsg, except preserve hcksum/LSO attributes */ 2897 static int 2898 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) 2899 { 2900 uint32_t start, stuff, 
tx_offload_flags, mss; 2901 int ok; 2902 2903 mss = 0; 2904 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 2905 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 2906 2907 ok = pullupmsg(mp, -1); 2908 if (!ok) { 2909 printf("pullupmsg failed"); 2910 return (DDI_FAILURE); 2911 } 2912 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); 2913 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); 2914 if (tx_offload_flags & HW_LSO) 2915 DB_LSOMSS(mp) = (uint16_t)mss; 2916 lso_info_set(mp, mss, tx_offload_flags); 2917 return (DDI_SUCCESS); 2918 } 2919 2920 static inline void 2921 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh, 2922 int opackets, int obytes) 2923 { 2924 s->un.all = 0; 2925 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2926 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2927 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2928 s->un.s.brdcstxmt = 1; 2929 else 2930 s->un.s.multixmt = 1; 2931 } 2932 s->un.s.opackets = (uint16_t)opackets; 2933 s->un.s.obytes = obytes; 2934 } 2935 2936 static int 2937 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 2938 mcp_kreq_ether_send_t *req) 2939 { 2940 myri10ge_tx_ring_t *tx = &ss->tx; 2941 caddr_t ptr; 2942 struct myri10ge_tx_copybuf *cp; 2943 mblk_t *bp; 2944 int idx, mblen, avail; 2945 uint16_t len; 2946 2947 mutex_enter(&tx->lock); 2948 avail = tx->mask - (tx->req - tx->done); 2949 if (avail <= 1) { 2950 mutex_exit(&tx->lock); 2951 return (EBUSY); 2952 } 2953 idx = tx->req & tx->mask; 2954 cp = &tx->cp[idx]; 2955 ptr = cp->va; 2956 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) { 2957 mblen = MBLKL(bp); 2958 bcopy(bp->b_rptr, ptr, mblen); 2959 ptr += mblen; 2960 len += mblen; 2961 } 2962 /* ensure runts are padded to 60 bytes */ 2963 if (len < 60) { 2964 bzero(ptr, 64 - len); 2965 len = 60; 2966 } 2967 req->addr_low = cp->dma.low; 2968 req->addr_high = cp->dma.high; 2969 req->length = htons(len); 2970 req->pad = 0; 2971 req->rdma_count = 1; 2972 myri10ge_tx_stat(&tx->info[idx].stat, 2973 (struct ether_header *)(void *)cp->va, 1, len); 2974 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV); 2975 myri10ge_submit_req(&ss->tx, req, 1); 2976 mutex_exit(&tx->lock); 2977 freemsg(mp); 2978 return (DDI_SUCCESS); 2979 } 2980 2981 2982 static void 2983 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list, 2984 struct myri10ge_tx_buffer_state *tx_info, 2985 int count) 2986 { 2987 int i, idx; 2988 2989 idx = 0; /* gcc -Wuninitialized */ 2990 /* store unmapping and bp info for tx irq handler */ 2991 for (i = 0; i < count; i++) { 2992 idx = (tx->req + i) & tx->mask; 2993 tx->info[idx].m = tx_info[i].m; 2994 tx->info[idx].handle = tx_info[i].handle; 2995 } 2996 tx->info[idx].stat.un.all = tx_info[0].stat.un.all; 2997 2998 /* submit the frame to the nic */ 2999 myri10ge_submit_req(tx, req_list, count); 3000 3001 3002 } 3003 3004 3005 3006 static void 3007 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf) 3008 { 3009 mblk_t *bp; 3010 int seglen; 3011 uint_t count; 3012 3013 bp = mp; 3014 3015 while (off > 0) { 3016 seglen = MBLKL(bp); 3017 if (off < seglen) 3018 break; 3019 off -= seglen; 3020 bp = bp->b_cont; 3021 } 3022 while (len > 0) { 3023 seglen = MBLKL(bp); 3024 count = min(seglen - off, len); 3025 bcopy(bp->b_rptr + off, buf, count); 3026 len -= count; 3027 buf += count; 3028 off = 0; 3029 bp = bp->b_cont; 3030 } 3031 } 3032 3033 static int 3034 myri10ge_ether_parse_header(mblk_t *mp) 3035 { 3036 struct ether_header eh_copy; 
3037 struct ether_header *eh; 3038 int eth_hdr_len, seglen; 3039 3040 seglen = MBLKL(mp); 3041 eth_hdr_len = sizeof (*eh); 3042 if (seglen < eth_hdr_len) { 3043 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy); 3044 eh = &eh_copy; 3045 } else { 3046 eh = (struct ether_header *)(void *)mp->b_rptr; 3047 } 3048 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 3049 eth_hdr_len += 4; 3050 } 3051 3052 return (eth_hdr_len); 3053 } 3054 3055 static int 3056 myri10ge_lso_parse_header(mblk_t *mp, int off) 3057 { 3058 char buf[128]; 3059 int seglen, sum_off; 3060 struct ip *ip; 3061 struct tcphdr *tcp; 3062 3063 seglen = MBLKL(mp); 3064 if (seglen < off + sizeof (*ip)) { 3065 myri10ge_copydata(mp, off, sizeof (*ip), buf); 3066 ip = (struct ip *)(void *)buf; 3067 } else { 3068 ip = (struct ip *)(void *)(mp->b_rptr + off); 3069 } 3070 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) { 3071 myri10ge_copydata(mp, off, 3072 (ip->ip_hl << 2) + sizeof (*tcp), buf); 3073 ip = (struct ip *)(void *)buf; 3074 } 3075 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2)); 3076 3077 /* 3078 * NIC expects ip_sum to be zero. Recent changes to 3079 * OpenSolaris leave the correct ip checksum there, rather 3080 * than the required zero, so we need to zero it. Otherwise, 3081 * the NIC will produce bad checksums when sending LSO packets. 3082 */ 3083 if (ip->ip_sum != 0) { 3084 if (((char *)ip) != buf) { 3085 /* ip points into mblk, so just zero it */ 3086 ip->ip_sum = 0; 3087 } else { 3088 /* 3089 * ip points into a copy, so walk the chain 3090 * to find the ip_csum, then zero it 3091 */ 3092 sum_off = off + _PTRDIFF(&ip->ip_sum, buf); 3093 while (sum_off > (int)(MBLKL(mp) - 1)) { 3094 sum_off -= MBLKL(mp); 3095 mp = mp->b_cont; 3096 } 3097 mp->b_rptr[sum_off] = 0; 3098 sum_off++; 3099 while (sum_off > MBLKL(mp) - 1) { 3100 sum_off -= MBLKL(mp); 3101 mp = mp->b_cont; 3102 } 3103 mp->b_rptr[sum_off] = 0; 3104 } 3105 } 3106 return (off + ((ip->ip_hl + tcp->th_off) << 2)); 3107 } 3108 3109 static int 3110 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 3111 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size, 3112 uint16_t mss, uint8_t cksum_offset) 3113 { 3114 myri10ge_tx_ring_t *tx = &ss->tx; 3115 struct myri10ge_priv *mgp = ss->mgp; 3116 mblk_t *bp; 3117 mcp_kreq_ether_send_t *req; 3118 struct myri10ge_tx_copybuf *cp; 3119 caddr_t rptr, ptr; 3120 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp; 3121 int resid, avail, idx, hdr_size_tmp, tx_boundary; 3122 int rdma_count; 3123 uint32_t seglen, len, boundary, low, high_swapped; 3124 uint16_t pseudo_hdr_offset = htons(mss); 3125 uint8_t flags; 3126 3127 tx_boundary = mgp->tx_boundary; 3128 hdr_size_tmp = hdr_size; 3129 resid = tx_boundary; 3130 count = 1; 3131 mutex_enter(&tx->lock); 3132 3133 /* check to see if the slots are really there */ 3134 avail = tx->mask - (tx->req - tx->done); 3135 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) { 3136 atomic_add_32(&tx->stall, 1); 3137 mutex_exit(&tx->lock); 3138 return (EBUSY); 3139 } 3140 3141 /* copy */ 3142 cum_len = -hdr_size; 3143 count = 0; 3144 req = req_list; 3145 idx = tx->mask & tx->req; 3146 cp = &tx->cp[idx]; 3147 low = ntohl(cp->dma.low); 3148 ptr = cp->va; 3149 cp->len = 0; 3150 if (mss) { 3151 int payload = pkt_size - hdr_size; 3152 uint16_t opackets = (payload / mss) + ((payload % mss) != 0); 3153 tx->info[idx].ostat.opackets = opackets; 3154 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size 3155 + pkt_size; 3156 } 3157 hdr_size_tmp = hdr_size; 3158 
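/*
 * The copy loop below stages the frame into per-slot copy buffers:
 * the (possibly split) protocol headers are copied first, while
 * hdr_size_tmp is non-zero, and the payload is then copied in
 * mss-sized chunks. Whenever fewer than mss bytes remain in the
 * current copy buffer, we advance to the next slot's buffer so that
 * no TSO segment straddles a tx_boundary.
 */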
mss_resid = mss; 3159 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3160 tx_req = tx->req; 3161 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3162 mblen = MBLKL(bp); 3163 rptr = (caddr_t)bp->b_rptr; 3164 len = min(hdr_size_tmp, mblen); 3165 if (len) { 3166 bcopy(rptr, ptr, len); 3167 rptr += len; 3168 ptr += len; 3169 resid -= len; 3170 mblen -= len; 3171 hdr_size_tmp -= len; 3172 cp->len += len; 3173 if (hdr_size_tmp) 3174 continue; 3175 if (resid < mss) { 3176 tx_req++; 3177 idx = tx->mask & tx_req; 3178 cp = &tx->cp[idx]; 3179 low = ntohl(cp->dma.low); 3180 ptr = cp->va; 3181 resid = tx_boundary; 3182 } 3183 } 3184 while (mblen) { 3185 len = min(mss_resid, mblen); 3186 bcopy(rptr, ptr, len); 3187 mss_resid -= len; 3188 resid -= len; 3189 mblen -= len; 3190 rptr += len; 3191 ptr += len; 3192 cp->len += len; 3193 if (mss_resid == 0) { 3194 mss_resid = mss; 3195 if (resid < mss) { 3196 tx_req++; 3197 idx = tx->mask & tx_req; 3198 cp = &tx->cp[idx]; 3199 cp->len = 0; 3200 low = ntohl(cp->dma.low); 3201 ptr = cp->va; 3202 resid = tx_boundary; 3203 } 3204 } 3205 } 3206 } 3207 3208 req = req_list; 3209 pkt_size_tmp = pkt_size; 3210 count = 0; 3211 rdma_count = 0; 3212 tx_req = tx->req; 3213 while (pkt_size_tmp) { 3214 idx = tx->mask & tx_req; 3215 cp = &tx->cp[idx]; 3216 high_swapped = cp->dma.high; 3217 low = ntohl(cp->dma.low); 3218 len = cp->len; 3219 if (len == 0) { 3220 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n", 3221 pkt_size_tmp, pkt_size); 3222 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3223 mblen = MBLKL(bp); 3224 printf("mblen:%d\n", mblen); 3225 } 3226 pkt_size_tmp = pkt_size; 3227 tx_req = tx->req; 3228 while (pkt_size_tmp > 0) { 3229 idx = tx->mask & tx_req; 3230 cp = &tx->cp[idx]; 3231 printf("cp->len = %d\n", cp->len); 3232 pkt_size_tmp -= cp->len; 3233 tx_req++; 3234 } 3235 printf("dropped\n"); 3236 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3237 goto done; 3238 } 3239 pkt_size_tmp -= len; 3240 while (len) { 3241 while (len) { 3242 uint8_t flags_next; 3243 int cum_len_next; 3244 3245 boundary = (low + mgp->tx_boundary) & 3246 ~(mgp->tx_boundary - 1); 3247 seglen = boundary - low; 3248 if (seglen > len) 3249 seglen = len; 3250 3251 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3252 cum_len_next = cum_len + seglen; 3253 (req-rdma_count)->rdma_count = rdma_count + 1; 3254 if (likely(cum_len >= 0)) { 3255 /* payload */ 3256 int next_is_first, chop; 3257 3258 chop = (cum_len_next > mss); 3259 cum_len_next = cum_len_next % mss; 3260 next_is_first = (cum_len_next == 0); 3261 flags |= chop * 3262 MXGEFW_FLAGS_TSO_CHOP; 3263 flags_next |= next_is_first * 3264 MXGEFW_FLAGS_FIRST; 3265 rdma_count |= -(chop | next_is_first); 3266 rdma_count += chop & !next_is_first; 3267 } else if (likely(cum_len_next >= 0)) { 3268 /* header ends */ 3269 int small; 3270 3271 rdma_count = -1; 3272 cum_len_next = 0; 3273 seglen = -cum_len; 3274 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 3275 flags_next = MXGEFW_FLAGS_TSO_PLD | 3276 MXGEFW_FLAGS_FIRST | 3277 (small * MXGEFW_FLAGS_SMALL); 3278 } 3279 req->addr_high = high_swapped; 3280 req->addr_low = htonl(low); 3281 req->pseudo_hdr_offset = pseudo_hdr_offset; 3282 req->pad = 0; /* complete solid 16-byte block */ 3283 req->rdma_count = 1; 3284 req->cksum_offset = cksum_offset; 3285 req->length = htons(seglen); 3286 req->flags = flags | ((cum_len & 1) * 3287 MXGEFW_FLAGS_ALIGN_ODD); 3288 if (cksum_offset > seglen) 3289 cksum_offset -= seglen; 3290 else 3291 cksum_offset = 0; 3292 low += seglen; 3293 len -= seglen; 3294 cum_len = cum_len_next; 3295 req++; 
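/* zero the next descriptor's flags before it is filled in */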
3296 req->flags = 0; 3297 flags = flags_next; 3298 count++; 3299 rdma_count++; 3300 } 3301 } 3302 tx_req++; 3303 } 3304 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3305 do { 3306 req--; 3307 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3308 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3309 MXGEFW_FLAGS_FIRST))); 3310 3311 myri10ge_submit_req(tx, req_list, count); 3312 done: 3313 mutex_exit(&tx->lock); 3314 freemsg(mp); 3315 return (DDI_SUCCESS); 3316 } 3317 3318 /* 3319 * Try to send the chain of buffers described by the mp. We must not 3320 * use more send descriptors than are free in the ring, nor more than 3321 * MXGEFW_MAX_SEND_DESC (MYRI10GE_MAX_SEND_DESC_TSO for LSO) per frame. 3322 */ 3323 3324 static int 3325 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, 3326 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info) 3327 { 3328 struct myri10ge_priv *mgp = ss->mgp; 3329 myri10ge_tx_ring_t *tx = &ss->tx; 3330 mcp_kreq_ether_send_t *req; 3331 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL; 3332 mblk_t *bp; 3333 ddi_dma_cookie_t cookie; 3334 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen, 3335 rdma_count, cum_len, lso_hdr_size; 3336 uint32_t start, stuff, tx_offload_flags; 3337 uint32_t seglen, len, mss, boundary, low, high_swapped; 3338 uint_t ncookies; 3339 uint16_t pseudo_hdr_offset; 3340 uint8_t flags, cksum_offset, odd_flag; 3341 int pkt_size; 3342 int lso_copy = myri10ge_lso_copy; 3343 try_pullup = 1; 3344 3345 again: 3346 /* Setup checksum offloading, if needed */ 3347 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 3348 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 3349 if (tx_offload_flags & HW_LSO) { 3350 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3351 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) { 3352 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags); 3353 freemsg(mp); 3354 return (DDI_SUCCESS); 3355 } 3356 } else { 3357 max_segs = MXGEFW_MAX_SEND_DESC; 3358 mss = 0; 3359 } 3360 req = req_list; 3361 cksum_offset = 0; 3362 pseudo_hdr_offset = 0; 3363 3364 /* leave an extra slot to keep the ring from wrapping */ 3365 avail = tx->mask - (tx->req - tx->done); 3366 3367 /* 3368 * If we have more than MXGEFW_MAX_SEND_DESC slots available, 3369 * any over-length message will need to be pulled up in order 3370 * to fit. Otherwise, we are low on transmit descriptors; it is 3371 * probably better to stall and try again rather than pull up a 3372 * message to make it fit. 3373 */ 3374 3375 if (avail < max_segs) { 3376 err = EBUSY; 3377 atomic_add_32(&tx->stall_early, 1); 3378 goto stall; 3379 } 3380 3381 /* find out how long the frame is and how many segments it is */ 3382 count = 0; 3383 odd_flag = 0; 3384 pkt_size = 0; 3385 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 3386 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3387 dblk_t *dbp; 3388 mblen = MBLKL(bp); 3389 if (mblen == 0) { 3390 /* 3391 * we can't simply skip over 0-length mblks 3392 * because the hardware can't deal with them, 3393 * and we could leak them. 3394 */ 3395 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len); 3396 err = EIO; 3397 goto pullup; 3398 } 3399 /* 3400 * There's no advantage to copying most gesballoc 3401 * attached blocks, so disable lso copy in that case 3402 */ 3403 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) { 3404 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) { 3405 lso_copy = 0; 3406 } 3407 } 3408 pkt_size += mblen; 3409 count++; 3410 } 3411 3412 /* Try to pull up excessively long chains */ 3413 if (count >= max_segs) { 3414 err = myri10ge_pullup(ss, mp); 3415 if (likely(err == DDI_SUCCESS)) { 3416 count = 1; 3417 } else { 3418 if (count < MYRI10GE_MAX_SEND_DESC_TSO) { 3419 /* 3420 * just let the h/w send it; it will be 3421 * inefficient, but it is better than dropping 3422 */ 3423 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3424 } else { 3425 /* drop it */ 3426 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3427 freemsg(mp); 3428 return (0); 3429 } 3430 } 3431 } 3432 3433 cum_len = 0; 3434 maclen = myri10ge_ether_parse_header(mp); 3435 3436 if (tx_offload_flags & HCK_PARTIALCKSUM) { 3437 3438 cksum_offset = start + maclen; 3439 pseudo_hdr_offset = htons(stuff + maclen); 3440 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 3441 flags |= MXGEFW_FLAGS_CKSUM; 3442 } 3443 3444 lso_hdr_size = 0; /* -Wuninitialized */ 3445 if (mss) { /* LSO */ 3446 /* this removes any CKSUM flag from before */ 3447 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3448 /* 3449 * parse the headers and set cum_len to a negative 3450 * value to reflect the offset of the TCP payload 3451 */ 3452 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen); 3453 cum_len = -lso_hdr_size; 3454 if ((mss < mgp->tx_boundary) && lso_copy) { 3455 err = myri10ge_tx_tso_copy(ss, mp, req_list, 3456 lso_hdr_size, pkt_size, mss, cksum_offset); 3457 return (err); 3458 } 3459 3460 /* 3461 * for TSO, pseudo_hdr_offset holds mss. The firmware 3462 * figures out where to put the checksum by parsing 3463 * the header.
3464 */ 3465 3466 pseudo_hdr_offset = htons(mss); 3467 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) { 3468 flags |= MXGEFW_FLAGS_SMALL; 3469 if (pkt_size < myri10ge_tx_copylen) { 3470 req->cksum_offset = cksum_offset; 3471 req->pseudo_hdr_offset = pseudo_hdr_offset; 3472 req->flags = flags; 3473 err = myri10ge_tx_copy(ss, mp, req); 3474 return (err); 3475 } 3476 cum_len = 0; 3477 } 3478 3479 /* pull one DMA handle for each bp from our freelist */ 3480 handles = NULL; 3481 err = myri10ge_alloc_tx_handles(ss, count, &handles); 3482 if (err != DDI_SUCCESS) { 3483 err = DDI_FAILURE; 3484 goto stall; 3485 } 3486 count = 0; 3487 rdma_count = 0; 3488 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3489 mblen = MBLKL(bp); 3490 dma_handle = handles; 3491 handles = handles->next; 3492 3493 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL, 3494 (caddr_t)bp->b_rptr, mblen, 3495 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL, 3496 &cookie, &ncookies); 3497 if (unlikely(rv != DDI_DMA_MAPPED)) { 3498 err = EIO; 3499 try_pullup = 0; 3500 dma_handle->next = handles; 3501 handles = dma_handle; 3502 goto abort_with_handles; 3503 } 3504 3505 /* reserve the slot */ 3506 tx_info[count].m = bp; 3507 tx_info[count].handle = dma_handle; 3508 3509 for (; ; ) { 3510 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 3511 high_swapped = 3512 htonl(MYRI10GE_HIGHPART_TO_U32( 3513 cookie.dmac_laddress)); 3514 len = (uint32_t)cookie.dmac_size; 3515 while (len) { 3516 uint8_t flags_next; 3517 int cum_len_next; 3518 3519 boundary = (low + mgp->tx_boundary) & 3520 ~(mgp->tx_boundary - 1); 3521 seglen = boundary - low; 3522 if (seglen > len) 3523 seglen = len; 3524 3525 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3526 cum_len_next = cum_len + seglen; 3527 if (mss) { 3528 (req-rdma_count)->rdma_count = 3529 rdma_count + 1; 3530 if (likely(cum_len >= 0)) { 3531 /* payload */ 3532 int next_is_first, chop; 3533 3534 chop = (cum_len_next > mss); 3535 cum_len_next = 3536 cum_len_next % mss; 3537 next_is_first = 3538 (cum_len_next == 0); 3539 flags |= chop * 3540 MXGEFW_FLAGS_TSO_CHOP; 3541 flags_next |= next_is_first * 3542 MXGEFW_FLAGS_FIRST; 3543 rdma_count |= 3544 -(chop | next_is_first); 3545 rdma_count += 3546 chop & !next_is_first; 3547 } else if (likely(cum_len_next >= 0)) { 3548 /* header ends */ 3549 int small; 3550 3551 rdma_count = -1; 3552 cum_len_next = 0; 3553 seglen = -cum_len; 3554 small = (mss <= 3555 MXGEFW_SEND_SMALL_SIZE); 3556 flags_next = 3557 MXGEFW_FLAGS_TSO_PLD 3558 | MXGEFW_FLAGS_FIRST 3559 | (small * 3560 MXGEFW_FLAGS_SMALL); 3561 } 3562 } 3563 req->addr_high = high_swapped; 3564 req->addr_low = htonl(low); 3565 req->pseudo_hdr_offset = pseudo_hdr_offset; 3566 req->pad = 0; /* complete solid 16-byte block */ 3567 req->rdma_count = 1; 3568 req->cksum_offset = cksum_offset; 3569 req->length = htons(seglen); 3570 req->flags = flags | ((cum_len & 1) * odd_flag); 3571 if (cksum_offset > seglen) 3572 cksum_offset -= seglen; 3573 else 3574 cksum_offset = 0; 3575 low += seglen; 3576 len -= seglen; 3577 cum_len = cum_len_next; 3578 count++; 3579 rdma_count++; 3580 /* make sure all the segments will fit */ 3581 if (unlikely(count >= max_segs)) { 3582 MYRI10GE_ATOMIC_SLICE_STAT_INC( 3583 xmit_lowbuf); 3584 /* may try a pullup */ 3585 err = EBUSY; 3586 if (try_pullup) 3587 try_pullup = 2; 3588 goto abort_with_handles; 3589 } 3590 req++; 3591 req->flags = 0; 3592 flags = flags_next; 3593 tx_info[count].m = 0; 3594 } 3595 ncookies--; 3596 if (ncookies == 0) 3597 break; 3598 ddi_dma_nextcookie(dma_handle->h, 
&cookie); 3599 } 3600 } 3601 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3602 3603 if (mss) { 3604 do { 3605 req--; 3606 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3607 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3608 MXGEFW_FLAGS_FIRST))); 3609 } 3610 3611 /* calculate tx stats */ 3612 if (mss) { 3613 uint16_t opackets; 3614 int payload; 3615 3616 payload = pkt_size - lso_hdr_size; 3617 opackets = (payload / mss) + ((payload % mss) != 0); 3618 tx_info[0].stat.un.all = 0; 3619 tx_info[0].ostat.opackets = opackets; 3620 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size 3621 + pkt_size; 3622 } else { 3623 myri10ge_tx_stat(&tx_info[0].stat, 3624 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size); 3625 } 3626 mutex_enter(&tx->lock); 3627 3628 /* check to see if the slots are really there */ 3629 avail = tx->mask - (tx->req - tx->done); 3630 if (unlikely(avail <= count)) { 3631 mutex_exit(&tx->lock); 3632 err = 0; 3633 goto late_stall; 3634 } 3635 3636 myri10ge_send_locked(tx, req_list, tx_info, count); 3637 mutex_exit(&tx->lock); 3638 return (DDI_SUCCESS); 3639 3640 late_stall: 3641 try_pullup = 0; 3642 atomic_add_32(&tx->stall_late, 1); 3643 3644 abort_with_handles: 3645 /* unbind and free handles from previous mblks */ 3646 for (i = 0; i < count; i++) { 3647 bp = tx_info[i].m; 3648 tx_info[i].m = 0; 3649 if (bp) { 3650 dma_handle = tx_info[i].handle; 3651 (void) ddi_dma_unbind_handle(dma_handle->h); 3652 dma_handle->next = handles; 3653 handles = dma_handle; 3654 tx_info[i].handle = NULL; 3655 tx_info[i].m = NULL; 3656 } 3657 } 3658 myri10ge_free_tx_handle_slist(tx, handles); 3659 pullup: 3660 if (try_pullup) { 3661 err = myri10ge_pullup(ss, mp); 3662 if (err != DDI_SUCCESS && try_pullup == 2) { 3663 /* drop */ 3664 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3665 freemsg(mp); 3666 return (0); 3667 } 3668 try_pullup = 0; 3669 goto again; 3670 } 3671 3672 stall: 3673 if (err != 0) { 3674 if (err == EBUSY) { 3675 atomic_add_32(&tx->stall, 1); 3676 } else { 3677 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3678 } 3679 } 3680 return (err); 3681 } 3682 3683 static mblk_t * 3684 myri10ge_send_wrapper(void *arg, mblk_t *mp) 3685 { 3686 struct myri10ge_slice_state *ss = arg; 3687 int err = 0; 3688 mcp_kreq_ether_send_t *req_list; 3689 #if defined(__i386) 3690 /* 3691 * We need about 2.5KB of scratch space to handle transmits. 3692 * i86pc has only 8KB of kernel stack space, so we malloc the 3693 * scratch space there rather than keeping it on the stack. 
3694 */ 3695 size_t req_size, tx_info_size; 3696 struct myri10ge_tx_buffer_state *tx_info; 3697 caddr_t req_bytes; 3698 3699 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3700 + 8; 3701 req_bytes = kmem_alloc(req_size, KM_SLEEP); 3702 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1); 3703 tx_info = kmem_alloc(tx_info_size, KM_SLEEP); 3704 #else 3705 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3706 + 8]; 3707 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1]; 3708 #endif 3709 3710 /* ensure req_list entries are aligned to 8 bytes */ 3711 req_list = (struct mcp_kreq_ether_send *) 3712 (((unsigned long)req_bytes + 7UL) & ~7UL); 3713 3714 err = myri10ge_send(ss, mp, req_list, tx_info); 3715 3716 #if defined(__i386) 3717 kmem_free(tx_info, tx_info_size); 3718 kmem_free(req_bytes, req_size); 3719 #endif 3720 if (err) 3721 return (mp); 3722 else 3723 return (NULL); 3724 } 3725 3726 static int 3727 myri10ge_addmac(void *arg, const uint8_t *mac_addr) 3728 { 3729 struct myri10ge_priv *mgp = arg; 3730 int err; 3731 3732 if (mac_addr == NULL) 3733 return (EINVAL); 3734 3735 mutex_enter(&mgp->intrlock); 3736 if (mgp->macaddr_cnt) { 3737 mutex_exit(&mgp->intrlock); 3738 return (ENOSPC); 3739 } 3740 err = myri10ge_m_unicst(mgp, mac_addr); 3741 if (!err) 3742 mgp->macaddr_cnt++; 3743 3744 mutex_exit(&mgp->intrlock); 3745 if (err) 3746 return (err); 3747 3748 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr)); 3749 return (0); 3750 } 3751 3752 /*ARGSUSED*/ 3753 static int 3754 myri10ge_remmac(void *arg, const uint8_t *mac_addr) 3755 { 3756 struct myri10ge_priv *mgp = arg; 3757 3758 mutex_enter(&mgp->intrlock); 3759 mgp->macaddr_cnt--; 3760 mutex_exit(&mgp->intrlock); 3761 3762 return (0); 3763 } 3764 3765 /*ARGSUSED*/ 3766 static void 3767 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index, 3768 mac_group_info_t *infop, mac_group_handle_t gh) 3769 { 3770 struct myri10ge_priv *mgp = arg; 3771 3772 if (rtype != MAC_RING_TYPE_RX) 3773 return; 3774 3775 infop->mgi_driver = (mac_group_driver_t)mgp; 3776 infop->mgi_start = NULL; 3777 infop->mgi_stop = NULL; 3778 infop->mgi_addmac = myri10ge_addmac; 3779 infop->mgi_remmac = myri10ge_remmac; 3780 infop->mgi_count = mgp->num_slices; 3781 } 3782 3783 static int 3784 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) 3785 { 3786 struct myri10ge_slice_state *ss; 3787 3788 ss = (struct myri10ge_slice_state *)rh; 3789 mutex_enter(&ss->rx_lock); 3790 ss->rx_gen_num = mr_gen_num; 3791 mutex_exit(&ss->rx_lock); 3792 return (0); 3793 } 3794 3795 /* 3796 * Retrieve a value for one of the statistics for a particular rx ring 3797 */ 3798 int 3799 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3800 { 3801 struct myri10ge_slice_state *ss; 3802 3803 ss = (struct myri10ge_slice_state *)rh; 3804 switch (stat) { 3805 case MAC_STAT_RBYTES: 3806 *val = ss->rx_stats.ibytes; 3807 break; 3808 3809 case MAC_STAT_IPACKETS: 3810 *val = ss->rx_stats.ipackets; 3811 break; 3812 3813 default: 3814 *val = 0; 3815 return (ENOTSUP); 3816 } 3817 3818 return (0); 3819 } 3820 3821 /* 3822 * Retrieve a value for one of the statistics for a particular tx ring 3823 */ 3824 int 3825 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3826 { 3827 struct myri10ge_slice_state *ss; 3828 3829 ss = (struct myri10ge_slice_state *)rh; 3830 switch (stat) { 3831 case MAC_STAT_OBYTES: 3832 *val = ss->tx.stats.obytes; 3833 break; 3834 3835 case 
MAC_STAT_OPACKETS: 3836 *val = ss->tx.stats.opackets; 3837 break; 3838 3839 default: 3840 *val = 0; 3841 return (ENOTSUP); 3842 } 3843 3844 return (0); 3845 } 3846 3847 static int 3848 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) 3849 { 3850 struct myri10ge_slice_state *ss; 3851 3852 ss = (struct myri10ge_slice_state *)intrh; 3853 mutex_enter(&ss->poll_lock); 3854 ss->rx_polling = B_TRUE; 3855 mutex_exit(&ss->poll_lock); 3856 return (0); 3857 } 3858 3859 static int 3860 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh) 3861 { 3862 struct myri10ge_slice_state *ss; 3863 3864 ss = (struct myri10ge_slice_state *)intrh; 3865 mutex_enter(&ss->poll_lock); 3866 ss->rx_polling = B_FALSE; 3867 if (ss->rx_token) { 3868 *ss->irq_claim = BE_32(3); 3869 ss->rx_token = 0; 3870 } 3871 mutex_exit(&ss->poll_lock); 3872 return (0); 3873 } 3874 3875 /*ARGSUSED*/ 3876 static void 3877 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 3878 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 3879 { 3880 struct myri10ge_priv *mgp = arg; 3881 struct myri10ge_slice_state *ss; 3882 mac_intr_t *mintr = &infop->mri_intr; 3883 3884 ASSERT((unsigned int)ring_index < mgp->num_slices); 3885 3886 ss = &mgp->ss[ring_index]; 3887 switch (rtype) { 3888 case MAC_RING_TYPE_RX: 3889 ss->rx_rh = rh; 3890 infop->mri_driver = (mac_ring_driver_t)ss; 3891 infop->mri_start = myri10ge_ring_start; 3892 infop->mri_stop = NULL; 3893 infop->mri_poll = myri10ge_poll_rx; 3894 infop->mri_stat = myri10ge_rx_ring_stat; 3895 mintr->mi_handle = (mac_intr_handle_t)ss; 3896 mintr->mi_enable = myri10ge_rx_ring_intr_enable; 3897 mintr->mi_disable = myri10ge_rx_ring_intr_disable; 3898 break; 3899 case MAC_RING_TYPE_TX: 3900 ss->tx.rh = rh; 3901 infop->mri_driver = (mac_ring_driver_t)ss; 3902 infop->mri_start = NULL; 3903 infop->mri_stop = NULL; 3904 infop->mri_tx = myri10ge_send_wrapper; 3905 infop->mri_stat = myri10ge_tx_ring_stat; 3906 break; 3907 default: 3908 break; 3909 } 3910 } 3911 3912 static void 3913 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp) 3914 { 3915 if (mgp->ksp_stat == NULL) 3916 return; 3917 3918 kstat_delete(mgp->ksp_stat); 3919 mgp->ksp_stat = NULL; 3920 } 3921 3922 static void 3923 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss) 3924 { 3925 if (ss->ksp_stat == NULL) 3926 return; 3927 3928 kstat_delete(ss->ksp_stat); 3929 ss->ksp_stat = NULL; 3930 } 3931 3932 static void 3933 myri10ge_info_destroy(struct myri10ge_priv *mgp) 3934 { 3935 if (mgp->ksp_info == NULL) 3936 return; 3937 3938 kstat_delete(mgp->ksp_info); 3939 mgp->ksp_info = NULL; 3940 } 3941 3942 static int 3943 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw) 3944 { 3945 struct myri10ge_nic_stat *ethstat; 3946 struct myri10ge_priv *mgp; 3947 mcp_irq_data_t *fw_stats; 3948 3949 3950 if (rw == KSTAT_WRITE) 3951 return (EACCES); 3952 3953 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data; 3954 mgp = (struct myri10ge_priv *)ksp->ks_private; 3955 fw_stats = mgp->ss[0].fw_stats; 3956 3957 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma; 3958 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma; 3959 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma; 3960 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) 3961 ethstat->dma_force_physical.value.ul = 1; 3962 else 3963 ethstat->dma_force_physical.value.ul = 0; 3964 ethstat->lanes.value.ul = mgp->pcie_link_width; 3965 ethstat->dropped_bad_crc32.value.ul = 3966 ntohl(fw_stats->dropped_bad_crc32); 3967 
ethstat->dropped_bad_phy.value.ul = 3968 ntohl(fw_stats->dropped_bad_phy); 3969 ethstat->dropped_link_error_or_filtered.value.ul = 3970 ntohl(fw_stats->dropped_link_error_or_filtered); 3971 ethstat->dropped_link_overflow.value.ul = 3972 ntohl(fw_stats->dropped_link_overflow); 3973 ethstat->dropped_multicast_filtered.value.ul = 3974 ntohl(fw_stats->dropped_multicast_filtered); 3975 ethstat->dropped_no_big_buffer.value.ul = 3976 ntohl(fw_stats->dropped_no_big_buffer); 3977 ethstat->dropped_no_small_buffer.value.ul = 3978 ntohl(fw_stats->dropped_no_small_buffer); 3979 ethstat->dropped_overrun.value.ul = 3980 ntohl(fw_stats->dropped_overrun); 3981 ethstat->dropped_pause.value.ul = 3982 ntohl(fw_stats->dropped_pause); 3983 ethstat->dropped_runt.value.ul = 3984 ntohl(fw_stats->dropped_runt); 3985 ethstat->link_up.value.ul = 3986 ntohl(fw_stats->link_up); 3987 ethstat->dropped_unicast_filtered.value.ul = 3988 ntohl(fw_stats->dropped_unicast_filtered); 3989 return (0); 3990 } 3991 3992 static int 3993 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw) 3994 { 3995 struct myri10ge_slice_stat *ethstat; 3996 struct myri10ge_slice_state *ss; 3997 3998 if (rw == KSTAT_WRITE) 3999 return (EACCES); 4000 4001 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data; 4002 ss = (struct myri10ge_slice_state *)ksp->ks_private; 4003 4004 ethstat->rx_big.value.ul = ss->j_rx_cnt; 4005 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt; 4006 ethstat->rx_bigbuf_pool.value.ul = 4007 ss->jpool.num_alloc - ss->jbufs_for_smalls; 4008 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls; 4009 ethstat->rx_small.value.ul = ss->rx_small.cnt - 4010 (ss->rx_small.mask + 1); 4011 ethstat->tx_done.value.ul = ss->tx.done; 4012 ethstat->tx_req.value.ul = ss->tx.req; 4013 ethstat->tx_activate.value.ul = ss->tx.activate; 4014 ethstat->xmit_sched.value.ul = ss->tx.sched; 4015 ethstat->xmit_stall.value.ul = ss->tx.stall; 4016 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early; 4017 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late; 4018 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err); 4019 return (0); 4020 } 4021 4022 static int 4023 myri10ge_info_kstat_update(kstat_t *ksp, int rw) 4024 { 4025 struct myri10ge_info *info; 4026 struct myri10ge_priv *mgp; 4027 4028 4029 if (rw == KSTAT_WRITE) 4030 return (EACCES); 4031 4032 info = (struct myri10ge_info *)ksp->ks_data; 4033 mgp = (struct myri10ge_priv *)ksp->ks_private; 4034 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR); 4035 kstat_named_setstr(&info->firmware_version, mgp->fw_version); 4036 kstat_named_setstr(&info->firmware_name, mgp->fw_name); 4037 kstat_named_setstr(&info->interrupt_type, mgp->intr_type); 4038 kstat_named_setstr(&info->product_code, mgp->pc_str); 4039 kstat_named_setstr(&info->serial_number, mgp->sn_str); 4040 return (0); 4041 } 4042 4043 static struct myri10ge_info myri10ge_info_template = { 4044 { "driver_version", KSTAT_DATA_STRING }, 4045 { "firmware_version", KSTAT_DATA_STRING }, 4046 { "firmware_name", KSTAT_DATA_STRING }, 4047 { "interrupt_type", KSTAT_DATA_STRING }, 4048 { "product_code", KSTAT_DATA_STRING }, 4049 { "serial_number", KSTAT_DATA_STRING }, 4050 }; 4051 static kmutex_t myri10ge_info_template_lock; 4052 4053 4054 static int 4055 myri10ge_info_init(struct myri10ge_priv *mgp) 4056 { 4057 struct kstat *ksp; 4058 4059 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4060 "myri10ge_info", "net", KSTAT_TYPE_NAMED, 4061 sizeof (myri10ge_info_template) / 4062 sizeof (kstat_named_t), 
KSTAT_FLAG_VIRTUAL); 4063 if (ksp == NULL) { 4064 cmn_err(CE_WARN, 4065 "%s: myri10ge_info_init: kstat_create failed", mgp->name); 4066 return (DDI_FAILURE); 4067 } 4068 mgp->ksp_info = ksp; 4069 ksp->ks_update = myri10ge_info_kstat_update; 4070 ksp->ks_private = (void *) mgp; 4071 ksp->ks_data = &myri10ge_info_template; 4072 ksp->ks_lock = &myri10ge_info_template_lock; 4073 if (MYRI10GE_VERSION_STR != NULL) 4074 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1; 4075 if (mgp->fw_version != NULL) 4076 ksp->ks_data_size += strlen(mgp->fw_version) + 1; 4077 ksp->ks_data_size += strlen(mgp->fw_name) + 1; 4078 ksp->ks_data_size += strlen(mgp->intr_type) + 1; 4079 if (mgp->pc_str != NULL) 4080 ksp->ks_data_size += strlen(mgp->pc_str) + 1; 4081 if (mgp->sn_str != NULL) 4082 ksp->ks_data_size += strlen(mgp->sn_str) + 1; 4083 4084 kstat_install(ksp); 4085 return (DDI_SUCCESS); 4086 } 4087 4088 4089 static int 4090 myri10ge_nic_stat_init(struct myri10ge_priv *mgp) 4091 { 4092 struct kstat *ksp; 4093 struct myri10ge_nic_stat *ethstat; 4094 4095 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4096 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED, 4097 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4098 if (ksp == NULL) { 4099 cmn_err(CE_WARN, 4100 "%s: myri10ge_nic_stat_init: kstat_create failed", mgp->name); 4101 return (DDI_FAILURE); 4102 } 4103 mgp->ksp_stat = ksp; 4104 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data); 4105 4106 kstat_named_init(&ethstat->dma_read_bw_MBs, 4107 "dma_read_bw_MBs", KSTAT_DATA_ULONG); 4108 kstat_named_init(&ethstat->dma_write_bw_MBs, 4109 "dma_write_bw_MBs", KSTAT_DATA_ULONG); 4110 kstat_named_init(&ethstat->dma_read_write_bw_MBs, 4111 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG); 4112 kstat_named_init(&ethstat->dma_force_physical, 4113 "dma_force_physical", KSTAT_DATA_ULONG); 4114 kstat_named_init(&ethstat->lanes, 4115 "lanes", KSTAT_DATA_ULONG); 4116 kstat_named_init(&ethstat->dropped_bad_crc32, 4117 "dropped_bad_crc32", KSTAT_DATA_ULONG); 4118 kstat_named_init(&ethstat->dropped_bad_phy, 4119 "dropped_bad_phy", KSTAT_DATA_ULONG); 4120 kstat_named_init(&ethstat->dropped_link_error_or_filtered, 4121 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG); 4122 kstat_named_init(&ethstat->dropped_link_overflow, 4123 "dropped_link_overflow", KSTAT_DATA_ULONG); 4124 kstat_named_init(&ethstat->dropped_multicast_filtered, 4125 "dropped_multicast_filtered", KSTAT_DATA_ULONG); 4126 kstat_named_init(&ethstat->dropped_no_big_buffer, 4127 "dropped_no_big_buffer", KSTAT_DATA_ULONG); 4128 kstat_named_init(&ethstat->dropped_no_small_buffer, 4129 "dropped_no_small_buffer", KSTAT_DATA_ULONG); 4130 kstat_named_init(&ethstat->dropped_overrun, 4131 "dropped_overrun", KSTAT_DATA_ULONG); 4132 kstat_named_init(&ethstat->dropped_pause, 4133 "dropped_pause", KSTAT_DATA_ULONG); 4134 kstat_named_init(&ethstat->dropped_runt, 4135 "dropped_runt", KSTAT_DATA_ULONG); 4136 kstat_named_init(&ethstat->dropped_unicast_filtered, 4137 "dropped_unicast_filtered", KSTAT_DATA_ULONG); 4138 4139 4140 kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG); 4141 kstat_named_init(&ethstat->link_changes, "link_changes", 4142 KSTAT_DATA_ULONG); 4143 ksp->ks_update = myri10ge_nic_stat_kstat_update; 4144 ksp->ks_private = (void *) mgp; 4145 kstat_install(ksp); 4146 return (DDI_SUCCESS); 4147 } 4148 4149 static int 4150 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss) 4151 { 4152 struct myri10ge_priv *mgp = ss->mgp; 4153 struct kstat *ksp; 4154 struct myri10ge_slice_stat *ethstat; 4155 int instance; 4156 4157 /* 4158 * fake an instance so that the same slice numbers from 4159 * different instances do not collide 4160 */ 4161 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss); 4162 ksp = kstat_create("myri10ge", instance, 4163 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED, 4164 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4165 if (ksp == NULL) { 4166 cmn_err(CE_WARN, 4167 "%s: myri10ge_slice_stat_init: kstat_create failed", mgp->name); 4168 return (DDI_FAILURE); 4169 } 4170 ss->ksp_stat = ksp; 4171 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data); 4172 kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum", 4173 KSTAT_DATA_ULONG); 4174 kstat_named_init(&ethstat->lro_flushed, "lro_flushed", 4175 KSTAT_DATA_ULONG); 4176 kstat_named_init(&ethstat->lro_queued, "lro_queued", 4177 KSTAT_DATA_ULONG); 4178 kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware", 4179 KSTAT_DATA_ULONG); 4180 kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool", 4181 KSTAT_DATA_ULONG); 4182 kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls", 4183 KSTAT_DATA_ULONG); 4184 kstat_named_init(&ethstat->rx_copy, "rx_copy", 4185 KSTAT_DATA_ULONG); 4186 kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf", 4187 KSTAT_DATA_ULONG); 4188 kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf", 4189 KSTAT_DATA_ULONG); 4190 kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len", 4191 KSTAT_DATA_ULONG); 4192 kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup", 4193 KSTAT_DATA_ULONG); 4194 kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first", 4195 KSTAT_DATA_ULONG); 4196 kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf", 4197 KSTAT_DATA_ULONG); 4198 kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags", 4199 KSTAT_DATA_ULONG); 4200 kstat_named_init(&ethstat->xmit_sched, "xmit_sched", 4201 KSTAT_DATA_ULONG); 4202 kstat_named_init(&ethstat->xmit_stall, "xmit_stall", 4203 KSTAT_DATA_ULONG); 4204 kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early", 4205 KSTAT_DATA_ULONG); 4206 kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late", 4207 KSTAT_DATA_ULONG); 4208 kstat_named_init(&ethstat->xmit_err, "xmit_err", 4209 KSTAT_DATA_ULONG); 4210 kstat_named_init(&ethstat->tx_req, "tx_req", 4211 KSTAT_DATA_ULONG); 4212 kstat_named_init(&ethstat->tx_activate, "tx_activate", 4213 KSTAT_DATA_ULONG); 4214 kstat_named_init(&ethstat->tx_done, "tx_done", 4215 KSTAT_DATA_ULONG); 4216 kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced", 4217 KSTAT_DATA_ULONG); 4218 kstat_named_init(&ethstat->rx_big, "rx_big", 4219 KSTAT_DATA_ULONG); 4220 kstat_named_init(&ethstat->rx_small, "rx_small", 4221 KSTAT_DATA_ULONG); 4222 ksp->ks_update = myri10ge_slice_stat_kstat_update; 4223 ksp->ks_private = (void *) ss; 4224 kstat_install(ksp); 4225 return (DDI_SUCCESS); 4226 } 4227 4228 4229 4230 #if #cpu(i386) || defined __i386 || defined i386 || \ 4231 defined __i386__ || #cpu(x86_64) || defined __x86_64__ 4232 4233 #include <vm/hat.h> 4234 #include <sys/ddi_isa.h> 4235 void *device_arena_alloc(size_t size, int vm_flag); 4236 void device_arena_free(void *vaddr, size_t size); 4237 4238 static void 4239 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4240 { 4241 dev_info_t *parent_dip; 4242 ddi_acc_handle_t handle; 4243 unsigned long bus_number, dev_number, func_number; 4244 unsigned long cfg_pa, paddr, base, pgoffset; 4245 char *cvaddr, *ptr; 4246 uint32_t *ptr32; 4247 int retval = DDI_FAILURE; 4248 int dontcare; 4249 uint16_t read_vid, read_did, vendor_id, device_id; 4250 4251
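/* The parent bridge's config space is reached by physical address below, since the DDI offers no access handle for another device's registers; the 0xe0000000-0xf0000000 probe range and the ECRC enable bit at offset 0x178 are specific to these NVIDIA chipsets. */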
if (!myri10ge_nvidia_ecrc_enable) 4252 return; 4253 4254 parent_dip = ddi_get_parent(mgp->dip); 4255 if (parent_dip == NULL) { 4256 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name); 4257 return; 4258 } 4259 4260 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) { 4261 cmn_err(CE_WARN, 4262 "%s: Could not access my parent's registers", mgp->name); 4263 return; 4264 } 4265 4266 vendor_id = pci_config_get16(handle, PCI_CONF_VENID); 4267 device_id = pci_config_get16(handle, PCI_CONF_DEVID); 4268 pci_config_teardown(&handle); 4269 4270 if (myri10ge_verbose) { 4271 unsigned long bus_number, dev_number, func_number; 4272 int reg_set, span; 4273 (void) myri10ge_reg_set(parent_dip, &reg_set, &span, 4274 &bus_number, &dev_number, &func_number); 4275 4276 printf("%s: parent at %ld:%ld:%ld\n", mgp->name, 4277 bus_number, dev_number, func_number); 4278 } 4279 4280 if (vendor_id != 0x10de) 4281 return; 4282 4283 if (device_id != 0x005d /* CK804 */ && 4284 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) { 4285 return; 4286 } 4287 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare, 4288 &bus_number, &dev_number, &func_number); 4289 4290 for (cfg_pa = 0xf0000000UL; 4291 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL; 4292 cfg_pa -= 0x10000000UL) { 4293 /* find the config space address for the nvidia bridge */ 4294 paddr = (cfg_pa + bus_number * 0x00100000UL + 4295 (dev_number * 8 + func_number) * 0x00001000UL); 4296 4297 base = paddr & (~MMU_PAGEOFFSET); 4298 pgoffset = paddr & MMU_PAGEOFFSET; 4299 4300 /* map it into the kernel */ 4301 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP); 4302 if (cvaddr == NULL) { 4303 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n", mgp->name); 4304 break; 4305 } 4306 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1), 4307 i_ddi_paddr_to_pfn(base), 4308 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK); 4309 4310 ptr = cvaddr + pgoffset; 4311 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID); 4312 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID); 4313 if (vendor_id == read_vid && device_id == read_did) { 4314 ptr32 = (uint32_t *)(void *)(ptr + 0x178); 4315 if (myri10ge_verbose) 4316 printf("%s: Enabling ECRC on upstream " 4317 "Nvidia bridge (0x%x:0x%x) " 4318 "at %ld:%ld:%ld\n", mgp->name, 4319 read_vid, read_did, bus_number, 4320 dev_number, func_number); 4321 *ptr32 |= 0x40; 4322 retval = DDI_SUCCESS; 4323 } 4324 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK); 4325 device_arena_free(cvaddr, ptob(1)); 4326 } 4327 } 4328 4329 #else 4330 /*ARGSUSED*/ 4331 static void 4332 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4333 { 4334 } 4335 #endif /* i386 */ 4336 4337 4338 /* 4339 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 4340 * when the PCI-E Completion packets are aligned on an 8-byte 4341 * boundary. Some PCI-E chip sets always align Completion packets; on 4342 * the ones that do not, the alignment can be enforced by enabling 4343 * ECRC generation (if supported). 4344 * 4345 * When PCI-E Completion packets are not aligned, it is actually more 4346 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 4347 * 4348 * If the driver can neither enable ECRC nor verify that it has 4349 * already been enabled, then it must use a firmware image which works 4350 * around unaligned completion packets (ethp_z8e.dat), and it should 4351 * also ensure that it never gives the device a Read-DMA which is 4352 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 4353 * enabled, then the driver should use the aligned (eth_z8e.dat) 4354 * firmware image, and set tx.boundary to 4KB. 4355 */ 4356 4357 4358 static int 4359 myri10ge_firmware_probe(struct myri10ge_priv *mgp) 4360 { 4361 int status; 4362 4363 mgp->tx_boundary = 4096; 4364 /* 4365 * Verify the max read request size was set to 4KB 4366 * before trying the test with 4KB. 4367 */ 4368 if (mgp->max_read_request_4k == 0) 4369 mgp->tx_boundary = 2048; 4370 /* 4371 * load the optimized firmware which assumes aligned PCIe 4372 * completions in order to see if it works on this host. 4373 */ 4374 4375 mgp->fw_name = "rss_eth_z8e"; 4376 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4377 mgp->eth_z8e_length = rss_eth_z8e_length; 4378 4379 status = myri10ge_load_firmware(mgp); 4380 if (status != 0) { 4381 return (status); 4382 } 4383 /* 4384 * Enable ECRC if possible 4385 */ 4386 myri10ge_enable_nvidia_ecrc(mgp); 4387 4388 /* 4389 * Run a DMA test which watches for unaligned completions and 4390 * aborts on the first one seen. 4391 */ 4392 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 4393 if (status == 0) 4394 return (0); /* keep the aligned firmware */ 4395 4396 if (status != E2BIG) 4397 cmn_err(CE_WARN, "%s: DMA test failed: %d\n", 4398 mgp->name, status); 4399 if (status == ENOSYS) 4400 cmn_err(CE_WARN, "%s: Falling back to ethp! " 4401 "Please install up to date fw\n", mgp->name); 4402 return (status); 4403 } 4404 4405 static int 4406 myri10ge_select_firmware(struct myri10ge_priv *mgp) 4407 { 4408 int aligned; 4409 4410 aligned = 0; 4411 4412 if (myri10ge_force_firmware == 1) { 4413 if (myri10ge_verbose) 4414 printf("%s: Assuming aligned completions (forced)\n", 4415 mgp->name); 4416 aligned = 1; 4417 goto done; 4418 } 4419 4420 if (myri10ge_force_firmware == 2) { 4421 if (myri10ge_verbose) 4422 printf("%s: Assuming unaligned completions (forced)\n", 4423 mgp->name); 4424 aligned = 0; 4425 goto done; 4426 } 4427 4428 /* If the width is less than 8, we may use the aligned firmware */ 4429 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) { 4430 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n", 4431 mgp->name, mgp->pcie_link_width); 4432 aligned = 1; 4433 goto done; 4434 } 4435 4436 if (0 == myri10ge_firmware_probe(mgp)) 4437 return (0); /* keep optimized firmware */ 4438 4439 done: 4440 if (aligned) { 4441 mgp->fw_name = "rss_eth_z8e"; 4442 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4443 mgp->eth_z8e_length = rss_eth_z8e_length; 4444 mgp->tx_boundary = 4096; 4445 } else { 4446 mgp->fw_name = "rss_ethp_z8e"; 4447 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e; 4448 mgp->eth_z8e_length = rss_ethp_z8e_length; 4449 mgp->tx_boundary = 2048; 4450 } 4451 4452 return (myri10ge_load_firmware(mgp)); 4453 } 4454 4455 static int 4456 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler) 4457 { 4458 dev_info_t *devinfo = mgp->dip; 4459 int count, avail, actual, intr_types; 4460 int x, y, rc, inum = 0; 4461 4462 4463 rc = ddi_intr_get_supported_types(devinfo, &intr_types); 4464 if (rc != DDI_SUCCESS) { 4465 cmn_err(CE_WARN, 4466 "!%s: ddi_intr_get_supported_types() failure, rc = %d\n", 4467 mgp->name, rc); 4468 return (DDI_FAILURE); 4469 } 4470 4471 if (!myri10ge_use_msi) 4472 intr_types &= ~DDI_INTR_TYPE_MSI; 4473 if (!myri10ge_use_msix) 4474 intr_types &= ~DDI_INTR_TYPE_MSIX; 4475 4476 if (intr_types & DDI_INTR_TYPE_MSIX) { 4477 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX; 4478 mgp->intr_type = "MSI-X"; 4479 } else if (intr_types & DDI_INTR_TYPE_MSI) { 4480 mgp->ddi_intr_type =
DDI_INTR_TYPE_MSI; 4481 mgp->intr_type = "MSI"; 4482 } else { 4483 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED; 4484 mgp->intr_type = "Legacy"; 4485 } 4486 /* Get number of interrupts */ 4487 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count); 4488 if ((rc != DDI_SUCCESS) || (count == 0)) { 4489 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, " 4490 "count: %d", mgp->name, rc, count); 4491 4492 return (DDI_FAILURE); 4493 } 4494 4495 /* Get number of available interrupts */ 4496 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail); 4497 if ((rc != DDI_SUCCESS) || (avail == 0)) { 4498 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, " 4499 "rc: %d, avail: %d\n", mgp->name, rc, avail); 4500 return (DDI_FAILURE); 4501 } 4502 if (avail < count) { 4503 cmn_err(CE_NOTE, 4504 "!%s: nintrs() returned %d, navail returned %d", 4505 mgp->name, count, avail); 4506 count = avail; 4507 } 4508 4509 if (count < mgp->num_slices) 4510 return (DDI_FAILURE); 4511 4512 if (count > mgp->num_slices) 4513 count = mgp->num_slices; 4514 4515 /* Allocate memory for MSI interrupts */ 4516 mgp->intr_size = count * sizeof (ddi_intr_handle_t); 4517 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP); 4518 4519 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum, 4520 count, &actual, DDI_INTR_ALLOC_NORMAL); 4521 4522 if ((rc != DDI_SUCCESS) || (actual == 0)) { 4523 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d", 4524 mgp->name, rc); 4525 4526 kmem_free(mgp->htable, mgp->intr_size); 4527 mgp->htable = NULL; 4528 return (DDI_FAILURE); 4529 } 4530 4531 if ((actual < count) && myri10ge_verbose) { 4532 cmn_err(CE_NOTE, "%s: got %d/%d slices", 4533 mgp->name, actual, count); 4534 } 4535 4536 mgp->intr_cnt = actual; 4537 4538 /* 4539 * Get priority for first irq, assume remaining are all the same 4540 */ 4541 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri) 4542 != DDI_SUCCESS) { 4543 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name); 4544 4545 /* Free already allocated intr */ 4546 for (y = 0; y < actual; y++) { 4547 (void) ddi_intr_free(mgp->htable[y]); 4548 } 4549 4550 kmem_free(mgp->htable, mgp->intr_size); 4551 mgp->htable = NULL; 4552 return (DDI_FAILURE); 4553 } 4554 4555 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri; 4556 4557 if (!add_handler) 4558 return (DDI_SUCCESS); 4559 4560 /* Call ddi_intr_add_handler() */ 4561 for (x = 0; x < actual; x++) { 4562 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr, 4563 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) { 4564 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed", 4565 mgp->name); 4566 4567 /* Free already allocated intr */ 4568 for (y = 0; y < actual; y++) { 4569 (void) ddi_intr_free(mgp->htable[y]); 4570 } 4571 4572 kmem_free(mgp->htable, mgp->intr_size); 4573 mgp->htable = NULL; 4574 return (DDI_FAILURE); 4575 } 4576 } 4577 4578 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap); 4579 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4580 /* Call ddi_intr_block_enable() for MSI */ 4581 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt); 4582 } else { 4583 /* Call ddi_intr_enable() for MSI non block enable */ 4584 for (x = 0; x < mgp->intr_cnt; x++) { 4585 (void) ddi_intr_enable(mgp->htable[x]); 4586 } 4587 } 4588 4589 return (DDI_SUCCESS); 4590 } 4591 4592 static void 4593 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed) 4594 { 4595 int x, err; 4596 4597 /* Disable all interrupts */ 4598 if (handler_installed) { 4599 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4600 /* Call 
ddi_intr_block_disable() */ 4601 (void) ddi_intr_block_disable(mgp->htable, 4602 mgp->intr_cnt); 4603 } else { 4604 for (x = 0; x < mgp->intr_cnt; x++) { 4605 (void) ddi_intr_disable(mgp->htable[x]); 4606 } 4607 } 4608 } 4609 4610 for (x = 0; x < mgp->intr_cnt; x++) { 4611 if (handler_installed) { 4612 /* Call ddi_intr_remove_handler() */ 4613 err = ddi_intr_remove_handler(mgp->htable[x]); 4614 if (err != DDI_SUCCESS) { 4615 cmn_err(CE_WARN, 4616 "%s: ddi_intr_remove_handler for " 4617 "vec %d returned %d\n", mgp->name, 4618 x, err); 4619 } 4620 } 4621 err = ddi_intr_free(mgp->htable[x]); 4622 if (err != DDI_SUCCESS) { 4623 cmn_err(CE_WARN, 4624 "%s: ddi_intr_free for vec %d returned %d\n", 4625 mgp->name, x, err); 4626 } 4627 } 4628 kmem_free(mgp->htable, mgp->intr_size); 4629 mgp->htable = NULL; 4630 } 4631 4632 static void 4633 myri10ge_test_physical(dev_info_t *dip) 4634 { 4635 ddi_dma_handle_t handle; 4636 struct myri10ge_dma_stuff dma; 4637 void *addr; 4638 int err; 4639 4640 /* test #1, sufficient for older sparc systems */ 4641 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 4642 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr, 4643 DDI_DMA_DONTWAIT, NULL, &handle); 4644 if (err == DDI_DMA_BADATTR) 4645 goto fail; 4646 ddi_dma_free_handle(&handle); 4647 4648 /* test #2, required on Olympus where the bind is what fails */ 4649 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr, 4650 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, 4651 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT); 4652 if (addr == NULL) 4653 goto fail; 4654 myri10ge_dma_free(&dma); 4655 return; 4656 4657 fail: 4658 if (myri10ge_verbose) 4659 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, " 4660 "using IOMMU\n", ddi_get_instance(dip)); 4661 4662 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL; 4663 } 4664 4665 static void 4666 myri10ge_get_props(dev_info_t *dip) 4667 { 4668 4669 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4670 "myri10ge_flow_control", myri10ge_flow_control); 4671 4672 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4673 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay); 4674 4675 #if #cpu(i386) || defined __i386 || defined i386 || \ 4676 defined __i386__ || #cpu(x86_64) || defined __x86_64__ 4677 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4678 "myri10ge_nvidia_ecrc_enable", 1); 4679 #endif 4680 4681 4682 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4683 "myri10ge_use_msi", myri10ge_use_msi); 4684 4685 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4686 "myri10ge_deassert_wait", myri10ge_deassert_wait); 4687 4688 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4689 "myri10ge_verbose", myri10ge_verbose); 4690 4691 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4692 "myri10ge_tx_copylen", myri10ge_tx_copylen); 4693 4694 if (myri10ge_tx_copylen < 60) { 4695 cmn_err(CE_WARN, 4696 "myri10ge_tx_copylen must be >= 60 bytes\n"); 4697 myri10ge_tx_copylen = 60; 4698 } 4699 4700 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4701 "myri10ge_mtu_override", myri10ge_mtu_override); 4702 4703 if (myri10ge_mtu_override >= 1500 && myri10ge_mtu_override <= 9000) 4704 myri10ge_mtu = myri10ge_mtu_override + 4705 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ; 4706 else if (myri10ge_mtu_override != 0) { 4707 cmn_err(CE_WARN, 4708 "myri10ge_mtu_override must be between 1500 and " 4709 "9000 bytes\n"); 4710 } 4711 4712 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4713 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial); 4714 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4715 "myri10ge_bigbufs_max", myri10ge_bigbufs_max); 4716 4717 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4718 "myri10ge_watchdog_reset", myri10ge_watchdog_reset); 4719 4720 if (myri10ge_bigbufs_initial < 128) { 4721 cmn_err(CE_WARN, 4722 "myri10ge_bigbufs_initial must be at least 128\n"); 4723 myri10ge_bigbufs_initial = 128; 4724 } 4725 if (myri10ge_bigbufs_max < 128) { 4726 cmn_err(CE_WARN, 4727 "myri10ge_bigbufs_max must be at least 128\n"); 4728 myri10ge_bigbufs_max = 128; 4729 } 4730 4731 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) { 4732 cmn_err(CE_WARN, 4733 "myri10ge_bigbufs_max must be >= " 4734 "myri10ge_bigbufs_initial\n"); 4735 myri10ge_bigbufs_max = myri10ge_bigbufs_initial; 4736 } 4737 4738 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4739 "myri10ge_force_firmware", myri10ge_force_firmware); 4740 4741 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4742 "myri10ge_max_slices", myri10ge_max_slices); 4743 4744 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4745 "myri10ge_use_msix", myri10ge_use_msix); 4746 4747 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4748 "myri10ge_rss_hash", myri10ge_rss_hash); 4749 4750 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX || 4751 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) { 4752 cmn_err(CE_WARN, "myri10ge: Illegal rss hash type %d\n", 4753 myri10ge_rss_hash); 4754 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4755 } 4756 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4757 "myri10ge_lro", myri10ge_lro); 4758 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4759 "myri10ge_lro_cnt", myri10ge_lro_cnt); 4760 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4761 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr); 4762 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4763 "myri10ge_tx_hash", myri10ge_tx_hash); 4764 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4765 "myri10ge_use_lso", myri10ge_use_lso); 4766 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4767 "myri10ge_lso_copy", myri10ge_lso_copy); 4768 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4769 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial); 4770 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4771 "myri10ge_small_bytes", myri10ge_small_bytes); 4772 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 - 1)) { 4773 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n", 4774 myri10ge_small_bytes); 4775 cmn_err(CE_WARN, "must be 2 less than a 128-byte boundary\n"); 4776 myri10ge_small_bytes += 128; 4777 myri10ge_small_bytes &= ~(128 - 1); 4778 myri10ge_small_bytes -= MXGEFW_PAD; 4779 cmn_err(CE_WARN, "rounded up to %d\n", 4780 myri10ge_small_bytes); 4781 4782 4783 } 4784 } 4785 4786 #ifndef PCI_EXP_LNKSTA 4787 #define PCI_EXP_LNKSTA 18 4788 #endif 4789 4790 static int 4791 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid) 4792 { 4793 uint16_t status; 4794 uint8_t ptr; 4795 4796 /* check to see if we have capabilities */ 4797 status = pci_config_get16(handle, PCI_CONF_STAT); 4798 if (!(status & PCI_STAT_CAP)) { 4799 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n"); 4800 return (ENXIO); 4801 } 4802 4803 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR); 4804 4805 /* Walk the capabilities list, looking for the requested cap */ 4806 while (ptr != PCI_CAP_NEXT_PTR_NULL) { 4807 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid) 4808 break; 4809 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR); 4810 } 4811 if (ptr < 64) { 4812 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr); 4813 return (ENXIO); 4814 } 4815 *capptr = ptr; 4816 return (0); 4817 } 4818 4819 static int 4820 myri10ge_set_max_readreq(ddi_acc_handle_t handle) 4821 { 4822 int err; 4823 uint16_t val; 4824 uint8_t ptr; 4825 4826 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4827 if (err != 0) { 4828 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4829 return (ENXIO); 4830 } 4831 4832 /* set max read req to 4096 */ 4833 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4834 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) | 4835 PCIE_DEVCTL_MAX_READ_REQ_4096; 4836 pci_config_put16(handle, ptr + PCIE_DEVCTL, val); 4837 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4838 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) != 4839 PCIE_DEVCTL_MAX_READ_REQ_4096) { 4840 cmn_err(CE_WARN, "could not set max read req (%x)\n", val); 4841 return (EINVAL); 4842 } 4843 return (0); 4844 } 4845 4846 static int 4847 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link) 4848 { 4849 int err; 4850 uint16_t val; 4851 uint8_t ptr; 4852 4853 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4854 if (err != 0) { 4855 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4856 return (ENXIO); 4857 } 4858 4859 /* read link width */ 4860 val = pci_config_get16(handle, ptr + PCIE_LINKSTS); 4861 val &= PCIE_LINKSTS_NEG_WIDTH_MASK; 4862 *link = (val >> 4); 4863 return (0); 4864 } 4865 4866 static int 4867 myri10ge_reset_nic(struct myri10ge_priv *mgp) 4868 { 4869 ddi_acc_handle_t handle = mgp->cfg_hdl; 4870 uint32_t reboot; 4871 uint16_t cmd; 4872 int err; 4873 4874 cmd = pci_config_get16(handle, PCI_CONF_COMM); 4875 if ((cmd & PCI_COMM_ME) == 0) { 4876 /* 4877 * Bus master DMA disabled? Check to see if the card 4878 * rebooted due to a parity error. For now, just report 4879 * it. 4880 */ 4881 4882 /* enter read32 mode */ 4883 pci_config_put8(handle, mgp->vso + 0x10, 0x3); 4884 /* read REBOOT_STATUS (0xfffffff0) */ 4885 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0); 4886 reboot = pci_config_get32(handle, mgp->vso + 0x14); 4887 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot); 4888 return (0); 4889 } 4890 if (!myri10ge_watchdog_reset) { 4891 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name); 4892 return (1); 4893 } 4894 4895 myri10ge_stop_locked(mgp); 4896 err = myri10ge_start_locked(mgp); 4897 if (err == DDI_FAILURE) { 4898 return (0); 4899 } 4900 mac_tx_update(mgp->mh); 4901 return (1); 4902 } 4903 4904 static inline int 4905 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx) 4906 { 4907 if (tx->sched != tx->stall && 4908 tx->done == tx->watchdog_done && 4909 tx->watchdog_req != tx->watchdog_done) 4910 return (1); 4911 return (0); 4912 } 4913 4914 static void 4915 myri10ge_watchdog(void *arg) 4916 { 4917 struct myri10ge_priv *mgp; 4918 struct myri10ge_slice_state *ss; 4919 myri10ge_tx_ring_t *tx; 4920 int nic_ok = 1; 4921 int slices_stalled, rx_pause, i; 4922 int add_rx; 4923 4924 mgp = arg; 4925 mutex_enter(&mgp->intrlock); 4926 if (mgp->running != MYRI10GE_ETH_RUNNING) { 4927 cmn_err(CE_WARN, 4928 "%s not running, not rearming watchdog (%d)\n", 4929 mgp->name, mgp->running); 4930 mutex_exit(&mgp->intrlock); 4931 return; 4932 } 4933 4934 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause); 4935 4936 /* 4937 * make sure nic is stalled before we reset the nic, so as to 4938 * ensure we don't rip the transmit data structures out from 4939 * under a pending transmit 4940 */ 4941 4942 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) { 4943 tx = &mgp->ss[i].tx; 4944 slices_stalled = myri10ge_ring_stalled(tx); 4945 if (slices_stalled) 4946 break; 4947 } 4948 4949 if (slices_stalled) { 4950 if (mgp->watchdog_rx_pause == rx_pause) { 4951 cmn_err(CE_WARN, 4952 "%s slice %d stalled:(%d, %d, %d, %d, %d, %d, %d)\n", 4953 mgp->name, i, tx->sched, tx->stall, 4954 tx->done, tx->watchdog_done, tx->req, tx->pkt_done, 4955 (int)ntohl(mgp->ss[i].fw_stats->send_done_count)); 4956 nic_ok = myri10ge_reset_nic(mgp); 4957 } else { 4958 cmn_err(CE_WARN, 4959 "%s Flow controlled, check link partner\n", 4960 mgp->name); 4961 } 4962 } 4963 4964 if (!nic_ok) { 4965 cmn_err(CE_WARN, 4966 "%s Nic dead, not rearming watchdog\n", mgp->name); 4967 mutex_exit(&mgp->intrlock); 4968 return; 4969 } 4970 for (i = 0; i < mgp->num_slices; i++) { 4971 ss = &mgp->ss[i]; 4972 tx = &ss->tx; 4973 tx->watchdog_done = tx->done; 4974 tx->watchdog_req = tx->req; 4975 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) { 4976 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy); 4977 add_rx = 4978 min(ss->jpool.num_alloc, 4979 myri10ge_bigbufs_max - 4980 (ss->jpool.num_alloc - 4981 ss->jbufs_for_smalls)); 4982 if (add_rx != 0) { 4983 (void) myri10ge_add_jbufs(ss, add_rx, 0); 4984 /* now feed them to the firmware */ 4985 mutex_enter(&ss->jpool.mtx); 4986 myri10ge_restock_jumbos(ss); 4987 mutex_exit(&ss->jpool.mtx); 4988 } 4989 } 4990 } 4991 mgp->watchdog_rx_pause = rx_pause; 4992 4993 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 4994 mgp->timer_ticks); 4995 mutex_exit(&mgp->intrlock); 4996 } 4997 4998 /*ARGSUSED*/ 4999 static int 5000 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5001 5002 { 5003 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5004
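/* ndd-style get handler: print the current interrupt coalescing delay into the reply mblk */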
(void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay); 5005 return (0); 5006 } 5007 5008 /*ARGSUSED*/ 5009 static int 5010 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value, 5011 caddr_t cp, cred_t *credp) 5012 5013 { 5014 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5015 char *end; 5016 size_t new_value; 5017 5018 new_value = mi_strtol(value, &end, 10); 5019 if (end == value) 5020 return (EINVAL); 5021 5022 mutex_enter(&myri10ge_param_lock); 5023 mgp->intr_coal_delay = (int)new_value; 5024 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay); 5025 mutex_exit(&myri10ge_param_lock); 5026 return (0); 5027 } 5028 5029 /*ARGSUSED*/ 5030 static int 5031 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5032 5033 { 5034 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5035 (void) mi_mpprintf(mp, "%d", mgp->pause); 5036 return (0); 5037 } 5038 5039 /*ARGSUSED*/ 5040 static int 5041 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value, 5042 caddr_t cp, cred_t *credp) 5043 5044 { 5045 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5046 char *end; 5047 size_t new_value; 5048 int err = 0; 5049 5050 new_value = mi_strtol(value, &end, 10); 5051 if (end == value) 5052 return (EINVAL); 5053 if (new_value != 0) 5054 new_value = 1; 5055 5056 mutex_enter(&myri10ge_param_lock); 5057 if (new_value != mgp->pause) 5058 err = myri10ge_change_pause(mgp, new_value); 5059 mutex_exit(&myri10ge_param_lock); 5060 return (err); 5061 } 5062 5063 /*ARGSUSED*/ 5064 static int 5065 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5066 5067 { 5068 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp); 5069 return (0); 5070 } 5071 5072 /*ARGSUSED*/ 5073 static int 5074 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value, 5075 caddr_t cp, cred_t *credp) 5076 5077 { 5078 char *end; 5079 size_t new_value; 5080 5081 new_value = mi_strtol(value, &end, 10); 5082 if (end == value) 5083 return (EINVAL); 5084 *(int *)(void *)cp = new_value; 5085 5086 return (0); 5087 } 5088 5089 static void 5090 myri10ge_ndd_init(struct myri10ge_priv *mgp) 5091 { 5092 mgp->nd_head = NULL; 5093 5094 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay", 5095 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp); 5096 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control", 5097 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp); 5098 (void) nd_load(&mgp->nd_head, "myri10ge_verbose", 5099 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose); 5100 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait", 5101 myri10ge_get_int, myri10ge_set_int, 5102 (caddr_t)&myri10ge_deassert_wait); 5103 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max", 5104 myri10ge_get_int, myri10ge_set_int, 5105 (caddr_t)&myri10ge_bigbufs_max); 5106 (void) nd_load(&mgp->nd_head, "myri10ge_lro", 5107 myri10ge_get_int, myri10ge_set_int, 5108 (caddr_t)&myri10ge_lro); 5109 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr", 5110 myri10ge_get_int, myri10ge_set_int, 5111 (caddr_t)&myri10ge_lro_max_aggr); 5112 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash", 5113 myri10ge_get_int, myri10ge_set_int, 5114 (caddr_t)&myri10ge_tx_hash); 5115 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy", 5116 myri10ge_get_int, myri10ge_set_int, 5117 (caddr_t)&myri10ge_lso_copy); 5118 } 5119 5120 static void 5121 myri10ge_ndd_fini(struct myri10ge_priv *mgp) 5122 { 5123 nd_free(&mgp->nd_head); 5124 } 5125 5126 static void 5127 myri10ge_m_ioctl(void *arg, queue_t *wq, 
mblk_t *mp) 5128 { 5129 struct iocblk *iocp; 5130 struct myri10ge_priv *mgp = arg; 5131 int cmd, ok, err; 5132 5133 iocp = (struct iocblk *)(void *)mp->b_rptr; 5134 cmd = iocp->ioc_cmd; 5135 5136 ok = 0; 5137 err = 0; 5138 5139 switch (cmd) { 5140 case ND_GET: 5141 case ND_SET: 5142 ok = nd_getset(wq, mgp->nd_head, mp); 5143 break; 5144 default: 5145 break; 5146 } 5147 if (!ok) 5148 err = EINVAL; 5149 else 5150 err = iocp->ioc_error; 5151 5152 if (!err) 5153 miocack(wq, mp, iocp->ioc_count, err); 5154 else 5155 miocnak(wq, mp, 0, err); 5156 } 5157 5158 static struct myri10ge_priv *mgp_list; 5159 5160 struct myri10ge_priv * 5161 myri10ge_get_instance(uint_t unit) 5162 { 5163 struct myri10ge_priv *mgp; 5164 5165 mutex_enter(&myri10ge_param_lock); 5166 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) { 5167 if (unit == ddi_get_instance(mgp->dip)) { 5168 mgp->refcnt++; 5169 break; 5170 } 5171 } 5172 mutex_exit(&myri10ge_param_lock); 5173 return (mgp); 5174 } 5175 5176 void 5177 myri10ge_put_instance(struct myri10ge_priv *mgp) 5178 { 5179 mutex_enter(&myri10ge_param_lock); 5180 mgp->refcnt--; 5181 mutex_exit(&myri10ge_param_lock); 5182 } 5183 5184 static boolean_t 5185 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) 5186 { 5187 struct myri10ge_priv *mgp = arg; 5188 uint32_t *cap_hcksum; 5189 mac_capab_lso_t *cap_lso; 5190 mac_capab_rings_t *cap_rings; 5191 5192 switch (cap) { 5193 case MAC_CAPAB_HCKSUM: 5194 cap_hcksum = cap_data; 5195 *cap_hcksum = HCKSUM_INET_PARTIAL; 5196 break; 5197 case MAC_CAPAB_RINGS: 5198 cap_rings = cap_data; 5199 switch (cap_rings->mr_type) { 5200 case MAC_RING_TYPE_RX: 5201 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5202 cap_rings->mr_rnum = mgp->num_slices; 5203 cap_rings->mr_gnum = 1; 5204 cap_rings->mr_rget = myri10ge_fill_ring; 5205 cap_rings->mr_gget = myri10ge_fill_group; 5206 break; 5207 case MAC_RING_TYPE_TX: 5208 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5209 cap_rings->mr_rnum = mgp->num_slices; 5210 cap_rings->mr_gnum = 0; 5211 cap_rings->mr_rget = myri10ge_fill_ring; 5212 cap_rings->mr_gget = NULL; 5213 break; 5214 default: 5215 return (B_FALSE); 5216 } 5217 break; 5218 case MAC_CAPAB_LSO: 5219 cap_lso = cap_data; 5220 if (!myri10ge_use_lso) 5221 return (B_FALSE); 5222 if (!(mgp->features & MYRI10GE_TSO)) 5223 return (B_FALSE); 5224 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; 5225 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1; 5226 break; 5227 5228 default: 5229 return (B_FALSE); 5230 } 5231 return (B_TRUE); 5232 } 5233 5234 5235 static int 5236 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val) 5237 { 5238 struct myri10ge_priv *mgp = arg; 5239 struct myri10ge_rx_ring_stats *rstat; 5240 struct myri10ge_tx_ring_stats *tstat; 5241 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats; 5242 struct myri10ge_slice_state *ss; 5243 uint64_t tmp = 0; 5244 int i; 5245 5246 switch (stat) { 5247 case MAC_STAT_IFSPEED: 5248 *val = 10ull * 1000ull * 1000000ull; 5249 break; 5250 5251 case MAC_STAT_MULTIRCV: 5252 for (i = 0; i < mgp->num_slices; i++) { 5253 rstat = &mgp->ss[i].rx_stats; 5254 tmp += rstat->multircv; 5255 } 5256 *val = tmp; 5257 break; 5258 5259 case MAC_STAT_BRDCSTRCV: 5260 for (i = 0; i < mgp->num_slices; i++) { 5261 rstat = &mgp->ss[i].rx_stats; 5262 tmp += rstat->brdcstrcv; 5263 } 5264 *val = tmp; 5265 break; 5266 5267 case MAC_STAT_MULTIXMT: 5268 for (i = 0; i < mgp->num_slices; i++) { 5269 tstat = &mgp->ss[i].tx.stats; 5270 tmp += tstat->multixmt; 5271 } 5272 *val = tmp; 5273 break; 5274 5275 case 
MAC_STAT_BRDCSTXMT: 5276 for (i = 0; i < mgp->num_slices; i++) { 5277 tstat = &mgp->ss[i].tx.stats; 5278 tmp += tstat->brdcstxmt; 5279 } 5280 *val = tmp; 5281 break; 5282 5283 case MAC_STAT_NORCVBUF: 5284 tmp = ntohl(fw_stats->dropped_no_big_buffer); 5285 tmp += ntohl(fw_stats->dropped_no_small_buffer); 5286 tmp += ntohl(fw_stats->dropped_link_overflow); 5287 for (i = 0; i < mgp->num_slices; i++) { 5288 ss = &mgp->ss[i]; 5289 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf); 5290 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf); 5291 } 5292 *val = tmp; 5293 break; 5294 5295 case MAC_STAT_IERRORS: 5296 tmp += ntohl(fw_stats->dropped_bad_crc32); 5297 tmp += ntohl(fw_stats->dropped_bad_phy); 5298 tmp += ntohl(fw_stats->dropped_runt); 5299 tmp += ntohl(fw_stats->dropped_overrun); 5300 *val = tmp; 5301 break; 5302 5303 case MAC_STAT_OERRORS: 5304 for (i = 0; i < mgp->num_slices; i++) { 5305 ss = &mgp->ss[i]; 5306 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags); 5307 tmp += MYRI10GE_SLICE_STAT(xmit_err); 5308 } 5309 *val = tmp; 5310 break; 5311 5312 case MAC_STAT_RBYTES: 5313 for (i = 0; i < mgp->num_slices; i++) { 5314 rstat = &mgp->ss[i].rx_stats; 5315 tmp += rstat->ibytes; 5316 } 5317 *val = tmp; 5318 break; 5319 5320 case MAC_STAT_IPACKETS: 5321 for (i = 0; i < mgp->num_slices; i++) { 5322 rstat = &mgp->ss[i].rx_stats; 5323 tmp += rstat->ipackets; 5324 } 5325 *val = tmp; 5326 break; 5327 5328 case MAC_STAT_OBYTES: 5329 for (i = 0; i < mgp->num_slices; i++) { 5330 tstat = &mgp->ss[i].tx.stats; 5331 tmp += tstat->obytes; 5332 } 5333 *val = tmp; 5334 break; 5335 5336 case MAC_STAT_OPACKETS: 5337 for (i = 0; i < mgp->num_slices; i++) { 5338 tstat = &mgp->ss[i].tx.stats; 5339 tmp += tstat->opackets; 5340 } 5341 *val = tmp; 5342 break; 5343 5344 case ETHER_STAT_TOOLONG_ERRORS: 5345 *val = ntohl(fw_stats->dropped_overrun); 5346 break; 5347 5348 #ifdef SOLARIS_S11 5349 case ETHER_STAT_TOOSHORT_ERRORS: 5350 *val = ntohl(fw_stats->dropped_runt); 5351 break; 5352 #endif 5353 5354 case ETHER_STAT_LINK_PAUSE: 5355 *val = mgp->pause; 5356 break; 5357 5358 case ETHER_STAT_LINK_AUTONEG: 5359 *val = 1; 5360 break; 5361 5362 case ETHER_STAT_LINK_DUPLEX: 5363 *val = LINK_DUPLEX_FULL; 5364 break; 5365 5366 default: 5367 return (ENOTSUP); 5368 } 5369 5370 return (0); 5371 } 5372 5373 static mac_callbacks_t myri10ge_m_callbacks = { 5374 (MC_IOCTL | MC_GETCAPAB), 5375 myri10ge_m_stat, 5376 myri10ge_m_start, 5377 myri10ge_m_stop, 5378 myri10ge_m_promisc, 5379 myri10ge_m_multicst, 5380 NULL, 5381 NULL, 5382 NULL, 5383 myri10ge_m_ioctl, 5384 myri10ge_m_getcapab 5385 }; 5386 5387 5388 static int 5389 myri10ge_probe_slices(struct myri10ge_priv *mgp) 5390 { 5391 myri10ge_cmd_t cmd; 5392 int status; 5393 5394 mgp->num_slices = 1; 5395 5396 /* hit the board with a reset to ensure it is alive */ 5397 (void) memset(&cmd, 0, sizeof (cmd)); 5398 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd); 5399 if (status != 0) { 5400 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 5401 return (ENXIO); 5402 } 5403 5404 if (myri10ge_use_msix == 0) 5405 return (0); 5406 5407 /* tell it the size of the interrupt queues */ 5408 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot); 5409 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 5410 if (status != 0) { 5411 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n", 5412 mgp->name); 5413 return (ENXIO); 5414 } 5415 5416 /* ask the maximum number of slices it supports */ 5417 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 5418 &cmd); 5419 if (status != 0) 
5420 return (0); 5421 5422 mgp->num_slices = cmd.data0; 5423 5424 /* 5425 * if the admin did not specify a limit to how many 5426 * slices we should use, cap it automatically to the 5427 * number of CPUs currently online 5428 */ 5429 if (myri10ge_max_slices == -1) 5430 myri10ge_max_slices = ncpus; 5431 5432 if (mgp->num_slices > myri10ge_max_slices) 5433 mgp->num_slices = myri10ge_max_slices; 5434 5435 5436 /* 5437 * Now try to allocate as many MSI-X vectors as we have 5438 * slices. We give up on MSI-X if we can only get a single 5439 * vector. 5440 */ 5441 while (mgp->num_slices > 1) { 5442 /* make sure it is a power of two */ 5443 while (mgp->num_slices & (mgp->num_slices - 1)) 5444 mgp->num_slices--; 5445 if (mgp->num_slices == 1) 5446 return (0); 5447 5448 status = myri10ge_add_intrs(mgp, 0); 5449 if (status == 0) { 5450 myri10ge_rem_intrs(mgp, 0); 5451 if (mgp->intr_cnt == mgp->num_slices) { 5452 if (myri10ge_verbose) 5453 printf("Got %d slices!\n", 5454 mgp->num_slices); 5455 return (0); 5456 } 5457 mgp->num_slices = mgp->intr_cnt; 5458 } else { 5459 mgp->num_slices = mgp->num_slices / 2; 5460 } 5461 } 5462 5463 if (myri10ge_verbose) 5464 printf("Got %d slices\n", mgp->num_slices); 5465 return (0); 5466 } 5467 5468 static void 5469 myri10ge_lro_free(struct myri10ge_slice_state *ss) 5470 { 5471 struct lro_entry *lro; 5472 5473 while (ss->lro_free != NULL) { 5474 lro = ss->lro_free; 5475 ss->lro_free = lro->next; 5476 kmem_free(lro, sizeof (*lro)); 5477 } 5478 } 5479 5480 static void 5481 myri10ge_lro_alloc(struct myri10ge_slice_state *ss) 5482 { 5483 struct lro_entry *lro; 5484 int idx; 5485 5486 ss->lro_free = NULL; 5487 ss->lro_active = NULL; 5488 5489 for (idx = 0; idx < myri10ge_lro_cnt; idx++) { 5490 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP); 5491 if (lro == NULL) 5492 continue; 5493 lro->next = ss->lro_free; 5494 ss->lro_free = lro; 5495 } 5496 } 5497 5498 static void 5499 myri10ge_free_slices(struct myri10ge_priv *mgp) 5500 { 5501 struct myri10ge_slice_state *ss; 5502 size_t bytes; 5503 int i; 5504 5505 if (mgp->ss == NULL) 5506 return; 5507 5508 for (i = 0; i < mgp->num_slices; i++) { 5509 ss = &mgp->ss[i]; 5510 if (ss->rx_done.entry == NULL) 5511 continue; 5512 myri10ge_dma_free(&ss->rx_done.dma); 5513 ss->rx_done.entry = NULL; 5514 if (ss->fw_stats == NULL) 5515 continue; 5516 myri10ge_dma_free(&ss->fw_stats_dma); 5517 ss->fw_stats = NULL; 5518 mutex_destroy(&ss->rx_lock); 5519 mutex_destroy(&ss->tx.lock); 5520 mutex_destroy(&ss->tx.handle_lock); 5521 mutex_destroy(&ss->poll_lock); 5522 myri10ge_jpool_fini(ss); 5523 myri10ge_slice_stat_destroy(ss); 5524 myri10ge_lro_free(ss); 5525 } 5526 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5527 kmem_free(mgp->ss, bytes); 5528 mgp->ss = NULL; 5529 } 5530 5531 5532 static int 5533 myri10ge_alloc_slices(struct myri10ge_priv *mgp) 5534 { 5535 struct myri10ge_slice_state *ss; 5536 size_t bytes; 5537 int i; 5538 5539 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5540 mgp->ss = kmem_zalloc(bytes, KM_SLEEP); 5541 if (mgp->ss == NULL) 5542 return (ENOMEM); 5543 for (i = 0; i < mgp->num_slices; i++) { 5544 ss = &mgp->ss[i]; 5545 5546 ss->mgp = mgp; 5547 5548 /* allocate the per-slice firmware stats */ 5549 bytes = sizeof (*ss->fw_stats); 5550 ss->fw_stats = (mcp_irq_data_t *)(void *) 5551 myri10ge_dma_alloc(mgp->dip, bytes, 5552 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5553 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5554 &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT); 5555 if (ss->fw_stats == NULL) 5556 goto abort; 5557 (void) 
memset(ss->fw_stats, 0, bytes); 5558 5559 /* allocate rx done ring */ 5560 bytes = mgp->max_intr_slots * 5561 sizeof (*ss->rx_done.entry); 5562 ss->rx_done.entry = (mcp_slot_t *)(void *) 5563 myri10ge_dma_alloc(mgp->dip, bytes, 5564 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5565 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5566 &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT); 5567 if (ss->rx_done.entry == NULL) { 5568 goto abort; 5569 } 5570 (void) memset(ss->rx_done.entry, 0, bytes); 5571 mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie); 5572 mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL); 5573 mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL); 5574 mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL); 5575 myri10ge_jpool_init(ss); 5576 (void) myri10ge_slice_stat_init(ss); 5577 myri10ge_lro_alloc(ss); 5578 } 5579 5580 return (0); 5581 5582 abort: 5583 myri10ge_free_slices(mgp); 5584 return (ENOMEM); 5585 } 5586 5587 static int 5588 myri10ge_save_msi_state(struct myri10ge_priv *mgp, 5589 ddi_acc_handle_t handle) 5590 { 5591 uint8_t ptr; 5592 int err; 5593 5594 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5595 if (err != 0) { 5596 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5597 mgp->name); 5598 return (DDI_FAILURE); 5599 } 5600 mgp->pci_saved_state.msi_ctrl = 5601 pci_config_get16(handle, ptr + PCI_MSI_CTRL); 5602 mgp->pci_saved_state.msi_addr_low = 5603 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET); 5604 mgp->pci_saved_state.msi_addr_high = 5605 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4); 5606 mgp->pci_saved_state.msi_data_32 = 5607 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA); 5608 mgp->pci_saved_state.msi_data_64 = 5609 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA); 5610 return (DDI_SUCCESS); 5611 } 5612 5613 static int 5614 myri10ge_restore_msi_state(struct myri10ge_priv *mgp, 5615 ddi_acc_handle_t handle) 5616 { 5617 uint8_t ptr; 5618 int err; 5619 5620 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5621 if (err != 0) { 5622 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5623 mgp->name); 5624 return (DDI_FAILURE); 5625 } 5626 5627 pci_config_put16(handle, ptr + PCI_MSI_CTRL, 5628 mgp->pci_saved_state.msi_ctrl); 5629 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET, 5630 mgp->pci_saved_state.msi_addr_low); 5631 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4, 5632 mgp->pci_saved_state.msi_addr_high); 5633 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA, 5634 mgp->pci_saved_state.msi_data_32); 5635 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA, 5636 mgp->pci_saved_state.msi_data_64); 5637 5638 return (DDI_SUCCESS); 5639 } 5640 5641 static int 5642 myri10ge_save_pci_state(struct myri10ge_priv *mgp) 5643 { 5644 ddi_acc_handle_t handle = mgp->cfg_hdl; 5645 int i; 5646 int err = DDI_SUCCESS; 5647 5648 5649 /* Save the non-extended PCI config space 32-bits at a time */ 5650 for (i = 0; i < 16; i++) 5651 mgp->pci_saved_state.base[i] = 5652 pci_config_get32(handle, i*4); 5653 5654 /* now save MSI interrupt state, if needed */ 5655 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5656 err = myri10ge_save_msi_state(mgp, handle); 5657 5658 return (err); 5659 } 5660 5661 static int 5662 myri10ge_restore_pci_state(struct myri10ge_priv *mgp) 5663 { 5664 ddi_acc_handle_t handle = mgp->cfg_hdl; 5665 int i; 5666 int err = DDI_SUCCESS; 5667 5668 5669 /* Restore the non-extended PCI config space 32-bits at a time */ 5670 for (i = 15; i >= 0; i--) 5671 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]); 5672 5673 /* now restore MSI interrupt state, if needed */ 5674 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5675 err = myri10ge_restore_msi_state(mgp, handle); 5676 5677 if (mgp->max_read_request_4k) 5678 (void) myri10ge_set_max_readreq(handle); 5679 return (err); 5680 } 5681 5682 5683 static int 5684 myri10ge_suspend(dev_info_t *dip) 5685 { 5686 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5687 int status; 5688 5689 if (mgp == NULL) { 5690 cmn_err(CE_WARN, "null mgp in myri10ge_suspend\n"); 5691 return (DDI_FAILURE); 5692 } 5693 if (mgp->dip != dip) { 5694 cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n"); 5695 return (DDI_FAILURE); 5696 } 5697 mutex_enter(&mgp->intrlock); 5698 if (mgp->running == MYRI10GE_ETH_RUNNING) { 5699 mgp->running = MYRI10GE_ETH_STOPPING; 5700 mutex_exit(&mgp->intrlock); 5701 (void) untimeout(mgp->timer_id); 5702 mutex_enter(&mgp->intrlock); 5703 myri10ge_stop_locked(mgp); 5704 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING; 5705 } 5706 status = myri10ge_save_pci_state(mgp); 5707 mutex_exit(&mgp->intrlock); 5708 return (status); 5709 } 5710 5711 static int 5712 myri10ge_resume(dev_info_t *dip) 5713 { 5714 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5715 int status = DDI_SUCCESS; 5716 5717 if (mgp == NULL) { 5718 cmn_err(CE_WARN, "null mgp in myri10ge_resume\n"); 5719 return (DDI_FAILURE); 5720 } 5721 if (mgp->dip != dip) { 5722 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n"); 5723 return (DDI_FAILURE); 5724 } 5725 5726 mutex_enter(&mgp->intrlock); 5727 status = myri10ge_restore_pci_state(mgp); 5728 if (status == DDI_SUCCESS && 5729 mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) { 5730 status = myri10ge_start_locked(mgp); 5731 } 5732 mutex_exit(&mgp->intrlock); 5733 if (status != DDI_SUCCESS) 5734 return (status); 5735 5736 /* start the watchdog timer */ 5737 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 5738 mgp->timer_ticks); 5739 return (DDI_SUCCESS); 5740 } 5741 5742 static int 5743 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5744 { 5745 5746 struct myri10ge_priv *mgp; 5747 mac_register_t *macp, *omacp; 5748 ddi_acc_handle_t handle; 5749 uint32_t csr, hdr_offset; 5750 int status, span, link_width, max_read_request_4k; 5751 unsigned long bus_number, dev_number, func_number; 5752 size_t bytes; 5753 offset_t ss_offset; 5754 uint8_t vso; 5755 5756 if (cmd == DDI_RESUME) { 5757 return (myri10ge_resume(dip)); 5758 } 5759 5760 if (cmd != DDI_ATTACH) 5761 return (DDI_FAILURE); 5762 if (pci_config_setup(dip, &handle) != DDI_SUCCESS) 5763 return (DDI_FAILURE); 5764 5765 /* enable bus master and memory space access */ 5766 csr = pci_config_get32(handle, PCI_CONF_COMM); 5767 pci_config_put32(handle, PCI_CONF_COMM, 5768 (csr |PCI_COMM_ME|PCI_COMM_MAE)); 5769 status = myri10ge_read_pcie_link_width(handle, &link_width); 5770 if (status != 0) { 5771 cmn_err(CE_WARN, "could not read link width!\n"); 5772 link_width = 0; 5773 } 5774 max_read_request_4k = !myri10ge_set_max_readreq(handle); 5775 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS); 5776 if (status != 0) 5777 goto abort_with_cfg_hdl; 5778 if ((omacp = mac_alloc(MAC_VERSION)) == NULL) 5779 goto abort_with_cfg_hdl; 5780 /* 5781 * XXXX Hack: mac_register_t grows in newer kernels.
To be 5782 * able to write newer fields, such as m_margin, without 5783 * writing outside allocated memory, we allocate our own macp 5784 * and pass that to mac_register() 5785 */ 5786 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP); 5787 macp->m_version = omacp->m_version; 5788 5789 if ((mgp = (struct myri10ge_priv *) 5790 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) { 5791 goto abort_with_macinfo; 5792 } 5793 ddi_set_driver_private(dip, mgp); 5794 5795 /* setup device name for log messages */ 5796 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip)); 5797 5798 mutex_enter(&myri10ge_param_lock); 5799 myri10ge_get_props(dip); 5800 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 5801 mgp->pause = myri10ge_flow_control; 5802 mutex_exit(&myri10ge_param_lock); 5803 5804 mgp->max_read_request_4k = max_read_request_4k; 5805 mgp->pcie_link_width = link_width; 5806 mgp->running = MYRI10GE_ETH_STOPPED; 5807 mgp->vso = vso; 5808 mgp->dip = dip; 5809 mgp->cfg_hdl = handle; 5810 5811 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */ 5812 myri10ge_test_physical(dip); 5813 5814 /* allocate command page */ 5815 bytes = sizeof (*mgp->cmd); 5816 mgp->cmd = (mcp_cmd_response_t *) 5817 (void *)myri10ge_dma_alloc(dip, bytes, 5818 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5819 DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT, 5820 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT); 5821 if (mgp->cmd == NULL) 5822 goto abort_with_mgp; 5823 5824 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number, 5825 &dev_number, &func_number); 5826 if (myri10ge_verbose) 5827 printf("%s at %ld:%ld:%ld attaching\n", mgp->name, 5828 bus_number, dev_number, func_number); 5829 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram, 5830 (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr, 5831 &mgp->io_handle); 5832 if (status != DDI_SUCCESS) { 5833 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name); 5834 printf("%s: reg_set = %d, span = %d, status = %d", 5835 mgp->name, mgp->reg_set, span, status); 5836 goto abort_with_mgp; 5837 } 5838 5839 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET); 5840 hdr_offset = ntohl(hdr_offset) & 0xffffc; 5841 ss_offset = hdr_offset + 5842 offsetof(struct mcp_gen_header, string_specs); 5843 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset)); 5844 myri10ge_pio_copy32(mgp->eeprom_strings, 5845 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size), 5846 MYRI10GE_EEPROM_STRINGS_SIZE); 5847 (void) memset(mgp->eeprom_strings + 5848 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 5849 5850 status = myri10ge_read_mac_addr(mgp); 5851 if (status) { 5852 goto abort_with_mapped; 5853 } 5854 5855 status = myri10ge_select_firmware(mgp); 5856 if (status != 0) { 5857 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name); 5858 goto abort_with_mapped; 5859 } 5860 5861 status = myri10ge_probe_slices(mgp); 5862 if (status != 0) { 5863 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name); 5864 goto abort_with_dummy_rdma; 5865 } 5866 5867 status = myri10ge_alloc_slices(mgp); 5868 if (status != 0) { 5869 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name); 5870 goto abort_with_dummy_rdma; 5871 } 5872 5873 /* add the interrupt handler */ 5874 status = myri10ge_add_intrs(mgp, 1); 5875 if (status != 0) { 5876 cmn_err(CE_WARN, "%s: Failed to add interrupt\n", 5877 mgp->name); 5878 goto abort_with_slices; 5879 } 5880 5881 /* now that we have an iblock_cookie, init the mutexes */ 5882 mutex_init(&mgp->cmd_lock, 
NULL, MUTEX_DRIVER, mgp->icookie); 5883 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie); 5884 5885 5886 status = myri10ge_nic_stat_init(mgp); 5887 if (status != DDI_SUCCESS) 5888 goto abort_with_interrupts; 5889 status = myri10ge_info_init(mgp); 5890 if (status != DDI_SUCCESS) 5891 goto abort_with_stats; 5892 5893 /* 5894 * Initialize GLD state 5895 */ 5896 5897 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 5898 macp->m_driver = mgp; 5899 macp->m_dip = dip; 5900 macp->m_src_addr = mgp->mac_addr; 5901 macp->m_callbacks = &myri10ge_m_callbacks; 5902 macp->m_min_sdu = 0; 5903 macp->m_max_sdu = myri10ge_mtu - 5904 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ); 5905 #ifdef SOLARIS_S11 5906 macp->m_margin = VLAN_TAGSZ; 5907 #endif 5908 macp->m_v12n = MAC_VIRT_LEVEL1; 5909 status = mac_register(macp, &mgp->mh); 5910 if (status != 0) { 5911 cmn_err(CE_WARN, "%s: mac_register failed with %d\n", 5912 mgp->name, status); 5913 goto abort_with_info; 5914 } 5915 myri10ge_ndd_init(mgp); 5916 if (myri10ge_verbose) 5917 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name, 5918 mgp->intr_type, mgp->tx_boundary, mgp->fw_name); 5919 mutex_enter(&myri10ge_param_lock); 5920 mgp->next = mgp_list; 5921 mgp_list = mgp; 5922 mutex_exit(&myri10ge_param_lock); 5923 kmem_free(macp, sizeof (*macp) * 8); 5924 mac_free(omacp); 5925 return (DDI_SUCCESS); 5926 5927 abort_with_info: 5928 myri10ge_info_destroy(mgp); 5929 5930 abort_with_stats: 5931 myri10ge_nic_stat_destroy(mgp); 5932 5933 abort_with_interrupts: 5934 mutex_destroy(&mgp->cmd_lock); 5935 mutex_destroy(&mgp->intrlock); 5936 myri10ge_rem_intrs(mgp, 1); 5937 5938 abort_with_slices: 5939 myri10ge_free_slices(mgp); 5940 5941 abort_with_dummy_rdma: 5942 myri10ge_dummy_rdma(mgp, 0); 5943 5944 abort_with_mapped: 5945 ddi_regs_map_free(&mgp->io_handle); 5946 5947 myri10ge_dma_free(&mgp->cmd_dma); 5948 5949 abort_with_mgp: 5950 kmem_free(mgp, sizeof (*mgp)); 5951 5952 abort_with_macinfo: 5953 kmem_free(macp, sizeof (*macp) * 8); 5954 mac_free(omacp); 5955 5956 abort_with_cfg_hdl: 5957 pci_config_teardown(&handle); 5958 return (DDI_FAILURE); 5959 5960 } 5961 5962 5963 static int 5964 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5965 { 5966 struct myri10ge_priv *mgp, *tmp; 5967 int status, i, jbufs_alloced; 5968 5969 if (cmd == DDI_SUSPEND) { 5970 status = myri10ge_suspend(dip); 5971 return (status); 5972 } 5973 5974 if (cmd != DDI_DETACH) { 5975 return (DDI_FAILURE); 5976 } 5977 /* Get the driver private (gld_mac_info_t) structure */ 5978 mgp = ddi_get_driver_private(dip); 5979 5980 mutex_enter(&mgp->intrlock); 5981 jbufs_alloced = 0; 5982 for (i = 0; i < mgp->num_slices; i++) { 5983 myri10ge_remove_jbufs(&mgp->ss[i]); 5984 jbufs_alloced += mgp->ss[i].jpool.num_alloc; 5985 } 5986 mutex_exit(&mgp->intrlock); 5987 if (jbufs_alloced != 0) { 5988 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n", 5989 mgp->name, jbufs_alloced); 5990 return (DDI_FAILURE); 5991 } 5992 5993 mutex_enter(&myri10ge_param_lock); 5994 if (mgp->refcnt != 0) { 5995 mutex_exit(&myri10ge_param_lock); 5996 cmn_err(CE_NOTE, "%s: %d external refs remain\n", 5997 mgp->name, mgp->refcnt); 5998 return (DDI_FAILURE); 5999 } 6000 mutex_exit(&myri10ge_param_lock); 6001 6002 status = mac_unregister(mgp->mh); 6003 if (status != DDI_SUCCESS) 6004 return (status); 6005 6006 myri10ge_ndd_fini(mgp); 6007 myri10ge_dummy_rdma(mgp, 0); 6008 myri10ge_nic_stat_destroy(mgp); 6009 myri10ge_info_destroy(mgp); 6010 6011 mutex_destroy(&mgp->cmd_lock); 6012 mutex_destroy(&mgp->intrlock); 6013 
	myri10ge_rem_intrs(mgp, 1);

	myri10ge_free_slices(mgp);
	ddi_regs_map_free(&mgp->io_handle);
	myri10ge_dma_free(&mgp->cmd_dma);
	pci_config_teardown(&mgp->cfg_hdl);

	mutex_enter(&myri10ge_param_lock);
	if (mgp_list == mgp) {
		mgp_list = mgp->next;
	} else {
		tmp = mgp_list;
		while (tmp->next != mgp && tmp->next != NULL)
			tmp = tmp->next;
		if (tmp->next != NULL)
			tmp->next = tmp->next->next;
	}
	kmem_free(mgp, sizeof (*mgp));
	mutex_exit(&myri10ge_param_lock);
	return (DDI_SUCCESS);
}

/*
 * Helper for the quiesce entry point: interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We're essentially running
 * the guts of the interrupt handler, and just cherry-picking the
 * confirmation that the NIC is quiesced (stats->link_down).
 */

static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss = mgp->ss;
	mcp_irq_data_t *stats = ss->fw_stats;
	int valid;
	int found_down = 0;

	/* check for a pending IRQ */
	if (!*((volatile uint8_t *)&stats->valid))
		return (0);
	valid = stats->valid;

	/*
	 * Make sure to tell the NIC to lower a legacy IRQ, else
	 * it may have corrupt state after restarting
	 */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		mb();
		/* wait for irq conf DMA */
		while (*((volatile uint8_t *)&stats->valid))
			;
	}
	if (stats->stats_updated && stats->link_down)
		found_down = 1;

	if (valid & 0x1)
		*ss->irq_claim = BE_32(3);
	*(ss->irq_claim + 1) = BE_32(3);

	return (found_down);
}

static int
myri10ge_quiesce(dev_info_t *dip)
{
	struct myri10ge_priv *mgp;
	myri10ge_cmd_t cmd;
	int status, down, i;

	mgp = ddi_get_driver_private(dip);
	if (mgp == NULL)
		return (DDI_FAILURE);

	/* if the device was unplumbed, it is guaranteed to be quiescent */
	if (mgp->running == MYRI10GE_ETH_STOPPED)
		return (DDI_SUCCESS);

	/* send a down CMD to quiesce the NIC */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
		return (DDI_FAILURE);
	}

	for (i = 0; i < 20; i++) {
		down = myri10ge_poll_down(mgp);
		if (down)
			break;
		delay(drv_usectohz(100000));
		mb();
	}
	if (down)
		return (DDI_SUCCESS);
	return (DDI_FAILURE);
}

/*
 * Distinguish between allocb'ed blocks, and gesballoc'ed attached
 * storage.
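 *
 * We do that by capturing, at _init() time, the db_lastfree routine
 * of a dblk that we know came from allocb(); a dblk whose db_lastfree
 * differs from this saved pointer can then be presumed to carry
 * gesballoc'ed attached storage.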
6119 */ 6120 static void 6121 myri10ge_find_lastfree(void) 6122 { 6123 mblk_t *mp = allocb(1024, 0); 6124 dblk_t *dbp; 6125 6126 if (mp == NULL) { 6127 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n"); 6128 return; 6129 } 6130 dbp = mp->b_datap; 6131 myri10ge_db_lastfree = (void *)dbp->db_lastfree; 6132 } 6133 6134 int 6135 _init(void) 6136 { 6137 int i; 6138 6139 if (myri10ge_verbose) 6140 cmn_err(CE_NOTE, 6141 "Myricom 10G driver (10GbE) version %s loading\n", 6142 MYRI10GE_VERSION_STR); 6143 myri10ge_find_lastfree(); 6144 mac_init_ops(&myri10ge_ops, "myri10ge"); 6145 mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL); 6146 if ((i = mod_install(&modlinkage)) != 0) { 6147 cmn_err(CE_WARN, "mod_install returned %d\n", i); 6148 mac_fini_ops(&myri10ge_ops); 6149 mutex_destroy(&myri10ge_param_lock); 6150 } 6151 return (i); 6152 } 6153 6154 int 6155 _fini(void) 6156 { 6157 int i; 6158 i = mod_remove(&modlinkage); 6159 if (i != 0) { 6160 return (i); 6161 } 6162 mac_fini_ops(&myri10ge_ops); 6163 mutex_destroy(&myri10ge_param_lock); 6164 return (0); 6165 } 6166 6167 int 6168 _info(struct modinfo *modinfop) 6169 { 6170 return (mod_info(&modlinkage, modinfop)); 6171 } 6172 6173 6174 /* 6175 * This file uses MyriGE driver indentation. 6176 * 6177 * Local Variables: 6178 * c-file-style:"sun" 6179 * tab-width:8 6180 * End: 6181 */ 6182