1 /* 2 * Copyright (c) 2008-2016 Solarflare Communications Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 16 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 21 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 23 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * The views and conclusions contained in the software and documentation are 27 * those of the authors and should not be interpreted as representing official 28 * policies, either expressed or implied, of the FreeBSD Project. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/atomic.h> 36 #include <sys/stream.h> 37 #include <sys/strsun.h> 38 #include <sys/strsubr.h> 39 #include <sys/pattr.h> 40 #include <sys/cpu.h> 41 42 #include <sys/ethernet.h> 43 #include <inet/ip.h> 44 45 #include <netinet/in.h> 46 #include <netinet/ip.h> 47 #include <netinet/tcp.h> 48 49 #include "sfxge.h" 50 51 #include "efx.h" 52 53 /* TXQ flush response timeout (in microseconds) */ 54 #define SFXGE_TX_QFLUSH_USEC (2000000) 55 56 /* See sfxge.conf.private for descriptions */ 57 #define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT 4096 58 #define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 256 59 60 61 /* Transmit buffer DMA attributes */ 62 static ddi_device_acc_attr_t sfxge_tx_buffer_devacc = { 63 64 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */ 65 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */ 66 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */ 67 }; 68 69 static ddi_dma_attr_t sfxge_tx_buffer_dma_attr = { 70 DMA_ATTR_V0, /* dma_attr_version */ 71 0, /* dma_attr_addr_lo */ 72 0xffffffffffffffffull, /* dma_attr_addr_hi */ 73 0xffffffffffffffffull, /* dma_attr_count_max */ 74 SFXGE_TX_BUFFER_SIZE, /* dma_attr_align */ 75 0xffffffff, /* dma_attr_burstsizes */ 76 1, /* dma_attr_minxfer */ 77 0xffffffffffffffffull, /* dma_attr_maxxfer */ 78 0xffffffffffffffffull, /* dma_attr_seg */ 79 1, /* dma_attr_sgllen */ 80 1, /* dma_attr_granular */ 81 0 /* dma_attr_flags */ 82 }; 83 84 /* Transmit mapping DMA attributes */ 85 static ddi_dma_attr_t sfxge_tx_mapping_dma_attr = { 86 DMA_ATTR_V0, /* dma_attr_version */ 87 0, /* dma_attr_addr_lo */ 88 0xffffffffffffffffull, /* dma_attr_addr_hi */ 89 0xffffffffffffffffull, /* dma_attr_count_max */ 90 1, /* dma_attr_align */ 91 0xffffffff, /* dma_attr_burstsizes */ 92 1, /* dma_attr_minxfer */ 93 0xffffffffffffffffull, /* 
dma_attr_maxxfer */ 94 0xffffffffffffffffull, /* dma_attr_seg */ 95 0x7fffffff, /* dma_attr_sgllen */ 96 1, /* dma_attr_granular */ 97 0 /* dma_attr_flags */ 98 }; 99 100 /* Transmit queue DMA attributes */ 101 static ddi_device_acc_attr_t sfxge_txq_devacc = { 102 103 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */ 104 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */ 105 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */ 106 }; 107 108 static ddi_dma_attr_t sfxge_txq_dma_attr = { 109 DMA_ATTR_V0, /* dma_attr_version */ 110 0, /* dma_attr_addr_lo */ 111 0xffffffffffffffffull, /* dma_attr_addr_hi */ 112 0xffffffffffffffffull, /* dma_attr_count_max */ 113 EFX_BUF_SIZE, /* dma_attr_align */ 114 0xffffffff, /* dma_attr_burstsizes */ 115 1, /* dma_attr_minxfer */ 116 0xffffffffffffffffull, /* dma_attr_maxxfer */ 117 0xffffffffffffffffull, /* dma_attr_seg */ 118 1, /* dma_attr_sgllen */ 119 1, /* dma_attr_granular */ 120 0 /* dma_attr_flags */ 121 }; 122 123 124 /* 125 * A sfxge_tx_qdpl_swizzle() can happen when the DPL get list is one packet 126 * under the limit, and must move all packets from the DPL put->get list 127 * Hence this is the real maximum length of the TX DPL get list. 128 */ 129 static int 130 sfxge_tx_dpl_get_pkt_max(sfxge_txq_t *stp) 131 { 132 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 133 return (stdp->get_pkt_limit + stdp->put_pkt_limit - 1); 134 } 135 136 137 static int 138 sfxge_tx_packet_ctor(void *buf, void *arg, int kmflags) 139 { 140 _NOTE(ARGUNUSED(arg, kmflags)) 141 142 bzero(buf, sizeof (sfxge_tx_packet_t)); 143 144 return (0); 145 } 146 147 static void 148 sfxge_tx_packet_dtor(void *buf, void *arg) 149 { 150 sfxge_tx_packet_t *stpp = buf; 151 152 _NOTE(ARGUNUSED(arg)) 153 154 SFXGE_OBJ_CHECK(stpp, sfxge_tx_packet_t); 155 } 156 157 static int 158 sfxge_tx_buffer_ctor(void *buf, void *arg, int kmflags) 159 { 160 sfxge_tx_buffer_t *stbp = buf; 161 sfxge_t *sp = arg; 162 sfxge_dma_buffer_attr_t dma_attr; 163 int rc; 164 165 bzero(buf, sizeof (sfxge_tx_buffer_t)); 166 167 dma_attr.sdba_dip = sp->s_dip; 168 dma_attr.sdba_dattrp = &sfxge_tx_buffer_dma_attr; 169 dma_attr.sdba_callback = ((kmflags == KM_SLEEP) ? 170 DDI_DMA_SLEEP : DDI_DMA_DONTWAIT); 171 dma_attr.sdba_length = SFXGE_TX_BUFFER_SIZE; 172 dma_attr.sdba_memflags = DDI_DMA_STREAMING; 173 dma_attr.sdba_devaccp = &sfxge_tx_buffer_devacc; 174 dma_attr.sdba_bindflags = DDI_DMA_WRITE | DDI_DMA_STREAMING; 175 dma_attr.sdba_maxcookies = 1; 176 dma_attr.sdba_zeroinit = B_FALSE; 177 178 if ((rc = sfxge_dma_buffer_create(&(stbp->stb_esm), &dma_attr)) != 0) 179 goto fail1; 180 181 return (0); 182 183 fail1: 184 DTRACE_PROBE1(fail1, int, rc); 185 186 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t); 187 188 return (-1); 189 } 190 191 static void 192 sfxge_tx_buffer_dtor(void *buf, void *arg) 193 { 194 sfxge_tx_buffer_t *stbp = buf; 195 196 _NOTE(ARGUNUSED(arg)) 197 198 sfxge_dma_buffer_destroy(&(stbp->stb_esm)); 199 200 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t); 201 } 202 203 static int 204 sfxge_tx_mapping_ctor(void *buf, void *arg, int kmflags) 205 { 206 sfxge_tx_mapping_t *stmp = buf; 207 sfxge_t *sp = arg; 208 dev_info_t *dip = sp->s_dip; 209 int rc; 210 211 bzero(buf, sizeof (sfxge_tx_mapping_t)); 212 213 stmp->stm_sp = sp; 214 215 /* Allocate DMA handle */ 216 rc = ddi_dma_alloc_handle(dip, &sfxge_tx_mapping_dma_attr, 217 (kmflags == KM_SLEEP) ? 
DDI_DMA_SLEEP : DDI_DMA_DONTWAIT, 218 NULL, &(stmp->stm_dma_handle)); 219 if (rc != DDI_SUCCESS) 220 goto fail1; 221 222 return (0); 223 224 fail1: 225 DTRACE_PROBE1(fail1, int, rc); 226 227 stmp->stm_sp = NULL; 228 229 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t); 230 231 return (-1); 232 } 233 234 static void 235 sfxge_tx_mapping_dtor(void *buf, void *arg) 236 { 237 sfxge_tx_mapping_t *stmp = buf; 238 239 ASSERT3P(stmp->stm_sp, ==, arg); 240 241 /* Free the DMA handle */ 242 ddi_dma_free_handle(&(stmp->stm_dma_handle)); 243 stmp->stm_dma_handle = NULL; 244 245 stmp->stm_sp = NULL; 246 247 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t); 248 } 249 250 static int 251 sfxge_tx_qctor(void *buf, void *arg, int kmflags) 252 { 253 sfxge_txq_t *stp = buf; 254 efsys_mem_t *esmp = &(stp->st_mem); 255 sfxge_t *sp = arg; 256 sfxge_dma_buffer_attr_t dma_attr; 257 sfxge_tx_dpl_t *stdp; 258 int rc; 259 260 /* Compile-time structure layout checks */ 261 EFX_STATIC_ASSERT(sizeof (stp->__st_u1.__st_s1) <= 262 sizeof (stp->__st_u1.__st_pad)); 263 EFX_STATIC_ASSERT(sizeof (stp->__st_u2.__st_s2) <= 264 sizeof (stp->__st_u2.__st_pad)); 265 EFX_STATIC_ASSERT(sizeof (stp->__st_u3.__st_s3) <= 266 sizeof (stp->__st_u3.__st_pad)); 267 EFX_STATIC_ASSERT(sizeof (stp->__st_u4.__st_s4) <= 268 sizeof (stp->__st_u4.__st_pad)); 269 270 bzero(buf, sizeof (sfxge_txq_t)); 271 272 stp->st_sp = sp; 273 274 dma_attr.sdba_dip = sp->s_dip; 275 dma_attr.sdba_dattrp = &sfxge_txq_dma_attr; 276 dma_attr.sdba_callback = DDI_DMA_SLEEP; 277 dma_attr.sdba_length = EFX_TXQ_SIZE(SFXGE_TX_NDESCS); 278 dma_attr.sdba_memflags = DDI_DMA_CONSISTENT; 279 dma_attr.sdba_devaccp = &sfxge_txq_devacc; 280 dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT; 281 dma_attr.sdba_maxcookies = EFX_TXQ_NBUFS(SFXGE_TX_NDESCS); 282 dma_attr.sdba_zeroinit = B_FALSE; 283 284 if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0) 285 goto fail1; 286 287 /* Allocate some buffer table entries */ 288 if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS), 289 &(stp->st_id))) != 0) 290 goto fail2; 291 292 /* Allocate the descriptor array */ 293 if ((stp->st_eb = kmem_zalloc(sizeof (efx_buffer_t) * 294 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS), kmflags)) == NULL) { 295 rc = ENOMEM; 296 goto fail3; 297 } 298 299 /* Allocate the context arrays */ 300 if ((stp->st_stmp = kmem_zalloc(sizeof (sfxge_tx_mapping_t *) * 301 SFXGE_TX_NDESCS, kmflags)) == NULL) { 302 rc = ENOMEM; 303 goto fail4; 304 } 305 306 if ((stp->st_stbp = kmem_zalloc(sizeof (sfxge_tx_buffer_t *) * 307 SFXGE_TX_NDESCS, kmflags)) == NULL) { 308 rc = ENOMEM; 309 goto fail5; 310 } 311 312 if ((stp->st_mp = kmem_zalloc(sizeof (mblk_t *) * 313 SFXGE_TX_NDESCS, kmflags)) == NULL) { 314 rc = ENOMEM; 315 goto fail6; 316 } 317 318 /* Initialize the deferred packet list */ 319 stdp = &(stp->st_dpl); 320 stdp->std_getp = &(stdp->std_get); 321 322 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED; 323 324 return (0); 325 326 fail6: 327 DTRACE_PROBE(fail6); 328 329 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS); 330 stp->st_stbp = NULL; 331 332 fail5: 333 DTRACE_PROBE(fail5); 334 335 kmem_free(stp->st_stmp, 336 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS); 337 stp->st_stmp = NULL; 338 339 fail4: 340 DTRACE_PROBE(fail4); 341 342 /* Free the descriptor array */ 343 kmem_free(stp->st_eb, sizeof (efx_buffer_t) * 344 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 345 stp->st_eb = NULL; 346 347 fail3: 348 DTRACE_PROBE(fail3); 349 350 /* Free the buffer table entries */ 351 sfxge_sram_buf_tbl_free(sp, stp->st_id, 
EFX_TXQ_NBUFS(SFXGE_TX_NDESCS)); 352 stp->st_id = 0; 353 354 fail2: 355 DTRACE_PROBE(fail2); 356 357 /* Tear down DMA setup */ 358 sfxge_dma_buffer_destroy(esmp); 359 360 fail1: 361 DTRACE_PROBE1(fail1, int, rc); 362 363 stp->st_sp = NULL; 364 365 SFXGE_OBJ_CHECK(stp, sfxge_txq_t); 366 367 return (-1); 368 } 369 370 static void 371 sfxge_tx_qdtor(void *buf, void *arg) 372 { 373 sfxge_txq_t *stp = buf; 374 efsys_mem_t *esmp = &(stp->st_mem); 375 sfxge_t *sp = stp->st_sp; 376 sfxge_tx_dpl_t *stdp; 377 378 _NOTE(ARGUNUSED(arg)) 379 380 stp->st_unblock = 0; 381 382 /* Tear down the deferred packet list */ 383 stdp = &(stp->st_dpl); 384 ASSERT3P(stdp->std_getp, ==, &(stdp->std_get)); 385 stdp->std_getp = NULL; 386 387 /* Free the context arrays */ 388 kmem_free(stp->st_mp, sizeof (mblk_t *) * SFXGE_TX_NDESCS); 389 stp->st_mp = NULL; 390 391 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS); 392 stp->st_stbp = NULL; 393 394 kmem_free(stp->st_stmp, 395 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS); 396 stp->st_stmp = NULL; 397 398 /* Free the descriptor array */ 399 kmem_free(stp->st_eb, sizeof (efx_buffer_t) * 400 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 401 stp->st_eb = NULL; 402 403 /* Free the buffer table entries */ 404 sfxge_sram_buf_tbl_free(sp, stp->st_id, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS)); 405 stp->st_id = 0; 406 407 /* Tear down dma setup */ 408 sfxge_dma_buffer_destroy(esmp); 409 410 stp->st_sp = NULL; 411 412 SFXGE_OBJ_CHECK(stp, sfxge_txq_t); 413 } 414 415 static void 416 sfxge_tx_packet_destroy(sfxge_t *sp, sfxge_tx_packet_t *stpp) 417 { 418 kmem_cache_free(sp->s_tpc, stpp); 419 } 420 421 static sfxge_tx_packet_t * 422 sfxge_tx_packet_create(sfxge_t *sp) 423 { 424 sfxge_tx_packet_t *stpp; 425 426 stpp = kmem_cache_alloc(sp->s_tpc, KM_NOSLEEP); 427 428 return (stpp); 429 } 430 431 static inline int 432 sfxge_tx_qfpp_put(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp) 433 { 434 sfxge_tx_fpp_t *stfp = &(stp->st_fpp); 435 436 ASSERT(mutex_owned(&(stp->st_lock))); 437 438 ASSERT3P(stpp->stp_next, ==, NULL); 439 ASSERT3P(stpp->stp_mp, ==, NULL); 440 ASSERT3P(stpp->stp_etherhp, ==, NULL); 441 ASSERT3P(stpp->stp_iphp, ==, NULL); 442 ASSERT3P(stpp->stp_thp, ==, NULL); 443 ASSERT3U(stpp->stp_off, ==, 0); 444 ASSERT3U(stpp->stp_size, ==, 0); 445 ASSERT3U(stpp->stp_mss, ==, 0); 446 ASSERT3U(stpp->stp_dpl_put_len, ==, 0); 447 448 if (stfp->stf_count < SFXGE_TX_FPP_MAX) { 449 /* Add to the start of the list */ 450 stpp->stp_next = stfp->stf_stpp; 451 stfp->stf_stpp = stpp; 452 stfp->stf_count++; 453 454 return (0); 455 } 456 457 DTRACE_PROBE(fpp_full); 458 return (ENOSPC); 459 } 460 461 static inline sfxge_tx_packet_t * 462 sfxge_tx_qfpp_get(sfxge_txq_t *stp) 463 { 464 sfxge_tx_packet_t *stpp; 465 sfxge_tx_fpp_t *stfp = &(stp->st_fpp); 466 467 ASSERT(mutex_owned(&(stp->st_lock))); 468 469 stpp = stfp->stf_stpp; 470 if (stpp == NULL) { 471 ASSERT3U(stfp->stf_count, ==, 0); 472 return (NULL); 473 } 474 475 /* Remove item from the head of the list */ 476 stfp->stf_stpp = stpp->stp_next; 477 stpp->stp_next = NULL; 478 479 ASSERT3U(stfp->stf_count, >, 0); 480 stfp->stf_count--; 481 482 if (stfp->stf_count != 0) { 483 ASSERT(stfp->stf_stpp != NULL); 484 prefetch_read_many(stfp->stf_stpp); 485 } 486 return (stpp); 487 } 488 489 static void 490 sfxge_tx_qfpp_empty(sfxge_txq_t *stp) 491 { 492 sfxge_t *sp = stp->st_sp; 493 sfxge_tx_fpp_t *stfp = &(stp->st_fpp); 494 sfxge_tx_packet_t *stpp; 495 496 mutex_enter(&(stp->st_lock)); 497 498 stpp = stfp->stf_stpp; 499 stfp->stf_stpp = NULL; 500 501 
while (stpp != NULL) { 502 sfxge_tx_packet_t *next; 503 504 next = stpp->stp_next; 505 stpp->stp_next = NULL; 506 507 ASSERT3U(stfp->stf_count, >, 0); 508 stfp->stf_count--; 509 510 sfxge_tx_packet_destroy(sp, stpp); 511 512 stpp = next; 513 } 514 ASSERT3U(stfp->stf_count, ==, 0); 515 516 mutex_exit(&(stp->st_lock)); 517 } 518 519 static inline void 520 sfxge_tx_qfbp_put(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp) 521 { 522 sfxge_tx_fbp_t *stfp = &(stp->st_fbp); 523 524 ASSERT3P(stbp->stb_next, ==, NULL); 525 ASSERT3U(stbp->stb_off, ==, 0); 526 ASSERT3U(stbp->stb_esm.esm_used, ==, 0); 527 528 stbp->stb_next = stfp->stf_stbp; 529 stfp->stf_stbp = stbp; 530 stfp->stf_count++; 531 } 532 533 534 static inline sfxge_tx_buffer_t * 535 sfxge_tx_qfbp_get(sfxge_txq_t *stp) 536 { 537 sfxge_tx_buffer_t *stbp; 538 sfxge_tx_fbp_t *stfp = &(stp->st_fbp); 539 540 stbp = stfp->stf_stbp; 541 if (stbp == NULL) { 542 ASSERT3U(stfp->stf_count, ==, 0); 543 return (NULL); 544 } 545 546 stfp->stf_stbp = stbp->stb_next; 547 stbp->stb_next = NULL; 548 549 ASSERT3U(stfp->stf_count, >, 0); 550 stfp->stf_count--; 551 552 if (stfp->stf_count != 0) { 553 ASSERT(stfp->stf_stbp != NULL); 554 prefetch_read_many(stfp->stf_stbp); 555 } 556 557 return (stbp); 558 } 559 560 static void 561 sfxge_tx_qfbp_empty(sfxge_txq_t *stp) 562 { 563 sfxge_t *sp = stp->st_sp; 564 sfxge_tx_fbp_t *stfp = &(stp->st_fbp); 565 sfxge_tx_buffer_t *stbp; 566 567 mutex_enter(&(stp->st_lock)); 568 569 stbp = stfp->stf_stbp; 570 stfp->stf_stbp = NULL; 571 572 while (stbp != NULL) { 573 sfxge_tx_buffer_t *next; 574 575 next = stbp->stb_next; 576 stbp->stb_next = NULL; 577 578 ASSERT3U(stfp->stf_count, >, 0); 579 stfp->stf_count--; 580 581 kmem_cache_free(sp->s_tbc, stbp); 582 583 stbp = next; 584 } 585 ASSERT3U(stfp->stf_count, ==, 0); 586 587 mutex_exit(&(stp->st_lock)); 588 } 589 590 static inline void 591 sfxge_tx_qfmp_put(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp) 592 { 593 sfxge_tx_fmp_t *stfp = &(stp->st_fmp); 594 595 ASSERT3P(stmp->stm_next, ==, NULL); 596 ASSERT3P(stmp->stm_mp, ==, NULL); 597 ASSERT3P(stmp->stm_base, ==, NULL); 598 ASSERT3U(stmp->stm_off, ==, 0); 599 ASSERT3U(stmp->stm_size, ==, 0); 600 601 stmp->stm_next = stfp->stf_stmp; 602 stfp->stf_stmp = stmp; 603 stfp->stf_count++; 604 } 605 606 static inline sfxge_tx_mapping_t * 607 sfxge_tx_qfmp_get(sfxge_txq_t *stp) 608 { 609 sfxge_tx_mapping_t *stmp; 610 sfxge_tx_fmp_t *stfp = &(stp->st_fmp); 611 612 stmp = stfp->stf_stmp; 613 if (stmp == NULL) { 614 ASSERT3U(stfp->stf_count, ==, 0); 615 return (NULL); 616 } 617 618 stfp->stf_stmp = stmp->stm_next; 619 stmp->stm_next = NULL; 620 621 ASSERT3U(stfp->stf_count, >, 0); 622 stfp->stf_count--; 623 624 if (stfp->stf_count != 0) { 625 ASSERT(stfp->stf_stmp != NULL); 626 prefetch_read_many(stfp->stf_stmp); 627 } 628 return (stmp); 629 } 630 631 static void 632 sfxge_tx_qfmp_empty(sfxge_txq_t *stp) 633 { 634 sfxge_t *sp = stp->st_sp; 635 sfxge_tx_fmp_t *stfp = &(stp->st_fmp); 636 sfxge_tx_mapping_t *stmp; 637 638 mutex_enter(&(stp->st_lock)); 639 640 stmp = stfp->stf_stmp; 641 stfp->stf_stmp = NULL; 642 643 while (stmp != NULL) { 644 sfxge_tx_mapping_t *next; 645 646 next = stmp->stm_next; 647 stmp->stm_next = NULL; 648 649 ASSERT3U(stfp->stf_count, >, 0); 650 stfp->stf_count--; 651 652 kmem_cache_free(sp->s_tmc, stmp); 653 654 stmp = next; 655 } 656 ASSERT3U(stfp->stf_count, ==, 0); 657 658 mutex_exit(&(stp->st_lock)); 659 } 660 661 static void 662 sfxge_tx_msgb_unbind(sfxge_tx_mapping_t *stmp) 663 { 664 bzero(stmp->stm_addr, sizeof 
(uint64_t) * SFXGE_TX_MAPPING_NADDR); 665 stmp->stm_off = 0; 666 667 (void) ddi_dma_unbind_handle(stmp->stm_dma_handle); 668 669 stmp->stm_size = 0; 670 stmp->stm_base = NULL; 671 672 stmp->stm_mp = NULL; 673 } 674 675 #define SFXGE_TX_DESCSHIFT 12 676 #define SFXGE_TX_DESCSIZE (1 << 12) 677 678 #define SFXGE_TX_DESCOFFSET (SFXGE_TX_DESCSIZE - 1) 679 #define SFXGE_TX_DESCMASK (~SFXGE_TX_DESCOFFSET) 680 681 static int 682 sfxge_tx_msgb_bind(mblk_t *mp, sfxge_tx_mapping_t *stmp) 683 { 684 ddi_dma_cookie_t dmac; 685 unsigned int ncookies; 686 size_t size; 687 unsigned int n; 688 int rc; 689 690 ASSERT(mp != NULL); 691 ASSERT3U(DB_TYPE(mp), ==, M_DATA); 692 693 ASSERT(stmp->stm_mp == NULL); 694 stmp->stm_mp = mp; 695 696 stmp->stm_base = (caddr_t)(mp->b_rptr); 697 stmp->stm_size = MBLKL(mp); 698 699 /* Bind the STREAMS block to the mapping */ 700 rc = ddi_dma_addr_bind_handle(stmp->stm_dma_handle, NULL, 701 stmp->stm_base, stmp->stm_size, DDI_DMA_WRITE | DDI_DMA_STREAMING, 702 DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies); 703 if (rc != DDI_DMA_MAPPED) 704 goto fail1; 705 706 ASSERT3U(ncookies, <=, SFXGE_TX_MAPPING_NADDR); 707 708 /* 709 * Construct an array of addresses and an initial 710 * offset. 711 */ 712 n = 0; 713 stmp->stm_addr[n++] = dmac.dmac_laddress & SFXGE_TX_DESCMASK; 714 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress & SFXGE_TX_DESCMASK); 715 716 stmp->stm_off = dmac.dmac_laddress & SFXGE_TX_DESCOFFSET; 717 718 size = MIN(SFXGE_TX_DESCSIZE - stmp->stm_off, dmac.dmac_size); 719 dmac.dmac_laddress += size; 720 dmac.dmac_size -= size; 721 722 for (;;) { 723 ASSERT3U(n, <, SFXGE_TX_MAPPING_NADDR); 724 725 if (dmac.dmac_size == 0) { 726 if (--ncookies == 0) 727 break; 728 729 ddi_dma_nextcookie(stmp->stm_dma_handle, &dmac); 730 } 731 732 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCMASK) != 0); 733 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCOFFSET) == 0); 734 stmp->stm_addr[n++] = dmac.dmac_laddress; 735 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress); 736 737 size = MIN(SFXGE_TX_DESCSIZE, dmac.dmac_size); 738 dmac.dmac_laddress += size; 739 dmac.dmac_size -= size; 740 } 741 ASSERT3U(n, <=, SFXGE_TX_MAPPING_NADDR); 742 743 return (0); 744 745 fail1: 746 DTRACE_PROBE1(fail1, int, rc); 747 748 stmp->stm_size = 0; 749 stmp->stm_base = NULL; 750 751 stmp->stm_mp = NULL; 752 753 return (-1); 754 } 755 756 static void 757 sfxge_tx_qreap(sfxge_txq_t *stp) 758 { 759 unsigned int reaped; 760 761 ASSERT(mutex_owned(&(stp->st_lock))); 762 763 reaped = stp->st_reaped; 764 while (reaped != stp->st_completed) { 765 unsigned int id; 766 sfxge_tx_mapping_t *stmp; 767 sfxge_tx_buffer_t *stbp; 768 769 id = reaped++ & (SFXGE_TX_NDESCS - 1); 770 771 ASSERT3P(stp->st_mp[id], ==, NULL); 772 773 if ((stmp = stp->st_stmp[id]) != NULL) { 774 stp->st_stmp[id] = NULL; 775 776 /* Free all the mappings */ 777 do { 778 sfxge_tx_mapping_t *next; 779 780 next = stmp->stm_next; 781 stmp->stm_next = NULL; 782 783 sfxge_tx_qfmp_put(stp, stmp); 784 785 stmp = next; 786 } while (stmp != NULL); 787 } 788 789 if ((stbp = stp->st_stbp[id]) != NULL) { 790 stp->st_stbp[id] = NULL; 791 792 /* Free all the buffers */ 793 do { 794 sfxge_tx_buffer_t *next; 795 796 next = stbp->stb_next; 797 stbp->stb_next = NULL; 798 799 stbp->stb_esm.esm_used = 0; 800 stbp->stb_off = 0; 801 802 sfxge_tx_qfbp_put(stp, stbp); 803 804 stbp = next; 805 } while (stbp != NULL); 806 } 807 } 808 stp->st_reaped = reaped; 809 } 810 811 static void 812 sfxge_tx_qlist_abort(sfxge_txq_t *stp) 813 { 814 unsigned int id; 815 sfxge_tx_mapping_t *stmp; 816 
sfxge_tx_buffer_t *stbp; 817 mblk_t *mp; 818 819 ASSERT(mutex_owned(&(stp->st_lock))); 820 821 id = stp->st_added & (SFXGE_TX_NDESCS - 1); 822 823 /* Clear the completion information */ 824 stmp = stp->st_stmp[id]; 825 stp->st_stmp[id] = NULL; 826 827 /* Free any mappings that were used */ 828 while (stmp != NULL) { 829 sfxge_tx_mapping_t *next; 830 831 next = stmp->stm_next; 832 stmp->stm_next = NULL; 833 834 if (stmp->stm_mp != NULL) 835 sfxge_tx_msgb_unbind(stmp); 836 837 sfxge_tx_qfmp_put(stp, stmp); 838 839 stmp = next; 840 } 841 842 stbp = stp->st_stbp[id]; 843 stp->st_stbp[id] = NULL; 844 845 /* Free any buffers that were used */ 846 while (stbp != NULL) { 847 sfxge_tx_buffer_t *next; 848 849 next = stbp->stb_next; 850 stbp->stb_next = NULL; 851 852 stbp->stb_off = 0; 853 stbp->stb_esm.esm_used = 0; 854 855 sfxge_tx_qfbp_put(stp, stbp); 856 857 stbp = next; 858 } 859 860 mp = stp->st_mp[id]; 861 stp->st_mp[id] = NULL; 862 863 if (mp != NULL) 864 freemsg(mp); 865 866 /* Clear the fragment list */ 867 stp->st_n = 0; 868 } 869 870 /* Push descriptors to the TX ring setting blocked if no space */ 871 static void 872 sfxge_tx_qlist_post(sfxge_txq_t *stp) 873 { 874 unsigned int id; 875 unsigned int level; 876 unsigned int available; 877 int rc; 878 879 ASSERT(mutex_owned(&(stp->st_lock))); 880 881 ASSERT(stp->st_n != 0); 882 883 again: 884 level = stp->st_added - stp->st_reaped; 885 available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level; 886 887 id = stp->st_added & (SFXGE_TX_NDESCS - 1); 888 889 if (available < stp->st_n) { 890 rc = ENOSPC; 891 goto fail1; 892 } 893 894 ASSERT3U(available, >=, stp->st_n); 895 896 /* Post the fragment list */ 897 if ((rc = efx_tx_qpost(stp->st_etp, stp->st_eb, stp->st_n, 898 stp->st_reaped, &(stp->st_added))) != 0) 899 goto fail2; 900 901 /* 902 * If the list took more than a single descriptor then we need to 903 * to move the completion information so it is referenced by the last 904 * descriptor. 905 */ 906 if (((stp->st_added - 1) & (SFXGE_TX_NDESCS - 1)) != id) { 907 sfxge_tx_mapping_t *stmp; 908 sfxge_tx_buffer_t *stbp; 909 mblk_t *mp; 910 911 stmp = stp->st_stmp[id]; 912 stp->st_stmp[id] = NULL; 913 914 stbp = stp->st_stbp[id]; 915 stp->st_stbp[id] = NULL; 916 917 mp = stp->st_mp[id]; 918 stp->st_mp[id] = NULL; 919 920 id = (stp->st_added - 1) & (SFXGE_TX_NDESCS - 1); 921 922 ASSERT(stp->st_stmp[id] == NULL); 923 stp->st_stmp[id] = stmp; 924 925 ASSERT(stp->st_stbp[id] == NULL); 926 stp->st_stbp[id] = stbp; 927 928 ASSERT(stp->st_mp[id] == NULL); 929 stp->st_mp[id] = mp; 930 } 931 932 /* Clear the list */ 933 stp->st_n = 0; 934 935 ASSERT3U(stp->st_unblock, ==, SFXGE_TXQ_NOT_BLOCKED); 936 return; 937 938 fail2: 939 DTRACE_PROBE(fail2); 940 fail1: 941 DTRACE_PROBE1(fail1, int, rc); 942 943 ASSERT(rc == ENOSPC); 944 945 level = stp->st_added - stp->st_completed; 946 available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level; 947 948 /* 949 * If there would be enough space after we've reaped any completed 950 * mappings and buffers, and we gain sufficient queue space by doing 951 * so, then reap now and try posting again. 
952 */ 953 if (stp->st_n <= available && 954 stp->st_completed - stp->st_reaped >= SFXGE_TX_BATCH) { 955 sfxge_tx_qreap(stp); 956 957 goto again; 958 } 959 960 /* Set the unblock level */ 961 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED) { 962 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL1; 963 } else { 964 ASSERT(stp->st_unblock == SFXGE_TXQ_UNBLOCK_LEVEL1); 965 966 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL2; 967 } 968 969 /* 970 * Avoid a race with completion interrupt handling that could leave the 971 * queue blocked. 972 * 973 * NOTE: The use of st_pending rather than st_completed is intentional 974 * as st_pending is updated per-event rather than per-batch and 975 * therefore avoids needless deferring. 976 */ 977 if (stp->st_pending == stp->st_added) { 978 sfxge_tx_qreap(stp); 979 980 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED; 981 goto again; 982 } 983 984 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED); 985 } 986 987 static int 988 sfxge_tx_kstat_update(kstat_t *ksp, int rw) 989 { 990 sfxge_txq_t *stp = ksp->ks_private; 991 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 992 kstat_named_t *knp; 993 int rc; 994 995 ASSERT(mutex_owned(&(stp->st_lock))); 996 997 if (rw != KSTAT_READ) { 998 rc = EACCES; 999 goto fail1; 1000 } 1001 1002 if (stp->st_state != SFXGE_TXQ_STARTED) 1003 goto done; 1004 1005 efx_tx_qstats_update(stp->st_etp, stp->st_stat); 1006 knp = (kstat_named_t *)ksp->ks_data + TX_NQSTATS; 1007 knp->value.ui64 = stdp->get_pkt_limit; 1008 knp++; 1009 knp->value.ui64 = stdp->put_pkt_limit; 1010 knp++; 1011 knp->value.ui64 = stdp->get_full_count; 1012 knp++; 1013 knp->value.ui64 = stdp->put_full_count; 1014 1015 done: 1016 return (0); 1017 1018 fail1: 1019 DTRACE_PROBE1(fail1, int, rc); 1020 1021 return (rc); 1022 } 1023 1024 static int 1025 sfxge_tx_kstat_init(sfxge_txq_t *stp) 1026 { 1027 sfxge_t *sp = stp->st_sp; 1028 unsigned int index = stp->st_index; 1029 dev_info_t *dip = sp->s_dip; 1030 kstat_t *ksp; 1031 kstat_named_t *knp; 1032 char name[MAXNAMELEN]; 1033 unsigned int id; 1034 int rc; 1035 1036 /* Create the set */ 1037 (void) snprintf(name, MAXNAMELEN - 1, "%s_txq%04d", 1038 ddi_driver_name(dip), index); 1039 1040 if ((ksp = kstat_create((char *)ddi_driver_name(dip), 1041 ddi_get_instance(dip), name, "queue", KSTAT_TYPE_NAMED, 1042 TX_NQSTATS + 4, 0)) == NULL) { 1043 rc = ENOMEM; 1044 goto fail1; 1045 } 1046 1047 stp->st_ksp = ksp; 1048 1049 ksp->ks_update = sfxge_tx_kstat_update; 1050 ksp->ks_private = stp; 1051 ksp->ks_lock = &(stp->st_lock); 1052 1053 /* Initialise the named stats */ 1054 stp->st_stat = knp = ksp->ks_data; 1055 for (id = 0; id < TX_NQSTATS; id++) { 1056 kstat_named_init(knp, (char *)efx_tx_qstat_name(sp->s_enp, id), 1057 KSTAT_DATA_UINT64); 1058 knp++; 1059 } 1060 kstat_named_init(knp, "dpl_get_pkt_limit", KSTAT_DATA_UINT64); 1061 knp++; 1062 kstat_named_init(knp, "dpl_put_pkt_limit", KSTAT_DATA_UINT64); 1063 knp++; 1064 kstat_named_init(knp, "dpl_get_full_count", KSTAT_DATA_UINT64); 1065 knp++; 1066 kstat_named_init(knp, "dpl_put_full_count", KSTAT_DATA_UINT64); 1067 1068 kstat_install(ksp); 1069 return (0); 1070 1071 fail1: 1072 DTRACE_PROBE1(fail1, int, rc); 1073 1074 return (rc); 1075 } 1076 1077 static void 1078 sfxge_tx_kstat_fini(sfxge_txq_t *stp) 1079 { 1080 /* Destroy the set */ 1081 kstat_delete(stp->st_ksp); 1082 stp->st_ksp = NULL; 1083 stp->st_stat = NULL; 1084 } 1085 1086 static int 1087 sfxge_tx_qinit(sfxge_t *sp, unsigned int index, sfxge_txq_type_t type, 1088 unsigned int evq) 1089 { 1090 sfxge_txq_t *stp; 1091 sfxge_tx_dpl_t *stdp; 1092 int 
rc; 1093 1094 ASSERT3U(index, <, EFX_ARRAY_SIZE(sp->s_stp)); 1095 ASSERT3U(type, <, SFXGE_TXQ_NTYPES); 1096 ASSERT3U(evq, <, EFX_ARRAY_SIZE(sp->s_sep)); 1097 1098 if ((stp = kmem_cache_alloc(sp->s_tqc, KM_SLEEP)) == NULL) { 1099 rc = ENOMEM; 1100 goto fail1; 1101 } 1102 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_UNINITIALIZED); 1103 1104 stdp = &(stp->st_dpl); 1105 1106 stp->st_index = index; 1107 stp->st_type = type; 1108 stp->st_evq = evq; 1109 1110 mutex_init(&(stp->st_lock), NULL, MUTEX_DRIVER, 1111 DDI_INTR_PRI(sp->s_intr.si_intr_pri)); 1112 1113 /* Initialize the statistics */ 1114 if ((rc = sfxge_tx_kstat_init(stp)) != 0) 1115 goto fail2; 1116 1117 stdp->get_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip, 1118 DDI_PROP_DONTPASS, "tx_dpl_get_pkt_limit", 1119 SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT); 1120 1121 stdp->put_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip, 1122 DDI_PROP_DONTPASS, "tx_dpl_put_pkt_limit", 1123 SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT); 1124 1125 /* Allocate a per-EVQ label for events from this TXQ */ 1126 if ((rc = sfxge_ev_txlabel_alloc(sp, evq, stp, &(stp->st_label))) != 0) 1127 goto fail2; 1128 1129 stp->st_state = SFXGE_TXQ_INITIALIZED; 1130 1131 /* Attach the TXQ to the driver */ 1132 ASSERT3P(sp->s_stp[index], ==, NULL); 1133 sp->s_stp[index] = stp; 1134 sp->s_tx_qcount++; 1135 1136 return (0); 1137 1138 fail2: 1139 DTRACE_PROBE(fail2); 1140 1141 sfxge_tx_kstat_fini(stp); 1142 1143 1144 stp->st_evq = 0; 1145 stp->st_type = 0; 1146 stp->st_index = 0; 1147 1148 mutex_destroy(&(stp->st_lock)); 1149 1150 kmem_cache_free(sp->s_tqc, stp); 1151 1152 fail1: 1153 DTRACE_PROBE1(fail1, int, rc); 1154 1155 return (rc); 1156 } 1157 1158 static int 1159 sfxge_tx_qstart(sfxge_t *sp, unsigned int index) 1160 { 1161 sfxge_txq_t *stp = sp->s_stp[index]; 1162 efx_nic_t *enp = sp->s_enp; 1163 efsys_mem_t *esmp; 1164 sfxge_evq_t *sep; 1165 unsigned int evq; 1166 unsigned int flags; 1167 unsigned int desc_index; 1168 int rc; 1169 1170 mutex_enter(&(stp->st_lock)); 1171 1172 esmp = &(stp->st_mem); 1173 evq = stp->st_evq; 1174 sep = sp->s_sep[evq]; 1175 1176 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED); 1177 ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED); 1178 1179 /* Zero the memory */ 1180 bzero(esmp->esm_base, EFX_TXQ_SIZE(SFXGE_TX_NDESCS)); 1181 1182 /* Program the buffer table */ 1183 if ((rc = sfxge_sram_buf_tbl_set(sp, stp->st_id, esmp, 1184 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS))) != 0) 1185 goto fail1; 1186 1187 switch (stp->st_type) { 1188 case SFXGE_TXQ_NON_CKSUM: 1189 flags = 0; 1190 break; 1191 1192 case SFXGE_TXQ_IP_CKSUM: 1193 flags = EFX_TXQ_CKSUM_IPV4; 1194 break; 1195 1196 case SFXGE_TXQ_IP_TCP_UDP_CKSUM: 1197 flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP; 1198 break; 1199 1200 default: 1201 ASSERT(B_FALSE); 1202 1203 flags = 0; 1204 break; 1205 } 1206 1207 /* Create the transmit queue */ 1208 if ((rc = efx_tx_qcreate(enp, index, stp->st_label, esmp, 1209 SFXGE_TX_NDESCS, stp->st_id, flags, sep->se_eep, 1210 &(stp->st_etp), &desc_index)) != 0) 1211 goto fail2; 1212 1213 /* Initialise queue descriptor indexes */ 1214 stp->st_added = desc_index; 1215 stp->st_pending = desc_index; 1216 stp->st_completed = desc_index; 1217 stp->st_reaped = desc_index; 1218 1219 /* Enable the transmit queue */ 1220 efx_tx_qenable(stp->st_etp); 1221 1222 stp->st_state = SFXGE_TXQ_STARTED; 1223 1224 mutex_exit(&(stp->st_lock)); 1225 1226 return (0); 1227 1228 fail2: 1229 DTRACE_PROBE(fail2); 1230 1231 /* Clear entries from the buffer table */ 1232 sfxge_sram_buf_tbl_clear(sp, 
stp->st_id, 1233 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS)); 1234 1235 fail1: 1236 DTRACE_PROBE1(fail1, int, rc); 1237 1238 mutex_exit(&(stp->st_lock)); 1239 1240 return (rc); 1241 } 1242 1243 static inline int 1244 sfxge_tx_qmapping_add(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp, 1245 size_t *offp, size_t *limitp) 1246 { 1247 mblk_t *mp; 1248 size_t mapping_off; 1249 size_t mapping_size; 1250 int rc; 1251 1252 ASSERT3U(*offp, <, stmp->stm_size); 1253 ASSERT(*limitp != 0); 1254 1255 mp = stmp->stm_mp; 1256 1257 ASSERT3P(stmp->stm_base, ==, mp->b_rptr); 1258 ASSERT3U(stmp->stm_size, ==, MBLKL(mp)); 1259 1260 mapping_off = stmp->stm_off + *offp; 1261 mapping_size = stmp->stm_size - *offp; 1262 1263 while (mapping_size != 0 && *limitp != 0) { 1264 size_t page = 1265 mapping_off >> SFXGE_TX_DESCSHIFT; 1266 size_t page_off = 1267 mapping_off & SFXGE_TX_DESCOFFSET; 1268 size_t page_size = 1269 SFXGE_TX_DESCSIZE - page_off; 1270 efx_buffer_t *ebp; 1271 1272 ASSERT3U(page, <, SFXGE_TX_MAPPING_NADDR); 1273 ASSERT((stmp->stm_addr[page] & SFXGE_TX_DESCMASK) != 0); 1274 1275 page_size = MIN(page_size, mapping_size); 1276 page_size = MIN(page_size, *limitp); 1277 1278 ASSERT3U(stp->st_n, <=, 1279 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 1280 if (stp->st_n == 1281 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) { 1282 rc = ENOSPC; 1283 goto fail1; 1284 } 1285 1286 ebp = &(stp->st_eb[stp->st_n++]); 1287 ebp->eb_addr = stmp->stm_addr[page] + 1288 page_off; 1289 ebp->eb_size = page_size; 1290 1291 *offp += page_size; 1292 *limitp -= page_size; 1293 1294 mapping_off += page_size; 1295 mapping_size -= page_size; 1296 1297 ebp->eb_eop = (*limitp == 0 || 1298 (mapping_size == 0 && mp->b_cont == NULL)); 1299 1300 DTRACE_PROBE5(tx_mapping_add, 1301 unsigned int, stp->st_index, 1302 unsigned int, stp->st_n - 1, 1303 uint64_t, ebp->eb_addr, 1304 size_t, ebp->eb_size, 1305 boolean_t, ebp->eb_eop); 1306 } 1307 1308 ASSERT3U(*offp, <=, stmp->stm_size); 1309 1310 return (0); 1311 1312 fail1: 1313 DTRACE_PROBE1(fail1, int, rc); 1314 1315 return (rc); 1316 } 1317 1318 static inline int 1319 sfxge_tx_qbuffer_add(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp, boolean_t eop) 1320 { 1321 efx_buffer_t *ebp; 1322 int rc; 1323 1324 ASSERT3U(stp->st_n, <=, 1325 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 1326 if (stp->st_n == EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) { 1327 rc = ENOSPC; 1328 goto fail1; 1329 } 1330 1331 ebp = &(stp->st_eb[stp->st_n++]); 1332 ebp->eb_addr = stbp->stb_esm.esm_addr + stbp->stb_off; 1333 ebp->eb_size = stbp->stb_esm.esm_used - stbp->stb_off; 1334 ebp->eb_eop = eop; 1335 1336 (void) ddi_dma_sync(stbp->stb_esm.esm_dma_handle, 1337 stbp->stb_off, ebp->eb_size, 1338 DDI_DMA_SYNC_FORDEV); 1339 1340 stbp->stb_off = stbp->stb_esm.esm_used; 1341 1342 DTRACE_PROBE5(tx_buffer_add, 1343 unsigned int, stp->st_index, 1344 unsigned int, stp->st_n - 1, 1345 uint64_t, ebp->eb_addr, size_t, ebp->eb_size, 1346 boolean_t, ebp->eb_eop); 1347 1348 return (0); 1349 1350 fail1: 1351 DTRACE_PROBE1(fail1, int, rc); 1352 1353 return (rc); 1354 } 1355 1356 static inline boolean_t 1357 sfxge_tx_msgb_copy(mblk_t *mp, sfxge_tx_buffer_t *stbp, size_t *offp, 1358 size_t *limitp) 1359 { 1360 size_t data_off; 1361 size_t data_size; 1362 size_t copy_off; 1363 size_t copy_size; 1364 boolean_t eop; 1365 1366 ASSERT3U(*offp, <=, MBLKL(mp)); 1367 ASSERT(*limitp != 0); 1368 1369 data_off = *offp; 1370 data_size = MBLKL(mp) - *offp; 1371 1372 copy_off = stbp->stb_esm.esm_used; 1373 copy_size = SFXGE_TX_BUFFER_SIZE - copy_off; 1374 1375 copy_size = MIN(copy_size, data_size); 1376 copy_size = 
MIN(copy_size, *limitp); 1377 1378 bcopy(mp->b_rptr + data_off, 1379 stbp->stb_esm.esm_base + copy_off, copy_size); 1380 1381 stbp->stb_esm.esm_used += copy_size; 1382 ASSERT3U(stbp->stb_esm.esm_used, <=, 1383 SFXGE_TX_BUFFER_SIZE); 1384 1385 *offp += copy_size; 1386 *limitp -= copy_size; 1387 1388 data_off += copy_size; 1389 data_size -= copy_size; 1390 1391 eop = (*limitp == 0 || 1392 (data_size == 0 && mp->b_cont == NULL)); 1393 1394 ASSERT3U(*offp, <=, MBLKL(mp)); 1395 1396 return (eop); 1397 } 1398 1399 static int 1400 sfxge_tx_qpayload_fragment(sfxge_txq_t *stp, unsigned int id, mblk_t **mpp, 1401 size_t *offp, size_t size, boolean_t copy) 1402 { 1403 sfxge_t *sp = stp->st_sp; 1404 mblk_t *mp = *mpp; 1405 size_t off = *offp; 1406 sfxge_tx_buffer_t *stbp; 1407 sfxge_tx_mapping_t *stmp; 1408 int rc; 1409 1410 stbp = stp->st_stbp[id]; 1411 ASSERT(stbp == NULL || (stbp->stb_esm.esm_used == stbp->stb_off)); 1412 1413 stmp = stp->st_stmp[id]; 1414 1415 while (size != 0) { 1416 boolean_t eop; 1417 1418 ASSERT(mp != NULL); 1419 1420 if (mp->b_cont != NULL) 1421 prefetch_read_many(mp->b_cont); 1422 1423 ASSERT3U(off, <, MBLKL(mp)); 1424 1425 if (copy) 1426 goto copy; 1427 1428 /* 1429 * Check whether we have already mapped this data block for 1430 * DMA. 1431 */ 1432 if (stmp == NULL || stmp->stm_mp != mp) { 1433 /* 1434 * If we are part way through copying a data block then 1435 * there's no point in trying to map it for DMA. 1436 */ 1437 if (off != 0) 1438 goto copy; 1439 1440 /* 1441 * If the data block is too short then the cost of 1442 * mapping it for DMA would outweigh the cost of 1443 * copying it. 1444 */ 1445 if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD) 1446 goto copy; 1447 1448 /* Try to grab a transmit mapping from the pool */ 1449 stmp = sfxge_tx_qfmp_get(stp); 1450 if (stmp == NULL) { 1451 /* 1452 * The pool was empty so allocate a new 1453 * mapping. 1454 */ 1455 if ((stmp = kmem_cache_alloc(sp->s_tmc, 1456 KM_NOSLEEP)) == NULL) 1457 goto copy; 1458 } 1459 1460 /* Add the DMA mapping to the list */ 1461 stmp->stm_next = stp->st_stmp[id]; 1462 stp->st_stmp[id] = stmp; 1463 1464 /* Try to bind the data block to the mapping */ 1465 if (sfxge_tx_msgb_bind(mp, stmp) != 0) 1466 goto copy; 1467 } 1468 ASSERT3P(stmp->stm_mp, ==, mp); 1469 1470 /* 1471 * If we have a partially filled buffer then we must add it to 1472 * the fragment list before adding the mapping. 1473 */ 1474 if (stbp != NULL && (stbp->stb_esm.esm_used > stbp->stb_off)) { 1475 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE); 1476 if (rc != 0) 1477 goto fail1; 1478 } 1479 1480 /* Add the mapping to the fragment list */ 1481 rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size); 1482 if (rc != 0) 1483 goto fail2; 1484 1485 ASSERT(off == MBLKL(mp) || size == 0); 1486 1487 /* 1488 * If the data block has been exhausted then Skip over the 1489 * control block and advance to the next data block. 1490 */ 1491 if (off == MBLKL(mp)) { 1492 mp = mp->b_cont; 1493 off = 0; 1494 } 1495 1496 continue; 1497 1498 copy: 1499 if (stbp == NULL || 1500 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE) { 1501 /* Try to grab a buffer from the pool */ 1502 stbp = sfxge_tx_qfbp_get(stp); 1503 if (stbp == NULL) { 1504 /* 1505 * The pool was empty so allocate a new 1506 * buffer. 
1507 */ 1508 if ((stbp = kmem_cache_alloc(sp->s_tbc, 1509 KM_NOSLEEP)) == NULL) { 1510 rc = ENOMEM; 1511 goto fail3; 1512 } 1513 } 1514 1515 /* Add it to the list */ 1516 stbp->stb_next = stp->st_stbp[id]; 1517 stp->st_stbp[id] = stbp; 1518 } 1519 1520 /* Copy as much of the data block as we can into the buffer */ 1521 eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size); 1522 1523 ASSERT(off == MBLKL(mp) || size == 0 || 1524 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE); 1525 1526 /* 1527 * If we have reached the end of the packet, or the buffer is 1528 * full, then add the buffer to the fragment list. 1529 */ 1530 if (stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE || eop) { 1531 rc = sfxge_tx_qbuffer_add(stp, stbp, eop); 1532 if (rc != 0) 1533 goto fail4; 1534 } 1535 1536 /* 1537 * If the data block has been exhaused then advance to the next 1538 * one. 1539 */ 1540 if (off == MBLKL(mp)) { 1541 mp = mp->b_cont; 1542 off = 0; 1543 } 1544 } 1545 1546 *mpp = mp; 1547 *offp = off; 1548 1549 return (0); 1550 1551 fail4: 1552 DTRACE_PROBE(fail4); 1553 fail3: 1554 DTRACE_PROBE(fail3); 1555 fail2: 1556 DTRACE_PROBE(fail2); 1557 fail1: 1558 DTRACE_PROBE1(fail1, int, rc); 1559 1560 return (rc); 1561 } 1562 1563 static int 1564 sfxge_tx_qlso_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp, 1565 boolean_t copy) 1566 { 1567 sfxge_t *sp = stp->st_sp; 1568 mblk_t *mp = stpp->stp_mp; 1569 struct ether_header *etherhp = stpp->stp_etherhp; 1570 struct ip *iphp = stpp->stp_iphp; 1571 struct tcphdr *thp = stpp->stp_thp; 1572 size_t size = stpp->stp_size; 1573 size_t off = stpp->stp_off; 1574 size_t mss = stpp->stp_mss; 1575 unsigned int id; 1576 caddr_t hp; 1577 size_t ehs, hs; 1578 uint16_t start_len; 1579 uint16_t start_id; 1580 uint16_t ip_id; 1581 uint8_t start_flags; 1582 uint32_t start_seq; 1583 uint32_t th_seq; 1584 size_t lss; 1585 sfxge_tx_buffer_t *stbp; 1586 int rc; 1587 1588 ASSERT(mutex_owned(&(stp->st_lock))); 1589 1590 if ((DB_LSOFLAGS(mp) & HW_LSO) == 0) { 1591 rc = EINVAL; 1592 goto fail1; 1593 } 1594 1595 id = stp->st_added & (SFXGE_TX_NDESCS - 1); 1596 1597 ASSERT(stp->st_n == 0); 1598 ASSERT(stp->st_stbp[id] == NULL); 1599 ASSERT(stp->st_stmp[id] == NULL); 1600 1601 ehs = (etherhp->ether_type == htons(ETHERTYPE_VLAN)) ? 1602 sizeof (struct ether_vlan_header) : 1603 sizeof (struct ether_header); 1604 if (msgdsize(mp) != ehs + ntohs(iphp->ip_len)) { 1605 rc = EINVAL; 1606 goto fail2; 1607 } 1608 1609 /* The payload offset is equivalent to the size of the headers */ 1610 hp = (caddr_t)(mp->b_rptr); 1611 hs = off; 1612 1613 /* 1614 * If the initial data block only contains the headers then advance 1615 * to the next one. 1616 */ 1617 if (hs > MBLKL(mp)) { 1618 rc = EINVAL; 1619 goto fail3; 1620 } 1621 mp->b_rptr += hs; 1622 1623 if (MBLKL(mp) == 0) 1624 mp = mp->b_cont; 1625 1626 off = 0; 1627 1628 /* Check IP and TCP headers are suitable for LSO */ 1629 if (((iphp->ip_off & ~htons(IP_DF)) != 0) || 1630 ((thp->th_flags & (TH_URG | TH_SYN)) != 0) || 1631 (thp->th_urp != 0)) { 1632 rc = EINVAL; 1633 goto fail4; 1634 } 1635 1636 if (size + (thp->th_off << 2) + (iphp->ip_hl << 2) != 1637 ntohs(iphp->ip_len)) { 1638 rc = EINVAL; 1639 goto fail4; 1640 } 1641 1642 /* 1643 * Get the base IP id, The stack leaves enough of a gap in id space 1644 * for us to increment this for each segment we send out. 
1645 */ 1646 start_len = ntohs(iphp->ip_len); 1647 start_id = ip_id = ntohs(iphp->ip_id); 1648 1649 /* Get the base TCP sequence number and flags */ 1650 start_flags = thp->th_flags; 1651 start_seq = th_seq = ntohl(thp->th_seq); 1652 1653 /* Adjust the header for interim segments */ 1654 iphp->ip_len = htons((iphp->ip_hl << 2) + (thp->th_off << 2) + mss); 1655 thp->th_flags = start_flags & ~(TH_PUSH | TH_FIN); 1656 1657 lss = size; 1658 if ((lss / mss) >= (EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) / 2)) { 1659 rc = EINVAL; 1660 goto fail5; 1661 } 1662 1663 stbp = NULL; 1664 while (lss != 0) { 1665 size_t ss = MIN(lss, mss); 1666 boolean_t eol = (ss == lss); 1667 1668 /* Adjust the header for this segment */ 1669 iphp->ip_id = htons(ip_id); 1670 ip_id++; 1671 1672 thp->th_seq = htonl(th_seq); 1673 th_seq += ss; 1674 1675 /* If this is the final segment then do some extra adjustment */ 1676 if (eol) { 1677 iphp->ip_len = htons((iphp->ip_hl << 2) + 1678 (thp->th_off << 2) + ss); 1679 thp->th_flags = start_flags; 1680 } 1681 1682 if (stbp == NULL || 1683 stbp->stb_esm.esm_used + hs > SFXGE_TX_BUFFER_SIZE) { 1684 /* Try to grab a buffer from the pool */ 1685 stbp = sfxge_tx_qfbp_get(stp); 1686 if (stbp == NULL) { 1687 /* 1688 * The pool was empty so allocate a new 1689 * buffer. 1690 */ 1691 if ((stbp = kmem_cache_alloc(sp->s_tbc, 1692 KM_NOSLEEP)) == NULL) { 1693 rc = ENOMEM; 1694 goto fail6; 1695 } 1696 } 1697 1698 /* Add it to the list */ 1699 stbp->stb_next = stp->st_stbp[id]; 1700 stp->st_stbp[id] = stbp; 1701 } 1702 1703 /* Copy in the headers */ 1704 ASSERT3U(stbp->stb_off, ==, stbp->stb_esm.esm_used); 1705 bcopy(hp, stbp->stb_esm.esm_base + stbp->stb_off, hs); 1706 stbp->stb_esm.esm_used += hs; 1707 1708 /* Add the buffer to the fragment list */ 1709 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE); 1710 if (rc != 0) 1711 goto fail7; 1712 1713 /* Add the payload to the fragment list */ 1714 if ((rc = sfxge_tx_qpayload_fragment(stp, id, &mp, &off, 1715 ss, copy)) != 0) 1716 goto fail8; 1717 1718 lss -= ss; 1719 } 1720 ASSERT3U(off, ==, 0); 1721 ASSERT3P(mp, ==, NULL); 1722 1723 ASSERT3U(th_seq - start_seq, ==, size); 1724 1725 /* 1726 * If no part of the packet has been mapped for DMA then we can free 1727 * it now, otherwise it can only be freed on completion. 1728 */ 1729 if (stp->st_stmp[id] == NULL) 1730 freemsg(stpp->stp_mp); 1731 else 1732 stp->st_mp[id] = stpp->stp_mp; 1733 1734 stpp->stp_mp = NULL; 1735 1736 return (0); 1737 1738 fail8: 1739 DTRACE_PROBE(fail8); 1740 fail7: 1741 DTRACE_PROBE(fail7); 1742 fail6: 1743 DTRACE_PROBE(fail6); 1744 fail5: 1745 DTRACE_PROBE(fail5); 1746 1747 /* Restore the header */ 1748 thp->th_seq = htonl(start_seq); 1749 thp->th_flags = start_flags; 1750 1751 iphp->ip_len = htons(start_len); 1752 iphp->ip_id = htons(start_id); 1753 1754 fail4: 1755 DTRACE_PROBE(fail4); 1756 1757 mp = stpp->stp_mp; 1758 mp->b_rptr -= hs; 1759 1760 ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ? 
1761 sizeof (struct ether_vlan_header) : 1762 sizeof (struct ether_header)) + 1763 ntohs(iphp->ip_len), ==, msgdsize(mp)); 1764 1765 ASSERT(stp->st_mp[id] == NULL); 1766 1767 fail3: 1768 DTRACE_PROBE(fail3); 1769 fail2: 1770 DTRACE_PROBE(fail2); 1771 fail1: 1772 DTRACE_PROBE1(fail1, int, rc); 1773 1774 return (rc); 1775 } 1776 1777 static int 1778 sfxge_tx_qpacket_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp, 1779 boolean_t copy) 1780 { 1781 sfxge_t *sp = stp->st_sp; 1782 mblk_t *mp = stpp->stp_mp; 1783 unsigned int id; 1784 size_t off; 1785 size_t size; 1786 sfxge_tx_mapping_t *stmp; 1787 sfxge_tx_buffer_t *stbp; 1788 int rc; 1789 1790 ASSERT(mutex_owned(&(stp->st_lock))); 1791 1792 ASSERT(stp->st_n == 0); 1793 1794 id = stp->st_added & (SFXGE_TX_NDESCS - 1); 1795 1796 ASSERT(stp->st_stbp[id] == NULL); 1797 ASSERT(stp->st_stmp[id] == NULL); 1798 1799 off = 0; 1800 size = LONG_MAX; /* must be larger than the packet */ 1801 1802 stbp = NULL; 1803 stmp = NULL; 1804 1805 while (mp != NULL) { 1806 boolean_t eop; 1807 1808 ASSERT(mp != NULL); 1809 1810 if (mp->b_cont != NULL) 1811 prefetch_read_many(mp->b_cont); 1812 1813 ASSERT(stmp == NULL || stmp->stm_mp != mp); 1814 1815 if (copy) 1816 goto copy; 1817 1818 /* 1819 * If we are part way through copying a data block then there's 1820 * no point in trying to map it for DMA. 1821 */ 1822 if (off != 0) 1823 goto copy; 1824 1825 /* 1826 * If the data block is too short then the cost of mapping it 1827 * for DMA would outweigh the cost of copying it. 1828 * 1829 * TX copy break 1830 */ 1831 if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD) 1832 goto copy; 1833 1834 /* Try to grab a transmit mapping from the pool */ 1835 stmp = sfxge_tx_qfmp_get(stp); 1836 if (stmp == NULL) { 1837 /* 1838 * The pool was empty so allocate a new 1839 * mapping. 1840 */ 1841 if ((stmp = kmem_cache_alloc(sp->s_tmc, 1842 KM_NOSLEEP)) == NULL) 1843 goto copy; 1844 } 1845 1846 /* Add the DMA mapping to the list */ 1847 stmp->stm_next = stp->st_stmp[id]; 1848 stp->st_stmp[id] = stmp; 1849 1850 /* Try to bind the data block to the mapping */ 1851 if (sfxge_tx_msgb_bind(mp, stmp) != 0) 1852 goto copy; 1853 1854 /* 1855 * If we have a partially filled buffer then we must add it to 1856 * the fragment list before adding the mapping. 1857 */ 1858 if (stbp != NULL && (stbp->stb_esm.esm_used > stbp->stb_off)) { 1859 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE); 1860 if (rc != 0) 1861 goto fail1; 1862 } 1863 1864 /* Add the mapping to the fragment list */ 1865 rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size); 1866 if (rc != 0) 1867 goto fail2; 1868 1869 ASSERT3U(off, ==, MBLKL(mp)); 1870 1871 /* Advance to the next data block */ 1872 mp = mp->b_cont; 1873 off = 0; 1874 continue; 1875 1876 copy: 1877 if (stbp == NULL || 1878 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE) { 1879 /* Try to grab a buffer from the pool */ 1880 stbp = sfxge_tx_qfbp_get(stp); 1881 if (stbp == NULL) { 1882 /* 1883 * The pool was empty so allocate a new 1884 * buffer. 
1885 */ 1886 if ((stbp = kmem_cache_alloc(sp->s_tbc, 1887 KM_NOSLEEP)) == NULL) { 1888 rc = ENOMEM; 1889 goto fail3; 1890 } 1891 } 1892 1893 /* Add it to the list */ 1894 stbp->stb_next = stp->st_stbp[id]; 1895 stp->st_stbp[id] = stbp; 1896 } 1897 1898 /* Copy as much of the data block as we can into the buffer */ 1899 eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size); 1900 1901 ASSERT(off == MBLKL(mp) || 1902 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE); 1903 1904 /* 1905 * If we have reached the end of the packet, or the buffer is 1906 * full, then add the buffer to the fragment list. 1907 */ 1908 if (stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE || eop) { 1909 rc = sfxge_tx_qbuffer_add(stp, stbp, eop); 1910 if (rc != 0) 1911 goto fail4; 1912 } 1913 1914 /* 1915 * If the data block has been exhaused then advance to the next 1916 * one. 1917 */ 1918 if (off == MBLKL(mp)) { 1919 mp = mp->b_cont; 1920 off = 0; 1921 } 1922 } 1923 ASSERT3U(off, ==, 0); 1924 ASSERT3P(mp, ==, NULL); 1925 ASSERT3U(size, !=, 0); 1926 1927 /* 1928 * If no part of the packet has been mapped for DMA then we can free 1929 * it now, otherwise it can only be freed on completion. 1930 */ 1931 if (stp->st_stmp[id] == NULL) 1932 freemsg(stpp->stp_mp); 1933 else 1934 stp->st_mp[id] = stpp->stp_mp; 1935 1936 stpp->stp_mp = NULL; 1937 1938 return (0); 1939 1940 fail4: 1941 DTRACE_PROBE(fail4); 1942 fail3: 1943 DTRACE_PROBE(fail3); 1944 fail2: 1945 DTRACE_PROBE(fail2); 1946 fail1: 1947 DTRACE_PROBE1(fail1, int, rc); 1948 1949 ASSERT(stp->st_stmp[id] == NULL); 1950 1951 return (rc); 1952 } 1953 1954 1955 #define SFXGE_TX_QDPL_PUT_PENDING(_stp) \ 1956 ((_stp)->st_dpl.std_put != 0) 1957 1958 static void 1959 sfxge_tx_qdpl_swizzle(sfxge_txq_t *stp) 1960 { 1961 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 1962 volatile uintptr_t *putp; 1963 uintptr_t put; 1964 sfxge_tx_packet_t *stpp; 1965 sfxge_tx_packet_t *p; 1966 sfxge_tx_packet_t **pp; 1967 unsigned int count; 1968 1969 ASSERT(mutex_owned(&(stp->st_lock))); 1970 1971 /* 1972 * Guaranteed that in flight TX packets will cause more TX completions 1973 * hence more swizzles must happen 1974 */ 1975 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp)); 1976 if (stdp->std_count >= stdp->get_pkt_limit) 1977 return; 1978 1979 /* Acquire the put list - replacing with an empty list */ 1980 putp = &(stdp->std_put); 1981 put = atomic_swap_ulong(putp, 0); 1982 stpp = (void *)put; 1983 1984 if (stpp == NULL) 1985 return; 1986 1987 /* Reverse the list */ 1988 pp = &(stpp->stp_next); 1989 p = NULL; 1990 1991 count = 0; 1992 do { 1993 sfxge_tx_packet_t *next; 1994 1995 next = stpp->stp_next; 1996 1997 stpp->stp_next = p; 1998 p = stpp; 1999 2000 count++; 2001 stpp = next; 2002 } while (stpp != NULL); 2003 2004 /* Add it to the tail of the get list */ 2005 ASSERT3P(*pp, ==, NULL); 2006 2007 *(stdp->std_getp) = p; 2008 stdp->std_getp = pp; 2009 stdp->std_count += count; 2010 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp)); 2011 2012 DTRACE_PROBE2(dpl_counts, int, stdp->std_count, int, count); 2013 } 2014 2015 2016 /* 2017 * If TXQ locked, add the RX DPL put list and this packet to the TX DPL get list 2018 * If TXQ unlocked, atomically add this packet to TX DPL put list 2019 * 2020 * The only possible error is ENOSPC (used for TX backpressure) 2021 * For the TX DPL put or get list becoming full, in both cases there must be 2022 * future TX completions (as represented by the packets on the DPL get lists). 
2023 * 2024 * This ensures that in the future mac_tx_update() will be called from 2025 * sfxge_tx_qcomplete() 2026 */ 2027 static inline int 2028 sfxge_tx_qdpl_add(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp, int locked) 2029 { 2030 sfxge_tx_dpl_t *stdp = &stp->st_dpl; 2031 2032 ASSERT3P(stpp->stp_next, ==, NULL); 2033 2034 if (locked) { 2035 ASSERT(mutex_owned(&stp->st_lock)); 2036 2037 if (stdp->std_count >= stdp->get_pkt_limit) { 2038 stdp->get_full_count++; 2039 return (ENOSPC); 2040 } 2041 2042 /* Reverse the put list onto the get list */ 2043 sfxge_tx_qdpl_swizzle(stp); 2044 2045 /* Add to the tail of the get list */ 2046 *(stdp->std_getp) = stpp; 2047 stdp->std_getp = &stpp->stp_next; 2048 stdp->std_count++; 2049 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp)); 2050 2051 } else { 2052 volatile uintptr_t *putp; 2053 uintptr_t old; 2054 uintptr_t new; 2055 sfxge_tx_packet_t *old_pkt; 2056 2057 putp = &(stdp->std_put); 2058 new = (uintptr_t)stpp; 2059 2060 /* Add to the head of the put list, keeping a list length */ 2061 do { 2062 old = *putp; 2063 old_pkt = (sfxge_tx_packet_t *)old; 2064 2065 stpp->stp_dpl_put_len = old ? 2066 old_pkt->stp_dpl_put_len + 1 : 1; 2067 2068 if (stpp->stp_dpl_put_len >= stdp->put_pkt_limit) { 2069 stpp->stp_next = 0; 2070 stpp->stp_dpl_put_len = 0; 2071 stdp->put_full_count++; 2072 return (ENOSPC); 2073 } 2074 2075 stpp->stp_next = (void *)old; 2076 } while (atomic_cas_ulong(putp, old, new) != old); 2077 } 2078 return (0); 2079 } 2080 2081 2082 /* Take all packets from DPL get list and try to send to HW */ 2083 static void 2084 sfxge_tx_qdpl_drain(sfxge_txq_t *stp) 2085 { 2086 sfxge_t *sp = stp->st_sp; 2087 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 2088 unsigned int pushed = stp->st_added; 2089 sfxge_tx_packet_t *stpp; 2090 unsigned int count; 2091 2092 ASSERT(mutex_owned(&(stp->st_lock))); 2093 2094 prefetch_read_many(sp->s_enp); 2095 prefetch_read_many(stp->st_etp); 2096 2097 stpp = stdp->std_get; 2098 count = stdp->std_count; 2099 2100 while (count != 0) { 2101 sfxge_tx_packet_t *next; 2102 boolean_t copy; 2103 int rc; 2104 2105 ASSERT(stpp != NULL); 2106 2107 /* Split stpp off */ 2108 next = stpp->stp_next; 2109 stpp->stp_next = NULL; 2110 2111 if (next != NULL) 2112 prefetch_read_many(next); 2113 2114 if (stp->st_state != SFXGE_TXQ_STARTED) 2115 goto reject; 2116 2117 copy = B_FALSE; 2118 2119 again: 2120 /* Fragment the packet */ 2121 if (stpp->stp_mss != 0) { 2122 rc = sfxge_tx_qlso_fragment(stp, stpp, copy); 2123 } else { 2124 rc = sfxge_tx_qpacket_fragment(stp, stpp, copy); 2125 } 2126 2127 switch (rc) { 2128 case 0: 2129 break; 2130 2131 case ENOSPC: 2132 if (!copy) 2133 goto copy; 2134 2135 /*FALLTHRU*/ 2136 default: 2137 goto reject; 2138 } 2139 2140 /* Free the packet structure */ 2141 stpp->stp_etherhp = NULL; 2142 stpp->stp_iphp = NULL; 2143 stpp->stp_thp = NULL; 2144 stpp->stp_off = 0; 2145 stpp->stp_size = 0; 2146 stpp->stp_mss = 0; 2147 stpp->stp_dpl_put_len = 0; 2148 2149 ASSERT3P(stpp->stp_mp, ==, NULL); 2150 2151 if (sfxge_tx_qfpp_put(stp, stpp) != 0) { 2152 sfxge_tx_packet_destroy(sp, stpp); 2153 stpp = NULL; 2154 } 2155 2156 --count; 2157 stpp = next; 2158 2159 /* Post the packet */ 2160 sfxge_tx_qlist_post(stp); 2161 2162 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) 2163 goto defer; 2164 2165 if (stp->st_added - pushed >= SFXGE_TX_BATCH) { 2166 efx_tx_qpush(stp->st_etp, stp->st_added, pushed); 2167 pushed = stp->st_added; 2168 } 2169 2170 continue; 2171 2172 copy: 2173 /* Abort the current fragment list */ 2174 
sfxge_tx_qlist_abort(stp); 2175 2176 /* Try copying the packet to flatten it */ 2177 ASSERT(!copy); 2178 copy = B_TRUE; 2179 2180 goto again; 2181 2182 reject: 2183 /* Abort the current fragment list */ 2184 sfxge_tx_qlist_abort(stp); 2185 2186 /* Discard the packet */ 2187 freemsg(stpp->stp_mp); 2188 stpp->stp_mp = NULL; 2189 2190 /* Free the packet structure */ 2191 stpp->stp_etherhp = NULL; 2192 stpp->stp_iphp = NULL; 2193 stpp->stp_thp = NULL; 2194 stpp->stp_off = 0; 2195 stpp->stp_size = 0; 2196 stpp->stp_mss = 0; 2197 stpp->stp_dpl_put_len = 0; 2198 2199 if (sfxge_tx_qfpp_put(stp, stpp) != 0) { 2200 sfxge_tx_packet_destroy(sp, stpp); 2201 stpp = NULL; 2202 } 2203 2204 --count; 2205 stpp = next; 2206 continue; 2207 defer: 2208 DTRACE_PROBE1(defer, unsigned int, stp->st_index); 2209 break; 2210 } 2211 2212 if (count == 0) { 2213 /* New empty get list */ 2214 ASSERT3P(stpp, ==, NULL); 2215 stdp->std_get = NULL; 2216 stdp->std_count = 0; 2217 2218 stdp->std_getp = &(stdp->std_get); 2219 } else { 2220 /* shorten the list by moving the head */ 2221 stdp->std_get = stpp; 2222 stdp->std_count = count; 2223 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp)); 2224 } 2225 2226 if (stp->st_added != pushed) 2227 efx_tx_qpush(stp->st_etp, stp->st_added, pushed); 2228 2229 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED || 2230 stdp->std_count == 0); 2231 } 2232 2233 /* Swizzle deferred packet list, try and push to HW */ 2234 static inline void 2235 sfxge_tx_qdpl_service(sfxge_txq_t *stp) 2236 { 2237 do { 2238 ASSERT(mutex_owned(&(stp->st_lock))); 2239 2240 if (SFXGE_TX_QDPL_PUT_PENDING(stp)) 2241 sfxge_tx_qdpl_swizzle(stp); 2242 2243 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED) 2244 sfxge_tx_qdpl_drain(stp); 2245 2246 mutex_exit(&(stp->st_lock)); 2247 2248 if (!SFXGE_TX_QDPL_PUT_PENDING(stp)) 2249 break; 2250 } while (mutex_tryenter(&(stp->st_lock))); 2251 } 2252 2253 static void 2254 sfxge_tx_qdpl_flush_locked(sfxge_txq_t *stp) 2255 { 2256 sfxge_t *sp = stp->st_sp; 2257 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 2258 sfxge_tx_packet_t *stpp; 2259 unsigned int count; 2260 2261 ASSERT(mutex_owned(&(stp->st_lock))); 2262 2263 /* Swizzle put list to the get list */ 2264 sfxge_tx_qdpl_swizzle(stp); 2265 2266 stpp = stdp->std_get; 2267 count = stdp->std_count; 2268 2269 while (count != 0) { 2270 sfxge_tx_packet_t *next; 2271 2272 next = stpp->stp_next; 2273 stpp->stp_next = NULL; 2274 2275 /* Discard the packet */ 2276 freemsg(stpp->stp_mp); 2277 stpp->stp_mp = NULL; 2278 2279 /* Free the packet structure */ 2280 stpp->stp_etherhp = NULL; 2281 stpp->stp_iphp = NULL; 2282 stpp->stp_thp = NULL; 2283 stpp->stp_off = 0; 2284 stpp->stp_size = 0; 2285 stpp->stp_mss = 0; 2286 stpp->stp_dpl_put_len = 0; 2287 2288 sfxge_tx_packet_destroy(sp, stpp); 2289 2290 --count; 2291 stpp = next; 2292 } 2293 2294 ASSERT3P(stpp, ==, NULL); 2295 2296 /* Empty list */ 2297 stdp->std_get = NULL; 2298 stdp->std_count = 0; 2299 stdp->std_getp = &(stdp->std_get); 2300 } 2301 2302 2303 void 2304 sfxge_tx_qdpl_flush(sfxge_txq_t *stp) 2305 { 2306 mutex_enter(&(stp->st_lock)); 2307 sfxge_tx_qdpl_flush_locked(stp); 2308 mutex_exit(&(stp->st_lock)); 2309 } 2310 2311 2312 static void 2313 sfxge_tx_qunblock(sfxge_txq_t *stp) 2314 { 2315 sfxge_t *sp = stp->st_sp; 2316 unsigned int evq = stp->st_evq; 2317 sfxge_evq_t *sep = sp->s_sep[evq]; 2318 2319 ASSERT(mutex_owned(&(sep->se_lock))); 2320 2321 mutex_enter(&(stp->st_lock)); 2322 2323 if (stp->st_state != SFXGE_TXQ_STARTED) { 2324 mutex_exit(&(stp->st_lock)); 2325 return; 2326 } 
2327 2328 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) { 2329 unsigned int level; 2330 2331 level = stp->st_added - stp->st_completed; 2332 if (level <= stp->st_unblock) { 2333 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED; 2334 sfxge_tx_qlist_post(stp); 2335 } 2336 } 2337 2338 sfxge_tx_qdpl_service(stp); 2339 /* lock has been dropped */ 2340 } 2341 2342 void 2343 sfxge_tx_qcomplete(sfxge_txq_t *stp) 2344 { 2345 sfxge_t *sp = stp->st_sp; 2346 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 2347 unsigned int evq = stp->st_evq; 2348 sfxge_evq_t *sep = sp->s_sep[evq]; 2349 unsigned int completed; 2350 2351 ASSERT(mutex_owned(&(sep->se_lock))); 2352 2353 completed = stp->st_completed; 2354 while (completed != stp->st_pending) { 2355 unsigned int id; 2356 sfxge_tx_mapping_t *stmp; 2357 2358 id = completed++ & (SFXGE_TX_NDESCS - 1); 2359 2360 if ((stmp = stp->st_stmp[id]) != NULL) { 2361 mblk_t *mp; 2362 2363 /* Unbind all the mappings */ 2364 do { 2365 ASSERT(stmp->stm_mp != NULL); 2366 sfxge_tx_msgb_unbind(stmp); 2367 2368 stmp = stmp->stm_next; 2369 } while (stmp != NULL); 2370 2371 /* 2372 * Now that the packet is no longer mapped for DMA it 2373 * can be freed. 2374 */ 2375 mp = stp->st_mp[id]; 2376 stp->st_mp[id] = NULL; 2377 2378 ASSERT(mp != NULL); 2379 freemsg(mp); 2380 } 2381 } 2382 stp->st_completed = completed; 2383 2384 /* Check whether we need to unblock the queue */ 2385 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) { 2386 unsigned int level; 2387 2388 level = stp->st_added - stp->st_completed; 2389 if (level <= stp->st_unblock) 2390 sfxge_tx_qunblock(stp); 2391 } 2392 2393 /* Release TX backpressure from the TX DPL put/get list being full */ 2394 if (stdp->std_count < stdp->get_pkt_limit) 2395 mac_tx_update(sp->s_mh); 2396 } 2397 2398 void 2399 sfxge_tx_qflush_done(sfxge_txq_t *stp) 2400 { 2401 sfxge_t *sp = stp->st_sp; 2402 boolean_t flush_pending = B_FALSE; 2403 2404 ASSERT(mutex_owned(&(sp->s_sep[stp->st_evq]->se_lock))); 2405 2406 mutex_enter(&(stp->st_lock)); 2407 2408 switch (stp->st_state) { 2409 case SFXGE_TXQ_INITIALIZED: 2410 /* Ignore flush event after TxQ destroyed */ 2411 break; 2412 2413 case SFXGE_TXQ_FLUSH_PENDING: 2414 flush_pending = B_TRUE; 2415 stp->st_state = SFXGE_TXQ_FLUSH_DONE; 2416 break; 2417 2418 case SFXGE_TXQ_FLUSH_FAILED: 2419 /* MC may have rebooted before handling the flush request */ 2420 stp->st_state = SFXGE_TXQ_FLUSH_DONE; 2421 break; 2422 2423 case SFXGE_TXQ_STARTED: 2424 /* 2425 * MC initiated flush on MC reboot or because of bad Tx 2426 * descriptor 2427 */ 2428 stp->st_state = SFXGE_TXQ_FLUSH_DONE; 2429 break; 2430 2431 case SFXGE_TXQ_FLUSH_DONE: 2432 /* Ignore unexpected extra flush event */ 2433 ASSERT(B_FALSE); 2434 break; 2435 2436 default: 2437 ASSERT(B_FALSE); 2438 } 2439 2440 2441 mutex_exit(&(stp->st_lock)); 2442 2443 if (flush_pending == B_FALSE) { 2444 /* Flush was not pending */ 2445 return; 2446 } 2447 2448 mutex_enter(&(sp->s_tx_flush_lock)); 2449 sp->s_tx_flush_pending--; 2450 if (sp->s_tx_flush_pending <= 0) { 2451 /* All queues flushed: wakeup sfxge_tx_stop() */ 2452 cv_signal(&(sp->s_tx_flush_kv)); 2453 } 2454 mutex_exit(&(sp->s_tx_flush_lock)); 2455 } 2456 2457 static void 2458 sfxge_tx_qflush(sfxge_t *sp, unsigned int index, boolean_t wait_for_flush) 2459 { 2460 sfxge_txq_t *stp = sp->s_stp[index]; 2461 int rc; 2462 2463 ASSERT(mutex_owned(&(sp->s_state_lock))); 2464 ASSERT(mutex_owned(&(sp->s_tx_flush_lock))); 2465 2466 mutex_enter(&(stp->st_lock)); 2467 2468 /* Prepare to flush and stop the queue */ 2469 if (stp->st_state == 
SFXGE_TXQ_STARTED) { 2470 /* Flush the transmit queue */ 2471 if ((rc = efx_tx_qflush(stp->st_etp)) == EALREADY) { 2472 /* Already flushed, may be initiated by MC */ 2473 stp->st_state = SFXGE_TXQ_FLUSH_DONE; 2474 } else if (rc != 0) { 2475 /* Unexpected error */ 2476 stp->st_state = SFXGE_TXQ_FLUSH_FAILED; 2477 } else if (wait_for_flush) { 2478 stp->st_state = SFXGE_TXQ_FLUSH_PENDING; 2479 sp->s_tx_flush_pending++; 2480 } else { 2481 /* Assume the flush is done */ 2482 stp->st_state = SFXGE_TXQ_FLUSH_DONE; 2483 } 2484 } 2485 2486 mutex_exit(&(stp->st_lock)); 2487 } 2488 2489 static void 2490 sfxge_tx_qstop(sfxge_t *sp, unsigned int index) 2491 { 2492 sfxge_txq_t *stp = sp->s_stp[index]; 2493 unsigned int evq = stp->st_evq; 2494 sfxge_evq_t *sep = sp->s_sep[evq]; 2495 2496 mutex_enter(&(sep->se_lock)); 2497 mutex_enter(&(stp->st_lock)); 2498 2499 if (stp->st_state == SFXGE_TXQ_INITIALIZED) 2500 goto done; 2501 2502 ASSERT(stp->st_state == SFXGE_TXQ_FLUSH_PENDING || 2503 stp->st_state == SFXGE_TXQ_FLUSH_DONE || 2504 stp->st_state == SFXGE_TXQ_FLUSH_FAILED); 2505 2506 /* All queues should have been flushed */ 2507 if (stp->st_sp->s_tx_flush_pending != 0) { 2508 dev_err(sp->s_dip, CE_NOTE, 2509 SFXGE_CMN_ERR "txq[%d] stop with flush_pending=%d", 2510 index, stp->st_sp->s_tx_flush_pending); 2511 } 2512 if (stp->st_state == SFXGE_TXQ_FLUSH_FAILED) { 2513 dev_err(sp->s_dip, CE_NOTE, 2514 SFXGE_CMN_ERR "txq[%d] flush failed", index); 2515 } 2516 2517 /* Destroy the transmit queue */ 2518 efx_tx_qdestroy(stp->st_etp); 2519 stp->st_etp = NULL; 2520 2521 /* Clear entries from the buffer table */ 2522 sfxge_sram_buf_tbl_clear(sp, stp->st_id, 2523 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS)); 2524 2525 sfxge_tx_qlist_abort(stp); 2526 ASSERT3U(stp->st_n, ==, 0); 2527 2528 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED; 2529 2530 stp->st_pending = stp->st_added; 2531 2532 sfxge_tx_qcomplete(stp); 2533 ASSERT3U(stp->st_completed, ==, stp->st_pending); 2534 2535 sfxge_tx_qreap(stp); 2536 ASSERT3U(stp->st_reaped, ==, stp->st_completed); 2537 2538 /* 2539 * Ensure the deferred packet list is cleared 2540 * Can race with sfxge_tx_packet_add() adding to the put list 2541 */ 2542 sfxge_tx_qdpl_flush_locked(stp); 2543 2544 stp->st_added = 0; 2545 stp->st_pending = 0; 2546 stp->st_completed = 0; 2547 stp->st_reaped = 0; 2548 2549 stp->st_state = SFXGE_TXQ_INITIALIZED; 2550 2551 done: 2552 mutex_exit(&(stp->st_lock)); 2553 mutex_exit(&(sep->se_lock)); 2554 } 2555 2556 static void 2557 sfxge_tx_qfini(sfxge_t *sp, unsigned int index) 2558 { 2559 sfxge_txq_t *stp = sp->s_stp[index]; 2560 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 2561 2562 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED); 2563 stp->st_state = SFXGE_TXQ_UNINITIALIZED; 2564 2565 /* Detach the TXQ from the driver */ 2566 sp->s_stp[index] = NULL; 2567 ASSERT(sp->s_tx_qcount > 0); 2568 sp->s_tx_qcount--; 2569 2570 /* Free the EVQ label for events from this TXQ */ 2571 (void) sfxge_ev_txlabel_free(sp, stp->st_evq, stp, stp->st_label); 2572 stp->st_label = 0; 2573 2574 /* Tear down the statistics */ 2575 sfxge_tx_kstat_fini(stp); 2576 2577 /* Ensure the deferred packet list is empty */ 2578 ASSERT3U(stdp->std_count, ==, 0); 2579 ASSERT3P(stdp->std_get, ==, NULL); 2580 ASSERT3U(stdp->std_put, ==, 0); 2581 2582 /* Clear the free buffer pool */ 2583 sfxge_tx_qfbp_empty(stp); 2584 2585 /* Clear the free mapping pool */ 2586 sfxge_tx_qfmp_empty(stp); 2587 2588 /* Clear the free packet pool */ 2589 sfxge_tx_qfpp_empty(stp); 2590 2591 mutex_destroy(&(stp->st_lock)); 2592 2593 
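	/* Reset the queue identity before returning it to the TXQ kmem cache */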
stp->st_evq = 0; 2594 stp->st_type = 0; 2595 stp->st_index = 0; 2596 2597 kmem_cache_free(sp->s_tqc, stp); 2598 } 2599 2600 int 2601 sfxge_tx_init(sfxge_t *sp) 2602 { 2603 sfxge_intr_t *sip = &(sp->s_intr); 2604 char name[MAXNAMELEN]; 2605 sfxge_txq_type_t qtype; 2606 unsigned int txq, evq; 2607 int index; 2608 int rc; 2609 2610 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_packet_cache", 2611 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip)); 2612 2613 sp->s_tpc = kmem_cache_create(name, sizeof (sfxge_tx_packet_t), 2614 SFXGE_CPU_CACHE_SIZE, sfxge_tx_packet_ctor, sfxge_tx_packet_dtor, 2615 NULL, sp, NULL, 0); 2616 ASSERT(sp->s_tpc != NULL); 2617 2618 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_buffer_cache", 2619 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip)); 2620 2621 sp->s_tbc = kmem_cache_create(name, sizeof (sfxge_tx_buffer_t), 2622 SFXGE_CPU_CACHE_SIZE, sfxge_tx_buffer_ctor, sfxge_tx_buffer_dtor, 2623 NULL, sp, NULL, 0); 2624 ASSERT(sp->s_tbc != NULL); 2625 2626 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_mapping_cache", 2627 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip)); 2628 2629 sp->s_tmc = kmem_cache_create(name, sizeof (sfxge_tx_mapping_t), 2630 SFXGE_CPU_CACHE_SIZE, sfxge_tx_mapping_ctor, sfxge_tx_mapping_dtor, 2631 NULL, sp, NULL, 0); 2632 ASSERT(sp->s_tmc != NULL); 2633 2634 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_txq_cache", 2635 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip)); 2636 2637 sp->s_tqc = kmem_cache_create(name, sizeof (sfxge_txq_t), 2638 SFXGE_CPU_CACHE_SIZE, sfxge_tx_qctor, sfxge_tx_qdtor, NULL, sp, 2639 NULL, 0); 2640 ASSERT(sp->s_tqc != NULL); 2641 2642 /* Initialize the transmit queues. */ 2643 sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM] = sip->si_nalloc; 2644 sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM] = 1; 2645 sp->s_tx_scale_max[SFXGE_TXQ_IP_TCP_UDP_CKSUM] = sip->si_nalloc; 2646 2647 /* Ensure minimum queue counts required by sfxge_tx_packet_add(). 
*/ 2648 if (sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM] < 1) 2649 sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM] = 1; 2650 2651 if (sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM] < 1) 2652 sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM] = 1; 2653 2654 txq = 0; 2655 for (qtype = 0; qtype < SFXGE_TXQ_NTYPES; qtype++) { 2656 unsigned int tx_scale = sp->s_tx_scale_max[qtype]; 2657 2658 if (txq + tx_scale > EFX_ARRAY_SIZE(sp->s_stp)) { 2659 rc = EINVAL; 2660 goto fail1; 2661 } 2662 2663 sp->s_tx_scale_base[qtype] = txq; 2664 2665 for (evq = 0; evq < tx_scale; evq++) { 2666 if ((rc = sfxge_tx_qinit(sp, txq, qtype, evq)) != 0) { 2667 goto fail2; 2668 } 2669 txq++; 2670 } 2671 ASSERT3U(txq, <=, EFX_ARRAY_SIZE(sp->s_stp)); 2672 } 2673 2674 return (0); 2675 2676 fail2: 2677 DTRACE_PROBE(fail2); 2678 2679 fail1: 2680 DTRACE_PROBE1(fail1, int, rc); 2681 2682 index = EFX_ARRAY_SIZE(sp->s_stp); 2683 while (--index >= 0) { 2684 if (sp->s_stp[index] != NULL) 2685 sfxge_tx_qfini(sp, index); 2686 } 2687 2688 kmem_cache_destroy(sp->s_tqc); 2689 sp->s_tqc = NULL; 2690 2691 kmem_cache_destroy(sp->s_tmc); 2692 sp->s_tmc = NULL; 2693 2694 kmem_cache_destroy(sp->s_tbc); 2695 sp->s_tbc = NULL; 2696 2697 kmem_cache_destroy(sp->s_tpc); 2698 sp->s_tpc = NULL; 2699 2700 return (rc); 2701 } 2702 2703 int 2704 sfxge_tx_start(sfxge_t *sp) 2705 { 2706 efx_nic_t *enp = sp->s_enp; 2707 int index; 2708 int rc; 2709 2710 /* Initialize the transmit module */ 2711 if ((rc = efx_tx_init(enp)) != 0) 2712 goto fail1; 2713 2714 for (index = 0; index < EFX_ARRAY_SIZE(sp->s_stp); index++) { 2715 if (sp->s_stp[index] != NULL) 2716 if ((rc = sfxge_tx_qstart(sp, index)) != 0) 2717 goto fail2; 2718 } 2719 2720 return (0); 2721 2722 fail2: 2723 DTRACE_PROBE(fail2); 2724 2725 sfxge_tx_stop(sp); 2726 2727 fail1: 2728 DTRACE_PROBE1(fail1, int, rc); 2729 2730 return (rc); 2731 } 2732 2733 2734 /* 2735 * Add a packet to the TX Deferred Packet List and if the TX queue lock 2736 * can be acquired then call sfxge_tx_qdpl_service() to fragment and push 2737 * to the H/W transmit descriptor ring 2738 * 2739 * If ENOSPC is returned then the DPL is full or the packet create failed, but 2740 * the mblk isn't freed so that the caller can return this mblk from mc_tx() to 2741 * back-pressure the OS stack. 2742 * 2743 * For all other errors the mblk is freed 2744 */ 2745 int 2746 sfxge_tx_packet_add(sfxge_t *sp, mblk_t *mp) 2747 { 2748 struct ether_header *etherhp; 2749 struct ip *iphp; 2750 struct tcphdr *thp; 2751 size_t off; 2752 size_t size; 2753 size_t mss; 2754 sfxge_txq_t *stp; 2755 unsigned int txq; 2756 int index; 2757 boolean_t locked; 2758 sfxge_tx_packet_t *stpp; 2759 sfxge_packet_type_t pkt_type; 2760 uint16_t sport, dport; 2761 int rc = 0; 2762 2763 ASSERT3P(mp->b_next, ==, NULL); 2764 ASSERT(!(DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM)); 2765 2766 /* 2767 * Do not enqueue packets during startup/shutdown; 2768 * 2769 * NOTE: This access to the state is NOT protected by the state lock. It 2770 * is an imperfect test and anything further getting onto the get/put 2771 * deferred packet lists is cleaned up in (possibly repeated) calls to 2772 * sfxge_can_destroy(). 
	 */
	if (sp->s_state != SFXGE_STARTED) {
		rc = EINVAL;
		goto fail1;
	}

	etherhp = NULL;
	iphp = NULL;
	thp = NULL;
	off = 0;
	size = 0;
	mss = 0;

	/* Check whether we need the header pointers for LSO segmentation */
	if (DB_LSOFLAGS(mp) & HW_LSO) {
		/* LSO segmentation relies on hardware checksum offload */
		DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;

		if ((mss = DB_LSOMSS(mp)) == 0) {
			rc = EINVAL;
			goto fail1;
		}

		pkt_type = sfxge_pkthdr_parse(mp, &etherhp, &iphp, &thp,
		    &off, &size, &sport, &dport);

		if (pkt_type != SFXGE_PACKET_TYPE_IPV4_TCP ||
		    etherhp == NULL ||
		    iphp == NULL ||
		    thp == NULL ||
		    off == 0) {
			rc = EINVAL;
			goto fail2;
		}
	}

	/* Choose the appropriate transmit queue */
	if (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) {
		sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

		if (srsp->srs_state == SFXGE_RX_SCALE_STARTED) {
			uint32_t hash;

			if (srsp->srs_count > 1) {
				/*
				 * If we have not already parsed the headers
				 * for LSO segmentation then we need to do it
				 * now so we can calculate the hash.
				 */
				if (thp == NULL) {
					(void) sfxge_pkthdr_parse(mp, &etherhp,
					    &iphp, &thp, &off, &size,
					    &sport, &dport);
				}

				if (thp != NULL) {
					SFXGE_TCP_HASH(sp,
					    &iphp->ip_dst.s_addr,
					    thp->th_dport,
					    &iphp->ip_src.s_addr,
					    thp->th_sport, hash);

					index = srsp->srs_tbl[hash %
					    SFXGE_RX_SCALE_MAX];
				} else if (iphp != NULL) {
					/*
					 * Calculate IPv4 4-tuple hash, with
					 * TCP/UDP/SCTP src/dest ports. Ports
					 * are zero for other IPv4 protocols.
					 */
					SFXGE_IP_HASH(sp,
					    &iphp->ip_dst.s_addr, dport,
					    &iphp->ip_src.s_addr, sport, hash);

					index = srsp->srs_tbl[hash %
					    SFXGE_RX_SCALE_MAX];
				} else {
					/*
					 * Other traffic always goes to the
					 * queue in the zero-th entry of
					 * the RSS table.
					 */
					index = srsp->srs_tbl[0];
				}
			} else {
				/*
				 * It does not matter what the hash is
				 * because all the RSS table entries will be
				 * the same.
				 */
				index = srsp->srs_tbl[0];
			}

			/*
			 * Find the event queue corresponding to the hash in
			 * the RSS table.
			 */
			txq = sp->s_tx_scale_base[SFXGE_TXQ_IP_TCP_UDP_CKSUM] +
			    index;
			stp = sp->s_stp[txq];
			ASSERT3U(stp->st_evq, ==, index);
		} else {
			index = 0;
			txq = sp->s_tx_scale_base[SFXGE_TXQ_IP_TCP_UDP_CKSUM] +
			    index;
			stp = sp->s_stp[txq];
		}
	} else if (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) {
		ASSERT3U(sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM], >=, 1);
		index = 0;
		txq = sp->s_tx_scale_base[SFXGE_TXQ_IP_CKSUM] + index;
		stp = sp->s_stp[txq];
	} else {
		/*
		 * No hardware checksum offload requested.
		 */
		sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

		if (srsp->srs_state == SFXGE_RX_SCALE_STARTED) {
			uint32_t hash = 0;

			if (srsp->srs_count > 1) {
				if (iphp == NULL) {
					(void) sfxge_pkthdr_parse(mp, &etherhp,
					    &iphp, &thp, &off, &size,
					    &sport, &dport);
				}

				if (iphp != NULL) {
					/*
					 * Calculate IPv4 4-tuple hash, with
					 * TCP/UDP/SCTP src/dest ports. Ports
					 * are zero for other IPv4 protocols.
					 */
					SFXGE_IP_HASH(sp,
					    &iphp->ip_dst.s_addr, dport,
					    &iphp->ip_src.s_addr, sport, hash);

					hash = hash % SFXGE_RX_SCALE_MAX;
				}
			}
			index = srsp->srs_tbl[hash];

			/*
			 * The RSS table (indexed by hash) gives the RXQ index
			 * (mapped 1:1 with EVQs). Find the TXQ that results in
			 * using the same EVQ as for the RX data path.
			 */
			ASSERT3U(sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM],
			    >, index);
			txq = sp->s_tx_scale_base[SFXGE_TXQ_NON_CKSUM] + index;
			stp = sp->s_stp[txq];
			ASSERT3U(stp->st_evq, ==, index);
		} else {
			ASSERT3U(sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM], >, 0);
			index = 0;
			txq = sp->s_tx_scale_base[SFXGE_TXQ_NON_CKSUM] + index;
			stp = sp->s_stp[txq];
		}
	}
	ASSERT(stp != NULL);

	ASSERT(mss == 0 || (DB_LSOFLAGS(mp) & HW_LSO));

	/* Try to grab the lock */
	locked = mutex_tryenter(&(stp->st_lock));

	if (locked) {
		/* Try to grab a packet from the pool */
		stpp = sfxge_tx_qfpp_get(stp);
	} else {
		stpp = NULL;
	}

	if (stpp == NULL) {
		/*
		 * Either the pool was empty or we don't have the lock so
		 * allocate a new packet.
		 */
		if ((stpp = sfxge_tx_packet_create(sp)) == NULL) {
			rc = ENOSPC;
			goto fail3;
		}
	}

	stpp->stp_mp = mp;
	stpp->stp_etherhp = etherhp;
	stpp->stp_iphp = iphp;
	stpp->stp_thp = thp;
	stpp->stp_off = off;
	stpp->stp_size = size;
	stpp->stp_mss = mss;
	stpp->stp_dpl_put_len = 0;

	rc = sfxge_tx_qdpl_add(stp, stpp, locked);
	if (rc != 0) {
		/* ENOSPC can happen if the DPL get or put list is full */
		ASSERT3U(rc, ==, ENOSPC);

		/*
		 * Note: if this is the unlocked DPL put list full case there
		 * is no need to worry about a race with locked
		 * sfxge_tx_qdpl_swizzle() as we know that the TX DPL put list
		 * was full and would have been swizzled to the TX DPL get
		 * list; hence guaranteeing future TX completions and calls
		 * to mac_tx_update() via sfxge_tx_qcomplete().
		 */
		goto fail4;
	}

	/* Try to grab the lock again */
	if (!locked)
		locked = mutex_tryenter(&(stp->st_lock));

	if (locked) {
		/* Try to service the list */
		sfxge_tx_qdpl_service(stp);
		/* lock has been dropped */
	}

	return (0);

fail4:
	DTRACE_PROBE(fail4);
	sfxge_tx_packet_destroy(sp, stpp);
fail3:
	DTRACE_PROBE(fail3);
	if (locked)
		mutex_exit(&(stp->st_lock));
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	if (rc != ENOSPC)
		freemsg(mp);
	return (rc);
}

void
sfxge_tx_stop(sfxge_t *sp)
{
	efx_nic_t *enp = sp->s_enp;
	clock_t timeout;
	boolean_t wait_for_flush;
	int index;

	ASSERT(mutex_owned(&(sp->s_state_lock)));

	mutex_enter(&(sp->s_tx_flush_lock));

	/* Flush all the queues */
	if (sp->s_hw_err == SFXGE_HW_OK) {
		wait_for_flush = B_TRUE;
	} else {
		/*
		 * Flag indicates possible hardware failure.
		 * Attempt flush but do not wait for it to complete.
		 */
		wait_for_flush = B_FALSE;
	}

	/* Prepare queues to stop and flush the hardware ring */
	index = EFX_ARRAY_SIZE(sp->s_stp);
	while (--index >= 0) {
		if (sp->s_stp[index] != NULL)
			sfxge_tx_qflush(sp, index, wait_for_flush);
	}

	if (wait_for_flush == B_FALSE)
		goto flush_done;

	/* Wait up to 2 seconds for queue flushing to complete */
	timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_TX_QFLUSH_USEC);

	while (sp->s_tx_flush_pending > 0) {
		if (cv_timedwait(&(sp->s_tx_flush_kv), &(sp->s_tx_flush_lock),
		    timeout) < 0) {
			/* Timeout waiting for queues to flush */
			dev_info_t *dip = sp->s_dip;

			DTRACE_PROBE(timeout);
			dev_err(dip, CE_NOTE,
			    SFXGE_CMN_ERR "tx qflush timeout");
			break;
		}
	}

flush_done:
	sp->s_tx_flush_pending = 0;
	mutex_exit(&(sp->s_tx_flush_lock));

	/* Stop all the queues */
	index = EFX_ARRAY_SIZE(sp->s_stp);
	while (--index >= 0) {
		if (sp->s_stp[index] != NULL)
			sfxge_tx_qstop(sp, index);
	}

	/* Tear down the transmit module */
	efx_tx_fini(enp);
}

void
sfxge_tx_fini(sfxge_t *sp)
{
	int index;

	index = EFX_ARRAY_SIZE(sp->s_stp);
	while (--index >= 0) {
		if (sp->s_stp[index] != NULL)
			sfxge_tx_qfini(sp, index);
	}

	kmem_cache_destroy(sp->s_tqc);
	sp->s_tqc = NULL;

	kmem_cache_destroy(sp->s_tmc);
	sp->s_tmc = NULL;

	kmem_cache_destroy(sp->s_tbc);
	sp->s_tbc = NULL;

	kmem_cache_destroy(sp->s_tpc);
	sp->s_tpc = NULL;
}