/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/nxge/nxge_impl.h>

extern uint32_t nxge_reclaim_pending;
extern uint32_t nxge_bcopy_thresh;
extern uint32_t nxge_dvma_thresh;
extern uint32_t nxge_dma_stream_thresh;
extern uint32_t nxge_tx_minfree;
extern uint32_t nxge_tx_intr_thres;
extern uint32_t nxge_tx_max_gathers;
extern uint32_t nxge_tx_tiny_pack;
extern uint32_t nxge_tx_use_bcopy;
extern uint32_t nxge_tx_lb_policy;
extern uint32_t nxge_no_tx_lb;

typedef struct _mac_tx_hint {
	uint16_t	sap;
	uint16_t	vid;
	void		*hash;
} mac_tx_hint_t, *p_mac_tx_hint_t;

int nxge_tx_lb_ring_1(p_mblk_t, uint32_t, p_mac_tx_hint_t);

int
nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
{
	int			status = 0;
	p_tx_desc_t		tx_desc_ring_vp;
	npi_handle_t		npi_desc_handle;
	nxge_os_dma_handle_t	tx_desc_dma_handle;
	p_tx_desc_t		tx_desc_p;
	p_tx_msg_t		tx_msg_ring;
	p_tx_msg_t		tx_msg_p;
	tx_desc_t		tx_desc, *tmp_desc_p;
	tx_desc_t		sop_tx_desc, *sop_tx_desc_p;
	p_tx_pkt_header_t	hdrp;
	p_tx_pkt_hdr_all_t	pkthdrp;
	uint8_t			npads = 0;
	uint64_t		dma_ioaddr;
	uint32_t		dma_flags;
	int			last_bidx;
	uint8_t			*b_rptr;
	caddr_t			kaddr;
	uint32_t		nmblks;
	uint32_t		ngathers;
	uint32_t		clen;
	int			len;
	uint32_t		pkt_len, pack_len, min_len;
	uint32_t		bcopy_thresh;
	int			i, cur_index, sop_index;
	uint16_t		tail_index;
	boolean_t		tail_wrap = B_FALSE;
	nxge_dma_common_t	desc_area;
	nxge_os_dma_handle_t	dma_handle;
	ddi_dma_cookie_t	dma_cookie;
	npi_handle_t		npi_handle;
	p_mblk_t		nmp;
	p_mblk_t		t_mp;
	uint32_t		ncookies;
	boolean_t		good_packet;
	boolean_t		mark_mode = B_FALSE;
	p_nxge_stats_t		statsp;
	p_nxge_tx_ring_stats_t	tdc_stats;
	t_uscalar_t		start_offset = 0;
	t_uscalar_t		stuff_offset = 0;
	t_uscalar_t		end_offset = 0;
	t_uscalar_t		value = 0;
	t_uscalar_t		cksum_flags = 0;
	boolean_t		cksum_on = B_FALSE;
	uint32_t		boff = 0;
	uint64_t		tot_xfer_len = 0, tmp_len = 0;
	boolean_t		header_set = B_FALSE;
#ifdef NXGE_DEBUG
	p_tx_desc_t		tx_desc_ring_pp;
	p_tx_desc_t		tx_desc_pp;
	tx_desc_t		*save_desc_p;
	int			dump_len;
	int			sad_len;
	uint64_t		sad;
	int			xfer_len;
	uint32_t		msgsize;
#endif

	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: tx dma channel %d", tx_ring_p->tdc));
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: Starting tdc %d desc pending %d",
	    tx_ring_p->tdc, tx_ring_p->descs_pending));

	statsp = nxgep->statsp;

	if (nxgep->statsp->port_stats.lb_mode == nxge_lb_normal) {
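		/*
		 * In normal (non-loopback) mode, drop the packet if the
		 * link is down.
		 */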
		if (!statsp->mac_stats.link_up) {
			freemsg(mp);
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "link not up or LB mode"));
			goto nxge_start_fail1;
		}
	}

	hcksum_retrieve(mp, NULL, NULL, &start_offset,
	    &stuff_offset, &end_offset, &value, &cksum_flags);
	if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) {
		start_offset += sizeof (ether_header_t);
		stuff_offset += sizeof (ether_header_t);
	} else {
		start_offset += sizeof (struct ether_vlan_header);
		stuff_offset += sizeof (struct ether_vlan_header);
	}

	if (cksum_flags & HCK_PARTIALCKSUM) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: cksum_flags 0x%x (partial checksum) ",
		    cksum_flags));
		cksum_on = B_TRUE;
	}

#ifdef NXGE_DEBUG
	if (tx_ring_p->descs_pending) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
		    "desc pending %d ", tx_ring_p->descs_pending));
	}

	dump_len = (int)(MBLKL(mp));
	dump_len = (dump_len > 128) ? 128 : dump_len;

	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: tdc %d: dumping ...: b_rptr $%p "
	    "(Before header reserve: ORIGINAL LEN %d)",
	    tx_ring_p->tdc,
	    mp->b_rptr,
	    dump_len));

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: dump packets "
	    "(IP ORIGINAL b_rptr $%p): %s", mp->b_rptr,
	    nxge_dump_packet((char *)mp->b_rptr, dump_len)));
#endif

	MUTEX_ENTER(&tx_ring_p->lock);
	tdc_stats = tx_ring_p->tdc_stats;
	mark_mode = (tx_ring_p->descs_pending &&
	    ((tx_ring_p->tx_ring_size - tx_ring_p->descs_pending)
	    < nxge_tx_minfree));

	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "TX Descriptor ring is channel %d mark mode %d",
	    tx_ring_p->tdc, mark_mode));

	if (!nxge_txdma_reclaim(nxgep, tx_ring_p, nxge_tx_minfree)) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "TX Descriptor ring is full: channel %d",
		    tx_ring_p->tdc));
		cas32((uint32_t *)&tx_ring_p->queueing, 0, 1);
		tdc_stats->tx_no_desc++;
		MUTEX_EXIT(&tx_ring_p->lock);
		if (nxgep->resched_needed && !nxgep->resched_running) {
			nxgep->resched_running = B_TRUE;
			ddi_trigger_softintr(nxgep->resched_id);
		}
		status = 1;
		goto nxge_start_fail1;
	}

	nmp = mp;
	i = sop_index = tx_ring_p->wr_index;
	nmblks = 0;
	ngathers = 0;
	pkt_len = 0;
	pack_len = 0;
	clen = 0;
	last_bidx = -1;
	good_packet = B_TRUE;

	desc_area = tx_ring_p->tdc_desc;
	npi_handle = desc_area.npi_handle;
	npi_desc_handle.regh = (nxge_os_acc_handle_t)
	    DMA_COMMON_ACC_HANDLE(desc_area);
	tx_desc_ring_vp = (p_tx_desc_t)DMA_COMMON_VPTR(desc_area);
#ifdef NXGE_DEBUG
	tx_desc_ring_pp = (p_tx_desc_t)DMA_COMMON_IOADDR(desc_area);
#endif
	tx_desc_dma_handle = (nxge_os_dma_handle_t)
	    DMA_COMMON_HANDLE(desc_area);
	tx_msg_ring = tx_ring_p->tx_msg_ring;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: wr_index %d i %d",
	    sop_index, i));

#ifdef NXGE_DEBUG
	msgsize = msgdsize(nmp);
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start(1): wr_index %d i %d msgdsize %d",
	    sop_index, i, msgsize));
#endif
	/*
	 * The first 16 bytes of the premapped buffer are reserved
	 * for header. No padding will be used.
	 */
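	/*
	 * Length accounting starts at the 16-byte header size; bcopy_thresh
	 * below decides which mblks are copied into the premapped buffer
	 * and which are DMA-bound directly.
	 */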
	pkt_len = pack_len = boff = TX_PKT_HEADER_SIZE;
	if (nxge_tx_use_bcopy) {
		bcopy_thresh = (nxge_bcopy_thresh - TX_PKT_HEADER_SIZE);
	} else {
		bcopy_thresh = (TX_BCOPY_SIZE - TX_PKT_HEADER_SIZE);
	}
	while (nmp) {
		good_packet = B_TRUE;
		b_rptr = nmp->b_rptr;
		len = MBLKL(nmp);
		if (len <= 0) {
			nmp = nmp->b_cont;
			continue;
		}
		nmblks++;

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(1): nmblks %d "
		    "len %d pkt_len %d pack_len %d",
		    nmblks, len, pkt_len, pack_len));
		/*
		 * Hardware limits the transfer length to 4K for NIU and
		 * 4076 (TX_MAX_TRANSFER_LENGTH) for Neptune. But we just
		 * use TX_MAX_TRANSFER_LENGTH as the limit for both.
		 * If len is longer than the limit, then we break nmp into
		 * two chunks: Make the first chunk equal to the limit and
		 * the second chunk for the remaining data. If the second
		 * chunk is still larger than the limit, then it will be
		 * broken into two in the next pass.
		 */
		if (len > TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE) {
			t_mp = dupb(nmp);
			nmp->b_wptr = nmp->b_rptr +
			    (TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE);
			t_mp->b_rptr = nmp->b_wptr;
			t_mp->b_cont = nmp->b_cont;
			nmp->b_cont = t_mp;
			len = MBLKL(nmp);
		}

		tx_desc.value = 0;
		tx_desc_p = &tx_desc_ring_vp[i];
#ifdef NXGE_DEBUG
		tx_desc_pp = &tx_desc_ring_pp[i];
#endif
		tx_msg_p = &tx_msg_ring[i];
		npi_desc_handle.regp = (uint64_t)tx_desc_p;
		if (!header_set &&
		    ((!nxge_tx_use_bcopy && (len > TX_BCOPY_SIZE)) ||
		    (len >= bcopy_thresh))) {
			header_set = B_TRUE;
			bcopy_thresh += TX_PKT_HEADER_SIZE;
			boff = 0;
			pack_len = 0;
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
			hdrp = (p_tx_pkt_header_t)kaddr;
			clen = pkt_len;
			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			(void) ddi_dma_sync(dma_handle,
			    i * nxge_bcopy_thresh, nxge_bcopy_thresh,
			    DDI_DMA_SYNC_FORDEV);

			tx_msg_p->flags.dma_type = USE_BCOPY;
			goto nxge_start_control_header_only;
		}

		pkt_len += len;
		pack_len += len;

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(3): "
		    "desc entry %d "
		    "DESC IOADDR $%p "
		    "desc_vp $%p tx_desc_p $%p "
		    "desc_pp $%p tx_desc_pp $%p "
		    "len %d pkt_len %d pack_len %d",
		    i,
		    DMA_COMMON_IOADDR(desc_area),
		    tx_desc_ring_vp, tx_desc_p,
		    tx_desc_ring_pp, tx_desc_pp,
		    len, pkt_len, pack_len));

		if (len < bcopy_thresh) {
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(4): "
			    "USE BCOPY: "));
			if (nxge_tx_tiny_pack) {
				uint32_t blst =
				    TXDMA_DESC_NEXT_INDEX(i, -1,
				    tx_ring_p->tx_wrap_mask);
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "==> nxge_start(5): pack"));
				if ((pack_len <= bcopy_thresh) &&
				    (last_bidx == blst)) {
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start: pack(6) "
					    "(pkt_len %d pack_len %d)",
					    pkt_len, pack_len));
					i = blst;
					tx_desc_p = &tx_desc_ring_vp[i];
#ifdef NXGE_DEBUG
					tx_desc_pp = &tx_desc_ring_pp[i];
#endif
					tx_msg_p = &tx_msg_ring[i];
					boff = pack_len - len;
					ngathers--;
				} else if (pack_len > bcopy_thresh &&
				    header_set) {
					pack_len = len;
					boff = 0;
					bcopy_thresh = nxge_bcopy_thresh;
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start(7): > max NEW "
					    "bcopy thresh %d "
					    "pkt_len %d pack_len %d(next)",
					    bcopy_thresh,
					    pkt_len, pack_len));
				}
				last_bidx = i;
			}
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
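			/*
			 * First mblk of a fully bcopied packet: the internal
			 * Tx packet header occupies the first 16 bytes of
			 * this premapped buffer.
			 */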
			if ((boff == TX_PKT_HEADER_SIZE) && (nmblks == 1)) {
				hdrp = (p_tx_pkt_header_t)kaddr;
				header_set = B_TRUE;
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "==> nxge_start(7_x2): "
				    "pkt_len %d pack_len %d (new hdrp $%p)",
				    pkt_len, pack_len, hdrp));
			}
			tx_msg_p->flags.dma_type = USE_BCOPY;
			kaddr += boff;
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(8): "
			    "USE BCOPY: before bcopy "
			    "DESC IOADDR $%p entry %d "
			    "bcopy packets %d "
			    "bcopy kaddr $%p "
			    "bcopy ioaddr (SAD) $%p "
			    "bcopy clen %d "
			    "bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area), i,
			    tdc_stats->tx_hdr_pkts,
			    kaddr,
			    dma_ioaddr,
			    clen,
			    boff));
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "1USE BCOPY: "));
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "2USE BCOPY: "));
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "last USE BCOPY: copy from b_rptr $%p "
			    "to KADDR $%p (len %d offset %d",
			    b_rptr, kaddr, len, boff));

			bcopy(b_rptr, kaddr, len);

#ifdef NXGE_DEBUG
			dump_len = (len > 128) ? 128 : len;
			NXGE_DEBUG_MSG((nxgep, TX_CTL,
			    "==> nxge_start: dump packets "
			    "(After BCOPY len %d)"
			    "(b_rptr $%p): %s", len, nmp->b_rptr,
			    nxge_dump_packet((char *)nmp->b_rptr,
			    dump_len)));
#endif

			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			(void) ddi_dma_sync(dma_handle,
			    i * nxge_bcopy_thresh, nxge_bcopy_thresh,
			    DDI_DMA_SYNC_FORDEV);
			clen = len + boff;
			tdc_stats->tx_hdr_pkts++;
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(9): "
			    "USE BCOPY: "
			    "DESC IOADDR $%p entry %d "
			    "bcopy packets %d "
			    "bcopy kaddr $%p "
			    "bcopy ioaddr (SAD) $%p "
			    "bcopy clen %d "
			    "bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area),
			    i,
			    tdc_stats->tx_hdr_pkts,
			    kaddr,
			    dma_ioaddr,
			    clen,
			    boff));
		} else {
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(12): "
			    "USE DVMA: len %d", len));
			tx_msg_p->flags.dma_type = USE_DMA;
			dma_flags = DDI_DMA_WRITE;
			if (len < nxge_dma_stream_thresh) {
				dma_flags |= DDI_DMA_CONSISTENT;
			} else {
				dma_flags |= DDI_DMA_STREAMING;
			}

			dma_handle = tx_msg_p->dma_handle;
			status = ddi_dma_addr_bind_handle(dma_handle, NULL,
			    (caddr_t)b_rptr, len, dma_flags,
			    DDI_DMA_DONTWAIT, NULL,
			    &dma_cookie, &ncookies);
			if (status == DDI_DMA_MAPPED) {
				dma_ioaddr = dma_cookie.dmac_laddress;
				len = (int)dma_cookie.dmac_size;
				clen = (uint32_t)dma_cookie.dmac_size;
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "==> nxge_start(12_1): "
				    "USE DVMA: len %d clen %d "
				    "ngathers %d",
				    len, clen,
				    ngathers));

				npi_desc_handle.regp = (uint64_t)tx_desc_p;
				while (ncookies > 1) {
					ngathers++;
					/*
					 * Each additional cookie gets its own
					 * gather descriptor entry; the SOP bit
					 * and related fields are not set on
					 * these descriptors.
					 */
					(void) npi_txdma_desc_gather_set(
					    npi_desc_handle,
					    &tx_desc,
					    (ngathers - 1),
					    mark_mode,
					    ngathers,
					    dma_ioaddr,
					    clen);

					tx_msg_p->tx_msg_size = clen;
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start: DMA "
					    "ncookie %d "
					    "ngathers %d "
					    "dma_ioaddr $%p len %d"
					    "desc $%p descp $%p (%d)",
					    ncookies,
					    ngathers,
					    dma_ioaddr, clen,
					    *tx_desc_p, tx_desc_p, i));

					ddi_dma_nextcookie(dma_handle,
					    &dma_cookie);
					dma_ioaddr = dma_cookie.dmac_laddress;

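					/*
					 * Record the next cookie's size and
					 * advance to the descriptor entry
					 * that will carry it.
					 */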
					len = (int)dma_cookie.dmac_size;
					clen = (uint32_t)dma_cookie.dmac_size;
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start(12_2): "
					    "USE DVMA: len %d clen %d ",
					    len, clen));

					i = TXDMA_DESC_NEXT_INDEX(i, 1,
					    tx_ring_p->tx_wrap_mask);
					tx_desc_p = &tx_desc_ring_vp[i];

					npi_desc_handle.regp =
					    (uint64_t)tx_desc_p;
					tx_msg_p = &tx_msg_ring[i];
					tx_msg_p->flags.dma_type = USE_NONE;
					tx_desc.value = 0;

					ncookies--;
				}
				tdc_stats->tx_ddi_pkts++;
				NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start:"
				    "DMA: ddi packets %d",
				    tdc_stats->tx_ddi_pkts));
			} else {
				NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL,
				    "dma mapping failed for %d "
				    "bytes addr $%p flags %x (%d)",
				    len, b_rptr, status, status));
				good_packet = B_FALSE;
				tdc_stats->tx_dma_bind_fail++;
				tx_msg_p->flags.dma_type = USE_NONE;
				goto nxge_start_fail2;
			}
		} /* ddi dvma */

		nmp = nmp->b_cont;
nxge_start_control_header_only:
		npi_desc_handle.regp = (uint64_t)tx_desc_p;
		ngathers++;

		if (ngathers == 1) {
#ifdef NXGE_DEBUG
			save_desc_p = &sop_tx_desc;
#endif
			sop_tx_desc_p = &sop_tx_desc;
			sop_tx_desc_p->value = 0;
			sop_tx_desc_p->bits.hdw.tr_len = clen;
			sop_tx_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
			sop_tx_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;
		} else {
#ifdef NXGE_DEBUG
			save_desc_p = &tx_desc;
#endif
			tmp_desc_p = &tx_desc;
			tmp_desc_p->value = 0;
			tmp_desc_p->bits.hdw.tr_len = clen;
			tmp_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
			tmp_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;

			tx_desc_p->value = tmp_desc_p->value;
		}

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(13): "
		    "Desc_entry %d ngathers %d "
		    "desc_vp $%p tx_desc_p $%p "
		    "len %d clen %d pkt_len %d pack_len %d nmblks %d "
		    "dma_ioaddr (SAD) $%p mark %d",
		    i, ngathers,
		    tx_desc_ring_vp, tx_desc_p,
		    len, clen, pkt_len, pack_len, nmblks,
		    dma_ioaddr, mark_mode));

#ifdef NXGE_DEBUG
		npi_desc_handle.nxgep = nxgep;
		npi_desc_handle.function.function = nxgep->function_num;
		npi_desc_handle.function.instance = nxgep->instance;
		sad = (save_desc_p->value & TX_PKT_DESC_SAD_MASK);
		xfer_len = ((save_desc_p->value & TX_PKT_DESC_TR_LEN_MASK) >>
		    TX_PKT_DESC_TR_LEN_SHIFT);

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
		    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\t"
		    "mark %d sop %d\n",
		    save_desc_p->value,
		    sad,
		    save_desc_p->bits.hdw.tr_len,
		    xfer_len,
		    save_desc_p->bits.hdw.num_ptr,
		    save_desc_p->bits.hdw.mark,
		    save_desc_p->bits.hdw.sop));

		npi_txdma_dump_desc_one(npi_desc_handle, NULL, i);
#endif

		tx_msg_p->tx_msg_size = clen;
		i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask);
		if (ngathers > nxge_tx_max_gathers) {
			good_packet = B_FALSE;
			hcksum_retrieve(mp, NULL, NULL, &start_offset,
			    &stuff_offset, &end_offset, &value,
			    &cksum_flags);

			NXGE_DEBUG_MSG((NULL, TX_CTL,
			    "==> nxge_start(14): pull msg - "
			    "len %d pkt_len %d ngathers %d",
			    len, pkt_len, ngathers));
			/* Pull all message blocks from b_cont */
			if ((msgpullup(mp, -1)) == NULL) {
				goto nxge_start_fail2;
			}
			goto nxge_start_fail2;
		}
	} /* while (nmp) */

	tx_msg_p->tx_message = mp;
	tx_desc_p = &tx_desc_ring_vp[sop_index];
	npi_desc_handle.regp = (uint64_t)tx_desc_p;

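	/*
	 * All gather descriptors for this packet are queued.  Now build the
	 * 16-byte internal transmit packet header at the front of the
	 * premapped buffer (hdrp).
	 */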
	pkthdrp = (p_tx_pkt_hdr_all_t)hdrp;
	pkthdrp->reserved = 0;
	hdrp->value = 0;
	(void) nxge_fill_tx_hdr(mp, B_FALSE, cksum_on,
	    (pkt_len - TX_PKT_HEADER_SIZE), npads, pkthdrp);

	if (pkt_len > NXGE_MTU_DEFAULT_MAX) {
		tdc_stats->tx_jumbo_pkts++;
	}

	min_len = (nxgep->msg_min + TX_PKT_HEADER_SIZE + (npads * 2));
	if (pkt_len < min_len) {
		/* Assume we use bcopy to premapped buffers */
		kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
		NXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> nxge_start(14-1): < (msg_min + 16)"
		    "len %d pkt_len %d min_len %d bzero %d ngathers %d",
		    len, pkt_len, min_len, (min_len - pkt_len), ngathers));
		bzero((kaddr + pkt_len), (min_len - pkt_len));
		pkt_len = tx_msg_p->tx_msg_size = min_len;

		sop_tx_desc_p->bits.hdw.tr_len = min_len;

		NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);
		tx_desc_p->value = sop_tx_desc_p->value;

		NXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> nxge_start(14-2): < msg_min - "
		    "len %d pkt_len %d min_len %d ngathers %d",
		    len, pkt_len, min_len, ngathers));
	}

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: cksum_flags 0x%x ",
	    cksum_flags));
	if (cksum_flags & HCK_PARTIALCKSUM) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: cksum_flags 0x%x (partial checksum) ",
		    cksum_flags));
		cksum_on = B_TRUE;
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
		tmp_len = (uint64_t)(start_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4START_SHIFT);
		tmp_len = (uint64_t)(stuff_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4STUFF_SHIFT);

		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "after SHIFT start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
	}
	{
		uint64_t	tmp_len;

		/* pkt_len already includes 16 + paddings!! */
		/* Update the control header length */
		tot_xfer_len = (pkt_len - TX_PKT_HEADER_SIZE);
		tmp_len = hdrp->value |
		    (tot_xfer_len << TX_PKT_HEADER_TOT_XFER_LEN_SHIFT);

		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start(15_x1): setting SOP "
		    "tot_xfer_len 0x%llx (%d) pkt_len %d tmp_len "
		    "0x%llx hdrp->value 0x%llx",
		    tot_xfer_len, tot_xfer_len, pkt_len,
		    tmp_len, hdrp->value));
#if defined(_BIG_ENDIAN)
		hdrp->value = ddi_swap64(tmp_len);
#else
		hdrp->value = tmp_len;
#endif
		NXGE_DEBUG_MSG((nxgep,
		    TX_CTL, "==> nxge_start(15_x2): setting SOP "
		    "after SWAP: tot_xfer_len 0x%llx pkt_len %d "
		    "tmp_len 0x%llx hdrp->value 0x%llx",
		    tot_xfer_len, pkt_len,
		    tmp_len, hdrp->value));
	}

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(15): setting SOP "
	    "wr_index %d "
	    "tot_xfer_len (%d) pkt_len %d npads %d",
	    sop_index,
	    tot_xfer_len, pkt_len,
	    npads));

	sop_tx_desc_p->bits.hdw.sop = 1;
	sop_tx_desc_p->bits.hdw.mark = mark_mode;
	sop_tx_desc_p->bits.hdw.num_ptr = ngathers;

	NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(16): set SOP done"));

#ifdef NXGE_DEBUG
	npi_desc_handle.nxgep = nxgep;
	npi_desc_handle.function.function = nxgep->function_num;
	npi_desc_handle.function.instance = nxgep->instance;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
	    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\tmark %d sop %d\n",
	    save_desc_p->value,
	    sad,
	    save_desc_p->bits.hdw.tr_len,
	    xfer_len,
	    save_desc_p->bits.hdw.num_ptr,
	    save_desc_p->bits.hdw.mark,
	    save_desc_p->bits.hdw.sop));
	(void) npi_txdma_dump_desc_one(npi_desc_handle, NULL, sop_index);

	dump_len = (pkt_len > 128) ? 128 : pkt_len;
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: dump packets(17) (after sop set, len "
	    " (len/dump_len/pkt_len/tot_xfer_len) %d/%d/%d/%d):\n"
	    "ptr $%p: %s", len, dump_len, pkt_len, tot_xfer_len,
	    (char *)hdrp,
	    nxge_dump_packet((char *)hdrp, dump_len)));
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start(18): TX desc sync: sop_index %d",
	    sop_index));
#endif

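	/*
	 * Flush the descriptors to the device.  If the descriptors for this
	 * packet wrap past the end of the ring, sync in two pieces.
	 */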
	if ((ngathers == 1) || tx_ring_p->wr_index < i) {
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    ngathers * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(19): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	} else { /* more than one descriptor and wrap around */
		uint32_t nsdescs = tx_ring_p->tx_ring_size - sop_index;
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    nsdescs * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(20): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));

		(void) ddi_dma_sync(tx_desc_dma_handle,
		    0,
		    (ngathers - nsdescs) * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(21): sync 2 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	}

	tail_index = tx_ring_p->wr_index;
	tail_wrap = tx_ring_p->wr_index_wrap;

	tx_ring_p->wr_index = i;
	if (tx_ring_p->wr_index <= tail_index) {
		tx_ring_p->wr_index_wrap = ((tail_wrap == B_TRUE) ?
		    B_FALSE : B_TRUE);
	}

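	/*
	 * Update the software ring state; the hardware is kicked below by
	 * writing the new tail index to the transmit kick register.
	 */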
	tx_ring_p->descs_pending += ngathers;
	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX kick: "
	    "channel %d wr_index %d wrap %d ngathers %d desc_pend %d",
	    tx_ring_p->tdc,
	    tx_ring_p->wr_index,
	    tx_ring_p->wr_index_wrap,
	    ngathers,
	    tx_ring_p->descs_pending));

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX KICKING: "));

	{
		tx_ring_kick_t		kick;

		kick.value = 0;
		kick.bits.ldw.wrap = tx_ring_p->wr_index_wrap;
		kick.bits.ldw.tail = (uint16_t)tx_ring_p->wr_index;

		/* Kick start the Transmit kick register */
		TXDMA_REG_WRITE64(NXGE_DEV_NPI_HANDLE(nxgep),
		    TX_RING_KICK_REG,
		    (uint8_t)tx_ring_p->tdc,
		    kick.value);
	}

	tdc_stats->tx_starts++;

	MUTEX_EXIT(&tx_ring_p->lock);

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));

	return (status);

nxge_start_fail2:
	if (good_packet == B_FALSE) {
		cur_index = sop_index;
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: clean up"));
		for (i = 0; i < ngathers; i++) {
			tx_desc_p = &tx_desc_ring_vp[cur_index];
			npi_handle.regp = (uint64_t)tx_desc_p;
			tx_msg_p = &tx_msg_ring[cur_index];
			(void) npi_txdma_desc_set_zero(npi_handle, 1);
			if (tx_msg_p->flags.dma_type == USE_DVMA) {
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "tx_desc_p = %X index = %d",
				    tx_desc_p, tx_ring_p->rd_index));
				(void) dvma_unload(
				    tx_msg_p->dvma_handle,
				    0, -1);
				tx_msg_p->dvma_handle = NULL;
				if (tx_ring_p->dvma_wr_index ==
				    tx_ring_p->dvma_wrap_mask)
					tx_ring_p->dvma_wr_index = 0;
				else
					tx_ring_p->dvma_wr_index++;
				tx_ring_p->dvma_pending--;
			} else if (tx_msg_p->flags.dma_type ==
			    USE_DMA) {
				if (ddi_dma_unbind_handle(
				    tx_msg_p->dma_handle))
					cmn_err(CE_WARN, "!nxge_start: "
					    "ddi_dma_unbind_handle failed");
			}
			tx_msg_p->flags.dma_type = USE_NONE;
			cur_index = TXDMA_DESC_NEXT_INDEX(cur_index, 1,
			    tx_ring_p->tx_wrap_mask);

		}

		nxgep->resched_needed = B_TRUE;
	}

	MUTEX_EXIT(&tx_ring_p->lock);

nxge_start_fail1:
	/* Add FMA to check the access handle nxge_hregh */

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));

	return (status);
}

boolean_t
nxge_send(p_nxge_t nxgep, mblk_t *mp, p_mac_tx_hint_t hp)
{
	p_tx_ring_t	*tx_rings;
	uint8_t		ring_index;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_send"));

	ASSERT(mp->b_next == NULL);

	ring_index = nxge_tx_lb_ring_1(mp, nxgep->max_tdcs, hp);
	tx_rings = nxgep->tx_rings->rings;
	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_tx_msg: tx_rings $%p",
	    tx_rings));
	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_tx_msg: max_tdcs %d "
	    "ring_index %d", nxgep->max_tdcs, ring_index));

	if (nxge_start(nxgep, tx_rings[ring_index], mp)) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_send: failed "
		    "ring index %d", ring_index));
		return (B_FALSE);
	}

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_send: ring index %d",
	    ring_index));

	return (B_TRUE);
}

/*
 * nxge_m_tx() - send a chain of packets
 */
mblk_t *
nxge_m_tx(void *arg, mblk_t *mp)
{
	p_nxge_t	nxgep = (p_nxge_t)arg;
	mblk_t		*next;
	mac_tx_hint_t	hint;

	if (!(nxgep->drv_state & STATE_HW_INITIALIZED)) {
		NXGE_DEBUG_MSG((nxgep, DDI_CTL,
		    "==> nxge_m_tx: hardware not initialized"));
		NXGE_DEBUG_MSG((nxgep, DDI_CTL,
		    "<== nxge_m_tx"));
		return (mp);
	}

	hint.hash = NULL;
	hint.vid = 0;
	hint.sap = 0;

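	/*
	 * Send each packet in the chain.  If a send fails, put the unsent
	 * remainder back on the chain and return it to the caller.
	 */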
	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Until Nemo tx resource works, the mac driver
		 * does the load balancing based on TCP port,
		 * or CPU. For debugging, we use a system
		 * configurable parameter.
		 */
		if (!nxge_send(nxgep, mp, &hint)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	return (mp);
}

int
nxge_tx_lb_ring_1(p_mblk_t mp, uint32_t maxtdcs, p_mac_tx_hint_t hp)
{
	uint8_t		ring_index = 0;
	uint8_t		*tcp_port;
	p_mblk_t	nmp;
	size_t		mblk_len;
	size_t		iph_len;
	size_t		hdrs_size;
	uint8_t		hdrs_buf[sizeof (struct ether_header) +
	    IP_MAX_HDR_LENGTH + sizeof (uint32_t)];
	/*
	 * allocate space big enough to cover
	 * the max ip header length and the first
	 * 4 bytes of the TCP/IP header.
	 */

	boolean_t	qos = B_FALSE;

	NXGE_DEBUG_MSG((NULL, TX_CTL, "==> nxge_tx_lb_ring"));

	if (hp->vid) {
		qos = B_TRUE;
	}
	switch (nxge_tx_lb_policy) {
	case NXGE_TX_LB_TCPUDP: /* default IPv4 TCP/UDP */
	default:
		tcp_port = mp->b_rptr;
		if (!nxge_no_tx_lb && !qos &&
		    (ntohs(((p_ether_header_t)tcp_port)->ether_type)
		    == ETHERTYPE_IP)) {
			nmp = mp;
			mblk_len = MBLKL(nmp);
			tcp_port = NULL;
			if (mblk_len > sizeof (struct ether_header) +
			    sizeof (uint8_t)) {
				tcp_port = nmp->b_rptr +
				    sizeof (struct ether_header);
				mblk_len -= sizeof (struct ether_header);
				iph_len = ((*tcp_port) & 0x0f) << 2;
				if (mblk_len > (iph_len + sizeof (uint32_t))) {
					tcp_port = nmp->b_rptr;
				} else {
					tcp_port = NULL;
				}
			}
			if (tcp_port == NULL) {
				hdrs_size = 0;
				((p_ether_header_t)hdrs_buf)->ether_type = 0;
				while ((nmp) && (hdrs_size <
				    sizeof (hdrs_buf))) {
					mblk_len = MBLKL(nmp);
					if (mblk_len >=
					    (sizeof (hdrs_buf) - hdrs_size))
						mblk_len = sizeof (hdrs_buf) -
						    hdrs_size;
					bcopy(nmp->b_rptr,
					    &hdrs_buf[hdrs_size], mblk_len);
					hdrs_size += mblk_len;
					nmp = nmp->b_cont;
				}
				tcp_port = hdrs_buf;
			}
			tcp_port += sizeof (ether_header_t);
			if (!(tcp_port[6] & 0x3f) && !(tcp_port[7] & 0xff)) {
				if ((tcp_port[9] == IPPROTO_TCP) ||
				    (tcp_port[9] == IPPROTO_UDP)) {
					tcp_port += ((*tcp_port) & 0x0f) << 2;
					ring_index =
					    ((tcp_port[1] ^ tcp_port[3])
					    % maxtdcs);
				} else {
					ring_index = tcp_port[19] % maxtdcs;
				}
			} else { /* fragmented packet */
				ring_index = tcp_port[19] % maxtdcs;
			}
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case NXGE_TX_LB_HASH:
		if (hp->hash) {
			ring_index = ((uint64_t)(hp->hash) % maxtdcs);
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case NXGE_TX_LB_DEST_MAC: /* Use destination MAC address */
		tcp_port = mp->b_rptr;
		ring_index = tcp_port[5] % maxtdcs;
		break;
	}

	NXGE_DEBUG_MSG((NULL, TX_CTL, "<== nxge_tx_lb_ring"));

	return (ring_index);
}

uint_t
nxge_reschedule(caddr_t arg)
{
	p_nxge_t nxgep;

	nxgep = (p_nxge_t)arg;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_reschedule"));

	if (nxgep->nxge_mac_state == NXGE_MAC_STARTED &&
	    nxgep->resched_needed) {
		mac_tx_update(nxgep->mach);
		nxgep->resched_needed = B_FALSE;
		nxgep->resched_running = B_FALSE;
	}

	NXGE_DEBUG_MSG((NULL, TX_CTL, "<== nxge_reschedule"));
	return (DDI_INTR_CLAIMED);
}