/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/nxge/nxge_impl.h>

extern uint32_t		nxge_reclaim_pending;
extern uint32_t		nxge_bcopy_thresh;
extern uint32_t		nxge_dvma_thresh;
extern uint32_t		nxge_dma_stream_thresh;
extern uint32_t		nxge_tx_minfree;
extern uint32_t		nxge_tx_intr_thres;
extern uint32_t		nxge_tx_max_gathers;
extern uint32_t		nxge_tx_tiny_pack;
extern uint32_t		nxge_tx_use_bcopy;
extern uint32_t		nxge_tx_lb_policy;
extern uint32_t		nxge_no_tx_lb;
extern nxge_tx_mode_t	nxge_tx_scheme;

typedef struct _mac_tx_hint {
	uint16_t	sap;
	uint16_t	vid;
	void		*hash;
} mac_tx_hint_t, *p_mac_tx_hint_t;

int nxge_tx_lb_ring_1(p_mblk_t, uint32_t, p_mac_tx_hint_t);

int
nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
{
	int			status = 0;
	p_tx_desc_t		tx_desc_ring_vp;
	npi_handle_t		npi_desc_handle;
	nxge_os_dma_handle_t	tx_desc_dma_handle;
	p_tx_desc_t		tx_desc_p;
	p_tx_msg_t		tx_msg_ring;
	p_tx_msg_t		tx_msg_p;
	tx_desc_t		tx_desc, *tmp_desc_p;
	tx_desc_t		sop_tx_desc, *sop_tx_desc_p;
	p_tx_pkt_header_t	hdrp;
	p_tx_pkt_hdr_all_t	pkthdrp;
	uint8_t			npads = 0;
	uint64_t		dma_ioaddr;
	uint32_t		dma_flags;
	int			last_bidx;
	uint8_t			*b_rptr;
	caddr_t			kaddr;
	uint32_t		nmblks;
	uint32_t		ngathers;
	uint32_t		clen;
	int			len;
	uint32_t		pkt_len, pack_len, min_len;
	uint32_t		bcopy_thresh;
	int			i, cur_index, sop_index;
	uint16_t		tail_index;
	boolean_t		tail_wrap = B_FALSE;
	nxge_dma_common_t	desc_area;
	nxge_os_dma_handle_t	dma_handle;
	ddi_dma_cookie_t	dma_cookie;
	npi_handle_t		npi_handle;
	p_mblk_t		nmp;
	p_mblk_t		t_mp;
	uint32_t		ncookies;
	boolean_t		good_packet;
	boolean_t		mark_mode = B_FALSE;
	p_nxge_stats_t		statsp;
	p_nxge_tx_ring_stats_t	tdc_stats;
	t_uscalar_t		start_offset = 0;
	t_uscalar_t		stuff_offset = 0;
	t_uscalar_t		end_offset = 0;
	t_uscalar_t		value = 0;
	t_uscalar_t		cksum_flags = 0;
	boolean_t		cksum_on = B_FALSE;
	uint32_t		boff = 0;
	uint64_t		tot_xfer_len = 0, tmp_len = 0;
	boolean_t		header_set = B_FALSE;
#ifdef NXGE_DEBUG
	p_tx_desc_t		tx_desc_ring_pp;
	p_tx_desc_t		tx_desc_pp;
	tx_desc_t		*save_desc_p;
	int			dump_len;
	int			sad_len;
	uint64_t		sad;
	int			xfer_len;
	uint32_t		msgsize;
#endif

	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: tx dma channel %d", tx_ring_p->tdc));
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: Starting tdc %d desc pending %d",
	    tx_ring_p->tdc, tx_ring_p->descs_pending));

	statsp = nxgep->statsp;

	if (nxgep->statsp->port_stats.lb_mode == nxge_lb_normal) {
		if (!statsp->mac_stats.link_up) {
			freemsg(mp);
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "link not up or LB mode"));
			goto nxge_start_fail1;
		}
	}

	hcksum_retrieve(mp, NULL, NULL, &start_offset,
	    &stuff_offset, &end_offset, &value, &cksum_flags);
	if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) {
		start_offset += sizeof (ether_header_t);
		stuff_offset += sizeof (ether_header_t);
	} else {
		start_offset += sizeof (struct ether_vlan_header);
		stuff_offset += sizeof (struct ether_vlan_header);
	}

	if (cksum_flags & HCK_PARTIALCKSUM) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: cksum_flags 0x%x (partial checksum) ",
		    cksum_flags));
		cksum_on = B_TRUE;
	}

#ifdef NXGE_DEBUG
	if (tx_ring_p->descs_pending) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
		    "desc pending %d ", tx_ring_p->descs_pending));
	}

	dump_len = (int)(MBLKL(mp));
	dump_len = (dump_len > 128) ? 128 : dump_len;

	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: tdc %d: dumping ...: b_rptr $%p "
	    "(Before header reserve: ORIGINAL LEN %d)",
	    tx_ring_p->tdc,
	    mp->b_rptr,
	    dump_len));

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: dump packets "
	    "(IP ORIGINAL b_rptr $%p): %s", mp->b_rptr,
	    nxge_dump_packet((char *)mp->b_rptr, dump_len)));
#endif

	MUTEX_ENTER(&tx_ring_p->lock);
	tdc_stats = tx_ring_p->tdc_stats;
	mark_mode = (tx_ring_p->descs_pending &&
	    ((tx_ring_p->tx_ring_size - tx_ring_p->descs_pending)
	    < nxge_tx_minfree));

	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "TX Descriptor ring is channel %d mark mode %d",
	    tx_ring_p->tdc, mark_mode));

	if (!nxge_txdma_reclaim(nxgep, tx_ring_p, nxge_tx_minfree)) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "TX Descriptor ring is full: channel %d",
		    tx_ring_p->tdc));
		cas32((uint32_t *)&tx_ring_p->queueing, 0, 1);
		tdc_stats->tx_no_desc++;
		MUTEX_EXIT(&tx_ring_p->lock);
		if (nxgep->resched_needed && !nxgep->resched_running) {
			nxgep->resched_running = B_TRUE;
			ddi_trigger_softintr(nxgep->resched_id);
		}
		status = 1;
		goto nxge_start_fail1;
	}

	nmp = mp;
	i = sop_index = tx_ring_p->wr_index;
	nmblks = 0;
	ngathers = 0;
	pkt_len = 0;
	pack_len = 0;
	clen = 0;
	last_bidx = -1;
	good_packet = B_TRUE;

	desc_area = tx_ring_p->tdc_desc;
	npi_handle = desc_area.npi_handle;
	npi_desc_handle.regh = (nxge_os_acc_handle_t)
	    DMA_COMMON_ACC_HANDLE(desc_area);
	tx_desc_ring_vp = (p_tx_desc_t)DMA_COMMON_VPTR(desc_area);
#ifdef NXGE_DEBUG
	tx_desc_ring_pp = (p_tx_desc_t)DMA_COMMON_IOADDR(desc_area);
#endif
	tx_desc_dma_handle = (nxge_os_dma_handle_t)
	    DMA_COMMON_HANDLE(desc_area);
	tx_msg_ring = tx_ring_p->tx_msg_ring;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: wr_index %d i %d",
	    sop_index, i));

#ifdef NXGE_DEBUG
	msgsize = msgdsize(nmp);
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start(1): wr_index %d i %d msgdsize %d",
	    sop_index, i, msgsize));
#endif
	/*
	 * The first 16 bytes of the premapped buffer are reserved
	 * for header. No padding will be used.
	 */
	pkt_len = pack_len = boff = TX_PKT_HEADER_SIZE;
	if (nxge_tx_use_bcopy && (nxgep->niu_type != N2_NIU)) {
		bcopy_thresh = (nxge_bcopy_thresh - TX_PKT_HEADER_SIZE);
	} else {
		bcopy_thresh = (TX_BCOPY_SIZE - TX_PKT_HEADER_SIZE);
	}
	while (nmp) {
		good_packet = B_TRUE;
		b_rptr = nmp->b_rptr;
		len = MBLKL(nmp);
		if (len <= 0) {
			nmp = nmp->b_cont;
			continue;
		}
		nmblks++;

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(1): nmblks %d "
		    "len %d pkt_len %d pack_len %d",
		    nmblks, len, pkt_len, pack_len));
		/*
		 * Hardware limits the transfer length to 4K for NIU and
		 * 4076 (TX_MAX_TRANSFER_LENGTH) for Neptune. But we just
		 * use TX_MAX_TRANSFER_LENGTH as the limit for both.
		 * If len is longer than the limit, then we break nmp into
		 * two chunks: Make the first chunk equal to the limit and
		 * the second chunk for the remaining data. If the second
		 * chunk is still larger than the limit, then it will be
		 * broken into two in the next pass.
		 */
		if (len > TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE) {
			t_mp = dupb(nmp);
			nmp->b_wptr = nmp->b_rptr +
			    (TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE);
			t_mp->b_rptr = nmp->b_wptr;
			t_mp->b_cont = nmp->b_cont;
			nmp->b_cont = t_mp;
			len = MBLKL(nmp);
		}

		tx_desc.value = 0;
		tx_desc_p = &tx_desc_ring_vp[i];
#ifdef NXGE_DEBUG
		tx_desc_pp = &tx_desc_ring_pp[i];
#endif
		tx_msg_p = &tx_msg_ring[i];
		npi_desc_handle.regp = (uint64_t)tx_desc_p;
		if (!header_set &&
		    ((!nxge_tx_use_bcopy && (len > TX_BCOPY_SIZE)) ||
		    (len >= bcopy_thresh))) {
			header_set = B_TRUE;
			bcopy_thresh += TX_PKT_HEADER_SIZE;
			boff = 0;
			pack_len = 0;
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
			hdrp = (p_tx_pkt_header_t)kaddr;
			clen = pkt_len;
			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			(void) ddi_dma_sync(dma_handle,
			    i * nxge_bcopy_thresh, nxge_bcopy_thresh,
			    DDI_DMA_SYNC_FORDEV);

			tx_msg_p->flags.dma_type = USE_BCOPY;
			goto nxge_start_control_header_only;
		}

		pkt_len += len;
		pack_len += len;

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(3): "
		    "desc entry %d "
		    "DESC IOADDR $%p "
		    "desc_vp $%p tx_desc_p $%p "
		    "desc_pp $%p tx_desc_pp $%p "
		    "len %d pkt_len %d pack_len %d",
		    i,
		    DMA_COMMON_IOADDR(desc_area),
		    tx_desc_ring_vp, tx_desc_p,
		    tx_desc_ring_pp, tx_desc_pp,
		    len, pkt_len, pack_len));

		if (len < bcopy_thresh) {
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(4): "
			    "USE BCOPY: "));
			if (nxge_tx_tiny_pack) {
				uint32_t blst =
				    TXDMA_DESC_NEXT_INDEX(i, -1,
				    tx_ring_p->tx_wrap_mask);
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "==> nxge_start(5): pack"));
				if ((pack_len <= bcopy_thresh) &&
				    (last_bidx == blst)) {
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start: pack(6) "
					    "(pkt_len %d pack_len %d)",
					    pkt_len, pack_len));
					i = blst;
					tx_desc_p = &tx_desc_ring_vp[i];
#ifdef NXGE_DEBUG
					tx_desc_pp = &tx_desc_ring_pp[i];
#endif
					tx_msg_p = &tx_msg_ring[i];
					boff = pack_len - len;
					ngathers--;
				} else if (pack_len > bcopy_thresh &&
				    header_set) {
					pack_len = len;
					boff = 0;
					bcopy_thresh = nxge_bcopy_thresh;
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start(7): > max NEW "
					    "bcopy thresh %d "
					    "pkt_len %d pack_len %d(next)",
					    bcopy_thresh,
					    pkt_len, pack_len));
				}
				last_bidx = i;
			}
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
			if ((boff == TX_PKT_HEADER_SIZE) && (nmblks == 1)) {
				hdrp = (p_tx_pkt_header_t)kaddr;
				header_set = B_TRUE;
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "==> nxge_start(7_x2): "
				    "pkt_len %d pack_len %d (new hdrp $%p)",
				    pkt_len, pack_len, hdrp));
			}
			tx_msg_p->flags.dma_type = USE_BCOPY;
			kaddr += boff;
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(8): "
			    "USE BCOPY: before bcopy "
			    "DESC IOADDR $%p entry %d "
			    "bcopy packets %d "
			    "bcopy kaddr $%p "
			    "bcopy ioaddr (SAD) $%p "
			    "bcopy clen %d "
			    "bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area), i,
			    tdc_stats->tx_hdr_pkts,
			    kaddr,
			    dma_ioaddr,
			    clen,
			    boff));
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "1USE BCOPY: "));
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "2USE BCOPY: "));
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
			    "last USE BCOPY: copy from b_rptr $%p "
			    "to KADDR $%p (len %d offset %d",
			    b_rptr, kaddr, len, boff));

			bcopy(b_rptr, kaddr, len);

#ifdef NXGE_DEBUG
			dump_len = (len > 128) ? 128 : len;
			NXGE_DEBUG_MSG((nxgep, TX_CTL,
			    "==> nxge_start: dump packets "
			    "(After BCOPY len %d)"
			    "(b_rptr $%p): %s", len, nmp->b_rptr,
			    nxge_dump_packet((char *)nmp->b_rptr,
			    dump_len)));
#endif

			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			(void) ddi_dma_sync(dma_handle,
			    i * nxge_bcopy_thresh, nxge_bcopy_thresh,
			    DDI_DMA_SYNC_FORDEV);
			clen = len + boff;
			tdc_stats->tx_hdr_pkts++;
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(9): "
			    "USE BCOPY: "
			    "DESC IOADDR $%p entry %d "
			    "bcopy packets %d "
			    "bcopy kaddr $%p "
			    "bcopy ioaddr (SAD) $%p "
			    "bcopy clen %d "
			    "bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area),
			    i,
			    tdc_stats->tx_hdr_pkts,
			    kaddr,
			    dma_ioaddr,
			    clen,
			    boff));
		} else {
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(12): "
			    "USE DVMA: len %d", len));
			tx_msg_p->flags.dma_type = USE_DMA;
			dma_flags = DDI_DMA_WRITE;
			if (len < nxge_dma_stream_thresh) {
				dma_flags |= DDI_DMA_CONSISTENT;
			} else {
				dma_flags |= DDI_DMA_STREAMING;
			}

			dma_handle = tx_msg_p->dma_handle;
			status = ddi_dma_addr_bind_handle(dma_handle, NULL,
			    (caddr_t)b_rptr, len, dma_flags,
			    DDI_DMA_DONTWAIT, NULL,
			    &dma_cookie, &ncookies);
			if (status == DDI_DMA_MAPPED) {
				dma_ioaddr = dma_cookie.dmac_laddress;
				len = (int)dma_cookie.dmac_size;
				clen = (uint32_t)dma_cookie.dmac_size;
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "==> nxge_start(12_1): "
				    "USE DVMA: len %d clen %d "
				    "ngathers %d",
				    len, clen,
				    ngathers));

				npi_desc_handle.regp = (uint64_t)tx_desc_p;
				while (ncookies > 1) {
					ngathers++;
					/*
					 * This is the fix for multiple
					 * cookies, each of which is basically
					 * a descriptor entry; we don't set
					 * the SOP bit or the related fields.
					 */

					(void) npi_txdma_desc_gather_set(
					    npi_desc_handle,
					    &tx_desc,
					    (ngathers - 1),
					    mark_mode,
					    ngathers,
					    dma_ioaddr,
					    clen);

					tx_msg_p->tx_msg_size = clen;
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start: DMA "
					    "ncookie %d "
					    "ngathers %d "
					    "dma_ioaddr $%p len %d"
					    "desc $%p descp $%p (%d)",
					    ncookies,
					    ngathers,
					    dma_ioaddr, clen,
					    *tx_desc_p, tx_desc_p, i));

					ddi_dma_nextcookie(dma_handle,
					    &dma_cookie);
					dma_ioaddr =
					    dma_cookie.dmac_laddress;

					len = (int)dma_cookie.dmac_size;
					clen = (uint32_t)dma_cookie.dmac_size;
					NXGE_DEBUG_MSG((nxgep, TX_CTL,
					    "==> nxge_start(12_2): "
					    "USE DVMA: len %d clen %d ",
					    len, clen));

					i = TXDMA_DESC_NEXT_INDEX(i, 1,
					    tx_ring_p->tx_wrap_mask);
					tx_desc_p = &tx_desc_ring_vp[i];

					npi_desc_handle.regp =
					    (uint64_t)tx_desc_p;
					tx_msg_p = &tx_msg_ring[i];
					tx_msg_p->flags.dma_type = USE_NONE;
					tx_desc.value = 0;

					ncookies--;
				}
				tdc_stats->tx_ddi_pkts++;
				NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start:"
				    "DMA: ddi packets %d",
				    tdc_stats->tx_ddi_pkts));
			} else {
				NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL,
				    "dma mapping failed for %d "
				    "bytes addr $%p flags %x (%d)",
				    len, b_rptr, status, status));
				good_packet = B_FALSE;
				tdc_stats->tx_dma_bind_fail++;
				tx_msg_p->flags.dma_type = USE_NONE;
				goto nxge_start_fail2;
			}
		} /* ddi dvma */

		nmp = nmp->b_cont;
nxge_start_control_header_only:
		npi_desc_handle.regp = (uint64_t)tx_desc_p;
		ngathers++;

		if (ngathers == 1) {
#ifdef NXGE_DEBUG
			save_desc_p = &sop_tx_desc;
#endif
			sop_tx_desc_p = &sop_tx_desc;
			sop_tx_desc_p->value = 0;
			sop_tx_desc_p->bits.hdw.tr_len = clen;
			sop_tx_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
			sop_tx_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;
		} else {
#ifdef NXGE_DEBUG
			save_desc_p = &tx_desc;
#endif
			tmp_desc_p = &tx_desc;
			tmp_desc_p->value = 0;
			tmp_desc_p->bits.hdw.tr_len = clen;
			tmp_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
			tmp_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;

			tx_desc_p->value = tmp_desc_p->value;
		}

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(13): "
		    "Desc_entry %d ngathers %d "
		    "desc_vp $%p tx_desc_p $%p "
		    "len %d clen %d pkt_len %d pack_len %d nmblks %d "
		    "dma_ioaddr (SAD) $%p mark %d",
		    i, ngathers,
		    tx_desc_ring_vp, tx_desc_p,
		    len, clen, pkt_len, pack_len, nmblks,
		    dma_ioaddr, mark_mode));

#ifdef NXGE_DEBUG
		npi_desc_handle.nxgep = nxgep;
		npi_desc_handle.function.function = nxgep->function_num;
		npi_desc_handle.function.instance = nxgep->instance;
		sad = (save_desc_p->value & TX_PKT_DESC_SAD_MASK);
		xfer_len = ((save_desc_p->value & TX_PKT_DESC_TR_LEN_MASK) >>
		    TX_PKT_DESC_TR_LEN_SHIFT);

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
		    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\t"
		    "mark %d sop %d\n",
		    save_desc_p->value,
		    sad,
		    save_desc_p->bits.hdw.tr_len,
		    xfer_len,
		    save_desc_p->bits.hdw.num_ptr,
		    save_desc_p->bits.hdw.mark,
		    save_desc_p->bits.hdw.sop));

		npi_txdma_dump_desc_one(npi_desc_handle, NULL, i);
#endif

		tx_msg_p->tx_msg_size = clen;
		i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask);
		if (ngathers > nxge_tx_max_gathers) {
			good_packet = B_FALSE;
			hcksum_retrieve(mp, NULL, NULL, &start_offset,
			    &stuff_offset, &end_offset, &value,
			    &cksum_flags);

			NXGE_DEBUG_MSG((NULL, TX_CTL,
			    "==> nxge_start(14): pull msg - "
			    "len %d pkt_len %d ngathers %d",
			    len, pkt_len, ngathers));
			/* Pull all message blocks from b_cont */
			if ((msgpullup(mp, -1)) == NULL) {
				goto nxge_start_fail2;
			}
			goto nxge_start_fail2;
		}
	} /* while (nmp) */

	tx_msg_p->tx_message = mp;
	tx_desc_p = &tx_desc_ring_vp[sop_index];
	npi_desc_handle.regp = (uint64_t)tx_desc_p;

	pkthdrp = (p_tx_pkt_hdr_all_t)hdrp;
	pkthdrp->reserved = 0;
	hdrp->value = 0;
	(void) nxge_fill_tx_hdr(mp, B_FALSE, cksum_on,
	    (pkt_len - TX_PKT_HEADER_SIZE), npads, pkthdrp);

	if (pkt_len > NXGE_MTU_DEFAULT_MAX) {
		tdc_stats->tx_jumbo_pkts++;
	}

	min_len = (nxgep->msg_min + TX_PKT_HEADER_SIZE + (npads * 2));
	if (pkt_len < min_len) {
		/* Assume we use bcopy to premapped buffers */
		kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
		NXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> nxge_start(14-1): < (msg_min + 16)"
		    "len %d pkt_len %d min_len %d bzero %d ngathers %d",
		    len, pkt_len, min_len, (min_len - pkt_len), ngathers));
		bzero((kaddr + pkt_len), (min_len - pkt_len));
		pkt_len = tx_msg_p->tx_msg_size = min_len;

		sop_tx_desc_p->bits.hdw.tr_len = min_len;

		NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);
		tx_desc_p->value = sop_tx_desc_p->value;

		NXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> nxge_start(14-2): < msg_min - "
		    "len %d pkt_len %d min_len %d ngathers %d",
		    len, pkt_len, min_len, ngathers));
	}

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: cksum_flags 0x%x ",
	    cksum_flags));
	if (cksum_flags & HCK_PARTIALCKSUM) {
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: cksum_flags 0x%x (partial checksum) ",
		    cksum_flags));
		cksum_on = B_TRUE;
		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
		tmp_len = (uint64_t)(start_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4START_SHIFT);
		tmp_len = (uint64_t)(stuff_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4STUFF_SHIFT);

		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "after SHIFT start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
	}
	{
		uint64_t	tmp_len;

		/* pkt_len already includes 16 + paddings!! */
		/* Update the control header length */
		tot_xfer_len = (pkt_len - TX_PKT_HEADER_SIZE);
		tmp_len = hdrp->value |
		    (tot_xfer_len << TX_PKT_HEADER_TOT_XFER_LEN_SHIFT);

		NXGE_DEBUG_MSG((nxgep, TX_CTL,
		    "==> nxge_start(15_x1): setting SOP "
		    "tot_xfer_len 0x%llx (%d) pkt_len %d tmp_len "
		    "0x%llx hdrp->value 0x%llx",
		    tot_xfer_len, tot_xfer_len, pkt_len,
		    tmp_len, hdrp->value));
#if defined(_BIG_ENDIAN)
		hdrp->value = ddi_swap64(tmp_len);
#else
		hdrp->value = tmp_len;
#endif
		NXGE_DEBUG_MSG((nxgep,
		    TX_CTL, "==> nxge_start(15_x2): setting SOP "
		    "after SWAP: tot_xfer_len 0x%llx pkt_len %d "
		    "tmp_len 0x%llx hdrp->value 0x%llx",
		    tot_xfer_len, pkt_len,
		    tmp_len, hdrp->value));
	}

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(15): setting SOP "
	    "wr_index %d "
	    "tot_xfer_len (%d) pkt_len %d npads %d",
	    sop_index,
	    tot_xfer_len, pkt_len,
	    npads));

	sop_tx_desc_p->bits.hdw.sop = 1;
	sop_tx_desc_p->bits.hdw.mark = mark_mode;
	sop_tx_desc_p->bits.hdw.num_ptr = ngathers;

	NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(16): set SOP done"));

#ifdef NXGE_DEBUG
	npi_desc_handle.nxgep = nxgep;
	npi_desc_handle.function.function = nxgep->function_num;
	npi_desc_handle.function.instance = nxgep->instance;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
	    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\tmark %d sop %d\n",
	    save_desc_p->value,
	    sad,
	    save_desc_p->bits.hdw.tr_len,
	    xfer_len,
	    save_desc_p->bits.hdw.num_ptr,
	    save_desc_p->bits.hdw.mark,
	    save_desc_p->bits.hdw.sop));
	(void) npi_txdma_dump_desc_one(npi_desc_handle, NULL, sop_index);

	dump_len = (pkt_len > 128) ? 128 : pkt_len;
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start: dump packets(17) (after sop set, len "
	    " (len/dump_len/pkt_len/tot_xfer_len) %d/%d/%d/%d):\n"
	    "ptr $%p: %s", len, dump_len, pkt_len, tot_xfer_len,
	    (char *)hdrp,
	    nxge_dump_packet((char *)hdrp, dump_len)));
	NXGE_DEBUG_MSG((nxgep, TX_CTL,
	    "==> nxge_start(18): TX desc sync: sop_index %d",
	    sop_index));
#endif

	if ((ngathers == 1) || tx_ring_p->wr_index < i) {
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    ngathers * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);

		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(19): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	} else { /* more than one descriptor and wrap around */
		uint32_t nsdescs = tx_ring_p->tx_ring_size - sop_index;
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    nsdescs * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(20): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));

		(void) ddi_dma_sync(tx_desc_dma_handle,
		    0,
		    (ngathers - nsdescs) * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(21): sync 2 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	}

	tail_index = tx_ring_p->wr_index;
	tail_wrap = tx_ring_p->wr_index_wrap;

	tx_ring_p->wr_index = i;
	if (tx_ring_p->wr_index <= tail_index) {
		tx_ring_p->wr_index_wrap = ((tail_wrap == B_TRUE) ?
		    B_FALSE : B_TRUE);
	}

	tx_ring_p->descs_pending += ngathers;
	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX kick: "
	    "channel %d wr_index %d wrap %d ngathers %d desc_pend %d",
	    tx_ring_p->tdc,
	    tx_ring_p->wr_index,
	    tx_ring_p->wr_index_wrap,
	    ngathers,
	    tx_ring_p->descs_pending));

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX KICKING: "));

	{
		tx_ring_kick_t		kick;

		kick.value = 0;
		kick.bits.ldw.wrap = tx_ring_p->wr_index_wrap;
		kick.bits.ldw.tail = (uint16_t)tx_ring_p->wr_index;

		/* Kick start the Transmit kick register */
		TXDMA_REG_WRITE64(NXGE_DEV_NPI_HANDLE(nxgep),
		    TX_RING_KICK_REG,
		    (uint8_t)tx_ring_p->tdc,
		    kick.value);
	}

	tdc_stats->tx_starts++;

	MUTEX_EXIT(&tx_ring_p->lock);

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));

	return (status);

nxge_start_fail2:
	if (good_packet == B_FALSE) {
		cur_index = sop_index;
		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: clean up"));
		for (i = 0; i < ngathers; i++) {
			tx_desc_p = &tx_desc_ring_vp[cur_index];
			npi_handle.regp = (uint64_t)tx_desc_p;
			tx_msg_p = &tx_msg_ring[cur_index];
			(void) npi_txdma_desc_set_zero(npi_handle, 1);
			if (tx_msg_p->flags.dma_type == USE_DVMA) {
				NXGE_DEBUG_MSG((nxgep, TX_CTL,
				    "tx_desc_p = %X index = %d",
				    tx_desc_p, tx_ring_p->rd_index));
				(void) dvma_unload(
				    tx_msg_p->dvma_handle,
				    0, -1);
				tx_msg_p->dvma_handle = NULL;
				if (tx_ring_p->dvma_wr_index ==
				    tx_ring_p->dvma_wrap_mask)
					tx_ring_p->dvma_wr_index = 0;
				else
					tx_ring_p->dvma_wr_index++;
				tx_ring_p->dvma_pending--;
			} else if (tx_msg_p->flags.dma_type ==
			    USE_DMA) {
				if (ddi_dma_unbind_handle(
				    tx_msg_p->dma_handle))
					cmn_err(CE_WARN, "!nxge_start: "
					    "ddi_dma_unbind_handle failed");
			}
			tx_msg_p->flags.dma_type = USE_NONE;
			cur_index = TXDMA_DESC_NEXT_INDEX(cur_index, 1,
			    tx_ring_p->tx_wrap_mask);

		}

		nxgep->resched_needed = B_TRUE;
	}

	MUTEX_EXIT(&tx_ring_p->lock);

nxge_start_fail1:
	/* Add FMA to check the access handle nxge_hregh */

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));

	return (status);
}

int
nxge_serial_tx(mblk_t *mp, void *arg)
{
	p_tx_ring_t	tx_ring_p = (p_tx_ring_t)arg;
	p_nxge_t	nxgep = tx_ring_p->nxgep;

	return (nxge_start(nxgep, tx_ring_p, mp));
}

boolean_t
nxge_send(p_nxge_t nxgep, mblk_t *mp, p_mac_tx_hint_t hp)
{
	p_tx_ring_t	*tx_rings;
	uint8_t		ring_index;
	p_tx_ring_t	tx_ring_p;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_send"));

	ASSERT(mp->b_next == NULL);

	ring_index = nxge_tx_lb_ring_1(mp, nxgep->max_tdcs, hp);
	tx_rings = nxgep->tx_rings->rings;
	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_tx_msg: tx_rings $%p",
	    tx_rings));
	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_tx_msg: max_tdcs %d "
	    "ring_index %d", nxgep->max_tdcs, ring_index));

	switch (nxge_tx_scheme) {
	case NXGE_USE_START:
		if (nxge_start(nxgep, tx_rings[ring_index], mp)) {
			NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_send: failed "
			    "ring index %d", ring_index));
			return (B_FALSE);
		}
		break;

	case NXGE_USE_SERIAL:
	default:
		tx_ring_p = tx_rings[ring_index];
		nxge_serialize_enter(tx_ring_p->serial, mp);
		break;
	}

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_send: ring index %d",
	    ring_index));

	return (B_TRUE);
}

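/*
 * Transmit path overview (derived from the code in this file): the MAC
 * framework calls nxge_m_tx() with a chain of packets; nxge_send() picks
 * a TX DMA channel via nxge_tx_lb_ring_1() and then either calls
 * nxge_start() directly (NXGE_USE_START) or hands the packet to the
 * ring's serializer (NXGE_USE_SERIAL), which presumably invokes
 * nxge_serial_tx() for it later on.
 */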
/*
 * nxge_m_tx() - send a chain of packets
 */
mblk_t *
nxge_m_tx(void *arg, mblk_t *mp)
{
	p_nxge_t	nxgep = (p_nxge_t)arg;
	mblk_t		*next;
	mac_tx_hint_t	hint;

	if (!(nxgep->drv_state & STATE_HW_INITIALIZED)) {
		NXGE_DEBUG_MSG((nxgep, DDI_CTL,
		    "==> nxge_m_tx: hardware not initialized"));
		NXGE_DEBUG_MSG((nxgep, DDI_CTL,
		    "<== nxge_m_tx"));
		return (mp);
	}

	hint.hash = NULL;
	hint.vid = 0;
	hint.sap = 0;

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Until Nemo tx resource works, the mac driver
		 * does the load balancing based on TCP port,
		 * or CPU. For debugging, we use a system
		 * configurable parameter.
		 */
		if (!nxge_send(nxgep, mp, &hint)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	return (mp);
}

int
nxge_tx_lb_ring_1(p_mblk_t mp, uint32_t maxtdcs, p_mac_tx_hint_t hp)
{
	uint8_t		ring_index = 0;
	uint8_t		*tcp_port;
	p_mblk_t	nmp;
	size_t		mblk_len;
	size_t		iph_len;
	size_t		hdrs_size;
	uint8_t		hdrs_buf[sizeof (struct ether_header) +
	    IP_MAX_HDR_LENGTH + sizeof (uint32_t)];
	/*
	 * allocate space big enough to cover
	 * the max ip header length and the first
	 * 4 bytes of the TCP/IP header.
	 */

	boolean_t	qos = B_FALSE;

	NXGE_DEBUG_MSG((NULL, TX_CTL, "==> nxge_tx_lb_ring"));

	if (hp->vid) {
		qos = B_TRUE;
	}
	switch (nxge_tx_lb_policy) {
	case NXGE_TX_LB_TCPUDP: /* default IPv4 TCP/UDP */
	default:
		tcp_port = mp->b_rptr;
		if (!nxge_no_tx_lb && !qos &&
		    (ntohs(((p_ether_header_t)tcp_port)->ether_type)
		    == ETHERTYPE_IP)) {
			nmp = mp;
			mblk_len = MBLKL(nmp);
			tcp_port = NULL;
			if (mblk_len > sizeof (struct ether_header) +
			    sizeof (uint8_t)) {
				tcp_port = nmp->b_rptr +
				    sizeof (struct ether_header);
				mblk_len -= sizeof (struct ether_header);
				iph_len = ((*tcp_port) & 0x0f) << 2;
				if (mblk_len > (iph_len + sizeof (uint32_t))) {
					tcp_port = nmp->b_rptr;
				} else {
					tcp_port = NULL;
				}
			}
			if (tcp_port == NULL) {
				hdrs_size = 0;
				((p_ether_header_t)hdrs_buf)->ether_type = 0;
				while ((nmp) && (hdrs_size <
				    sizeof (hdrs_buf))) {
					mblk_len = MBLKL(nmp);
					if (mblk_len >=
					    (sizeof (hdrs_buf) - hdrs_size))
						mblk_len = sizeof (hdrs_buf) -
						    hdrs_size;
					bcopy(nmp->b_rptr,
					    &hdrs_buf[hdrs_size], mblk_len);
					hdrs_size += mblk_len;
					nmp = nmp->b_cont;
				}
				tcp_port = hdrs_buf;
			}
			tcp_port += sizeof (ether_header_t);
			if (!(tcp_port[6] & 0x3f) && !(tcp_port[7] & 0xff)) {
				switch (tcp_port[9]) {
				case IPPROTO_TCP:
				case IPPROTO_UDP:
				case IPPROTO_ESP:
					tcp_port += ((*tcp_port) & 0x0f) << 2;
					ring_index =
					    ((tcp_port[0] ^
					    tcp_port[1] ^
					    tcp_port[2] ^
					    tcp_port[3]) % maxtdcs);
					break;

				case IPPROTO_AH:
					/* SPI starts at the 4th byte */
					tcp_port += ((*tcp_port) & 0x0f) << 2;
					ring_index =
					    ((tcp_port[4] ^
					    tcp_port[5] ^
					    tcp_port[6] ^
					    tcp_port[7]) % maxtdcs);
					break;

				default:
					ring_index = tcp_port[19] % maxtdcs;
					break;
				}
			} else { /* fragmented packet */
				ring_index = tcp_port[19] % maxtdcs;
			}
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case NXGE_TX_LB_HASH:
		if (hp->hash) {
			ring_index = ((uint64_t)(hp->hash) % maxtdcs);
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case NXGE_TX_LB_DEST_MAC: /* Use destination MAC address */
		tcp_port = mp->b_rptr;
		ring_index = tcp_port[5] % maxtdcs;
		break;
	}

	NXGE_DEBUG_MSG((NULL, TX_CTL, "<== nxge_tx_lb_ring"));

	return (ring_index);
}

uint_t
nxge_reschedule(caddr_t arg)
{
	p_nxge_t	nxgep;

	nxgep = (p_nxge_t)arg;

	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_reschedule"));

	if (nxgep->nxge_mac_state == NXGE_MAC_STARTED &&
	    nxgep->resched_needed) {
		mac_tx_update(nxgep->mach);
		nxgep->resched_needed = B_FALSE;
		nxgep->resched_running = B_FALSE;
	}

	NXGE_DEBUG_MSG((NULL, TX_CTL, "<== nxge_reschedule"));
	return (DDI_INTR_CLAIMED);
}