/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <hxge_impl.h>

extern uint32_t hxge_reclaim_pending;
extern uint32_t hxge_bcopy_thresh;
extern uint32_t hxge_dvma_thresh;
extern uint32_t hxge_dma_stream_thresh;
extern uint32_t hxge_tx_minfree;
extern uint32_t hxge_tx_intr_thres;
extern uint32_t hxge_tx_max_gathers;
extern uint32_t hxge_tx_tiny_pack;
extern uint32_t hxge_tx_use_bcopy;
extern uint32_t hxge_tx_lb_policy;
extern uint32_t hxge_no_tx_lb;

typedef struct _mac_tx_hint {
	uint16_t	sap;
	uint16_t	vid;
	void		*hash;
} mac_tx_hint_t, *p_mac_tx_hint_t;

int hxge_tx_lb_ring(p_mblk_t, uint32_t, p_mac_tx_hint_t);

int
hxge_start(p_hxge_t hxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
{
	int			status = 0;
	p_tx_desc_t		tx_desc_ring_vp;
	hpi_handle_t		hpi_desc_handle;
	hxge_os_dma_handle_t	tx_desc_dma_handle;
	p_tx_desc_t		tx_desc_p;
	p_tx_msg_t		tx_msg_ring;
	p_tx_msg_t		tx_msg_p;
	tx_desc_t		tx_desc, *tmp_desc_p;
	tx_desc_t		sop_tx_desc, *sop_tx_desc_p;
	p_tx_pkt_header_t	hdrp;
	p_tx_pkt_hdr_all_t	pkthdrp;
	uint8_t			npads = 0;
	uint64_t		dma_ioaddr;
	uint32_t		dma_flags;
	int			last_bidx;
	uint8_t			*b_rptr;
	caddr_t			kaddr;
	uint32_t		nmblks;
	uint32_t		ngathers;
	uint32_t		clen;
	int			len;
	uint32_t		pkt_len, pack_len, min_len;
	uint32_t		bcopy_thresh;
	int			i, cur_index, sop_index;
	uint16_t		tail_index;
	boolean_t		tail_wrap = B_FALSE;
	hxge_dma_common_t	desc_area;
	hxge_os_dma_handle_t	dma_handle;
	ddi_dma_cookie_t	dma_cookie;
	hpi_handle_t		hpi_handle;
	p_mblk_t		nmp;
	p_mblk_t		t_mp;
	uint32_t		ncookies;
	boolean_t		good_packet;
	boolean_t		mark_mode = B_FALSE;
	p_hxge_stats_t		statsp;
	p_hxge_tx_ring_stats_t	tdc_stats;
	t_uscalar_t		start_offset = 0;
	t_uscalar_t		stuff_offset = 0;
	t_uscalar_t		end_offset = 0;
	t_uscalar_t		value = 0;
	t_uscalar_t		cksum_flags = 0;
	boolean_t		cksum_on = B_FALSE;
	uint32_t		boff = 0;
	uint64_t		tot_xfer_len = 0, tmp_len = 0;
	boolean_t		header_set = B_FALSE;
	tdc_tdr_kick_t		kick;
#ifdef HXGE_DEBUG
	p_tx_desc_t		tx_desc_ring_pp;
	p_tx_desc_t		tx_desc_pp;
	tx_desc_t		*save_desc_p;
	int			dump_len;
	int			sad_len;
	uint64_t		sad;
	int			xfer_len;
	uint32_t		msgsize;
#endif

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: tx dma channel %d", tx_ring_p->tdc));
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: Starting tdc %d desc pending %d",
	    tx_ring_p->tdc, tx_ring_p->descs_pending));

	statsp = hxgep->statsp;

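	/*
	 * In normal (non-loopback) mode, drop the packet right away if
	 * the link is down; there is no point in building descriptors
	 * the hardware cannot transmit.
	 */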
	if (hxgep->statsp->port_stats.lb_mode == hxge_lb_normal) {
		if (!statsp->mac_stats.link_up) {
			freemsg(mp);
			HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: "
			    "link not up or LB mode"));
			goto hxge_start_fail1;
		}
	}

	hcksum_retrieve(mp, NULL, NULL, &start_offset,
	    &stuff_offset, &end_offset, &value, &cksum_flags);
	if (!HXGE_IS_VLAN_PACKET(mp->b_rptr)) {
		start_offset += sizeof (ether_header_t);
		stuff_offset += sizeof (ether_header_t);
	} else {
		start_offset += sizeof (struct ether_vlan_header);
		stuff_offset += sizeof (struct ether_vlan_header);
	}

	if (cksum_flags & HCK_PARTIALCKSUM) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: mp $%p len %d "
		    "cksum_flags 0x%x (partial checksum) ",
		    mp, MBLKL(mp), cksum_flags));
		cksum_on = B_TRUE;
	}

	MUTEX_ENTER(&tx_ring_p->lock);
start_again:
	ngathers = 0;
	sop_index = tx_ring_p->wr_index;
#ifdef HXGE_DEBUG
	if (tx_ring_p->descs_pending) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: desc pending %d ",
		    tx_ring_p->descs_pending));
	}

	dump_len = (int)(MBLKL(mp));
	dump_len = (dump_len > 128) ? 128: dump_len;

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: tdc %d: dumping ...: b_rptr $%p "
	    "(Before header reserve: ORIGINAL LEN %d)",
	    tx_ring_p->tdc, mp->b_rptr, dump_len));

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: dump packets (IP ORIGINAL b_rptr $%p): %s",
	    mp->b_rptr, hxge_dump_packet((char *)mp->b_rptr, dump_len)));
#endif

	tdc_stats = tx_ring_p->tdc_stats;
	mark_mode = (tx_ring_p->descs_pending &&
	    ((tx_ring_p->tx_ring_size - tx_ring_p->descs_pending) <
	    hxge_tx_minfree));

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "TX Descriptor ring is channel %d mark mode %d",
	    tx_ring_p->tdc, mark_mode));

	if (!hxge_txdma_reclaim(hxgep, tx_ring_p, hxge_tx_minfree)) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "TX Descriptor ring is full: channel %d", tx_ring_p->tdc));
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "TX Descriptor ring is full: channel %d", tx_ring_p->tdc));
		cas32((uint32_t *)&tx_ring_p->queueing, 0, 1);
		tdc_stats->tx_no_desc++;
		MUTEX_EXIT(&tx_ring_p->lock);
		if (hxgep->resched_needed && !hxgep->resched_running) {
			hxgep->resched_running = B_TRUE;
			ddi_trigger_softintr(hxgep->resched_id);
		}
		status = 1;
		goto hxge_start_fail1;
	}

	nmp = mp;
	i = sop_index = tx_ring_p->wr_index;
	nmblks = 0;
	ngathers = 0;
	pkt_len = 0;
	pack_len = 0;
	clen = 0;
	last_bidx = -1;
	good_packet = B_TRUE;

	desc_area = tx_ring_p->tdc_desc;
	hpi_handle = desc_area.hpi_handle;
	hpi_desc_handle.regh = (hxge_os_acc_handle_t)
	    DMA_COMMON_ACC_HANDLE(desc_area);
	tx_desc_ring_vp = (p_tx_desc_t)DMA_COMMON_VPTR(desc_area);
#ifdef HXGE_DEBUG
	tx_desc_ring_pp = (p_tx_desc_t)DMA_COMMON_IOADDR(desc_area);
#endif
	tx_desc_dma_handle = (hxge_os_dma_handle_t)DMA_COMMON_HANDLE(desc_area);
	tx_msg_ring = tx_ring_p->tx_msg_ring;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: wr_index %d i %d",
	    sop_index, i));

#ifdef HXGE_DEBUG
	msgsize = msgdsize(nmp);
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start(1): wr_index %d i %d msgdsize %d",
	    sop_index, i, msgsize));
#endif
	/*
	 * The first 16 bytes of the premapped buffer are reserved
	 * for header. No padding will be used.
	 */
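	/*
	 * Account for the reserved 16-byte internal packet header up
	 * front: pkt_len, pack_len and boff all start at
	 * TX_PKT_HEADER_SIZE, and the bcopy threshold is reduced by the
	 * same amount so that the header plus any copied data still fit
	 * in one premapped buffer.
	 */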
	pkt_len = pack_len = boff = TX_PKT_HEADER_SIZE;
	if (hxge_tx_use_bcopy) {
		bcopy_thresh = (hxge_bcopy_thresh - TX_PKT_HEADER_SIZE);
	} else {
		bcopy_thresh = (TX_BCOPY_SIZE - TX_PKT_HEADER_SIZE);
	}
	while (nmp) {
		good_packet = B_TRUE;
		b_rptr = nmp->b_rptr;
		len = MBLKL(nmp);
		if (len <= 0) {
			nmp = nmp->b_cont;
			continue;
		}
		nmblks++;

		HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(1): nmblks %d "
		    "len %d pkt_len %d pack_len %d",
		    nmblks, len, pkt_len, pack_len));
		/*
		 * Hardware limits the transfer length to 4K.
		 * If len is more than 4K, we need to break
		 * nmp into two chunks: Make first chunk smaller
		 * than 4K. The second chunk will be broken into
		 * less than 4K (if needed) during the next pass.
		 */
		if (len > (TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE)) {
			if ((t_mp = dupb(nmp)) != NULL) {
				nmp->b_wptr = nmp->b_rptr +
				    (TX_MAX_TRANSFER_LENGTH -
				    TX_PKT_HEADER_SIZE);
				t_mp->b_rptr = nmp->b_wptr;
				t_mp->b_cont = nmp->b_cont;
				nmp->b_cont = t_mp;
				len = MBLKL(nmp);
			} else {
				good_packet = B_FALSE;
				goto hxge_start_fail2;
			}
		}
		tx_desc.value = 0;
		tx_desc_p = &tx_desc_ring_vp[i];
#ifdef HXGE_DEBUG
		tx_desc_pp = &tx_desc_ring_pp[i];
#endif
		tx_msg_p = &tx_msg_ring[i];
#if defined(__i386)
		hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
		hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif
		if (!header_set &&
		    ((!hxge_tx_use_bcopy && (len > TX_BCOPY_SIZE)) ||
		    (len >= bcopy_thresh))) {
			header_set = B_TRUE;
			bcopy_thresh += TX_PKT_HEADER_SIZE;
			boff = 0;
			pack_len = 0;
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
			hdrp = (p_tx_pkt_header_t)kaddr;
			clen = pkt_len;
			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			(void) ddi_dma_sync(dma_handle,
			    i * hxge_bcopy_thresh, hxge_bcopy_thresh,
			    DDI_DMA_SYNC_FORDEV);

			tx_msg_p->flags.dma_type = USE_BCOPY;
			goto hxge_start_control_header_only;
		}

		pkt_len += len;
		pack_len += len;

		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start(3): desc entry %d DESC IOADDR $%p "
		    "desc_vp $%p tx_desc_p $%p desc_pp $%p tx_desc_pp $%p "
		    "len %d pkt_len %d pack_len %d",
		    i,
		    DMA_COMMON_IOADDR(desc_area),
		    tx_desc_ring_vp, tx_desc_p,
		    tx_desc_ring_pp, tx_desc_pp,
		    len, pkt_len, pack_len));

		if (len < bcopy_thresh) {
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start(4): USE BCOPY: "));
			if (hxge_tx_tiny_pack) {
				uint32_t blst = TXDMA_DESC_NEXT_INDEX(i, -1,
				    tx_ring_p->tx_wrap_mask);
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start(5): pack"));
				if ((pack_len <= bcopy_thresh) &&
				    (last_bidx == blst)) {
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start: pack(6) "
					    "(pkt_len %d pack_len %d)",
					    pkt_len, pack_len));
					i = blst;
					tx_desc_p = &tx_desc_ring_vp[i];
#ifdef HXGE_DEBUG
					tx_desc_pp = &tx_desc_ring_pp[i];
#endif
					tx_msg_p = &tx_msg_ring[i];
					boff = pack_len - len;
					ngathers--;
				} else if (pack_len > bcopy_thresh &&
				    header_set) {
					pack_len = len;
					boff = 0;
					bcopy_thresh = hxge_bcopy_thresh;
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start(7): > max NEW "
					    "bcopy thresh %d "
					    "pkt_len %d pack_len %d(next)",
					    bcopy_thresh, pkt_len, pack_len));
				}
				last_bidx = i;
			}
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
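			/*
			 * First mblk of the packet with only the reserved
			 * header space ahead of it in the premapped buffer:
			 * the internal packet header lives at the base of
			 * this buffer, so remember it as hdrp.
			 */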
			if ((boff == TX_PKT_HEADER_SIZE) && (nmblks == 1)) {
				hdrp = (p_tx_pkt_header_t)kaddr;
				header_set = B_TRUE;
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start(7_x2): "
				    "pkt_len %d pack_len %d (new hdrp $%p)",
				    pkt_len, pack_len, hdrp));
			}
			tx_msg_p->flags.dma_type = USE_BCOPY;
			kaddr += boff;
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start(8): USE BCOPY: before bcopy "
			    "DESC IOADDR $%p entry %d bcopy packets %d "
			    "bcopy kaddr $%p bcopy ioaddr (SAD) $%p "
			    "bcopy clen %d bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area), i,
			    tdc_stats->tx_hdr_pkts, kaddr, dma_ioaddr,
			    clen, boff));
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start: 1USE BCOPY: "));
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start: 2USE BCOPY: "));
			HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: "
			    "last USE BCOPY: copy from b_rptr $%p "
			    "to KADDR $%p (len %d offset %d",
			    b_rptr, kaddr, len, boff));
			bcopy(b_rptr, kaddr, len);
#ifdef HXGE_DEBUG
			dump_len = (len > 128) ? 128: len;
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start: dump packets "
			    "(After BCOPY len %d)"
			    "(b_rptr $%p): %s", len, nmp->b_rptr,
			    hxge_dump_packet((char *)nmp->b_rptr,
			    dump_len)));
#endif
			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			(void) ddi_dma_sync(dma_handle,
			    i * hxge_bcopy_thresh, hxge_bcopy_thresh,
			    DDI_DMA_SYNC_FORDEV);
			clen = len + boff;
			tdc_stats->tx_hdr_pkts++;
			HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(9): "
			    "USE BCOPY: DESC IOADDR $%p entry %d "
			    "bcopy packets %d bcopy kaddr $%p "
			    "bcopy ioaddr (SAD) $%p bcopy clen %d "
			    "bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area), i,
			    tdc_stats->tx_hdr_pkts, kaddr, dma_ioaddr,
			    clen, boff));
		} else {
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start(12): USE DVMA: len %d", len));
			tx_msg_p->flags.dma_type = USE_DMA;
			dma_flags = DDI_DMA_WRITE;
			if (len < hxge_dma_stream_thresh) {
				dma_flags |= DDI_DMA_CONSISTENT;
			} else {
				dma_flags |= DDI_DMA_STREAMING;
			}

			dma_handle = tx_msg_p->dma_handle;
			status = ddi_dma_addr_bind_handle(dma_handle, NULL,
			    (caddr_t)b_rptr, len, dma_flags,
			    DDI_DMA_DONTWAIT, NULL,
			    &dma_cookie, &ncookies);
			if (status == DDI_DMA_MAPPED) {
				dma_ioaddr = dma_cookie.dmac_laddress;
				len = (int)dma_cookie.dmac_size;
				clen = (uint32_t)dma_cookie.dmac_size;
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start(12_1): "
				    "USE DVMA: len %d clen %d ngathers %d",
				    len, clen, ngathers));
#if defined(__i386)
				hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
				hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif
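				/*
				 * A gather descriptor points at a single
				 * DMA cookie, so when the bind returns more
				 * than one cookie each extra cookie consumes
				 * its own descriptor in this loop; the final
				 * cookie is written by the common gather/SOP
				 * code after the loop.
				 */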
				while (ncookies > 1) {
					ngathers++;
					/*
					 * this is the fix for multiple
					 * cookies, which are basically
					 * a descriptor entry, we don't set
					 * SOP bit as well as related fields
					 */

					(void) hpi_txdma_desc_gather_set(
					    hpi_desc_handle, &tx_desc,
					    (ngathers - 1), mark_mode,
					    ngathers, dma_ioaddr, clen);
					tx_msg_p->tx_msg_size = clen;
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start: DMA "
					    "ncookie %d ngathers %d "
					    "dma_ioaddr $%p len %d"
					    "desc $%p descp $%p (%d)",
					    ncookies, ngathers,
					    dma_ioaddr, clen,
					    *tx_desc_p, tx_desc_p, i));

					ddi_dma_nextcookie(dma_handle,
					    &dma_cookie);
					dma_ioaddr = dma_cookie.dmac_laddress;

					len = (int)dma_cookie.dmac_size;
					clen = (uint32_t)dma_cookie.dmac_size;
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start(12_2): "
					    "USE DVMA: len %d clen %d ",
					    len, clen));

					i = TXDMA_DESC_NEXT_INDEX(i, 1,
					    tx_ring_p->tx_wrap_mask);
					tx_desc_p = &tx_desc_ring_vp[i];

					hpi_desc_handle.regp =
#if defined(__i386)
					    (uint32_t)tx_desc_p;
#else
					    (uint64_t)tx_desc_p;
#endif
					tx_msg_p = &tx_msg_ring[i];
					tx_msg_p->flags.dma_type = USE_NONE;
					tx_desc.value = 0;
					ncookies--;
				}
				tdc_stats->tx_ddi_pkts++;
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start: DMA: ddi packets %d",
				    tdc_stats->tx_ddi_pkts));
			} else {
				HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
				    "dma mapping failed for %d "
				    "bytes addr $%p flags %x (%d)",
				    len, b_rptr, status, status));
				good_packet = B_FALSE;
				tdc_stats->tx_dma_bind_fail++;
				tx_msg_p->flags.dma_type = USE_NONE;
				goto hxge_start_fail2;
			}
		} /* ddi dvma */

		nmp = nmp->b_cont;
hxge_start_control_header_only:
#if defined(__i386)
		hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
		hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif
		ngathers++;

		if (ngathers == 1) {
#ifdef HXGE_DEBUG
			save_desc_p = &sop_tx_desc;
#endif
			sop_tx_desc_p = &sop_tx_desc;
			sop_tx_desc_p->value = 0;
			sop_tx_desc_p->bits.tr_len = clen;
			sop_tx_desc_p->bits.sad = dma_ioaddr;
		} else {
#ifdef HXGE_DEBUG
			save_desc_p = &tx_desc;
#endif
			tmp_desc_p = &tx_desc;
			tmp_desc_p->value = 0;
			tmp_desc_p->bits.tr_len = clen;
			tmp_desc_p->bits.sad = dma_ioaddr;

			tx_desc_p->value = tmp_desc_p->value;
		}

		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start(13): Desc_entry %d ngathers %d "
		    "desc_vp $%p tx_desc_p $%p "
		    "len %d clen %d pkt_len %d pack_len %d nmblks %d "
		    "dma_ioaddr (SAD) $%p mark %d",
		    i, ngathers, tx_desc_ring_vp, tx_desc_p,
		    len, clen, pkt_len, pack_len, nmblks,
		    dma_ioaddr, mark_mode));

#ifdef HXGE_DEBUG
		hpi_desc_handle.hxgep = hxgep;
		hpi_desc_handle.function.function = 0;
		hpi_desc_handle.function.instance = hxgep->instance;
		sad = save_desc_p->bits.sad;
		xfer_len = save_desc_p->bits.tr_len;

		HXGE_DEBUG_MSG((hxgep, TX_CTL, "\n\t: value 0x%llx\n"
		    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\t"
		    "mark %d sop %d\n",
		    save_desc_p->value, sad, save_desc_p->bits.tr_len,
		    xfer_len, save_desc_p->bits.num_ptr,
		    save_desc_p->bits.mark, save_desc_p->bits.sop));

		hpi_txdma_dump_desc_one(hpi_desc_handle, NULL, i);
#endif

		tx_msg_p->tx_msg_size = clen;
		i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask);
		if (ngathers > hxge_tx_max_gathers) {
			good_packet = B_FALSE;
			hcksum_retrieve(mp, NULL, NULL, &start_offset,
			    &stuff_offset, &end_offset, &value, &cksum_flags);

			HXGE_DEBUG_MSG((NULL, TX_CTL,
			    "==> hxge_start(14): pull msg - "
			    "len %d pkt_len %d ngathers %d",
			    len, pkt_len, ngathers));
			/* Pull all message blocks from b_cont */
			if ((msgpullup(mp, -1)) == NULL) {
				goto hxge_start_fail2;
			}
			goto hxge_start_fail2;
		}
	} /* while (nmp) */

	tx_msg_p->tx_message = mp;
	tx_desc_p = &tx_desc_ring_vp[sop_index];
#if defined(__i386)
	hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
	hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif

	pkthdrp = (p_tx_pkt_hdr_all_t)hdrp;
	pkthdrp->reserved = 0;
	hdrp->value = 0;
	(void) hxge_fill_tx_hdr(mp, B_FALSE, cksum_on,
	    (pkt_len - TX_PKT_HEADER_SIZE), npads, pkthdrp);
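	/*
	 * Frame-size bookkeeping: frames larger than the standard frame
	 * size are counted as jumbo packets, and anything shorter than
	 * the minimum transmit size is zero-padded up to min_len below.
	 */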
	if (pkt_len > STD_FRAME_SIZE) {
		tdc_stats->tx_jumbo_pkts++;
	}

	min_len = (hxgep->msg_min + TX_PKT_HEADER_SIZE + (npads * 2));
	if (pkt_len < min_len) {
		/* Assume we use bcopy to premapped buffers */
		kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
		HXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> hxge_start(14-1): < (msg_min + 16)"
		    "len %d pkt_len %d min_len %d bzero %d ngathers %d",
		    len, pkt_len, min_len, (min_len - pkt_len), ngathers));
		bzero((kaddr + pkt_len), (min_len - pkt_len));
		pkt_len = tx_msg_p->tx_msg_size = min_len;

		sop_tx_desc_p->bits.tr_len = min_len;

		HXGE_MEM_PIO_WRITE64(hpi_desc_handle, sop_tx_desc_p->value);
		tx_desc_p->value = sop_tx_desc_p->value;

		HXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> hxge_start(14-2): < msg_min - "
		    "len %d pkt_len %d min_len %d ngathers %d",
		    len, pkt_len, min_len, ngathers));
	}

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: cksum_flags 0x%x ",
	    cksum_flags));
	if (cksum_flags & HCK_PARTIALCKSUM) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: cksum_flags 0x%x (partial checksum) ",
		    cksum_flags));
		cksum_on = B_TRUE;
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
		tmp_len = (uint64_t)(start_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4START_SHIFT);
		tmp_len = (uint64_t)(stuff_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4STUFF_SHIFT);

		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "after SHIFT start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
	}

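	/*
	 * Note: the L4 start/stuff values written above are the byte
	 * offsets reported by hcksum_retrieve() shifted right by one,
	 * i.e. the header fields appear to be expressed in 2-byte units.
	 */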
	/*
	 * pkt_len already includes 16 + paddings!!
	 * Update the control header length
	 */

	/*
	 * Note that Hydra is different from Neptune where
	 * tot_xfer_len = (pkt_len - TX_PKT_HEADER_SIZE);
	 */
	tot_xfer_len = pkt_len;
	tmp_len = hdrp->value |
	    (tot_xfer_len << TX_PKT_HEADER_TOT_XFER_LEN_SHIFT);

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start(15_x1): setting SOP "
	    "tot_xfer_len 0x%llx (%d) pkt_len %d tmp_len "
	    "0x%llx hdrp->value 0x%llx",
	    tot_xfer_len, tot_xfer_len, pkt_len, tmp_len, hdrp->value));
#if defined(_BIG_ENDIAN)
	hdrp->value = ddi_swap64(tmp_len);
#else
	hdrp->value = tmp_len;
#endif
	HXGE_DEBUG_MSG((hxgep,
	    TX_CTL, "==> hxge_start(15_x2): setting SOP "
	    "after SWAP: tot_xfer_len 0x%llx pkt_len %d "
	    "tmp_len 0x%llx hdrp->value 0x%llx",
	    tot_xfer_len, pkt_len, tmp_len, hdrp->value));

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(15): setting SOP "
	    "wr_index %d tot_xfer_len (%d) pkt_len %d npads %d",
	    sop_index, tot_xfer_len, pkt_len, npads));

	sop_tx_desc_p->bits.sop = 1;
	sop_tx_desc_p->bits.mark = mark_mode;
	sop_tx_desc_p->bits.num_ptr = ngathers;

	if (mark_mode)
		tdc_stats->tx_marks++;

	HXGE_MEM_PIO_WRITE64(hpi_desc_handle, sop_tx_desc_p->value);
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(16): set SOP done"));

#ifdef HXGE_DEBUG
	hpi_desc_handle.hxgep = hxgep;
	hpi_desc_handle.function.function = 0;
	hpi_desc_handle.function.instance = hxgep->instance;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "\n\t: value 0x%llx\n"
	    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\tmark %d sop %d\n",
	    save_desc_p->value, sad, save_desc_p->bits.tr_len,
	    xfer_len, save_desc_p->bits.num_ptr, save_desc_p->bits.mark,
	    save_desc_p->bits.sop));
	(void) hpi_txdma_dump_desc_one(hpi_desc_handle, NULL, sop_index);

	dump_len = (pkt_len > 128) ? 128: pkt_len;
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: dump packets(17) (after sop set, len "
	    " (len/dump_len/pkt_len/tot_xfer_len) %d/%d/%d/%d):\n"
	    "ptr $%p: %s", len, dump_len, pkt_len, tot_xfer_len,
	    (char *)hdrp, hxge_dump_packet((char *)hdrp, dump_len)));
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start(18): TX desc sync: sop_index %d", sop_index));
#endif

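	/*
	 * Flush the newly built descriptors to the device.  When they are
	 * contiguous (a single gather, or no wrap past the end of the
	 * ring) one sync covers them; otherwise sync the tail portion of
	 * the ring and the wrapped portion at the start separately.
	 */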
	if ((ngathers == 1) || tx_ring_p->wr_index < i) {
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    ngathers * sizeof (tx_desc_t), DDI_DMA_SYNC_FORDEV);

		HXGE_DEBUG_MSG((hxgep, TX_CTL, "hxge_start(19): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	} else { /* more than one descriptor and wrap around */
		uint32_t nsdescs = tx_ring_p->tx_ring_size - sop_index;
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    nsdescs * sizeof (tx_desc_t), DDI_DMA_SYNC_FORDEV);
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "hxge_start(20): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset, pkt_len, ngathers, sop_index));

		(void) ddi_dma_sync(tx_desc_dma_handle, 0,
		    (ngathers - nsdescs) * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "hxge_start(21): sync 2 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	}

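	/*
	 * Advance the software write index and toggle the wrap flag
	 * whenever the new index lands at or behind the old one.  The
	 * wrap flag is handed to the hardware in the TDR kick register
	 * so it can tell a full ring from an empty one.
	 */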
	tail_index = tx_ring_p->wr_index;
	tail_wrap = tx_ring_p->wr_index_wrap;

	tx_ring_p->wr_index = i;
	if (tx_ring_p->wr_index <= tail_index) {
		tx_ring_p->wr_index_wrap = ((tail_wrap == B_TRUE) ?
		    B_FALSE : B_TRUE);
	}

	tx_ring_p->descs_pending += ngathers;
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: TX kick: "
	    "channel %d wr_index %d wrap %d ngathers %d desc_pend %d",
	    tx_ring_p->tdc, tx_ring_p->wr_index, tx_ring_p->wr_index_wrap,
	    ngathers, tx_ring_p->descs_pending));
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: TX KICKING: "));

	kick.value = 0;
	kick.bits.wrap = tx_ring_p->wr_index_wrap;
	kick.bits.tail = (uint16_t)tx_ring_p->wr_index;

	/* Kick start the Transmit kick register */
	TXDMA_REG_WRITE64(HXGE_DEV_HPI_HANDLE(hxgep),
	    TDC_TDR_KICK, (uint8_t)tx_ring_p->tdc, kick.value);
	tdc_stats->tx_starts++;
	MUTEX_EXIT(&tx_ring_p->lock);
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_start"));
	return (status);

hxge_start_fail2:
	if (good_packet == B_FALSE) {
		cur_index = sop_index;
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: clean up"));
		for (i = 0; i < ngathers; i++) {
			tx_desc_p = &tx_desc_ring_vp[cur_index];
#if defined(__i386)
			hpi_handle.regp = (uint32_t)tx_desc_p;
#else
			hpi_handle.regp = (uint64_t)tx_desc_p;
#endif
			tx_msg_p = &tx_msg_ring[cur_index];
			(void) hpi_txdma_desc_set_zero(hpi_handle, 1);
			if (tx_msg_p->flags.dma_type == USE_DVMA) {
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "tx_desc_p = %X index = %d",
				    tx_desc_p, tx_ring_p->rd_index));
				(void) dvma_unload(tx_msg_p->dvma_handle,
				    0, -1);
				tx_msg_p->dvma_handle = NULL;
				if (tx_ring_p->dvma_wr_index ==
				    tx_ring_p->dvma_wrap_mask)
					tx_ring_p->dvma_wr_index = 0;
				else
					tx_ring_p->dvma_wr_index++;
				tx_ring_p->dvma_pending--;
			} else if (tx_msg_p->flags.dma_type == USE_DMA) {
				if (ddi_dma_unbind_handle(
				    tx_msg_p->dma_handle)) {
					cmn_err(CE_WARN, "hxge_start: "
					    "ddi_dma_unbind_handle failed");
				}
			}
			tx_msg_p->flags.dma_type = USE_NONE;
			cur_index = TXDMA_DESC_NEXT_INDEX(cur_index, 1,
			    tx_ring_p->tx_wrap_mask);

		}

		hxgep->resched_needed = B_TRUE;
	}

	MUTEX_EXIT(&tx_ring_p->lock);

hxge_start_fail1:
	/* Add FMA to check the access handle hxge_hregh */
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_start"));
	return (status);
}

boolean_t
hxge_send(p_hxge_t hxgep, mblk_t *mp, p_mac_tx_hint_t hp)
{
	p_tx_ring_t	*tx_rings;
	uint8_t		ring_index;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_send"));

	ASSERT(mp->b_next == NULL);

	ring_index = hxge_tx_lb_ring(mp, hxgep->max_tdcs, hp);
	tx_rings = hxgep->tx_rings->rings;
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_tx_msg: tx_rings $%p",
	    tx_rings));
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_tx_msg: max_tdcs %d "
	    "ring_index %d", hxgep->max_tdcs, ring_index));

	if (hxge_start(hxgep, tx_rings[ring_index], mp)) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_send: failed "
		    "ring index %d", ring_index));
		return (B_FALSE);
	}

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_send: ring index %d",
	    ring_index));
	return (B_TRUE);
}

/*
 * hxge_m_tx() - send a chain of packets
 */
mblk_t *
hxge_m_tx(void *arg, mblk_t *mp)
{
	p_hxge_t	hxgep = (p_hxge_t)arg;
	mblk_t		*next;
	mac_tx_hint_t	hint;

	if (!(hxgep->drv_state & STATE_HW_INITIALIZED)) {
		HXGE_DEBUG_MSG((hxgep, DDI_CTL,
		    "==> hxge_m_tx: hardware not initialized"));
		HXGE_DEBUG_MSG((hxgep, DDI_CTL, "<== hxge_m_tx"));
		return (mp);
	}

	hint.hash = NULL;
	hint.vid = 0;
	hint.sap = 0;

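	/*
	 * Walk the chain one packet at a time.  If a send fails, put the
	 * unsent packet back on the chain and return it to the MAC layer
	 * so it can be retried once the driver reschedules transmission.
	 */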
	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Until Nemo tx resource works, the mac driver
		 * does the load balancing based on TCP port,
		 * or CPU. For debugging, we use a system
		 * configurable parameter.
		 */
		if (!hxge_send(hxgep, mp, &hint)) {
			mp->b_next = next;
			break;
		}

		mp = next;

		HXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> hxge_m_tx: (go back to loop) mp $%p next $%p",
		    mp, next));
	}
	return (mp);
}

int
hxge_tx_lb_ring(p_mblk_t mp, uint32_t maxtdcs, p_mac_tx_hint_t hp)
{
	uint8_t		ring_index = 0;
	uint8_t		*tcp_port;
	p_mblk_t	nmp;
	size_t		mblk_len;
	size_t		iph_len;
	size_t		hdrs_size;
	uint8_t		hdrs_buf[sizeof (struct ether_header) +
	    IP_MAX_HDR_LENGTH + sizeof (uint32_t)];

	/*
	 * allocate space big enough to cover
	 * the max ip header length and the first
	 * 4 bytes of the TCP/IP header.
	 */
	boolean_t	qos = B_FALSE;

	HXGE_DEBUG_MSG((NULL, TX_CTL, "==> hxge_tx_lb_ring"));

	if (hp->vid) {
		qos = B_TRUE;
	}
	switch (hxge_tx_lb_policy) {
	case HXGE_TX_LB_TCPUDP: /* default IPv4 TCP/UDP */
	default:
		tcp_port = mp->b_rptr;
		if (!hxge_no_tx_lb && !qos &&
		    (ntohs(((p_ether_header_t)tcp_port)->ether_type) ==
		    ETHERTYPE_IP)) {
			nmp = mp;
			mblk_len = MBLKL(nmp);
			tcp_port = NULL;
			if (mblk_len > sizeof (struct ether_header) +
			    sizeof (uint8_t)) {
				tcp_port = nmp->b_rptr +
				    sizeof (struct ether_header);
				mblk_len -= sizeof (struct ether_header);
				iph_len = ((*tcp_port) & 0x0f) << 2;
				if (mblk_len > (iph_len + sizeof (uint32_t))) {
					tcp_port = nmp->b_rptr;
				} else {
					tcp_port = NULL;
				}
			}
			if (tcp_port == NULL) {
				hdrs_size = 0;
				((p_ether_header_t)hdrs_buf)->ether_type = 0;
				while ((nmp) && (hdrs_size <
				    sizeof (hdrs_buf))) {
					mblk_len = MBLKL(nmp);
					if (mblk_len >=
					    (sizeof (hdrs_buf) - hdrs_size))
						mblk_len = sizeof (hdrs_buf) -
						    hdrs_size;
					bcopy(nmp->b_rptr,
					    &hdrs_buf[hdrs_size], mblk_len);
					hdrs_size += mblk_len;
					nmp = nmp->b_cont;
				}
				tcp_port = hdrs_buf;
			}
			tcp_port += sizeof (ether_header_t);
			if (!(tcp_port[6] & 0x3f) && !(tcp_port[7] & 0xff)) {
				switch (tcp_port[9]) {
				case IPPROTO_TCP:
				case IPPROTO_UDP:
				case IPPROTO_ESP:
					tcp_port += ((*tcp_port) & 0x0f) << 2;
					ring_index = ((tcp_port[0] ^
					    tcp_port[1] ^
					    tcp_port[2] ^
					    tcp_port[3]) % maxtdcs);
					break;

				case IPPROTO_AH:
					/* SPI starts at the 4th byte */
					tcp_port += ((*tcp_port) & 0x0f) << 2;
					ring_index = ((tcp_port[4] ^
					    tcp_port[5] ^
					    tcp_port[6] ^
					    tcp_port[7]) % maxtdcs);
					break;

				default:
					ring_index = tcp_port[19] % maxtdcs;
					break;
				}
			} else { /* fragmented packet */
				ring_index = tcp_port[19] % maxtdcs;
			}
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case HXGE_TX_LB_HASH:
		if (hp->hash) {
#if defined(__i386)
			ring_index = ((uint32_t)(hp->hash) % maxtdcs);
#else
			ring_index = ((uint64_t)(hp->hash) % maxtdcs);
#endif
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case HXGE_TX_LB_DEST_MAC:
		/* Use destination MAC address */
		tcp_port = mp->b_rptr;
		ring_index = tcp_port[5] % maxtdcs;
		break;
	}
	HXGE_DEBUG_MSG((NULL, TX_CTL, "<== hxge_tx_lb_ring"));
	return (ring_index);
}

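/*
 * Soft interrupt handler associated with hxgep->resched_id, triggered
 * from hxge_start() when the descriptor ring runs out of entries.  It
 * calls mac_tx_update() so the MAC layer resumes sending packets down.
 */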
uint_t
hxge_reschedule(caddr_t arg)
{
	p_hxge_t hxgep;

	hxgep = (p_hxge_t)arg;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_reschedule"));

	if (hxgep->hxge_mac_state == HXGE_MAC_STARTED &&
	    hxgep->resched_needed) {
		mac_tx_update(hxgep->mach);
		hxgep->resched_needed = B_FALSE;
		hxgep->resched_running = B_FALSE;
	}

	HXGE_DEBUG_MSG((NULL, TX_CTL, "<== hxge_reschedule"));
	return (DDI_INTR_CLAIMED);
}