/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <hxge_impl.h>

extern uint32_t hxge_reclaim_pending;
extern uint32_t hxge_bcopy_thresh;
extern uint32_t hxge_dvma_thresh;
extern uint32_t hxge_dma_stream_thresh;
extern uint32_t hxge_tx_minfree;
extern uint32_t hxge_tx_intr_thres;
extern uint32_t hxge_tx_max_gathers;
extern uint32_t hxge_tx_tiny_pack;
extern uint32_t hxge_tx_use_bcopy;
extern uint32_t hxge_tx_lb_policy;
extern uint32_t hxge_no_tx_lb;

typedef struct _mac_tx_hint {
	uint16_t	sap;
	uint16_t	vid;
	void		*hash;
} mac_tx_hint_t, *p_mac_tx_hint_t;

int hxge_tx_lb_ring(p_mblk_t, uint32_t, p_mac_tx_hint_t);

int
hxge_start(p_hxge_t hxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
{
	int			status = 0;
	p_tx_desc_t		tx_desc_ring_vp;
	hpi_handle_t		hpi_desc_handle;
	hxge_os_dma_handle_t	tx_desc_dma_handle;
	p_tx_desc_t		tx_desc_p;
	p_tx_msg_t		tx_msg_ring;
	p_tx_msg_t		tx_msg_p;
	tx_desc_t		tx_desc, *tmp_desc_p;
	tx_desc_t		sop_tx_desc, *sop_tx_desc_p;
	p_tx_pkt_header_t	hdrp;
	p_tx_pkt_hdr_all_t	pkthdrp;
	uint8_t			npads = 0;
	uint64_t		dma_ioaddr;
	uint32_t		dma_flags;
	int			last_bidx;
	uint8_t			*b_rptr;
	caddr_t			kaddr;
	uint32_t		nmblks;
	uint32_t		ngathers;
	uint32_t		clen;
	int			len;
	uint32_t		pkt_len, pack_len, min_len;
	uint32_t		bcopy_thresh;
	int			i, cur_index, sop_index;
	uint16_t		tail_index;
	boolean_t		tail_wrap = B_FALSE;
	hxge_dma_common_t	desc_area;
	hxge_os_dma_handle_t	dma_handle;
	ddi_dma_cookie_t	dma_cookie;
	hpi_handle_t		hpi_handle;
	p_mblk_t		nmp;
	p_mblk_t		t_mp;
	uint32_t		ncookies;
	boolean_t		good_packet;
	boolean_t		mark_mode = B_FALSE;
	p_hxge_stats_t		statsp;
	p_hxge_tx_ring_stats_t	tdc_stats;
	t_uscalar_t		start_offset = 0;
	t_uscalar_t		stuff_offset = 0;
	t_uscalar_t		end_offset = 0;
	t_uscalar_t		value = 0;
	t_uscalar_t		cksum_flags = 0;
	boolean_t		cksum_on = B_FALSE;
	uint32_t		boff = 0;
	uint64_t		tot_xfer_len = 0, tmp_len = 0;
	boolean_t		header_set = B_FALSE;
	tdc_tdr_kick_t		kick;
	uint32_t		offset;
#ifdef HXGE_DEBUG
	p_tx_desc_t		tx_desc_ring_pp;
	p_tx_desc_t		tx_desc_pp;
	tx_desc_t		*save_desc_p;
	int			dump_len;
	int			sad_len;
	uint64_t		sad;
	int			xfer_len;
	uint32_t		msgsize;
#endif

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: tx dma channel %d", tx_ring_p->tdc));
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: Starting tdc %d desc pending %d",
	    tx_ring_p->tdc, tx_ring_p->descs_pending));

	statsp = hxgep->statsp;

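	/*
	 * In normal (non-loopback) mode, drop the packet immediately
	 * when the link is down; nothing can be transmitted anyway.
	 */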
	if (hxgep->statsp->port_stats.lb_mode == hxge_lb_normal) {
		if (!statsp->mac_stats.link_up) {
			freemsg(mp);
			HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: "
			    "link not up or LB mode"));
			goto hxge_start_fail1;
		}
	}

	hcksum_retrieve(mp, NULL, NULL, &start_offset,
	    &stuff_offset, &end_offset, &value, &cksum_flags);
	if (!HXGE_IS_VLAN_PACKET(mp->b_rptr)) {
		start_offset += sizeof (ether_header_t);
		stuff_offset += sizeof (ether_header_t);
	} else {
		start_offset += sizeof (struct ether_vlan_header);
		stuff_offset += sizeof (struct ether_vlan_header);
	}

	if (cksum_flags & HCK_PARTIALCKSUM) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: mp $%p len %d "
		    "cksum_flags 0x%x (partial checksum) ",
		    mp, MBLKL(mp), cksum_flags));
		cksum_on = B_TRUE;
	}

	MUTEX_ENTER(&tx_ring_p->lock);
start_again:
	ngathers = 0;
	sop_index = tx_ring_p->wr_index;
#ifdef HXGE_DEBUG
	if (tx_ring_p->descs_pending) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: desc pending %d ",
		    tx_ring_p->descs_pending));
	}

	dump_len = (int)(MBLKL(mp));
	dump_len = (dump_len > 128) ? 128: dump_len;

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: tdc %d: dumping ...: b_rptr $%p "
	    "(Before header reserve: ORIGINAL LEN %d)",
	    tx_ring_p->tdc, mp->b_rptr, dump_len));

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: dump packets (IP ORIGINAL b_rptr $%p): %s",
	    mp->b_rptr, hxge_dump_packet((char *)mp->b_rptr, dump_len)));
#endif

	tdc_stats = tx_ring_p->tdc_stats;
	mark_mode = (tx_ring_p->descs_pending &&
	    ((tx_ring_p->tx_ring_size - tx_ring_p->descs_pending) <
	    hxge_tx_minfree));

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "TX Descriptor ring is channel %d mark mode %d",
	    tx_ring_p->tdc, mark_mode));

	if (!hxge_txdma_reclaim(hxgep, tx_ring_p, hxge_tx_minfree)) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "TX Descriptor ring is full: channel %d", tx_ring_p->tdc));
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "TX Descriptor ring is full: channel %d", tx_ring_p->tdc));
		cas32((uint32_t *)&tx_ring_p->queueing, 0, 1);
		tdc_stats->tx_no_desc++;
		MUTEX_EXIT(&tx_ring_p->lock);
		if (hxgep->resched_needed && !hxgep->resched_running) {
			hxgep->resched_running = B_TRUE;
			ddi_trigger_softintr(hxgep->resched_id);
		}
		status = 1;
		goto hxge_start_fail1;
	}

	nmp = mp;
	i = sop_index = tx_ring_p->wr_index;
	nmblks = 0;
	ngathers = 0;
	pkt_len = 0;
	pack_len = 0;
	clen = 0;
	last_bidx = -1;
	good_packet = B_TRUE;

	desc_area = tx_ring_p->tdc_desc;
	hpi_handle = desc_area.hpi_handle;
	hpi_desc_handle.regh = (hxge_os_acc_handle_t)
	    DMA_COMMON_ACC_HANDLE(desc_area);
	hpi_desc_handle.hxgep = hxgep;
	tx_desc_ring_vp = (p_tx_desc_t)DMA_COMMON_VPTR(desc_area);
#ifdef HXGE_DEBUG
#if defined(__i386)
	tx_desc_ring_pp = (p_tx_desc_t)(uint32_t)DMA_COMMON_IOADDR(desc_area);
#else
	tx_desc_ring_pp = (p_tx_desc_t)DMA_COMMON_IOADDR(desc_area);
#endif
#endif
	tx_desc_dma_handle = (hxge_os_dma_handle_t)DMA_COMMON_HANDLE(desc_area);
	tx_msg_ring = tx_ring_p->tx_msg_ring;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: wr_index %d i %d",
	    sop_index, i));

#ifdef HXGE_DEBUG
	msgsize = msgdsize(nmp);
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start(1): wr_index %d i %d msgdsize %d",
	    sop_index, i, msgsize));
#endif
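	/*
	 * Walk the mblk chain below: fragments shorter than the bcopy
	 * threshold are copied into the premapped per-descriptor buffer,
	 * while larger fragments are bound directly with
	 * ddi_dma_addr_bind_handle().
	 */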
	/*
	 * The first 16 bytes of the premapped buffer are reserved
	 * for header. No padding will be used.
	 */
	pkt_len = pack_len = boff = TX_PKT_HEADER_SIZE;
	if (hxge_tx_use_bcopy) {
		bcopy_thresh = (hxge_bcopy_thresh - TX_PKT_HEADER_SIZE);
	} else {
		bcopy_thresh = (TX_BCOPY_SIZE - TX_PKT_HEADER_SIZE);
	}
	while (nmp) {
		good_packet = B_TRUE;
		b_rptr = nmp->b_rptr;
		len = MBLKL(nmp);
		if (len <= 0) {
			nmp = nmp->b_cont;
			continue;
		}
		nmblks++;

		HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(1): nmblks %d "
		    "len %d pkt_len %d pack_len %d",
		    nmblks, len, pkt_len, pack_len));
		/*
		 * Hardware limits the transfer length to 4K.
		 * If len is more than 4K, we need to break
		 * nmp into two chunks: Make first chunk smaller
		 * than 4K. The second chunk will be broken into
		 * less than 4K (if needed) during the next pass.
		 */
		if (len > (TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE)) {
			if ((t_mp = dupb(nmp)) != NULL) {
				nmp->b_wptr = nmp->b_rptr +
				    (TX_MAX_TRANSFER_LENGTH -
				    TX_PKT_HEADER_SIZE);
				t_mp->b_rptr = nmp->b_wptr;
				t_mp->b_cont = nmp->b_cont;
				nmp->b_cont = t_mp;
				len = MBLKL(nmp);
			} else {
				good_packet = B_FALSE;
				goto hxge_start_fail2;
			}
		}
		tx_desc.value = 0;
		tx_desc_p = &tx_desc_ring_vp[i];
#ifdef HXGE_DEBUG
		tx_desc_pp = &tx_desc_ring_pp[i];
#endif
		tx_msg_p = &tx_msg_ring[i];
#if defined(__i386)
		hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
		hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif
		if (!header_set &&
		    ((!hxge_tx_use_bcopy && (len > TX_BCOPY_SIZE)) ||
		    (len >= bcopy_thresh))) {
			header_set = B_TRUE;
			bcopy_thresh += TX_PKT_HEADER_SIZE;
			boff = 0;
			pack_len = 0;
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
			hdrp = (p_tx_pkt_header_t)kaddr;
			clen = pkt_len;
			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			offset = tx_msg_p->offset_index * hxge_bcopy_thresh;
			(void) ddi_dma_sync(dma_handle,
			    offset, hxge_bcopy_thresh, DDI_DMA_SYNC_FORDEV);

			tx_msg_p->flags.dma_type = USE_BCOPY;
			goto hxge_start_control_header_only;
		}

		pkt_len += len;
		pack_len += len;

		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start(3): desc entry %d DESC IOADDR $%p "
		    "desc_vp $%p tx_desc_p $%p desc_pp $%p tx_desc_pp $%p "
		    "len %d pkt_len %d pack_len %d",
		    i,
		    DMA_COMMON_IOADDR(desc_area),
		    tx_desc_ring_vp, tx_desc_p,
		    tx_desc_ring_pp, tx_desc_pp,
		    len, pkt_len, pack_len));

		if (len < bcopy_thresh) {
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start(4): USE BCOPY: "));
			if (hxge_tx_tiny_pack) {
				uint32_t blst = TXDMA_DESC_NEXT_INDEX(i, -1,
				    tx_ring_p->tx_wrap_mask);
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start(5): pack"));
				if ((pack_len <= bcopy_thresh) &&
				    (last_bidx == blst)) {
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start: pack(6) "
					    "(pkt_len %d pack_len %d)",
					    pkt_len, pack_len));
					i = blst;
					tx_desc_p = &tx_desc_ring_vp[i];
#ifdef HXGE_DEBUG
					tx_desc_pp = &tx_desc_ring_pp[i];
#endif
					tx_msg_p = &tx_msg_ring[i];
					boff = pack_len - len;
					ngathers--;
				} else if (pack_len > bcopy_thresh &&
				    header_set) {
					pack_len = len;
					boff = 0;
					bcopy_thresh = hxge_bcopy_thresh;
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start(7): > max NEW "
					    "bcopy thresh %d "
					    "pkt_len %d pack_len %d(next)",
					    bcopy_thresh, pkt_len, pack_len));
				}
				last_bidx = i;
			}
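			/*
			 * Copy this fragment into the premapped buffer at
			 * offset boff; the buffer is synced for the device
			 * after the copy.
			 */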
			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
			if ((boff == TX_PKT_HEADER_SIZE) && (nmblks == 1)) {
				hdrp = (p_tx_pkt_header_t)kaddr;
				header_set = B_TRUE;
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start(7_x2): "
				    "pkt_len %d pack_len %d (new hdrp $%p)",
				    pkt_len, pack_len, hdrp));
			}
			tx_msg_p->flags.dma_type = USE_BCOPY;
			kaddr += boff;
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start(8): USE BCOPY: before bcopy "
			    "DESC IOADDR $%p entry %d bcopy packets %d "
			    "bcopy kaddr $%p bcopy ioaddr (SAD) $%p "
			    "bcopy clen %d bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area), i,
			    tdc_stats->tx_hdr_pkts, kaddr, dma_ioaddr,
			    clen, boff));
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start: 1USE BCOPY: "));
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start: 2USE BCOPY: "));
			HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: "
			    "last USE BCOPY: copy from b_rptr $%p "
			    "to KADDR $%p (len %d offset %d",
			    b_rptr, kaddr, len, boff));
			bcopy(b_rptr, kaddr, len);
#ifdef HXGE_DEBUG
			dump_len = (len > 128) ? 128: len;
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start: dump packets "
			    "(After BCOPY len %d)"
			    "(b_rptr $%p): %s", len, nmp->b_rptr,
			    hxge_dump_packet((char *)nmp->b_rptr,
			    dump_len)));
#endif
			dma_handle = tx_msg_p->buf_dma_handle;
			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
			offset = tx_msg_p->offset_index * hxge_bcopy_thresh;
			(void) ddi_dma_sync(dma_handle,
			    offset, hxge_bcopy_thresh, DDI_DMA_SYNC_FORDEV);
			clen = len + boff;
			tdc_stats->tx_hdr_pkts++;
			HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(9): "
			    "USE BCOPY: DESC IOADDR $%p entry %d "
			    "bcopy packets %d bcopy kaddr $%p "
			    "bcopy ioaddr (SAD) $%p bcopy clen %d "
			    "bcopy boff %d",
			    DMA_COMMON_IOADDR(desc_area), i,
			    tdc_stats->tx_hdr_pkts, kaddr, dma_ioaddr,
			    clen, boff));
		} else {
			HXGE_DEBUG_MSG((hxgep, TX_CTL,
			    "==> hxge_start(12): USE DVMA: len %d", len));
			tx_msg_p->flags.dma_type = USE_DMA;
			dma_flags = DDI_DMA_WRITE;
			if (len < hxge_dma_stream_thresh) {
				dma_flags |= DDI_DMA_CONSISTENT;
			} else {
				dma_flags |= DDI_DMA_STREAMING;
			}

			dma_handle = tx_msg_p->dma_handle;
			status = ddi_dma_addr_bind_handle(dma_handle, NULL,
			    (caddr_t)b_rptr, len, dma_flags,
			    DDI_DMA_DONTWAIT, NULL,
			    &dma_cookie, &ncookies);
			if (status == DDI_DMA_MAPPED) {
				dma_ioaddr = dma_cookie.dmac_laddress;
				len = (int)dma_cookie.dmac_size;
				clen = (uint32_t)dma_cookie.dmac_size;
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start(12_1): "
				    "USE DVMA: len %d clen %d ngathers %d",
				    len, clen, ngathers));
#if defined(__i386)
				hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
				hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif
				while (ncookies > 1) {
					ngathers++;
					/*
					 * Each additional cookie becomes its
					 * own gather descriptor entry; the
					 * SOP bit and the other packet-level
					 * fields are not set on these.
					 */
					(void) hpi_txdma_desc_gather_set(
					    hpi_desc_handle, &tx_desc,
					    (ngathers - 1), mark_mode,
					    ngathers, dma_ioaddr, clen);
					tx_msg_p->tx_msg_size = clen;
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start: DMA "
					    "ncookie %d ngathers %d "
					    "dma_ioaddr $%p len %d"
					    "desc $%p descp $%p (%d)",
					    ncookies, ngathers,
					    dma_ioaddr, clen,
					    *tx_desc_p, tx_desc_p, i));

					ddi_dma_nextcookie(dma_handle,
					    &dma_cookie);
					dma_ioaddr = dma_cookie.dmac_laddress;

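					/*
					 * Each extra cookie consumes its own
					 * descriptor and tx_msg slot; the
					 * slot is flagged USE_NONE below
					 * since only the first cookie's slot
					 * keeps the DMA binding.
					 */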
					len = (int)dma_cookie.dmac_size;
					clen = (uint32_t)dma_cookie.dmac_size;
					HXGE_DEBUG_MSG((hxgep, TX_CTL,
					    "==> hxge_start(12_2): "
					    "USE DVMA: len %d clen %d ",
					    len, clen));

					i = TXDMA_DESC_NEXT_INDEX(i, 1,
					    tx_ring_p->tx_wrap_mask);
					tx_desc_p = &tx_desc_ring_vp[i];

					hpi_desc_handle.regp =
#if defined(__i386)
					    (uint32_t)tx_desc_p;
#else
					    (uint64_t)tx_desc_p;
#endif
					tx_msg_p = &tx_msg_ring[i];
					tx_msg_p->flags.dma_type = USE_NONE;
					tx_desc.value = 0;
					ncookies--;
				}
				tdc_stats->tx_ddi_pkts++;
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "==> hxge_start: DMA: ddi packets %d",
				    tdc_stats->tx_ddi_pkts));
			} else {
				HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
				    "dma mapping failed for %d "
				    "bytes addr $%p flags %x (%d)",
				    len, b_rptr, status, status));
				good_packet = B_FALSE;
				tdc_stats->tx_dma_bind_fail++;
				tx_msg_p->flags.dma_type = USE_NONE;
				goto hxge_start_fail2;
			}
		} /* ddi dvma */

		nmp = nmp->b_cont;
hxge_start_control_header_only:
#if defined(__i386)
		hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
		hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif
		ngathers++;

		if (ngathers == 1) {
#ifdef HXGE_DEBUG
			save_desc_p = &sop_tx_desc;
#endif
			sop_tx_desc_p = &sop_tx_desc;
			sop_tx_desc_p->value = 0;
			sop_tx_desc_p->bits.tr_len = clen;
			sop_tx_desc_p->bits.sad = dma_ioaddr >> 32;
			sop_tx_desc_p->bits.sad_l = dma_ioaddr & 0xffffffff;
		} else {
#ifdef HXGE_DEBUG
			save_desc_p = &tx_desc;
#endif
			tmp_desc_p = &tx_desc;
			tmp_desc_p->value = 0;
			tmp_desc_p->bits.tr_len = clen;
			tmp_desc_p->bits.sad = dma_ioaddr >> 32;
			tmp_desc_p->bits.sad_l = dma_ioaddr & 0xffffffff;

			tx_desc_p->value = tmp_desc_p->value;
		}

		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start(13): Desc_entry %d ngathers %d "
		    "desc_vp $%p tx_desc_p $%p "
		    "len %d clen %d pkt_len %d pack_len %d nmblks %d "
		    "dma_ioaddr (SAD) $%p mark %d",
		    i, ngathers, tx_desc_ring_vp, tx_desc_p,
		    len, clen, pkt_len, pack_len, nmblks,
		    dma_ioaddr, mark_mode));

#ifdef HXGE_DEBUG
		hpi_desc_handle.hxgep = hxgep;
		hpi_desc_handle.function.function = 0;
		hpi_desc_handle.function.instance = hxgep->instance;
		sad = save_desc_p->bits.sad;
		sad = (sad << 32) | save_desc_p->bits.sad_l;
		xfer_len = save_desc_p->bits.tr_len;

		HXGE_DEBUG_MSG((hxgep, TX_CTL, "\n\t: value 0x%llx\n"
		    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\t"
		    "mark %d sop %d\n",
		    save_desc_p->value, sad, save_desc_p->bits.tr_len,
		    xfer_len, save_desc_p->bits.num_ptr,
		    save_desc_p->bits.mark, save_desc_p->bits.sop));

		hpi_txdma_dump_desc_one(hpi_desc_handle, NULL, i);
#endif

		tx_msg_p->tx_msg_size = clen;
		i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask);
		if (ngathers > hxge_tx_max_gathers) {
			good_packet = B_FALSE;
			hcksum_retrieve(mp, NULL, NULL, &start_offset,
			    &stuff_offset, &end_offset, &value, &cksum_flags);

			HXGE_DEBUG_MSG((NULL, TX_CTL,
			    "==> hxge_start(14): pull msg - "
			    "len %d pkt_len %d ngathers %d",
			    len, pkt_len, ngathers));
			/* Pull all message blocks from b_cont */
			if ((msgpullup(mp, -1)) == NULL) {
				goto hxge_start_fail2;
			}
			goto hxge_start_fail2;
		}
	} /* while (nmp) */

	tx_msg_p->tx_message = mp;
	tx_desc_p = &tx_desc_ring_vp[sop_index];
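	/*
	 * All fragments are queued; go back to the start-of-packet
	 * descriptor and fill in the 16-byte internal packet header
	 * (checksum offsets, pad count, total transfer length).
	 */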
#if defined(__i386)
	hpi_desc_handle.regp = (uint32_t)tx_desc_p;
#else
	hpi_desc_handle.regp = (uint64_t)tx_desc_p;
#endif

	pkthdrp = (p_tx_pkt_hdr_all_t)hdrp;
	pkthdrp->reserved = 0;
	hdrp->value = 0;
	(void) hxge_fill_tx_hdr(mp, B_FALSE, cksum_on,
	    (pkt_len - TX_PKT_HEADER_SIZE), npads, pkthdrp);

	/*
	 * Hardware header should not be counted as part of the frame
	 * when determining the frame size
	 */
	if ((pkt_len - TX_PKT_HEADER_SIZE) > (STD_FRAME_SIZE - ETHERFCSL)) {
		tdc_stats->tx_jumbo_pkts++;
	}

	min_len = (hxgep->msg_min + TX_PKT_HEADER_SIZE + (npads * 2));
	if (pkt_len < min_len) {
		/* Assume we use bcopy to premapped buffers */
		kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
		HXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> hxge_start(14-1): < (msg_min + 16)"
		    "len %d pkt_len %d min_len %d bzero %d ngathers %d",
		    len, pkt_len, min_len, (min_len - pkt_len), ngathers));
		bzero((kaddr + pkt_len), (min_len - pkt_len));
		pkt_len = tx_msg_p->tx_msg_size = min_len;

		sop_tx_desc_p->bits.tr_len = min_len;

		HXGE_MEM_PIO_WRITE64(hpi_desc_handle, sop_tx_desc_p->value);
		tx_desc_p->value = sop_tx_desc_p->value;

		HXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> hxge_start(14-2): < msg_min - "
		    "len %d pkt_len %d min_len %d ngathers %d",
		    len, pkt_len, min_len, ngathers));
	}

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: cksum_flags 0x%x ",
	    cksum_flags));
	if (cksum_flags & HCK_PARTIALCKSUM) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: cksum_flags 0x%x (partial checksum) ",
		    cksum_flags));
		cksum_on = B_TRUE;
		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
		tmp_len = (uint64_t)(start_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4START_SHIFT);
		tmp_len = (uint64_t)(stuff_offset >> 1);
		hdrp->value |= (tmp_len << TX_PKT_HEADER_L4STUFF_SHIFT);

		HXGE_DEBUG_MSG((hxgep, TX_CTL,
		    "==> hxge_start: from IP cksum_flags 0x%x "
		    "(partial checksum) "
		    "after SHIFT start_offset %d stuff_offset %d",
		    cksum_flags, start_offset, stuff_offset));
	}

	/*
	 * pkt_len already includes 16 + paddings!!
	 * Update the control header length
	 */

	/*
	 * Note that Hydra is different from Neptune where
	 * tot_xfer_len = (pkt_len - TX_PKT_HEADER_SIZE);
	 */
	tot_xfer_len = pkt_len;
	tmp_len = hdrp->value |
	    (tot_xfer_len << TX_PKT_HEADER_TOT_XFER_LEN_SHIFT);

	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start(15_x1): setting SOP "
	    "tot_xfer_len 0x%llx (%d) pkt_len %d tmp_len "
	    "0x%llx hdrp->value 0x%llx",
	    tot_xfer_len, tot_xfer_len, pkt_len, tmp_len, hdrp->value));
#if defined(_BIG_ENDIAN)
	hdrp->value = ddi_swap64(tmp_len);
#else
	hdrp->value = tmp_len;
#endif
	HXGE_DEBUG_MSG((hxgep,
	    TX_CTL, "==> hxge_start(15_x2): setting SOP "
	    "after SWAP: tot_xfer_len 0x%llx pkt_len %d "
	    "tmp_len 0x%llx hdrp->value 0x%llx",
	    tot_xfer_len, pkt_len, tmp_len, hdrp->value));

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(15): setting SOP "
	    "wr_index %d tot_xfer_len (%d) pkt_len %d npads %d",
	    sop_index, tot_xfer_len, pkt_len, npads));

	sop_tx_desc_p->bits.sop = 1;
	sop_tx_desc_p->bits.mark = mark_mode;
	sop_tx_desc_p->bits.num_ptr = ngathers;

	if (mark_mode)
		tdc_stats->tx_marks++;

	HXGE_MEM_PIO_WRITE64(hpi_desc_handle, sop_tx_desc_p->value);
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start(16): set SOP done"));

#ifdef HXGE_DEBUG
	hpi_desc_handle.hxgep = hxgep;
	hpi_desc_handle.function.function = 0;
	hpi_desc_handle.function.instance = hxgep->instance;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "\n\t: value 0x%llx\n"
	    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\tmark %d sop %d\n",
	    save_desc_p->value, sad, save_desc_p->bits.tr_len,
	    xfer_len, save_desc_p->bits.num_ptr, save_desc_p->bits.mark,
	    save_desc_p->bits.sop));
	(void) hpi_txdma_dump_desc_one(hpi_desc_handle, NULL, sop_index);

	dump_len = (pkt_len > 128) ? 128: pkt_len;
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start: dump packets(17) (after sop set, len "
	    " (len/dump_len/pkt_len/tot_xfer_len) %d/%d/%d/%d):\n"
	    "ptr $%p: %s", len, dump_len, pkt_len, tot_xfer_len,
	    (char *)hdrp, hxge_dump_packet((char *)hdrp, dump_len)));
	HXGE_DEBUG_MSG((hxgep, TX_CTL,
	    "==> hxge_start(18): TX desc sync: sop_index %d", sop_index));
#endif

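	/*
	 * Sync the descriptors just built out to the device.  When the
	 * descriptors wrap past the end of the ring, two syncs are
	 * needed: one for the tail of the ring and one for the start.
	 */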
	if ((ngathers == 1) || tx_ring_p->wr_index < i) {
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    ngathers * sizeof (tx_desc_t), DDI_DMA_SYNC_FORDEV);

		HXGE_DEBUG_MSG((hxgep, TX_CTL, "hxge_start(19): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	} else { /* more than one descriptor and wrap around */
		uint32_t nsdescs = tx_ring_p->tx_ring_size - sop_index;
		(void) ddi_dma_sync(tx_desc_dma_handle,
		    sop_index * sizeof (tx_desc_t),
		    nsdescs * sizeof (tx_desc_t), DDI_DMA_SYNC_FORDEV);
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "hxge_start(20): sync 1 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset, pkt_len, ngathers, sop_index));

		(void) ddi_dma_sync(tx_desc_dma_handle, 0,
		    (ngathers - nsdescs) * sizeof (tx_desc_t),
		    DDI_DMA_SYNC_FORDEV);
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "hxge_start(21): sync 2 "
		    "cs_off = 0x%02X cs_s_off = 0x%02X "
		    "pkt_len %d ngathers %d sop_index %d\n",
		    stuff_offset, start_offset,
		    pkt_len, ngathers, sop_index));
	}

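	/*
	 * Update the software tail pointer and its wrap bit, then notify
	 * the TDC by writing the kick register below.
	 */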
	tail_index = tx_ring_p->wr_index;
	tail_wrap = tx_ring_p->wr_index_wrap;

	tx_ring_p->wr_index = i;
	if (tx_ring_p->wr_index <= tail_index) {
		tx_ring_p->wr_index_wrap = ((tail_wrap == B_TRUE) ?
		    B_FALSE : B_TRUE);
	}

	tx_ring_p->descs_pending += ngathers;
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: TX kick: "
	    "channel %d wr_index %d wrap %d ngathers %d desc_pend %d",
	    tx_ring_p->tdc, tx_ring_p->wr_index, tx_ring_p->wr_index_wrap,
	    ngathers, tx_ring_p->descs_pending));
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: TX KICKING: "));

	kick.value = 0;
	kick.bits.wrap = tx_ring_p->wr_index_wrap;
	kick.bits.tail = (uint16_t)tx_ring_p->wr_index;

	/* Kick start the Transmit kick register */
	TXDMA_REG_WRITE64(HXGE_DEV_HPI_HANDLE(hxgep),
	    TDC_TDR_KICK, (uint8_t)tx_ring_p->tdc, kick.value);
	tdc_stats->tx_starts++;
	MUTEX_EXIT(&tx_ring_p->lock);
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_start"));
	return (status);

hxge_start_fail2:
	if (good_packet == B_FALSE) {
		cur_index = sop_index;
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_start: clean up"));
		for (i = 0; i < ngathers; i++) {
			tx_desc_p = &tx_desc_ring_vp[cur_index];
#if defined(__i386)
			hpi_handle.regp = (uint32_t)tx_desc_p;
#else
			hpi_handle.regp = (uint64_t)tx_desc_p;
#endif
			tx_msg_p = &tx_msg_ring[cur_index];
			(void) hpi_txdma_desc_set_zero(hpi_handle, 1);
			if (tx_msg_p->flags.dma_type == USE_DVMA) {
				HXGE_DEBUG_MSG((hxgep, TX_CTL,
				    "tx_desc_p = %X index = %d",
				    tx_desc_p, tx_ring_p->rd_index));
				(void) dvma_unload(tx_msg_p->dvma_handle,
				    0, -1);
				tx_msg_p->dvma_handle = NULL;
				if (tx_ring_p->dvma_wr_index ==
				    tx_ring_p->dvma_wrap_mask)
					tx_ring_p->dvma_wr_index = 0;
				else
					tx_ring_p->dvma_wr_index++;
				tx_ring_p->dvma_pending--;
			} else if (tx_msg_p->flags.dma_type == USE_DMA) {
				if (ddi_dma_unbind_handle(
				    tx_msg_p->dma_handle)) {
					cmn_err(CE_WARN, "hxge_start: "
					    "ddi_dma_unbind_handle failed");
				}
			}
			tx_msg_p->flags.dma_type = USE_NONE;
			cur_index = TXDMA_DESC_NEXT_INDEX(cur_index, 1,
			    tx_ring_p->tx_wrap_mask);
		}

		hxgep->resched_needed = B_TRUE;
	}

	MUTEX_EXIT(&tx_ring_p->lock);

hxge_start_fail1:
	/* Add FMA to check the access handle hxge_hregh */
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_start"));
	return (status);
}

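/*
 * hxge_send() - select a transmit ring for the packet via
 * hxge_tx_lb_ring() and hand it to hxge_start().  Returns B_TRUE on
 * success and B_FALSE if the ring could not accept the packet.
 */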
boolean_t
hxge_send(p_hxge_t hxgep, mblk_t *mp, p_mac_tx_hint_t hp)
{
	p_tx_ring_t	*tx_rings;
	uint8_t		ring_index;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_send"));

	ASSERT(mp->b_next == NULL);

	ring_index = hxge_tx_lb_ring(mp, hxgep->max_tdcs, hp);
	tx_rings = hxgep->tx_rings->rings;
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_tx_msg: tx_rings $%p",
	    tx_rings));
	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_tx_msg: max_tdcs %d "
	    "ring_index %d", hxgep->max_tdcs, ring_index));

	if (hxge_start(hxgep, tx_rings[ring_index], mp)) {
		HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_send: failed "
		    "ring index %d", ring_index));
		return (B_FALSE);
	}

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "<== hxge_send: ring index %d",
	    ring_index));
	return (B_TRUE);
}

/*
 * hxge_m_tx() - send a chain of packets
 */
mblk_t *
hxge_m_tx(void *arg, mblk_t *mp)
{
	p_hxge_t	hxgep = (p_hxge_t)arg;
	mblk_t		*next;
	mac_tx_hint_t	hint;

	if (!(hxgep->drv_state & STATE_HW_INITIALIZED)) {
		HXGE_DEBUG_MSG((hxgep, DDI_CTL,
		    "==> hxge_m_tx: hardware not initialized"));
		HXGE_DEBUG_MSG((hxgep, DDI_CTL, "<== hxge_m_tx"));
		return (mp);
	}

	hint.hash = NULL;
	hint.vid = 0;
	hint.sap = 0;

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Until Nemo tx resource works, the mac driver
		 * does the load balancing based on TCP port,
		 * or CPU. For debugging, we use a system
		 * configurable parameter.
		 */
		if (!hxge_send(hxgep, mp, &hint)) {
			mp->b_next = next;
			break;
		}

		mp = next;

		HXGE_DEBUG_MSG((NULL, TX_CTL,
		    "==> hxge_m_tx: (go back to loop) mp $%p next $%p",
		    mp, next));
	}
	return (mp);
}

int
hxge_tx_lb_ring(p_mblk_t mp, uint32_t maxtdcs, p_mac_tx_hint_t hp)
{
	uint8_t		ring_index = 0;
	uint8_t		*tcp_port;
	p_mblk_t	nmp;
	size_t		mblk_len;
	size_t		iph_len;
	size_t		hdrs_size;
	uint8_t		hdrs_buf[sizeof (struct ether_header) +
	    IP_MAX_HDR_LENGTH + sizeof (uint32_t)];

	/*
	 * allocate space big enough to cover
	 * the max ip header length and the first
	 * 4 bytes of the TCP/IP header.
	 */
	boolean_t	qos = B_FALSE;

	HXGE_DEBUG_MSG((NULL, TX_CTL, "==> hxge_tx_lb_ring"));

	if (hp->vid) {
		qos = B_TRUE;
	}
	switch (hxge_tx_lb_policy) {
	case HXGE_TX_LB_TCPUDP:	/* default IPv4 TCP/UDP */
	default:
		tcp_port = mp->b_rptr;
		if (!hxge_no_tx_lb && !qos &&
		    (ntohs(((p_ether_header_t)tcp_port)->ether_type) ==
		    ETHERTYPE_IP)) {
			nmp = mp;
			mblk_len = MBLKL(nmp);
			tcp_port = NULL;
			if (mblk_len > sizeof (struct ether_header) +
			    sizeof (uint8_t)) {
				tcp_port = nmp->b_rptr +
				    sizeof (struct ether_header);
				mblk_len -= sizeof (struct ether_header);
				iph_len = ((*tcp_port) & 0x0f) << 2;
				if (mblk_len > (iph_len + sizeof (uint32_t))) {
					tcp_port = nmp->b_rptr;
				} else {
					tcp_port = NULL;
				}
			}
			if (tcp_port == NULL) {
				hdrs_size = 0;
				((p_ether_header_t)hdrs_buf)->ether_type = 0;
				while ((nmp) && (hdrs_size <
				    sizeof (hdrs_buf))) {
					mblk_len = MBLKL(nmp);
					if (mblk_len >=
					    (sizeof (hdrs_buf) - hdrs_size))
						mblk_len = sizeof (hdrs_buf) -
						    hdrs_size;
					bcopy(nmp->b_rptr,
					    &hdrs_buf[hdrs_size], mblk_len);
					hdrs_size += mblk_len;
					nmp = nmp->b_cont;
				}
				tcp_port = hdrs_buf;
			}
			tcp_port += sizeof (ether_header_t);
			if (!(tcp_port[6] & 0x3f) && !(tcp_port[7] & 0xff)) {
				switch (tcp_port[9]) {
				case IPPROTO_TCP:
				case IPPROTO_UDP:
				case IPPROTO_ESP:
					tcp_port += ((*tcp_port) & 0x0f) << 2;
					ring_index = ((tcp_port[0] ^
					    tcp_port[1] ^
					    tcp_port[2] ^
					    tcp_port[3]) % maxtdcs);
					break;

				case IPPROTO_AH:
					/* SPI starts at the 4th byte */
					tcp_port += ((*tcp_port) & 0x0f) << 2;
					ring_index = ((tcp_port[4] ^
					    tcp_port[5] ^
					    tcp_port[6] ^
					    tcp_port[7]) % maxtdcs);
					break;

				default:
					ring_index = tcp_port[19] % maxtdcs;
					break;
				}
			} else { /* fragmented packet */
				ring_index = tcp_port[19] % maxtdcs;
			}
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case HXGE_TX_LB_HASH:
		if (hp->hash) {
#if defined(__i386)
			ring_index = ((uint32_t)(hp->hash) % maxtdcs);
#else
			ring_index = ((uint64_t)(hp->hash) % maxtdcs);
#endif
		} else {
			ring_index = mp->b_band % maxtdcs;
		}
		break;

	case HXGE_TX_LB_DEST_MAC:
		/* Use destination MAC address */
		tcp_port = mp->b_rptr;
		ring_index = tcp_port[5] % maxtdcs;
		break;
	}
	HXGE_DEBUG_MSG((NULL, TX_CTL, "<== hxge_tx_lb_ring"));
	return (ring_index);
}

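/*
 * hxge_reschedule() - soft interrupt handler, triggered from
 * hxge_start() when a ring ran out of descriptors; asks the MAC layer
 * (mac_tx_update) to retry queued transmits.
 */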
uint_t
hxge_reschedule(caddr_t arg)
{
	p_hxge_t hxgep;

	hxgep = (p_hxge_t)arg;

	HXGE_DEBUG_MSG((hxgep, TX_CTL, "==> hxge_reschedule"));

	if (hxgep->hxge_mac_state == HXGE_MAC_STARTED &&
	    hxgep->resched_needed) {
		mac_tx_update(hxgep->mach);
		hxgep->resched_needed = B_FALSE;
		hxgep->resched_running = B_FALSE;
	}

	HXGE_DEBUG_MSG((NULL, TX_CTL, "<== hxge_reschedule"));
	return (DDI_INTR_CLAIMED);
}