/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "bge_impl.h"


/*
 * The transmit-side code uses an allocation process which is similar
 * to some theme park roller-coaster rides, where riders sit in cars
 * that can go individually, but work better in a train.
 *
 * 1) RESERVE a place - this doesn't refer to any specific car or
 *    seat, just that you will get a ride.  The attempt to RESERVE a
 *    place can fail if all spaces in all cars are already committed.
 *
 * 2) Prepare yourself; this may take an arbitrary (but not unbounded)
 *    time, and you can back out at this stage, in which case you must
 *    give up (RENOUNCE) your place.
 *
 * 3) CLAIM your space - a specific car (the next sequentially
 *    numbered one) is allocated at this stage, and is guaranteed
 *    to be part of the next train to depart.  Once you've done
 *    this, you can't back out, nor wait for any external event
 *    or resource.
 *
 * 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they
 *    all depart together as a single train!
 *
 * 5) At the end of the ride, you climb out of the car and RENOUNCE
 *    your right to it, so that it can be recycled for another rider.
 *
 * For each rider, these have to occur in this order, but the riders
 * don't have to stay in the same order at each stage.  In particular,
 * they may overtake each other between RESERVING a place and CLAIMING
 * it, or between CLAIMING and OCCUPYING a space.
 *
 * Once a car is CLAIMED, the train currently being assembled can't go
 * without that car (this guarantees that the cars in a single train
 * make up a consecutively-numbered set).  Therefore, when any train
 * leaves, we know there can't be any riders in transit between CLAIMING
 * and OCCUPYING their cars.  There can be some who have RESERVED but
 * not yet CLAIMED their places.  That's OK, though, because they'll go
 * into the next train.
 */
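
/*
 * Aside (illustrative only, not driver code): the RESERVE, CLAIM and
 * RENOUNCE steps above reduce to simple atomic operations on a
 * free-slot counter and a circular claim index.  A minimal sketch,
 * assuming the Solaris atomic_ops(3C) primitives; reserve_place(),
 * claim_slot() and renounce_places() are hypothetical names, not the
 * driver's helpers (the real ones used below are bge_atomic_reserve(),
 * bge_atomic_next() and bge_atomic_renounce()):
 *
 *	boolean_t
 *	reserve_place(volatile uint64_t *freep)
 *	{
 *		uint64_t n;
 *
 *		do {
 *			n = *freep;
 *			if (n == 0)
 *				return (B_FALSE);
 *		} while (atomic_cas_64(freep, n, n - 1) != n);
 *		return (B_TRUE);
 *	}
 *
 *	uint64_t
 *	claim_slot(volatile uint64_t *nextp, uint64_t nslots)
 *	{
 *		return ((atomic_inc_64_nv(nextp) - 1) % nslots);
 *	}
 *
 *	void
 *	renounce_places(volatile uint64_t *freep, uint64_t n)
 *	{
 *		atomic_add_64(freep, n);
 *	}
 *
 * reserve_place() can fail (all seats committed); claim_slot() cannot,
 * because a claim is only attempted after a successful reservation.
 */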
#define	BGE_DBG		BGE_DBG_SEND	/* debug flag for this code	*/

/*
 * ========== Send-side recycle routines ==========
 */

/*
 * Recycle all the completed buffers in the specified send ring up to
 * (but not including) the consumer index in the status block.
 *
 * This function must advance (srp->tc_next) AND adjust (srp->tx_free)
 * to account for the packets it has recycled.
 *
 * This is a trivial version that just does that and nothing more, but
 * it suffices while there's only one method for sending messages (by
 * copying) and that method doesn't need any special per-buffer action
 * for recycling.
 */
static void bge_recycle_ring(bge_t *bgep, send_ring_t *srp);
#pragma	inline(bge_recycle_ring)

static void
bge_recycle_ring(bge_t *bgep, send_ring_t *srp)
{
	sw_sbd_t *ssbdp;
	bge_queue_item_t *buf_item;
	bge_queue_item_t *buf_item_head;
	bge_queue_item_t *buf_item_tail;
	bge_queue_t *txbuf_queue;
	uint64_t slot;
	uint64_t n;

	ASSERT(mutex_owned(srp->tc_lock));

	/*
	 * We're about to release one or more places :-)
	 * These ASSERTions check that our invariants still hold:
	 *	there must always be at least one free place
	 *	at this point, there must be at least one place NOT free
	 *	we're not about to free more places than were claimed!
	 */
	ASSERT(srp->tx_free > 0);
	ASSERT(srp->tx_free < srp->desc.nslots);

	buf_item_head = buf_item_tail = NULL;
	for (n = 0, slot = srp->tc_next; slot != *srp->cons_index_p;
	    slot = NEXT(slot, srp->desc.nslots)) {
		ssbdp = &srp->sw_sbds[slot];
		ASSERT(ssbdp->pbuf != NULL);
		buf_item = ssbdp->pbuf;
		if (buf_item_head == NULL)
			buf_item_head = buf_item_tail = buf_item;
		else {
			buf_item_tail->next = buf_item;
			buf_item_tail = buf_item;
		}
		ssbdp->pbuf = NULL;
		n++;
	}
	if (n == 0)
		return;

	/*
	 * Update the recycle index and the free tx BD count
	 */
	srp->tc_next = slot;
	ASSERT(srp->tx_free + n <= srp->desc.nslots);
	bge_atomic_renounce(&srp->tx_free, n);

	/*
	 * Reset the watchdog count: to 0 if all buffers are
	 * now free, or to 1 if some are still outstanding.
	 * Note: non-synchronised access here means we may get
	 * the "wrong" answer, but only in a harmless fashion
	 * (i.e. we deactivate the watchdog because all buffers
	 * are apparently free, even though another thread may
	 * have claimed one before we leave here; in this case
	 * the watchdog will restart on the next send() call).
	 */
	bgep->watchdog = srp->tx_free == srp->desc.nslots ? 0 : 1;

	/*
	 * Return the tx buffers to the buffer push queue
	 */
	txbuf_queue = srp->txbuf_push_queue;
	mutex_enter(txbuf_queue->lock);
	buf_item_tail->next = txbuf_queue->head;
	txbuf_queue->head = buf_item_head;
	txbuf_queue->count += n;
	mutex_exit(txbuf_queue->lock);

	/*
	 * Check whether we need to exchange the tx buffer push and pop queues
	 */
	if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) &&
	    (srp->txbuf_pop_queue->count < txbuf_queue->count)) {
		srp->txbuf_push_queue = srp->txbuf_pop_queue;
		srp->txbuf_pop_queue = txbuf_queue;
	}

	if (srp->tx_flow != 0 || bgep->tx_resched_needed)
		ddi_trigger_softintr(bgep->drain_id);
}
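
/*
 * Aside (illustrative only, not driver code): the push/pop queue pair
 * used above is a two-list scheme for cutting lock contention.  The
 * recycler always prepends a whole batch to the "push" queue while
 * senders normally take from the "pop" queue, so in the common case
 * the two sides contend on different locks.  A minimal sketch of the
 * producer side, assuming a hypothetical queue and item type (the
 * driver's real ones are bge_queue_t and bge_queue_item_t):
 *
 *	typedef struct q {
 *		kmutex_t	*lock;
 *		item_t		*head;
 *		uint32_t	count;
 *	} q_t;
 *
 *	void
 *	producer_put(q_t *push, item_t *ip)
 *	{
 *		mutex_enter(push->lock);
 *		ip->next = push->head;
 *		push->head = ip;
 *		push->count++;
 *		mutex_exit(push->lock);
 *	}
 *
 * bge_recycle_ring() above performs the queue swap only when the pop
 * queue runs low, and bge_get_txbuf() below falls back to the push
 * queue only when the pop queue is empty, so the cost of cross-side
 * synchronisation is amortised over many packets.
 */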
/*
 * Recycle all returned slots in all rings.
 *
 * To give priority to low-numbered rings, whenever we have recycled any
 * slots in any ring except 0, we restart scanning again from ring 0.
 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0:
 *
 *	0	found some - recycle them
 *	1..2			none found
 *	3	found some - recycle them and restart scan
 *	0..9			none found
 *	10	found some - recycle them and restart scan
 *	0..2			none found
 *	3	found some more - recycle them and restart scan
 *	0	found some more - recycle them
 *	0..9			none found
 *	10	found some more - recycle them and restart scan
 *	0	found some more - recycle them
 *	1..15			none found
 *
 * The routine returns only when a complete scan has been performed
 * without finding any slots to recycle.
 *
 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time
 * constant and allows the compiler to optimise away the outer do-loop
 * if only one send ring is being used.
 */
void bge_recycle(bge_t *bgep, bge_status_t *bsp);
#pragma	no_inline(bge_recycle)

void
bge_recycle(bge_t *bgep, bge_status_t *bsp)
{
	send_ring_t *srp;
	uint64_t ring;
	uint64_t tx_rings = bgep->chipid.tx_rings;

restart:
	ring = 0;
	srp = &bgep->send[ring];
	do {
		/*
		 * For each ring, (srp->cons_index_p) points to the
		 * proper index within the status block (which has
		 * already been sync'd by the caller).
		 */
		ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring));

		if (*srp->cons_index_p == srp->tc_next)
			continue;		/* no slots to recycle	*/
		if (mutex_tryenter(srp->tc_lock) == 0)
			continue;		/* already in process	*/
		bge_recycle_ring(bgep, srp);
		mutex_exit(srp->tc_lock);

		/*
		 * Restart from ring 0, if we're not on ring 0 already.
		 * As the h/w selects send BDs based purely on priority,
		 * and available BDs on the higher-priority ring are
		 * always selected first, the driver should stay
		 * consistent with the h/w and give lower-numbered rings
		 * higher priority.
		 */
		if (tx_rings > 1 && ring > 0)
			goto restart;

		/*
		 * Loop over all rings (if there *are* multiple rings)
		 */
	} while (++srp, ++ring < tx_rings);
}


/*
 * ========== Send-side transmit routines ==========
 */
#define	TCP_CKSUM_OFFSET	16
#define	UDP_CKSUM_OFFSET	6

static void
bge_pseudo_cksum(uint8_t *buf)
{
	uint32_t cksum;
	uint16_t iphl;
	uint16_t proto;

	/*
	 * Point it to the ip header.
	 */
	buf += sizeof (struct ether_header);

	/*
	 * Calculate the pseudo-header checksum.
	 */
	iphl = 4 * (buf[0] & 0xF);			/* IP header length */
	cksum = (((uint16_t)buf[2])<<8) + buf[3] - iphl; /* payload length */
	cksum += proto = buf[9];			/* protocol */
	cksum += (((uint16_t)buf[12])<<8) + buf[13];	/* source address */
	cksum += (((uint16_t)buf[14])<<8) + buf[15];
	cksum += (((uint16_t)buf[16])<<8) + buf[17];	/* destination address */
	cksum += (((uint16_t)buf[18])<<8) + buf[19];
	cksum = (cksum>>16) + (cksum & 0xFFFF);		/* fold carries ... */
	cksum = (cksum>>16) + (cksum & 0xFFFF);		/* ... twice */

	/*
	 * Point it to the TCP/UDP header, and
	 * update the checksum field.
	 */
	buf += iphl + ((proto == IPPROTO_TCP) ?
	    TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET);

	/*
	 * There is a real possibility that this pointer cast is a
	 * problem.  It should be fixed when we understand the code
	 * better; E_BAD_PTR_CAST_ALIGN is added to make it temporarily
	 * clean.
	 */
	*(uint16_t *)buf = htons((uint16_t)cksum);
}
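
/*
 * Worked example (illustrative): for a TCP packet with total IP
 * length 0x0034 (52 bytes), iphl = 20, proto = 6 (TCP),
 * src = 192.168.0.1 (words 0xC0A8, 0x0001) and dst = 192.168.0.2
 * (words 0xC0A8, 0x0002), the sum computed above is
 *
 *	(0x0034 - 20) + 0x0006			TCP length + protocol
 *	+ 0xC0A8 + 0x0001 + 0xC0A8 + 0x0002	addresses, 16 bits at a time
 *	= 0x18179
 *
 * Folding the carry back in gives (0x18179 >> 16) + 0x8179 = 0x817A;
 * the fold is done twice because the first fold can itself carry out
 * of 16 bits (here it doesn't).  htons(0x817A) is then seeded into
 * the TCP checksum field for the chip to complete in hardware.
 */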
static bge_queue_item_t *
bge_get_txbuf(bge_t *bgep, send_ring_t *srp)
{
	bge_queue_item_t *txbuf_item;
	bge_queue_t *txbuf_queue;

	txbuf_queue = srp->txbuf_pop_queue;
	mutex_enter(txbuf_queue->lock);
	if (txbuf_queue->count == 0) {
		mutex_exit(txbuf_queue->lock);
		txbuf_queue = srp->txbuf_push_queue;
		mutex_enter(txbuf_queue->lock);
		if (txbuf_queue->count == 0) {
			mutex_exit(txbuf_queue->lock);
			/* Try to allocate more tx buffers */
			if (srp->tx_array < srp->tx_array_max) {
				mutex_enter(srp->tx_lock);
				txbuf_item = bge_alloc_txbuf_array(bgep, srp);
				mutex_exit(srp->tx_lock);
			} else
				txbuf_item = NULL;
			return (txbuf_item);
		}
	}
	txbuf_item = txbuf_queue->head;
	txbuf_queue->head = (bge_queue_item_t *)txbuf_item->next;
	txbuf_queue->count--;
	mutex_exit(txbuf_queue->lock);
	txbuf_item->next = NULL;

	return (txbuf_item);
}

static void bge_send_fill_txbd(send_ring_t *srp, send_pkt_t *pktp);
#pragma	inline(bge_send_fill_txbd)

static void
bge_send_fill_txbd(send_ring_t *srp, send_pkt_t *pktp)
{
	bge_sbd_t *hw_sbd_p;
	sw_sbd_t *ssbdp;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;
	uint64_t slot;

	ASSERT(mutex_owned(srp->tx_lock));

	/*
	 * Go straight to claiming our already-reserved places
	 * on the train!
	 */
	ASSERT(pktp->txbuf_item != NULL);
	txbuf_item = pktp->txbuf_item;
	txbuf = txbuf_item->item;
	slot = srp->tx_next;
	ssbdp = &srp->sw_sbds[slot];
	hw_sbd_p = DMA_VPTR(ssbdp->desc);
	hw_sbd_p->flags = 0;
	ASSERT(txbuf->copy_len != 0);
	(void) ddi_dma_sync(txbuf->buf.dma_hdl, 0,
	    txbuf->copy_len, DDI_DMA_SYNC_FORDEV);
	ASSERT(ssbdp->pbuf == NULL);
	ssbdp->pbuf = txbuf_item;
	srp->tx_next = NEXT(slot, srp->desc.nslots);
	pktp->txbuf_item = NULL;

	/*
	 * Set up the hardware send buffer descriptor
	 */
	hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress;
	hw_sbd_p->len = txbuf->copy_len;
	if (pktp->vlan_tci != 0) {
		hw_sbd_p->vlan_tci = pktp->vlan_tci;
		hw_sbd_p->host_buf_addr += VLAN_TAGSZ;
		hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG;
	}
	if (pktp->pflags & HCK_IPV4_HDRCKSUM)
		hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM;
	if (pktp->pflags & HCK_FULLCKSUM)
		hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM;
	hw_sbd_p->flags |= SBD_FLAG_PACKET_END;
}

/*
 * Send a message by copying it into a preallocated (and premapped) buffer
 */
static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp);
#pragma	inline(bge_send_copy)

static void
bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp)
{
	mblk_t *bp;
	uint32_t mblen;
	char *pbuf;

	txbuf->copy_len = 0;
	pbuf = DMA_VPTR(txbuf->buf);
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		if ((mblen = MBLKL(bp)) == 0)
			continue;
		ASSERT(txbuf->copy_len + mblen <=
		    bgep->chipid.snd_buff_size);
		bcopy(bp->b_rptr, pbuf, mblen);
		pbuf += mblen;
		txbuf->copy_len += mblen;
	}
}
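
/*
 * Aside (illustrative only): MBLKL(bp) is simply
 * (bp->b_wptr - bp->b_rptr), so the loop above flattens an
 * arbitrarily fragmented message into a single premapped buffer.
 * Code that could not rely on the ASSERT would bound-check the whole
 * chain before copying; a minimal sketch, under the same
 * snd_buff_size limit:
 *
 *	size_t total = msgsize(mp);
 *	if (total > bgep->chipid.snd_buff_size)
 *		return (B_FALSE);
 *
 * where msgsize() (see sys/strsun.h) sums MBLKL() over the b_cont
 * chain.
 */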
/*
 * Fill the Tx buffer descriptors and trigger the h/w transmission
 */
static void
bge_send_serial(bge_t *bgep, send_ring_t *srp)
{
	send_pkt_t *pktp;
	uint64_t txfill_next;
	uint32_t count;
	uint32_t tx_next;
	sw_sbd_t *ssbdp;
	bge_status_t *bsp;

	/*
	 * Try to hold the tx lock:
	 * If we are in an interrupt context, use mutex_enter() to
	 * ensure a quick response for tx in interrupt context;
	 * otherwise, use mutex_tryenter() to serialize this h/w tx
	 * BD filling and transmission triggering task.
	 */
	if (servicing_interrupt() != 0)
		mutex_enter(srp->tx_lock);
	else if (mutex_tryenter(srp->tx_lock) == 0)
		return;		/* already in process	*/

	bsp = DMA_VPTR(bgep->status_block);
	txfill_next = srp->txfill_next;
start_tx:
	tx_next = srp->tx_next;
	ssbdp = &srp->sw_sbds[tx_next];
	for (count = 0; count < bgep->param_drain_max; ++count) {
		pktp = &srp->pktp[txfill_next];
		if (!pktp->tx_ready) {
			if (count == 0)
				srp->tx_block++;
			break;
		}

		/*
		 * If there are not enough BDs, try to recycle more
		 */
		if (srp->tx_free <= 1)
			bge_recycle(bgep, bsp);

		/*
		 * Reserve the required BDs: 1 is enough
		 */
		if (!bge_atomic_reserve(&srp->tx_free, 1)) {
			srp->tx_nobd++;
			break;
		}

		/*
		 * Fill the tx BD
		 */
		bge_send_fill_txbd(srp, pktp);
		txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX);
		pktp->tx_ready = B_FALSE;
	}

	/*
	 * Trigger the h/w to start transmission.
	 */
	if (count != 0) {
		bge_atomic_sub64(&srp->tx_flow, count);
		if (tx_next + count > srp->desc.nslots) {
			/* The filled BDs wrapped; sync the tail first */
			(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
			    (srp->desc.nslots - tx_next) * sizeof (bge_sbd_t),
			    DDI_DMA_SYNC_FORDEV);
			count -= srp->desc.nslots - tx_next;
			ssbdp = &srp->sw_sbds[0];
		}
		(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
		    count*sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV);
		bge_mbx_put(bgep, srp->chip_mbx_reg, srp->tx_next);
		srp->txfill_next = txfill_next;
		bgep->watchdog++;
		if (srp->tx_flow != 0 && srp->tx_free > 1)
			goto start_tx;
	}

	mutex_exit(srp->tx_lock);
}
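
/*
 * Aside (illustrative only): the enter-vs-tryenter split in
 * bge_send_serial() is a "lock holder does everyone's work" pattern.
 * An interrupt thread must make progress, so it blocks on the lock;
 * an ordinary sender that loses the race may simply leave, because
 * the current holder drains the shared pktp[] array on behalf of all
 * callers.  In outline:
 *
 *	if (servicing_interrupt())
 *		mutex_enter(lock);
 *	else if (mutex_tryenter(lock) == 0)
 *		return;
 *	while (another ready packet in pktp[])
 *		fill one tx BD;
 *	mutex_exit(lock);
 *
 * This is why bge_ring_tx() below can mark its packet tx_ready and
 * call bge_send_serial() without caring whether its own call or a
 * concurrent one actually pushes the BDs to the chip.
 */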
mblk_t *
bge_ring_tx(void *arg, mblk_t *mp)
{
	send_ring_t *srp = arg;
	bge_t *bgep = srp->bgep;
	struct ether_vlan_header *ehp;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;
	send_pkt_t *pktp;
	uint64_t pkt_slot;
	uint16_t vlan_tci;
	uint32_t pflags;
	char *pbuf;

	ASSERT(mp->b_next == NULL);

	/*
	 * Get a s/w tx buffer first
	 */
	txbuf_item = bge_get_txbuf(bgep, srp);
	if (txbuf_item == NULL) {
		/* no tx buffer available */
		srp->tx_nobuf++;
		bgep->tx_resched_needed = B_TRUE;
		bge_send_serial(bgep, srp);
		return (mp);
	}

	/*
	 * Copy all mp fragments to the pkt buffer
	 */
	txbuf = txbuf_item->item;
	bge_send_copy(bgep, txbuf, mp);

	/*
	 * Determine if the packet is VLAN tagged.
	 */
	ASSERT(txbuf->copy_len >= sizeof (struct ether_header));
	pbuf = DMA_VPTR(txbuf->buf);

	ehp = (void *)pbuf;
	if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) {
		/*
		 * Strip the vlan tag: slide the dst+src MAC addresses
		 * forward by VLAN_TAGSZ so they overwrite the tag, and
		 * let pbuf point at the re-based frame start.
		 */
		vlan_tci = ntohs(ehp->ether_tci);
		pbuf = memmove(pbuf + VLAN_TAGSZ, pbuf, 2 * ETHERADDRL);
		txbuf->copy_len -= VLAN_TAGSZ;
	} else
		vlan_tci = 0;

	/*
	 * Retrieve checksum offloading info.
	 */
	hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);

	/*
	 * Calculate the pseudo checksum if needed.
	 */
	if ((pflags & HCK_FULLCKSUM) &&
	    (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM))
		bge_pseudo_cksum((uint8_t *)pbuf);

	/*
	 * The packet buffer is ready to send: get and fill the pkt info
	 */
	pkt_slot = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX);
	pktp = &srp->pktp[pkt_slot];
	ASSERT(pktp->txbuf_item == NULL);
	pktp->txbuf_item = txbuf_item;
	pktp->vlan_tci = vlan_tci;
	pktp->pflags = pflags;
	atomic_inc_64(&srp->tx_flow);
	ASSERT(pktp->tx_ready == B_FALSE);
	pktp->tx_ready = B_TRUE;

	/*
	 * Fill the h/w BD and trigger the h/w to start transmission
	 */
	bge_send_serial(bgep, srp);

	srp->pushed_bytes += MBLKL(mp);

	/*
	 * We've copied the contents, so the message can be freed right away
	 */
	freemsg(mp);
	return (NULL);
}

static mblk_t *
bge_send(bge_t *bgep, mblk_t *mp)
{
	send_ring_t *ring;

	ring = &bgep->send[0];	/* ring 0 */

	return (bge_ring_tx(ring, mp));
}

uint_t
bge_send_drain(caddr_t arg)
{
	uint_t ring = 0;	/* use ring 0 */
	bge_t *bgep;
	send_ring_t *srp;

	bgep = (void *)arg;
	BGE_TRACE(("bge_send_drain($%p)", (void *)bgep));

	srp = &bgep->send[ring];
	bge_send_serial(bgep, srp);

	if (bgep->tx_resched_needed &&
	    (srp->tx_flow < srp->tx_buffers_low) &&
	    (bgep->bge_mac_state == BGE_MAC_STARTED)) {
		mac_tx_update(bgep->mh);
		bgep->tx_resched_needed = B_FALSE;
		bgep->tx_resched++;
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * bge_m_tx() - send a chain of packets
 */
mblk_t *
bge_m_tx(void *arg, mblk_t *mp)
{
	bge_t *bgep = arg;		/* private device info	*/
	mblk_t *next;

	BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp));

	ASSERT(mp != NULL);
	ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED);

	rw_enter(bgep->errlock, RW_READER);
	if (bgep->bge_chip_state != BGE_CHIP_RUNNING) {
		BGE_DEBUG(("bge_m_tx: chip not running"));
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if ((mp = bge_send(bgep, mp)) != NULL) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}
	rw_exit(bgep->errlock);

	return (mp);
}
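
/*
 * Aside (illustrative summary): putting the pieces above together,
 * the transmit path for one message is
 *
 *	bge_m_tx()			split the b_next chain, loop
 *	  -> bge_send()			always uses ring 0
 *	    -> bge_ring_tx()		pop a txbuf, copy, strip any VLAN
 *					tag, publish a pktp[] entry
 *	      -> bge_send_serial()	claim + fill tx BDs, sync them
 *					for the device, write the
 *					producer index to the chip mailbox
 *
 * while bge_recycle()/bge_recycle_ring(), driven from the status
 * block, return completed buffers and wake any blocked senders via
 * the drain soft interrupt (bge_send_drain()).
 */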