1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "bge_impl.h" 30 31 32 /* 33 * The transmit-side code uses an allocation process which is similar 34 * to some theme park roller-coaster rides, where riders sit in cars 35 * that can go individually, but work better in a train. 36 * 37 * 1) RESERVE a place - this doesn't refer to any specific car or 38 * seat, just that you will get a ride. The attempt to RESERVE a 39 * place can fail if all spaces in all cars are already committed. 40 * 41 * 2) Prepare yourself; this may take an arbitrary (but not unbounded) 42 * time, and you can back out at this stage, in which case you must 43 * give up (RENOUNCE) your place. 44 * 45 * 3) CLAIM your space - a specific car (the next sequentially 46 * numbered one) is allocated at this stage, and is guaranteed 47 * to be part of the next train to depart. Once you've done 48 * this, you can't back out, nor wait for any external event 49 * or resource. 50 * 51 * 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they 52 * all depart together as a single train! 53 * 54 * 5) At the end of the ride, you climb out of the car and RENOUNCE 55 * your right to it, so that it can be recycled for another rider. 56 * 57 * For each rider, these have to occur in this order, but the riders 58 * don't have to stay in the same order at each stage. In particular, 59 * they may overtake each other between RESERVING a place and CLAIMING 60 * it, or between CLAIMING and OCCUPYING a space. 61 * 62 * Once a car is CLAIMED, the train currently being assembled can't go 63 * without that car (this guarantees that the cars in a single train 64 * make up a consecutively-numbered set). Therefore, when any train 65 * leaves, we know there can't be any riders in transit between CLAIMING 66 * and OCCUPYING their cars. There can be some who have RESERVED but 67 * not yet CLAIMED their places. That's OK, though, because they'll go 68 * into the next train. 69 */ 70 71 #define BGE_DBG BGE_DBG_SEND /* debug flag for this code */ 72 73 /* 74 * ========== Send-side recycle routines ========== 75 */ 76 77 /* 78 * Recycle all the completed buffers in the specified send ring up to 79 * (but not including) the consumer index in the status block. 80 * 81 * This function must advance (srp->tc_next) AND adjust (srp->tx_free) 82 * to account for the packets it has recycled. 83 * 84 * This is a trivial version that just does that and nothing more, but 85 * it suffices while there's only one method for sending messages (by 86 * copying) and that method doesn't need any special per-buffer action 87 * for recycling. 88 */ 89 static void bge_recycle_ring(bge_t *bgep, send_ring_t *srp); 90 #pragma inline(bge_recycle_ring) 91 92 static void 93 bge_recycle_ring(bge_t *bgep, send_ring_t *srp) 94 { 95 sw_sbd_t *ssbdp; 96 bge_queue_item_t *buf_item; 97 bge_queue_item_t *buf_item_head; 98 bge_queue_item_t *buf_item_tail; 99 bge_queue_t *txbuf_queue; 100 uint64_t slot; 101 uint64_t n; 102 103 ASSERT(mutex_owned(srp->tc_lock)); 104 105 /* 106 * We're about to release one or more places :-) 107 * These ASSERTions check that our invariants still hold: 108 * there must always be at least one free place 109 * at this point, there must be at least one place NOT free 110 * we're not about to free more places than were claimed! 111 */ 112 ASSERT(srp->tx_free > 0); 113 ASSERT(srp->tx_free < srp->desc.nslots); 114 115 buf_item_head = buf_item_tail = NULL; 116 for (n = 0, slot = srp->tc_next; slot != *srp->cons_index_p; 117 slot = NEXT(slot, srp->desc.nslots)) { 118 ssbdp = &srp->sw_sbds[slot]; 119 ASSERT(ssbdp->pbuf != NULL); 120 buf_item = ssbdp->pbuf; 121 if (buf_item_head == NULL) 122 buf_item_head = buf_item_tail = buf_item; 123 else { 124 buf_item_tail->next = buf_item; 125 buf_item_tail = buf_item; 126 } 127 ssbdp->pbuf = NULL; 128 n++; 129 } 130 if (n == 0) 131 return; 132 133 /* 134 * Update recycle index and free tx BD number 135 */ 136 srp->tc_next = slot; 137 ASSERT(srp->tx_free + n <= srp->desc.nslots); 138 bge_atomic_renounce(&srp->tx_free, n); 139 140 /* 141 * Reset the watchdog count: to 0 if all buffers are 142 * now free, or to 1 if some are still outstanding. 143 * Note: non-synchonised access here means we may get 144 * the "wrong" answer, but only in a harmless fashion 145 * (i.e. we deactivate the watchdog because all buffers 146 * are apparently free, even though another thread may 147 * have claimed one before we leave here; in this case 148 * the watchdog will restart on the next send() call). 149 */ 150 bgep->watchdog = srp->tx_free == srp->desc.nslots ? 0 : 1; 151 152 /* 153 * Return tx buffers to buffer push queue 154 */ 155 txbuf_queue = srp->txbuf_push_queue; 156 mutex_enter(txbuf_queue->lock); 157 buf_item_tail->next = txbuf_queue->head; 158 txbuf_queue->head = buf_item_head; 159 txbuf_queue->count += n; 160 mutex_exit(txbuf_queue->lock); 161 162 /* 163 * Check if we need exchange the tx buffer push and pop queue 164 */ 165 if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) && 166 (srp->txbuf_pop_queue->count < txbuf_queue->count)) { 167 srp->txbuf_push_queue = srp->txbuf_pop_queue; 168 srp->txbuf_pop_queue = txbuf_queue; 169 } 170 171 if (bgep->tx_resched_needed) 172 ddi_trigger_softintr(bgep->drain_id); 173 } 174 175 /* 176 * Recycle all returned slots in all rings. 177 * 178 * To give priority to low-numbered rings, whenever we have recycled any 179 * slots in any ring except 0, we restart scanning again from ring 0. 180 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the 181 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0: 182 * 183 * 0 found some - recycle them 184 * 1..2 none found 185 * 3 found some - recycle them and restart scan 186 * 0..9 none found 187 * 10 found some - recycle them and restart scan 188 * 0..2 none found 189 * 3 found some more - recycle them and restart scan 190 * 0 found some more - recycle them 191 * 0..9 none found 192 * 10 found some more - recycle them and restart scan 193 * 0 found some more - recycle them 194 * 1..15 none found 195 * 196 * The routine returns only when a complete scan has been performed 197 * without finding any slots to recycle. 198 * 199 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time 200 * constant and allows the compiler to optimise away the outer do-loop 201 * if only one send ring is being used. 202 */ 203 void bge_recycle(bge_t *bgep, bge_status_t *bsp); 204 #pragma no_inline(bge_recycle) 205 206 void 207 bge_recycle(bge_t *bgep, bge_status_t *bsp) 208 { 209 send_ring_t *srp; 210 uint64_t ring; 211 uint64_t tx_rings = bgep->chipid.tx_rings; 212 213 restart: 214 ring = 0; 215 srp = &bgep->send[ring]; 216 do { 217 /* 218 * For each ring, (srp->cons_index_p) points to the 219 * proper index within the status block (which has 220 * already been sync'd by the caller). 221 */ 222 ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring)); 223 224 if (*srp->cons_index_p == srp->tc_next) 225 continue; /* no slots to recycle */ 226 if (mutex_tryenter(srp->tc_lock) == 0) 227 continue; /* already in process */ 228 bge_recycle_ring(bgep, srp); 229 mutex_exit(srp->tc_lock); 230 231 /* 232 * Restart from ring 0, if we're not on ring 0 already. 233 * As H/W selects send BDs totally based on priority and 234 * available BDs on the higher priority ring are always 235 * selected first, driver should keep consistence with H/W 236 * and gives lower-numbered ring with higher priority. 237 */ 238 if (tx_rings > 1 && ring > 0) 239 goto restart; 240 241 /* 242 * Loop over all rings (if there *are* multiple rings) 243 */ 244 } while (++srp, ++ring < tx_rings); 245 } 246 247 248 /* 249 * ========== Send-side transmit routines ========== 250 */ 251 #define TCP_CKSUM_OFFSET 16 252 #define UDP_CKSUM_OFFSET 6 253 254 static void 255 bge_pseudo_cksum(uint8_t *buf) 256 { 257 uint32_t cksum; 258 uint16_t iphl; 259 uint16_t proto; 260 261 /* 262 * Point it to the ip header. 263 */ 264 buf += sizeof (struct ether_header); 265 266 /* 267 * Calculate the pseudo-header checksum. 268 */ 269 iphl = 4 * (buf[0] & 0xF); 270 cksum = (((uint16_t)buf[2])<<8) + buf[3] - iphl; 271 cksum += proto = buf[9]; 272 cksum += (((uint16_t)buf[12])<<8) + buf[13]; 273 cksum += (((uint16_t)buf[14])<<8) + buf[15]; 274 cksum += (((uint16_t)buf[16])<<8) + buf[17]; 275 cksum += (((uint16_t)buf[18])<<8) + buf[19]; 276 cksum = (cksum>>16) + (cksum & 0xFFFF); 277 cksum = (cksum>>16) + (cksum & 0xFFFF); 278 279 /* 280 * Point it to the TCP/UDP header, and 281 * update the checksum field. 282 */ 283 buf += iphl + ((proto == IPPROTO_TCP) ? 284 TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET); 285 286 *(uint16_t *)buf = htons((uint16_t)cksum); 287 } 288 289 static bge_queue_item_t * 290 bge_get_txbuf(bge_t *bgep, send_ring_t *srp) 291 { 292 bge_queue_item_t *txbuf_item; 293 bge_queue_t *txbuf_queue; 294 295 txbuf_queue = srp->txbuf_pop_queue; 296 mutex_enter(txbuf_queue->lock); 297 if (txbuf_queue->count == 0) { 298 mutex_exit(txbuf_queue->lock); 299 txbuf_queue = srp->txbuf_push_queue; 300 mutex_enter(txbuf_queue->lock); 301 if (txbuf_queue->count == 0) { 302 mutex_exit(txbuf_queue->lock); 303 /* Try to allocate more tx buffers */ 304 if (srp->tx_array < srp->tx_array_max) { 305 mutex_enter(srp->tx_lock); 306 txbuf_item = bge_alloc_txbuf_array(bgep, srp); 307 mutex_exit(srp->tx_lock); 308 } else 309 txbuf_item = NULL; 310 return (txbuf_item); 311 } 312 } 313 txbuf_item = txbuf_queue->head; 314 txbuf_queue->head = (bge_queue_item_t *)txbuf_item->next; 315 txbuf_queue->count--; 316 mutex_exit(txbuf_queue->lock); 317 txbuf_item->next = NULL; 318 319 return (txbuf_item); 320 } 321 322 static void bge_send_fill_txbd(send_ring_t *srp, send_pkt_t *pktp); 323 #pragma inline(bge_send_fill_txbd) 324 325 static void 326 bge_send_fill_txbd(send_ring_t *srp, send_pkt_t *pktp) 327 { 328 bge_sbd_t *hw_sbd_p; 329 sw_sbd_t *ssbdp; 330 bge_queue_item_t *txbuf_item; 331 sw_txbuf_t *txbuf; 332 uint64_t slot; 333 334 ASSERT(mutex_owned(srp->tx_lock)); 335 336 /* 337 * Go straight to claiming our already-reserved places 338 * on the train! 339 */ 340 ASSERT(pktp->txbuf_item != NULL); 341 txbuf_item = pktp->txbuf_item; 342 txbuf = txbuf_item->item; 343 slot = srp->tx_next; 344 ssbdp = &srp->sw_sbds[slot]; 345 hw_sbd_p = DMA_VPTR(ssbdp->desc); 346 hw_sbd_p->flags = 0; 347 ASSERT(txbuf->copy_len != 0); 348 (void) ddi_dma_sync(txbuf->buf.dma_hdl, 0, 349 txbuf->copy_len, DDI_DMA_SYNC_FORDEV); 350 ASSERT(ssbdp->pbuf == NULL); 351 ssbdp->pbuf = txbuf_item; 352 srp->tx_next = NEXT(slot, srp->desc.nslots); 353 pktp->txbuf_item = NULL; 354 355 /* 356 * Setting hardware send buffer descriptor 357 */ 358 hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress; 359 hw_sbd_p->len = txbuf->copy_len; 360 if (pktp->vlan_tci != 0) { 361 hw_sbd_p->vlan_tci = pktp->vlan_tci; 362 hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG; 363 } 364 if (pktp->pflags & HCK_IPV4_HDRCKSUM) 365 hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM; 366 if (pktp->pflags & HCK_FULLCKSUM) 367 hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM; 368 hw_sbd_p->flags |= SBD_FLAG_PACKET_END; 369 } 370 371 /* 372 * Send a message by copying it into a preallocated (and premapped) buffer 373 */ 374 static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp, 375 uint16_t tci); 376 #pragma inline(bge_send_copy) 377 378 static void 379 bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp, uint16_t tci) 380 { 381 mblk_t *bp; 382 uint32_t mblen; 383 char *pbuf; 384 385 txbuf->copy_len = 0; 386 pbuf = DMA_VPTR(txbuf->buf); 387 bp = mp; 388 if (tci != 0) { 389 mblen = MBLKL(bp); 390 ASSERT(mblen >= 2 * ETHERADDRL + VLAN_TAGSZ); 391 bcopy(bp->b_rptr, pbuf, 2 * ETHERADDRL); 392 pbuf += 2 * ETHERADDRL; 393 txbuf->copy_len += 2 * ETHERADDRL; 394 mblen -= 2 * ETHERADDRL + VLAN_TAGSZ; 395 if ((txbuf->copy_len += mblen) <= bgep->chipid.ethmax_size) { 396 bcopy(bp->b_wptr - mblen, pbuf, mblen); 397 pbuf += mblen; 398 } 399 bp = bp->b_cont; 400 } 401 for (; bp != NULL; bp = bp->b_cont) { 402 if ((mblen = MBLKL(bp)) == 0) 403 continue; 404 if ((txbuf->copy_len += mblen) <= bgep->chipid.ethmax_size) { 405 bcopy(bp->b_rptr, pbuf, mblen); 406 pbuf += mblen; 407 } 408 } 409 } 410 411 /* 412 * Fill the Tx buffer descriptors and trigger the h/w transmission 413 */ 414 static void 415 bge_send_serial(bge_t *bgep, send_ring_t *srp) 416 { 417 send_pkt_t *pktp; 418 uint64_t txfill_next; 419 uint32_t count; 420 uint32_t tx_next; 421 sw_sbd_t *ssbdp; 422 bge_status_t *bsp; 423 424 /* 425 * Try to hold the tx lock: 426 * If we are in an interrupt context, use mutex_enter() to 427 * ensure quick response for tx in interrupt context; 428 * Otherwise, use mutex_tryenter() to serialize this h/w tx 429 * BD filling and transmission triggering task. 430 */ 431 if (servicing_interrupt() != 0) 432 mutex_enter(srp->tx_lock); 433 else if (mutex_tryenter(srp->tx_lock) == 0) 434 return; /* already in process */ 435 436 bsp = DMA_VPTR(bgep->status_block); 437 txfill_next = srp->txfill_next; 438 tx_next = srp->tx_next; 439 ssbdp = &srp->sw_sbds[tx_next]; 440 for (count = 0; count < bgep->param_drain_max; ++count) { 441 pktp = &srp->pktp[txfill_next]; 442 if (!pktp->tx_ready) { 443 if (count == 0) 444 srp->tx_block++; 445 break; 446 } 447 448 /* 449 * If there are no enough BDs: try to recycle more 450 */ 451 if (srp->tx_free <= 1) 452 bge_recycle(bgep, bsp); 453 454 /* 455 * Reserved required BDs: 1 is enough 456 */ 457 if (!bge_atomic_reserve(&srp->tx_free, 1)) { 458 srp->tx_nobd++; 459 break; 460 } 461 462 /* 463 * Filling the tx BD 464 */ 465 bge_send_fill_txbd(srp, pktp); 466 txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX); 467 pktp->tx_ready = B_FALSE; 468 } 469 470 /* 471 * Trigger h/w to start transmission. 472 */ 473 if (count != 0) { 474 bge_atomic_sub64(&srp->tx_flow, count); 475 if (tx_next + count > srp->desc.nslots) { 476 (void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0, 477 (srp->desc.nslots - tx_next) * sizeof (bge_sbd_t), 478 DDI_DMA_SYNC_FORDEV); 479 count -= srp->desc.nslots - tx_next; 480 ssbdp = &srp->sw_sbds[0]; 481 } 482 (void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0, 483 count*sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV); 484 bge_mbx_put(bgep, srp->chip_mbx_reg, srp->tx_next); 485 srp->txfill_next = txfill_next; 486 bgep->watchdog++; 487 } 488 489 mutex_exit(srp->tx_lock); 490 } 491 492 static boolean_t 493 bge_send(bge_t *bgep, mblk_t *mp) 494 { 495 uint_t ring = 0; /* use ring 0 */ 496 send_ring_t *srp; 497 struct ether_vlan_header *ehp; 498 bge_queue_item_t *txbuf_item; 499 sw_txbuf_t *txbuf; 500 send_pkt_t *pktp; 501 uint64_t pkt_slot; 502 uint16_t vlan_tci; 503 uint32_t pflags; 504 505 ASSERT(mp->b_next == NULL); 506 srp = &bgep->send[ring]; 507 508 /* 509 * Get a s/w tx buffer first 510 */ 511 txbuf_item = bge_get_txbuf(bgep, srp); 512 if (txbuf_item == NULL) { 513 /* no tx buffer available */ 514 srp->tx_nobuf++; 515 bgep->tx_resched_needed = B_TRUE; 516 bge_send_serial(bgep, srp); 517 return (B_FALSE); 518 } 519 520 /* 521 * Determine if the packet is VLAN tagged. 522 */ 523 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 524 ehp = (struct ether_vlan_header *)mp->b_rptr; 525 if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) 526 vlan_tci = ntohs(ehp->ether_tci); 527 else 528 vlan_tci = 0; 529 530 /* 531 * Copy all mp fragments to the pkt buffer 532 */ 533 txbuf = txbuf_item->item; 534 bge_send_copy(bgep, txbuf, mp, vlan_tci); 535 ASSERT(txbuf->copy_len <= bgep->chipid.ethmax_size); 536 537 /* 538 * Retrieve checksum offloading info. 539 */ 540 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 541 542 /* 543 * Calculate pseudo checksum if needed. 544 */ 545 if ((pflags & HCK_FULLCKSUM) && 546 (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM)) 547 bge_pseudo_cksum((uint8_t *)DMA_VPTR(txbuf->buf)); 548 549 /* 550 * Packet buffer is ready to send: get and fill pkt info 551 */ 552 pkt_slot = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX); 553 pktp = &srp->pktp[pkt_slot]; 554 ASSERT(pktp->txbuf_item == NULL); 555 pktp->txbuf_item = txbuf_item; 556 pktp->vlan_tci = vlan_tci; 557 pktp->pflags = pflags; 558 atomic_inc_64(&srp->tx_flow); 559 ASSERT(pktp->tx_ready == B_FALSE); 560 pktp->tx_ready = B_TRUE; 561 562 /* 563 * Filling the h/w bd and trigger the h/w to start transmission 564 */ 565 bge_send_serial(bgep, srp); 566 567 /* 568 * We've copied the contents, the message can be freed right away 569 */ 570 freemsg(mp); 571 572 return (B_TRUE); 573 } 574 575 uint_t 576 bge_send_drain(caddr_t arg) 577 { 578 uint_t ring = 0; /* use ring 0 */ 579 bge_t *bgep; 580 send_ring_t *srp; 581 582 bgep = (bge_t *)arg; 583 BGE_TRACE(("bge_send_drain($%p)", (void *)bgep)); 584 585 srp = &bgep->send[ring]; 586 bge_send_serial(bgep, srp); 587 588 if (bgep->tx_resched_needed && 589 (srp->tx_flow < srp->tx_buffers_low) && 590 (bgep->bge_mac_state == BGE_MAC_STARTED)) { 591 mac_tx_update(bgep->mh); 592 bgep->tx_resched_needed = B_FALSE; 593 bgep->tx_resched++; 594 } 595 596 return (DDI_INTR_CLAIMED); 597 } 598 599 /* 600 * bge_m_tx() - send a chain of packets 601 */ 602 mblk_t * 603 bge_m_tx(void *arg, mblk_t *mp) 604 { 605 bge_t *bgep = arg; /* private device info */ 606 mblk_t *next; 607 608 BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp)); 609 610 ASSERT(mp != NULL); 611 ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED); 612 613 rw_enter(bgep->errlock, RW_READER); 614 if (bgep->bge_chip_state != BGE_CHIP_RUNNING) { 615 BGE_DEBUG(("bge_m_tx: chip not running")); 616 freemsgchain(mp); 617 mp = NULL; 618 } 619 620 while (mp != NULL) { 621 next = mp->b_next; 622 mp->b_next = NULL; 623 624 if (!bge_send(bgep, mp)) { 625 mp->b_next = next; 626 break; 627 } 628 629 mp = next; 630 } 631 rw_exit(bgep->errlock); 632 633 return (mp); 634 } 635