/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "bge_impl.h"


/*
 * The transmit-side code uses an allocation process which is similar
 * to some theme park roller-coaster rides, where riders sit in cars
 * that can go individually, but work better in a train.
 *
 * 1) RESERVE a place - this doesn't refer to any specific car or
 *    seat, just that you will get a ride.  The attempt to RESERVE a
 *    place can fail if all spaces in all cars are already committed.
 *
 * 2) Prepare yourself; this may take an arbitrary (but not unbounded)
 *    time, and you can back out at this stage, in which case you must
 *    give up (RENOUNCE) your place.
 *
 * 3) CLAIM your space - a specific car (the next sequentially
 *    numbered one) is allocated at this stage, and is guaranteed
 *    to be part of the next train to depart.  Once you've done
 *    this, you can't back out, nor wait for any external event
 *    or resource.
 *
 * 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they
 *    all depart together as a single train!
 *
 * 5) At the end of the ride, you climb out of the car and RENOUNCE
 *    your right to it, so that it can be recycled for another rider.
 *
 * For each rider, these have to occur in this order, but the riders
 * don't have to stay in the same order at each stage.  In particular,
 * they may overtake each other between RESERVING a place and CLAIMING
 * it, or between CLAIMING and OCCUPYING a space.
 *
 * Once a car is CLAIMED, the train currently being assembled can't go
 * without that car (this guarantees that the cars in a single train
 * make up a consecutively-numbered set).  Therefore, when any train
 * leaves, we know there can't be any riders in transit between CLAIMING
 * and OCCUPYING their cars.  There can be some who have RESERVED but
 * not yet CLAIMED their places.  That's OK, though, because they'll go
 * into the next train.
 */
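
/*
 * Mapping the analogy onto this file: the "places" are free send BDs
 * counted by (srp->tx_free), and the "cars" are the sequentially-
 * numbered BD slots of the send ring.  A minimal sketch of the
 * sequence, assuming the bge_atomic_*() helpers behave as their uses
 * below imply (illustration only, not compiled code):
 *
 *	if (!bge_atomic_reserve(&srp->tx_free, 1))
 *		fail;				RESERVE: may fail
 *	prepare the packet ...			may still back out, via
 *		bge_atomic_renounce(&srp->tx_free, 1)
 *	take the next slot (under tx_lock,	CLAIM: no backing out now
 *	    in bge_send_serial() below)
 *	fill the BD in that slot		OCCUPY
 *	write the mailbox register		the train departs
 *	... later, when the chip reports completions:
 *		bge_atomic_renounce(&srp->tx_free, n)	RENOUNCE n places
 */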

#define	BGE_DBG		BGE_DBG_SEND	/* debug flag for this code	*/

/*
 * ========== Send-side recycle routines ==========
 */

/*
 * Recycle all the completed buffers in the specified send ring up to
 * (but not including) the consumer index in the status block.
 *
 * This function must advance (srp->tc_next) AND adjust (srp->tx_free)
 * to account for the packets it has recycled.
 *
 * This is a trivial version that just does that and nothing more, but
 * it suffices while there's only one method for sending messages (by
 * copying) and that method doesn't need any special per-buffer action
 * for recycling.
 */
static boolean_t bge_recycle_ring(bge_t *bgep, send_ring_t *srp);
#pragma	inline(bge_recycle_ring)

static boolean_t
bge_recycle_ring(bge_t *bgep, send_ring_t *srp)
{
	sw_sbd_t *ssbdp;
	bge_queue_item_t *buf_item;
	bge_queue_item_t *buf_item_head;
	bge_queue_item_t *buf_item_tail;
	bge_queue_t *txbuf_queue;
	uint64_t slot;
	uint64_t n;

	ASSERT(mutex_owned(srp->tc_lock));

	/*
	 * We're about to release one or more places :-)
	 * These ASSERTions check that our invariants still hold:
	 *	there must always be at least one free place
	 *	at this point, there must be at least one place NOT free
	 *	we're not about to free more places than were claimed!
	 */
	ASSERT(srp->tx_free <= srp->desc.nslots);

	buf_item_head = buf_item_tail = NULL;
	for (n = 0, slot = srp->tc_next; slot != *srp->cons_index_p;
	    slot = NEXT(slot, srp->desc.nslots)) {
		ssbdp = &srp->sw_sbds[slot];
		ASSERT(ssbdp->pbuf != NULL);
		buf_item = ssbdp->pbuf;
		if (buf_item_head == NULL)
			buf_item_head = buf_item_tail = buf_item;
		else {
			buf_item_tail->next = buf_item;
			buf_item_tail = buf_item;
		}
		ssbdp->pbuf = NULL;
		n++;
	}
	if (n == 0)
		return (B_FALSE);

	/*
	 * Reset the watchdog count: to 0 if all buffers are
	 * now free, or to 1 if some are still outstanding.
	 * Note: non-synchronised access here means we may get
	 * the "wrong" answer, but only in a harmless fashion
	 * (i.e. we deactivate the watchdog because all buffers
	 * are apparently free, even though another thread may
	 * have claimed one before we leave here; in this case
	 * the watchdog will restart on the next send() call).
	 */
	bgep->watchdog = (slot == srp->tx_next) ? 0 : 1;

	/*
	 * Update the recycle index and the count of free tx BDs
	 */
	srp->tc_next = slot;
	ASSERT(srp->tx_free + n <= srp->desc.nslots);
	bge_atomic_renounce(&srp->tx_free, n);

	/*
	 * Return the recycled tx buffers to the buffer push queue
	 */
	txbuf_queue = srp->txbuf_push_queue;
	mutex_enter(txbuf_queue->lock);
	buf_item_tail->next = txbuf_queue->head;
	txbuf_queue->head = buf_item_head;
	txbuf_queue->count += n;
	mutex_exit(txbuf_queue->lock);

	/*
	 * Check whether we need to exchange the tx buffer push and
	 * pop queues
	 */
	if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) &&
	    (srp->txbuf_pop_queue->count < txbuf_queue->count)) {
		srp->txbuf_push_queue = srp->txbuf_pop_queue;
		srp->txbuf_pop_queue = txbuf_queue;
	}

	if (srp->tx_flow != 0 || bgep->tx_resched_needed)
		ddi_trigger_softintr(bgep->drain_id);

	return (B_TRUE);
}
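
/*
 * The push/pop queue pair used above is a contention-avoidance
 * device: this recycle path pushes freed buffers onto one queue under
 * its own lock, senders pop from the other, and the two queues swap
 * roles only when the pop side runs low.  A standalone sketch of the
 * idea (the types and names here are hypothetical, for illustration
 * only):
 *
 *	producer (recycler):
 *		mutex_enter(push_q->lock);
 *		item->next = push_q->head;
 *		push_q->head = item;
 *		push_q->count++;
 *		mutex_exit(push_q->lock);
 *
 *	consumer (sender):
 *		mutex_enter(pop_q->lock);
 *		item = pop_q->head;
 *		pop_q->head = item->next;
 *		pop_q->count--;
 *		mutex_exit(pop_q->lock);
 *
 * Because producer and consumer normally take different locks, they
 * serialize against each other only around the occasional swap.
 */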

/*
 * Recycle all returned slots in all rings.
 *
 * To give priority to low-numbered rings, whenever we have recycled any
 * slots in any ring except 0, we restart scanning again from ring 0.
 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0:
 *
 *	0	found some - recycle them
 *	1..2			none found
 *	3	found some - recycle them and restart scan
 *	0..9			none found
 *	10	found some - recycle them and restart scan
 *	0..2			none found
 *	3	found some more - recycle them and restart scan
 *	0	found some more - recycle them
 *	1..9			none found
 *	10	found some more - recycle them and restart scan
 *	0	found some more - recycle them
 *	1..15			none found
 *
 * The routine returns only when a complete scan has been performed
 * without finding any slots to recycle.
 *
 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time
 * constant and allows the compiler to optimise away the outer do-loop
 * if only one send ring is being used.
 */
boolean_t bge_recycle(bge_t *bgep, bge_status_t *bsp);
#pragma	no_inline(bge_recycle)

boolean_t
bge_recycle(bge_t *bgep, bge_status_t *bsp)
{
	send_ring_t *srp;
	uint64_t ring;
	uint64_t tx_rings = bgep->chipid.tx_rings;
	boolean_t tx_done = B_FALSE;

restart:
	ring = 0;
	srp = &bgep->send[ring];
	do {
		/*
		 * For each ring, (srp->cons_index_p) points to the
		 * proper index within the status block (which has
		 * already been sync'd by the caller).
		 */
		ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring));

		if (*srp->cons_index_p == srp->tc_next)
			continue;	/* no slots to recycle	*/
		if (mutex_tryenter(srp->tc_lock) == 0)
			continue;	/* already in process	*/
		tx_done |= bge_recycle_ring(bgep, srp);
		mutex_exit(srp->tc_lock);

		/*
		 * Restart from ring 0, if we're not on ring 0 already.
		 * The h/w selects send BDs strictly by priority, and
		 * BDs on the higher-priority ring are always consumed
		 * first, so the driver should stay consistent with the
		 * h/w and give lower-numbered rings higher priority.
		 */
		if (tx_rings > 1 && ring > 0)
			goto restart;

		/*
		 * Loop over all rings (if there *are* multiple rings)
		 */
	} while (++srp, ++ring < tx_rings);

	return (tx_done);
}


/*
 * ========== Send-side transmit routines ==========
 */
#define	TCP_CKSUM_OFFSET	16
#define	UDP_CKSUM_OFFSET	6

static void
bge_pseudo_cksum(uint8_t *buf)
{
	uint32_t cksum;
	uint16_t iphl;
	uint16_t proto;

	/*
	 * Point it to the ip header.
	 */
	buf += sizeof (struct ether_header);

	/*
	 * Calculate the pseudo-header checksum.
	 */
	iphl = 4 * (buf[0] & 0xF);
	cksum = (((uint16_t)buf[2])<<8) + buf[3] - iphl;
	cksum += proto = buf[9];
	cksum += (((uint16_t)buf[12])<<8) + buf[13];
	cksum += (((uint16_t)buf[14])<<8) + buf[15];
	cksum += (((uint16_t)buf[16])<<8) + buf[17];
	cksum += (((uint16_t)buf[18])<<8) + buf[19];
	cksum = (cksum>>16) + (cksum & 0xFFFF);
	cksum = (cksum>>16) + (cksum & 0xFFFF);

	/*
	 * Point it to the TCP/UDP header, and
	 * update the checksum field.
	 */
	buf += iphl + ((proto == IPPROTO_TCP) ?
	    TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET);

	/*
	 * A real possibility that the pointer cast is a problem.
	 * Should be fixed when we know the code better.
	 * E_BAD_PTR_CAST_ALIGN is added to make it temporarily clean.
	 */
	*(uint16_t *)buf = htons((uint16_t)cksum);
}
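
/*
 * A worked example of the arithmetic in bge_pseudo_cksum() (the
 * addresses and lengths are assumed, purely for illustration): for an
 * IPv4/TCP packet with total length 0x0030, IP header length 20,
 * source 192.168.1.1 and destination 192.168.1.2:
 *
 *	cksum = 0x0030 - 20		TCP segment length = 0x001C
 *	      + 0x0006			IPPROTO_TCP
 *	      + 0xC0A8 + 0x0101		source address words
 *	      + 0xC0A8 + 0x0102		destination address words
 *	      = 0x18375
 *	fold:	0x8375 + 0x1 = 0x8376
 *
 * The folded sum is stored un-complemented in the TCP/UDP checksum
 * field: chips that can compute only a partial (data-only) checksum
 * expect the pseudo-header contribution to be seeded there.
 */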

/*
 * Get a tx buffer from the pop queue, falling back to the push queue,
 * and then to allocating a new buffer array if both are empty.
 */
static bge_queue_item_t *
bge_get_txbuf(bge_t *bgep, send_ring_t *srp)
{
	bge_queue_item_t *txbuf_item;
	bge_queue_t *txbuf_queue;

	txbuf_queue = srp->txbuf_pop_queue;
	mutex_enter(txbuf_queue->lock);
	if (txbuf_queue->count == 0) {
		mutex_exit(txbuf_queue->lock);
		txbuf_queue = srp->txbuf_push_queue;
		mutex_enter(txbuf_queue->lock);
		if (txbuf_queue->count == 0) {
			mutex_exit(txbuf_queue->lock);
			/* Try to allocate more tx buffers */
			if (srp->tx_array < srp->tx_array_max) {
				mutex_enter(srp->tx_lock);
				txbuf_item = bge_alloc_txbuf_array(bgep, srp);
				mutex_exit(srp->tx_lock);
			} else
				txbuf_item = NULL;
			return (txbuf_item);
		}
	}
	txbuf_item = txbuf_queue->head;
	txbuf_queue->head = (bge_queue_item_t *)txbuf_item->next;
	txbuf_queue->count--;
	mutex_exit(txbuf_queue->lock);
	txbuf_item->next = NULL;

	return (txbuf_item);
}

/*
 * Send a message by copying it into a preallocated (and premapped) buffer
 */
static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp);
#pragma	inline(bge_send_copy)

static void
bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp)
{
	mblk_t *bp;
	uint32_t mblen;
	char *pbuf;

	txbuf->copy_len = 0;
	pbuf = DMA_VPTR(txbuf->buf);
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		if ((mblen = MBLKL(bp)) == 0)
			continue;
		ASSERT(txbuf->copy_len + mblen <=
		    bgep->chipid.snd_buff_size);
		bcopy(bp->b_rptr, pbuf, mblen);
		pbuf += mblen;
		txbuf->copy_len += mblen;
	}
}
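
/*
 * bge_send_copy() relies on its caller to ensure the whole mblk chain
 * fits in one preallocated buffer; the ASSERT above fires only on
 * DEBUG builds.  A sketch of the kind of guard a caller could apply,
 * using the standard STREAMS msgsize(9F) routine (illustration only,
 * not part of this driver's data path):
 *
 *	if (msgsize(mp) > bgep->chipid.snd_buff_size) {
 *		freemsg(mp);		too big to copy: drop it
 *		return (NULL);
 *	}
 *
 * In practice the MTU bounds the chain length, so the driver relies
 * on the ASSERT rather than paying for a second walk of the chain.
 */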

/*
 * Fill the Tx buffer descriptors and trigger the h/w transmission
 */
static void
bge_send_serial(bge_t *bgep, send_ring_t *srp)
{
	send_pkt_t *pktp;
	uint64_t txfill_next;
	uint32_t count;
	uint32_t tx_next;
	sw_sbd_t *ssbdp;
	bge_status_t *bsp;
	bge_sbd_t *hw_sbd_p;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;

	/*
	 * Try to hold the tx lock:
	 * If we are in an interrupt context, use mutex_enter() to
	 * ensure a quick response for tx in interrupt context;
	 * otherwise, use mutex_tryenter() to serialize this h/w tx
	 * BD filling and transmission triggering task.
	 */
	if (servicing_interrupt() != 0)
		mutex_enter(srp->tx_lock);
	else if (mutex_tryenter(srp->tx_lock) == 0)
		return;		/* already in process	*/

	bsp = DMA_VPTR(bgep->status_block);
	txfill_next = srp->txfill_next;
	tx_next = srp->tx_next;
start_tx:
	for (count = 0; count < bgep->param_drain_max; ++count) {
		pktp = &srp->pktp[txfill_next];
		if (!pktp->tx_ready) {
			if (count == 0)
				srp->tx_block++;
			break;
		}

		/*
		 * If there are not enough free BDs, try to recycle more
		 */
		if (srp->tx_free <= 1)
			(void) bge_recycle(bgep, bsp);

		/*
		 * Reserve the required BDs: one is enough
		 */
		if (!bge_atomic_reserve(&srp->tx_free, 1)) {
			srp->tx_nobd++;
			break;
		}

		/*
		 * Fill the tx BD
		 */

		/*
		 * Go straight to claiming our already-reserved places
		 * on the train!
		 */
		ASSERT(pktp->txbuf_item != NULL);
		txbuf_item = pktp->txbuf_item;
		pktp->txbuf_item = NULL;
		pktp->tx_ready = B_FALSE;

		txbuf = txbuf_item->item;
		ASSERT(txbuf->copy_len != 0);
		(void) ddi_dma_sync(txbuf->buf.dma_hdl, 0,
		    txbuf->copy_len, DDI_DMA_SYNC_FORDEV);

		ssbdp = &srp->sw_sbds[tx_next];
		ASSERT(ssbdp->pbuf == NULL);
		ssbdp->pbuf = txbuf_item;

		/*
		 * Set up the hardware send buffer descriptor
		 */
		hw_sbd_p = DMA_VPTR(ssbdp->desc);
		hw_sbd_p->flags = 0;
		hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress;
		hw_sbd_p->len = txbuf->copy_len;
		if (pktp->vlan_tci != 0) {
			hw_sbd_p->vlan_tci = pktp->vlan_tci;
			hw_sbd_p->host_buf_addr += VLAN_TAGSZ;
			hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG;
		}
		if (pktp->pflags & HCK_IPV4_HDRCKSUM)
			hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM;
		if (pktp->pflags & HCK_FULLCKSUM)
			hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM;
		hw_sbd_p->flags |= SBD_FLAG_PACKET_END;

		txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX);
		tx_next = NEXT(tx_next, srp->desc.nslots);
	}

	/*
	 * Trigger h/w to start transmission.
	 */
	if (count != 0) {
		bge_atomic_sub64(&srp->tx_flow, count);
		srp->txfill_next = txfill_next;

		if (srp->tx_next > tx_next) {
			/*
			 * The filled BDs wrap past the end of the ring:
			 * sync the tail part first, then the head part.
			 */
			(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
			    (srp->desc.nslots - srp->tx_next) *
			    sizeof (bge_sbd_t),
			    DDI_DMA_SYNC_FORDEV);
			count -= srp->desc.nslots - srp->tx_next;
			ssbdp = &srp->sw_sbds[0];
		}
		(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
		    count*sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV);
		bge_mbx_put(bgep, srp->chip_mbx_reg, tx_next);
		srp->tx_next = tx_next;
		atomic_or_32(&bgep->watchdog, 1);

		if (srp->tx_flow != 0 && srp->tx_free > 1)
			goto start_tx;
	}

	mutex_exit(srp->tx_lock);
}
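
/*
 * A worked example of the wrap-around sync at the end of
 * bge_send_serial() (values assumed for illustration): with
 * srp->desc.nslots == 512, old srp->tx_next == 510 and count == 5,
 * the new tx_next is 3, so the filled descriptors span the end of
 * the ring.  The first ddi_dma_sync() covers the tail slots 510..511
 * ((512 - 510) * sizeof (bge_sbd_t) bytes), count drops to 3, and
 * the second sync covers the head slots 0..2 before the mailbox
 * write tells the chip to fetch them.
 */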

mblk_t *
bge_ring_tx(void *arg, mblk_t *mp)
{
	send_ring_t *srp = arg;
	bge_t *bgep = srp->bgep;
	struct ether_vlan_header *ehp;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;
	send_pkt_t *pktp;
	uint64_t pkt_slot;
	uint16_t vlan_tci;
	uint32_t pflags;
	char *pbuf;

	ASSERT(mp->b_next == NULL);

	/*
	 * Get a s/w tx buffer first
	 */
	txbuf_item = bge_get_txbuf(bgep, srp);
	if (txbuf_item == NULL) {
		/* no tx buffer available */
		srp->tx_nobuf++;
		bgep->tx_resched_needed = B_TRUE;
		bge_send_serial(bgep, srp);
		return (mp);
	}

	/*
	 * Copy all mp fragments to the pkt buffer
	 */
	txbuf = txbuf_item->item;
	bge_send_copy(bgep, txbuf, mp);

	/*
	 * Determine if the packet is VLAN tagged.
	 */
	ASSERT(txbuf->copy_len >= sizeof (struct ether_header));
	pbuf = DMA_VPTR(txbuf->buf);

	ehp = (void *)pbuf;
	if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) {
		/*
		 * Strip the vlan tag: slide the two MAC addresses
		 * forward by VLAN_TAGSZ, overwriting the tag, and
		 * advance pbuf to the new start of the frame.
		 */
		vlan_tci = ntohs(ehp->ether_tci);
		pbuf = memmove(pbuf + VLAN_TAGSZ, pbuf, 2 * ETHERADDRL);
		txbuf->copy_len -= VLAN_TAGSZ;
	} else
		vlan_tci = 0;

	/*
	 * Retrieve checksum offloading info.
	 */
	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);

	/*
	 * Calculate the pseudo checksum if needed.
	 */
	if ((pflags & HCK_FULLCKSUM) &&
	    (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM))
		bge_pseudo_cksum((uint8_t *)pbuf);

	/*
	 * Packet buffer is ready to send: get and fill pkt info
	 */
	pkt_slot = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX);
	pktp = &srp->pktp[pkt_slot];
	ASSERT(pktp->txbuf_item == NULL);
	pktp->txbuf_item = txbuf_item;
	pktp->vlan_tci = vlan_tci;
	pktp->pflags = pflags;
	atomic_inc_64(&srp->tx_flow);
	ASSERT(pktp->tx_ready == B_FALSE);
	pktp->tx_ready = B_TRUE;

	/*
	 * Fill the h/w BD and trigger the h/w to start transmission
	 */
	bge_send_serial(bgep, srp);

	srp->pushed_bytes += MBLKL(mp);

	/*
	 * We've copied the contents, so the message can be freed right away
	 */
	freemsg(mp);
	return (NULL);
}

static mblk_t *
bge_send(bge_t *bgep, mblk_t *mp)
{
	send_ring_t *ring;

	ring = &bgep->send[0];	/* ring 0 */

	return (bge_ring_tx(ring, mp));
}

/*
 * Soft interrupt handler: drain any packets queued for transmission,
 * and tell the MAC layer to resume calling bge_m_tx() if we had
 * earlier asked it to back off.
 */
uint_t
bge_send_drain(caddr_t arg)
{
	uint_t ring = 0;	/* use ring 0 */
	bge_t *bgep;
	send_ring_t *srp;

	bgep = (void *)arg;
	BGE_TRACE(("bge_send_drain($%p)", (void *)bgep));

	srp = &bgep->send[ring];
	bge_send_serial(bgep, srp);

	if (bgep->tx_resched_needed &&
	    (srp->tx_flow < srp->tx_buffers_low) &&
	    (bgep->bge_mac_state == BGE_MAC_STARTED)) {
		mac_tx_update(bgep->mh);
		bgep->tx_resched_needed = B_FALSE;
		bgep->tx_resched++;
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * bge_m_tx() - send a chain of packets
 */
mblk_t *
bge_m_tx(void *arg, mblk_t *mp)
{
	bge_t *bgep = arg;		/* private device info	*/
	mblk_t *next;

	BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp));

	ASSERT(mp != NULL);
	ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED);

	rw_enter(bgep->errlock, RW_READER);
	if (bgep->bge_chip_state != BGE_CHIP_RUNNING) {
		BGE_DEBUG(("bge_m_tx: chip not running"));
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if ((mp = bge_send(bgep, mp)) != NULL) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}
	rw_exit(bgep->errlock);

	return (mp);
}
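
/*
 * Note on the return contract of bge_m_tx() (the GLDv3 mc_tx(9E)
 * entry point): any packets not consumed are handed back with their
 * b_next chaining restored, and the framework re-presents them after
 * mac_tx_update() is called from bge_send_drain().  A sketch of a
 * caller honouring that contract (illustration only):
 *
 *	mblk_t *unsent = bge_m_tx(bgep, chain);
 *	if (unsent != NULL)
 *		keep the chain and retry after the next tx update
 */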