1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010-2013, by Broadcom, Inc. 24 * All Rights Reserved. 25 */ 26 27 /* 28 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. 29 * All rights reserved. 30 */ 31 32 #include "bge_impl.h" 33 34 35 /* 36 * The transmit-side code uses an allocation process which is similar 37 * to some theme park roller-coaster rides, where riders sit in cars 38 * that can go individually, but work better in a train. 39 * 40 * 1) RESERVE a place - this doesn't refer to any specific car or 41 * seat, just that you will get a ride. The attempt to RESERVE a 42 * place can fail if all spaces in all cars are already committed. 43 * 44 * 2) Prepare yourself; this may take an arbitrary (but not unbounded) 45 * time, and you can back out at this stage, in which case you must 46 * give up (RENOUNCE) your place. 47 * 48 * 3) CLAIM your space - a specific car (the next sequentially 49 * numbered one) is allocated at this stage, and is guaranteed 50 * to be part of the next train to depart. Once you've done 51 * this, you can't back out, nor wait for any external event 52 * or resource. 53 * 54 * 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they 55 * all depart together as a single train! 56 * 57 * 5) At the end of the ride, you climb out of the car and RENOUNCE 58 * your right to it, so that it can be recycled for another rider. 59 * 60 * For each rider, these have to occur in this order, but the riders 61 * don't have to stay in the same order at each stage. In particular, 62 * they may overtake each other between RESERVING a place and CLAIMING 63 * it, or between CLAIMING and OCCUPYING a space. 64 * 65 * Once a car is CLAIMED, the train currently being assembled can't go 66 * without that car (this guarantees that the cars in a single train 67 * make up a consecutively-numbered set). Therefore, when any train 68 * leaves, we know there can't be any riders in transit between CLAIMING 69 * and OCCUPYING their cars. There can be some who have RESERVED but 70 * not yet CLAIMED their places. That's OK, though, because they'll go 71 * into the next train. 72 */ 73 74 #define BGE_DBG BGE_DBG_SEND /* debug flag for this code */ 75 76 /* 77 * ========== Send-side recycle routines ========== 78 */ 79 80 /* 81 * Recycle all the completed buffers in the specified send ring up to 82 * (but not including) the consumer index in the status block. 83 * 84 * This function must advance (srp->tc_next) AND adjust (srp->tx_free) 85 * to account for the packets it has recycled. 86 * 87 * This is a trivial version that just does that and nothing more, but 88 * it suffices while there's only one method for sending messages (by 89 * copying) and that method doesn't need any special per-buffer action 90 * for recycling. 91 */ 92 static boolean_t bge_recycle_ring(bge_t *bgep, send_ring_t *srp); 93 #pragma inline(bge_recycle_ring) 94 95 static boolean_t 96 bge_recycle_ring(bge_t *bgep, send_ring_t *srp) 97 { 98 sw_sbd_t *ssbdp; 99 bge_queue_item_t *buf_item; 100 bge_queue_item_t *buf_item_head; 101 bge_queue_item_t *buf_item_tail; 102 bge_queue_t *txbuf_queue; 103 uint64_t slot; 104 uint64_t n; 105 106 ASSERT(mutex_owned(srp->tc_lock)); 107 108 /* 109 * We're about to release one or more places :-) 110 * These ASSERTions check that our invariants still hold: 111 * there must always be at least one free place 112 * at this point, there must be at least one place NOT free 113 * we're not about to free more places than were claimed! 114 */ 115 ASSERT(srp->tx_free <= srp->desc.nslots); 116 117 buf_item_head = buf_item_tail = NULL; 118 for (n = 0, slot = srp->tc_next; slot != *srp->cons_index_p; 119 slot = NEXT(slot, srp->desc.nslots)) { 120 ssbdp = &srp->sw_sbds[slot]; 121 ASSERT(ssbdp->pbuf != NULL); 122 buf_item = ssbdp->pbuf; 123 if (buf_item_head == NULL) 124 buf_item_head = buf_item_tail = buf_item; 125 else { 126 buf_item_tail->next = buf_item; 127 buf_item_tail = buf_item; 128 } 129 ssbdp->pbuf = NULL; 130 n++; 131 } 132 if (n == 0) 133 return (B_FALSE); 134 135 /* 136 * Reset the watchdog count: to 0 if all buffers are 137 * now free, or to 1 if some are still outstanding. 138 * Note: non-synchonised access here means we may get 139 * the "wrong" answer, but only in a harmless fashion 140 * (i.e. we deactivate the watchdog because all buffers 141 * are apparently free, even though another thread may 142 * have claimed one before we leave here; in this case 143 * the watchdog will restart on the next send() call). 144 */ 145 bgep->watchdog = (slot == srp->tx_next) ? 0 : 1; 146 147 /* 148 * Update recycle index and free tx BD number 149 */ 150 srp->tc_next = slot; 151 ASSERT(srp->tx_free + n <= srp->desc.nslots); 152 bge_atomic_renounce(&srp->tx_free, n); 153 154 /* 155 * Return tx buffers to buffer push queue 156 */ 157 txbuf_queue = srp->txbuf_push_queue; 158 mutex_enter(txbuf_queue->lock); 159 buf_item_tail->next = txbuf_queue->head; 160 txbuf_queue->head = buf_item_head; 161 txbuf_queue->count += n; 162 mutex_exit(txbuf_queue->lock); 163 164 /* 165 * Check if we need exchange the tx buffer push and pop queue 166 */ 167 if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) && 168 (srp->txbuf_pop_queue->count < txbuf_queue->count)) { 169 srp->txbuf_push_queue = srp->txbuf_pop_queue; 170 srp->txbuf_pop_queue = txbuf_queue; 171 } 172 173 if (srp->tx_flow != 0 || bgep->tx_resched_needed) 174 ddi_trigger_softintr(bgep->drain_id); 175 176 return (B_TRUE); 177 } 178 179 /* 180 * Recycle all returned slots in all rings. 181 * 182 * To give priority to low-numbered rings, whenever we have recycled any 183 * slots in any ring except 0, we restart scanning again from ring 0. 184 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the 185 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0: 186 * 187 * 0 found some - recycle them 188 * 1..2 none found 189 * 3 found some - recycle them and restart scan 190 * 0..9 none found 191 * 10 found some - recycle them and restart scan 192 * 0..2 none found 193 * 3 found some more - recycle them and restart scan 194 * 0 found some more - recycle them 195 * 0..9 none found 196 * 10 found some more - recycle them and restart scan 197 * 0 found some more - recycle them 198 * 1..15 none found 199 * 200 * The routine returns only when a complete scan has been performed 201 * without finding any slots to recycle. 202 * 203 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time 204 * constant and allows the compiler to optimise away the outer do-loop 205 * if only one send ring is being used. 206 */ 207 boolean_t bge_recycle(bge_t *bgep, bge_status_t *bsp); 208 #pragma no_inline(bge_recycle) 209 210 boolean_t 211 bge_recycle(bge_t *bgep, bge_status_t *bsp) 212 { 213 send_ring_t *srp; 214 uint64_t ring; 215 uint64_t tx_rings = bgep->chipid.tx_rings; 216 boolean_t tx_done = B_FALSE; 217 218 restart: 219 ring = 0; 220 srp = &bgep->send[ring]; 221 do { 222 /* 223 * For each ring, (srp->cons_index_p) points to the 224 * proper index within the status block (which has 225 * already been sync'd by the caller). 226 */ 227 ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring)); 228 229 if (*srp->cons_index_p == srp->tc_next) 230 continue; /* no slots to recycle */ 231 if (mutex_tryenter(srp->tc_lock) == 0) 232 continue; /* already in process */ 233 tx_done |= bge_recycle_ring(bgep, srp); 234 mutex_exit(srp->tc_lock); 235 236 /* 237 * Restart from ring 0, if we're not on ring 0 already. 238 * As H/W selects send BDs totally based on priority and 239 * available BDs on the higher priority ring are always 240 * selected first, driver should keep consistence with H/W 241 * and gives lower-numbered ring with higher priority. 242 */ 243 if (tx_rings > 1 && ring > 0) 244 goto restart; 245 246 /* 247 * Loop over all rings (if there *are* multiple rings) 248 */ 249 } while (++srp, ++ring < tx_rings); 250 251 return (tx_done); 252 } 253 254 255 /* 256 * ========== Send-side transmit routines ========== 257 */ 258 #define TCP_CKSUM_OFFSET 16 259 #define UDP_CKSUM_OFFSET 6 260 261 static void 262 bge_pseudo_cksum(uint8_t *buf) 263 { 264 uint32_t cksum; 265 uint16_t iphl; 266 uint16_t proto; 267 268 /* 269 * Point it to the ip header. 270 */ 271 buf += sizeof (struct ether_header); 272 273 /* 274 * Calculate the pseudo-header checksum. 275 */ 276 iphl = 4 * (buf[0] & 0xF); 277 cksum = (((uint16_t)buf[2])<<8) + buf[3] - iphl; 278 cksum += proto = buf[9]; 279 cksum += (((uint16_t)buf[12])<<8) + buf[13]; 280 cksum += (((uint16_t)buf[14])<<8) + buf[15]; 281 cksum += (((uint16_t)buf[16])<<8) + buf[17]; 282 cksum += (((uint16_t)buf[18])<<8) + buf[19]; 283 cksum = (cksum>>16) + (cksum & 0xFFFF); 284 cksum = (cksum>>16) + (cksum & 0xFFFF); 285 286 /* 287 * Point it to the TCP/UDP header, and 288 * update the checksum field. 289 */ 290 buf += iphl + ((proto == IPPROTO_TCP) ? 291 TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET); 292 293 /* 294 * A real possibility that pointer cast is a problem. 295 * Should be fixed when we know the code better. 296 * E_BAD_PTR_CAST_ALIGN is added to make it temporarily clean. 297 */ 298 *(uint16_t *)buf = htons((uint16_t)cksum); 299 } 300 301 static bge_queue_item_t * 302 bge_get_txbuf(bge_t *bgep, send_ring_t *srp) 303 { 304 bge_queue_item_t *txbuf_item; 305 bge_queue_t *txbuf_queue; 306 307 txbuf_queue = srp->txbuf_pop_queue; 308 mutex_enter(txbuf_queue->lock); 309 if (txbuf_queue->count == 0) { 310 mutex_exit(txbuf_queue->lock); 311 txbuf_queue = srp->txbuf_push_queue; 312 mutex_enter(txbuf_queue->lock); 313 if (txbuf_queue->count == 0) { 314 mutex_exit(txbuf_queue->lock); 315 /* Try to allocate more tx buffers */ 316 if (srp->tx_array < srp->tx_array_max) { 317 mutex_enter(srp->tx_lock); 318 txbuf_item = bge_alloc_txbuf_array(bgep, srp); 319 mutex_exit(srp->tx_lock); 320 } else 321 txbuf_item = NULL; 322 return (txbuf_item); 323 } 324 } 325 txbuf_item = txbuf_queue->head; 326 txbuf_queue->head = (bge_queue_item_t *)txbuf_item->next; 327 txbuf_queue->count--; 328 mutex_exit(txbuf_queue->lock); 329 txbuf_item->next = NULL; 330 331 return (txbuf_item); 332 } 333 334 /* 335 * Send a message by copying it into a preallocated (and premapped) buffer 336 */ 337 static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp); 338 #pragma inline(bge_send_copy) 339 340 static void 341 bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp) 342 { 343 mblk_t *bp; 344 uint32_t mblen; 345 char *pbuf; 346 347 txbuf->copy_len = 0; 348 pbuf = DMA_VPTR(txbuf->buf); 349 for (bp = mp; bp != NULL; bp = bp->b_cont) { 350 if ((mblen = MBLKL(bp)) == 0) 351 continue; 352 ASSERT(txbuf->copy_len + mblen <= 353 bgep->chipid.snd_buff_size); 354 bcopy(bp->b_rptr, pbuf, mblen); 355 pbuf += mblen; 356 txbuf->copy_len += mblen; 357 } 358 } 359 360 /* 361 * Fill the Tx buffer descriptors and trigger the h/w transmission 362 */ 363 static void 364 bge_send_serial(bge_t *bgep, send_ring_t *srp) 365 { 366 send_pkt_t *pktp; 367 uint64_t txfill_next; 368 uint32_t count; 369 uint32_t tx_next; 370 sw_sbd_t *ssbdp; 371 bge_status_t *bsp; 372 bge_sbd_t *hw_sbd_p; 373 bge_queue_item_t *txbuf_item; 374 sw_txbuf_t *txbuf; 375 376 /* 377 * Try to hold the tx lock: 378 * If we are in an interrupt context, use mutex_enter() to 379 * ensure quick response for tx in interrupt context; 380 * Otherwise, use mutex_tryenter() to serialize this h/w tx 381 * BD filling and transmission triggering task. 382 */ 383 if (servicing_interrupt() != 0) 384 mutex_enter(srp->tx_lock); 385 else if (mutex_tryenter(srp->tx_lock) == 0) 386 return; /* already in process */ 387 388 bsp = DMA_VPTR(bgep->status_block); 389 txfill_next = srp->txfill_next; 390 tx_next = srp->tx_next; 391 start_tx: 392 for (count = 0; count < bgep->param_drain_max; ++count) { 393 pktp = &srp->pktp[txfill_next]; 394 if (!pktp->tx_ready) { 395 if (count == 0) 396 srp->tx_block++; 397 break; 398 } 399 400 /* 401 * If there are no enough BDs: try to recycle more 402 */ 403 if (srp->tx_free <= 1) 404 (void) bge_recycle(bgep, bsp); 405 406 /* 407 * Reserved required BDs: 1 is enough 408 */ 409 if (!bge_atomic_reserve(&srp->tx_free, 1)) { 410 srp->tx_nobd++; 411 break; 412 } 413 414 /* 415 * Filling the tx BD 416 */ 417 418 /* 419 * Go straight to claiming our already-reserved places 420 * on the train! 421 */ 422 ASSERT(pktp->txbuf_item != NULL); 423 txbuf_item = pktp->txbuf_item; 424 pktp->txbuf_item = NULL; 425 pktp->tx_ready = B_FALSE; 426 427 txbuf = txbuf_item->item; 428 ASSERT(txbuf->copy_len != 0); 429 (void) ddi_dma_sync(txbuf->buf.dma_hdl, 0, 430 txbuf->copy_len, DDI_DMA_SYNC_FORDEV); 431 432 ssbdp = &srp->sw_sbds[tx_next]; 433 ASSERT(ssbdp->pbuf == NULL); 434 ssbdp->pbuf = txbuf_item; 435 436 /* 437 * Setting hardware send buffer descriptor 438 */ 439 hw_sbd_p = DMA_VPTR(ssbdp->desc); 440 hw_sbd_p->flags = 0; 441 hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress; 442 hw_sbd_p->len = txbuf->copy_len; 443 if (pktp->vlan_tci != 0) { 444 hw_sbd_p->vlan_tci = pktp->vlan_tci; 445 hw_sbd_p->host_buf_addr += VLAN_TAGSZ; 446 hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG; 447 } 448 if (pktp->pflags & HCK_IPV4_HDRCKSUM) 449 hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM; 450 if (pktp->pflags & HCK_FULLCKSUM) 451 hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM; 452 if (!(bgep->chipid.flags & CHIP_FLAG_NO_JUMBO) && 453 (DEVICE_5717_SERIES_CHIPSETS(bgep) || 454 DEVICE_5725_SERIES_CHIPSETS(bgep)) && 455 (txbuf->copy_len > ETHERMAX)) 456 hw_sbd_p->flags |= SBD_FLAG_JMB_PKT; 457 hw_sbd_p->flags |= SBD_FLAG_PACKET_END; 458 459 txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX); 460 tx_next = NEXT(tx_next, srp->desc.nslots); 461 } 462 463 /* 464 * Trigger h/w to start transmission. 465 */ 466 if (count != 0) { 467 bge_atomic_sub64(&srp->tx_flow, count); 468 srp->txfill_next = txfill_next; 469 470 if (srp->tx_next > tx_next) { 471 (void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0, 472 (srp->desc.nslots - srp->tx_next) * 473 sizeof (bge_sbd_t), 474 DDI_DMA_SYNC_FORDEV); 475 count -= srp->desc.nslots - srp->tx_next; 476 ssbdp = &srp->sw_sbds[0]; 477 } 478 (void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0, 479 count*sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV); 480 bge_mbx_put(bgep, srp->chip_mbx_reg, tx_next); 481 srp->tx_next = tx_next; 482 atomic_or_32(&bgep->watchdog, 1); 483 484 if (srp->tx_flow != 0 && srp->tx_free > 1) 485 goto start_tx; 486 } 487 488 mutex_exit(srp->tx_lock); 489 } 490 491 mblk_t * 492 bge_ring_tx(void *arg, mblk_t *mp) 493 { 494 send_ring_t *srp = arg; 495 bge_t *bgep = srp->bgep; 496 struct ether_vlan_header *ehp; 497 bge_queue_item_t *txbuf_item; 498 sw_txbuf_t *txbuf; 499 send_pkt_t *pktp; 500 uint64_t pkt_slot; 501 uint16_t vlan_tci; 502 uint32_t pflags; 503 char *pbuf; 504 505 ASSERT(mp->b_next == NULL); 506 507 /* 508 * Get a s/w tx buffer first 509 */ 510 txbuf_item = bge_get_txbuf(bgep, srp); 511 if (txbuf_item == NULL) { 512 /* no tx buffer available */ 513 srp->tx_nobuf++; 514 bgep->tx_resched_needed = B_TRUE; 515 bge_send_serial(bgep, srp); 516 return (mp); 517 } 518 519 /* 520 * Copy all mp fragments to the pkt buffer 521 */ 522 txbuf = txbuf_item->item; 523 bge_send_copy(bgep, txbuf, mp); 524 525 /* 526 * Determine if the packet is VLAN tagged. 527 */ 528 ASSERT(txbuf->copy_len >= sizeof (struct ether_header)); 529 pbuf = DMA_VPTR(txbuf->buf); 530 531 ehp = (void *)pbuf; 532 if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) { 533 /* Strip the vlan tag */ 534 vlan_tci = ntohs(ehp->ether_tci); 535 pbuf = memmove(pbuf + VLAN_TAGSZ, pbuf, 2 * ETHERADDRL); 536 txbuf->copy_len -= VLAN_TAGSZ; 537 } else 538 vlan_tci = 0; 539 540 /* 541 * Retrieve checksum offloading info. 542 */ 543 mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); 544 545 /* 546 * Calculate pseudo checksum if needed. 547 */ 548 if ((pflags & HCK_FULLCKSUM) && 549 (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM)) 550 bge_pseudo_cksum((uint8_t *)pbuf); 551 552 /* 553 * Packet buffer is ready to send: get and fill pkt info 554 */ 555 pkt_slot = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX); 556 pktp = &srp->pktp[pkt_slot]; 557 ASSERT(pktp->txbuf_item == NULL); 558 pktp->txbuf_item = txbuf_item; 559 pktp->vlan_tci = vlan_tci; 560 pktp->pflags = pflags; 561 atomic_inc_64(&srp->tx_flow); 562 ASSERT(pktp->tx_ready == B_FALSE); 563 pktp->tx_ready = B_TRUE; 564 565 /* 566 * Filling the h/w bd and trigger the h/w to start transmission 567 */ 568 bge_send_serial(bgep, srp); 569 570 srp->pushed_bytes += MBLKL(mp); 571 572 /* 573 * We've copied the contents, the message can be freed right away 574 */ 575 freemsg(mp); 576 return (NULL); 577 } 578 579 static mblk_t * 580 bge_send(bge_t *bgep, mblk_t *mp) 581 { 582 send_ring_t *ring; 583 584 ring = &bgep->send[0]; /* ring 0 */ 585 586 return (bge_ring_tx(ring, mp)); 587 } 588 589 uint_t 590 bge_send_drain(caddr_t arg) 591 { 592 uint_t ring = 0; /* use ring 0 */ 593 bge_t *bgep; 594 send_ring_t *srp; 595 596 bgep = (void *)arg; 597 BGE_TRACE(("bge_send_drain($%p)", (void *)bgep)); 598 599 srp = &bgep->send[ring]; 600 bge_send_serial(bgep, srp); 601 602 if (bgep->tx_resched_needed && 603 (srp->tx_flow < srp->tx_buffers_low) && 604 (bgep->bge_mac_state == BGE_MAC_STARTED)) { 605 mac_tx_update(bgep->mh); 606 bgep->tx_resched_needed = B_FALSE; 607 bgep->tx_resched++; 608 } 609 610 return (DDI_INTR_CLAIMED); 611 } 612 613 /* 614 * bge_m_tx() - send a chain of packets 615 */ 616 mblk_t * 617 bge_m_tx(void *arg, mblk_t *mp) 618 { 619 bge_t *bgep = arg; /* private device info */ 620 mblk_t *next; 621 622 BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp)); 623 624 ASSERT(mp != NULL); 625 ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED); 626 627 rw_enter(bgep->errlock, RW_READER); 628 if ((bgep->bge_chip_state != BGE_CHIP_RUNNING) || 629 !(bgep->param_link_up)) { 630 BGE_DEBUG(("bge_m_tx: chip not running or link down")); 631 freemsgchain(mp); 632 mp = NULL; 633 } 634 635 while (mp != NULL) { 636 next = mp->b_next; 637 mp->b_next = NULL; 638 639 if ((mp = bge_send(bgep, mp)) != NULL) { 640 mp->b_next = next; 641 break; 642 } 643 644 mp = next; 645 } 646 rw_exit(bgep->errlock); 647 648 return (mp); 649 } 650