1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "bge_impl.h" 30 31 32 /* 33 * The transmit-side code uses an allocation process which is similar 34 * to some theme park roller-coaster rides, where riders sit in cars 35 * that can go individually, but work better in a train. 36 * 37 * 1) RESERVE a place - this doesn't refer to any specific car or 38 * seat, just that you will get a ride. The attempt to RESERVE a 39 * place can fail if all spaces in all cars are already committed. 40 * 41 * 2) Prepare yourself; this may take an arbitrary (but not unbounded) 42 * time, and you can back out at this stage, in which case you must 43 * give up (RENOUNCE) your place. 44 * 45 * 3) CLAIM your space - a specific car (the next sequentially 46 * numbered one) is allocated at this stage, and is guaranteed 47 * to be part of the next train to depart. Once you've done 48 * this, you can't back out, nor wait for any external event 49 * or resource. 50 * 51 * 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they 52 * all depart together as a single train! 53 * 54 * 5) At the end of the ride, you climb out of the car and RENOUNCE 55 * your right to it, so that it can be recycled for another rider. 56 * 57 * For each rider, these have to occur in this order, but the riders 58 * don't have to stay in the same order at each stage. In particular, 59 * they may overtake each other between RESERVING a place and CLAIMING 60 * it, or between CLAIMING and OCCUPYING a space. 61 * 62 * Once a car is CLAIMED, the train currently being assembled can't go 63 * without that car (this guarantees that the cars in a single train 64 * make up a consecutively-numbered set). Therefore, when any train 65 * leaves, we know there can't be any riders in transit between CLAIMING 66 * and OCCUPYING their cars. There can be some who have RESERVED but 67 * not yet CLAIMED their places. That's OK, though, because they'll go 68 * into the next train. 69 */ 70 71 #define BGE_DBG BGE_DBG_SEND /* debug flag for this code */ 72 73 74 /* 75 * ========== Send-side recycle routines ========== 76 */ 77 78 /* 79 * Recycle all the completed buffers in the specified send ring up to 80 * (but not including) the consumer index in the status block. 81 * 82 * This function must advance (srp->tc_next) AND adjust (srp->tx_free) 83 * to account for the packets it has recycled. 84 * 85 * This is a trivial version that just does that and nothing more, but 86 * it suffices while there's only one method for sending messages (by 87 * copying) and that method doesn't need any special per-buffer action 88 * for recycling. 89 */ 90 static void bge_recycle_ring(bge_t *bgep, send_ring_t *srp); 91 #pragma inline(bge_recycle_ring) 92 93 static void 94 bge_recycle_ring(bge_t *bgep, send_ring_t *srp) 95 { 96 uint64_t slot; 97 uint64_t n; 98 99 _NOTE(ARGUNUSED(bgep)) 100 101 ASSERT(mutex_owned(srp->tc_lock)); 102 103 slot = *srp->cons_index_p; /* volatile */ 104 n = slot - srp->tc_next; 105 if (slot < srp->tc_next) 106 n += srp->desc.nslots; 107 108 /* 109 * We're about to release one or more places :-) 110 * These ASSERTions check that our invariants still hold: 111 * there must always be at least one free place 112 * at this point, there must be at least one place NOT free 113 * we're not about to free more places than were claimed! 114 */ 115 ASSERT(srp->tx_free > 0); 116 117 srp->tc_next = slot; 118 bge_atomic_renounce(&srp->tx_free, n); 119 120 /* 121 * Reset the watchdog count: to 0 if all buffers are 122 * now free, or to 1 if some are still outstanding. 123 * Note: non-synchonised access here means we may get 124 * the "wrong" answer, but only in a harmless fashion 125 * (i.e. we deactivate the watchdog because all buffers 126 * are apparently free, even though another thread may 127 * have claimed one before we leave here; in this case 128 * the watchdog will restart on the next send() call). 129 */ 130 bgep->watchdog = srp->tx_free == srp->desc.nslots ? 0 : 1; 131 } 132 133 /* 134 * Recycle all returned slots in all rings. 135 * 136 * To give priority to low-numbered rings, whenever we have recycled any 137 * slots in any ring except 0, we restart scanning again from ring 0. 138 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the 139 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0: 140 * 141 * 0 found some - recycle them 142 * 1..2 none found 143 * 3 found some - recycle them and restart scan 144 * 0..9 none found 145 * 10 found some - recycle them and restart scan 146 * 0..2 none found 147 * 3 found some more - recycle them and restart scan 148 * 0 found some more - recycle them 149 * 0..9 none found 150 * 10 found some more - recycle them and restart scan 151 * 0 found some more - recycle them 152 * 1..15 none found 153 * 154 * The routine returns only when a complete scan has been performed 155 * without finding any slots to recycle. 156 * 157 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time 158 * constant and allows the compiler to optimise away the outer do-loop 159 * if only one send ring is being used. 160 */ 161 void bge_recycle(bge_t *bgep, bge_status_t *bsp); 162 #pragma no_inline(bge_recycle) 163 164 void 165 bge_recycle(bge_t *bgep, bge_status_t *bsp) 166 { 167 send_ring_t *srp; 168 uint64_t ring; 169 uint64_t tx_rings = bgep->chipid.tx_rings; 170 171 restart: 172 ring = 0; 173 srp = &bgep->send[ring]; 174 do { 175 /* 176 * For each ring, (srp->cons_index_p) points to the 177 * proper index within the status block (which has 178 * already been sync'd by the caller). 179 */ 180 ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring)); 181 182 if (*srp->cons_index_p == srp->tc_next) 183 continue; /* no slots to recycle */ 184 185 mutex_enter(srp->tc_lock); 186 bge_recycle_ring(bgep, srp); 187 mutex_exit(srp->tc_lock); 188 189 if (bgep->resched_needed && !bgep->resched_running) { 190 bgep->resched_running = B_TRUE; 191 ddi_trigger_softintr(bgep->resched_id); 192 } 193 /* 194 * Restart from ring 0, if we're not on ring 0 already. 195 * As H/W selects send BDs totally based on priority and 196 * available BDs on the higher priority ring are always 197 * selected first, driver should keep consistence with H/W 198 * and gives lower-numbered ring with higher priority. 199 */ 200 if (tx_rings > 1 && ring > 0) 201 goto restart; 202 203 /* 204 * Loop over all rings (if there *are* multiple rings) 205 */ 206 } while (++srp, ++ring < tx_rings); 207 } 208 209 210 /* 211 * ========== Send-side transmit routines ========== 212 */ 213 214 /* 215 * CLAIM an already-reserved place on the next train 216 * 217 * This is the point of no return! 218 */ 219 static uint64_t bge_send_claim(bge_t *bgep, send_ring_t *srp); 220 #pragma inline(bge_send_claim) 221 222 static uint64_t 223 bge_send_claim(bge_t *bgep, send_ring_t *srp) 224 { 225 uint64_t slot; 226 227 mutex_enter(srp->tx_lock); 228 atomic_add_64(&srp->tx_flow, 1); 229 slot = bge_atomic_claim(&srp->tx_next, srp->desc.nslots); 230 mutex_exit(srp->tx_lock); 231 232 /* 233 * Bump the watchdog counter, thus guaranteeing that it's 234 * nonzero (watchdog activated). Note that non-synchonised 235 * access here means we may race with the reclaim() code 236 * above, but the outcome will be harmless. At worst, the 237 * counter may not get reset on a partial reclaim; but the 238 * large trigger threshold makes false positives unlikely 239 */ 240 bgep->watchdog += 1; 241 242 return (slot); 243 } 244 245 #define TCP_CKSUM_OFFSET 16 246 #define UDP_CKSUM_OFFSET 6 247 248 static void 249 bge_pseudo_cksum(uint8_t *buf) 250 { 251 uint32_t cksum; 252 uint16_t iphl; 253 uint16_t proto; 254 255 /* 256 * Point it to the ip header. 257 */ 258 buf += sizeof (struct ether_header); 259 260 /* 261 * Calculate the pseudo-header checksum. 262 */ 263 iphl = 4 * (buf[0] & 0xF); 264 cksum = (((uint16_t)buf[2])<<8) + buf[3] - iphl; 265 cksum += proto = buf[9]; 266 cksum += (((uint16_t)buf[12])<<8) + buf[13]; 267 cksum += (((uint16_t)buf[14])<<8) + buf[15]; 268 cksum += (((uint16_t)buf[16])<<8) + buf[17]; 269 cksum += (((uint16_t)buf[18])<<8) + buf[19]; 270 cksum = (cksum>>16) + (cksum & 0xFFFF); 271 cksum = (cksum>>16) + (cksum & 0xFFFF); 272 273 /* 274 * Point it to the TCP/UDP header, and 275 * update the checksum field. 276 */ 277 buf += iphl + ((proto == IPPROTO_TCP) ? 278 TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET); 279 280 *(uint16_t *)buf = htons((uint16_t)cksum); 281 } 282 283 /* 284 * Send a message by copying it into a preallocated (and premapped) buffer 285 */ 286 static enum send_status bge_send_copy(bge_t *bgep, mblk_t *mp, 287 send_ring_t *srp, uint16_t tci); 288 #pragma inline(bge_send_copy) 289 290 static enum send_status 291 bge_send_copy(bge_t *bgep, mblk_t *mp, send_ring_t *srp, uint16_t tci) 292 { 293 bge_sbd_t *hw_sbd_p; 294 sw_sbd_t *ssbdp; 295 mblk_t *bp; 296 char *txb; 297 uint64_t slot; 298 size_t totlen; 299 size_t mblen; 300 uint32_t pflags; 301 302 BGE_TRACE(("bge_send_copy($%p, $%p, $%p, 0x%x)", 303 (void *)bgep, (void *)mp, (void *)srp)); 304 305 /* 306 * IMPORTANT: 307 * Up to the point where it claims a place, a send_msg() 308 * routine can indicate failure by returning SEND_FAIL. 309 * Once it's claimed a place, it mustn't fail. 310 * 311 * In this version, there's no setup to be done here, and there's 312 * nothing that can fail, so we can go straight to claiming our 313 * already-reserved place on the train. 314 * 315 * This is the point of no return! 316 */ 317 slot = bge_send_claim(bgep, srp); 318 ssbdp = &srp->sw_sbds[slot]; 319 320 /* 321 * Copy the data into a pre-mapped buffer, which avoids the 322 * overhead (and complication) of mapping/unmapping STREAMS 323 * buffers and keeping hold of them until the DMA has completed. 324 * 325 * Because all buffers are the same size, and larger than the 326 * longest single valid message, we don't have to bother about 327 * splitting the message across multiple buffers either. 328 */ 329 txb = DMA_VPTR(ssbdp->pbuf); 330 totlen = 0; 331 bp = mp; 332 if (tci != 0) { 333 mblen = bp->b_wptr - bp->b_rptr; 334 335 ASSERT(mblen >= 2 * ETHERADDRL + VLAN_TAGSZ); 336 337 bcopy(bp->b_rptr, txb, 2 * ETHERADDRL); 338 txb += 2 * ETHERADDRL; 339 totlen = 2 * ETHERADDRL; 340 341 if (mblen -= 2 * ETHERADDRL + VLAN_TAGSZ) { 342 if ((totlen += mblen) <= bgep->chipid.ethmax_size) { 343 bcopy(bp->b_wptr-mblen, txb, mblen); 344 txb += mblen; 345 } 346 } 347 bp = bp->b_cont; 348 } 349 for (; bp != NULL; bp = bp->b_cont) { 350 mblen = bp->b_wptr - bp->b_rptr; 351 if ((totlen += mblen) <= bgep->chipid.ethmax_size) { 352 bcopy(bp->b_rptr, txb, mblen); 353 txb += mblen; 354 } 355 } 356 357 /* 358 * We've reached the end of the chain; and we should have 359 * collected no more than ETHERMAX bytes into our buffer. 360 */ 361 ASSERT(bp == NULL); 362 ASSERT(totlen <= bgep->chipid.ethmax_size); 363 DMA_SYNC(ssbdp->pbuf, DDI_DMA_SYNC_FORDEV); 364 365 /* 366 * Update the hardware send buffer descriptor; then we're done. 367 * The return status indicates that the message can be freed 368 * right away, as we've already copied the contents ... 369 */ 370 hw_sbd_p = DMA_VPTR(ssbdp->desc); 371 hw_sbd_p->host_buf_addr = ssbdp->pbuf.cookie.dmac_laddress; 372 hw_sbd_p->len = totlen; 373 hw_sbd_p->flags = SBD_FLAG_PACKET_END; 374 if (tci != 0) { 375 hw_sbd_p->vlan_tci = tci; 376 hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG; 377 } 378 379 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 380 if (pflags & HCK_IPV4_HDRCKSUM) 381 hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM; 382 if (pflags & HCK_FULLCKSUM) { 383 hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM; 384 if (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM) 385 bge_pseudo_cksum((uint8_t *)DMA_VPTR(ssbdp->pbuf)); 386 } 387 388 return (SEND_FREE); 389 } 390 391 static boolean_t 392 bge_send(bge_t *bgep, mblk_t *mp) 393 { 394 send_ring_t *srp; 395 enum send_status status; 396 struct ether_vlan_header *ehp; 397 boolean_t need_strip = B_FALSE; 398 bge_status_t *bsp; 399 uint16_t tci; 400 uint_t ring = 0; 401 402 ASSERT(mp->b_next == NULL); 403 404 /* 405 * Determine if the packet is VLAN tagged. 406 */ 407 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 408 ehp = (struct ether_vlan_header *)mp->b_rptr; 409 410 if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) { 411 if (MBLKL(mp) < sizeof (struct ether_vlan_header)) { 412 uint32_t pflags; 413 414 /* 415 * Need to preserve checksum flags across pullup. 416 */ 417 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, 418 NULL, &pflags); 419 420 if (!pullupmsg(mp, 421 sizeof (struct ether_vlan_header))) { 422 BGE_DEBUG(("bge_send: pullup failure")); 423 bgep->resched_needed = B_TRUE; 424 return (B_FALSE); 425 } 426 427 (void) hcksum_assoc(mp, NULL, NULL, NULL, NULL, NULL, 428 NULL, pflags, KM_NOSLEEP); 429 } 430 431 ehp = (struct ether_vlan_header *)mp->b_rptr; 432 need_strip = B_TRUE; 433 } 434 435 /* 436 * Try to reserve a place in the chosen ring. Shouldn't try next 437 * higher-numbered (lower-priority) ring, if there aren't any 438 * available. Otherwise, packets with same priority may get 439 * transmission starvation. 440 */ 441 srp = &bgep->send[ring]; 442 if (!bge_atomic_reserve(&srp->tx_free, 1)) { 443 BGE_DEBUG(("bge_send: no free slots")); 444 bgep->resched_needed = B_TRUE; 445 return (B_FALSE); 446 } 447 448 /* 449 * Now that we know that there is space to transmit the packet 450 * strip any VLAN tag that is present. 451 */ 452 if (need_strip) { 453 tci = ntohs(ehp->ether_tci); 454 } else { 455 tci = 0; 456 } 457 458 if (srp->tx_free <= 16) { 459 bsp = DMA_VPTR(bgep->status_block); 460 bge_recycle(bgep, bsp); 461 } 462 /* 463 * We've reserved a place :-) 464 * These ASSERTions check that our invariants still hold: 465 * there must still be at least one free place 466 * there must be at least one place NOT free (ours!) 467 */ 468 ASSERT(srp->tx_free > 0); 469 470 if ((status = bge_send_copy(bgep, mp, srp, tci)) == SEND_FAIL) { 471 /* 472 * The send routine failed :( So we have to renounce 473 * our reservation before returning the error. 474 */ 475 bge_atomic_renounce(&srp->tx_free, 1); 476 bgep->resched_needed = B_TRUE; 477 return (B_FALSE); 478 } 479 480 /* 481 * The send routine succeeded; it will have updated the 482 * h/w ring descriptor, and the <tx_next> and <tx_flow> 483 * counters. 484 * 485 * Because there can be multiple concurrent threads in 486 * transit through this code, we only want to prod the 487 * hardware once the last one is departing ... 488 */ 489 mutex_enter(srp->tx_lock); 490 if (--srp->tx_flow == 0) { 491 DMA_SYNC(srp->desc, DDI_DMA_SYNC_FORDEV); 492 bge_mbx_put(bgep, srp->chip_mbx_reg, srp->tx_next); 493 if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK) 494 bgep->bge_chip_state = BGE_CHIP_ERROR; 495 } 496 mutex_exit(srp->tx_lock); 497 498 if (status == SEND_FREE) 499 freemsg(mp); 500 return (B_TRUE); 501 } 502 503 uint_t 504 bge_reschedule(caddr_t arg) 505 { 506 bge_t *bgep; 507 508 bgep = (bge_t *)arg; 509 510 BGE_TRACE(("bge_reschedule($%p)", (void *)bgep)); 511 512 if (bgep->bge_mac_state == BGE_MAC_STARTED && bgep->resched_needed) { 513 mac_tx_update(bgep->mh); 514 bgep->resched_needed = B_FALSE; 515 bgep->resched_running = B_FALSE; 516 } 517 518 return (DDI_INTR_CLAIMED); 519 } 520 521 /* 522 * bge_m_tx() - send a chain of packets 523 */ 524 mblk_t * 525 bge_m_tx(void *arg, mblk_t *mp) 526 { 527 bge_t *bgep = arg; /* private device info */ 528 mblk_t *next; 529 530 BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp)); 531 532 ASSERT(mp != NULL); 533 ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED); 534 535 if (bgep->bge_chip_state != BGE_CHIP_RUNNING) { 536 BGE_DEBUG(("bge_m_tx: chip not running")); 537 return (mp); 538 } 539 540 rw_enter(bgep->errlock, RW_READER); 541 while (mp != NULL) { 542 next = mp->b_next; 543 mp->b_next = NULL; 544 545 if (!bge_send(bgep, mp)) { 546 mp->b_next = next; 547 break; 548 } 549 550 mp = next; 551 } 552 rw_exit(bgep->errlock); 553 554 return (mp); 555 } 556