1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010-2013, by Broadcom, Inc.
24 * All Rights Reserved.
25 */
26
27 /*
28 * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
29 * All rights reserved.
30 */
31
32 #include "bge_impl.h"
33
34
35 /*
36 * The transmit-side code uses an allocation process which is similar
37 * to some theme park roller-coaster rides, where riders sit in cars
38 * that can go individually, but work better in a train.
39 *
40 * 1) RESERVE a place - this doesn't refer to any specific car or
41 * seat, just that you will get a ride. The attempt to RESERVE a
42 * place can fail if all spaces in all cars are already committed.
43 *
44 * 2) Prepare yourself; this may take an arbitrary (but not unbounded)
45 * time, and you can back out at this stage, in which case you must
46 * give up (RENOUNCE) your place.
47 *
48 * 3) CLAIM your space - a specific car (the next sequentially
49 * numbered one) is allocated at this stage, and is guaranteed
50 * to be part of the next train to depart. Once you've done
51 * this, you can't back out, nor wait for any external event
52 * or resource.
53 *
54 * 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they
55 * all depart together as a single train!
56 *
57 * 5) At the end of the ride, you climb out of the car and RENOUNCE
58 * your right to it, so that it can be recycled for another rider.
59 *
60 * For each rider, these have to occur in this order, but the riders
61 * don't have to stay in the same order at each stage. In particular,
62 * they may overtake each other between RESERVING a place and CLAIMING
63 * it, or between CLAIMING and OCCUPYING a space.
64 *
65 * Once a car is CLAIMED, the train currently being assembled can't go
66 * without that car (this guarantees that the cars in a single train
67 * make up a consecutively-numbered set). Therefore, when any train
68 * leaves, we know there can't be any riders in transit between CLAIMING
69 * and OCCUPYING their cars. There can be some who have RESERVED but
70 * not yet CLAIMED their places. That's OK, though, because they'll go
71 * into the next train.
72 */
73
74 #define BGE_DBG BGE_DBG_SEND /* debug flag for this code */
75
76 /*
77 * ========== Send-side recycle routines ==========
78 */
79
80 /*
81 * Recycle all the completed buffers in the specified send ring up to
82 * (but not including) the consumer index in the status block.
83 *
84 * This function must advance (srp->tc_next) AND adjust (srp->tx_free)
85 * to account for the packets it has recycled.
86 *
87 * This is a trivial version that just does that and nothing more, but
88 * it suffices while there's only one method for sending messages (by
89 * copying) and that method doesn't need any special per-buffer action
90 * for recycling.
91 */
92 static boolean_t bge_recycle_ring(bge_t *bgep, send_ring_t *srp);
93 #pragma inline(bge_recycle_ring)
94
95 static boolean_t
bge_recycle_ring(bge_t * bgep,send_ring_t * srp)96 bge_recycle_ring(bge_t *bgep, send_ring_t *srp)
97 {
98 sw_sbd_t *ssbdp;
99 bge_queue_item_t *buf_item;
100 bge_queue_item_t *buf_item_head;
101 bge_queue_item_t *buf_item_tail;
102 bge_queue_t *txbuf_queue;
103 uint64_t slot;
104 uint64_t n;
105
106 ASSERT(mutex_owned(srp->tc_lock));
107
108 /*
109 * We're about to release one or more places :-)
110 * These ASSERTions check that our invariants still hold:
111 * there must always be at least one free place
112 * at this point, there must be at least one place NOT free
113 * we're not about to free more places than were claimed!
114 */
115 ASSERT(srp->tx_free <= srp->desc.nslots);
116
117 buf_item_head = buf_item_tail = NULL;
118 for (n = 0, slot = srp->tc_next; slot != *srp->cons_index_p;
119 slot = NEXT(slot, srp->desc.nslots)) {
120 ssbdp = &srp->sw_sbds[slot];
121 ASSERT(ssbdp->pbuf != NULL);
122 buf_item = ssbdp->pbuf;
123 if (buf_item_head == NULL)
124 buf_item_head = buf_item_tail = buf_item;
125 else {
126 buf_item_tail->next = buf_item;
127 buf_item_tail = buf_item;
128 }
129 ssbdp->pbuf = NULL;
130 n++;
131 }
132 if (n == 0)
133 return (B_FALSE);
134
135 /*
136 * Reset the watchdog count: to 0 if all buffers are
137 * now free, or to 1 if some are still outstanding.
138 * Note: non-synchonised access here means we may get
139 * the "wrong" answer, but only in a harmless fashion
140 * (i.e. we deactivate the watchdog because all buffers
141 * are apparently free, even though another thread may
142 * have claimed one before we leave here; in this case
143 * the watchdog will restart on the next send() call).
144 */
145 bgep->watchdog = (slot == srp->tx_next) ? 0 : 1;
146
147 /*
148 * Update recycle index and free tx BD number
149 */
150 srp->tc_next = slot;
151 ASSERT(srp->tx_free + n <= srp->desc.nslots);
152 bge_atomic_renounce(&srp->tx_free, n);
153
154 /*
155 * Return tx buffers to buffer push queue
156 */
157 txbuf_queue = srp->txbuf_push_queue;
158 mutex_enter(txbuf_queue->lock);
159 buf_item_tail->next = txbuf_queue->head;
160 txbuf_queue->head = buf_item_head;
161 txbuf_queue->count += n;
162 mutex_exit(txbuf_queue->lock);
163
164 /*
165 * Check if we need exchange the tx buffer push and pop queue
166 */
167 if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) &&
168 (srp->txbuf_pop_queue->count < txbuf_queue->count)) {
169 srp->txbuf_push_queue = srp->txbuf_pop_queue;
170 srp->txbuf_pop_queue = txbuf_queue;
171 }
172
173 if (srp->tx_flow != 0 || bgep->tx_resched_needed)
174 ddi_trigger_softintr(bgep->drain_id);
175
176 return (B_TRUE);
177 }
178
179 /*
180 * Recycle all returned slots in all rings.
181 *
182 * To give priority to low-numbered rings, whenever we have recycled any
183 * slots in any ring except 0, we restart scanning again from ring 0.
184 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
185 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0:
186 *
187 * 0 found some - recycle them
188 * 1..2 none found
189 * 3 found some - recycle them and restart scan
190 * 0..9 none found
191 * 10 found some - recycle them and restart scan
192 * 0..2 none found
193 * 3 found some more - recycle them and restart scan
194 * 0 found some more - recycle them
195 * 0..9 none found
196 * 10 found some more - recycle them and restart scan
197 * 0 found some more - recycle them
198 * 1..15 none found
199 *
200 * The routine returns only when a complete scan has been performed
201 * without finding any slots to recycle.
202 *
203 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time
204 * constant and allows the compiler to optimise away the outer do-loop
205 * if only one send ring is being used.
206 */
207 boolean_t bge_recycle(bge_t *bgep, bge_status_t *bsp);
208 #pragma no_inline(bge_recycle)
209
210 boolean_t
bge_recycle(bge_t * bgep,bge_status_t * bsp)211 bge_recycle(bge_t *bgep, bge_status_t *bsp)
212 {
213 send_ring_t *srp;
214 uint64_t ring;
215 uint64_t tx_rings = bgep->chipid.tx_rings;
216 boolean_t tx_done = B_FALSE;
217
218 restart:
219 ring = 0;
220 srp = &bgep->send[ring];
221 do {
222 /*
223 * For each ring, (srp->cons_index_p) points to the
224 * proper index within the status block (which has
225 * already been sync'd by the caller).
226 */
227 ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring));
228
229 if (*srp->cons_index_p == srp->tc_next)
230 continue; /* no slots to recycle */
231 if (mutex_tryenter(srp->tc_lock) == 0)
232 continue; /* already in process */
233 tx_done |= bge_recycle_ring(bgep, srp);
234 mutex_exit(srp->tc_lock);
235
236 /*
237 * Restart from ring 0, if we're not on ring 0 already.
238 * As H/W selects send BDs totally based on priority and
239 * available BDs on the higher priority ring are always
240 * selected first, driver should keep consistence with H/W
241 * and gives lower-numbered ring with higher priority.
242 */
243 if (tx_rings > 1 && ring > 0)
244 goto restart;
245
246 /*
247 * Loop over all rings (if there *are* multiple rings)
248 */
249 } while (++srp, ++ring < tx_rings);
250
251 return (tx_done);
252 }
253
254
255 /*
256 * ========== Send-side transmit routines ==========
257 */
#define	TCP_CKSUM_OFFSET	16
#define	UDP_CKSUM_OFFSET	6

/*
 * Compute the TCP/UDP pseudo-header checksum of an IPv4 frame and store
 * it in the transport header's checksum field.
 *
 * <buf> points at the start of the Ethernet header (untagged layout:
 * the IPv4 header is expected immediately after a struct ether_header).
 *
 * The sum covers the transport length (IP total length minus IP header
 * length), the protocol number, and the source and destination
 * addresses; it is folded to 16 bits and written, NOT complemented, in
 * network byte order.  The checksum field is located at
 * TCP_CKSUM_OFFSET into the transport header when the protocol is TCP,
 * and at UDP_CKSUM_OFFSET for anything else.
 *
 * No length validation is performed; the caller must supply a buffer
 * that contains the complete headers.
 */
static void
bge_pseudo_cksum(uint8_t *buf)
{
	uint32_t cksum;
	uint16_t iphl;
	uint16_t proto;

	/*
	 * Step over the Ethernet header to the IP header.
	 */
	buf += sizeof (struct ether_header);

	/*
	 * Accumulate the pseudo-header fields as big-endian 16-bit words.
	 */
	iphl = 4 * (buf[0] & 0xF);
	cksum = (((uint16_t)buf[2]) << 8) + buf[3] - iphl;
	proto = buf[9];
	cksum += proto;
	cksum += (((uint16_t)buf[12]) << 8) + buf[13];
	cksum += (((uint16_t)buf[14]) << 8) + buf[15];
	cksum += (((uint16_t)buf[16]) << 8) + buf[17];
	cksum += (((uint16_t)buf[18]) << 8) + buf[19];

	/*
	 * Fold the carries back in; two rounds are enough to bring the
	 * value down to 16 bits.
	 */
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	cksum = (cksum >> 16) + (cksum & 0xFFFF);

	/*
	 * Step to the checksum field inside the TCP/UDP header.
	 */
	buf += iphl + ((proto == IPPROTO_TCP) ?
	    TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET);

	/*
	 * Store the result one byte at a time, most significant byte
	 * first.  This yields exactly the bytes that htons() plus a
	 * 16-bit store would, but does not depend on <buf> being
	 * 2-byte aligned and avoids casting a byte pointer to
	 * (uint16_t *).
	 */
	buf[0] = (uint8_t)(cksum >> 8);
	buf[1] = (uint8_t)(cksum & 0xFF);
}
300
301 static bge_queue_item_t *
bge_get_txbuf(bge_t * bgep,send_ring_t * srp)302 bge_get_txbuf(bge_t *bgep, send_ring_t *srp)
303 {
304 bge_queue_item_t *txbuf_item;
305 bge_queue_t *txbuf_queue;
306
307 txbuf_queue = srp->txbuf_pop_queue;
308 mutex_enter(txbuf_queue->lock);
309 if (txbuf_queue->count == 0) {
310 mutex_exit(txbuf_queue->lock);
311 txbuf_queue = srp->txbuf_push_queue;
312 mutex_enter(txbuf_queue->lock);
313 if (txbuf_queue->count == 0) {
314 mutex_exit(txbuf_queue->lock);
315 /* Try to allocate more tx buffers */
316 if (srp->tx_array < srp->tx_array_max) {
317 mutex_enter(srp->tx_lock);
318 txbuf_item = bge_alloc_txbuf_array(bgep, srp);
319 mutex_exit(srp->tx_lock);
320 } else
321 txbuf_item = NULL;
322 return (txbuf_item);
323 }
324 }
325 txbuf_item = txbuf_queue->head;
326 txbuf_queue->head = (bge_queue_item_t *)txbuf_item->next;
327 txbuf_queue->count--;
328 mutex_exit(txbuf_queue->lock);
329 txbuf_item->next = NULL;
330
331 return (txbuf_item);
332 }
333
334 /*
335 * Send a message by copying it into a preallocated (and premapped) buffer
336 */
337 static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp);
338 #pragma inline(bge_send_copy)
339
340 static void
bge_send_copy(bge_t * bgep,sw_txbuf_t * txbuf,mblk_t * mp)341 bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp)
342 {
343 mblk_t *bp;
344 uint32_t mblen;
345 char *pbuf;
346
347 txbuf->copy_len = 0;
348 pbuf = DMA_VPTR(txbuf->buf);
349 for (bp = mp; bp != NULL; bp = bp->b_cont) {
350 if ((mblen = MBLKL(bp)) == 0)
351 continue;
352 ASSERT(txbuf->copy_len + mblen <=
353 bgep->chipid.snd_buff_size);
354 bcopy(bp->b_rptr, pbuf, mblen);
355 pbuf += mblen;
356 txbuf->copy_len += mblen;
357 }
358 }
359
360 /*
361 * Fill the Tx buffer descriptors and trigger the h/w transmission
362 */
363 static void
bge_send_serial(bge_t * bgep,send_ring_t * srp)364 bge_send_serial(bge_t *bgep, send_ring_t *srp)
365 {
366 send_pkt_t *pktp;
367 uint64_t txfill_next;
368 uint32_t count;
369 uint32_t tx_next;
370 sw_sbd_t *ssbdp;
371 bge_status_t *bsp;
372 bge_sbd_t *hw_sbd_p;
373 bge_queue_item_t *txbuf_item;
374 sw_txbuf_t *txbuf;
375
376 /*
377 * Try to hold the tx lock:
378 * If we are in an interrupt context, use mutex_enter() to
379 * ensure quick response for tx in interrupt context;
380 * Otherwise, use mutex_tryenter() to serialize this h/w tx
381 * BD filling and transmission triggering task.
382 */
383 if (servicing_interrupt() != 0)
384 mutex_enter(srp->tx_lock);
385 else if (mutex_tryenter(srp->tx_lock) == 0)
386 return; /* already in process */
387
388 bsp = DMA_VPTR(bgep->status_block);
389 txfill_next = srp->txfill_next;
390 tx_next = srp->tx_next;
391 start_tx:
392 for (count = 0; count < bgep->param_drain_max; ++count) {
393 pktp = &srp->pktp[txfill_next];
394 if (!pktp->tx_ready) {
395 if (count == 0)
396 srp->tx_block++;
397 break;
398 }
399
400 /*
401 * If there are no enough BDs: try to recycle more
402 */
403 if (srp->tx_free <= 1)
404 (void) bge_recycle(bgep, bsp);
405
406 /*
407 * Reserved required BDs: 1 is enough
408 */
409 if (!bge_atomic_reserve(&srp->tx_free, 1)) {
410 srp->tx_nobd++;
411 break;
412 }
413
414 /*
415 * Filling the tx BD
416 */
417
418 /*
419 * Go straight to claiming our already-reserved places
420 * on the train!
421 */
422 ASSERT(pktp->txbuf_item != NULL);
423 txbuf_item = pktp->txbuf_item;
424 pktp->txbuf_item = NULL;
425 pktp->tx_ready = B_FALSE;
426
427 txbuf = txbuf_item->item;
428 ASSERT(txbuf->copy_len != 0);
429 (void) ddi_dma_sync(txbuf->buf.dma_hdl, 0,
430 txbuf->copy_len, DDI_DMA_SYNC_FORDEV);
431
432 ssbdp = &srp->sw_sbds[tx_next];
433 ASSERT(ssbdp->pbuf == NULL);
434 ssbdp->pbuf = txbuf_item;
435
436 /*
437 * Setting hardware send buffer descriptor
438 */
439 hw_sbd_p = DMA_VPTR(ssbdp->desc);
440 hw_sbd_p->flags = 0;
441 hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress;
442 hw_sbd_p->len = txbuf->copy_len;
443 if (pktp->vlan_tci != 0) {
444 hw_sbd_p->vlan_tci = pktp->vlan_tci;
445 hw_sbd_p->host_buf_addr += VLAN_TAGSZ;
446 hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG;
447 }
448 if (pktp->pflags & HCK_IPV4_HDRCKSUM)
449 hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM;
450 if (pktp->pflags & HCK_FULLCKSUM)
451 hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM;
452 if (!(bgep->chipid.flags & CHIP_FLAG_NO_JUMBO) &&
453 (DEVICE_5717_SERIES_CHIPSETS(bgep) ||
454 DEVICE_5725_SERIES_CHIPSETS(bgep)) &&
455 (txbuf->copy_len > ETHERMAX))
456 hw_sbd_p->flags |= SBD_FLAG_JMB_PKT;
457 hw_sbd_p->flags |= SBD_FLAG_PACKET_END;
458
459 txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX);
460 tx_next = NEXT(tx_next, srp->desc.nslots);
461 }
462
463 /*
464 * Trigger h/w to start transmission.
465 */
466 if (count != 0) {
467 bge_atomic_sub64(&srp->tx_flow, count);
468 srp->txfill_next = txfill_next;
469
470 if (srp->tx_next > tx_next) {
471 (void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
472 (srp->desc.nslots - srp->tx_next) *
473 sizeof (bge_sbd_t),
474 DDI_DMA_SYNC_FORDEV);
475 count -= srp->desc.nslots - srp->tx_next;
476 ssbdp = &srp->sw_sbds[0];
477 }
478 (void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
479 count*sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV);
480 bge_mbx_put(bgep, srp->chip_mbx_reg, tx_next);
481 srp->tx_next = tx_next;
482 atomic_or_32(&bgep->watchdog, 1);
483
484 if (srp->tx_flow != 0 && srp->tx_free > 1)
485 goto start_tx;
486 }
487
488 mutex_exit(srp->tx_lock);
489 }
490
491 mblk_t *
bge_ring_tx(void * arg,mblk_t * mp)492 bge_ring_tx(void *arg, mblk_t *mp)
493 {
494 send_ring_t *srp = arg;
495 bge_t *bgep = srp->bgep;
496 struct ether_vlan_header *ehp;
497 bge_queue_item_t *txbuf_item;
498 sw_txbuf_t *txbuf;
499 send_pkt_t *pktp;
500 uint64_t pkt_slot;
501 uint16_t vlan_tci;
502 uint32_t pflags;
503 char *pbuf;
504
505 ASSERT(mp->b_next == NULL);
506
507 /*
508 * Get a s/w tx buffer first
509 */
510 txbuf_item = bge_get_txbuf(bgep, srp);
511 if (txbuf_item == NULL) {
512 /* no tx buffer available */
513 srp->tx_nobuf++;
514 bgep->tx_resched_needed = B_TRUE;
515 bge_send_serial(bgep, srp);
516 return (mp);
517 }
518
519 /*
520 * Copy all mp fragments to the pkt buffer
521 */
522 txbuf = txbuf_item->item;
523 bge_send_copy(bgep, txbuf, mp);
524
525 /*
526 * Determine if the packet is VLAN tagged.
527 */
528 ASSERT(txbuf->copy_len >= sizeof (struct ether_header));
529 pbuf = DMA_VPTR(txbuf->buf);
530
531 ehp = (void *)pbuf;
532 if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) {
533 /* Strip the vlan tag */
534 vlan_tci = ntohs(ehp->ether_tci);
535 pbuf = memmove(pbuf + VLAN_TAGSZ, pbuf, 2 * ETHERADDRL);
536 txbuf->copy_len -= VLAN_TAGSZ;
537 } else
538 vlan_tci = 0;
539
540 /*
541 * Retrieve checksum offloading info.
542 */
543 mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
544
545 /*
546 * Calculate pseudo checksum if needed.
547 */
548 if ((pflags & HCK_FULLCKSUM) &&
549 (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM))
550 bge_pseudo_cksum((uint8_t *)pbuf);
551
552 /*
553 * Packet buffer is ready to send: get and fill pkt info
554 */
555 pkt_slot = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX);
556 pktp = &srp->pktp[pkt_slot];
557 ASSERT(pktp->txbuf_item == NULL);
558 pktp->txbuf_item = txbuf_item;
559 pktp->vlan_tci = vlan_tci;
560 pktp->pflags = pflags;
561 atomic_inc_64(&srp->tx_flow);
562 ASSERT(pktp->tx_ready == B_FALSE);
563 pktp->tx_ready = B_TRUE;
564
565 /*
566 * Filling the h/w bd and trigger the h/w to start transmission
567 */
568 bge_send_serial(bgep, srp);
569
570 srp->pushed_bytes += MBLKL(mp);
571
572 /*
573 * We've copied the contents, the message can be freed right away
574 */
575 freemsg(mp);
576 return (NULL);
577 }
578
579 static mblk_t *
bge_send(bge_t * bgep,mblk_t * mp)580 bge_send(bge_t *bgep, mblk_t *mp)
581 {
582 send_ring_t *ring;
583
584 ring = &bgep->send[0]; /* ring 0 */
585
586 return (bge_ring_tx(ring, mp));
587 }
588
589 uint_t
bge_send_drain(caddr_t arg)590 bge_send_drain(caddr_t arg)
591 {
592 uint_t ring = 0; /* use ring 0 */
593 bge_t *bgep;
594 send_ring_t *srp;
595
596 bgep = (void *)arg;
597 BGE_TRACE(("bge_send_drain($%p)", (void *)bgep));
598
599 srp = &bgep->send[ring];
600 bge_send_serial(bgep, srp);
601
602 if (bgep->tx_resched_needed &&
603 (srp->tx_flow < srp->tx_buffers_low) &&
604 (bgep->bge_mac_state == BGE_MAC_STARTED)) {
605 mac_tx_update(bgep->mh);
606 bgep->tx_resched_needed = B_FALSE;
607 bgep->tx_resched++;
608 }
609
610 return (DDI_INTR_CLAIMED);
611 }
612
613 /*
614 * bge_m_tx() - send a chain of packets
615 */
616 mblk_t *
bge_m_tx(void * arg,mblk_t * mp)617 bge_m_tx(void *arg, mblk_t *mp)
618 {
619 bge_t *bgep = arg; /* private device info */
620 mblk_t *next;
621
622 BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp));
623
624 ASSERT(mp != NULL);
625 ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED);
626
627 rw_enter(bgep->errlock, RW_READER);
628 if ((bgep->bge_chip_state != BGE_CHIP_RUNNING) ||
629 !(bgep->param_link_up)) {
630 BGE_DEBUG(("bge_m_tx: chip not running or link down"));
631 freemsgchain(mp);
632 mp = NULL;
633 }
634
635 while (mp != NULL) {
636 next = mp->b_next;
637 mp->b_next = NULL;
638
639 if ((mp = bge_send(bgep, mp)) != NULL) {
640 mp->b_next = next;
641 break;
642 }
643
644 mp = next;
645 }
646 rw_exit(bgep->errlock);
647
648 return (mp);
649 }
650