/*-
 * Copyright (c) 2014 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>

#include "t4_mp_ring.h"

#if defined(__i386__)
#define atomic_cmpset_acq_64 atomic_cmpset_64
#define atomic_cmpset_rel_64 atomic_cmpset_64
#endif

/*
 * mp_ring handles multiple threads (producers) enqueueing data to a tx queue.
 * The thread that is writing the hardware descriptors is the consumer and it
 * runs with the consumer lock held.  A producer becomes the consumer if there
 * isn't one already.  The consumer runs with the flags set to BUSY and either
 * consumes everything (ending in IDLE or COALESCING) or gets STALLED.  If it
 * runs over its budget it sets the flags to TOO_BUSY.  A producer that
 * observes a TOO_BUSY consumer will become the new consumer by setting the
 * flags to TAKING_OVER.  The original consumer stops and sets the flags back
 * to BUSY for the new consumer.
 *
 * COALESCING is the same as IDLE except that items are being held back in the
 * hope that they can be coalesced with items that follow.  The driver must
 * arrange for a tx update or some other event that transmits all the held
 * items in a timely manner if nothing else is enqueued.
 */

union ring_state {
	struct {
		uint16_t pidx_head;
		uint16_t pidx_tail;
		uint16_t cidx;
		uint16_t flags;
	};
	uint64_t state;
};
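/*
 * Illustrative note, not compiled code: the four 16-bit fields above alias
 * the single 64-bit 'state' word, so one atomic load observes all of them
 * as a consistent snapshot and one cmpset updates them together.  A minimal
 * sketch of the pattern used throughout this file:
 *
 *	union ring_state s;
 *
 *	s.state = atomic_load_64(&r->state);
 *	// s.pidx_head, s.pidx_tail, s.cidx, and s.flags describe a single
 *	// moment in time; s.pidx_head != s.pidx_tail means some producer
 *	// holds a reservation it has not yet committed.
 */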

enum {
	IDLE = 0,	/* tx is all caught up, nothing to do. */
	COALESCING,	/* IDLE, but tx frames are being held for coalescing */
	BUSY,		/* consumer is running already, or will be shortly. */
	TOO_BUSY,	/* consumer is running and is beyond its budget */
	TAKING_OVER,	/* new consumer taking over from a TOO_BUSY consumer */
	STALLED,	/* consumer stopped due to lack of resources. */
};

enum {
	C_FAST = 0,
	C_2,
	C_3,
	C_TAKEOVER,
};

static inline uint16_t
space_available(struct mp_ring *r, union ring_state s)
{
	uint16_t x = r->size - 1;

	if (s.cidx == s.pidx_head)
		return (x);
	else if (s.cidx > s.pidx_head)
		return (s.cidx - s.pidx_head - 1);
	else
		return (x - s.pidx_head + s.cidx);
}
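/*
 * Worked example (illustrative): with size = 8 at most 7 slots are usable;
 * one slot is always left empty so that a full ring is distinguishable from
 * an empty one.
 *	cidx == pidx_head		-> 7 free (ring is empty)
 *	cidx = 5, pidx_head = 2		-> 5 - 2 - 1 = 2 free
 *	cidx = 2, pidx_head = 5		-> 7 - 5 + 2 = 4 free (wrapped)
 */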

static inline uint16_t
increment_idx(struct mp_ring *r, uint16_t idx, uint16_t n)
{
	int x = r->size - idx;

	MPASS(x > 0);
	return (x > n ? idx + n : n - x);
}
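/*
 * Worked example (illustrative): with size = 8, advancing idx = 6 by n = 4
 * leaves x = 8 - 6 = 2 slots before the end of the ring, so the index wraps
 * to n - x = 2.  Advancing idx = 3 by 4 stays in range: 3 + 4 = 7.  Callers
 * only advance by amounts they have already reserved, so the result is
 * always a valid index.
 */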

/*
 * Consumer.  Called with the consumer lock held and a guarantee that there is
 * work to do.
 */
static void
drain_ring(struct mp_ring *r, int budget)
{
	union ring_state os, ns;
	int n, pending, total;
	uint16_t cidx;
	uint16_t pidx;
	bool coalescing;

	mtx_assert(r->cons_lock, MA_OWNED);

	os.state = atomic_load_acq_64(&r->state);
	MPASS(os.flags == BUSY);

	cidx = os.cidx;
	pidx = os.pidx_tail;
	MPASS(cidx != pidx);

	pending = 0;
	total = 0;

	while (cidx != pidx) {

		/* Items from cidx to pidx are available for consumption. */
		n = r->drain(r, cidx, pidx, &coalescing);
		if (n == 0) {
			critical_enter();
			os.state = atomic_load_64(&r->state);
			do {
				ns.state = os.state;
				ns.cidx = cidx;

				MPASS(os.flags == BUSY ||
				    os.flags == TOO_BUSY ||
				    os.flags == TAKING_OVER);

				if (os.flags == TAKING_OVER)
					ns.flags = BUSY;
				else
					ns.flags = STALLED;
			} while (atomic_fcmpset_64(&r->state, &os.state,
			    ns.state) == 0);
			critical_exit();
			if (os.flags == TAKING_OVER)
				counter_u64_add(r->abdications, 1);
			else if (ns.flags == STALLED)
				counter_u64_add(r->stalls, 1);
			break;
		}
		cidx = increment_idx(r, cidx, n);
		pending += n;
		total += n;
		counter_u64_add(r->consumed, n);

		os.state = atomic_load_64(&r->state);
		do {
			MPASS(os.flags == BUSY || os.flags == TOO_BUSY ||
			    os.flags == TAKING_OVER);

			ns.state = os.state;
			ns.cidx = cidx;
			if (__predict_false(os.flags == TAKING_OVER)) {
				MPASS(total >= budget);
				ns.flags = BUSY;
				continue;
			}
			if (cidx == os.pidx_tail) {
				ns.flags = coalescing ? COALESCING : IDLE;
				continue;
			}
			if (total >= budget) {
				ns.flags = TOO_BUSY;
				continue;
			}
			MPASS(os.flags == BUSY);
			if (pending < 32)
				break;
		} while (atomic_fcmpset_acq_64(&r->state, &os.state, ns.state) == 0);

		if (__predict_false(os.flags == TAKING_OVER)) {
			MPASS(ns.flags == BUSY);
			counter_u64_add(r->abdications, 1);
			break;
		}

		if (ns.flags == IDLE || ns.flags == COALESCING) {
			MPASS(ns.pidx_tail == cidx);
			if (ns.pidx_head != ns.pidx_tail)
				counter_u64_add(r->cons_idle2, 1);
			else
				counter_u64_add(r->cons_idle, 1);
			break;
		}

		/*
		 * The acquire style atomic above guarantees visibility of items
		 * associated with any pidx change that we notice here.
		 */
		pidx = ns.pidx_tail;
		pending = 0;
	}

#ifdef INVARIANTS
	if (os.flags == TAKING_OVER)
		MPASS(ns.flags == BUSY);
	else {
		MPASS(ns.flags == IDLE || ns.flags == COALESCING ||
		    ns.flags == STALLED);
	}
#endif
}

static void
drain_txpkts(struct mp_ring *r, union ring_state os, int budget)
{
	union ring_state ns;
	uint16_t cidx = os.cidx;
	uint16_t pidx = os.pidx_tail;
	bool coalescing;

	mtx_assert(r->cons_lock, MA_OWNED);
	MPASS(os.flags == BUSY);
	MPASS(cidx == pidx);

	r->drain(r, cidx, pidx, &coalescing);
	MPASS(coalescing == false);
	critical_enter();
	os.state = atomic_load_64(&r->state);
	do {
		ns.state = os.state;
		MPASS(os.flags == BUSY);
		MPASS(os.cidx == cidx);
		if (ns.cidx == ns.pidx_tail)
			ns.flags = IDLE;
		else
			ns.flags = BUSY;
	} while (atomic_fcmpset_acq_64(&r->state, &os.state, ns.state) == 0);
	critical_exit();

	if (ns.flags == BUSY)
		drain_ring(r, budget);
}

int
mp_ring_alloc(struct mp_ring **pr, int size, void *cookie, ring_drain_t drain,
    ring_can_drain_t can_drain, struct malloc_type *mt, struct mtx *lck,
    int flags)
{
	struct mp_ring *r;
	int i;

	/* All idx are 16b so size can be 65536 at most */
	if (pr == NULL || size < 2 || size > 65536 || drain == NULL ||
	    can_drain == NULL)
		return (EINVAL);
	*pr = NULL;
	flags &= M_NOWAIT | M_WAITOK;
	MPASS(flags != 0);

	r = malloc(__offsetof(struct mp_ring, items[size]), mt, flags | M_ZERO);
	if (r == NULL)
		return (ENOMEM);
	r->size = size;
	r->cookie = cookie;
	r->mt = mt;
	r->drain = drain;
	r->can_drain = can_drain;
	r->cons_lock = lck;
	if ((r->dropped = counter_u64_alloc(flags)) == NULL)
		goto failed;
	for (i = 0; i < nitems(r->consumer); i++) {
		if ((r->consumer[i] = counter_u64_alloc(flags)) == NULL)
			goto failed;
	}
	if ((r->not_consumer = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->abdications = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->stalls = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->consumed = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->cons_idle = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->cons_idle2 = counter_u64_alloc(flags)) == NULL)
		goto failed;
	*pr = r;
	return (0);
failed:
	mp_ring_free(r);
	return (ENOMEM);
}
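/*
 * Illustrative allocation sketch (hypothetical caller: 'txq', 'my_drain',
 * and 'my_can_drain' are made-up names, with signatures as declared by
 * ring_drain_t and ring_can_drain_t in t4_mp_ring.h).  The drain callback
 * writes items[cidx .. pidx) to hardware and returns how many it consumed
 * (0 means out of resources); can_drain reports whether a stalled ring
 * could make progress again:
 *
 *	error = mp_ring_alloc(&txq->r, 1024, txq, my_drain, my_can_drain,
 *	    M_DEVBUF, &txq->lock, M_WAITOK);
 *	if (error != 0)
 *		return (error);
 *	...
 *	mp_ring_free(txq->r);
 */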

void
mp_ring_free(struct mp_ring *r)
{
	int i;

	if (r == NULL)
		return;

	if (r->dropped != NULL)
		counter_u64_free(r->dropped);
	for (i = 0; i < nitems(r->consumer); i++) {
		if (r->consumer[i] != NULL)
			counter_u64_free(r->consumer[i]);
	}
	if (r->not_consumer != NULL)
		counter_u64_free(r->not_consumer);
	if (r->abdications != NULL)
		counter_u64_free(r->abdications);
	if (r->stalls != NULL)
		counter_u64_free(r->stalls);
	if (r->consumed != NULL)
		counter_u64_free(r->consumed);
	if (r->cons_idle != NULL)
		counter_u64_free(r->cons_idle);
	if (r->cons_idle2 != NULL)
		counter_u64_free(r->cons_idle2);

	free(r, r->mt);
}

/*
 * Enqueue n items and maybe drain the ring for some time.
 *
 * Returns an errno.
 */
int
mp_ring_enqueue(struct mp_ring *r, void **items, int n, int budget)
{
	union ring_state os, ns;
	uint16_t pidx_start, pidx_stop;
	int i, nospc, cons;
	bool consumer;

	MPASS(items != NULL);
	MPASS(n > 0);

	/*
	 * Reserve room for the new items.  Our reservation, if successful, is
	 * from 'pidx_start' to 'pidx_stop'.
	 */
	nospc = 0;
	os.state = atomic_load_64(&r->state);
	for (;;) {
		for (;;) {
			if (__predict_true(space_available(r, os) >= n))
				break;

			/* Not enough room in the ring. */

			MPASS(os.flags != IDLE);
			MPASS(os.flags != COALESCING);
			if (__predict_false(++nospc > 100)) {
				counter_u64_add(r->dropped, n);
				return (ENOBUFS);
			}
			if (os.flags == STALLED)
				mp_ring_check_drainage(r, 64);
			else
				cpu_spinwait();
			os.state = atomic_load_64(&r->state);
		}

		/* There is room in the ring. */

		cons = -1;
		ns.state = os.state;
		ns.pidx_head = increment_idx(r, os.pidx_head, n);
		if (os.flags == IDLE || os.flags == COALESCING) {
			MPASS(os.pidx_tail == os.cidx);
			if (os.pidx_head == os.pidx_tail) {
				cons = C_FAST;
				ns.pidx_tail = increment_idx(r, os.pidx_tail, n);
			} else
				cons = C_2;
			ns.flags = BUSY;
		} else if (os.flags == TOO_BUSY) {
			cons = C_TAKEOVER;
			ns.flags = TAKING_OVER;
		}
		critical_enter();
		if (atomic_fcmpset_64(&r->state, &os.state, ns.state))
			break;
		critical_exit();
		cpu_spinwait();
	}

	pidx_start = os.pidx_head;
	pidx_stop = ns.pidx_head;

	if (cons == C_FAST) {
		i = pidx_start;
		do {
			r->items[i] = *items++;
			if (__predict_false(++i == r->size))
				i = 0;
		} while (i != pidx_stop);
		critical_exit();
		counter_u64_add(r->consumer[C_FAST], 1);
		mtx_lock(r->cons_lock);
		drain_ring(r, budget);
		mtx_unlock(r->cons_lock);
		return (0);
	}

	/*
	 * Wait for other producers who got in ahead of us to enqueue their
	 * items, one producer at a time.  It is our turn when the ring's
	 * pidx_tail reaches the beginning of our reservation (pidx_start).
	 */
	while (ns.pidx_tail != pidx_start) {
		cpu_spinwait();
		ns.state = atomic_load_64(&r->state);
	}

	/* Now it is our turn to fill up the area we reserved earlier. */
	i = pidx_start;
	do {
		r->items[i] = *items++;
		if (__predict_false(++i == r->size))
			i = 0;
	} while (i != pidx_stop);

	/*
	 * Update the ring's pidx_tail.  The release style atomic guarantees
	 * that the items are visible to any thread that sees the updated pidx.
	 */
	os.state = atomic_load_64(&r->state);
	do {
		consumer = false;
		ns.state = os.state;
		ns.pidx_tail = pidx_stop;
		if (os.flags == IDLE || os.flags == COALESCING ||
		    (os.flags == STALLED && r->can_drain(r))) {
			MPASS(cons == -1);
			consumer = true;
			ns.flags = BUSY;
		}
	} while (atomic_fcmpset_rel_64(&r->state, &os.state, ns.state) == 0);
	critical_exit();

	if (cons == -1) {
		if (consumer)
			cons = C_3;
		else {
			counter_u64_add(r->not_consumer, 1);
			return (0);
		}
	}
	MPASS(cons > C_FAST && cons < nitems(r->consumer));
	counter_u64_add(r->consumer[cons], 1);
	mtx_lock(r->cons_lock);
	drain_ring(r, budget);
	mtx_unlock(r->cons_lock);

	return (0);
}
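/*
 * Illustrative producer sketch (hypothetical caller): a tx path hands one
 * or more item pointers to the ring and mp_ring_enqueue decides whether
 * this thread also becomes the consumer.  On ENOBUFS the items were not
 * enqueued (they are counted in the 'dropped' stat) and the caller still
 * owns them:
 *
 *	void *items[1] = { m };
 *
 *	error = mp_ring_enqueue(txq->r, items, 1, budget);
 *	if (error == ENOBUFS)
 *		m_freem(m);	// or otherwise dispose of the item
 */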

void
mp_ring_check_drainage(struct mp_ring *r, int budget)
{
	union ring_state os, ns;

	os.state = atomic_load_64(&r->state);
	if (os.flags == STALLED && r->can_drain(r)) {
		MPASS(os.cidx != os.pidx_tail);	/* implied by STALLED */
		ns.state = os.state;
		ns.flags = BUSY;
		if (atomic_cmpset_acq_64(&r->state, os.state, ns.state)) {
			mtx_lock(r->cons_lock);
			drain_ring(r, budget);
			mtx_unlock(r->cons_lock);
		}
	} else if (os.flags == COALESCING) {
		MPASS(os.cidx == os.pidx_tail);
		ns.state = os.state;
		ns.flags = BUSY;
		if (atomic_cmpset_acq_64(&r->state, os.state, ns.state)) {
			mtx_lock(r->cons_lock);
			drain_txpkts(r, ns, budget);
			mtx_unlock(r->cons_lock);
		}
	}
}
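/*
 * Illustrative sketch (hypothetical caller): the driver is expected to call
 * this periodically, e.g. from a tx update or slow interrupt handler, so
 * that STALLED rings retry once resources free up and COALESCING rings
 * flush their held frames even if nothing new is enqueued:
 *
 *	if (!mp_ring_is_idle(txq->r))
 *		mp_ring_check_drainage(txq->r, 64);
 */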

void
mp_ring_reset_stats(struct mp_ring *r)
{
	int i;

	counter_u64_zero(r->dropped);
	for (i = 0; i < nitems(r->consumer); i++)
		counter_u64_zero(r->consumer[i]);
	counter_u64_zero(r->not_consumer);
	counter_u64_zero(r->abdications);
	counter_u64_zero(r->stalls);
	counter_u64_zero(r->consumed);
	counter_u64_zero(r->cons_idle);
	counter_u64_zero(r->cons_idle2);
}

bool
mp_ring_is_idle(struct mp_ring *r)
{
	union ring_state s;

	s.state = atomic_load_64(&r->state);
	if (s.pidx_head == s.pidx_tail && s.pidx_tail == s.cidx &&
	    s.flags == IDLE)
		return (true);

	return (false);
}

void
mp_ring_sysctls(struct mp_ring *r, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{
	struct sysctl_oid *oid;

	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "mp_ring", CTLFLAG_RD |
	    CTLFLAG_MPSAFE, NULL, "mp_ring statistics");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_U64(ctx, children, OID_AUTO, "state", CTLFLAG_RD,
	    __DEVOLATILE(uint64_t *, &r->state), 0, "ring state");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "dropped", CTLFLAG_RD,
	    &r->dropped, "# of items dropped");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumed",
	    CTLFLAG_RD, &r->consumed, "# of items consumed");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "fast_consumer",
	    CTLFLAG_RD, &r->consumer[C_FAST],
	    "# of times producer became consumer (fast)");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumer2",
	    CTLFLAG_RD, &r->consumer[C_2],
	    "# of times producer became consumer (2)");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumer3",
	    CTLFLAG_RD, &r->consumer[C_3],
	    "# of times producer became consumer (3)");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "takeovers",
	    CTLFLAG_RD, &r->consumer[C_TAKEOVER],
	    "# of times producer took over from another consumer.");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "not_consumer",
	    CTLFLAG_RD, &r->not_consumer,
	    "# of times producer did not become consumer");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "abdications",
	    CTLFLAG_RD, &r->abdications, "# of consumer abdications");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "stalls",
	    CTLFLAG_RD, &r->stalls, "# of consumer stalls");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "cons_idle",
	    CTLFLAG_RD, &r->cons_idle,
	    "# of times consumer ran fully to completion");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "cons_idle2",
	    CTLFLAG_RD, &r->cons_idle2,
	    "# of times consumer idled when another enqueue was in progress");
}