xref: /illumos-gate/usr/src/uts/common/io/mac/mac_soft_ring.c (revision 10597944279b73141546abca67a8e947810e5bb2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2018 Joyent, Inc.
25  * Copyright 2025 Oxide Computer Company
26  */
27 
28 /*
29  * General Soft rings - Simulating Rx rings in S/W.
30  *
31  * Soft ring is a data abstraction containing a queue and a worker
32  * thread and represents a hardware Rx ring in software. Each soft
33  * ring set can have a collection of soft rings for separating
34  * L3/L4 specific traffic (IPv4 from IPv6 or TCP from UDP) or for
35  * allowing a higher degree of parallelism by sending traffic to
36  * one of the soft rings for a SRS (using a hash on src IP or port).
37  * Each soft ring worker thread can be bound to a different CPU
38  * allowing the processing for each soft ring to happen in parallel
39  * and independent from each other.
40  *
41  * Protocol soft rings:
42  *
 * Each SRS has at a minimum 3 softrings. One each for IPv4 TCP,
44  * IPv4 UDP and rest (OTH - for IPv6 and everything else). The
45  * SRS does dynamic polling and enforces link level bandwidth but
46  * it does so for all traffic (IPv4 and IPv6 and all protocols) on
47  * that link. However, each protocol layer wants a different
48  * behaviour. For instance IPv4 TCP has per CPU squeues which
49  * enforce their own polling and flow control so IPv4 TCP traffic
50  * needs to go to a separate soft ring which can be polled by the
51  * TCP squeue. It also allows TCP squeue to push back flow control
52  * all the way to NIC hardware (if it puts its corresponding soft
53  * ring in the poll mode and soft ring queue builds up, the
54  * shared srs_poll_pkt_cnt goes up and SRS automatically stops
55  * more packets from entering the system).
56  *
 * Similarly, UDP benefits from a DLS bypass and packet chaining
 * so sending it to a separate soft ring is desired. All the rest of
 * the traffic (including IPv6) is sent to the OTH softring. The IPv6
 * traffic currently goes through the OTH softring and via DLS because
 * it needs more processing to be done. Irrespective of the sap
 * (IPv4 or IPv6) or the transport, the dynamic polling, B/W enforcement,
 * cpu assignment, fanout, etc apply to all traffic since they
 * are implemented by the SRS which is agnostic to sap or transport.
65  *
66  * Fanout soft rings:
67  *
68  * On a multithreaded system, we can assign more CPU and multi thread
69  * the stack by creating a soft ring per CPU and spreading traffic
70  * based on a hash computed on src IP etc. Since we still need to
71  * keep the protocol separation, we create a set of 3 soft ring per
72  * CPU (specified by cpu list or degree of fanout).
73  *
74  * NOTE: See the block level comment on top of mac_sched.c
75  */
76 
77 #include <sys/types.h>
78 #include <sys/callb.h>
79 #include <sys/sdt.h>
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82 #include <sys/vlan.h>
83 #include <inet/ipsec_impl.h>
84 #include <inet/ip_impl.h>
85 #include <inet/sadb.h>
86 #include <inet/ipsecesp.h>
87 #include <inet/ipsecah.h>
88 
89 #include <sys/mac_impl.h>
90 #include <sys/mac_client_impl.h>
91 #include <sys/mac_soft_ring.h>
92 #include <sys/mac_flow_impl.h>
93 #include <sys/mac_stat.h>
94 
95 static void mac_rx_soft_ring_drain(mac_soft_ring_t *);
96 static void mac_soft_ring_fire(void *);
97 static void mac_soft_ring_worker(mac_soft_ring_t *);
98 static void mac_tx_soft_ring_drain(mac_soft_ring_t *);
99 
/*
 * Tunables controlling Tx soft ring queueing: the absolute cap on the
 * number of packets queued on a Tx soft ring, and the high-water mark.
 * mac_soft_ring_create() clamps the per-ring hiwat to the max queue
 * count when initializing a Tx soft ring.
 */
uint32_t mac_tx_soft_ring_max_q_cnt = 100000;
uint32_t mac_tx_soft_ring_hiwat = 1000;

/* kmem cache that mac_soft_ring_create() allocates soft rings from. */
extern kmem_cache_t *mac_soft_ring_cache;
104 
/*
 * Append `softring' to the tail of the SRS's soft ring list and bump
 * srs_soft_ring_count.  The caller must hold srs_lock (see the use in
 * mac_soft_ring_create()).  Only s_ring_prev is set here for the new
 * tail; the head element keeps a NULL s_ring_prev.
 */
#define	ADD_SOFTRING_TO_SET(mac_srs, softring) {			\
	if (mac_srs->srs_soft_ring_head == NULL) {			\
		mac_srs->srs_soft_ring_head = softring;			\
		mac_srs->srs_soft_ring_tail = softring;			\
	} else {							\
		/* ADD to the list */					\
		softring->s_ring_prev =					\
			mac_srs->srs_soft_ring_tail;			\
		mac_srs->srs_soft_ring_tail->s_ring_next = softring;	\
		mac_srs->srs_soft_ring_tail = softring;			\
	}								\
	mac_srs->srs_soft_ring_count++;					\
}
118 
119 /*
120  * mac_soft_ring_worker_wakeup
121  *
122  * Wake up the soft ring worker thread to process the queue as long
123  * as no one else is processing it and upper layer (client) is still
124  * ready to receive packets.
125  */
126 void
mac_soft_ring_worker_wakeup(mac_soft_ring_t * ringp)127 mac_soft_ring_worker_wakeup(mac_soft_ring_t *ringp)
128 {
129 	ASSERT(MUTEX_HELD(&ringp->s_ring_lock));
130 	if (!(ringp->s_ring_state & S_RING_PROC) &&
131 	    !(ringp->s_ring_state & S_RING_BLANK) &&
132 	    (ringp->s_ring_tid == NULL)) {
133 		if (ringp->s_ring_wait != 0) {
134 			ringp->s_ring_tid =
135 			    timeout(mac_soft_ring_fire, ringp,
136 			    ringp->s_ring_wait);
137 		} else {
138 			/* Schedule the worker thread. */
139 			cv_signal(&ringp->s_ring_async);
140 		}
141 	}
142 }
143 
144 /*
145  * mac_soft_ring_create
146  *
147  * Create a soft ring, do the necessary setup and bind the worker
148  * thread to the assigned CPU.
149  */
150 mac_soft_ring_t *
mac_soft_ring_create(int id,clock_t wait,uint16_t type,pri_t pri,mac_client_impl_t * mcip,mac_soft_ring_set_t * mac_srs,processorid_t cpuid,mac_direct_rx_t rx_func,void * x_arg1,mac_resource_handle_t x_arg2)151 mac_soft_ring_create(int id, clock_t wait, uint16_t type,
152     pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
153     processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1,
154     mac_resource_handle_t x_arg2)
155 {
156 	mac_soft_ring_t		*ringp;
157 	char			name[S_RING_NAMELEN];
158 
159 	bzero(name, 64);
160 	ringp = kmem_cache_alloc(mac_soft_ring_cache, KM_SLEEP);
161 
162 	if (type & ST_RING_TCP) {
163 		(void) snprintf(name, sizeof (name),
164 		    "mac_tcp_soft_ring_%d_%p", id, (void *)mac_srs);
165 	} else if (type & ST_RING_TCP6) {
166 		(void) snprintf(name, sizeof (name),
167 		    "mac_tcp6_soft_ring_%d_%p", id, (void *)mac_srs);
168 	} else if (type & ST_RING_UDP) {
169 		(void) snprintf(name, sizeof (name),
170 		    "mac_udp_soft_ring_%d_%p", id, (void *)mac_srs);
171 	} else if (type & ST_RING_UDP6) {
172 		(void) snprintf(name, sizeof (name),
173 		    "mac_udp6_soft_ring_%d_%p", id, (void *)mac_srs);
174 	} else if (type & ST_RING_OTH) {
175 		(void) snprintf(name, sizeof (name),
176 		    "mac_oth_soft_ring_%d_%p", id, (void *)mac_srs);
177 	} else {
178 		ASSERT(type & ST_RING_TX);
179 		(void) snprintf(name, sizeof (name),
180 		    "mac_tx_soft_ring_%d_%p", id, (void *)mac_srs);
181 	}
182 
183 	bzero(ringp, sizeof (mac_soft_ring_t));
184 	(void) strncpy(ringp->s_ring_name, name, S_RING_NAMELEN + 1);
185 	ringp->s_ring_name[S_RING_NAMELEN] = '\0';
186 	mutex_init(&ringp->s_ring_lock, NULL, MUTEX_DEFAULT, NULL);
187 	ringp->s_ring_notify_cb_info.mcbi_lockp = &ringp->s_ring_lock;
188 
189 	ringp->s_ring_type = type;
190 	ringp->s_ring_wait = MSEC_TO_TICK(wait);
191 	ringp->s_ring_mcip = mcip;
192 	ringp->s_ring_set = mac_srs;
193 
194 	/*
195 	 * Protect against access from DR callbacks (mac_walk_srs_bind/unbind)
196 	 * which can't grab the mac perimeter
197 	 */
198 	mutex_enter(&mac_srs->srs_lock);
199 	ADD_SOFTRING_TO_SET(mac_srs, ringp);
200 	mutex_exit(&mac_srs->srs_lock);
201 
202 	/*
203 	 * set the bind CPU to -1 to indicate
204 	 * no thread affinity set
205 	 */
206 	ringp->s_ring_cpuid = ringp->s_ring_cpuid_save = -1;
207 	ringp->s_ring_worker = thread_create(NULL, 0,
208 	    mac_soft_ring_worker, ringp, 0, &p0, TS_RUN, pri);
209 	if (type & ST_RING_TX) {
210 		ringp->s_ring_drain_func = mac_tx_soft_ring_drain;
211 		ringp->s_ring_tx_arg1 = x_arg1;
212 		ringp->s_ring_tx_arg2 = x_arg2;
213 		ringp->s_ring_tx_max_q_cnt = mac_tx_soft_ring_max_q_cnt;
214 		ringp->s_ring_tx_hiwat =
215 		    (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ?
216 		    mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat;
217 		if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
218 			mac_srs_tx_t *tx = &mac_srs->srs_tx;
219 
220 			ASSERT(tx->st_soft_rings[
221 			    ((mac_ring_t *)x_arg2)->mr_index] == NULL);
222 			tx->st_soft_rings[((mac_ring_t *)x_arg2)->mr_index] =
223 			    ringp;
224 		}
225 	} else {
226 		ringp->s_ring_drain_func = mac_rx_soft_ring_drain;
227 		ringp->s_ring_rx_func = rx_func;
228 		ringp->s_ring_rx_arg1 = x_arg1;
229 		ringp->s_ring_rx_arg2 = x_arg2;
230 		if (mac_srs->srs_state & SRS_SOFTRING_QUEUE)
231 			ringp->s_ring_type |= ST_RING_WORKER_ONLY;
232 	}
233 	if (cpuid != -1)
234 		(void) mac_soft_ring_bind(ringp, cpuid);
235 
236 	mac_soft_ring_stat_create(ringp);
237 
238 	return (ringp);
239 }
240 
241 /*
242  * mac_soft_ring_free
243  *
244  * Free the soft ring once we are done with it.
245  */
void
mac_soft_ring_free(mac_soft_ring_t *softring)
{
	/*
	 * The ring must have been condemned and its worker must have
	 * acknowledged (CONDEMNED_DONE), and no drain may be in
	 * progress, before teardown is legal.
	 */
	ASSERT((softring->s_ring_state &
	    (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | S_RING_PROC)) ==
	    (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE));
	/* Discard any packets still queued on the ring. */
	mac_drop_chain(softring->s_ring_first, "softring free");
	softring->s_ring_tx_arg2 = NULL;
	mac_soft_ring_stat_delete(softring);
	mac_callback_free(softring->s_ring_notify_cb_list);
	kmem_cache_free(mac_soft_ring_cache, softring);
}
258 
/*
 * Tunable: when set to zero, mac_soft_ring_bind() becomes a no-op and
 * soft ring worker threads are not bound to CPUs.
 */
int mac_soft_ring_thread_bind = 1;
260 
261 /*
262  * mac_soft_ring_bind
263  *
264  * Bind a soft ring worker thread to supplied CPU.
265  */
266 cpu_t *
mac_soft_ring_bind(mac_soft_ring_t * ringp,processorid_t cpuid)267 mac_soft_ring_bind(mac_soft_ring_t *ringp, processorid_t cpuid)
268 {
269 	cpu_t *cp;
270 	boolean_t clear = B_FALSE;
271 
272 	ASSERT(MUTEX_HELD(&cpu_lock));
273 
274 	if (mac_soft_ring_thread_bind == 0) {
275 		DTRACE_PROBE1(mac__soft__ring__no__cpu__bound,
276 		    mac_soft_ring_t *, ringp);
277 		return (NULL);
278 	}
279 
280 	cp = cpu_get(cpuid);
281 	if (cp == NULL || !cpu_is_online(cp))
282 		return (NULL);
283 
284 	mutex_enter(&ringp->s_ring_lock);
285 	ringp->s_ring_state |= S_RING_BOUND;
286 	if (ringp->s_ring_cpuid != -1)
287 		clear = B_TRUE;
288 	ringp->s_ring_cpuid = cpuid;
289 	mutex_exit(&ringp->s_ring_lock);
290 
291 	if (clear)
292 		thread_affinity_clear(ringp->s_ring_worker);
293 
294 	DTRACE_PROBE2(mac__soft__ring__cpu__bound, mac_soft_ring_t *,
295 	    ringp, processorid_t, cpuid);
296 
297 	thread_affinity_set(ringp->s_ring_worker, cpuid);
298 
299 	return (cp);
300 }
301 
302 /*
303  * mac_soft_ring_unbind
304  *
305  * Un Bind a soft ring worker thread.
306  */
307 void
mac_soft_ring_unbind(mac_soft_ring_t * ringp)308 mac_soft_ring_unbind(mac_soft_ring_t *ringp)
309 {
310 	ASSERT(MUTEX_HELD(&cpu_lock));
311 
312 	mutex_enter(&ringp->s_ring_lock);
313 	if (!(ringp->s_ring_state & S_RING_BOUND)) {
314 		ASSERT(ringp->s_ring_cpuid == -1);
315 		mutex_exit(&ringp->s_ring_lock);
316 		return;
317 	}
318 
319 	ringp->s_ring_cpuid = -1;
320 	ringp->s_ring_state &= ~S_RING_BOUND;
321 	thread_affinity_clear(ringp->s_ring_worker);
322 	mutex_exit(&ringp->s_ring_lock);
323 }
324 
325 /*
326  * PRIVATE FUNCTIONS
327  */
328 
329 static void
mac_soft_ring_fire(void * arg)330 mac_soft_ring_fire(void *arg)
331 {
332 	mac_soft_ring_t	*ringp = arg;
333 
334 	mutex_enter(&ringp->s_ring_lock);
335 	if (ringp->s_ring_tid == NULL) {
336 		mutex_exit(&ringp->s_ring_lock);
337 		return;
338 	}
339 
340 	ringp->s_ring_tid = NULL;
341 
342 	if (!(ringp->s_ring_state & S_RING_PROC)) {
343 		cv_signal(&ringp->s_ring_async);
344 	}
345 	mutex_exit(&ringp->s_ring_lock);
346 }
347 
348 /*
349  * Drain the soft ring pointed to by ringp.
350  *
351  *    o s_ring_first: pointer to the queued packet chain.
352  *
353  *    o s_ring_rx_func: pointer to to the client's Rx routine.
354  *
355  *    o s_ring_rx_{arg1,arg2}: opaque values specific to the client.
356  */
static void
mac_rx_soft_ring_drain(mac_soft_ring_t *ringp)
{
	mblk_t		*mp;
	void		*arg1;
	mac_resource_handle_t arg2;
	timeout_id_t	tid;
	mac_direct_rx_t	proc;
	size_t		sz;
	int		cnt;
	mac_soft_ring_set_t	*mac_srs = ringp->s_ring_set;

	ringp->s_ring_run = curthread;
	ASSERT(mutex_owned(&ringp->s_ring_lock));
	ASSERT(!(ringp->s_ring_state & S_RING_PROC));

	/* A pending wakeup timeout is now redundant; cancel it below. */
	if ((tid = ringp->s_ring_tid) != NULL)
		ringp->s_ring_tid = NULL;

	ringp->s_ring_state |= S_RING_PROC;

	proc = ringp->s_ring_rx_func;
	arg1 = ringp->s_ring_rx_arg1;
	arg2 = ringp->s_ring_rx_arg2;

	while ((ringp->s_ring_first != NULL) &&
	    !(ringp->s_ring_state & S_RING_PAUSE)) {
		/* Detach the whole queued chain under s_ring_lock. */
		mp = ringp->s_ring_first;
		ringp->s_ring_first = NULL;
		ringp->s_ring_last = NULL;
		cnt = ringp->s_ring_count;
		ringp->s_ring_count = 0;
		sz = ringp->s_ring_size;
		ringp->s_ring_size = 0;
		/* Drop the lock while calling up into the client. */
		mutex_exit(&ringp->s_ring_lock);

		if (tid != NULL) {
			(void) untimeout(tid);
			tid = NULL;
		}

		/* Deliver the chain via the client's Rx function. */
		(*proc)(arg1, arg2, mp, NULL);

		/*
		 * If we have an SRS performing bandwidth control, then
		 * we need to decrement the size and count so the SRS
		 * has an accurate measure of the data queued between
		 * the SRS and its soft rings. We decrement the
		 * counters only when the packet is processed by both
		 * the SRS and the soft ring.
		 */
		mutex_enter(&mac_srs->srs_lock);
		MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
		MAC_UPDATE_SRS_SIZE_LOCKED(mac_srs, sz);
		mutex_exit(&mac_srs->srs_lock);

		/* Reacquire before re-evaluating the loop condition. */
		mutex_enter(&ringp->s_ring_lock);
	}
	ringp->s_ring_state &= ~S_RING_PROC;
	/* Notify a client thread waiting on this ring, if any. */
	if (ringp->s_ring_state & S_RING_CLIENT_WAIT)
		cv_signal(&ringp->s_ring_client_cv);
	ringp->s_ring_run = NULL;
}
420 
421 /*
422  * The soft ring worker routine to process any queued packets. In
423  * normal case, the worker thread is bound to a CPU. If the soft ring
424  * handles TCP packets then the worker thread is bound to the same CPU
425  * as the TCP squeue.
426  */
static void
mac_soft_ring_worker(mac_soft_ring_t *ringp)
{
	kmutex_t *lock = &ringp->s_ring_lock;
	kcondvar_t *async = &ringp->s_ring_async;
	mac_soft_ring_set_t *srs = ringp->s_ring_set;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "mac_soft_ring");
	mutex_enter(lock);
start:
	for (;;) {
		/*
		 * Sleep while there is nothing to do (queue empty, or
		 * the ring is blocked/blanked) and no pause has been
		 * requested, or while some other thread is draining the
		 * ring (S_RING_PROC).
		 */
		while (((ringp->s_ring_first == NULL ||
		    (ringp->s_ring_state & (S_RING_BLOCK|S_RING_BLANK))) &&
		    !(ringp->s_ring_state & S_RING_PAUSE)) ||
		    (ringp->s_ring_state & S_RING_PROC)) {

			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(async, lock);
			CALLB_CPR_SAFE_END(&cprinfo, lock);
		}

		/*
		 * Either we have work to do, or we have been asked to
		 * shutdown temporarily or permanently
		 */
		if (ringp->s_ring_state & S_RING_PAUSE)
			goto done;

		ringp->s_ring_drain_func(ringp);
	}
done:
	/*
	 * Quiesce/condemn handshake with the SRS.  srs_lock is taken
	 * before s_ring_lock here, so drop the ring lock first and
	 * retake both in that order.
	 */
	mutex_exit(lock);
	mutex_enter(&srs->srs_lock);
	mutex_enter(lock);

	ringp->s_ring_state |= S_RING_QUIESCE_DONE;
	if (!(ringp->s_ring_state & S_RING_CONDEMNED)) {
		/* Quiesced only: wait to be restarted or condemned. */
		srs->srs_soft_ring_quiesced_count++;
		cv_broadcast(&srs->srs_async);
		mutex_exit(&srs->srs_lock);
		while (!(ringp->s_ring_state &
		    (S_RING_RESTART | S_RING_CONDEMNED)))
			cv_wait(&ringp->s_ring_async, &ringp->s_ring_lock);
		/* Retake both locks in srs_lock -> s_ring_lock order. */
		mutex_exit(lock);
		mutex_enter(&srs->srs_lock);
		mutex_enter(lock);
		srs->srs_soft_ring_quiesced_count--;
		if (ringp->s_ring_state & S_RING_RESTART) {
			ASSERT(!(ringp->s_ring_state & S_RING_CONDEMNED));
			/* Clear quiesce state and resume processing. */
			ringp->s_ring_state &= ~(S_RING_RESTART |
			    S_RING_QUIESCE | S_RING_QUIESCE_DONE);
			cv_broadcast(&srs->srs_async);
			mutex_exit(&srs->srs_lock);
			goto start;
		}
	}
	/* Condemned: acknowledge, notify the SRS and exit the thread. */
	ASSERT(ringp->s_ring_state & S_RING_CONDEMNED);
	ringp->s_ring_state |= S_RING_CONDEMNED_DONE;
	CALLB_CPR_EXIT(&cprinfo);
	srs->srs_soft_ring_condemned_count++;
	cv_broadcast(&srs->srs_async);
	mutex_exit(&srs->srs_lock);
	thread_exit();
}
492 
493 /*
494  * mac_soft_ring_intr_enable and mac_soft_ring_intr_disable
495  *
 * These functions are called to toggle the sending of packets to the
 * client. They are called by the client, which gets the names of
 * these routines and the corresponding cookie (pointing to the
 * softring) during capability negotiation at setup time.
 *
 * Enabling allows the processing thread to send packets to the
 * client, while disabling does the opposite.
503  */
504 int
mac_soft_ring_intr_enable(void * arg)505 mac_soft_ring_intr_enable(void *arg)
506 {
507 	mac_soft_ring_t *ringp = (mac_soft_ring_t *)arg;
508 	mutex_enter(&ringp->s_ring_lock);
509 	ringp->s_ring_state &= ~S_RING_BLANK;
510 	if (ringp->s_ring_first != NULL)
511 		mac_soft_ring_worker_wakeup(ringp);
512 	mutex_exit(&ringp->s_ring_lock);
513 	return (0);
514 }
515 
516 boolean_t
mac_soft_ring_intr_disable(void * arg)517 mac_soft_ring_intr_disable(void *arg)
518 {
519 	mac_soft_ring_t *ringp = (mac_soft_ring_t *)arg;
520 	boolean_t sring_blanked = B_FALSE;
521 	/*
522 	 * Stop worker thread from sending packets above.
523 	 * Squeue will poll soft ring when it needs packets.
524 	 */
525 	mutex_enter(&ringp->s_ring_lock);
526 	if (!(ringp->s_ring_state & S_RING_PROC)) {
527 		ringp->s_ring_state |= S_RING_BLANK;
528 		sring_blanked = B_TRUE;
529 	}
530 	mutex_exit(&ringp->s_ring_lock);
531 	return (sring_blanked);
532 }
533 
534 /*
535  * mac_soft_ring_poll
536  *
537  * This routine is called by the client to poll for packets from
538  * the soft ring. The function name and cookie corresponding to
539  * the soft ring is exchanged during capability negotiation during
540  * setup.
541  */
542 mblk_t *
mac_soft_ring_poll(mac_soft_ring_t * ringp,size_t bytes_to_pickup)543 mac_soft_ring_poll(mac_soft_ring_t *ringp, size_t bytes_to_pickup)
544 {
545 	mblk_t	*head, *tail;
546 	mblk_t	*mp;
547 	size_t	sz = 0;
548 	int	cnt = 0;
549 	mac_soft_ring_set_t	*mac_srs = ringp->s_ring_set;
550 
551 	ASSERT(mac_srs != NULL);
552 
553 	mutex_enter(&ringp->s_ring_lock);
554 	head = tail = mp = ringp->s_ring_first;
555 	if (head == NULL) {
556 		mutex_exit(&ringp->s_ring_lock);
557 		return (NULL);
558 	}
559 
560 	if (ringp->s_ring_size <= bytes_to_pickup) {
561 		head = ringp->s_ring_first;
562 		ringp->s_ring_first = NULL;
563 		ringp->s_ring_last = NULL;
564 		cnt = ringp->s_ring_count;
565 		ringp->s_ring_count = 0;
566 		sz = ringp->s_ring_size;
567 		ringp->s_ring_size = 0;
568 	} else {
569 		while (mp && sz <= bytes_to_pickup) {
570 			sz += msgdsize(mp);
571 			cnt++;
572 			tail = mp;
573 			mp = mp->b_next;
574 		}
575 		ringp->s_ring_count -= cnt;
576 		ringp->s_ring_size -= sz;
577 		tail->b_next = NULL;
578 		if (mp == NULL) {
579 			ringp->s_ring_first = NULL;
580 			ringp->s_ring_last = NULL;
581 			ASSERT(ringp->s_ring_count == 0);
582 		} else {
583 			ringp->s_ring_first = mp;
584 		}
585 	}
586 
587 	mutex_exit(&ringp->s_ring_lock);
588 	/*
589 	 * Update the shared count and size counters so
590 	 * that SRS has a accurate idea of queued packets.
591 	 */
592 	mutex_enter(&mac_srs->srs_lock);
593 	MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
594 	MAC_UPDATE_SRS_SIZE_LOCKED(mac_srs, sz);
595 	mutex_exit(&mac_srs->srs_lock);
596 	return (head);
597 }
598 
599 /*
600  * mac_soft_ring_dls_bypass
601  *
602  * Enable direct client (IP) callback function from the softrings.
603  * Callers need to make sure they don't need any DLS layer processing
604  */
605 void
mac_soft_ring_dls_bypass(void * arg,mac_direct_rx_t rx_func,void * rx_arg1)606 mac_soft_ring_dls_bypass(void *arg, mac_direct_rx_t rx_func, void *rx_arg1)
607 {
608 	mac_soft_ring_t		*softring = arg;
609 	mac_soft_ring_set_t	*srs;
610 
611 	VERIFY3P(rx_func, !=, NULL);
612 
613 	mutex_enter(&softring->s_ring_lock);
614 	softring->s_ring_rx_func = rx_func;
615 	softring->s_ring_rx_arg1 = rx_arg1;
616 	mutex_exit(&softring->s_ring_lock);
617 
618 	srs = softring->s_ring_set;
619 	mutex_enter(&srs->srs_lock);
620 	srs->srs_type |= SRST_DLS_BYPASS;
621 	mutex_exit(&srs->srs_lock);
622 }
623 
624 /*
625  * mac_soft_ring_signal
626  *
627  * Typically used to set the soft ring state to QUIESCE, CONDEMNED, or
628  * RESTART.
629  *
630  * In the Rx side, the quiescing is done bottom up. After the Rx upcalls
631  * from the driver are done, then the Rx SRS is quiesced and only then can
632  * we signal the soft rings. Thus this function can't be called arbitrarily
633  * without satisfying the prerequisites. On the Tx side, the threads from
634  * top need to quiesced, then the Tx SRS and only then can we signal the
635  * Tx soft rings.
636  */
637 void
mac_soft_ring_signal(mac_soft_ring_t * softring,uint_t sr_flag)638 mac_soft_ring_signal(mac_soft_ring_t *softring, uint_t sr_flag)
639 {
640 	mutex_enter(&softring->s_ring_lock);
641 	softring->s_ring_state |= sr_flag;
642 	cv_signal(&softring->s_ring_async);
643 	mutex_exit(&softring->s_ring_lock);
644 }
645 
646 /*
647  * mac_tx_soft_ring_drain
648  *
649  * The transmit side drain routine in case the soft ring was being
650  * used to transmit packets.
651  */
static void
mac_tx_soft_ring_drain(mac_soft_ring_t *ringp)
{
	mblk_t			*mp;
	void			*arg1;
	void			*arg2;
	mblk_t			*tail;
	uint_t			saved_pkt_count, saved_size;
	mac_tx_stats_t		stats;
	mac_soft_ring_set_t	*mac_srs = ringp->s_ring_set;

	saved_pkt_count = saved_size = 0;
	ringp->s_ring_run = curthread;
	ASSERT(mutex_owned(&ringp->s_ring_lock));
	ASSERT(!(ringp->s_ring_state & S_RING_PROC));

	ringp->s_ring_state |= S_RING_PROC;
	arg1 = ringp->s_ring_tx_arg1;
	arg2 = ringp->s_ring_tx_arg2;

	while (ringp->s_ring_first != NULL) {
		/* Detach the queued chain under s_ring_lock. */
		mp = ringp->s_ring_first;
		tail = ringp->s_ring_last;
		saved_pkt_count = ringp->s_ring_count;
		saved_size = ringp->s_ring_size;
		ringp->s_ring_first = NULL;
		ringp->s_ring_last = NULL;
		ringp->s_ring_count = 0;
		ringp->s_ring_size = 0;
		/* Drop the lock across the actual transmit. */
		mutex_exit(&ringp->s_ring_lock);

		mp = mac_tx_send(arg1, arg2, mp, &stats);

		mutex_enter(&ringp->s_ring_lock);
		if (mp != NULL) {
			/* Device out of tx desc, set block */
			/*
			 * Re-queue the unsent remainder at the head and
			 * restore the counters for the unsent portion.
			 */
			tail->b_next = ringp->s_ring_first;
			ringp->s_ring_first = mp;
			ringp->s_ring_count +=
			    (saved_pkt_count - stats.mts_opackets);
			ringp->s_ring_size += (saved_size - stats.mts_obytes);
			if (ringp->s_ring_last == NULL)
				ringp->s_ring_last = tail;

			if (ringp->s_ring_tx_woken_up) {
				/*
				 * Tx was woken up while we were sending:
				 * consume the flag and leave the ring
				 * unblocked so the queue is retried.
				 */
				ringp->s_ring_tx_woken_up = B_FALSE;
			} else {
				ringp->s_ring_state |= S_RING_BLOCK;
				ringp->s_st_stat.mts_blockcnt++;
			}

			ringp->s_ring_state &= ~S_RING_PROC;
			ringp->s_ring_run = NULL;
			return;
		} else {
			ringp->s_ring_tx_woken_up = B_FALSE;
			/* Fold the send statistics into SRS and ring. */
			SRS_TX_STATS_UPDATE(mac_srs, &stats);
			SOFTRING_TX_STATS_UPDATE(ringp, &stats);
		}
	}

	/*
	 * Fully drained: if flow control was asserted, clear it and
	 * notify any blocked clients.
	 */
	if (ringp->s_ring_count == 0 && ringp->s_ring_state &
	    (S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED)) {
		mac_client_impl_t *mcip =  ringp->s_ring_mcip;
		boolean_t wakeup_required = B_FALSE;

		if (ringp->s_ring_state &
		    (S_RING_TX_HIWAT|S_RING_WAKEUP_CLIENT)) {
			wakeup_required = B_TRUE;
		}
		ringp->s_ring_state &=
		    ~(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED);
		/* Drop the ring lock while calling back into clients. */
		mutex_exit(&ringp->s_ring_lock);
		if (wakeup_required) {
			mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)ringp);
			/*
			 * If the client is not the primary MAC client, then we
			 * need to send the notification to the clients upper
			 * MAC, i.e. mci_upper_mip.
			 */
			mac_tx_notify(mcip->mci_upper_mip != NULL ?
			    mcip->mci_upper_mip : mcip->mci_mip);
		}
		mutex_enter(&ringp->s_ring_lock);
	}
	ringp->s_ring_state &= ~S_RING_PROC;
	ringp->s_ring_run = NULL;
}
740