xref: /illumos-gate/usr/src/uts/common/io/mac/mac_soft_ring.c (revision 7de10d4b605cc3f58e6c76ae356c572f19259d20)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2018 Joyent, Inc.
25  * Copyright 2026 Oxide Computer Company
26  */
27 
28 /*
29  * General Soft rings - Simulating Rx rings in S/W.
30  *
31  * Soft ring is a data abstraction containing a queue and a worker
32  * thread and represents a hardware Rx ring in software. Each soft
33  * ring set can have a collection of soft rings for separating
34  * L3/L4 specific traffic (IPv4 from IPv6 or TCP from UDP) or for
35  * allowing a higher degree of parallelism by sending traffic to
36  * one of the soft rings for a SRS (using a hash on src IP or port).
37  * Each soft ring worker thread can be bound to a different CPU
38  * allowing the processing for each soft ring to happen in parallel
39  * and independent from each other.
40  *
41  * Protocol soft rings:
42  *
 * Each SRS has at a minimum 3 softrings. One each for IPv4 TCP,
44  * IPv4 UDP and rest (OTH - for IPv6 and everything else). The
45  * SRS does dynamic polling and enforces link level bandwidth but
46  * it does so for all traffic (IPv4 and IPv6 and all protocols) on
47  * that link. However, each protocol layer wants a different
48  * behaviour. For instance IPv4 TCP has per CPU squeues which
49  * enforce their own polling and flow control so IPv4 TCP traffic
50  * needs to go to a separate soft ring which can be polled by the
51  * TCP squeue. It also allows TCP squeue to push back flow control
52  * all the way to NIC hardware (if it puts its corresponding soft
53  * ring in the poll mode and soft ring queue builds up, the
54  * shared srs_poll_pkt_cnt goes up and SRS automatically stops
55  * more packets from entering the system).
56  *
 * Similarly, UDP benefits from a DLS bypass and packet chaining,
 * so sending it to a separate soft ring is desired. All the rest of
 * the traffic (including IPv6) is sent to the OTH softring. IPv6
 * traffic currently goes through the OTH softring and via DLS because
 * it needs more processing to be done. Irrespective of the sap
 * (IPv4 or IPv6) or the transport, the dynamic polling, B/W enforcement,
 * cpu assignment, fanout, etc. apply to all traffic since they
 * are implemented by the SRS, which is agnostic to sap or transport.
65  *
66  * Fanout soft rings:
67  *
68  * On a multithreaded system, we can assign more CPU and multi thread
69  * the stack by creating a soft ring per CPU and spreading traffic
70  * based on a hash computed on src IP etc. Since we still need to
71  * keep the protocol separation, we create a set of 3 soft ring per
72  * CPU (specified by cpu list or degree of fanout).
73  *
74  * NOTE: See the block level comment on top of mac_sched.c
75  */
76 
77 #include <sys/types.h>
78 #include <sys/callb.h>
79 #include <sys/sdt.h>
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82 #include <sys/vlan.h>
83 #include <inet/ipsec_impl.h>
84 #include <inet/ip_impl.h>
85 #include <inet/sadb.h>
86 #include <inet/ipsecesp.h>
87 #include <inet/ipsecah.h>
88 
89 #include <sys/mac_impl.h>
90 #include <sys/mac_client_impl.h>
91 #include <sys/mac_soft_ring.h>
92 #include <sys/mac_flow_impl.h>
93 #include <sys/mac_stat.h>
94 
/*
 * Forward declarations for the file-local drain routines, the wakeup
 * timeout handler and the worker thread body.
 */
static void mac_rx_soft_ring_drain(mac_soft_ring_t *);
static void mac_soft_ring_fire(void *);
static void mac_soft_ring_worker(mac_soft_ring_t *);
static void mac_tx_soft_ring_drain(mac_soft_ring_t *);

/*
 * Tx soft ring tunables. mac_soft_ring_create_tx() copies
 * mac_tx_soft_ring_max_q_cnt into each new Tx soft ring's
 * s_ring_tx_max_q_cnt, and uses mac_tx_soft_ring_hiwat (clamped so that
 * it never exceeds the max queue count) as that ring's s_ring_tx_hiwat.
 */
uint32_t mac_tx_soft_ring_max_q_cnt = 100000;
uint32_t mac_tx_soft_ring_hiwat = 1000;

/* kmem cache that soft rings are allocated from; defined outside this file. */
extern kmem_cache_t *mac_soft_ring_cache;
104 
/*
 * Append `softring' to the tail of the soft ring list hanging off
 * `mac_srs' and bump the SRS's soft ring count.
 *
 * The macro expands to a single statement (do/while (0)) so that it is
 * safe inside an unbraced if/else, and every argument use is
 * parenthesized so that expressions such as `&set' may be passed.
 * Both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 *
 * Only s_ring_prev of the new ring and s_ring_next of the old tail are
 * written; the new ring's remaining link fields are left as the caller
 * initialized them.
 */
#define	ADD_SOFTRING_TO_SET(mac_srs, softring) do {			\
	if ((mac_srs)->srs_soft_ring_head == NULL) {			\
		(mac_srs)->srs_soft_ring_head = (softring);		\
		(mac_srs)->srs_soft_ring_tail = (softring);		\
	} else {							\
		/* ADD to the list */					\
		(softring)->s_ring_prev =				\
		    (mac_srs)->srs_soft_ring_tail;			\
		(mac_srs)->srs_soft_ring_tail->s_ring_next =		\
		    (softring);						\
		(mac_srs)->srs_soft_ring_tail = (softring);		\
	}								\
	(mac_srs)->srs_soft_ring_count++;				\
} while (0)
118 
119 /*
120  * mac_soft_ring_worker_wakeup
121  *
122  * Wake up the soft ring worker thread to process the queue as long
123  * as no one else is processing it and upper layer (client) is still
124  * ready to receive packets.
125  */
126 void
mac_soft_ring_worker_wakeup(mac_soft_ring_t * ringp)127 mac_soft_ring_worker_wakeup(mac_soft_ring_t *ringp)
128 {
129 	ASSERT(MUTEX_HELD(&ringp->s_ring_lock));
130 	if (!(ringp->s_ring_state & S_RING_PROC) &&
131 	    !(ringp->s_ring_state & S_RING_BLANK) &&
132 	    (ringp->s_ring_tid == NULL)) {
133 		if (ringp->s_ring_wait != 0) {
134 			ringp->s_ring_tid =
135 			    timeout(mac_soft_ring_fire, ringp,
136 			    ringp->s_ring_wait);
137 		} else {
138 			/* Schedule the worker thread. */
139 			cv_signal(&ringp->s_ring_async);
140 		}
141 	}
142 }
143 
144 /*
145  * Create a soft ring, do the necessary setup and bind the worker
146  * thread to the assigned CPU.
147  */
148 static mac_soft_ring_t *
mac_soft_ring_create_i(int id,clock_t wait,const mac_soft_ring_state_t type,pri_t pri,mac_client_impl_t * mcip,mac_soft_ring_set_t * mac_srs,processorid_t cpuid)149 mac_soft_ring_create_i(int id, clock_t wait, const mac_soft_ring_state_t type,
150     pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
151     processorid_t cpuid)
152 {
153 	mac_soft_ring_t		*ringp;
154 	char			name[S_RING_NAMELEN];
155 
156 	VERIFY3U(type & SR_STATE, ==, 0);
157 
158 	bzero(name, 64);
159 	ringp = kmem_cache_alloc(mac_soft_ring_cache, KM_SLEEP);
160 
161 	if (type & ST_RING_TCP) {
162 		(void) snprintf(name, sizeof (name),
163 		    "mac_tcp_soft_ring_%d_%p", id, (void *)mac_srs);
164 	} else if (type & ST_RING_TCP6) {
165 		(void) snprintf(name, sizeof (name),
166 		    "mac_tcp6_soft_ring_%d_%p", id, (void *)mac_srs);
167 	} else if (type & ST_RING_UDP) {
168 		(void) snprintf(name, sizeof (name),
169 		    "mac_udp_soft_ring_%d_%p", id, (void *)mac_srs);
170 	} else if (type & ST_RING_UDP6) {
171 		(void) snprintf(name, sizeof (name),
172 		    "mac_udp6_soft_ring_%d_%p", id, (void *)mac_srs);
173 	} else if (type & ST_RING_OTH) {
174 		(void) snprintf(name, sizeof (name),
175 		    "mac_oth_soft_ring_%d_%p", id, (void *)mac_srs);
176 	} else {
177 		ASSERT(type & ST_RING_TX);
178 		(void) snprintf(name, sizeof (name),
179 		    "mac_tx_soft_ring_%d_%p", id, (void *)mac_srs);
180 	}
181 
182 	bzero(ringp, sizeof (mac_soft_ring_t));
183 	(void) strncpy(ringp->s_ring_name, name, S_RING_NAMELEN + 1);
184 	ringp->s_ring_name[S_RING_NAMELEN] = '\0';
185 	mutex_init(&ringp->s_ring_lock, NULL, MUTEX_DEFAULT, NULL);
186 	ringp->s_ring_notify_cb_info.mcbi_lockp = &ringp->s_ring_lock;
187 
188 	ringp->s_ring_state = type;
189 	ringp->s_ring_wait = MSEC_TO_TICK(wait);
190 	ringp->s_ring_mcip = mcip;
191 	ringp->s_ring_set = mac_srs;
192 
193 	/*
194 	 * Protect against access from DR callbacks (mac_walk_srs_bind/unbind)
195 	 * which can't grab the mac perimeter
196 	 */
197 	mutex_enter(&mac_srs->srs_lock);
198 	ADD_SOFTRING_TO_SET(mac_srs, ringp);
199 	mutex_exit(&mac_srs->srs_lock);
200 
201 	/*
202 	 * set the bind CPU to -1 to indicate
203 	 * no thread affinity set
204 	 */
205 	ringp->s_ring_cpuid = ringp->s_ring_cpuid_save = -1;
206 	ringp->s_ring_worker = thread_create(NULL, 0,
207 	    mac_soft_ring_worker, ringp, 0, &p0, TS_RUN, pri);
208 	if (cpuid != -1)
209 		(void) mac_soft_ring_bind(ringp, cpuid);
210 
211 	return (ringp);
212 }
213 
214 mac_soft_ring_t *
mac_soft_ring_create_rx(int id,clock_t wait,const mac_soft_ring_state_t type,pri_t pri,mac_client_impl_t * mcip,mac_soft_ring_set_t * mac_srs,processorid_t cpuid,mac_direct_rx_t rx_func,void * x_arg1)215 mac_soft_ring_create_rx(int id, clock_t wait, const mac_soft_ring_state_t type,
216     pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
217     processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1)
218 {
219 	VERIFY3U((type & ST_RING_TX), ==, 0);
220 
221 	mac_soft_ring_t *ringp = mac_soft_ring_create_i(id, wait, type, pri,
222 	    mcip, mac_srs, cpuid);
223 
224 	ringp->s_ring_drain_func = mac_rx_soft_ring_drain;
225 	ringp->s_ring_rx_func = rx_func;
226 	ringp->s_ring_rx_arg1 = x_arg1;
227 	ringp->s_ring_rx_arg2 = NULL;
228 	if (mac_srs->srs_type & SRST_ENQUEUE) {
229 		ringp->s_ring_state |= ST_RING_WORKER_ONLY;
230 	}
231 
232 	mac_soft_ring_stat_create(ringp);
233 
234 	return (ringp);
235 }
236 
237 mac_soft_ring_t *
mac_soft_ring_create_tx(int id,clock_t wait,const mac_soft_ring_state_t type,pri_t pri,mac_client_impl_t * mcip,mac_soft_ring_set_t * mac_srs,processorid_t cpuid,mac_ring_t * ring)238 mac_soft_ring_create_tx(int id, clock_t wait, const mac_soft_ring_state_t type,
239     pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
240     processorid_t cpuid, mac_ring_t *ring)
241 {
242 	VERIFY3U((type & ST_RING_TX), ==, 0);
243 	VERIFY(ring != NULL);
244 
245 	mac_soft_ring_t *ringp = mac_soft_ring_create_i(id, wait,
246 	    type | ST_RING_TX, pri, mcip, mac_srs, cpuid);
247 
248 	ringp->s_ring_drain_func = mac_tx_soft_ring_drain;
249 	ringp->s_ring_tx_arg1 = mcip;
250 	ringp->s_ring_tx_arg2 = ring;
251 	ringp->s_ring_tx_max_q_cnt = mac_tx_soft_ring_max_q_cnt;
252 	ringp->s_ring_tx_hiwat =
253 	    (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ?
254 	    mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat;
255 	if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
256 		mac_srs_tx_t *tx = &mac_srs->srs_tx;
257 		VERIFY3P(tx->st_soft_rings[ring->mr_index], ==, NULL);
258 		tx->st_soft_rings[ring->mr_index] = ringp;
259 	}
260 
261 	mac_soft_ring_stat_create(ringp);
262 
263 	return (ringp);
264 }
265 
266 /*
267  * mac_soft_ring_free
268  *
269  * Free the soft ring once we are done with it.
270  */
271 void
mac_soft_ring_free(mac_soft_ring_t * softring)272 mac_soft_ring_free(mac_soft_ring_t *softring)
273 {
274 	ASSERT((softring->s_ring_state &
275 	    (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | S_RING_PROC)) ==
276 	    (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE));
277 	mac_drop_chain(softring->s_ring_first, "softring free");
278 	softring->s_ring_tx_arg2 = NULL;
279 	mac_soft_ring_stat_delete(softring);
280 	mac_callback_free(softring->s_ring_notify_cb_list);
281 	kmem_cache_free(mac_soft_ring_cache, softring);
282 }
283 
284 int mac_soft_ring_thread_bind = 1;
285 
286 /*
287  * mac_soft_ring_bind
288  *
289  * Bind a soft ring worker thread to supplied CPU.
290  */
291 cpu_t *
mac_soft_ring_bind(mac_soft_ring_t * ringp,processorid_t cpuid)292 mac_soft_ring_bind(mac_soft_ring_t *ringp, processorid_t cpuid)
293 {
294 	cpu_t *cp;
295 	boolean_t clear = B_FALSE;
296 
297 	ASSERT(MUTEX_HELD(&cpu_lock));
298 
299 	if (mac_soft_ring_thread_bind == 0) {
300 		DTRACE_PROBE1(mac__soft__ring__no__cpu__bound,
301 		    mac_soft_ring_t *, ringp);
302 		return (NULL);
303 	}
304 
305 	cp = cpu_get(cpuid);
306 	if (cp == NULL || !cpu_is_online(cp))
307 		return (NULL);
308 
309 	mutex_enter(&ringp->s_ring_lock);
310 	ringp->s_ring_state |= S_RING_BOUND;
311 	if (ringp->s_ring_cpuid != -1)
312 		clear = B_TRUE;
313 	ringp->s_ring_cpuid = cpuid;
314 	mutex_exit(&ringp->s_ring_lock);
315 
316 	if (clear)
317 		thread_affinity_clear(ringp->s_ring_worker);
318 
319 	DTRACE_PROBE2(mac__soft__ring__cpu__bound, mac_soft_ring_t *,
320 	    ringp, processorid_t, cpuid);
321 
322 	thread_affinity_set(ringp->s_ring_worker, cpuid);
323 
324 	return (cp);
325 }
326 
327 /*
328  * mac_soft_ring_unbind
329  *
330  * Un Bind a soft ring worker thread.
331  */
332 void
mac_soft_ring_unbind(mac_soft_ring_t * ringp)333 mac_soft_ring_unbind(mac_soft_ring_t *ringp)
334 {
335 	ASSERT(MUTEX_HELD(&cpu_lock));
336 
337 	mutex_enter(&ringp->s_ring_lock);
338 	if (!(ringp->s_ring_state & S_RING_BOUND)) {
339 		ASSERT(ringp->s_ring_cpuid == -1);
340 		mutex_exit(&ringp->s_ring_lock);
341 		return;
342 	}
343 
344 	ringp->s_ring_cpuid = -1;
345 	ringp->s_ring_state &= ~S_RING_BOUND;
346 	thread_affinity_clear(ringp->s_ring_worker);
347 	mutex_exit(&ringp->s_ring_lock);
348 }
349 
350 /*
351  * PRIVATE FUNCTIONS
352  */
353 
354 static void
mac_soft_ring_fire(void * arg)355 mac_soft_ring_fire(void *arg)
356 {
357 	mac_soft_ring_t	*ringp = arg;
358 
359 	mutex_enter(&ringp->s_ring_lock);
360 	if (ringp->s_ring_tid == NULL) {
361 		mutex_exit(&ringp->s_ring_lock);
362 		return;
363 	}
364 
365 	ringp->s_ring_tid = NULL;
366 
367 	if (!(ringp->s_ring_state & S_RING_PROC)) {
368 		cv_signal(&ringp->s_ring_async);
369 	}
370 	mutex_exit(&ringp->s_ring_lock);
371 }
372 
373 /*
374  * Drain the soft ring pointed to by ringp.
375  *
376  *    o s_ring_first: pointer to the queued packet chain.
377  *
378  *    o s_ring_rx_func: pointer to to the client's Rx routine.
379  *
380  *    o s_ring_rx_{arg1,arg2}: opaque values specific to the client.
381  */
382 static void
mac_rx_soft_ring_drain(mac_soft_ring_t * ringp)383 mac_rx_soft_ring_drain(mac_soft_ring_t *ringp)
384 {
385 	mblk_t		*mp;
386 	void		*arg1;
387 	mac_resource_handle_t arg2;
388 	timeout_id_t	tid;
389 	mac_direct_rx_t	proc;
390 	int		cnt;
391 	mac_soft_ring_set_t	*mac_srs = ringp->s_ring_set;
392 
393 	ringp->s_ring_run = curthread;
394 	ASSERT(mutex_owned(&ringp->s_ring_lock));
395 	ASSERT(!(ringp->s_ring_state & S_RING_PROC));
396 
397 	if ((tid = ringp->s_ring_tid) != NULL)
398 		ringp->s_ring_tid = NULL;
399 
400 	ringp->s_ring_state |= S_RING_PROC;
401 
402 	proc = ringp->s_ring_rx_func;
403 	arg1 = ringp->s_ring_rx_arg1;
404 	arg2 = ringp->s_ring_rx_arg2;
405 
406 	while ((ringp->s_ring_first != NULL) &&
407 	    !(ringp->s_ring_state & S_RING_PAUSE)) {
408 		mp = ringp->s_ring_first;
409 		ringp->s_ring_first = NULL;
410 		ringp->s_ring_last = NULL;
411 		cnt = ringp->s_ring_count;
412 		ringp->s_ring_count = 0;
413 		ringp->s_ring_size = 0;
414 		mutex_exit(&ringp->s_ring_lock);
415 
416 		if (tid != NULL) {
417 			(void) untimeout(tid);
418 			tid = NULL;
419 		}
420 
421 		(*proc)(arg1, arg2, mp, NULL);
422 
423 		/*
424 		 * If we have an SRS performing bandwidth control, then
425 		 * we need to decrement the size and count so the SRS
426 		 * has an accurate measure of the data queued between
427 		 * the SRS and its soft rings. We decrement the
428 		 * counters only when the packet is processed by both
429 		 * the SRS and the soft ring.
430 		 */
431 		mutex_enter(&mac_srs->srs_lock);
432 		MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
433 		mutex_exit(&mac_srs->srs_lock);
434 
435 		mutex_enter(&ringp->s_ring_lock);
436 	}
437 	ringp->s_ring_state &= ~S_RING_PROC;
438 	if (ringp->s_ring_state & S_RING_CLIENT_WAIT)
439 		cv_signal(&ringp->s_ring_client_cv);
440 	ringp->s_ring_run = NULL;
441 }
442 
/*
 * The soft ring worker routine to process any queued packets. In
 * normal case, the worker thread is bound to a CPU. If the soft ring
 * handles TCP packets then the worker thread is bound to the same CPU
 * as the TCP squeue.
 *
 * This is the thread body started by mac_soft_ring_create_i(). It loops
 * calling the ring's s_ring_drain_func whenever there is work, parks
 * itself when S_RING_PAUSE is raised, resumes on S_RING_RESTART and
 * exits once the ring is S_RING_CONDEMNED. Progress is reported to the
 * owning SRS through its quiesced/condemned counters and srs_async.
 */
static void
mac_soft_ring_worker(mac_soft_ring_t *ringp)
{
	kmutex_t *lock = &ringp->s_ring_lock;
	kcondvar_t *async = &ringp->s_ring_async;
	mac_soft_ring_set_t *srs = ringp->s_ring_set;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "mac_soft_ring");
	mutex_enter(lock);
start:
	for (;;) {
		/*
		 * Sleep while the ring is being processed by someone else,
		 * or while there is nothing we may deliver (queue empty, or
		 * ring blocked/blanked) and no pause has been requested.
		 */
		while (((ringp->s_ring_first == NULL ||
		    (ringp->s_ring_state & (S_RING_BLOCK|S_RING_BLANK))) &&
		    !(ringp->s_ring_state & S_RING_PAUSE)) ||
		    (ringp->s_ring_state & S_RING_PROC)) {

			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(async, lock);
			CALLB_CPR_SAFE_END(&cprinfo, lock);
		}

		/*
		 * Either we have work to do, or we have been asked to
		 * shutdown temporarily or permanently
		 */
		if (ringp->s_ring_state & S_RING_PAUSE)
			goto done;

		ringp->s_ring_drain_func(ringp);
	}
done:
	/*
	 * Both locks are needed from here on and srs_lock is taken first,
	 * so drop the ring lock and reacquire it after srs_lock.
	 */
	mutex_exit(lock);
	mutex_enter(&srs->srs_lock);
	mutex_enter(lock);

	ringp->s_ring_state |= S_RING_QUIESCE_DONE;
	if (!(ringp->s_ring_state & S_RING_CONDEMNED)) {
		/*
		 * Temporary quiesce: tell the SRS we have stopped, then
		 * wait (holding only the ring lock) to be restarted or
		 * condemned.
		 */
		srs->srs_soft_ring_quiesced_count++;
		cv_broadcast(&srs->srs_async);
		mutex_exit(&srs->srs_lock);
		while (!(ringp->s_ring_state &
		    (S_RING_RESTART | S_RING_CONDEMNED)))
			cv_wait(&ringp->s_ring_async, &ringp->s_ring_lock);
		/* Same lock ordering dance as above. */
		mutex_exit(lock);
		mutex_enter(&srs->srs_lock);
		mutex_enter(lock);
		srs->srs_soft_ring_quiesced_count--;
		if (ringp->s_ring_state & S_RING_RESTART) {
			ASSERT(!(ringp->s_ring_state & S_RING_CONDEMNED));
			ringp->s_ring_state &= ~(S_RING_RESTART |
			    S_RING_QUIESCE | S_RING_QUIESCE_DONE);
			cv_broadcast(&srs->srs_async);
			mutex_exit(&srs->srs_lock);
			/* Back to the main loop with the ring lock held. */
			goto start;
		}
	}
	/* Permanent shutdown. */
	ASSERT(ringp->s_ring_state & S_RING_CONDEMNED);
	ringp->s_ring_state |= S_RING_CONDEMNED_DONE;
	/*
	 * NOTE(review): no explicit mutex_exit(lock) follows, so
	 * CALLB_CPR_EXIT is presumably what releases the ring lock here --
	 * confirm against the callb(9F) documentation.
	 */
	CALLB_CPR_EXIT(&cprinfo);
	srs->srs_soft_ring_condemned_count++;
	cv_broadcast(&srs->srs_async);
	mutex_exit(&srs->srs_lock);
	thread_exit();
}
514 
515 /*
516  * mac_soft_ring_intr_enable and mac_soft_ring_intr_disable
517  *
518  * these functions are called to toggle the sending of packets to the
519  * client. They are called by the client. the client gets the name
520  * of these routine and corresponding cookie (pointing to softring)
521  * during capability negotiation at setup time.
522  *
523  * Enabling is allow the processing thread to send packets to the
524  * client while disabling does the opposite.
525  */
526 int
mac_soft_ring_intr_enable(void * arg)527 mac_soft_ring_intr_enable(void *arg)
528 {
529 	mac_soft_ring_t *ringp = (mac_soft_ring_t *)arg;
530 	mutex_enter(&ringp->s_ring_lock);
531 	ringp->s_ring_state &= ~S_RING_BLANK;
532 	if (ringp->s_ring_first != NULL)
533 		mac_soft_ring_worker_wakeup(ringp);
534 	mutex_exit(&ringp->s_ring_lock);
535 	return (0);
536 }
537 
538 boolean_t
mac_soft_ring_intr_disable(void * arg)539 mac_soft_ring_intr_disable(void *arg)
540 {
541 	mac_soft_ring_t *ringp = (mac_soft_ring_t *)arg;
542 	boolean_t sring_blanked = B_FALSE;
543 	/*
544 	 * Stop worker thread from sending packets above.
545 	 * Squeue will poll soft ring when it needs packets.
546 	 */
547 	mutex_enter(&ringp->s_ring_lock);
548 	if (!(ringp->s_ring_state & S_RING_PROC)) {
549 		ringp->s_ring_state |= S_RING_BLANK;
550 		sring_blanked = B_TRUE;
551 	}
552 	mutex_exit(&ringp->s_ring_lock);
553 	return (sring_blanked);
554 }
555 
556 /*
557  * mac_soft_ring_poll
558  *
559  * This routine is called by the client to poll for packets from
560  * the soft ring. The function name and cookie corresponding to
561  * the soft ring is exchanged during capability negotiation during
562  * setup.
563  */
564 mblk_t *
mac_soft_ring_poll(mac_soft_ring_t * ringp,size_t bytes_to_pickup)565 mac_soft_ring_poll(mac_soft_ring_t *ringp, size_t bytes_to_pickup)
566 {
567 	mblk_t	*head, *tail;
568 	mblk_t	*mp;
569 	size_t	sz = 0;
570 	int	cnt = 0;
571 	mac_soft_ring_set_t	*mac_srs = ringp->s_ring_set;
572 
573 	ASSERT(mac_srs != NULL);
574 
575 	mutex_enter(&ringp->s_ring_lock);
576 	head = tail = mp = ringp->s_ring_first;
577 	if (head == NULL) {
578 		mutex_exit(&ringp->s_ring_lock);
579 		return (NULL);
580 	}
581 
582 	if (ringp->s_ring_size <= bytes_to_pickup) {
583 		head = ringp->s_ring_first;
584 		ringp->s_ring_first = NULL;
585 		ringp->s_ring_last = NULL;
586 		cnt = ringp->s_ring_count;
587 		ringp->s_ring_count = 0;
588 		sz = ringp->s_ring_size;
589 		ringp->s_ring_size = 0;
590 	} else {
591 		while (mp && sz <= bytes_to_pickup) {
592 			sz += msgdsize(mp);
593 			cnt++;
594 			tail = mp;
595 			mp = mp->b_next;
596 		}
597 		ringp->s_ring_count -= cnt;
598 		ringp->s_ring_size -= sz;
599 		tail->b_next = NULL;
600 		if (mp == NULL) {
601 			ringp->s_ring_first = NULL;
602 			ringp->s_ring_last = NULL;
603 			ASSERT(ringp->s_ring_count == 0);
604 		} else {
605 			ringp->s_ring_first = mp;
606 		}
607 	}
608 
609 	mutex_exit(&ringp->s_ring_lock);
610 	/*
611 	 * Update the shared count and size counters so
612 	 * that SRS has a accurate idea of queued packets.
613 	 */
614 	mutex_enter(&mac_srs->srs_lock);
615 	MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
616 	mutex_exit(&mac_srs->srs_lock);
617 	return (head);
618 }
619 
620 /*
621  * Enable direct client (IP) callback function from the softrings.
622  * Callers need to make sure they don't need any DLS layer processing
623  */
624 void
mac_soft_ring_dls_bypass_enable(mac_soft_ring_t * softring,mac_direct_rx_t rx_func,void * rx_arg1)625 mac_soft_ring_dls_bypass_enable(mac_soft_ring_t *softring,
626     mac_direct_rx_t rx_func, void *rx_arg1)
627 {
628 	VERIFY3P(rx_func, !=, NULL);
629 	mutex_enter(&softring->s_ring_lock);
630 	softring->s_ring_rx_func = rx_func;
631 	softring->s_ring_rx_arg1 = rx_arg1;
632 	mutex_exit(&softring->s_ring_lock);
633 }
634 
635 /* Disable DLS bypass. */
636 void
mac_soft_ring_dls_bypass_disable(mac_soft_ring_t * softring,mac_client_impl_t * mcip)637 mac_soft_ring_dls_bypass_disable(mac_soft_ring_t *softring,
638     mac_client_impl_t *mcip)
639 {
640 	mutex_enter(&softring->s_ring_lock);
641 	/*
642 	 * Before modifying the ring state we first wait for any in-progress
643 	 * processing to stop.
644 	 */
645 	while (softring->s_ring_state & S_RING_PROC) {
646 		softring->s_ring_state |= S_RING_CLIENT_WAIT;
647 		cv_wait(&softring->s_ring_client_cv,
648 		    &softring->s_ring_lock);
649 	}
650 
651 	softring->s_ring_state &= ~S_RING_CLIENT_WAIT;
652 	softring->s_ring_rx_func = mac_rx_deliver;
653 	softring->s_ring_rx_arg1 = mcip;
654 	mutex_exit(&softring->s_ring_lock);
655 }
656 
657 void
mac_soft_ring_poll_enable(mac_soft_ring_t * sr,mac_direct_rx_t drx,void * drx_arg,mac_resource_cb_t * rcb,uint32_t pri)658 mac_soft_ring_poll_enable(mac_soft_ring_t *sr, mac_direct_rx_t drx,
659     void *drx_arg, mac_resource_cb_t *rcb, uint32_t pri)
660 {
661 	mac_rx_fifo_t mrf;
662 
663 	/* Only TCP/IP clients are poll capable at the moment. */
664 	VERIFY((sr->s_ring_state & (ST_RING_TCP | ST_RING_TCP6)) != 0);
665 	/* The client resourse callback structure better be set. */
666 	VERIFY3P(rcb->mrc_arg, !=, NULL);
667 	/* Polling should be configured only once on a given softring. */
668 	VERIFY3P(sr->s_ring_rx_arg2, ==, NULL);
669 
670 	/*
671 	 * As polling elides DLS processing we must make sure that
672 	 * softring processing (i.e. non-polling) also bypasses DLS
673 	 * processing.
674 	 */
675 	mac_soft_ring_dls_bypass_enable(sr, drx, drx_arg);
676 
677 	bzero(&mrf, sizeof (mrf));
678 	mrf.mrf_type = MAC_RX_FIFO;
679 	mrf.mrf_receive = (mac_receive_t)mac_soft_ring_poll;
680 	mrf.mrf_intr_enable =
681 	    (mac_intr_enable_t)mac_soft_ring_intr_enable;
682 	mrf.mrf_intr_disable =
683 	    (mac_intr_disable_t)mac_soft_ring_intr_disable;
684 	mrf.mrf_rx_arg = sr;
685 	mrf.mrf_intr_handle = (mac_intr_handle_t)sr;
686 	mrf.mrf_cpu_id = sr->s_ring_cpuid;
687 	mrf.mrf_flow_priority = pri;
688 
689 	sr->s_ring_rx_arg2 = rcb->mrc_add(rcb->mrc_arg,
690 	    (mac_resource_t *)&mrf);
691 }
692 
693 void
mac_soft_ring_poll_disable(mac_soft_ring_t * sr,mac_resource_cb_t * rcb,mac_client_impl_t * mcip)694 mac_soft_ring_poll_disable(mac_soft_ring_t *sr, mac_resource_cb_t *rcb,
695     mac_client_impl_t *mcip)
696 {
697 	/* Only TCP/IP clients are poll capable at the moment. */
698 	VERIFY((sr->s_ring_state & (ST_RING_TCP | ST_RING_TCP6)) != 0);
699 
700 	/*
701 	 * Remove the IP ring if there is one associated with this
702 	 * softring. Note that IP rings are a limited resource; and
703 	 * SRST_CLIENT_POLL_V4/V6 being set on the SRS is no guarantee
704 	 * that all TCP softrings have an associated IP ring. This is by
705 	 * design. See ip_squeue_add_ring().
706 	 */
707 	if (sr->s_ring_rx_arg2 != NULL) {
708 		VERIFY3P(rcb->mrc_arg, !=, NULL);
709 		rcb->mrc_remove(rcb->mrc_arg, sr->s_ring_rx_arg2);
710 		sr->s_ring_rx_arg2 = NULL;
711 	}
712 
713 	mac_soft_ring_dls_bypass_disable(sr, mcip);
714 }
715 
716 /*
717  * mac_soft_ring_signal
718  *
719  * Typically used to set the soft ring state to QUIESCE, CONDEMNED, or
720  * RESTART.
721  *
722  * In the Rx side, the quiescing is done bottom up. After the Rx upcalls
723  * from the driver are done, then the Rx SRS is quiesced and only then can
724  * we signal the soft rings. Thus this function can't be called arbitrarily
725  * without satisfying the prerequisites. On the Tx side, the threads from
726  * top need to quiesced, then the Tx SRS and only then can we signal the
727  * Tx soft rings.
728  */
729 void
mac_soft_ring_signal(mac_soft_ring_t * softring,const mac_soft_ring_state_t sr_flag)730 mac_soft_ring_signal(mac_soft_ring_t *softring,
731     const mac_soft_ring_state_t sr_flag)
732 {
733 	mutex_enter(&softring->s_ring_lock);
734 	softring->s_ring_state |= sr_flag;
735 	cv_signal(&softring->s_ring_async);
736 	mutex_exit(&softring->s_ring_lock);
737 }
738 
739 /*
740  * mac_tx_soft_ring_drain
741  *
742  * The transmit side drain routine in case the soft ring was being
743  * used to transmit packets.
744  */
745 static void
mac_tx_soft_ring_drain(mac_soft_ring_t * ringp)746 mac_tx_soft_ring_drain(mac_soft_ring_t *ringp)
747 {
748 	mblk_t			*mp;
749 	void			*arg1;
750 	void			*arg2;
751 	mblk_t			*tail;
752 	uint_t			saved_pkt_count, saved_size;
753 	mac_tx_stats_t		stats;
754 	mac_soft_ring_set_t	*mac_srs = ringp->s_ring_set;
755 
756 	saved_pkt_count = saved_size = 0;
757 	ringp->s_ring_run = curthread;
758 	ASSERT(mutex_owned(&ringp->s_ring_lock));
759 	ASSERT(!(ringp->s_ring_state & S_RING_PROC));
760 
761 	ringp->s_ring_state |= S_RING_PROC;
762 	arg1 = ringp->s_ring_tx_arg1;
763 	arg2 = ringp->s_ring_tx_arg2;
764 
765 	while (ringp->s_ring_first != NULL) {
766 		mp = ringp->s_ring_first;
767 		tail = ringp->s_ring_last;
768 		saved_pkt_count = ringp->s_ring_count;
769 		saved_size = ringp->s_ring_size;
770 		ringp->s_ring_first = NULL;
771 		ringp->s_ring_last = NULL;
772 		ringp->s_ring_count = 0;
773 		ringp->s_ring_size = 0;
774 		mutex_exit(&ringp->s_ring_lock);
775 
776 		mp = mac_tx_send(arg1, arg2, mp, &stats);
777 
778 		mutex_enter(&ringp->s_ring_lock);
779 		if (mp != NULL) {
780 			/* Device out of tx desc, set block */
781 			tail->b_next = ringp->s_ring_first;
782 			ringp->s_ring_first = mp;
783 			ringp->s_ring_count +=
784 			    (saved_pkt_count - stats.mts_opackets);
785 			ringp->s_ring_size += (saved_size - stats.mts_obytes);
786 			if (ringp->s_ring_last == NULL)
787 				ringp->s_ring_last = tail;
788 
789 			if (ringp->s_ring_tx_woken_up) {
790 				ringp->s_ring_tx_woken_up = B_FALSE;
791 			} else {
792 				ringp->s_ring_state |= S_RING_BLOCK;
793 				ringp->s_st_stat.mts_blockcnt++;
794 			}
795 
796 			ringp->s_ring_state &= ~S_RING_PROC;
797 			ringp->s_ring_run = NULL;
798 			return;
799 		} else {
800 			ringp->s_ring_tx_woken_up = B_FALSE;
801 			SRS_TX_STATS_UPDATE(mac_srs, &stats);
802 			SOFTRING_TX_STATS_UPDATE(ringp, &stats);
803 		}
804 	}
805 
806 	if (ringp->s_ring_count == 0 && ringp->s_ring_state &
807 	    (S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED)) {
808 		mac_client_impl_t *mcip =  ringp->s_ring_mcip;
809 		boolean_t wakeup_required = B_FALSE;
810 
811 		if (ringp->s_ring_state &
812 		    (S_RING_TX_HIWAT|S_RING_WAKEUP_CLIENT)) {
813 			wakeup_required = B_TRUE;
814 		}
815 		ringp->s_ring_state &=
816 		    ~(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED);
817 		mutex_exit(&ringp->s_ring_lock);
818 		if (wakeup_required) {
819 			mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)ringp);
820 			/*
821 			 * If the client is not the primary MAC client, then we
822 			 * need to send the notification to the clients upper
823 			 * MAC, i.e. mci_upper_mip.
824 			 */
825 			mac_tx_notify(mcip->mci_upper_mip != NULL ?
826 			    mcip->mci_upper_mip : mcip->mci_mip);
827 		}
828 		mutex_enter(&ringp->s_ring_lock);
829 	}
830 	ringp->s_ring_state &= ~S_RING_PROC;
831 	ringp->s_ring_run = NULL;
832 }
833