1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2018 Joyent, Inc.
25 * Copyright 2026 Oxide Computer Company
26 */
27
28 /*
29 * General Soft rings - Simulating Rx rings in S/W.
30 *
31 * Soft ring is a data abstraction containing a queue and a worker
32 * thread and represents a hardware Rx ring in software. Each soft
33 * ring set can have a collection of soft rings for separating
34 * L3/L4 specific traffic (IPv4 from IPv6 or TCP from UDP) or for
35 * allowing a higher degree of parallelism by sending traffic to
36 * one of the soft rings for a SRS (using a hash on src IP or port).
37 * Each soft ring worker thread can be bound to a different CPU
38 * allowing the processing for each soft ring to happen in parallel
39 * and independent from each other.
40 *
41 * Protocol soft rings:
42 *
 * Each SRS has at a minimum 3 softrings. One each for IPv4 TCP,
44 * IPv4 UDP and rest (OTH - for IPv6 and everything else). The
45 * SRS does dynamic polling and enforces link level bandwidth but
46 * it does so for all traffic (IPv4 and IPv6 and all protocols) on
47 * that link. However, each protocol layer wants a different
48 * behaviour. For instance IPv4 TCP has per CPU squeues which
49 * enforce their own polling and flow control so IPv4 TCP traffic
50 * needs to go to a separate soft ring which can be polled by the
51 * TCP squeue. It also allows TCP squeue to push back flow control
52 * all the way to NIC hardware (if it puts its corresponding soft
53 * ring in the poll mode and soft ring queue builds up, the
54 * shared srs_poll_pkt_cnt goes up and SRS automatically stops
55 * more packets from entering the system).
56 *
 * Similarly, UDP benefits from a DLS bypass and packet chaining,
 * so sending it to a separate soft ring is desired. All the rest of
 * the traffic (including IPv6) is sent to the OTH softring. The IPv6
 * traffic currently goes through the OTH softring and via DLS because
 * it needs more processing to be done. Irrespective of the sap
 * (IPv4 or IPv6) or the transport, the dynamic polling, B/W enforcement,
 * cpu assignment, fanout, etc. apply to all traffic since they
 * are implemented by the SRS, which is agnostic to sap or transport.
65 *
66 * Fanout soft rings:
67 *
68 * On a multithreaded system, we can assign more CPU and multi thread
69 * the stack by creating a soft ring per CPU and spreading traffic
70 * based on a hash computed on src IP etc. Since we still need to
71 * keep the protocol separation, we create a set of 3 soft ring per
72 * CPU (specified by cpu list or degree of fanout).
73 *
74 * NOTE: See the block level comment on top of mac_sched.c
75 */
76
77 #include <sys/types.h>
78 #include <sys/callb.h>
79 #include <sys/sdt.h>
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82 #include <sys/vlan.h>
83 #include <inet/ipsec_impl.h>
84 #include <inet/ip_impl.h>
85 #include <inet/sadb.h>
86 #include <inet/ipsecesp.h>
87 #include <inet/ipsecah.h>
88
89 #include <sys/mac_impl.h>
90 #include <sys/mac_client_impl.h>
91 #include <sys/mac_soft_ring.h>
92 #include <sys/mac_flow_impl.h>
93 #include <sys/mac_stat.h>
94
95 static void mac_rx_soft_ring_drain(mac_soft_ring_t *);
96 static void mac_soft_ring_fire(void *);
97 static void mac_soft_ring_worker(mac_soft_ring_t *);
98 static void mac_tx_soft_ring_drain(mac_soft_ring_t *);
99
100 uint32_t mac_tx_soft_ring_max_q_cnt = 100000;
101 uint32_t mac_tx_soft_ring_hiwat = 1000;
102
103 extern kmem_cache_t *mac_soft_ring_cache;
104
/*
 * Append `softring' to the tail of the doubly-linked soft ring list kept on
 * `mac_srs' and bump the set's soft ring count.  Only the links that attach
 * the ring to the existing tail are written; the new ring's s_ring_next (and,
 * for the first ring, its s_ring_prev) are left as the caller initialised
 * them.
 *
 * Both arguments are evaluated more than once, so they must be free of side
 * effects.  The body is wrapped in do/while (0) so that the macro behaves as
 * a single statement (e.g. inside an unbraced if/else), and every argument
 * use is parenthesized so that expressions such as `&ring' expand correctly.
 */
#define	ADD_SOFTRING_TO_SET(mac_srs, softring) do {			\
	if ((mac_srs)->srs_soft_ring_head == NULL) {			\
		(mac_srs)->srs_soft_ring_head = (softring);		\
		(mac_srs)->srs_soft_ring_tail = (softring);		\
	} else {							\
		/* ADD to the list */					\
		(softring)->s_ring_prev =				\
		    (mac_srs)->srs_soft_ring_tail;			\
		(mac_srs)->srs_soft_ring_tail->s_ring_next =		\
		    (softring);						\
		(mac_srs)->srs_soft_ring_tail = (softring);		\
	}								\
	(mac_srs)->srs_soft_ring_count++;				\
} while (0)
118
119 /*
120 * mac_soft_ring_worker_wakeup
121 *
122 * Wake up the soft ring worker thread to process the queue as long
123 * as no one else is processing it and upper layer (client) is still
124 * ready to receive packets.
125 */
126 void
mac_soft_ring_worker_wakeup(mac_soft_ring_t * ringp)127 mac_soft_ring_worker_wakeup(mac_soft_ring_t *ringp)
128 {
129 ASSERT(MUTEX_HELD(&ringp->s_ring_lock));
130 if (!(ringp->s_ring_state & S_RING_PROC) &&
131 !(ringp->s_ring_state & S_RING_BLANK) &&
132 (ringp->s_ring_tid == NULL)) {
133 if (ringp->s_ring_wait != 0) {
134 ringp->s_ring_tid =
135 timeout(mac_soft_ring_fire, ringp,
136 ringp->s_ring_wait);
137 } else {
138 /* Schedule the worker thread. */
139 cv_signal(&ringp->s_ring_async);
140 }
141 }
142 }
143
144 /*
145 * Create a soft ring, do the necessary setup and bind the worker
146 * thread to the assigned CPU.
147 */
148 static mac_soft_ring_t *
mac_soft_ring_create_i(int id,clock_t wait,const mac_soft_ring_state_t type,pri_t pri,mac_client_impl_t * mcip,mac_soft_ring_set_t * mac_srs,processorid_t cpuid)149 mac_soft_ring_create_i(int id, clock_t wait, const mac_soft_ring_state_t type,
150 pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
151 processorid_t cpuid)
152 {
153 mac_soft_ring_t *ringp;
154 char name[S_RING_NAMELEN];
155
156 VERIFY3U(type & SR_STATE, ==, 0);
157
158 bzero(name, 64);
159 ringp = kmem_cache_alloc(mac_soft_ring_cache, KM_SLEEP);
160
161 if (type & ST_RING_TCP) {
162 (void) snprintf(name, sizeof (name),
163 "mac_tcp_soft_ring_%d_%p", id, (void *)mac_srs);
164 } else if (type & ST_RING_TCP6) {
165 (void) snprintf(name, sizeof (name),
166 "mac_tcp6_soft_ring_%d_%p", id, (void *)mac_srs);
167 } else if (type & ST_RING_UDP) {
168 (void) snprintf(name, sizeof (name),
169 "mac_udp_soft_ring_%d_%p", id, (void *)mac_srs);
170 } else if (type & ST_RING_UDP6) {
171 (void) snprintf(name, sizeof (name),
172 "mac_udp6_soft_ring_%d_%p", id, (void *)mac_srs);
173 } else if (type & ST_RING_OTH) {
174 (void) snprintf(name, sizeof (name),
175 "mac_oth_soft_ring_%d_%p", id, (void *)mac_srs);
176 } else {
177 ASSERT(type & ST_RING_TX);
178 (void) snprintf(name, sizeof (name),
179 "mac_tx_soft_ring_%d_%p", id, (void *)mac_srs);
180 }
181
182 bzero(ringp, sizeof (mac_soft_ring_t));
183 (void) strncpy(ringp->s_ring_name, name, S_RING_NAMELEN + 1);
184 ringp->s_ring_name[S_RING_NAMELEN] = '\0';
185 mutex_init(&ringp->s_ring_lock, NULL, MUTEX_DEFAULT, NULL);
186 ringp->s_ring_notify_cb_info.mcbi_lockp = &ringp->s_ring_lock;
187
188 ringp->s_ring_state = type;
189 ringp->s_ring_wait = MSEC_TO_TICK(wait);
190 ringp->s_ring_mcip = mcip;
191 ringp->s_ring_set = mac_srs;
192
193 /*
194 * Protect against access from DR callbacks (mac_walk_srs_bind/unbind)
195 * which can't grab the mac perimeter
196 */
197 mutex_enter(&mac_srs->srs_lock);
198 ADD_SOFTRING_TO_SET(mac_srs, ringp);
199 mutex_exit(&mac_srs->srs_lock);
200
201 /*
202 * set the bind CPU to -1 to indicate
203 * no thread affinity set
204 */
205 ringp->s_ring_cpuid = ringp->s_ring_cpuid_save = -1;
206 ringp->s_ring_worker = thread_create(NULL, 0,
207 mac_soft_ring_worker, ringp, 0, &p0, TS_RUN, pri);
208 if (cpuid != -1)
209 (void) mac_soft_ring_bind(ringp, cpuid);
210
211 return (ringp);
212 }
213
214 mac_soft_ring_t *
mac_soft_ring_create_rx(int id,clock_t wait,const mac_soft_ring_state_t type,pri_t pri,mac_client_impl_t * mcip,mac_soft_ring_set_t * mac_srs,processorid_t cpuid,mac_direct_rx_t rx_func,void * x_arg1)215 mac_soft_ring_create_rx(int id, clock_t wait, const mac_soft_ring_state_t type,
216 pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
217 processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1)
218 {
219 VERIFY3U((type & ST_RING_TX), ==, 0);
220
221 mac_soft_ring_t *ringp = mac_soft_ring_create_i(id, wait, type, pri,
222 mcip, mac_srs, cpuid);
223
224 ringp->s_ring_drain_func = mac_rx_soft_ring_drain;
225 ringp->s_ring_rx_func = rx_func;
226 ringp->s_ring_rx_arg1 = x_arg1;
227 ringp->s_ring_rx_arg2 = NULL;
228 if (mac_srs->srs_type & SRST_ENQUEUE) {
229 ringp->s_ring_state |= ST_RING_WORKER_ONLY;
230 }
231
232 mac_soft_ring_stat_create(ringp);
233
234 return (ringp);
235 }
236
237 mac_soft_ring_t *
mac_soft_ring_create_tx(int id,clock_t wait,const mac_soft_ring_state_t type,pri_t pri,mac_client_impl_t * mcip,mac_soft_ring_set_t * mac_srs,processorid_t cpuid,mac_ring_t * ring)238 mac_soft_ring_create_tx(int id, clock_t wait, const mac_soft_ring_state_t type,
239 pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
240 processorid_t cpuid, mac_ring_t *ring)
241 {
242 VERIFY3U((type & ST_RING_TX), ==, 0);
243 VERIFY(ring != NULL);
244
245 mac_soft_ring_t *ringp = mac_soft_ring_create_i(id, wait,
246 type | ST_RING_TX, pri, mcip, mac_srs, cpuid);
247
248 ringp->s_ring_drain_func = mac_tx_soft_ring_drain;
249 ringp->s_ring_tx_arg1 = mcip;
250 ringp->s_ring_tx_arg2 = ring;
251 ringp->s_ring_tx_max_q_cnt = mac_tx_soft_ring_max_q_cnt;
252 ringp->s_ring_tx_hiwat =
253 (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ?
254 mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat;
255 if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
256 mac_srs_tx_t *tx = &mac_srs->srs_tx;
257 VERIFY3P(tx->st_soft_rings[ring->mr_index], ==, NULL);
258 tx->st_soft_rings[ring->mr_index] = ringp;
259 }
260
261 mac_soft_ring_stat_create(ringp);
262
263 return (ringp);
264 }
265
266 /*
267 * mac_soft_ring_free
268 *
269 * Free the soft ring once we are done with it.
270 */
271 void
mac_soft_ring_free(mac_soft_ring_t * softring)272 mac_soft_ring_free(mac_soft_ring_t *softring)
273 {
274 ASSERT((softring->s_ring_state &
275 (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | S_RING_PROC)) ==
276 (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE));
277 mac_drop_chain(softring->s_ring_first, "softring free");
278 softring->s_ring_tx_arg2 = NULL;
279 mac_soft_ring_stat_delete(softring);
280 mac_callback_free(softring->s_ring_notify_cb_list);
281 kmem_cache_free(mac_soft_ring_cache, softring);
282 }
283
284 int mac_soft_ring_thread_bind = 1;
285
286 /*
287 * mac_soft_ring_bind
288 *
289 * Bind a soft ring worker thread to supplied CPU.
290 */
291 cpu_t *
mac_soft_ring_bind(mac_soft_ring_t * ringp,processorid_t cpuid)292 mac_soft_ring_bind(mac_soft_ring_t *ringp, processorid_t cpuid)
293 {
294 cpu_t *cp;
295 boolean_t clear = B_FALSE;
296
297 ASSERT(MUTEX_HELD(&cpu_lock));
298
299 if (mac_soft_ring_thread_bind == 0) {
300 DTRACE_PROBE1(mac__soft__ring__no__cpu__bound,
301 mac_soft_ring_t *, ringp);
302 return (NULL);
303 }
304
305 cp = cpu_get(cpuid);
306 if (cp == NULL || !cpu_is_online(cp))
307 return (NULL);
308
309 mutex_enter(&ringp->s_ring_lock);
310 ringp->s_ring_state |= S_RING_BOUND;
311 if (ringp->s_ring_cpuid != -1)
312 clear = B_TRUE;
313 ringp->s_ring_cpuid = cpuid;
314 mutex_exit(&ringp->s_ring_lock);
315
316 if (clear)
317 thread_affinity_clear(ringp->s_ring_worker);
318
319 DTRACE_PROBE2(mac__soft__ring__cpu__bound, mac_soft_ring_t *,
320 ringp, processorid_t, cpuid);
321
322 thread_affinity_set(ringp->s_ring_worker, cpuid);
323
324 return (cp);
325 }
326
327 /*
328 * mac_soft_ring_unbind
329 *
330 * Un Bind a soft ring worker thread.
331 */
332 void
mac_soft_ring_unbind(mac_soft_ring_t * ringp)333 mac_soft_ring_unbind(mac_soft_ring_t *ringp)
334 {
335 ASSERT(MUTEX_HELD(&cpu_lock));
336
337 mutex_enter(&ringp->s_ring_lock);
338 if (!(ringp->s_ring_state & S_RING_BOUND)) {
339 ASSERT(ringp->s_ring_cpuid == -1);
340 mutex_exit(&ringp->s_ring_lock);
341 return;
342 }
343
344 ringp->s_ring_cpuid = -1;
345 ringp->s_ring_state &= ~S_RING_BOUND;
346 thread_affinity_clear(ringp->s_ring_worker);
347 mutex_exit(&ringp->s_ring_lock);
348 }
349
350 /*
351 * PRIVATE FUNCTIONS
352 */
353
354 static void
mac_soft_ring_fire(void * arg)355 mac_soft_ring_fire(void *arg)
356 {
357 mac_soft_ring_t *ringp = arg;
358
359 mutex_enter(&ringp->s_ring_lock);
360 if (ringp->s_ring_tid == NULL) {
361 mutex_exit(&ringp->s_ring_lock);
362 return;
363 }
364
365 ringp->s_ring_tid = NULL;
366
367 if (!(ringp->s_ring_state & S_RING_PROC)) {
368 cv_signal(&ringp->s_ring_async);
369 }
370 mutex_exit(&ringp->s_ring_lock);
371 }
372
373 /*
374 * Drain the soft ring pointed to by ringp.
375 *
376 * o s_ring_first: pointer to the queued packet chain.
377 *
378 * o s_ring_rx_func: pointer to to the client's Rx routine.
379 *
380 * o s_ring_rx_{arg1,arg2}: opaque values specific to the client.
381 */
382 static void
mac_rx_soft_ring_drain(mac_soft_ring_t * ringp)383 mac_rx_soft_ring_drain(mac_soft_ring_t *ringp)
384 {
385 mblk_t *mp;
386 void *arg1;
387 mac_resource_handle_t arg2;
388 timeout_id_t tid;
389 mac_direct_rx_t proc;
390 int cnt;
391 mac_soft_ring_set_t *mac_srs = ringp->s_ring_set;
392
393 ringp->s_ring_run = curthread;
394 ASSERT(mutex_owned(&ringp->s_ring_lock));
395 ASSERT(!(ringp->s_ring_state & S_RING_PROC));
396
397 if ((tid = ringp->s_ring_tid) != NULL)
398 ringp->s_ring_tid = NULL;
399
400 ringp->s_ring_state |= S_RING_PROC;
401
402 proc = ringp->s_ring_rx_func;
403 arg1 = ringp->s_ring_rx_arg1;
404 arg2 = ringp->s_ring_rx_arg2;
405
406 while ((ringp->s_ring_first != NULL) &&
407 !(ringp->s_ring_state & S_RING_PAUSE)) {
408 mp = ringp->s_ring_first;
409 ringp->s_ring_first = NULL;
410 ringp->s_ring_last = NULL;
411 cnt = ringp->s_ring_count;
412 ringp->s_ring_count = 0;
413 ringp->s_ring_size = 0;
414 mutex_exit(&ringp->s_ring_lock);
415
416 if (tid != NULL) {
417 (void) untimeout(tid);
418 tid = NULL;
419 }
420
421 (*proc)(arg1, arg2, mp, NULL);
422
423 /*
424 * If we have an SRS performing bandwidth control, then
425 * we need to decrement the size and count so the SRS
426 * has an accurate measure of the data queued between
427 * the SRS and its soft rings. We decrement the
428 * counters only when the packet is processed by both
429 * the SRS and the soft ring.
430 */
431 mutex_enter(&mac_srs->srs_lock);
432 MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
433 mutex_exit(&mac_srs->srs_lock);
434
435 mutex_enter(&ringp->s_ring_lock);
436 }
437 ringp->s_ring_state &= ~S_RING_PROC;
438 if (ringp->s_ring_state & S_RING_CLIENT_WAIT)
439 cv_signal(&ringp->s_ring_client_cv);
440 ringp->s_ring_run = NULL;
441 }
442
443 /*
444 * The soft ring worker routine to process any queued packets. In
445 * normal case, the worker thread is bound to a CPU. If the soft ring
446 * handles TCP packets then the worker thread is bound to the same CPU
447 * as the TCP squeue.
448 */
449 static void
mac_soft_ring_worker(mac_soft_ring_t * ringp)450 mac_soft_ring_worker(mac_soft_ring_t *ringp)
451 {
452 kmutex_t *lock = &ringp->s_ring_lock;
453 kcondvar_t *async = &ringp->s_ring_async;
454 mac_soft_ring_set_t *srs = ringp->s_ring_set;
455 callb_cpr_t cprinfo;
456
457 CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "mac_soft_ring");
458 mutex_enter(lock);
459 start:
460 for (;;) {
461 while (((ringp->s_ring_first == NULL ||
462 (ringp->s_ring_state & (S_RING_BLOCK|S_RING_BLANK))) &&
463 !(ringp->s_ring_state & S_RING_PAUSE)) ||
464 (ringp->s_ring_state & S_RING_PROC)) {
465
466 CALLB_CPR_SAFE_BEGIN(&cprinfo);
467 cv_wait(async, lock);
468 CALLB_CPR_SAFE_END(&cprinfo, lock);
469 }
470
471 /*
472 * Either we have work to do, or we have been asked to
473 * shutdown temporarily or permanently
474 */
475 if (ringp->s_ring_state & S_RING_PAUSE)
476 goto done;
477
478 ringp->s_ring_drain_func(ringp);
479 }
480 done:
481 mutex_exit(lock);
482 mutex_enter(&srs->srs_lock);
483 mutex_enter(lock);
484
485 ringp->s_ring_state |= S_RING_QUIESCE_DONE;
486 if (!(ringp->s_ring_state & S_RING_CONDEMNED)) {
487 srs->srs_soft_ring_quiesced_count++;
488 cv_broadcast(&srs->srs_async);
489 mutex_exit(&srs->srs_lock);
490 while (!(ringp->s_ring_state &
491 (S_RING_RESTART | S_RING_CONDEMNED)))
492 cv_wait(&ringp->s_ring_async, &ringp->s_ring_lock);
493 mutex_exit(lock);
494 mutex_enter(&srs->srs_lock);
495 mutex_enter(lock);
496 srs->srs_soft_ring_quiesced_count--;
497 if (ringp->s_ring_state & S_RING_RESTART) {
498 ASSERT(!(ringp->s_ring_state & S_RING_CONDEMNED));
499 ringp->s_ring_state &= ~(S_RING_RESTART |
500 S_RING_QUIESCE | S_RING_QUIESCE_DONE);
501 cv_broadcast(&srs->srs_async);
502 mutex_exit(&srs->srs_lock);
503 goto start;
504 }
505 }
506 ASSERT(ringp->s_ring_state & S_RING_CONDEMNED);
507 ringp->s_ring_state |= S_RING_CONDEMNED_DONE;
508 CALLB_CPR_EXIT(&cprinfo);
509 srs->srs_soft_ring_condemned_count++;
510 cv_broadcast(&srs->srs_async);
511 mutex_exit(&srs->srs_lock);
512 thread_exit();
513 }
514
515 /*
516 * mac_soft_ring_intr_enable and mac_soft_ring_intr_disable
517 *
518 * these functions are called to toggle the sending of packets to the
519 * client. They are called by the client. the client gets the name
520 * of these routine and corresponding cookie (pointing to softring)
521 * during capability negotiation at setup time.
522 *
523 * Enabling is allow the processing thread to send packets to the
524 * client while disabling does the opposite.
525 */
526 int
mac_soft_ring_intr_enable(void * arg)527 mac_soft_ring_intr_enable(void *arg)
528 {
529 mac_soft_ring_t *ringp = (mac_soft_ring_t *)arg;
530 mutex_enter(&ringp->s_ring_lock);
531 ringp->s_ring_state &= ~S_RING_BLANK;
532 if (ringp->s_ring_first != NULL)
533 mac_soft_ring_worker_wakeup(ringp);
534 mutex_exit(&ringp->s_ring_lock);
535 return (0);
536 }
537
538 boolean_t
mac_soft_ring_intr_disable(void * arg)539 mac_soft_ring_intr_disable(void *arg)
540 {
541 mac_soft_ring_t *ringp = (mac_soft_ring_t *)arg;
542 boolean_t sring_blanked = B_FALSE;
543 /*
544 * Stop worker thread from sending packets above.
545 * Squeue will poll soft ring when it needs packets.
546 */
547 mutex_enter(&ringp->s_ring_lock);
548 if (!(ringp->s_ring_state & S_RING_PROC)) {
549 ringp->s_ring_state |= S_RING_BLANK;
550 sring_blanked = B_TRUE;
551 }
552 mutex_exit(&ringp->s_ring_lock);
553 return (sring_blanked);
554 }
555
556 /*
557 * mac_soft_ring_poll
558 *
559 * This routine is called by the client to poll for packets from
560 * the soft ring. The function name and cookie corresponding to
561 * the soft ring is exchanged during capability negotiation during
562 * setup.
563 */
564 mblk_t *
mac_soft_ring_poll(mac_soft_ring_t * ringp,size_t bytes_to_pickup)565 mac_soft_ring_poll(mac_soft_ring_t *ringp, size_t bytes_to_pickup)
566 {
567 mblk_t *head, *tail;
568 mblk_t *mp;
569 size_t sz = 0;
570 int cnt = 0;
571 mac_soft_ring_set_t *mac_srs = ringp->s_ring_set;
572
573 ASSERT(mac_srs != NULL);
574
575 mutex_enter(&ringp->s_ring_lock);
576 head = tail = mp = ringp->s_ring_first;
577 if (head == NULL) {
578 mutex_exit(&ringp->s_ring_lock);
579 return (NULL);
580 }
581
582 if (ringp->s_ring_size <= bytes_to_pickup) {
583 head = ringp->s_ring_first;
584 ringp->s_ring_first = NULL;
585 ringp->s_ring_last = NULL;
586 cnt = ringp->s_ring_count;
587 ringp->s_ring_count = 0;
588 sz = ringp->s_ring_size;
589 ringp->s_ring_size = 0;
590 } else {
591 while (mp && sz <= bytes_to_pickup) {
592 sz += msgdsize(mp);
593 cnt++;
594 tail = mp;
595 mp = mp->b_next;
596 }
597 ringp->s_ring_count -= cnt;
598 ringp->s_ring_size -= sz;
599 tail->b_next = NULL;
600 if (mp == NULL) {
601 ringp->s_ring_first = NULL;
602 ringp->s_ring_last = NULL;
603 ASSERT(ringp->s_ring_count == 0);
604 } else {
605 ringp->s_ring_first = mp;
606 }
607 }
608
609 mutex_exit(&ringp->s_ring_lock);
610 /*
611 * Update the shared count and size counters so
612 * that SRS has a accurate idea of queued packets.
613 */
614 mutex_enter(&mac_srs->srs_lock);
615 MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
616 mutex_exit(&mac_srs->srs_lock);
617 return (head);
618 }
619
620 /*
621 * Enable direct client (IP) callback function from the softrings.
622 * Callers need to make sure they don't need any DLS layer processing
623 */
624 void
mac_soft_ring_dls_bypass_enable(mac_soft_ring_t * softring,mac_direct_rx_t rx_func,void * rx_arg1)625 mac_soft_ring_dls_bypass_enable(mac_soft_ring_t *softring,
626 mac_direct_rx_t rx_func, void *rx_arg1)
627 {
628 VERIFY3P(rx_func, !=, NULL);
629 mutex_enter(&softring->s_ring_lock);
630 softring->s_ring_rx_func = rx_func;
631 softring->s_ring_rx_arg1 = rx_arg1;
632 mutex_exit(&softring->s_ring_lock);
633 }
634
635 /* Disable DLS bypass. */
636 void
mac_soft_ring_dls_bypass_disable(mac_soft_ring_t * softring,mac_client_impl_t * mcip)637 mac_soft_ring_dls_bypass_disable(mac_soft_ring_t *softring,
638 mac_client_impl_t *mcip)
639 {
640 mutex_enter(&softring->s_ring_lock);
641 /*
642 * Before modifying the ring state we first wait for any in-progress
643 * processing to stop.
644 */
645 while (softring->s_ring_state & S_RING_PROC) {
646 softring->s_ring_state |= S_RING_CLIENT_WAIT;
647 cv_wait(&softring->s_ring_client_cv,
648 &softring->s_ring_lock);
649 }
650
651 softring->s_ring_state &= ~S_RING_CLIENT_WAIT;
652 softring->s_ring_rx_func = mac_rx_deliver;
653 softring->s_ring_rx_arg1 = mcip;
654 mutex_exit(&softring->s_ring_lock);
655 }
656
657 void
mac_soft_ring_poll_enable(mac_soft_ring_t * sr,mac_direct_rx_t drx,void * drx_arg,mac_resource_cb_t * rcb,uint32_t pri)658 mac_soft_ring_poll_enable(mac_soft_ring_t *sr, mac_direct_rx_t drx,
659 void *drx_arg, mac_resource_cb_t *rcb, uint32_t pri)
660 {
661 mac_rx_fifo_t mrf;
662
663 /* Only TCP/IP clients are poll capable at the moment. */
664 VERIFY((sr->s_ring_state & (ST_RING_TCP | ST_RING_TCP6)) != 0);
665 /* The client resourse callback structure better be set. */
666 VERIFY3P(rcb->mrc_arg, !=, NULL);
667 /* Polling should be configured only once on a given softring. */
668 VERIFY3P(sr->s_ring_rx_arg2, ==, NULL);
669
670 /*
671 * As polling elides DLS processing we must make sure that
672 * softring processing (i.e. non-polling) also bypasses DLS
673 * processing.
674 */
675 mac_soft_ring_dls_bypass_enable(sr, drx, drx_arg);
676
677 bzero(&mrf, sizeof (mrf));
678 mrf.mrf_type = MAC_RX_FIFO;
679 mrf.mrf_receive = (mac_receive_t)mac_soft_ring_poll;
680 mrf.mrf_intr_enable =
681 (mac_intr_enable_t)mac_soft_ring_intr_enable;
682 mrf.mrf_intr_disable =
683 (mac_intr_disable_t)mac_soft_ring_intr_disable;
684 mrf.mrf_rx_arg = sr;
685 mrf.mrf_intr_handle = (mac_intr_handle_t)sr;
686 mrf.mrf_cpu_id = sr->s_ring_cpuid;
687 mrf.mrf_flow_priority = pri;
688
689 sr->s_ring_rx_arg2 = rcb->mrc_add(rcb->mrc_arg,
690 (mac_resource_t *)&mrf);
691 }
692
693 void
mac_soft_ring_poll_disable(mac_soft_ring_t * sr,mac_resource_cb_t * rcb,mac_client_impl_t * mcip)694 mac_soft_ring_poll_disable(mac_soft_ring_t *sr, mac_resource_cb_t *rcb,
695 mac_client_impl_t *mcip)
696 {
697 /* Only TCP/IP clients are poll capable at the moment. */
698 VERIFY((sr->s_ring_state & (ST_RING_TCP | ST_RING_TCP6)) != 0);
699
700 /*
701 * Remove the IP ring if there is one associated with this
702 * softring. Note that IP rings are a limited resource; and
703 * SRST_CLIENT_POLL_V4/V6 being set on the SRS is no guarantee
704 * that all TCP softrings have an associated IP ring. This is by
705 * design. See ip_squeue_add_ring().
706 */
707 if (sr->s_ring_rx_arg2 != NULL) {
708 VERIFY3P(rcb->mrc_arg, !=, NULL);
709 rcb->mrc_remove(rcb->mrc_arg, sr->s_ring_rx_arg2);
710 sr->s_ring_rx_arg2 = NULL;
711 }
712
713 mac_soft_ring_dls_bypass_disable(sr, mcip);
714 }
715
716 /*
717 * mac_soft_ring_signal
718 *
719 * Typically used to set the soft ring state to QUIESCE, CONDEMNED, or
720 * RESTART.
721 *
722 * In the Rx side, the quiescing is done bottom up. After the Rx upcalls
723 * from the driver are done, then the Rx SRS is quiesced and only then can
724 * we signal the soft rings. Thus this function can't be called arbitrarily
725 * without satisfying the prerequisites. On the Tx side, the threads from
726 * top need to quiesced, then the Tx SRS and only then can we signal the
727 * Tx soft rings.
728 */
729 void
mac_soft_ring_signal(mac_soft_ring_t * softring,const mac_soft_ring_state_t sr_flag)730 mac_soft_ring_signal(mac_soft_ring_t *softring,
731 const mac_soft_ring_state_t sr_flag)
732 {
733 mutex_enter(&softring->s_ring_lock);
734 softring->s_ring_state |= sr_flag;
735 cv_signal(&softring->s_ring_async);
736 mutex_exit(&softring->s_ring_lock);
737 }
738
739 /*
740 * mac_tx_soft_ring_drain
741 *
742 * The transmit side drain routine in case the soft ring was being
743 * used to transmit packets.
744 */
745 static void
mac_tx_soft_ring_drain(mac_soft_ring_t * ringp)746 mac_tx_soft_ring_drain(mac_soft_ring_t *ringp)
747 {
748 mblk_t *mp;
749 void *arg1;
750 void *arg2;
751 mblk_t *tail;
752 uint_t saved_pkt_count, saved_size;
753 mac_tx_stats_t stats;
754 mac_soft_ring_set_t *mac_srs = ringp->s_ring_set;
755
756 saved_pkt_count = saved_size = 0;
757 ringp->s_ring_run = curthread;
758 ASSERT(mutex_owned(&ringp->s_ring_lock));
759 ASSERT(!(ringp->s_ring_state & S_RING_PROC));
760
761 ringp->s_ring_state |= S_RING_PROC;
762 arg1 = ringp->s_ring_tx_arg1;
763 arg2 = ringp->s_ring_tx_arg2;
764
765 while (ringp->s_ring_first != NULL) {
766 mp = ringp->s_ring_first;
767 tail = ringp->s_ring_last;
768 saved_pkt_count = ringp->s_ring_count;
769 saved_size = ringp->s_ring_size;
770 ringp->s_ring_first = NULL;
771 ringp->s_ring_last = NULL;
772 ringp->s_ring_count = 0;
773 ringp->s_ring_size = 0;
774 mutex_exit(&ringp->s_ring_lock);
775
776 mp = mac_tx_send(arg1, arg2, mp, &stats);
777
778 mutex_enter(&ringp->s_ring_lock);
779 if (mp != NULL) {
780 /* Device out of tx desc, set block */
781 tail->b_next = ringp->s_ring_first;
782 ringp->s_ring_first = mp;
783 ringp->s_ring_count +=
784 (saved_pkt_count - stats.mts_opackets);
785 ringp->s_ring_size += (saved_size - stats.mts_obytes);
786 if (ringp->s_ring_last == NULL)
787 ringp->s_ring_last = tail;
788
789 if (ringp->s_ring_tx_woken_up) {
790 ringp->s_ring_tx_woken_up = B_FALSE;
791 } else {
792 ringp->s_ring_state |= S_RING_BLOCK;
793 ringp->s_st_stat.mts_blockcnt++;
794 }
795
796 ringp->s_ring_state &= ~S_RING_PROC;
797 ringp->s_ring_run = NULL;
798 return;
799 } else {
800 ringp->s_ring_tx_woken_up = B_FALSE;
801 SRS_TX_STATS_UPDATE(mac_srs, &stats);
802 SOFTRING_TX_STATS_UPDATE(ringp, &stats);
803 }
804 }
805
806 if (ringp->s_ring_count == 0 && ringp->s_ring_state &
807 (S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED)) {
808 mac_client_impl_t *mcip = ringp->s_ring_mcip;
809 boolean_t wakeup_required = B_FALSE;
810
811 if (ringp->s_ring_state &
812 (S_RING_TX_HIWAT|S_RING_WAKEUP_CLIENT)) {
813 wakeup_required = B_TRUE;
814 }
815 ringp->s_ring_state &=
816 ~(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED);
817 mutex_exit(&ringp->s_ring_lock);
818 if (wakeup_required) {
819 mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)ringp);
820 /*
821 * If the client is not the primary MAC client, then we
822 * need to send the notification to the clients upper
823 * MAC, i.e. mci_upper_mip.
824 */
825 mac_tx_notify(mcip->mci_upper_mip != NULL ?
826 mcip->mci_upper_mip : mcip->mci_mip);
827 }
828 mutex_enter(&ringp->s_ring_lock);
829 }
830 ringp->s_ring_state &= ~S_RING_PROC;
831 ringp->s_ring_run = NULL;
832 }
833