1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2017 Joyent, Inc.
26 * Copyright 2026 Oxide Computer Company
27 */
28
29 #ifndef _SYS_MAC_SOFT_RING_H
30 #define _SYS_MAC_SOFT_RING_H
31
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35
36 #include <sys/types.h>
37 #include <sys/cpuvar.h>
38 #include <sys/cpupart.h>
39 #include <sys/processor.h>
40 #include <sys/stream.h>
41 #include <sys/squeue.h>
42 #include <sys/dlpi.h>
43 #include <sys/mac_impl.h>
44 #include <sys/mac_stat.h>
45
/*
 * Maximum length of a softring name, excluding the terminating NUL:
 * `s_ring_name` is declared as `char[S_RING_NAMELEN + 1]`.
 */
#define	S_RING_NAMELEN 64

/*
 * NOTE(review): presumably the upper bound on the number of softrings an SRS
 * may fan out to -- not used in the visible part of this header; confirm
 * against its consumers.
 */
#define	MAX_SR_FANOUT 24

/*
 * NOTE(review): defined elsewhere; its semantics are not visible in this
 * header.
 */
extern boolean_t mac_soft_ring_enable;
/* Tunable: when `true`, `SRST_LATENCY_OPT` is set on all Rx SRSes. */
extern boolean_t mac_latency_optimize;

typedef struct mac_soft_ring_s mac_soft_ring_t;
typedef struct mac_soft_ring_set_s mac_soft_ring_set_t;

/* Drain routine for a single softring (stored in `s_ring_drain_func`). */
typedef void (*mac_soft_ring_drain_func_t)(mac_soft_ring_t *);
/* Tx processing routine for an SRS (stored in `st_func`). */
typedef mac_tx_cookie_t (*mac_tx_func_t)(mac_soft_ring_set_t *, mblk_t *,
    uintptr_t, uint16_t, mblk_t **);
59
/*
 * Tx notify callback.
 *
 * One entry in a list of Tx notification callbacks: the list linkage plus
 * the function to invoke and the argument to hand to it.
 */
typedef struct mac_tx_notify_cb_s {
	mac_cb_t		mtnf_link;	/* Linked list of callbacks */
	mac_tx_notify_t		mtnf_fn;	/* The callback function */
	void			*mtnf_arg;	/* Callback function argument */
} mac_tx_notify_cb_t;
66
/*
 * Flagset of immutable and datapath-altered aspects of a softring.
 *
 * Flags prefixed by `ST_` identify static characteristics of how a ring should
 * process packets, whereas those prefixed `S_RING` reflect the current state
 * of datapath processing.
 *
 * Gaps in flag allocation correspond to former flag definitions (such that
 * existing flags mapped to their historic values). New flags can be placed in
 * these gaps without issue. See issue 17920.
 */
typedef enum {
	/*
	 * Packets may only be drained from this softring by its own worker
	 * thread, and cannot be handled inline by the SRS or its caller.
	 *
	 * Immutable.
	 */
	ST_RING_WORKER_ONLY	= 1 << 0,
	/*
	 * This softring is dedicated to handling TCP/IPv4 traffic when DLS
	 * bypass is configured.
	 *
	 * Immutable.
	 */
	ST_RING_TCP		= 1 << 2,
	/*
	 * This softring is dedicated to handling UDP/IPv4 traffic when DLS
	 * bypass is configured.
	 *
	 * Immutable.
	 */
	ST_RING_UDP		= 1 << 3,
	/*
	 * This softring handles all traffic which is ineligible for DLS bypass.
	 *
	 * Immutable.
	 */
	ST_RING_OTH		= 1 << 4,
	/*
	 * If set, this is a transmit softring. Packets will be directed via
	 * `mac_tx_send` to an underlying provider's ring.
	 *
	 * If absent, this is a receive softring. Packets will be delivered to a
	 * client via `s_ring_rx_func`.
	 *
	 * Immutable.
	 */
	ST_RING_TX		= 1 << 6,
	/*
	 * This softring is dedicated to handling TCP/IPv6 traffic when DLS
	 * bypass is configured.
	 *
	 * Immutable.
	 */
	ST_RING_TCP6		= 1 << 7,
	/*
	 * This softring is dedicated to handling UDP/IPv6 traffic when DLS
	 * bypass is configured.
	 *
	 * Immutable.
	 */
	ST_RING_UDP6		= 1 << 8,
	/*
	 * A thread is currently processing packets from this softring, and has
	 * relinquished its hold on `s_ring_lock` to allow more packets to be
	 * enqueued while it does so.
	 *
	 * Rx/Tx process methods will always enqueue packets if set, with the
	 * expectation that whoever is draining the softring will continue to
	 * do so.
	 */
	S_RING_PROC		= 1 << 16,
	/*
	 * The worker thread of this softring has been bound to a specific CPU.
	 */
	S_RING_BOUND		= 1 << 17,
	/*
	 * This softring is a TX softring and has run out of descriptors on the
	 * underlying ring/NIC.
	 *
	 * Any outbound packets will be queued until the underlying provider
	 * marks more descriptors as available via `mac_tx_ring_update`.
	 */
	S_RING_BLOCK		= 1 << 18,
	/*
	 * This softring is a TX softring and is flow controlled: more than
	 * `s_ring_tx_hiwat` packets are currently enqueued.
	 *
	 * Any outbound packets will be enqueued, and drained by the softring
	 * worker. Senders will receive a cookie -- they will be informed when
	 * any cookie is no longer flow controlled if they have registered a
	 * callback via `mac_client_tx_notify`.
	 */
	S_RING_TX_HIWAT		= 1 << 19,
	/*
	 * This softring is a TX softring and has returned a cookie to at least
	 * one sender who has set `MAC_TX_NO_ENQUEUE` regardless of watermark
	 * state.
	 *
	 * When the softring is drained, notify the client via its
	 * `mac_client_tx_notify` callback that it may send.
	 */
	S_RING_WAKEUP_CLIENT	= 1 << 20,
	/*
	 * This RX softring is client pollable and its client has called
	 * `mac_soft_ring_intr_disable` to stop MAC from delivering frames via
	 * `s_ring_rx_func`.
	 *
	 * Packets may _only_ be delivered by client polling. The client may
	 * undo this using `mac_soft_ring_intr_enable`.
	 */
	S_RING_BLANK		= 1 << 21,
	/*
	 * Request the thread processing packets to notify a waiting client when
	 * it is safe to alter the `s_ring_rx_func` callback and its arguments.
	 */
	S_RING_CLIENT_WAIT	= 1 << 22,
	/*
	 * This softring is marked for deletion.
	 *
	 * No further packets can be admitted into the softring, and enqueued
	 * packets must not be processed.
	 */
	S_RING_CONDEMNED	= 1 << 24,
	/*
	 * The softring worker has completed any teardown in response to
	 * `S_RING_CONDEMNED`.
	 *
	 * Requires `S_RING_QUIESCE_DONE`.
	 */
	S_RING_CONDEMNED_DONE	= 1 << 25,
	/*
	 * This softring has been signalled to stop processing any packets.
	 *
	 * The presence of this flag implies that the parent SRS has
	 * *also* been asked to quiesce. It will not enqueue any packets here.
	 */
	S_RING_QUIESCE		= 1 << 26,
	/*
	 * The softring has ceased processing any enqueued/arriving packets, and
	 * is awaiting a signal of either `S_RING_CONDEMNED` or `S_RING_RESTART`
	 * to wake up.
	 */
	S_RING_QUIESCE_DONE	= 1 << 27,
	/*
	 * The softring has been signalled to resume processing traffic.
	 *
	 * The worker thread should unset this and any `QUIESCE` flags and
	 * resume processing packets.
	 */
	S_RING_RESTART		= 1 << 28,
	/*
	 * This TX softring has packets enqueued, which the worker thread is
	 * responsible for draining.
	 */
	S_RING_ENQUEUED		= 1 << 29,
} mac_soft_ring_state_t;
225
/*
 * Used to verify whether a given value is allowed to be used as the
 * `type` of a softring during creation.
 *
 * The mask covers bits 16-31, which is where every `S_RING_*` (datapath
 * state) flag of `mac_soft_ring_state_t` is allocated; the `ST_RING_*`
 * (static type) flags all live in bits 0-15 and fall outside of it.
 */
#define	SR_STATE	0xffff0000
231
/*
 * A single softring: a packet queue guarded by `s_ring_lock`, together with
 * the thread, CPU binding, callbacks and counters used to drain it.
 * `s_ring_state` records both the static type of the ring (`ST_RING_*`) and
 * its current datapath state (`S_RING_*`).
 */
struct mac_soft_ring_s {
	/* Keep the most used members 64bytes cache aligned */
	kmutex_t	s_ring_lock;	/* lock before using any member */
	mac_soft_ring_state_t	s_ring_state;	/* processing model and state */
	uint32_t	s_ring_count;	/* # of mblocks in mac_soft_ring */
	size_t		s_ring_size;	/* Size of data queued */
	mblk_t		*s_ring_first;	/* first mblk chain or NULL */
	mblk_t		*s_ring_last;	/* last mblk chain or NULL */

	/*
	 * Rx delivery callback and its two arguments.
	 * Protected by s_ring_lock + !S_RING_PROC
	 */
	mac_direct_rx_t	s_ring_rx_func;
	void		*s_ring_rx_arg1;
	mac_resource_handle_t	s_ring_rx_arg2;

	/*
	 * Threshold after which packets get dropped.
	 * Is always greater than s_ring_tx_hiwat
	 */
	uint32_t	s_ring_tx_max_q_cnt;
	/* # of mblocks after which to apply flow control */
	uint32_t	s_ring_tx_hiwat;
	/* # of mblocks after which to relieve flow control */
	uint32_t	s_ring_tx_lowat;
	boolean_t	s_ring_tx_woken_up;
	uint32_t	s_ring_hiwat_cnt;	/* times blocked for Tx descs */

	/* Arguments for `mac_tx_send`, called by `mac_tx_soft_ring_drain` */
	mac_client_impl_t	*s_ring_tx_arg1;
	mac_ring_t	*s_ring_tx_arg2;

	/* Tx notify callback */
	mac_cb_info_t	s_ring_notify_cb_info;	/* cb list info */
	mac_cb_t	*s_ring_notify_cb_list;	/* The cb list */

	clock_t		s_ring_awaken;	/* time async thread was awakened */

	kthread_t	*s_ring_run;	/* Current thread processing sq */
	processorid_t	s_ring_cpuid;	/* processor to bind to */
	processorid_t	s_ring_cpuid_save;	/* saved cpuid during offline */
	kcondvar_t	s_ring_async;	/* async thread blocks on */
	clock_t		s_ring_wait;	/* lbolts to wait after a fill() */
	timeout_id_t	s_ring_tid;	/* timer id of pending timeout() */
	kthread_t	*s_ring_worker;	/* kernel thread id */
	/* Ring name; room for S_RING_NAMELEN characters plus the NUL */
	char		s_ring_name[S_RING_NAMELEN + 1];
	uint64_t	s_ring_total_inpkt;
	uint64_t	s_ring_total_rbytes;
	uint64_t	s_ring_drops;
	mac_client_impl_t	*s_ring_mcip;
	kstat_t		*s_ring_ksp;

	/* Teardown, poll disable control ops */
	kcondvar_t	s_ring_client_cv; /* Client wait for control op */

	mac_soft_ring_set_t	*s_ring_set;	/* The SRS this ring belongs to */
	/* Linkage to sibling softrings */
	mac_soft_ring_t	*s_ring_next;
	mac_soft_ring_t	*s_ring_prev;
	mac_soft_ring_drain_func_t	s_ring_drain_func;

	mac_tx_stats_t	s_st_stat;
};
292
/*
 * Transmit modes of a soft ring set (SRS); the selected mode is kept in
 * `st_mode` of `mac_srs_tx_t`. Values are numbered consecutively from zero.
 */
typedef enum {
	SRS_TX_DEFAULT		= 0,
	SRS_TX_SERIALIZE	= 1,
	SRS_TX_FANOUT		= 2,
	SRS_TX_BW		= 3,
	SRS_TX_BW_FANOUT	= 4,
	SRS_TX_AGGR		= 5,
	SRS_TX_BW_AGGR		= 6
} mac_tx_srs_mode_t;
305
/*
 * Transmit side Soft Ring Set.
 *
 * Tx-specific portion of an SRS: the Tx mode and processing function, the
 * arguments passed on to `mac_tx_send`, queue-depth thresholds, statistics,
 * and the aggr pseudo-ring to softring mapping.
 */
typedef struct mac_srs_tx_s {
	/* Members for Tx-side processing */
	mac_tx_srs_mode_t	st_mode;
	mac_tx_func_t		st_func;

	/* Arguments for `mac_tx_send` when called within `st_func` */
	mac_client_impl_t	*st_arg1;
	mac_ring_t		*st_arg2;

	mac_group_t	*st_group;	/* TX group for share */
	boolean_t	st_woken_up;

	/*
	 * st_max_q_cnt is the queue depth threshold to limit
	 * outstanding packets on the Tx SRS. Once the limit
	 * is reached, Tx SRS will drop packets until the
	 * limit goes below the threshold.
	 */
	uint32_t	st_max_q_cnt;	/* max. outstanding packets */
	/*
	 * st_hiwat is used in Tx serializer and bandwidth mode.
	 * This is the queue depth threshold up to which
	 * packets will get buffered with no flow-control
	 * back pressure applied to the caller. Once this
	 * threshold is reached, back pressure will be
	 * applied to the caller of mac_tx() (mac_tx() starts
	 * returning a cookie to indicate a blocked SRS).
	 * st_hiwat should always be less than or equal to
	 * st_max_q_cnt.
	 */
	uint32_t	st_hiwat;	/* mblk cnt to apply flow control */
	uint32_t	st_lowat;	/* mblk cnt to relieve flow control */
	uint32_t	st_hiwat_cnt;	/* times blocked for Tx descs */
	mac_tx_stats_t	st_stat;
	mac_capab_aggr_t	st_capab_aggr;
	/*
	 * st_soft_rings is used as an array to store aggr Tx soft
	 * rings. When aggr_find_tx_ring() returns a pseudo ring,
	 * the associated soft ring has to be found. st_soft_rings
	 * array stores the soft ring associated with a pseudo Tx
	 * ring and it can be accessed using the pseudo ring
	 * index (mr_index). Note that the ring index is unique
	 * for each ring in a group.
	 */
	mac_soft_ring_t **st_soft_rings;
} mac_srs_tx_t;
353
/*
 * Receive side Soft Ring Set.
 *
 * Rx-specific portion of an SRS: the inline upcall, the lower processing
 * entrypoint, flow-control watermarks, and a set of counters recording how
 * often each polling/draining decision was taken.
 */
typedef struct mac_srs_rx_s {
	/*
	 * Upcall function for Rx processing when `SRST_NO_SOFT_RINGS` is set.
	 * Rx softring callbacks for non-bypass traffic should use the same
	 * function and initial argument.
	 * Argument 2 of `sr_func` would be a client-provided handle, but is
	 * always `NULL` in this context as SRSes themselves cannot be used as
	 * part of client polling.
	 *
	 * Protected by srs_lock + !SRS_PROC.
	 */
	mac_direct_rx_t		sr_func;
	void			*sr_arg1;

	mac_rx_func_t		sr_lower_proc;	/* Atomically changed */
	/*
	 * NOTE(review): presumably the `srs_poll_pkt_cnt` counter described in
	 * the polling notes on `mac_soft_ring_set_s` (packets queued on the
	 * SRS and its softrings) -- confirm against the datapath.
	 */
	uint32_t		sr_poll_pkt_cnt;
	uint32_t		sr_poll_thres;

	/* mblk cnt to apply flow control */
	uint32_t		sr_hiwat;
	/* mblk cnt to relieve flow control */
	uint32_t		sr_lowat;
	mac_rx_stats_t		sr_stat;

	/* Times polling was enabled */
	uint32_t		sr_poll_on;
	/* Times polling was enabled by worker thread */
	uint32_t		sr_worker_poll_on;
	/* Times polling was disabled */
	uint32_t		sr_poll_off;
	/* Poll thread signalled count */
	uint32_t		sr_poll_thr_sig;
	/* Poll thread busy */
	uint32_t		sr_poll_thr_busy;
	/* SRS drains, stays in poll mode but doesn't poll */
	uint32_t		sr_poll_drain_no_poll;
	/*
	 * SRS has nothing to do and no packets in H/W but
	 * there is a backlog in softrings. SRS stays in
	 * poll mode but doesn't do polling.
	 */
	uint32_t		sr_poll_no_poll;
	/* Active polling restarted */
	uint32_t		sr_below_hiwat;
	/* Found packets in last poll so try and poll again */
	uint32_t		sr_poll_again;
	/*
	 * Packets in queue but poll thread not allowed to process so
	 * signal the worker thread.
	 */
	uint32_t		sr_poll_sig_worker;
	/*
	 * Poll thread has nothing to do and H/W has nothing so
	 * reenable the interrupts.
	 */
	uint32_t		sr_poll_intr_enable;
	/*
	 * Poll thread has nothing to do and worker thread was already
	 * running so it can decide to reenable interrupt or poll again.
	 */
	uint32_t		sr_poll_goto_sleep;
	/* Worker thread goes back to draining the queue */
	uint32_t		sr_drain_again;
	/* More Packets in queue so signal the poll thread to drain */
	uint32_t		sr_drain_poll_sig;
	/* More Packets in queue so signal the worker thread to drain */
	uint32_t		sr_drain_worker_sig;
	/* Poll thread is already running so worker has nothing to do */
	uint32_t		sr_drain_poll_running;
	/* We have packets already queued so keep polling */
	uint32_t		sr_drain_keep_polling;
	/* Drain is done and interrupts are reenabled */
	uint32_t		sr_drain_finish_intr;
	/* Polling thread needs to schedule worker wakeup */
	uint32_t		sr_poll_worker_wakeup;
} mac_srs_rx_t;
431
/*
 * Flagset of immutable and slowly-varying aspects of a softring set (SRS).
 *
 * These identify mainly static characteristics (Tx/Rx, whether the SRS
 * corresponds to the entrypoint on a MAC client) as well as state on an
 * administrative timescale (fanout behaviour, bandwidth control).
 *
 * The flagset is exposed both under its enum tag and as the typedef
 * `mac_soft_ring_set_type_t`, which is the declared type of `srs_type`.
 *
 * See the commentary on `mac_soft_ring_state_t` for an explanation of the
 * gaps in the numbering of flags for this type.
 */
typedef enum mac_soft_ring_set_type {
	/*
	 * The flow entry underpinning this SRS belongs to a MAC client for
	 * a link.
	 *
	 * Immutable.
	 */
	SRST_LINK		= 1 << 0,
	/*
	 * The flow entry underpinning this SRS belongs to a flow classifier
	 * attached to a given MAC client.
	 *
	 * Immutable.
	 */
	SRST_FLOW		= 1 << 1,
	/*
	 * This SRS does not have any softrings assigned.
	 *
	 * A Tx SRS has no rings and will send packets directly to the NIC,
	 * and an Rx SRS will handle packets inline via `sr_func`.
	 *
	 * Mutable for Tx SRSes.
	 */
	SRST_NO_SOFT_RINGS	= 1 << 2,
	/*
	 * Set on all Rx SRSes when the tunable `mac_latency_optimize` is
	 * `true`.
	 *
	 * If set, packets may be processed inline by any caller who arrives
	 * with more packets to enqueue if there is no existing backlog.
	 * The worker thread will share a CPU binding with the poll thread.
	 * Wakeups sent to worker threads will be instantaneous (loopback,
	 * teardown, and bandwidth-controlled cases).
	 *
	 * If unset on an Rx SRS, packets may only be moved to softrings by the
	 * worker thread. `SRST_ENQUEUE` will also be set in this case.
	 *
	 * Immutable. Requires !`SRST_TX`.
	 */
	SRST_LATENCY_OPT	= 1 << 3,
	/*
	 * This Rx SRS has softrings assigned, and has at least one per traffic
	 * class. Traffic must move to a softring for processing, but may still
	 * drain inline if the SRS is quiet.
	 *
	 * Immutable. Requires !`SRST_TX`. Mutually exclusive with
	 * `SRST_NO_SOFT_RINGS`.
	 */
	SRST_FANOUT_PROTO	= 1 << 4,
	/*
	 * This receive SRS has more than one softring for each traffic class,
	 * and must hash/round-robin received packets amongst a class's rings.
	 *
	 * Mutable. Requires !`SRST_TX`.
	 */
	SRST_FANOUT_SRC_IP	= 1 << 5,
	/*
	 * All softrings will be initialised with `ST_RING_WORKER_ONLY`.
	 *
	 * Set when `SRST_LATENCY_OPT` is disabled, or when the underlying ring
	 * requires `MAC_RING_RX_ENQUEUE` (sun4v).
	 *
	 * Immutable. Requires !`SRST_TX`.
	 */
	SRST_ENQUEUE		= 1 << 6,
	/*
	 * The SRS's client is placed on the default group (either due to
	 * oversubscription, or the device admits only one group).
	 *
	 * A hardware classified ring of this type will receive additional
	 * traffic when moved into full or all-multicast promiscuous mode.
	 *
	 * Mutable. Requires !`SRST_TX`.
	 */
	SRST_DEFAULT_GRP	= 1 << 7,
	/*
	 * If present, this is a transmit SRS. Otherwise it is a receive SRS.
	 *
	 * Transmit SRSes use softrings as mappings to underlying Tx rings
	 * from the hardware.
	 *
	 * The validity of `srs_tx`/`srs_rx` are gated on this flag, as are the
	 * choice of drain functions, enqueue behaviours, etc.
	 *
	 * Immutable.
	 */
	SRST_TX			= 1 << 8,
	/*
	 * `srs_bw` is enabled, and the queue size and egress rate of this SRS
	 * are limited accordingly.
	 *
	 * Mutable.
	 */
	SRST_BW_CONTROL		= 1 << 9,
	/*
	 * The SRS's MAC client has had a callback plumbed from IP to allow
	 * matching IPv4 packets to bypass DLS.
	 *
	 * When set, `ST_RING_TCP` and `ST_RING_UDP` must make use of this
	 * callback. The Rx path will send eligible traffic to these softrings
	 * in this case.
	 *
	 * Mutable under quiescence. Requires !`SRST_TX`.
	 */
	SRST_DLS_BYPASS_V4	= 1 << 12,
	/*
	 * The SRS's MAC client has had a callback plumbed from IP to allow
	 * matching IPv6 packets to bypass DLS.
	 *
	 * When set, `ST_RING_TCP6` and `ST_RING_UDP6` must make use of this
	 * callback. The Rx path will send eligible traffic to these softrings
	 * in this case.
	 *
	 * Mutable under quiescence. Requires !`SRST_TX`.
	 */
	SRST_DLS_BYPASS_V6	= 1 << 13,
	/*
	 * The underlying MAC client has had a `mac_resource_cb_t` plumbed down
	 * from IP for TCP/IPv4 classified traffic. MAC must inform IP of the
	 * addition, removal, and other state changes to any `ST_RING_TCP`
	 * softrings.
	 *
	 * Mutable under quiescence. Requires !`SRST_TX`.
	 */
	SRST_CLIENT_POLL_V4	= 1 << 14,
	/*
	 * The underlying MAC client has had a `mac_resource_cb_t` plumbed down
	 * from IP for TCP/IPv6 classified traffic. MAC must inform IP of the
	 * addition, removal, and other state changes to any `ST_RING_TCP6`
	 * softrings.
	 *
	 * Mutable under quiescence. Requires !`SRST_TX`.
	 */
	SRST_CLIENT_POLL_V6	= 1 << 15,
} mac_soft_ring_set_type_t;
577
/*
 * Flagset reflecting the current state of datapath processing for a given SRS.
 *
 * See the commentary on `mac_soft_ring_state_t` for an explanation of the
 * gaps in the numbering of flags for this type.
 */
typedef enum {
	/*
	 * This SRS's worker thread is explicitly bound to a single CPU.
	 */
	SRS_WORKER_BOUND	= 1 << 1,
	/*
	 * This Rx SRS's poll thread is explicitly bound to a single CPU.
	 */
	SRS_POLL_BOUND		= 1 << 2,
	/*
	 * This Rx SRS is created on top of (and has exclusive
	 * use of) a dedicated ring. When under sufficient load, MAC will
	 * disable interrupts and pull packets into the SRS by polling the
	 * NIC/ring, and will set `SRS_POLLING` when this is the case.
	 *
	 * This flag may be added/removed as SRSes move between
	 * hardware/software classification (e.g., if groups must be shared).
	 */
	SRS_POLLING_CAPAB	= 1 << 3,
	/*
	 * A thread is currently processing packets from this SRS, and
	 * has relinquished its hold on `srs_lock` to allow more packets to be
	 * enqueued while it does so.
	 *
	 * SRS processing will always enqueue packets if set, with the
	 * expectation that whoever is draining the SRS will continue to
	 * do so.
	 *
	 * Requires qualification of what thread is doing the processing: either
	 * `SRS_WORKER`, `SRS_PROC_FAST`, or `SRS_POLL_PROC`.
	 */
	SRS_PROC		= 1 << 4,
	/*
	 * The Rx poll thread should request more packets from the underlying
	 * device.
	 *
	 * Requires `SRS_POLLING`.
	 */
	SRS_GET_PKTS		= 1 << 5,
	/*
	 * This Rx SRS has been moved into poll mode. Interrupts from
	 * the underlying device are disabled, and the poll thread is
	 * exclusively responsible for moving packets into the SRS.
	 *
	 * Requires `SRS_POLLING_CAPAB`.
	 */
	SRS_POLLING		= 1 << 6,
	/*
	 * The SRS worker thread currently holds `SRS_PROC`.
	 *
	 * Requires `SRS_PROC`.
	 */
	SRS_WORKER		= 1 << 8,
	/*
	 * Packets have been enqueued on this TX SRS due to either flow control
	 * or a lack of Tx descriptors on the NIC.
	 */
	SRS_ENQUEUED		= 1 << 9,
	/*
	 * `SRS_PROC` is held by the caller of `mac_rx_srs_process` (typically
	 * the interrupt context) and packets are being processed inline.
	 *
	 * Requires `SRS_PROC`.
	 */
	SRS_PROC_FAST		= 1 << 11,
	/*
	 * The Rx SRS poll thread currently holds `SRS_PROC`.
	 *
	 * Requires `SRS_PROC`.
	 */
	SRS_POLL_PROC		= 1 << 12,
	/*
	 * This Tx SRS has run out of descriptors on the underlying NIC.
	 *
	 * Any outbound packets will be queued until the underlying provider
	 * marks more descriptors as available via `mac_tx_ring_update`.
	 */
	SRS_TX_BLOCKED		= 1 << 13,
	/*
	 * This Tx SRS is flow controlled: more than `st_hiwat` packets are
	 * currently enqueued.
	 *
	 * Any outbound packets will be enqueued, and drained by the SRS
	 * worker. Senders will receive a cookie -- they will be informed when
	 * any cookie is no longer flow controlled if they have registered a
	 * callback via `mac_client_tx_notify`.
	 */
	SRS_TX_HIWAT		= 1 << 14,
	/*
	 * This Tx SRS has returned a cookie to at least one sender who has set
	 * `MAC_TX_NO_ENQUEUE` regardless of watermark state.
	 *
	 * When the SRS is drained, notify the client via its
	 * `mac_client_tx_notify` callback that it may send.
	 */
	SRS_TX_WAKEUP_CLIENT	= 1 << 15,
	/*
	 * `SRS_PROC` is held by the SRS drain function, which is handling
	 * packets inline because it is of type `SRST_NO_SOFT_RINGS`.
	 *
	 * Requires `SRS_PROC`.
	 */
	SRS_CLIENT_PROC		= 1 << 16,
	/*
	 * This SRS has been signalled to stop processing any packets.
	 *
	 * Downstack entrypoints (rings, flows) which can call into this SRS
	 * should be quiesced such that no more packets will be enqueued while
	 * this is set.
	 *
	 * The SRS worker thread will propagate the request to any softrings.
	 */
	SRS_QUIESCE		= 1 << 18,
	/*
	 * The SRS has ceased processing any enqueued packets, the worker thread
	 * has finished quiescing any softrings and is awaiting a signal
	 * of either `SRS_CONDEMNED` or `SRS_RESTART` to wake up.
	 */
	SRS_QUIESCE_DONE	= 1 << 19,
	/*
	 * This SRS is marked for deletion.
	 *
	 * Downstack entrypoints (rings, flows) which can call into this SRS
	 * should be quiesced such that no more packets will be enqueued while
	 * this is set.
	 *
	 * The SRS worker thread will propagate the request to any softrings.
	 */
	SRS_CONDEMNED		= 1 << 20,
	/*
	 * The SRS worker has completed any teardown in response to
	 * `SRS_CONDEMNED`.
	 *
	 * Requires `SRS_QUIESCE_DONE`.
	 */
	SRS_CONDEMNED_DONE	= 1 << 21,
	/*
	 * This Rx SRS's poll thread has quiesced in response to `SRS_QUIESCE`.
	 */
	SRS_POLL_THR_QUIESCED	= 1 << 22,
	/*
	 * The SRS has been signalled to resume processing traffic.
	 *
	 * The worker thread should unset this and any `QUIESCE` flags,
	 * propagate the request to softrings and the poll thread, and
	 * resume processing packets.
	 */
	SRS_RESTART		= 1 << 23,
	/*
	 * The SRS has successfully restarted all of its softrings and poll
	 * thread, if present.
	 */
	SRS_RESTART_DONE	= 1 << 24,
	/*
	 * This Rx SRS's worker thread has signalled the poll thread to resume
	 * in response to `SRS_RESTART`.
	 */
	SRS_POLL_THR_RESTART	= 1 << 25,
	/*
	 * This SRS is part of the global list `mac_srs_g_list`. Its siblings
	 * are accessed via `srs_next` and `srs_prev`.
	 */
	SRS_IN_GLIST		= 1 << 26,
	/*
	 * This Rx SRS's poll thread has terminated in response to
	 * `SRS_CONDEMNED`.
	 */
	SRS_POLL_THR_EXITED	= 1 << 27,
	/*
	 * This SRS is semi-permanently quiesced, and should not accept
	 * `SRS_RESTART` requests.
	 */
	SRS_QUIESCE_PERM	= 1 << 28,
} mac_soft_ring_set_state_t;
758
/*
 * Fanout state of an SRS, kept in `srs_fanout_state`.
 *
 * Set during SRS initialisation and by the flow CPU init methods to record
 * whether the softrings still need to be created or adjusted.
 */
typedef enum {
	/* A new SRS: no softrings have been created yet. */
	SRS_FANOUT_UNINIT	= 0,
	/* Bindings and fanout count match the underlying CPU spec. */
	SRS_FANOUT_INIT		= 1,
	/*
	 * The CPU count and/or bindings have changed; the SRS must be
	 * modified to match.
	 */
	SRS_FANOUT_REINIT	= 2
} mac_srs_fanout_state_t;
780
/*
 * Drain routine for an SRS, stored in `srs_drain_func`. It receives the SRS
 * and an SRS state flag value.
 * NOTE(review): the flag presumably identifies which context is draining
 * (e.g. `SRS_WORKER`, `SRS_PROC_FAST`, `SRS_POLL_PROC`) -- confirm against
 * the callers.
 */
typedef void (*mac_srs_drain_proc_t)(mac_soft_ring_set_t *,
    const mac_soft_ring_set_state_t);
783
/*
 * mac_soft_ring_set_s:
 * This is used both for Tx and Rx side. The srs_type identifies Rx or
 * Tx type.
 *
 * Note that the structure is carefully crafted, with Rx elements coming
 * first followed by Tx specific members. Future additions to this
 * structure should follow the same guidelines.
 *
 * Rx-side notes:
 * mac_rx_classify_flow_add() always creates a mac_soft_ring_set_t and fn_flow
 * points to info from it (func = srs_lower_proc, arg = soft_ring_set). On
 * interrupt path, srs_lower_proc does B/W adjustment and switch to polling mode
 * (if poll capable) and feeds the packets to soft_ring_list via chosen
 * fanout type (specified by srs_type). In poll mode, the poll thread which is
 * also a pointer can pick up the packets and feed them to various
 * soft_ring_list.
 *
 * The srs_type can either be protocol based or fanout based where fanout itself
 * can be various types
 *
 * The polling works by turning off interrupts as soon as packets
 * are queued on the soft ring set. Once the backlog is clear and poll
 * thread returns empty handed i.e. Rx ring doesn't have anything, the
 * interrupt is turned back on. For this purpose we keep a separate
 * srs_poll_pkt_cnt counter which tracks the packets queued between SRS
 * and the soft rings as well. The counter is incremented when packets
 * are queued and decremented when SRS processes them (in case it has
 * no soft rings) or the soft ring processes them. It's important that
 * in case SRS has softrings, the decrement doesn't happen till the
 * packet is processed by the soft rings since it takes very little time
 * for SRS to queue packet from SRS to soft rings and it will keep
 * bringing more packets in the system faster than soft rings can
 * process them.
 *
 * Tx side notes:
 * The srs structure acts as a serializer with a worker thread. The
 * default behavior of srs though is to act as a pass-thru. The queues
 * (srs_first, srs_last, srs_count) get used when Tx ring runs out of Tx
 * descriptors or to enforce bandwidth limits.
 *
 * When multiple Tx rings are present, the SRS state will be set to
 * SRS_FANOUT_OTH. Outgoing packets coming into mac_tx_srs_process()
 * function will be fanned out to one of the Tx side soft rings based on
 * a hint passed in mac_tx_srs_process(). Each soft ring, in turn, will
 * be associated with a distinct h/w Tx ring.
 *
 * NOTE(review): no `SRS_FANOUT_OTH` flag is defined among the SRS type and
 * state flagsets in this header; the reference above may be stale.
 */
struct mac_soft_ring_set_s {
	/*
	 * Common elements, common to both Rx and Tx SRS type.
	 * The following block of fields are protected by srs_lock
	 */
	kmutex_t	srs_lock;
	mac_soft_ring_set_type_t	srs_type;
	mac_soft_ring_set_state_t	srs_state;

	/*
	 * The SRS's packet queue.
	 */
	mblk_t		*srs_first;	/* first mblk chain or NULL */
	mblk_t		*srs_last;	/* last mblk chain or NULL */
	size_t		srs_size;	/* Size of packets queued in bytes */
	uint32_t	srs_count;

	kcondvar_t	srs_async;	/* cv for worker thread */
	kcondvar_t	srs_cv;		/* cv for poll thread */
	timeout_id_t	srs_tid;	/* timeout id for pending timeout */

	/*
	 * List of soft rings & processing function.
	 * The following block is protected by Rx quiescence.
	 * i.e. they can be changed only after quiescing the SRS
	 * Protected by srs_lock.
	 */
	mac_soft_ring_t	*srs_soft_ring_head;
	mac_soft_ring_t	*srs_soft_ring_tail;
	int		srs_soft_ring_count;
	int		srs_soft_ring_quiesced_count;
	int		srs_soft_ring_condemned_count;

	kcondvar_t	srs_quiesce_done_cv;	/* cv for removal */

	/* Per traffic class arrays of softrings, with their lengths below. */
	mac_soft_ring_t	**srs_tcp_soft_rings;
	mac_soft_ring_t	**srs_udp_soft_rings;
	mac_soft_ring_t	**srs_tcp6_soft_rings;
	mac_soft_ring_t	**srs_udp6_soft_rings;
	mac_soft_ring_t	**srs_oth_soft_rings;
	/*
	 * srs_tx_soft_rings is used by tx_srs
	 * when operating in multi tx ring mode.
	 */
	mac_soft_ring_t	**srs_tx_soft_rings;
	int		srs_tcp_ring_count;
	int		srs_udp_ring_count;
	int		srs_tcp6_ring_count;
	int		srs_udp6_ring_count;
	int		srs_oth_ring_count;
	int		srs_tx_ring_count;

	/*
	 * Bandwidth control related members.
	 */
	mac_bw_ctl_t	*srs_bw;	/* WO */

	/*
	 * Priority assignment for poll/worker threads for this SRS and its
	 * softrings.
	 */
	pri_t		srs_pri;	/* srs_lock */

	mac_soft_ring_set_t	*srs_next;	/* mac_srs_g_lock */
	mac_soft_ring_set_t	*srs_prev;	/* mac_srs_g_lock */

	/* Attribute specific drain func (BW ctl vs non-BW ctl) */
	mac_srs_drain_proc_t	srs_drain_func;	/* srs_lock(Rx), Quiesce(tx) */

	/*
	 * If the associated ring is exclusively used by a mac client, e.g.,
	 * an aggregation, this field is used to keep a reference to the
	 * MAC client's pseudo ring.
	 */
	mac_resource_handle_t	srs_mrh;
	/*
	 * The following blocks are write once (WO) and valid for the life
	 * of the SRS
	 */
	mac_client_impl_t	*srs_mcip;	/* back ptr to mac client */
	flow_entry_t	*srs_flent;	/* back ptr to flent */
	mac_ring_t	*srs_ring;	/* Ring Descriptor */

	kthread_t	*srs_worker;	/* WO, worker thread */
	kthread_t	*srs_poll_thr;	/* WO, poll thread */

	uint_t		srs_ind;	/* Round Robin indx for picking up SR */
	processorid_t	srs_worker_cpuid;	/* processor to bind to */
	processorid_t	srs_worker_cpuid_save;	/* saved cpuid during offline */
	processorid_t	srs_poll_cpuid;		/* processor to bind to */
	processorid_t	srs_poll_cpuid_save;	/* saved cpuid during offline */
	mac_srs_fanout_state_t	srs_fanout_state;
	mac_cpus_t	srs_cpu;

	/* Which of `srs_rx`/`srs_tx` is valid is gated on `SRST_TX`. */
	mac_srs_rx_t	srs_rx;
	mac_srs_tx_t	srs_tx;
	kstat_t		*srs_ksp;
};
929
930 static inline boolean_t
mac_srs_is_bw_controlled(const mac_soft_ring_set_t * srs)931 mac_srs_is_bw_controlled(const mac_soft_ring_set_t *srs)
932 {
933 return ((srs->srs_type & SRST_BW_CONTROL) != 0);
934 }
935
936 /*
937 * The total number of softring protocol lanes: TCP, TCP6, UDP, UDP6, OTH.
938 */
939 #define ST_RING_NUM_PROTO 5
940
/*
 * arguments for processors to bind to
 *
 * The replacement list is parenthesized so that the negative literal stays
 * a single operand wherever the macro is expanded.
 * NOTE(review): presumably this value requests "no binding" -- confirm
 * against the callers that pass a processorid_t.
 */
#define	S_RING_BIND_NONE	(-1)
945
/*
 * Non-zero when SRS_QUIESCE_DONE is set in the srs_state of the given SRS.
 * The argument is parenthesized so that any pointer expression may be
 * passed, not just a plain identifier.
 */
#define	SRS_QUIESCED(srs)	((srs)->srs_state & SRS_QUIESCE_DONE)
947
/*
 * If the SRS_QUIESCE_PERM flag is set, the SRS worker thread will not be
 * able to be restarted.
 *
 * Evaluates to non-zero when SRS_QUIESCE_PERM is set in srs_state.  The
 * argument is parenthesized so that any pointer expression may be passed.
 */
#define	SRS_QUIESCED_PERMANENT(srs)	((srs)->srs_state & SRS_QUIESCE_PERM)
953
/*
 * Structure for dls statistics.  Each member is a named kstat; a member is
 * advanced through DLS_BUMP_STAT(), which adds to its value.ui32.
 */
struct dls_kstats {
	/* NOTE(review): presumably packets dropped at a soft ring -- confirm */
	kstat_named_t	dlss_soft_ring_pkt_drop;
};
960
961 extern struct dls_kstats dls_kstat;
962
/*
 * Add `y' to the ui32 value of member `x' of the global dls_kstat.  The
 * expression yields the updated value.
 */
#define	DLS_BUMP_STAT(x, y)	(dls_kstat.x.value.ui32 += (y))
964
/*
 * Turn dynamic polling off.
 *
 * The caller must hold srs_lock (asserted).  Nothing happens unless the SRS
 * state has both SRS_POLLING_CAPAB and SRS_POLLING set.  In that case
 * SRS_POLLING is cleared, mac_hwring_enable_intr() is called on srs_ring
 * (its return value is ignored) and the sr_poll_off counter is bumped.
 */
#define	MAC_SRS_POLLING_OFF(mac_srs)	{				\
	ASSERT(MUTEX_HELD(&(mac_srs)->srs_lock));			\
	if ((SRS_POLLING | SRS_POLLING_CAPAB) ==			\
	    ((mac_srs)->srs_state &					\
	    (SRS_POLLING | SRS_POLLING_CAPAB))) {			\
		(mac_srs)->srs_state &= ~SRS_POLLING;			\
		(void) mac_hwring_enable_intr(				\
		    (mac_ring_handle_t)(mac_srs)->srs_ring);		\
		(mac_srs)->srs_rx.sr_poll_off += 1;			\
	}								\
}
976
/*
 * Walk the b_next-linked packet chain starting at `head' and report on it:
 * `tail' is set to the last mblk of the chain, `cnt' to the number of
 * packets, and `sz' to the sum of msgdsize() over the packets.  The byte
 * total is only accumulated when the SRS is bandwidth controlled; otherwise
 * `sz' is left at 0.  `head' must not be NULL (asserted).
 *
 * `tail', `cnt' and `sz' must be modifiable lvalues.  `head' is evaluated
 * exactly once.  The macro-local names carry an mcc_ prefix so that they
 * cannot shadow a caller's argument (e.g. a caller variable named `tmp').
 */
#define	MAC_COUNT_CHAIN(mac_srs, head, tail, cnt, sz)	{		\
	const boolean_t	mcc_bw_ctl = mac_srs_is_bw_controlled(mac_srs);	\
	mblk_t		*mcc_mp = (head);				\
									\
	ASSERT(mcc_mp != NULL);						\
	(cnt) = 0;							\
	(sz) = 0;							\
	for ((tail) = mcc_mp; mcc_mp != NULL; mcc_mp = mcc_mp->b_next) { \
		(tail) = mcc_mp;					\
		(cnt)++;						\
		if (mcc_bw_ctl)						\
			(sz) += msgdsize(mcc_mp);			\
	}								\
}
999
/*
 * Subtract `cnt' from the cumulative packet count (sr_poll_pkt_cnt) of the
 * SRS; the caller must hold srs_lock (asserted).  If the count is then at
 * or below sr_poll_thres, check whether the interface was left in polling
 * mode with no one really processing the queue, i.e. SRS_POLLING is set
 * but neither SRS_PROC nor SRS_GET_PKTS is.  If so, acquire the PROC
 * (together with SRS_GET_PKTS) and signal srs_cv so that the poll thread
 * checks the interface for packets and gets the interface back to
 * interrupt mode if nothing is found.  Each such wakeup is counted in
 * sr_below_hiwat.
 */
#define	MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt)	{		\
	ASSERT(MUTEX_HELD(&(mac_srs)->srs_lock));			\
									\
	(mac_srs)->srs_rx.sr_poll_pkt_cnt -= (cnt);			\
	if ((mac_srs)->srs_rx.sr_poll_pkt_cnt <=			\
	    (mac_srs)->srs_rx.sr_poll_thres) {				\
		if (((mac_srs)->srs_state &				\
		    (SRS_POLLING | SRS_PROC | SRS_GET_PKTS)) ==		\
		    SRS_POLLING) {					\
			(mac_srs)->srs_state |=				\
			    (SRS_PROC | SRS_GET_PKTS);			\
			cv_signal(&(mac_srs)->srs_cv);			\
			(mac_srs)->srs_rx.sr_below_hiwat++;		\
		}							\
	}								\
}
1024
/* True when the SRS has at least one Tx soft ring (srs_tx_ring_count). */
#define	MAC_TX_SOFT_RINGS(mac_srs)	((mac_srs)->srs_tx_ring_count > 0)
1026
/*
 * Soft ring flags for teardown.  Each name is the bitwise OR of the
 * individual flags listed in its definition.
 */
#define	SRS_POLL_THR_OWNER	(SRS_POLLING | SRS_GET_PKTS | SRS_PROC)
#define	SRS_PAUSE		(SRS_QUIESCE | SRS_CONDEMNED)
#define	S_RING_PAUSE		(S_RING_QUIESCE | S_RING_CONDEMNED)
1031
1032 /* Soft rings */
1033 extern void mac_soft_ring_init(void);
1034 extern void mac_soft_ring_finish(void);
1035 extern void mac_fanout_setup(mac_client_impl_t *, flow_entry_t *,
1036 mac_resource_props_t *, mac_direct_rx_t, void *, cpupart_t *);
1037
1038 extern void mac_soft_ring_worker_wakeup(mac_soft_ring_t *);
1039 extern mblk_t *mac_soft_ring_poll(mac_soft_ring_t *, size_t);
1040 extern void mac_soft_ring_dls_bypass_enable(mac_soft_ring_t *, mac_direct_rx_t,
1041 void *);
1042 extern void mac_soft_ring_dls_bypass_disable(mac_soft_ring_t *,
1043 mac_client_impl_t *);
1044 extern void mac_soft_ring_poll_enable(mac_soft_ring_t *, mac_direct_rx_t,
1045 void *, mac_resource_cb_t *, uint32_t);
1046 extern void mac_soft_ring_poll_disable(mac_soft_ring_t *, mac_resource_cb_t *,
1047 mac_client_impl_t *);
1048
1049 /* SRS */
1050 extern void mac_srs_free(mac_soft_ring_set_t *);
1051 extern void mac_srs_signal(mac_soft_ring_set_t *,
1052 const mac_soft_ring_set_state_t);
1053
1054 extern void mac_rx_srs_retarget_intr(mac_soft_ring_set_t *, processorid_t);
1055 extern void mac_tx_srs_retarget_intr(mac_soft_ring_set_t *);
1056
1057 extern void mac_srs_client_poll_enable(mac_client_impl_t *,
1058 mac_soft_ring_set_t *, boolean_t);
1059 extern void mac_srs_client_poll_disable(mac_client_impl_t *,
1060 mac_soft_ring_set_t *, boolean_t);
1061 extern void mac_srs_client_poll_quiesce(mac_client_impl_t *,
1062 mac_soft_ring_set_t *);
1063 extern void mac_srs_client_poll_restart(mac_client_impl_t *,
1064 mac_soft_ring_set_t *);
1065 extern void mac_rx_srs_quiesce(mac_soft_ring_set_t *,
1066 const mac_soft_ring_set_state_t);
1067 extern void mac_rx_srs_restart(mac_soft_ring_set_t *);
1068 extern void mac_tx_srs_quiesce(mac_soft_ring_set_t *,
1069 const mac_soft_ring_set_state_t);
1070
1071 /* Tx SRS, Tx softring */
1072 extern void mac_tx_srs_wakeup(mac_soft_ring_set_t *, mac_ring_handle_t);
1073 extern void mac_tx_srs_setup(mac_client_impl_t *, flow_entry_t *);
1074 extern mac_tx_func_t mac_tx_get_func(uint32_t);
1075 extern mblk_t *mac_tx_send(mac_client_impl_t *, mac_ring_t *, mblk_t *,
1076 mac_tx_stats_t *);
1077 extern boolean_t mac_tx_srs_ring_present(mac_soft_ring_set_t *, mac_ring_t *);
1078 extern mac_soft_ring_t *mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *,
1079 mac_ring_t *);
1080 extern void mac_tx_srs_add_ring(mac_soft_ring_set_t *, mac_ring_t *);
1081 extern void mac_tx_srs_del_ring(mac_soft_ring_set_t *, mac_ring_t *);
1082 extern mac_tx_cookie_t mac_tx_srs_no_desc(mac_soft_ring_set_t *, mblk_t *,
1083 uint16_t, mblk_t **);
1084
1085 /* Subflow specific stuff */
1086 extern void mac_srs_update_bwlimit(flow_entry_t *, mac_resource_props_t *);
1087 extern void mac_update_srs_priority(mac_soft_ring_set_t *, pri_t);
1088 extern void mac_client_update_classifier(mac_client_impl_t *, boolean_t);
1089 extern void mac_rx_srs_subflow_process(void *, mac_resource_handle_t, mblk_t *,
1090 boolean_t);
1091
1092 /* Resource callbacks for clients */
1093 extern int mac_soft_ring_intr_enable(void *);
1094 extern boolean_t mac_soft_ring_intr_disable(void *);
1095 extern cpu_t *mac_soft_ring_bind(mac_soft_ring_t *, processorid_t);
1096 extern void mac_soft_ring_unbind(mac_soft_ring_t *);
1097
1098 extern mac_soft_ring_t *mac_soft_ring_create_rx(int, clock_t,
1099 const mac_soft_ring_state_t, pri_t, mac_client_impl_t *,
1100 mac_soft_ring_set_t *, processorid_t, mac_direct_rx_t, void *);
1101 extern mac_soft_ring_t *mac_soft_ring_create_tx(int, clock_t,
1102 const mac_soft_ring_state_t, pri_t, mac_client_impl_t *,
1103 mac_soft_ring_set_t *, processorid_t, mac_ring_t *);
1104 extern void mac_soft_ring_free(mac_soft_ring_t *);
1105 extern void mac_soft_ring_signal(mac_soft_ring_t *,
1106 const mac_soft_ring_state_t);
1107 extern void mac_rx_soft_ring_process(mac_client_impl_t *, mac_soft_ring_t *,
1108 mblk_t *, mblk_t *, int, size_t);
1109 extern mac_tx_cookie_t mac_tx_soft_ring_process(mac_soft_ring_t *,
1110 mblk_t *, uint16_t, mblk_t **);
1111 extern void mac_srs_worker_quiesce(mac_soft_ring_set_t *);
1112 extern void mac_srs_worker_restart(mac_soft_ring_set_t *);
1113
1114 extern void mac_rx_srs_drain_bw(mac_soft_ring_set_t *,
1115 const mac_soft_ring_set_state_t);
1116 extern void mac_rx_srs_drain(mac_soft_ring_set_t *,
1117 const mac_soft_ring_set_state_t);
1118 extern void mac_rx_srs_process(void *, mac_resource_handle_t, mblk_t *,
1119 boolean_t);
1120 extern void mac_srs_worker(mac_soft_ring_set_t *);
1121 extern void mac_rx_srs_poll_ring(mac_soft_ring_set_t *);
1122 extern void mac_tx_srs_drain(mac_soft_ring_set_t *,
1123 const mac_soft_ring_set_state_t);
1124
1125 extern void mac_tx_srs_restart(mac_soft_ring_set_t *);
1126 extern void mac_rx_srs_remove(mac_soft_ring_set_t *);
1127
1128 #ifdef __cplusplus
1129 }
1130 #endif
1131
1132 #endif /* _SYS_MAC_SOFT_RING_H */
1133