/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2012 Joyent, Inc. All rights reserved.
 */

/*
 * Squeues: General purpose serialization mechanism
 * ------------------------------------------------
 *
 * Background:
 * -----------
 *
 * This is a general purpose, high-performance serialization mechanism
 * currently used by TCP/IP. It is implemented by means of a per-CPU queue,
 * a worker thread and a polling thread, which are bound to the CPU
 * associated with the squeue. The squeue is strictly FIFO for both the read
 * and write side, and only one thread can process it at any given time.
 * The design goal of the squeue was to offer a very high degree of
 * parallelization (on a per H/W execution pipeline basis) with at
 * most one queueing.
 *
 * A module needing protection typically calls the SQUEUE_ENTER_ONE() or
 * SQUEUE_ENTER() macro as soon as a thread enters the module
 * from either direction. For each packet, the processing function
 * and argument are stored in the mblk itself. When the packet is ready
 * to be processed, the squeue retrieves the stored function and calls
 * it with the supplied argument and the pointer to the packet itself.
 * The called function can assume that no other thread is processing
 * the squeue while it is executing.
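 *
 * As a rough, illustrative sketch (not a verbatim copy of any real caller;
 * the handler name and tag below are placeholders), a module would enter a
 * connection's squeue roughly like this:
 *
 *	CONN_INC_REF(connp);
 *	SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_input_handler, connp,
 *	    ira, SQ_PROCESS, SQTAG_EXAMPLE);
 *
 * The entry macros stash the handler and the conn_t in the mblk (b_queue
 * and b_prev in this file) before handing the chain to the squeue's
 * sq_enter function.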
 *
 * Squeue/connection binding:
 * --------------------------
 *
 * TCP/IP uses an IP classifier in conjunction with squeues, where specific
 * connections are assigned to a specific squeue (based on various policies)
 * at connection creation time. Once assigned, the connection to
 * squeue mapping is never changed and all future packets for that
 * connection are processed on that squeue. The connection ("conn") to
 * squeue mapping is stored in the "conn_t" member "conn_sqp".
 *
 * Since the processing of the connection cuts across multiple layers
 * but still allows packets for different connections to be processed on
 * other CPUs/squeues, squeues are also termed a "Vertical Perimeter" or
 * "Per Connection Vertical Perimeter".
 *
 * Processing Model:
 * -----------------
 *
 * An squeue doesn't necessarily process packets with its own worker thread.
 * Callers can choose to just queue the packet, to process
 * their packet if nothing is queued, or to drain and process. The first two
 * modes are typically employed when the packet was generated while
 * already doing the processing behind the squeue, and the last mode (drain
 * and process) is typically employed when the thread is entering the squeue
 * for the first time. The squeue still imposes a finite time limit
 * for which an external thread can do processing, after which it switches
 * processing to its own worker thread.
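 *
 * As a sketch (summarizing the process_flag values used later in this
 * file, with placeholder arguments), the three modes correspond to:
 *
 *	SQUEUE_ENTER_ONE(sqp, mp, proc, connp, ira, SQ_FILL, tag);
 *		just queue; the worker thread will drain later
 *	SQUEUE_ENTER_ONE(sqp, mp, proc, connp, ira, SQ_NODRAIN, tag);
 *		process inline only if nothing is already queued
 *	SQUEUE_ENTER_ONE(sqp, mp, proc, connp, ira, SQ_PROCESS, tag);
 *		process the packet and drain anything queued behind it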
 *
 * Once created, squeues are never deleted. Hence squeue pointers are
 * always valid. This means that functions outside the squeue can still
 * refer safely to conn_sqp and there is no need for ref counts.
 *
 * Only a thread executing in the squeue can change the squeue of the
 * connection. It does so by calling a squeue framework function to do this.
 * After changing the squeue, the thread must leave the squeue. It must not
 * continue to execute any code that needs squeue protection.
 *
 * The squeue framework, after entering the squeue, checks if the current
 * squeue matches the conn_sqp. If the check fails, the packet is delivered
 * to the right squeue.
 *
 * Polling Model:
 * --------------
 *
 * An squeue can control the rate of packet arrival into itself from the
 * NIC or a specific Rx ring within a NIC. As part of capability negotiation
 * between IP and the MAC layer, an squeue is created for each TCP soft ring
 * (or TCP Rx ring - to be implemented in the future). As part of this
 * negotiation, the squeue gets a cookie for the underlying soft ring or Rx
 * ring, a function to turn off incoming packets and a function to call
 * to poll for packets. This helps schedule the receive side packet
 * processing so that queue backlog doesn't build up and packet processing
 * doesn't keep getting disturbed by high priority interrupts. As part
 * of this mode, as soon as a backlog starts building, the squeue turns off
 * interrupts and switches to poll mode. In poll mode, when the poll
 * thread goes down to retrieve packets, it retrieves them in the form of
 * a chain, which improves performance even more. As the squeue/softring
 * system gets more packets, it gets more efficient by switching to
 * polling more often and dealing with larger packet chains.
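 *
 * As a sketch of the negotiated state this file actually relies on (the
 * names are taken from the ill_rx_ring_t uses in the code below):
 *
 *	rx_ring->rr_intr_disable(rx_ring->rr_intr_handle);	switch to poll
 *	rx_ring->rr_intr_enable(rx_ring->rr_intr_handle);	back to interrupts
 *	head = rx_ring->rr_rx(rx_ring->rr_rx_handle, bytes_to_pickup);
 *	mp = rx_ring->rr_ip_accept(ill, rx_ring, sqp, head, &tail, &cnt);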
 *
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/condvar_impl.h>
#include <sys/systm.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/stack.h>
#include <sys/archsystm.h>

#include <inet/ipclassifier.h>
#include <inet/udp_impl.h>

#include <sys/squeue_impl.h>

static void squeue_fire(void *);
static void squeue_drain(squeue_t *, uint_t, hrtime_t);
static void squeue_worker(squeue_t *sqp);
static void squeue_polling_thread(squeue_t *sqp);

kmem_cache_t *squeue_cache;

#define	SQUEUE_MSEC_TO_NSEC	1000000

int squeue_drain_ms = 20;
int squeue_workerwait_ms = 0;

/* The values above converted to ticks or nanoseconds */
static int squeue_drain_ns = 0;
static int squeue_workerwait_tick = 0;

uintptr_t squeue_drain_stack_needed = 10240;
uint_t squeue_drain_stack_toodeep;

#define	MAX_BYTES_TO_PICKUP	150000

#define	ENQUEUE_CHAIN(sqp, mp, tail, cnt) {			\
	/*							\
	 * Enqueue our mblk chain.				\
	 */							\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
								\
	if ((sqp)->sq_last != NULL)				\
		(sqp)->sq_last->b_next = (mp);			\
	else							\
		(sqp)->sq_first = (mp);				\
	(sqp)->sq_last = (tail);				\
	(sqp)->sq_count += (cnt);				\
	ASSERT((sqp)->sq_count > 0);				\
	DTRACE_PROBE4(squeue__enqueuechain, squeue_t *, sqp,	\
	    mblk_t *, mp, mblk_t *, tail, int, cnt);		\
								\
}

/*
 * Blank the receive ring (in this case it is the soft ring). When
 * blanked, the soft ring will not send any more packets up.
 * Blanking may not succeed when there is a CPU already in the soft
 * ring sending packets up. In that case, SQS_POLLING will not be
 * set.
 */
#define	SQS_POLLING_ON(sqp, sq_poll_capable, rx_ring) {		\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
	if (sq_poll_capable) {					\
		ASSERT(rx_ring != NULL);			\
		ASSERT(sqp->sq_state & SQS_POLL_CAPAB);		\
		if (!(sqp->sq_state & SQS_POLLING)) {		\
			if (rx_ring->rr_intr_disable(rx_ring->rr_intr_handle)) \
				sqp->sq_state |= SQS_POLLING;	\
		}						\
	}							\
}

#define	SQS_POLLING_OFF(sqp, sq_poll_capable, rx_ring) {	\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
	if (sq_poll_capable) {					\
		ASSERT(rx_ring != NULL);			\
		ASSERT(sqp->sq_state & SQS_POLL_CAPAB);		\
		if (sqp->sq_state & SQS_POLLING) {		\
			sqp->sq_state &= ~SQS_POLLING;		\
			rx_ring->rr_intr_enable(rx_ring->rr_intr_handle); \
		}						\
	}							\
}

/* Wakeup poll thread only if SQS_POLLING is set */
#define	SQS_POLL_RING(sqp) {					\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
	if (sqp->sq_state & SQS_POLLING) {			\
		ASSERT(sqp->sq_state & SQS_POLL_CAPAB);		\
		if (!(sqp->sq_state & SQS_GET_PKTS)) {		\
			sqp->sq_state |= SQS_GET_PKTS;		\
			cv_signal(&sqp->sq_poll_cv);		\
		}						\
	}							\
}

#ifdef DEBUG
#define	SQUEUE_DBG_SET(sqp, mp, proc, connp, tag) {		\
	(sqp)->sq_curmp = (mp);					\
	(sqp)->sq_curproc = (proc);				\
	(sqp)->sq_connp = (connp);				\
	(mp)->b_tag = (sqp)->sq_tag = (tag);			\
}

#define	SQUEUE_DBG_CLEAR(sqp)	{				\
	(sqp)->sq_curmp = NULL;					\
	(sqp)->sq_curproc = NULL;				\
	(sqp)->sq_connp = NULL;					\
}
#else
#define	SQUEUE_DBG_SET(sqp, mp, proc, connp, tag)
#define	SQUEUE_DBG_CLEAR(sqp)
#endif

void
squeue_init(void)
{
	squeue_cache = kmem_cache_create("squeue_cache",
	    sizeof (squeue_t), 64, NULL, NULL, NULL, NULL, NULL, 0);

	squeue_drain_ns = squeue_drain_ms * SQUEUE_MSEC_TO_NSEC;
	squeue_workerwait_tick = MSEC_TO_TICK_ROUNDUP(squeue_workerwait_ms);
}

/* ARGSUSED */
squeue_t *
squeue_create(clock_t wait, pri_t pri)
{
	squeue_t *sqp = kmem_cache_alloc(squeue_cache, KM_SLEEP);

	bzero(sqp, sizeof (squeue_t));
	sqp->sq_bind = PBIND_NONE;
	sqp->sq_priority = pri;
	sqp->sq_wait = MSEC_TO_TICK(wait);
	sqp->sq_worker = thread_create(NULL, 0, squeue_worker,
	    sqp, 0, &p0, TS_RUN, pri);

	sqp->sq_poll_thr = thread_create(NULL, 0, squeue_polling_thread,
	    sqp, 0, &p0, TS_RUN, pri);

	sqp->sq_enter = squeue_enter;
	sqp->sq_drain = squeue_drain;

	return (sqp);
}

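/*
 * A minimal usage sketch, under the assumption of a caller similar to the
 * IP squeue-set code (not shown here); "cpu_id" is a placeholder:
 *
 *	squeue_t *sqp = squeue_create(squeue_workerwait_ms, maxclsyspri);
 *	mutex_enter(&cpu_lock);
 *	squeue_bind(sqp, cpu_id);
 *	mutex_exit(&cpu_lock);
 *
 * squeue_bind() asserts that cpu_lock is held, hence the explicit
 * mutex_enter()/mutex_exit() pair around it.
 */
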
/*
 * Bind squeue worker thread to the specified CPU, given by CPU id.
 * If the CPU id value is -1, bind the worker thread to the value
 * specified in sq_bind field. If a thread is already bound to a
 * different CPU, unbind it from the old CPU and bind to the new one.
 */

void
squeue_bind(squeue_t *sqp, processorid_t bind)
{
	mutex_enter(&sqp->sq_lock);
	ASSERT(sqp->sq_bind != PBIND_NONE || bind != PBIND_NONE);
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (sqp->sq_state & SQS_BOUND) {
		if (sqp->sq_bind == bind) {
			mutex_exit(&sqp->sq_lock);
			return;
		}
		thread_affinity_clear(sqp->sq_worker);
	} else {
		sqp->sq_state |= SQS_BOUND;
	}

	if (bind != PBIND_NONE)
		sqp->sq_bind = bind;

	thread_affinity_set(sqp->sq_worker, sqp->sq_bind);
	mutex_exit(&sqp->sq_lock);
}

void
squeue_unbind(squeue_t *sqp)
{
	mutex_enter(&sqp->sq_lock);
	if (!(sqp->sq_state & SQS_BOUND)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}

	sqp->sq_state &= ~SQS_BOUND;
	thread_affinity_clear(sqp->sq_worker);
	mutex_exit(&sqp->sq_lock);
}

void
squeue_worker_wakeup(squeue_t *sqp)
{
	timeout_id_t tid = (sqp)->sq_tid;

	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));

	if (sqp->sq_wait == 0) {
		ASSERT(tid == 0);
		ASSERT(!(sqp->sq_state & SQS_TMO_PROG));
		sqp->sq_awaken = ddi_get_lbolt();
		cv_signal(&sqp->sq_worker_cv);
		mutex_exit(&sqp->sq_lock);
		return;
	}

	/*
	 * Queue isn't being processed, so take
	 * any post enqueue actions needed before leaving.
	 */
	if (tid != 0) {
		/*
		 * Waiting for an enter() to process mblk(s).
		 */
		clock_t now = ddi_get_lbolt();
		clock_t waited = now - sqp->sq_awaken;

		if (TICK_TO_MSEC(waited) >= sqp->sq_wait) {
			/*
			 * Time's up and we have a worker thread
			 * waiting for work, so schedule it.
			 */
			sqp->sq_tid = 0;
			sqp->sq_awaken = now;
			cv_signal(&sqp->sq_worker_cv);
			mutex_exit(&sqp->sq_lock);
			(void) untimeout(tid);
			return;
		}
		mutex_exit(&sqp->sq_lock);
		return;
	} else if (sqp->sq_state & SQS_TMO_PROG) {
		mutex_exit(&sqp->sq_lock);
		return;
	} else {
		clock_t	wait = sqp->sq_wait;
		/*
		 * Wait up to sqp->sq_wait ms for an
		 * enter() to process this queue. We
		 * don't want to contend on timeout locks
		 * with sq_lock held for performance reasons,
		 * so drop the sq_lock before calling timeout
		 * but we need to check if timeout is required
		 * after reacquiring the sq_lock. Once
		 * the sq_lock is dropped, someone else could
		 * have processed the packet or the timeout could
		 * have already fired.
		 */
		sqp->sq_state |= SQS_TMO_PROG;
		mutex_exit(&sqp->sq_lock);
		tid = timeout(squeue_fire, sqp, wait);
		mutex_enter(&sqp->sq_lock);
		/* Check again if we still need the timeout */
		if (((sqp->sq_state & (SQS_PROC|SQS_TMO_PROG)) ==
		    SQS_TMO_PROG) && (sqp->sq_tid == 0) &&
		    (sqp->sq_first != NULL)) {
			sqp->sq_state &= ~SQS_TMO_PROG;
			sqp->sq_tid = tid;
			mutex_exit(&sqp->sq_lock);
			return;
		} else {
			if (sqp->sq_state & SQS_TMO_PROG) {
				sqp->sq_state &= ~SQS_TMO_PROG;
				mutex_exit(&sqp->sq_lock);
				(void) untimeout(tid);
			} else {
				/*
				 * The timer fired before we could
				 * reacquire the sq_lock. squeue_fire
				 * removes the SQS_TMO_PROG flag
				 * and we don't need to	do anything
				 * else.
				 */
				mutex_exit(&sqp->sq_lock);
			}
		}
	}

	ASSERT(MUTEX_NOT_HELD(&sqp->sq_lock));
}

/*
 * squeue_enter() - enter squeue sqp with mblk mp (which can be
 * a chain), while tail points to the end and cnt is the number of
 * mblks in the chain.
 *
 * For a chain of a single packet (i.e. mp == tail), go through the
 * fast path if no one is processing the squeue and nothing is queued.
 *
 * The proc and arg for each mblk are already stored in the mblk in
 * appropriate places.
 *
 * The process_flag specifies if we are allowed to process the mblk
 * and drain in the entering thread context. If process_flag is
 * SQ_FILL, then we just queue the mblk and return (after signaling
 * the worker thread if no one else is processing the squeue).
 *
 * The ira argument can be used when the count is one.
 * For a chain the caller needs to prepend any needed mblks from
 * ip_recv_attr_to_mblk().
 */
/* ARGSUSED */
void
squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
    ip_recv_attr_t *ira, int process_flag, uint8_t tag)
{
	conn_t		*connp;
	sqproc_t	proc;
	hrtime_t	now;

	ASSERT(sqp != NULL);
	ASSERT(mp != NULL);
	ASSERT(tail != NULL);
	ASSERT(cnt > 0);
	ASSERT(MUTEX_NOT_HELD(&sqp->sq_lock));
	ASSERT(ira == NULL || cnt == 1);

	mutex_enter(&sqp->sq_lock);

	/*
	 * Try to process the packet if SQ_FILL flag is not set and
	 * we are allowed to process the squeue. The SQ_NODRAIN is
	 * ignored if the packet chain consists of more than 1 packet.
	 */
	if (!(sqp->sq_state & SQS_PROC) && ((process_flag == SQ_PROCESS) ||
	    (process_flag == SQ_NODRAIN && sqp->sq_first == NULL))) {
		/*
		 * See if anything is already queued. If we are the
		 * first packet, do inline processing else queue the
		 * packet and do the drain.
		 */
		if (sqp->sq_first == NULL && cnt == 1) {
			/*
			 * Fast-path, ok to process and nothing queued.
			 */
			sqp->sq_state |= (SQS_PROC|SQS_FAST);
			sqp->sq_run = curthread;
			mutex_exit(&sqp->sq_lock);

			/*
			 * This is a chain of one packet, so
			 * go through this fast path.
			 */
			ASSERT(mp->b_prev != NULL);
			ASSERT(mp->b_queue != NULL);
			connp = (conn_t *)mp->b_prev;
			mp->b_prev = NULL;
			proc = (sqproc_t)mp->b_queue;
			mp->b_queue = NULL;
			ASSERT(proc != NULL && connp != NULL);
			ASSERT(mp->b_next == NULL);

			/*
			 * Handle squeue switching. More details in the
			 * block comment at the top of the file
			 */
			if (connp->conn_sqp == sqp) {
				SQUEUE_DBG_SET(sqp, mp, proc, connp,
				    tag);
				connp->conn_on_sqp = B_TRUE;
				DTRACE_PROBE3(squeue__proc__start, squeue_t *,
				    sqp, mblk_t *, mp, conn_t *, connp);
				(*proc)(connp, mp, sqp, ira);
				DTRACE_PROBE2(squeue__proc__end, squeue_t *,
				    sqp, conn_t *, connp);
				connp->conn_on_sqp = B_FALSE;
				SQUEUE_DBG_CLEAR(sqp);
				CONN_DEC_REF(connp);
			} else {
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
				    connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
			}
			ASSERT(MUTEX_NOT_HELD(&sqp->sq_lock));
			mutex_enter(&sqp->sq_lock);
			sqp->sq_state &= ~(SQS_PROC|SQS_FAST);
			sqp->sq_run = NULL;
			if (sqp->sq_first == NULL ||
			    process_flag == SQ_NODRAIN) {
				if (sqp->sq_first != NULL) {
					squeue_worker_wakeup(sqp);
					return;
				}
				/*
				 * We processed our packet inline and nothing
				 * new has arrived. We are done. In case any
				 * control actions are pending, wake up the
				 * worker.
				 */
				if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
					cv_signal(&sqp->sq_worker_cv);
				mutex_exit(&sqp->sq_lock);
				return;
			}
		} else {
			if (ira != NULL) {
				mblk_t	*attrmp;

				ASSERT(cnt == 1);
				attrmp = ip_recv_attr_to_mblk(ira);
				if (attrmp == NULL) {
					mutex_exit(&sqp->sq_lock);
					ip_drop_input("squeue: "
					    "ip_recv_attr_to_mblk",
					    mp, NULL);
					/* Caller already set b_prev/b_next */
					mp->b_prev = mp->b_next = NULL;
					freemsg(mp);
					return;
				}
				ASSERT(attrmp->b_cont == NULL);
				attrmp->b_cont = mp;
				/* Move connp and func to new */
				attrmp->b_queue = mp->b_queue;
				mp->b_queue = NULL;
				attrmp->b_prev = mp->b_prev;
				mp->b_prev = NULL;

				ASSERT(mp == tail);
				tail = mp = attrmp;
			}

			ENQUEUE_CHAIN(sqp, mp, tail, cnt);
#ifdef DEBUG
			mp->b_tag = tag;
#endif
		}
		/*
		 * We are here because either we couldn't do inline
		 * processing (because something was already queued),
		 * or we had a chain of more than one packet,
		 * or something else arrived after we were done with
		 * inline processing.
		 */
		ASSERT(MUTEX_HELD(&sqp->sq_lock));
		ASSERT(sqp->sq_first != NULL);
		now = gethrtime();
		sqp->sq_run = curthread;
		sqp->sq_drain(sqp, SQS_ENTER, now + squeue_drain_ns);

		/*
		 * If we didn't do a complete drain, the worker
		 * thread was already signalled by squeue_drain.
		 * In case any control actions are pending, wake
		 * up the worker.
		 */
		sqp->sq_run = NULL;
		if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
			cv_signal(&sqp->sq_worker_cv);
		mutex_exit(&sqp->sq_lock);
		return;
	} else {
		/*
		 * We let a thread processing a squeue reenter only
		 * once. This helps the case of an incoming connection
		 * where a SYN-ACK-ACK that triggers the conn_ind
		 * doesn't have to queue the packet if listener and
		 * eager are on the same squeue. It also helps the
		 * loopback connection where the two ends are bound
		 * to the same squeue (which is typical on single
		 * CPU machines).
		 *
		 * We let the thread reenter only once for fear of
		 * blowing the stack with multiple traversals.
		 */
		connp = (conn_t *)mp->b_prev;
		if (!(sqp->sq_state & SQS_REENTER) &&
		    (process_flag != SQ_FILL) && (sqp->sq_first == NULL) &&
		    (sqp->sq_run == curthread) && (cnt == 1) &&
		    (connp->conn_on_sqp == B_FALSE)) {
			sqp->sq_state |= SQS_REENTER;
			mutex_exit(&sqp->sq_lock);

			ASSERT(mp->b_prev != NULL);
			ASSERT(mp->b_queue != NULL);

			mp->b_prev = NULL;
			proc = (sqproc_t)mp->b_queue;
			mp->b_queue = NULL;

			/*
			 * Handle squeue switching. More details in the
			 * block comment at the top of the file
			 */
			if (connp->conn_sqp == sqp) {
				connp->conn_on_sqp = B_TRUE;
				DTRACE_PROBE3(squeue__proc__start, squeue_t *,
				    sqp, mblk_t *, mp, conn_t *, connp);
				(*proc)(connp, mp, sqp, ira);
				DTRACE_PROBE2(squeue__proc__end, squeue_t *,
				    sqp, conn_t *, connp);
				connp->conn_on_sqp = B_FALSE;
				CONN_DEC_REF(connp);
			} else {
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
				    connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
			}

			mutex_enter(&sqp->sq_lock);
			sqp->sq_state &= ~SQS_REENTER;
			mutex_exit(&sqp->sq_lock);
			return;
		}

		/*
		 * The queue is already being processed or there are already
		 * one or more packets on the queue. Enqueue the
		 * packet and wake up the squeue worker thread if the
		 * squeue is not being processed.
		 */
#ifdef DEBUG
		mp->b_tag = tag;
#endif
		if (ira != NULL) {
			mblk_t	*attrmp;

			ASSERT(cnt == 1);
			attrmp = ip_recv_attr_to_mblk(ira);
			if (attrmp == NULL) {
				mutex_exit(&sqp->sq_lock);
				ip_drop_input("squeue: ip_recv_attr_to_mblk",
				    mp, NULL);
				/* Caller already set b_prev/b_next */
				mp->b_prev = mp->b_next = NULL;
				freemsg(mp);
				return;
			}
			ASSERT(attrmp->b_cont == NULL);
			attrmp->b_cont = mp;
			/* Move connp and func to new */
			attrmp->b_queue = mp->b_queue;
			mp->b_queue = NULL;
			attrmp->b_prev = mp->b_prev;
			mp->b_prev = NULL;

			ASSERT(mp == tail);
			tail = mp = attrmp;
		}
		ENQUEUE_CHAIN(sqp, mp, tail, cnt);
		if (!(sqp->sq_state & SQS_PROC)) {
			squeue_worker_wakeup(sqp);
			return;
		}
		/*
		 * In case any control actions are pending, wake
		 * up the worker.
		 */
		if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
			cv_signal(&sqp->sq_worker_cv);
		mutex_exit(&sqp->sq_lock);
		return;
	}
}

/*
 * PRIVATE FUNCTIONS
 */

static void
squeue_fire(void *arg)
{
	squeue_t	*sqp = arg;
	uint_t		state;

	mutex_enter(&sqp->sq_lock);

	state = sqp->sq_state;
	if (sqp->sq_tid == 0 && !(state & SQS_TMO_PROG)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}

	sqp->sq_tid = 0;
	/*
	 * The timeout fired before we got a chance to set it.
	 * Process it anyway but remove the SQS_TMO_PROG so that
	 * the guy trying to set the timeout knows that it has
	 * already been processed.
	 */
	if (state & SQS_TMO_PROG)
		sqp->sq_state &= ~SQS_TMO_PROG;

	if (!(state & SQS_PROC)) {
		sqp->sq_awaken = ddi_get_lbolt();
		cv_signal(&sqp->sq_worker_cv);
	}
	mutex_exit(&sqp->sq_lock);
}

static void
squeue_drain(squeue_t *sqp, uint_t proc_type, hrtime_t expire)
{
	mblk_t		*mp;
	mblk_t		*head;
	sqproc_t	proc;
	conn_t		*connp;
	timeout_id_t	tid;
	ill_rx_ring_t	*sq_rx_ring = sqp->sq_rx_ring;
	hrtime_t	now;
	boolean_t	did_wakeup = B_FALSE;
	boolean_t	sq_poll_capable;
	ip_recv_attr_t	*ira, iras;

	/*
	 * Before doing any work, check our stack depth; if we're not a
	 * worker thread for this squeue and we're beginning to get tight
	 * on stack, kick the worker, bump a counter and return.
	 */
	if (proc_type != SQS_WORKER && STACK_BIAS + (uintptr_t)getfp() -
	    (uintptr_t)curthread->t_stkbase < squeue_drain_stack_needed) {
		ASSERT(mutex_owned(&sqp->sq_lock));
		sqp->sq_awaken = ddi_get_lbolt();
		cv_signal(&sqp->sq_worker_cv);
		squeue_drain_stack_toodeep++;
		return;
	}

	sq_poll_capable = (sqp->sq_state & SQS_POLL_CAPAB) != 0;
again:
	ASSERT(mutex_owned(&sqp->sq_lock));
	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
	    SQS_POLL_QUIESCE_DONE)));

	head = sqp->sq_first;
	sqp->sq_first = NULL;
	sqp->sq_last = NULL;
	sqp->sq_count = 0;

	if ((tid = sqp->sq_tid) != 0)
		sqp->sq_tid = 0;

	sqp->sq_state |= SQS_PROC | proc_type;

	/*
	 * We have backlog built up. Switch to polling mode if the
	 * device underneath allows it. Need to do it so that
	 * more packets don't come in and disturb us (by contending
	 * for sq_lock or higher priority thread preempting us).
	 *
	 * The worker thread is allowed to do active polling while we
	 * just disable the interrupts for drain by non worker (kernel
	 * or userland) threads so they can peacefully process the
	 * packets during time allocated to them.
	 */
	SQS_POLLING_ON(sqp, sq_poll_capable, sq_rx_ring);
	mutex_exit(&sqp->sq_lock);

	if (tid != 0)
		(void) untimeout(tid);

	while ((mp = head) != NULL) {

		head = mp->b_next;
		mp->b_next = NULL;

		proc = (sqproc_t)mp->b_queue;
		mp->b_queue = NULL;
		connp = (conn_t *)mp->b_prev;
		mp->b_prev = NULL;

		/* Is there an ip_recv_attr_t to handle? */
		if (ip_recv_attr_is_mblk(mp)) {
			mblk_t	*attrmp = mp;

			ASSERT(attrmp->b_cont != NULL);

			mp = attrmp->b_cont;
			attrmp->b_cont = NULL;
			ASSERT(mp->b_queue == NULL);
			ASSERT(mp->b_prev == NULL);

			if (!ip_recv_attr_from_mblk(attrmp, &iras)) {
				/* The ill or ip_stack_t disappeared on us */
				ip_drop_input("ip_recv_attr_from_mblk",
				    mp, NULL);
				ira_cleanup(&iras, B_TRUE);
				CONN_DEC_REF(connp);
				continue;
			}
			ira = &iras;
		} else {
			ira = NULL;
		}


		/*
		 * Handle squeue switching. More details in the
		 * block comment at the top of the file
		 */
		if (connp->conn_sqp == sqp) {
			SQUEUE_DBG_SET(sqp, mp, proc, connp,
			    mp->b_tag);
			connp->conn_on_sqp = B_TRUE;
			DTRACE_PROBE3(squeue__proc__start, squeue_t *,
			    sqp, mblk_t *, mp, conn_t *, connp);
			(*proc)(connp, mp, sqp, ira);
			DTRACE_PROBE2(squeue__proc__end, squeue_t *,
			    sqp, conn_t *, connp);
			connp->conn_on_sqp = B_FALSE;
			CONN_DEC_REF(connp);
		} else {
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira,
			    SQ_FILL, SQTAG_SQUEUE_CHANGE);
		}
		if (ira != NULL)
			ira_cleanup(ira, B_TRUE);
	}

	SQUEUE_DBG_CLEAR(sqp);

	mutex_enter(&sqp->sq_lock);

	/*
	 * Check if there is still work to do (either more arrived or timer
	 * expired). If we are the worker thread and we are polling capable,
	 * continue doing the work since no one else is around to do the
	 * work anyway (but signal the poll thread to retrieve some packets
	 * in the meanwhile). If we are not the worker thread, just
	 * signal the worker thread to take up the work if processing time
	 * has expired.
	 */
	if (sqp->sq_first != NULL) {
		/*
		 * Still more to process. If the time quantum has not expired,
		 * we should let the drain go on. The worker thread is allowed
		 * to drain as long as there is anything left.
		 */
		now = gethrtime();
		if ((now < expire) || (proc_type == SQS_WORKER)) {
			/*
			 * If time not expired or we are the worker thread and
			 * this squeue is polling capable, continue to do
			 * the drain.
			 *
			 * We turn off interrupts for all userland threads
			 * doing drain but we do active polling only for
			 * the worker thread.
			 *
			 * Calling SQS_POLL_RING() even in the case of
			 * SQS_POLLING_ON() not succeeding is ok as
			 * SQS_POLL_RING() will not wake up the poll thread
			 * if the SQS_POLLING bit is not set.
			 */
			if (proc_type == SQS_WORKER)
				SQS_POLL_RING(sqp);
			goto again;
		} else {
			did_wakeup = B_TRUE;
			sqp->sq_awaken = ddi_get_lbolt();
			cv_signal(&sqp->sq_worker_cv);
		}
	}

	/*
	 * If the poll thread is already running, just return. The
	 * poll thread continues to hold the proc and will finish
	 * processing.
	 */
	if (sqp->sq_state & SQS_GET_PKTS) {
		ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
		    SQS_POLL_QUIESCE_DONE)));
		sqp->sq_state &= ~proc_type;
		return;
	}

	/*
	 * If we are the worker thread and no work is left, send the poll
	 * thread down once more to see if something arrived. Otherwise,
	 * turn the interrupts back on and we are done.
	 */
	if ((proc_type == SQS_WORKER) && (sqp->sq_state & SQS_POLLING)) {
		/*
		 * Do one last check to see if anything arrived
		 * in the NIC. We leave the SQS_PROC set to ensure
		 * that the poll thread keeps the PROC and can decide
		 * if it needs to turn polling off or continue
		 * processing.
		 *
		 * If we drop the SQS_PROC here and the poll thread comes
		 * up empty handed, it can not safely turn polling off
		 * since someone else could have acquired the PROC
		 * and started draining. The previously running poll
		 * thread and the current thread doing drain would end
		 * up in a race for turning polling on/off and more
		 * complex code would be required to deal with it.
		 *
		 * It's a lot simpler for drain to hand the SQS_PROC to
		 * the poll thread (if running) and let the poll thread
		 * finish without worrying about racing with any other
		 * thread.
		 */
		ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
		    SQS_POLL_QUIESCE_DONE)));
		SQS_POLL_RING(sqp);
		sqp->sq_state &= ~proc_type;
	} else {
		/*
		 * The squeue is either not capable of polling or the
		 * attempt to blank (i.e., turn SQS_POLLING_ON()) was
		 * unsuccessful or the poll thread already finished
		 * processing and didn't find anything. Since there
		 * is nothing queued and we already turned polling on
		 * (for all threads doing drain), we should turn
		 * polling off and relinquish the PROC.
		 */
		ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
		    SQS_POLL_QUIESCE_DONE)));
		SQS_POLLING_OFF(sqp, sq_poll_capable, sq_rx_ring);
		sqp->sq_state &= ~(SQS_PROC | proc_type);
		if (!did_wakeup && sqp->sq_first != NULL) {
			squeue_worker_wakeup(sqp);
			mutex_enter(&sqp->sq_lock);
		}
		/*
		 * If we are not the worker and there is a pending quiesce
		 * event, wake up the worker
		 */
		if ((proc_type != SQS_WORKER) &&
		    (sqp->sq_state & SQS_WORKER_THR_CONTROL))
			cv_signal(&sqp->sq_worker_cv);
	}
}

/*
 * Quiesce, Restart, or Cleanup of the squeue poll thread.
 *
 * Quiesce and Restart: After an squeue poll thread has been quiesced, it does
 * not attempt to poll the underlying soft ring any more. The quiesce is
 * triggered by the mac layer when it wants to quiesce a soft ring. Typically
 * control operations such as changing the fanout of a NIC or VNIC (dladm
 * setlinkprop) need to quiesce data flow before changing the wiring.
 * The operation is done by the mac layer, but it calls back into IP to
 * quiesce the soft ring. After completing the operation (say increase or
 * decrease of the fanout) the mac layer then calls back into IP to restart
 * the quiesced soft ring.
 *
 * Cleanup: This is triggered when the squeue binding to a soft ring is
 * removed permanently. Typically interface plumb and unplumb would trigger
 * this. It can also be triggered from the mac layer when a soft ring is
 * being deleted say as the result of a fanout reduction. Since squeues are
 * never deleted, the cleanup marks the squeue as fit for recycling and
 * moves it to the zeroth squeue set.
 */
static void
squeue_poll_thr_control(squeue_t *sqp)
{
	if (sqp->sq_state & SQS_POLL_THR_RESTART) {
		/* Restart implies a previous quiesce */
		ASSERT(sqp->sq_state & SQS_POLL_THR_QUIESCED);
		sqp->sq_state &= ~(SQS_POLL_THR_QUIESCED |
		    SQS_POLL_THR_RESTART);
		sqp->sq_state |= SQS_POLL_CAPAB;
		cv_signal(&sqp->sq_worker_cv);
		return;
	}

	if (sqp->sq_state & SQS_POLL_THR_QUIESCE) {
		sqp->sq_state |= SQS_POLL_THR_QUIESCED;
		sqp->sq_state &= ~SQS_POLL_THR_QUIESCE;
		cv_signal(&sqp->sq_worker_cv);
		return;
	}
}

/*
 * POLLING Notes
 *
 * With polling mode, we want to do as much processing as we possibly can
 * in worker thread context. The sweet spot is the worker thread keeps doing
 * work all the time in polling mode and writers etc. keep dumping packets
 * to the worker thread. Occasionally, we send the poll thread (running at a
 * lower priority) down to the NIC to get the chain of packets to feed to
 * the worker. Sending the poll thread down to the NIC is dependent on
 * three criteria:
 *
 * 1) It's always driven from squeue_drain and only if the worker thread is
 *	doing the drain.
 * 2) We cleared the backlog once and more packets arrived in between.
 *	Before starting drain again, send the poll thread down if
 *	the drain is being done by the worker thread.
 * 3) Before exiting the squeue_drain, if the poll thread is not already
 *	working and we are the worker thread, try to poll one more time.
 *
 * For latency's sake, we do allow any thread calling squeue_enter
 * to process its packet provided:
 *
 * 1) Nothing is queued
 * 2) If more packets arrived in between, non-worker threads are allowed
 *	to do the drain until their time quantum expires, provided
 *	SQS_GET_PKTS wasn't set in between.
 *
 * Avoiding deadlocks with interrupts
 * ==================================
 *
 * One of the big problems is that we can't send the poll thread down while
 * holding the sq_lock since the thread can block. So we drop the sq_lock
 * before calling sq_get_pkts(). We keep holding the SQS_PROC as long as the
 * poll thread is running so that no other thread can acquire the
 * perimeter in between. If the squeue_drain gets done (no more work
 * left), it leaves the SQS_PROC set if the poll thread is running.
 */

/*
 * This is the squeue poll thread. In poll mode, it polls the underlying
 * TCP soft ring and feeds packets into the squeue. The worker thread then
 * drains the squeue. The poll thread also responds to control signals for
 * quiescing, restarting, or cleanup of an squeue. These are driven by
 * control operations like plumb/unplumb or as a result of dynamic Rx ring
 * related operations that are driven from the mac layer.
 */
1030da14cebeSEric Cheng static void
squeue_polling_thread(squeue_t * sqp)1031da14cebeSEric Cheng squeue_polling_thread(squeue_t *sqp)
1032da14cebeSEric Cheng {
1033da14cebeSEric Cheng kmutex_t *lock = &sqp->sq_lock;
1034da14cebeSEric Cheng kcondvar_t *async = &sqp->sq_poll_cv;
1035da14cebeSEric Cheng ip_mac_rx_t sq_get_pkts;
1036da14cebeSEric Cheng ip_accept_t ip_accept;
1037da14cebeSEric Cheng ill_rx_ring_t *sq_rx_ring;
1038da14cebeSEric Cheng ill_t *sq_ill;
1039da14cebeSEric Cheng mblk_t *head, *tail, *mp;
1040da14cebeSEric Cheng uint_t cnt;
1041da14cebeSEric Cheng void *sq_mac_handle;
1042da14cebeSEric Cheng callb_cpr_t cprinfo;
1043da14cebeSEric Cheng size_t bytes_to_pickup;
1044da14cebeSEric Cheng uint32_t ctl_state;
1045da14cebeSEric Cheng
1046da14cebeSEric Cheng CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "sq_poll");
1047da14cebeSEric Cheng mutex_enter(lock);
1048da14cebeSEric Cheng
1049da14cebeSEric Cheng for (;;) {
1050da14cebeSEric Cheng CALLB_CPR_SAFE_BEGIN(&cprinfo);
1051da14cebeSEric Cheng cv_wait(async, lock);
1052da14cebeSEric Cheng CALLB_CPR_SAFE_END(&cprinfo, lock);
1053da14cebeSEric Cheng
1054da14cebeSEric Cheng ctl_state = sqp->sq_state & (SQS_POLL_THR_CONTROL |
1055da14cebeSEric Cheng SQS_POLL_THR_QUIESCED);
1056da14cebeSEric Cheng if (ctl_state != 0) {
1057da14cebeSEric Cheng /*
1058da14cebeSEric Cheng * If the squeue is quiesced, then wait for a control
1059da14cebeSEric Cheng * request. A quiesced squeue must not poll the
1060da14cebeSEric Cheng * underlying soft ring.
1061da14cebeSEric Cheng */
1062da14cebeSEric Cheng if (ctl_state == SQS_POLL_THR_QUIESCED)
1063da14cebeSEric Cheng continue;
1064da14cebeSEric Cheng /*
1065da14cebeSEric Cheng * Act on control requests to quiesce, clean up, or
1066da14cebeSEric Cheng * restart a squeue.
1067da14cebeSEric Cheng */
1068da14cebeSEric Cheng squeue_poll_thr_control(sqp);
1069da14cebeSEric Cheng continue;
1070da14cebeSEric Cheng }
1071da14cebeSEric Cheng
1072da14cebeSEric Cheng if (!(sqp->sq_state & SQS_POLL_CAPAB))
1073da14cebeSEric Cheng continue;
1074da14cebeSEric Cheng
1075da14cebeSEric Cheng ASSERT((sqp->sq_state &
1076da14cebeSEric Cheng (SQS_PROC|SQS_POLLING|SQS_GET_PKTS)) ==
1077da14cebeSEric Cheng (SQS_PROC|SQS_POLLING|SQS_GET_PKTS));
1078da14cebeSEric Cheng
1079da14cebeSEric Cheng poll_again:
1080da14cebeSEric Cheng sq_rx_ring = sqp->sq_rx_ring;
1081da14cebeSEric Cheng sq_get_pkts = sq_rx_ring->rr_rx;
1082da14cebeSEric Cheng sq_mac_handle = sq_rx_ring->rr_rx_handle;
1083da14cebeSEric Cheng ip_accept = sq_rx_ring->rr_ip_accept;
1084da14cebeSEric Cheng sq_ill = sq_rx_ring->rr_ill;
1085da14cebeSEric Cheng bytes_to_pickup = MAX_BYTES_TO_PICKUP;
1086da14cebeSEric Cheng mutex_exit(lock);
1087da14cebeSEric Cheng head = sq_get_pkts(sq_mac_handle, bytes_to_pickup);
1088da14cebeSEric Cheng mp = NULL;
1089da14cebeSEric Cheng if (head != NULL) {
1090da14cebeSEric Cheng /*
1091da14cebeSEric Cheng * We got the packet chain from the mac layer. It
1092da14cebeSEric Cheng * would be nice to be able to process it inline
1093da14cebeSEric Cheng * for better performance but we need to give
1094da14cebeSEric Cheng * IP a chance to look at this chain to ensure
1095da14cebeSEric Cheng * that packets are really meant for this squeue
1096da14cebeSEric Cheng * and do the IP processing.
1097da14cebeSEric Cheng */
1098da14cebeSEric Cheng mp = ip_accept(sq_ill, sq_rx_ring, sqp, head,
1099da14cebeSEric Cheng &tail, &cnt);
1100da14cebeSEric Cheng }
1101da14cebeSEric Cheng mutex_enter(lock);
1102bd670b35SErik Nordmark if (mp != NULL) {
1103bd670b35SErik Nordmark /*
1104bd670b35SErik Nordmark * The ip_accept function has already added an
1105bd670b35SErik Nordmark * ip_recv_attr_t mblk if that is needed.
1106bd670b35SErik Nordmark */
1107da14cebeSEric Cheng ENQUEUE_CHAIN(sqp, mp, tail, cnt);
1108bd670b35SErik Nordmark }
1109da14cebeSEric Cheng ASSERT((sqp->sq_state &
1110da14cebeSEric Cheng (SQS_PROC|SQS_POLLING|SQS_GET_PKTS)) ==
1111da14cebeSEric Cheng (SQS_PROC|SQS_POLLING|SQS_GET_PKTS));
1112da14cebeSEric Cheng
1113da14cebeSEric Cheng if (sqp->sq_first != NULL && !(sqp->sq_state & SQS_WORKER)) {
1114da14cebeSEric Cheng /*
1115da14cebeSEric Cheng * We have packets to process and the worker thread
1116da14cebeSEric Cheng * is not running. Check whether the poll thread is
1117da14cebeSEric Cheng * allowed to process them. Let it do the processing
1118da14cebeSEric Cheng * only if it picked up some packets from the NIC;
1119da14cebeSEric Cheng * otherwise wake up the worker thread.
1120da14cebeSEric Cheng */
1121da14cebeSEric Cheng if (mp != NULL) {
1122da14cebeSEric Cheng hrtime_t now;
1123d19d6468Sbw
1124d19d6468Sbw now = gethrtime();
1125da14cebeSEric Cheng sqp->sq_run = curthread;
1126da14cebeSEric Cheng sqp->sq_drain(sqp, SQS_POLL_PROC, now +
1127da14cebeSEric Cheng squeue_drain_ns);
1128da14cebeSEric Cheng sqp->sq_run = NULL;
1129da14cebeSEric Cheng
1130da14cebeSEric Cheng if (sqp->sq_first == NULL)
1131da14cebeSEric Cheng goto poll_again;
11327c478bd9Sstevel@tonic-gate
11337c478bd9Sstevel@tonic-gate /*
1134da14cebeSEric Cheng * Couldn't do the entire drain because the
1135da14cebeSEric Cheng * time limit expired, so let the
1136da14cebeSEric Cheng * worker thread take over.
11377c478bd9Sstevel@tonic-gate */
1138da14cebeSEric Cheng }
1139da14cebeSEric Cheng
1140d3d50737SRafael Vanoni sqp->sq_awaken = ddi_get_lbolt();
1141da14cebeSEric Cheng /*
1142da14cebeSEric Cheng * Set SQS_PROC_HELD so the worker
1143da14cebeSEric Cheng * thread can distinguish where it is called from. We
1144da14cebeSEric Cheng * could remove the SQS_PROC flag here and turn off
1145da14cebeSEric Cheng * polling so that it wouldn't matter who gets the
1146da14cebeSEric Cheng * processing, but we get better performance this way
1147da14cebeSEric Cheng * and save the cost of turning polling off and possibly
1148da14cebeSEric Cheng * back on again as soon as we start draining again.
1149da14cebeSEric Cheng *
1150da14cebeSEric Cheng * We can't remove the SQS_PROC flag without turning
1151da14cebeSEric Cheng * polling off until we can guarantee that control
1152da14cebeSEric Cheng * will return to squeue_drain immediately.
1153da14cebeSEric Cheng */
1154da14cebeSEric Cheng sqp->sq_state |= SQS_PROC_HELD;
1155da14cebeSEric Cheng sqp->sq_state &= ~SQS_GET_PKTS;
1156da14cebeSEric Cheng cv_signal(&sqp->sq_worker_cv);
1157da14cebeSEric Cheng } else if (sqp->sq_first == NULL &&
1158da14cebeSEric Cheng !(sqp->sq_state & SQS_WORKER)) {
1159da14cebeSEric Cheng /*
1160da14cebeSEric Cheng * Nothing is queued and the worker thread is not running.
1161da14cebeSEric Cheng * Since we hold SQS_PROC, no other thread is
1162da14cebeSEric Cheng * processing the squeue. This means that there
1163da14cebeSEric Cheng * is no work to be done and nothing is queued in
1164da14cebeSEric Cheng * the squeue or in the NIC. Turn polling off and go
1165da14cebeSEric Cheng * back to interrupt mode.
1166da14cebeSEric Cheng */
1167da14cebeSEric Cheng sqp->sq_state &= ~(SQS_PROC|SQS_GET_PKTS);
1168da14cebeSEric Cheng /* LINTED: constant in conditional context */
1169da14cebeSEric Cheng SQS_POLLING_OFF(sqp, B_TRUE, sq_rx_ring);
11704cc34124SThirumalai Srinivasan
11714cc34124SThirumalai Srinivasan /*
11724cc34124SThirumalai Srinivasan * If there is a pending control operation
11734cc34124SThirumalai Srinivasan * wake up the worker, since it is currently
11744cc34124SThirumalai Srinivasan * not running.
11754cc34124SThirumalai Srinivasan */
11764cc34124SThirumalai Srinivasan if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
11774cc34124SThirumalai Srinivasan cv_signal(&sqp->sq_worker_cv);
1178da14cebeSEric Cheng } else {
1179da14cebeSEric Cheng /*
1180da14cebeSEric Cheng * Worker thread is already running. We don't need
1181da14cebeSEric Cheng * to do anything. Indicate that poll thread is done.
1182da14cebeSEric Cheng */
1183da14cebeSEric Cheng sqp->sq_state &= ~SQS_GET_PKTS;
1184da14cebeSEric Cheng }
1185da14cebeSEric Cheng if (sqp->sq_state & SQS_POLL_THR_CONTROL) {
1186da14cebeSEric Cheng /*
1187da14cebeSEric Cheng * Act on control requests to quiesce, clean up, or
1188da14cebeSEric Cheng * restart a squeue.
1189da14cebeSEric Cheng */
1190da14cebeSEric Cheng squeue_poll_thr_control(sqp);
1191da14cebeSEric Cheng }
11927c478bd9Sstevel@tonic-gate }
11937c478bd9Sstevel@tonic-gate }
11947c478bd9Sstevel@tonic-gate
11957c478bd9Sstevel@tonic-gate /*
1196da14cebeSEric Cheng * The squeue worker thread acts on any control requests to quiesce, cleanup
1197da14cebeSEric Cheng * or restart an ill_rx_ring_t by calling this function. The worker thread
1198da14cebeSEric Cheng * synchronizes with the squeue poll thread to complete the request and finally
1199da14cebeSEric Cheng * wakes up the requestor when the request is completed.
12007c478bd9Sstevel@tonic-gate */
1201da14cebeSEric Cheng static void
1202da14cebeSEric Cheng squeue_worker_thr_control(squeue_t *sqp)
1203da14cebeSEric Cheng {
1204da14cebeSEric Cheng ill_t *ill;
1205da14cebeSEric Cheng ill_rx_ring_t *rx_ring;
12067c478bd9Sstevel@tonic-gate
1207da14cebeSEric Cheng ASSERT(MUTEX_HELD(&sqp->sq_lock));
1208da14cebeSEric Cheng
1209da14cebeSEric Cheng if (sqp->sq_state & SQS_POLL_RESTART) {
1210da14cebeSEric Cheng /* Restart implies a previous quiesce. */
1211da14cebeSEric Cheng ASSERT((sqp->sq_state & (SQS_PROC_HELD |
1212da14cebeSEric Cheng SQS_POLL_QUIESCE_DONE | SQS_PROC | SQS_WORKER)) ==
1213da14cebeSEric Cheng (SQS_POLL_QUIESCE_DONE | SQS_PROC | SQS_WORKER));
1214da14cebeSEric Cheng /*
1215da14cebeSEric Cheng * Request the squeue poll thread to restart and wait till
1216da14cebeSEric Cheng * it actually restarts.
1217da14cebeSEric Cheng */
1218da14cebeSEric Cheng sqp->sq_state &= ~SQS_POLL_QUIESCE_DONE;
1219da14cebeSEric Cheng sqp->sq_state |= SQS_POLL_THR_RESTART;
1220da14cebeSEric Cheng cv_signal(&sqp->sq_poll_cv);
1221da14cebeSEric Cheng while (sqp->sq_state & SQS_POLL_THR_QUIESCED)
1222da14cebeSEric Cheng cv_wait(&sqp->sq_worker_cv, &sqp->sq_lock);
1223da14cebeSEric Cheng sqp->sq_state &= ~(SQS_POLL_RESTART | SQS_PROC |
1224da14cebeSEric Cheng SQS_WORKER);
1225da14cebeSEric Cheng /*
1226da14cebeSEric Cheng * Signal any waiter that is waiting for the restart
1227da14cebeSEric Cheng * to complete
1228da14cebeSEric Cheng */
1229da14cebeSEric Cheng sqp->sq_state |= SQS_POLL_RESTART_DONE;
1230da14cebeSEric Cheng cv_signal(&sqp->sq_ctrlop_done_cv);
1231da14cebeSEric Cheng return;
1232da14cebeSEric Cheng }
1233da14cebeSEric Cheng
1234da14cebeSEric Cheng if (sqp->sq_state & SQS_PROC_HELD) {
1235da14cebeSEric Cheng /* The squeue poll thread handed control to us */
1236da14cebeSEric Cheng ASSERT(sqp->sq_state & SQS_PROC);
1237da14cebeSEric Cheng }
12387c478bd9Sstevel@tonic-gate
12397c478bd9Sstevel@tonic-gate /*
1240da14cebeSEric Cheng * Prevent any other thread from processing the squeue
1241da14cebeSEric Cheng * until we finish the control actions by setting SQS_PROC.
1242da14cebeSEric Cheng * But allow ourselves to re-enter by setting SQS_WORKER.
12437c478bd9Sstevel@tonic-gate */
1244da14cebeSEric Cheng sqp->sq_state |= (SQS_PROC | SQS_WORKER);
1245da14cebeSEric Cheng
1246da14cebeSEric Cheng /* Signal the squeue poll thread and wait for it to quiesce itself */
1247da14cebeSEric Cheng if (!(sqp->sq_state & SQS_POLL_THR_QUIESCED)) {
1248da14cebeSEric Cheng sqp->sq_state |= SQS_POLL_THR_QUIESCE;
1249da14cebeSEric Cheng cv_signal(&sqp->sq_poll_cv);
1250da14cebeSEric Cheng while (!(sqp->sq_state & SQS_POLL_THR_QUIESCED))
1251da14cebeSEric Cheng cv_wait(&sqp->sq_worker_cv, &sqp->sq_lock);
1252da14cebeSEric Cheng }
1253da14cebeSEric Cheng
1254da14cebeSEric Cheng rx_ring = sqp->sq_rx_ring;
1255da14cebeSEric Cheng ill = rx_ring->rr_ill;
1256da14cebeSEric Cheng /*
1257da14cebeSEric Cheng * The lock hierarchy is as follows:
1258da14cebeSEric Cheng * cpu_lock -> ill_lock -> sqset_lock -> sq_lock
1259da14cebeSEric Cheng */
1260da14cebeSEric Cheng mutex_exit(&sqp->sq_lock);
1261da14cebeSEric Cheng mutex_enter(&ill->ill_lock);
1262da14cebeSEric Cheng mutex_enter(&sqp->sq_lock);
1263da14cebeSEric Cheng
1264da14cebeSEric Cheng SQS_POLLING_OFF(sqp, (sqp->sq_state & SQS_POLL_CAPAB) != 0,
1265da14cebeSEric Cheng sqp->sq_rx_ring);
1266da14cebeSEric Cheng sqp->sq_state &= ~(SQS_POLL_CAPAB | SQS_GET_PKTS | SQS_PROC_HELD);
1267da14cebeSEric Cheng if (sqp->sq_state & SQS_POLL_CLEANUP) {
1268da14cebeSEric Cheng /*
1269da14cebeSEric Cheng * Disassociate this squeue from its ill_rx_ring_t.
1270da14cebeSEric Cheng * The rr_sqp, sq_rx_ring fields are protected by the
1271da14cebeSEric Cheng * corresponding squeue, ill_lock* and sq_lock. Holding any
1272da14cebeSEric Cheng * of them will ensure that the ring to squeue mapping does
1273da14cebeSEric Cheng * not change.
1274da14cebeSEric Cheng */
1275da14cebeSEric Cheng ASSERT(!(sqp->sq_state & SQS_DEFAULT));
1276da14cebeSEric Cheng
1277da14cebeSEric Cheng sqp->sq_rx_ring = NULL;
1278da14cebeSEric Cheng rx_ring->rr_sqp = NULL;
1279da14cebeSEric Cheng
1280da14cebeSEric Cheng sqp->sq_state &= ~(SQS_POLL_CLEANUP | SQS_POLL_THR_QUIESCED |
1281da14cebeSEric Cheng SQS_POLL_QUIESCE_DONE);
1282da14cebeSEric Cheng sqp->sq_ill = NULL;
1283da14cebeSEric Cheng
1284da14cebeSEric Cheng rx_ring->rr_rx_handle = NULL;
1285da14cebeSEric Cheng rx_ring->rr_intr_handle = NULL;
1286da14cebeSEric Cheng rx_ring->rr_intr_enable = NULL;
1287da14cebeSEric Cheng rx_ring->rr_intr_disable = NULL;
1288da14cebeSEric Cheng sqp->sq_state |= SQS_POLL_CLEANUP_DONE;
1289da14cebeSEric Cheng } else {
1290da14cebeSEric Cheng sqp->sq_state &= ~SQS_POLL_QUIESCE;
1291da14cebeSEric Cheng sqp->sq_state |= SQS_POLL_QUIESCE_DONE;
1292da14cebeSEric Cheng }
1293da14cebeSEric Cheng /*
1294da14cebeSEric Cheng * Signal any waiter that is waiting for the quiesce or cleanup
1295da14cebeSEric Cheng * to complete and also wait for it to actually see and reset the
1296da14cebeSEric Cheng * SQS_POLL_CLEANUP_DONE.
1297da14cebeSEric Cheng */
1298da14cebeSEric Cheng cv_signal(&sqp->sq_ctrlop_done_cv);
1299da14cebeSEric Cheng mutex_exit(&ill->ill_lock);
1300da14cebeSEric Cheng if (sqp->sq_state & SQS_POLL_CLEANUP_DONE) {
1301da14cebeSEric Cheng cv_wait(&sqp->sq_worker_cv, &sqp->sq_lock);
1302da14cebeSEric Cheng sqp->sq_state &= ~(SQS_PROC | SQS_WORKER);
13037c478bd9Sstevel@tonic-gate }
13047c478bd9Sstevel@tonic-gate }
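
/*
 * Illustrative sketch (not actual code from this file): what a control
 * requestor, e.g. the Rx ring teardown path in ip_squeue.c, is expected
 * to do to drive the cleanup handshake above. This is derived from the
 * handshake implemented in squeue_worker_thr_control() and assumes that
 * SQS_POLL_CLEANUP is included in the SQS_WORKER_THR_CONTROL mask, as the
 * worker loop implies; the exact requestor code lives elsewhere.
 *
 *	mutex_enter(&sqp->sq_lock);
 *	sqp->sq_state |= SQS_POLL_CLEANUP;
 *	cv_signal(&sqp->sq_worker_cv);
 *	while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
 *		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
 *	sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;
 *	cv_signal(&sqp->sq_worker_cv);
 *	mutex_exit(&sqp->sq_lock);
 *
 * The final cv_signal() lets the worker, which waits at the end of
 * squeue_worker_thr_control(), see that SQS_POLL_CLEANUP_DONE has been
 * consumed and then drop SQS_PROC and SQS_WORKER.
 */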
13057c478bd9Sstevel@tonic-gate
13067c478bd9Sstevel@tonic-gate static void
13077c478bd9Sstevel@tonic-gate squeue_worker(squeue_t *sqp)
13087c478bd9Sstevel@tonic-gate {
13097c478bd9Sstevel@tonic-gate kmutex_t *lock = &sqp->sq_lock;
1310da14cebeSEric Cheng kcondvar_t *async = &sqp->sq_worker_cv;
13117c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo;
1312d19d6468Sbw hrtime_t now;
13137c478bd9Sstevel@tonic-gate
1314da14cebeSEric Cheng CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "sq_worker");
13157c478bd9Sstevel@tonic-gate mutex_enter(lock);
13167c478bd9Sstevel@tonic-gate
13177c478bd9Sstevel@tonic-gate for (;;) {
1318da14cebeSEric Cheng for (;;) {
1319da14cebeSEric Cheng /*
1320da14cebeSEric Cheng * If the poll thread has handed control to us,
1321da14cebeSEric Cheng * we need to break out of the wait.
1322da14cebeSEric Cheng */
1323da14cebeSEric Cheng if (sqp->sq_state & SQS_PROC_HELD)
1324da14cebeSEric Cheng break;
1325da14cebeSEric Cheng
1326da14cebeSEric Cheng /*
1327da14cebeSEric Cheng * If the squeue is not being processed and we either
1328da14cebeSEric Cheng * have messages to drain or some thread has signaled
1329da14cebeSEric Cheng * some control activity, we need to break out.
1330da14cebeSEric Cheng */
1331da14cebeSEric Cheng if (!(sqp->sq_state & SQS_PROC) &&
1332da14cebeSEric Cheng ((sqp->sq_state & SQS_WORKER_THR_CONTROL) ||
1333da14cebeSEric Cheng (sqp->sq_first != NULL)))
1334da14cebeSEric Cheng break;
1335da14cebeSEric Cheng
1336da14cebeSEric Cheng /*
1337da14cebeSEric Cheng * If we have started some control action, then check
1338da14cebeSEric Cheng * for the SQS_WORKER flag (since we don't
1339da14cebeSEric Cheng * release the squeue) to make sure we own the squeue
1340da14cebeSEric Cheng * and break out
1341da14cebeSEric Cheng */
1342da14cebeSEric Cheng if ((sqp->sq_state & SQS_WORKER_THR_CONTROL) &&
1343da14cebeSEric Cheng (sqp->sq_state & SQS_WORKER))
1344da14cebeSEric Cheng break;
1345da14cebeSEric Cheng
13467c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
13477c478bd9Sstevel@tonic-gate cv_wait(async, lock);
13487c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, lock);
13497c478bd9Sstevel@tonic-gate }
1350da14cebeSEric Cheng if (sqp->sq_state & SQS_WORKER_THR_CONTROL) {
1351da14cebeSEric Cheng squeue_worker_thr_control(sqp);
1352da14cebeSEric Cheng continue;
13537c478bd9Sstevel@tonic-gate }
1354da14cebeSEric Cheng ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
1355da14cebeSEric Cheng SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
1356da14cebeSEric Cheng SQS_WORKER_THR_CONTROL | SQS_POLL_THR_CONTROL)));
13577c478bd9Sstevel@tonic-gate
1358da14cebeSEric Cheng if (sqp->sq_state & SQS_PROC_HELD)
1359da14cebeSEric Cheng sqp->sq_state &= ~SQS_PROC_HELD;
1360da14cebeSEric Cheng
1361d19d6468Sbw now = gethrtime();
13627c478bd9Sstevel@tonic-gate sqp->sq_run = curthread;
1363da14cebeSEric Cheng sqp->sq_drain(sqp, SQS_WORKER, now + squeue_drain_ns);
13647c478bd9Sstevel@tonic-gate sqp->sq_run = NULL;
13657c478bd9Sstevel@tonic-gate }
13667c478bd9Sstevel@tonic-gate }
13677c478bd9Sstevel@tonic-gate
13687c478bd9Sstevel@tonic-gate uintptr_t *
13697c478bd9Sstevel@tonic-gate squeue_getprivate(squeue_t *sqp, sqprivate_t p)
13707c478bd9Sstevel@tonic-gate {
13717c478bd9Sstevel@tonic-gate ASSERT(p < SQPRIVATE_MAX);
13727c478bd9Sstevel@tonic-gate
13737c478bd9Sstevel@tonic-gate return (&sqp->sq_private[p]);
13747c478bd9Sstevel@tonic-gate }
13750f1702c5SYu Xiangning
13760f1702c5SYu Xiangning /* ARGSUSED */
13770f1702c5SYu Xiangning void
1378bd670b35SErik Nordmark squeue_wakeup_conn(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
13790f1702c5SYu Xiangning {
13800f1702c5SYu Xiangning conn_t *connp = (conn_t *)arg;
13810f1702c5SYu Xiangning squeue_t *sqp = connp->conn_sqp;
13820f1702c5SYu Xiangning
13830f1702c5SYu Xiangning /*
13840f1702c5SYu Xiangning * Mark the squeue as paused before waking up the thread stuck
13850f1702c5SYu Xiangning * in squeue_synch_enter().
13860f1702c5SYu Xiangning */
13870f1702c5SYu Xiangning mutex_enter(&sqp->sq_lock);
13880f1702c5SYu Xiangning sqp->sq_state |= SQS_PAUSE;
13890f1702c5SYu Xiangning
13900f1702c5SYu Xiangning /*
13910f1702c5SYu Xiangning * Notify the thread that it's OK to proceed; that is done by
13920f1702c5SYu Xiangning * clearing the MSGWAITSYNC flag. The synch thread will free the mblk.
13930f1702c5SYu Xiangning */
13940f1702c5SYu Xiangning ASSERT(mp->b_flag & MSGWAITSYNC);
13950f1702c5SYu Xiangning mp->b_flag &= ~MSGWAITSYNC;
13960f1702c5SYu Xiangning cv_broadcast(&connp->conn_sq_cv);
13970f1702c5SYu Xiangning
13980f1702c5SYu Xiangning /*
13990f1702c5SYu Xiangning * We are doing something on behalf of another thread, so we have to
14000f1702c5SYu Xiangning * pause and wait until it finishes.
14010f1702c5SYu Xiangning */
14020f1702c5SYu Xiangning while (sqp->sq_state & SQS_PAUSE) {
14030f1702c5SYu Xiangning cv_wait(&sqp->sq_synch_cv, &sqp->sq_lock);
14040f1702c5SYu Xiangning }
14050f1702c5SYu Xiangning mutex_exit(&sqp->sq_lock);
14060f1702c5SYu Xiangning }
14070f1702c5SYu Xiangning
14080f1702c5SYu Xiangning int
14099ee3959aSAnders Persson squeue_synch_enter(conn_t *connp, mblk_t *use_mp)
14100f1702c5SYu Xiangning {
14119ee3959aSAnders Persson squeue_t *sqp;
14129ee3959aSAnders Persson
14139ee3959aSAnders Persson again:
14149ee3959aSAnders Persson sqp = connp->conn_sqp;
14159ee3959aSAnders Persson
14160f1702c5SYu Xiangning mutex_enter(&sqp->sq_lock);
14170f1702c5SYu Xiangning if (sqp->sq_first == NULL && !(sqp->sq_state & SQS_PROC)) {
14180f1702c5SYu Xiangning /*
14190f1702c5SYu Xiangning * We are OK to proceed if the squeue is empty, and
14200f1702c5SYu Xiangning * no one owns the squeue.
14210f1702c5SYu Xiangning *
14220f1702c5SYu Xiangning * The caller won't own the squeue as this is called from the
14230f1702c5SYu Xiangning * application.
14240f1702c5SYu Xiangning */
14250f1702c5SYu Xiangning ASSERT(sqp->sq_run == NULL);
14260f1702c5SYu Xiangning
14270f1702c5SYu Xiangning sqp->sq_state |= SQS_PROC;
14280f1702c5SYu Xiangning sqp->sq_run = curthread;
14290f1702c5SYu Xiangning mutex_exit(&sqp->sq_lock);
14300f1702c5SYu Xiangning
14319ee3959aSAnders Persson /*
14329ee3959aSAnders Persson * Handle squeue switching. The conn's squeue can only change
14339ee3959aSAnders Persson * while there is a thread in the squeue, which is why we do
14349ee3959aSAnders Persson * the check after entering the squeue. If it has changed, exit
14359ee3959aSAnders Persson * this squeue and redo everything with the new squeue.
14369ee3959aSAnders Persson */
14379ee3959aSAnders Persson if (sqp != connp->conn_sqp) {
14389ee3959aSAnders Persson mutex_enter(&sqp->sq_lock);
14399ee3959aSAnders Persson sqp->sq_state &= ~SQS_PROC;
14409ee3959aSAnders Persson sqp->sq_run = NULL;
14419ee3959aSAnders Persson mutex_exit(&sqp->sq_lock);
14429ee3959aSAnders Persson goto again;
14439ee3959aSAnders Persson }
14440f1702c5SYu Xiangning #if SQUEUE_DEBUG
14450f1702c5SYu Xiangning sqp->sq_curmp = NULL;
14460f1702c5SYu Xiangning sqp->sq_curproc = NULL;
14470f1702c5SYu Xiangning sqp->sq_connp = connp;
14480f1702c5SYu Xiangning #endif
14490f1702c5SYu Xiangning connp->conn_on_sqp = B_TRUE;
14500f1702c5SYu Xiangning return (0);
14510f1702c5SYu Xiangning } else {
14520f1702c5SYu Xiangning mblk_t *mp;
14530f1702c5SYu Xiangning
1454f3124163SAnders Persson mp = (use_mp == NULL) ? allocb(0, BPRI_MED) : use_mp;
14550f1702c5SYu Xiangning if (mp == NULL) {
14560f1702c5SYu Xiangning mutex_exit(&sqp->sq_lock);
14570f1702c5SYu Xiangning return (ENOMEM);
14580f1702c5SYu Xiangning }
14590f1702c5SYu Xiangning
14600f1702c5SYu Xiangning /*
14610f1702c5SYu Xiangning * We mark the mblk as awaiting synchronous squeue access
14620f1702c5SYu Xiangning * by setting the MSGWAITSYNC flag. Once squeue_wakeup_conn
14630f1702c5SYu Xiangning * fires, MSGWAITSYNC is cleared, at which point we know we
14640f1702c5SYu Xiangning * have exclusive access.
14650f1702c5SYu Xiangning */
14660f1702c5SYu Xiangning mp->b_flag |= MSGWAITSYNC;
14670f1702c5SYu Xiangning
14680f1702c5SYu Xiangning CONN_INC_REF(connp);
14690f1702c5SYu Xiangning SET_SQUEUE(mp, squeue_wakeup_conn, connp);
14700f1702c5SYu Xiangning ENQUEUE_CHAIN(sqp, mp, mp, 1);
14710f1702c5SYu Xiangning
14720f1702c5SYu Xiangning ASSERT(sqp->sq_run != curthread);
14730f1702c5SYu Xiangning
14740f1702c5SYu Xiangning /* Wait until the enqueued mblk gets processed. */
14750f1702c5SYu Xiangning while (mp->b_flag & MSGWAITSYNC)
14760f1702c5SYu Xiangning cv_wait(&connp->conn_sq_cv, &sqp->sq_lock);
14770f1702c5SYu Xiangning mutex_exit(&sqp->sq_lock);
14780f1702c5SYu Xiangning
1479f3124163SAnders Persson if (use_mp == NULL)
14800f1702c5SYu Xiangning freeb(mp);
14810f1702c5SYu Xiangning
14820f1702c5SYu Xiangning return (0);
14830f1702c5SYu Xiangning }
14840f1702c5SYu Xiangning }
14850f1702c5SYu Xiangning
14860f1702c5SYu Xiangning void
14879ee3959aSAnders Persson squeue_synch_exit(conn_t *connp)
14880f1702c5SYu Xiangning {
14899ee3959aSAnders Persson squeue_t *sqp = connp->conn_sqp;
14909ee3959aSAnders Persson
14910f1702c5SYu Xiangning mutex_enter(&sqp->sq_lock);
14920f1702c5SYu Xiangning if (sqp->sq_run == curthread) {
14930f1702c5SYu Xiangning ASSERT(sqp->sq_state & SQS_PROC);
14940f1702c5SYu Xiangning
14950f1702c5SYu Xiangning sqp->sq_state &= ~SQS_PROC;
14960f1702c5SYu Xiangning sqp->sq_run = NULL;
14970f1702c5SYu Xiangning connp->conn_on_sqp = B_FALSE;
14980f1702c5SYu Xiangning
14990f1702c5SYu Xiangning if (sqp->sq_first == NULL) {
15000f1702c5SYu Xiangning mutex_exit(&sqp->sq_lock);
15010f1702c5SYu Xiangning } else {
15020f1702c5SYu Xiangning /*
15030f1702c5SYu Xiangning * If this was a normal thread, then it would
15040f1702c5SYu Xiangning * (most likely) continue processing the pending
15050f1702c5SYu Xiangning * requests. Since the just completed operation
15060f1702c5SYu Xiangning * was executed synchronously, the thread should
15070f1702c5SYu Xiangning * not be delayed. To compensate, wake up the
15080f1702c5SYu Xiangning * worker thread right away when there are outstanding
15090f1702c5SYu Xiangning * requests.
15100f1702c5SYu Xiangning */
1511d3d50737SRafael Vanoni sqp->sq_awaken = ddi_get_lbolt();
15120f1702c5SYu Xiangning cv_signal(&sqp->sq_worker_cv);
15130f1702c5SYu Xiangning mutex_exit(&sqp->sq_lock);
15140f1702c5SYu Xiangning }
15150f1702c5SYu Xiangning } else {
15160f1702c5SYu Xiangning /*
15170f1702c5SYu Xiangning * The caller doesn't own the squeue, clear the SQS_PAUSE flag,
15180f1702c5SYu Xiangning * and wake up the squeue owner so that the owner can continue
15190f1702c5SYu Xiangning * processing.
15200f1702c5SYu Xiangning */
15210f1702c5SYu Xiangning ASSERT(sqp->sq_state & SQS_PAUSE);
15220f1702c5SYu Xiangning sqp->sq_state &= ~SQS_PAUSE;
15230f1702c5SYu Xiangning
15240f1702c5SYu Xiangning /* There should be only one thread blocking on sq_synch_cv. */
15250f1702c5SYu Xiangning cv_signal(&sqp->sq_synch_cv);
15260f1702c5SYu Xiangning mutex_exit(&sqp->sq_lock);
15270f1702c5SYu Xiangning }
15280f1702c5SYu Xiangning }
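
/*
 * Illustrative usage sketch (hypothetical caller, not code from this
 * file): the intended pairing of squeue_synch_enter() and
 * squeue_synch_exit() for a thread, e.g. in the socket layer, that needs
 * synchronous, exclusive access to the conn's squeue. The "modify conn
 * state" step stands in for whatever work the caller actually does.
 *
 *	int
 *	example_synch_op(conn_t *connp)
 *	{
 *		int error;
 *
 *		error = squeue_synch_enter(connp, NULL);
 *		if (error != 0)
 *			return (error);
 *
 *		(modify conn state here; no other thread is processing
 *		the squeue until squeue_synch_exit() is called)
 *
 *		squeue_synch_exit(connp);
 *		return (0);
 *	}
 */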
1529