xref: /titanic_51/usr/src/uts/common/inet/ip/ip_squeue.c (revision d3d50737e566cade9a08d73d2af95105ac7cd960)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ee4701baSericheng  * Common Development and Distribution License (the "License").
6ee4701baSericheng  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
228670947dSThirumalai Srinivasan  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * IP interface to squeues.
287c478bd9Sstevel@tonic-gate  *
29da14cebeSEric Cheng  * IP uses squeues to force serialization of packets, both incoming and
30da14cebeSEric Cheng  * outgoing. Each squeue is associated with a connection instance (conn_t)
31da14cebeSEric Cheng  * above, and a soft ring (if enabled) below. Each CPU will have a default
32da14cebeSEric Cheng  * squeue for outbound connections, and each soft ring of an interface will
33da14cebeSEric Cheng  * have an squeue to which it sends incoming packets. squeues are never
34da14cebeSEric Cheng  * destroyed, and if they become unused they are kept around against future
35da14cebeSEric Cheng  * needs.
367c478bd9Sstevel@tonic-gate  *
37da14cebeSEric Cheng  * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
38da14cebeSEric Cheng  * in the system there will be one squeue set, all of whose squeues will be
39da14cebeSEric Cheng  * bound to that CPU, plus one additional set known as the unbound set. Sets
40da14cebeSEric Cheng  * associated with CPUs will have one default squeue, for outbound
41da14cebeSEric Cheng  * connections, and a linked list of squeues used by various NICs for inbound
42da14cebeSEric Cheng  * packets. The unbound set also has a linked list of squeues, but no default
43da14cebeSEric Cheng  * squeue.
44da14cebeSEric Cheng  *
45da14cebeSEric Cheng  * When a CPU goes offline its squeue set is destroyed, and all its squeues
46da14cebeSEric Cheng  * are moved to the unbound set. When a CPU comes online, a new squeue set is
47da14cebeSEric Cheng  * created and the default set is searched for a default squeue formerly bound
48da14cebeSEric Cheng  * to this CPU. If no default squeue is found, a new one is created.
49da14cebeSEric Cheng  *
50da14cebeSEric Cheng  * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
51da14cebeSEric Cheng  * and not the squeue code. squeue.c will not touch them, and we can modify
52da14cebeSEric Cheng  * them without holding the squeue lock because of the guarantee that squeues
53da14cebeSEric Cheng  * are never destroyed. ip_squeue locks must be held, however.
54da14cebeSEric Cheng  *
55da14cebeSEric Cheng  * All the squeue sets are protected by a single lock, the sqset_lock. This
56da14cebeSEric Cheng  * is also used to protect the sq_next and sq_set fields of an squeue_t.
57da14cebeSEric Cheng  *
58da14cebeSEric Cheng  * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
597c478bd9Sstevel@tonic-gate  *
607c478bd9Sstevel@tonic-gate  * There are two modes of associating connection with squeues. The first mode
617c478bd9Sstevel@tonic-gate  * associates each connection with the CPU that creates the connection (either
627c478bd9Sstevel@tonic-gate  * during open time or during accept time). The second mode associates each
637c478bd9Sstevel@tonic-gate  * connection with a random CPU, effectively distributing load over all CPUs
647c478bd9Sstevel@tonic-gate  * and all squeues in the system. The mode is controlled by the
657c478bd9Sstevel@tonic-gate  * ip_squeue_fanout variable.
667c478bd9Sstevel@tonic-gate  *
677c478bd9Sstevel@tonic-gate  * NOTE: The fact that there is an association between each connection and
687c478bd9Sstevel@tonic-gate  * squeue and squeue and CPU does not mean that each connection is always
697c478bd9Sstevel@tonic-gate  * processed on this CPU and on this CPU only. Any thread calling squeue_enter()
707c478bd9Sstevel@tonic-gate  * may process the connection on whatever CPU it is scheduled. The squeue to CPU
717c478bd9Sstevel@tonic-gate  * binding is only relevant for the worker thread.
727c478bd9Sstevel@tonic-gate  *
737c478bd9Sstevel@tonic-gate  * INTERFACE:
747c478bd9Sstevel@tonic-gate  *
75da14cebeSEric Cheng  * squeue_t *ip_squeue_get(ill_rx_ring_t)
767c478bd9Sstevel@tonic-gate  *
77da14cebeSEric Cheng  * Returns the squeue associated with an ill receive ring. If the ring is
78da14cebeSEric Cheng  * not bound to a CPU, and we're currently servicing the interrupt which
79da14cebeSEric Cheng  * generated the packet, then bind the squeue to CPU.
807c478bd9Sstevel@tonic-gate  *
817c478bd9Sstevel@tonic-gate  *
827c478bd9Sstevel@tonic-gate  * DR Notes
837c478bd9Sstevel@tonic-gate  * ========
847c478bd9Sstevel@tonic-gate  *
857c478bd9Sstevel@tonic-gate  * The ip_squeue_init() registers a call-back function with the CPU DR
867c478bd9Sstevel@tonic-gate  * subsystem using register_cpu_setup_func(). The call-back function does two
877c478bd9Sstevel@tonic-gate  * things:
887c478bd9Sstevel@tonic-gate  *
897c478bd9Sstevel@tonic-gate  * o When the CPU is going off-line or unconfigured, the worker thread is
907c478bd9Sstevel@tonic-gate  *	unbound from the CPU. This allows the CPU unconfig code to move it to
917c478bd9Sstevel@tonic-gate  *	another CPU.
927c478bd9Sstevel@tonic-gate  *
937c478bd9Sstevel@tonic-gate  * o When the CPU is going online, it creates a new squeue for this CPU if
947c478bd9Sstevel@tonic-gate  *	necessary and binds the squeue worker thread to this CPU.
957c478bd9Sstevel@tonic-gate  *
96da14cebeSEric Cheng  * TUNABLES:
977c478bd9Sstevel@tonic-gate  *
98da14cebeSEric Cheng  * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
99da14cebeSEric Cheng  * pick the default squeue from a random CPU, otherwise use our CPU's default
100da14cebeSEric Cheng  * squeue.
1017c478bd9Sstevel@tonic-gate  *
102da14cebeSEric Cheng  * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
103da14cebeSEric Cheng  * /dev/ip.
1047c478bd9Sstevel@tonic-gate  *
105da14cebeSEric Cheng  * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
1067c478bd9Sstevel@tonic-gate  * created. This is the time squeue code waits before waking up the worker
1077c478bd9Sstevel@tonic-gate  * thread after queuing a request.
1087c478bd9Sstevel@tonic-gate  */
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate #include <sys/types.h>
1117c478bd9Sstevel@tonic-gate #include <sys/debug.h>
1127c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
1137c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
1147c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate #include <inet/common.h>
1177c478bd9Sstevel@tonic-gate #include <inet/ip.h>
118da14cebeSEric Cheng #include <netinet/ip6.h>
1197c478bd9Sstevel@tonic-gate #include <inet/ip_if.h>
120da14cebeSEric Cheng #include <inet/ip_ire.h>
1217c478bd9Sstevel@tonic-gate #include <inet/nd.h>
1227c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h>
1237c478bd9Sstevel@tonic-gate #include <sys/types.h>
1247c478bd9Sstevel@tonic-gate #include <sys/conf.h>
1257c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
12669bb4bb4Scarlsonj #include <sys/dlpi.h>
1277c478bd9Sstevel@tonic-gate #include <sys/squeue_impl.h>
128da14cebeSEric Cheng #include <sys/tihdr.h>
129da14cebeSEric Cheng #include <inet/udp_impl.h>
130da14cebeSEric Cheng #include <sys/strsubr.h>
131da14cebeSEric Cheng #include <sys/zone.h>
132da14cebeSEric Cheng #include <sys/dld.h>
133317108d2SGeorge Shepherd #include <sys/atomic.h>
1347c478bd9Sstevel@tonic-gate 
1357c478bd9Sstevel@tonic-gate /*
136da14cebeSEric Cheng  * List of all created squeue sets. The list and its size are protected by
137da14cebeSEric Cheng  * sqset_lock.
1387c478bd9Sstevel@tonic-gate  */
139da14cebeSEric Cheng static squeue_set_t	**sqset_global_list; /* list 0 is the unbound list */
140da14cebeSEric Cheng static uint_t		sqset_global_size;
141da14cebeSEric Cheng kmutex_t		sqset_lock;
1424b46d1efSkrgopi 
1437c478bd9Sstevel@tonic-gate static void (*ip_squeue_create_callback)(squeue_t *) = NULL;
1447c478bd9Sstevel@tonic-gate 
1457c478bd9Sstevel@tonic-gate /*
1467c478bd9Sstevel@tonic-gate  * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
1477c478bd9Sstevel@tonic-gate  *	created. This is the time squeue code waits before waking up the worker
1487c478bd9Sstevel@tonic-gate  *	thread after queuing a request.
1497c478bd9Sstevel@tonic-gate  */
1507c478bd9Sstevel@tonic-gate uint_t ip_squeue_worker_wait = 10;
1517c478bd9Sstevel@tonic-gate 
152da14cebeSEric Cheng static squeue_t *ip_squeue_create(pri_t);
153da14cebeSEric Cheng static squeue_set_t *ip_squeue_set_create(processorid_t);
1547c478bd9Sstevel@tonic-gate static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
155da14cebeSEric Cheng static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
156da14cebeSEric Cheng static void ip_squeue_set_destroy(cpu_t *);
1578df01f76Smeem static void ip_squeue_clean(void *, mblk_t *, void *);
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate #define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))
1607c478bd9Sstevel@tonic-gate 
161da14cebeSEric Cheng static squeue_t *
162da14cebeSEric Cheng ip_squeue_create(pri_t pri)
1637c478bd9Sstevel@tonic-gate {
1647c478bd9Sstevel@tonic-gate 	squeue_t *sqp;
1657c478bd9Sstevel@tonic-gate 
166da14cebeSEric Cheng 	sqp = squeue_create(ip_squeue_worker_wait, pri);
1677c478bd9Sstevel@tonic-gate 	ASSERT(sqp != NULL);
1687c478bd9Sstevel@tonic-gate 	if (ip_squeue_create_callback != NULL)
1697c478bd9Sstevel@tonic-gate 		ip_squeue_create_callback(sqp);
170da14cebeSEric Cheng 	return (sqp);
1717c478bd9Sstevel@tonic-gate }
1727c478bd9Sstevel@tonic-gate 
173da14cebeSEric Cheng /*
174da14cebeSEric Cheng  * Create a new squeue_set. If id == -1, then we're creating the unbound set,
175da14cebeSEric Cheng  * which should only happen once when we are first initialized. Otherwise id
176da14cebeSEric Cheng  * is the id of the CPU that needs a set, either because we are initializing
177da14cebeSEric Cheng  * or because the CPU has come online.
178da14cebeSEric Cheng  *
179da14cebeSEric Cheng  * If id != -1, then we need at a minimum to provide a default squeue for the
180da14cebeSEric Cheng  * new set. We search the unbound set for candidates, and if none are found we
181da14cebeSEric Cheng  * create a new one.
182da14cebeSEric Cheng  */
183da14cebeSEric Cheng static squeue_set_t *
184da14cebeSEric Cheng ip_squeue_set_create(processorid_t id)
185da14cebeSEric Cheng {
186da14cebeSEric Cheng 	squeue_set_t	*sqs;
187da14cebeSEric Cheng 	squeue_set_t	*src = sqset_global_list[0];
188da14cebeSEric Cheng 	squeue_t	**lastsqp, *sq;
	/*
	 * defaultq_lastp remembers the link that points at the chosen
	 * candidate squeue, so it can be unlinked from the unbound set
	 * later without re-walking the list.
	 */
189da14cebeSEric Cheng 	squeue_t	**defaultq_lastp = NULL;
1907c478bd9Sstevel@tonic-gate 
	/* KM_SLEEP: may block, never returns NULL. */
191da14cebeSEric Cheng 	sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
192da14cebeSEric Cheng 	sqs->sqs_cpuid = id;
193da14cebeSEric Cheng 
	/* id == -1: creating the singleton unbound set (slot 0), boot only. */
194da14cebeSEric Cheng 	if (id == -1) {
195da14cebeSEric Cheng 		ASSERT(sqset_global_size == 0);
196da14cebeSEric Cheng 		sqset_global_list[0] = sqs;
197da14cebeSEric Cheng 		sqset_global_size = 1;
1987c478bd9Sstevel@tonic-gate 		return (sqs);
1997c478bd9Sstevel@tonic-gate 	}
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	/*
202da14cebeSEric Cheng 	 * When we create an squeue set id != -1, we need to give it a
203da14cebeSEric Cheng 	 * default squeue, in order to support fanout of conns across
204da14cebeSEric Cheng 	 * CPUs. Try to find a former default squeue that matches this
205da14cebeSEric Cheng 	 * cpu id on the unbound squeue set. If no such squeue is found,
2064cc34124SThirumalai Srinivasan 	 * find some non-default TCP squeue that is free. If still no such
207da14cebeSEric Cheng 	 * candidate is found, create a new squeue.
208da14cebeSEric Cheng 	 */
209da14cebeSEric Cheng 
210da14cebeSEric Cheng 	ASSERT(MUTEX_HELD(&cpu_lock));
211da14cebeSEric Cheng 	mutex_enter(&sqset_lock);
212da14cebeSEric Cheng 	lastsqp = &src->sqs_head;
213da14cebeSEric Cheng 
214da14cebeSEric Cheng 	while (*lastsqp) {
215da14cebeSEric Cheng 		if ((*lastsqp)->sq_bind == id &&
216da14cebeSEric Cheng 		    (*lastsqp)->sq_state & SQS_DEFAULT) {
2174cc34124SThirumalai Srinivasan 			/*
2184cc34124SThirumalai Srinivasan 			 * Exact match. Former default squeue of cpu 'id'
2194cc34124SThirumalai Srinivasan 			 */
2204cc34124SThirumalai Srinivasan 			ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
221da14cebeSEric Cheng 			defaultq_lastp = lastsqp;
222da14cebeSEric Cheng 			break;
223da14cebeSEric Cheng 		}
		/* Keep the first free candidate; an exact match overrides. */
224da14cebeSEric Cheng 		if (defaultq_lastp == NULL &&
2254cc34124SThirumalai Srinivasan 		    !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
2264cc34124SThirumalai Srinivasan 			/*
2274cc34124SThirumalai Srinivasan 			 * A free non-default TCP squeue
2284cc34124SThirumalai Srinivasan 			 */
229da14cebeSEric Cheng 			defaultq_lastp = lastsqp;
230da14cebeSEric Cheng 		}
231da14cebeSEric Cheng 		lastsqp = &(*lastsqp)->sq_next;
232da14cebeSEric Cheng 	}
2334cc34124SThirumalai Srinivasan 
2344cc34124SThirumalai Srinivasan 	if (defaultq_lastp != NULL) {
235da14cebeSEric Cheng 		/* Remove from src set and set SQS_DEFAULT */
236da14cebeSEric Cheng 		sq = *defaultq_lastp;
237da14cebeSEric Cheng 		*defaultq_lastp = sq->sq_next;
238da14cebeSEric Cheng 		sq->sq_next = NULL;
239da14cebeSEric Cheng 		if (!(sq->sq_state & SQS_DEFAULT)) {
240da14cebeSEric Cheng 			mutex_enter(&sq->sq_lock);
241da14cebeSEric Cheng 			sq->sq_state |= SQS_DEFAULT;
242da14cebeSEric Cheng 			mutex_exit(&sq->sq_lock);
243da14cebeSEric Cheng 		}
244da14cebeSEric Cheng 	} else {
		/* No candidate on the unbound set; make a fresh squeue. */
245da14cebeSEric Cheng 		sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
246da14cebeSEric Cheng 		sq->sq_state |= SQS_DEFAULT;
247da14cebeSEric Cheng 	}
248da14cebeSEric Cheng 
249da14cebeSEric Cheng 	sq->sq_set = sqs;
250da14cebeSEric Cheng 	sqs->sqs_default = sq;
251da14cebeSEric Cheng 	squeue_bind(sq, id); /* this locks squeue mutex */
252da14cebeSEric Cheng 
	/* Publish the new set; one set per CPU plus the unbound set. */
253da14cebeSEric Cheng 	ASSERT(sqset_global_size <= NCPU);
254da14cebeSEric Cheng 	sqset_global_list[sqset_global_size++] = sqs;
255da14cebeSEric Cheng 	mutex_exit(&sqset_lock);
256da14cebeSEric Cheng 	return (sqs);
257da14cebeSEric Cheng }
258da14cebeSEric Cheng 
259da14cebeSEric Cheng /*
260da14cebeSEric Cheng  * Called by ill_ring_add() to find an squeue to associate with a new ring.
261da14cebeSEric Cheng  */
262da14cebeSEric Cheng 
263da14cebeSEric Cheng squeue_t *
264da14cebeSEric Cheng ip_squeue_getfree(pri_t pri)
265da14cebeSEric Cheng {
266da14cebeSEric Cheng 	squeue_set_t	*sqs = sqset_global_list[0];
267da14cebeSEric Cheng 	squeue_t	*sq;
268da14cebeSEric Cheng 
269da14cebeSEric Cheng 	mutex_enter(&sqset_lock);
270da14cebeSEric Cheng 	for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
271da14cebeSEric Cheng 		/*
2724cc34124SThirumalai Srinivasan 		 * Select a non-default TCP squeue that is free i.e. not
2734cc34124SThirumalai Srinivasan 		 * bound to any ill.
274da14cebeSEric Cheng 		 */
275da14cebeSEric Cheng 		if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
276da14cebeSEric Cheng 			break;
277da14cebeSEric Cheng 	}
278da14cebeSEric Cheng 
	/* Nothing free on the unbound set: create one and link it in. */
279da14cebeSEric Cheng 	if (sq == NULL) {
280da14cebeSEric Cheng 		sq = ip_squeue_create(pri);
281da14cebeSEric Cheng 		sq->sq_set = sqs;
282da14cebeSEric Cheng 		sq->sq_next = sqs->sqs_head;
283da14cebeSEric Cheng 		sqs->sqs_head = sq;
284da14cebeSEric Cheng 	}
285da14cebeSEric Cheng 
	/* A recycled squeue must not be mid-way through any control op. */
286da14cebeSEric Cheng 	ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
287da14cebeSEric Cheng 	    SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
288da14cebeSEric Cheng 	    SQS_POLL_THR_QUIESCED)));
289da14cebeSEric Cheng 
	/* Claim it for the ill before dropping sqset_lock. */
290da14cebeSEric Cheng 	mutex_enter(&sq->sq_lock);
291da14cebeSEric Cheng 	sq->sq_state |= SQS_ILL_BOUND;
292da14cebeSEric Cheng 	mutex_exit(&sq->sq_lock);
293da14cebeSEric Cheng 	mutex_exit(&sqset_lock);
294da14cebeSEric Cheng 
	/*
	 * Reprice the squeue's worker and poll threads to the requested
	 * (flow) priority if the recycled squeue was at a different one.
	 */
295da14cebeSEric Cheng 	if (sq->sq_priority != pri) {
296da14cebeSEric Cheng 		thread_lock(sq->sq_worker);
297da14cebeSEric Cheng 		(void) thread_change_pri(sq->sq_worker, pri, 0);
298da14cebeSEric Cheng 		thread_unlock(sq->sq_worker);
299da14cebeSEric Cheng 
300da14cebeSEric Cheng 		thread_lock(sq->sq_poll_thr);
301da14cebeSEric Cheng 		(void) thread_change_pri(sq->sq_poll_thr, pri, 0);
302da14cebeSEric Cheng 		thread_unlock(sq->sq_poll_thr);
303da14cebeSEric Cheng 
304da14cebeSEric Cheng 		sq->sq_priority = pri;
305da14cebeSEric Cheng 	}
306da14cebeSEric Cheng 	return (sq);
307da14cebeSEric Cheng }
308da14cebeSEric Cheng 
309da14cebeSEric Cheng /*
3107c478bd9Sstevel@tonic-gate  * Initialize IP squeues.
3117c478bd9Sstevel@tonic-gate  */
3127c478bd9Sstevel@tonic-gate void
3137c478bd9Sstevel@tonic-gate ip_squeue_init(void (*callback)(squeue_t *))
3147c478bd9Sstevel@tonic-gate {
3157c478bd9Sstevel@tonic-gate 	int i;
316da14cebeSEric Cheng 	squeue_set_t	*sqs;
3177c478bd9Sstevel@tonic-gate 
	/* Must only run once, at boot, before any set exists. */
3187c478bd9Sstevel@tonic-gate 	ASSERT(sqset_global_list == NULL);
3197c478bd9Sstevel@tonic-gate 
	/* 'callback' is invoked on every squeue created from here on. */
3207c478bd9Sstevel@tonic-gate 	ip_squeue_create_callback = callback;
3217c478bd9Sstevel@tonic-gate 	squeue_init();
322da14cebeSEric Cheng 	mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
	/* Slot 0 is the unbound set; one more slot per possible CPU. */
3237c478bd9Sstevel@tonic-gate 	sqset_global_list =
324da14cebeSEric Cheng 	    kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
3257c478bd9Sstevel@tonic-gate 	sqset_global_size = 0;
326da14cebeSEric Cheng 	/*
327da14cebeSEric Cheng 	 * We are called at system boot time and we don't
328da14cebeSEric Cheng 	 * expect memory allocation failure.
329da14cebeSEric Cheng 	 */
330da14cebeSEric Cheng 	sqs = ip_squeue_set_create(-1);
331da14cebeSEric Cheng 	ASSERT(sqs != NULL);
3327c478bd9Sstevel@tonic-gate 
333da14cebeSEric Cheng 	mutex_enter(&cpu_lock);
3347c478bd9Sstevel@tonic-gate 	/* Create squeue for each active CPU available */
3357c478bd9Sstevel@tonic-gate 	for (i = 0; i < NCPU; i++) {
336da14cebeSEric Cheng 		cpu_t *cp = cpu_get(i);
3377c478bd9Sstevel@tonic-gate 		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
338da14cebeSEric Cheng 			/*
339da14cebeSEric Cheng 			 * We are called at system boot time and we don't
340da14cebeSEric Cheng 			 * expect memory allocation failure then
341da14cebeSEric Cheng 			 */
342da14cebeSEric Cheng 			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
343da14cebeSEric Cheng 			ASSERT(cp->cpu_squeue_set != NULL);
3447c478bd9Sstevel@tonic-gate 		}
3457c478bd9Sstevel@tonic-gate 	}
3467c478bd9Sstevel@tonic-gate 
	/* Track future CPU online/offline (DR) events; needs cpu_lock. */
3477c478bd9Sstevel@tonic-gate 	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
3487c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
3497c478bd9Sstevel@tonic-gate }
3507c478bd9Sstevel@tonic-gate 
3517c478bd9Sstevel@tonic-gate /*
352da14cebeSEric Cheng  * Get a default squeue, either from the current CPU or a CPU derived by hash
353da14cebeSEric Cheng  * from the index argument, depending upon the setting of ip_squeue_fanout.
3547c478bd9Sstevel@tonic-gate  */
3557c478bd9Sstevel@tonic-gate squeue_t *
3567c478bd9Sstevel@tonic-gate ip_squeue_random(uint_t index)
3577c478bd9Sstevel@tonic-gate {
358da14cebeSEric Cheng 	squeue_set_t *sqs = NULL;
359da14cebeSEric Cheng 	squeue_t *sq;
3607c478bd9Sstevel@tonic-gate 
361da14cebeSEric Cheng 	/*
362da14cebeSEric Cheng 	 * The minimum value of sqset_global_size is 2, one for the unbound
363da14cebeSEric Cheng 	 * squeue set and another for the squeue set of the zeroth CPU.
364da14cebeSEric Cheng 	 * Even though the value could be changing, it can never go below 2,
365da14cebeSEric Cheng 	 * so the assert does not need the lock protection.
366da14cebeSEric Cheng 	 */
367da14cebeSEric Cheng 	ASSERT(sqset_global_size > 1);
368da14cebeSEric Cheng 
369da14cebeSEric Cheng 	/* Protect against changes to sqset_global_list */
370da14cebeSEric Cheng 	mutex_enter(&sqset_lock);
371da14cebeSEric Cheng 
372da14cebeSEric Cheng 	if (!ip_squeue_fanout)
373da14cebeSEric Cheng 		sqs = CPU->cpu_squeue_set;
374da14cebeSEric Cheng 
375da14cebeSEric Cheng 	/*
376da14cebeSEric Cheng 	 * sqset_global_list[0] corresponds to the unbound squeue set.
377da14cebeSEric Cheng 	 * The computation below picks a set other than the unbound set.
378da14cebeSEric Cheng 	 */
379da14cebeSEric Cheng 	if (sqs == NULL)
380da14cebeSEric Cheng 		sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
381da14cebeSEric Cheng 	sq = sqs->sqs_default;
382da14cebeSEric Cheng 
383da14cebeSEric Cheng 	mutex_exit(&sqset_lock);
384da14cebeSEric Cheng 	ASSERT(sq);
385da14cebeSEric Cheng 	return (sq);
3867c478bd9Sstevel@tonic-gate }
3877c478bd9Sstevel@tonic-gate 
388da14cebeSEric Cheng /*
389da14cebeSEric Cheng  * Move squeue from its current set to newset. Not used for default squeues.
390da14cebeSEric Cheng  * Bind or unbind the worker thread as appropriate.
391da14cebeSEric Cheng  */
392da14cebeSEric Cheng 
3938df01f76Smeem static void
394da14cebeSEric Cheng ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
3957c478bd9Sstevel@tonic-gate {
396da14cebeSEric Cheng 	squeue_set_t	*set;
397da14cebeSEric Cheng 	squeue_t	**lastsqp;
398da14cebeSEric Cheng 	processorid_t	cpuid = newset->sqs_cpuid;
3997c478bd9Sstevel@tonic-gate 
	/* Default squeues never move; caller holds sqset_lock, not sq_lock. */
400da14cebeSEric Cheng 	ASSERT(!(sq->sq_state & SQS_DEFAULT));
401da14cebeSEric Cheng 	ASSERT(!MUTEX_HELD(&sq->sq_lock));
402da14cebeSEric Cheng 	ASSERT(MUTEX_HELD(&sqset_lock));
4037c478bd9Sstevel@tonic-gate 
404da14cebeSEric Cheng 	set = sq->sq_set;
405da14cebeSEric Cheng 	if (set == newset)
406da14cebeSEric Cheng 		return;
407da14cebeSEric Cheng 
	/* Unlink sq from its current set; it must be on that list. */
408da14cebeSEric Cheng 	lastsqp = &set->sqs_head;
409da14cebeSEric Cheng 	while (*lastsqp != sq)
410da14cebeSEric Cheng 		lastsqp = &(*lastsqp)->sq_next;
411da14cebeSEric Cheng 
	/* Push onto the head of newset's list and record the new owner. */
412da14cebeSEric Cheng 	*lastsqp = sq->sq_next;
413da14cebeSEric Cheng 	sq->sq_next = newset->sqs_head;
414da14cebeSEric Cheng 	newset->sqs_head = sq;
415da14cebeSEric Cheng 	sq->sq_set = newset;
	/* cpuid == -1 means newset is the unbound set: drop any binding. */
416da14cebeSEric Cheng 	if (cpuid == -1)
417da14cebeSEric Cheng 		squeue_unbind(sq);
418da14cebeSEric Cheng 	else
419da14cebeSEric Cheng 		squeue_bind(sq, cpuid);
420da14cebeSEric Cheng }
421da14cebeSEric Cheng 
422da14cebeSEric Cheng /*
423da14cebeSEric Cheng  * Move squeue from its current set to cpuid's set and bind to cpuid.
424da14cebeSEric Cheng  */
425da14cebeSEric Cheng 
426da14cebeSEric Cheng int
427da14cebeSEric Cheng ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
428da14cebeSEric Cheng {
429da14cebeSEric Cheng 	cpu_t *cpu;
430da14cebeSEric Cheng 	squeue_set_t *set;
431da14cebeSEric Cheng 
	/* Default squeues belong to their CPU's set; refuse to move them. */
432da14cebeSEric Cheng 	if (sq->sq_state & SQS_DEFAULT)
433da14cebeSEric Cheng 		return (-1);
434da14cebeSEric Cheng 
435da14cebeSEric Cheng 	ASSERT(MUTEX_HELD(&cpu_lock));
436da14cebeSEric Cheng 
	/* The target CPU must exist and be online. */
437da14cebeSEric Cheng 	cpu = cpu_get(cpuid);
438da14cebeSEric Cheng 	if (!CPU_ISON(cpu))
439da14cebeSEric Cheng 		return (-1);
440da14cebeSEric Cheng 
441da14cebeSEric Cheng 	mutex_enter(&sqset_lock);
442da14cebeSEric Cheng 	set = cpu->cpu_squeue_set;
443da14cebeSEric Cheng 	if (set != NULL)
444da14cebeSEric Cheng 		ip_squeue_set_move(sq, set);
445da14cebeSEric Cheng 	mutex_exit(&sqset_lock);
	/* Returns 0 on success, -1 if the CPU had no squeue set. */
446da14cebeSEric Cheng 	return ((set == NULL) ? -1 : 0);
447da14cebeSEric Cheng }
448da14cebeSEric Cheng 
449da14cebeSEric Cheng /*
450da14cebeSEric Cheng  * The mac layer is calling, asking us to move an squeue to a
451da14cebeSEric Cheng  * new CPU. This routine is called with cpu_lock held.
452da14cebeSEric Cheng  */
453da14cebeSEric Cheng void
454da14cebeSEric Cheng ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
455da14cebeSEric Cheng {
456da14cebeSEric Cheng 	ASSERT(ILL_MAC_PERIM_HELD(ill));
457da14cebeSEric Cheng 	ASSERT(rx_ring->rr_ill == ill);
458da14cebeSEric Cheng 
	/* Ring already freed, or being freed: nothing to bind. */
459da14cebeSEric Cheng 	mutex_enter(&ill->ill_lock);
460da14cebeSEric Cheng 	if (rx_ring->rr_ring_state == RR_FREE ||
461da14cebeSEric Cheng 	    rx_ring->rr_ring_state == RR_FREE_INPROG) {
462da14cebeSEric Cheng 		mutex_exit(&ill->ill_lock);
4637c478bd9Sstevel@tonic-gate 		return;
4647c478bd9Sstevel@tonic-gate 	}
4657c478bd9Sstevel@tonic-gate 
	/* Mark the ring bound only if the move actually succeeded. */
466da14cebeSEric Cheng 	if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
467da14cebeSEric Cheng 		rx_ring->rr_ring_state = RR_SQUEUE_BOUND;
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate 	mutex_exit(&ill->ill_lock);
4707c478bd9Sstevel@tonic-gate }
4717c478bd9Sstevel@tonic-gate 
/*
 * Hook a new mac-layer receive ring (mrp is a mac_rx_fifo_t) up to an
 * squeue: pick a free slot in the ill's ring table, record the ring's
 * receive/interrupt entry points, attach a free squeue and bind it to
 * the mac-supplied CPU.  Returns the ill_rx_ring_t, or NULL when the
 * table is full.  Called with the ill's mac perimeter held.
 */
472da14cebeSEric Cheng void *
473da14cebeSEric Cheng ip_squeue_add_ring(ill_t *ill, void *mrp)
474da14cebeSEric Cheng {
475da14cebeSEric Cheng 	mac_rx_fifo_t		*mrfp = (mac_rx_fifo_t *)mrp;
476da14cebeSEric Cheng 	ill_rx_ring_t		*rx_ring, *ring_tbl;
477da14cebeSEric Cheng 	int			ip_rx_index;
478da14cebeSEric Cheng 	squeue_t		*sq = NULL;
479da14cebeSEric Cheng 	pri_t			pri;
480da14cebeSEric Cheng 
481da14cebeSEric Cheng 	ASSERT(ILL_MAC_PERIM_HELD(ill));
482da14cebeSEric Cheng 	ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
483da14cebeSEric Cheng 	ASSERT(ill->ill_dld_capab != NULL);
484da14cebeSEric Cheng 
485da14cebeSEric Cheng 	ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;
486da14cebeSEric Cheng 
	/* Find the first unused slot in the ill's ring table. */
487da14cebeSEric Cheng 	mutex_enter(&ill->ill_lock);
488da14cebeSEric Cheng 	for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
489da14cebeSEric Cheng 		rx_ring = &ring_tbl[ip_rx_index];
490da14cebeSEric Cheng 		if (rx_ring->rr_ring_state == RR_FREE)
491da14cebeSEric Cheng 			break;
492da14cebeSEric Cheng 	}
493da14cebeSEric Cheng 
494da14cebeSEric Cheng 	if (ip_rx_index == ILL_MAX_RINGS) {
4958df01f76Smeem 		/*
496da14cebeSEric Cheng 		 * We ran out of ILL_MAX_RINGS worth rx_ring structures. If
497da14cebeSEric Cheng 		 * we have devices which can overwhelm this limit,
498da14cebeSEric Cheng 		 * ILL_MAX_RING should be made configurable. Meanwhile it
499da14cebeSEric Cheng 		 * cause no panic because driver will pass ip_input a NULL
500da14cebeSEric Cheng 		 * handle which will make IP allocate the default squeue and
501da14cebeSEric Cheng 		 * Polling mode will not be used for this ring.
5028df01f76Smeem 		 */
503da14cebeSEric Cheng 		cmn_err(CE_NOTE,
504da14cebeSEric Cheng 		    "Reached maximum number of receiving rings (%d) for %s\n",
505da14cebeSEric Cheng 		    ILL_MAX_RINGS, ill->ill_name);
506da14cebeSEric Cheng 		mutex_exit(&ill->ill_lock);
507da14cebeSEric Cheng 		return (NULL);
508da14cebeSEric Cheng 	}
509da14cebeSEric Cheng 
	/* Capture the mac-provided receive and interrupt entry points. */
510da14cebeSEric Cheng 	bzero(rx_ring, sizeof (ill_rx_ring_t));
511da14cebeSEric Cheng 	rx_ring->rr_rx = (ip_mac_rx_t)mrfp->mrf_receive;
512da14cebeSEric Cheng 	/* XXX: Hard code it to tcp accept for now */
513da14cebeSEric Cheng 	rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp;
514da14cebeSEric Cheng 
515da14cebeSEric Cheng 	rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
516da14cebeSEric Cheng 	rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
517da14cebeSEric Cheng 	rx_ring->rr_intr_disable =
518da14cebeSEric Cheng 	    (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
519da14cebeSEric Cheng 	rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
520da14cebeSEric Cheng 	rx_ring->rr_ill = ill;
521da14cebeSEric Cheng 
	/* The squeue's worker/poll threads run at the flow's priority. */
522da14cebeSEric Cheng 	pri = mrfp->mrf_flow_priority;
523da14cebeSEric Cheng 
524da14cebeSEric Cheng 	sq = ip_squeue_getfree(pri);
525da14cebeSEric Cheng 
	/* Cross-link ring and squeue, and enable polling on the squeue. */
526da14cebeSEric Cheng 	mutex_enter(&sq->sq_lock);
527da14cebeSEric Cheng 	sq->sq_rx_ring = rx_ring;
528da14cebeSEric Cheng 	rx_ring->rr_sqp = sq;
529da14cebeSEric Cheng 
530da14cebeSEric Cheng 	sq->sq_state |= SQS_POLL_CAPAB;
531da14cebeSEric Cheng 
532da14cebeSEric Cheng 	rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
533da14cebeSEric Cheng 	sq->sq_ill = ill;
534da14cebeSEric Cheng 	mutex_exit(&sq->sq_lock);
535da14cebeSEric Cheng 	mutex_exit(&ill->ill_lock);
536da14cebeSEric Cheng 
537da14cebeSEric Cheng 	DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
538da14cebeSEric Cheng 	    ip_rx_index, void *, mrfp->mrf_rx_arg);
539da14cebeSEric Cheng 
540da14cebeSEric Cheng 	/* Assign the squeue to the specified CPU as well */
541da14cebeSEric Cheng 	mutex_enter(&cpu_lock);
542da14cebeSEric Cheng 	(void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
543da14cebeSEric Cheng 	mutex_exit(&cpu_lock);
544da14cebeSEric Cheng 
545da14cebeSEric Cheng 	return (rx_ring);
546da14cebeSEric Cheng }
547da14cebeSEric Cheng 
548da14cebeSEric Cheng /*
549da14cebeSEric Cheng  * sanitize the squeue etc. Some of the processing
550da14cebeSEric Cheng  * needs to be done from inside the perimeter.
551da14cebeSEric Cheng  */
552da14cebeSEric Cheng void
5538df01f76Smeem ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
5548df01f76Smeem {
5558df01f76Smeem 	squeue_t *sqp;
5568df01f76Smeem 
557da14cebeSEric Cheng 	ASSERT(ILL_MAC_PERIM_HELD(ill));
5588df01f76Smeem 	ASSERT(rx_ring != NULL);
5598df01f76Smeem 
5608df01f76Smeem 	/* Just clean one squeue */
5618df01f76Smeem 	mutex_enter(&ill->ill_lock);
562da14cebeSEric Cheng 	if (rx_ring->rr_ring_state == RR_FREE) {
5638df01f76Smeem 		mutex_exit(&ill->ill_lock);
5648df01f76Smeem 		return;
5658df01f76Smeem 	}
566da14cebeSEric Cheng 	rx_ring->rr_ring_state = RR_FREE_INPROG;
5678df01f76Smeem 	sqp = rx_ring->rr_sqp;
5688df01f76Smeem 
	/* Ask the worker thread to run the cleanup protocol. */
569da14cebeSEric Cheng 	mutex_enter(&sqp->sq_lock);
570da14cebeSEric Cheng 	sqp->sq_state |= SQS_POLL_CLEANUP;
571da14cebeSEric Cheng 	cv_signal(&sqp->sq_worker_cv);
	/* Drop the outer ill_lock before blocking; sq_lock stays held. */
5728df01f76Smeem 	mutex_exit(&ill->ill_lock);
573da14cebeSEric Cheng 	while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
574da14cebeSEric Cheng 		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
5754cc34124SThirumalai Srinivasan 	sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;
5768df01f76Smeem 
577da14cebeSEric Cheng 	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
578da14cebeSEric Cheng 	    SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
579da14cebeSEric Cheng 	    SQS_POLL_THR_QUIESCED)));
580da14cebeSEric Cheng 
	/* Wake the worker again so it can resume normal processing. */
581da14cebeSEric Cheng 	cv_signal(&sqp->sq_worker_cv);
582da14cebeSEric Cheng 	mutex_exit(&sqp->sq_lock);
5838df01f76Smeem 
5848df01f76Smeem 	/*
5854cc34124SThirumalai Srinivasan 	 * Move the squeue to sqset_global_list[0] which holds the set of
5864cc34124SThirumalai Srinivasan 	 * squeues not bound to any cpu. Note that the squeue is still
5874cc34124SThirumalai Srinivasan 	 * considered bound to an ill as long as SQS_ILL_BOUND is set.
5888df01f76Smeem 	 */
589da14cebeSEric Cheng 	mutex_enter(&sqset_lock);
590da14cebeSEric Cheng 	ip_squeue_set_move(sqp, sqset_global_list[0]);
591da14cebeSEric Cheng 	mutex_exit(&sqset_lock);
5928df01f76Smeem 
5934cc34124SThirumalai Srinivasan 	/*
5944cc34124SThirumalai Srinivasan 	 * CPU going offline can also trigger a move of the squeue to the
5954cc34124SThirumalai Srinivasan 	 * unbound set sqset_global_list[0]. However the squeue won't be
5964cc34124SThirumalai Srinivasan 	 * recycled for the next use as long as the SQS_ILL_BOUND flag
5974cc34124SThirumalai Srinivasan 	 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
5984cc34124SThirumalai Srinivasan 	 * end after the move.
5994cc34124SThirumalai Srinivasan 	 */
6004cc34124SThirumalai Srinivasan 	mutex_enter(&sqp->sq_lock);
6014cc34124SThirumalai Srinivasan 	sqp->sq_state &= ~SQS_ILL_BOUND;
6024cc34124SThirumalai Srinivasan 	mutex_exit(&sqp->sq_lock);
6034cc34124SThirumalai Srinivasan 
	/* Finally mark the ring slot reusable. */
6048df01f76Smeem 	mutex_enter(&ill->ill_lock);
605da14cebeSEric Cheng 	rx_ring->rr_ring_state = RR_FREE;
6068df01f76Smeem 	mutex_exit(&ill->ill_lock);
6078df01f76Smeem }
6088df01f76Smeem 
609da14cebeSEric Cheng /*
610da14cebeSEric Cheng  * Stop the squeue from polling. This needs to be done
611da14cebeSEric Cheng  * from inside the perimeter.
612da14cebeSEric Cheng  */
613da14cebeSEric Cheng void
614da14cebeSEric Cheng ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
615da14cebeSEric Cheng {
616da14cebeSEric Cheng 	squeue_t *sqp;
617da14cebeSEric Cheng 
618da14cebeSEric Cheng 	ASSERT(ILL_MAC_PERIM_HELD(ill));
619da14cebeSEric Cheng 	ASSERT(rx_ring != NULL);
620da14cebeSEric Cheng 
621da14cebeSEric Cheng 	sqp = rx_ring->rr_sqp;
622da14cebeSEric Cheng 	mutex_enter(&sqp->sq_lock);
623da14cebeSEric Cheng 	sqp->sq_state |= SQS_POLL_QUIESCE;
624da14cebeSEric Cheng 	cv_signal(&sqp->sq_worker_cv);
625da14cebeSEric Cheng 	while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
626da14cebeSEric Cheng 		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
627da14cebeSEric Cheng 
628da14cebeSEric Cheng 	mutex_exit(&sqp->sq_lock);
629da14cebeSEric Cheng }
630da14cebeSEric Cheng 
631da14cebeSEric Cheng /*
632da14cebeSEric Cheng  * Restart polling etc. Needs to be inside the perimeter to
633da14cebeSEric Cheng  * prevent races.
634da14cebeSEric Cheng  */
635da14cebeSEric Cheng void
636da14cebeSEric Cheng ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
637da14cebeSEric Cheng {
638da14cebeSEric Cheng 	squeue_t *sqp;
639da14cebeSEric Cheng 
640da14cebeSEric Cheng 	ASSERT(ILL_MAC_PERIM_HELD(ill));
641da14cebeSEric Cheng 	ASSERT(rx_ring != NULL);
642da14cebeSEric Cheng 
643da14cebeSEric Cheng 	sqp = rx_ring->rr_sqp;
644da14cebeSEric Cheng 	mutex_enter(&sqp->sq_lock);
645da14cebeSEric Cheng 	/*
646da14cebeSEric Cheng 	 * Handle change in number of rings between the quiesce and
647da14cebeSEric Cheng 	 * restart operations by checking for a previous quiesce before
648da14cebeSEric Cheng 	 * attempting a restart.
649da14cebeSEric Cheng 	 */
650da14cebeSEric Cheng 	if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
651da14cebeSEric Cheng 		mutex_exit(&sqp->sq_lock);
652da14cebeSEric Cheng 		return;
653da14cebeSEric Cheng 	}
654da14cebeSEric Cheng 	sqp->sq_state |= SQS_POLL_RESTART;
655da14cebeSEric Cheng 	cv_signal(&sqp->sq_worker_cv);
656da14cebeSEric Cheng 	while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
657da14cebeSEric Cheng 		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
658da14cebeSEric Cheng 	sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
659da14cebeSEric Cheng 	mutex_exit(&sqp->sq_lock);
660da14cebeSEric Cheng }
661da14cebeSEric Cheng 
662da14cebeSEric Cheng /*
663da14cebeSEric Cheng  * sanitize all squeues associated with the ill.
664da14cebeSEric Cheng  */
6658df01f76Smeem void
6668df01f76Smeem ip_squeue_clean_all(ill_t *ill)
6678df01f76Smeem {
6688df01f76Smeem 	int idx;
669da14cebeSEric Cheng 	ill_rx_ring_t	*rx_ring;
6708df01f76Smeem 
6718df01f76Smeem 	for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
672da14cebeSEric Cheng 		rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
673da14cebeSEric Cheng 		ip_squeue_clean_ring(ill, rx_ring);
6748df01f76Smeem 	}
6754b46d1efSkrgopi }
6764b46d1efSkrgopi 
6774b46d1efSkrgopi /*
678da14cebeSEric Cheng  * Used by IP to get the squeue associated with a ring. If the squeue isn't
679da14cebeSEric Cheng  * yet bound to a CPU, and we're being called directly from the NIC's
680da14cebeSEric Cheng  * interrupt, then we know what CPU we want to assign the squeue to, so
681da14cebeSEric Cheng  * dispatch that task to a taskq.
6827c478bd9Sstevel@tonic-gate  */
6837c478bd9Sstevel@tonic-gate squeue_t *
6847c478bd9Sstevel@tonic-gate ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
6857c478bd9Sstevel@tonic-gate {
6867c478bd9Sstevel@tonic-gate 	squeue_t 	*sqp;
6877c478bd9Sstevel@tonic-gate 
688da14cebeSEric Cheng 	if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
689*d3d50737SRafael Vanoni 		return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));
6907c478bd9Sstevel@tonic-gate 
6917c478bd9Sstevel@tonic-gate 	return (sqp);
6927c478bd9Sstevel@tonic-gate }
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate /*
695da14cebeSEric Cheng  * Called when a CPU goes offline. It's squeue_set_t is destroyed, and all
696da14cebeSEric Cheng  * squeues are unboudn and moved to the unbound set.
6977c478bd9Sstevel@tonic-gate  */
698da14cebeSEric Cheng static void
699da14cebeSEric Cheng ip_squeue_set_destroy(cpu_t *cpu)
7007c478bd9Sstevel@tonic-gate {
7017c478bd9Sstevel@tonic-gate 	int i;
702da14cebeSEric Cheng 	squeue_t *sqp, *lastsqp = NULL;
703da14cebeSEric Cheng 	squeue_set_t *sqs, *unbound = sqset_global_list[0];
7047c478bd9Sstevel@tonic-gate 
705da14cebeSEric Cheng 	mutex_enter(&sqset_lock);
706da14cebeSEric Cheng 	if ((sqs = cpu->cpu_squeue_set) == NULL) {
707da14cebeSEric Cheng 		mutex_exit(&sqset_lock);
708da14cebeSEric Cheng 		return;
7097c478bd9Sstevel@tonic-gate 	}
7107c478bd9Sstevel@tonic-gate 
711da14cebeSEric Cheng 	/* Move all squeues to unbound set */
712da14cebeSEric Cheng 
713da14cebeSEric Cheng 	for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
714da14cebeSEric Cheng 		squeue_unbind(sqp);
715da14cebeSEric Cheng 		sqp->sq_set = unbound;
716da14cebeSEric Cheng 	}
717da14cebeSEric Cheng 	if (sqs->sqs_head) {
718da14cebeSEric Cheng 		lastsqp->sq_next = unbound->sqs_head;
719da14cebeSEric Cheng 		unbound->sqs_head = sqs->sqs_head;
7207c478bd9Sstevel@tonic-gate 	}
7217c478bd9Sstevel@tonic-gate 
722da14cebeSEric Cheng 	/* Also move default squeue to unbound set */
7237c478bd9Sstevel@tonic-gate 
724da14cebeSEric Cheng 	sqp = sqs->sqs_default;
7254cc34124SThirumalai Srinivasan 	ASSERT(sqp != NULL);
726da14cebeSEric Cheng 	ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);
7277c478bd9Sstevel@tonic-gate 
728da14cebeSEric Cheng 	sqp->sq_next = unbound->sqs_head;
729da14cebeSEric Cheng 	unbound->sqs_head = sqp;
730da14cebeSEric Cheng 	squeue_unbind(sqp);
731da14cebeSEric Cheng 	sqp->sq_set = unbound;
7327c478bd9Sstevel@tonic-gate 
733da14cebeSEric Cheng 	for (i = 1; i < sqset_global_size; i++)
734da14cebeSEric Cheng 		if (sqset_global_list[i] == sqs)
735da14cebeSEric Cheng 			break;
7367c478bd9Sstevel@tonic-gate 
737da14cebeSEric Cheng 	ASSERT(i < sqset_global_size);
738da14cebeSEric Cheng 	sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
739da14cebeSEric Cheng 	sqset_global_list[sqset_global_size - 1] = NULL;
740da14cebeSEric Cheng 	sqset_global_size--;
7417c478bd9Sstevel@tonic-gate 
742da14cebeSEric Cheng 	mutex_exit(&sqset_lock);
743da14cebeSEric Cheng 	kmem_free(sqs, sizeof (*sqs));
7447c478bd9Sstevel@tonic-gate }
7457c478bd9Sstevel@tonic-gate 
7467c478bd9Sstevel@tonic-gate /*
7477c478bd9Sstevel@tonic-gate  * Reconfiguration callback
7487c478bd9Sstevel@tonic-gate  */
7497c478bd9Sstevel@tonic-gate /* ARGSUSED */
7507c478bd9Sstevel@tonic-gate static int
7517c478bd9Sstevel@tonic-gate ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
7527c478bd9Sstevel@tonic-gate {
753da14cebeSEric Cheng 	cpu_t *cp = cpu_get(id);
7547c478bd9Sstevel@tonic-gate 
7557c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
7567c478bd9Sstevel@tonic-gate 	switch (what) {
757f8a52c19Sakolb 	case CPU_CONFIG:
7587c478bd9Sstevel@tonic-gate 	case CPU_ON:
7597c478bd9Sstevel@tonic-gate 	case CPU_INIT:
7607c478bd9Sstevel@tonic-gate 	case CPU_CPUPART_IN:
7618670947dSThirumalai Srinivasan 		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
762da14cebeSEric Cheng 			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
7637c478bd9Sstevel@tonic-gate 		break;
7647c478bd9Sstevel@tonic-gate 	case CPU_UNCONFIG:
7657c478bd9Sstevel@tonic-gate 	case CPU_OFF:
7667c478bd9Sstevel@tonic-gate 	case CPU_CPUPART_OUT:
7677c478bd9Sstevel@tonic-gate 		if (cp->cpu_squeue_set != NULL) {
768da14cebeSEric Cheng 			ip_squeue_set_destroy(cp);
769da14cebeSEric Cheng 			cp->cpu_squeue_set = NULL;
7707c478bd9Sstevel@tonic-gate 		}
7717c478bd9Sstevel@tonic-gate 		break;
7727c478bd9Sstevel@tonic-gate 	default:
7737c478bd9Sstevel@tonic-gate 		break;
7747c478bd9Sstevel@tonic-gate 	}
7757c478bd9Sstevel@tonic-gate 	return (0);
7767c478bd9Sstevel@tonic-gate }
777