xref: /freebsd/sys/rpc/svc.c (revision 78cd75393ec79565c63927bf200f06f839a1dc05)
1 /*	$NetBSD: svc.c,v 1.21 2000/07/06 03:10:35 christos Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  * Copyright (c) 2009, Sun Microsystems, Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  * - Redistributions of source code must retain the above copyright notice,
12  *   this list of conditions and the following disclaimer.
13  * - Redistributions in binary form must reproduce the above copyright notice,
14  *   this list of conditions and the following disclaimer in the documentation
15  *   and/or other materials provided with the distribution.
16  * - Neither the name of Sun Microsystems, Inc. nor the names of its
17  *   contributors may be used to endorse or promote products derived
18  *   from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 /*
35  * svc.c, Server-side remote procedure call interface.
36  *
37  * There are two sets of procedures here.  The xprt routines are
38  * for handling transport handles.  The svc routines handle the
39  * list of service routines.
40  *
41  * Copyright (C) 1984, Sun Microsystems, Inc.
42  */
43 
44 #include <sys/param.h>
45 #include <sys/jail.h>
46 #include <sys/lock.h>
47 #include <sys/kernel.h>
48 #include <sys/kthread.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/mutex.h>
52 #include <sys/proc.h>
53 #include <sys/queue.h>
54 #include <sys/socketvar.h>
55 #include <sys/systm.h>
56 #include <sys/smp.h>
57 #include <sys/sx.h>
58 #include <sys/ucred.h>
59 
60 #include <rpc/rpc.h>
61 #include <rpc/rpcb_clnt.h>
62 #include <rpc/replay.h>
63 
64 #include <rpc/rpc_com.h>
65 
66 #define SVC_VERSQUIET 0x0001		/* keep quiet about vers mismatch */
67 #define version_keepquiet(xp) (SVC_EXT(xp)->xp_flags & SVC_VERSQUIET)
68 
69 static struct svc_callout *svc_find(SVCPOOL *pool, rpcprog_t, rpcvers_t,
70     char *);
71 static void svc_new_thread(SVCGROUP *grp);
72 static void xprt_unregister_locked(SVCXPRT *xprt);
73 static void svc_change_space_used(SVCPOOL *pool, long delta);
74 static bool_t svc_request_space_available(SVCPOOL *pool);
75 static void svcpool_cleanup(SVCPOOL *pool);
76 
77 /* ***************  SVCXPRT related stuff **************** */
78 
79 static int svcpool_minthread_sysctl(SYSCTL_HANDLER_ARGS);
80 static int svcpool_maxthread_sysctl(SYSCTL_HANDLER_ARGS);
81 static int svcpool_threads_sysctl(SYSCTL_HANDLER_ARGS);
82 
83 SVCPOOL*
84 svcpool_create(const char *name, struct sysctl_oid_list *sysctl_base)
85 {
86 	SVCPOOL *pool;
87 	SVCGROUP *grp;
88 	int g;
89 
90 	pool = malloc(sizeof(SVCPOOL), M_RPC, M_WAITOK|M_ZERO);
91 
92 	mtx_init(&pool->sp_lock, "sp_lock", NULL, MTX_DEF);
93 	pool->sp_name = name;
94 	pool->sp_state = SVCPOOL_INIT;
95 	pool->sp_proc = NULL;
96 	TAILQ_INIT(&pool->sp_callouts);
97 	TAILQ_INIT(&pool->sp_lcallouts);
98 	pool->sp_minthreads = 1;
99 	pool->sp_maxthreads = 1;
100 	pool->sp_groupcount = 1;
101 	for (g = 0; g < SVC_MAXGROUPS; g++) {
102 		grp = &pool->sp_groups[g];
103 		mtx_init(&grp->sg_lock, "sg_lock", NULL, MTX_DEF);
104 		grp->sg_pool = pool;
105 		grp->sg_state = SVCPOOL_ACTIVE;
106 		TAILQ_INIT(&grp->sg_xlist);
107 		TAILQ_INIT(&grp->sg_active);
108 		LIST_INIT(&grp->sg_idlethreads);
109 		grp->sg_minthreads = 1;
110 		grp->sg_maxthreads = 1;
111 	}
112 
113 	/*
114 	 * Don't use more than a quarter of mbuf clusters.  Nota bene:
115 	 * nmbclusters is an int, but nmbclusters*MCLBYTES may overflow
116 	 * on LP64 architectures, so cast to u_long to avoid undefined
117 	 * behavior.  (ILP32 architectures cannot have nmbclusters
118 	 * large enough to overflow for other reasons.)
119 	 */
120 	pool->sp_space_high = (u_long)nmbclusters * MCLBYTES / 4;
121 	pool->sp_space_low = (pool->sp_space_high / 3) * 2;
122 
123 	sysctl_ctx_init(&pool->sp_sysctl);
124 	if (IS_DEFAULT_VNET(curvnet) && sysctl_base) {
125 		SYSCTL_ADD_PROC(&pool->sp_sysctl, sysctl_base, OID_AUTO,
126 		    "minthreads", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
127 		    pool, 0, svcpool_minthread_sysctl, "I",
128 		    "Minimal number of threads");
129 		SYSCTL_ADD_PROC(&pool->sp_sysctl, sysctl_base, OID_AUTO,
130 		    "maxthreads", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
131 		    pool, 0, svcpool_maxthread_sysctl, "I",
132 		    "Maximal number of threads");
133 		SYSCTL_ADD_PROC(&pool->sp_sysctl, sysctl_base, OID_AUTO,
134 		    "threads", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
135 		    pool, 0, svcpool_threads_sysctl, "I",
136 		    "Current number of threads");
137 		SYSCTL_ADD_INT(&pool->sp_sysctl, sysctl_base, OID_AUTO,
138 		    "groups", CTLFLAG_RD, &pool->sp_groupcount, 0,
139 		    "Number of thread groups");
140 
141 		SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
142 		    "request_space_used", CTLFLAG_RD,
143 		    &pool->sp_space_used,
144 		    "Space in parsed but not handled requests.");
145 
146 		SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
147 		    "request_space_used_highest", CTLFLAG_RD,
148 		    &pool->sp_space_used_highest,
149 		    "Highest space used since reboot.");
150 
151 		SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
152 		    "request_space_high", CTLFLAG_RW,
153 		    &pool->sp_space_high,
154 		    "Maximum space in parsed but not handled requests.");
155 
156 		SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
157 		    "request_space_low", CTLFLAG_RW,
158 		    &pool->sp_space_low,
159 		    "Low water mark for request space.");
160 
161 		SYSCTL_ADD_INT(&pool->sp_sysctl, sysctl_base, OID_AUTO,
162 		    "request_space_throttled", CTLFLAG_RD,
163 		    &pool->sp_space_throttled, 0,
164 		    "Whether nfs requests are currently throttled");
165 
166 		SYSCTL_ADD_INT(&pool->sp_sysctl, sysctl_base, OID_AUTO,
167 		    "request_space_throttle_count", CTLFLAG_RD,
168 		    &pool->sp_space_throttle_count, 0,
169 		    "Count of times throttling based on request space has occurred");
170 	}
171 
172 	return pool;
173 }
174 
175 /*
176  * Code common to svcpool_destroy() and svcpool_close(), which cleans up
177  * the pool data structures.
178  */
179 static void
180 svcpool_cleanup(SVCPOOL *pool)
181 {
182 	SVCGROUP *grp;
183 	SVCXPRT *xprt, *nxprt;
184 	struct svc_callout *s;
185 	struct svc_loss_callout *sl;
186 	struct svcxprt_list cleanup;
187 	int g;
188 
189 	TAILQ_INIT(&cleanup);
190 
191 	for (g = 0; g < SVC_MAXGROUPS; g++) {
192 		grp = &pool->sp_groups[g];
193 		mtx_lock(&grp->sg_lock);
194 		while ((xprt = TAILQ_FIRST(&grp->sg_xlist)) != NULL) {
195 			xprt_unregister_locked(xprt);
196 			TAILQ_INSERT_TAIL(&cleanup, xprt, xp_link);
197 		}
198 		mtx_unlock(&grp->sg_lock);
199 	}
200 	TAILQ_FOREACH_SAFE(xprt, &cleanup, xp_link, nxprt) {
201 		if (xprt->xp_socket != NULL)
202 			soshutdown(xprt->xp_socket, SHUT_WR);
203 		SVC_RELEASE(xprt);
204 	}
205 
206 	mtx_lock(&pool->sp_lock);
207 	while ((s = TAILQ_FIRST(&pool->sp_callouts)) != NULL) {
208 		mtx_unlock(&pool->sp_lock);
209 		svc_unreg(pool, s->sc_prog, s->sc_vers);
210 		mtx_lock(&pool->sp_lock);
211 	}
212 	while ((sl = TAILQ_FIRST(&pool->sp_lcallouts)) != NULL) {
213 		mtx_unlock(&pool->sp_lock);
214 		svc_loss_unreg(pool, sl->slc_dispatch);
215 		mtx_lock(&pool->sp_lock);
216 	}
217 	mtx_unlock(&pool->sp_lock);
218 }
219 
220 void
221 svcpool_destroy(SVCPOOL *pool)
222 {
223 	SVCGROUP *grp;
224 	int g;
225 
226 	svcpool_cleanup(pool);
227 
228 	for (g = 0; g < SVC_MAXGROUPS; g++) {
229 		grp = &pool->sp_groups[g];
230 		mtx_destroy(&grp->sg_lock);
231 	}
232 	mtx_destroy(&pool->sp_lock);
233 
234 	if (pool->sp_rcache)
235 		replay_freecache(pool->sp_rcache);
236 
237 	sysctl_ctx_free(&pool->sp_sysctl);
238 	free(pool, M_RPC);
239 }
240 
241 /*
242  * Similar to svcpool_destroy(), except that it does not destroy the actual
243  * data structures.  As such, "pool" may be used again.
244  */
245 void
246 svcpool_close(SVCPOOL *pool)
247 {
248 	SVCGROUP *grp;
249 	int g;
250 
251 	svcpool_cleanup(pool);
252 
253 	/* Now, initialize the pool's state for a fresh svc_run() call. */
254 	mtx_lock(&pool->sp_lock);
255 	pool->sp_state = SVCPOOL_INIT;
256 	mtx_unlock(&pool->sp_lock);
257 	for (g = 0; g < SVC_MAXGROUPS; g++) {
258 		grp = &pool->sp_groups[g];
259 		mtx_lock(&grp->sg_lock);
260 		grp->sg_state = SVCPOOL_ACTIVE;
261 		mtx_unlock(&grp->sg_lock);
262 	}
263 }
264 
265 /*
266  * Sysctl handler to get the present thread count on a pool
267  */
268 static int
269 svcpool_threads_sysctl(SYSCTL_HANDLER_ARGS)
270 {
271 	SVCPOOL *pool;
272 	int threads, error, g;
273 
274 	pool = oidp->oid_arg1;
275 	threads = 0;
276 	mtx_lock(&pool->sp_lock);
277 	for (g = 0; g < pool->sp_groupcount; g++)
278 		threads += pool->sp_groups[g].sg_threadcount;
279 	mtx_unlock(&pool->sp_lock);
280 	error = sysctl_handle_int(oidp, &threads, 0, req);
281 	return (error);
282 }
283 
284 /*
285  * Sysctl handler to set the minimum thread count on a pool
286  */
287 static int
288 svcpool_minthread_sysctl(SYSCTL_HANDLER_ARGS)
289 {
290 	SVCPOOL *pool;
291 	int newminthreads, error, g;
292 
293 	pool = oidp->oid_arg1;
294 	newminthreads = pool->sp_minthreads;
295 	error = sysctl_handle_int(oidp, &newminthreads, 0, req);
296 	if (error == 0 && newminthreads != pool->sp_minthreads) {
297 		if (newminthreads > pool->sp_maxthreads)
298 			return (EINVAL);
299 		mtx_lock(&pool->sp_lock);
300 		pool->sp_minthreads = newminthreads;
301 		for (g = 0; g < pool->sp_groupcount; g++) {
302 			pool->sp_groups[g].sg_minthreads = max(1,
303 			    pool->sp_minthreads / pool->sp_groupcount);
304 		}
305 		mtx_unlock(&pool->sp_lock);
306 	}
307 	return (error);
308 }
309 
310 /*
311  * Sysctl handler to set the maximum thread count on a pool
312  */
313 static int
314 svcpool_maxthread_sysctl(SYSCTL_HANDLER_ARGS)
315 {
316 	SVCPOOL *pool;
317 	int newmaxthreads, error, g;
318 
319 	pool = oidp->oid_arg1;
320 	newmaxthreads = pool->sp_maxthreads;
321 	error = sysctl_handle_int(oidp, &newmaxthreads, 0, req);
322 	if (error == 0 && newmaxthreads != pool->sp_maxthreads) {
323 		if (newmaxthreads < pool->sp_minthreads)
324 			return (EINVAL);
325 		mtx_lock(&pool->sp_lock);
326 		pool->sp_maxthreads = newmaxthreads;
327 		for (g = 0; g < pool->sp_groupcount; g++) {
328 			pool->sp_groups[g].sg_maxthreads = max(1,
329 			    pool->sp_maxthreads / pool->sp_groupcount);
330 		}
331 		mtx_unlock(&pool->sp_lock);
332 	}
333 	return (error);
334 }
335 
336 /*
337  * Activate a transport handle.
338  */
339 void
340 xprt_register(SVCXPRT *xprt)
341 {
342 	SVCPOOL *pool = xprt->xp_pool;
343 	SVCGROUP *grp;
344 	int g;
345 
346 	SVC_ACQUIRE(xprt);
347 	g = atomic_fetchadd_int(&pool->sp_nextgroup, 1) % pool->sp_groupcount;
348 	xprt->xp_group = grp = &pool->sp_groups[g];
349 	mtx_lock(&grp->sg_lock);
350 	xprt->xp_registered = TRUE;
351 	xprt->xp_active = FALSE;
352 	TAILQ_INSERT_TAIL(&grp->sg_xlist, xprt, xp_link);
353 	mtx_unlock(&grp->sg_lock);
354 }
355 
356 /*
357  * De-activate a transport handle. Note: the locked version doesn't
358  * release the transport - caller must do that after dropping the pool
359  * lock.
360  */
361 static void
362 xprt_unregister_locked(SVCXPRT *xprt)
363 {
364 	SVCGROUP *grp = xprt->xp_group;
365 
366 	mtx_assert(&grp->sg_lock, MA_OWNED);
367 	KASSERT(xprt->xp_registered == TRUE,
368 	    ("xprt_unregister_locked: not registered"));
369 	xprt_inactive_locked(xprt);
370 	TAILQ_REMOVE(&grp->sg_xlist, xprt, xp_link);
371 	xprt->xp_registered = FALSE;
372 }
373 
374 void
375 xprt_unregister(SVCXPRT *xprt)
376 {
377 	SVCGROUP *grp = xprt->xp_group;
378 
379 	mtx_lock(&grp->sg_lock);
380 	if (xprt->xp_registered == FALSE) {
381 		/* Already unregistered by another thread */
382 		mtx_unlock(&grp->sg_lock);
383 		return;
384 	}
385 	xprt_unregister_locked(xprt);
386 	mtx_unlock(&grp->sg_lock);
387 
388 	if (xprt->xp_socket != NULL)
389 		soshutdown(xprt->xp_socket, SHUT_WR);
390 	SVC_RELEASE(xprt);
391 }
392 
393 /*
394  * Attempt to assign a service thread to this transport.
395  */
396 static int
397 xprt_assignthread(SVCXPRT *xprt)
398 {
399 	SVCGROUP *grp = xprt->xp_group;
400 	SVCTHREAD *st;
401 
402 	mtx_assert(&grp->sg_lock, MA_OWNED);
403 	st = LIST_FIRST(&grp->sg_idlethreads);
404 	if (st) {
405 		LIST_REMOVE(st, st_ilink);
406 		SVC_ACQUIRE(xprt);
407 		xprt->xp_thread = st;
408 		st->st_xprt = xprt;
409 		cv_signal(&st->st_cond);
410 		return (TRUE);
411 	} else {
412 		/*
413 		 * See if we can create a new thread. The
414 		 * actual thread creation happens in
415 		 * svc_run_internal because our locking state
416 		 * is poorly defined (we are typically called
417 		 * from a socket upcall). Don't create more
418 		 * than one thread per second.
419 		 */
420 		if (grp->sg_state == SVCPOOL_ACTIVE
421 		    && grp->sg_lastcreatetime < time_uptime
422 		    && grp->sg_threadcount < grp->sg_maxthreads) {
423 			grp->sg_state = SVCPOOL_THREADWANTED;
424 		}
425 	}
426 	return (FALSE);
427 }
428 
429 void
430 xprt_active(SVCXPRT *xprt)
431 {
432 	SVCGROUP *grp = xprt->xp_group;
433 
434 	mtx_lock(&grp->sg_lock);
435 
436 	if (!xprt->xp_registered) {
437 		/*
438 		 * Race with xprt_unregister - we lose.
439 		 */
440 		mtx_unlock(&grp->sg_lock);
441 		return;
442 	}
443 
444 	if (!xprt->xp_active) {
445 		xprt->xp_active = TRUE;
446 		if (xprt->xp_thread == NULL) {
447 			if (!svc_request_space_available(xprt->xp_pool) ||
448 			    !xprt_assignthread(xprt))
449 				TAILQ_INSERT_TAIL(&grp->sg_active, xprt,
450 				    xp_alink);
451 		}
452 	}
453 
454 	mtx_unlock(&grp->sg_lock);
455 }
456 
457 void
458 xprt_inactive_locked(SVCXPRT *xprt)
459 {
460 	SVCGROUP *grp = xprt->xp_group;
461 
462 	mtx_assert(&grp->sg_lock, MA_OWNED);
463 	if (xprt->xp_active) {
464 		if (xprt->xp_thread == NULL)
465 			TAILQ_REMOVE(&grp->sg_active, xprt, xp_alink);
466 		xprt->xp_active = FALSE;
467 	}
468 }
469 
470 void
471 xprt_inactive(SVCXPRT *xprt)
472 {
473 	SVCGROUP *grp = xprt->xp_group;
474 
475 	mtx_lock(&grp->sg_lock);
476 	xprt_inactive_locked(xprt);
477 	mtx_unlock(&grp->sg_lock);
478 }
479 
480 /*
481  * Variant of xprt_inactive() for use only when sure that port is
482  * assigned to thread. For example, within receive handlers.
483  */
484 void
485 xprt_inactive_self(SVCXPRT *xprt)
486 {
487 
488 	KASSERT(xprt->xp_thread != NULL,
489 	    ("xprt_inactive_self(%p) with NULL xp_thread", xprt));
490 	xprt->xp_active = FALSE;
491 }
492 
493 /*
494  * Add a service program to the callout list.
495  * The dispatch routine will be called when a rpc request for this
496  * program number comes in.
497  */
498 bool_t
499 svc_reg(SVCXPRT *xprt, const rpcprog_t prog, const rpcvers_t vers,
500     void (*dispatch)(struct svc_req *, SVCXPRT *),
501     const struct netconfig *nconf)
502 {
503 	SVCPOOL *pool = xprt->xp_pool;
504 	struct svc_callout *s;
505 	char *netid = NULL;
506 	int flag = 0;
507 
508 /* VARIABLES PROTECTED BY svc_lock: s, svc_head */
509 
510 	if (xprt->xp_netid) {
511 		netid = strdup(xprt->xp_netid, M_RPC);
512 		flag = 1;
513 	} else if (nconf && nconf->nc_netid) {
514 		netid = strdup(nconf->nc_netid, M_RPC);
515 		flag = 1;
516 	} /* must have been created with svc_raw_create */
517 	if ((netid == NULL) && (flag == 1)) {
518 		return (FALSE);
519 	}
520 
521 	mtx_lock(&pool->sp_lock);
522 	if ((s = svc_find(pool, prog, vers, netid)) != NULL) {
523 		if (netid)
524 			free(netid, M_RPC);
525 		if (s->sc_dispatch == dispatch)
526 			goto rpcb_it; /* he is registering another xptr */
527 		mtx_unlock(&pool->sp_lock);
528 		return (FALSE);
529 	}
530 	s = malloc(sizeof (struct svc_callout), M_RPC, M_NOWAIT);
531 	if (s == NULL) {
532 		if (netid)
533 			free(netid, M_RPC);
534 		mtx_unlock(&pool->sp_lock);
535 		return (FALSE);
536 	}
537 
538 	s->sc_prog = prog;
539 	s->sc_vers = vers;
540 	s->sc_dispatch = dispatch;
541 	s->sc_netid = netid;
542 	TAILQ_INSERT_TAIL(&pool->sp_callouts, s, sc_link);
543 
544 	if ((xprt->xp_netid == NULL) && (flag == 1) && netid)
545 		((SVCXPRT *) xprt)->xp_netid = strdup(netid, M_RPC);
546 
547 rpcb_it:
548 	mtx_unlock(&pool->sp_lock);
549 	/* now register the information with the local binder service */
550 	if (nconf) {
551 		bool_t dummy;
552 		struct netconfig tnc;
553 		struct netbuf nb;
554 		tnc = *nconf;
555 		nb.buf = &xprt->xp_ltaddr;
556 		nb.len = xprt->xp_ltaddr.ss_len;
557 		dummy = rpcb_set(prog, vers, &tnc, &nb);
558 		return (dummy);
559 	}
560 	return (TRUE);
561 }
562 
563 /*
564  * Remove a service program from the callout list.
565  */
566 void
567 svc_unreg(SVCPOOL *pool, const rpcprog_t prog, const rpcvers_t vers)
568 {
569 	struct svc_callout *s;
570 
571 	/* unregister the information anyway */
572 	(void) rpcb_unset(prog, vers, NULL);
573 	mtx_lock(&pool->sp_lock);
574 	while ((s = svc_find(pool, prog, vers, NULL)) != NULL) {
575 		TAILQ_REMOVE(&pool->sp_callouts, s, sc_link);
576 		if (s->sc_netid)
577 			mem_free(s->sc_netid, sizeof (s->sc_netid) + 1);
578 		mem_free(s, sizeof (struct svc_callout));
579 	}
580 	mtx_unlock(&pool->sp_lock);
581 }
582 
583 /*
584  * Add a service connection loss program to the callout list.
585  * The dispatch routine will be called when some port in ths pool die.
586  */
587 bool_t
588 svc_loss_reg(SVCXPRT *xprt, void (*dispatch)(SVCXPRT *))
589 {
590 	SVCPOOL *pool = xprt->xp_pool;
591 	struct svc_loss_callout *s;
592 
593 	mtx_lock(&pool->sp_lock);
594 	TAILQ_FOREACH(s, &pool->sp_lcallouts, slc_link) {
595 		if (s->slc_dispatch == dispatch)
596 			break;
597 	}
598 	if (s != NULL) {
599 		mtx_unlock(&pool->sp_lock);
600 		return (TRUE);
601 	}
602 	s = malloc(sizeof(struct svc_loss_callout), M_RPC, M_NOWAIT);
603 	if (s == NULL) {
604 		mtx_unlock(&pool->sp_lock);
605 		return (FALSE);
606 	}
607 	s->slc_dispatch = dispatch;
608 	TAILQ_INSERT_TAIL(&pool->sp_lcallouts, s, slc_link);
609 	mtx_unlock(&pool->sp_lock);
610 	return (TRUE);
611 }
612 
613 /*
614  * Remove a service connection loss program from the callout list.
615  */
616 void
617 svc_loss_unreg(SVCPOOL *pool, void (*dispatch)(SVCXPRT *))
618 {
619 	struct svc_loss_callout *s;
620 
621 	mtx_lock(&pool->sp_lock);
622 	TAILQ_FOREACH(s, &pool->sp_lcallouts, slc_link) {
623 		if (s->slc_dispatch == dispatch) {
624 			TAILQ_REMOVE(&pool->sp_lcallouts, s, slc_link);
625 			free(s, M_RPC);
626 			break;
627 		}
628 	}
629 	mtx_unlock(&pool->sp_lock);
630 }
631 
632 /* ********************** CALLOUT list related stuff ************* */
633 
634 /*
635  * Search the callout list for a program number, return the callout
636  * struct.
637  */
638 static struct svc_callout *
639 svc_find(SVCPOOL *pool, rpcprog_t prog, rpcvers_t vers, char *netid)
640 {
641 	struct svc_callout *s;
642 
643 	mtx_assert(&pool->sp_lock, MA_OWNED);
644 	TAILQ_FOREACH(s, &pool->sp_callouts, sc_link) {
645 		if (s->sc_prog == prog && s->sc_vers == vers
646 		    && (netid == NULL || s->sc_netid == NULL ||
647 			strcmp(netid, s->sc_netid) == 0))
648 			break;
649 	}
650 
651 	return (s);
652 }
653 
654 /* ******************* REPLY GENERATION ROUTINES  ************ */
655 
656 static bool_t
657 svc_sendreply_common(struct svc_req *rqstp, struct rpc_msg *rply,
658     struct mbuf *body)
659 {
660 	SVCXPRT *xprt = rqstp->rq_xprt;
661 	bool_t ok;
662 
663 	if (rqstp->rq_args) {
664 		m_freem(rqstp->rq_args);
665 		rqstp->rq_args = NULL;
666 	}
667 
668 	if (xprt->xp_pool->sp_rcache)
669 		replay_setreply(xprt->xp_pool->sp_rcache,
670 		    rply, svc_getrpccaller(rqstp), body);
671 
672 	if (!SVCAUTH_WRAP(&rqstp->rq_auth, &body))
673 		return (FALSE);
674 
675 	ok = SVC_REPLY(xprt, rply, rqstp->rq_addr, body, &rqstp->rq_reply_seq);
676 	if (rqstp->rq_addr) {
677 		free(rqstp->rq_addr, M_SONAME);
678 		rqstp->rq_addr = NULL;
679 	}
680 
681 	return (ok);
682 }
683 
684 /*
685  * Send a reply to an rpc request
686  */
687 bool_t
688 svc_sendreply(struct svc_req *rqstp, xdrproc_t xdr_results, void * xdr_location)
689 {
690 	struct rpc_msg rply;
691 	struct mbuf *m;
692 	XDR xdrs;
693 	bool_t ok;
694 
695 	rply.rm_xid = rqstp->rq_xid;
696 	rply.rm_direction = REPLY;
697 	rply.rm_reply.rp_stat = MSG_ACCEPTED;
698 	rply.acpted_rply.ar_verf = rqstp->rq_verf;
699 	rply.acpted_rply.ar_stat = SUCCESS;
700 	rply.acpted_rply.ar_results.where = NULL;
701 	rply.acpted_rply.ar_results.proc = (xdrproc_t) xdr_void;
702 
703 	m = m_getcl(M_WAITOK, MT_DATA, 0);
704 	xdrmbuf_create(&xdrs, m, XDR_ENCODE);
705 	ok = xdr_results(&xdrs, xdr_location);
706 	XDR_DESTROY(&xdrs);
707 
708 	if (ok) {
709 		return (svc_sendreply_common(rqstp, &rply, m));
710 	} else {
711 		m_freem(m);
712 		return (FALSE);
713 	}
714 }
715 
716 bool_t
717 svc_sendreply_mbuf(struct svc_req *rqstp, struct mbuf *m)
718 {
719 	struct rpc_msg rply;
720 
721 	rply.rm_xid = rqstp->rq_xid;
722 	rply.rm_direction = REPLY;
723 	rply.rm_reply.rp_stat = MSG_ACCEPTED;
724 	rply.acpted_rply.ar_verf = rqstp->rq_verf;
725 	rply.acpted_rply.ar_stat = SUCCESS;
726 	rply.acpted_rply.ar_results.where = NULL;
727 	rply.acpted_rply.ar_results.proc = (xdrproc_t) xdr_void;
728 
729 	return (svc_sendreply_common(rqstp, &rply, m));
730 }
731 
732 /*
733  * No procedure error reply
734  */
735 void
736 svcerr_noproc(struct svc_req *rqstp)
737 {
738 	SVCXPRT *xprt = rqstp->rq_xprt;
739 	struct rpc_msg rply;
740 
741 	rply.rm_xid = rqstp->rq_xid;
742 	rply.rm_direction = REPLY;
743 	rply.rm_reply.rp_stat = MSG_ACCEPTED;
744 	rply.acpted_rply.ar_verf = rqstp->rq_verf;
745 	rply.acpted_rply.ar_stat = PROC_UNAVAIL;
746 
747 	if (xprt->xp_pool->sp_rcache)
748 		replay_setreply(xprt->xp_pool->sp_rcache,
749 		    &rply, svc_getrpccaller(rqstp), NULL);
750 
751 	svc_sendreply_common(rqstp, &rply, NULL);
752 }
753 
754 /*
755  * Can't decode args error reply
756  */
757 void
758 svcerr_decode(struct svc_req *rqstp)
759 {
760 	SVCXPRT *xprt = rqstp->rq_xprt;
761 	struct rpc_msg rply;
762 
763 	rply.rm_xid = rqstp->rq_xid;
764 	rply.rm_direction = REPLY;
765 	rply.rm_reply.rp_stat = MSG_ACCEPTED;
766 	rply.acpted_rply.ar_verf = rqstp->rq_verf;
767 	rply.acpted_rply.ar_stat = GARBAGE_ARGS;
768 
769 	if (xprt->xp_pool->sp_rcache)
770 		replay_setreply(xprt->xp_pool->sp_rcache,
771 		    &rply, (struct sockaddr *) &xprt->xp_rtaddr, NULL);
772 
773 	svc_sendreply_common(rqstp, &rply, NULL);
774 }
775 
776 /*
777  * Some system error
778  */
779 void
780 svcerr_systemerr(struct svc_req *rqstp)
781 {
782 	SVCXPRT *xprt = rqstp->rq_xprt;
783 	struct rpc_msg rply;
784 
785 	rply.rm_xid = rqstp->rq_xid;
786 	rply.rm_direction = REPLY;
787 	rply.rm_reply.rp_stat = MSG_ACCEPTED;
788 	rply.acpted_rply.ar_verf = rqstp->rq_verf;
789 	rply.acpted_rply.ar_stat = SYSTEM_ERR;
790 
791 	if (xprt->xp_pool->sp_rcache)
792 		replay_setreply(xprt->xp_pool->sp_rcache,
793 		    &rply, svc_getrpccaller(rqstp), NULL);
794 
795 	svc_sendreply_common(rqstp, &rply, NULL);
796 }
797 
798 /*
799  * Authentication error reply
800  */
801 void
802 svcerr_auth(struct svc_req *rqstp, enum auth_stat why)
803 {
804 	SVCXPRT *xprt = rqstp->rq_xprt;
805 	struct rpc_msg rply;
806 
807 	rply.rm_xid = rqstp->rq_xid;
808 	rply.rm_direction = REPLY;
809 	rply.rm_reply.rp_stat = MSG_DENIED;
810 	rply.rjcted_rply.rj_stat = AUTH_ERROR;
811 	rply.rjcted_rply.rj_why = why;
812 
813 	if (xprt->xp_pool->sp_rcache)
814 		replay_setreply(xprt->xp_pool->sp_rcache,
815 		    &rply, svc_getrpccaller(rqstp), NULL);
816 
817 	svc_sendreply_common(rqstp, &rply, NULL);
818 }
819 
820 /*
821  * Auth too weak error reply
822  */
823 void
824 svcerr_weakauth(struct svc_req *rqstp)
825 {
826 
827 	svcerr_auth(rqstp, AUTH_TOOWEAK);
828 }
829 
830 /*
831  * Program unavailable error reply
832  */
833 void
834 svcerr_noprog(struct svc_req *rqstp)
835 {
836 	SVCXPRT *xprt = rqstp->rq_xprt;
837 	struct rpc_msg rply;
838 
839 	rply.rm_xid = rqstp->rq_xid;
840 	rply.rm_direction = REPLY;
841 	rply.rm_reply.rp_stat = MSG_ACCEPTED;
842 	rply.acpted_rply.ar_verf = rqstp->rq_verf;
843 	rply.acpted_rply.ar_stat = PROG_UNAVAIL;
844 
845 	if (xprt->xp_pool->sp_rcache)
846 		replay_setreply(xprt->xp_pool->sp_rcache,
847 		    &rply, svc_getrpccaller(rqstp), NULL);
848 
849 	svc_sendreply_common(rqstp, &rply, NULL);
850 }
851 
852 /*
853  * Program version mismatch error reply
854  */
855 void
856 svcerr_progvers(struct svc_req *rqstp, rpcvers_t low_vers, rpcvers_t high_vers)
857 {
858 	SVCXPRT *xprt = rqstp->rq_xprt;
859 	struct rpc_msg rply;
860 
861 	rply.rm_xid = rqstp->rq_xid;
862 	rply.rm_direction = REPLY;
863 	rply.rm_reply.rp_stat = MSG_ACCEPTED;
864 	rply.acpted_rply.ar_verf = rqstp->rq_verf;
865 	rply.acpted_rply.ar_stat = PROG_MISMATCH;
866 	rply.acpted_rply.ar_vers.low = (uint32_t)low_vers;
867 	rply.acpted_rply.ar_vers.high = (uint32_t)high_vers;
868 
869 	if (xprt->xp_pool->sp_rcache)
870 		replay_setreply(xprt->xp_pool->sp_rcache,
871 		    &rply, svc_getrpccaller(rqstp), NULL);
872 
873 	svc_sendreply_common(rqstp, &rply, NULL);
874 }
875 
876 /*
877  * Allocate a new server transport structure. All fields are
878  * initialized to zero and xp_p3 is initialized to point at an
879  * extension structure to hold various flags and authentication
880  * parameters.
881  */
882 SVCXPRT *
883 svc_xprt_alloc(void)
884 {
885 	SVCXPRT *xprt;
886 	SVCXPRT_EXT *ext;
887 
888 	xprt = mem_alloc(sizeof(SVCXPRT));
889 	ext = mem_alloc(sizeof(SVCXPRT_EXT));
890 	xprt->xp_p3 = ext;
891 	refcount_init(&xprt->xp_refs, 1);
892 
893 	return (xprt);
894 }
895 
896 /*
897  * Free a server transport structure.
898  */
899 void
900 svc_xprt_free(SVCXPRT *xprt)
901 {
902 
903 	mem_free(xprt->xp_p3, sizeof(SVCXPRT_EXT));
904 	/* The size argument is ignored, so 0 is ok. */
905 	mem_free(xprt->xp_gidp, 0);
906 	mem_free(xprt, sizeof(SVCXPRT));
907 }
908 
909 /* ******************* SERVER INPUT STUFF ******************* */
910 
911 /*
912  * Read RPC requests from a transport and queue them to be
913  * executed. We handle authentication and replay cache replies here.
914  * Actually dispatching the RPC is deferred till svc_executereq.
915  */
916 static enum xprt_stat
917 svc_getreq(SVCXPRT *xprt, struct svc_req **rqstp_ret)
918 {
919 	SVCPOOL *pool = xprt->xp_pool;
920 	struct svc_req *r;
921 	struct rpc_msg msg;
922 	struct mbuf *args;
923 	struct svc_loss_callout *s;
924 	enum xprt_stat stat;
925 
926 	/* now receive msgs from xprtprt (support batch calls) */
927 	r = malloc(sizeof(*r), M_RPC, M_WAITOK|M_ZERO);
928 
929 	msg.rm_call.cb_cred.oa_base = r->rq_credarea;
930 	msg.rm_call.cb_verf.oa_base = &r->rq_credarea[MAX_AUTH_BYTES];
931 	r->rq_clntcred = &r->rq_credarea[2*MAX_AUTH_BYTES];
932 	if (SVC_RECV(xprt, &msg, &r->rq_addr, &args)) {
933 		enum auth_stat why;
934 
935 		/*
936 		 * Handle replays and authenticate before queuing the
937 		 * request to be executed.
938 		 */
939 		SVC_ACQUIRE(xprt);
940 		r->rq_xprt = xprt;
941 		if (pool->sp_rcache) {
942 			struct rpc_msg repmsg;
943 			struct mbuf *repbody;
944 			enum replay_state rs;
945 			rs = replay_find(pool->sp_rcache, &msg,
946 			    svc_getrpccaller(r), &repmsg, &repbody);
947 			switch (rs) {
948 			case RS_NEW:
949 				break;
950 			case RS_DONE:
951 				SVC_REPLY(xprt, &repmsg, r->rq_addr,
952 				    repbody, &r->rq_reply_seq);
953 				if (r->rq_addr) {
954 					free(r->rq_addr, M_SONAME);
955 					r->rq_addr = NULL;
956 				}
957 				m_freem(args);
958 				goto call_done;
959 
960 			default:
961 				m_freem(args);
962 				goto call_done;
963 			}
964 		}
965 
966 		r->rq_xid = msg.rm_xid;
967 		r->rq_prog = msg.rm_call.cb_prog;
968 		r->rq_vers = msg.rm_call.cb_vers;
969 		r->rq_proc = msg.rm_call.cb_proc;
970 		r->rq_size = sizeof(*r) + m_length(args, NULL);
971 		r->rq_args = args;
972 		if ((why = _authenticate(r, &msg)) != AUTH_OK) {
973 			/*
974 			 * RPCSEC_GSS uses this return code
975 			 * for requests that form part of its
976 			 * context establishment protocol and
977 			 * should not be dispatched to the
978 			 * application.
979 			 */
980 			if (why != RPCSEC_GSS_NODISPATCH)
981 				svcerr_auth(r, why);
982 			goto call_done;
983 		}
984 
985 		if (!SVCAUTH_UNWRAP(&r->rq_auth, &r->rq_args)) {
986 			svcerr_decode(r);
987 			goto call_done;
988 		}
989 
990 		/*
991 		 * Everything checks out, return request to caller.
992 		 */
993 		*rqstp_ret = r;
994 		r = NULL;
995 	}
996 call_done:
997 	if (r) {
998 		svc_freereq(r);
999 		r = NULL;
1000 	}
1001 	if ((stat = SVC_STAT(xprt)) == XPRT_DIED) {
1002 		TAILQ_FOREACH(s, &pool->sp_lcallouts, slc_link)
1003 			(*s->slc_dispatch)(xprt);
1004 		xprt_unregister(xprt);
1005 	}
1006 
1007 	return (stat);
1008 }
1009 
1010 static void
1011 svc_executereq(struct svc_req *rqstp)
1012 {
1013 	SVCXPRT *xprt = rqstp->rq_xprt;
1014 	SVCPOOL *pool = xprt->xp_pool;
1015 	int prog_found;
1016 	rpcvers_t low_vers;
1017 	rpcvers_t high_vers;
1018 	struct svc_callout *s;
1019 
1020 	/* now match message with a registered service*/
1021 	prog_found = FALSE;
1022 	low_vers = (rpcvers_t) -1L;
1023 	high_vers = (rpcvers_t) 0L;
1024 	TAILQ_FOREACH(s, &pool->sp_callouts, sc_link) {
1025 		if (s->sc_prog == rqstp->rq_prog) {
1026 			if (s->sc_vers == rqstp->rq_vers) {
1027 				/*
1028 				 * We hand ownership of r to the
1029 				 * dispatch method - they must call
1030 				 * svc_freereq.
1031 				 */
1032 				(*s->sc_dispatch)(rqstp, xprt);
1033 				return;
1034 			}  /* found correct version */
1035 			prog_found = TRUE;
1036 			if (s->sc_vers < low_vers)
1037 				low_vers = s->sc_vers;
1038 			if (s->sc_vers > high_vers)
1039 				high_vers = s->sc_vers;
1040 		}   /* found correct program */
1041 	}
1042 
1043 	/*
1044 	 * if we got here, the program or version
1045 	 * is not served ...
1046 	 */
1047 	if (prog_found)
1048 		svcerr_progvers(rqstp, low_vers, high_vers);
1049 	else
1050 		svcerr_noprog(rqstp);
1051 
1052 	svc_freereq(rqstp);
1053 }
1054 
1055 static void
1056 svc_checkidle(SVCGROUP *grp)
1057 {
1058 	SVCXPRT *xprt, *nxprt;
1059 	time_t timo;
1060 	struct svcxprt_list cleanup;
1061 
1062 	TAILQ_INIT(&cleanup);
1063 	TAILQ_FOREACH_SAFE(xprt, &grp->sg_xlist, xp_link, nxprt) {
1064 		/*
1065 		 * Only some transports have idle timers. Don't time
1066 		 * something out which is just waking up.
1067 		 */
1068 		if (!xprt->xp_idletimeout || xprt->xp_thread)
1069 			continue;
1070 
1071 		timo = xprt->xp_lastactive + xprt->xp_idletimeout;
1072 		if (time_uptime > timo) {
1073 			xprt_unregister_locked(xprt);
1074 			TAILQ_INSERT_TAIL(&cleanup, xprt, xp_link);
1075 		}
1076 	}
1077 
1078 	mtx_unlock(&grp->sg_lock);
1079 	TAILQ_FOREACH_SAFE(xprt, &cleanup, xp_link, nxprt) {
1080 		soshutdown(xprt->xp_socket, SHUT_WR);
1081 		SVC_RELEASE(xprt);
1082 	}
1083 	mtx_lock(&grp->sg_lock);
1084 }
1085 
1086 static void
1087 svc_assign_waiting_sockets(SVCPOOL *pool)
1088 {
1089 	SVCGROUP *grp;
1090 	SVCXPRT *xprt;
1091 	int g;
1092 
1093 	for (g = 0; g < pool->sp_groupcount; g++) {
1094 		grp = &pool->sp_groups[g];
1095 		mtx_lock(&grp->sg_lock);
1096 		while ((xprt = TAILQ_FIRST(&grp->sg_active)) != NULL) {
1097 			if (xprt_assignthread(xprt))
1098 				TAILQ_REMOVE(&grp->sg_active, xprt, xp_alink);
1099 			else
1100 				break;
1101 		}
1102 		mtx_unlock(&grp->sg_lock);
1103 	}
1104 }
1105 
1106 static void
1107 svc_change_space_used(SVCPOOL *pool, long delta)
1108 {
1109 	unsigned long value;
1110 
1111 	value = atomic_fetchadd_long(&pool->sp_space_used, delta) + delta;
1112 	if (delta > 0) {
1113 		if (value >= pool->sp_space_high && !pool->sp_space_throttled) {
1114 			pool->sp_space_throttled = TRUE;
1115 			pool->sp_space_throttle_count++;
1116 		}
1117 		if (value > pool->sp_space_used_highest)
1118 			pool->sp_space_used_highest = value;
1119 	} else {
1120 		if (value < pool->sp_space_low && pool->sp_space_throttled) {
1121 			pool->sp_space_throttled = FALSE;
1122 			svc_assign_waiting_sockets(pool);
1123 		}
1124 	}
1125 }
1126 
1127 static bool_t
1128 svc_request_space_available(SVCPOOL *pool)
1129 {
1130 
1131 	if (pool->sp_space_throttled)
1132 		return (FALSE);
1133 	return (TRUE);
1134 }
1135 
/*
 * Body of every service thread in a group; also run by the thread that
 * called svc_run(), which passes ismaster == TRUE.
 *
 * Allocates a private SVCTHREAD and then loops until the group state is
 * SVCPOOL_CLOSING or the thread decides to retire.  Each pass either
 * starts a requested thread, picks up or waits for an active transport,
 * or drains the transport assigned to this thread and executes the
 * requests queued on st->st_reqs.  grp->sg_lock is held at the top of
 * every pass and is dropped around the receive and execute phases.
 *
 * On exit the SVCTHREAD is torn down and sg_threadcount is decremented;
 * a non-master thread also calls wakeup(grp), which is the channel
 * svc_run() sleeps on while waiting for the count to reach zero.
 */
1136 static void
1137 svc_run_internal(SVCGROUP *grp, bool_t ismaster)
1138 {
1139 	SVCPOOL *pool = grp->sg_pool;
1140 	SVCTHREAD *st, *stpref;
1141 	SVCXPRT *xprt;
1142 	enum xprt_stat stat;
1143 	struct svc_req *rqstp;
1144 	struct proc *p;
1145 	long sz;
1146 	int error;
1147 
1148 	st = mem_alloc(sizeof(*st));
1149 	mtx_init(&st->st_lock, "st_lock", NULL, MTX_DEF);
1150 	st->st_pool = pool;
1151 	st->st_xprt = NULL;
1152 	STAILQ_INIT(&st->st_reqs);
1153 	cv_init(&st->st_cond, "rpcsvc");
1154 
1155 	mtx_lock(&grp->sg_lock);
1156 
1157 	/*
1158 	 * If we are a new thread which was spawned to cope with
1159 	 * increased load, set the state back to SVCPOOL_ACTIVE.
1160 	 */
1161 	if (grp->sg_state == SVCPOOL_THREADSTARTING)
1162 		grp->sg_state = SVCPOOL_ACTIVE;
1163 
1164 	while (grp->sg_state != SVCPOOL_CLOSING) {
1165 		/*
1166 		 * Create new thread if requested.
1167 		 */
1168 		if (grp->sg_state == SVCPOOL_THREADWANTED) {
1169 			grp->sg_state = SVCPOOL_THREADSTARTING;
1170 			grp->sg_lastcreatetime = time_uptime;
			/* svc_new_thread() takes sg_lock itself. */
1171 			mtx_unlock(&grp->sg_lock);
1172 			svc_new_thread(grp);
1173 			mtx_lock(&grp->sg_lock);
1174 			continue;
1175 		}
1176 
1177 		/*
1178 		 * Check for idle transports once per second.
1179 		 */
1180 		if (time_uptime > grp->sg_lastidlecheck) {
1181 			grp->sg_lastidlecheck = time_uptime;
			/* Drops and re-takes sg_lock internally. */
1182 			svc_checkidle(grp);
1183 		}
1184 
1185 		xprt = st->st_xprt;
1186 		if (!xprt) {
1187 			/*
1188 			 * Enforce maxthreads count.
1189 			 */
1190 			if (!ismaster && grp->sg_threadcount >
1191 			    grp->sg_maxthreads)
1192 				break;
1193 
1194 			/*
1195 			 * Before sleeping, see if we can find an
1196 			 * active transport which isn't being serviced
1197 			 * by a thread.
1198 			 */
1199 			if (svc_request_space_available(pool) &&
1200 			    (xprt = TAILQ_FIRST(&grp->sg_active)) != NULL) {
1201 				TAILQ_REMOVE(&grp->sg_active, xprt, xp_alink);
1202 				SVC_ACQUIRE(xprt);
1203 				xprt->xp_thread = st;
1204 				st->st_xprt = xprt;
1205 				continue;
1206 			}
1207 
			/*
			 * Nothing to do: go idle.  The master, and any
			 * worker above the group minimum, use a timed wait
			 * of 5 * hz ticks so the loop is re-run
			 * periodically; the remaining workers sleep until
			 * signalled.
			 */
1208 			LIST_INSERT_HEAD(&grp->sg_idlethreads, st, st_ilink);
1209 			if (ismaster || (!ismaster &&
1210 			    grp->sg_threadcount > grp->sg_minthreads))
1211 				error = cv_timedwait_sig(&st->st_cond,
1212 				    &grp->sg_lock, 5 * hz);
1213 			else
1214 				error = cv_wait_sig(&st->st_cond,
1215 				    &grp->sg_lock);
			/*
			 * NOTE(review): presumably whoever assigns st_xprt
			 * to an idle thread also unlinks it from
			 * sg_idlethreads - confirm in xprt_assignthread().
			 */
1216 			if (st->st_xprt == NULL)
1217 				LIST_REMOVE(st, st_ilink);
1218 
1219 			/*
1220 			 * Reduce worker thread count when idle.
1221 			 */
1222 			if (error == EWOULDBLOCK) {
1223 				if (!ismaster
1224 				    && (grp->sg_threadcount
1225 					> grp->sg_minthreads)
1226 					&& !st->st_xprt)
1227 					break;
1228 			} else if (error != 0) {
				/*
				 * The wait was interrupted by a signal.  If
				 * the process is being stopped, park in
				 * thread_suspend_check() and carry on
				 * afterwards; otherwise shut the whole pool
				 * down and leave the loop.
				 */
1229 				KASSERT(error == EINTR || error == ERESTART,
1230 				    ("non-signal error %d", error));
1231 				mtx_unlock(&grp->sg_lock);
1232 				p = curproc;
1233 				PROC_LOCK(p);
1234 				if (P_SHOULDSTOP(p) ||
1235 				    (p->p_flag & P_TOTAL_STOP) != 0) {
1236 					thread_suspend_check(0);
1237 					PROC_UNLOCK(p);
1238 					mtx_lock(&grp->sg_lock);
1239 				} else {
1240 					PROC_UNLOCK(p);
1241 					svc_exit(pool);
1242 					mtx_lock(&grp->sg_lock);
1243 					break;
1244 				}
1245 			}
1246 			continue;
1247 		}
1248 		mtx_unlock(&grp->sg_lock);
1249 
1250 		/*
1251 		 * Drain the transport socket and queue up any RPCs.
1252 		 */
1253 		xprt->xp_lastactive = time_uptime;
1254 		do {
1255 			if (!svc_request_space_available(pool))
1256 				break;
1257 			rqstp = NULL;
1258 			stat = svc_getreq(xprt, &rqstp);
1259 			if (rqstp) {
1260 				svc_change_space_used(pool, rqstp->rq_size);
1261 				/*
1262 				 * See if the application has a preference
1263 				 * for some other thread.
1264 				 */
1265 				if (pool->sp_assign) {
					/*
					 * NOTE(review): sp_assign appears to
					 * return with stpref->st_lock held,
					 * since it is unlocked below without
					 * being locked here - confirm against
					 * the sp_assign implementations.
					 */
1266 					stpref = pool->sp_assign(st, rqstp);
1267 					rqstp->rq_thread = stpref;
1268 					STAILQ_INSERT_TAIL(&stpref->st_reqs,
1269 					    rqstp, rq_link);
1270 					mtx_unlock(&stpref->st_lock);
					/*
					 * Handed to another thread: clear
					 * rqstp so the loop keeps reading.  A
					 * request queued for ourselves ends
					 * the drain loop.
					 */
1271 					if (stpref != st)
1272 						rqstp = NULL;
1273 				} else {
1274 					rqstp->rq_thread = st;
1275 					STAILQ_INSERT_TAIL(&st->st_reqs,
1276 					    rqstp, rq_link);
1277 				}
1278 			}
1279 		} while (rqstp == NULL && stat == XPRT_MOREREQS
1280 		    && grp->sg_state != SVCPOOL_CLOSING);
1281 
1282 		/*
1283 		 * Move this transport to the end of the active list to
1284 		 * ensure fairness when multiple transports are active.
1285 		 * If this was the last queued request, svc_getreq will end
1286 		 * up calling xprt_inactive to remove from the active list.
1287 		 */
1288 		mtx_lock(&grp->sg_lock);
1289 		xprt->xp_thread = NULL;
1290 		st->st_xprt = NULL;
1291 		if (xprt->xp_active) {
1292 			if (!svc_request_space_available(pool) ||
1293 			    !xprt_assignthread(xprt))
1294 				TAILQ_INSERT_TAIL(&grp->sg_active,
1295 				    xprt, xp_alink);
1296 		}
1297 		mtx_unlock(&grp->sg_lock);
1298 		SVC_RELEASE(xprt);
1299 
1300 		/*
1301 		 * Execute what we have queued.
1302 		 */
1303 		mtx_lock(&st->st_lock);
1304 		while ((rqstp = STAILQ_FIRST(&st->st_reqs)) != NULL) {
1305 			STAILQ_REMOVE_HEAD(&st->st_reqs, rq_link);
1306 			mtx_unlock(&st->st_lock);
			/*
			 * Save the size before executing: rqstp is not
			 * touched again after svc_executereq() returns.
			 */
1307 			sz = (long)rqstp->rq_size;
1308 			svc_executereq(rqstp);
1309 			svc_change_space_used(pool, -sz);
1310 			mtx_lock(&st->st_lock);
1311 		}
1312 		mtx_unlock(&st->st_lock);
1313 		mtx_lock(&grp->sg_lock);
1314 	}
1315 
	/* Drop a transport that was assigned but never serviced. */
1316 	if (st->st_xprt) {
1317 		xprt = st->st_xprt;
1318 		st->st_xprt = NULL;
1319 		SVC_RELEASE(xprt);
1320 	}
1321 	KASSERT(STAILQ_EMPTY(&st->st_reqs), ("stray reqs on exit"));
1322 	mtx_destroy(&st->st_lock);
1323 	cv_destroy(&st->st_cond);
1324 	mem_free(st, sizeof(*st));
1325 
	/* Still under sg_lock: account for our exit and notify svc_run(). */
1326 	grp->sg_threadcount--;
1327 	if (!ismaster)
1328 		wakeup(grp);
1329 	mtx_unlock(&grp->sg_lock);
1330 }
1331 
1332 static void
1333 svc_thread_start(void *arg)
1334 {
1335 
1336 	svc_run_internal((SVCGROUP *) arg, FALSE);
1337 	kthread_exit();
1338 }
1339 
1340 static void
1341 svc_new_thread(SVCGROUP *grp)
1342 {
1343 	SVCPOOL *pool = grp->sg_pool;
1344 	struct thread *td;
1345 
1346 	mtx_lock(&grp->sg_lock);
1347 	grp->sg_threadcount++;
1348 	mtx_unlock(&grp->sg_lock);
1349 	kthread_add(svc_thread_start, grp, pool->sp_proc, &td, 0, 0,
1350 	    "%s: service", pool->sp_name);
1351 }
1352 
1353 void
1354 svc_run(SVCPOOL *pool)
1355 {
1356 	int g, i;
1357 	struct proc *p;
1358 	struct thread *td;
1359 	SVCGROUP *grp;
1360 
1361 	p = curproc;
1362 	td = curthread;
1363 	snprintf(td->td_name, sizeof(td->td_name),
1364 	    "%s: master", pool->sp_name);
1365 	pool->sp_state = SVCPOOL_ACTIVE;
1366 	pool->sp_proc = p;
1367 
1368 	/* Choose group count based on number of threads and CPUs. */
1369 	pool->sp_groupcount = max(1, min(SVC_MAXGROUPS,
1370 	    min(pool->sp_maxthreads / 2, mp_ncpus) / 6));
1371 	for (g = 0; g < pool->sp_groupcount; g++) {
1372 		grp = &pool->sp_groups[g];
1373 		grp->sg_minthreads = max(1,
1374 		    pool->sp_minthreads / pool->sp_groupcount);
1375 		grp->sg_maxthreads = max(1,
1376 		    pool->sp_maxthreads / pool->sp_groupcount);
1377 		grp->sg_lastcreatetime = time_uptime;
1378 	}
1379 
1380 	/* Starting threads */
1381 	pool->sp_groups[0].sg_threadcount++;
1382 	for (g = 0; g < pool->sp_groupcount; g++) {
1383 		grp = &pool->sp_groups[g];
1384 		for (i = ((g == 0) ? 1 : 0); i < grp->sg_minthreads; i++)
1385 			svc_new_thread(grp);
1386 	}
1387 	svc_run_internal(&pool->sp_groups[0], TRUE);
1388 
1389 	/* Waiting for threads to stop. */
1390 	for (g = 0; g < pool->sp_groupcount; g++) {
1391 		grp = &pool->sp_groups[g];
1392 		mtx_lock(&grp->sg_lock);
1393 		while (grp->sg_threadcount > 0)
1394 			msleep(grp, &grp->sg_lock, 0, "svcexit", 0);
1395 		mtx_unlock(&grp->sg_lock);
1396 	}
1397 }
1398 
1399 void
1400 svc_exit(SVCPOOL *pool)
1401 {
1402 	SVCGROUP *grp;
1403 	SVCTHREAD *st;
1404 	int g;
1405 
1406 	pool->sp_state = SVCPOOL_CLOSING;
1407 	for (g = 0; g < pool->sp_groupcount; g++) {
1408 		grp = &pool->sp_groups[g];
1409 		mtx_lock(&grp->sg_lock);
1410 		if (grp->sg_state != SVCPOOL_CLOSING) {
1411 			grp->sg_state = SVCPOOL_CLOSING;
1412 			LIST_FOREACH(st, &grp->sg_idlethreads, st_ilink)
1413 				cv_signal(&st->st_cond);
1414 		}
1415 		mtx_unlock(&grp->sg_lock);
1416 	}
1417 }
1418 
1419 bool_t
1420 svc_getargs(struct svc_req *rqstp, xdrproc_t xargs, void *args)
1421 {
1422 	struct mbuf *m;
1423 	XDR xdrs;
1424 	bool_t stat;
1425 
1426 	m = rqstp->rq_args;
1427 	rqstp->rq_args = NULL;
1428 
1429 	xdrmbuf_create(&xdrs, m, XDR_DECODE);
1430 	stat = xargs(&xdrs, args);
1431 	XDR_DESTROY(&xdrs);
1432 
1433 	return (stat);
1434 }
1435 
1436 bool_t
1437 svc_freeargs(struct svc_req *rqstp, xdrproc_t xargs, void *args)
1438 {
1439 	XDR xdrs;
1440 
1441 	if (rqstp->rq_addr) {
1442 		free(rqstp->rq_addr, M_SONAME);
1443 		rqstp->rq_addr = NULL;
1444 	}
1445 
1446 	xdrs.x_op = XDR_FREE;
1447 	return (xargs(&xdrs, args));
1448 }
1449 
1450 void
1451 svc_freereq(struct svc_req *rqstp)
1452 {
1453 	SVCTHREAD *st;
1454 	SVCPOOL *pool;
1455 
1456 	st = rqstp->rq_thread;
1457 	if (st) {
1458 		pool = st->st_pool;
1459 		if (pool->sp_done)
1460 			pool->sp_done(st, rqstp);
1461 	}
1462 
1463 	if (rqstp->rq_auth.svc_ah_ops)
1464 		SVCAUTH_RELEASE(&rqstp->rq_auth);
1465 
1466 	if (rqstp->rq_xprt) {
1467 		SVC_RELEASE(rqstp->rq_xprt);
1468 	}
1469 
1470 	if (rqstp->rq_addr)
1471 		free(rqstp->rq_addr, M_SONAME);
1472 
1473 	if (rqstp->rq_args)
1474 		m_freem(rqstp->rq_args);
1475 
1476 	free(rqstp, M_RPC);
1477 }
1478