xref: /illumos-gate/usr/src/lib/libnsl/rpc/svc_run.c (revision a1a46df055863a267eb7a80b7961c9379bc457ec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32 /*
33  * Portions of this source code were derived from Berkeley
34  * 4.3 BSD under license from the Regents of the University of
35  * California.
36  */
37 
38 /*
39  * This is the rpc server side idle loop
40  * Wait for input, call server program.
41  */
42 #include "mt.h"
43 #include "rpc_mt.h"
44 #include <stdlib.h>
45 #include <unistd.h>
46 #include <signal.h>
47 #include <rpc/rpc.h>
48 #include <errno.h>
49 #include <sys/poll.h>
50 #include <sys/types.h>
51 #include <syslog.h>
52 #include <thread.h>
53 #include <assert.h>
54 #include <libintl.h>
55 #include <values.h>
56 
57 extern const char __nsl_dom[];
58 
59 extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
60 extern bool_t __is_a_userfd(int P_fd);
61 extern void   __destroy_userfd();
62 extern void clear_pollfd(int);
63 extern void set_pollfd(int /* fd */, short /* events */);
64 extern void svc_getreq_poll();
65 extern void (*__proc_cleanup_cb)();
66 
67 static void start_threads();
68 static void create_pipe();
69 static void clear_pipe();
70 static int select_next_pollfd();
71 static SVCXPRT *make_xprt_copy();
72 static void _svc_run_mt();
73 static void _svc_run();
74 
75 int _svc_prog_dispatch();
76 static void _svc_done_private();
77 
78 extern rwlock_t svc_fd_lock;
79 extern mutex_t	svc_door_mutex;
80 extern cond_t	svc_door_waitcv;
81 extern int	svc_ndoorfds;
82 extern void	__svc_cleanup_door_xprts();
83 extern void	__svc_free_xprtlist();
84 extern void	__svc_getreq_user(struct pollfd *);
85 
86 /*
87  * Maximum fragment size allowed for connection oriented connections.
88  * Zero means that no maximum size limit has been requested.
89  */
90 int __rpc_connmaxrec = 0;
91 
92 /* Inter-Record Timeout in secs for non-blocked connection RPC */
93 int __rpc_irtimeout = 35;
94 
95 /*
96  * Request exclusive access to tcp and udp non-priv ports bound with a
97  * wildcard addr.
98  */
99 bool_t __rpc_tp_exclbind = FALSE;
100 
101 /*
102  * Maximum number of outstanding connection indications (listen backlog).
103  */
104 static int __svc_lstnbklog = 64;	/* Chosen Arbitrarily */
105 
106 /*
107  * XXX - eventually, all mutexes and their initializations static
108  */
109 
110 /*
111  * Variables used for MT
112  */
113 int svc_mt_mode;		/* multi-threading mode */
114 
115 int svc_pipe[2];	/* pipe for breaking out of poll: read(0), write(1) */
116 
117 /* BEGIN PROTECTED BY svc_mutex */
118 
119 static int svc_thr_max = 16;	/* default maximum number of threads allowed */
120 
121 static int svc_thr_total;	/* current number of threads */
122 
123 static int svc_thr_active;	/* current number of threads active */
124 
125 /* circular array of file descriptors with pending data */
126 
127 #define	CIRCULAR_BUFSIZE	1024
128 
129 static int svc_pending_fds[CIRCULAR_BUFSIZE+1];	/* fds with pending data */
130 
131 static int svc_next_pending;			/* next one to be processed */
132 
133 static int svc_last_pending;			/* last one in list */
134 
135 static int svc_total_pending;			/* total in list */
136 
137 static int svc_thr_total_creates;	/* total created - stats */
138 
139 static int svc_thr_total_create_errors;	/* total create errors - stats */
140 
141 static int svc_waiters;		/* number of waiting threads */
142 
143 /* END PROTECTED BY svc_mutex */
144 
145 /* BEGIN PROTECTED BY svc_fd_lock: */
146 
147 int svc_nfds;		/* total number of active file descriptors */
148 
149 int svc_nfds_set;	/* total number of fd bits set in svc_fdset */
150 
151 int svc_max_fd = 0;	/* largest active file descriptor */
152 
153 int svc_npollfds;	/* total number of active pollfds */
154 
155 int svc_npollfds_set;	/* total number of pollfd set in svc_pollfd */
156 
157 int svc_max_pollfd;	/* largest active pollfd so far */
158 
159 int svc_pollfd_allocd;  /* number of pollfd structures allocated */
160 
161 /* END PROTECTED BY svc_fd_lock: */
162 
163 /* BEGIN PROTECTED BY svc_thr_mutex */
164 
165 #define	POLLSET_EXTEND	256
166 static int svc_pollset_allocd;
167 static struct pollfd *svc_pollset;
168 				/*
169 				 * array of file descriptors currently active
170 				 */
171 static int svc_polled;		/* no of fds polled in last poll() - input */
172 
173 static int svc_pollfds;		/* no of active fds in last poll() - output */
174 
175 static int svc_next_pollfd;	/* next fd  to processin svc_pollset */
176 
177 bool_t svc_polling;		/* true if a thread is polling */
178 
179 /* END PROTECTED BY svc_thr_mutex */
180 
181 /* BEGIN PROTECTED BY svc_exit_mutex */
182 
183 static bool_t svc_exit_done = TRUE;
184 
185 /* END PROTECTED BY svc_exit_mutex */
186 
187 /* VARIABLES PROTECTED BY svc_mutex:
188 	svc_thr_total, svc_thr_active, svc_pending_fds, svc_next_pending,
189 	svc_last_pending, svc_total_pending, svc_thr_total_creates,
190 	svc_thr_total_create_errors,
191 	svcxprt_list_t::next, svcxprt_ext_t::my_xlist,
192 	svc_thr_max, svc_waiters
193  */
194 
195 /* VARIABLES PROTECTED BY svc_fd_lock:
196 	svc_xports, svc_fdset, svc_nfds, svc_nfds_set, svc_max_fd,
197 	svc_pollfd, svc_npollfds, svc_npollfds_set, svc_max_pollfd
198  */
199 
200 /* VARIABLES PROTECTED BY svc_thr_mutex:
201 	svc_pollset, svc_pollfds, svc_next_pollfd, svc_polling
202 	svc_pollset_allocd, svc_polled
203  */
204 
205 /* VARIABLES PROTECTED BY svc_exit_mutex:
206 	svc_exit_done
207  */
208 
209 /* VARIABLES READABLE WITHOUT LOCK:
210 	svc_thr_total, svc_thr_active, svc_thr_total_creates,
211 	svc_thr_total_create_errors,
212 	svc_xports, svc_nfds, svc_nfds_set, svc_max_fd,
213 	svc_npollfds, svc_npollfds_set, svc_max_pollfd,
214 	svc_pollfds, svc_next_pollfd, svc_exit_done, svc_polling,
215 	svc_thr_max, svc_waiters
216  */
217 
218 /* VARIABLES PROTECTED BY "program_logic":
219 	rpc_msg::, svc_req::, svcxprt_ext_t::flags, svc_mt_mode,
220 	svcxprt_ext_t::parent
221  */
222 
223 /* LOCK ORDER:
224 	svc_exit_mutex, svc_thr_mutex, svc_mutex, svc_fd_lock
225  */
226 
227 
228 void
229 svc_run(void)
230 {
231 	/* NO OTHER THREADS ARE RUNNING */
232 
233 	svc_exit_done = FALSE;
234 
235 	while ((svc_npollfds > 0 || svc_ndoorfds > 0) && !svc_exit_done) {
236 		if (svc_npollfds > 0) {
237 			switch (svc_mt_mode) {
238 			case RPC_SVC_MT_NONE:
239 				_svc_run();
240 				break;
241 			default:
242 				_svc_run_mt();
243 				break;
244 			}
245 			continue;
246 		}
247 
248 		(void) mutex_lock(&svc_door_mutex);
249 		if (svc_ndoorfds > 0)
250 			(void) cond_wait(&svc_door_waitcv, &svc_door_mutex);
251 		(void) mutex_unlock(&svc_door_mutex);
252 	}
253 }
254 
255 
256 /*
257  *	This function causes svc_run() to exit by destroying all
258  *	service handles.
259  */
260 void
261 svc_exit(void)
262 {
263 	SVCXPRT	*xprt;
264 	int fd;
265 	char dummy;
266 
267 	/* NO LOCKS HELD */
268 
269 	(void) mutex_lock(&svc_exit_mutex);
270 	if (svc_exit_done) {
271 		(void) mutex_unlock(&svc_exit_mutex);
272 		return;
273 	}
274 	svc_exit_done = TRUE;
275 	for (fd = 0; fd < svc_max_pollfd; fd++) {
276 		xprt = svc_xports[fd];
277 		if (xprt) {
278 			SVC_DESTROY(xprt);
279 		}
280 	}
281 	__svc_free_xprtlist();
282 	__svc_cleanup_door_xprts();
283 	(void) mutex_unlock(&svc_exit_mutex);
284 
285 	if (svc_mt_mode != RPC_SVC_MT_NONE) {
286 		(void) mutex_lock(&svc_mutex);
287 		(void) cond_broadcast(&svc_thr_fdwait);
288 		(void) mutex_unlock(&svc_mutex);
289 
290 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
291 	}
292 
293 	(void) mutex_lock(&svc_door_mutex);
294 	(void) cond_signal(&svc_door_waitcv);	/* wake up door dispatching */
295 	(void) mutex_unlock(&svc_door_mutex);
296 
297 	/* destroy reactor information if any */
298 	__destroy_userfd();
299 }
300 
301 
302 /*
303  * this funtion is called with svc_fd_lock and svc_thr_mutex
304  */
305 
306 static int
307 alloc_pollset(int npollfds)
308 {
309 	if (npollfds > svc_pollset_allocd) {
310 		pollfd_t *tmp;
311 		do {
312 			svc_pollset_allocd += POLLSET_EXTEND;
313 		} while (npollfds > svc_pollset_allocd);
314 		tmp = realloc(svc_pollset,
315 		    sizeof (pollfd_t) * svc_pollset_allocd);
316 		if (tmp == NULL) {
317 			syslog(LOG_ERR, "alloc_pollset: out of memory");
318 			return (-1);
319 		}
320 		svc_pollset = tmp;
321 	}
322 	return (0);
323 }
324 
325 static void
326 _svc_run(void)
327 {
328 	sigset_t set, oldset;
329 	int npollfds;
330 	int i;
331 
332 	/*
333 	 * Block SIGALRM while doing work.  Unblock it while doing poll().
334 	 * This is so that services like rpc.rstatd can cause the poll()
335 	 * to be interrupted due to alarm() but that we don't end up in
336 	 * an MT-unsafe signal handler at an inopportune time.
337 	 */
338 	(void) sigemptyset(&set);
339 	(void) sigaddset(&set, SIGALRM);
340 	(void) sigprocmask(SIG_BLOCK, &set, &oldset);
341 	while (!svc_exit_done) {
342 		/*
343 		 * Check whether there is any server fd on which we may want
344 		 * to wait.
345 		 */
346 		(void) rw_rdlock(&svc_fd_lock);
347 		if (alloc_pollset(svc_npollfds) == -1)
348 			break;
349 		npollfds = __rpc_compress_pollfd(svc_max_pollfd,
350 		    svc_pollfd, svc_pollset);
351 		(void) rw_unlock(&svc_fd_lock);
352 		if (npollfds == 0)
353 			break;	/* None waiting, hence return */
354 
355 		(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
356 		i = poll(svc_pollset, npollfds, -1);
357 		(void) sigprocmask(SIG_BLOCK, &set, &oldset);
358 		switch (i) {
359 		case -1:
360 			/*
361 			 * We ignore all errors, continuing with the assumption
362 			 * that it was set by the signal handlers (or any
363 			 * other outside event) and not caused by poll().
364 			 */
365 		case 0:
366 			continue;
367 		default:
368 			svc_getreq_poll(svc_pollset, i);
369 		}
370 	}
371 	(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
372 }
373 
/*
 * In _svc_run_mt() a selected descriptor (myfd) is normally paired with
 * its slot in the poll set (mypollfd), so that
 *	svc_pollset[mypollfd].fd == myfd
 * When no such slot exists, mypollfd carries one of the negative marker
 * values below instead of a real index.
 */
enum {
	INVALID_POLLFD = -200,	/* no descriptor / no slot selected */
	FD_FROM_PENDING = -199	/* fd was taken from svc_pending_fds */
};
384 
385 static void
386 _svc_run_mt(void)
387 {
388 	int npollfds;
389 	int n_polled, dispatch;
390 
391 	static bool_t first_time = TRUE;
392 	bool_t main_thread = FALSE;
393 	int n_new;
394 	int myfd, mypollfd;
395 	SVCXPRT *parent_xprt, *xprt;
396 
397 	/*
398 	 * Server is multi-threaded.  Do "first time" initializations.
399 	 * Since only one thread exists in the beginning, there's no
400 	 * need for mutex protection for first time initializations.
401 	 */
402 	if (first_time) {
403 		first_time = FALSE;
404 		main_thread = TRUE;
405 		svc_thr_total = 1;	/* this thread */
406 		svc_next_pending = svc_last_pending = 0;
407 
408 		/*
409 		 * Create a pipe for waking up the poll, if new
410 		 * descriptors have been added to svc_fdset.
411 		 */
412 		create_pipe();
413 	}
414 
415 	/* OTHER THREADS ARE RUNNING */
416 
417 	if (svc_exit_done)
418 		return;
419 
420 	for (;;) {
421 		/*
422 		 * svc_thr_mutex prevents more than one thread from
423 		 * trying to select a descriptor to process further.
424 		 * svc_thr_mutex is unlocked after a thread selects
425 		 * a descriptor on which to receive data.  If there are
426 		 * no such descriptors, the thread will poll with
427 		 * svc_thr_mutex locked, after unlocking all other
428 		 * locks.  This prevents more than one thread from
429 		 * trying to poll at the same time.
430 		 */
431 		(void) mutex_lock(&svc_thr_mutex);
432 		(void) mutex_lock(&svc_mutex);
433 continue_with_locks:
434 		myfd = -1;
435 		mypollfd = INVALID_POLLFD;
436 
437 		/*
438 		 * Check if there are any descriptors with data pending.
439 		 */
440 		if (svc_total_pending > 0) {
441 			myfd = svc_pending_fds[svc_next_pending++];
442 			mypollfd = FD_FROM_PENDING;
443 			if (svc_next_pending > CIRCULAR_BUFSIZE)
444 				svc_next_pending = 0;
445 			svc_total_pending--;
446 		}
447 
448 		/*
449 		 * Get the next active file descriptor to process.
450 		 */
451 		if (myfd == -1 && svc_pollfds == 0) {
452 			/*
453 			 * svc_pollset is empty; do polling
454 			 */
455 			svc_polling = TRUE;
456 
457 			/*
458 			 * if there are no file descriptors, return
459 			 */
460 			(void) rw_rdlock(&svc_fd_lock);
461 			if (svc_npollfds == 0 ||
462 			    alloc_pollset(svc_npollfds + 1) == -1) {
463 				(void) rw_unlock(&svc_fd_lock);
464 				svc_polling = FALSE;
465 				svc_thr_total--;
466 				(void) mutex_unlock(&svc_mutex);
467 				(void) mutex_unlock(&svc_thr_mutex);
468 				if (!main_thread) {
469 					thr_exit(NULL);
470 					/* NOTREACHED */
471 				}
472 				break;
473 			}
474 
475 			npollfds = __rpc_compress_pollfd(svc_max_pollfd,
476 			    svc_pollfd, svc_pollset);
477 			(void) rw_unlock(&svc_fd_lock);
478 
479 			if (npollfds == 0) {
480 				/*
481 				 * There are file descriptors, but none of them
482 				 * are available for polling.  If this is the
483 				 * main thread, or if no thread is waiting,
484 				 * wait on condition variable, otherwise exit.
485 				 */
486 				svc_polling = FALSE;
487 				(void) mutex_unlock(&svc_thr_mutex);
488 				if ((!main_thread) && svc_waiters > 0) {
489 					svc_thr_total--;
490 					(void) mutex_unlock(&svc_mutex);
491 					thr_exit(NULL);
492 					/* NOTREACHED */
493 				}
494 
495 				while (svc_npollfds_set == 0 &&
496 				    svc_pollfds == 0 &&
497 				    svc_total_pending == 0 &&
498 				    !svc_exit_done) {
499 					svc_waiters++;
500 					(void) cond_wait(&svc_thr_fdwait,
501 					    &svc_mutex);
502 					svc_waiters--;
503 				}
504 
505 				/*
506 				 * Check exit flag.  If this is not the main
507 				 * thread, exit.
508 				 */
509 				if (svc_exit_done) {
510 					svc_thr_total--;
511 					(void) mutex_unlock(&svc_mutex);
512 					if (!main_thread)
513 						thr_exit(NULL);
514 					break;
515 				}
516 
517 				(void) mutex_unlock(&svc_mutex);
518 				continue;
519 			}
520 
521 			/*
522 			 * We're ready to poll.  Always set svc_pipe[0]
523 			 * as the last one, since the poll will occasionally
524 			 * need to be interrupted.  Release svc_mutex for
525 			 * the duration of the poll, but hold on to
526 			 * svc_thr_mutex, as we don't want any other thread
527 			 * to do the same.
528 			 */
529 			svc_pollset[npollfds].fd = svc_pipe[0];
530 			svc_pollset[npollfds].events = MASKVAL;
531 
532 			do {
533 				int i, j;
534 
535 				(void) mutex_unlock(&svc_mutex);
536 				n_polled = poll(svc_pollset, npollfds + 1, -1);
537 				(void) mutex_lock(&svc_mutex);
538 				if (n_polled <= 0)
539 					continue;
540 
541 				/*
542 				 * Check if information returned indicates one
543 				 * or more closed fd's; find and remove any such
544 				 * information
545 				 */
546 				for (i = 0; i <= npollfds; i++) {
547 					if (svc_pollset[i].revents & POLLNVAL) {
548 						/* Overwrite svc_pollset[i] */
549 						for (j = i; j < npollfds; j++)
550 							svc_pollset[j] =
551 							    svc_pollset[j + 1];
552 						(void) memset(&svc_pollset[j],
553 						    0, sizeof (struct pollfd));
554 						npollfds--;
555 						n_polled--;
556 						i--;
557 					}
558 				}
559 			} while (n_polled <= 0);
560 			svc_polling = FALSE;
561 
562 			/*
563 			 * If there's data in the pipe, clear it.
564 			 */
565 			if (svc_pollset[npollfds].revents) {
566 				clear_pipe();
567 				n_polled--;
568 				svc_pollset[npollfds].revents = 0;
569 			}
570 			svc_polled = npollfds;
571 			svc_pollfds = n_polled;
572 			svc_next_pollfd = 0;
573 
574 			/*
575 			 * Check exit flag.
576 			 */
577 			if (svc_exit_done) {
578 				svc_thr_total--;
579 				(void) mutex_unlock(&svc_mutex);
580 				(void) mutex_unlock(&svc_thr_mutex);
581 				if (!main_thread) {
582 					thr_exit(NULL);
583 					/* NOTREACHED */
584 				}
585 				break;
586 			}
587 
588 			/*
589 			 * If no descriptor is active, continue.
590 			 */
591 			if (svc_pollfds == 0)
592 				goto continue_with_locks;
593 		}
594 
595 		/*
596 		 * If a file descriptor has already not been selected,
597 		 * choose a file descriptor.
598 		 * svc_pollfds and svc_next_pollfd are updated.
599 		 */
600 		if (myfd == -1) {
601 			if (select_next_pollfd(&myfd, &mypollfd) == -1)
602 				goto continue_with_locks;
603 		}
604 
605 		/*
606 		 * Check to see if new threads need to be started.
607 		 * Count of threads that could be gainfully employed is
608 		 * obtained as follows:
609 		 *	- count 1 for poller
610 		 *	- count 1 for this request
611 		 *	- count active file descriptors (svc_pollfds)
612 		 *	- count pending file descriptors
613 		 *
614 		 * (svc_thr_total - svc_thr_active) are already available.
615 		 * This thread is one of the available threads.
616 		 *
617 		 * Number of new threads should not exceed
618 		 *	(svc_thr_max - svc_thr_total).
619 		 */
620 		if (svc_thr_total < svc_thr_max &&
621 		    svc_mt_mode == RPC_SVC_MT_AUTO && !svc_exit_done) {
622 			n_new = 1 + 1 + svc_pollfds + svc_total_pending -
623 			    (svc_thr_total - svc_thr_active);
624 			if (n_new > (svc_thr_max - svc_thr_total))
625 				n_new = svc_thr_max - svc_thr_total;
626 			if (n_new > 0)
627 				start_threads(n_new);
628 		}
629 
630 		/*
631 		 * Get parent xprt.  It is possible for the parent service
632 		 * handle to be destroyed by now, due to a race condition.
633 		 * Check for this, and if so, log a warning and go on.
634 		 */
635 		parent_xprt = svc_xports[myfd];
636 		if (parent_xprt == NULL) {
637 			/* Check if it is not a user FD */
638 			if (__is_a_userfd(myfd) == TRUE)
639 				__svc_getreq_user(&(svc_pollset[mypollfd]));
640 			goto continue_with_locks;
641 		}
642 /* LINTED pointer alignment */
643 		if (svc_defunct(parent_xprt) || svc_failed(parent_xprt))
644 			goto continue_with_locks;
645 
646 		/*
647 		 * Make a copy of parent xprt, update svc_fdset.
648 		 */
649 		if ((xprt = make_xprt_copy(parent_xprt)) == NULL)
650 			goto continue_with_locks;
651 
652 		/*
653 		 * Keep track of active threads in automatic mode.
654 		 */
655 		if (svc_mt_mode == RPC_SVC_MT_AUTO)
656 			svc_thr_active++;
657 
658 		/*
659 		 * Release mutexes so other threads can get going.
660 		 */
661 		(void) mutex_unlock(&svc_mutex);
662 		(void) mutex_unlock(&svc_thr_mutex);
663 
664 		/*
665 		 * Process request.
666 		 */
667 		{
668 			struct rpc_msg *msg;
669 			struct svc_req *r;
670 			char *cred_area;
671 
672 /* LINTED pointer alignment */
673 			msg = SVCEXT(xprt)->msg;
674 /* LINTED pointer alignment */
675 			r = SVCEXT(xprt)->req;
676 /* LINTED pointer alignment */
677 			cred_area = SVCEXT(xprt)->cred_area;
678 
679 
680 			msg->rm_call.cb_cred.oa_base = cred_area;
681 			msg->rm_call.cb_verf.oa_base =
682 			    &(cred_area[MAX_AUTH_BYTES]);
683 			r->rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);
684 
685 			/*
686 			 * receive RPC message
687 			 */
688 			if ((dispatch = SVC_RECV(xprt, msg))) {
689 				if (svc_mt_mode != RPC_SVC_MT_NONE)
690 /* LINTED pointer alignment */
691 					svc_flags(xprt) |= SVC_ARGS_CHECK;
692 				dispatch = _svc_prog_dispatch(xprt, msg, r);
693 
694 				/*
695 				 * Call cleanup procedure if set.
696 				 */
697 				if (__proc_cleanup_cb != NULL)
698 					(*__proc_cleanup_cb)(xprt);
699 			} else
700 				svc_args_done(xprt);
701 
702 			/*
703 			 * Finish up, if automatic mode, or not dispatched.
704 			 */
705 			if (svc_mt_mode == RPC_SVC_MT_AUTO || !dispatch) {
706 /* LINTED pointer alignment */
707 				if (svc_flags(xprt) & SVC_ARGS_CHECK)
708 					svc_args_done(xprt);
709 				(void) mutex_lock(&svc_mutex);
710 				_svc_done_private(xprt);
711 				if (svc_mt_mode == RPC_SVC_MT_AUTO) {
712 					/*
713 					 * not active any more
714 					 */
715 					svc_thr_active--;
716 
717 					/*
718 					 * If not main thread, exit unless
719 					 * there's some immediate work.
720 					 */
721 					if (!main_thread &&
722 					    svc_pollfds <= 0 &&
723 					    svc_total_pending <= 0 &&
724 					    (svc_polling ||
725 					    svc_waiters > 0)) {
726 						svc_thr_total--;
727 						if (svc_thr_total ==
728 						    svc_waiters) {
729 							(void) cond_broadcast(
730 							    &svc_thr_fdwait);
731 						}
732 						(void) mutex_unlock(&svc_mutex);
733 						thr_exit(NULL);
734 						/* NOTREACHED */
735 					}
736 				}
737 				(void) mutex_unlock(&svc_mutex);
738 			}
739 		}
740 
741 	}
742 }
743 
744 
745 /*
746  * start_threads() - Start specified number of threads.
747  */
748 static void
749 start_threads(int num_threads)
750 {
751 	int		i;
752 
753 	assert(MUTEX_HELD(&svc_mutex));
754 
755 	for (i = 0; i < num_threads; i++) {
756 		if (thr_create(NULL, 0, (void *(*)(void *))_svc_run_mt, NULL,
757 		    THR_DETACHED, NULL) == 0) {
758 			svc_thr_total++;
759 			svc_thr_total_creates++;
760 		} else {
761 			svc_thr_total_create_errors++;
762 		}
763 	}
764 }
765 
766 
767 /*
768  * create_pipe() - create pipe for breaking out of poll.
769  */
770 static void
771 create_pipe(void)
772 {
773 	if (pipe(svc_pipe) == -1) {
774 		syslog(LOG_ERR, dgettext(__nsl_dom,
775 		    "RPC: svc could not create pipe - exiting"));
776 		exit(1);
777 	}
778 	if (fcntl(svc_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
779 		syslog(LOG_ERR, dgettext(__nsl_dom,
780 		    "RPC: svc pipe error - exiting"));
781 		exit(1);
782 	}
783 	if (fcntl(svc_pipe[1], F_SETFL, O_NONBLOCK) == -1) {
784 		syslog(LOG_ERR, dgettext(__nsl_dom,
785 		    "RPC: svc pipe error - exiting"));
786 		exit(1);
787 	}
788 }
789 
790 
791 /*
792  * clear_pipe() - Empty data in pipe.
793  */
794 static void
795 clear_pipe(void)
796 {
797 	char	buf[16];
798 	int	i;
799 
800 	do {
801 		i = read(svc_pipe[0], buf, sizeof (buf));
802 	} while (i == sizeof (buf));
803 }
804 
805 
806 /*
807  * select_next_pollfd() - Select the next active fd in svc_pollset.
808  */
809 static int
810 select_next_pollfd(int *fd, int *pollfdIndex)
811 {
812 	int i;
813 
814 	assert(MUTEX_HELD(&svc_thr_mutex));
815 	assert(MUTEX_HELD(&svc_mutex));
816 
817 	for (i = svc_next_pollfd; svc_pollfds > 0 && i < svc_polled; i++) {
818 		if (svc_pollset[i].revents) {
819 			svc_pollfds--;
820 			/*
821 			 * No more special case for POLLNVAL, because it may
822 			 * be linked with a user file descriptot callback
823 			 */
824 			svc_next_pollfd = i + 1;
825 
826 			*fd = svc_pollset[i].fd;
827 			*pollfdIndex = i;
828 
829 			return (0);
830 		}
831 	}
832 	svc_next_pollfd = svc_pollfds = 0;
833 	*fd = -1;
834 	*pollfdIndex = INVALID_POLLFD;
835 	return (-1);
836 }
837 
838 
839 /*
840  * make_xprt_copy() - make a copy of the parent xprt.
841  * Clear fd bit in svc_fdset.
842  */
843 static SVCXPRT *
844 make_xprt_copy(SVCXPRT *parent)
845 {
846 /* LINTED pointer alignment */
847 	SVCXPRT_LIST	*xlist = SVCEXT(parent)->my_xlist;
848 	SVCXPRT_LIST	*xret;
849 	SVCXPRT		*xprt;
850 	int		fd = parent->xp_fd;
851 
852 	assert(MUTEX_HELD(&svc_mutex));
853 
854 	xret = xlist->next;
855 	if (xret) {
856 		xlist->next = xret->next;
857 		xret->next = NULL;
858 		xprt = xret->xprt;
859 /* LINTED pointer alignment */
860 		svc_flags(xprt) = svc_flags(parent);
861 	} else
862 		xprt = svc_copy(parent);
863 
864 	if (xprt) {
865 /* LINTED pointer alignment */
866 		SVCEXT(parent)->refcnt++;
867 		(void) rw_wrlock(&svc_fd_lock);
868 		clear_pollfd(fd);
869 		(void) rw_unlock(&svc_fd_lock);
870 	}
871 	return (xprt);
872 }
873 
874 /*
875  * _svc_done_private() - return copies to library.
876  */
877 static void
878 _svc_done_private(SVCXPRT *xprt)
879 {
880 	SVCXPRT		*parent;
881 	SVCXPRT_LIST	*xhead, *xlist;
882 
883 	assert(MUTEX_HELD(&svc_mutex));
884 
885 /* LINTED pointer alignment */
886 	if ((parent = SVCEXT(xprt)->parent) == NULL)
887 		return;
888 
889 /* LINTED pointer alignment */
890 	xhead = SVCEXT(parent)->my_xlist;
891 /* LINTED pointer alignment */
892 	xlist = SVCEXT(xprt)->my_xlist;
893 	xlist->next = xhead->next;
894 	xhead->next = xlist;
895 
896 /* LINTED pointer alignment */
897 	SVCEXT(parent)->refcnt--;
898 
899 	/*
900 	 * Propagate any error flags.  This is done in both directions to
901 	 * ensure that if one child gets an error, everyone will see it
902 	 * (even if there are multiple outstanding children) and the
903 	 * transport will get closed.
904 	 */
905 /* LINTED pointer alignment */
906 	svc_flags(xprt) |= svc_flags(parent);
907 /* LINTED pointer alignment */
908 	if (svc_failed(xprt) || svc_defunct(xprt)) {
909 /* LINTED pointer alignment */
910 		svc_flags(parent) |= (svc_flags(xprt) &
911 		    (SVC_FAILED | SVC_DEFUNCT));
912 /* LINTED pointer alignment */
913 		if (SVCEXT(parent)->refcnt == 0)
914 			_svc_destroy_private(xprt);
915 	}
916 }
917 
918 void
919 svc_done(SVCXPRT *xprt)
920 {
921 	if (svc_mt_mode != RPC_SVC_MT_USER)
922 		return;
923 
924 	/*
925 	 * Make sure file descriptor is released in user mode.
926 	 * If the xprt is a door, do nothing: this work is performed by
927 	 * svc_door.c's return_xprt_copy() routine, which is basically a
928 	 * door-specific copy of _svc_done_private().
929 	 */
930 /* LINTED pointer alignment */
931 	if (svc_type(xprt) == SVC_DOOR)
932 		return;
933 
934 /* LINTED pointer alignment */
935 	if (svc_flags(xprt) & SVC_ARGS_CHECK)
936 		svc_args_done(xprt);
937 
938 	(void) mutex_lock(&svc_mutex);
939 	_svc_done_private(xprt);
940 	(void) mutex_unlock(&svc_mutex);
941 }
942 
943 
944 /*
945  * Mark argument completion.  Release file descriptor.
946  */
947 void
948 svc_args_done(SVCXPRT *xprt)
949 {
950 	char	dummy;
951 /* LINTED pointer alignment */
952 	SVCXPRT	*parent = SVCEXT(xprt)->parent;
953 	bool_t	wake_up_poller;
954 	enum	xprt_stat stat;
955 
956 /* LINTED pointer alignment */
957 	svc_flags(xprt) |= svc_flags(parent);
958 /* LINTED pointer alignment */
959 	svc_flags(xprt) &= ~SVC_ARGS_CHECK;
960 /* LINTED pointer alignment */
961 	if (svc_failed(xprt) || svc_defunct(parent))
962 		return;
963 
964 /* LINTED pointer alignment */
965 	if (svc_type(xprt) == SVC_CONNECTION &&
966 	    (stat = SVC_STAT(xprt)) != XPRT_IDLE) {
967 		if (stat == XPRT_MOREREQS) {
968 			(void) mutex_lock(&svc_mutex);
969 			svc_pending_fds[svc_last_pending++] = xprt->xp_fd;
970 			if (svc_last_pending > CIRCULAR_BUFSIZE)
971 				svc_last_pending = 0;
972 			svc_total_pending++;
973 			(void) mutex_unlock(&svc_mutex);
974 			wake_up_poller = FALSE;
975 		} else {
976 			/*
977 			 * connection failed
978 			 */
979 			return;
980 		}
981 	} else {
982 		(void) rw_wrlock(&svc_fd_lock);
983 		set_pollfd(xprt->xp_fd, MASKVAL);
984 		(void) rw_unlock(&svc_fd_lock);
985 		wake_up_poller = TRUE;
986 	}
987 
988 	if (!wake_up_poller || !svc_polling) {
989 		/*
990 		 * Wake up any waiting threads.
991 		 */
992 		(void) mutex_lock(&svc_mutex);
993 		if (svc_waiters > 0) {
994 			(void) cond_broadcast(&svc_thr_fdwait);
995 			(void) mutex_unlock(&svc_mutex);
996 			return;
997 		}
998 		(void) mutex_unlock(&svc_mutex);
999 	}
1000 
1001 	/*
1002 	 * Wake up any polling thread.
1003 	 */
1004 	if (svc_polling)
1005 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
1006 }
1007 
1008 
1009 int
1010 __rpc_legal_connmaxrec(int suggested) {
1011 	if (suggested == -1) {
1012 		/* Supply default */
1013 		return (RPC_MAXDATASIZE + 2*sizeof (uint32_t));
1014 	} else if (suggested < 0) {
1015 		return (-1);
1016 	} else if (suggested > 0) {
1017 		/* Round down to multiple of BYTES_PER_XDR_UNIT */
1018 		suggested -= suggested % BYTES_PER_XDR_UNIT;
1019 		/* If possible, allow for two fragment headers */
1020 		if (suggested < MAXINT-(2*sizeof (uint32_t))) {
1021 			/* Allow for two fragment headers */
1022 			suggested += 2 * sizeof (uint32_t);
1023 		} else {
1024 			suggested = MAXINT;
1025 		}
1026 		if (suggested < sizeof (struct rpc_msg)) {
1027 			return (-1);
1028 		}
1029 	}
1030 	return (suggested);
1031 }
1032 
1033 
1034 bool_t
1035 rpc_control(int op, void *info)
1036 {
1037 	int		tmp;
1038 
1039 	switch (op) {
1040 	case RPC_SVC_MTMODE_SET:
1041 		tmp = *((int *)info);
1042 		if (tmp != RPC_SVC_MT_NONE && tmp != RPC_SVC_MT_AUTO &&
1043 		    tmp != RPC_SVC_MT_USER)
1044 			return (FALSE);
1045 		if (svc_mt_mode != RPC_SVC_MT_NONE && svc_mt_mode != tmp)
1046 			return (FALSE);
1047 		svc_mt_mode = tmp;
1048 		return (TRUE);
1049 	case RPC_SVC_MTMODE_GET:
1050 		*((int *)info) = svc_mt_mode;
1051 		return (TRUE);
1052 	case RPC_SVC_THRMAX_SET:
1053 		if ((tmp = *((int *)info)) < 1)
1054 			return (FALSE);
1055 		(void) mutex_lock(&svc_mutex);
1056 		svc_thr_max = tmp;
1057 		(void) mutex_unlock(&svc_mutex);
1058 		return (TRUE);
1059 	case RPC_SVC_THRMAX_GET:
1060 		*((int *)info) = svc_thr_max;
1061 		return (TRUE);
1062 	case RPC_SVC_THRTOTAL_GET:
1063 		*((int *)info) = svc_thr_total;
1064 		return (TRUE);
1065 	case RPC_SVC_THRCREATES_GET:
1066 		*((int *)info) = svc_thr_total_creates;
1067 		return (TRUE);
1068 	case RPC_SVC_THRERRORS_GET:
1069 		*((int *)info) = svc_thr_total_create_errors;
1070 		return (TRUE);
1071 	case RPC_SVC_USE_POLLFD:
1072 		if (*((int *)info) && !__rpc_use_pollfd_done) {
1073 			__rpc_use_pollfd_done = 1;
1074 			return (TRUE);
1075 		}
1076 		return (FALSE);
1077 	case RPC_SVC_CONNMAXREC_SET:
1078 		tmp = __rpc_legal_connmaxrec(*(int *)info);
1079 		if (tmp >= 0) {
1080 			__rpc_connmaxrec = tmp;
1081 			return (TRUE);
1082 		} else {
1083 			return (FALSE);
1084 		}
1085 	case RPC_SVC_CONNMAXREC_GET:
1086 		*((int *)info) = __rpc_connmaxrec;
1087 		return (TRUE);
1088 	case RPC_SVC_IRTIMEOUT_SET:
1089 		tmp = *((int *)info);
1090 		if (tmp >= 0) {
1091 			__rpc_irtimeout = tmp;
1092 			return (TRUE);
1093 		} else {
1094 			return (FALSE);
1095 		}
1096 	/*
1097 	 * No mutex necessary as _EXCLBIND_SET will/should only
1098 	 * be used before an RPC daemon goes mt-hot.
1099 	 */
1100 	case __RPC_SVC_EXCLBIND_SET:
1101 		if (info) {
1102 			__rpc_tp_exclbind = *((bool_t *)info);
1103 			return (TRUE);
1104 		}
1105 		return (FALSE);
1106 	case __RPC_SVC_EXCLBIND_GET:
1107 		if (info) {
1108 			*((bool_t *)info) = __rpc_tp_exclbind;
1109 			return (TRUE);
1110 		}
1111 		return (FALSE);
1112 
1113 	case __RPC_SVC_LSTNBKLOG_SET:
1114 		tmp = *(int *)info;
1115 		if (tmp > 0) {
1116 			__svc_lstnbklog = tmp;
1117 			return (TRUE);
1118 		}
1119 		return (FALSE);
1120 	case __RPC_SVC_LSTNBKLOG_GET:
1121 		*(int *)info = __svc_lstnbklog;
1122 		return (TRUE);
1123 
1124 	default:
1125 		return (FALSE);
1126 	}
1127 }
1128