xref: /titanic_51/usr/src/lib/libnsl/rpc/svc_run.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29  * Portions of this source code were derived from Berkeley
30  * 4.3 BSD under license from the Regents of the University of
31  * California.
32  */
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 /*
37  * This is the rpc server side idle loop
38  * Wait for input, call server program.
39  */
40 #include "mt.h"
41 #include "rpc_mt.h"
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <signal.h>
45 #include <rpc/rpc.h>
46 #include <errno.h>
47 #include <sys/poll.h>
48 #include <sys/types.h>
49 #include <rpc/trace.h>
50 #include <syslog.h>
51 #include <thread.h>
52 #include <assert.h>
53 #include <libintl.h>
54 #include <values.h>
55 
56 extern const char __nsl_dom[];
57 
58 extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
59 extern bool_t __is_a_userfd(int P_fd);
60 extern void   __destroy_userfd();
61 extern void clear_pollfd(int);
62 extern void set_pollfd(int /* fd */, short /* events */);
63 extern void svc_getreq_poll();
64 extern void (*__proc_cleanup_cb)();
65 
66 static void start_threads();
67 static void create_pipe();
68 static void clear_pipe();
69 static int select_next_pollfd();
70 static SVCXPRT *make_xprt_copy();
71 static void _svc_run_mt();
72 static void _svc_run();
73 
74 int _svc_prog_dispatch();
75 static void _svc_done_private();
76 
77 extern rwlock_t svc_fd_lock;
78 extern mutex_t	svc_door_mutex;
79 extern cond_t	svc_door_waitcv;
80 extern int	svc_ndoorfds;
81 extern void	__svc_cleanup_door_xprts();
82 extern void	__svc_free_xprtlist();
83 extern void	__svc_getreq_user(struct pollfd *);
84 
/*
 * Maximum fragment size allowed for connection oriented connections.
 * Zero means that no maximum size limit has been requested.
 * Updated by rpc_control(RPC_SVC_CONNMAXREC_SET, ...), which stores the
 * value produced by __rpc_legal_connmaxrec().
 */
int __rpc_connmaxrec = 0;

/*
 * Inter-Record Timeout in secs for non-blocked connection RPC.
 * Updated by rpc_control(RPC_SVC_IRTIMEOUT_SET, ...); negative values are
 * rejected there.
 */
int __rpc_irtimeout = 35;

/*
 * Request exclusive access to tcp and udp non-priv ports bound with a
 * wildcard addr.
 * Read and written by rpc_control(__RPC_SVC_EXCLBIND_GET / _SET, ...).
 */
bool_t __rpc_tp_exclbind = FALSE;
99 
100 /*
101  * XXX - eventually, all mutexes and their initializations static
102  */
103 
/*
 * Variables used for MT
 */
int svc_mt_mode;		/* multi-threading mode */

int svc_pipe[2];	/* pipe for breaking out of poll: read(0), write(1) */

/* BEGIN PROTECTED BY svc_mutex */

static int svc_thr_max = 16;	/* default maximum number of threads allowed */

static int svc_thr_total;	/* current number of threads */

static int svc_thr_active;	/* current number of threads active */

/*
 * Circular array of file descriptors with pending data.
 * The read and write indices (svc_next_pending / svc_last_pending) wrap
 * back to 0 only once they exceed CIRCULAR_BUFSIZE, which is why the array
 * has CIRCULAR_BUFSIZE + 1 slots.
 */

#define	CIRCULAR_BUFSIZE	1024

static int svc_pending_fds[CIRCULAR_BUFSIZE+1];	/* fds with pending data */

static int svc_next_pending;			/* next one to be processed */

static int svc_last_pending;			/* last one in list */

static int svc_total_pending;			/* total in list */

static int svc_thr_total_creates;	/* total created - stats */

static int svc_thr_total_create_errors;	/* total create errors - stats */

static int svc_waiters;		/* number of waiting threads */

/* END PROTECTED BY svc_mutex */

/* BEGIN PROTECTED BY svc_fd_lock: */

int svc_nfds;		/* total number of active file descriptors */

int svc_nfds_set;	/* total number of fd bits set in svc_fdset */

int svc_max_fd = 0;	/* largest active file descriptor */

int svc_npollfds;	/* total number of active pollfds */

int svc_npollfds_set;	/* total number of pollfd set in svc_pollfd */

int svc_max_pollfd;	/* largest active pollfd so far */

int svc_pollfd_allocd;  /* number of pollfd structures allocated */

/* END PROTECTED BY svc_fd_lock: */

/* BEGIN PROTECTED BY svc_thr_mutex */

/* svc_pollset grows in steps of POLLSET_EXTEND entries (see alloc_pollset) */
#define	POLLSET_EXTEND	256
static int svc_pollset_allocd;	/* number of entries allocated in svc_pollset */
static struct pollfd *svc_pollset;
				/*
				 * array of file descriptors currently active
				 */
static int svc_polled;		/* no of fds polled in last poll() - input */

static int svc_pollfds;		/* no of active fds in last poll() - output */

static int svc_next_pollfd;	/* next fd to process in svc_pollset */

bool_t svc_polling;		/* true if a thread is polling */

/* END PROTECTED BY svc_thr_mutex */

/* BEGIN PROTECTED BY svc_exit_mutex */

/* TRUE until svc_run() clears it; set again by svc_exit() */
static bool_t svc_exit_done = TRUE;
178 
179 /* END PROTECTED BY svc_exit_mutex */
180 
181 /*
182  * Warlock section
183  */
184 
185 /* VARIABLES PROTECTED BY svc_mutex:
186 	svc_thr_total, svc_thr_active, svc_pending_fds, svc_next_pending,
187 	svc_last_pending, svc_total_pending, svc_thr_total_creates,
188 	svc_thr_total_create_errors,
189 	svcxprt_list_t::next, svcxprt_ext_t::my_xlist,
190 	svc_thr_max, svc_waiters
191  */
192 
193 /* VARIABLES PROTECTED BY svc_fd_lock:
194 	svc_xports, svc_fdset, svc_nfds, svc_nfds_set, svc_max_fd,
195 	svc_pollfd, svc_npollfds, svc_npollfds_set, svc_max_pollfd
196  */
197 
/* VARIABLES PROTECTED BY svc_thr_mutex:
	svc_pollset, svc_pollfds, svc_next_pollfd, svc_polling,
	svc_pollset_allocd, svc_polled
 */
202 
203 /* VARIABLES PROTECTED BY svc_exit_mutex:
204 	svc_exit_done
205  */
206 
207 /* VARIABLES READABLE WITHOUT LOCK:
208 	svc_thr_total, svc_thr_active, svc_thr_total_creates,
209 	svc_thr_total_create_errors,
210 	svc_xports, svc_nfds, svc_nfds_set, svc_max_fd,
211 	svc_npollfds, svc_npollfds_set, svc_max_pollfd,
212 	svc_pollfds, svc_next_pollfd, svc_exit_done, svc_polling,
213 	svc_thr_max, svc_waiters
214  */
215 
216 /* VARIABLES PROTECTED BY "program_logic":
217 	rpc_msg::, svc_req::, svcxprt_ext_t::flags, svc_mt_mode,
218 	svcxprt_ext_t::parent
219  */
220 
221 /* LOCK ORDER:
222 	svc_exit_mutex, svc_thr_mutex, svc_mutex, svc_fd_lock
223  */
224 
225 
226 void
227 svc_run()
228 {
229 	/* NO OTHER THREADS ARE RUNNING */
230 
231 	svc_exit_done = FALSE;
232 
233 	while ((svc_npollfds > 0 || svc_ndoorfds > 0) && !svc_exit_done) {
234 		if (svc_npollfds > 0) {
235 			switch (svc_mt_mode) {
236 			case RPC_SVC_MT_NONE:
237 				_svc_run();
238 				break;
239 			default:
240 				_svc_run_mt();
241 				break;
242 			}
243 			continue;
244 		}
245 
246 		mutex_lock(&svc_door_mutex);
247 		if (svc_ndoorfds > 0)
248 			cond_wait(&svc_door_waitcv, &svc_door_mutex);
249 		mutex_unlock(&svc_door_mutex);
250 	}
251 }
252 
253 
254 /*
255  *	This function causes svc_run() to exit by destroying all
256  *	service handles.
257  */
258 void
259 svc_exit()
260 {
261 	SVCXPRT	*xprt;
262 	int fd;
263 	char dummy;
264 
265 	/* NO LOCKS HELD */
266 
267 	trace1(TR_svc_exit, 0);
268 	mutex_lock(&svc_exit_mutex);
269 	if (svc_exit_done) {
270 		mutex_unlock(&svc_exit_mutex);
271 		trace1(TR_svc_exit, 1);
272 		return;
273 	}
274 	svc_exit_done = TRUE;
275 	for (fd = 0; fd < svc_max_pollfd; fd++) {
276 		xprt = svc_xports[fd];
277 		if (xprt) {
278 			SVC_DESTROY(xprt);
279 		}
280 	}
281 	__svc_free_xprtlist();
282 	__svc_cleanup_door_xprts();
283 	mutex_unlock(&svc_exit_mutex);
284 
285 	if (svc_mt_mode != RPC_SVC_MT_NONE) {
286 		mutex_lock(&svc_mutex);
287 		cond_broadcast(&svc_thr_fdwait);
288 		mutex_unlock(&svc_mutex);
289 
290 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
291 	}
292 
293 	mutex_lock(&svc_door_mutex);
294 	cond_signal(&svc_door_waitcv);	/* wake up door dispatching */
295 	mutex_unlock(&svc_door_mutex);
296 
297 	/* destroy reactor information if any */
298 	__destroy_userfd();
299 
300 	trace1(TR_svc_exit, 1);
301 }
302 
303 
304 /*
305  * this funtion is called with svc_fd_lock and svc_thr_mutex
306  */
307 
308 static int
309 alloc_pollset(int npollfds)
310 {
311 	if (npollfds > svc_pollset_allocd) {
312 		pollfd_t *tmp;
313 		do {
314 			svc_pollset_allocd += POLLSET_EXTEND;
315 		} while (npollfds > svc_pollset_allocd);
316 		tmp = realloc(svc_pollset,
317 				sizeof (pollfd_t) * svc_pollset_allocd);
318 		if (tmp == NULL) {
319 			syslog(LOG_ERR, "alloc_pollset: out of memory");
320 			return (-1);
321 		}
322 		svc_pollset = tmp;
323 	}
324 	return (0);
325 }
326 
327 extern int _sigemptyset(sigset_t *);
328 extern int _sigaddset(sigset_t *, int);
329 extern int _sigprocmask(int, const sigset_t *, sigset_t *);
330 
331 static void
332 _svc_run()
333 {
334 	sigset_t set, oldset;
335 	int npollfds;
336 	int i;
337 
338 	/*
339 	 * Block SIGALRM while doing work.  Unblock it while doing poll().
340 	 * This is so that services like rpc.rstatd can cause the poll()
341 	 * to be interrupted due to alarm() but that we don't end up in
342 	 * an MT-unsafe signal handler at an inopportune time.
343 	 */
344 	(void) _sigemptyset(&set);
345 	(void) _sigaddset(&set, SIGALRM);
346 	(void) _sigprocmask(SIG_BLOCK, &set, &oldset);
347 	trace1(TR_svc_run, 0);
348 	while (!svc_exit_done) {
349 		/*
350 		 * Check whether there is any server fd on which we may want
351 		 * to wait.
352 		 */
353 		rw_rdlock(&svc_fd_lock);
354 		if (alloc_pollset(svc_npollfds) == -1)
355 			break;
356 		npollfds = __rpc_compress_pollfd(svc_max_pollfd,
357 			svc_pollfd, svc_pollset);
358 		rw_unlock(&svc_fd_lock);
359 		if (npollfds == 0)
360 			break;	/* None waiting, hence return */
361 
362 		(void) _sigprocmask(SIG_SETMASK, &oldset, NULL);
363 		i = poll(svc_pollset, npollfds, -1);
364 		(void) _sigprocmask(SIG_BLOCK, &set, &oldset);
365 		switch (i) {
366 		case -1:
367 			/*
368 			 * We ignore all errors, continuing with the assumption
369 			 * that it was set by the signal handlers (or any
370 			 * other outside event) and not caused by poll().
371 			 */
372 		case 0:
373 			continue;
374 		default:
375 			svc_getreq_poll(svc_pollset, i);
376 		}
377 	}
378 	trace1(TR_svc_run, 1);
379 	(void) _sigprocmask(SIG_SETMASK, &oldset, NULL);
380 }
381 
/*
 * In _svc_run_mt, a selected descriptor (myfd) normally comes with the
 * index of its svc_pollset entry (mypollfd), such that
 *	svc_pollset[mypollfd].fd == myfd
 * When no such entry exists, mypollfd holds one of the negative sentinels
 * below instead of an index:
 *	INVALID_POLLFD	- no descriptor has been selected
 *	FD_FROM_PENDING	- the descriptor was taken from svc_pending_fds
 */
enum {
	INVALID_POLLFD	= -200,
	FD_FROM_PENDING	= -199
};
392 
393 static void
394 _svc_run_mt()
395 {
396 	int npollfds;
397 	int n_polled, dispatch;
398 
399 	static bool_t first_time = TRUE;
400 	bool_t main_thread = FALSE;
401 	int n_new;
402 	int myfd, mypollfd;
403 	SVCXPRT *parent_xprt, *xprt;
404 
405 	/*
406 	 * Server is multi-threaded.  Do "first time" initializations.
407 	 * Since only one thread exists in the beginning, there's no
408 	 * need for mutex protection for first time initializations.
409 	 */
410 	if (first_time) {
411 		first_time = FALSE;
412 		main_thread = TRUE;
413 		svc_thr_total = 1;	/* this thread */
414 		svc_next_pending = svc_last_pending = 0;
415 
416 		/*
417 		 * Create a pipe for waking up the poll, if new
418 		 * descriptors have been added to svc_fdset.
419 		 */
420 		create_pipe();
421 	}
422 
423 	/* OTHER THREADS ARE RUNNING */
424 
425 	if (svc_exit_done)
426 		return;
427 
428 	for (;;) {
429 		/*
430 		 * svc_thr_mutex prevents more than one thread from
431 		 * trying to select a descriptor to process further.
432 		 * svc_thr_mutex is unlocked after a thread selects
433 		 * a descriptor on which to receive data.  If there are
434 		 * no such descriptors, the thread will poll with
435 		 * svc_thr_mutex locked, after unlocking all other
436 		 * locks.  This prevents more than one thread from
437 		 * trying to poll at the same time.
438 		 */
439 		mutex_lock(&svc_thr_mutex);
440 		mutex_lock(&svc_mutex);
441 continue_with_locks:
442 		myfd = -1;
443 		mypollfd = INVALID_POLLFD;
444 
445 		/*
446 		 * Check if there are any descriptors with data pending.
447 		 */
448 		if (svc_total_pending > 0) {
449 			myfd = svc_pending_fds[svc_next_pending++];
450 			mypollfd = FD_FROM_PENDING;
451 			if (svc_next_pending > CIRCULAR_BUFSIZE)
452 				svc_next_pending = 0;
453 			svc_total_pending--;
454 		}
455 
456 		/*
457 		 * Get the next active file descriptor to process.
458 		 */
459 		if (myfd == -1 && svc_pollfds == 0) {
460 			/*
461 			 * svc_pollset is empty; do polling
462 			 */
463 			svc_polling = TRUE;
464 
465 			/*
466 			 * if there are no file descriptors, return
467 			 */
468 			rw_rdlock(&svc_fd_lock);
469 			if (svc_npollfds == 0 ||
470 					alloc_pollset(svc_npollfds + 1) == -1) {
471 				rw_unlock(&svc_fd_lock);
472 				svc_polling = FALSE;
473 				svc_thr_total--;
474 				mutex_unlock(&svc_mutex);
475 				mutex_unlock(&svc_thr_mutex);
476 				if (!main_thread) {
477 					thr_exit(NULL);
478 					/* NOTREACHED */
479 				}
480 				break;
481 			}
482 
483 			npollfds = __rpc_compress_pollfd(svc_max_pollfd,
484 					svc_pollfd, svc_pollset);
485 			rw_unlock(&svc_fd_lock);
486 
487 			if (npollfds == 0) {
488 				/*
489 				 * There are file descriptors, but none of them
490 				 * are available for polling.  If this is the
491 				 * main thread, or if no thread is waiting,
492 				 * wait on condition variable, otherwise exit.
493 				 */
494 				svc_polling = FALSE;
495 				mutex_unlock(&svc_thr_mutex);
496 				if ((!main_thread) && svc_waiters > 0) {
497 					svc_thr_total--;
498 					mutex_unlock(&svc_mutex);
499 					thr_exit(NULL);
500 					/* NOTREACHED */
501 				}
502 
503 				while (svc_npollfds_set == 0 &&
504 					svc_pollfds == 0 &&
505 					svc_total_pending == 0 &&
506 							!svc_exit_done) {
507 					svc_waiters++;
508 					cond_wait(&svc_thr_fdwait, &svc_mutex);
509 					svc_waiters--;
510 				}
511 
512 				/*
513 				 * Check exit flag.  If this is not the main
514 				 * thread, exit.
515 				 */
516 				if (svc_exit_done) {
517 					svc_thr_total--;
518 					mutex_unlock(&svc_mutex);
519 					if (!main_thread)
520 						thr_exit(NULL);
521 					break;
522 				}
523 
524 				mutex_unlock(&svc_mutex);
525 				continue;
526 			}
527 
528 			/*
529 			 * We're ready to poll.  Always set svc_pipe[0]
530 			 * as the last one, since the poll will occasionally
531 			 * need to be interrupted.  Release svc_mutex for
532 			 * the duration of the poll, but hold on to
533 			 * svc_thr_mutex, as we don't want any other thread
534 			 * to do the same.
535 			 */
536 			svc_pollset[npollfds].fd = svc_pipe[0];
537 			svc_pollset[npollfds].events = MASKVAL;
538 
539 			do {
540 				int i, j;
541 
542 				mutex_unlock(&svc_mutex);
543 				n_polled = poll(svc_pollset, npollfds + 1, -1);
544 				mutex_lock(&svc_mutex);
545 				if (n_polled <= 0)
546 					continue;
547 
548 				/*
549 				 * Check if information returned indicates one
550 				 * or more closed fd's; find and remove any such
551 				 * information
552 				 */
553 				for (i = 0; i <= npollfds; i++) {
554 					if (svc_pollset[i].revents & POLLNVAL) {
555 						/* Overwrite svc_pollset[i] */
556 						for (j = i; j < npollfds; j++)
557 							svc_pollset[j] =
558 							    svc_pollset[j + 1];
559 						(void) memset(&svc_pollset[j],
560 						    0, sizeof (struct pollfd));
561 						npollfds--;
562 						n_polled--;
563 						i--;
564 					}
565 				}
566 			} while (n_polled <= 0);
567 			svc_polling = FALSE;
568 
569 			/*
570 			 * If there's data in the pipe, clear it.
571 			 */
572 			if (svc_pollset[npollfds].revents) {
573 				clear_pipe();
574 				n_polled--;
575 				svc_pollset[npollfds].revents = 0;
576 			}
577 			svc_polled = npollfds;
578 			svc_pollfds = n_polled;
579 			svc_next_pollfd = 0;
580 
581 			/*
582 			 * Check exit flag.
583 			 */
584 			if (svc_exit_done) {
585 				svc_thr_total--;
586 				mutex_unlock(&svc_mutex);
587 				mutex_unlock(&svc_thr_mutex);
588 				if (!main_thread) {
589 					thr_exit(NULL);
590 					/* NOTREACHED */
591 				}
592 				break;
593 			}
594 
595 			/*
596 			 * If no descriptor is active, continue.
597 			 */
598 			if (svc_pollfds == 0)
599 				goto continue_with_locks;
600 		}
601 
602 		/*
603 		 * If a file descriptor has already not been selected,
604 		 * choose a file descriptor.
605 		 * svc_pollfds and svc_next_pollfd are updated.
606 		 */
607 		if (myfd == -1) {
608 			if (select_next_pollfd(&myfd, &mypollfd) == -1)
609 				goto continue_with_locks;
610 		}
611 
612 		/*
613 		 * Check to see if new threads need to be started.
614 		 * Count of threads that could be gainfully employed is
615 		 * obtained as follows:
616 		 *	- count 1 for poller
617 		 *	- count 1 for this request
618 		 *	- count active file descriptors (svc_pollfds)
619 		 *	- count pending file descriptors
620 		 *
621 		 * (svc_thr_total - svc_thr_active) are already available.
622 		 * This thread is one of the available threads.
623 		 *
624 		 * Number of new threads should not exceed
625 		 *	(svc_thr_max - svc_thr_total).
626 		 */
627 		if (svc_thr_total < svc_thr_max &&
628 			    svc_mt_mode == RPC_SVC_MT_AUTO && !svc_exit_done) {
629 			n_new = 1 + 1 + svc_pollfds + svc_total_pending -
630 					(svc_thr_total - svc_thr_active);
631 			if (n_new > (svc_thr_max - svc_thr_total))
632 				n_new = svc_thr_max - svc_thr_total;
633 			if (n_new > 0)
634 				start_threads(n_new);
635 		}
636 
637 		/*
638 		 * Get parent xprt.  It is possible for the parent service
639 		 * handle to be destroyed by now, due to a race condition.
640 		 * Check for this, and if so, log a warning and go on.
641 		 */
642 		parent_xprt = svc_xports[myfd];
643 		if (parent_xprt == NULL) {
644 			/* Check if it is not a user FD */
645 			if (__is_a_userfd(myfd) == TRUE)
646 				__svc_getreq_user(&(svc_pollset[mypollfd]));
647 			goto continue_with_locks;
648 		}
649 /* LINTED pointer alignment */
650 		if (svc_defunct(parent_xprt) || svc_failed(parent_xprt))
651 			goto continue_with_locks;
652 
653 		/*
654 		 * Make a copy of parent xprt, update svc_fdset.
655 		 */
656 		if ((xprt = make_xprt_copy(parent_xprt)) == NULL)
657 			goto continue_with_locks;
658 
659 		/*
660 		 * Keep track of active threads in automatic mode.
661 		 */
662 		if (svc_mt_mode == RPC_SVC_MT_AUTO)
663 			svc_thr_active++;
664 
665 		/*
666 		 * Release mutexes so other threads can get going.
667 		 */
668 		mutex_unlock(&svc_mutex);
669 		mutex_unlock(&svc_thr_mutex);
670 
671 		/*
672 		 * Process request.
673 		 */
674 		{
675 			struct rpc_msg *msg;
676 			struct svc_req *r;
677 			char *cred_area;
678 
679 /* LINTED pointer alignment */
680 			msg = SVCEXT(xprt)->msg;
681 /* LINTED pointer alignment */
682 			r = SVCEXT(xprt)->req;
683 /* LINTED pointer alignment */
684 			cred_area = SVCEXT(xprt)->cred_area;
685 
686 
687 			msg->rm_call.cb_cred.oa_base = cred_area;
688 			msg->rm_call.cb_verf.oa_base =
689 						&(cred_area[MAX_AUTH_BYTES]);
690 			r->rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);
691 
692 			/*
693 			 * receive RPC message
694 			 */
695 			if ((dispatch = SVC_RECV(xprt, msg))) {
696 				if (svc_mt_mode != RPC_SVC_MT_NONE)
697 /* LINTED pointer alignment */
698 					svc_flags(xprt) |= SVC_ARGS_CHECK;
699 				dispatch = _svc_prog_dispatch(xprt, msg, r);
700 
701 				/*
702 				 * Call cleanup procedure if set.
703 				 */
704 				if (__proc_cleanup_cb != NULL)
705 					(*__proc_cleanup_cb)(xprt);
706 			} else
707 				svc_args_done(xprt);
708 
709 			/*
710 			 * Finish up, if automatic mode, or not dispatched.
711 			 */
712 			if (svc_mt_mode == RPC_SVC_MT_AUTO || !dispatch) {
713 /* LINTED pointer alignment */
714 				if (svc_flags(xprt) & SVC_ARGS_CHECK)
715 					svc_args_done(xprt);
716 				mutex_lock(&svc_mutex);
717 				_svc_done_private(xprt);
718 				if (svc_mt_mode == RPC_SVC_MT_AUTO) {
719 					/*
720 					 * not active any more
721 					 */
722 					svc_thr_active--;
723 
724 					/*
725 					 * If not main thread, exit unless
726 					 * there's some immediate work.
727 					 */
728 					if (!main_thread &&
729 						    svc_pollfds <= 0 &&
730 						    svc_total_pending <= 0 &&
731 						    (svc_polling ||
732 							svc_waiters > 0)) {
733 						svc_thr_total--;
734 						if (svc_thr_total ==
735 						    svc_waiters) {
736 							cond_broadcast(
737 							    &svc_thr_fdwait);
738 						}
739 						mutex_unlock(&svc_mutex);
740 						thr_exit(NULL);
741 						/* NOTREACHED */
742 					}
743 				}
744 				mutex_unlock(&svc_mutex);
745 			}
746 		}
747 
748 	}
749 }
750 
751 
752 /*
753  * start_threads() - Start specified number of threads.
754  */
755 static void
756 start_threads(num_threads)
757 	int		num_threads;
758 {
759 	int		i;
760 
761 	assert(MUTEX_HELD(&svc_mutex));
762 
763 	for (i = 0; i < num_threads; i++) {
764 		if (thr_create(NULL, 0, (void *(*)(void *))_svc_run_mt, NULL,
765 		    THR_DETACHED, NULL) == 0) {
766 			svc_thr_total++;
767 			svc_thr_total_creates++;
768 		} else {
769 			svc_thr_total_create_errors++;
770 		}
771 	}
772 }
773 
774 
775 /*
776  * create_pipe() - create pipe for breaking out of poll.
777  */
778 static void
779 create_pipe()
780 {
781 	if (pipe(svc_pipe) == -1) {
782 		syslog(LOG_ERR, dgettext(__nsl_dom,
783 				"RPC: svc could not create pipe - exiting"));
784 		exit(1);
785 	}
786 	if (_fcntl(svc_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
787 		syslog(LOG_ERR, dgettext(__nsl_dom,
788 					"RPC: svc pipe error - exiting"));
789 		exit(1);
790 	}
791 	if (_fcntl(svc_pipe[1], F_SETFL, O_NONBLOCK) == -1) {
792 		syslog(LOG_ERR, dgettext(__nsl_dom,
793 					"RPC: svc pipe error - exiting"));
794 		exit(1);
795 	}
796 }
797 
798 
799 /*
800  * clear_pipe() - Empty data in pipe.
801  */
802 static void
803 clear_pipe()
804 {
805 	char	buf[16];
806 	int	i;
807 
808 	do {
809 		i = read(svc_pipe[0], buf, sizeof (buf));
810 	} while (i == sizeof (buf));
811 }
812 
813 
814 /*
815  * select_next_pollfd() - Select the next active fd in svc_pollset.
816  */
817 static int
818 select_next_pollfd(int *fd, int *pollfdIndex)
819 {
820 	int i;
821 
822 	assert(MUTEX_HELD(&svc_thr_mutex));
823 	assert(MUTEX_HELD(&svc_mutex));
824 
825 	for (i = svc_next_pollfd; svc_pollfds > 0 && i < svc_polled;
826 							i++) {
827 		if (svc_pollset[i].revents) {
828 			svc_pollfds--;
829 			/*
830 			 * No more special case for POLLNVAL, because it may
831 			 * be linked with a user file descriptot callback
832 			 */
833 			svc_next_pollfd = i + 1;
834 
835 			*fd = svc_pollset[i].fd;
836 			*pollfdIndex = i;
837 
838 			return (0);
839 		}
840 	}
841 	svc_next_pollfd = svc_pollfds = 0;
842 	*fd = -1;
843 	*pollfdIndex = INVALID_POLLFD;
844 	return (-1);
845 }
846 
847 
848 /*
849  * make_xprt_copy() - make a copy of the parent xprt.
850  * Clear fd bit in svc_fdset.
851  */
852 static SVCXPRT *
853 make_xprt_copy(parent)
854 	SVCXPRT	*parent;
855 {
856 /* LINTED pointer alignment */
857 	SVCXPRT_LIST	*xlist = SVCEXT(parent)->my_xlist;
858 	SVCXPRT_LIST	*xret;
859 	SVCXPRT		*xprt;
860 	int		fd = parent->xp_fd;
861 
862 	assert(MUTEX_HELD(&svc_mutex));
863 
864 	xret = xlist->next;
865 	if (xret) {
866 		xlist->next = xret->next;
867 		xret->next = NULL;
868 		xprt = xret->xprt;
869 /* LINTED pointer alignment */
870 		svc_flags(xprt) = svc_flags(parent);
871 	} else
872 		xprt = svc_copy(parent);
873 
874 	if (xprt) {
875 /* LINTED pointer alignment */
876 		SVCEXT(parent)->refcnt++;
877 		rw_wrlock(&svc_fd_lock);
878 		clear_pollfd(fd);
879 		rw_unlock(&svc_fd_lock);
880 	}
881 	return (xprt);
882 }
883 
884 /*
885  * _svc_done_private() - return copies to library.
886  */
887 static void
888 _svc_done_private(xprt)
889 	SVCXPRT		*xprt;
890 {
891 	SVCXPRT		*parent;
892 	SVCXPRT_LIST	*xhead, *xlist;
893 
894 	assert(MUTEX_HELD(&svc_mutex));
895 
896 /* LINTED pointer alignment */
897 	if ((parent = SVCEXT(xprt)->parent) == NULL)
898 		return;
899 
900 /* LINTED pointer alignment */
901 	xhead = SVCEXT(parent)->my_xlist;
902 /* LINTED pointer alignment */
903 	xlist = SVCEXT(xprt)->my_xlist;
904 	xlist->next = xhead->next;
905 	xhead->next = xlist;
906 
907 /* LINTED pointer alignment */
908 	SVCEXT(parent)->refcnt--;
909 
910 	/*
911 	 * Propagate any error flags.  This is done in both directions to
912 	 * ensure that if one child gets an error, everyone will see it
913 	 * (even if there are multiple outstanding children) and the
914 	 * transport will get closed.
915 	 */
916 /* LINTED pointer alignment */
917 	svc_flags(xprt) |= svc_flags(parent);
918 /* LINTED pointer alignment */
919 	if (svc_failed(xprt) || svc_defunct(xprt)) {
920 /* LINTED pointer alignment */
921 		svc_flags(parent) |= (svc_flags(xprt) &
922 				(SVC_FAILED | SVC_DEFUNCT));
923 /* LINTED pointer alignment */
924 		if (SVCEXT(parent)->refcnt == 0)
925 			_svc_destroy_private(xprt);
926 	}
927 }
928 
929 void
930 svc_done(SVCXPRT *xprt)
931 {
932 	if (svc_mt_mode != RPC_SVC_MT_USER)
933 		return;
934 
935 	/*
936 	 * Make sure file descriptor is released in user mode.
937 	 * If the xprt is a door, do nothing: this work is performed by
938 	 * svc_door.c's return_xprt_copy() routine, which is basically a
939 	 * door-specific copy of _svc_done_private().
940 	 */
941 /* LINTED pointer alignment */
942 	if (svc_type(xprt) == SVC_DOOR)
943 		return;
944 
945 /* LINTED pointer alignment */
946 	if (svc_flags(xprt) & SVC_ARGS_CHECK)
947 		svc_args_done(xprt);
948 
949 	mutex_lock(&svc_mutex);
950 	_svc_done_private(xprt);
951 	mutex_unlock(&svc_mutex);
952 }
953 
954 
955 /*
956  * Mark argument completion.  Release file descriptor.
957  */
958 void
959 svc_args_done(xprt)
960 	SVCXPRT	*xprt;
961 {
962 	char	dummy;
963 /* LINTED pointer alignment */
964 	SVCXPRT	*parent = SVCEXT(xprt)->parent;
965 	bool_t	wake_up_poller;
966 	enum	xprt_stat stat;
967 
968 /* LINTED pointer alignment */
969 	svc_flags(xprt) |= svc_flags(parent);
970 /* LINTED pointer alignment */
971 	svc_flags(xprt) &= ~SVC_ARGS_CHECK;
972 /* LINTED pointer alignment */
973 	if (svc_failed(xprt) || svc_defunct(parent))
974 		return;
975 
976 /* LINTED pointer alignment */
977 	if (svc_type(xprt) == SVC_CONNECTION &&
978 				(stat = SVC_STAT(xprt)) != XPRT_IDLE) {
979 		if (stat == XPRT_MOREREQS) {
980 			mutex_lock(&svc_mutex);
981 			svc_pending_fds[svc_last_pending++] = xprt->xp_fd;
982 			if (svc_last_pending > CIRCULAR_BUFSIZE)
983 				svc_last_pending = 0;
984 			svc_total_pending++;
985 			mutex_unlock(&svc_mutex);
986 			wake_up_poller = FALSE;
987 		} else {
988 			/*
989 			 * connection failed
990 			 */
991 			return;
992 		}
993 	} else {
994 		rw_wrlock(&svc_fd_lock);
995 		set_pollfd(xprt->xp_fd, MASKVAL);
996 		rw_unlock(&svc_fd_lock);
997 		wake_up_poller = TRUE;
998 	}
999 
1000 	if (!wake_up_poller || !svc_polling) {
1001 		/*
1002 		 * Wake up any waiting threads.
1003 		 */
1004 		mutex_lock(&svc_mutex);
1005 		if (svc_waiters > 0) {
1006 			cond_broadcast(&svc_thr_fdwait);
1007 			mutex_unlock(&svc_mutex);
1008 			return;
1009 		}
1010 		mutex_unlock(&svc_mutex);
1011 	}
1012 
1013 	/*
1014 	 * Wake up any polling thread.
1015 	 */
1016 	if (svc_polling)
1017 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
1018 }
1019 
1020 
1021 int
1022 __rpc_legal_connmaxrec(int suggested) {
1023 	if (suggested == -1) {
1024 		/* Supply default */
1025 		return (RPC_MAXDATASIZE + 2*sizeof (uint32_t));
1026 	} else if (suggested < 0) {
1027 		return (-1);
1028 	} else if (suggested > 0) {
1029 		/* Round down to multiple of BYTES_PER_XDR_UNIT */
1030 		suggested -= suggested % BYTES_PER_XDR_UNIT;
1031 		/* If possible, allow for two fragment headers */
1032 		if (suggested < MAXINT-(2*sizeof (uint32_t))) {
1033 			/* Allow for two fragment headers */
1034 			suggested += 2 * sizeof (uint32_t);
1035 		} else {
1036 			suggested = MAXINT;
1037 		}
1038 		if (suggested < sizeof (struct rpc_msg)) {
1039 			return (-1);
1040 		}
1041 	}
1042 	return (suggested);
1043 }
1044 
1045 
1046 bool_t
1047 rpc_control(op, info)
1048 	int		op;
1049 	void		*info;
1050 {
1051 	int		tmp;
1052 	extern int	__rpc_minfd;
1053 
1054 	switch (op) {
1055 	case RPC_SVC_MTMODE_SET:
1056 		tmp = *((int *)info);
1057 		if (tmp != RPC_SVC_MT_NONE && tmp != RPC_SVC_MT_AUTO &&
1058 						tmp != RPC_SVC_MT_USER)
1059 			return (FALSE);
1060 		if (svc_mt_mode != RPC_SVC_MT_NONE && svc_mt_mode != tmp)
1061 			return (FALSE);
1062 		svc_mt_mode = tmp;
1063 		return (TRUE);
1064 	case RPC_SVC_MTMODE_GET:
1065 		*((int *)info) = svc_mt_mode;
1066 		return (TRUE);
1067 	case RPC_SVC_THRMAX_SET:
1068 		if ((tmp = *((int *)info)) < 1)
1069 			return (FALSE);
1070 		mutex_lock(&svc_mutex);
1071 		svc_thr_max = tmp;
1072 		mutex_unlock(&svc_mutex);
1073 		return (TRUE);
1074 	case RPC_SVC_THRMAX_GET:
1075 		*((int *)info) = svc_thr_max;
1076 		return (TRUE);
1077 	case RPC_SVC_THRTOTAL_GET:
1078 		*((int *)info) = svc_thr_total;
1079 		return (TRUE);
1080 	case RPC_SVC_THRCREATES_GET:
1081 		*((int *)info) = svc_thr_total_creates;
1082 		return (TRUE);
1083 	case RPC_SVC_THRERRORS_GET:
1084 		*((int *)info) = svc_thr_total_create_errors;
1085 		return (TRUE);
1086 	case RPC_SVC_USE_POLLFD:
1087 		if (*((int *)info) && !__rpc_use_pollfd_done) {
1088 			__rpc_use_pollfd_done = 1;
1089 			return (TRUE);
1090 		}
1091 		return (FALSE);
1092 	case __RPC_CLNT_MINFD_SET:
1093 		tmp = *((int *)info);
1094 		if (tmp < 0)
1095 			return (FALSE);
1096 		__rpc_minfd = tmp;
1097 		return (TRUE);
1098 	case __RPC_CLNT_MINFD_GET:
1099 		*((int *)info) = __rpc_minfd;
1100 		return (TRUE);
1101 	case RPC_SVC_CONNMAXREC_SET:
1102 		tmp = __rpc_legal_connmaxrec(*(int *)info);
1103 		if (tmp >= 0) {
1104 			__rpc_connmaxrec = tmp;
1105 			return (TRUE);
1106 		} else {
1107 			return (FALSE);
1108 		}
1109 	case RPC_SVC_CONNMAXREC_GET:
1110 		*((int *)info) = __rpc_connmaxrec;
1111 		return (TRUE);
1112 	case RPC_SVC_IRTIMEOUT_SET:
1113 		tmp = *((int *)info);
1114 		if (tmp >= 0) {
1115 			__rpc_irtimeout = tmp;
1116 			return (TRUE);
1117 		} else {
1118 			return (FALSE);
1119 		}
1120 	/*
1121 	 * No mutex necessary as _EXCLBIND_SET will/should only
1122 	 * be used before an RPC daemon goes mt-hot.
1123 	 */
1124 	case __RPC_SVC_EXCLBIND_SET:
1125 		if (info) {
1126 			__rpc_tp_exclbind = *((bool_t *)info);
1127 			return (TRUE);
1128 		}
1129 		return (FALSE);
1130 	case __RPC_SVC_EXCLBIND_GET:
1131 		if (info) {
1132 			*((bool_t *)info) = __rpc_tp_exclbind;
1133 			return (TRUE);
1134 		}
1135 		return (FALSE);
1136 
1137 	default:
1138 		return (FALSE);
1139 	}
1140 }
1141