xref: /titanic_50/usr/src/lib/libnsl/rpc/svc_run.c (revision 799823bbed51a695d01e13511bbb1369980bb714)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32 /*
33  * Portions of this source code were derived from Berkeley
34  * 4.3 BSD under license from the Regents of the University of
35  * California.
36  */
37 
38 /*
39  * This is the rpc server side idle loop
40  * Wait for input, call server program.
41  */
42 #include "mt.h"
43 #include "rpc_mt.h"
44 #include <stdlib.h>
45 #include <unistd.h>
46 #include <signal.h>
47 #include <rpc/rpc.h>
48 #include <errno.h>
49 #include <sys/poll.h>
50 #include <sys/types.h>
51 #include <syslog.h>
52 #include <thread.h>
53 #include <assert.h>
54 #include <libintl.h>
55 #include <values.h>
56 
/* Text domain handed to dgettext() for the messages logged by this file. */
extern const char __nsl_dom[];

/*
 * Helpers implemented elsewhere in the library.  __rpc_compress_pollfd() is
 * used below to fill svc_pollset from svc_pollfd before each poll();
 * clear_pollfd()/set_pollfd() are called with svc_fd_lock write-locked.
 */
extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
extern bool_t __is_a_userfd(int P_fd);
extern void   __destroy_userfd();
extern void clear_pollfd(int);
extern void set_pollfd(int /* fd */, short /* events */);
extern void svc_getreq_poll();
/* Optional hook; when non-NULL it is called after each dispatched request. */
extern void (*__proc_cleanup_cb)();

/* Forward declarations of the static helpers defined later in this file. */
static void start_threads();
static void create_pipe();
static void clear_pipe();
static int select_next_pollfd();
static SVCXPRT *make_xprt_copy();
static void _svc_run_mt();
static void _svc_run();

int _svc_prog_dispatch();
static void _svc_done_private();

/* Shared state for the file-descriptor tables and for door transports. */
extern rwlock_t svc_fd_lock;
extern mutex_t	svc_door_mutex;
extern cond_t	svc_door_waitcv;
extern int	svc_ndoorfds;
extern void	__svc_cleanup_door_xprts();
extern void	__svc_free_xprtlist();
extern void	__svc_getreq_user(struct pollfd *);
85 
/*
 * Maximum fragment size allowed for connection oriented connections.
 * Zero means that no maximum size limit has been requested.
 * Set through rpc_control(RPC_SVC_CONNMAXREC_SET).
 */
int __rpc_connmaxrec = 0;

/*
 * Inter-Record Timeout in secs for non-blocked connection RPC.
 * Set through rpc_control(RPC_SVC_IRTIMEOUT_SET).
 */
int __rpc_irtimeout = 35;

/*
 * Request exclusive access to tcp and udp non-priv ports bound with a
 * wildcard addr.
 */
bool_t __rpc_tp_exclbind = FALSE;

/*
 * Maximum number of outstanding connection indications (listen backlog).
 */
static int __svc_lstnbklog = 64;	/* Chosen Arbitrarily */

/*
 * XXX - eventually, all mutexes and their initializations static
 */

/*
 * Variables used for MT
 */
int svc_mt_mode;		/* multi-threading mode */

int svc_pipe[2];	/* pipe for breaking out of poll: read(0), write(1) */

/* BEGIN PROTECTED BY svc_mutex */

static int svc_thr_max = 16;	/* default maximum number of threads allowed */

static int svc_thr_total;	/* current number of threads */

static int svc_thr_active;	/* current number of threads active */

/*
 * Circular array of file descriptors with pending data.  The array holds
 * CIRCULAR_BUFSIZE + 1 slots; svc_next_pending and svc_last_pending wrap
 * back to 0 once they exceed CIRCULAR_BUFSIZE.
 */

#define	CIRCULAR_BUFSIZE	1024

static int svc_pending_fds[CIRCULAR_BUFSIZE+1];	/* fds with pending data */

static int svc_next_pending;			/* next one to be processed */

static int svc_last_pending;			/* last one in list */

static int svc_total_pending;			/* total in list */

static int svc_thr_total_creates;	/* total created - stats */

static int svc_thr_total_create_errors;	/* total create errors - stats */

static int svc_waiters;		/* number of waiting threads */

/* END PROTECTED BY svc_mutex */

/* BEGIN PROTECTED BY svc_fd_lock: */

int svc_nfds;		/* total number of active file descriptors */

int svc_nfds_set;	/* total number of fd bits set in svc_fdset */

int svc_max_fd = 0;	/* largest active file descriptor */

int svc_npollfds;	/* total number of active pollfds */

int svc_npollfds_set;	/* total number of pollfd set in svc_pollfd */

int svc_max_pollfd;	/* largest active pollfd so far */

int svc_pollfd_allocd;  /* number of pollfd structures allocated */

/* END PROTECTED BY svc_fd_lock: */

/* BEGIN PROTECTED BY svc_thr_mutex */

/* svc_pollset grows in steps of POLLSET_EXTEND entries (see alloc_pollset) */
#define	POLLSET_EXTEND	256
static int svc_pollset_allocd;	/* entries currently allocated in svc_pollset */
static struct pollfd *svc_pollset;
				/*
				 * array of file descriptors currently active
				 */
static int svc_polled;		/* no of fds polled in last poll() - input */

static int svc_pollfds;		/* no of active fds in last poll() - output */

static int svc_next_pollfd;	/* next fd to process in svc_pollset */

bool_t svc_polling;		/* true if a thread is polling */

/* END PROTECTED BY svc_thr_mutex */

/* BEGIN PROTECTED BY svc_exit_mutex */

/* TRUE until svc_run() clears it; set again by svc_exit() */
static bool_t svc_exit_done = TRUE;
184 
185 /* END PROTECTED BY svc_exit_mutex */
186 
187 /*
188  * Warlock section
189  */
190 
191 /* VARIABLES PROTECTED BY svc_mutex:
192 	svc_thr_total, svc_thr_active, svc_pending_fds, svc_next_pending,
193 	svc_last_pending, svc_total_pending, svc_thr_total_creates,
194 	svc_thr_total_create_errors,
195 	svcxprt_list_t::next, svcxprt_ext_t::my_xlist,
196 	svc_thr_max, svc_waiters
197  */
198 
199 /* VARIABLES PROTECTED BY svc_fd_lock:
200 	svc_xports, svc_fdset, svc_nfds, svc_nfds_set, svc_max_fd,
201 	svc_pollfd, svc_npollfds, svc_npollfds_set, svc_max_pollfd
202  */
203 
204 /* VARIABLES PROTECTED BY svc_thr_mutex:
205 	svc_pollset, svc_pollfds, svc_next_pollfd, svc_polling
206 	svc_pollset_allocd, svc_polled
207  */
208 
209 /* VARIABLES PROTECTED BY svc_exit_mutex:
210 	svc_exit_done
211  */
212 
213 /* VARIABLES READABLE WITHOUT LOCK:
214 	svc_thr_total, svc_thr_active, svc_thr_total_creates,
215 	svc_thr_total_create_errors,
216 	svc_xports, svc_nfds, svc_nfds_set, svc_max_fd,
217 	svc_npollfds, svc_npollfds_set, svc_max_pollfd,
218 	svc_pollfds, svc_next_pollfd, svc_exit_done, svc_polling,
219 	svc_thr_max, svc_waiters
220  */
221 
222 /* VARIABLES PROTECTED BY "program_logic":
223 	rpc_msg::, svc_req::, svcxprt_ext_t::flags, svc_mt_mode,
224 	svcxprt_ext_t::parent
225  */
226 
227 /* LOCK ORDER:
228 	svc_exit_mutex, svc_thr_mutex, svc_mutex, svc_fd_lock
229  */
230 
231 
232 void
233 svc_run(void)
234 {
235 	/* NO OTHER THREADS ARE RUNNING */
236 
237 	svc_exit_done = FALSE;
238 
239 	while ((svc_npollfds > 0 || svc_ndoorfds > 0) && !svc_exit_done) {
240 		if (svc_npollfds > 0) {
241 			switch (svc_mt_mode) {
242 			case RPC_SVC_MT_NONE:
243 				_svc_run();
244 				break;
245 			default:
246 				_svc_run_mt();
247 				break;
248 			}
249 			continue;
250 		}
251 
252 		(void) mutex_lock(&svc_door_mutex);
253 		if (svc_ndoorfds > 0)
254 			(void) cond_wait(&svc_door_waitcv, &svc_door_mutex);
255 		(void) mutex_unlock(&svc_door_mutex);
256 	}
257 }
258 
259 
260 /*
261  *	This function causes svc_run() to exit by destroying all
262  *	service handles.
263  */
264 void
265 svc_exit(void)
266 {
267 	SVCXPRT	*xprt;
268 	int fd;
269 	char dummy;
270 
271 	/* NO LOCKS HELD */
272 
273 	(void) mutex_lock(&svc_exit_mutex);
274 	if (svc_exit_done) {
275 		(void) mutex_unlock(&svc_exit_mutex);
276 		return;
277 	}
278 	svc_exit_done = TRUE;
279 	for (fd = 0; fd < svc_max_pollfd; fd++) {
280 		xprt = svc_xports[fd];
281 		if (xprt) {
282 			SVC_DESTROY(xprt);
283 		}
284 	}
285 	__svc_free_xprtlist();
286 	__svc_cleanup_door_xprts();
287 	(void) mutex_unlock(&svc_exit_mutex);
288 
289 	if (svc_mt_mode != RPC_SVC_MT_NONE) {
290 		(void) mutex_lock(&svc_mutex);
291 		(void) cond_broadcast(&svc_thr_fdwait);
292 		(void) mutex_unlock(&svc_mutex);
293 
294 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
295 	}
296 
297 	(void) mutex_lock(&svc_door_mutex);
298 	(void) cond_signal(&svc_door_waitcv);	/* wake up door dispatching */
299 	(void) mutex_unlock(&svc_door_mutex);
300 
301 	/* destroy reactor information if any */
302 	__destroy_userfd();
303 }
304 
305 
306 /*
 * This function is called with svc_fd_lock held; the multi-threaded caller
 * (_svc_run_mt) additionally holds svc_thr_mutex.
308  */
309 
310 static int
311 alloc_pollset(int npollfds)
312 {
313 	if (npollfds > svc_pollset_allocd) {
314 		pollfd_t *tmp;
315 		do {
316 			svc_pollset_allocd += POLLSET_EXTEND;
317 		} while (npollfds > svc_pollset_allocd);
318 		tmp = realloc(svc_pollset,
319 		    sizeof (pollfd_t) * svc_pollset_allocd);
320 		if (tmp == NULL) {
321 			syslog(LOG_ERR, "alloc_pollset: out of memory");
322 			return (-1);
323 		}
324 		svc_pollset = tmp;
325 	}
326 	return (0);
327 }
328 
329 static void
330 _svc_run(void)
331 {
332 	sigset_t set, oldset;
333 	int npollfds;
334 	int i;
335 
336 	/*
337 	 * Block SIGALRM while doing work.  Unblock it while doing poll().
338 	 * This is so that services like rpc.rstatd can cause the poll()
339 	 * to be interrupted due to alarm() but that we don't end up in
340 	 * an MT-unsafe signal handler at an inopportune time.
341 	 */
342 	(void) sigemptyset(&set);
343 	(void) sigaddset(&set, SIGALRM);
344 	(void) sigprocmask(SIG_BLOCK, &set, &oldset);
345 	while (!svc_exit_done) {
346 		/*
347 		 * Check whether there is any server fd on which we may want
348 		 * to wait.
349 		 */
350 		(void) rw_rdlock(&svc_fd_lock);
351 		if (alloc_pollset(svc_npollfds) == -1)
352 			break;
353 		npollfds = __rpc_compress_pollfd(svc_max_pollfd,
354 		    svc_pollfd, svc_pollset);
355 		(void) rw_unlock(&svc_fd_lock);
356 		if (npollfds == 0)
357 			break;	/* None waiting, hence return */
358 
359 		(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
360 		i = poll(svc_pollset, npollfds, -1);
361 		(void) sigprocmask(SIG_BLOCK, &set, &oldset);
362 		switch (i) {
363 		case -1:
364 			/*
365 			 * We ignore all errors, continuing with the assumption
366 			 * that it was set by the signal handlers (or any
367 			 * other outside event) and not caused by poll().
368 			 */
369 		case 0:
370 			continue;
371 		default:
372 			svc_getreq_poll(svc_pollset, i);
373 		}
374 	}
375 	(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
376 }
377 
378 /*
379  * In _svc_run_mt, myfd is linked with mypollfd
380  * svc_pollset[mypollfd].fd == myfd
381  * However, in some cases, the link can not be made, thus we define the
382  * following values for these special cases
383  */
enum {
	/* no svc_pollset entry is associated with the selected fd */
	INVALID_POLLFD = -200,
	/* the fd was taken from svc_pending_fds rather than svc_pollset */
	FD_FROM_PENDING = -199
};
388 
/*
 * _svc_run_mt() - service loop executed by every thread in the MT modes.
 *
 * Called directly by svc_run() and used as the start routine of the
 * threads created by start_threads().  Each pass of the outer loop:
 *   1. takes svc_thr_mutex and svc_mutex;
 *   2. picks a descriptor, either from svc_pending_fds or from the results
 *      of the last poll() (polling itself if there are no results left);
 *   3. in RPC_SVC_MT_AUTO mode, starts more threads if there is enough
 *      outstanding work;
 *   4. obtains a copy of the descriptor's parent transport, drops both
 *      mutexes, receives the request and dispatches it;
 *   5. in AUTO mode (or if the request was not dispatched) returns the
 *      copy via _svc_done_private().
 *
 * The invocation that finds first_time TRUE marks itself main_thread and
 * leaves the loop with "break" (returning to the caller); every other
 * invocation leaves through thr_exit().
 *
 * NOTE(review): first_time is static and cleared on the first call, so a
 * later call made by svc_run() runs with main_thread == FALSE and would
 * take the thr_exit() paths - confirm this is intended.
 */
static void
_svc_run_mt(void)
{
	int npollfds;
	int n_polled, dispatch;

	static bool_t first_time = TRUE;
	bool_t main_thread = FALSE;
	int n_new;
	int myfd, mypollfd;
	SVCXPRT *parent_xprt, *xprt;

	/*
	 * Server is multi-threaded.  Do "first time" initializations.
	 * Since only one thread exists in the beginning, there's no
	 * need for mutex protection for first time initializations.
	 */
	if (first_time) {
		first_time = FALSE;
		main_thread = TRUE;
		svc_thr_total = 1;	/* this thread */
		svc_next_pending = svc_last_pending = 0;

		/*
		 * Create a pipe for waking up the poll, if new
		 * descriptors have been added to svc_fdset.
		 */
		create_pipe();
	}

	/* OTHER THREADS ARE RUNNING */

	if (svc_exit_done)
		return;

	for (;;) {
		/*
		 * svc_thr_mutex prevents more than one thread from
		 * trying to select a descriptor to process further.
		 * svc_thr_mutex is unlocked after a thread selects
		 * a descriptor on which to receive data.  If there are
		 * no such descriptors, the thread will poll with
		 * svc_thr_mutex locked, after unlocking all other
		 * locks.  This prevents more than one thread from
		 * trying to poll at the same time.
		 */
		(void) mutex_lock(&svc_thr_mutex);
		(void) mutex_lock(&svc_mutex);
		/*
		 * Every "goto continue_with_locks" below is taken with both
		 * svc_thr_mutex and svc_mutex still held.
		 */
continue_with_locks:
		myfd = -1;
		mypollfd = INVALID_POLLFD;

		/*
		 * Check if there are any descriptors with data pending.
		 * The read index wraps to 0 after slot CIRCULAR_BUFSIZE.
		 */
		if (svc_total_pending > 0) {
			myfd = svc_pending_fds[svc_next_pending++];
			mypollfd = FD_FROM_PENDING;
			if (svc_next_pending > CIRCULAR_BUFSIZE)
				svc_next_pending = 0;
			svc_total_pending--;
		}

		/*
		 * Get the next active file descriptor to process.
		 */
		if (myfd == -1 && svc_pollfds == 0) {
			/*
			 * svc_pollset is empty; do polling
			 */
			svc_polling = TRUE;

			/*
			 * if there are no file descriptors, return
			 * (also taken when the poll set cannot be grown;
			 * one extra entry is requested for svc_pipe[0])
			 */
			(void) rw_rdlock(&svc_fd_lock);
			if (svc_npollfds == 0 ||
			    alloc_pollset(svc_npollfds + 1) == -1) {
				(void) rw_unlock(&svc_fd_lock);
				svc_polling = FALSE;
				svc_thr_total--;
				(void) mutex_unlock(&svc_mutex);
				(void) mutex_unlock(&svc_thr_mutex);
				if (!main_thread) {
					thr_exit(NULL);
					/* NOTREACHED */
				}
				break;
			}

			npollfds = __rpc_compress_pollfd(svc_max_pollfd,
			    svc_pollfd, svc_pollset);
			(void) rw_unlock(&svc_fd_lock);

			if (npollfds == 0) {
				/*
				 * There are file descriptors, but none of them
				 * are available for polling.  If this is the
				 * main thread, or if no thread is waiting,
				 * wait on condition variable, otherwise exit.
				 */
				svc_polling = FALSE;
				/* only svc_mutex is held from here on */
				(void) mutex_unlock(&svc_thr_mutex);
				if ((!main_thread) && svc_waiters > 0) {
					svc_thr_total--;
					(void) mutex_unlock(&svc_mutex);
					thr_exit(NULL);
					/* NOTREACHED */
				}

				/*
				 * Sleep until a descriptor is set in
				 * svc_pollfd, poll results or pending fds
				 * appear, or an exit is requested.
				 */
				while (svc_npollfds_set == 0 &&
				    svc_pollfds == 0 &&
				    svc_total_pending == 0 &&
				    !svc_exit_done) {
					svc_waiters++;
					(void) cond_wait(&svc_thr_fdwait,
					    &svc_mutex);
					svc_waiters--;
				}

				/*
				 * Check exit flag.  If this is not the main
				 * thread, exit.
				 */
				if (svc_exit_done) {
					svc_thr_total--;
					(void) mutex_unlock(&svc_mutex);
					if (!main_thread)
						thr_exit(NULL);
					break;
				}

				/*
				 * Both mutexes are released here; the outer
				 * loop re-acquires them in the right order.
				 */
				(void) mutex_unlock(&svc_mutex);
				continue;
			}

			/*
			 * We're ready to poll.  Always set svc_pipe[0]
			 * as the last one, since the poll will occasionally
			 * need to be interrupted.  Release svc_mutex for
			 * the duration of the poll, but hold on to
			 * svc_thr_mutex, as we don't want any other thread
			 * to do the same.
			 */
			svc_pollset[npollfds].fd = svc_pipe[0];
			svc_pollset[npollfds].events = MASKVAL;

			do {
				int i, j;

				(void) mutex_unlock(&svc_mutex);
				n_polled = poll(svc_pollset, npollfds + 1, -1);
				(void) mutex_lock(&svc_mutex);
				/*
				 * On error or no events, "continue" jumps to
				 * the loop condition, which polls again.
				 */
				if (n_polled <= 0)
					continue;

				/*
				 * Check if information returned indicates one
				 * or more closed fd's; find and remove any such
				 * information.  Each removed entry shrinks both
				 * npollfds and n_polled, and the same index is
				 * examined again because the following entries
				 * were shifted down into it.
				 */
				for (i = 0; i <= npollfds; i++) {
					if (svc_pollset[i].revents & POLLNVAL) {
						/* Overwrite svc_pollset[i] */
						for (j = i; j < npollfds; j++)
							svc_pollset[j] =
							    svc_pollset[j + 1];
						(void) memset(&svc_pollset[j],
						    0, sizeof (struct pollfd));
						npollfds--;
						n_polled--;
						i--;
					}
				}
			} while (n_polled <= 0);
			svc_polling = FALSE;

			/*
			 * If there's data in the pipe, clear it.
			 * The pipe entry is not counted as an active fd.
			 */
			if (svc_pollset[npollfds].revents) {
				clear_pipe();
				n_polled--;
				svc_pollset[npollfds].revents = 0;
			}
			/* publish the poll results for select_next_pollfd() */
			svc_polled = npollfds;
			svc_pollfds = n_polled;
			svc_next_pollfd = 0;

			/*
			 * Check exit flag.
			 */
			if (svc_exit_done) {
				svc_thr_total--;
				(void) mutex_unlock(&svc_mutex);
				(void) mutex_unlock(&svc_thr_mutex);
				if (!main_thread) {
					thr_exit(NULL);
					/* NOTREACHED */
				}
				break;
			}

			/*
			 * If no descriptor is active, continue.
			 */
			if (svc_pollfds == 0)
				goto continue_with_locks;
		}

		/*
		 * If a file descriptor has already not been selected,
		 * choose a file descriptor.
		 * svc_pollfds and svc_next_pollfd are updated.
		 */
		if (myfd == -1) {
			if (select_next_pollfd(&myfd, &mypollfd) == -1)
				goto continue_with_locks;
		}

		/*
		 * Check to see if new threads need to be started.
		 * Count of threads that could be gainfully employed is
		 * obtained as follows:
		 *	- count 1 for poller
		 *	- count 1 for this request
		 *	- count active file descriptors (svc_pollfds)
		 *	- count pending file descriptors
		 *
		 * (svc_thr_total - svc_thr_active) are already available.
		 * This thread is one of the available threads.
		 *
		 * Number of new threads should not exceed
		 *	(svc_thr_max - svc_thr_total).
		 */
		if (svc_thr_total < svc_thr_max &&
		    svc_mt_mode == RPC_SVC_MT_AUTO && !svc_exit_done) {
			n_new = 1 + 1 + svc_pollfds + svc_total_pending -
			    (svc_thr_total - svc_thr_active);
			if (n_new > (svc_thr_max - svc_thr_total))
				n_new = svc_thr_max - svc_thr_total;
			if (n_new > 0)
				start_threads(n_new);
		}

		/*
		 * Get parent xprt.  It is possible for the parent service
		 * handle to be destroyed by now, due to a race condition.
		 * Check for this, and if so, log a warning and go on.
		 */
		parent_xprt = svc_xports[myfd];
		if (parent_xprt == NULL) {
			/* Check if it is not a user FD */
			if (__is_a_userfd(myfd) == TRUE)
				__svc_getreq_user(&(svc_pollset[mypollfd]));
			goto continue_with_locks;
		}
/* LINTED pointer alignment */
		if (svc_defunct(parent_xprt) || svc_failed(parent_xprt))
			goto continue_with_locks;

		/*
		 * Make a copy of parent xprt, update svc_fdset.
		 */
		if ((xprt = make_xprt_copy(parent_xprt)) == NULL)
			goto continue_with_locks;

		/*
		 * Keep track of active threads in automatic mode.
		 */
		if (svc_mt_mode == RPC_SVC_MT_AUTO)
			svc_thr_active++;

		/*
		 * Release mutexes so other threads can get going.
		 */
		(void) mutex_unlock(&svc_mutex);
		(void) mutex_unlock(&svc_thr_mutex);

		/*
		 * Process request.
		 */
		{
			struct rpc_msg *msg;
			struct svc_req *r;
			char *cred_area;

/* LINTED pointer alignment */
			msg = SVCEXT(xprt)->msg;
/* LINTED pointer alignment */
			r = SVCEXT(xprt)->req;
/* LINTED pointer alignment */
			cred_area = SVCEXT(xprt)->cred_area;


			/*
			 * cred_area is carved into consecutive
			 * MAX_AUTH_BYTES slices: call credentials, call
			 * verifier, then the client credential area.
			 */
			msg->rm_call.cb_cred.oa_base = cred_area;
			msg->rm_call.cb_verf.oa_base =
			    &(cred_area[MAX_AUTH_BYTES]);
			r->rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);

			/*
			 * receive RPC message
			 */
			if ((dispatch = SVC_RECV(xprt, msg))) {
				if (svc_mt_mode != RPC_SVC_MT_NONE)
/* LINTED pointer alignment */
					svc_flags(xprt) |= SVC_ARGS_CHECK;
				dispatch = _svc_prog_dispatch(xprt, msg, r);

				/*
				 * Call cleanup procedure if set.
				 */
				if (__proc_cleanup_cb != NULL)
					(*__proc_cleanup_cb)(xprt);
			} else
				svc_args_done(xprt);

			/*
			 * Finish up, if automatic mode, or not dispatched.
			 */
			if (svc_mt_mode == RPC_SVC_MT_AUTO || !dispatch) {
/* LINTED pointer alignment */
				if (svc_flags(xprt) & SVC_ARGS_CHECK)
					svc_args_done(xprt);
				(void) mutex_lock(&svc_mutex);
				_svc_done_private(xprt);
				if (svc_mt_mode == RPC_SVC_MT_AUTO) {
					/*
					 * not active any more
					 */
					svc_thr_active--;

					/*
					 * If not main thread, exit unless
					 * there's some immediate work.
					 */
					if (!main_thread &&
					    svc_pollfds <= 0 &&
					    svc_total_pending <= 0 &&
					    (svc_polling ||
					    svc_waiters > 0)) {
						svc_thr_total--;
						/*
						 * If every remaining thread
						 * is a waiter, wake them.
						 */
						if (svc_thr_total ==
						    svc_waiters) {
							(void) cond_broadcast(
							    &svc_thr_fdwait);
						}
						(void) mutex_unlock(&svc_mutex);
						thr_exit(NULL);
						/* NOTREACHED */
					}
				}
				(void) mutex_unlock(&svc_mutex);
			}
		}

	}
}
747 
748 
749 /*
750  * start_threads() - Start specified number of threads.
751  */
752 static void
753 start_threads(int num_threads)
754 {
755 	int		i;
756 
757 	assert(MUTEX_HELD(&svc_mutex));
758 
759 	for (i = 0; i < num_threads; i++) {
760 		if (thr_create(NULL, 0, (void *(*)(void *))_svc_run_mt, NULL,
761 		    THR_DETACHED, NULL) == 0) {
762 			svc_thr_total++;
763 			svc_thr_total_creates++;
764 		} else {
765 			svc_thr_total_create_errors++;
766 		}
767 	}
768 }
769 
770 
771 /*
772  * create_pipe() - create pipe for breaking out of poll.
773  */
774 static void
775 create_pipe(void)
776 {
777 	if (pipe(svc_pipe) == -1) {
778 		syslog(LOG_ERR, dgettext(__nsl_dom,
779 		    "RPC: svc could not create pipe - exiting"));
780 		exit(1);
781 	}
782 	if (fcntl(svc_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
783 		syslog(LOG_ERR, dgettext(__nsl_dom,
784 		    "RPC: svc pipe error - exiting"));
785 		exit(1);
786 	}
787 	if (fcntl(svc_pipe[1], F_SETFL, O_NONBLOCK) == -1) {
788 		syslog(LOG_ERR, dgettext(__nsl_dom,
789 		    "RPC: svc pipe error - exiting"));
790 		exit(1);
791 	}
792 }
793 
794 
795 /*
796  * clear_pipe() - Empty data in pipe.
797  */
798 static void
799 clear_pipe(void)
800 {
801 	char	buf[16];
802 	int	i;
803 
804 	do {
805 		i = read(svc_pipe[0], buf, sizeof (buf));
806 	} while (i == sizeof (buf));
807 }
808 
809 
810 /*
811  * select_next_pollfd() - Select the next active fd in svc_pollset.
812  */
813 static int
814 select_next_pollfd(int *fd, int *pollfdIndex)
815 {
816 	int i;
817 
818 	assert(MUTEX_HELD(&svc_thr_mutex));
819 	assert(MUTEX_HELD(&svc_mutex));
820 
821 	for (i = svc_next_pollfd; svc_pollfds > 0 && i < svc_polled; i++) {
822 		if (svc_pollset[i].revents) {
823 			svc_pollfds--;
824 			/*
825 			 * No more special case for POLLNVAL, because it may
826 			 * be linked with a user file descriptot callback
827 			 */
828 			svc_next_pollfd = i + 1;
829 
830 			*fd = svc_pollset[i].fd;
831 			*pollfdIndex = i;
832 
833 			return (0);
834 		}
835 	}
836 	svc_next_pollfd = svc_pollfds = 0;
837 	*fd = -1;
838 	*pollfdIndex = INVALID_POLLFD;
839 	return (-1);
840 }
841 
842 
843 /*
844  * make_xprt_copy() - make a copy of the parent xprt.
845  * Clear fd bit in svc_fdset.
846  */
847 static SVCXPRT *
848 make_xprt_copy(SVCXPRT *parent)
849 {
850 /* LINTED pointer alignment */
851 	SVCXPRT_LIST	*xlist = SVCEXT(parent)->my_xlist;
852 	SVCXPRT_LIST	*xret;
853 	SVCXPRT		*xprt;
854 	int		fd = parent->xp_fd;
855 
856 	assert(MUTEX_HELD(&svc_mutex));
857 
858 	xret = xlist->next;
859 	if (xret) {
860 		xlist->next = xret->next;
861 		xret->next = NULL;
862 		xprt = xret->xprt;
863 /* LINTED pointer alignment */
864 		svc_flags(xprt) = svc_flags(parent);
865 	} else
866 		xprt = svc_copy(parent);
867 
868 	if (xprt) {
869 /* LINTED pointer alignment */
870 		SVCEXT(parent)->refcnt++;
871 		(void) rw_wrlock(&svc_fd_lock);
872 		clear_pollfd(fd);
873 		(void) rw_unlock(&svc_fd_lock);
874 	}
875 	return (xprt);
876 }
877 
878 /*
879  * _svc_done_private() - return copies to library.
880  */
881 static void
882 _svc_done_private(SVCXPRT *xprt)
883 {
884 	SVCXPRT		*parent;
885 	SVCXPRT_LIST	*xhead, *xlist;
886 
887 	assert(MUTEX_HELD(&svc_mutex));
888 
889 /* LINTED pointer alignment */
890 	if ((parent = SVCEXT(xprt)->parent) == NULL)
891 		return;
892 
893 /* LINTED pointer alignment */
894 	xhead = SVCEXT(parent)->my_xlist;
895 /* LINTED pointer alignment */
896 	xlist = SVCEXT(xprt)->my_xlist;
897 	xlist->next = xhead->next;
898 	xhead->next = xlist;
899 
900 /* LINTED pointer alignment */
901 	SVCEXT(parent)->refcnt--;
902 
903 	/*
904 	 * Propagate any error flags.  This is done in both directions to
905 	 * ensure that if one child gets an error, everyone will see it
906 	 * (even if there are multiple outstanding children) and the
907 	 * transport will get closed.
908 	 */
909 /* LINTED pointer alignment */
910 	svc_flags(xprt) |= svc_flags(parent);
911 /* LINTED pointer alignment */
912 	if (svc_failed(xprt) || svc_defunct(xprt)) {
913 /* LINTED pointer alignment */
914 		svc_flags(parent) |= (svc_flags(xprt) &
915 		    (SVC_FAILED | SVC_DEFUNCT));
916 /* LINTED pointer alignment */
917 		if (SVCEXT(parent)->refcnt == 0)
918 			_svc_destroy_private(xprt);
919 	}
920 }
921 
922 void
923 svc_done(SVCXPRT *xprt)
924 {
925 	if (svc_mt_mode != RPC_SVC_MT_USER)
926 		return;
927 
928 	/*
929 	 * Make sure file descriptor is released in user mode.
930 	 * If the xprt is a door, do nothing: this work is performed by
931 	 * svc_door.c's return_xprt_copy() routine, which is basically a
932 	 * door-specific copy of _svc_done_private().
933 	 */
934 /* LINTED pointer alignment */
935 	if (svc_type(xprt) == SVC_DOOR)
936 		return;
937 
938 /* LINTED pointer alignment */
939 	if (svc_flags(xprt) & SVC_ARGS_CHECK)
940 		svc_args_done(xprt);
941 
942 	(void) mutex_lock(&svc_mutex);
943 	_svc_done_private(xprt);
944 	(void) mutex_unlock(&svc_mutex);
945 }
946 
947 
948 /*
949  * Mark argument completion.  Release file descriptor.
950  */
951 void
952 svc_args_done(SVCXPRT *xprt)
953 {
954 	char	dummy;
955 /* LINTED pointer alignment */
956 	SVCXPRT	*parent = SVCEXT(xprt)->parent;
957 	bool_t	wake_up_poller;
958 	enum	xprt_stat stat;
959 
960 /* LINTED pointer alignment */
961 	svc_flags(xprt) |= svc_flags(parent);
962 /* LINTED pointer alignment */
963 	svc_flags(xprt) &= ~SVC_ARGS_CHECK;
964 /* LINTED pointer alignment */
965 	if (svc_failed(xprt) || svc_defunct(parent))
966 		return;
967 
968 /* LINTED pointer alignment */
969 	if (svc_type(xprt) == SVC_CONNECTION &&
970 	    (stat = SVC_STAT(xprt)) != XPRT_IDLE) {
971 		if (stat == XPRT_MOREREQS) {
972 			(void) mutex_lock(&svc_mutex);
973 			svc_pending_fds[svc_last_pending++] = xprt->xp_fd;
974 			if (svc_last_pending > CIRCULAR_BUFSIZE)
975 				svc_last_pending = 0;
976 			svc_total_pending++;
977 			(void) mutex_unlock(&svc_mutex);
978 			wake_up_poller = FALSE;
979 		} else {
980 			/*
981 			 * connection failed
982 			 */
983 			return;
984 		}
985 	} else {
986 		(void) rw_wrlock(&svc_fd_lock);
987 		set_pollfd(xprt->xp_fd, MASKVAL);
988 		(void) rw_unlock(&svc_fd_lock);
989 		wake_up_poller = TRUE;
990 	}
991 
992 	if (!wake_up_poller || !svc_polling) {
993 		/*
994 		 * Wake up any waiting threads.
995 		 */
996 		(void) mutex_lock(&svc_mutex);
997 		if (svc_waiters > 0) {
998 			(void) cond_broadcast(&svc_thr_fdwait);
999 			(void) mutex_unlock(&svc_mutex);
1000 			return;
1001 		}
1002 		(void) mutex_unlock(&svc_mutex);
1003 	}
1004 
1005 	/*
1006 	 * Wake up any polling thread.
1007 	 */
1008 	if (svc_polling)
1009 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
1010 }
1011 
1012 
1013 int
1014 __rpc_legal_connmaxrec(int suggested) {
1015 	if (suggested == -1) {
1016 		/* Supply default */
1017 		return (RPC_MAXDATASIZE + 2*sizeof (uint32_t));
1018 	} else if (suggested < 0) {
1019 		return (-1);
1020 	} else if (suggested > 0) {
1021 		/* Round down to multiple of BYTES_PER_XDR_UNIT */
1022 		suggested -= suggested % BYTES_PER_XDR_UNIT;
1023 		/* If possible, allow for two fragment headers */
1024 		if (suggested < MAXINT-(2*sizeof (uint32_t))) {
1025 			/* Allow for two fragment headers */
1026 			suggested += 2 * sizeof (uint32_t);
1027 		} else {
1028 			suggested = MAXINT;
1029 		}
1030 		if (suggested < sizeof (struct rpc_msg)) {
1031 			return (-1);
1032 		}
1033 	}
1034 	return (suggested);
1035 }
1036 
1037 
1038 bool_t
1039 rpc_control(int op, void *info)
1040 {
1041 	int		tmp;
1042 
1043 	switch (op) {
1044 	case RPC_SVC_MTMODE_SET:
1045 		tmp = *((int *)info);
1046 		if (tmp != RPC_SVC_MT_NONE && tmp != RPC_SVC_MT_AUTO &&
1047 		    tmp != RPC_SVC_MT_USER)
1048 			return (FALSE);
1049 		if (svc_mt_mode != RPC_SVC_MT_NONE && svc_mt_mode != tmp)
1050 			return (FALSE);
1051 		svc_mt_mode = tmp;
1052 		return (TRUE);
1053 	case RPC_SVC_MTMODE_GET:
1054 		*((int *)info) = svc_mt_mode;
1055 		return (TRUE);
1056 	case RPC_SVC_THRMAX_SET:
1057 		if ((tmp = *((int *)info)) < 1)
1058 			return (FALSE);
1059 		(void) mutex_lock(&svc_mutex);
1060 		svc_thr_max = tmp;
1061 		(void) mutex_unlock(&svc_mutex);
1062 		return (TRUE);
1063 	case RPC_SVC_THRMAX_GET:
1064 		*((int *)info) = svc_thr_max;
1065 		return (TRUE);
1066 	case RPC_SVC_THRTOTAL_GET:
1067 		*((int *)info) = svc_thr_total;
1068 		return (TRUE);
1069 	case RPC_SVC_THRCREATES_GET:
1070 		*((int *)info) = svc_thr_total_creates;
1071 		return (TRUE);
1072 	case RPC_SVC_THRERRORS_GET:
1073 		*((int *)info) = svc_thr_total_create_errors;
1074 		return (TRUE);
1075 	case RPC_SVC_USE_POLLFD:
1076 		if (*((int *)info) && !__rpc_use_pollfd_done) {
1077 			__rpc_use_pollfd_done = 1;
1078 			return (TRUE);
1079 		}
1080 		return (FALSE);
1081 	case RPC_SVC_CONNMAXREC_SET:
1082 		tmp = __rpc_legal_connmaxrec(*(int *)info);
1083 		if (tmp >= 0) {
1084 			__rpc_connmaxrec = tmp;
1085 			return (TRUE);
1086 		} else {
1087 			return (FALSE);
1088 		}
1089 	case RPC_SVC_CONNMAXREC_GET:
1090 		*((int *)info) = __rpc_connmaxrec;
1091 		return (TRUE);
1092 	case RPC_SVC_IRTIMEOUT_SET:
1093 		tmp = *((int *)info);
1094 		if (tmp >= 0) {
1095 			__rpc_irtimeout = tmp;
1096 			return (TRUE);
1097 		} else {
1098 			return (FALSE);
1099 		}
1100 	/*
1101 	 * No mutex necessary as _EXCLBIND_SET will/should only
1102 	 * be used before an RPC daemon goes mt-hot.
1103 	 */
1104 	case __RPC_SVC_EXCLBIND_SET:
1105 		if (info) {
1106 			__rpc_tp_exclbind = *((bool_t *)info);
1107 			return (TRUE);
1108 		}
1109 		return (FALSE);
1110 	case __RPC_SVC_EXCLBIND_GET:
1111 		if (info) {
1112 			*((bool_t *)info) = __rpc_tp_exclbind;
1113 			return (TRUE);
1114 		}
1115 		return (FALSE);
1116 
1117 	case __RPC_SVC_LSTNBKLOG_SET:
1118 		tmp = *(int *)info;
1119 		if (tmp > 0) {
1120 			__svc_lstnbklog = tmp;
1121 			return (TRUE);
1122 		}
1123 		return (FALSE);
1124 	case __RPC_SVC_LSTNBKLOG_GET:
1125 		*(int *)info = __svc_lstnbklog;
1126 		return (TRUE);
1127 
1128 	default:
1129 		return (FALSE);
1130 	}
1131 }
1132