xref: /illumos-gate/usr/src/lib/libnsl/rpc/svc_run.c (revision 646e55b6807cdf761fecd1e4095d73116cdefdb5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 /*
30  * Portions of this source code were derived from Berkeley
31  * 4.3 BSD under license from the Regents of the University of
32  * California.
33  */
34 
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 /*
38  * This is the rpc server side idle loop
39  * Wait for input, call server program.
40  */
41 #include "mt.h"
42 #include "rpc_mt.h"
43 #include <stdlib.h>
44 #include <unistd.h>
45 #include <signal.h>
46 #include <rpc/rpc.h>
47 #include <errno.h>
48 #include <sys/poll.h>
49 #include <sys/types.h>
50 #include <syslog.h>
51 #include <thread.h>
52 #include <assert.h>
53 #include <libintl.h>
54 #include <values.h>
55 
56 extern const char __nsl_dom[];
57 
58 extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
59 extern bool_t __is_a_userfd(int P_fd);
60 extern void   __destroy_userfd();
61 extern void clear_pollfd(int);
62 extern void set_pollfd(int /* fd */, short /* events */);
63 extern void svc_getreq_poll();
64 extern void (*__proc_cleanup_cb)();
65 
66 static void start_threads();
67 static void create_pipe();
68 static void clear_pipe();
69 static int select_next_pollfd();
70 static SVCXPRT *make_xprt_copy();
71 static void _svc_run_mt();
72 static void _svc_run();
73 
74 int _svc_prog_dispatch();
75 static void _svc_done_private();
76 
77 extern rwlock_t svc_fd_lock;
78 extern mutex_t	svc_door_mutex;
79 extern cond_t	svc_door_waitcv;
80 extern int	svc_ndoorfds;
81 extern void	__svc_cleanup_door_xprts();
82 extern void	__svc_free_xprtlist();
83 extern void	__svc_getreq_user(struct pollfd *);
84 
85 /*
86  * Maximum fragment size allowed for connection oriented connections.
87  * Zero means that no maximum size limit has been requested.
88  */
89 int __rpc_connmaxrec = 0;
90 
91 /* Inter-Record Timeout in secs for non-blocked connection RPC */
92 int __rpc_irtimeout = 35;
93 
94 /*
95  * Request exclusive access to tcp and udp non-priv ports bound with a
96  * wildcard addr.
97  */
98 bool_t __rpc_tp_exclbind = FALSE;
99 
100 /*
101  * XXX - eventually, all mutexes and their initializations static
102  */
103 
104 /*
105  * Variables used for MT
106  */
107 int svc_mt_mode;		/* multi-threading mode */
108 
109 int svc_pipe[2];	/* pipe for breaking out of poll: read(0), write(1) */
110 
111 /* BEGIN PROTECTED BY svc_mutex */
112 
113 static int svc_thr_max = 16;	/* default maximum number of threads allowed */
114 
115 static int svc_thr_total;	/* current number of threads */
116 
117 static int svc_thr_active;	/* current number of threads active */
118 
119 /* circular array of file descriptors with pending data */
120 
121 #define	CIRCULAR_BUFSIZE	1024
122 
123 static int svc_pending_fds[CIRCULAR_BUFSIZE+1];	/* fds with pending data */
124 
125 static int svc_next_pending;			/* next one to be processed */
126 
127 static int svc_last_pending;			/* last one in list */
128 
129 static int svc_total_pending;			/* total in list */
130 
131 static int svc_thr_total_creates;	/* total created - stats */
132 
133 static int svc_thr_total_create_errors;	/* total create errors - stats */
134 
135 static int svc_waiters;		/* number of waiting threads */
136 
137 /* END PROTECTED BY svc_mutex */
138 
139 /* BEGIN PROTECTED BY svc_fd_lock: */
140 
141 int svc_nfds;		/* total number of active file descriptors */
142 
143 int svc_nfds_set;	/* total number of fd bits set in svc_fdset */
144 
145 int svc_max_fd = 0;	/* largest active file descriptor */
146 
147 int svc_npollfds;	/* total number of active pollfds */
148 
149 int svc_npollfds_set;	/* total number of pollfd set in svc_pollfd */
150 
151 int svc_max_pollfd;	/* largest active pollfd so far */
152 
153 int svc_pollfd_allocd;  /* number of pollfd structures allocated */
154 
155 /* END PROTECTED BY svc_fd_lock: */
156 
157 /* BEGIN PROTECTED BY svc_thr_mutex */
158 
159 #define	POLLSET_EXTEND	256
160 static int svc_pollset_allocd;
161 static struct pollfd *svc_pollset;
162 				/*
163 				 * array of file descriptors currently active
164 				 */
165 static int svc_polled;		/* no of fds polled in last poll() - input */
166 
167 static int svc_pollfds;		/* no of active fds in last poll() - output */
168 
169 static int svc_next_pollfd;	/* next fd  to processin svc_pollset */
170 
171 bool_t svc_polling;		/* true if a thread is polling */
172 
173 /* END PROTECTED BY svc_thr_mutex */
174 
175 /* BEGIN PROTECTED BY svc_exit_mutex */
176 
177 static bool_t svc_exit_done = TRUE;
178 
179 /* END PROTECTED BY svc_exit_mutex */
180 
181 /*
182  * Warlock section
183  */
184 
185 /* VARIABLES PROTECTED BY svc_mutex:
186 	svc_thr_total, svc_thr_active, svc_pending_fds, svc_next_pending,
187 	svc_last_pending, svc_total_pending, svc_thr_total_creates,
188 	svc_thr_total_create_errors,
189 	svcxprt_list_t::next, svcxprt_ext_t::my_xlist,
190 	svc_thr_max, svc_waiters
191  */
192 
193 /* VARIABLES PROTECTED BY svc_fd_lock:
194 	svc_xports, svc_fdset, svc_nfds, svc_nfds_set, svc_max_fd,
195 	svc_pollfd, svc_npollfds, svc_npollfds_set, svc_max_pollfd
196  */
197 
198 /* VARIABLES PROTECTED BY svc_thr_mutex:
199 	svc_pollset, svc_pollfds, svc_next_pollfd, svc_polling
200 	svc_pollset_allocd, svc_polled
201  */
202 
203 /* VARIABLES PROTECTED BY svc_exit_mutex:
204 	svc_exit_done
205  */
206 
207 /* VARIABLES READABLE WITHOUT LOCK:
208 	svc_thr_total, svc_thr_active, svc_thr_total_creates,
209 	svc_thr_total_create_errors,
210 	svc_xports, svc_nfds, svc_nfds_set, svc_max_fd,
211 	svc_npollfds, svc_npollfds_set, svc_max_pollfd,
212 	svc_pollfds, svc_next_pollfd, svc_exit_done, svc_polling,
213 	svc_thr_max, svc_waiters
214  */
215 
216 /* VARIABLES PROTECTED BY "program_logic":
217 	rpc_msg::, svc_req::, svcxprt_ext_t::flags, svc_mt_mode,
218 	svcxprt_ext_t::parent
219  */
220 
221 /* LOCK ORDER:
222 	svc_exit_mutex, svc_thr_mutex, svc_mutex, svc_fd_lock
223  */
224 
225 
226 void
227 svc_run(void)
228 {
229 	/* NO OTHER THREADS ARE RUNNING */
230 
231 	svc_exit_done = FALSE;
232 
233 	while ((svc_npollfds > 0 || svc_ndoorfds > 0) && !svc_exit_done) {
234 		if (svc_npollfds > 0) {
235 			switch (svc_mt_mode) {
236 			case RPC_SVC_MT_NONE:
237 				_svc_run();
238 				break;
239 			default:
240 				_svc_run_mt();
241 				break;
242 			}
243 			continue;
244 		}
245 
246 		(void) mutex_lock(&svc_door_mutex);
247 		if (svc_ndoorfds > 0)
248 			(void) cond_wait(&svc_door_waitcv, &svc_door_mutex);
249 		(void) mutex_unlock(&svc_door_mutex);
250 	}
251 }
252 
253 
254 /*
255  *	This function causes svc_run() to exit by destroying all
256  *	service handles.
257  */
258 void
259 svc_exit(void)
260 {
261 	SVCXPRT	*xprt;
262 	int fd;
263 	char dummy;
264 
265 	/* NO LOCKS HELD */
266 
267 	(void) mutex_lock(&svc_exit_mutex);
268 	if (svc_exit_done) {
269 		(void) mutex_unlock(&svc_exit_mutex);
270 		return;
271 	}
272 	svc_exit_done = TRUE;
273 	for (fd = 0; fd < svc_max_pollfd; fd++) {
274 		xprt = svc_xports[fd];
275 		if (xprt) {
276 			SVC_DESTROY(xprt);
277 		}
278 	}
279 	__svc_free_xprtlist();
280 	__svc_cleanup_door_xprts();
281 	(void) mutex_unlock(&svc_exit_mutex);
282 
283 	if (svc_mt_mode != RPC_SVC_MT_NONE) {
284 		(void) mutex_lock(&svc_mutex);
285 		(void) cond_broadcast(&svc_thr_fdwait);
286 		(void) mutex_unlock(&svc_mutex);
287 
288 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
289 	}
290 
291 	(void) mutex_lock(&svc_door_mutex);
292 	(void) cond_signal(&svc_door_waitcv);	/* wake up door dispatching */
293 	(void) mutex_unlock(&svc_door_mutex);
294 
295 	/* destroy reactor information if any */
296 	__destroy_userfd();
297 }
298 
299 
300 /*
301  * this funtion is called with svc_fd_lock and svc_thr_mutex
302  */
303 
304 static int
305 alloc_pollset(int npollfds)
306 {
307 	if (npollfds > svc_pollset_allocd) {
308 		pollfd_t *tmp;
309 		do {
310 			svc_pollset_allocd += POLLSET_EXTEND;
311 		} while (npollfds > svc_pollset_allocd);
312 		tmp = realloc(svc_pollset,
313 				sizeof (pollfd_t) * svc_pollset_allocd);
314 		if (tmp == NULL) {
315 			syslog(LOG_ERR, "alloc_pollset: out of memory");
316 			return (-1);
317 		}
318 		svc_pollset = tmp;
319 	}
320 	return (0);
321 }
322 
323 static void
324 _svc_run(void)
325 {
326 	sigset_t set, oldset;
327 	int npollfds;
328 	int i;
329 
330 	/*
331 	 * Block SIGALRM while doing work.  Unblock it while doing poll().
332 	 * This is so that services like rpc.rstatd can cause the poll()
333 	 * to be interrupted due to alarm() but that we don't end up in
334 	 * an MT-unsafe signal handler at an inopportune time.
335 	 */
336 	(void) sigemptyset(&set);
337 	(void) sigaddset(&set, SIGALRM);
338 	(void) sigprocmask(SIG_BLOCK, &set, &oldset);
339 	while (!svc_exit_done) {
340 		/*
341 		 * Check whether there is any server fd on which we may want
342 		 * to wait.
343 		 */
344 		(void) rw_rdlock(&svc_fd_lock);
345 		if (alloc_pollset(svc_npollfds) == -1)
346 			break;
347 		npollfds = __rpc_compress_pollfd(svc_max_pollfd,
348 			svc_pollfd, svc_pollset);
349 		(void) rw_unlock(&svc_fd_lock);
350 		if (npollfds == 0)
351 			break;	/* None waiting, hence return */
352 
353 		(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
354 		i = poll(svc_pollset, npollfds, -1);
355 		(void) sigprocmask(SIG_BLOCK, &set, &oldset);
356 		switch (i) {
357 		case -1:
358 			/*
359 			 * We ignore all errors, continuing with the assumption
360 			 * that it was set by the signal handlers (or any
361 			 * other outside event) and not caused by poll().
362 			 */
363 		case 0:
364 			continue;
365 		default:
366 			svc_getreq_poll(svc_pollset, i);
367 		}
368 	}
369 	(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
370 }
371 
/*
 * In _svc_run_mt(), myfd is normally tied to an index into svc_pollset:
 *	svc_pollset[mypollfd].fd == myfd
 * When no such index exists, mypollfd holds one of the negative marker
 * values below instead:
 *	INVALID_POLLFD	- no descriptor has been selected
 *	FD_FROM_PENDING	- myfd was taken from svc_pending_fds
 */
enum {
	INVALID_POLLFD = -200,
	FD_FROM_PENDING = -199
};
382 
383 static void
384 _svc_run_mt(void)
385 {
386 	int npollfds;
387 	int n_polled, dispatch;
388 
389 	static bool_t first_time = TRUE;
390 	bool_t main_thread = FALSE;
391 	int n_new;
392 	int myfd, mypollfd;
393 	SVCXPRT *parent_xprt, *xprt;
394 
395 	/*
396 	 * Server is multi-threaded.  Do "first time" initializations.
397 	 * Since only one thread exists in the beginning, there's no
398 	 * need for mutex protection for first time initializations.
399 	 */
400 	if (first_time) {
401 		first_time = FALSE;
402 		main_thread = TRUE;
403 		svc_thr_total = 1;	/* this thread */
404 		svc_next_pending = svc_last_pending = 0;
405 
406 		/*
407 		 * Create a pipe for waking up the poll, if new
408 		 * descriptors have been added to svc_fdset.
409 		 */
410 		create_pipe();
411 	}
412 
413 	/* OTHER THREADS ARE RUNNING */
414 
415 	if (svc_exit_done)
416 		return;
417 
418 	for (;;) {
419 		/*
420 		 * svc_thr_mutex prevents more than one thread from
421 		 * trying to select a descriptor to process further.
422 		 * svc_thr_mutex is unlocked after a thread selects
423 		 * a descriptor on which to receive data.  If there are
424 		 * no such descriptors, the thread will poll with
425 		 * svc_thr_mutex locked, after unlocking all other
426 		 * locks.  This prevents more than one thread from
427 		 * trying to poll at the same time.
428 		 */
429 		(void) mutex_lock(&svc_thr_mutex);
430 		(void) mutex_lock(&svc_mutex);
431 continue_with_locks:
432 		myfd = -1;
433 		mypollfd = INVALID_POLLFD;
434 
435 		/*
436 		 * Check if there are any descriptors with data pending.
437 		 */
438 		if (svc_total_pending > 0) {
439 			myfd = svc_pending_fds[svc_next_pending++];
440 			mypollfd = FD_FROM_PENDING;
441 			if (svc_next_pending > CIRCULAR_BUFSIZE)
442 				svc_next_pending = 0;
443 			svc_total_pending--;
444 		}
445 
446 		/*
447 		 * Get the next active file descriptor to process.
448 		 */
449 		if (myfd == -1 && svc_pollfds == 0) {
450 			/*
451 			 * svc_pollset is empty; do polling
452 			 */
453 			svc_polling = TRUE;
454 
455 			/*
456 			 * if there are no file descriptors, return
457 			 */
458 			(void) rw_rdlock(&svc_fd_lock);
459 			if (svc_npollfds == 0 ||
460 					alloc_pollset(svc_npollfds + 1) == -1) {
461 				(void) rw_unlock(&svc_fd_lock);
462 				svc_polling = FALSE;
463 				svc_thr_total--;
464 				(void) mutex_unlock(&svc_mutex);
465 				(void) mutex_unlock(&svc_thr_mutex);
466 				if (!main_thread) {
467 					thr_exit(NULL);
468 					/* NOTREACHED */
469 				}
470 				break;
471 			}
472 
473 			npollfds = __rpc_compress_pollfd(svc_max_pollfd,
474 					svc_pollfd, svc_pollset);
475 			(void) rw_unlock(&svc_fd_lock);
476 
477 			if (npollfds == 0) {
478 				/*
479 				 * There are file descriptors, but none of them
480 				 * are available for polling.  If this is the
481 				 * main thread, or if no thread is waiting,
482 				 * wait on condition variable, otherwise exit.
483 				 */
484 				svc_polling = FALSE;
485 				(void) mutex_unlock(&svc_thr_mutex);
486 				if ((!main_thread) && svc_waiters > 0) {
487 					svc_thr_total--;
488 					(void) mutex_unlock(&svc_mutex);
489 					thr_exit(NULL);
490 					/* NOTREACHED */
491 				}
492 
493 				while (svc_npollfds_set == 0 &&
494 					svc_pollfds == 0 &&
495 					svc_total_pending == 0 &&
496 							!svc_exit_done) {
497 					svc_waiters++;
498 					(void) cond_wait(&svc_thr_fdwait,
499 								&svc_mutex);
500 					svc_waiters--;
501 				}
502 
503 				/*
504 				 * Check exit flag.  If this is not the main
505 				 * thread, exit.
506 				 */
507 				if (svc_exit_done) {
508 					svc_thr_total--;
509 					(void) mutex_unlock(&svc_mutex);
510 					if (!main_thread)
511 						thr_exit(NULL);
512 					break;
513 				}
514 
515 				(void) mutex_unlock(&svc_mutex);
516 				continue;
517 			}
518 
519 			/*
520 			 * We're ready to poll.  Always set svc_pipe[0]
521 			 * as the last one, since the poll will occasionally
522 			 * need to be interrupted.  Release svc_mutex for
523 			 * the duration of the poll, but hold on to
524 			 * svc_thr_mutex, as we don't want any other thread
525 			 * to do the same.
526 			 */
527 			svc_pollset[npollfds].fd = svc_pipe[0];
528 			svc_pollset[npollfds].events = MASKVAL;
529 
530 			do {
531 				int i, j;
532 
533 				(void) mutex_unlock(&svc_mutex);
534 				n_polled = poll(svc_pollset, npollfds + 1, -1);
535 				(void) mutex_lock(&svc_mutex);
536 				if (n_polled <= 0)
537 					continue;
538 
539 				/*
540 				 * Check if information returned indicates one
541 				 * or more closed fd's; find and remove any such
542 				 * information
543 				 */
544 				for (i = 0; i <= npollfds; i++) {
545 					if (svc_pollset[i].revents & POLLNVAL) {
546 						/* Overwrite svc_pollset[i] */
547 						for (j = i; j < npollfds; j++)
548 							svc_pollset[j] =
549 							    svc_pollset[j + 1];
550 						(void) memset(&svc_pollset[j],
551 						    0, sizeof (struct pollfd));
552 						npollfds--;
553 						n_polled--;
554 						i--;
555 					}
556 				}
557 			} while (n_polled <= 0);
558 			svc_polling = FALSE;
559 
560 			/*
561 			 * If there's data in the pipe, clear it.
562 			 */
563 			if (svc_pollset[npollfds].revents) {
564 				clear_pipe();
565 				n_polled--;
566 				svc_pollset[npollfds].revents = 0;
567 			}
568 			svc_polled = npollfds;
569 			svc_pollfds = n_polled;
570 			svc_next_pollfd = 0;
571 
572 			/*
573 			 * Check exit flag.
574 			 */
575 			if (svc_exit_done) {
576 				svc_thr_total--;
577 				(void) mutex_unlock(&svc_mutex);
578 				(void) mutex_unlock(&svc_thr_mutex);
579 				if (!main_thread) {
580 					thr_exit(NULL);
581 					/* NOTREACHED */
582 				}
583 				break;
584 			}
585 
586 			/*
587 			 * If no descriptor is active, continue.
588 			 */
589 			if (svc_pollfds == 0)
590 				goto continue_with_locks;
591 		}
592 
593 		/*
594 		 * If a file descriptor has already not been selected,
595 		 * choose a file descriptor.
596 		 * svc_pollfds and svc_next_pollfd are updated.
597 		 */
598 		if (myfd == -1) {
599 			if (select_next_pollfd(&myfd, &mypollfd) == -1)
600 				goto continue_with_locks;
601 		}
602 
603 		/*
604 		 * Check to see if new threads need to be started.
605 		 * Count of threads that could be gainfully employed is
606 		 * obtained as follows:
607 		 *	- count 1 for poller
608 		 *	- count 1 for this request
609 		 *	- count active file descriptors (svc_pollfds)
610 		 *	- count pending file descriptors
611 		 *
612 		 * (svc_thr_total - svc_thr_active) are already available.
613 		 * This thread is one of the available threads.
614 		 *
615 		 * Number of new threads should not exceed
616 		 *	(svc_thr_max - svc_thr_total).
617 		 */
618 		if (svc_thr_total < svc_thr_max &&
619 			    svc_mt_mode == RPC_SVC_MT_AUTO && !svc_exit_done) {
620 			n_new = 1 + 1 + svc_pollfds + svc_total_pending -
621 					(svc_thr_total - svc_thr_active);
622 			if (n_new > (svc_thr_max - svc_thr_total))
623 				n_new = svc_thr_max - svc_thr_total;
624 			if (n_new > 0)
625 				start_threads(n_new);
626 		}
627 
628 		/*
629 		 * Get parent xprt.  It is possible for the parent service
630 		 * handle to be destroyed by now, due to a race condition.
631 		 * Check for this, and if so, log a warning and go on.
632 		 */
633 		parent_xprt = svc_xports[myfd];
634 		if (parent_xprt == NULL) {
635 			/* Check if it is not a user FD */
636 			if (__is_a_userfd(myfd) == TRUE)
637 				__svc_getreq_user(&(svc_pollset[mypollfd]));
638 			goto continue_with_locks;
639 		}
640 /* LINTED pointer alignment */
641 		if (svc_defunct(parent_xprt) || svc_failed(parent_xprt))
642 			goto continue_with_locks;
643 
644 		/*
645 		 * Make a copy of parent xprt, update svc_fdset.
646 		 */
647 		if ((xprt = make_xprt_copy(parent_xprt)) == NULL)
648 			goto continue_with_locks;
649 
650 		/*
651 		 * Keep track of active threads in automatic mode.
652 		 */
653 		if (svc_mt_mode == RPC_SVC_MT_AUTO)
654 			svc_thr_active++;
655 
656 		/*
657 		 * Release mutexes so other threads can get going.
658 		 */
659 		(void) mutex_unlock(&svc_mutex);
660 		(void) mutex_unlock(&svc_thr_mutex);
661 
662 		/*
663 		 * Process request.
664 		 */
665 		{
666 			struct rpc_msg *msg;
667 			struct svc_req *r;
668 			char *cred_area;
669 
670 /* LINTED pointer alignment */
671 			msg = SVCEXT(xprt)->msg;
672 /* LINTED pointer alignment */
673 			r = SVCEXT(xprt)->req;
674 /* LINTED pointer alignment */
675 			cred_area = SVCEXT(xprt)->cred_area;
676 
677 
678 			msg->rm_call.cb_cred.oa_base = cred_area;
679 			msg->rm_call.cb_verf.oa_base =
680 						&(cred_area[MAX_AUTH_BYTES]);
681 			r->rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);
682 
683 			/*
684 			 * receive RPC message
685 			 */
686 			if ((dispatch = SVC_RECV(xprt, msg))) {
687 				if (svc_mt_mode != RPC_SVC_MT_NONE)
688 /* LINTED pointer alignment */
689 					svc_flags(xprt) |= SVC_ARGS_CHECK;
690 				dispatch = _svc_prog_dispatch(xprt, msg, r);
691 
692 				/*
693 				 * Call cleanup procedure if set.
694 				 */
695 				if (__proc_cleanup_cb != NULL)
696 					(*__proc_cleanup_cb)(xprt);
697 			} else
698 				svc_args_done(xprt);
699 
700 			/*
701 			 * Finish up, if automatic mode, or not dispatched.
702 			 */
703 			if (svc_mt_mode == RPC_SVC_MT_AUTO || !dispatch) {
704 /* LINTED pointer alignment */
705 				if (svc_flags(xprt) & SVC_ARGS_CHECK)
706 					svc_args_done(xprt);
707 				(void) mutex_lock(&svc_mutex);
708 				_svc_done_private(xprt);
709 				if (svc_mt_mode == RPC_SVC_MT_AUTO) {
710 					/*
711 					 * not active any more
712 					 */
713 					svc_thr_active--;
714 
715 					/*
716 					 * If not main thread, exit unless
717 					 * there's some immediate work.
718 					 */
719 					if (!main_thread &&
720 						    svc_pollfds <= 0 &&
721 						    svc_total_pending <= 0 &&
722 						    (svc_polling ||
723 							svc_waiters > 0)) {
724 						svc_thr_total--;
725 						if (svc_thr_total ==
726 						    svc_waiters) {
727 							(void) cond_broadcast(
728 							    &svc_thr_fdwait);
729 						}
730 						(void) mutex_unlock(&svc_mutex);
731 						thr_exit(NULL);
732 						/* NOTREACHED */
733 					}
734 				}
735 				(void) mutex_unlock(&svc_mutex);
736 			}
737 		}
738 
739 	}
740 }
741 
742 
743 /*
744  * start_threads() - Start specified number of threads.
745  */
746 static void
747 start_threads(int num_threads)
748 {
749 	int		i;
750 
751 	assert(MUTEX_HELD(&svc_mutex));
752 
753 	for (i = 0; i < num_threads; i++) {
754 		if (thr_create(NULL, 0, (void *(*)(void *))_svc_run_mt, NULL,
755 		    THR_DETACHED, NULL) == 0) {
756 			svc_thr_total++;
757 			svc_thr_total_creates++;
758 		} else {
759 			svc_thr_total_create_errors++;
760 		}
761 	}
762 }
763 
764 
765 /*
766  * create_pipe() - create pipe for breaking out of poll.
767  */
768 static void
769 create_pipe(void)
770 {
771 	if (pipe(svc_pipe) == -1) {
772 		syslog(LOG_ERR, dgettext(__nsl_dom,
773 				"RPC: svc could not create pipe - exiting"));
774 		exit(1);
775 	}
776 	if (fcntl(svc_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
777 		syslog(LOG_ERR, dgettext(__nsl_dom,
778 					"RPC: svc pipe error - exiting"));
779 		exit(1);
780 	}
781 	if (fcntl(svc_pipe[1], F_SETFL, O_NONBLOCK) == -1) {
782 		syslog(LOG_ERR, dgettext(__nsl_dom,
783 					"RPC: svc pipe error - exiting"));
784 		exit(1);
785 	}
786 }
787 
788 
789 /*
790  * clear_pipe() - Empty data in pipe.
791  */
792 static void
793 clear_pipe(void)
794 {
795 	char	buf[16];
796 	int	i;
797 
798 	do {
799 		i = read(svc_pipe[0], buf, sizeof (buf));
800 	} while (i == sizeof (buf));
801 }
802 
803 
804 /*
805  * select_next_pollfd() - Select the next active fd in svc_pollset.
806  */
807 static int
808 select_next_pollfd(int *fd, int *pollfdIndex)
809 {
810 	int i;
811 
812 	assert(MUTEX_HELD(&svc_thr_mutex));
813 	assert(MUTEX_HELD(&svc_mutex));
814 
815 	for (i = svc_next_pollfd; svc_pollfds > 0 && i < svc_polled;
816 							i++) {
817 		if (svc_pollset[i].revents) {
818 			svc_pollfds--;
819 			/*
820 			 * No more special case for POLLNVAL, because it may
821 			 * be linked with a user file descriptot callback
822 			 */
823 			svc_next_pollfd = i + 1;
824 
825 			*fd = svc_pollset[i].fd;
826 			*pollfdIndex = i;
827 
828 			return (0);
829 		}
830 	}
831 	svc_next_pollfd = svc_pollfds = 0;
832 	*fd = -1;
833 	*pollfdIndex = INVALID_POLLFD;
834 	return (-1);
835 }
836 
837 
838 /*
839  * make_xprt_copy() - make a copy of the parent xprt.
840  * Clear fd bit in svc_fdset.
841  */
842 static SVCXPRT *
843 make_xprt_copy(SVCXPRT *parent)
844 {
845 /* LINTED pointer alignment */
846 	SVCXPRT_LIST	*xlist = SVCEXT(parent)->my_xlist;
847 	SVCXPRT_LIST	*xret;
848 	SVCXPRT		*xprt;
849 	int		fd = parent->xp_fd;
850 
851 	assert(MUTEX_HELD(&svc_mutex));
852 
853 	xret = xlist->next;
854 	if (xret) {
855 		xlist->next = xret->next;
856 		xret->next = NULL;
857 		xprt = xret->xprt;
858 /* LINTED pointer alignment */
859 		svc_flags(xprt) = svc_flags(parent);
860 	} else
861 		xprt = svc_copy(parent);
862 
863 	if (xprt) {
864 /* LINTED pointer alignment */
865 		SVCEXT(parent)->refcnt++;
866 		(void) rw_wrlock(&svc_fd_lock);
867 		clear_pollfd(fd);
868 		(void) rw_unlock(&svc_fd_lock);
869 	}
870 	return (xprt);
871 }
872 
873 /*
874  * _svc_done_private() - return copies to library.
875  */
876 static void
877 _svc_done_private(SVCXPRT *xprt)
878 {
879 	SVCXPRT		*parent;
880 	SVCXPRT_LIST	*xhead, *xlist;
881 
882 	assert(MUTEX_HELD(&svc_mutex));
883 
884 /* LINTED pointer alignment */
885 	if ((parent = SVCEXT(xprt)->parent) == NULL)
886 		return;
887 
888 /* LINTED pointer alignment */
889 	xhead = SVCEXT(parent)->my_xlist;
890 /* LINTED pointer alignment */
891 	xlist = SVCEXT(xprt)->my_xlist;
892 	xlist->next = xhead->next;
893 	xhead->next = xlist;
894 
895 /* LINTED pointer alignment */
896 	SVCEXT(parent)->refcnt--;
897 
898 	/*
899 	 * Propagate any error flags.  This is done in both directions to
900 	 * ensure that if one child gets an error, everyone will see it
901 	 * (even if there are multiple outstanding children) and the
902 	 * transport will get closed.
903 	 */
904 /* LINTED pointer alignment */
905 	svc_flags(xprt) |= svc_flags(parent);
906 /* LINTED pointer alignment */
907 	if (svc_failed(xprt) || svc_defunct(xprt)) {
908 /* LINTED pointer alignment */
909 		svc_flags(parent) |= (svc_flags(xprt) &
910 				(SVC_FAILED | SVC_DEFUNCT));
911 /* LINTED pointer alignment */
912 		if (SVCEXT(parent)->refcnt == 0)
913 			_svc_destroy_private(xprt);
914 	}
915 }
916 
917 void
918 svc_done(SVCXPRT *xprt)
919 {
920 	if (svc_mt_mode != RPC_SVC_MT_USER)
921 		return;
922 
923 	/*
924 	 * Make sure file descriptor is released in user mode.
925 	 * If the xprt is a door, do nothing: this work is performed by
926 	 * svc_door.c's return_xprt_copy() routine, which is basically a
927 	 * door-specific copy of _svc_done_private().
928 	 */
929 /* LINTED pointer alignment */
930 	if (svc_type(xprt) == SVC_DOOR)
931 		return;
932 
933 /* LINTED pointer alignment */
934 	if (svc_flags(xprt) & SVC_ARGS_CHECK)
935 		svc_args_done(xprt);
936 
937 	(void) mutex_lock(&svc_mutex);
938 	_svc_done_private(xprt);
939 	(void) mutex_unlock(&svc_mutex);
940 }
941 
942 
943 /*
944  * Mark argument completion.  Release file descriptor.
945  */
946 void
947 svc_args_done(SVCXPRT *xprt)
948 {
949 	char	dummy;
950 /* LINTED pointer alignment */
951 	SVCXPRT	*parent = SVCEXT(xprt)->parent;
952 	bool_t	wake_up_poller;
953 	enum	xprt_stat stat;
954 
955 /* LINTED pointer alignment */
956 	svc_flags(xprt) |= svc_flags(parent);
957 /* LINTED pointer alignment */
958 	svc_flags(xprt) &= ~SVC_ARGS_CHECK;
959 /* LINTED pointer alignment */
960 	if (svc_failed(xprt) || svc_defunct(parent))
961 		return;
962 
963 /* LINTED pointer alignment */
964 	if (svc_type(xprt) == SVC_CONNECTION &&
965 				(stat = SVC_STAT(xprt)) != XPRT_IDLE) {
966 		if (stat == XPRT_MOREREQS) {
967 			(void) mutex_lock(&svc_mutex);
968 			svc_pending_fds[svc_last_pending++] = xprt->xp_fd;
969 			if (svc_last_pending > CIRCULAR_BUFSIZE)
970 				svc_last_pending = 0;
971 			svc_total_pending++;
972 			(void) mutex_unlock(&svc_mutex);
973 			wake_up_poller = FALSE;
974 		} else {
975 			/*
976 			 * connection failed
977 			 */
978 			return;
979 		}
980 	} else {
981 		(void) rw_wrlock(&svc_fd_lock);
982 		set_pollfd(xprt->xp_fd, MASKVAL);
983 		(void) rw_unlock(&svc_fd_lock);
984 		wake_up_poller = TRUE;
985 	}
986 
987 	if (!wake_up_poller || !svc_polling) {
988 		/*
989 		 * Wake up any waiting threads.
990 		 */
991 		(void) mutex_lock(&svc_mutex);
992 		if (svc_waiters > 0) {
993 			(void) cond_broadcast(&svc_thr_fdwait);
994 			(void) mutex_unlock(&svc_mutex);
995 			return;
996 		}
997 		(void) mutex_unlock(&svc_mutex);
998 	}
999 
1000 	/*
1001 	 * Wake up any polling thread.
1002 	 */
1003 	if (svc_polling)
1004 		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
1005 }
1006 
1007 
1008 int
1009 __rpc_legal_connmaxrec(int suggested) {
1010 	if (suggested == -1) {
1011 		/* Supply default */
1012 		return (RPC_MAXDATASIZE + 2*sizeof (uint32_t));
1013 	} else if (suggested < 0) {
1014 		return (-1);
1015 	} else if (suggested > 0) {
1016 		/* Round down to multiple of BYTES_PER_XDR_UNIT */
1017 		suggested -= suggested % BYTES_PER_XDR_UNIT;
1018 		/* If possible, allow for two fragment headers */
1019 		if (suggested < MAXINT-(2*sizeof (uint32_t))) {
1020 			/* Allow for two fragment headers */
1021 			suggested += 2 * sizeof (uint32_t);
1022 		} else {
1023 			suggested = MAXINT;
1024 		}
1025 		if (suggested < sizeof (struct rpc_msg)) {
1026 			return (-1);
1027 		}
1028 	}
1029 	return (suggested);
1030 }
1031 
1032 
1033 bool_t
1034 rpc_control(int op, void *info)
1035 {
1036 	int		tmp;
1037 
1038 	switch (op) {
1039 	case RPC_SVC_MTMODE_SET:
1040 		tmp = *((int *)info);
1041 		if (tmp != RPC_SVC_MT_NONE && tmp != RPC_SVC_MT_AUTO &&
1042 						tmp != RPC_SVC_MT_USER)
1043 			return (FALSE);
1044 		if (svc_mt_mode != RPC_SVC_MT_NONE && svc_mt_mode != tmp)
1045 			return (FALSE);
1046 		svc_mt_mode = tmp;
1047 		return (TRUE);
1048 	case RPC_SVC_MTMODE_GET:
1049 		*((int *)info) = svc_mt_mode;
1050 		return (TRUE);
1051 	case RPC_SVC_THRMAX_SET:
1052 		if ((tmp = *((int *)info)) < 1)
1053 			return (FALSE);
1054 		(void) mutex_lock(&svc_mutex);
1055 		svc_thr_max = tmp;
1056 		(void) mutex_unlock(&svc_mutex);
1057 		return (TRUE);
1058 	case RPC_SVC_THRMAX_GET:
1059 		*((int *)info) = svc_thr_max;
1060 		return (TRUE);
1061 	case RPC_SVC_THRTOTAL_GET:
1062 		*((int *)info) = svc_thr_total;
1063 		return (TRUE);
1064 	case RPC_SVC_THRCREATES_GET:
1065 		*((int *)info) = svc_thr_total_creates;
1066 		return (TRUE);
1067 	case RPC_SVC_THRERRORS_GET:
1068 		*((int *)info) = svc_thr_total_create_errors;
1069 		return (TRUE);
1070 	case RPC_SVC_USE_POLLFD:
1071 		if (*((int *)info) && !__rpc_use_pollfd_done) {
1072 			__rpc_use_pollfd_done = 1;
1073 			return (TRUE);
1074 		}
1075 		return (FALSE);
1076 	case RPC_SVC_CONNMAXREC_SET:
1077 		tmp = __rpc_legal_connmaxrec(*(int *)info);
1078 		if (tmp >= 0) {
1079 			__rpc_connmaxrec = tmp;
1080 			return (TRUE);
1081 		} else {
1082 			return (FALSE);
1083 		}
1084 	case RPC_SVC_CONNMAXREC_GET:
1085 		*((int *)info) = __rpc_connmaxrec;
1086 		return (TRUE);
1087 	case RPC_SVC_IRTIMEOUT_SET:
1088 		tmp = *((int *)info);
1089 		if (tmp >= 0) {
1090 			__rpc_irtimeout = tmp;
1091 			return (TRUE);
1092 		} else {
1093 			return (FALSE);
1094 		}
1095 	/*
1096 	 * No mutex necessary as _EXCLBIND_SET will/should only
1097 	 * be used before an RPC daemon goes mt-hot.
1098 	 */
1099 	case __RPC_SVC_EXCLBIND_SET:
1100 		if (info) {
1101 			__rpc_tp_exclbind = *((bool_t *)info);
1102 			return (TRUE);
1103 		}
1104 		return (FALSE);
1105 	case __RPC_SVC_EXCLBIND_GET:
1106 		if (info) {
1107 			*((bool_t *)info) = __rpc_tp_exclbind;
1108 			return (TRUE);
1109 		}
1110 		return (FALSE);
1111 
1112 	default:
1113 		return (FALSE);
1114 	}
1115 }
1116