xref: /illumos-gate/usr/src/cmd/fs.d/nfs/lib/thrpool.c (revision 4de2612967d06c4fdbf524a62556a1e8118a006f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <thread.h>
30 #include <stdlib.h>
31 #include <errno.h>
32 #include <strings.h>
33 #include <tiuser.h>
34 #include <syslog.h>
35 #include <zone.h>
36 #include <sys/priocntl.h>
37 #include <sys/fxpriocntl.h>
38 #include <nfs/nfs.h>
39 #include <nfs/nfssys.h>
40 #include "thrpool.h"
41 
42 extern	int	_nfssys(int, void *);
43 
44 /*
45  * Thread to call into the kernel and do work on behalf of NFS.
46  */
47 static void *
48 svcstart(void *arg)
49 {
50 	int id = (int)arg;
51 	int err;
52 
53 	while ((err = _nfssys(SVCPOOL_RUN, &id)) != 0) {
54 		/*
55 		 * Interrupted by a signal while in the kernel.
56 		 * this process is still alive, try again.
57 		 */
58 		if (err == EINTR)
59 			continue;
60 		else
61 			break;
62 	}
63 
64 	/*
65 	 * If we weren't interrupted by a signal, but did
66 	 * return from the kernel, this thread's work is done,
67 	 * and it should exit.
68 	 */
69 	thr_exit(NULL);
70 	return (NULL);
71 }
72 
73 static void *
74 svc_rdma_creator(void *arg)
75 {
76 	int error = 0;
77 	struct rdma_svc_args *rsap = (struct rdma_svc_args *)arg;
78 
79 	if (error = _nfssys(RDMA_SVC_INIT, rsap)) {
80 		if (error != ENODEV) {
81 			(void) syslog(LOG_INFO, "RDMA transport startup "
82 			    "failed with %m");
83 		}
84 	}
85 	free(rsap);
86 	thr_exit(NULL);
87 	return (NULL);
88 }
89 
90 /*
91  * User-space "creator" thread. This thread blocks in the kernel
92  * until new worker threads need to be created for the service
93  * pool. On return to userspace, if there is no error, create a
94  * new thread for the service pool.
95  */
96 static void *
97 svcblock(void *arg)
98 {
99 	int id = (int)arg;
100 
101 	/* CONSTCOND */
102 	while (1) {
103 		thread_t tid;
104 		int err;
105 
106 		/*
107 		 * Call into the kernel, and hang out there
108 		 * until a thread needs to be created.
109 		 */
110 		if (err = _nfssys(SVCPOOL_WAIT, &id)) {
111 			if (err == ECANCELED || err == EBUSY)
112 				/*
113 				 * If we get back ECANCELED, the service
114 				 * pool is exiting, and we may as well
115 				 * clean up this thread. If EBUSY is
116 				 * returned, there's already a thread
117 				 * looping on this pool, so we should
118 				 * give up.
119 				 */
120 				break;
121 			else
122 				continue;
123 		}
124 
125 		/*
126 		 * User portion of the thread does no real work since
127 		 * the svcpool threads actually spend their entire
128 		 * lives in the kernel. So, user portion of the thread
129 		 * should have the smallest stack possible.
130 		 */
131 		(void) thr_create(NULL, THR_MIN_STACK, svcstart, (void *)id,
132 		    THR_BOUND | THR_DETACHED, &tid);
133 	}
134 
135 	thr_exit(NULL);
136 	return (NULL);
137 }
138 
139 void
140 svcsetprio(void)
141 {
142 	pcinfo_t pcinfo;
143 	pri_t maxupri;
144 
145 	/*
146 	 * By default, all threads should be part of the FX scheduler
147 	 * class. As nfsd/lockd server threads used to be part of the
148 	 * kernel, they're used to being scheduled in the SYS class.
149 	 * Userland threads shouldn't be in SYS, but they can be given a
150 	 * higher priority by default. This change still renders nfsd/lockd
151 	 * managable by an admin by utilizing commands to change scheduling
152 	 * manually, or by using resource management tools such as pools
153 	 * to associate them with a different scheduling class and segregate
154 	 * the workload.
155 	 *
156 	 * We set the threads' priority to the upper bound for priorities
157 	 * in FX. This should be 60, but since the desired action is to
158 	 * make nfsd/lockd more important than TS threads, we bow to the
159 	 * system's knowledge rather than setting it manually. Furthermore,
160 	 * since the SYS class doesn't timeslice, use an "infinite" quantum.
161 	 * If anything fails, just log the failure and let the daemon
162 	 * default to TS.
163 	 *
164 	 * The change of scheduling class is expected to fail in a non-global
165 	 * zone, so we avoid worrying the zone administrator unnecessarily.
166 	 */
167 	(void) strcpy(pcinfo.pc_clname, "FX");
168 	if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) != -1) {
169 		maxupri = ((fxinfo_t *)pcinfo.pc_clinfo)->fx_maxupri;
170 		if (priocntl(P_LWPID, P_MYID, PC_SETXPARMS, "FX",
171 		    FX_KY_UPRILIM, maxupri, FX_KY_UPRI, maxupri,
172 		    FX_KY_TQNSECS, FX_TQINF, NULL) != 0 &&
173 		    getzoneid() == GLOBAL_ZONEID)
174 			(void) syslog(LOG_ERR, "Unable to use FX scheduler: "
175 			    "%m. Using system default scheduler.");
176 	} else
177 		(void) syslog(LOG_ERR, "Unable to determine parameters "
178 		    "for FX scheduler. Using system default scheduler.");
179 }
180 
181 int
182 svcrdma(int id, int versmin, int versmax, int delegation)
183 {
184 	thread_t tid;
185 	struct rdma_svc_args *rsa;
186 
187 	rsa = (struct rdma_svc_args *)malloc(sizeof (struct rdma_svc_args));
188 	rsa->poolid = (uint32_t)id;
189 	rsa->netid = NULL;
190 	rsa->nfs_versmin = versmin;
191 	rsa->nfs_versmax = versmax;
192 	rsa->delegation = delegation;
193 
194 	/*
195 	 * Create a thread to handle RDMA start and stop.
196 	 */
197 	if (thr_create(NULL, THR_MIN_STACK * 2, svc_rdma_creator, (void *)rsa,
198 	    THR_BOUND | THR_DETACHED, &tid))
199 		return (1);
200 
201 	return (0);
202 }
203 
204 int
205 svcwait(int id)
206 {
207 	thread_t tid;
208 
209 	/*
210 	 * Create a bound thread to wait for kernel LWPs that
211 	 * need to be created. This thread also has little need
212 	 * of stackspace, so should be created with that in mind.
213 	 */
214 	if (thr_create(NULL, THR_MIN_STACK * 2, svcblock, (void *)id,
215 	    THR_BOUND | THR_DETACHED, &tid))
216 		return (1);
217 
218 	return (0);
219 }
220