xref: /titanic_44/usr/src/cmd/fs.d/nfs/lib/thrpool.c (revision f9c1591d75a5b335f06e3a8d0787bb812b588c01)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <thread.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <strings.h>
30 #include <tiuser.h>
31 #include <syslog.h>
32 #include <zone.h>
33 #include <sys/priocntl.h>
34 #include <sys/fxpriocntl.h>
35 #include <nfs/nfs.h>
36 #include <nfs/nfssys.h>
37 #include "thrpool.h"
38 
39 extern	int	_nfssys(int, void *);
40 
41 /*
42  * Thread to call into the kernel and do work on behalf of NFS.
43  */
44 static void *
45 svcstart(void *arg)
46 {
47 	int id = (int)arg;
48 
49 	while (_nfssys(SVCPOOL_RUN, &id) < 0) {
50 		/*
51 		 * Interrupted by a signal while in the kernel.
52 		 * this process is still alive, try again.
53 		 */
54 		if (errno == EINTR)
55 			continue;
56 		else
57 			break;
58 	}
59 
60 	/*
61 	 * If we weren't interrupted by a signal, but did
62 	 * return from the kernel, this thread's work is done,
63 	 * and it should exit.
64 	 */
65 	thr_exit(NULL);
66 	return (NULL);
67 }
68 
69 static void *
70 svc_rdma_creator(void *arg)
71 {
72 	struct rdma_svc_args *rsap = (struct rdma_svc_args *)arg;
73 
74 	if (_nfssys(RDMA_SVC_INIT, rsap) < 0) {
75 		if (errno != ENODEV) {
76 			(void) syslog(LOG_INFO, "RDMA transport startup "
77 			    "failed with %m");
78 		}
79 	}
80 	free(rsap);
81 	thr_exit(NULL);
82 	return (NULL);
83 }
84 
85 /*
86  * User-space "creator" thread. This thread blocks in the kernel
87  * until new worker threads need to be created for the service
88  * pool. On return to userspace, if there is no error, create a
89  * new thread for the service pool.
90  */
91 static void *
92 svcblock(void *arg)
93 {
94 	int id = (int)arg;
95 
96 	/* CONSTCOND */
97 	while (1) {
98 		thread_t tid;
99 
100 		/*
101 		 * Call into the kernel, and hang out there
102 		 * until a thread needs to be created.
103 		 */
104 		if (_nfssys(SVCPOOL_WAIT, &id) < 0) {
105 			if (errno == ECANCELED || errno == EBUSY)
106 				/*
107 				 * If we get back ECANCELED, the service
108 				 * pool is exiting, and we may as well
109 				 * clean up this thread. If EBUSY is
110 				 * returned, there's already a thread
111 				 * looping on this pool, so we should
112 				 * give up.
113 				 */
114 				break;
115 			else
116 				continue;
117 		}
118 
119 		/*
120 		 * User portion of the thread does no real work since
121 		 * the svcpool threads actually spend their entire
122 		 * lives in the kernel. So, user portion of the thread
123 		 * should have the smallest stack possible.
124 		 */
125 		(void) thr_create(NULL, THR_MIN_STACK, svcstart, (void *)id,
126 		    THR_BOUND | THR_DETACHED, &tid);
127 	}
128 
129 	thr_exit(NULL);
130 	return (NULL);
131 }
132 
133 void
134 svcsetprio(void)
135 {
136 	pcinfo_t pcinfo;
137 	pri_t maxupri;
138 
139 	/*
140 	 * By default, all threads should be part of the FX scheduler
141 	 * class. As nfsd/lockd server threads used to be part of the
142 	 * kernel, they're used to being scheduled in the SYS class.
143 	 * Userland threads shouldn't be in SYS, but they can be given a
144 	 * higher priority by default. This change still renders nfsd/lockd
145 	 * managable by an admin by utilizing commands to change scheduling
146 	 * manually, or by using resource management tools such as pools
147 	 * to associate them with a different scheduling class and segregate
148 	 * the workload.
149 	 *
150 	 * We set the threads' priority to the upper bound for priorities
151 	 * in FX. This should be 60, but since the desired action is to
152 	 * make nfsd/lockd more important than TS threads, we bow to the
153 	 * system's knowledge rather than setting it manually. Furthermore,
154 	 * since the SYS class doesn't timeslice, use an "infinite" quantum.
155 	 * If anything fails, just log the failure and let the daemon
156 	 * default to TS.
157 	 *
158 	 * The change of scheduling class is expected to fail in a non-global
159 	 * zone, so we avoid worrying the zone administrator unnecessarily.
160 	 */
161 	(void) strcpy(pcinfo.pc_clname, "FX");
162 	if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) != -1) {
163 		maxupri = ((fxinfo_t *)pcinfo.pc_clinfo)->fx_maxupri;
164 		if (priocntl(P_LWPID, P_MYID, PC_SETXPARMS, "FX",
165 		    FX_KY_UPRILIM, maxupri, FX_KY_UPRI, maxupri,
166 		    FX_KY_TQNSECS, FX_TQINF, NULL) != 0 &&
167 		    getzoneid() == GLOBAL_ZONEID)
168 			(void) syslog(LOG_ERR, "Unable to use FX scheduler: "
169 			    "%m. Using system default scheduler.");
170 	} else
171 		(void) syslog(LOG_ERR, "Unable to determine parameters "
172 		    "for FX scheduler. Using system default scheduler.");
173 }
174 
175 int
176 svcrdma(int id, int versmin, int versmax, int delegation)
177 {
178 	thread_t tid;
179 	struct rdma_svc_args *rsa;
180 
181 	rsa = (struct rdma_svc_args *)malloc(sizeof (struct rdma_svc_args));
182 	rsa->poolid = (uint32_t)id;
183 	rsa->netid = NULL;
184 	rsa->nfs_versmin = versmin;
185 	rsa->nfs_versmax = versmax;
186 	rsa->delegation = delegation;
187 
188 	/*
189 	 * Create a thread to handle RDMA start and stop.
190 	 */
191 	if (thr_create(NULL, THR_MIN_STACK * 2, svc_rdma_creator, (void *)rsa,
192 	    THR_BOUND | THR_DETACHED, &tid))
193 		return (1);
194 
195 	return (0);
196 }
197 
198 int
199 svcwait(int id)
200 {
201 	thread_t tid;
202 
203 	/*
204 	 * Create a bound thread to wait for kernel LWPs that
205 	 * need to be created. This thread also has little need
206 	 * of stackspace, so should be created with that in mind.
207 	 */
208 	if (thr_create(NULL, THR_MIN_STACK * 2, svcblock, (void *)id,
209 	    THR_BOUND | THR_DETACHED, &tid))
210 		return (1);
211 
212 	return (0);
213 }
214