1fcf3ce44SJohn Forte /*
2fcf3ce44SJohn Forte * CDDL HEADER START
3fcf3ce44SJohn Forte *
4fcf3ce44SJohn Forte * The contents of this file are subject to the terms of the
5fcf3ce44SJohn Forte * Common Development and Distribution License (the "License").
6fcf3ce44SJohn Forte * You may not use this file except in compliance with the License.
7fcf3ce44SJohn Forte *
8fcf3ce44SJohn Forte * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fcf3ce44SJohn Forte * or http://www.opensolaris.org/os/licensing.
10fcf3ce44SJohn Forte * See the License for the specific language governing permissions
11fcf3ce44SJohn Forte * and limitations under the License.
12fcf3ce44SJohn Forte *
13fcf3ce44SJohn Forte * When distributing Covered Code, include this CDDL HEADER in each
14fcf3ce44SJohn Forte * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fcf3ce44SJohn Forte * If applicable, add the following below this CDDL HEADER, with the
16fcf3ce44SJohn Forte * fields enclosed by brackets "[]" replaced with your own identifying
17fcf3ce44SJohn Forte * information: Portions Copyright [yyyy] [name of copyright owner]
18fcf3ce44SJohn Forte *
19fcf3ce44SJohn Forte * CDDL HEADER END
20fcf3ce44SJohn Forte */
21*570de38fSSurya Prakki
22fcf3ce44SJohn Forte /*
23*570de38fSSurya Prakki * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24fcf3ce44SJohn Forte * Use is subject to license terms.
25fcf3ce44SJohn Forte */
26fcf3ce44SJohn Forte
27fcf3ce44SJohn Forte #include <sys/types.h>
28fcf3ce44SJohn Forte #include <sys/resource.h>
29fcf3ce44SJohn Forte #include <sys/priocntl.h>
30fcf3ce44SJohn Forte #include <sys/rtpriocntl.h>
31fcf3ce44SJohn Forte #include <sys/tspriocntl.h>
32fcf3ce44SJohn Forte #include <sys/wait.h>
33fcf3ce44SJohn Forte #include <sys/stat.h>
34fcf3ce44SJohn Forte
35fcf3ce44SJohn Forte #include <strings.h>
36fcf3ce44SJohn Forte #include <thread.h>
37fcf3ce44SJohn Forte #include <stdlib.h>
38fcf3ce44SJohn Forte #include <signal.h>
39fcf3ce44SJohn Forte #include <errno.h>
40fcf3ce44SJohn Forte #include <stdio.h>
41fcf3ce44SJohn Forte #include <fcntl.h>
42fcf3ce44SJohn Forte #include <locale.h>
43fcf3ce44SJohn Forte #include <unistd.h>
44fcf3ce44SJohn Forte #include <syslog.h>
45fcf3ce44SJohn Forte
46fcf3ce44SJohn Forte #include <sys/nsctl/cfg.h>
47fcf3ce44SJohn Forte #include <sys/nsctl/nsctl.h>
48fcf3ce44SJohn Forte #include <sys/nsctl/nsc_ioctl.h>
49fcf3ce44SJohn Forte #include <sys/nskernd.h>
50fcf3ce44SJohn Forte #include <nsctl.h>
51fcf3ce44SJohn Forte
52fcf3ce44SJohn Forte #include <sys/mkdev.h>
53fcf3ce44SJohn Forte #include <sys/nsctl/sv_efi.h>
54fcf3ce44SJohn Forte
55fcf3ce44SJohn Forte static const char *rdev = "/dev/nsctl";
56fcf3ce44SJohn Forte
57fcf3ce44SJohn Forte /*
58fcf3ce44SJohn Forte * Define a minimal user stack size in bytes over and above the
59fcf3ce44SJohn Forte * libthread THR_STACK_MIN minimum value.
60fcf3ce44SJohn Forte *
61fcf3ce44SJohn Forte * This stack size needs to be sufficient to run _newlwp() and then
62fcf3ce44SJohn Forte * ioctl() down into the kernel.
63fcf3ce44SJohn Forte */
64fcf3ce44SJohn Forte #define NSK_STACK_SIZE 512
65fcf3ce44SJohn Forte
66fcf3ce44SJohn Forte /*
67fcf3ce44SJohn Forte * LWP scheduling control switches.
68fcf3ce44SJohn Forte *
69fcf3ce44SJohn Forte * allow_pri - set to non-zero to enable priocntl() manipulations of
70fcf3ce44SJohn Forte * created LWPs.
71fcf3ce44SJohn Forte * allow_rt - set to non-zero to use the RT rather than the TS
72fcf3ce44SJohn Forte * scheduling class when manipulating the schduling
73fcf3ce44SJohn Forte * parameters for an LWP. Only used if allow_pri is
74fcf3ce44SJohn Forte * non-zero.
75fcf3ce44SJohn Forte */
76fcf3ce44SJohn Forte static int allow_pri = 1;
77fcf3ce44SJohn Forte static int allow_rt = 0; /* disallow - bad interactions with timeout() */
78fcf3ce44SJohn Forte
79fcf3ce44SJohn Forte static int nsctl_fd = -1;
80fcf3ce44SJohn Forte static int sigterm;
81fcf3ce44SJohn Forte
82fcf3ce44SJohn Forte static int nthreads; /* number of threads in the kernel */
83fcf3ce44SJohn Forte static int exiting; /* shutdown in progress flag */
84fcf3ce44SJohn Forte static mutex_t thr_mutex = DEFAULTMUTEX;
85fcf3ce44SJohn Forte static mutex_t cfg_mutex = DEFAULTMUTEX;
86fcf3ce44SJohn Forte
87fcf3ce44SJohn Forte static int cl_nodeid = -1;
88fcf3ce44SJohn Forte
89fcf3ce44SJohn Forte static int display_msg = 0;
90fcf3ce44SJohn Forte static int delay_time = 30;
91fcf3ce44SJohn Forte
/*
 * Print the command synopsis on stderr and terminate the process with
 * exit status 255.  Does not return.
 */
static void
usage(void)
{
	FILE *out = stderr;

	(void) fprintf(out, gettext("usage: nskernd\n"));
	exit(255);
}
98fcf3ce44SJohn Forte
99fcf3ce44SJohn Forte
100fcf3ce44SJohn Forte static void
sighand(int sig)101fcf3ce44SJohn Forte sighand(int sig)
102fcf3ce44SJohn Forte {
103fcf3ce44SJohn Forte if (sig == SIGTERM) {
104fcf3ce44SJohn Forte sigterm++;
105fcf3ce44SJohn Forte }
106fcf3ce44SJohn Forte }
107fcf3ce44SJohn Forte
108fcf3ce44SJohn Forte
109fcf3ce44SJohn Forte /*
110fcf3ce44SJohn Forte * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel
111fcf3ce44SJohn Forte */
112fcf3ce44SJohn Forte int
nthread_inc(void)113fcf3ce44SJohn Forte nthread_inc(void)
114fcf3ce44SJohn Forte {
115*570de38fSSurya Prakki (void) mutex_lock(&thr_mutex);
116fcf3ce44SJohn Forte if (exiting) {
117fcf3ce44SJohn Forte /* cannot enter kernel as nskernd is being shutdown - exit */
118*570de38fSSurya Prakki (void) mutex_unlock(&thr_mutex);
119fcf3ce44SJohn Forte return (0);
120fcf3ce44SJohn Forte }
121fcf3ce44SJohn Forte nthreads++;
122*570de38fSSurya Prakki (void) mutex_unlock(&thr_mutex);
123fcf3ce44SJohn Forte return (1);
124fcf3ce44SJohn Forte }
125fcf3ce44SJohn Forte
126fcf3ce44SJohn Forte
127fcf3ce44SJohn Forte void
nthread_dec(void)128fcf3ce44SJohn Forte nthread_dec(void)
129fcf3ce44SJohn Forte {
130*570de38fSSurya Prakki (void) mutex_lock(&thr_mutex);
131fcf3ce44SJohn Forte nthreads--;
132*570de38fSSurya Prakki (void) mutex_unlock(&thr_mutex);
133fcf3ce44SJohn Forte }
134fcf3ce44SJohn Forte
135fcf3ce44SJohn Forte
136fcf3ce44SJohn Forte /*
137fcf3ce44SJohn Forte * returns: 1 - can shutdown; 0 - unable to shutdown
138fcf3ce44SJohn Forte */
139fcf3ce44SJohn Forte int
canshutdown(void)140fcf3ce44SJohn Forte canshutdown(void)
141fcf3ce44SJohn Forte {
142fcf3ce44SJohn Forte int rc = 1;
143fcf3ce44SJohn Forte time_t start_delay;
144fcf3ce44SJohn Forte
145*570de38fSSurya Prakki (void) mutex_lock(&thr_mutex);
146fcf3ce44SJohn Forte if (nthreads > 0) {
147fcf3ce44SJohn Forte if (display_msg) {
148*570de38fSSurya Prakki (void) fprintf(stderr,
149fcf3ce44SJohn Forte gettext("nskernd: unable to shutdown: "
150fcf3ce44SJohn Forte "%d kernel threads in use\n"), nthreads);
151fcf3ce44SJohn Forte }
152fcf3ce44SJohn Forte start_delay = time(0);
153fcf3ce44SJohn Forte while (nthreads > 0 && (time(0) - start_delay) < delay_time) {
154*570de38fSSurya Prakki (void) mutex_unlock(&thr_mutex);
155*570de38fSSurya Prakki (void) sleep(1);
156*570de38fSSurya Prakki (void) mutex_lock(&thr_mutex);
157*570de38fSSurya Prakki (void) fprintf(stderr,
158fcf3ce44SJohn Forte gettext("nskernd: delay shutdown: "
159fcf3ce44SJohn Forte "%d kernel threads in use\n"), nthreads);
160fcf3ce44SJohn Forte }
161fcf3ce44SJohn Forte if (nthreads > 0) {
162fcf3ce44SJohn Forte rc = 0;
163fcf3ce44SJohn Forte } else {
164fcf3ce44SJohn Forte exiting = 1;
165fcf3ce44SJohn Forte }
166fcf3ce44SJohn Forte } else {
167fcf3ce44SJohn Forte /* flag shutdown in progress */
168fcf3ce44SJohn Forte exiting = 1;
169fcf3ce44SJohn Forte }
170*570de38fSSurya Prakki (void) mutex_unlock(&thr_mutex);
171fcf3ce44SJohn Forte
172fcf3ce44SJohn Forte return (rc);
173fcf3ce44SJohn Forte }
174fcf3ce44SJohn Forte
175fcf3ce44SJohn Forte
176fcf3ce44SJohn Forte /*
177fcf3ce44SJohn Forte * returns: 1 - shutdown successful; 0 - unable to shutdown
178fcf3ce44SJohn Forte */
179fcf3ce44SJohn Forte int
shutdown(void)180fcf3ce44SJohn Forte shutdown(void)
181fcf3ce44SJohn Forte {
182fcf3ce44SJohn Forte struct nskernd data;
183fcf3ce44SJohn Forte int rc;
184fcf3ce44SJohn Forte
185fcf3ce44SJohn Forte if (nsctl_fd < 0)
186fcf3ce44SJohn Forte return (1);
187fcf3ce44SJohn Forte
188fcf3ce44SJohn Forte bzero(&data, sizeof (data));
189fcf3ce44SJohn Forte data.command = NSKERND_STOP;
190fcf3ce44SJohn Forte
191fcf3ce44SJohn Forte if (!canshutdown()) {
192fcf3ce44SJohn Forte return (0);
193fcf3ce44SJohn Forte }
194fcf3ce44SJohn Forte
195fcf3ce44SJohn Forte rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data);
196fcf3ce44SJohn Forte if (rc < 0) {
197fcf3ce44SJohn Forte if (errno != EINTR || !sigterm) {
198*570de38fSSurya Prakki (void) fprintf(stderr,
199fcf3ce44SJohn Forte gettext("nskernd: NSKERND_STOP failed\n"));
200fcf3ce44SJohn Forte }
201fcf3ce44SJohn Forte }
202fcf3ce44SJohn Forte
203fcf3ce44SJohn Forte return (1);
204fcf3ce44SJohn Forte }
205fcf3ce44SJohn Forte
206fcf3ce44SJohn Forte
207fcf3ce44SJohn Forte /*
208fcf3ce44SJohn Forte * First function run by a NSKERND_NEWLWP thread.
209fcf3ce44SJohn Forte *
210fcf3ce44SJohn Forte * Determines if it needs to change the scheduling priority of the LWP,
211fcf3ce44SJohn Forte * and then calls back into the kernel.
212fcf3ce44SJohn Forte */
213fcf3ce44SJohn Forte static void *
_newlwp(void * arg)214fcf3ce44SJohn Forte _newlwp(void *arg)
215fcf3ce44SJohn Forte {
216fcf3ce44SJohn Forte struct nskernd nsk;
217fcf3ce44SJohn Forte pcparms_t pcparms;
218fcf3ce44SJohn Forte pcinfo_t pcinfo;
219fcf3ce44SJohn Forte
220fcf3ce44SJohn Forte /* copy arguments onto stack and free heap memory */
221fcf3ce44SJohn Forte bcopy(arg, &nsk, sizeof (nsk));
222fcf3ce44SJohn Forte free(arg);
223fcf3ce44SJohn Forte
224fcf3ce44SJohn Forte if (nsk.data2 && allow_pri) {
225fcf3ce44SJohn Forte /* increase the scheduling priority of this LWP */
226fcf3ce44SJohn Forte
227fcf3ce44SJohn Forte bzero(&pcinfo, sizeof (pcinfo));
228*570de38fSSurya Prakki (void) strcpy(pcinfo.pc_clname, allow_rt ? "RT" : "TS");
229fcf3ce44SJohn Forte
230fcf3ce44SJohn Forte if (priocntl(0, 0, PC_GETCID, (char *)&pcinfo) < 0) {
231*570de38fSSurya Prakki (void) fprintf(stderr,
232fcf3ce44SJohn Forte gettext(
233fcf3ce44SJohn Forte "nskernd: priocntl(PC_GETCID) failed: %s\n"),
234fcf3ce44SJohn Forte strerror(errno));
235fcf3ce44SJohn Forte goto pri_done;
236fcf3ce44SJohn Forte }
237fcf3ce44SJohn Forte
238fcf3ce44SJohn Forte bzero(&pcparms, sizeof (pcparms));
239fcf3ce44SJohn Forte pcparms.pc_cid = pcinfo.pc_cid;
240fcf3ce44SJohn Forte
241fcf3ce44SJohn Forte if (allow_rt) {
242fcf3ce44SJohn Forte ((rtparms_t *)pcparms.pc_clparms)->rt_pri =
243fcf3ce44SJohn Forte (pri_t)0; /* minimum RT priority */
244fcf3ce44SJohn Forte ((rtparms_t *)pcparms.pc_clparms)->rt_tqsecs =
245fcf3ce44SJohn Forte (uint_t)RT_TQDEF;
246fcf3ce44SJohn Forte ((rtparms_t *)pcparms.pc_clparms)->rt_tqnsecs =
247fcf3ce44SJohn Forte RT_TQDEF;
248fcf3ce44SJohn Forte } else {
249fcf3ce44SJohn Forte ((tsparms_t *)pcparms.pc_clparms)->ts_uprilim =
250fcf3ce44SJohn Forte ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri;
251fcf3ce44SJohn Forte ((tsparms_t *)pcparms.pc_clparms)->ts_upri =
252fcf3ce44SJohn Forte ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri;
253fcf3ce44SJohn Forte }
254fcf3ce44SJohn Forte
255fcf3ce44SJohn Forte if (priocntl(P_LWPID, P_MYID,
256fcf3ce44SJohn Forte PC_SETPARMS, (char *)&pcparms) < 0) {
257*570de38fSSurya Prakki (void) fprintf(stderr,
258fcf3ce44SJohn Forte gettext(
259fcf3ce44SJohn Forte "nskernd: priocntl(PC_SETPARMS) failed: %s\n"),
260fcf3ce44SJohn Forte strerror(errno));
261fcf3ce44SJohn Forte }
262fcf3ce44SJohn Forte }
263fcf3ce44SJohn Forte
264fcf3ce44SJohn Forte pri_done:
265fcf3ce44SJohn Forte if (nthread_inc()) {
266fcf3ce44SJohn Forte (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk);
267fcf3ce44SJohn Forte nthread_dec();
268fcf3ce44SJohn Forte }
269fcf3ce44SJohn Forte return (NULL);
270fcf3ce44SJohn Forte }
271fcf3ce44SJohn Forte
272fcf3ce44SJohn Forte
273fcf3ce44SJohn Forte /*
274fcf3ce44SJohn Forte * Start a new thread bound to an LWP.
275fcf3ce44SJohn Forte *
276fcf3ce44SJohn Forte * This is the user level side of nsc_create_process().
277fcf3ce44SJohn Forte */
278fcf3ce44SJohn Forte static void
newlwp(struct nskernd * req)279fcf3ce44SJohn Forte newlwp(struct nskernd *req)
280fcf3ce44SJohn Forte {
281fcf3ce44SJohn Forte struct nskernd *nskp;
282fcf3ce44SJohn Forte thread_t tid;
283fcf3ce44SJohn Forte int rc;
284fcf3ce44SJohn Forte
285fcf3ce44SJohn Forte nskp = malloc(sizeof (*nskp));
286fcf3ce44SJohn Forte if (!nskp) {
287fcf3ce44SJohn Forte #ifdef DEBUG
288*570de38fSSurya Prakki (void) fprintf(stderr, gettext("nskernd: malloc(%d) failed\n"),
289fcf3ce44SJohn Forte sizeof (*nskp));
290fcf3ce44SJohn Forte #endif
291fcf3ce44SJohn Forte req->data1 = (uint64_t)ENOMEM;
292fcf3ce44SJohn Forte return;
293fcf3ce44SJohn Forte }
294fcf3ce44SJohn Forte
295fcf3ce44SJohn Forte /* copy args for child */
296fcf3ce44SJohn Forte bcopy(req, nskp, sizeof (*nskp));
297fcf3ce44SJohn Forte
298fcf3ce44SJohn Forte rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE),
299fcf3ce44SJohn Forte _newlwp, nskp, THR_BOUND|THR_DETACHED, &tid);
300fcf3ce44SJohn Forte
301fcf3ce44SJohn Forte if (rc != 0) {
302fcf3ce44SJohn Forte /* thr_create failed */
303fcf3ce44SJohn Forte #ifdef DEBUG
304*570de38fSSurya Prakki (void) fprintf(stderr,
305*570de38fSSurya Prakki gettext("nskernd: thr_create failed: %s\n"),
306fcf3ce44SJohn Forte strerror(errno));
307fcf3ce44SJohn Forte #endif
308fcf3ce44SJohn Forte req->data1 = (uint64_t)errno;
309fcf3ce44SJohn Forte free(nskp);
310fcf3ce44SJohn Forte } else {
311fcf3ce44SJohn Forte /* success - _newlwp() will free nskp */
312fcf3ce44SJohn Forte req->data1 = (uint64_t)0;
313fcf3ce44SJohn Forte }
314fcf3ce44SJohn Forte }
315fcf3ce44SJohn Forte
316fcf3ce44SJohn Forte static int
log_iibmp_err(char * set,int flags)317fcf3ce44SJohn Forte log_iibmp_err(char *set, int flags)
318fcf3ce44SJohn Forte {
319fcf3ce44SJohn Forte CFGFILE *cfg;
320fcf3ce44SJohn Forte char key[CFG_MAX_KEY];
321fcf3ce44SJohn Forte char buf[CFG_MAX_BUF];
322fcf3ce44SJohn Forte char newflags[CFG_MAX_BUF];
323fcf3ce44SJohn Forte char outbuf[CFG_MAX_BUF];
324fcf3ce44SJohn Forte char *mst, *shd, *bmp, *mode, *ovr, *cnode, *opt, *grp;
325fcf3ce44SJohn Forte int setno, found = 0;
326fcf3ce44SJohn Forte int setlen;
327fcf3ce44SJohn Forte int rc = 0;
328fcf3ce44SJohn Forte pid_t pid = -1;
329fcf3ce44SJohn Forte
330fcf3ce44SJohn Forte if (set && *set) {
331fcf3ce44SJohn Forte setlen = strlen(set);
332fcf3ce44SJohn Forte } else {
333fcf3ce44SJohn Forte return (EINVAL);
334fcf3ce44SJohn Forte }
335fcf3ce44SJohn Forte
336*570de38fSSurya Prakki (void) mutex_lock(&cfg_mutex);
337fcf3ce44SJohn Forte cfg = cfg_open("");
338fcf3ce44SJohn Forte if (!cfg) {
339*570de38fSSurya Prakki (void) mutex_unlock(&cfg_mutex);
340fcf3ce44SJohn Forte return (ENXIO);
341fcf3ce44SJohn Forte }
342fcf3ce44SJohn Forte
343fcf3ce44SJohn Forte if (!cfg_lock(cfg, CFG_WRLOCK)) {
344fcf3ce44SJohn Forte
345*570de38fSSurya Prakki (void) mutex_unlock(&cfg_mutex);
346fcf3ce44SJohn Forte cfg_close(cfg);
347fcf3ce44SJohn Forte
348fcf3ce44SJohn Forte pid = fork();
349fcf3ce44SJohn Forte
350fcf3ce44SJohn Forte if (pid == -1) {
351*570de38fSSurya Prakki (void) fprintf(stderr, gettext(
352fcf3ce44SJohn Forte "nskernd: Error forking\n"));
353fcf3ce44SJohn Forte return (errno);
354fcf3ce44SJohn Forte } else if (pid > 0) {
355*570de38fSSurya Prakki (void) fprintf(stdout, gettext(
356fcf3ce44SJohn Forte "nskernd: Attempting deferred bitmap error\n"));
357fcf3ce44SJohn Forte return (0);
358fcf3ce44SJohn Forte }
359fcf3ce44SJohn Forte
360*570de38fSSurya Prakki (void) mutex_lock(&cfg_mutex);
361fcf3ce44SJohn Forte cfg = cfg_open("");
362fcf3ce44SJohn Forte if (!cfg) {
363*570de38fSSurya Prakki (void) mutex_unlock(&cfg_mutex);
364*570de38fSSurya Prakki (void) fprintf(stderr, gettext(
365fcf3ce44SJohn Forte "nskernd: Failed cfg_open, deferred bitmap\n"));
366fcf3ce44SJohn Forte return (ENXIO);
367fcf3ce44SJohn Forte }
368fcf3ce44SJohn Forte
369fcf3ce44SJohn Forte /* Sooner or later, this lock will be free */
370fcf3ce44SJohn Forte while (!cfg_lock(cfg, CFG_WRLOCK))
371*570de38fSSurya Prakki (void) sleep(2);
372fcf3ce44SJohn Forte }
373fcf3ce44SJohn Forte
374fcf3ce44SJohn Forte /* find the proper set number */
375fcf3ce44SJohn Forte for (setno = 1; !found; setno++) {
376*570de38fSSurya Prakki (void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno);
377fcf3ce44SJohn Forte if (cfg_get_cstring(cfg, key, buf, CFG_MAX_BUF) < 0) {
378fcf3ce44SJohn Forte break;
379fcf3ce44SJohn Forte }
380fcf3ce44SJohn Forte
381fcf3ce44SJohn Forte mst = strtok(buf, " ");
382fcf3ce44SJohn Forte shd = strtok(NULL, " ");
383fcf3ce44SJohn Forte if (strncmp(shd, set, setlen) == 0) {
384fcf3ce44SJohn Forte found = 1;
385fcf3ce44SJohn Forte
386fcf3ce44SJohn Forte bmp = strtok(NULL, " ");
387fcf3ce44SJohn Forte mode = strtok(NULL, " ");
388fcf3ce44SJohn Forte ovr = strtok(NULL, " ");
389fcf3ce44SJohn Forte cnode = strtok(NULL, " ");
390fcf3ce44SJohn Forte opt = strtok(NULL, " ");
391fcf3ce44SJohn Forte grp = strtok(NULL, " ");
392fcf3ce44SJohn Forte break;
393fcf3ce44SJohn Forte }
394fcf3ce44SJohn Forte }
395fcf3ce44SJohn Forte
396fcf3ce44SJohn Forte if (found) {
397fcf3ce44SJohn Forte /* were there flags in the options field already? */
398*570de38fSSurya Prakki (void) snprintf(newflags, CFG_MAX_BUF, "%s=0x%x",
399fcf3ce44SJohn Forte NSKERN_II_BMP_OPTION, flags);
400fcf3ce44SJohn Forte if (opt && strcmp(opt, "-") != 0) {
401fcf3ce44SJohn Forte bzero(newflags, CFG_MAX_BUF);
402fcf3ce44SJohn Forte opt = strtok(opt, ";");
403fcf3ce44SJohn Forte while (opt) {
404fcf3ce44SJohn Forte if (strncmp(opt, NSKERN_II_BMP_OPTION,
405fcf3ce44SJohn Forte strlen(NSKERN_II_BMP_OPTION)) != 0) {
406*570de38fSSurya Prakki (void) strcat(newflags, ";");
407*570de38fSSurya Prakki (void) strcat(newflags, opt);
408fcf3ce44SJohn Forte }
409fcf3ce44SJohn Forte }
410fcf3ce44SJohn Forte }
411*570de38fSSurya Prakki (void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno);
412*570de38fSSurya Prakki (void) snprintf(outbuf, CFG_MAX_BUF, "%s %s %s %s %s %s %s %s",
413fcf3ce44SJohn Forte mst, shd, bmp, mode, ovr, cnode, newflags, grp);
414fcf3ce44SJohn Forte if (cfg_put_cstring(cfg, key, outbuf, CFG_MAX_BUF) < 0) {
415*570de38fSSurya Prakki (void) printf("Failed to put [%s]\n", outbuf);
416fcf3ce44SJohn Forte rc = ENXIO;
417fcf3ce44SJohn Forte } else {
418*570de38fSSurya Prakki (void) cfg_commit(cfg);
419fcf3ce44SJohn Forte rc = 0;
420fcf3ce44SJohn Forte }
421fcf3ce44SJohn Forte } else {
422*570de38fSSurya Prakki (void) fprintf(stderr, gettext(
423fcf3ce44SJohn Forte "nskernd: Failed deferred bitmap [%s]\n"), set);
424fcf3ce44SJohn Forte rc = EINVAL;
425fcf3ce44SJohn Forte }
426fcf3ce44SJohn Forte cfg_unlock(cfg);
427fcf3ce44SJohn Forte cfg_close(cfg);
428*570de38fSSurya Prakki (void) mutex_unlock(&cfg_mutex);
429fcf3ce44SJohn Forte
430fcf3ce44SJohn Forte /*
431fcf3ce44SJohn Forte * if we are the fork'ed client, just exit, if parent just return
432fcf3ce44SJohn Forte */
433fcf3ce44SJohn Forte if (pid == 0) {
434fcf3ce44SJohn Forte exit(rc);
435fcf3ce44SJohn Forte /*NOTREACHED*/
436fcf3ce44SJohn Forte } else {
437fcf3ce44SJohn Forte return (rc);
438fcf3ce44SJohn Forte }
439fcf3ce44SJohn Forte }
440fcf3ce44SJohn Forte
441fcf3ce44SJohn Forte /*
442fcf3ce44SJohn Forte * First function run by a NSKERND_LOCK thread.
443fcf3ce44SJohn Forte *
444fcf3ce44SJohn Forte * Opens dscfg and locks it,
445fcf3ce44SJohn Forte * and then calls back into the kernel.
446fcf3ce44SJohn Forte *
447fcf3ce44SJohn Forte * Incoming:
448fcf3ce44SJohn Forte * data1 is the kernel address of the sync structure.
449fcf3ce44SJohn Forte * data2 is read(0)/write(1) lock mode.
450fcf3ce44SJohn Forte *
451fcf3ce44SJohn Forte * Returns:
452fcf3ce44SJohn Forte * data1 as incoming.
453fcf3ce44SJohn Forte * data2 errno.
454fcf3ce44SJohn Forte */
455fcf3ce44SJohn Forte static void *
_dolock(void * arg)456fcf3ce44SJohn Forte _dolock(void *arg)
457fcf3ce44SJohn Forte {
458fcf3ce44SJohn Forte struct nskernd nsk;
459fcf3ce44SJohn Forte CFGFILE *cfg;
460fcf3ce44SJohn Forte int locked;
461fcf3ce44SJohn Forte int mode;
462fcf3ce44SJohn Forte int rc = 0;
463fcf3ce44SJohn Forte
464fcf3ce44SJohn Forte /* copy arguments onto stack and free heap memory */
465fcf3ce44SJohn Forte bcopy(arg, &nsk, sizeof (nsk));
466fcf3ce44SJohn Forte free(arg);
467fcf3ce44SJohn Forte
468*570de38fSSurya Prakki (void) mutex_lock(&cfg_mutex);
469fcf3ce44SJohn Forte cfg = cfg_open("");
470fcf3ce44SJohn Forte if (cfg == NULL) {
471fcf3ce44SJohn Forte #ifdef DEBUG
472*570de38fSSurya Prakki (void) fprintf(stderr,
473*570de38fSSurya Prakki gettext("nskernd: cfg_open failed: %s\n"),
474fcf3ce44SJohn Forte strerror(errno));
475fcf3ce44SJohn Forte #endif
476fcf3ce44SJohn Forte rc = ENXIO;
477fcf3ce44SJohn Forte }
478fcf3ce44SJohn Forte
479fcf3ce44SJohn Forte if (nsk.data2 == 0) {
480fcf3ce44SJohn Forte mode = CFG_RDLOCK;
481fcf3ce44SJohn Forte } else {
482fcf3ce44SJohn Forte mode = CFG_WRLOCK;
483fcf3ce44SJohn Forte }
484fcf3ce44SJohn Forte
485fcf3ce44SJohn Forte locked = 0;
486fcf3ce44SJohn Forte if (rc == 0) {
487fcf3ce44SJohn Forte if (cfg_lock(cfg, mode)) {
488fcf3ce44SJohn Forte locked = 1;
489fcf3ce44SJohn Forte } else {
490fcf3ce44SJohn Forte #ifdef DEBUG
491*570de38fSSurya Prakki (void) fprintf(stderr,
492fcf3ce44SJohn Forte gettext("nskernd: cfg_lock failed: %s\n"),
493fcf3ce44SJohn Forte strerror(errno));
494fcf3ce44SJohn Forte #endif
495fcf3ce44SJohn Forte rc = EINVAL;
496fcf3ce44SJohn Forte }
497fcf3ce44SJohn Forte }
498fcf3ce44SJohn Forte
499fcf3ce44SJohn Forte /* return to kernel */
500fcf3ce44SJohn Forte
501fcf3ce44SJohn Forte nsk.data2 = (uint64_t)rc;
502fcf3ce44SJohn Forte if (nthread_inc()) {
503fcf3ce44SJohn Forte (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk);
504fcf3ce44SJohn Forte nthread_dec();
505fcf3ce44SJohn Forte }
506fcf3ce44SJohn Forte
507fcf3ce44SJohn Forte /* cleanup */
508fcf3ce44SJohn Forte
509fcf3ce44SJohn Forte if (locked) {
510fcf3ce44SJohn Forte cfg_unlock(cfg);
511fcf3ce44SJohn Forte locked = 0;
512fcf3ce44SJohn Forte }
513fcf3ce44SJohn Forte
514fcf3ce44SJohn Forte if (cfg != NULL) {
515fcf3ce44SJohn Forte cfg_close(cfg);
516fcf3ce44SJohn Forte cfg = NULL;
517fcf3ce44SJohn Forte }
518*570de38fSSurya Prakki (void) mutex_unlock(&cfg_mutex);
519fcf3ce44SJohn Forte
520fcf3ce44SJohn Forte return (NULL);
521fcf3ce44SJohn Forte }
522fcf3ce44SJohn Forte
523fcf3ce44SJohn Forte
524fcf3ce44SJohn Forte /*
525fcf3ce44SJohn Forte * Inter-node lock thread.
526fcf3ce44SJohn Forte *
527fcf3ce44SJohn Forte * This is the user level side of nsc_rmlock().
528fcf3ce44SJohn Forte */
529fcf3ce44SJohn Forte static void
dolock(struct nskernd * req)530fcf3ce44SJohn Forte dolock(struct nskernd *req)
531fcf3ce44SJohn Forte {
532fcf3ce44SJohn Forte struct nskernd *nskp;
533fcf3ce44SJohn Forte thread_t tid;
534fcf3ce44SJohn Forte int rc;
535fcf3ce44SJohn Forte
536fcf3ce44SJohn Forte /* create a new thread to do the lock and return to kernel */
537fcf3ce44SJohn Forte
538fcf3ce44SJohn Forte nskp = malloc(sizeof (*nskp));
539fcf3ce44SJohn Forte if (!nskp) {
540fcf3ce44SJohn Forte #ifdef DEBUG
541*570de38fSSurya Prakki (void) fprintf(stderr,
542*570de38fSSurya Prakki gettext("nskernd:dolock: malloc(%d) failed\n"),
543fcf3ce44SJohn Forte sizeof (*nskp));
544fcf3ce44SJohn Forte #endif
545fcf3ce44SJohn Forte req->data1 = (uint64_t)ENOMEM;
546fcf3ce44SJohn Forte return;
547fcf3ce44SJohn Forte }
548fcf3ce44SJohn Forte
549fcf3ce44SJohn Forte /* copy args for child */
550fcf3ce44SJohn Forte bcopy(req, nskp, sizeof (*nskp));
551fcf3ce44SJohn Forte
552fcf3ce44SJohn Forte rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE),
553fcf3ce44SJohn Forte _dolock, nskp, THR_BOUND|THR_DETACHED, &tid);
554fcf3ce44SJohn Forte
555fcf3ce44SJohn Forte if (rc != 0) {
556fcf3ce44SJohn Forte /* thr_create failed */
557fcf3ce44SJohn Forte #ifdef DEBUG
558*570de38fSSurya Prakki (void) fprintf(stderr,
559*570de38fSSurya Prakki gettext("nskernd: thr_create failed: %s\n"),
560fcf3ce44SJohn Forte strerror(errno));
561fcf3ce44SJohn Forte #endif
562fcf3ce44SJohn Forte req->data1 = (uint64_t)errno;
563fcf3ce44SJohn Forte free(nskp);
564fcf3ce44SJohn Forte } else {
565fcf3ce44SJohn Forte /* success - _dolock() will free nskp */
566fcf3ce44SJohn Forte req->data1 = (uint64_t)0;
567fcf3ce44SJohn Forte }
568fcf3ce44SJohn Forte }
569fcf3ce44SJohn Forte
570fcf3ce44SJohn Forte
571fcf3ce44SJohn Forte /*
572fcf3ce44SJohn Forte * Convenience code for engineering test of multi-terabyte volumes.
573fcf3ce44SJohn Forte *
574fcf3ce44SJohn Forte * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI
575fcf3ce44SJohn Forte * labels. This code allocates a simple efi label structure and ioctls
576fcf3ce44SJohn Forte * to extract the size of a zvol. It only handles the minimal EFI ioctl
577fcf3ce44SJohn Forte * implementation in zvol.
578fcf3ce44SJohn Forte */
579fcf3ce44SJohn Forte
580fcf3ce44SJohn Forte static void
zvol_bsize(char * path,uint64_t * size,const int pnum)581fcf3ce44SJohn Forte zvol_bsize(char *path, uint64_t *size, const int pnum)
582fcf3ce44SJohn Forte {
583fcf3ce44SJohn Forte struct stat64 stb1, stb2;
584fcf3ce44SJohn Forte struct dk_minfo dkm;
585fcf3ce44SJohn Forte int fd = -1;
586fcf3ce44SJohn Forte int rc;
587fcf3ce44SJohn Forte
588fcf3ce44SJohn Forte if (cl_nodeid || pnum != 0)
589fcf3ce44SJohn Forte return;
590fcf3ce44SJohn Forte
591fcf3ce44SJohn Forte if ((fd = open(path, O_RDONLY)) < 0) {
592fcf3ce44SJohn Forte return;
593fcf3ce44SJohn Forte }
594fcf3ce44SJohn Forte
595fcf3ce44SJohn Forte if (stat64("/devices/pseudo/zfs@0:zfs", &stb1) != 0 ||
596fcf3ce44SJohn Forte fstat64(fd, &stb2) != 0 ||
597fcf3ce44SJohn Forte !S_ISCHR(stb1.st_mode) ||
598fcf3ce44SJohn Forte !S_ISCHR(stb2.st_mode) ||
599fcf3ce44SJohn Forte major(stb1.st_rdev) != major(stb2.st_rdev)) {
600fcf3ce44SJohn Forte (void) close(fd);
601fcf3ce44SJohn Forte return;
602fcf3ce44SJohn Forte }
603fcf3ce44SJohn Forte
604fcf3ce44SJohn Forte rc = ioctl(fd, DKIOCGMEDIAINFO, (void *)&dkm);
605fcf3ce44SJohn Forte if (rc >= 0) {
606fcf3ce44SJohn Forte *size = LE_64(dkm.dki_capacity) *
607fcf3ce44SJohn Forte (dkm.dki_lbsize) / 512;
608fcf3ce44SJohn Forte }
609fcf3ce44SJohn Forte
610fcf3ce44SJohn Forte (void) close(fd);
611fcf3ce44SJohn Forte }
612fcf3ce44SJohn Forte
613fcf3ce44SJohn Forte /* ARGSUSED */
614fcf3ce44SJohn Forte static void
get_bsize(uint64_t raw_fd,uint64_t * size,int * partitionp,char * path)615fcf3ce44SJohn Forte get_bsize(uint64_t raw_fd, uint64_t *size, int *partitionp, char *path)
616fcf3ce44SJohn Forte {
617fcf3ce44SJohn Forte struct nscioc_bsize bsize;
618fcf3ce44SJohn Forte #ifdef DKIOCPARTITION
619fcf3ce44SJohn Forte struct partition64 p64;
620fcf3ce44SJohn Forte #endif
621fcf3ce44SJohn Forte struct dk_cinfo dki_info;
622fcf3ce44SJohn Forte struct vtoc vtoc;
623fcf3ce44SJohn Forte int fd;
624fcf3ce44SJohn Forte
625fcf3ce44SJohn Forte *partitionp = -1;
626fcf3ce44SJohn Forte *size = (uint64_t)0;
627fcf3ce44SJohn Forte
628fcf3ce44SJohn Forte dki_info.dki_partition = (ushort_t)-1;
629fcf3ce44SJohn Forte bsize.dki_info = (uint64_t)(unsigned long)&dki_info;
630fcf3ce44SJohn Forte bsize.vtoc = (uint64_t)(unsigned long)&vtoc;
631fcf3ce44SJohn Forte bsize.raw_fd = raw_fd;
632fcf3ce44SJohn Forte bsize.efi = 0;
633fcf3ce44SJohn Forte
634fcf3ce44SJohn Forte fd = open(rdev, O_RDONLY);
635fcf3ce44SJohn Forte if (fd < 0)
636fcf3ce44SJohn Forte return;
637fcf3ce44SJohn Forte
638fcf3ce44SJohn Forte if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) {
639fcf3ce44SJohn Forte if (dki_info.dki_partition != (ushort_t)-1) {
640fcf3ce44SJohn Forte /* assume part# is ok and just the size failed */
641fcf3ce44SJohn Forte *partitionp = (int)dki_info.dki_partition;
642fcf3ce44SJohn Forte
643fcf3ce44SJohn Forte #ifdef DKIOCPARTITION
644fcf3ce44SJohn Forte /* see if this is an EFI label */
645fcf3ce44SJohn Forte bzero(&p64, sizeof (p64));
646fcf3ce44SJohn Forte p64.p_partno = (uint_t)*partitionp;
647fcf3ce44SJohn Forte if ((ioctl(fd, DKIOCPARTITION, &p64)) > 0) {
648fcf3ce44SJohn Forte *size = (uint64_t)p64.p_size;
649fcf3ce44SJohn Forte } else {
650fcf3ce44SJohn Forte bsize.p64 = (uint64_t)(unsigned long)&p64;
651fcf3ce44SJohn Forte bsize.efi = 1;
652fcf3ce44SJohn Forte
653fcf3ce44SJohn Forte if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) {
654fcf3ce44SJohn Forte /* see if this is a zvol */
655fcf3ce44SJohn Forte zvol_bsize(path, size, *partitionp);
656fcf3ce44SJohn Forte } else {
657fcf3ce44SJohn Forte *size = (uint64_t)p64.p_size;
658fcf3ce44SJohn Forte }
659fcf3ce44SJohn Forte }
660fcf3ce44SJohn Forte #endif /* DKIOCPARTITION */
661fcf3ce44SJohn Forte }
662fcf3ce44SJohn Forte
663*570de38fSSurya Prakki (void) close(fd);
664fcf3ce44SJohn Forte return;
665fcf3ce44SJohn Forte }
666fcf3ce44SJohn Forte
667*570de38fSSurya Prakki (void) close(fd);
668fcf3ce44SJohn Forte
669fcf3ce44SJohn Forte *partitionp = (int)dki_info.dki_partition;
670fcf3ce44SJohn Forte
671fcf3ce44SJohn Forte if (vtoc.v_sanity != VTOC_SANE)
672fcf3ce44SJohn Forte return;
673fcf3ce44SJohn Forte
674fcf3ce44SJohn Forte if (vtoc.v_version != V_VERSION && vtoc.v_version != 0)
675fcf3ce44SJohn Forte return;
676fcf3ce44SJohn Forte
677fcf3ce44SJohn Forte if (dki_info.dki_partition > V_NUMPAR)
678fcf3ce44SJohn Forte return;
679fcf3ce44SJohn Forte
680fcf3ce44SJohn Forte *size = (uint64_t)vtoc.v_part[(int)dki_info.dki_partition].p_size;
681fcf3ce44SJohn Forte }
682fcf3ce44SJohn Forte
683fcf3ce44SJohn Forte
684fcf3ce44SJohn Forte static int
iscluster(void)685fcf3ce44SJohn Forte iscluster(void)
686fcf3ce44SJohn Forte {
687fcf3ce44SJohn Forte /*
688fcf3ce44SJohn Forte * Find out if we are running in a cluster
689fcf3ce44SJohn Forte */
690fcf3ce44SJohn Forte cl_nodeid = cfg_iscluster();
691fcf3ce44SJohn Forte if (cl_nodeid > 0) {
692fcf3ce44SJohn Forte return (TRUE);
693fcf3ce44SJohn Forte } else if (cl_nodeid == 0) {
694fcf3ce44SJohn Forte return (FALSE);
695fcf3ce44SJohn Forte }
696fcf3ce44SJohn Forte
697*570de38fSSurya Prakki (void) fprintf(stderr, "%s\n",
698fcf3ce44SJohn Forte gettext("nskernd: unable to ascertain environment"));
699fcf3ce44SJohn Forte exit(1);
700fcf3ce44SJohn Forte /* NOTREACHED */
701fcf3ce44SJohn Forte }
702fcf3ce44SJohn Forte
703fcf3ce44SJohn Forte /*
704fcf3ce44SJohn Forte * Runtime Solaris release checking - build release == runtime release
705fcf3ce44SJohn Forte * is always considered success, so only keep entries in the map for
706fcf3ce44SJohn Forte * the special cases.
707fcf3ce44SJohn Forte */
708fcf3ce44SJohn Forte static nsc_release_t nskernd_rel_map[] = {
709fcf3ce44SJohn Forte /* { "5.10", "5.10" }, */
710fcf3ce44SJohn Forte { "5.11", "5.10" },
711fcf3ce44SJohn Forte { NULL, NULL }
712fcf3ce44SJohn Forte };
713fcf3ce44SJohn Forte
714fcf3ce44SJohn Forte
715fcf3ce44SJohn Forte #ifdef lint
716fcf3ce44SJohn Forte #define main nskernd_main
717fcf3ce44SJohn Forte #endif
718fcf3ce44SJohn Forte /* ARGSUSED1 */
719fcf3ce44SJohn Forte int
main(int argc,char * argv[])720fcf3ce44SJohn Forte main(int argc, char *argv[])
721fcf3ce44SJohn Forte {
722fcf3ce44SJohn Forte const char *dir = "/";
723fcf3ce44SJohn Forte struct nskernd data;
724fcf3ce44SJohn Forte struct rlimit rl;
725fcf3ce44SJohn Forte int i, run, rc;
726fcf3ce44SJohn Forte int partition;
727fcf3ce44SJohn Forte char *reqd;
728fcf3ce44SJohn Forte int syncpipe[2];
729fcf3ce44SJohn Forte int startup;
730fcf3ce44SJohn Forte
731fcf3ce44SJohn Forte (void) setlocale(LC_ALL, "");
732fcf3ce44SJohn Forte (void) textdomain("nskernd");
733fcf3ce44SJohn Forte
734fcf3ce44SJohn Forte rc = nsc_check_release(BUILD_REV_STR, nskernd_rel_map, &reqd);
735fcf3ce44SJohn Forte if (rc < 0) {
736*570de38fSSurya Prakki (void) fprintf(stderr,
737fcf3ce44SJohn Forte gettext("nskernd: unable to determine the current "
738fcf3ce44SJohn Forte "Solaris release: %s\n"), strerror(errno));
739fcf3ce44SJohn Forte exit(1);
740fcf3ce44SJohn Forte } else if (rc == FALSE) {
741*570de38fSSurya Prakki (void) fprintf(stderr,
742fcf3ce44SJohn Forte gettext("nskernd: incorrect Solaris release "
743fcf3ce44SJohn Forte "(requires %s)\n"), reqd);
744fcf3ce44SJohn Forte exit(1);
745fcf3ce44SJohn Forte }
746fcf3ce44SJohn Forte
747fcf3ce44SJohn Forte rc = 0;
748fcf3ce44SJohn Forte
749fcf3ce44SJohn Forte if (argc != 1)
750fcf3ce44SJohn Forte usage();
751fcf3ce44SJohn Forte
752fcf3ce44SJohn Forte /*
753fcf3ce44SJohn Forte * Usage: <progname> [-g] [-d <seconds to delay>]
754fcf3ce44SJohn Forte */
755fcf3ce44SJohn Forte while ((i = getopt(argc, argv, "gd:")) != EOF) {
756fcf3ce44SJohn Forte switch (i) {
757fcf3ce44SJohn Forte case 'g':
758fcf3ce44SJohn Forte display_msg = 1;
759fcf3ce44SJohn Forte break;
760fcf3ce44SJohn Forte case 'd':
761fcf3ce44SJohn Forte delay_time = atoi(optarg);
762fcf3ce44SJohn Forte if (delay_time <= 0) {
763fcf3ce44SJohn Forte delay_time = 30;
764fcf3ce44SJohn Forte }
765fcf3ce44SJohn Forte break;
766fcf3ce44SJohn Forte default:
767fcf3ce44SJohn Forte syslog(LOG_ERR,
768fcf3ce44SJohn Forte "Usage: nskernd [-g] [-d <seconds to delay>]");
769fcf3ce44SJohn Forte exit(1);
770fcf3ce44SJohn Forte break;
771fcf3ce44SJohn Forte }
772fcf3ce44SJohn Forte }
773fcf3ce44SJohn Forte
774fcf3ce44SJohn Forte if (chroot(dir) < 0) {
775*570de38fSSurya Prakki (void) fprintf(stderr, gettext("nskernd: chroot failed: %s\n"),
776fcf3ce44SJohn Forte strerror(errno));
777fcf3ce44SJohn Forte exit(1);
778fcf3ce44SJohn Forte }
779fcf3ce44SJohn Forte
780fcf3ce44SJohn Forte if (chdir(dir) < 0) {
781*570de38fSSurya Prakki (void) fprintf(stderr, gettext("nskernd: chdir failed: %s\n"),
782fcf3ce44SJohn Forte strerror(errno));
783fcf3ce44SJohn Forte exit(1);
784fcf3ce44SJohn Forte }
785fcf3ce44SJohn Forte
786fcf3ce44SJohn Forte /*
787fcf3ce44SJohn Forte * Determine if we are in a Sun Cluster or not, before fork'ing
788fcf3ce44SJohn Forte */
789fcf3ce44SJohn Forte (void) iscluster();
790fcf3ce44SJohn Forte
791fcf3ce44SJohn Forte /*
792fcf3ce44SJohn Forte * create a pipe to synchronise the parent with the
793fcf3ce44SJohn Forte * child just before it enters its service loop.
794fcf3ce44SJohn Forte */
795fcf3ce44SJohn Forte if (pipe(syncpipe) < 0) {
796*570de38fSSurya Prakki (void) fprintf(stderr,
797*570de38fSSurya Prakki gettext("nskernd: cannot create pipe: %s\n"),
798fcf3ce44SJohn Forte strerror(errno));
799fcf3ce44SJohn Forte exit(1);
800fcf3ce44SJohn Forte }
801fcf3ce44SJohn Forte /*
802fcf3ce44SJohn Forte * Fork off a child that becomes the daemon.
803fcf3ce44SJohn Forte */
804fcf3ce44SJohn Forte
805fcf3ce44SJohn Forte if ((rc = fork()) > 0) {
806fcf3ce44SJohn Forte char c;
807fcf3ce44SJohn Forte int n;
808fcf3ce44SJohn Forte (void) close(syncpipe[1]);
809fcf3ce44SJohn Forte /*
810fcf3ce44SJohn Forte * wait for the close of the pipe.
811fcf3ce44SJohn Forte * If we get a char back, indicates good
812fcf3ce44SJohn Forte * status from child, so exit 0.
813fcf3ce44SJohn Forte * If we get a zero length read, then the
814fcf3ce44SJohn Forte * child has failed, so we do too.
815fcf3ce44SJohn Forte */
816fcf3ce44SJohn Forte n = read(syncpipe[0], &c, 1);
817fcf3ce44SJohn Forte exit((n <= 0) ? 1 : 0);
818fcf3ce44SJohn Forte } else if (rc < 0) {
819*570de38fSSurya Prakki (void) fprintf(stderr, gettext("nskernd: cannot fork: %s\n"),
820fcf3ce44SJohn Forte strerror(errno));
821fcf3ce44SJohn Forte exit(1);
822fcf3ce44SJohn Forte }
823fcf3ce44SJohn Forte
824fcf3ce44SJohn Forte /*
825fcf3ce44SJohn Forte * In child - become daemon.
826fcf3ce44SJohn Forte */
827fcf3ce44SJohn Forte
828fcf3ce44SJohn Forte /* use closefrom(3C) from PSARC/2000/193 when possible */
829fcf3ce44SJohn Forte for (i = 0; i < syncpipe[1]; i++) {
830fcf3ce44SJohn Forte (void) close(i);
831fcf3ce44SJohn Forte }
832fcf3ce44SJohn Forte closefrom(syncpipe[1] + 1);
833fcf3ce44SJohn Forte
834fcf3ce44SJohn Forte (void) open("/dev/console", O_WRONLY|O_APPEND);
835fcf3ce44SJohn Forte (void) dup(0);
836fcf3ce44SJohn Forte (void) dup(0);
837fcf3ce44SJohn Forte (void) close(0);
838fcf3ce44SJohn Forte
839*570de38fSSurya Prakki (void) setpgrp();
840fcf3ce44SJohn Forte
841fcf3ce44SJohn Forte /*
842fcf3ce44SJohn Forte * Ignore all signals apart from SIGTERM.
843fcf3ce44SJohn Forte */
844fcf3ce44SJohn Forte
845fcf3ce44SJohn Forte for (i = 1; i < _sys_nsig; i++)
846fcf3ce44SJohn Forte (void) sigset(i, SIG_IGN);
847fcf3ce44SJohn Forte
848fcf3ce44SJohn Forte (void) sigset(SIGTERM, sighand);
849fcf3ce44SJohn Forte
850fcf3ce44SJohn Forte /*
851fcf3ce44SJohn Forte * Increase the number of fd's that can be open.
852fcf3ce44SJohn Forte */
853fcf3ce44SJohn Forte
854fcf3ce44SJohn Forte rl.rlim_cur = RLIM_INFINITY;
855fcf3ce44SJohn Forte rl.rlim_max = RLIM_INFINITY;
856fcf3ce44SJohn Forte if (setrlimit(RLIMIT_NOFILE, &rl) < 0) {
857*570de38fSSurya Prakki (void) fprintf(stderr,
858fcf3ce44SJohn Forte gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"),
859fcf3ce44SJohn Forte strerror(errno));
860*570de38fSSurya Prakki (void) fprintf(stderr,
861fcf3ce44SJohn Forte gettext("nskernd: the maximum number of nsctl open "
862fcf3ce44SJohn Forte "devices may be reduced\n"));
863fcf3ce44SJohn Forte }
864fcf3ce44SJohn Forte
865fcf3ce44SJohn Forte /*
866fcf3ce44SJohn Forte * Open /dev/nsctl and startup.
867fcf3ce44SJohn Forte */
868fcf3ce44SJohn Forte
869fcf3ce44SJohn Forte nsctl_fd = open(rdev, O_RDONLY);
870fcf3ce44SJohn Forte if (nsctl_fd < 0) {
871*570de38fSSurya Prakki (void) fprintf(stderr, gettext("nskernd: unable to open %s\n"),
872*570de38fSSurya Prakki rdev);
873fcf3ce44SJohn Forte exit(1);
874fcf3ce44SJohn Forte }
875fcf3ce44SJohn Forte
876fcf3ce44SJohn Forte bzero(&data, sizeof (data));
877fcf3ce44SJohn Forte
878fcf3ce44SJohn Forte data.command = NSKERND_START;
879fcf3ce44SJohn Forte data.data1 = (uint64_t)cl_nodeid;
880fcf3ce44SJohn Forte run = 1;
881fcf3ce44SJohn Forte
882fcf3ce44SJohn Forte startup = 1;
883fcf3ce44SJohn Forte while (run) {
884fcf3ce44SJohn Forte rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data);
885fcf3ce44SJohn Forte if (rc < 0) {
886fcf3ce44SJohn Forte /* try and do kernel cleanup and exit */
887fcf3ce44SJohn Forte if (shutdown()) {
888fcf3ce44SJohn Forte run = 0;
889fcf3ce44SJohn Forte } else {
890fcf3ce44SJohn Forte sigterm = 0;
891fcf3ce44SJohn Forte }
892fcf3ce44SJohn Forte
893*570de38fSSurya Prakki (void) fprintf(stderr,
894fcf3ce44SJohn Forte gettext("nskernd: NSCIOC_NSKERND failed: %s\n"),
895fcf3ce44SJohn Forte strerror(errno));
896fcf3ce44SJohn Forte continue;
897fcf3ce44SJohn Forte } else if (sigterm) {
898fcf3ce44SJohn Forte /* SIGTERM received - terminate */
899fcf3ce44SJohn Forte if (data.command != NSKERND_START &&
900fcf3ce44SJohn Forte (data.command != NSKERND_STOP ||
901fcf3ce44SJohn Forte data.data1 != (uint64_t)1)) {
902fcf3ce44SJohn Forte /* need to do kernel cleanup */
903fcf3ce44SJohn Forte if (shutdown()) {
904fcf3ce44SJohn Forte run = 0;
905fcf3ce44SJohn Forte } else {
906fcf3ce44SJohn Forte sigterm = 0;
907fcf3ce44SJohn Forte data.command = NSKERND_START;
908fcf3ce44SJohn Forte data.data1 = (uint64_t)cl_nodeid;
909fcf3ce44SJohn Forte }
910fcf3ce44SJohn Forte } else {
911fcf3ce44SJohn Forte /* just quit */
912fcf3ce44SJohn Forte if (canshutdown()) {
913fcf3ce44SJohn Forte run = 0;
914fcf3ce44SJohn Forte } else {
915fcf3ce44SJohn Forte /* cannot shutdown - threads active */
916fcf3ce44SJohn Forte sigterm = 0;
917fcf3ce44SJohn Forte data.command = NSKERND_START;
918fcf3ce44SJohn Forte data.data1 = (uint64_t)cl_nodeid;
919fcf3ce44SJohn Forte }
920fcf3ce44SJohn Forte }
921fcf3ce44SJohn Forte continue;
922fcf3ce44SJohn Forte }
923fcf3ce44SJohn Forte if (startup) {
924fcf3ce44SJohn Forte char c = 0;
925fcf3ce44SJohn Forte (void) write(syncpipe[1], &c, 1);
926fcf3ce44SJohn Forte (void) close(syncpipe[1]);
927fcf3ce44SJohn Forte startup = 0;
928fcf3ce44SJohn Forte }
929fcf3ce44SJohn Forte switch (data.command) {
930fcf3ce44SJohn Forte case NSKERND_START: /* (re)start completion */
931fcf3ce44SJohn Forte if (rc == 1) {
932*570de38fSSurya Prakki (void) fprintf(stderr,
933fcf3ce44SJohn Forte gettext("nskernd: already started\n"));
934fcf3ce44SJohn Forte run = 0;
935fcf3ce44SJohn Forte } else if (rc == 2) {
936*570de38fSSurya Prakki (void) fprintf(stderr,
937fcf3ce44SJohn Forte gettext("nskernd: stopped by kernel\n"));
938fcf3ce44SJohn Forte run = 0;
939fcf3ce44SJohn Forte }
940fcf3ce44SJohn Forte data.command = NSKERND_WAIT;
941fcf3ce44SJohn Forte break;
942fcf3ce44SJohn Forte
943fcf3ce44SJohn Forte case NSKERND_STOP: /* kernel telling daemon to stop */
944fcf3ce44SJohn Forte if (data.data1 != (uint64_t)1) {
945fcf3ce44SJohn Forte (void) shutdown();
946fcf3ce44SJohn Forte run = 0;
947fcf3ce44SJohn Forte }
948fcf3ce44SJohn Forte break;
949fcf3ce44SJohn Forte
950fcf3ce44SJohn Forte case NSKERND_BSIZE:
951fcf3ce44SJohn Forte /*
952fcf3ce44SJohn Forte * kernel requesting partsize
953fcf3ce44SJohn Forte * data1 - size return
954fcf3ce44SJohn Forte * data2 - raw_fd (entry)
955fcf3ce44SJohn Forte * - partition number (return)
956fcf3ce44SJohn Forte */
957fcf3ce44SJohn Forte partition = -1;
958fcf3ce44SJohn Forte get_bsize(data.data2, &data.data1,
959fcf3ce44SJohn Forte &partition, data.char1);
960fcf3ce44SJohn Forte data.data2 = (uint64_t)partition;
961fcf3ce44SJohn Forte data.command = NSKERND_WAIT;
962fcf3ce44SJohn Forte break;
963fcf3ce44SJohn Forte
964fcf3ce44SJohn Forte case NSKERND_NEWLWP: /* kernel requesting a new LWP */
965fcf3ce44SJohn Forte newlwp(&data);
966fcf3ce44SJohn Forte data.command = NSKERND_WAIT;
967fcf3ce44SJohn Forte break;
968fcf3ce44SJohn Forte
969fcf3ce44SJohn Forte case NSKERND_LOCK: /* kernel requesting lock */
970fcf3ce44SJohn Forte dolock(&data);
971fcf3ce44SJohn Forte data.command = NSKERND_WAIT;
972fcf3ce44SJohn Forte break;
973fcf3ce44SJohn Forte
974fcf3ce44SJohn Forte case NSKERND_WAIT: /* kernel retrying wait */
975fcf3ce44SJohn Forte /*
976fcf3ce44SJohn Forte * the kernel thread can be woken by the dr config
977fcf3ce44SJohn Forte * utilities (ie cfgadm) therefore we just reissue
978fcf3ce44SJohn Forte * the wait.
979fcf3ce44SJohn Forte */
980fcf3ce44SJohn Forte break;
981fcf3ce44SJohn Forte
982fcf3ce44SJohn Forte case NSKERND_IIBITMAP:
983fcf3ce44SJohn Forte rc = log_iibmp_err(data.char1, (int)data.data1);
984fcf3ce44SJohn Forte data.data1 = (uint64_t)rc;
985fcf3ce44SJohn Forte data.command = NSKERND_WAIT;
986fcf3ce44SJohn Forte break;
987fcf3ce44SJohn Forte
988fcf3ce44SJohn Forte default:
989*570de38fSSurya Prakki (void) fprintf(stderr,
990fcf3ce44SJohn Forte gettext("nskernd: unknown command %d"),
991fcf3ce44SJohn Forte data.command);
992fcf3ce44SJohn Forte data.command = NSKERND_WAIT;
993fcf3ce44SJohn Forte break;
994fcf3ce44SJohn Forte }
995fcf3ce44SJohn Forte }
996fcf3ce44SJohn Forte
997fcf3ce44SJohn Forte (void) close(nsctl_fd);
998fcf3ce44SJohn Forte
999fcf3ce44SJohn Forte return (rc);
1000fcf3ce44SJohn Forte }
1001