1*fcf3ce44SJohn Forte /* 2*fcf3ce44SJohn Forte * CDDL HEADER START 3*fcf3ce44SJohn Forte * 4*fcf3ce44SJohn Forte * The contents of this file are subject to the terms of the 5*fcf3ce44SJohn Forte * Common Development and Distribution License (the "License"). 6*fcf3ce44SJohn Forte * You may not use this file except in compliance with the License. 7*fcf3ce44SJohn Forte * 8*fcf3ce44SJohn Forte * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*fcf3ce44SJohn Forte * or http://www.opensolaris.org/os/licensing. 10*fcf3ce44SJohn Forte * See the License for the specific language governing permissions 11*fcf3ce44SJohn Forte * and limitations under the License. 12*fcf3ce44SJohn Forte * 13*fcf3ce44SJohn Forte * When distributing Covered Code, include this CDDL HEADER in each 14*fcf3ce44SJohn Forte * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*fcf3ce44SJohn Forte * If applicable, add the following below this CDDL HEADER, with the 16*fcf3ce44SJohn Forte * fields enclosed by brackets "[]" replaced with your own identifying 17*fcf3ce44SJohn Forte * information: Portions Copyright [yyyy] [name of copyright owner] 18*fcf3ce44SJohn Forte * 19*fcf3ce44SJohn Forte * CDDL HEADER END 20*fcf3ce44SJohn Forte */ 21*fcf3ce44SJohn Forte /* 22*fcf3ce44SJohn Forte * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23*fcf3ce44SJohn Forte * Use is subject to license terms. 24*fcf3ce44SJohn Forte */ 25*fcf3ce44SJohn Forte 26*fcf3ce44SJohn Forte #include <sys/types.h> 27*fcf3ce44SJohn Forte #include <sys/resource.h> 28*fcf3ce44SJohn Forte #include <sys/priocntl.h> 29*fcf3ce44SJohn Forte #include <sys/rtpriocntl.h> 30*fcf3ce44SJohn Forte #include <sys/tspriocntl.h> 31*fcf3ce44SJohn Forte #include <sys/wait.h> 32*fcf3ce44SJohn Forte #include <sys/stat.h> 33*fcf3ce44SJohn Forte 34*fcf3ce44SJohn Forte #include <strings.h> 35*fcf3ce44SJohn Forte #include <thread.h> 36*fcf3ce44SJohn Forte #include <stdlib.h> 37*fcf3ce44SJohn Forte #include <signal.h> 38*fcf3ce44SJohn Forte #include <errno.h> 39*fcf3ce44SJohn Forte #include <stdio.h> 40*fcf3ce44SJohn Forte #include <fcntl.h> 41*fcf3ce44SJohn Forte #include <locale.h> 42*fcf3ce44SJohn Forte #include <unistd.h> 43*fcf3ce44SJohn Forte #include <syslog.h> 44*fcf3ce44SJohn Forte 45*fcf3ce44SJohn Forte #include <sys/nsctl/cfg.h> 46*fcf3ce44SJohn Forte #include <sys/nsctl/nsctl.h> 47*fcf3ce44SJohn Forte #include <sys/nsctl/nsc_ioctl.h> 48*fcf3ce44SJohn Forte #include <sys/nskernd.h> 49*fcf3ce44SJohn Forte #include <nsctl.h> 50*fcf3ce44SJohn Forte 51*fcf3ce44SJohn Forte #include <sys/mkdev.h> 52*fcf3ce44SJohn Forte #include <sys/nsctl/sv_efi.h> 53*fcf3ce44SJohn Forte 54*fcf3ce44SJohn Forte static const char *rdev = "/dev/nsctl"; 55*fcf3ce44SJohn Forte 56*fcf3ce44SJohn Forte /* 57*fcf3ce44SJohn Forte * Define a minimal user stack size in bytes over and above the 58*fcf3ce44SJohn Forte * libthread THR_STACK_MIN minimum value. 59*fcf3ce44SJohn Forte * 60*fcf3ce44SJohn Forte * This stack size needs to be sufficient to run _newlwp() and then 61*fcf3ce44SJohn Forte * ioctl() down into the kernel. 62*fcf3ce44SJohn Forte */ 63*fcf3ce44SJohn Forte #define NSK_STACK_SIZE 512 64*fcf3ce44SJohn Forte 65*fcf3ce44SJohn Forte /* 66*fcf3ce44SJohn Forte * LWP scheduling control switches. 67*fcf3ce44SJohn Forte * 68*fcf3ce44SJohn Forte * allow_pri - set to non-zero to enable priocntl() manipulations of 69*fcf3ce44SJohn Forte * created LWPs. 70*fcf3ce44SJohn Forte * allow_rt - set to non-zero to use the RT rather than the TS 71*fcf3ce44SJohn Forte * scheduling class when manipulating the schduling 72*fcf3ce44SJohn Forte * parameters for an LWP. Only used if allow_pri is 73*fcf3ce44SJohn Forte * non-zero. 74*fcf3ce44SJohn Forte */ 75*fcf3ce44SJohn Forte static int allow_pri = 1; 76*fcf3ce44SJohn Forte static int allow_rt = 0; /* disallow - bad interactions with timeout() */ 77*fcf3ce44SJohn Forte 78*fcf3ce44SJohn Forte static int nsctl_fd = -1; 79*fcf3ce44SJohn Forte static int sigterm; 80*fcf3ce44SJohn Forte 81*fcf3ce44SJohn Forte static int nthreads; /* number of threads in the kernel */ 82*fcf3ce44SJohn Forte static int exiting; /* shutdown in progress flag */ 83*fcf3ce44SJohn Forte static mutex_t thr_mutex = DEFAULTMUTEX; 84*fcf3ce44SJohn Forte static mutex_t cfg_mutex = DEFAULTMUTEX; 85*fcf3ce44SJohn Forte 86*fcf3ce44SJohn Forte static int cl_nodeid = -1; 87*fcf3ce44SJohn Forte 88*fcf3ce44SJohn Forte static int display_msg = 0; 89*fcf3ce44SJohn Forte static int delay_time = 30; 90*fcf3ce44SJohn Forte 91*fcf3ce44SJohn Forte static void 92*fcf3ce44SJohn Forte usage(void) 93*fcf3ce44SJohn Forte { 94*fcf3ce44SJohn Forte fprintf(stderr, gettext("usage: nskernd\n")); 95*fcf3ce44SJohn Forte exit(255); 96*fcf3ce44SJohn Forte } 97*fcf3ce44SJohn Forte 98*fcf3ce44SJohn Forte 99*fcf3ce44SJohn Forte static void 100*fcf3ce44SJohn Forte sighand(int sig) 101*fcf3ce44SJohn Forte { 102*fcf3ce44SJohn Forte if (sig == SIGTERM) { 103*fcf3ce44SJohn Forte sigterm++; 104*fcf3ce44SJohn Forte } 105*fcf3ce44SJohn Forte } 106*fcf3ce44SJohn Forte 107*fcf3ce44SJohn Forte 108*fcf3ce44SJohn Forte /* 109*fcf3ce44SJohn Forte * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel 110*fcf3ce44SJohn Forte */ 111*fcf3ce44SJohn Forte int 112*fcf3ce44SJohn Forte nthread_inc(void) 113*fcf3ce44SJohn Forte { 114*fcf3ce44SJohn Forte mutex_lock(&thr_mutex); 115*fcf3ce44SJohn Forte if (exiting) { 116*fcf3ce44SJohn Forte /* cannot enter kernel as nskernd is being shutdown - exit */ 117*fcf3ce44SJohn Forte mutex_unlock(&thr_mutex); 118*fcf3ce44SJohn Forte return (0); 119*fcf3ce44SJohn Forte } 120*fcf3ce44SJohn Forte nthreads++; 121*fcf3ce44SJohn Forte mutex_unlock(&thr_mutex); 122*fcf3ce44SJohn Forte return (1); 123*fcf3ce44SJohn Forte } 124*fcf3ce44SJohn Forte 125*fcf3ce44SJohn Forte 126*fcf3ce44SJohn Forte void 127*fcf3ce44SJohn Forte nthread_dec(void) 128*fcf3ce44SJohn Forte { 129*fcf3ce44SJohn Forte mutex_lock(&thr_mutex); 130*fcf3ce44SJohn Forte nthreads--; 131*fcf3ce44SJohn Forte mutex_unlock(&thr_mutex); 132*fcf3ce44SJohn Forte } 133*fcf3ce44SJohn Forte 134*fcf3ce44SJohn Forte 135*fcf3ce44SJohn Forte /* 136*fcf3ce44SJohn Forte * returns: 1 - can shutdown; 0 - unable to shutdown 137*fcf3ce44SJohn Forte */ 138*fcf3ce44SJohn Forte int 139*fcf3ce44SJohn Forte canshutdown(void) 140*fcf3ce44SJohn Forte { 141*fcf3ce44SJohn Forte int rc = 1; 142*fcf3ce44SJohn Forte time_t start_delay; 143*fcf3ce44SJohn Forte 144*fcf3ce44SJohn Forte mutex_lock(&thr_mutex); 145*fcf3ce44SJohn Forte if (nthreads > 0) { 146*fcf3ce44SJohn Forte if (display_msg) { 147*fcf3ce44SJohn Forte fprintf(stderr, 148*fcf3ce44SJohn Forte gettext("nskernd: unable to shutdown: " 149*fcf3ce44SJohn Forte "%d kernel threads in use\n"), nthreads); 150*fcf3ce44SJohn Forte } 151*fcf3ce44SJohn Forte start_delay = time(0); 152*fcf3ce44SJohn Forte while (nthreads > 0 && (time(0) - start_delay) < delay_time) { 153*fcf3ce44SJohn Forte mutex_unlock(&thr_mutex); 154*fcf3ce44SJohn Forte sleep(1); 155*fcf3ce44SJohn Forte mutex_lock(&thr_mutex); 156*fcf3ce44SJohn Forte fprintf(stderr, 157*fcf3ce44SJohn Forte gettext("nskernd: delay shutdown: " 158*fcf3ce44SJohn Forte "%d kernel threads in use\n"), nthreads); 159*fcf3ce44SJohn Forte } 160*fcf3ce44SJohn Forte if (nthreads > 0) { 161*fcf3ce44SJohn Forte rc = 0; 162*fcf3ce44SJohn Forte } else { 163*fcf3ce44SJohn Forte exiting = 1; 164*fcf3ce44SJohn Forte } 165*fcf3ce44SJohn Forte } else { 166*fcf3ce44SJohn Forte /* flag shutdown in progress */ 167*fcf3ce44SJohn Forte exiting = 1; 168*fcf3ce44SJohn Forte } 169*fcf3ce44SJohn Forte mutex_unlock(&thr_mutex); 170*fcf3ce44SJohn Forte 171*fcf3ce44SJohn Forte return (rc); 172*fcf3ce44SJohn Forte } 173*fcf3ce44SJohn Forte 174*fcf3ce44SJohn Forte 175*fcf3ce44SJohn Forte /* 176*fcf3ce44SJohn Forte * returns: 1 - shutdown successful; 0 - unable to shutdown 177*fcf3ce44SJohn Forte */ 178*fcf3ce44SJohn Forte int 179*fcf3ce44SJohn Forte shutdown(void) 180*fcf3ce44SJohn Forte { 181*fcf3ce44SJohn Forte struct nskernd data; 182*fcf3ce44SJohn Forte int rc; 183*fcf3ce44SJohn Forte 184*fcf3ce44SJohn Forte if (nsctl_fd < 0) 185*fcf3ce44SJohn Forte return (1); 186*fcf3ce44SJohn Forte 187*fcf3ce44SJohn Forte bzero(&data, sizeof (data)); 188*fcf3ce44SJohn Forte data.command = NSKERND_STOP; 189*fcf3ce44SJohn Forte 190*fcf3ce44SJohn Forte if (!canshutdown()) { 191*fcf3ce44SJohn Forte return (0); 192*fcf3ce44SJohn Forte } 193*fcf3ce44SJohn Forte 194*fcf3ce44SJohn Forte rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 195*fcf3ce44SJohn Forte if (rc < 0) { 196*fcf3ce44SJohn Forte if (errno != EINTR || !sigterm) { 197*fcf3ce44SJohn Forte fprintf(stderr, 198*fcf3ce44SJohn Forte gettext("nskernd: NSKERND_STOP failed\n")); 199*fcf3ce44SJohn Forte } 200*fcf3ce44SJohn Forte } 201*fcf3ce44SJohn Forte 202*fcf3ce44SJohn Forte return (1); 203*fcf3ce44SJohn Forte } 204*fcf3ce44SJohn Forte 205*fcf3ce44SJohn Forte 206*fcf3ce44SJohn Forte /* 207*fcf3ce44SJohn Forte * First function run by a NSKERND_NEWLWP thread. 208*fcf3ce44SJohn Forte * 209*fcf3ce44SJohn Forte * Determines if it needs to change the scheduling priority of the LWP, 210*fcf3ce44SJohn Forte * and then calls back into the kernel. 211*fcf3ce44SJohn Forte */ 212*fcf3ce44SJohn Forte static void * 213*fcf3ce44SJohn Forte _newlwp(void *arg) 214*fcf3ce44SJohn Forte { 215*fcf3ce44SJohn Forte struct nskernd nsk; 216*fcf3ce44SJohn Forte pcparms_t pcparms; 217*fcf3ce44SJohn Forte pcinfo_t pcinfo; 218*fcf3ce44SJohn Forte 219*fcf3ce44SJohn Forte /* copy arguments onto stack and free heap memory */ 220*fcf3ce44SJohn Forte bcopy(arg, &nsk, sizeof (nsk)); 221*fcf3ce44SJohn Forte free(arg); 222*fcf3ce44SJohn Forte 223*fcf3ce44SJohn Forte if (nsk.data2 && allow_pri) { 224*fcf3ce44SJohn Forte /* increase the scheduling priority of this LWP */ 225*fcf3ce44SJohn Forte 226*fcf3ce44SJohn Forte bzero(&pcinfo, sizeof (pcinfo)); 227*fcf3ce44SJohn Forte strcpy(pcinfo.pc_clname, allow_rt ? "RT" : "TS"); 228*fcf3ce44SJohn Forte 229*fcf3ce44SJohn Forte if (priocntl(0, 0, PC_GETCID, (char *)&pcinfo) < 0) { 230*fcf3ce44SJohn Forte fprintf(stderr, 231*fcf3ce44SJohn Forte gettext( 232*fcf3ce44SJohn Forte "nskernd: priocntl(PC_GETCID) failed: %s\n"), 233*fcf3ce44SJohn Forte strerror(errno)); 234*fcf3ce44SJohn Forte goto pri_done; 235*fcf3ce44SJohn Forte } 236*fcf3ce44SJohn Forte 237*fcf3ce44SJohn Forte bzero(&pcparms, sizeof (pcparms)); 238*fcf3ce44SJohn Forte pcparms.pc_cid = pcinfo.pc_cid; 239*fcf3ce44SJohn Forte 240*fcf3ce44SJohn Forte if (allow_rt) { 241*fcf3ce44SJohn Forte ((rtparms_t *)pcparms.pc_clparms)->rt_pri = 242*fcf3ce44SJohn Forte (pri_t)0; /* minimum RT priority */ 243*fcf3ce44SJohn Forte ((rtparms_t *)pcparms.pc_clparms)->rt_tqsecs = 244*fcf3ce44SJohn Forte (uint_t)RT_TQDEF; 245*fcf3ce44SJohn Forte ((rtparms_t *)pcparms.pc_clparms)->rt_tqnsecs = 246*fcf3ce44SJohn Forte RT_TQDEF; 247*fcf3ce44SJohn Forte } else { 248*fcf3ce44SJohn Forte ((tsparms_t *)pcparms.pc_clparms)->ts_uprilim = 249*fcf3ce44SJohn Forte ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 250*fcf3ce44SJohn Forte ((tsparms_t *)pcparms.pc_clparms)->ts_upri = 251*fcf3ce44SJohn Forte ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 252*fcf3ce44SJohn Forte } 253*fcf3ce44SJohn Forte 254*fcf3ce44SJohn Forte if (priocntl(P_LWPID, P_MYID, 255*fcf3ce44SJohn Forte PC_SETPARMS, (char *)&pcparms) < 0) { 256*fcf3ce44SJohn Forte fprintf(stderr, 257*fcf3ce44SJohn Forte gettext( 258*fcf3ce44SJohn Forte "nskernd: priocntl(PC_SETPARMS) failed: %s\n"), 259*fcf3ce44SJohn Forte strerror(errno)); 260*fcf3ce44SJohn Forte } 261*fcf3ce44SJohn Forte } 262*fcf3ce44SJohn Forte 263*fcf3ce44SJohn Forte pri_done: 264*fcf3ce44SJohn Forte if (nthread_inc()) { 265*fcf3ce44SJohn Forte (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 266*fcf3ce44SJohn Forte nthread_dec(); 267*fcf3ce44SJohn Forte } 268*fcf3ce44SJohn Forte return (NULL); 269*fcf3ce44SJohn Forte } 270*fcf3ce44SJohn Forte 271*fcf3ce44SJohn Forte 272*fcf3ce44SJohn Forte /* 273*fcf3ce44SJohn Forte * Start a new thread bound to an LWP. 274*fcf3ce44SJohn Forte * 275*fcf3ce44SJohn Forte * This is the user level side of nsc_create_process(). 276*fcf3ce44SJohn Forte */ 277*fcf3ce44SJohn Forte static void 278*fcf3ce44SJohn Forte newlwp(struct nskernd *req) 279*fcf3ce44SJohn Forte { 280*fcf3ce44SJohn Forte struct nskernd *nskp; 281*fcf3ce44SJohn Forte thread_t tid; 282*fcf3ce44SJohn Forte int rc; 283*fcf3ce44SJohn Forte 284*fcf3ce44SJohn Forte nskp = malloc(sizeof (*nskp)); 285*fcf3ce44SJohn Forte if (!nskp) { 286*fcf3ce44SJohn Forte #ifdef DEBUG 287*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: malloc(%d) failed\n"), 288*fcf3ce44SJohn Forte sizeof (*nskp)); 289*fcf3ce44SJohn Forte #endif 290*fcf3ce44SJohn Forte req->data1 = (uint64_t)ENOMEM; 291*fcf3ce44SJohn Forte return; 292*fcf3ce44SJohn Forte } 293*fcf3ce44SJohn Forte 294*fcf3ce44SJohn Forte /* copy args for child */ 295*fcf3ce44SJohn Forte bcopy(req, nskp, sizeof (*nskp)); 296*fcf3ce44SJohn Forte 297*fcf3ce44SJohn Forte rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 298*fcf3ce44SJohn Forte _newlwp, nskp, THR_BOUND|THR_DETACHED, &tid); 299*fcf3ce44SJohn Forte 300*fcf3ce44SJohn Forte if (rc != 0) { 301*fcf3ce44SJohn Forte /* thr_create failed */ 302*fcf3ce44SJohn Forte #ifdef DEBUG 303*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: thr_create failed: %s\n"), 304*fcf3ce44SJohn Forte strerror(errno)); 305*fcf3ce44SJohn Forte #endif 306*fcf3ce44SJohn Forte req->data1 = (uint64_t)errno; 307*fcf3ce44SJohn Forte free(nskp); 308*fcf3ce44SJohn Forte } else { 309*fcf3ce44SJohn Forte /* success - _newlwp() will free nskp */ 310*fcf3ce44SJohn Forte req->data1 = (uint64_t)0; 311*fcf3ce44SJohn Forte } 312*fcf3ce44SJohn Forte } 313*fcf3ce44SJohn Forte 314*fcf3ce44SJohn Forte static int 315*fcf3ce44SJohn Forte log_iibmp_err(char *set, int flags) 316*fcf3ce44SJohn Forte { 317*fcf3ce44SJohn Forte CFGFILE *cfg; 318*fcf3ce44SJohn Forte char key[CFG_MAX_KEY]; 319*fcf3ce44SJohn Forte char buf[CFG_MAX_BUF]; 320*fcf3ce44SJohn Forte char newflags[CFG_MAX_BUF]; 321*fcf3ce44SJohn Forte char outbuf[CFG_MAX_BUF]; 322*fcf3ce44SJohn Forte char *mst, *shd, *bmp, *mode, *ovr, *cnode, *opt, *grp; 323*fcf3ce44SJohn Forte int setno, found = 0; 324*fcf3ce44SJohn Forte int setlen; 325*fcf3ce44SJohn Forte int rc = 0; 326*fcf3ce44SJohn Forte pid_t pid = -1; 327*fcf3ce44SJohn Forte 328*fcf3ce44SJohn Forte if (set && *set) { 329*fcf3ce44SJohn Forte setlen = strlen(set); 330*fcf3ce44SJohn Forte } else { 331*fcf3ce44SJohn Forte return (EINVAL); 332*fcf3ce44SJohn Forte } 333*fcf3ce44SJohn Forte 334*fcf3ce44SJohn Forte mutex_lock(&cfg_mutex); 335*fcf3ce44SJohn Forte cfg = cfg_open(""); 336*fcf3ce44SJohn Forte if (!cfg) { 337*fcf3ce44SJohn Forte mutex_unlock(&cfg_mutex); 338*fcf3ce44SJohn Forte return (ENXIO); 339*fcf3ce44SJohn Forte } 340*fcf3ce44SJohn Forte 341*fcf3ce44SJohn Forte if (!cfg_lock(cfg, CFG_WRLOCK)) { 342*fcf3ce44SJohn Forte 343*fcf3ce44SJohn Forte mutex_unlock(&cfg_mutex); 344*fcf3ce44SJohn Forte cfg_close(cfg); 345*fcf3ce44SJohn Forte 346*fcf3ce44SJohn Forte pid = fork(); 347*fcf3ce44SJohn Forte 348*fcf3ce44SJohn Forte if (pid == -1) { 349*fcf3ce44SJohn Forte fprintf(stderr, gettext( 350*fcf3ce44SJohn Forte "nskernd: Error forking\n")); 351*fcf3ce44SJohn Forte return (errno); 352*fcf3ce44SJohn Forte } else if (pid > 0) { 353*fcf3ce44SJohn Forte fprintf(stdout, gettext( 354*fcf3ce44SJohn Forte "nskernd: Attempting deferred bitmap error\n")); 355*fcf3ce44SJohn Forte return (0); 356*fcf3ce44SJohn Forte } 357*fcf3ce44SJohn Forte 358*fcf3ce44SJohn Forte mutex_lock(&cfg_mutex); 359*fcf3ce44SJohn Forte cfg = cfg_open(""); 360*fcf3ce44SJohn Forte if (!cfg) { 361*fcf3ce44SJohn Forte mutex_unlock(&cfg_mutex); 362*fcf3ce44SJohn Forte fprintf(stderr, gettext( 363*fcf3ce44SJohn Forte "nskernd: Failed cfg_open, deferred bitmap\n")); 364*fcf3ce44SJohn Forte return (ENXIO); 365*fcf3ce44SJohn Forte } 366*fcf3ce44SJohn Forte 367*fcf3ce44SJohn Forte /* Sooner or later, this lock will be free */ 368*fcf3ce44SJohn Forte while (!cfg_lock(cfg, CFG_WRLOCK)) 369*fcf3ce44SJohn Forte sleep(2); 370*fcf3ce44SJohn Forte } 371*fcf3ce44SJohn Forte 372*fcf3ce44SJohn Forte /* find the proper set number */ 373*fcf3ce44SJohn Forte for (setno = 1; !found; setno++) { 374*fcf3ce44SJohn Forte snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 375*fcf3ce44SJohn Forte if (cfg_get_cstring(cfg, key, buf, CFG_MAX_BUF) < 0) { 376*fcf3ce44SJohn Forte break; 377*fcf3ce44SJohn Forte } 378*fcf3ce44SJohn Forte 379*fcf3ce44SJohn Forte mst = strtok(buf, " "); 380*fcf3ce44SJohn Forte shd = strtok(NULL, " "); 381*fcf3ce44SJohn Forte if (strncmp(shd, set, setlen) == 0) { 382*fcf3ce44SJohn Forte found = 1; 383*fcf3ce44SJohn Forte 384*fcf3ce44SJohn Forte bmp = strtok(NULL, " "); 385*fcf3ce44SJohn Forte mode = strtok(NULL, " "); 386*fcf3ce44SJohn Forte ovr = strtok(NULL, " "); 387*fcf3ce44SJohn Forte cnode = strtok(NULL, " "); 388*fcf3ce44SJohn Forte opt = strtok(NULL, " "); 389*fcf3ce44SJohn Forte grp = strtok(NULL, " "); 390*fcf3ce44SJohn Forte break; 391*fcf3ce44SJohn Forte } 392*fcf3ce44SJohn Forte } 393*fcf3ce44SJohn Forte 394*fcf3ce44SJohn Forte if (found) { 395*fcf3ce44SJohn Forte /* were there flags in the options field already? */ 396*fcf3ce44SJohn Forte snprintf(newflags, CFG_MAX_BUF, "%s=0x%x", 397*fcf3ce44SJohn Forte NSKERN_II_BMP_OPTION, flags); 398*fcf3ce44SJohn Forte if (opt && strcmp(opt, "-") != 0) { 399*fcf3ce44SJohn Forte bzero(newflags, CFG_MAX_BUF); 400*fcf3ce44SJohn Forte opt = strtok(opt, ";"); 401*fcf3ce44SJohn Forte while (opt) { 402*fcf3ce44SJohn Forte if (strncmp(opt, NSKERN_II_BMP_OPTION, 403*fcf3ce44SJohn Forte strlen(NSKERN_II_BMP_OPTION)) != 0) { 404*fcf3ce44SJohn Forte strcat(newflags, ";"); 405*fcf3ce44SJohn Forte strcat(newflags, opt); 406*fcf3ce44SJohn Forte } 407*fcf3ce44SJohn Forte } 408*fcf3ce44SJohn Forte } 409*fcf3ce44SJohn Forte snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 410*fcf3ce44SJohn Forte snprintf(outbuf, CFG_MAX_BUF, "%s %s %s %s %s %s %s %s", 411*fcf3ce44SJohn Forte mst, shd, bmp, mode, ovr, cnode, newflags, grp); 412*fcf3ce44SJohn Forte if (cfg_put_cstring(cfg, key, outbuf, CFG_MAX_BUF) < 0) { 413*fcf3ce44SJohn Forte printf("Failed to put [%s]\n", outbuf); 414*fcf3ce44SJohn Forte rc = ENXIO; 415*fcf3ce44SJohn Forte } else { 416*fcf3ce44SJohn Forte cfg_commit(cfg); 417*fcf3ce44SJohn Forte rc = 0; 418*fcf3ce44SJohn Forte } 419*fcf3ce44SJohn Forte } else { 420*fcf3ce44SJohn Forte fprintf(stderr, gettext( 421*fcf3ce44SJohn Forte "nskernd: Failed deferred bitmap [%s]\n"), set); 422*fcf3ce44SJohn Forte rc = EINVAL; 423*fcf3ce44SJohn Forte } 424*fcf3ce44SJohn Forte cfg_unlock(cfg); 425*fcf3ce44SJohn Forte cfg_close(cfg); 426*fcf3ce44SJohn Forte mutex_unlock(&cfg_mutex); 427*fcf3ce44SJohn Forte 428*fcf3ce44SJohn Forte /* 429*fcf3ce44SJohn Forte * if we are the fork'ed client, just exit, if parent just return 430*fcf3ce44SJohn Forte */ 431*fcf3ce44SJohn Forte if (pid == 0) { 432*fcf3ce44SJohn Forte exit(rc); 433*fcf3ce44SJohn Forte /*NOTREACHED*/ 434*fcf3ce44SJohn Forte } else { 435*fcf3ce44SJohn Forte return (rc); 436*fcf3ce44SJohn Forte } 437*fcf3ce44SJohn Forte } 438*fcf3ce44SJohn Forte 439*fcf3ce44SJohn Forte /* 440*fcf3ce44SJohn Forte * First function run by a NSKERND_LOCK thread. 441*fcf3ce44SJohn Forte * 442*fcf3ce44SJohn Forte * Opens dscfg and locks it, 443*fcf3ce44SJohn Forte * and then calls back into the kernel. 444*fcf3ce44SJohn Forte * 445*fcf3ce44SJohn Forte * Incoming: 446*fcf3ce44SJohn Forte * data1 is the kernel address of the sync structure. 447*fcf3ce44SJohn Forte * data2 is read(0)/write(1) lock mode. 448*fcf3ce44SJohn Forte * 449*fcf3ce44SJohn Forte * Returns: 450*fcf3ce44SJohn Forte * data1 as incoming. 451*fcf3ce44SJohn Forte * data2 errno. 452*fcf3ce44SJohn Forte */ 453*fcf3ce44SJohn Forte static void * 454*fcf3ce44SJohn Forte _dolock(void *arg) 455*fcf3ce44SJohn Forte { 456*fcf3ce44SJohn Forte struct nskernd nsk; 457*fcf3ce44SJohn Forte CFGFILE *cfg; 458*fcf3ce44SJohn Forte int locked; 459*fcf3ce44SJohn Forte int mode; 460*fcf3ce44SJohn Forte int rc = 0; 461*fcf3ce44SJohn Forte 462*fcf3ce44SJohn Forte /* copy arguments onto stack and free heap memory */ 463*fcf3ce44SJohn Forte bcopy(arg, &nsk, sizeof (nsk)); 464*fcf3ce44SJohn Forte free(arg); 465*fcf3ce44SJohn Forte 466*fcf3ce44SJohn Forte mutex_lock(&cfg_mutex); 467*fcf3ce44SJohn Forte cfg = cfg_open(""); 468*fcf3ce44SJohn Forte if (cfg == NULL) { 469*fcf3ce44SJohn Forte #ifdef DEBUG 470*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: cfg_open failed: %s\n"), 471*fcf3ce44SJohn Forte strerror(errno)); 472*fcf3ce44SJohn Forte #endif 473*fcf3ce44SJohn Forte rc = ENXIO; 474*fcf3ce44SJohn Forte } 475*fcf3ce44SJohn Forte 476*fcf3ce44SJohn Forte if (nsk.data2 == 0) { 477*fcf3ce44SJohn Forte mode = CFG_RDLOCK; 478*fcf3ce44SJohn Forte } else { 479*fcf3ce44SJohn Forte mode = CFG_WRLOCK; 480*fcf3ce44SJohn Forte } 481*fcf3ce44SJohn Forte 482*fcf3ce44SJohn Forte locked = 0; 483*fcf3ce44SJohn Forte if (rc == 0) { 484*fcf3ce44SJohn Forte if (cfg_lock(cfg, mode)) { 485*fcf3ce44SJohn Forte locked = 1; 486*fcf3ce44SJohn Forte } else { 487*fcf3ce44SJohn Forte #ifdef DEBUG 488*fcf3ce44SJohn Forte fprintf(stderr, 489*fcf3ce44SJohn Forte gettext("nskernd: cfg_lock failed: %s\n"), 490*fcf3ce44SJohn Forte strerror(errno)); 491*fcf3ce44SJohn Forte #endif 492*fcf3ce44SJohn Forte rc = EINVAL; 493*fcf3ce44SJohn Forte } 494*fcf3ce44SJohn Forte } 495*fcf3ce44SJohn Forte 496*fcf3ce44SJohn Forte /* return to kernel */ 497*fcf3ce44SJohn Forte 498*fcf3ce44SJohn Forte nsk.data2 = (uint64_t)rc; 499*fcf3ce44SJohn Forte if (nthread_inc()) { 500*fcf3ce44SJohn Forte (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 501*fcf3ce44SJohn Forte nthread_dec(); 502*fcf3ce44SJohn Forte } 503*fcf3ce44SJohn Forte 504*fcf3ce44SJohn Forte /* cleanup */ 505*fcf3ce44SJohn Forte 506*fcf3ce44SJohn Forte if (locked) { 507*fcf3ce44SJohn Forte cfg_unlock(cfg); 508*fcf3ce44SJohn Forte locked = 0; 509*fcf3ce44SJohn Forte } 510*fcf3ce44SJohn Forte 511*fcf3ce44SJohn Forte if (cfg != NULL) { 512*fcf3ce44SJohn Forte cfg_close(cfg); 513*fcf3ce44SJohn Forte cfg = NULL; 514*fcf3ce44SJohn Forte } 515*fcf3ce44SJohn Forte mutex_unlock(&cfg_mutex); 516*fcf3ce44SJohn Forte 517*fcf3ce44SJohn Forte return (NULL); 518*fcf3ce44SJohn Forte } 519*fcf3ce44SJohn Forte 520*fcf3ce44SJohn Forte 521*fcf3ce44SJohn Forte /* 522*fcf3ce44SJohn Forte * Inter-node lock thread. 523*fcf3ce44SJohn Forte * 524*fcf3ce44SJohn Forte * This is the user level side of nsc_rmlock(). 525*fcf3ce44SJohn Forte */ 526*fcf3ce44SJohn Forte static void 527*fcf3ce44SJohn Forte dolock(struct nskernd *req) 528*fcf3ce44SJohn Forte { 529*fcf3ce44SJohn Forte struct nskernd *nskp; 530*fcf3ce44SJohn Forte thread_t tid; 531*fcf3ce44SJohn Forte int rc; 532*fcf3ce44SJohn Forte 533*fcf3ce44SJohn Forte /* create a new thread to do the lock and return to kernel */ 534*fcf3ce44SJohn Forte 535*fcf3ce44SJohn Forte nskp = malloc(sizeof (*nskp)); 536*fcf3ce44SJohn Forte if (!nskp) { 537*fcf3ce44SJohn Forte #ifdef DEBUG 538*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd:dolock: malloc(%d) failed\n"), 539*fcf3ce44SJohn Forte sizeof (*nskp)); 540*fcf3ce44SJohn Forte #endif 541*fcf3ce44SJohn Forte req->data1 = (uint64_t)ENOMEM; 542*fcf3ce44SJohn Forte return; 543*fcf3ce44SJohn Forte } 544*fcf3ce44SJohn Forte 545*fcf3ce44SJohn Forte /* copy args for child */ 546*fcf3ce44SJohn Forte bcopy(req, nskp, sizeof (*nskp)); 547*fcf3ce44SJohn Forte 548*fcf3ce44SJohn Forte rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 549*fcf3ce44SJohn Forte _dolock, nskp, THR_BOUND|THR_DETACHED, &tid); 550*fcf3ce44SJohn Forte 551*fcf3ce44SJohn Forte if (rc != 0) { 552*fcf3ce44SJohn Forte /* thr_create failed */ 553*fcf3ce44SJohn Forte #ifdef DEBUG 554*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: thr_create failed: %s\n"), 555*fcf3ce44SJohn Forte strerror(errno)); 556*fcf3ce44SJohn Forte #endif 557*fcf3ce44SJohn Forte req->data1 = (uint64_t)errno; 558*fcf3ce44SJohn Forte free(nskp); 559*fcf3ce44SJohn Forte } else { 560*fcf3ce44SJohn Forte /* success - _dolock() will free nskp */ 561*fcf3ce44SJohn Forte req->data1 = (uint64_t)0; 562*fcf3ce44SJohn Forte } 563*fcf3ce44SJohn Forte } 564*fcf3ce44SJohn Forte 565*fcf3ce44SJohn Forte 566*fcf3ce44SJohn Forte /* 567*fcf3ce44SJohn Forte * Convenience code for engineering test of multi-terabyte volumes. 568*fcf3ce44SJohn Forte * 569*fcf3ce44SJohn Forte * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI 570*fcf3ce44SJohn Forte * labels. This code allocates a simple efi label structure and ioctls 571*fcf3ce44SJohn Forte * to extract the size of a zvol. It only handles the minimal EFI ioctl 572*fcf3ce44SJohn Forte * implementation in zvol. 573*fcf3ce44SJohn Forte */ 574*fcf3ce44SJohn Forte 575*fcf3ce44SJohn Forte static void 576*fcf3ce44SJohn Forte zvol_bsize(char *path, uint64_t *size, const int pnum) 577*fcf3ce44SJohn Forte { 578*fcf3ce44SJohn Forte struct stat64 stb1, stb2; 579*fcf3ce44SJohn Forte struct dk_minfo dkm; 580*fcf3ce44SJohn Forte int fd = -1; 581*fcf3ce44SJohn Forte int rc; 582*fcf3ce44SJohn Forte 583*fcf3ce44SJohn Forte if (cl_nodeid || pnum != 0) 584*fcf3ce44SJohn Forte return; 585*fcf3ce44SJohn Forte 586*fcf3ce44SJohn Forte if ((fd = open(path, O_RDONLY)) < 0) { 587*fcf3ce44SJohn Forte return; 588*fcf3ce44SJohn Forte } 589*fcf3ce44SJohn Forte 590*fcf3ce44SJohn Forte if (stat64("/devices/pseudo/zfs@0:zfs", &stb1) != 0 || 591*fcf3ce44SJohn Forte fstat64(fd, &stb2) != 0 || 592*fcf3ce44SJohn Forte !S_ISCHR(stb1.st_mode) || 593*fcf3ce44SJohn Forte !S_ISCHR(stb2.st_mode) || 594*fcf3ce44SJohn Forte major(stb1.st_rdev) != major(stb2.st_rdev)) { 595*fcf3ce44SJohn Forte (void) close(fd); 596*fcf3ce44SJohn Forte return; 597*fcf3ce44SJohn Forte } 598*fcf3ce44SJohn Forte 599*fcf3ce44SJohn Forte rc = ioctl(fd, DKIOCGMEDIAINFO, (void *)&dkm); 600*fcf3ce44SJohn Forte if (rc >= 0) { 601*fcf3ce44SJohn Forte *size = LE_64(dkm.dki_capacity) * 602*fcf3ce44SJohn Forte (dkm.dki_lbsize) / 512; 603*fcf3ce44SJohn Forte } 604*fcf3ce44SJohn Forte 605*fcf3ce44SJohn Forte (void) close(fd); 606*fcf3ce44SJohn Forte } 607*fcf3ce44SJohn Forte 608*fcf3ce44SJohn Forte /* ARGSUSED */ 609*fcf3ce44SJohn Forte static void 610*fcf3ce44SJohn Forte get_bsize(uint64_t raw_fd, uint64_t *size, int *partitionp, char *path) 611*fcf3ce44SJohn Forte { 612*fcf3ce44SJohn Forte struct nscioc_bsize bsize; 613*fcf3ce44SJohn Forte #ifdef DKIOCPARTITION 614*fcf3ce44SJohn Forte struct partition64 p64; 615*fcf3ce44SJohn Forte #endif 616*fcf3ce44SJohn Forte struct dk_cinfo dki_info; 617*fcf3ce44SJohn Forte struct vtoc vtoc; 618*fcf3ce44SJohn Forte int fd; 619*fcf3ce44SJohn Forte 620*fcf3ce44SJohn Forte *partitionp = -1; 621*fcf3ce44SJohn Forte *size = (uint64_t)0; 622*fcf3ce44SJohn Forte 623*fcf3ce44SJohn Forte dki_info.dki_partition = (ushort_t)-1; 624*fcf3ce44SJohn Forte bsize.dki_info = (uint64_t)(unsigned long)&dki_info; 625*fcf3ce44SJohn Forte bsize.vtoc = (uint64_t)(unsigned long)&vtoc; 626*fcf3ce44SJohn Forte bsize.raw_fd = raw_fd; 627*fcf3ce44SJohn Forte bsize.efi = 0; 628*fcf3ce44SJohn Forte 629*fcf3ce44SJohn Forte fd = open(rdev, O_RDONLY); 630*fcf3ce44SJohn Forte if (fd < 0) 631*fcf3ce44SJohn Forte return; 632*fcf3ce44SJohn Forte 633*fcf3ce44SJohn Forte if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 634*fcf3ce44SJohn Forte if (dki_info.dki_partition != (ushort_t)-1) { 635*fcf3ce44SJohn Forte /* assume part# is ok and just the size failed */ 636*fcf3ce44SJohn Forte *partitionp = (int)dki_info.dki_partition; 637*fcf3ce44SJohn Forte 638*fcf3ce44SJohn Forte #ifdef DKIOCPARTITION 639*fcf3ce44SJohn Forte /* see if this is an EFI label */ 640*fcf3ce44SJohn Forte bzero(&p64, sizeof (p64)); 641*fcf3ce44SJohn Forte p64.p_partno = (uint_t)*partitionp; 642*fcf3ce44SJohn Forte if ((ioctl(fd, DKIOCPARTITION, &p64)) > 0) { 643*fcf3ce44SJohn Forte *size = (uint64_t)p64.p_size; 644*fcf3ce44SJohn Forte } else { 645*fcf3ce44SJohn Forte bsize.p64 = (uint64_t)(unsigned long)&p64; 646*fcf3ce44SJohn Forte bsize.efi = 1; 647*fcf3ce44SJohn Forte 648*fcf3ce44SJohn Forte if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 649*fcf3ce44SJohn Forte /* see if this is a zvol */ 650*fcf3ce44SJohn Forte zvol_bsize(path, size, *partitionp); 651*fcf3ce44SJohn Forte } else { 652*fcf3ce44SJohn Forte *size = (uint64_t)p64.p_size; 653*fcf3ce44SJohn Forte } 654*fcf3ce44SJohn Forte } 655*fcf3ce44SJohn Forte #endif /* DKIOCPARTITION */ 656*fcf3ce44SJohn Forte } 657*fcf3ce44SJohn Forte 658*fcf3ce44SJohn Forte close(fd); 659*fcf3ce44SJohn Forte return; 660*fcf3ce44SJohn Forte } 661*fcf3ce44SJohn Forte 662*fcf3ce44SJohn Forte close(fd); 663*fcf3ce44SJohn Forte 664*fcf3ce44SJohn Forte *partitionp = (int)dki_info.dki_partition; 665*fcf3ce44SJohn Forte 666*fcf3ce44SJohn Forte if (vtoc.v_sanity != VTOC_SANE) 667*fcf3ce44SJohn Forte return; 668*fcf3ce44SJohn Forte 669*fcf3ce44SJohn Forte if (vtoc.v_version != V_VERSION && vtoc.v_version != 0) 670*fcf3ce44SJohn Forte return; 671*fcf3ce44SJohn Forte 672*fcf3ce44SJohn Forte if (dki_info.dki_partition > V_NUMPAR) 673*fcf3ce44SJohn Forte return; 674*fcf3ce44SJohn Forte 675*fcf3ce44SJohn Forte *size = (uint64_t)vtoc.v_part[(int)dki_info.dki_partition].p_size; 676*fcf3ce44SJohn Forte } 677*fcf3ce44SJohn Forte 678*fcf3ce44SJohn Forte 679*fcf3ce44SJohn Forte static int 680*fcf3ce44SJohn Forte iscluster(void) 681*fcf3ce44SJohn Forte { 682*fcf3ce44SJohn Forte /* 683*fcf3ce44SJohn Forte * Find out if we are running in a cluster 684*fcf3ce44SJohn Forte */ 685*fcf3ce44SJohn Forte cl_nodeid = cfg_iscluster(); 686*fcf3ce44SJohn Forte if (cl_nodeid > 0) { 687*fcf3ce44SJohn Forte return (TRUE); 688*fcf3ce44SJohn Forte } else if (cl_nodeid == 0) { 689*fcf3ce44SJohn Forte return (FALSE); 690*fcf3ce44SJohn Forte } 691*fcf3ce44SJohn Forte 692*fcf3ce44SJohn Forte fprintf(stderr, "%s\n", 693*fcf3ce44SJohn Forte gettext("nskernd: unable to ascertain environment")); 694*fcf3ce44SJohn Forte exit(1); 695*fcf3ce44SJohn Forte /* NOTREACHED */ 696*fcf3ce44SJohn Forte } 697*fcf3ce44SJohn Forte 698*fcf3ce44SJohn Forte /* 699*fcf3ce44SJohn Forte * Runtime Solaris release checking - build release == runtime release 700*fcf3ce44SJohn Forte * is always considered success, so only keep entries in the map for 701*fcf3ce44SJohn Forte * the special cases. 702*fcf3ce44SJohn Forte */ 703*fcf3ce44SJohn Forte static nsc_release_t nskernd_rel_map[] = { 704*fcf3ce44SJohn Forte /* { "5.10", "5.10" }, */ 705*fcf3ce44SJohn Forte { "5.11", "5.10" }, 706*fcf3ce44SJohn Forte { NULL, NULL } 707*fcf3ce44SJohn Forte }; 708*fcf3ce44SJohn Forte 709*fcf3ce44SJohn Forte 710*fcf3ce44SJohn Forte #ifdef lint 711*fcf3ce44SJohn Forte #define main nskernd_main 712*fcf3ce44SJohn Forte #endif 713*fcf3ce44SJohn Forte /* ARGSUSED1 */ 714*fcf3ce44SJohn Forte int 715*fcf3ce44SJohn Forte main(int argc, char *argv[]) 716*fcf3ce44SJohn Forte { 717*fcf3ce44SJohn Forte const char *dir = "/"; 718*fcf3ce44SJohn Forte struct nskernd data; 719*fcf3ce44SJohn Forte struct rlimit rl; 720*fcf3ce44SJohn Forte int i, run, rc; 721*fcf3ce44SJohn Forte int partition; 722*fcf3ce44SJohn Forte char *reqd; 723*fcf3ce44SJohn Forte int syncpipe[2]; 724*fcf3ce44SJohn Forte int startup; 725*fcf3ce44SJohn Forte 726*fcf3ce44SJohn Forte (void) setlocale(LC_ALL, ""); 727*fcf3ce44SJohn Forte (void) textdomain("nskernd"); 728*fcf3ce44SJohn Forte 729*fcf3ce44SJohn Forte rc = nsc_check_release(BUILD_REV_STR, nskernd_rel_map, &reqd); 730*fcf3ce44SJohn Forte if (rc < 0) { 731*fcf3ce44SJohn Forte fprintf(stderr, 732*fcf3ce44SJohn Forte gettext("nskernd: unable to determine the current " 733*fcf3ce44SJohn Forte "Solaris release: %s\n"), strerror(errno)); 734*fcf3ce44SJohn Forte exit(1); 735*fcf3ce44SJohn Forte } else if (rc == FALSE) { 736*fcf3ce44SJohn Forte fprintf(stderr, 737*fcf3ce44SJohn Forte gettext("nskernd: incorrect Solaris release " 738*fcf3ce44SJohn Forte "(requires %s)\n"), reqd); 739*fcf3ce44SJohn Forte exit(1); 740*fcf3ce44SJohn Forte } 741*fcf3ce44SJohn Forte 742*fcf3ce44SJohn Forte rc = 0; 743*fcf3ce44SJohn Forte 744*fcf3ce44SJohn Forte if (argc != 1) 745*fcf3ce44SJohn Forte usage(); 746*fcf3ce44SJohn Forte 747*fcf3ce44SJohn Forte /* 748*fcf3ce44SJohn Forte * Usage: <progname> [-g] [-d <seconds to delay>] 749*fcf3ce44SJohn Forte */ 750*fcf3ce44SJohn Forte while ((i = getopt(argc, argv, "gd:")) != EOF) { 751*fcf3ce44SJohn Forte switch (i) { 752*fcf3ce44SJohn Forte case 'g': 753*fcf3ce44SJohn Forte display_msg = 1; 754*fcf3ce44SJohn Forte break; 755*fcf3ce44SJohn Forte case 'd': 756*fcf3ce44SJohn Forte delay_time = atoi(optarg); 757*fcf3ce44SJohn Forte if (delay_time <= 0) { 758*fcf3ce44SJohn Forte delay_time = 30; 759*fcf3ce44SJohn Forte } 760*fcf3ce44SJohn Forte break; 761*fcf3ce44SJohn Forte default: 762*fcf3ce44SJohn Forte syslog(LOG_ERR, 763*fcf3ce44SJohn Forte "Usage: nskernd [-g] [-d <seconds to delay>]"); 764*fcf3ce44SJohn Forte exit(1); 765*fcf3ce44SJohn Forte break; 766*fcf3ce44SJohn Forte } 767*fcf3ce44SJohn Forte } 768*fcf3ce44SJohn Forte 769*fcf3ce44SJohn Forte if (chroot(dir) < 0) { 770*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: chroot failed: %s\n"), 771*fcf3ce44SJohn Forte strerror(errno)); 772*fcf3ce44SJohn Forte exit(1); 773*fcf3ce44SJohn Forte } 774*fcf3ce44SJohn Forte 775*fcf3ce44SJohn Forte if (chdir(dir) < 0) { 776*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: chdir failed: %s\n"), 777*fcf3ce44SJohn Forte strerror(errno)); 778*fcf3ce44SJohn Forte exit(1); 779*fcf3ce44SJohn Forte } 780*fcf3ce44SJohn Forte 781*fcf3ce44SJohn Forte /* 782*fcf3ce44SJohn Forte * Determine if we are in a Sun Cluster or not, before fork'ing 783*fcf3ce44SJohn Forte */ 784*fcf3ce44SJohn Forte (void) iscluster(); 785*fcf3ce44SJohn Forte 786*fcf3ce44SJohn Forte /* 787*fcf3ce44SJohn Forte * create a pipe to synchronise the parent with the 788*fcf3ce44SJohn Forte * child just before it enters its service loop. 789*fcf3ce44SJohn Forte */ 790*fcf3ce44SJohn Forte if (pipe(syncpipe) < 0) { 791*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: cannot create pipe: %s\n"), 792*fcf3ce44SJohn Forte strerror(errno)); 793*fcf3ce44SJohn Forte exit(1); 794*fcf3ce44SJohn Forte } 795*fcf3ce44SJohn Forte /* 796*fcf3ce44SJohn Forte * Fork off a child that becomes the daemon. 797*fcf3ce44SJohn Forte */ 798*fcf3ce44SJohn Forte 799*fcf3ce44SJohn Forte if ((rc = fork()) > 0) { 800*fcf3ce44SJohn Forte char c; 801*fcf3ce44SJohn Forte int n; 802*fcf3ce44SJohn Forte (void) close(syncpipe[1]); 803*fcf3ce44SJohn Forte /* 804*fcf3ce44SJohn Forte * wait for the close of the pipe. 805*fcf3ce44SJohn Forte * If we get a char back, indicates good 806*fcf3ce44SJohn Forte * status from child, so exit 0. 807*fcf3ce44SJohn Forte * If we get a zero length read, then the 808*fcf3ce44SJohn Forte * child has failed, so we do too. 809*fcf3ce44SJohn Forte */ 810*fcf3ce44SJohn Forte n = read(syncpipe[0], &c, 1); 811*fcf3ce44SJohn Forte exit((n <= 0) ? 1 : 0); 812*fcf3ce44SJohn Forte } else if (rc < 0) { 813*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: cannot fork: %s\n"), 814*fcf3ce44SJohn Forte strerror(errno)); 815*fcf3ce44SJohn Forte exit(1); 816*fcf3ce44SJohn Forte } 817*fcf3ce44SJohn Forte 818*fcf3ce44SJohn Forte /* 819*fcf3ce44SJohn Forte * In child - become daemon. 820*fcf3ce44SJohn Forte */ 821*fcf3ce44SJohn Forte 822*fcf3ce44SJohn Forte /* use closefrom(3C) from PSARC/2000/193 when possible */ 823*fcf3ce44SJohn Forte for (i = 0; i < syncpipe[1]; i++) { 824*fcf3ce44SJohn Forte (void) close(i); 825*fcf3ce44SJohn Forte } 826*fcf3ce44SJohn Forte closefrom(syncpipe[1] + 1); 827*fcf3ce44SJohn Forte 828*fcf3ce44SJohn Forte (void) open("/dev/console", O_WRONLY|O_APPEND); 829*fcf3ce44SJohn Forte (void) dup(0); 830*fcf3ce44SJohn Forte (void) dup(0); 831*fcf3ce44SJohn Forte (void) close(0); 832*fcf3ce44SJohn Forte 833*fcf3ce44SJohn Forte setpgrp(); 834*fcf3ce44SJohn Forte 835*fcf3ce44SJohn Forte /* 836*fcf3ce44SJohn Forte * Ignore all signals apart from SIGTERM. 837*fcf3ce44SJohn Forte */ 838*fcf3ce44SJohn Forte 839*fcf3ce44SJohn Forte for (i = 1; i < _sys_nsig; i++) 840*fcf3ce44SJohn Forte (void) sigset(i, SIG_IGN); 841*fcf3ce44SJohn Forte 842*fcf3ce44SJohn Forte (void) sigset(SIGTERM, sighand); 843*fcf3ce44SJohn Forte 844*fcf3ce44SJohn Forte /* 845*fcf3ce44SJohn Forte * Increase the number of fd's that can be open. 846*fcf3ce44SJohn Forte */ 847*fcf3ce44SJohn Forte 848*fcf3ce44SJohn Forte rl.rlim_cur = RLIM_INFINITY; 849*fcf3ce44SJohn Forte rl.rlim_max = RLIM_INFINITY; 850*fcf3ce44SJohn Forte if (setrlimit(RLIMIT_NOFILE, &rl) < 0) { 851*fcf3ce44SJohn Forte fprintf(stderr, 852*fcf3ce44SJohn Forte gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"), 853*fcf3ce44SJohn Forte strerror(errno)); 854*fcf3ce44SJohn Forte fprintf(stderr, 855*fcf3ce44SJohn Forte gettext("nskernd: the maximum number of nsctl open " 856*fcf3ce44SJohn Forte "devices may be reduced\n")); 857*fcf3ce44SJohn Forte } 858*fcf3ce44SJohn Forte 859*fcf3ce44SJohn Forte /* 860*fcf3ce44SJohn Forte * Open /dev/nsctl and startup. 861*fcf3ce44SJohn Forte */ 862*fcf3ce44SJohn Forte 863*fcf3ce44SJohn Forte nsctl_fd = open(rdev, O_RDONLY); 864*fcf3ce44SJohn Forte if (nsctl_fd < 0) { 865*fcf3ce44SJohn Forte fprintf(stderr, gettext("nskernd: unable to open %s\n"), rdev); 866*fcf3ce44SJohn Forte exit(1); 867*fcf3ce44SJohn Forte } 868*fcf3ce44SJohn Forte 869*fcf3ce44SJohn Forte bzero(&data, sizeof (data)); 870*fcf3ce44SJohn Forte 871*fcf3ce44SJohn Forte data.command = NSKERND_START; 872*fcf3ce44SJohn Forte data.data1 = (uint64_t)cl_nodeid; 873*fcf3ce44SJohn Forte run = 1; 874*fcf3ce44SJohn Forte 875*fcf3ce44SJohn Forte startup = 1; 876*fcf3ce44SJohn Forte while (run) { 877*fcf3ce44SJohn Forte rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 878*fcf3ce44SJohn Forte if (rc < 0) { 879*fcf3ce44SJohn Forte /* try and do kernel cleanup and exit */ 880*fcf3ce44SJohn Forte if (shutdown()) { 881*fcf3ce44SJohn Forte run = 0; 882*fcf3ce44SJohn Forte } else { 883*fcf3ce44SJohn Forte sigterm = 0; 884*fcf3ce44SJohn Forte } 885*fcf3ce44SJohn Forte 886*fcf3ce44SJohn Forte fprintf(stderr, 887*fcf3ce44SJohn Forte gettext("nskernd: NSCIOC_NSKERND failed: %s\n"), 888*fcf3ce44SJohn Forte strerror(errno)); 889*fcf3ce44SJohn Forte continue; 890*fcf3ce44SJohn Forte } else if (sigterm) { 891*fcf3ce44SJohn Forte /* SIGTERM received - terminate */ 892*fcf3ce44SJohn Forte if (data.command != NSKERND_START && 893*fcf3ce44SJohn Forte (data.command != NSKERND_STOP || 894*fcf3ce44SJohn Forte data.data1 != (uint64_t)1)) { 895*fcf3ce44SJohn Forte /* need to do kernel cleanup */ 896*fcf3ce44SJohn Forte if (shutdown()) { 897*fcf3ce44SJohn Forte run = 0; 898*fcf3ce44SJohn Forte } else { 899*fcf3ce44SJohn Forte sigterm = 0; 900*fcf3ce44SJohn Forte data.command = NSKERND_START; 901*fcf3ce44SJohn Forte data.data1 = (uint64_t)cl_nodeid; 902*fcf3ce44SJohn Forte } 903*fcf3ce44SJohn Forte } else { 904*fcf3ce44SJohn Forte /* just quit */ 905*fcf3ce44SJohn Forte if (canshutdown()) { 906*fcf3ce44SJohn Forte run = 0; 907*fcf3ce44SJohn Forte } else { 908*fcf3ce44SJohn Forte /* cannot shutdown - threads active */ 909*fcf3ce44SJohn Forte sigterm = 0; 910*fcf3ce44SJohn Forte data.command = NSKERND_START; 911*fcf3ce44SJohn Forte data.data1 = (uint64_t)cl_nodeid; 912*fcf3ce44SJohn Forte } 913*fcf3ce44SJohn Forte } 914*fcf3ce44SJohn Forte continue; 915*fcf3ce44SJohn Forte } 916*fcf3ce44SJohn Forte if (startup) { 917*fcf3ce44SJohn Forte char c = 0; 918*fcf3ce44SJohn Forte (void) write(syncpipe[1], &c, 1); 919*fcf3ce44SJohn Forte (void) close(syncpipe[1]); 920*fcf3ce44SJohn Forte startup = 0; 921*fcf3ce44SJohn Forte } 922*fcf3ce44SJohn Forte switch (data.command) { 923*fcf3ce44SJohn Forte case NSKERND_START: /* (re)start completion */ 924*fcf3ce44SJohn Forte if (rc == 1) { 925*fcf3ce44SJohn Forte fprintf(stderr, 926*fcf3ce44SJohn Forte gettext("nskernd: already started\n")); 927*fcf3ce44SJohn Forte run = 0; 928*fcf3ce44SJohn Forte } else if (rc == 2) { 929*fcf3ce44SJohn Forte fprintf(stderr, 930*fcf3ce44SJohn Forte gettext("nskernd: stopped by kernel\n")); 931*fcf3ce44SJohn Forte run = 0; 932*fcf3ce44SJohn Forte } 933*fcf3ce44SJohn Forte data.command = NSKERND_WAIT; 934*fcf3ce44SJohn Forte break; 935*fcf3ce44SJohn Forte 936*fcf3ce44SJohn Forte case NSKERND_STOP: /* kernel telling daemon to stop */ 937*fcf3ce44SJohn Forte if (data.data1 != (uint64_t)1) { 938*fcf3ce44SJohn Forte (void) shutdown(); 939*fcf3ce44SJohn Forte run = 0; 940*fcf3ce44SJohn Forte } 941*fcf3ce44SJohn Forte break; 942*fcf3ce44SJohn Forte 943*fcf3ce44SJohn Forte case NSKERND_BSIZE: 944*fcf3ce44SJohn Forte /* 945*fcf3ce44SJohn Forte * kernel requesting partsize 946*fcf3ce44SJohn Forte * data1 - size return 947*fcf3ce44SJohn Forte * data2 - raw_fd (entry) 948*fcf3ce44SJohn Forte * - partition number (return) 949*fcf3ce44SJohn Forte */ 950*fcf3ce44SJohn Forte partition = -1; 951*fcf3ce44SJohn Forte get_bsize(data.data2, &data.data1, 952*fcf3ce44SJohn Forte &partition, data.char1); 953*fcf3ce44SJohn Forte data.data2 = (uint64_t)partition; 954*fcf3ce44SJohn Forte data.command = NSKERND_WAIT; 955*fcf3ce44SJohn Forte break; 956*fcf3ce44SJohn Forte 957*fcf3ce44SJohn Forte case NSKERND_NEWLWP: /* kernel requesting a new LWP */ 958*fcf3ce44SJohn Forte newlwp(&data); 959*fcf3ce44SJohn Forte data.command = NSKERND_WAIT; 960*fcf3ce44SJohn Forte break; 961*fcf3ce44SJohn Forte 962*fcf3ce44SJohn Forte case NSKERND_LOCK: /* kernel requesting lock */ 963*fcf3ce44SJohn Forte dolock(&data); 964*fcf3ce44SJohn Forte data.command = NSKERND_WAIT; 965*fcf3ce44SJohn Forte break; 966*fcf3ce44SJohn Forte 967*fcf3ce44SJohn Forte case NSKERND_WAIT: /* kernel retrying wait */ 968*fcf3ce44SJohn Forte /* 969*fcf3ce44SJohn Forte * the kernel thread can be woken by the dr config 970*fcf3ce44SJohn Forte * utilities (ie cfgadm) therefore we just reissue 971*fcf3ce44SJohn Forte * the wait. 972*fcf3ce44SJohn Forte */ 973*fcf3ce44SJohn Forte break; 974*fcf3ce44SJohn Forte 975*fcf3ce44SJohn Forte case NSKERND_IIBITMAP: 976*fcf3ce44SJohn Forte rc = log_iibmp_err(data.char1, (int)data.data1); 977*fcf3ce44SJohn Forte data.data1 = (uint64_t)rc; 978*fcf3ce44SJohn Forte data.command = NSKERND_WAIT; 979*fcf3ce44SJohn Forte break; 980*fcf3ce44SJohn Forte 981*fcf3ce44SJohn Forte default: 982*fcf3ce44SJohn Forte fprintf(stderr, 983*fcf3ce44SJohn Forte gettext("nskernd: unknown command %d"), 984*fcf3ce44SJohn Forte data.command); 985*fcf3ce44SJohn Forte data.command = NSKERND_WAIT; 986*fcf3ce44SJohn Forte break; 987*fcf3ce44SJohn Forte } 988*fcf3ce44SJohn Forte } 989*fcf3ce44SJohn Forte 990*fcf3ce44SJohn Forte (void) close(nsctl_fd); 991*fcf3ce44SJohn Forte 992*fcf3ce44SJohn Forte return (rc); 993*fcf3ce44SJohn Forte } 994