1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/resource.h> 29 #include <sys/priocntl.h> 30 #include <sys/rtpriocntl.h> 31 #include <sys/tspriocntl.h> 32 #include <sys/wait.h> 33 #include <sys/stat.h> 34 35 #include <strings.h> 36 #include <thread.h> 37 #include <stdlib.h> 38 #include <signal.h> 39 #include <errno.h> 40 #include <stdio.h> 41 #include <fcntl.h> 42 #include <locale.h> 43 #include <unistd.h> 44 #include <syslog.h> 45 46 #include <sys/nsctl/cfg.h> 47 #include <sys/nsctl/nsctl.h> 48 #include <sys/nsctl/nsc_ioctl.h> 49 #include <sys/nskernd.h> 50 #include <nsctl.h> 51 52 #include <sys/mkdev.h> 53 #include <sys/nsctl/sv_efi.h> 54 55 static const char *rdev = "/dev/nsctl"; 56 57 /* 58 * Define a minimal user stack size in bytes over and above the 59 * libthread THR_STACK_MIN minimum value. 60 * 61 * This stack size needs to be sufficient to run _newlwp() and then 62 * ioctl() down into the kernel. 63 */ 64 #define NSK_STACK_SIZE 512 65 66 /* 67 * LWP scheduling control switches. 68 * 69 * allow_pri - set to non-zero to enable priocntl() manipulations of 70 * created LWPs. 71 * allow_rt - set to non-zero to use the RT rather than the TS 72 * scheduling class when manipulating the schduling 73 * parameters for an LWP. Only used if allow_pri is 74 * non-zero. 75 */ 76 static int allow_pri = 1; 77 static int allow_rt = 0; /* disallow - bad interactions with timeout() */ 78 79 static int nsctl_fd = -1; 80 static int sigterm; 81 82 static int nthreads; /* number of threads in the kernel */ 83 static int exiting; /* shutdown in progress flag */ 84 static mutex_t thr_mutex = DEFAULTMUTEX; 85 static mutex_t cfg_mutex = DEFAULTMUTEX; 86 87 static int cl_nodeid = -1; 88 89 static int display_msg = 0; 90 static int delay_time = 30; 91 92 static void 93 usage(void) 94 { 95 (void) fprintf(stderr, gettext("usage: nskernd\n")); 96 exit(255); 97 } 98 99 100 static void 101 sighand(int sig) 102 { 103 if (sig == SIGTERM) { 104 sigterm++; 105 } 106 } 107 108 109 /* 110 * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel 111 */ 112 int 113 nthread_inc(void) 114 { 115 (void) mutex_lock(&thr_mutex); 116 if (exiting) { 117 /* cannot enter kernel as nskernd is being shutdown - exit */ 118 (void) mutex_unlock(&thr_mutex); 119 return (0); 120 } 121 nthreads++; 122 (void) mutex_unlock(&thr_mutex); 123 return (1); 124 } 125 126 127 void 128 nthread_dec(void) 129 { 130 (void) mutex_lock(&thr_mutex); 131 nthreads--; 132 (void) mutex_unlock(&thr_mutex); 133 } 134 135 136 /* 137 * returns: 1 - can shutdown; 0 - unable to shutdown 138 */ 139 int 140 canshutdown(void) 141 { 142 int rc = 1; 143 time_t start_delay; 144 145 (void) mutex_lock(&thr_mutex); 146 if (nthreads > 0) { 147 if (display_msg) { 148 (void) fprintf(stderr, 149 gettext("nskernd: unable to shutdown: " 150 "%d kernel threads in use\n"), nthreads); 151 } 152 start_delay = time(0); 153 while (nthreads > 0 && (time(0) - start_delay) < delay_time) { 154 (void) mutex_unlock(&thr_mutex); 155 (void) sleep(1); 156 (void) mutex_lock(&thr_mutex); 157 (void) fprintf(stderr, 158 gettext("nskernd: delay shutdown: " 159 "%d kernel threads in use\n"), nthreads); 160 } 161 if (nthreads > 0) { 162 rc = 0; 163 } else { 164 exiting = 1; 165 } 166 } else { 167 /* flag shutdown in progress */ 168 exiting = 1; 169 } 170 (void) mutex_unlock(&thr_mutex); 171 172 return (rc); 173 } 174 175 176 /* 177 * returns: 1 - shutdown successful; 0 - unable to shutdown 178 */ 179 int 180 shutdown(void) 181 { 182 struct nskernd data; 183 int rc; 184 185 if (nsctl_fd < 0) 186 return (1); 187 188 bzero(&data, sizeof (data)); 189 data.command = NSKERND_STOP; 190 191 if (!canshutdown()) { 192 return (0); 193 } 194 195 rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 196 if (rc < 0) { 197 if (errno != EINTR || !sigterm) { 198 (void) fprintf(stderr, 199 gettext("nskernd: NSKERND_STOP failed\n")); 200 } 201 } 202 203 return (1); 204 } 205 206 207 /* 208 * First function run by a NSKERND_NEWLWP thread. 209 * 210 * Determines if it needs to change the scheduling priority of the LWP, 211 * and then calls back into the kernel. 212 */ 213 static void * 214 _newlwp(void *arg) 215 { 216 struct nskernd nsk; 217 pcparms_t pcparms; 218 pcinfo_t pcinfo; 219 220 /* copy arguments onto stack and free heap memory */ 221 bcopy(arg, &nsk, sizeof (nsk)); 222 free(arg); 223 224 if (nsk.data2 && allow_pri) { 225 /* increase the scheduling priority of this LWP */ 226 227 bzero(&pcinfo, sizeof (pcinfo)); 228 (void) strcpy(pcinfo.pc_clname, allow_rt ? "RT" : "TS"); 229 230 if (priocntl(0, 0, PC_GETCID, (char *)&pcinfo) < 0) { 231 (void) fprintf(stderr, 232 gettext( 233 "nskernd: priocntl(PC_GETCID) failed: %s\n"), 234 strerror(errno)); 235 goto pri_done; 236 } 237 238 bzero(&pcparms, sizeof (pcparms)); 239 pcparms.pc_cid = pcinfo.pc_cid; 240 241 if (allow_rt) { 242 ((rtparms_t *)pcparms.pc_clparms)->rt_pri = 243 (pri_t)0; /* minimum RT priority */ 244 ((rtparms_t *)pcparms.pc_clparms)->rt_tqsecs = 245 (uint_t)RT_TQDEF; 246 ((rtparms_t *)pcparms.pc_clparms)->rt_tqnsecs = 247 RT_TQDEF; 248 } else { 249 ((tsparms_t *)pcparms.pc_clparms)->ts_uprilim = 250 ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 251 ((tsparms_t *)pcparms.pc_clparms)->ts_upri = 252 ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 253 } 254 255 if (priocntl(P_LWPID, P_MYID, 256 PC_SETPARMS, (char *)&pcparms) < 0) { 257 (void) fprintf(stderr, 258 gettext( 259 "nskernd: priocntl(PC_SETPARMS) failed: %s\n"), 260 strerror(errno)); 261 } 262 } 263 264 pri_done: 265 if (nthread_inc()) { 266 (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 267 nthread_dec(); 268 } 269 return (NULL); 270 } 271 272 273 /* 274 * Start a new thread bound to an LWP. 275 * 276 * This is the user level side of nsc_create_process(). 277 */ 278 static void 279 newlwp(struct nskernd *req) 280 { 281 struct nskernd *nskp; 282 thread_t tid; 283 int rc; 284 285 nskp = malloc(sizeof (*nskp)); 286 if (!nskp) { 287 #ifdef DEBUG 288 (void) fprintf(stderr, gettext("nskernd: malloc(%d) failed\n"), 289 sizeof (*nskp)); 290 #endif 291 req->data1 = (uint64_t)ENOMEM; 292 return; 293 } 294 295 /* copy args for child */ 296 bcopy(req, nskp, sizeof (*nskp)); 297 298 rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 299 _newlwp, nskp, THR_BOUND|THR_DETACHED, &tid); 300 301 if (rc != 0) { 302 /* thr_create failed */ 303 #ifdef DEBUG 304 (void) fprintf(stderr, 305 gettext("nskernd: thr_create failed: %s\n"), 306 strerror(errno)); 307 #endif 308 req->data1 = (uint64_t)errno; 309 free(nskp); 310 } else { 311 /* success - _newlwp() will free nskp */ 312 req->data1 = (uint64_t)0; 313 } 314 } 315 316 static int 317 log_iibmp_err(char *set, int flags) 318 { 319 CFGFILE *cfg; 320 char key[CFG_MAX_KEY]; 321 char buf[CFG_MAX_BUF]; 322 char newflags[CFG_MAX_BUF]; 323 char outbuf[CFG_MAX_BUF]; 324 char *mst, *shd, *bmp, *mode, *ovr, *cnode, *opt, *grp; 325 int setno, found = 0; 326 int setlen; 327 int rc = 0; 328 pid_t pid = -1; 329 330 if (set && *set) { 331 setlen = strlen(set); 332 } else { 333 return (EINVAL); 334 } 335 336 (void) mutex_lock(&cfg_mutex); 337 cfg = cfg_open(""); 338 if (!cfg) { 339 (void) mutex_unlock(&cfg_mutex); 340 return (ENXIO); 341 } 342 343 if (!cfg_lock(cfg, CFG_WRLOCK)) { 344 345 (void) mutex_unlock(&cfg_mutex); 346 cfg_close(cfg); 347 348 pid = fork(); 349 350 if (pid == -1) { 351 (void) fprintf(stderr, gettext( 352 "nskernd: Error forking\n")); 353 return (errno); 354 } else if (pid > 0) { 355 (void) fprintf(stdout, gettext( 356 "nskernd: Attempting deferred bitmap error\n")); 357 return (0); 358 } 359 360 (void) mutex_lock(&cfg_mutex); 361 cfg = cfg_open(""); 362 if (!cfg) { 363 (void) mutex_unlock(&cfg_mutex); 364 (void) fprintf(stderr, gettext( 365 "nskernd: Failed cfg_open, deferred bitmap\n")); 366 return (ENXIO); 367 } 368 369 /* Sooner or later, this lock will be free */ 370 while (!cfg_lock(cfg, CFG_WRLOCK)) 371 (void) sleep(2); 372 } 373 374 /* find the proper set number */ 375 for (setno = 1; !found; setno++) { 376 (void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 377 if (cfg_get_cstring(cfg, key, buf, CFG_MAX_BUF) < 0) { 378 break; 379 } 380 381 mst = strtok(buf, " "); 382 shd = strtok(NULL, " "); 383 if (strncmp(shd, set, setlen) == 0) { 384 found = 1; 385 386 bmp = strtok(NULL, " "); 387 mode = strtok(NULL, " "); 388 ovr = strtok(NULL, " "); 389 cnode = strtok(NULL, " "); 390 opt = strtok(NULL, " "); 391 grp = strtok(NULL, " "); 392 break; 393 } 394 } 395 396 if (found) { 397 /* were there flags in the options field already? */ 398 (void) snprintf(newflags, CFG_MAX_BUF, "%s=0x%x", 399 NSKERN_II_BMP_OPTION, flags); 400 if (opt && strcmp(opt, "-") != 0) { 401 bzero(newflags, CFG_MAX_BUF); 402 opt = strtok(opt, ";"); 403 while (opt) { 404 if (strncmp(opt, NSKERN_II_BMP_OPTION, 405 strlen(NSKERN_II_BMP_OPTION)) != 0) { 406 (void) strcat(newflags, ";"); 407 (void) strcat(newflags, opt); 408 } 409 } 410 } 411 (void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 412 (void) snprintf(outbuf, CFG_MAX_BUF, "%s %s %s %s %s %s %s %s", 413 mst, shd, bmp, mode, ovr, cnode, newflags, grp); 414 if (cfg_put_cstring(cfg, key, outbuf, CFG_MAX_BUF) < 0) { 415 (void) printf("Failed to put [%s]\n", outbuf); 416 rc = ENXIO; 417 } else { 418 (void) cfg_commit(cfg); 419 rc = 0; 420 } 421 } else { 422 (void) fprintf(stderr, gettext( 423 "nskernd: Failed deferred bitmap [%s]\n"), set); 424 rc = EINVAL; 425 } 426 cfg_unlock(cfg); 427 cfg_close(cfg); 428 (void) mutex_unlock(&cfg_mutex); 429 430 /* 431 * if we are the fork'ed client, just exit, if parent just return 432 */ 433 if (pid == 0) { 434 exit(rc); 435 /*NOTREACHED*/ 436 } else { 437 return (rc); 438 } 439 } 440 441 /* 442 * First function run by a NSKERND_LOCK thread. 443 * 444 * Opens dscfg and locks it, 445 * and then calls back into the kernel. 446 * 447 * Incoming: 448 * data1 is the kernel address of the sync structure. 449 * data2 is read(0)/write(1) lock mode. 450 * 451 * Returns: 452 * data1 as incoming. 453 * data2 errno. 454 */ 455 static void * 456 _dolock(void *arg) 457 { 458 struct nskernd nsk; 459 CFGFILE *cfg; 460 int locked; 461 int mode; 462 int rc = 0; 463 464 /* copy arguments onto stack and free heap memory */ 465 bcopy(arg, &nsk, sizeof (nsk)); 466 free(arg); 467 468 (void) mutex_lock(&cfg_mutex); 469 cfg = cfg_open(""); 470 if (cfg == NULL) { 471 #ifdef DEBUG 472 (void) fprintf(stderr, 473 gettext("nskernd: cfg_open failed: %s\n"), 474 strerror(errno)); 475 #endif 476 rc = ENXIO; 477 } 478 479 if (nsk.data2 == 0) { 480 mode = CFG_RDLOCK; 481 } else { 482 mode = CFG_WRLOCK; 483 } 484 485 locked = 0; 486 if (rc == 0) { 487 if (cfg_lock(cfg, mode)) { 488 locked = 1; 489 } else { 490 #ifdef DEBUG 491 (void) fprintf(stderr, 492 gettext("nskernd: cfg_lock failed: %s\n"), 493 strerror(errno)); 494 #endif 495 rc = EINVAL; 496 } 497 } 498 499 /* return to kernel */ 500 501 nsk.data2 = (uint64_t)rc; 502 if (nthread_inc()) { 503 (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 504 nthread_dec(); 505 } 506 507 /* cleanup */ 508 509 if (locked) { 510 cfg_unlock(cfg); 511 locked = 0; 512 } 513 514 if (cfg != NULL) { 515 cfg_close(cfg); 516 cfg = NULL; 517 } 518 (void) mutex_unlock(&cfg_mutex); 519 520 return (NULL); 521 } 522 523 524 /* 525 * Inter-node lock thread. 526 * 527 * This is the user level side of nsc_rmlock(). 528 */ 529 static void 530 dolock(struct nskernd *req) 531 { 532 struct nskernd *nskp; 533 thread_t tid; 534 int rc; 535 536 /* create a new thread to do the lock and return to kernel */ 537 538 nskp = malloc(sizeof (*nskp)); 539 if (!nskp) { 540 #ifdef DEBUG 541 (void) fprintf(stderr, 542 gettext("nskernd:dolock: malloc(%d) failed\n"), 543 sizeof (*nskp)); 544 #endif 545 req->data1 = (uint64_t)ENOMEM; 546 return; 547 } 548 549 /* copy args for child */ 550 bcopy(req, nskp, sizeof (*nskp)); 551 552 rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 553 _dolock, nskp, THR_BOUND|THR_DETACHED, &tid); 554 555 if (rc != 0) { 556 /* thr_create failed */ 557 #ifdef DEBUG 558 (void) fprintf(stderr, 559 gettext("nskernd: thr_create failed: %s\n"), 560 strerror(errno)); 561 #endif 562 req->data1 = (uint64_t)errno; 563 free(nskp); 564 } else { 565 /* success - _dolock() will free nskp */ 566 req->data1 = (uint64_t)0; 567 } 568 } 569 570 571 /* 572 * Convenience code for engineering test of multi-terabyte volumes. 573 * 574 * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI 575 * labels. This code allocates a simple efi label structure and ioctls 576 * to extract the size of a zvol. It only handles the minimal EFI ioctl 577 * implementation in zvol. 578 */ 579 580 static void 581 zvol_bsize(char *path, uint64_t *size, const int pnum) 582 { 583 struct stat64 stb1, stb2; 584 struct dk_minfo dkm; 585 int fd = -1; 586 int rc; 587 588 if (cl_nodeid || pnum != 0) 589 return; 590 591 if ((fd = open(path, O_RDONLY)) < 0) { 592 return; 593 } 594 595 if (stat64("/devices/pseudo/zfs@0:zfs", &stb1) != 0 || 596 fstat64(fd, &stb2) != 0 || 597 !S_ISCHR(stb1.st_mode) || 598 !S_ISCHR(stb2.st_mode) || 599 major(stb1.st_rdev) != major(stb2.st_rdev)) { 600 (void) close(fd); 601 return; 602 } 603 604 rc = ioctl(fd, DKIOCGMEDIAINFO, (void *)&dkm); 605 if (rc >= 0) { 606 *size = LE_64(dkm.dki_capacity) * 607 (dkm.dki_lbsize) / 512; 608 } 609 610 (void) close(fd); 611 } 612 613 /* ARGSUSED */ 614 static void 615 get_bsize(uint64_t raw_fd, uint64_t *size, int *partitionp, char *path) 616 { 617 struct nscioc_bsize bsize; 618 #ifdef DKIOCPARTITION 619 struct partition64 p64; 620 #endif 621 struct dk_cinfo dki_info; 622 struct vtoc vtoc; 623 int fd; 624 625 *partitionp = -1; 626 *size = (uint64_t)0; 627 628 dki_info.dki_partition = (ushort_t)-1; 629 bsize.dki_info = (uint64_t)(unsigned long)&dki_info; 630 bsize.vtoc = (uint64_t)(unsigned long)&vtoc; 631 bsize.raw_fd = raw_fd; 632 bsize.efi = 0; 633 634 fd = open(rdev, O_RDONLY); 635 if (fd < 0) 636 return; 637 638 if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 639 if (dki_info.dki_partition != (ushort_t)-1) { 640 /* assume part# is ok and just the size failed */ 641 *partitionp = (int)dki_info.dki_partition; 642 643 #ifdef DKIOCPARTITION 644 /* see if this is an EFI label */ 645 bzero(&p64, sizeof (p64)); 646 p64.p_partno = (uint_t)*partitionp; 647 if ((ioctl(fd, DKIOCPARTITION, &p64)) > 0) { 648 *size = (uint64_t)p64.p_size; 649 } else { 650 bsize.p64 = (uint64_t)(unsigned long)&p64; 651 bsize.efi = 1; 652 653 if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 654 /* see if this is a zvol */ 655 zvol_bsize(path, size, *partitionp); 656 } else { 657 *size = (uint64_t)p64.p_size; 658 } 659 } 660 #endif /* DKIOCPARTITION */ 661 } 662 663 (void) close(fd); 664 return; 665 } 666 667 (void) close(fd); 668 669 *partitionp = (int)dki_info.dki_partition; 670 671 if (vtoc.v_sanity != VTOC_SANE) 672 return; 673 674 if (vtoc.v_version != V_VERSION && vtoc.v_version != 0) 675 return; 676 677 if (dki_info.dki_partition > V_NUMPAR) 678 return; 679 680 *size = (uint64_t)vtoc.v_part[(int)dki_info.dki_partition].p_size; 681 } 682 683 684 static int 685 iscluster(void) 686 { 687 /* 688 * Find out if we are running in a cluster 689 */ 690 cl_nodeid = cfg_iscluster(); 691 if (cl_nodeid > 0) { 692 return (TRUE); 693 } else if (cl_nodeid == 0) { 694 return (FALSE); 695 } 696 697 (void) fprintf(stderr, "%s\n", 698 gettext("nskernd: unable to ascertain environment")); 699 exit(1); 700 /* NOTREACHED */ 701 } 702 703 /* 704 * Runtime Solaris release checking - build release == runtime release 705 * is always considered success, so only keep entries in the map for 706 * the special cases. 707 */ 708 static nsc_release_t nskernd_rel_map[] = { 709 /* { "5.10", "5.10" }, */ 710 { "5.11", "5.10" }, 711 { NULL, NULL } 712 }; 713 714 715 #ifdef lint 716 #define main nskernd_main 717 #endif 718 /* ARGSUSED1 */ 719 int 720 main(int argc, char *argv[]) 721 { 722 const char *dir = "/"; 723 struct nskernd data; 724 struct rlimit rl; 725 int i, run, rc; 726 int partition; 727 char *reqd; 728 int syncpipe[2]; 729 int startup; 730 731 (void) setlocale(LC_ALL, ""); 732 (void) textdomain("nskernd"); 733 734 rc = nsc_check_release(BUILD_REV_STR, nskernd_rel_map, &reqd); 735 if (rc < 0) { 736 (void) fprintf(stderr, 737 gettext("nskernd: unable to determine the current " 738 "Solaris release: %s\n"), strerror(errno)); 739 exit(1); 740 } else if (rc == FALSE) { 741 (void) fprintf(stderr, 742 gettext("nskernd: incorrect Solaris release " 743 "(requires %s)\n"), reqd); 744 exit(1); 745 } 746 747 rc = 0; 748 749 if (argc != 1) 750 usage(); 751 752 /* 753 * Usage: <progname> [-g] [-d <seconds to delay>] 754 */ 755 while ((i = getopt(argc, argv, "gd:")) != EOF) { 756 switch (i) { 757 case 'g': 758 display_msg = 1; 759 break; 760 case 'd': 761 delay_time = atoi(optarg); 762 if (delay_time <= 0) { 763 delay_time = 30; 764 } 765 break; 766 default: 767 syslog(LOG_ERR, 768 "Usage: nskernd [-g] [-d <seconds to delay>]"); 769 exit(1); 770 break; 771 } 772 } 773 774 if (chroot(dir) < 0) { 775 (void) fprintf(stderr, gettext("nskernd: chroot failed: %s\n"), 776 strerror(errno)); 777 exit(1); 778 } 779 780 if (chdir(dir) < 0) { 781 (void) fprintf(stderr, gettext("nskernd: chdir failed: %s\n"), 782 strerror(errno)); 783 exit(1); 784 } 785 786 /* 787 * Determine if we are in a Sun Cluster or not, before fork'ing 788 */ 789 (void) iscluster(); 790 791 /* 792 * create a pipe to synchronise the parent with the 793 * child just before it enters its service loop. 794 */ 795 if (pipe(syncpipe) < 0) { 796 (void) fprintf(stderr, 797 gettext("nskernd: cannot create pipe: %s\n"), 798 strerror(errno)); 799 exit(1); 800 } 801 /* 802 * Fork off a child that becomes the daemon. 803 */ 804 805 if ((rc = fork()) > 0) { 806 char c; 807 int n; 808 (void) close(syncpipe[1]); 809 /* 810 * wait for the close of the pipe. 811 * If we get a char back, indicates good 812 * status from child, so exit 0. 813 * If we get a zero length read, then the 814 * child has failed, so we do too. 815 */ 816 n = read(syncpipe[0], &c, 1); 817 exit((n <= 0) ? 1 : 0); 818 } else if (rc < 0) { 819 (void) fprintf(stderr, gettext("nskernd: cannot fork: %s\n"), 820 strerror(errno)); 821 exit(1); 822 } 823 824 /* 825 * In child - become daemon. 826 */ 827 828 /* use closefrom(3C) from PSARC/2000/193 when possible */ 829 for (i = 0; i < syncpipe[1]; i++) { 830 (void) close(i); 831 } 832 closefrom(syncpipe[1] + 1); 833 834 (void) open("/dev/console", O_WRONLY|O_APPEND); 835 (void) dup(0); 836 (void) dup(0); 837 (void) close(0); 838 839 (void) setpgrp(); 840 841 /* 842 * Ignore all signals apart from SIGTERM. 843 */ 844 845 for (i = 1; i < _sys_nsig; i++) 846 (void) sigset(i, SIG_IGN); 847 848 (void) sigset(SIGTERM, sighand); 849 850 /* 851 * Increase the number of fd's that can be open. 852 */ 853 854 rl.rlim_cur = RLIM_INFINITY; 855 rl.rlim_max = RLIM_INFINITY; 856 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) { 857 (void) fprintf(stderr, 858 gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"), 859 strerror(errno)); 860 (void) fprintf(stderr, 861 gettext("nskernd: the maximum number of nsctl open " 862 "devices may be reduced\n")); 863 } 864 865 /* 866 * Open /dev/nsctl and startup. 867 */ 868 869 nsctl_fd = open(rdev, O_RDONLY); 870 if (nsctl_fd < 0) { 871 (void) fprintf(stderr, gettext("nskernd: unable to open %s\n"), 872 rdev); 873 exit(1); 874 } 875 876 bzero(&data, sizeof (data)); 877 878 data.command = NSKERND_START; 879 data.data1 = (uint64_t)cl_nodeid; 880 run = 1; 881 882 startup = 1; 883 while (run) { 884 rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 885 if (rc < 0) { 886 /* try and do kernel cleanup and exit */ 887 if (shutdown()) { 888 run = 0; 889 } else { 890 sigterm = 0; 891 } 892 893 (void) fprintf(stderr, 894 gettext("nskernd: NSCIOC_NSKERND failed: %s\n"), 895 strerror(errno)); 896 continue; 897 } else if (sigterm) { 898 /* SIGTERM received - terminate */ 899 if (data.command != NSKERND_START && 900 (data.command != NSKERND_STOP || 901 data.data1 != (uint64_t)1)) { 902 /* need to do kernel cleanup */ 903 if (shutdown()) { 904 run = 0; 905 } else { 906 sigterm = 0; 907 data.command = NSKERND_START; 908 data.data1 = (uint64_t)cl_nodeid; 909 } 910 } else { 911 /* just quit */ 912 if (canshutdown()) { 913 run = 0; 914 } else { 915 /* cannot shutdown - threads active */ 916 sigterm = 0; 917 data.command = NSKERND_START; 918 data.data1 = (uint64_t)cl_nodeid; 919 } 920 } 921 continue; 922 } 923 if (startup) { 924 char c = 0; 925 (void) write(syncpipe[1], &c, 1); 926 (void) close(syncpipe[1]); 927 startup = 0; 928 } 929 switch (data.command) { 930 case NSKERND_START: /* (re)start completion */ 931 if (rc == 1) { 932 (void) fprintf(stderr, 933 gettext("nskernd: already started\n")); 934 run = 0; 935 } else if (rc == 2) { 936 (void) fprintf(stderr, 937 gettext("nskernd: stopped by kernel\n")); 938 run = 0; 939 } 940 data.command = NSKERND_WAIT; 941 break; 942 943 case NSKERND_STOP: /* kernel telling daemon to stop */ 944 if (data.data1 != (uint64_t)1) { 945 (void) shutdown(); 946 run = 0; 947 } 948 break; 949 950 case NSKERND_BSIZE: 951 /* 952 * kernel requesting partsize 953 * data1 - size return 954 * data2 - raw_fd (entry) 955 * - partition number (return) 956 */ 957 partition = -1; 958 get_bsize(data.data2, &data.data1, 959 &partition, data.char1); 960 data.data2 = (uint64_t)partition; 961 data.command = NSKERND_WAIT; 962 break; 963 964 case NSKERND_NEWLWP: /* kernel requesting a new LWP */ 965 newlwp(&data); 966 data.command = NSKERND_WAIT; 967 break; 968 969 case NSKERND_LOCK: /* kernel requesting lock */ 970 dolock(&data); 971 data.command = NSKERND_WAIT; 972 break; 973 974 case NSKERND_WAIT: /* kernel retrying wait */ 975 /* 976 * the kernel thread can be woken by the dr config 977 * utilities (ie cfgadm) therefore we just reissue 978 * the wait. 979 */ 980 break; 981 982 case NSKERND_IIBITMAP: 983 rc = log_iibmp_err(data.char1, (int)data.data1); 984 data.data1 = (uint64_t)rc; 985 data.command = NSKERND_WAIT; 986 break; 987 988 default: 989 (void) fprintf(stderr, 990 gettext("nskernd: unknown command %d"), 991 data.command); 992 data.command = NSKERND_WAIT; 993 break; 994 } 995 } 996 997 (void) close(nsctl_fd); 998 999 return (rc); 1000 } 1001