1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/resource.h> 28 #include <sys/priocntl.h> 29 #include <sys/rtpriocntl.h> 30 #include <sys/tspriocntl.h> 31 #include <sys/wait.h> 32 #include <sys/stat.h> 33 34 #include <strings.h> 35 #include <thread.h> 36 #include <stdlib.h> 37 #include <signal.h> 38 #include <errno.h> 39 #include <stdio.h> 40 #include <fcntl.h> 41 #include <locale.h> 42 #include <unistd.h> 43 #include <syslog.h> 44 45 #include <sys/nsctl/cfg.h> 46 #include <sys/nsctl/nsctl.h> 47 #include <sys/nsctl/nsc_ioctl.h> 48 #include <sys/nskernd.h> 49 #include <nsctl.h> 50 51 #include <sys/mkdev.h> 52 #include <sys/nsctl/sv_efi.h> 53 54 static const char *rdev = "/dev/nsctl"; 55 56 /* 57 * Define a minimal user stack size in bytes over and above the 58 * libthread THR_STACK_MIN minimum value. 59 * 60 * This stack size needs to be sufficient to run _newlwp() and then 61 * ioctl() down into the kernel. 62 */ 63 #define NSK_STACK_SIZE 512 64 65 /* 66 * LWP scheduling control switches. 67 * 68 * allow_pri - set to non-zero to enable priocntl() manipulations of 69 * created LWPs. 70 * allow_rt - set to non-zero to use the RT rather than the TS 71 * scheduling class when manipulating the schduling 72 * parameters for an LWP. Only used if allow_pri is 73 * non-zero. 74 */ 75 static int allow_pri = 1; 76 static int allow_rt = 0; /* disallow - bad interactions with timeout() */ 77 78 static int nsctl_fd = -1; 79 static int sigterm; 80 81 static int nthreads; /* number of threads in the kernel */ 82 static int exiting; /* shutdown in progress flag */ 83 static mutex_t thr_mutex = DEFAULTMUTEX; 84 static mutex_t cfg_mutex = DEFAULTMUTEX; 85 86 static int cl_nodeid = -1; 87 88 static int display_msg = 0; 89 static int delay_time = 30; 90 91 static void 92 usage(void) 93 { 94 fprintf(stderr, gettext("usage: nskernd\n")); 95 exit(255); 96 } 97 98 99 static void 100 sighand(int sig) 101 { 102 if (sig == SIGTERM) { 103 sigterm++; 104 } 105 } 106 107 108 /* 109 * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel 110 */ 111 int 112 nthread_inc(void) 113 { 114 mutex_lock(&thr_mutex); 115 if (exiting) { 116 /* cannot enter kernel as nskernd is being shutdown - exit */ 117 mutex_unlock(&thr_mutex); 118 return (0); 119 } 120 nthreads++; 121 mutex_unlock(&thr_mutex); 122 return (1); 123 } 124 125 126 void 127 nthread_dec(void) 128 { 129 mutex_lock(&thr_mutex); 130 nthreads--; 131 mutex_unlock(&thr_mutex); 132 } 133 134 135 /* 136 * returns: 1 - can shutdown; 0 - unable to shutdown 137 */ 138 int 139 canshutdown(void) 140 { 141 int rc = 1; 142 time_t start_delay; 143 144 mutex_lock(&thr_mutex); 145 if (nthreads > 0) { 146 if (display_msg) { 147 fprintf(stderr, 148 gettext("nskernd: unable to shutdown: " 149 "%d kernel threads in use\n"), nthreads); 150 } 151 start_delay = time(0); 152 while (nthreads > 0 && (time(0) - start_delay) < delay_time) { 153 mutex_unlock(&thr_mutex); 154 sleep(1); 155 mutex_lock(&thr_mutex); 156 fprintf(stderr, 157 gettext("nskernd: delay shutdown: " 158 "%d kernel threads in use\n"), nthreads); 159 } 160 if (nthreads > 0) { 161 rc = 0; 162 } else { 163 exiting = 1; 164 } 165 } else { 166 /* flag shutdown in progress */ 167 exiting = 1; 168 } 169 mutex_unlock(&thr_mutex); 170 171 return (rc); 172 } 173 174 175 /* 176 * returns: 1 - shutdown successful; 0 - unable to shutdown 177 */ 178 int 179 shutdown(void) 180 { 181 struct nskernd data; 182 int rc; 183 184 if (nsctl_fd < 0) 185 return (1); 186 187 bzero(&data, sizeof (data)); 188 data.command = NSKERND_STOP; 189 190 if (!canshutdown()) { 191 return (0); 192 } 193 194 rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 195 if (rc < 0) { 196 if (errno != EINTR || !sigterm) { 197 fprintf(stderr, 198 gettext("nskernd: NSKERND_STOP failed\n")); 199 } 200 } 201 202 return (1); 203 } 204 205 206 /* 207 * First function run by a NSKERND_NEWLWP thread. 208 * 209 * Determines if it needs to change the scheduling priority of the LWP, 210 * and then calls back into the kernel. 211 */ 212 static void * 213 _newlwp(void *arg) 214 { 215 struct nskernd nsk; 216 pcparms_t pcparms; 217 pcinfo_t pcinfo; 218 219 /* copy arguments onto stack and free heap memory */ 220 bcopy(arg, &nsk, sizeof (nsk)); 221 free(arg); 222 223 if (nsk.data2 && allow_pri) { 224 /* increase the scheduling priority of this LWP */ 225 226 bzero(&pcinfo, sizeof (pcinfo)); 227 strcpy(pcinfo.pc_clname, allow_rt ? "RT" : "TS"); 228 229 if (priocntl(0, 0, PC_GETCID, (char *)&pcinfo) < 0) { 230 fprintf(stderr, 231 gettext( 232 "nskernd: priocntl(PC_GETCID) failed: %s\n"), 233 strerror(errno)); 234 goto pri_done; 235 } 236 237 bzero(&pcparms, sizeof (pcparms)); 238 pcparms.pc_cid = pcinfo.pc_cid; 239 240 if (allow_rt) { 241 ((rtparms_t *)pcparms.pc_clparms)->rt_pri = 242 (pri_t)0; /* minimum RT priority */ 243 ((rtparms_t *)pcparms.pc_clparms)->rt_tqsecs = 244 (uint_t)RT_TQDEF; 245 ((rtparms_t *)pcparms.pc_clparms)->rt_tqnsecs = 246 RT_TQDEF; 247 } else { 248 ((tsparms_t *)pcparms.pc_clparms)->ts_uprilim = 249 ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 250 ((tsparms_t *)pcparms.pc_clparms)->ts_upri = 251 ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 252 } 253 254 if (priocntl(P_LWPID, P_MYID, 255 PC_SETPARMS, (char *)&pcparms) < 0) { 256 fprintf(stderr, 257 gettext( 258 "nskernd: priocntl(PC_SETPARMS) failed: %s\n"), 259 strerror(errno)); 260 } 261 } 262 263 pri_done: 264 if (nthread_inc()) { 265 (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 266 nthread_dec(); 267 } 268 return (NULL); 269 } 270 271 272 /* 273 * Start a new thread bound to an LWP. 274 * 275 * This is the user level side of nsc_create_process(). 276 */ 277 static void 278 newlwp(struct nskernd *req) 279 { 280 struct nskernd *nskp; 281 thread_t tid; 282 int rc; 283 284 nskp = malloc(sizeof (*nskp)); 285 if (!nskp) { 286 #ifdef DEBUG 287 fprintf(stderr, gettext("nskernd: malloc(%d) failed\n"), 288 sizeof (*nskp)); 289 #endif 290 req->data1 = (uint64_t)ENOMEM; 291 return; 292 } 293 294 /* copy args for child */ 295 bcopy(req, nskp, sizeof (*nskp)); 296 297 rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 298 _newlwp, nskp, THR_BOUND|THR_DETACHED, &tid); 299 300 if (rc != 0) { 301 /* thr_create failed */ 302 #ifdef DEBUG 303 fprintf(stderr, gettext("nskernd: thr_create failed: %s\n"), 304 strerror(errno)); 305 #endif 306 req->data1 = (uint64_t)errno; 307 free(nskp); 308 } else { 309 /* success - _newlwp() will free nskp */ 310 req->data1 = (uint64_t)0; 311 } 312 } 313 314 static int 315 log_iibmp_err(char *set, int flags) 316 { 317 CFGFILE *cfg; 318 char key[CFG_MAX_KEY]; 319 char buf[CFG_MAX_BUF]; 320 char newflags[CFG_MAX_BUF]; 321 char outbuf[CFG_MAX_BUF]; 322 char *mst, *shd, *bmp, *mode, *ovr, *cnode, *opt, *grp; 323 int setno, found = 0; 324 int setlen; 325 int rc = 0; 326 pid_t pid = -1; 327 328 if (set && *set) { 329 setlen = strlen(set); 330 } else { 331 return (EINVAL); 332 } 333 334 mutex_lock(&cfg_mutex); 335 cfg = cfg_open(""); 336 if (!cfg) { 337 mutex_unlock(&cfg_mutex); 338 return (ENXIO); 339 } 340 341 if (!cfg_lock(cfg, CFG_WRLOCK)) { 342 343 mutex_unlock(&cfg_mutex); 344 cfg_close(cfg); 345 346 pid = fork(); 347 348 if (pid == -1) { 349 fprintf(stderr, gettext( 350 "nskernd: Error forking\n")); 351 return (errno); 352 } else if (pid > 0) { 353 fprintf(stdout, gettext( 354 "nskernd: Attempting deferred bitmap error\n")); 355 return (0); 356 } 357 358 mutex_lock(&cfg_mutex); 359 cfg = cfg_open(""); 360 if (!cfg) { 361 mutex_unlock(&cfg_mutex); 362 fprintf(stderr, gettext( 363 "nskernd: Failed cfg_open, deferred bitmap\n")); 364 return (ENXIO); 365 } 366 367 /* Sooner or later, this lock will be free */ 368 while (!cfg_lock(cfg, CFG_WRLOCK)) 369 sleep(2); 370 } 371 372 /* find the proper set number */ 373 for (setno = 1; !found; setno++) { 374 snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 375 if (cfg_get_cstring(cfg, key, buf, CFG_MAX_BUF) < 0) { 376 break; 377 } 378 379 mst = strtok(buf, " "); 380 shd = strtok(NULL, " "); 381 if (strncmp(shd, set, setlen) == 0) { 382 found = 1; 383 384 bmp = strtok(NULL, " "); 385 mode = strtok(NULL, " "); 386 ovr = strtok(NULL, " "); 387 cnode = strtok(NULL, " "); 388 opt = strtok(NULL, " "); 389 grp = strtok(NULL, " "); 390 break; 391 } 392 } 393 394 if (found) { 395 /* were there flags in the options field already? */ 396 snprintf(newflags, CFG_MAX_BUF, "%s=0x%x", 397 NSKERN_II_BMP_OPTION, flags); 398 if (opt && strcmp(opt, "-") != 0) { 399 bzero(newflags, CFG_MAX_BUF); 400 opt = strtok(opt, ";"); 401 while (opt) { 402 if (strncmp(opt, NSKERN_II_BMP_OPTION, 403 strlen(NSKERN_II_BMP_OPTION)) != 0) { 404 strcat(newflags, ";"); 405 strcat(newflags, opt); 406 } 407 } 408 } 409 snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 410 snprintf(outbuf, CFG_MAX_BUF, "%s %s %s %s %s %s %s %s", 411 mst, shd, bmp, mode, ovr, cnode, newflags, grp); 412 if (cfg_put_cstring(cfg, key, outbuf, CFG_MAX_BUF) < 0) { 413 printf("Failed to put [%s]\n", outbuf); 414 rc = ENXIO; 415 } else { 416 cfg_commit(cfg); 417 rc = 0; 418 } 419 } else { 420 fprintf(stderr, gettext( 421 "nskernd: Failed deferred bitmap [%s]\n"), set); 422 rc = EINVAL; 423 } 424 cfg_unlock(cfg); 425 cfg_close(cfg); 426 mutex_unlock(&cfg_mutex); 427 428 /* 429 * if we are the fork'ed client, just exit, if parent just return 430 */ 431 if (pid == 0) { 432 exit(rc); 433 /*NOTREACHED*/ 434 } else { 435 return (rc); 436 } 437 } 438 439 /* 440 * First function run by a NSKERND_LOCK thread. 441 * 442 * Opens dscfg and locks it, 443 * and then calls back into the kernel. 444 * 445 * Incoming: 446 * data1 is the kernel address of the sync structure. 447 * data2 is read(0)/write(1) lock mode. 448 * 449 * Returns: 450 * data1 as incoming. 451 * data2 errno. 452 */ 453 static void * 454 _dolock(void *arg) 455 { 456 struct nskernd nsk; 457 CFGFILE *cfg; 458 int locked; 459 int mode; 460 int rc = 0; 461 462 /* copy arguments onto stack and free heap memory */ 463 bcopy(arg, &nsk, sizeof (nsk)); 464 free(arg); 465 466 mutex_lock(&cfg_mutex); 467 cfg = cfg_open(""); 468 if (cfg == NULL) { 469 #ifdef DEBUG 470 fprintf(stderr, gettext("nskernd: cfg_open failed: %s\n"), 471 strerror(errno)); 472 #endif 473 rc = ENXIO; 474 } 475 476 if (nsk.data2 == 0) { 477 mode = CFG_RDLOCK; 478 } else { 479 mode = CFG_WRLOCK; 480 } 481 482 locked = 0; 483 if (rc == 0) { 484 if (cfg_lock(cfg, mode)) { 485 locked = 1; 486 } else { 487 #ifdef DEBUG 488 fprintf(stderr, 489 gettext("nskernd: cfg_lock failed: %s\n"), 490 strerror(errno)); 491 #endif 492 rc = EINVAL; 493 } 494 } 495 496 /* return to kernel */ 497 498 nsk.data2 = (uint64_t)rc; 499 if (nthread_inc()) { 500 (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 501 nthread_dec(); 502 } 503 504 /* cleanup */ 505 506 if (locked) { 507 cfg_unlock(cfg); 508 locked = 0; 509 } 510 511 if (cfg != NULL) { 512 cfg_close(cfg); 513 cfg = NULL; 514 } 515 mutex_unlock(&cfg_mutex); 516 517 return (NULL); 518 } 519 520 521 /* 522 * Inter-node lock thread. 523 * 524 * This is the user level side of nsc_rmlock(). 525 */ 526 static void 527 dolock(struct nskernd *req) 528 { 529 struct nskernd *nskp; 530 thread_t tid; 531 int rc; 532 533 /* create a new thread to do the lock and return to kernel */ 534 535 nskp = malloc(sizeof (*nskp)); 536 if (!nskp) { 537 #ifdef DEBUG 538 fprintf(stderr, gettext("nskernd:dolock: malloc(%d) failed\n"), 539 sizeof (*nskp)); 540 #endif 541 req->data1 = (uint64_t)ENOMEM; 542 return; 543 } 544 545 /* copy args for child */ 546 bcopy(req, nskp, sizeof (*nskp)); 547 548 rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 549 _dolock, nskp, THR_BOUND|THR_DETACHED, &tid); 550 551 if (rc != 0) { 552 /* thr_create failed */ 553 #ifdef DEBUG 554 fprintf(stderr, gettext("nskernd: thr_create failed: %s\n"), 555 strerror(errno)); 556 #endif 557 req->data1 = (uint64_t)errno; 558 free(nskp); 559 } else { 560 /* success - _dolock() will free nskp */ 561 req->data1 = (uint64_t)0; 562 } 563 } 564 565 566 /* 567 * Convenience code for engineering test of multi-terabyte volumes. 568 * 569 * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI 570 * labels. This code allocates a simple efi label structure and ioctls 571 * to extract the size of a zvol. It only handles the minimal EFI ioctl 572 * implementation in zvol. 573 */ 574 575 static void 576 zvol_bsize(char *path, uint64_t *size, const int pnum) 577 { 578 struct stat64 stb1, stb2; 579 struct dk_minfo dkm; 580 int fd = -1; 581 int rc; 582 583 if (cl_nodeid || pnum != 0) 584 return; 585 586 if ((fd = open(path, O_RDONLY)) < 0) { 587 return; 588 } 589 590 if (stat64("/devices/pseudo/zfs@0:zfs", &stb1) != 0 || 591 fstat64(fd, &stb2) != 0 || 592 !S_ISCHR(stb1.st_mode) || 593 !S_ISCHR(stb2.st_mode) || 594 major(stb1.st_rdev) != major(stb2.st_rdev)) { 595 (void) close(fd); 596 return; 597 } 598 599 rc = ioctl(fd, DKIOCGMEDIAINFO, (void *)&dkm); 600 if (rc >= 0) { 601 *size = LE_64(dkm.dki_capacity) * 602 (dkm.dki_lbsize) / 512; 603 } 604 605 (void) close(fd); 606 } 607 608 /* ARGSUSED */ 609 static void 610 get_bsize(uint64_t raw_fd, uint64_t *size, int *partitionp, char *path) 611 { 612 struct nscioc_bsize bsize; 613 #ifdef DKIOCPARTITION 614 struct partition64 p64; 615 #endif 616 struct dk_cinfo dki_info; 617 struct vtoc vtoc; 618 int fd; 619 620 *partitionp = -1; 621 *size = (uint64_t)0; 622 623 dki_info.dki_partition = (ushort_t)-1; 624 bsize.dki_info = (uint64_t)(unsigned long)&dki_info; 625 bsize.vtoc = (uint64_t)(unsigned long)&vtoc; 626 bsize.raw_fd = raw_fd; 627 bsize.efi = 0; 628 629 fd = open(rdev, O_RDONLY); 630 if (fd < 0) 631 return; 632 633 if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 634 if (dki_info.dki_partition != (ushort_t)-1) { 635 /* assume part# is ok and just the size failed */ 636 *partitionp = (int)dki_info.dki_partition; 637 638 #ifdef DKIOCPARTITION 639 /* see if this is an EFI label */ 640 bzero(&p64, sizeof (p64)); 641 p64.p_partno = (uint_t)*partitionp; 642 if ((ioctl(fd, DKIOCPARTITION, &p64)) > 0) { 643 *size = (uint64_t)p64.p_size; 644 } else { 645 bsize.p64 = (uint64_t)(unsigned long)&p64; 646 bsize.efi = 1; 647 648 if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 649 /* see if this is a zvol */ 650 zvol_bsize(path, size, *partitionp); 651 } else { 652 *size = (uint64_t)p64.p_size; 653 } 654 } 655 #endif /* DKIOCPARTITION */ 656 } 657 658 close(fd); 659 return; 660 } 661 662 close(fd); 663 664 *partitionp = (int)dki_info.dki_partition; 665 666 if (vtoc.v_sanity != VTOC_SANE) 667 return; 668 669 if (vtoc.v_version != V_VERSION && vtoc.v_version != 0) 670 return; 671 672 if (dki_info.dki_partition > V_NUMPAR) 673 return; 674 675 *size = (uint64_t)vtoc.v_part[(int)dki_info.dki_partition].p_size; 676 } 677 678 679 static int 680 iscluster(void) 681 { 682 /* 683 * Find out if we are running in a cluster 684 */ 685 cl_nodeid = cfg_iscluster(); 686 if (cl_nodeid > 0) { 687 return (TRUE); 688 } else if (cl_nodeid == 0) { 689 return (FALSE); 690 } 691 692 fprintf(stderr, "%s\n", 693 gettext("nskernd: unable to ascertain environment")); 694 exit(1); 695 /* NOTREACHED */ 696 } 697 698 /* 699 * Runtime Solaris release checking - build release == runtime release 700 * is always considered success, so only keep entries in the map for 701 * the special cases. 702 */ 703 static nsc_release_t nskernd_rel_map[] = { 704 /* { "5.10", "5.10" }, */ 705 { "5.11", "5.10" }, 706 { NULL, NULL } 707 }; 708 709 710 #ifdef lint 711 #define main nskernd_main 712 #endif 713 /* ARGSUSED1 */ 714 int 715 main(int argc, char *argv[]) 716 { 717 const char *dir = "/"; 718 struct nskernd data; 719 struct rlimit rl; 720 int i, run, rc; 721 int partition; 722 char *reqd; 723 int syncpipe[2]; 724 int startup; 725 726 (void) setlocale(LC_ALL, ""); 727 (void) textdomain("nskernd"); 728 729 rc = nsc_check_release(BUILD_REV_STR, nskernd_rel_map, &reqd); 730 if (rc < 0) { 731 fprintf(stderr, 732 gettext("nskernd: unable to determine the current " 733 "Solaris release: %s\n"), strerror(errno)); 734 exit(1); 735 } else if (rc == FALSE) { 736 fprintf(stderr, 737 gettext("nskernd: incorrect Solaris release " 738 "(requires %s)\n"), reqd); 739 exit(1); 740 } 741 742 rc = 0; 743 744 if (argc != 1) 745 usage(); 746 747 /* 748 * Usage: <progname> [-g] [-d <seconds to delay>] 749 */ 750 while ((i = getopt(argc, argv, "gd:")) != EOF) { 751 switch (i) { 752 case 'g': 753 display_msg = 1; 754 break; 755 case 'd': 756 delay_time = atoi(optarg); 757 if (delay_time <= 0) { 758 delay_time = 30; 759 } 760 break; 761 default: 762 syslog(LOG_ERR, 763 "Usage: nskernd [-g] [-d <seconds to delay>]"); 764 exit(1); 765 break; 766 } 767 } 768 769 if (chroot(dir) < 0) { 770 fprintf(stderr, gettext("nskernd: chroot failed: %s\n"), 771 strerror(errno)); 772 exit(1); 773 } 774 775 if (chdir(dir) < 0) { 776 fprintf(stderr, gettext("nskernd: chdir failed: %s\n"), 777 strerror(errno)); 778 exit(1); 779 } 780 781 /* 782 * Determine if we are in a Sun Cluster or not, before fork'ing 783 */ 784 (void) iscluster(); 785 786 /* 787 * create a pipe to synchronise the parent with the 788 * child just before it enters its service loop. 789 */ 790 if (pipe(syncpipe) < 0) { 791 fprintf(stderr, gettext("nskernd: cannot create pipe: %s\n"), 792 strerror(errno)); 793 exit(1); 794 } 795 /* 796 * Fork off a child that becomes the daemon. 797 */ 798 799 if ((rc = fork()) > 0) { 800 char c; 801 int n; 802 (void) close(syncpipe[1]); 803 /* 804 * wait for the close of the pipe. 805 * If we get a char back, indicates good 806 * status from child, so exit 0. 807 * If we get a zero length read, then the 808 * child has failed, so we do too. 809 */ 810 n = read(syncpipe[0], &c, 1); 811 exit((n <= 0) ? 1 : 0); 812 } else if (rc < 0) { 813 fprintf(stderr, gettext("nskernd: cannot fork: %s\n"), 814 strerror(errno)); 815 exit(1); 816 } 817 818 /* 819 * In child - become daemon. 820 */ 821 822 /* use closefrom(3C) from PSARC/2000/193 when possible */ 823 for (i = 0; i < syncpipe[1]; i++) { 824 (void) close(i); 825 } 826 closefrom(syncpipe[1] + 1); 827 828 (void) open("/dev/console", O_WRONLY|O_APPEND); 829 (void) dup(0); 830 (void) dup(0); 831 (void) close(0); 832 833 setpgrp(); 834 835 /* 836 * Ignore all signals apart from SIGTERM. 837 */ 838 839 for (i = 1; i < _sys_nsig; i++) 840 (void) sigset(i, SIG_IGN); 841 842 (void) sigset(SIGTERM, sighand); 843 844 /* 845 * Increase the number of fd's that can be open. 846 */ 847 848 rl.rlim_cur = RLIM_INFINITY; 849 rl.rlim_max = RLIM_INFINITY; 850 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) { 851 fprintf(stderr, 852 gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"), 853 strerror(errno)); 854 fprintf(stderr, 855 gettext("nskernd: the maximum number of nsctl open " 856 "devices may be reduced\n")); 857 } 858 859 /* 860 * Open /dev/nsctl and startup. 861 */ 862 863 nsctl_fd = open(rdev, O_RDONLY); 864 if (nsctl_fd < 0) { 865 fprintf(stderr, gettext("nskernd: unable to open %s\n"), rdev); 866 exit(1); 867 } 868 869 bzero(&data, sizeof (data)); 870 871 data.command = NSKERND_START; 872 data.data1 = (uint64_t)cl_nodeid; 873 run = 1; 874 875 startup = 1; 876 while (run) { 877 rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 878 if (rc < 0) { 879 /* try and do kernel cleanup and exit */ 880 if (shutdown()) { 881 run = 0; 882 } else { 883 sigterm = 0; 884 } 885 886 fprintf(stderr, 887 gettext("nskernd: NSCIOC_NSKERND failed: %s\n"), 888 strerror(errno)); 889 continue; 890 } else if (sigterm) { 891 /* SIGTERM received - terminate */ 892 if (data.command != NSKERND_START && 893 (data.command != NSKERND_STOP || 894 data.data1 != (uint64_t)1)) { 895 /* need to do kernel cleanup */ 896 if (shutdown()) { 897 run = 0; 898 } else { 899 sigterm = 0; 900 data.command = NSKERND_START; 901 data.data1 = (uint64_t)cl_nodeid; 902 } 903 } else { 904 /* just quit */ 905 if (canshutdown()) { 906 run = 0; 907 } else { 908 /* cannot shutdown - threads active */ 909 sigterm = 0; 910 data.command = NSKERND_START; 911 data.data1 = (uint64_t)cl_nodeid; 912 } 913 } 914 continue; 915 } 916 if (startup) { 917 char c = 0; 918 (void) write(syncpipe[1], &c, 1); 919 (void) close(syncpipe[1]); 920 startup = 0; 921 } 922 switch (data.command) { 923 case NSKERND_START: /* (re)start completion */ 924 if (rc == 1) { 925 fprintf(stderr, 926 gettext("nskernd: already started\n")); 927 run = 0; 928 } else if (rc == 2) { 929 fprintf(stderr, 930 gettext("nskernd: stopped by kernel\n")); 931 run = 0; 932 } 933 data.command = NSKERND_WAIT; 934 break; 935 936 case NSKERND_STOP: /* kernel telling daemon to stop */ 937 if (data.data1 != (uint64_t)1) { 938 (void) shutdown(); 939 run = 0; 940 } 941 break; 942 943 case NSKERND_BSIZE: 944 /* 945 * kernel requesting partsize 946 * data1 - size return 947 * data2 - raw_fd (entry) 948 * - partition number (return) 949 */ 950 partition = -1; 951 get_bsize(data.data2, &data.data1, 952 &partition, data.char1); 953 data.data2 = (uint64_t)partition; 954 data.command = NSKERND_WAIT; 955 break; 956 957 case NSKERND_NEWLWP: /* kernel requesting a new LWP */ 958 newlwp(&data); 959 data.command = NSKERND_WAIT; 960 break; 961 962 case NSKERND_LOCK: /* kernel requesting lock */ 963 dolock(&data); 964 data.command = NSKERND_WAIT; 965 break; 966 967 case NSKERND_WAIT: /* kernel retrying wait */ 968 /* 969 * the kernel thread can be woken by the dr config 970 * utilities (ie cfgadm) therefore we just reissue 971 * the wait. 972 */ 973 break; 974 975 case NSKERND_IIBITMAP: 976 rc = log_iibmp_err(data.char1, (int)data.data1); 977 data.data1 = (uint64_t)rc; 978 data.command = NSKERND_WAIT; 979 break; 980 981 default: 982 fprintf(stderr, 983 gettext("nskernd: unknown command %d"), 984 data.command); 985 data.command = NSKERND_WAIT; 986 break; 987 } 988 } 989 990 (void) close(nsctl_fd); 991 992 return (rc); 993 } 994