1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 26 /* All Rights Reserved */ 27 28 /* 29 * University Copyright- Copyright (c) 1982, 1986, 1988 30 * The Regents of the University of California 31 * All Rights Reserved 32 * 33 * University Acknowledgment- Portions of this document are derived from 34 * software developed by the University of California, Berkeley, and its 35 * contributors. 36 */ 37 38 /* LINTLIBRARY */ 39 /* PROTOLIB1 */ 40 41 /* NFS server */ 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 #include <sys/stat.h> 46 #include <syslog.h> 47 #include <tiuser.h> 48 #include <rpc/rpc.h> 49 #include <errno.h> 50 #include <thread.h> 51 #include <sys/resource.h> 52 #include <sys/time.h> 53 #include <sys/file.h> 54 #include <nfs/nfs.h> 55 #include <nfs/nfs_acl.h> 56 #include <nfs/nfssys.h> 57 #include <stdio.h> 58 #include <stdio_ext.h> 59 #include <stdlib.h> 60 #include <signal.h> 61 #include <netconfig.h> 62 #include <netdir.h> 63 #include <string.h> 64 #include <unistd.h> 65 #include <limits.h> 66 #include <stropts.h> 67 #include <sys/tihdr.h> 68 #include <sys/wait.h> 69 #include <poll.h> 70 #include <priv_utils.h> 71 #include <sys/tiuser.h> 72 #include <netinet/tcp.h> 73 #include <deflt.h> 74 #include <rpcsvc/daemon_utils.h> 75 #include <rpcsvc/nfs4_prot.h> 76 #include <libnvpair.h> 77 #include <libscf.h> 78 #include <libshare.h> 79 #include "nfs_tbind.h" 80 #include "thrpool.h" 81 #include "smfcfg.h" 82 83 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */ 84 #define QUIESCE_VERSMIN 4 85 /* DSS: distributed stable storage */ 86 #define DSS_VERSMIN 4 87 88 static int nfssvc(int, struct netbuf, struct netconfig *); 89 static int nfssvcpool(int maxservers); 90 static int dss_init(uint_t npaths, char **pathnames); 91 static void dss_mkleafdirs(uint_t npaths, char **pathnames); 92 static void dss_mkleafdir(char *dir, char *leaf, char *path); 93 static void usage(void); 94 int qstrcmp(const void *s1, const void *s2); 95 96 extern int _nfssys(int, void *); 97 98 extern int daemonize_init(void); 99 extern void daemonize_fini(int fd); 100 101 /* signal handlers */ 102 static void sigflush(int); 103 static void quiesce(int); 104 105 static char *MyName; 106 static NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp", 107 "/dev/udp6", NULL }; 108 /* static NETSELDECL(defaultprotos)[] = { NC_UDP, NC_TCP, NULL }; */ 109 /* 110 * The following are all globals used by routines in nfs_tbind.c. 111 */ 112 size_t end_listen_fds; /* used by conn_close_oldest() */ 113 size_t num_fds = 0; /* used by multiple routines */ 114 int listen_backlog = 32; /* used by bind_to_{provider,proto}() */ 115 int num_servers; /* used by cots_listen_event() */ 116 int (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc; 117 /* used by cots_listen_event() */ 118 int max_conns_allowed = -1; /* used by cots_listen_event() */ 119 120 /* 121 * Keep track of min/max versions of NFS protocol to be started. 122 * Start with the defaults (min == 2, max == 3). We have the 123 * capability of starting vers=4 but only if the user requests it. 124 */ 125 int nfs_server_vers_min = NFS_VERSMIN_DEFAULT; 126 int nfs_server_vers_max = NFS_VERSMAX_DEFAULT; 127 128 /* 129 * Set the default for server delegation enablement and set per 130 * /etc/default/nfs configuration (if present). 131 */ 132 int nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT; 133 134 int 135 main(int ac, char *av[]) 136 { 137 char *dir = "/"; 138 int allflag = 0; 139 int df_allflag = 0; 140 int opt_cnt = 0; 141 int maxservers = 1; /* zero allows inifinte number of threads */ 142 int maxservers_set = 0; 143 int logmaxservers = 0; 144 int pid; 145 int i; 146 char *provider = (char *)NULL; 147 char *df_provider = (char *)NULL; 148 struct protob *protobp0, *protobp; 149 NETSELDECL(proto) = NULL; 150 NETSELDECL(df_proto) = NULL; 151 NETSELPDECL(providerp); 152 char *defval; 153 boolean_t can_do_mlp; 154 uint_t dss_npaths = 0; 155 char **dss_pathnames = NULL; 156 sigset_t sgset; 157 char name[PATH_MAX], value[PATH_MAX]; 158 int ret, bufsz; 159 160 int pipe_fd = -1; 161 162 MyName = *av; 163 164 /* 165 * Initializations that require more privileges than we need to run. 166 */ 167 (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID); 168 svcsetprio(); 169 170 can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP); 171 if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 172 DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS, 173 can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) { 174 (void) fprintf(stderr, "%s should be run with" 175 " sufficient privileges\n", av[0]); 176 exit(1); 177 } 178 179 (void) enable_extended_FILE_stdio(-1, -1); 180 181 /* 182 * Read in the values from SMF first before we check 183 * command line options so the options override SMF values. 184 */ 185 bufsz = PATH_MAX; 186 ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE, 187 SCF_TYPE_INTEGER, NFSD, &bufsz); 188 if (ret == SA_OK) { 189 errno = 0; 190 max_conns_allowed = strtol(value, (char **)NULL, 10); 191 if (errno != 0) 192 max_conns_allowed = -1; 193 } 194 195 bufsz = PATH_MAX; 196 ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE, 197 SCF_TYPE_INTEGER, NFSD, &bufsz); 198 if (ret == SA_OK) { 199 errno = 0; 200 listen_backlog = strtol(value, (char **)NULL, 10); 201 if (errno != 0) { 202 listen_backlog = 32; 203 } 204 } 205 206 bufsz = PATH_MAX; 207 ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE, 208 SCF_TYPE_ASTRING, NFSD, &bufsz); 209 if ((ret == SA_OK) && strlen(value) > 0) { 210 df_proto = strdup(value); 211 opt_cnt++; 212 if (strncasecmp("ALL", value, 3) == 0) { 213 free(df_proto); 214 df_proto = NULL; 215 df_allflag = 1; 216 } 217 } 218 219 bufsz = PATH_MAX; 220 ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE, 221 SCF_TYPE_ASTRING, NFSD, &bufsz); 222 if ((ret == SA_OK) && strlen(value) > 0) { 223 df_provider = strdup(value); 224 opt_cnt++; 225 } 226 227 bufsz = PATH_MAX; 228 ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE, 229 SCF_TYPE_INTEGER, NFSD, &bufsz); 230 if (ret == SA_OK) { 231 errno = 0; 232 maxservers = strtol(value, (char **)NULL, 10); 233 if (errno != 0) 234 maxservers = 1; 235 else 236 maxservers_set = 1; 237 } 238 239 bufsz = 4; 240 ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE, 241 SCF_TYPE_INTEGER, NFSD, &bufsz); 242 if (ret == SA_OK) 243 nfs_server_vers_min = strtol(value, (char **)NULL, 10); 244 245 bufsz = 4; 246 ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE, 247 SCF_TYPE_INTEGER, NFSD, &bufsz); 248 if (ret == SA_OK) 249 nfs_server_vers_max = strtol(value, (char **)NULL, 10); 250 251 bufsz = PATH_MAX; 252 ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE, 253 SCF_TYPE_ASTRING, NFSD, &bufsz); 254 if (ret == SA_OK) 255 if (strncasecmp(value, "off", 3) == 0) 256 nfs_server_delegation = FALSE; 257 258 /* 259 * Conflict options error messages. 260 */ 261 if (opt_cnt > 1) { 262 (void) fprintf(stderr, "\nConflicting options, only one of " 263 "the following options can be specified\n" 264 "in SMF:\n" 265 "\tprotocol=ALL\n" 266 "\tprotocol=protocol\n" 267 "\tdevice=devicename\n\n"); 268 usage(); 269 } 270 opt_cnt = 0; 271 272 while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) { 273 switch (i) { 274 case 'a': 275 free(df_proto); 276 df_proto = NULL; 277 free(df_provider); 278 df_provider = NULL; 279 280 allflag = 1; 281 opt_cnt++; 282 break; 283 284 case 'c': 285 max_conns_allowed = atoi(optarg); 286 break; 287 288 case 'p': 289 proto = optarg; 290 df_allflag = 0; 291 opt_cnt++; 292 break; 293 294 /* 295 * DSS: NFSv4 distributed stable storage. 296 * 297 * This is a Contracted Project Private interface, for 298 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313. 299 */ 300 case 's': 301 if (strlen(optarg) < MAXPATHLEN) { 302 /* first "-s" option encountered? */ 303 if (dss_pathnames == NULL) { 304 /* 305 * Allocate maximum possible space 306 * required given cmdline arg count; 307 * "-s <path>" consumes two args. 308 */ 309 size_t sz = (ac / 2) * sizeof (char *); 310 dss_pathnames = (char **)malloc(sz); 311 if (dss_pathnames == NULL) { 312 (void) fprintf(stderr, "%s: " 313 "dss paths malloc failed\n", 314 av[0]); 315 exit(1); 316 } 317 (void) memset(dss_pathnames, 0, sz); 318 } 319 dss_pathnames[dss_npaths] = optarg; 320 dss_npaths++; 321 } else { 322 (void) fprintf(stderr, 323 "%s: -s pathname too long.\n", av[0]); 324 } 325 break; 326 327 case 't': 328 provider = optarg; 329 df_allflag = 0; 330 opt_cnt++; 331 break; 332 333 case 'l': 334 listen_backlog = atoi(optarg); 335 break; 336 337 case '?': 338 usage(); 339 /* NOTREACHED */ 340 } 341 } 342 343 allflag = df_allflag; 344 if (proto == NULL) 345 proto = df_proto; 346 if (provider == NULL) 347 provider = df_provider; 348 349 /* 350 * Conflict options error messages. 351 */ 352 if (opt_cnt > 1) { 353 (void) fprintf(stderr, "\nConflicting options, only one of " 354 "the following options can be specified\n" 355 "on the command line:\n" 356 "\t-a\n" 357 "\t-p protocol\n" 358 "\t-t transport\n\n"); 359 usage(); 360 } 361 362 if (proto != NULL && 363 strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) { 364 if (nfs_server_vers_max == NFS_V4) { 365 if (nfs_server_vers_min == NFS_V4) { 366 fprintf(stderr, 367 "NFS version 4 is not supported " 368 "with the UDP protocol. Exiting\n"); 369 exit(3); 370 } else { 371 fprintf(stderr, 372 "NFS version 4 is not supported " 373 "with the UDP protocol.\n"); 374 } 375 } 376 } 377 378 /* 379 * If there is exactly one more argument, it is the number of 380 * servers. 381 */ 382 if (optind == ac - 1) { 383 maxservers = atoi(av[optind]); 384 maxservers_set = 1; 385 } 386 /* 387 * If there are two or more arguments, then this is a usage error. 388 */ 389 else if (optind < ac - 1) 390 usage(); 391 /* 392 * Check the ranges for min/max version specified 393 */ 394 else if ((nfs_server_vers_min > nfs_server_vers_max) || 395 (nfs_server_vers_min < NFS_VERSMIN) || 396 (nfs_server_vers_max > NFS_VERSMAX)) 397 usage(); 398 /* 399 * There are no additional arguments, and we haven't set maxservers 400 * explicitly via the config file, we use a default number of 401 * servers. We will log this. 402 */ 403 else if (maxservers_set == 0) 404 logmaxservers = 1; 405 406 /* 407 * Basic Sanity checks on options 408 * 409 * max_conns_allowed must be positive, except for the special 410 * value of -1 which is used internally to mean unlimited, -1 isn't 411 * documented but we allow it anyway. 412 * 413 * maxservers must be positive 414 * listen_backlog must be positive or zero 415 */ 416 if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) || 417 (listen_backlog < 0) || (maxservers <= 0)) { 418 usage(); 419 } 420 421 /* 422 * Set current dir to server root 423 */ 424 if (chdir(dir) < 0) { 425 (void) fprintf(stderr, "%s: ", MyName); 426 perror(dir); 427 exit(1); 428 } 429 430 #ifndef DEBUG 431 pipe_fd = daemonize_init(); 432 #endif 433 434 openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON); 435 436 /* 437 * establish our lock on the lock file and write our pid to it. 438 * exit if some other process holds the lock, or if there's any 439 * error in writing/locking the file. 440 */ 441 pid = _enter_daemon_lock(NFSD); 442 switch (pid) { 443 case 0: 444 break; 445 case -1: 446 fprintf(stderr, "error locking for %s: %s", NFSD, 447 strerror(errno)); 448 exit(2); 449 default: 450 /* daemon was already running */ 451 exit(0); 452 } 453 454 /* 455 * If we've been given a list of paths to be used for distributed 456 * stable storage, and provided we're going to run a version 457 * that supports it, setup the DSS paths. 458 */ 459 if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) { 460 if (dss_init(dss_npaths, dss_pathnames) != 0) { 461 fprintf(stderr, "%s", "dss_init failed. Exiting."); 462 exit(1); 463 } 464 } 465 466 /* 467 * Block all signals till we spawn other 468 * threads. 469 */ 470 (void) sigfillset(&sgset); 471 (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL); 472 473 if (logmaxservers) { 474 fprintf(stderr, 475 "Number of servers not specified. Using default of %d.", 476 maxservers); 477 } 478 479 /* 480 * Make sure to unregister any previous versions in case the 481 * user is reconfiguring the server in interesting ways. 482 */ 483 svc_unreg(NFS_PROGRAM, NFS_VERSION); 484 svc_unreg(NFS_PROGRAM, NFS_V3); 485 svc_unreg(NFS_PROGRAM, NFS_V4); 486 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2); 487 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3); 488 489 /* 490 * Set up kernel RPC thread pool for the NFS server. 491 */ 492 if (nfssvcpool(maxservers)) { 493 fprintf(stderr, "Can't set up kernel NFS service: %s. Exiting", 494 strerror(errno)); 495 exit(1); 496 } 497 498 /* 499 * Set up blocked thread to do LWP creation on behalf of the kernel. 500 */ 501 if (svcwait(NFS_SVCPOOL_ID)) { 502 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting", 503 strerror(errno)); 504 exit(1); 505 } 506 507 /* 508 * RDMA start and stop thread. 509 * Per pool RDMA listener creation and 510 * destructor thread. 511 * 512 * start rdma services and block in the kernel. 513 * (only if proto or provider is not set to TCP or UDP) 514 */ 515 if ((proto == NULL) && (provider == NULL)) { 516 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min, 517 nfs_server_vers_max, nfs_server_delegation)) { 518 fprintf(stderr, 519 "Can't set up RDMA creator thread : %s", 520 strerror(errno)); 521 } 522 } 523 524 /* 525 * Now open up for signal delivery 526 */ 527 528 (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL); 529 sigset(SIGTERM, sigflush); 530 sigset(SIGUSR1, quiesce); 531 532 /* 533 * Build a protocol block list for registration. 534 */ 535 protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob)); 536 protobp->serv = "NFS"; 537 protobp->versmin = nfs_server_vers_min; 538 protobp->versmax = nfs_server_vers_max; 539 protobp->program = NFS_PROGRAM; 540 541 protobp->next = (struct protob *)malloc(sizeof (struct protob)); 542 protobp = protobp->next; 543 protobp->serv = "NFS_ACL"; /* not used */ 544 protobp->versmin = nfs_server_vers_min; 545 /* XXX - this needs work to get the version just right */ 546 protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ? 547 NFS_ACL_V3 : nfs_server_vers_max; 548 protobp->program = NFS_ACL_PROGRAM; 549 protobp->next = (struct protob *)NULL; 550 551 if (allflag) { 552 if (do_all(protobp0, nfssvc) == -1) { 553 fprintf(stderr, "setnetconfig failed : %s", 554 strerror(errno)); 555 exit(1); 556 } 557 } else if (proto) { 558 /* there's more than one match for the same protocol */ 559 struct netconfig *nconf; 560 NCONF_HANDLE *nc; 561 bool_t protoFound = FALSE; 562 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) { 563 fprintf(stderr, "setnetconfig failed : %s", 564 strerror(errno)); 565 goto done; 566 } 567 while (nconf = getnetconfig(nc)) { 568 if (strcmp(nconf->nc_proto, proto) == 0) { 569 protoFound = TRUE; 570 do_one(nconf->nc_device, NULL, 571 protobp0, nfssvc); 572 } 573 } 574 (void) endnetconfig(nc); 575 if (protoFound == FALSE) { 576 fprintf(stderr, 577 "couldn't find netconfig entry for protocol %s", 578 proto); 579 } 580 } else if (provider) 581 do_one(provider, proto, protobp0, nfssvc); 582 else { 583 for (providerp = defaultproviders; 584 *providerp != NULL; providerp++) { 585 provider = *providerp; 586 do_one(provider, NULL, protobp0, nfssvc); 587 } 588 } 589 done: 590 591 free(protobp); 592 free(protobp0); 593 594 if (num_fds == 0) { 595 fprintf(stderr, "Could not start NFS service for any protocol." 596 " Exiting"); 597 exit(1); 598 } 599 600 end_listen_fds = num_fds; 601 602 /* 603 * nfsd is up and running as far as we are concerned. 604 */ 605 daemonize_fini(pipe_fd); 606 607 /* 608 * Get rid of unneeded privileges. 609 */ 610 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION, 611 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL); 612 613 /* 614 * Poll for non-data control events on the transport descriptors. 615 */ 616 poll_for_action(); 617 618 /* 619 * If we get here, something failed in poll_for_action(). 620 */ 621 return (1); 622 } 623 624 static int 625 nfssvcpool(int maxservers) 626 { 627 struct svcpool_args npa; 628 629 npa.id = NFS_SVCPOOL_ID; 630 npa.maxthreads = maxservers; 631 npa.redline = 0; 632 npa.qsize = 0; 633 npa.timeout = 0; 634 npa.stksize = 0; 635 npa.max_same_xprt = 0; 636 return (_nfssys(SVCPOOL_CREATE, &npa)); 637 } 638 639 /* 640 * Establish NFS service thread. 641 */ 642 static int 643 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf) 644 { 645 struct nfs_svc_args nsa; 646 647 nsa.fd = fd; 648 nsa.netid = nconf->nc_netid; 649 nsa.addrmask = addrmask; 650 if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) { 651 nsa.versmax = (nfs_server_vers_max > NFS_V3) ? 652 NFS_V3 : nfs_server_vers_max; 653 nsa.versmin = nfs_server_vers_min; 654 /* 655 * If no version left, silently do nothing, previous 656 * checks will have assured at least TCP is available. 657 */ 658 if (nsa.versmin > nsa.versmax) 659 return (0); 660 } else { 661 nsa.versmax = nfs_server_vers_max; 662 nsa.versmin = nfs_server_vers_min; 663 } 664 nsa.delegation = nfs_server_delegation; 665 return (_nfssys(NFS_SVC, &nsa)); 666 } 667 668 static void 669 usage(void) 670 { 671 (void) fprintf(stderr, 672 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName); 673 (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n"); 674 (void) fprintf(stderr, 675 "\twhere -a causes <nservers> to be started on each appropriate transport,\n"); 676 (void) fprintf(stderr, 677 "\tmax_conns is the maximum number of concurrent connections allowed,\n"); 678 (void) fprintf(stderr, "\t\tand max_conns must be a decimal number"); 679 (void) fprintf(stderr, "> zero,\n"); 680 (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n"); 681 (void) fprintf(stderr, 682 "\ttransport is a transport provider name (i.e. device),\n"); 683 (void) fprintf(stderr, 684 "\tlisten_backlog is the TCP listen backlog,\n"); 685 (void) fprintf(stderr, 686 "\tand <nservers> must be a decimal number > zero.\n"); 687 exit(1); 688 } 689 690 /* 691 * Issue nfssys system call to flush all logging buffers asynchronously. 692 * 693 * NOTICE: It is extremely important to flush NFS logging buffers when 694 * nfsd exits. When the system is halted or rebooted nfslogd 695 * may not have an opportunity to flush the buffers. 696 */ 697 static void 698 nfsl_flush() 699 { 700 struct nfsl_flush_args nfa; 701 702 memset((void *)&nfa, 0, sizeof (nfa)); 703 nfa.version = NFSL_FLUSH_ARGS_VERS; 704 nfa.directive = NFSL_ALL; /* flush all asynchronously */ 705 706 if (_nfssys(LOG_FLUSH, &nfa) < 0) 707 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n", 708 strerror(errno)); 709 } 710 711 /* 712 * SIGTERM handler. 713 * Flush logging buffers and exit. 714 */ 715 static void 716 sigflush(int sig) 717 { 718 nfsl_flush(); 719 _exit(0); 720 } 721 722 /* 723 * SIGUSR1 handler. 724 * 725 * Request that server quiesce, then (nfsd) exit. For subsequent warm start. 726 * 727 * This is a Contracted Project Private interface, for the sole use 728 * of Sun Cluster HA-NFS. See PSARC/2004/497. 729 * 730 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN. 731 */ 732 static void 733 quiesce(int sig) 734 { 735 int error; 736 int id = NFS_SVCPOOL_ID; 737 738 if (nfs_server_vers_max >= QUIESCE_VERSMIN) { 739 /* Request server quiesce at next shutdown */ 740 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id); 741 742 /* 743 * ENOENT is returned if there is no matching SVC pool 744 * for the id. Possibly because the pool is not yet setup. 745 * In this case, just exit as if no error. For all other errors, 746 * just return and allow caller to retry. 747 */ 748 if (error && errno != ENOENT) { 749 syslog(LOG_ERR, 750 "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s", 751 strerror(errno)); 752 return; 753 } 754 } 755 756 /* Flush logging buffers */ 757 nfsl_flush(); 758 759 _exit(0); 760 } 761 762 /* 763 * DSS: distributed stable storage. 764 * Create leaf directories as required, keeping an eye on path 765 * lengths. Calls exit(1) on failure. 766 * The pathnames passed in must already exist, and must be writeable by nfsd. 767 * Note: the leaf directories under NFS4_VAR_DIR are not created here; 768 * they're created at pkg install. 769 */ 770 static void 771 dss_mkleafdirs(uint_t npaths, char **pathnames) 772 { 773 int i; 774 char *tmppath = NULL; 775 776 /* 777 * Create the temporary storage used by dss_mkleafdir() here, 778 * rather than in that function, so that it only needs to be 779 * done once, rather than once for each call. Too big to put 780 * on the function's stack. 781 */ 782 tmppath = (char *)malloc(MAXPATHLEN); 783 if (tmppath == NULL) { 784 syslog(LOG_ERR, "tmppath malloc failed. Exiting"); 785 exit(1); 786 } 787 788 for (i = 0; i < npaths; i++) { 789 char *p = pathnames[i]; 790 791 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath); 792 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath); 793 } 794 795 free(tmppath); 796 } 797 798 /* 799 * Create "leaf" in "dir" (which must already exist). 800 * leaf: should start with a '/' 801 */ 802 static void 803 dss_mkleafdir(char *dir, char *leaf, char *tmppath) 804 { 805 /* MAXPATHLEN includes the terminating NUL */ 806 if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) { 807 fprintf(stderr, "stable storage path too long: %s%s. Exiting", 808 dir, leaf); 809 exit(1); 810 } 811 812 (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf); 813 814 /* the directory may already exist: that's OK */ 815 if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) { 816 fprintf(stderr, "error creating stable storage directory: " 817 "%s: %s. Exiting", strerror(errno), tmppath); 818 exit(1); 819 } 820 } 821 822 /* 823 * Create the storage dirs, and pass the path list to the kernel. 824 * This requires the nfssrv module to be loaded; the _nfssys() syscall 825 * will fail ENOTSUP if it is not. 826 * Use libnvpair(3LIB) to pass the data to the kernel. 827 */ 828 static int 829 dss_init(uint_t npaths, char **pathnames) 830 { 831 int i, j, nskipped, error; 832 char *bufp; 833 uint32_t bufsize; 834 size_t buflen; 835 nvlist_t *nvl; 836 837 if (npaths > 1) { 838 /* 839 * We need to remove duplicate paths; this might be user error 840 * in the general case, but HA-NFSv4 can also cause this. 841 * Sort the pathnames array, and NULL out duplicates, 842 * then write the non-NULL entries to a new array. 843 * Sorting will also allow the kernel to optimise its searches. 844 */ 845 846 qsort(pathnames, npaths, sizeof (char *), qstrcmp); 847 848 /* now NULL out any duplicates */ 849 i = 0; j = 1; nskipped = 0; 850 while (j < npaths) { 851 if (strcmp(pathnames[i], pathnames[j]) == NULL) { 852 pathnames[j] = NULL; 853 j++; 854 nskipped++; 855 continue; 856 } 857 858 /* skip i over any of its NULLed duplicates */ 859 i = j++; 860 } 861 862 /* finally, write the non-NULL entries to a new array */ 863 if (nskipped > 0) { 864 int nreal; 865 size_t sz; 866 char **tmp_pathnames; 867 868 nreal = npaths - nskipped; 869 870 sz = nreal * sizeof (char *); 871 tmp_pathnames = (char **)malloc(sz); 872 if (tmp_pathnames == NULL) { 873 fprintf(stderr, "tmp_pathnames malloc failed"); 874 exit(1); 875 } 876 877 for (i = 0, j = 0; i < npaths; i++) 878 if (pathnames[i] != NULL) 879 tmp_pathnames[j++] = pathnames[i]; 880 free(pathnames); 881 pathnames = tmp_pathnames; 882 npaths = nreal; 883 } 884 885 } 886 887 /* Create directories to store the distributed state files */ 888 dss_mkleafdirs(npaths, pathnames); 889 890 /* Create the name-value pair list */ 891 error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 892 if (error) { 893 fprintf(stderr, "nvlist_alloc failed: %s.", strerror(errno)); 894 return (1); 895 } 896 897 /* Add the pathnames array as a single name-value pair */ 898 error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME, 899 pathnames, npaths); 900 if (error) { 901 fprintf(stderr, "nvlist_add_string_array failed: %s.", 902 strerror(errno)); 903 nvlist_free(nvl); 904 return (1); 905 } 906 907 /* 908 * Pack list into contiguous memory, for passing to kernel. 909 * nvlist_pack() will allocate the memory for the buffer, 910 * which we should free() when no longer needed. 911 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary. 912 */ 913 bufp = NULL; 914 error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0); 915 if (error) { 916 fprintf(stderr, "nvlist_pack failed: %s.", strerror(errno)); 917 nvlist_free(nvl); 918 return (1); 919 } 920 921 /* Now we have the packed buffer, we no longer need the list */ 922 nvlist_free(nvl); 923 924 /* 925 * Let the kernel know in advance how big the buffer is. 926 * NOTE: we cannot just pass buflen, since size_t is a long, and 927 * thus a different size between ILP32 userland and LP64 kernel. 928 * Use an int for the transfer, since that should be big enough; 929 * this is a no-op at the moment, here, since nfsd is 32-bit, but 930 * that could change. 931 */ 932 bufsize = (uint32_t)buflen; 933 error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize); 934 if (error) { 935 fprintf(stderr, 936 "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ", 937 strerror(errno)); 938 free(bufp); 939 return (1); 940 } 941 942 /* Pass the packed buffer to the kernel */ 943 error = _nfssys(NFS4_DSS_SETPATHS, bufp); 944 if (error) { 945 fprintf(stderr, 946 "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno)); 947 free(bufp); 948 return (1); 949 } 950 951 /* 952 * The kernel has now unpacked the buffer and extracted the 953 * pathnames array, we no longer need the buffer. 954 */ 955 free(bufp); 956 957 return (0); 958 } 959 960 /* 961 * Quick sort string compare routine, for qsort. 962 * Needed to make arg types correct. 963 */ 964 int 965 qstrcmp(const void *p1, const void *p2) 966 { 967 char *s1 = *((char **)p1); 968 char *s2 = *((char **)p2); 969 970 return (strcmp(s1, s2)); 971 } 972