1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 26 /* All Rights Reserved */ 27 28 /* 29 * University Copyright- Copyright (c) 1982, 1986, 1988 30 * The Regents of the University of California 31 * All Rights Reserved 32 * 33 * University Acknowledgment- Portions of this document are derived from 34 * software developed by the University of California, Berkeley, and its 35 * contributors. 36 */ 37 38 /* LINTLIBRARY */ 39 /* PROTOLIB1 */ 40 41 /* NFS server */ 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 #include <sys/stat.h> 46 #include <syslog.h> 47 #include <tiuser.h> 48 #include <rpc/rpc.h> 49 #include <errno.h> 50 #include <thread.h> 51 #include <sys/resource.h> 52 #include <sys/time.h> 53 #include <sys/file.h> 54 #include <nfs/nfs.h> 55 #include <nfs/nfs_acl.h> 56 #include <nfs/nfssys.h> 57 #include <stdio.h> 58 #include <stdio_ext.h> 59 #include <stdlib.h> 60 #include <signal.h> 61 #include <netconfig.h> 62 #include <netdir.h> 63 #include <string.h> 64 #include <unistd.h> 65 #include <limits.h> 66 #include <stropts.h> 67 #include <sys/tihdr.h> 68 #include <sys/wait.h> 69 #include <poll.h> 70 #include <priv_utils.h> 71 #include <sys/tiuser.h> 72 #include <netinet/tcp.h> 73 #include <deflt.h> 74 #include <rpcsvc/daemon_utils.h> 75 #include <rpcsvc/nfs4_prot.h> 76 #include <libnvpair.h> 77 #include <libscf.h> 78 #include <libshare.h> 79 #include "nfs_tbind.h" 80 #include "thrpool.h" 81 #include "smfcfg.h" 82 83 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */ 84 #define QUIESCE_VERSMIN 4 85 /* DSS: distributed stable storage */ 86 #define DSS_VERSMIN 4 87 88 static int nfssvc(int, struct netbuf, struct netconfig *); 89 static int nfssvcpool(int maxservers); 90 static int dss_init(uint_t npaths, char **pathnames); 91 static void dss_mkleafdirs(uint_t npaths, char **pathnames); 92 static void dss_mkleafdir(char *dir, char *leaf, char *path); 93 static void usage(void); 94 int qstrcmp(const void *s1, const void *s2); 95 96 extern int _nfssys(int, void *); 97 98 extern int daemonize_init(void); 99 extern void daemonize_fini(int fd); 100 101 /* signal handlers */ 102 static void sigflush(int); 103 static void quiesce(int); 104 105 static char *MyName; 106 static NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp", 107 "/dev/udp6", NULL }; 108 /* static NETSELDECL(defaultprotos)[] = { NC_UDP, NC_TCP, NULL }; */ 109 /* 110 * The following are all globals used by routines in nfs_tbind.c. 111 */ 112 size_t end_listen_fds; /* used by conn_close_oldest() */ 113 size_t num_fds = 0; /* used by multiple routines */ 114 int listen_backlog = 32; /* used by bind_to_{provider,proto}() */ 115 int num_servers; /* used by cots_listen_event() */ 116 int (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc; 117 /* used by cots_listen_event() */ 118 int max_conns_allowed = -1; /* used by cots_listen_event() */ 119 120 /* 121 * Keep track of min/max versions of NFS protocol to be started. 122 * Start with the defaults (min == 2, max == 3). We have the 123 * capability of starting vers=4 but only if the user requests it. 124 */ 125 int nfs_server_vers_min = NFS_VERSMIN_DEFAULT; 126 int nfs_server_vers_max = NFS_VERSMAX_DEFAULT; 127 128 /* 129 * Set the default for server delegation enablement and set per 130 * /etc/default/nfs configuration (if present). 131 */ 132 int nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT; 133 134 int 135 main(int ac, char *av[]) 136 { 137 char *dir = "/"; 138 int allflag = 0; 139 int df_allflag = 0; 140 int opt_cnt = 0; 141 int maxservers = 1024; /* zero allows inifinte number of threads */ 142 int maxservers_set = 0; 143 int logmaxservers = 0; 144 int pid; 145 int i; 146 char *provider = NULL; 147 char *df_provider = NULL; 148 struct protob *protobp0, *protobp; 149 NETSELDECL(proto) = NULL; 150 NETSELDECL(df_proto) = NULL; 151 NETSELPDECL(providerp); 152 char *defval; 153 boolean_t can_do_mlp; 154 uint_t dss_npaths = 0; 155 char **dss_pathnames = NULL; 156 sigset_t sgset; 157 char name[PATH_MAX], value[PATH_MAX]; 158 int ret, bufsz; 159 int pipe_fd = -1; 160 const char *errstr; 161 162 MyName = *av; 163 164 /* 165 * Initializations that require more privileges than we need to run. 166 */ 167 (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID); 168 svcsetprio(); 169 170 can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP); 171 if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 172 DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS, 173 can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) { 174 (void) fprintf(stderr, "%s should be run with" 175 " sufficient privileges\n", av[0]); 176 exit(1); 177 } 178 179 (void) enable_extended_FILE_stdio(-1, -1); 180 181 /* Upgrade SMF settings, if necessary. */ 182 nfs_config_upgrade(NFSD); 183 184 /* 185 * Read in the values from SMF first before we check 186 * command line options so the options override SMF values. 187 */ 188 bufsz = PATH_MAX; 189 ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE, 190 SCF_TYPE_INTEGER, NFSD, &bufsz); 191 if (ret == SA_OK) { 192 errno = 0; 193 max_conns_allowed = strtol(value, (char **)NULL, 10); 194 if (errno != 0) 195 max_conns_allowed = -1; 196 } 197 198 bufsz = PATH_MAX; 199 ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE, 200 SCF_TYPE_INTEGER, NFSD, &bufsz); 201 if (ret == SA_OK) { 202 errno = 0; 203 listen_backlog = strtol(value, (char **)NULL, 10); 204 if (errno != 0) { 205 listen_backlog = 32; 206 } 207 } 208 209 bufsz = PATH_MAX; 210 ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE, 211 SCF_TYPE_ASTRING, NFSD, &bufsz); 212 if ((ret == SA_OK) && strlen(value) > 0) { 213 df_proto = strdup(value); 214 opt_cnt++; 215 if (strncasecmp("ALL", value, 3) == 0) { 216 free(df_proto); 217 df_proto = NULL; 218 df_allflag = 1; 219 } 220 } 221 222 bufsz = PATH_MAX; 223 ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE, 224 SCF_TYPE_ASTRING, NFSD, &bufsz); 225 if ((ret == SA_OK) && strlen(value) > 0) { 226 df_provider = strdup(value); 227 opt_cnt++; 228 } 229 230 bufsz = PATH_MAX; 231 ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE, 232 SCF_TYPE_INTEGER, NFSD, &bufsz); 233 if (ret == SA_OK) { 234 errno = 0; 235 maxservers = strtol(value, (char **)NULL, 10); 236 if (errno != 0) 237 maxservers = 1024; 238 else 239 maxservers_set = 1; 240 } 241 242 bufsz = 4; 243 ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE, 244 SCF_TYPE_ASTRING, NFSD, &bufsz); 245 if (ret == SA_OK) { 246 ret = strtonum(value, NFS_VERSMIN, NFS_VERSMAX, &errstr); 247 if (errstr != NULL) { 248 (void) fprintf(stderr, "invalid server_versmin: %s\n", 249 errstr); 250 } else { 251 nfs_server_vers_min = ret; 252 } 253 } 254 255 bufsz = 4; 256 ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE, 257 SCF_TYPE_ASTRING, NFSD, &bufsz); 258 if (ret == SA_OK) { 259 ret = strtonum(value, NFS_VERSMIN, NFS_VERSMAX, &errstr); 260 if (errstr != NULL) { 261 (void) fprintf(stderr, "invalid server_versmax: %s\n", 262 errstr); 263 } else { 264 nfs_server_vers_max = ret; 265 } 266 } 267 268 bufsz = PATH_MAX; 269 ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE, 270 SCF_TYPE_ASTRING, NFSD, &bufsz); 271 if (ret == SA_OK) 272 if (strncasecmp(value, "off", 3) == 0) 273 nfs_server_delegation = FALSE; 274 275 /* 276 * Conflict options error messages. 277 */ 278 if (opt_cnt > 1) { 279 (void) fprintf(stderr, "\nConflicting options, only one of " 280 "the following options can be specified\n" 281 "in SMF:\n" 282 "\tprotocol=ALL\n" 283 "\tprotocol=protocol\n" 284 "\tdevice=devicename\n\n"); 285 usage(); 286 } 287 opt_cnt = 0; 288 289 while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) { 290 switch (i) { 291 case 'a': 292 free(df_proto); 293 df_proto = NULL; 294 free(df_provider); 295 df_provider = NULL; 296 297 allflag = 1; 298 opt_cnt++; 299 break; 300 301 case 'c': 302 max_conns_allowed = atoi(optarg); 303 break; 304 305 case 'p': 306 proto = optarg; 307 df_allflag = 0; 308 opt_cnt++; 309 break; 310 311 /* 312 * DSS: NFSv4 distributed stable storage. 313 * 314 * This is a Contracted Project Private interface, for 315 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313. 316 */ 317 case 's': 318 if (strlen(optarg) < MAXPATHLEN) { 319 /* first "-s" option encountered? */ 320 if (dss_pathnames == NULL) { 321 /* 322 * Allocate maximum possible space 323 * required given cmdline arg count; 324 * "-s <path>" consumes two args. 325 */ 326 size_t sz = (ac / 2) * sizeof (char *); 327 dss_pathnames = (char **)malloc(sz); 328 if (dss_pathnames == NULL) { 329 (void) fprintf(stderr, "%s: " 330 "dss paths malloc failed\n", 331 av[0]); 332 exit(1); 333 } 334 (void) memset(dss_pathnames, 0, sz); 335 } 336 dss_pathnames[dss_npaths] = optarg; 337 dss_npaths++; 338 } else { 339 (void) fprintf(stderr, 340 "%s: -s pathname too long.\n", av[0]); 341 } 342 break; 343 344 case 't': 345 provider = optarg; 346 df_allflag = 0; 347 opt_cnt++; 348 break; 349 350 case 'l': 351 listen_backlog = atoi(optarg); 352 break; 353 354 case '?': 355 usage(); 356 /* NOTREACHED */ 357 } 358 } 359 360 allflag = df_allflag; 361 if (proto == NULL) 362 proto = df_proto; 363 if (provider == NULL) 364 provider = df_provider; 365 366 /* 367 * Conflict options error messages. 368 */ 369 if (opt_cnt > 1) { 370 (void) fprintf(stderr, "\nConflicting options, only one of " 371 "the following options can be specified\n" 372 "on the command line:\n" 373 "\t-a\n" 374 "\t-p protocol\n" 375 "\t-t transport\n\n"); 376 usage(); 377 } 378 379 if (proto != NULL && 380 strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) { 381 if (nfs_server_vers_max == NFS_V4) { 382 if (nfs_server_vers_min == NFS_V4) { 383 fprintf(stderr, 384 "NFS version 4 is not supported " 385 "with the UDP protocol. Exiting\n"); 386 exit(3); 387 } else { 388 fprintf(stderr, 389 "NFS version 4 is not supported " 390 "with the UDP protocol.\n"); 391 } 392 } 393 } 394 395 /* 396 * If there is exactly one more argument, it is the number of 397 * servers. 398 */ 399 if (optind == ac - 1) { 400 maxservers = atoi(av[optind]); 401 maxservers_set = 1; 402 } 403 /* 404 * If there are two or more arguments, then this is a usage error. 405 */ 406 else if (optind < ac - 1) 407 usage(); 408 /* 409 * Check the ranges for min/max version specified 410 */ 411 else if ((nfs_server_vers_min > nfs_server_vers_max) || 412 (nfs_server_vers_min < NFS_VERSMIN) || 413 (nfs_server_vers_max > NFS_VERSMAX)) 414 usage(); 415 /* 416 * There are no additional arguments, and we haven't set maxservers 417 * explicitly via the config file, we use a default number of 418 * servers. We will log this. 419 */ 420 else if (maxservers_set == 0) 421 logmaxservers = 1; 422 423 /* 424 * Basic Sanity checks on options 425 * 426 * max_conns_allowed must be positive, except for the special 427 * value of -1 which is used internally to mean unlimited, -1 isn't 428 * documented but we allow it anyway. 429 * 430 * maxservers must be positive 431 * listen_backlog must be positive or zero 432 */ 433 if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) || 434 (listen_backlog < 0) || (maxservers <= 0)) { 435 usage(); 436 } 437 438 /* 439 * Set current dir to server root 440 */ 441 if (chdir(dir) < 0) { 442 (void) fprintf(stderr, "%s: ", MyName); 443 perror(dir); 444 exit(1); 445 } 446 447 #ifndef DEBUG 448 pipe_fd = daemonize_init(); 449 #endif 450 451 openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON); 452 453 /* 454 * establish our lock on the lock file and write our pid to it. 455 * exit if some other process holds the lock, or if there's any 456 * error in writing/locking the file. 457 */ 458 pid = _enter_daemon_lock(NFSD); 459 switch (pid) { 460 case 0: 461 break; 462 case -1: 463 fprintf(stderr, "error locking for %s: %s\n", NFSD, 464 strerror(errno)); 465 exit(2); 466 default: 467 /* daemon was already running */ 468 exit(0); 469 } 470 471 /* 472 * If we've been given a list of paths to be used for distributed 473 * stable storage, and provided we're going to run a version 474 * that supports it, setup the DSS paths. 475 */ 476 if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) { 477 if (dss_init(dss_npaths, dss_pathnames) != 0) { 478 fprintf(stderr, "%s", "dss_init failed. Exiting.\n"); 479 exit(1); 480 } 481 } 482 483 /* 484 * Block all signals till we spawn other 485 * threads. 486 */ 487 (void) sigfillset(&sgset); 488 (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL); 489 490 if (logmaxservers) { 491 fprintf(stderr, 492 "Number of servers not specified. Using default of %d.\n", 493 maxservers); 494 } 495 496 /* 497 * Make sure to unregister any previous versions in case the 498 * user is reconfiguring the server in interesting ways. 499 */ 500 svc_unreg(NFS_PROGRAM, NFS_VERSION); 501 svc_unreg(NFS_PROGRAM, NFS_V3); 502 svc_unreg(NFS_PROGRAM, NFS_V4); 503 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2); 504 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3); 505 506 /* 507 * Set up kernel RPC thread pool for the NFS server. 508 */ 509 if (nfssvcpool(maxservers)) { 510 fprintf(stderr, "Can't set up kernel NFS service: %s. " 511 "Exiting.\n", strerror(errno)); 512 exit(1); 513 } 514 515 /* 516 * Set up blocked thread to do LWP creation on behalf of the kernel. 517 */ 518 if (svcwait(NFS_SVCPOOL_ID)) { 519 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting.\n", 520 strerror(errno)); 521 exit(1); 522 } 523 524 /* 525 * RDMA start and stop thread. 526 * Per pool RDMA listener creation and 527 * destructor thread. 528 * 529 * start rdma services and block in the kernel. 530 * (only if proto or provider is not set to TCP or UDP) 531 */ 532 if ((proto == NULL) && (provider == NULL)) { 533 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min, 534 nfs_server_vers_max, nfs_server_delegation)) { 535 fprintf(stderr, 536 "Can't set up RDMA creator thread : %s\n", 537 strerror(errno)); 538 } 539 } 540 541 /* 542 * Now open up for signal delivery 543 */ 544 545 (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL); 546 sigset(SIGTERM, sigflush); 547 sigset(SIGUSR1, quiesce); 548 549 /* 550 * Build a protocol block list for registration. 551 */ 552 protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob)); 553 protobp->serv = "NFS"; 554 protobp->versmin = nfs_server_vers_min; 555 protobp->versmax = nfs_server_vers_max; 556 protobp->program = NFS_PROGRAM; 557 558 protobp->next = (struct protob *)malloc(sizeof (struct protob)); 559 protobp = protobp->next; 560 protobp->serv = "NFS_ACL"; /* not used */ 561 protobp->versmin = nfs_server_vers_min; 562 /* XXX - this needs work to get the version just right */ 563 protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ? 564 NFS_ACL_V3 : nfs_server_vers_max; 565 protobp->program = NFS_ACL_PROGRAM; 566 protobp->next = (struct protob *)NULL; 567 568 if (allflag) { 569 if (do_all(protobp0, nfssvc) == -1) { 570 fprintf(stderr, "setnetconfig failed : %s\n", 571 strerror(errno)); 572 exit(1); 573 } 574 } else if (proto) { 575 /* there's more than one match for the same protocol */ 576 struct netconfig *nconf; 577 NCONF_HANDLE *nc; 578 bool_t protoFound = FALSE; 579 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) { 580 fprintf(stderr, "setnetconfig failed : %s\n", 581 strerror(errno)); 582 goto done; 583 } 584 while (nconf = getnetconfig(nc)) { 585 if (strcmp(nconf->nc_proto, proto) == 0) { 586 protoFound = TRUE; 587 do_one(nconf->nc_device, NULL, 588 protobp0, nfssvc); 589 } 590 } 591 (void) endnetconfig(nc); 592 if (protoFound == FALSE) { 593 fprintf(stderr, 594 "couldn't find netconfig entry for protocol %s\n", 595 proto); 596 } 597 } else if (provider) 598 do_one(provider, proto, protobp0, nfssvc); 599 else { 600 for (providerp = defaultproviders; 601 *providerp != NULL; providerp++) { 602 provider = *providerp; 603 do_one(provider, NULL, protobp0, nfssvc); 604 } 605 } 606 done: 607 608 free(protobp); 609 free(protobp0); 610 611 if (num_fds == 0) { 612 fprintf(stderr, "Could not start NFS service for any protocol." 613 " Exiting.\n"); 614 exit(1); 615 } 616 617 end_listen_fds = num_fds; 618 619 /* 620 * nfsd is up and running as far as we are concerned. 621 */ 622 daemonize_fini(pipe_fd); 623 624 /* 625 * Get rid of unneeded privileges. 626 */ 627 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION, 628 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL); 629 630 /* 631 * Poll for non-data control events on the transport descriptors. 632 */ 633 poll_for_action(); 634 635 /* 636 * If we get here, something failed in poll_for_action(). 637 */ 638 return (1); 639 } 640 641 static int 642 nfssvcpool(int maxservers) 643 { 644 struct svcpool_args npa; 645 646 npa.id = NFS_SVCPOOL_ID; 647 npa.maxthreads = maxservers; 648 npa.redline = 0; 649 npa.qsize = 0; 650 npa.timeout = 0; 651 npa.stksize = 0; 652 npa.max_same_xprt = 0; 653 return (_nfssys(SVCPOOL_CREATE, &npa)); 654 } 655 656 /* 657 * Establish NFS service thread. 658 */ 659 static int 660 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf) 661 { 662 struct nfs_svc_args nsa; 663 664 nsa.fd = fd; 665 nsa.netid = nconf->nc_netid; 666 nsa.addrmask = addrmask; 667 if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) { 668 nsa.versmax = (nfs_server_vers_max > NFS_V3) ? 669 NFS_V3 : nfs_server_vers_max; 670 nsa.versmin = nfs_server_vers_min; 671 /* 672 * If no version left, silently do nothing, previous 673 * checks will have assured at least TCP is available. 674 */ 675 if (nsa.versmin > nsa.versmax) 676 return (0); 677 } else { 678 nsa.versmax = nfs_server_vers_max; 679 nsa.versmin = nfs_server_vers_min; 680 } 681 nsa.delegation = nfs_server_delegation; 682 return (_nfssys(NFS_SVC, &nsa)); 683 } 684 685 static void 686 usage(void) 687 { 688 (void) fprintf(stderr, 689 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName); 690 (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n"); 691 (void) fprintf(stderr, 692 "\twhere -a causes <nservers> to be started on each appropriate transport,\n"); 693 (void) fprintf(stderr, 694 "\tmax_conns is the maximum number of concurrent connections allowed,\n"); 695 (void) fprintf(stderr, "\t\tand max_conns must be a decimal number"); 696 (void) fprintf(stderr, "> zero,\n"); 697 (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n"); 698 (void) fprintf(stderr, 699 "\ttransport is a transport provider name (i.e. device),\n"); 700 (void) fprintf(stderr, 701 "\tlisten_backlog is the TCP listen backlog,\n"); 702 (void) fprintf(stderr, 703 "\tand <nservers> must be a decimal number > zero.\n"); 704 exit(1); 705 } 706 707 /* 708 * Issue nfssys system call to flush all logging buffers asynchronously. 709 * 710 * NOTICE: It is extremely important to flush NFS logging buffers when 711 * nfsd exits. When the system is halted or rebooted nfslogd 712 * may not have an opportunity to flush the buffers. 713 */ 714 static void 715 nfsl_flush() 716 { 717 struct nfsl_flush_args nfa; 718 719 memset((void *)&nfa, 0, sizeof (nfa)); 720 nfa.version = NFSL_FLUSH_ARGS_VERS; 721 nfa.directive = NFSL_ALL; /* flush all asynchronously */ 722 723 if (_nfssys(LOG_FLUSH, &nfa) < 0) 724 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n", 725 strerror(errno)); 726 } 727 728 /* 729 * SIGTERM handler. 730 * Flush logging buffers and exit. 731 */ 732 static void 733 sigflush(int sig) 734 { 735 nfsl_flush(); 736 _exit(0); 737 } 738 739 /* 740 * SIGUSR1 handler. 741 * 742 * Request that server quiesce, then (nfsd) exit. For subsequent warm start. 743 * 744 * This is a Contracted Project Private interface, for the sole use 745 * of Sun Cluster HA-NFS. See PSARC/2004/497. 746 * 747 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN. 748 */ 749 static void 750 quiesce(int sig) 751 { 752 int error; 753 int id = NFS_SVCPOOL_ID; 754 755 if (nfs_server_vers_max >= QUIESCE_VERSMIN) { 756 /* Request server quiesce at next shutdown */ 757 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id); 758 759 /* 760 * ENOENT is returned if there is no matching SVC pool 761 * for the id. Possibly because the pool is not yet setup. 762 * In this case, just exit as if no error. For all other errors, 763 * just return and allow caller to retry. 764 */ 765 if (error && errno != ENOENT) { 766 syslog(LOG_ERR, 767 "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s", 768 strerror(errno)); 769 return; 770 } 771 } 772 773 /* Flush logging buffers */ 774 nfsl_flush(); 775 776 _exit(0); 777 } 778 779 /* 780 * DSS: distributed stable storage. 781 * Create leaf directories as required, keeping an eye on path 782 * lengths. Calls exit(1) on failure. 783 * The pathnames passed in must already exist, and must be writeable by nfsd. 784 * Note: the leaf directories under NFS4_VAR_DIR are not created here; 785 * they're created at pkg install. 786 */ 787 static void 788 dss_mkleafdirs(uint_t npaths, char **pathnames) 789 { 790 int i; 791 char *tmppath = NULL; 792 793 /* 794 * Create the temporary storage used by dss_mkleafdir() here, 795 * rather than in that function, so that it only needs to be 796 * done once, rather than once for each call. Too big to put 797 * on the function's stack. 798 */ 799 tmppath = (char *)malloc(MAXPATHLEN); 800 if (tmppath == NULL) { 801 syslog(LOG_ERR, "tmppath malloc failed. Exiting"); 802 exit(1); 803 } 804 805 for (i = 0; i < npaths; i++) { 806 char *p = pathnames[i]; 807 808 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath); 809 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath); 810 } 811 812 free(tmppath); 813 } 814 815 /* 816 * Create "leaf" in "dir" (which must already exist). 817 * leaf: should start with a '/' 818 */ 819 static void 820 dss_mkleafdir(char *dir, char *leaf, char *tmppath) 821 { 822 /* MAXPATHLEN includes the terminating NUL */ 823 if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) { 824 fprintf(stderr, "stable storage path too long: %s%s. " 825 "Exiting.\n", dir, leaf); 826 exit(1); 827 } 828 829 (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf); 830 831 /* the directory may already exist: that's OK */ 832 if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) { 833 fprintf(stderr, "error creating stable storage directory: " 834 "%s: %s. Exiting.\n", strerror(errno), tmppath); 835 exit(1); 836 } 837 } 838 839 /* 840 * Create the storage dirs, and pass the path list to the kernel. 841 * This requires the nfssrv module to be loaded; the _nfssys() syscall 842 * will fail ENOTSUP if it is not. 843 * Use libnvpair(3LIB) to pass the data to the kernel. 844 */ 845 static int 846 dss_init(uint_t npaths, char **pathnames) 847 { 848 int i, j, nskipped, error; 849 char *bufp; 850 uint32_t bufsize; 851 size_t buflen; 852 nvlist_t *nvl; 853 854 if (npaths > 1) { 855 /* 856 * We need to remove duplicate paths; this might be user error 857 * in the general case, but HA-NFSv4 can also cause this. 858 * Sort the pathnames array, and NULL out duplicates, 859 * then write the non-NULL entries to a new array. 860 * Sorting will also allow the kernel to optimise its searches. 861 */ 862 863 qsort(pathnames, npaths, sizeof (char *), qstrcmp); 864 865 /* now NULL out any duplicates */ 866 i = 0; j = 1; nskipped = 0; 867 while (j < npaths) { 868 if (strcmp(pathnames[i], pathnames[j]) == 0) { 869 pathnames[j] = NULL; 870 j++; 871 nskipped++; 872 continue; 873 } 874 875 /* skip i over any of its NULLed duplicates */ 876 i = j++; 877 } 878 879 /* finally, write the non-NULL entries to a new array */ 880 if (nskipped > 0) { 881 int nreal; 882 size_t sz; 883 char **tmp_pathnames; 884 885 nreal = npaths - nskipped; 886 887 sz = nreal * sizeof (char *); 888 tmp_pathnames = (char **)malloc(sz); 889 if (tmp_pathnames == NULL) { 890 fprintf(stderr, "tmp_pathnames malloc " 891 "failed\n"); 892 exit(1); 893 } 894 895 for (i = 0, j = 0; i < npaths; i++) 896 if (pathnames[i] != NULL) 897 tmp_pathnames[j++] = pathnames[i]; 898 free(pathnames); 899 pathnames = tmp_pathnames; 900 npaths = nreal; 901 } 902 903 } 904 905 /* Create directories to store the distributed state files */ 906 dss_mkleafdirs(npaths, pathnames); 907 908 /* Create the name-value pair list */ 909 error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 910 if (error) { 911 fprintf(stderr, "nvlist_alloc failed: %s\n", strerror(errno)); 912 return (1); 913 } 914 915 /* Add the pathnames array as a single name-value pair */ 916 error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME, 917 pathnames, npaths); 918 if (error) { 919 fprintf(stderr, "nvlist_add_string_array failed: %s\n", 920 strerror(errno)); 921 nvlist_free(nvl); 922 return (1); 923 } 924 925 /* 926 * Pack list into contiguous memory, for passing to kernel. 927 * nvlist_pack() will allocate the memory for the buffer, 928 * which we should free() when no longer needed. 929 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary. 930 */ 931 bufp = NULL; 932 error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0); 933 if (error) { 934 fprintf(stderr, "nvlist_pack failed: %s\n", strerror(errno)); 935 nvlist_free(nvl); 936 return (1); 937 } 938 939 /* Now we have the packed buffer, we no longer need the list */ 940 nvlist_free(nvl); 941 942 /* 943 * Let the kernel know in advance how big the buffer is. 944 * NOTE: we cannot just pass buflen, since size_t is a long, and 945 * thus a different size between ILP32 userland and LP64 kernel. 946 * Use an int for the transfer, since that should be big enough; 947 * this is a no-op at the moment, here, since nfsd is 32-bit, but 948 * that could change. 949 */ 950 bufsize = (uint32_t)buflen; 951 error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize); 952 if (error) { 953 fprintf(stderr, 954 "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s\n", 955 strerror(errno)); 956 free(bufp); 957 return (1); 958 } 959 960 /* Pass the packed buffer to the kernel */ 961 error = _nfssys(NFS4_DSS_SETPATHS, bufp); 962 if (error) { 963 fprintf(stderr, 964 "_nfssys(NFS4_DSS_SETPATHS) failed: %s\n", strerror(errno)); 965 free(bufp); 966 return (1); 967 } 968 969 /* 970 * The kernel has now unpacked the buffer and extracted the 971 * pathnames array, we no longer need the buffer. 972 */ 973 free(bufp); 974 975 return (0); 976 } 977 978 /* 979 * Quick sort string compare routine, for qsort. 980 * Needed to make arg types correct. 981 */ 982 int 983 qstrcmp(const void *p1, const void *p2) 984 { 985 char *s1 = *((char **)p1); 986 char *s2 = *((char **)p2); 987 988 return (strcmp(s1, s2)); 989 } 990