1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 26 /* All Rights Reserved */ 27 28 /* 29 * University Copyright- Copyright (c) 1982, 1986, 1988 30 * The Regents of the University of California 31 * All Rights Reserved 32 * 33 * University Acknowledgment- Portions of this document are derived from 34 * software developed by the University of California, Berkeley, and its 35 * contributors. 36 */ 37 38 /* NFS server */ 39 40 #include <sys/param.h> 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <syslog.h> 44 #include <tiuser.h> 45 #include <rpc/rpc.h> 46 #include <errno.h> 47 #include <thread.h> 48 #include <sys/resource.h> 49 #include <sys/time.h> 50 #include <sys/file.h> 51 #include <nfs/nfs.h> 52 #include <nfs/nfs4.h> 53 #include <nfs/nfs_acl.h> 54 #include <nfs/nfssys.h> 55 #include <stdio.h> 56 #include <stdio_ext.h> 57 #include <stdlib.h> 58 #include <signal.h> 59 #include <netconfig.h> 60 #include <netdir.h> 61 #include <string.h> 62 #include <unistd.h> 63 #include <limits.h> 64 #include <stropts.h> 65 #include <sys/tihdr.h> 66 #include <sys/wait.h> 67 #include <poll.h> 68 #include <priv_utils.h> 69 #include <sys/tiuser.h> 70 #include <netinet/tcp.h> 71 #include <deflt.h> 72 #include <rpcsvc/daemon_utils.h> 73 #include <rpcsvc/nfs4_prot.h> 74 #include <libnvpair.h> 75 #include <libscf.h> 76 #include <libshare.h> 77 #include "nfs_tbind.h" 78 #include "thrpool.h" 79 #include "smfcfg.h" 80 81 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */ 82 #define QUIESCE_VERSMIN 4 83 /* DSS: distributed stable storage */ 84 #define DSS_VERSMIN 4 85 86 static int nfssvc(int, struct netbuf, struct netconfig *); 87 static int nfssvcpool(int maxservers); 88 static int dss_init(uint_t npaths, char **pathnames); 89 static void dss_mkleafdirs(uint_t npaths, char **pathnames); 90 static void dss_mkleafdir(char *dir, char *leaf, char *path); 91 static void usage(void); 92 int qstrcmp(const void *s1, const void *s2); 93 94 extern int _nfssys(int, void *); 95 96 extern int daemonize_init(void); 97 extern void daemonize_fini(int fd); 98 99 /* signal handlers */ 100 static void sigflush(int); 101 static void quiesce(int); 102 103 static char *MyName; 104 static NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp", 105 "/dev/udp6", NULL }; 106 107 /* 108 * The following are all globals used by routines in nfs_tbind.c. 109 */ 110 size_t end_listen_fds; /* used by conn_close_oldest() */ 111 size_t num_fds = 0; /* used by multiple routines */ 112 int listen_backlog = 32; /* used by bind_to_{provider,proto}() */ 113 int num_servers; /* used by cots_listen_event() */ 114 int (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc; 115 /* used by cots_listen_event() */ 116 int max_conns_allowed = -1; /* used by cots_listen_event() */ 117 118 /* 119 * Keep track of min/max versions of NFS protocol to be started. 120 * Start with the defaults (min == 2, max == 4). 121 * Used NFS_VERS_... and should be analyzed with NFS_PROT_VERSION 122 * macros. 123 */ 124 uint32_t nfs_server_vers_min = NFS_SRV_VERS_MIN; 125 uint32_t nfs_server_vers_max = NFS_SRV_VERS_MAX; 126 127 /* 128 * Set the default for server delegation enablement and set per 129 * /etc/default/nfs configuration (if present). 130 */ 131 int nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT; 132 133 int 134 main(int ac, char *av[]) 135 { 136 char *dir = "/"; 137 int allflag = 0; 138 int df_allflag = 0; 139 int opt_cnt = 0; 140 int maxservers = 1024; /* zero allows inifinte number of threads */ 141 int maxservers_set = 0; 142 int logmaxservers = 0; 143 int pid; 144 int i; 145 char *provider = NULL; 146 char *df_provider = NULL; 147 struct protob *protobp0, *protobp; 148 NETSELDECL(proto) = NULL; 149 NETSELDECL(df_proto) = NULL; 150 NETSELPDECL(providerp); 151 char *defval; 152 boolean_t can_do_mlp; 153 uint_t dss_npaths = 0; 154 char **dss_pathnames = NULL; 155 sigset_t sgset; 156 char name[PATH_MAX], value[PATH_MAX]; 157 int ret, bufsz; 158 int pipe_fd = -1; 159 const char *errstr; 160 161 MyName = *av; 162 163 /* 164 * Initializations that require more privileges than we need to run. 165 */ 166 (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID); 167 svcsetprio(); 168 169 can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP); 170 if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 171 DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS, 172 can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) { 173 (void) fprintf(stderr, "%s should be run with" 174 " sufficient privileges\n", av[0]); 175 exit(1); 176 } 177 178 (void) enable_extended_FILE_stdio(-1, -1); 179 180 /* Upgrade SMF settings, if necessary. */ 181 nfs_config_upgrade(NFSD); 182 183 /* 184 * Read in the values from SMF first before we check 185 * command line options so the options override SMF values. 186 */ 187 bufsz = PATH_MAX; 188 ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE, 189 SCF_TYPE_INTEGER, NFSD, &bufsz); 190 if (ret == SA_OK) { 191 max_conns_allowed = strtonum(value, -1, INT32_MAX, &errstr); 192 if (errstr != NULL) 193 max_conns_allowed = -1; 194 } 195 196 bufsz = PATH_MAX; 197 ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE, 198 SCF_TYPE_INTEGER, NFSD, &bufsz); 199 if (ret == SA_OK) { 200 listen_backlog = strtonum(value, 0, INT32_MAX, &errstr); 201 if (errstr != NULL) { 202 listen_backlog = 32; 203 } 204 } 205 206 bufsz = PATH_MAX; 207 ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE, 208 SCF_TYPE_ASTRING, NFSD, &bufsz); 209 if ((ret == SA_OK) && strlen(value) > 0) { 210 df_proto = strdup(value); 211 opt_cnt++; 212 if (strncasecmp("ALL", value, 3) == 0) { 213 free(df_proto); 214 df_proto = NULL; 215 df_allflag = 1; 216 } 217 } 218 219 bufsz = PATH_MAX; 220 ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE, 221 SCF_TYPE_ASTRING, NFSD, &bufsz); 222 if ((ret == SA_OK) && strlen(value) > 0) { 223 df_provider = strdup(value); 224 opt_cnt++; 225 } 226 227 bufsz = PATH_MAX; 228 ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE, 229 SCF_TYPE_INTEGER, NFSD, &bufsz); 230 if (ret == SA_OK) { 231 maxservers = strtonum(value, 1, INT32_MAX, &errstr); 232 if (errstr != NULL) 233 maxservers = 1024; 234 else 235 maxservers_set = 1; 236 } 237 238 bufsz = PATH_MAX; 239 ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE, 240 SCF_TYPE_ASTRING, NFSD, &bufsz); 241 if (ret == SA_OK) { 242 ret = nfs_convert_version_str(value); 243 if (ret == 0) { 244 (void) fprintf(stderr, "invalid server_versmin: %s\n", 245 value); 246 } else { 247 nfs_server_vers_min = ret; 248 } 249 } 250 251 bufsz = PATH_MAX; 252 ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE, 253 SCF_TYPE_ASTRING, NFSD, &bufsz); 254 if (ret == SA_OK) { 255 ret = nfs_convert_version_str(value); 256 if (ret == 0) { 257 (void) fprintf(stderr, "invalid server_versmax: %s\n", 258 value); 259 } else { 260 nfs_server_vers_max = ret; 261 } 262 } 263 264 bufsz = PATH_MAX; 265 ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE, 266 SCF_TYPE_ASTRING, NFSD, &bufsz); 267 if (ret == SA_OK) 268 if (strncasecmp(value, "off", 3) == 0) 269 nfs_server_delegation = FALSE; 270 271 /* 272 * Conflict options error messages. 273 */ 274 if (opt_cnt > 1) { 275 (void) fprintf(stderr, "\nConflicting options, only one of " 276 "the following options can be specified\n" 277 "in SMF:\n" 278 "\tprotocol=ALL\n" 279 "\tprotocol=protocol\n" 280 "\tdevice=devicename\n\n"); 281 usage(); 282 } 283 opt_cnt = 0; 284 285 while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) { 286 switch (i) { 287 case 'a': 288 free(df_proto); 289 df_proto = NULL; 290 free(df_provider); 291 df_provider = NULL; 292 293 allflag = 1; 294 opt_cnt++; 295 break; 296 297 case 'c': 298 max_conns_allowed = atoi(optarg); 299 break; 300 301 case 'p': 302 proto = optarg; 303 df_allflag = 0; 304 opt_cnt++; 305 break; 306 307 /* 308 * DSS: NFSv4 distributed stable storage. 309 * 310 * This is a Contracted Project Private interface, for 311 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313. 312 */ 313 case 's': 314 if (strlen(optarg) < MAXPATHLEN) { 315 /* first "-s" option encountered? */ 316 if (dss_pathnames == NULL) { 317 /* 318 * Allocate maximum possible space 319 * required given cmdline arg count; 320 * "-s <path>" consumes two args. 321 */ 322 size_t sz = (ac / 2) * sizeof (char *); 323 dss_pathnames = (char **)malloc(sz); 324 if (dss_pathnames == NULL) { 325 (void) fprintf(stderr, "%s: " 326 "dss paths malloc failed\n", 327 av[0]); 328 exit(1); 329 } 330 (void) memset(dss_pathnames, 0, sz); 331 } 332 dss_pathnames[dss_npaths] = optarg; 333 dss_npaths++; 334 } else { 335 (void) fprintf(stderr, 336 "%s: -s pathname too long.\n", av[0]); 337 } 338 break; 339 340 case 't': 341 provider = optarg; 342 df_allflag = 0; 343 opt_cnt++; 344 break; 345 346 case 'l': 347 listen_backlog = atoi(optarg); 348 break; 349 350 case '?': 351 usage(); 352 /* NOTREACHED */ 353 } 354 } 355 356 allflag = df_allflag; 357 if (proto == NULL) 358 proto = df_proto; 359 if (provider == NULL) 360 provider = df_provider; 361 362 /* 363 * Conflict options error messages. 364 */ 365 if (opt_cnt > 1) { 366 (void) fprintf(stderr, "\nConflicting options, only one of " 367 "the following options can be specified\n" 368 "on the command line:\n" 369 "\t-a\n" 370 "\t-p protocol\n" 371 "\t-t transport\n\n"); 372 usage(); 373 } 374 375 if (proto != NULL && 376 strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) { 377 if (NFS_PROT_VERSION(nfs_server_vers_max) == NFS_V4) { 378 if (NFS_PROT_VERSION(nfs_server_vers_min) == NFS_V4) { 379 fprintf(stderr, 380 "NFS version 4 is not supported " 381 "with the UDP protocol. Exiting\n"); 382 exit(3); 383 } else { 384 fprintf(stderr, 385 "NFS version 4 is not supported " 386 "with the UDP protocol.\n"); 387 } 388 } 389 } 390 391 /* 392 * If there is exactly one more argument, it is the number of 393 * servers. 394 */ 395 if (optind == ac - 1) { 396 maxservers = atoi(av[optind]); 397 maxservers_set = 1; 398 } 399 /* 400 * If there are two or more arguments, then this is a usage error. 401 */ 402 else if (optind < ac - 1) 403 usage(); 404 /* 405 * Check the ranges for min/max version specified 406 */ 407 else if ((nfs_server_vers_min > nfs_server_vers_max) || 408 (nfs_server_vers_min < NFS_SRV_VERS_MIN) || 409 (nfs_server_vers_max > NFS_SRV_VERS_MAX)) 410 usage(); 411 /* 412 * There are no additional arguments, and we haven't set maxservers 413 * explicitly via the config file, we use a default number of 414 * servers. We will log this. 415 */ 416 else if (maxservers_set == 0) 417 logmaxservers = 1; 418 419 /* 420 * Basic Sanity checks on options 421 * 422 * max_conns_allowed must be positive, except for the special 423 * value of -1 which is used internally to mean unlimited, -1 isn't 424 * documented but we allow it anyway. 425 * 426 * maxservers must be positive 427 * listen_backlog must be positive or zero 428 */ 429 if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) || 430 (listen_backlog < 0) || (maxservers <= 0)) { 431 usage(); 432 } 433 434 /* 435 * Set current dir to server root 436 */ 437 if (chdir(dir) < 0) { 438 (void) fprintf(stderr, "%s: ", MyName); 439 perror(dir); 440 exit(1); 441 } 442 443 #ifndef DEBUG 444 pipe_fd = daemonize_init(); 445 #endif 446 447 openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON); 448 449 /* 450 * establish our lock on the lock file and write our pid to it. 451 * exit if some other process holds the lock, or if there's any 452 * error in writing/locking the file. 453 */ 454 pid = _enter_daemon_lock(NFSD); 455 switch (pid) { 456 case 0: 457 break; 458 case -1: 459 fprintf(stderr, "error locking for %s: %s\n", NFSD, 460 strerror(errno)); 461 exit(2); 462 default: 463 /* daemon was already running */ 464 exit(0); 465 } 466 467 /* 468 * If we've been given a list of paths to be used for distributed 469 * stable storage, and provided we're going to run a version 470 * that supports it, setup the DSS paths. 471 */ 472 if (dss_pathnames != NULL && 473 NFS_PROT_VERSION(nfs_server_vers_max) >= DSS_VERSMIN) { 474 if (dss_init(dss_npaths, dss_pathnames) != 0) { 475 fprintf(stderr, "%s", "dss_init failed. Exiting.\n"); 476 exit(1); 477 } 478 } 479 480 /* 481 * Block all signals till we spawn other 482 * threads. 483 */ 484 (void) sigfillset(&sgset); 485 (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL); 486 487 if (logmaxservers) { 488 fprintf(stderr, 489 "Number of servers not specified. Using default of %d.\n", 490 maxservers); 491 } 492 493 /* 494 * Make sure to unregister any previous versions in case the 495 * user is reconfiguring the server in interesting ways. 496 */ 497 svc_unreg(NFS_PROGRAM, NFS_VERSION); 498 svc_unreg(NFS_PROGRAM, NFS_V3); 499 svc_unreg(NFS_PROGRAM, NFS_V4); 500 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2); 501 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3); 502 503 /* 504 * Set up kernel RPC thread pool for the NFS server. 505 */ 506 if (nfssvcpool(maxservers)) { 507 fprintf(stderr, "Can't set up kernel NFS service: %s. " 508 "Exiting.\n", strerror(errno)); 509 exit(1); 510 } 511 512 /* 513 * Set up blocked thread to do LWP creation on behalf of the kernel. 514 */ 515 if (svcwait(NFS_SVCPOOL_ID)) { 516 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting.\n", 517 strerror(errno)); 518 exit(1); 519 } 520 521 /* 522 * RDMA start and stop thread. 523 * Per pool RDMA listener creation and 524 * destructor thread. 525 * 526 * start rdma services and block in the kernel. 527 * (only if proto or provider is not set to TCP or UDP) 528 */ 529 if ((proto == NULL) && (provider == NULL)) { 530 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min, 531 nfs_server_vers_max, nfs_server_delegation)) { 532 fprintf(stderr, 533 "Can't set up RDMA creator thread : %s\n", 534 strerror(errno)); 535 } 536 } 537 538 /* 539 * Now open up for signal delivery 540 */ 541 542 (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL); 543 sigset(SIGTERM, sigflush); 544 sigset(SIGUSR1, quiesce); 545 546 /* 547 * Build a protocol block list for registration. 548 * In protocol list we have first block for NFS and second 549 * block for NFS_ACL - which is needed up to v3, as support 550 * for ACL is included in NFS protocol since v4. 551 */ 552 protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob)); 553 protobp->serv = "NFS"; 554 protobp->versmin = NFS_PROT_VERSION(nfs_server_vers_min); 555 protobp->versmax = NFS_PROT_VERSION(nfs_server_vers_max); 556 protobp->program = NFS_PROGRAM; 557 558 protobp->next = (struct protob *)malloc(sizeof (struct protob)); 559 protobp = protobp->next; 560 protobp->serv = "NFS_ACL"; /* not used */ 561 protobp->versmin = NFS_PROT_VERSION(nfs_server_vers_min); 562 /* XXX - this needs work to get the version just right */ 563 protobp->versmax = 564 MIN(NFS_PROT_VERSION(nfs_server_vers_max), NFS_ACL_V3); 565 protobp->program = NFS_ACL_PROGRAM; 566 protobp->next = NULL; 567 568 if (allflag) { 569 if (do_all(protobp0, nfssvc) == -1) { 570 fprintf(stderr, "setnetconfig failed : %s\n", 571 strerror(errno)); 572 exit(1); 573 } 574 } else if (proto) { 575 /* there's more than one match for the same protocol */ 576 struct netconfig *nconf; 577 NCONF_HANDLE *nc; 578 bool_t protoFound = FALSE; 579 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) { 580 fprintf(stderr, "setnetconfig failed : %s\n", 581 strerror(errno)); 582 goto done; 583 } 584 while (nconf = getnetconfig(nc)) { 585 if (strcmp(nconf->nc_proto, proto) == 0) { 586 protoFound = TRUE; 587 do_one(nconf->nc_device, NULL, 588 protobp0, nfssvc); 589 } 590 } 591 (void) endnetconfig(nc); 592 if (protoFound == FALSE) { 593 fprintf(stderr, 594 "couldn't find netconfig entry for protocol %s\n", 595 proto); 596 } 597 } else if (provider) 598 do_one(provider, proto, protobp0, nfssvc); 599 else { 600 for (providerp = defaultproviders; 601 *providerp != NULL; providerp++) { 602 provider = *providerp; 603 do_one(provider, NULL, protobp0, nfssvc); 604 } 605 } 606 done: 607 608 free(protobp); 609 free(protobp0); 610 611 if (num_fds == 0) { 612 fprintf(stderr, "Could not start NFS service for any protocol." 613 " Exiting.\n"); 614 exit(1); 615 } 616 617 end_listen_fds = num_fds; 618 619 /* 620 * nfsd is up and running as far as we are concerned. 621 */ 622 daemonize_fini(pipe_fd); 623 624 /* 625 * Get rid of unneeded privileges. 626 */ 627 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION, 628 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL); 629 630 /* 631 * Poll for non-data control events on the transport descriptors. 632 */ 633 poll_for_action(); 634 635 /* 636 * If we get here, something failed in poll_for_action(). 637 */ 638 return (1); 639 } 640 641 static int 642 nfssvcpool(int maxservers) 643 { 644 struct svcpool_args npa; 645 646 npa.id = NFS_SVCPOOL_ID; 647 npa.maxthreads = maxservers; 648 npa.redline = 0; 649 npa.qsize = 0; 650 npa.timeout = 0; 651 npa.stksize = 0; 652 npa.max_same_xprt = 0; 653 return (_nfssys(SVCPOOL_CREATE, &npa)); 654 } 655 656 /* 657 * Establish NFS service thread. 658 */ 659 static int 660 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf) 661 { 662 struct nfs_svc_args nsa; 663 664 nsa.fd = fd; 665 nsa.netid = nconf->nc_netid; 666 nsa.addrmask = addrmask; 667 if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) { 668 nsa.nfs_versmax = MIN(nfs_server_vers_max, NFS_VERS_3); 669 nsa.nfs_versmin = nfs_server_vers_min; 670 /* 671 * If no version left, silently do nothing, previous 672 * checks will have assured at least TCP is available. 673 */ 674 if (nsa.nfs_versmin > nsa.nfs_versmax) 675 return (0); 676 } else { 677 nsa.nfs_versmax = nfs_server_vers_max; 678 nsa.nfs_versmin = nfs_server_vers_min; 679 } 680 nsa.delegation = nfs_server_delegation; 681 return (_nfssys(NFS_SVC, &nsa)); 682 } 683 684 static void 685 usage(void) 686 { 687 (void) fprintf(stderr, 688 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName); 689 (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n"); 690 (void) fprintf(stderr, 691 "\twhere -a causes <nservers> to be started on each appropriate transport,\n"); 692 (void) fprintf(stderr, 693 "\tmax_conns is the maximum number of concurrent connections allowed,\n"); 694 (void) fprintf(stderr, "\t\tand max_conns must be a decimal number"); 695 (void) fprintf(stderr, "> zero,\n"); 696 (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n"); 697 (void) fprintf(stderr, 698 "\ttransport is a transport provider name (i.e. device),\n"); 699 (void) fprintf(stderr, 700 "\tlisten_backlog is the TCP listen backlog,\n"); 701 (void) fprintf(stderr, 702 "\tand <nservers> must be a decimal number > zero.\n"); 703 exit(1); 704 } 705 706 /* 707 * Issue nfssys system call to flush all logging buffers asynchronously. 708 * 709 * NOTICE: It is extremely important to flush NFS logging buffers when 710 * nfsd exits. When the system is halted or rebooted nfslogd 711 * may not have an opportunity to flush the buffers. 712 */ 713 static void 714 nfsl_flush() 715 { 716 struct nfsl_flush_args nfa; 717 718 memset((void *)&nfa, 0, sizeof (nfa)); 719 nfa.version = NFSL_FLUSH_ARGS_VERS; 720 nfa.directive = NFSL_ALL; /* flush all asynchronously */ 721 722 if (_nfssys(LOG_FLUSH, &nfa) < 0) 723 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n", 724 strerror(errno)); 725 } 726 727 /* 728 * SIGTERM handler. 729 * Flush logging buffers and exit. 730 */ 731 static void 732 sigflush(int sig) 733 { 734 nfsl_flush(); 735 _exit(0); 736 } 737 738 /* 739 * SIGUSR1 handler. 740 * 741 * Request that server quiesce, then (nfsd) exit. For subsequent warm start. 742 * 743 * This is a Contracted Project Private interface, for the sole use 744 * of Sun Cluster HA-NFS. See PSARC/2004/497. 745 * 746 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN. 747 */ 748 static void 749 quiesce(int sig) 750 { 751 int error; 752 int id = NFS_SVCPOOL_ID; 753 754 if (NFS_PROT_VERSION(nfs_server_vers_max) >= QUIESCE_VERSMIN) { 755 /* Request server quiesce at next shutdown */ 756 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id); 757 758 /* 759 * ENOENT is returned if there is no matching SVC pool 760 * for the id. Possibly because the pool is not yet setup. 761 * In this case, just exit as if no error. For all other errors, 762 * just return and allow caller to retry. 763 */ 764 if (error && errno != ENOENT) { 765 syslog(LOG_ERR, 766 "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s", 767 strerror(errno)); 768 return; 769 } 770 } 771 772 /* Flush logging buffers */ 773 nfsl_flush(); 774 775 _exit(0); 776 } 777 778 /* 779 * DSS: distributed stable storage. 780 * Create leaf directories as required, keeping an eye on path 781 * lengths. Calls exit(1) on failure. 782 * The pathnames passed in must already exist, and must be writeable by nfsd. 783 * Note: the leaf directories under NFS4_VAR_DIR are not created here; 784 * they're created at pkg install. 785 */ 786 static void 787 dss_mkleafdirs(uint_t npaths, char **pathnames) 788 { 789 int i; 790 char *tmppath = NULL; 791 792 /* 793 * Create the temporary storage used by dss_mkleafdir() here, 794 * rather than in that function, so that it only needs to be 795 * done once, rather than once for each call. Too big to put 796 * on the function's stack. 797 */ 798 tmppath = (char *)malloc(MAXPATHLEN); 799 if (tmppath == NULL) { 800 syslog(LOG_ERR, "tmppath malloc failed. Exiting"); 801 exit(1); 802 } 803 804 for (i = 0; i < npaths; i++) { 805 char *p = pathnames[i]; 806 807 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath); 808 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath); 809 } 810 811 free(tmppath); 812 } 813 814 /* 815 * Create "leaf" in "dir" (which must already exist). 816 * leaf: should start with a '/' 817 */ 818 static void 819 dss_mkleafdir(char *dir, char *leaf, char *tmppath) 820 { 821 /* MAXPATHLEN includes the terminating NUL */ 822 if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) { 823 fprintf(stderr, "stable storage path too long: %s%s. " 824 "Exiting.\n", dir, leaf); 825 exit(1); 826 } 827 828 (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf); 829 830 /* the directory may already exist: that's OK */ 831 if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) { 832 fprintf(stderr, "error creating stable storage directory: " 833 "%s: %s. Exiting.\n", strerror(errno), tmppath); 834 exit(1); 835 } 836 } 837 838 /* 839 * Create the storage dirs, and pass the path list to the kernel. 840 * This requires the nfssrv module to be loaded; the _nfssys() syscall 841 * will fail ENOTSUP if it is not. 842 * Use libnvpair(3LIB) to pass the data to the kernel. 843 */ 844 static int 845 dss_init(uint_t npaths, char **pathnames) 846 { 847 int i, j, nskipped, error; 848 char *bufp; 849 uint32_t bufsize; 850 size_t buflen; 851 nvlist_t *nvl; 852 853 if (npaths > 1) { 854 /* 855 * We need to remove duplicate paths; this might be user error 856 * in the general case, but HA-NFSv4 can also cause this. 857 * Sort the pathnames array, and NULL out duplicates, 858 * then write the non-NULL entries to a new array. 859 * Sorting will also allow the kernel to optimise its searches. 860 */ 861 862 qsort(pathnames, npaths, sizeof (char *), qstrcmp); 863 864 /* now NULL out any duplicates */ 865 i = 0; j = 1; nskipped = 0; 866 while (j < npaths) { 867 if (strcmp(pathnames[i], pathnames[j]) == 0) { 868 pathnames[j] = NULL; 869 j++; 870 nskipped++; 871 continue; 872 } 873 874 /* skip i over any of its NULLed duplicates */ 875 i = j++; 876 } 877 878 /* finally, write the non-NULL entries to a new array */ 879 if (nskipped > 0) { 880 int nreal; 881 size_t sz; 882 char **tmp_pathnames; 883 884 nreal = npaths - nskipped; 885 886 sz = nreal * sizeof (char *); 887 tmp_pathnames = (char **)malloc(sz); 888 if (tmp_pathnames == NULL) { 889 fprintf(stderr, "tmp_pathnames malloc " 890 "failed\n"); 891 exit(1); 892 } 893 894 for (i = 0, j = 0; i < npaths; i++) 895 if (pathnames[i] != NULL) 896 tmp_pathnames[j++] = pathnames[i]; 897 free(pathnames); 898 pathnames = tmp_pathnames; 899 npaths = nreal; 900 } 901 902 } 903 904 /* Create directories to store the distributed state files */ 905 dss_mkleafdirs(npaths, pathnames); 906 907 /* Create the name-value pair list */ 908 error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 909 if (error) { 910 fprintf(stderr, "nvlist_alloc failed: %s\n", strerror(errno)); 911 return (1); 912 } 913 914 /* Add the pathnames array as a single name-value pair */ 915 error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME, 916 pathnames, npaths); 917 if (error) { 918 fprintf(stderr, "nvlist_add_string_array failed: %s\n", 919 strerror(errno)); 920 nvlist_free(nvl); 921 return (1); 922 } 923 924 /* 925 * Pack list into contiguous memory, for passing to kernel. 926 * nvlist_pack() will allocate the memory for the buffer, 927 * which we should free() when no longer needed. 928 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary. 929 */ 930 bufp = NULL; 931 error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0); 932 if (error) { 933 fprintf(stderr, "nvlist_pack failed: %s\n", strerror(errno)); 934 nvlist_free(nvl); 935 return (1); 936 } 937 938 /* Now we have the packed buffer, we no longer need the list */ 939 nvlist_free(nvl); 940 941 /* 942 * Let the kernel know in advance how big the buffer is. 943 * NOTE: we cannot just pass buflen, since size_t is a long, and 944 * thus a different size between ILP32 userland and LP64 kernel. 945 * Use an int for the transfer, since that should be big enough; 946 * this is a no-op at the moment, here, since nfsd is 32-bit, but 947 * that could change. 948 */ 949 bufsize = (uint32_t)buflen; 950 error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize); 951 if (error) { 952 fprintf(stderr, 953 "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s\n", 954 strerror(errno)); 955 free(bufp); 956 return (1); 957 } 958 959 /* Pass the packed buffer to the kernel */ 960 error = _nfssys(NFS4_DSS_SETPATHS, bufp); 961 if (error) { 962 fprintf(stderr, 963 "_nfssys(NFS4_DSS_SETPATHS) failed: %s\n", strerror(errno)); 964 free(bufp); 965 return (1); 966 } 967 968 /* 969 * The kernel has now unpacked the buffer and extracted the 970 * pathnames array, we no longer need the buffer. 971 */ 972 free(bufp); 973 974 return (0); 975 } 976 977 /* 978 * Quick sort string compare routine, for qsort. 979 * Needed to make arg types correct. 980 */ 981 int 982 qstrcmp(const void *p1, const void *p2) 983 { 984 char *s1 = *((char **)p1); 985 char *s2 = *((char **)p2); 986 987 return (strcmp(s1, s2)); 988 } 989