1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 /* LINTLIBRARY */ 40 /* PROTOLIB1 */ 41 42 #pragma ident "%Z%%M% %I% %E% SMI" 43 44 /* NFS server */ 45 46 #include <sys/param.h> 47 #include <sys/types.h> 48 #include <sys/stat.h> 49 #include <syslog.h> 50 #include <tiuser.h> 51 #include <rpc/rpc.h> 52 #include <errno.h> 53 #include <thread.h> 54 #include <sys/resource.h> 55 #include <sys/time.h> 56 #include <sys/file.h> 57 #include <nfs/nfs.h> 58 #include <nfs/nfs_acl.h> 59 #include <nfs/nfssys.h> 60 #include <stdio.h> 61 #include <stdio_ext.h> 62 #include <stdlib.h> 63 #include <signal.h> 64 #include <netconfig.h> 65 #include <netdir.h> 66 #include <string.h> 67 #include <unistd.h> 68 #include <stropts.h> 69 #include <sys/tihdr.h> 70 #include <poll.h> 71 #include <priv_utils.h> 72 #include <sys/tiuser.h> 73 #include <netinet/tcp.h> 74 #include <deflt.h> 75 #include <rpcsvc/daemon_utils.h> 76 #include <rpcsvc/nfs4_prot.h> 77 #include <libnvpair.h> 78 #include "nfs_tbind.h" 79 #include "thrpool.h" 80 81 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */ 82 #define QUIESCE_VERSMIN 4 83 /* DSS: distributed stable storage */ 84 #define DSS_VERSMIN 4 85 86 static int nfssvc(int, struct netbuf, struct netconfig *); 87 static int nfssvcpool(int maxservers); 88 static int dss_init(uint_t npaths, char **pathnames); 89 static void dss_mkleafdirs(uint_t npaths, char **pathnames); 90 static void dss_mkleafdir(char *dir, char *leaf, char *path); 91 static void usage(void); 92 int qstrcmp(const void *s1, const void *s2); 93 94 extern int _nfssys(int, void *); 95 96 /* signal handlers */ 97 static void sigflush(int); 98 static void quiesce(int); 99 100 static char *MyName; 101 static NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp", 102 "/dev/udp6", NULL }; 103 /* static NETSELDECL(defaultprotos)[] = { NC_UDP, NC_TCP, NULL }; */ 104 /* 105 * The following are all globals used by routines in nfs_tbind.c. 106 */ 107 size_t end_listen_fds; /* used by conn_close_oldest() */ 108 size_t num_fds = 0; /* used by multiple routines */ 109 int listen_backlog = 32; /* used by bind_to_{provider,proto}() */ 110 int num_servers; /* used by cots_listen_event() */ 111 int (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc; 112 /* used by cots_listen_event() */ 113 int max_conns_allowed = -1; /* used by cots_listen_event() */ 114 115 /* 116 * Keep track of min/max versions of NFS protocol to be started. 117 * Start with the defaults (min == 2, max == 3). We have the 118 * capability of starting vers=4 but only if the user requests it. 119 */ 120 int nfs_server_vers_min = NFS_VERSMIN_DEFAULT; 121 int nfs_server_vers_max = NFS_VERSMAX_DEFAULT; 122 123 /* 124 * Set the default for server delegation enablement and set per 125 * /etc/default/nfs configuration (if present). 126 */ 127 int nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT; 128 129 int 130 main(int ac, char *av[]) 131 { 132 char *dir = "/"; 133 int allflag = 0; 134 int df_allflag = 0; 135 int opt_cnt = 0; 136 int maxservers = 1; /* zero allows inifinte number of threads */ 137 int maxservers_set = 0; 138 int logmaxservers = 0; 139 int pid; 140 int i; 141 char *provider = (char *)NULL; 142 char *df_provider = (char *)NULL; 143 struct protob *protobp0, *protobp; 144 NETSELDECL(proto) = NULL; 145 NETSELDECL(df_proto) = NULL; 146 NETSELPDECL(providerp); 147 char *defval; 148 boolean_t can_do_mlp; 149 uint_t dss_npaths = 0; 150 char **dss_pathnames = NULL; 151 152 MyName = *av; 153 154 /* 155 * Initializations that require more privileges than we need to run. 156 */ 157 (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID); 158 svcsetprio(); 159 160 can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP); 161 if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 162 DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS, 163 can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) { 164 (void) fprintf(stderr, "%s should be run with" 165 " sufficient privileges\n", av[0]); 166 exit(1); 167 } 168 169 (void) enable_extended_FILE_stdio(-1, -1); 170 171 /* 172 * Read in the values from config file first before we check 173 * commandline options so the options override the file. 174 */ 175 if ((defopen(NFSADMIN)) == 0) { 176 if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) { 177 errno = 0; 178 max_conns_allowed = strtol(defval, (char **)NULL, 10); 179 if (errno != 0) { 180 max_conns_allowed = -1; 181 } 182 } 183 if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) { 184 errno = 0; 185 listen_backlog = strtol(defval, (char **)NULL, 10); 186 if (errno != 0) { 187 listen_backlog = 32; 188 } 189 } 190 if ((defval = defread("NFSD_PROTOCOL=")) != NULL) { 191 df_proto = strdup(defval); 192 opt_cnt++; 193 if (strncasecmp("ALL", defval, 3) == 0) { 194 free(df_proto); 195 df_proto = NULL; 196 df_allflag = 1; 197 } 198 } 199 if ((defval = defread("NFSD_DEVICE=")) != NULL) { 200 df_provider = strdup(defval); 201 opt_cnt++; 202 } 203 if ((defval = defread("NFSD_SERVERS=")) != NULL) { 204 errno = 0; 205 maxservers = strtol(defval, (char **)NULL, 10); 206 if (errno != 0) { 207 maxservers = 1; 208 } else { 209 maxservers_set = 1; 210 } 211 } 212 if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) { 213 errno = 0; 214 nfs_server_vers_min = 215 strtol(defval, (char **)NULL, 10); 216 if (errno != 0) { 217 nfs_server_vers_min = NFS_VERSMIN_DEFAULT; 218 } 219 } 220 if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) { 221 errno = 0; 222 nfs_server_vers_max = 223 strtol(defval, (char **)NULL, 10); 224 if (errno != 0) { 225 nfs_server_vers_max = NFS_VERSMAX_DEFAULT; 226 } 227 } 228 if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) { 229 if (strcmp(defval, "off") == 0) { 230 nfs_server_delegation = FALSE; 231 } 232 } 233 234 /* close defaults file */ 235 defopen(NULL); 236 } 237 238 /* 239 * Conflict options error messages. 240 */ 241 if (opt_cnt > 1) { 242 (void) fprintf(stderr, "\nConflicting options, only one of " 243 "the following options can be specified\n" 244 "in " NFSADMIN ":\n" 245 "\tNFSD_PROTOCOL=ALL\n" 246 "\tNFSD_PROTOCOL=protocol\n" 247 "\tNFSD_DEVICE=device\n\n"); 248 usage(); 249 } 250 opt_cnt = 0; 251 252 while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) { 253 switch (i) { 254 case 'a': 255 free(df_proto); 256 df_proto = NULL; 257 free(df_provider); 258 df_provider = NULL; 259 260 allflag = 1; 261 opt_cnt++; 262 break; 263 264 case 'c': 265 max_conns_allowed = atoi(optarg); 266 break; 267 268 case 'p': 269 proto = optarg; 270 df_allflag = 0; 271 opt_cnt++; 272 break; 273 274 /* 275 * DSS: NFSv4 distributed stable storage. 276 * 277 * This is a Contracted Project Private interface, for 278 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313. 279 */ 280 case 's': 281 if (strlen(optarg) < MAXPATHLEN) { 282 /* first "-s" option encountered? */ 283 if (dss_pathnames == NULL) { 284 /* 285 * Allocate maximum possible space 286 * required given cmdline arg count; 287 * "-s <path>" consumes two args. 288 */ 289 size_t sz = (ac / 2) * sizeof (char *); 290 dss_pathnames = (char **)malloc(sz); 291 if (dss_pathnames == NULL) { 292 (void) fprintf(stderr, "%s: " 293 "dss paths malloc failed\n", 294 av[0]); 295 exit(1); 296 } 297 (void) memset(dss_pathnames, 0, sz); 298 } 299 dss_pathnames[dss_npaths] = optarg; 300 dss_npaths++; 301 } else { 302 (void) fprintf(stderr, 303 "%s: -s pathname too long.\n", av[0]); 304 } 305 break; 306 307 case 't': 308 provider = optarg; 309 df_allflag = 0; 310 opt_cnt++; 311 break; 312 313 case 'l': 314 listen_backlog = atoi(optarg); 315 break; 316 317 case '?': 318 usage(); 319 /* NOTREACHED */ 320 } 321 } 322 323 allflag = df_allflag; 324 if (proto == NULL) 325 proto = df_proto; 326 if (provider == NULL) 327 provider = df_provider; 328 329 /* 330 * Conflict options error messages. 331 */ 332 if (opt_cnt > 1) { 333 (void) fprintf(stderr, "\nConflicting options, only one of " 334 "the following options can be specified\n" 335 "on the command line:\n" 336 "\t-a\n" 337 "\t-p protocol\n" 338 "\t-t transport\n\n"); 339 usage(); 340 } 341 342 if (proto != NULL && 343 strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) { 344 if (nfs_server_vers_max == NFS_V4) { 345 if (nfs_server_vers_min == NFS_V4) { 346 syslog(LOG_ERR, 347 "NFS version 4 is not supported " 348 "with the UDP protocol. Exiting\n"); 349 fprintf(stderr, 350 "NFS version 4 is not supported " 351 "with the UDP protocol. Exiting\n"); 352 exit(3); 353 } else { 354 fprintf(stderr, 355 "NFS version 4 is not supported " 356 "with the UDP protocol.\n"); 357 } 358 } 359 } 360 361 /* 362 * If there is exactly one more argument, it is the number of 363 * servers. 364 */ 365 if (optind == ac - 1) { 366 maxservers = atoi(av[optind]); 367 maxservers_set = 1; 368 } 369 /* 370 * If there are two or more arguments, then this is a usage error. 371 */ 372 else if (optind < ac - 1) 373 usage(); 374 /* 375 * Check the ranges for min/max version specified 376 */ 377 else if ((nfs_server_vers_min > nfs_server_vers_max) || 378 (nfs_server_vers_min < NFS_VERSMIN) || 379 (nfs_server_vers_max > NFS_VERSMAX)) 380 usage(); 381 /* 382 * There are no additional arguments, and we haven't set maxservers 383 * explicitly via the config file, we use a default number of 384 * servers. We will log this. 385 */ 386 else if (maxservers_set == 0) 387 logmaxservers = 1; 388 389 /* 390 * Basic Sanity checks on options 391 * 392 * max_conns_allowed must be positive, except for the special 393 * value of -1 which is used internally to mean unlimited, -1 isn't 394 * documented but we allow it anyway. 395 * 396 * maxservers must be positive 397 * listen_backlog must be positive or zero 398 */ 399 if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) || 400 (listen_backlog < 0) || (maxservers <= 0)) { 401 usage(); 402 } 403 404 /* 405 * Set current dir to server root 406 */ 407 if (chdir(dir) < 0) { 408 (void) fprintf(stderr, "%s: ", MyName); 409 perror(dir); 410 exit(1); 411 } 412 413 #ifndef DEBUG 414 /* 415 * Background 416 */ 417 pid = fork(); 418 if (pid < 0) { 419 perror("nfsd: fork"); 420 exit(1); 421 } 422 if (pid != 0) 423 exit(0); 424 425 /* 426 * Close existing file descriptors, open "/dev/null" as 427 * standard input, output, and error, and detach from 428 * controlling terminal. 429 */ 430 closefrom(0); 431 (void) open("/dev/null", O_RDONLY); 432 (void) open("/dev/null", O_WRONLY); 433 (void) dup(1); 434 (void) setsid(); 435 #endif 436 openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON); 437 438 /* 439 * establish our lock on the lock file and write our pid to it. 440 * exit if some other process holds the lock, or if there's any 441 * error in writing/locking the file. 442 */ 443 pid = _enter_daemon_lock(NFSD); 444 switch (pid) { 445 case 0: 446 break; 447 case -1: 448 syslog(LOG_ERR, "error locking for %s: %s", NFSD, 449 strerror(errno)); 450 exit(2); 451 default: 452 /* daemon was already running */ 453 exit(0); 454 } 455 456 /* 457 * If we've been given a list of paths to be used for distributed 458 * stable storage, and provided we're going to run a version 459 * that supports it, setup the DSS paths. 460 */ 461 if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) { 462 if (dss_init(dss_npaths, dss_pathnames) != 0) { 463 syslog(LOG_ERR, "dss_init failed. Exiting."); 464 exit(1); 465 } 466 } 467 468 sigset(SIGTERM, sigflush); 469 sigset(SIGUSR1, quiesce); 470 471 if (logmaxservers) { 472 (void) syslog(LOG_INFO, 473 "Number of servers not specified. Using default of %d.", 474 maxservers); 475 } 476 477 /* 478 * Make sure to unregister any previous versions in case the 479 * user is reconfiguring the server in interesting ways. 480 */ 481 svc_unreg(NFS_PROGRAM, NFS_VERSION); 482 svc_unreg(NFS_PROGRAM, NFS_V3); 483 svc_unreg(NFS_PROGRAM, NFS_V4); 484 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2); 485 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3); 486 487 /* 488 * Set up kernel RPC thread pool for the NFS server. 489 */ 490 if (nfssvcpool(maxservers)) { 491 (void) syslog(LOG_ERR, 492 "Can't set up kernel NFS service: %m. Exiting"); 493 exit(1); 494 } 495 496 497 /* 498 * Set up blocked thread to do LWP creation on behalf of the kernel. 499 */ 500 if (svcwait(NFS_SVCPOOL_ID)) { 501 (void) syslog(LOG_ERR, 502 "Can't set up NFS pool creator: %m, Exiting"); 503 exit(1); 504 } 505 506 /* 507 * RDMA start and stop thread. 508 * Per pool RDMA listener creation and 509 * destructor thread. 510 * 511 * start rdma services and block in the kernel. 512 */ 513 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min, nfs_server_vers_max, 514 nfs_server_delegation)) { 515 (void) syslog(LOG_ERR, 516 "Can't set up RDMA creator thread : %m."); 517 } 518 519 /* 520 * Build a protocol block list for registration. 521 */ 522 protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob)); 523 protobp->serv = "NFS"; 524 protobp->versmin = nfs_server_vers_min; 525 protobp->versmax = nfs_server_vers_max; 526 protobp->program = NFS_PROGRAM; 527 528 protobp->next = (struct protob *)malloc(sizeof (struct protob)); 529 protobp = protobp->next; 530 protobp->serv = "NFS_ACL"; /* not used */ 531 protobp->versmin = nfs_server_vers_min; 532 /* XXX - this needs work to get the version just right */ 533 protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ? 534 NFS_ACL_V3 : nfs_server_vers_max; 535 protobp->program = NFS_ACL_PROGRAM; 536 protobp->next = (struct protob *)NULL; 537 538 if (allflag) { 539 if (do_all(protobp0, nfssvc, 0) == -1) 540 exit(1); 541 } else if (proto) { 542 /* there's more than one match for the same protocol */ 543 struct netconfig *nconf; 544 NCONF_HANDLE *nc; 545 bool_t protoFound = FALSE; 546 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) { 547 syslog(LOG_ERR, "setnetconfig failed: %m"); 548 goto done; 549 } 550 while (nconf = getnetconfig(nc)) { 551 if (strcmp(nconf->nc_proto, proto) == 0) { 552 protoFound = TRUE; 553 do_one(nconf->nc_device, NULL, 554 protobp0, nfssvc, 0); 555 } 556 } 557 (void) endnetconfig(nc); 558 if (protoFound == FALSE) 559 syslog(LOG_ERR, "couldn't find netconfig entry \ 560 for protocol %s", proto); 561 562 } else if (provider) 563 do_one(provider, proto, protobp0, nfssvc, 0); 564 else { 565 for (providerp = defaultproviders; 566 *providerp != NULL; providerp++) { 567 provider = *providerp; 568 do_one(provider, NULL, protobp0, nfssvc, 0); 569 } 570 } 571 done: 572 573 free(protobp); 574 free(protobp0); 575 576 if (num_fds == 0) { 577 (void) syslog(LOG_ERR, 578 "Could not start NFS service for any protocol. Exiting"); 579 exit(1); 580 } 581 582 end_listen_fds = num_fds; 583 584 /* 585 * Get rid of unneeded privileges. 586 */ 587 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION, 588 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL); 589 590 /* 591 * Poll for non-data control events on the transport descriptors. 592 */ 593 poll_for_action(); 594 595 /* 596 * If we get here, something failed in poll_for_action(). 597 */ 598 return (1); 599 } 600 601 static int 602 nfssvcpool(int maxservers) 603 { 604 struct svcpool_args npa; 605 606 npa.id = NFS_SVCPOOL_ID; 607 npa.maxthreads = maxservers; 608 npa.redline = 0; 609 npa.qsize = 0; 610 npa.timeout = 0; 611 npa.stksize = 0; 612 npa.max_same_xprt = 0; 613 return (_nfssys(SVCPOOL_CREATE, &npa)); 614 } 615 616 /* 617 * Establish NFS service thread. 618 */ 619 static int 620 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf) 621 { 622 struct nfs_svc_args nsa; 623 624 nsa.fd = fd; 625 nsa.netid = nconf->nc_netid; 626 nsa.addrmask = addrmask; 627 if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) { 628 nsa.versmax = (nfs_server_vers_max > NFS_V3) ? 629 NFS_V3 : nfs_server_vers_max; 630 nsa.versmin = nfs_server_vers_min; 631 /* 632 * If no version left, silently do nothing, previous 633 * checks will have assured at least TCP is available. 634 */ 635 if (nsa.versmin > nsa.versmax) 636 return (0); 637 } else { 638 nsa.versmax = nfs_server_vers_max; 639 nsa.versmin = nfs_server_vers_min; 640 } 641 nsa.delegation = nfs_server_delegation; 642 return (_nfssys(NFS_SVC, &nsa)); 643 } 644 645 static void 646 usage(void) 647 { 648 (void) fprintf(stderr, 649 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName); 650 (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n"); 651 (void) fprintf(stderr, 652 "\twhere -a causes <nservers> to be started on each appropriate transport,\n"); 653 (void) fprintf(stderr, 654 "\tmax_conns is the maximum number of concurrent connections allowed,\n"); 655 (void) fprintf(stderr, "\t\tand max_conns must be a decimal number"); 656 (void) fprintf(stderr, "> zero,\n"); 657 (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n"); 658 (void) fprintf(stderr, 659 "\ttransport is a transport provider name (i.e. device),\n"); 660 (void) fprintf(stderr, 661 "\tlisten_backlog is the TCP listen backlog,\n"); 662 (void) fprintf(stderr, 663 "\tand <nservers> must be a decimal number > zero.\n"); 664 exit(1); 665 } 666 667 /* 668 * Issue nfssys system call to flush all logging buffers asynchronously. 669 * 670 * NOTICE: It is extremely important to flush NFS logging buffers when 671 * nfsd exits. When the system is halted or rebooted nfslogd 672 * may not have an opportunity to flush the buffers. 673 */ 674 static void 675 nfsl_flush() 676 { 677 struct nfsl_flush_args nfa; 678 679 memset((void *)&nfa, 0, sizeof (nfa)); 680 nfa.version = NFSL_FLUSH_ARGS_VERS; 681 nfa.directive = NFSL_ALL; /* flush all asynchronously */ 682 683 if (_nfssys(LOG_FLUSH, &nfa) < 0) 684 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n", 685 strerror(errno)); 686 } 687 688 /* 689 * SIGTERM handler. 690 * Flush logging buffers and exit. 691 */ 692 static void 693 sigflush(int sig) 694 { 695 nfsl_flush(); 696 exit(0); 697 } 698 699 /* 700 * SIGUSR1 handler. 701 * 702 * Request that server quiesce, then (nfsd) exit. For subsequent warm start. 703 * 704 * This is a Contracted Project Private interface, for the sole use 705 * of Sun Cluster HA-NFS. See PSARC/2004/497. 706 * 707 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN. 708 */ 709 static void 710 quiesce(int sig) 711 { 712 int error; 713 int id = NFS_SVCPOOL_ID; 714 715 if (nfs_server_vers_max >= QUIESCE_VERSMIN) { 716 /* Request server quiesce at next shutdown */ 717 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id); 718 if (error) { 719 syslog(LOG_ERR, 720 "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s", 721 strerror(errno)); 722 return; 723 } 724 } 725 726 /* Flush logging buffers */ 727 nfsl_flush(); 728 729 exit(0); 730 } 731 732 /* 733 * DSS: distributed stable storage. 734 * Create leaf directories as required, keeping an eye on path 735 * lengths. Calls exit(1) on failure. 736 * The pathnames passed in must already exist, and must be writeable by nfsd. 737 * Note: the leaf directories under NFS4_VAR_DIR are not created here; 738 * they're created at pkg install. 739 */ 740 static void 741 dss_mkleafdirs(uint_t npaths, char **pathnames) 742 { 743 int i; 744 char *tmppath = NULL; 745 746 /* 747 * Create the temporary storage used by dss_mkleafdir() here, 748 * rather than in that function, so that it only needs to be 749 * done once, rather than once for each call. Too big to put 750 * on the function's stack. 751 */ 752 tmppath = (char *)malloc(MAXPATHLEN); 753 if (tmppath == NULL) { 754 syslog(LOG_ERR, "tmppath malloc failed. Exiting"); 755 exit(1); 756 } 757 758 for (i = 0; i < npaths; i++) { 759 char *p = pathnames[i]; 760 761 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath); 762 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath); 763 } 764 765 free(tmppath); 766 } 767 768 /* 769 * Create "leaf" in "dir" (which must already exist). 770 * leaf: should start with a '/' 771 */ 772 static void 773 dss_mkleafdir(char *dir, char *leaf, char *tmppath) 774 { 775 /* MAXPATHLEN includes the terminating NUL */ 776 if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) { 777 syslog(LOG_ERR, "stable storage path too long: %s%s. Exiting", 778 dir, leaf); 779 exit(1); 780 } 781 782 (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf); 783 784 /* the directory may already exist: that's OK */ 785 if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) { 786 syslog(LOG_ERR, "error creating stable storage directory: " 787 "%s: %s. Exiting", strerror(errno), tmppath); 788 exit(1); 789 } 790 } 791 792 /* 793 * Create the storage dirs, and pass the path list to the kernel. 794 * This requires the nfssrv module to be loaded; the _nfssys() syscall 795 * will fail ENOTSUP if it is not. 796 * Use libnvpair(3LIB) to pass the data to the kernel. 797 */ 798 static int 799 dss_init(uint_t npaths, char **pathnames) 800 { 801 int i, j, nskipped, error; 802 char *bufp; 803 uint32_t bufsize; 804 size_t buflen; 805 nvlist_t *nvl; 806 807 if (npaths > 1) { 808 /* 809 * We need to remove duplicate paths; this might be user error 810 * in the general case, but HA-NFSv4 can also cause this. 811 * Sort the pathnames array, and NULL out duplicates, 812 * then write the non-NULL entries to a new array. 813 * Sorting will also allow the kernel to optimise its searches. 814 */ 815 816 qsort(pathnames, npaths, sizeof (char *), qstrcmp); 817 818 /* now NULL out any duplicates */ 819 i = 0; j = 1; nskipped = 0; 820 while (j < npaths) { 821 if (strcmp(pathnames[i], pathnames[j]) == NULL) { 822 pathnames[j] = NULL; 823 j++; 824 nskipped++; 825 continue; 826 } 827 828 /* skip i over any of its NULLed duplicates */ 829 i = j++; 830 } 831 832 /* finally, write the non-NULL entries to a new array */ 833 if (nskipped > 0) { 834 int nreal; 835 size_t sz; 836 char **tmp_pathnames; 837 838 nreal = npaths - nskipped; 839 840 sz = nreal * sizeof (char *); 841 tmp_pathnames = (char **)malloc(sz); 842 if (tmp_pathnames == NULL) { 843 syslog(LOG_ERR, "tmp_pathnames malloc failed"); 844 exit(1); 845 } 846 847 for (i = 0, j = 0; i < npaths; i++) 848 if (pathnames[i] != NULL) 849 tmp_pathnames[j++] = pathnames[i]; 850 free(pathnames); 851 pathnames = tmp_pathnames; 852 npaths = nreal; 853 } 854 855 } 856 857 /* Create directories to store the distributed state files */ 858 dss_mkleafdirs(npaths, pathnames); 859 860 /* Create the name-value pair list */ 861 error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 862 if (error) { 863 syslog(LOG_ERR, "nvlist_alloc failed: %s.", strerror(errno)); 864 return (1); 865 } 866 867 /* Add the pathnames array as a single name-value pair */ 868 error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME, 869 pathnames, npaths); 870 if (error) { 871 syslog(LOG_ERR, "nvlist_add_string_array failed: %s.", 872 strerror(errno)); 873 nvlist_free(nvl); 874 return (1); 875 } 876 877 /* 878 * Pack list into contiguous memory, for passing to kernel. 879 * nvlist_pack() will allocate the memory for the buffer, 880 * which we should free() when no longer needed. 881 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary. 882 */ 883 bufp = NULL; 884 error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0); 885 if (error) { 886 syslog(LOG_ERR, "nvlist_pack failed: %s.", strerror(errno)); 887 nvlist_free(nvl); 888 return (1); 889 } 890 891 /* Now we have the packed buffer, we no longer need the list */ 892 nvlist_free(nvl); 893 894 /* 895 * Let the kernel know in advance how big the buffer is. 896 * NOTE: we cannot just pass buflen, since size_t is a long, and 897 * thus a different size between ILP32 userland and LP64 kernel. 898 * Use an int for the transfer, since that should be big enough; 899 * this is a no-op at the moment, here, since nfsd is 32-bit, but 900 * that could change. 901 */ 902 bufsize = (uint32_t)buflen; 903 error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize); 904 if (error) { 905 syslog(LOG_ERR, 906 "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ", 907 strerror(errno)); 908 free(bufp); 909 return (1); 910 } 911 912 /* Pass the packed buffer to the kernel */ 913 error = _nfssys(NFS4_DSS_SETPATHS, bufp); 914 if (error) { 915 syslog(LOG_ERR, 916 "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno)); 917 free(bufp); 918 return (1); 919 } 920 921 /* 922 * The kernel has now unpacked the buffer and extracted the 923 * pathnames array, we no longer need the buffer. 924 */ 925 free(bufp); 926 927 return (0); 928 } 929 930 /* 931 * Quick sort string compare routine, for qsort. 932 * Needed to make arg types correct. 933 */ 934 int 935 qstrcmp(const void *p1, const void *p2) 936 { 937 char *s1 = *((char **)p1); 938 char *s2 = *((char **)p2); 939 940 return (strcmp(s1, s2)); 941 } 942