/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * nfs_cast.c : broadcast to a specific group of NFS servers
 *
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <stdio.h>
#include <syslog.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include <stdlib.h>
#include <rpc/rpc.h>
#include <rpc/clnt_soc.h>
#include <rpc/nettype.h>
#include <rpc/pmap_prot.h>
#include <netconfig.h>
#include <netdir.h>
#include <nfs/nfs.h>
#define NFSCLIENT
#include <locale.h>
#include "automount.h"

/*
 * Microseconds added to a measured response time for each unit of
 * penalty (see sort_responses()).
 */
#define PENALTY_WEIGHT 100000

/*
 * Per-address bookkeeping: one entry for every address of a host.
 */
struct tstamps {
	struct tstamps *ts_next;	/* next address of the same host */
	int ts_penalty;			/* copied from the map entry's mfs_penalty */
	int ts_inx;			/* address index; added to the base xid */
	int ts_rcvd;			/* non-zero once a matching reply arrived */
	struct timeval ts_timeval;	/* send time, later the elapsed time */
};

/* A list of addresses - all belonging to the same transport */

struct addrs {
	struct addrs *addr_next;		/* next host on this transport */
	struct mapfs *addr_mfs;			/* map entry the addresses belong to */
	struct nd_addrlist *addr_addrs;		/* result of netdir_getbyname() */
	struct tstamps *addr_if_tstamps;	/* one tstamps per address */
};

/* A list of connectionless transports */

struct transp {
	struct transp *tr_next;		/* next transport endpoint */
	int tr_fd;			/* endpoint returned by t_open() */
	char *tr_device;		/* netconfig device name, used in logs */
	struct t_bind *tr_taddr;	/* t_alloc'ed buffer for the reply address */
	struct addrs *tr_addrs;		/* hosts pinged through this endpoint */
};

/* A list of map entries and their roundtrip times, for sorting */

struct sm {
	struct mapfs *mfs;		/* responding map entry */
	struct timeval timeval;		/* response time including penalty */
};

static void free_transports(struct transp *);
static void calc_resp_time(struct timeval *);
static struct mapfs *sort_responses(struct transp *);
static int host_sm(const void *, const void *b);
static int time_sm(const void *, const void *b);
extern struct mapfs *add_mfs(struct mapfs *, int, struct mapfs **,
    struct mapfs **);

/*
 * This routine is designed to be able to "ping"
 * a list of hosts and create a list of responding
 * hosts sorted by response time.
 * This must be done without any prior
 * contact with the host - therefore the "ping"
 * must be to a "well-known" address. The outstanding
 * candidate here is the address of "rpcbind".
103 * 104 * A response to a ping is no guarantee that the host 105 * is running NFS, has a mount daemon, or exports 106 * the required filesystem. If the subsequent 107 * mount attempt fails then the host will be marked 108 * "ignore" and the host list will be re-pinged 109 * (sans the bad host). This process continues 110 * until a successful mount is achieved or until 111 * there are no hosts left to try. 112 */ 113 enum clnt_stat 114 nfs_cast(struct mapfs *mfs_in, struct mapfs **mfs_out, int timeout) 115 { 116 enum clnt_stat stat; 117 AUTH *sys_auth = authsys_create_default(); 118 XDR xdr_stream; 119 register XDR *xdrs = &xdr_stream; 120 int outlen; 121 int if_inx; 122 int tsec; 123 int flag; 124 int sent, addr_cnt, rcvd, if_cnt; 125 fd_set readfds, mask; 126 register ulong_t xid; /* xid - unique per addr */ 127 register int i; 128 struct rpc_msg msg; 129 struct timeval t, rcv_timeout; 130 char outbuf[UDPMSGSIZE], inbuf[UDPMSGSIZE]; 131 struct t_unitdata t_udata, t_rdata; 132 struct nd_hostserv hs; 133 struct nd_addrlist *retaddrs; 134 struct transp *tr_head; 135 struct transp *trans, *prev_trans; 136 struct addrs *a, *prev_addr; 137 struct tstamps *ts, *prev_ts; 138 NCONF_HANDLE *nc = NULL; 139 struct netconfig *nconf; 140 struct rlimit rl; 141 int dtbsize; 142 struct mapfs *mfs; 143 144 /* 145 * For each connectionless transport get a list of 146 * host addresses. Any single host may have 147 * addresses on several transports. 148 */ 149 addr_cnt = sent = rcvd = 0; 150 tr_head = NULL; 151 FD_ZERO(&mask); 152 153 /* 154 * Set the default select size to be the maximum FD_SETSIZE, unless 155 * the current rlimit is lower. 
156 */ 157 dtbsize = FD_SETSIZE; 158 if (getrlimit(RLIMIT_NOFILE, &rl) == 0) { 159 if (rl.rlim_cur < FD_SETSIZE) 160 dtbsize = rl.rlim_cur; 161 } 162 163 prev_trans = NULL; 164 prev_addr = NULL; 165 prev_ts = NULL; 166 for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) { 167 168 if (trace > 2) 169 trace_prt(1, "nfs_cast: host=%s\n", mfs->mfs_host); 170 171 nc = setnetconfig(); 172 if (nc == NULL) { 173 stat = RPC_CANTSEND; 174 goto done_broad; 175 } 176 while (nconf = getnetconfig(nc)) { 177 if (!(nconf->nc_flag & NC_VISIBLE) || 178 nconf->nc_semantics != NC_TPI_CLTS || 179 (strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0)) 180 continue; 181 trans = (struct transp *)malloc(sizeof (*trans)); 182 if (trans == NULL) { 183 syslog(LOG_ERR, "no memory"); 184 stat = RPC_CANTSEND; 185 goto done_broad; 186 } 187 (void) memset(trans, 0, sizeof (*trans)); 188 if (tr_head == NULL) 189 tr_head = trans; 190 else 191 prev_trans->tr_next = trans; 192 prev_trans = trans; 193 194 trans->tr_fd = t_open(nconf->nc_device, O_RDWR, NULL); 195 if (trans->tr_fd < 0) { 196 syslog(LOG_ERR, "nfscast: t_open: %s:%m", 197 nconf->nc_device); 198 stat = RPC_CANTSEND; 199 goto done_broad; 200 } 201 if (t_bind(trans->tr_fd, (struct t_bind *)NULL, 202 (struct t_bind *)NULL) < 0) { 203 syslog(LOG_ERR, "nfscast: t_bind: %m"); 204 stat = RPC_CANTSEND; 205 goto done_broad; 206 } 207 trans->tr_taddr = 208 /* LINTED pointer alignment */ 209 (struct t_bind *)t_alloc(trans->tr_fd, T_BIND, T_ADDR); 210 if (trans->tr_taddr == (struct t_bind *)NULL) { 211 syslog(LOG_ERR, "nfscast: t_alloc: %m"); 212 stat = RPC_SYSTEMERROR; 213 goto done_broad; 214 } 215 216 trans->tr_device = nconf->nc_device; 217 FD_SET(trans->tr_fd, &mask); 218 219 if_inx = 0; 220 hs.h_host = mfs->mfs_host; 221 hs.h_serv = "rpcbind"; 222 if (netdir_getbyname(nconf, &hs, &retaddrs) == ND_OK) { 223 224 /* 225 * If mfs->ignore is previously set for 226 * this map, clear it. 
Because a host can 227 * have either v6 or v4 address 228 */ 229 if (mfs->mfs_ignore == 1) 230 mfs->mfs_ignore = 0; 231 232 a = (struct addrs *)malloc(sizeof (*a)); 233 if (a == NULL) { 234 syslog(LOG_ERR, "no memory"); 235 stat = RPC_CANTSEND; 236 goto done_broad; 237 } 238 (void) memset(a, 0, sizeof (*a)); 239 if (trans->tr_addrs == NULL) 240 trans->tr_addrs = a; 241 else 242 prev_addr->addr_next = a; 243 prev_addr = a; 244 a->addr_if_tstamps = NULL; 245 a->addr_mfs = mfs; 246 a->addr_addrs = retaddrs; 247 if_cnt = retaddrs->n_cnt; 248 while (if_cnt--) { 249 ts = (struct tstamps *) 250 malloc(sizeof (*ts)); 251 if (ts == NULL) { 252 syslog(LOG_ERR, "no memory"); 253 stat = RPC_CANTSEND; 254 goto done_broad; 255 } 256 (void) memset(ts, 0, sizeof (*ts)); 257 ts->ts_penalty = mfs->mfs_penalty; 258 if (a->addr_if_tstamps == NULL) 259 a->addr_if_tstamps = ts; 260 else 261 prev_ts->ts_next = ts; 262 prev_ts = ts; 263 ts->ts_inx = if_inx++; 264 addr_cnt++; 265 } 266 break; 267 } else { 268 mfs->mfs_ignore = 1; 269 if (verbose) 270 syslog(LOG_ERR, 271 "%s:%s address not known", 272 mfs->mfs_host, 273 strcmp(nconf->nc_proto, NC_INET)?"IPv6":"IPv4"); 274 } 275 } /* while */ 276 277 endnetconfig(nc); 278 nc = NULL; 279 } /* for */ 280 if (addr_cnt == 0) { 281 syslog(LOG_ERR, "nfscast: couldn't find addresses"); 282 stat = RPC_CANTSEND; 283 goto done_broad; 284 } 285 286 (void) gettimeofday(&t, (struct timezone *)0); 287 xid = (getpid() ^ t.tv_sec ^ t.tv_usec) & ~0xFF; 288 t.tv_usec = 0; 289 290 /* serialize the RPC header */ 291 292 msg.rm_direction = CALL; 293 msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; 294 msg.rm_call.cb_prog = RPCBPROG; 295 /* 296 * we can not use RPCBVERS here since it doesn't exist in 4.X, 297 * the fix to bug 1139883 has made the 4.X portmapper silent to 298 * version mismatches. This causes the RPC call to the remote 299 * portmapper to simply be ignored if it's not Version 2. 
300 */ 301 msg.rm_call.cb_vers = PMAPVERS; 302 msg.rm_call.cb_proc = NULLPROC; 303 if (sys_auth == (AUTH *)NULL) { 304 stat = RPC_SYSTEMERROR; 305 goto done_broad; 306 } 307 msg.rm_call.cb_cred = sys_auth->ah_cred; 308 msg.rm_call.cb_verf = sys_auth->ah_verf; 309 xdrmem_create(xdrs, outbuf, sizeof (outbuf), XDR_ENCODE); 310 if (! xdr_callmsg(xdrs, &msg)) { 311 stat = RPC_CANTENCODEARGS; 312 goto done_broad; 313 } 314 outlen = (int)xdr_getpos(xdrs); 315 xdr_destroy(xdrs); 316 317 t_udata.opt.len = 0; 318 t_udata.udata.buf = outbuf; 319 t_udata.udata.len = outlen; 320 321 /* 322 * Basic loop: send packet to all hosts and wait for response(s). 323 * The response timeout grows larger per iteration. 324 * A unique xid is assigned to each address in order to 325 * correctly match the replies. 326 */ 327 for (tsec = 4; timeout > 0; tsec *= 2) { 328 329 timeout -= tsec; 330 if (timeout <= 0) 331 tsec += timeout; 332 333 rcv_timeout.tv_sec = tsec; 334 rcv_timeout.tv_usec = 0; 335 336 sent = 0; 337 for (trans = tr_head; trans; trans = trans->tr_next) { 338 for (a = trans->tr_addrs; a; a = a->addr_next) { 339 struct netbuf *if_netbuf = 340 a->addr_addrs->n_addrs; 341 ts = a->addr_if_tstamps; 342 if_cnt = a->addr_addrs->n_cnt; 343 while (if_cnt--) { 344 345 /* 346 * xid is the first thing in 347 * preserialized buffer 348 */ 349 /* LINTED pointer alignment */ 350 *((ulong_t *)outbuf) = 351 htonl(xid + ts->ts_inx); 352 (void) gettimeofday(&(ts->ts_timeval), 353 (struct timezone *)0); 354 /* 355 * Check if already received 356 * from a previous iteration. 357 */ 358 if (ts->ts_rcvd) { 359 sent++; 360 ts = ts->ts_next; 361 continue; 362 } 363 364 t_udata.addr = *if_netbuf++; 365 366 if (t_sndudata(trans->tr_fd, 367 &t_udata) == 0) { 368 sent++; 369 } 370 371 ts = ts->ts_next; 372 } 373 } 374 } 375 if (sent == 0) { /* no packets sent ? */ 376 stat = RPC_CANTSEND; 377 goto done_broad; 378 } 379 380 /* 381 * Have sent all the packets. Now collect the responses... 
382 */ 383 rcvd = 0; 384 recv_again: 385 msg.acpted_rply.ar_verf = _null_auth; 386 msg.acpted_rply.ar_results.proc = xdr_void; 387 readfds = mask; 388 389 switch (select(dtbsize, &readfds, 390 (fd_set *)NULL, (fd_set *)NULL, &rcv_timeout)) { 391 392 case 0: /* Timed out */ 393 /* 394 * If we got at least one response in the 395 * last interval, then don't wait for any 396 * more. In theory we should wait for 397 * the max weighting (penalty) value so 398 * that a very slow server has a chance to 399 * respond but this could take a long time 400 * if the admin has set a high weighting 401 * value. 402 */ 403 if (rcvd > 0) 404 goto done_broad; 405 406 stat = RPC_TIMEDOUT; 407 continue; 408 409 case -1: /* some kind of error */ 410 if (errno == EINTR) 411 goto recv_again; 412 syslog(LOG_ERR, "nfscast: select: %m"); 413 if (rcvd == 0) 414 stat = RPC_CANTRECV; 415 goto done_broad; 416 417 } /* end of select results switch */ 418 419 for (trans = tr_head; trans; trans = trans->tr_next) { 420 if (FD_ISSET(trans->tr_fd, &readfds)) 421 break; 422 } 423 if (trans == NULL) 424 goto recv_again; 425 426 try_again: 427 t_rdata.addr = trans->tr_taddr->addr; 428 t_rdata.udata.buf = inbuf; 429 t_rdata.udata.maxlen = sizeof (inbuf); 430 t_rdata.udata.len = 0; 431 t_rdata.opt.len = 0; 432 if (t_rcvudata(trans->tr_fd, &t_rdata, &flag) < 0) { 433 if (errno == EINTR) 434 goto try_again; 435 syslog(LOG_ERR, "nfscast: t_rcvudata: %s:%m", 436 trans->tr_device); 437 stat = RPC_CANTRECV; 438 continue; 439 } 440 if (t_rdata.udata.len < sizeof (ulong_t)) 441 goto recv_again; 442 if (flag & T_MORE) { 443 syslog(LOG_ERR, 444 "nfscast: t_rcvudata: %s: buffer overflow", 445 trans->tr_device); 446 goto recv_again; 447 } 448 449 /* 450 * see if reply transaction id matches sent id. 451 * If so, decode the results. 452 * Note: received addr is ignored, it could be 453 * different from the send addr if the host has 454 * more than one addr. 
455 */ 456 xdrmem_create(xdrs, inbuf, (uint_t)t_rdata.udata.len, 457 XDR_DECODE); 458 if (xdr_replymsg(xdrs, &msg)) { 459 if (msg.rm_reply.rp_stat == MSG_ACCEPTED && 460 (msg.rm_xid & ~0xFF) == xid) { 461 struct addrs *curr_addr; 462 463 i = msg.rm_xid & 0xFF; 464 for (curr_addr = trans->tr_addrs; curr_addr; 465 curr_addr = curr_addr->addr_next) { 466 for (ts = curr_addr->addr_if_tstamps; ts; 467 ts = ts->ts_next) 468 if (ts->ts_inx == i && !ts->ts_rcvd) { 469 ts->ts_rcvd = 1; 470 calc_resp_time(&ts->ts_timeval); 471 stat = RPC_SUCCESS; 472 rcvd++; 473 break; 474 } 475 } 476 } /* otherwise, we just ignore the errors ... */ 477 } 478 xdrs->x_op = XDR_FREE; 479 msg.acpted_rply.ar_results.proc = xdr_void; 480 (void) xdr_replymsg(xdrs, &msg); 481 XDR_DESTROY(xdrs); 482 if (rcvd == sent) 483 goto done_broad; 484 else 485 goto recv_again; 486 } 487 if (!rcvd) 488 stat = RPC_TIMEDOUT; 489 490 done_broad: 491 if (rcvd) { 492 *mfs_out = sort_responses(tr_head); 493 stat = RPC_SUCCESS; 494 } 495 if (nc) 496 endnetconfig(nc); 497 free_transports(tr_head); 498 AUTH_DESTROY(sys_auth); 499 return (stat); 500 } 501 502 /* 503 * Go through all the responses and sort fastest to slowest. 504 * Note that any penalty is added to the response time - so the 505 * fastest response isn't necessarily the one that arrived first. 
506 */ 507 static struct mapfs * 508 sort_responses(trans) 509 struct transp *trans; 510 { 511 struct transp *t; 512 struct addrs *a; 513 struct tstamps *ti; 514 int i, size = 0, allocsize = 10; 515 struct mapfs *p, *mfs_head = NULL, *mfs_tail = NULL; 516 struct sm *buffer; 517 518 buffer = (struct sm *)malloc(allocsize * sizeof (struct sm)); 519 if (!buffer) { 520 syslog(LOG_ERR, "sort_responses: malloc error.\n"); 521 return (NULL); 522 } 523 524 for (t = trans; t; t = t->tr_next) { 525 for (a = t->tr_addrs; a; a = a->addr_next) { 526 for (ti = a->addr_if_tstamps; 527 ti; ti = ti->ts_next) { 528 if (!ti->ts_rcvd) 529 continue; 530 ti->ts_timeval.tv_usec += 531 (ti->ts_penalty * PENALTY_WEIGHT); 532 if (ti->ts_timeval.tv_usec >= 1000000) { 533 ti->ts_timeval.tv_sec += 534 (ti->ts_timeval.tv_usec / 1000000); 535 ti->ts_timeval.tv_usec = 536 (ti->ts_timeval.tv_usec % 1000000); 537 } 538 539 if (size >= allocsize) { 540 allocsize += 10; 541 buffer = (struct sm *)realloc(buffer, 542 allocsize * sizeof (struct sm)); 543 if (!buffer) { 544 syslog(LOG_ERR, 545 "sort_responses: malloc error.\n"); 546 return (NULL); 547 } 548 } 549 buffer[size].timeval = ti->ts_timeval; 550 buffer[size].mfs = a->addr_mfs; 551 size++; 552 } 553 } 554 } 555 556 #ifdef DEBUG 557 if (trace > 3) { 558 trace_prt(1, " sort_responses: before host sort:\n"); 559 for (i = 0; i < size; i++) 560 trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host, 561 buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec); 562 trace_prt(0, "\n"); 563 } 564 #endif 565 566 qsort((void *)buffer, size, sizeof (struct sm), host_sm); 567 568 /* 569 * Cope with multiply listed hosts by choosing first time 570 */ 571 for (i = 1; i < size; i++) { 572 #ifdef DEBUG 573 if (trace > 3) { 574 trace_prt(1, " sort_responses: comparing %s and %s\n", 575 buffer[i-1].mfs->mfs_host, 576 buffer[i].mfs->mfs_host); 577 } 578 #endif 579 if (strcmp(buffer[i-1].mfs->mfs_host, 580 buffer[i].mfs->mfs_host) == 0) 581 memcpy(&buffer[i].timeval, 
&buffer[i-1].timeval, 582 sizeof (struct timeval)); 583 } 584 if (trace > 3) 585 trace_prt(0, "\n"); 586 587 #ifdef DEBUG 588 if (trace > 3) { 589 trace_prt(1, " sort_responses: before time sort:\n"); 590 for (i = 0; i < size; i++) 591 trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host, 592 buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec); 593 trace_prt(0, "\n"); 594 } 595 #endif 596 597 qsort((void *)buffer, size, sizeof (struct sm), time_sm); 598 599 #ifdef DEBUG 600 if (trace > 3) { 601 trace_prt(1, " sort_responses: after sort:\n"); 602 for (i = 0; i < size; i++) 603 trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host, 604 buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec); 605 trace_prt(0, "\n"); 606 } 607 #endif 608 609 for (i = 0; i < size; i++) { 610 #ifdef DEBUG 611 if (trace > 3) { 612 trace_prt(1, " sort_responses: adding %s\n", 613 buffer[i].mfs->mfs_host); 614 } 615 #endif 616 p = add_mfs(buffer[i].mfs, 0, &mfs_head, &mfs_tail); 617 if (!p) 618 return (NULL); 619 } 620 free(buffer); 621 622 return (mfs_head); 623 } 624 625 626 /* 627 * Comparison routines called by qsort(3). 628 */ 629 static int host_sm(const void *a, const void *b) 630 { 631 return (strcmp(((struct sm *)a)->mfs->mfs_host, 632 ((struct sm *)b)->mfs->mfs_host)); 633 } 634 635 static int time_sm(const void *a, const void *b) 636 { 637 if (timercmp(&(((struct sm *)a)->timeval), 638 &(((struct sm *)b)->timeval), < /* cstyle */)) 639 return (-1); 640 else if (timercmp(&(((struct sm *)a)->timeval), 641 &(((struct sm *)b)->timeval), > /* cstyle */)) 642 return (1); 643 else 644 return (0); 645 } 646 647 /* 648 * Given send_time which is the time a request 649 * was transmitted to a server, subtract it 650 * from the time "now" thereby converting it 651 * to an elapsed time. 
652 */ 653 static void 654 calc_resp_time(send_time) 655 struct timeval *send_time; 656 { 657 struct timeval time_now; 658 659 (void) gettimeofday(&time_now, (struct timezone *)0); 660 if (time_now.tv_usec < send_time->tv_usec) { 661 time_now.tv_sec--; 662 time_now.tv_usec += 1000000; 663 } 664 send_time->tv_sec = time_now.tv_sec - send_time->tv_sec; 665 send_time->tv_usec = time_now.tv_usec - send_time->tv_usec; 666 } 667 668 static void 669 free_transports(trans) 670 struct transp *trans; 671 { 672 struct transp *t, *tmpt = NULL; 673 struct addrs *a, *tmpa = NULL; 674 struct tstamps *ts, *tmpts = NULL; 675 676 for (t = trans; t; t = tmpt) { 677 if (t->tr_taddr) 678 (void) t_free((char *)t->tr_taddr, T_BIND); 679 if (t->tr_fd > 0) 680 (void) t_close(t->tr_fd); 681 for (a = t->tr_addrs; a; a = tmpa) { 682 for (ts = a->addr_if_tstamps; ts; ts = tmpts) { 683 tmpts = ts->ts_next; 684 free(ts); 685 } 686 (void) netdir_free((char *)a->addr_addrs, ND_ADDRLIST); 687 tmpa = a->addr_next; 688 free(a); 689 } 690 tmpt = t->tr_next; 691 free(t); 692 } 693 } 694