1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * nfs_cast.c : broadcast to a specific group of NFS servers
24 *
25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 #pragma ident "%Z%%M% %I% %E% SMI"
30
31 #include <stdio.h>
32 #include <syslog.h>
33 #include <errno.h>
34 #include <string.h>
35 #include <sys/types.h>
36 #include <sys/time.h>
37 #include <sys/resource.h>
38 #include <unistd.h>
39 #include <stdlib.h>
40 #include <rpc/rpc.h>
41 #include <rpc/clnt_soc.h>
42 #include <rpc/nettype.h>
43 #include <rpc/pmap_prot.h>
44 #include <netconfig.h>
45 #include <netdir.h>
46 #include <nfs/nfs.h>
47 #define NFSCLIENT
48 #include <locale.h>
49 #include "automount.h"
50
/*
 * Microseconds added to a host's measured response time for each unit
 * of its ts_penalty before the responses are sorted (see
 * sort_responses()).
 */
#define	PENALTY_WEIGHT	100000
52
/*
 * Per-address bookkeeping: one entry for every address returned by
 * netdir_getbyname() for a host on a given transport.
 */
struct tstamps {
	struct tstamps	*ts_next;	/* next entry for the same addrs */
	int		ts_penalty;	/* copied from mfs->mfs_penalty */
	int		ts_inx;		/* index added to the base xid */
	int		ts_rcvd;	/* non-zero once a reply matched */
	/*
	 * Time the request was sent; converted in place to the elapsed
	 * time by calc_resp_time() when the reply is matched.
	 */
	struct timeval	ts_timeval;
};
60
/* A list of addresses - all belonging to the same transport */

struct addrs {
	struct addrs		*addr_next;	/* next address list */
	struct mapfs		*addr_mfs;	/* map entry being pinged */
	struct nd_addrlist	*addr_addrs;	/* from netdir_getbyname() */
	struct tstamps		*addr_if_tstamps; /* one per address above */
};
69
/* A list of connectionless transports */

struct transp {
	struct transp	*tr_next;	/* next transport endpoint */
	int		tr_fd;		/* endpoint returned by t_open() */
	char		*tr_device;	/* device name, for log messages */
	struct t_bind	*tr_taddr;	/* t_alloc()ed; receive address buf */
	struct addrs	*tr_addrs;	/* addresses pinged via this fd */
};
79
/* A list of map entries and their roundtrip times, for sorting */

struct sm {
	struct mapfs	*mfs;		/* map entry that responded */
	struct timeval	timeval;	/* elapsed time including penalty */
};
86
/* Helpers private to this file. */
static void free_transports(struct transp *trans);
static void calc_resp_time(struct timeval *send_time);
static struct mapfs *sort_responses(struct transp *trans);

/* qsort(3C) comparison callbacks over arrays of struct sm. */
static int host_sm(const void *a, const void *b);
static int time_sm(const void *a, const void *b);

/* Defined elsewhere in automountd. */
extern struct mapfs *add_mfs(struct mapfs *, int, struct mapfs **,
    struct mapfs **);
94
95 /*
96 * This routine is designed to be able to "ping"
97 * a list of hosts and create a list of responding
98 * hosts sorted by response time.
99 * This must be done without any prior
100 * contact with the host - therefore the "ping"
101 * must be to a "well-known" address. The outstanding
102 * candidate here is the address of "rpcbind".
103 *
104 * A response to a ping is no guarantee that the host
105 * is running NFS, has a mount daemon, or exports
106 * the required filesystem. If the subsequent
107 * mount attempt fails then the host will be marked
108 * "ignore" and the host list will be re-pinged
109 * (sans the bad host). This process continues
110 * until a successful mount is achieved or until
111 * there are no hosts left to try.
112 */
113 enum clnt_stat
nfs_cast(struct mapfs * mfs_in,struct mapfs ** mfs_out,int timeout)114 nfs_cast(struct mapfs *mfs_in, struct mapfs **mfs_out, int timeout)
115 {
116 enum clnt_stat stat;
117 AUTH *sys_auth = authsys_create_default();
118 XDR xdr_stream;
119 register XDR *xdrs = &xdr_stream;
120 int outlen;
121 int if_inx;
122 int tsec;
123 int flag;
124 int sent, addr_cnt, rcvd, if_cnt;
125 fd_set readfds, mask;
126 register ulong_t xid; /* xid - unique per addr */
127 register int i;
128 struct rpc_msg msg;
129 struct timeval t, rcv_timeout;
130 char outbuf[UDPMSGSIZE], inbuf[UDPMSGSIZE];
131 struct t_unitdata t_udata, t_rdata;
132 struct nd_hostserv hs;
133 struct nd_addrlist *retaddrs;
134 struct transp *tr_head;
135 struct transp *trans, *prev_trans;
136 struct addrs *a, *prev_addr;
137 struct tstamps *ts, *prev_ts;
138 NCONF_HANDLE *nc = NULL;
139 struct netconfig *nconf;
140 struct rlimit rl;
141 int dtbsize;
142 struct mapfs *mfs;
143
144 /*
145 * For each connectionless transport get a list of
146 * host addresses. Any single host may have
147 * addresses on several transports.
148 */
149 addr_cnt = sent = rcvd = 0;
150 tr_head = NULL;
151 FD_ZERO(&mask);
152
153 /*
154 * Set the default select size to be the maximum FD_SETSIZE, unless
155 * the current rlimit is lower.
156 */
157 dtbsize = FD_SETSIZE;
158 if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
159 if (rl.rlim_cur < FD_SETSIZE)
160 dtbsize = rl.rlim_cur;
161 }
162
163 prev_trans = NULL;
164 prev_addr = NULL;
165 prev_ts = NULL;
166 for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
167
168 if (trace > 2)
169 trace_prt(1, "nfs_cast: host=%s\n", mfs->mfs_host);
170
171 nc = setnetconfig();
172 if (nc == NULL) {
173 stat = RPC_CANTSEND;
174 goto done_broad;
175 }
176 while (nconf = getnetconfig(nc)) {
177 if (!(nconf->nc_flag & NC_VISIBLE) ||
178 nconf->nc_semantics != NC_TPI_CLTS ||
179 (strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0))
180 continue;
181 trans = (struct transp *)malloc(sizeof (*trans));
182 if (trans == NULL) {
183 syslog(LOG_ERR, "no memory");
184 stat = RPC_CANTSEND;
185 goto done_broad;
186 }
187 (void) memset(trans, 0, sizeof (*trans));
188 if (tr_head == NULL)
189 tr_head = trans;
190 else
191 prev_trans->tr_next = trans;
192 prev_trans = trans;
193
194 trans->tr_fd = t_open(nconf->nc_device, O_RDWR, NULL);
195 if (trans->tr_fd < 0) {
196 syslog(LOG_ERR, "nfscast: t_open: %s:%m",
197 nconf->nc_device);
198 stat = RPC_CANTSEND;
199 goto done_broad;
200 }
201 if (t_bind(trans->tr_fd, (struct t_bind *)NULL,
202 (struct t_bind *)NULL) < 0) {
203 syslog(LOG_ERR, "nfscast: t_bind: %m");
204 stat = RPC_CANTSEND;
205 goto done_broad;
206 }
207 trans->tr_taddr =
208 /* LINTED pointer alignment */
209 (struct t_bind *)t_alloc(trans->tr_fd, T_BIND, T_ADDR);
210 if (trans->tr_taddr == (struct t_bind *)NULL) {
211 syslog(LOG_ERR, "nfscast: t_alloc: %m");
212 stat = RPC_SYSTEMERROR;
213 goto done_broad;
214 }
215
216 trans->tr_device = nconf->nc_device;
217 FD_SET(trans->tr_fd, &mask);
218
219 if_inx = 0;
220 hs.h_host = mfs->mfs_host;
221 hs.h_serv = "rpcbind";
222 if (netdir_getbyname(nconf, &hs, &retaddrs) == ND_OK) {
223
224 /*
225 * If mfs->ignore is previously set for
226 * this map, clear it. Because a host can
227 * have either v6 or v4 address
228 */
229 if (mfs->mfs_ignore == 1)
230 mfs->mfs_ignore = 0;
231
232 a = (struct addrs *)malloc(sizeof (*a));
233 if (a == NULL) {
234 syslog(LOG_ERR, "no memory");
235 stat = RPC_CANTSEND;
236 goto done_broad;
237 }
238 (void) memset(a, 0, sizeof (*a));
239 if (trans->tr_addrs == NULL)
240 trans->tr_addrs = a;
241 else
242 prev_addr->addr_next = a;
243 prev_addr = a;
244 a->addr_if_tstamps = NULL;
245 a->addr_mfs = mfs;
246 a->addr_addrs = retaddrs;
247 if_cnt = retaddrs->n_cnt;
248 while (if_cnt--) {
249 ts = (struct tstamps *)
250 malloc(sizeof (*ts));
251 if (ts == NULL) {
252 syslog(LOG_ERR, "no memory");
253 stat = RPC_CANTSEND;
254 goto done_broad;
255 }
256 (void) memset(ts, 0, sizeof (*ts));
257 ts->ts_penalty = mfs->mfs_penalty;
258 if (a->addr_if_tstamps == NULL)
259 a->addr_if_tstamps = ts;
260 else
261 prev_ts->ts_next = ts;
262 prev_ts = ts;
263 ts->ts_inx = if_inx++;
264 addr_cnt++;
265 }
266 break;
267 } else {
268 mfs->mfs_ignore = 1;
269 if (verbose)
270 syslog(LOG_ERR,
271 "%s:%s address not known",
272 mfs->mfs_host,
273 strcmp(nconf->nc_proto, NC_INET)?"IPv6":"IPv4");
274 }
275 } /* while */
276
277 endnetconfig(nc);
278 nc = NULL;
279 } /* for */
280 if (addr_cnt == 0) {
281 syslog(LOG_ERR, "nfscast: couldn't find addresses");
282 stat = RPC_CANTSEND;
283 goto done_broad;
284 }
285
286 (void) gettimeofday(&t, (struct timezone *)0);
287 xid = (getpid() ^ t.tv_sec ^ t.tv_usec) & ~0xFF;
288 t.tv_usec = 0;
289
290 /* serialize the RPC header */
291
292 msg.rm_direction = CALL;
293 msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
294 msg.rm_call.cb_prog = RPCBPROG;
295 /*
296 * we can not use RPCBVERS here since it doesn't exist in 4.X,
297 * the fix to bug 1139883 has made the 4.X portmapper silent to
298 * version mismatches. This causes the RPC call to the remote
299 * portmapper to simply be ignored if it's not Version 2.
300 */
301 msg.rm_call.cb_vers = PMAPVERS;
302 msg.rm_call.cb_proc = NULLPROC;
303 if (sys_auth == (AUTH *)NULL) {
304 stat = RPC_SYSTEMERROR;
305 goto done_broad;
306 }
307 msg.rm_call.cb_cred = sys_auth->ah_cred;
308 msg.rm_call.cb_verf = sys_auth->ah_verf;
309 xdrmem_create(xdrs, outbuf, sizeof (outbuf), XDR_ENCODE);
310 if (! xdr_callmsg(xdrs, &msg)) {
311 stat = RPC_CANTENCODEARGS;
312 goto done_broad;
313 }
314 outlen = (int)xdr_getpos(xdrs);
315 xdr_destroy(xdrs);
316
317 t_udata.opt.len = 0;
318 t_udata.udata.buf = outbuf;
319 t_udata.udata.len = outlen;
320
321 /*
322 * Basic loop: send packet to all hosts and wait for response(s).
323 * The response timeout grows larger per iteration.
324 * A unique xid is assigned to each address in order to
325 * correctly match the replies.
326 */
327 for (tsec = 4; timeout > 0; tsec *= 2) {
328
329 timeout -= tsec;
330 if (timeout <= 0)
331 tsec += timeout;
332
333 rcv_timeout.tv_sec = tsec;
334 rcv_timeout.tv_usec = 0;
335
336 sent = 0;
337 for (trans = tr_head; trans; trans = trans->tr_next) {
338 for (a = trans->tr_addrs; a; a = a->addr_next) {
339 struct netbuf *if_netbuf =
340 a->addr_addrs->n_addrs;
341 ts = a->addr_if_tstamps;
342 if_cnt = a->addr_addrs->n_cnt;
343 while (if_cnt--) {
344
345 /*
346 * xid is the first thing in
347 * preserialized buffer
348 */
349 /* LINTED pointer alignment */
350 *((ulong_t *)outbuf) =
351 htonl(xid + ts->ts_inx);
352 (void) gettimeofday(&(ts->ts_timeval),
353 (struct timezone *)0);
354 /*
355 * Check if already received
356 * from a previous iteration.
357 */
358 if (ts->ts_rcvd) {
359 sent++;
360 ts = ts->ts_next;
361 continue;
362 }
363
364 t_udata.addr = *if_netbuf++;
365
366 if (t_sndudata(trans->tr_fd,
367 &t_udata) == 0) {
368 sent++;
369 }
370
371 ts = ts->ts_next;
372 }
373 }
374 }
375 if (sent == 0) { /* no packets sent ? */
376 stat = RPC_CANTSEND;
377 goto done_broad;
378 }
379
380 /*
381 * Have sent all the packets. Now collect the responses...
382 */
383 rcvd = 0;
384 recv_again:
385 msg.acpted_rply.ar_verf = _null_auth;
386 msg.acpted_rply.ar_results.proc = xdr_void;
387 readfds = mask;
388
389 switch (select(dtbsize, &readfds,
390 (fd_set *)NULL, (fd_set *)NULL, &rcv_timeout)) {
391
392 case 0: /* Timed out */
393 /*
394 * If we got at least one response in the
395 * last interval, then don't wait for any
396 * more. In theory we should wait for
397 * the max weighting (penalty) value so
398 * that a very slow server has a chance to
399 * respond but this could take a long time
400 * if the admin has set a high weighting
401 * value.
402 */
403 if (rcvd > 0)
404 goto done_broad;
405
406 stat = RPC_TIMEDOUT;
407 continue;
408
409 case -1: /* some kind of error */
410 if (errno == EINTR)
411 goto recv_again;
412 syslog(LOG_ERR, "nfscast: select: %m");
413 if (rcvd == 0)
414 stat = RPC_CANTRECV;
415 goto done_broad;
416
417 } /* end of select results switch */
418
419 for (trans = tr_head; trans; trans = trans->tr_next) {
420 if (FD_ISSET(trans->tr_fd, &readfds))
421 break;
422 }
423 if (trans == NULL)
424 goto recv_again;
425
426 try_again:
427 t_rdata.addr = trans->tr_taddr->addr;
428 t_rdata.udata.buf = inbuf;
429 t_rdata.udata.maxlen = sizeof (inbuf);
430 t_rdata.udata.len = 0;
431 t_rdata.opt.len = 0;
432 if (t_rcvudata(trans->tr_fd, &t_rdata, &flag) < 0) {
433 if (errno == EINTR)
434 goto try_again;
435 syslog(LOG_ERR, "nfscast: t_rcvudata: %s:%m",
436 trans->tr_device);
437 stat = RPC_CANTRECV;
438 continue;
439 }
440 if (t_rdata.udata.len < sizeof (ulong_t))
441 goto recv_again;
442 if (flag & T_MORE) {
443 syslog(LOG_ERR,
444 "nfscast: t_rcvudata: %s: buffer overflow",
445 trans->tr_device);
446 goto recv_again;
447 }
448
449 /*
450 * see if reply transaction id matches sent id.
451 * If so, decode the results.
452 * Note: received addr is ignored, it could be
453 * different from the send addr if the host has
454 * more than one addr.
455 */
456 xdrmem_create(xdrs, inbuf, (uint_t)t_rdata.udata.len,
457 XDR_DECODE);
458 if (xdr_replymsg(xdrs, &msg)) {
459 if (msg.rm_reply.rp_stat == MSG_ACCEPTED &&
460 (msg.rm_xid & ~0xFF) == xid) {
461 struct addrs *curr_addr;
462
463 i = msg.rm_xid & 0xFF;
464 for (curr_addr = trans->tr_addrs; curr_addr;
465 curr_addr = curr_addr->addr_next) {
466 for (ts = curr_addr->addr_if_tstamps; ts;
467 ts = ts->ts_next)
468 if (ts->ts_inx == i && !ts->ts_rcvd) {
469 ts->ts_rcvd = 1;
470 calc_resp_time(&ts->ts_timeval);
471 stat = RPC_SUCCESS;
472 rcvd++;
473 break;
474 }
475 }
476 } /* otherwise, we just ignore the errors ... */
477 }
478 xdrs->x_op = XDR_FREE;
479 msg.acpted_rply.ar_results.proc = xdr_void;
480 (void) xdr_replymsg(xdrs, &msg);
481 XDR_DESTROY(xdrs);
482 if (rcvd == sent)
483 goto done_broad;
484 else
485 goto recv_again;
486 }
487 if (!rcvd)
488 stat = RPC_TIMEDOUT;
489
490 done_broad:
491 if (rcvd) {
492 *mfs_out = sort_responses(tr_head);
493 stat = RPC_SUCCESS;
494 }
495 if (nc)
496 endnetconfig(nc);
497 free_transports(tr_head);
498 AUTH_DESTROY(sys_auth);
499 return (stat);
500 }
501
502 /*
503 * Go through all the responses and sort fastest to slowest.
504 * Note that any penalty is added to the response time - so the
505 * fastest response isn't necessarily the one that arrived first.
506 */
507 static struct mapfs *
sort_responses(trans)508 sort_responses(trans)
509 struct transp *trans;
510 {
511 struct transp *t;
512 struct addrs *a;
513 struct tstamps *ti;
514 int i, size = 0, allocsize = 10;
515 struct mapfs *p, *mfs_head = NULL, *mfs_tail = NULL;
516 struct sm *buffer;
517
518 buffer = (struct sm *)malloc(allocsize * sizeof (struct sm));
519 if (!buffer) {
520 syslog(LOG_ERR, "sort_responses: malloc error.\n");
521 return (NULL);
522 }
523
524 for (t = trans; t; t = t->tr_next) {
525 for (a = t->tr_addrs; a; a = a->addr_next) {
526 for (ti = a->addr_if_tstamps;
527 ti; ti = ti->ts_next) {
528 if (!ti->ts_rcvd)
529 continue;
530 ti->ts_timeval.tv_usec +=
531 (ti->ts_penalty * PENALTY_WEIGHT);
532 if (ti->ts_timeval.tv_usec >= 1000000) {
533 ti->ts_timeval.tv_sec +=
534 (ti->ts_timeval.tv_usec / 1000000);
535 ti->ts_timeval.tv_usec =
536 (ti->ts_timeval.tv_usec % 1000000);
537 }
538
539 if (size >= allocsize) {
540 allocsize += 10;
541 buffer = (struct sm *)realloc(buffer,
542 allocsize * sizeof (struct sm));
543 if (!buffer) {
544 syslog(LOG_ERR,
545 "sort_responses: malloc error.\n");
546 return (NULL);
547 }
548 }
549 buffer[size].timeval = ti->ts_timeval;
550 buffer[size].mfs = a->addr_mfs;
551 size++;
552 }
553 }
554 }
555
556 #ifdef DEBUG
557 if (trace > 3) {
558 trace_prt(1, " sort_responses: before host sort:\n");
559 for (i = 0; i < size; i++)
560 trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host,
561 buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
562 trace_prt(0, "\n");
563 }
564 #endif
565
566 qsort((void *)buffer, size, sizeof (struct sm), host_sm);
567
568 /*
569 * Cope with multiply listed hosts by choosing first time
570 */
571 for (i = 1; i < size; i++) {
572 #ifdef DEBUG
573 if (trace > 3) {
574 trace_prt(1, " sort_responses: comparing %s and %s\n",
575 buffer[i-1].mfs->mfs_host,
576 buffer[i].mfs->mfs_host);
577 }
578 #endif
579 if (strcmp(buffer[i-1].mfs->mfs_host,
580 buffer[i].mfs->mfs_host) == 0)
581 memcpy(&buffer[i].timeval, &buffer[i-1].timeval,
582 sizeof (struct timeval));
583 }
584 if (trace > 3)
585 trace_prt(0, "\n");
586
587 #ifdef DEBUG
588 if (trace > 3) {
589 trace_prt(1, " sort_responses: before time sort:\n");
590 for (i = 0; i < size; i++)
591 trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host,
592 buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
593 trace_prt(0, "\n");
594 }
595 #endif
596
597 qsort((void *)buffer, size, sizeof (struct sm), time_sm);
598
599 #ifdef DEBUG
600 if (trace > 3) {
601 trace_prt(1, " sort_responses: after sort:\n");
602 for (i = 0; i < size; i++)
603 trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host,
604 buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
605 trace_prt(0, "\n");
606 }
607 #endif
608
609 for (i = 0; i < size; i++) {
610 #ifdef DEBUG
611 if (trace > 3) {
612 trace_prt(1, " sort_responses: adding %s\n",
613 buffer[i].mfs->mfs_host);
614 }
615 #endif
616 p = add_mfs(buffer[i].mfs, 0, &mfs_head, &mfs_tail);
617 if (!p)
618 return (NULL);
619 }
620 free(buffer);
621
622 return (mfs_head);
623 }
624
625
626 /*
627 * Comparison routines called by qsort(3).
628 */
host_sm(const void * a,const void * b)629 static int host_sm(const void *a, const void *b)
630 {
631 return (strcmp(((struct sm *)a)->mfs->mfs_host,
632 ((struct sm *)b)->mfs->mfs_host));
633 }
634
time_sm(const void * a,const void * b)635 static int time_sm(const void *a, const void *b)
636 {
637 if (timercmp(&(((struct sm *)a)->timeval),
638 &(((struct sm *)b)->timeval), < /* cstyle */))
639 return (-1);
640 else if (timercmp(&(((struct sm *)a)->timeval),
641 &(((struct sm *)b)->timeval), > /* cstyle */))
642 return (1);
643 else
644 return (0);
645 }
646
/*
 * Given send_time which is the time a request
 * was transmitted to a server, subtract it
 * from the time "now" thereby converting it
 * to an elapsed time.
 *
 * send_time is updated in place: on return it holds the difference
 * between the current gettimeofday() reading and its previous value.
 */
static void
calc_resp_time(struct timeval *send_time)
{
	struct timeval time_now;

	(void) gettimeofday(&time_now, NULL);
	/* Borrow a second so the usec subtraction cannot go negative */
	if (time_now.tv_usec < send_time->tv_usec) {
		time_now.tv_sec--;
		time_now.tv_usec += 1000000;
	}
	send_time->tv_sec = time_now.tv_sec - send_time->tv_sec;
	send_time->tv_usec = time_now.tv_usec - send_time->tv_usec;
}
667
668 static void
free_transports(trans)669 free_transports(trans)
670 struct transp *trans;
671 {
672 struct transp *t, *tmpt = NULL;
673 struct addrs *a, *tmpa = NULL;
674 struct tstamps *ts, *tmpts = NULL;
675
676 for (t = trans; t; t = tmpt) {
677 if (t->tr_taddr)
678 (void) t_free((char *)t->tr_taddr, T_BIND);
679 if (t->tr_fd > 0)
680 (void) t_close(t->tr_fd);
681 for (a = t->tr_addrs; a; a = tmpa) {
682 for (ts = a->addr_if_tstamps; ts; ts = tmpts) {
683 tmpts = ts->ts_next;
684 free(ts);
685 }
686 (void) netdir_free((char *)a->addr_addrs, ND_ADDRLIST);
687 tmpa = a->addr_next;
688 free(a);
689 }
690 tmpt = t->tr_next;
691 free(t);
692 }
693 }
694