xref: /illumos-gate/usr/src/lib/libslp/clib/slp_net.c (revision 12042ab213b3af68474f48555504db816a449211)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Module for all network transactions. SLP messages can be multicast,
29  * unicast over UDP, or unicast over TCP; this module provides routines
30  * for all three. TCP transactions are handled by a single dedicated
31  * thread, while multicast and UDP unicast messages are sent by the
32  * calling thread.
33  *
34  * slp_uc_tcp_send:	enqueues a message on the TCP transaction thread's
35  *				queue.
36  * slp_tcp_wait:	blocks until all TCP-enqueued transactions for
37  *				a given SLP handle are complete
38  * slp_uc_udp_send:	unicasts a message using a datagram
39  * slp_mc_send:		multicasts a message
40  */
41 
42 /*
43  * todo: correct multicast interfaces;
44  */
45 
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <syslog.h>
49 #include <sys/types.h>
50 #include <sys/socket.h>
51 #include <arpa/inet.h>
52 #include <errno.h>
53 #include <unistd.h>
54 #include <time.h>
55 #include <string.h>
56 #include <slp-internal.h>
57 #include <slp_net_utils.h>
58 
/*
 * TCP thread particulars
 */
/* SLP_TRUE once start_tcp_thr has launched tcp_thread; cleared on shutdown */
static SLPBoolean tcp_thr_running = SLP_FALSE;
/* request queue consumed by tcp_thread; created by start_tcp_thr */
static slp_queue_t *tcp_q;
/* socket used by tcp_thread for the transaction currently in progress */
static int tcp_sockfd;
/* held while the TCP thread is being started or shut down */
static mutex_t start_lock = DEFAULTMUTEX;
66 
/* Used to pass arguments to the TCP thread, via 'tcp_q' */
struct tcp_rqst {
	slp_handle_impl_t *hp;		/* handle this transaction belongs to */
	slp_target_t *target;		/* target list walked in failover order */
	const char *scopes;		/* scopes string laid into the message */
	SLPBoolean free_target;		/* if true, tcp_thread frees 'target' */
	unsigned short xid;		/* if nonzero, XID forced onto the header */
};
75 
/* Used to keep track of broadcast interfaces */
struct bc_ifs {
	struct sockaddr_in *sin;	/* array of subnet broadcast addresses */
	int num_ifs;			/* number of entries in 'sin' */
};
81 
/*
 * Private utility routines: forward declarations of the file-local
 * (static) helpers used by the public send routines.
 */
static SLPError start_tcp_thr();
static void *tcp_thread(void *);
static SLPError make_header(slp_handle_impl_t *, char *, const char *);
static void udp_make_msghdr(struct sockaddr_in *, struct iovec *, int,
			    struct msghdr *);
static SLPError make_mc_target(slp_handle_impl_t *,
				struct sockaddr_in *, char *,
				struct pollfd **, nfds_t *, struct bc_ifs *);
static SLPError make_bc_target(slp_handle_impl_t *, struct in_addr *,
				int, struct bc_ifs *);
static SLPError mc_sendmsg(struct pollfd *, struct msghdr *,
				struct bc_ifs *);
static SLPError bc_sendmsg(struct pollfd *, struct msghdr *, struct bc_ifs *);
static void mc_recvmsg(struct pollfd *, nfds_t, slp_handle_impl_t *,
			const char *, char *, void **, unsigned long long,
			unsigned long long, unsigned long long *,
			int *, int *, int);
static void free_pfds(struct pollfd *, nfds_t);
static void tcp_handoff(slp_handle_impl_t *, const char *,
			struct sockaddr_in *, unsigned short);
static unsigned long long now_millis();
static int wait_for_response(unsigned long long, int *,
				unsigned long long, unsigned long long *,
				struct pollfd [], nfds_t);
static int add2pr_list(slp_msg_t *, struct sockaddr_in *, void **);
static void free_pr_node(void *, VISIT, int, void *);
111 
112 /*
113  * Unicasts a message using TCP. 'target' is a targets list
114  * containing DAs corresponding to 'scopes'. 'free_target' directs
115  * tcp_thread to free the target list when finished; this is useful
116  * when a target needs to be synthesised by another message thread
117  * (such as slp_mc_send for tcp_handoffs). If this message is a
118  * retransmission due to a large reply, 'xid' should be the same as for
119  * the original message.
120  *
121  * This call returns as soon as the message has been enqueued on 'tcp_q'.
122  * Callers interested in knowing when the transaction has completed
123  * should call slp_tcp_wait with the same SLP handle.
124  */
125 void slp_uc_tcp_send(slp_handle_impl_t *hp, slp_target_t *target,
126 			const char *scopes, SLPBoolean free_target,
127 			unsigned short xid) {
128 	struct tcp_rqst *rqst;
129 
130 	/* initialize TCP vars in handle, if necessary */
131 	if (!hp->tcp_lock) {
132 		if (!(hp->tcp_lock = malloc(sizeof (*(hp->tcp_lock))))) {
133 			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
134 				"out of memory");
135 			return;
136 		}
137 		(void) mutex_init(hp->tcp_lock, USYNC_THREAD, NULL);
138 	}
139 	if (!hp->tcp_wait) {
140 		if (!(hp->tcp_wait = malloc(sizeof (*(hp->tcp_wait))))) {
141 			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
142 				"out of memory");
143 			return;
144 		}
145 		(void) cond_init(hp->tcp_wait, USYNC_THREAD, NULL);
146 	}
147 	(void) mutex_lock(hp->tcp_lock);
148 	(hp->tcp_ref_cnt)++;
149 	(void) mutex_unlock(hp->tcp_lock);
150 
151 	/* start TCP thread, if not already running */
152 	if (!tcp_thr_running)
153 		if (start_tcp_thr() != SLP_OK)
154 			return;
155 
156 	/* create and enqueue the request */
157 	if (!(rqst = malloc(sizeof (*rqst)))) {
158 		slp_err(LOG_CRIT, 0, "slp_uc_tcp_send", "out of memory");
159 		return;
160 	}
161 	rqst->hp = hp;
162 	rqst->target = target;
163 	rqst->scopes = scopes;
164 	rqst->free_target = free_target;
165 	rqst->xid = xid;
166 	(void) slp_enqueue(tcp_q, rqst);
167 }
168 
169 /*
170  * Wait for TCP to complete, if a transaction corresponding to this
171  * SLP handle is pending. If none are pending, returns immediately.
172  */
173 void slp_tcp_wait(slp_handle_impl_t *hp) {
174 	(void) mutex_lock(hp->tcp_lock);
175 	while (hp->tcp_ref_cnt > 0)
176 		(void) cond_wait(hp->tcp_wait, hp->tcp_lock);
177 	(void) mutex_unlock(hp->tcp_lock);
178 }
179 
180 /*
181  * Unicasts a message using datagrams. 'target' should contain a
182  * list of DAs corresponding to 'scopes'.
183  *
184  * This call does not return until the transaction has completed. It
185  * may handoff a message to the TCP thread if necessary, but will not
186  * wait for that transaction to complete. Hence callers should always
187  * invoke slp_tcp_wait before cleaning up resources.
188  */
189 void slp_uc_udp_send(slp_handle_impl_t *hp, slp_target_t *target,
190 			const char *scopes) {
191 	slp_target_t *ctarg;
192 	struct sockaddr_in *sin;
193 	struct msghdr msg[1];
194 	char header[SLP_DEFAULT_SENDMTU];
195 	int sockfd;
196 	size_t mtu;
197 	SLPBoolean use_tcp;
198 	struct pollfd pfd[1];
199 	unsigned long long now, sent;
200 	char *reply = NULL;
201 
202 	use_tcp = SLP_FALSE;
203 	/* build the header and iovec */
204 	if (make_header(hp, header, scopes) != SLP_OK)
205 		return;
206 
207 	mtu = slp_get_mtu();
208 
209 	/* walk targets list until we either succeed or run out of targets */
210 	for (ctarg = target; ctarg; ctarg = slp_next_failover(ctarg)) {
211 		char *state;
212 		const char *timeouts;
213 		int timeout;
214 
215 		sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
216 
217 		/* make the socket, msghdr and reply buf */
218 		if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
219 			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
220 				"could not create socket: %s",
221 				strerror(errno));
222 			return;
223 		}
224 		pfd[0].fd = sockfd;
225 		pfd[0].events = POLLRDNORM;
226 
227 		udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
228 		if (!reply && !(reply = malloc(mtu))) {
229 			(void) close(sockfd);
230 			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
231 				"out of memory");
232 			return;
233 		}
234 
235 		/* timeout loop */
236 		timeouts = SLPGetProperty(SLP_CONFIG_DATAGRAMTIMEOUTS);
237 		state = (char *)timeouts;
238 		for (timeout = slp_get_next_onlist(&state);
239 			timeout != -1 &&
240 			!hp->cancel;
241 			timeout = slp_get_next_onlist(&state)) {
242 			int pollerr;
243 
244 			if (sendmsg(sockfd, msg, 0) < 0) {
245 				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
246 					"sendmsg failed: %s", strerror(errno));
247 				continue; /* try again */
248 			}
249 			sent = now_millis();
250 
251 			pollerr = wait_for_response(
252 				0, &timeout, sent, &now, pfd, 1);
253 
254 			if (pollerr == 0)
255 				/* timeout */
256 				continue;
257 			if (pollerr < 0)
258 				break;
259 
260 			/* only using one fd, so no need to scan pfd */
261 			if (recvfrom(sockfd, reply, mtu, 0, NULL, NULL) < 0) {
262 				/* if reply overflows, hand off to TCP */
263 				if (errno == ENOMEM) {
264 					free(reply); reply = NULL;
265 					use_tcp = SLP_TRUE;
266 					break;
267 				}
268 				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
269 					"recvfrom failed: %s",
270 					strerror(errno));
271 			} else {
272 				/* success -- but check error code */
273 				slp_proto_err errcode = slp_get_errcode(reply);
274 				switch (errcode) {
275 				case SLP_MSG_PARSE_ERROR:
276 				case SLP_VER_NOT_SUPPORTED:
277 				case SLP_SICK_DA:
278 				case SLP_DA_BUSY_NOW:
279 				case SLP_OPTION_NOT_UNDERSTOOD:
280 				case SLP_RQST_NOT_SUPPORTED: {
281 				    char addrbuf[INET6_ADDRSTRLEN], *cname;
282 
283 				    cname = slp_ntop(addrbuf, INET6_ADDRSTRLEN,
284 					(const void *) &(sin->sin_addr));
285 				    cname = cname ? cname : "[invalid addr]";
286 
287 				    /* drop it */
288 				    slp_err(LOG_INFO, 0,
289 				"DA %s returned error code %d; dropping reply",
290 							cname, errcode);
291 				    free(reply); reply = NULL;
292 				}
293 				}
294 			}
295 			break;
296 		}
297 		if (timeout != -1)
298 			/* success or cancel */
299 			break;
300 		/* else failure */
301 		slp_mark_target_failed(ctarg);
302 	}
303 	(void) close(sockfd);
304 	if (!ctarg || hp->cancel) {
305 		/* failed all attempts or canceled by consumer */
306 		if (reply) free(reply);
307 		return;
308 	}
309 	/* success or tcp handoff */
310 	if (reply) {
311 		if (slp_get_overflow(reply))
312 			use_tcp = SLP_TRUE;
313 		else
314 			slp_mark_target_used(ctarg);
315 		(void) slp_enqueue(hp->q, reply);
316 	}
317 	if (use_tcp)
318 		slp_uc_tcp_send(
319 			hp, ctarg, scopes, SLP_FALSE, slp_get_xid(header));
320 }
321 
322 /*
323  * Multicasts (or broadcasts) a message, using multicast convergance
324  * to collect results. Large replies will cause the message to be handed
325  * off to the TCP thread.
326  *
327  * This call does not return until the transaction is complete. It does
328  * not, however, wait until pending TCP transactions are complete, so
329  * callers should always invoke slp_tcp_wait before cleaning up any
330  * resources.
331  */
332 void slp_mc_send(slp_handle_impl_t *hp, const char *scopes) {
333 	char header[SLP_DEFAULT_SENDMTU], *state;
334 	const char *timeouts;
335 	struct sockaddr_in sin[1];
336 	struct msghdr msg[1];
337 	int maxwait, timeout, noresults, anyresults;
338 	unsigned long long final_to, now, sent;
339 	struct pollfd *pfd;
340 	nfds_t nfds;
341 	void *collator = NULL;
342 	struct bc_ifs bcifs;
343 
344 	/* build the header and iovec */
345 	if (make_header(hp, header, scopes) != SLP_OK)
346 		return;
347 
348 	(void) memset(sin, 0, sizeof (sin));
349 	if (make_mc_target(hp, sin, header, &pfd, &nfds, &bcifs) != SLP_OK)
350 		return;
351 	udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
352 
353 	maxwait = slp_get_mcmaxwait();
354 	maxwait = maxwait ? maxwait : SLP_DEFAULT_MAXWAIT;
355 
356 	/* set the final timeout */
357 	now = now_millis();
358 	final_to = now + maxwait;
359 
360 	/* timeout prep and loop */
361 	timeouts = SLPGetProperty(SLP_CONFIG_MULTICASTTIMEOUTS);
362 	state = (char *)timeouts;
363 	noresults = anyresults = 0;
364 
365 	for (timeout = slp_get_next_onlist(&state);
366 		timeout != -1 &&
367 		now < final_to &&
368 		noresults < 2 &&
369 		!hp->cancel;
370 		timeout = slp_get_next_onlist(&state)) {
371 
372 		/* send msg */
373 		if (mc_sendmsg(pfd, msg, &bcifs) != SLP_OK) {
374 			continue; /* try again */
375 		}
376 		sent = now_millis();
377 
378 		/* receive results */
379 		mc_recvmsg(pfd, nfds, hp, scopes, header, &collator, final_to,
380 			sent, &now, &noresults, &anyresults, timeout);
381 
382 		if (!anyresults)
383 			noresults++;
384 		anyresults = 0;
385 	}
386 	/* clean up PR list collator */
387 	if (collator)
388 		slp_twalk(collator, free_pr_node, 0, NULL);
389 
390 	/* close all fds in pfd */
391 	free_pfds(pfd, nfds);
392 
393 	/* free broadcast addrs, if used */
394 	if (bcifs.sin) free(bcifs.sin);
395 }
396 
397 /*
398  * Private net helper routines
399  */
400 
401 /*
402  * Starts the tcp_thread and allocates any necessary resources.
403  */
404 static SLPError
405 start_tcp_thr(void)
406 {
407 	SLPError err;
408 	int terr;
409 
410 	(void) mutex_lock(&start_lock);
411 	/* make sure someone else hasn't already intialized the thread */
412 	if (tcp_thr_running) {
413 		(void) mutex_unlock(&start_lock);
414 		return (SLP_OK);
415 	}
416 
417 	/* create the tcp queue */
418 	if (!(tcp_q = slp_new_queue(&err))) {
419 		(void) mutex_unlock(&start_lock);
420 		return (err);
421 	}
422 
423 	/* start the tcp thread */
424 	if ((terr = thr_create(0, 0, tcp_thread, NULL, 0, NULL)) != 0) {
425 		slp_err(LOG_CRIT, 0, "start_tcp_thr",
426 		    "could not start thread: %s", strerror(terr));
427 		(void) mutex_unlock(&start_lock);
428 		return (SLP_INTERNAL_SYSTEM_ERROR);
429 	}
430 
431 	tcp_thr_running = SLP_TRUE;
432 	(void) mutex_unlock(&start_lock);
433 	return (SLP_OK);
434 }
435 
/*
 * Called by the tcp thread to shut itself down. The queue must be
 * empty (and should be, since the tcp thread will only shut itself
 * down if nothing has been put in its queue for the timeout period).
 *
 * Does not return to its caller: the calling thread exits via thr_exit.
 */
static void end_tcp_thr() {
	(void) mutex_lock(&start_lock);

	/* mark the thread stopped before tearing down its queue */
	tcp_thr_running = SLP_FALSE;
	slp_destroy_queue(tcp_q);

	(void) mutex_unlock(&start_lock);
	thr_exit(NULL);
}
450 
451 /*
452  * The thread of control for the TCP thread. This sits in a loop, waiting
453  * on 'tcp_q' for new messages. If no message appear after 30 seconds,
454  * this thread cleans up resources and shuts itself down.
455  */
456 static void *
457 tcp_thread(void *arg __unused)
458 {
459 	struct tcp_rqst *rqst;
460 	char *reply, header[SLP_DEFAULT_SENDMTU];
461 	timestruc_t to[1];
462 	to->tv_nsec = 0;
463 
464 	for (;;) {
465 		slp_target_t *ctarg, *targets;
466 		slp_handle_impl_t *hp;
467 		const char *scopes;
468 		struct sockaddr_in *sin;
469 		SLPBoolean free_target, etimed;
470 		unsigned short xid;
471 
472 		/* set idle shutdown timeout */
473 		to->tv_sec = time(NULL) + 30;
474 		/* get the next request from the tcp queue */
475 		if (!(rqst = slp_dequeue_timed(tcp_q, to, &etimed))) {
476 			if (!etimed)
477 				continue;
478 			else
479 				end_tcp_thr();
480 		}
481 
482 		hp = rqst->hp;
483 		scopes = rqst->scopes;
484 		targets = rqst->target;
485 		free_target = rqst->free_target;
486 		xid = rqst->xid;
487 		free(rqst);
488 		reply = NULL;
489 
490 		/* Check if this handle has been cancelled */
491 		if (hp->cancel)
492 			goto transaction_complete;
493 
494 		/* build the header and iovec */
495 		if (make_header(hp, header, scopes) != SLP_OK) {
496 			if (free_target) slp_free_target(targets);
497 			continue;
498 		}
499 		if (xid)
500 			slp_set_xid(header, xid);
501 
502 	/* walk targets list until we either succeed or run out of targets */
503 		for (ctarg = targets; ctarg && !hp->cancel;
504 		    ctarg = slp_next_failover(ctarg)) {
505 
506 			sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
507 
508 			/* create the socket */
509 			if ((tcp_sockfd = socket(AF_INET, SOCK_STREAM, 0))
510 			    < 0) {
511 				slp_err(LOG_CRIT, 0, "tcp_thread",
512 				    "could not create socket: %s",
513 				    strerror(errno));
514 				ctarg = NULL;
515 				break;
516 			}
517 
518 			/* connect to target */
519 			if (connect(tcp_sockfd, (struct sockaddr *)sin,
520 			    sizeof (*sin)) < 0) {
521 				slp_err(LOG_INFO, 0, "tcp_thread",
522 				    "could not connect, error = %s",
523 				    strerror(errno));
524 				goto failed;
525 			}
526 
527 			/* send the message and read the reply */
528 			if (writev(tcp_sockfd, hp->msg.iov, hp->msg.iovlen)
529 			    == -1) {
530 				slp_err(LOG_INFO, 0, "tcp_thread",
531 				    "could not send, error = %s",
532 				    strerror(errno));
533 				goto failed;
534 			}
535 
536 			/* if success, break out of failover loop */
537 			if ((slp_tcp_read(tcp_sockfd, &reply)) == SLP_OK) {
538 				(void) close(tcp_sockfd);
539 				break;
540 			}
541 
542 		/* else if timed out, mark target failed and try next one */
543 failed:
544 			(void) close(tcp_sockfd);
545 			slp_mark_target_failed(ctarg);
546 		}
547 
548 		if (hp->cancel) {
549 			if (reply) {
550 				free(reply);
551 			}
552 		} else if (ctarg) {
553 			/* success */
554 			(void) slp_enqueue(hp->q, reply);
555 			slp_mark_target_used(ctarg);
556 		}
557 
558 	/* If all TCP transactions on this handle are complete, send notice */
559 transaction_complete:
560 		(void) mutex_lock(hp->tcp_lock);
561 		if (--(hp->tcp_ref_cnt) == 0)
562 			(void) cond_signal(hp->tcp_wait);
563 		(void) mutex_unlock(hp->tcp_lock);
564 
565 		if (free_target)
566 			slp_free_target(targets);
567 	}
568 	return (NULL);
569 }
570 
571 /*
572  * Performs a full read for TCP replies, dynamically allocating a
573  * buffer large enough to hold the reply.
574  */
575 SLPError slp_tcp_read(int sockfd, char **reply) {
576 	char lenbuf[5], *p;
577 	size_t nleft;
578 	ssize_t nread;
579 	unsigned int len;
580 
581 	/* find out how long the reply is */
582 	nleft = 5;
583 	p = lenbuf;
584 	while (nleft != 0) {
585 		if ((nread = read(sockfd, p, 5)) < 0) {
586 			if (errno == EINTR)
587 				nread = 0;
588 			else
589 				return (SLP_NETWORK_ERROR);
590 		} else if (nread == 0)
591 			/* shouldn't hit EOF here */
592 			return (SLP_NETWORK_ERROR);
593 		nleft -= nread;
594 		p += nread;
595 	}
596 
597 	len = slp_get_length(lenbuf);
598 
599 	/* allocate space for the reply, and copy in what we've already read */
600 	/* This buffer gets freed by a msg-specific unpacking routine later */
601 	if (!(*reply = malloc(len))) {
602 		slp_err(LOG_CRIT, 0, "tcp_read", "out of memory");
603 		return (SLP_MEMORY_ALLOC_FAILED);
604 	}
605 	(void) memcpy(*reply, lenbuf, 5);
606 
607 	/* read the rest of the message */
608 	nleft = len - 5;
609 	p = *reply + 5;
610 	while (nleft != 0) {
611 		if ((nread = read(sockfd, p, nleft)) < 0) {
612 			if (errno == EINTR)
613 				nread = 0;
614 			else {
615 				free(*reply);
616 				return (SLP_NETWORK_ERROR);
617 			}
618 		} else if (nread == 0)
619 			/*
620 			 * shouldn't hit EOF here, but perhaps we've
621 			 * gotten something useful, so return OK.
622 			 */
623 			return (SLP_OK);
624 
625 		nleft -= nread;
626 		p += nread;
627 	}
628 
629 	return (SLP_OK);
630 }
631 
632 /*
633  * Lays in a SLP header for this message into the scatter / gather
634  * array 'iov'. 'header' is the buffer used to contain the header,
635  * and must contain enough space. 'scopes' should contain a string
636  * with the scopes to be used for this message.
637  */
638 static SLPError make_header(slp_handle_impl_t *hp, char *header,
639 			    const char *scopes) {
640 	SLPError err;
641 	size_t msgLen, off;
642 	int i;
643 	size_t mtu;
644 	unsigned short slen = (unsigned short)strlen(scopes);
645 
646 	mtu = slp_get_mtu();
647 	msgLen = slp_hdrlang_length(hp);
648 	hp->msg.iov[0].iov_base = header;
649 	hp->msg.iov[0].iov_len = msgLen;	/* now the length of the hdr */
650 
651 	/* use the remaining buffer in header for the prlist */
652 	hp->msg.prlist->iov_base = header + msgLen;
653 
654 	for (i = 1; i < hp->msg.iovlen; i++) {
655 		msgLen += hp->msg.iov[i].iov_len;
656 	}
657 	msgLen += slen;
658 
659 	off = 0;
660 	if ((err = slp_add_header(hp->locale, header, mtu,
661 					hp->fid, msgLen, &off)) != SLP_OK)
662 		return (err);
663 
664 	/* start out with empty prlist */
665 	hp->msg.prlist->iov_len = 0;
666 
667 	/* store the scope string len into the space provided by the caller */
668 	off = 0;
669 	if ((err = slp_add_sht((char *)hp->msg.scopeslen.iov_base,
670 				2, slen, &off)) != SLP_OK) {
671 		return (err);
672 	}
673 	hp->msg.scopes->iov_base = (caddr_t)scopes;
674 	hp->msg.scopes->iov_len = slen;
675 
676 	return (SLP_OK);
677 }
678 
679 /*
680  * Populates a struct msghdr suitable for use with sendmsg.
681  */
682 static void udp_make_msghdr(struct sockaddr_in *sin, struct iovec *iov,
683 			    int iovlen, struct msghdr *msg) {
684 	msg->msg_name = (caddr_t)sin;
685 	msg->msg_namelen = 16;
686 	msg->msg_iov = iov;
687 	msg->msg_iovlen = iovlen;
688 	msg->msg_accrights = NULL;
689 	msg->msg_accrightslen = 0;
690 }
691 
692 /*
693  * Sets the address on 'sin', sets the flag in the message header,
694  * and creates an array of pollfds for all interfaces we need to
695  * use. If we need to use only broadcast, and net.slp.interfaces
696  * is set, fills bcifs with an array of subnet broadcast addresses
697  * to which we should send. Returns err != SLP_OK only on catastrophic
698  * error.
699  */
700 static SLPError make_mc_target(slp_handle_impl_t *hp,
701 				struct sockaddr_in *sin, char *header,
702 				struct pollfd **fds, nfds_t *nfds,
703 				struct bc_ifs *bcifs) {
704 
705 	unsigned char ttl = slp_get_multicastTTL();
706 	char *ifs_string;
707 	SLPBoolean have_valid_if = SLP_FALSE;
708 	SLPBoolean use_broadcast = slp_get_usebroadcast();
709 	int fd, i, num_givenifs;
710 	struct in_addr *given_ifs = NULL;
711 	nfds_t nfd_i;
712 
713 	sin->sin_port = htons(SLP_PORT);
714 	sin->sin_family = AF_INET;
715 	slp_set_mcast(header);
716 
717 	/* Get the desired multicast interfaces, if set */
718 	bcifs->sin = NULL;
719 	*fds = NULL;
720 	if ((ifs_string = (char *)SLPGetProperty(
721 		SLP_CONFIG_INTERFACES)) != NULL && *ifs_string) {
722 
723 		char *p, *tstate;
724 
725 		/* count the number of IFs given */
726 		p = strchr(ifs_string, ',');
727 		for (num_givenifs = 1; p; num_givenifs++) {
728 			p = strchr(p + 1, ',');
729 		}
730 
731 		/* copy the given IFs into an array for easier processing */
732 		if (!(given_ifs = calloc(num_givenifs, sizeof (*given_ifs)))) {
733 			slp_err(LOG_CRIT, 0, "make_mc_target",
734 						"out of memory");
735 			return (SLP_MEMORY_ALLOC_FAILED);
736 		}
737 
738 		i = 0;
739 		/* strtok_r will destructively modify, so make a copy first */
740 		if (!(ifs_string = strdup(ifs_string))) {
741 			slp_err(LOG_CRIT, 0, "make_mc_target",
742 						"out of memory");
743 			free(given_ifs);
744 			return (SLP_MEMORY_ALLOC_FAILED);
745 		}
746 		for (
747 			p = strtok_r(ifs_string, ",", &tstate);
748 			p;
749 			p = strtok_r(NULL, ",", &tstate)) {
750 
751 			if (slp_pton(p, &(given_ifs[i])) < 1) {
752 				/* skip */
753 				num_givenifs--;
754 				continue;
755 			}
756 			i++;
757 		}
758 		*nfds = num_givenifs;
759 		free(ifs_string);
760 
761 		/* allocate a pollfd array for all interfaces */
762 		if (!(*fds = calloc(num_givenifs, sizeof (**fds)))) {
763 			slp_err(LOG_CRIT, 0, "make_mc_target",
764 						"out of memory");
765 			free(ifs_string);
766 			free(given_ifs);
767 			return (SLP_MEMORY_ALLOC_FAILED);
768 		}
769 
770 		/* lay the given interfaces into the pollfd array */
771 		for (i = 0; i < num_givenifs; i++) {
772 
773 			/* create a socket to bind to this interface */
774 			if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
775 				slp_err(LOG_CRIT, 0, "make_mc_target",
776 						"could not create socket: %s",
777 						strerror(errno));
778 				free_pfds(*fds, *nfds);
779 				return (SLP_INTERNAL_SYSTEM_ERROR);
780 			}
781 
782 			/* fill in the pollfd structure */
783 			(*fds)[i].fd = fd;
784 			(*fds)[i].events |= POLLRDNORM;
785 
786 			if (use_broadcast) {
787 				struct sockaddr_in bcsin[1];
788 
789 				(void) memcpy(
790 					&(bcsin->sin_addr), &(given_ifs[i]),
791 					sizeof (bcsin->sin_addr));
792 				bcsin->sin_family = AF_INET;
793 				bcsin->sin_port = 0;
794 
795 				/* bind fd to interface */
796 				if (bind(fd, (struct sockaddr *)bcsin,
797 						sizeof (*bcsin)) == 0) {
798 					continue;
799 				}
800 				/* else fallthru to default (multicast) */
801 				slp_err(LOG_INFO, 0, "make_mc_target",
802 				"could not set broadcast interface: %s",
803 					strerror(errno));
804 			}
805 			/* else use multicast */
806 			if (setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF,
807 					&(given_ifs[i]), sizeof (given_ifs[i]))
808 					< 0) {
809 
810 					slp_err(LOG_INFO, 0, "make_mc_target",
811 				"could not set multicast interface: %s",
812 							strerror(errno));
813 					continue;
814 			}
815 
816 			have_valid_if = SLP_TRUE;
817 		}
818 
819 		if (use_broadcast) {
820 		    SLPError err;
821 
822 		    if ((err = make_bc_target(
823 					hp, given_ifs, num_givenifs, bcifs))
824 			!= SLP_OK) {
825 
826 			if (err == SLP_MEMORY_ALLOC_FAILED) {
827 			    /* the only thing which is really a showstopper */
828 			    return (err);
829 			}
830 
831 			/* else no valid interfaces */
832 			have_valid_if = SLP_FALSE;
833 		    }
834 		}
835 		free(given_ifs);
836 	}
837 
838 	if (!have_valid_if) {
839 		if (*fds && !have_valid_if) {
840 			/* couldn't process net.slp.interfaces property */
841 			free(*fds);
842 		}
843 
844 		/* bind to default interface */
845 		if (!(*fds = calloc(1, sizeof (**fds)))) {
846 			slp_err(LOG_CRIT, 0, "make_mc_target",
847 						"out of memory");
848 			return (SLP_MEMORY_ALLOC_FAILED);
849 		}
850 
851 		if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
852 			slp_err(LOG_CRIT, 0, "make_mc_target",
853 						"could not create socket: %s",
854 						strerror(errno));
855 			free(*fds);
856 			return (SLP_INTERNAL_SYSTEM_ERROR);
857 		}
858 
859 		(**fds).fd = fd;
860 		(**fds).events |= POLLRDNORM;
861 		*nfds = 1;
862 	}
863 
864 	/* set required options on all configured fds */
865 	for (nfd_i = 0; nfd_i < *nfds; nfd_i++) {
866 		if (use_broadcast) {
867 			const int on = 1;
868 			if (setsockopt((*fds)[nfd_i].fd, SOL_SOCKET,
869 					SO_BROADCAST,
870 					(void *) &on, sizeof (on)) < 0) {
871 				slp_err(LOG_CRIT, 0, "make_mc_target",
872 					"could not enable broadcast: %s",
873 					strerror(errno));
874 			}
875 		} else {
876 			if (setsockopt((*fds)[nfd_i].fd, IPPROTO_IP,
877 					IP_MULTICAST_TTL, &ttl, 1) < 0) {
878 				slp_err(LOG_CRIT, 0, "make_mc_target",
879 					    "could not set multicast TTL: %s",
880 					    strerror(errno));
881 			}
882 		}
883 	}
884 
885 	if (use_broadcast) {
886 	    sin->sin_addr.s_addr = INADDR_BROADCAST;
887 	} else {
888 		sin->sin_addr.s_addr = SLP_MULTICAST_ADDRESS;
889 	}
890 
891 	return (SLP_OK);
892 }
893 
894 /*
895  * Obtains the subnet broadcast address for each interface specified
896  * in net.slp.interfaces, and fill bcifs->sin with an array of these
897  * addresses.
898  */
899 static SLPError make_bc_target(slp_handle_impl_t *hp,
900 				struct in_addr *given_ifs,
901 				int num_givenifs, struct bc_ifs *bcifs) {
902 	SLPError err;
903 	int i;
904 
905 	if ((err = slp_broadcast_addrs(hp, given_ifs, num_givenifs,
906 					&(bcifs->sin), &(bcifs->num_ifs)))
907 	    != SLP_OK) {
908 	    return (err);
909 	}
910 
911 	/* set SLP port on each sockaddr_in */
912 	for (i = 0; i < bcifs->num_ifs; i++) {
913 		bcifs->sin[i].sin_port = htons(SLP_PORT);
914 	}
915 
916 	return (SLP_OK);
917 }
918 
919 /*
920  * Sends msg on 1st fd in fds for multicast, or on all interfaces
921  * specified in net.slp.interfaces for broadcast. Returns SLP_OK if
922  * msg was sent successfully on at least one interface; otherwise
923  * returns SLP_NETWORK_ERROR if msg was not sent on any interfaces.
924  */
925 static SLPError mc_sendmsg(struct pollfd *fds,
926 				struct msghdr *msg, struct bc_ifs *bcifs) {
927 
928 	if (slp_get_usebroadcast()) {
929 	    char *ifs = (char *)SLPGetProperty(SLP_CONFIG_INTERFACES);
930 
931 	    /* hand off to broadcast-specific send function */
932 	    if (ifs && *ifs && bc_sendmsg(fds, msg, bcifs) == SLP_OK) {
933 		return (SLP_OK);
934 	    }
935 
936 		/*
937 		 * else  no ifs given, or bc_sendmsg failed, so send on
938 		 * general broadcast addr (255.255.255.255). This will
939 		 * cause the message to be sent on all interfaces. The
940 		 * address will have been set in make_mc_target.
941 		 */
942 	}
943 
944 	/*
945 	 * Send only on one interface -- let routing take care of
946 	 * sending the message everywhere it needs to go. Sending
947 	 * on more than one interface can cause nasty routing loops.
948 	 * Note that this approach doesn't work with partitioned
949 	 * networks.
950 	 */
951 	if (sendmsg(fds[0].fd, msg, 0) < 0) {
952 		slp_err(LOG_CRIT, 0, "mc_sendmsg",
953 			"sendmsg failed: %s", strerror(errno));
954 		return (SLP_NETWORK_ERROR);
955 	}
956 
957 	return (SLP_OK);
958 }
959 
960 /*
961  * Send msg to each subnet broadcast address in bcifs->sin. Note
962  * that we can send on any fd (regardless of which interface to which
963  * it is bound), since the kernel will take care of routing for us.
964  * Returns err != SLP_OK only if no message was sent on any interface.
965  */
966 static SLPError bc_sendmsg(struct pollfd *fds, struct msghdr *msg,
967 				struct bc_ifs *bcifs) {
968 	int i;
969 	SLPBoolean sent_one = SLP_FALSE;
970 
971 	for (i = 0; i < bcifs->num_ifs; i++) {
972 		msg->msg_name = (caddr_t)&(bcifs->sin[i]);
973 
974 		if (sendmsg(fds[0].fd, msg, 0) < 0) {
975 			slp_err(LOG_CRIT, 0, "bc_sendmsg",
976 				"sendmsg failed: %s", strerror(errno));
977 			continue;
978 		}
979 		sent_one = SLP_TRUE;
980 	}
981 	return (sent_one ? SLP_OK : SLP_NETWORK_ERROR);
982 }
983 
984 /*
985  * This is where the bulk of the multicast convergance algorithm resides.
986  * mc_recvmsg() waits for data to be ready on any fd in pfd, iterates
987  * through pfd and reads data from ready fd's. It also checks timeouts
988  * and user-cancels.
989  *
990  * Parameters:
991  *   pfd	IN	an array of pollfd structs containing fds to poll
992  *   nfds	IN	number of elements in pfd
993  *   hp		IN	SLPHandle from originating call
994  *   scopes	IN	scopes to use for this message
995  *   header	IN	the SLP message header for this message
996  *   collator	IN/OUT	btree collator for PR list
997  *   final_to	IN	final timeout
998  *   sent	IN	time when message was sent
999  *   now	IN/OUT	set to current time at beginning of convergance
1000  *   noresults	OUT	set to 0 if any results are received
1001  *   anyresults	OUT	set to true if any results are received
1002  *   timeout	IN	time for this convergence iteration
1003  *
1004  * Returns only if an error has occured, or if either this retransmit
1005  * timeout or the final timeout has expired, or if hp->cancel becomes true.
1006  */
1007 static void mc_recvmsg(struct pollfd *pfd, nfds_t nfds, slp_handle_impl_t *hp,
1008 			const char *scopes, char *header, void **collator,
1009 			unsigned long long final_to,
1010 			unsigned long long sent,
1011 			unsigned long long *now,
1012 			int *noresults, int *anyresults, int timeout) {
1013 	char *reply = NULL;
1014 	nfds_t i;
1015 	struct sockaddr_in responder;
1016 	int pollerr;
1017 	socklen_t addrlen = sizeof (responder);
1018 	size_t mtu = slp_get_mtu();
1019 
1020 	for (; !hp->cancel; ) {
1021 	    /* wait until we can read something */
1022 	    pollerr = wait_for_response(
1023 				final_to, &timeout, sent, now, pfd, nfds);
1024 	    if (pollerr == 0)
1025 		/* timeout */
1026 		goto cleanup;
1027 	    if (pollerr < 0)
1028 		/* error */
1029 		goto cleanup;
1030 
1031 	    /* iterate through all fds to find one with data to read */
1032 	    for (i = 0; !hp->cancel && i < nfds; i++) {
1033 
1034 		if (pfd[i].fd < 0 ||
1035 		    !(pfd[i].revents & (POLLRDNORM | POLLERR))) {
1036 
1037 		    /* unused fd or unwanted event */
1038 		    continue;
1039 		}
1040 
1041 		/* alloc reply buffer */
1042 		if (!reply && !(reply = malloc(mtu))) {
1043 		    slp_err(LOG_CRIT, 0, "mc_revcmsg", "out of memory");
1044 		    return;
1045 	    }
1046 		if (recvfrom(pfd[i].fd, reply, mtu, 0,
1047 				(struct sockaddr *)&responder,
1048 				(int *)&addrlen) < 0) {
1049 
1050 		    /* if reply overflows, hand off to TCP */
1051 		    if (errno == ENOMEM) {
1052 			free(reply); reply = NULL;
1053 			tcp_handoff(hp, scopes,
1054 					&responder, slp_get_xid(header));
1055 			continue;
1056 		    }
1057 
1058 		    /* else something nasty happened */
1059 		    slp_err(LOG_CRIT, 0, "mc_recvmsg",
1060 					"recvfrom failed: %s",
1061 					strerror(errno));
1062 		    continue;
1063 		} else {
1064 		    /* success */
1065 		    if (slp_get_overflow(reply)) {
1066 			tcp_handoff(hp, scopes,
1067 					&responder, slp_get_xid(header));
1068 		    }
1069 			/*
1070 			 * Add to the PR list. If this responder has already
1071 			 * answered, it doesn't count.
1072 			 */
1073 		    if (add2pr_list(&(hp->msg), &responder, collator)) {
1074 			(void) slp_enqueue(hp->q, reply);
1075 			*noresults = 0;
1076 			*anyresults = 1;
1077 			reply = NULL;
1078 		    }
1079 
1080 		    /* if we've exceeded maxwait, break out */
1081 		    *now = now_millis();
1082 		    if (*now > final_to)
1083 			goto cleanup;
1084 
1085 		} /* end successful receive */
1086 
1087 	    } /* end fd iteration */
1088 
1089 	    /* reset poll's timeout */
1090 	    timeout = timeout - (int)(*now - sent);
1091 	    if (timeout <= 0) {
1092 		goto cleanup;
1093 	    }
1094 
1095 	} /* end main poll loop */
1096 
1097 cleanup:
1098 	if (reply) {
1099 	    free(reply);
1100 	}
1101 }
1102 
/*
 * Tears down a pollfd array: every entry whose descriptor is greater
 * than zero is closed, and then the array itself is freed. Entries
 * with a descriptor of zero or less are left untouched.
 */
static void free_pfds(struct pollfd *pfds, nfds_t nfds) {
	nfds_t idx = 0;

	while (idx < nfds) {
		int fd = pfds[idx++].fd;

		if (fd > 0) {
			(void) close(fd);
		}
	}

	free(pfds);
}
1119 
1120 /*
1121  * Hands off a message to the TCP thread, fabricating a new target
1122  * from 'sin'. 'xid' will be used to create the XID for the TCP message.
1123  */
1124 static void tcp_handoff(slp_handle_impl_t *hp, const char *scopes,
1125 			struct sockaddr_in *sin, unsigned short xid) {
1126 	slp_target_t *target;
1127 
1128 	target = slp_fabricate_target(sin);
1129 	slp_uc_tcp_send(hp, target, scopes, SLP_TRUE, xid);
1130 }
1131 
/*
 * Reads the wall clock with gettimeofday() and returns it expressed
 * in milliseconds.
 */
static unsigned long long now_millis() {
	struct timeval tv;
	unsigned long long from_secs, from_usecs;

	(void) gettimeofday(&tv, NULL);

	/* whole seconds and the sub-second remainder, both in ms */
	from_secs = (unsigned long long) tv.tv_sec * 1000;
	from_usecs = tv.tv_usec / 1000;

	return (from_secs + from_usecs);
}
1144 
/*
 * A wrapper around poll which waits until a reply comes in. This will
 * wait no longer than 'timeout' before returning. poll can return
 * even if no data is on the pipe or timeout has occurred (it fails with
 * EINTR or EAGAIN), so in that case the time spent in the interrupted
 * poll is deducted from 'timeout' and the wait is resumed, unless the
 * timeout or 'final_to' has been exceeded. 'final_to' is ignored if it
 * is 0.
 *
 * 'timeout' is taken as the time left to wait counted from the moment
 * of the call. 'sent' is kept for interface compatibility only:
 * deducting the time since 'sent' on every interruption would count
 * the same interval more than once.
 *
 * returns:	< 0 on error
 *		0 on timeout
 *		> 0 on success (i.e. ready to read data).
 * side effect: 'now' is set to the time when poll last returned;
 *		'timeout' is reduced by the time spent in interrupted polls.
 */
static int wait_for_response(
	unsigned long long final_to,
	int *timeout,
	unsigned long long sent,
	unsigned long long *now,
	struct pollfd pfd[], nfds_t nfds) {

	unsigned long long mark;	/* when the current poll started */
	int elapsed, pollerr, poll_errno;

	mark = now_millis();

	/* wait until we can read something */
	for (;;) {
		pollerr = poll(pfd, nfds, *timeout);
		/* save errno before any other call can disturb it */
		poll_errno = errno;
		*now = now_millis();

		/* ready to read (> 0) or timed out (0) */
		if (pollerr >= 0) {
			return (pollerr);
		}

		/* a real error */
		if (poll_errno != EAGAIN && poll_errno != EINTR) {
			slp_err(LOG_INFO, 0, "wait for response",
				"poll error: %s",
				strerror(poll_errno));
			return (pollerr);
		}

		/* poll is weird: interrupted, so account for time used */
		elapsed = (int)(*now - mark);
		mark = *now;
		if ((final_to != 0 && *now > final_to) ||
		    elapsed > *timeout) {
			/* out of time */
			return (0);
		}
		*timeout -= elapsed;
	}
}
1201 
1202 /*
1203  * Adds the cname of the host whose address is in 'sin' to this message's
1204  * previous responder list. The message is contained in 'msg'.
1205  * 'collator' contains the complete previous responder list, so that
1206  * even if the PR list in the message overflows and must be truncated,
1207  * the function can still correctly determine if we have heard from this
1208  * host before.
1209  *
1210  * returns:	1 if this is the first time we've heard from this host
1211  *		0 is this is a duplicate reply
1212  */
1213 static int add2pr_list(
1214 	slp_msg_t *msg,
1215 	struct sockaddr_in *sin,
1216 	void **collator) {
1217 
1218 	char **res, *cname, *p, *header;
1219 	size_t mtu;
1220 	size_t len, off, namelen;
1221 	unsigned short prlen;
1222 
1223 	/* Attempt to resolve the responder's IP address to its host name */
1224 	if (!(cname = slp_gethostbyaddr((char *)&(sin->sin_addr),
1225 					sizeof (sin->sin_addr))))
1226 		return (0);
1227 
1228 	res = slp_tsearch(
1229 		cname, collator,
1230 		(int (*)(const void *, const void *)) strcasecmp);
1231 	if (*res != cname) {
1232 		/* duplicate */
1233 		slp_err(LOG_INFO, 0, "add2pr_list",
1234 			"drop PR ignored by host: %s",
1235 			cname);
1236 		free(cname);
1237 		return (0);
1238 	}
1239 
1240 	/* new responder: add to the msg PR list if there is room */
1241 	mtu = slp_get_mtu();
1242 
1243 	header = msg->iov[0].iov_base;
1244 	len = slp_get_length(header);
1245 
1246 	namelen = strlen(cname);
1247 	if ((namelen + 2 + len) >= mtu)
1248 		return (1);	/* no room */
1249 
1250 	/* else  there is enough room */
1251 	prlen = (unsigned short)msg->prlist->iov_len;
1252 	p = msg->prlist->iov_base + prlen;
1253 	*p = 0;
1254 
1255 	if (prlen) {
1256 		namelen++;	/* add the ',' */
1257 		(void) strcat(p, ",");
1258 	}
1259 	(void) strcat(p, cname);
1260 
1261 	/* update msg and pr list length */
1262 	len += namelen;
1263 	slp_set_length(header, len);
1264 	prlen += (unsigned short)namelen;
1265 	off = 0;
1266 	(void) slp_add_sht(msg->prlistlen.iov_base, 2, prlen, &off);
1267 	msg->prlist->iov_len += namelen;
1268 
1269 	return (1);
1270 }
1271 
/*
 * Visitor applied to each node while walking the previous responder
 * tree; its only job is to release memory. On an endorder or leaf
 * visit it frees the string the node points to and then the node
 * itself; all other visits are ignored.
 */
/*ARGSUSED2*/
static void free_pr_node(void *node, VISIT order, int level, void *cookie) {
	char **entry = node;

	if (order != endorder && order != leaf) {
		return;
	}

	free(*entry);
	free(node);
}
1284