xref: /illumos-gate/usr/src/lib/gss_mechs/mech_krb5/krb5/os/sendto_kdc.c (revision 80ab886d233f514d54c2a6bdeb9fdfd951bd6881)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 #pragma ident	"%Z%%M%	%I%	%E% SMI"
6 /*
7  * lib/krb5/os/sendto_kdc.c
8  *
9  * Copyright 1990,1991,2001,2002 by the Massachusetts Institute of Technology.
10  * All Rights Reserved.
11  *
12  * Export of this software from the United States of America may
13  *   require a specific license from the United States Government.
14  *   It is the responsibility of any person or organization contemplating
15  *   export to obtain such a license before exporting.
16  *
17  * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
18  * distribute this software and its documentation for any purpose and
19  * without fee is hereby granted, provided that the above copyright
20  * notice appear in all copies and that both that copyright notice and
21  * this permission notice appear in supporting documentation, and that
22  * the name of M.I.T. not be used in advertising or publicity pertaining
23  * to distribution of the software without specific, written prior
24  * permission.  Furthermore if you modify this software you must label
25  * your software as modified software and not distribute it in such a
26  * fashion that it might be confused with the original M.I.T. software.
27  * M.I.T. makes no representations about the suitability of
28  * this software for any purpose.  It is provided "as is" without express
29  * or implied warranty.
30  *
31  *
32  * Send packet to KDC for realm; wait for response, retransmitting
33  * as necessary.
34  */
35 
36 #define NEED_SOCKETS
37 #define NEED_LOWLEVEL_IO
38 #include <fake-addrinfo.h>
39 #include <k5-int.h>
40 
41 #ifdef HAVE_SYS_TIME_H
42 #include <sys/time.h>
43 #else
44 #include <time.h>
45 #endif
46 #include "os-proto.h"
47 
48 #ifdef _AIX
49 #include <sys/select.h>
50 #endif
51 
52 /* For FIONBIO.  */
53 #include <sys/ioctl.h>
54 #ifdef HAVE_SYS_FILIO_H
55 #include <sys/filio.h>
56 #endif
57 
58 #define MAX_PASS		    3
59 /* Solaris Kerberos: moved to k5-int.h */
60 /* #define DEFAULT_UDP_PREF_LIMIT	 1465 */
61 #define HARD_UDP_LIMIT		32700 /* could probably do 64K-epsilon ? */
62 
63 krb5_error_code krb5int_sendto(krb5_context, const krb5_data *,
64 			    const struct addrlist *, krb5_data *,
65 			    struct sockaddr_storage *,
66 			    socklen_t *, int *);
67 
/*
 * Solaris kerberos: retained because other code depends on this hook.
 *
 * Default sink for send-to-KDC debug output: writes the `len` bytes that
 * start at `data` to stderr exactly as given.
 */
static void
default_debug_handler (const void *data, size_t len)
{
    /* No explicit flush needed: stderr is unbuffered. */
    fwrite(data, sizeof (char), len, stderr);
}

/* Replaceable debug-output hook; initially points at the stderr writer. */
void (*krb5int_sendtokdc_debug_handler) (const void *, size_t) =
    default_debug_handler;
76 
77 /*
78  * Solaris Kerberos: only including the debug stuff if DEBUG defined outside
79  * this file.
80  */
81 #ifdef  DEBUG
82 
83 static char global_err_str[NI_MAXHOST + NI_MAXSERV + 1024];
84 
85 /* Solaris kerberos: removed put() since it isn't needed. */
86 
87 static void putstr(const char *str)
88 {
89     /* Solaris kerberos: build the string which will be passed to syslog later */
90     strlcat(global_err_str, str, sizeof (global_err_str));
91 }
92 
93 #define dprint krb5int_debug_fprint
94 #define dperror dprint
95 
96 #include <com_err.h>
97 
98 static void
99 krb5int_debug_fprint (const char *fmt, ...)
100 {
101     va_list args;
102 
103     /* Temporaries for variable arguments, etc.  */
104     krb5_error_code kerr;
105     int err;
106     fd_set *rfds, *wfds, *xfds;
107     int i;
108     int maxfd;
109     struct timeval *tv;
110     struct addrinfo *ai;
111     const krb5_data *d;
112     char addrbuf[NI_MAXHOST], portbuf[NI_MAXSERV];
113     const char *p;
114     char tmpbuf[NI_MAXHOST + NI_MAXSERV + 30];
115 
116     /*
117      * Solaris kerberos: modified this function to create a string to pass to
118      * syslog()
119      */
120     global_err_str[0] = NULL;
121 
122     va_start(args, fmt);
123 
124 #define putf(FMT,X)	(sprintf(tmpbuf,FMT,X),putstr(tmpbuf))
125 
126     for (; *fmt; fmt++) {
127 	if (*fmt != '%') {
128 	    /* Possible optimization: Look for % and print all chars
129 	       up to it in one call.  */
130 	    putf("%c", *fmt);
131 	    continue;
132 	}
133 	/* After this, always processing a '%' sequence.  */
134 	fmt++;
135 	switch (*fmt) {
136 	case 0:
137 	default:
138 	    abort();
139 	case 'E':
140 	    /* %E => krb5_error_code */
141 	    kerr = va_arg(args, krb5_error_code);
142 	    sprintf(tmpbuf, "%lu/", (unsigned long) kerr);
143 	    putstr(tmpbuf);
144 	    p = error_message(kerr);
145 	    putstr(p);
146 	    break;
147 	case 'm':
148 	    /* %m => errno value (int) */
149 	    /* Like syslog's %m except the errno value is passed in
150 	       rather than the current value.  */
151 	    err = va_arg(args, int);
152 	    putf("%d/", err);
153 	    p = strerror(err);
154 	    putstr(p);
155 	    break;
156 	case 'F':
157 	    /* %F => fd_set *, fd_set *, fd_set *, int */
158 	    rfds = va_arg(args, fd_set *);
159 	    wfds = va_arg(args, fd_set *);
160 	    xfds = va_arg(args, fd_set *);
161 	    maxfd = va_arg(args, int);
162 
163 	    for (i = 0; i < maxfd; i++) {
164 		int r = FD_ISSET(i, rfds);
165 		int w = wfds && FD_ISSET(i, wfds);
166 		int x = xfds && FD_ISSET(i, xfds);
167 		if (r || w || x) {
168 		    putf(" %d", i);
169 		    if (r)
170 			putstr("r");
171 		    if (w)
172 			putstr("w");
173 		    if (x)
174 			putstr("x");
175 		}
176 	    }
177 	    putstr(" ");
178 	    break;
179 	case 's':
180 	    /* %s => char * */
181 	    p = va_arg(args, const char *);
182 	    putstr(p);
183 	    break;
184 	case 't':
185 	    /* %t => struct timeval * */
186 	    tv = va_arg(args, struct timeval *);
187 	    if (tv) {
188 		sprintf(tmpbuf, "%ld.%06ld",
189 			(long) tv->tv_sec, (long) tv->tv_usec);
190 		putstr(tmpbuf);
191 	    } else
192 		putstr("never");
193 	    break;
194 	case 'd':
195 	    /* %d => int */
196 	    putf("%d", va_arg(args, int));
197 	    break;
198 	case 'p':
199 	    /* %p => pointer */
200 	    putf("%p", va_arg(args, void*));
201 	    break;
202 	case 'A':
203 	    /* %A => addrinfo */
204 	    ai = va_arg(args, struct addrinfo *);
205 	    if (0 != getnameinfo (ai->ai_addr, ai->ai_addrlen,
206 				  addrbuf, sizeof (addrbuf),
207 				  portbuf, sizeof (portbuf),
208 				  NI_NUMERICHOST | NI_NUMERICSERV))
209 		strcpy (addrbuf, "??"), strcpy (portbuf, "??");
210 	    sprintf(tmpbuf, "%s %s.%s",
211 		    (ai->ai_socktype == SOCK_DGRAM
212 		     ? "udp"
213 		     : ai->ai_socktype == SOCK_STREAM
214 		     ? "tcp"
215 		     : "???"),
216 		    addrbuf, portbuf);
217 	    putstr(tmpbuf);
218 	    break;
219 	case 'D':
220 	    /* %D => krb5_data * */
221 	    d = va_arg(args, krb5_data *);
222 	    p = d->data;
223 	    putstr("0x");
224 	    for (i = 0; i < d->length; i++) {
225 		putf("%.2x", *p++);
226 	    }
227 	    break;
228 	}
229     }
230     va_end(args);
231 
232     /* Solaris kerberos: use syslog() for debug output */
233     syslog(LOG_DEBUG, global_err_str);
234 }
235 
236 #else
237 #define dprint (void)
238 #define dperror(MSG) ((void)(MSG))
239 #endif
240 
241 static int
242 merge_addrlists (struct addrlist *dest, struct addrlist *src)
243 {
244     int err, i;
245 
246 #ifdef DEBUG
247     /*LINTED*/
248     dprint("merging addrlists:\n\tlist1: ");
249     for (i = 0; i < dest->naddrs; i++)
250 	/*LINTED*/
251 	dprint(" %A", dest->addrs[i]);
252     /*LINTED*/
253     dprint("\n\tlist2: ");
254     for (i = 0; i < src->naddrs; i++)
255 	/*LINTED*/
256 	dprint(" %A", src->addrs[i]);
257     /*LINTED*/
258     dprint("\n");
259 #endif
260 
261     err = krb5int_grow_addrlist (dest, src->naddrs);
262     if (err)
263 	return err;
264     for (i = 0; i < src->naddrs; i++) {
265 	dest->addrs[dest->naddrs + i] = src->addrs[i];
266 	src->addrs[i] = 0;
267     }
268     dest->naddrs += i;
269     src->naddrs = 0;
270 
271 #ifdef DEBUG
272     /*LINTED*/
273     dprint("\tout:   ");
274     for (i = 0; i < dest->naddrs; i++)
275 	/*LINTED*/
276 	dprint(" %A", dest->addrs[i]);
277     /*LINTED*/
278     dprint("\n");
279 #endif
280 
281     return 0;
282 }
283 
284 /*
285  * send the formatted request 'message' to a KDC for realm 'realm' and
286  * return the response (if any) in 'reply'.
287  *
288  * If the message is sent and a response is received, 0 is returned,
289  * otherwise an error code is returned.
290  *
291  * The storage for 'reply' is allocated and should be freed by the caller
292  * when finished.
293  */
294 
295 krb5_error_code
296 krb5_sendto_kdc (krb5_context context, const krb5_data *message,
297 		 const krb5_data *realm, krb5_data *reply,
298 		 int *use_master, int tcp_only)
299 {
300     krb5_error_code retval;
301     struct addrlist addrs;
302     int socktype1 = 0, socktype2 = 0, addr_used;
303 
304     /*
305      * find KDC location(s) for realm
306      */
307 
308     /*
309      * BUG: This code won't return "interesting" errors (e.g., out of mem,
310      * bad config file) from locate_kdc.  KRB5_REALM_CANT_RESOLVE can be
311      * ignored from one query of two, but if only one query is done, or
312      * both return that error, it should be returned to the caller.  Also,
313      * "interesting" errors (not KRB5_KDC_UNREACH) from sendto_{udp,tcp}
314      * should probably be returned as well.
315      */
316 
317     /*LINTED*/
318     dprint("krb5_sendto_kdc(%d@%p, \"%D\", use_master=%d, tcp_only=%d)\n",
319     /*LINTED*/
320 	   message->length, message->data, realm, *use_master, tcp_only);
321 
322     /*
323      * Solaris Kerberos: keep it simple by not supporting a udp_preference_limit
324      */
325 #if 0 /************** Begin IFDEF'ed OUT *******************************/
326     if (!tcp_only && context->udp_pref_limit < 0) {
327 	int tmp;
328 	retval = profile_get_integer(context->profile,
329 				     "libdefaults", "udp_preference_limit", 0,
330 				     DEFAULT_UDP_PREF_LIMIT, &tmp);
331 	if (retval)
332 	    return retval;
333 	if (tmp < 0)
334 	    tmp = DEFAULT_UDP_PREF_LIMIT;
335 	else if (tmp > HARD_UDP_LIMIT) {
336 	    /* In the unlikely case that a *really* big value is
337 	       given, let 'em use as big as we think we can
338 	       support.  */
339 	    tmp = HARD_UDP_LIMIT;
340 	}
341 	context->udp_pref_limit = tmp;
342     }
343 #endif /**************** END IFDEF'ed OUT *******************************/
344 
345     retval = (*use_master ? KRB5_KDC_UNREACH : KRB5_REALM_UNKNOWN);
346 
347     if (tcp_only)
348 	socktype1 = SOCK_STREAM, socktype2 = 0;
349     else if (message->length <= context->udp_pref_limit)
350 	socktype1 = SOCK_DGRAM, socktype2 = SOCK_STREAM;
351     else
352 	socktype1 = SOCK_STREAM, socktype2 = SOCK_DGRAM;
353 
354     retval = krb5_locate_kdc(context, realm, &addrs, *use_master, socktype1, 0);
355     if (socktype2) {
356 	struct addrlist addrs2;
357 
358 	retval = krb5_locate_kdc(context, realm, &addrs2, *use_master,
359 				 socktype2, 0);
360 	if (retval == 0) {
361 	    (void) merge_addrlists(&addrs, &addrs2);
362 	    krb5int_free_addrlist(&addrs2);
363 	}
364     }
365     if (addrs.naddrs > 0) {
366         retval = krb5int_sendto (context, message, &addrs, reply, 0, 0,
367 		&addr_used);
368 	if (retval == 0) {
369             /*
370 	     * Set use_master to 1 if we ended up talking to a master when
371 	     * didn't explicitly request to
372 	     */
373 
374 	    if (*use_master == 0) {
375 	        struct addrlist addrs3;
376 		retval = krb5_locate_kdc(context, realm, &addrs3, 1,
377 					addrs.addrs[addr_used]->ai_socktype,
378 					addrs.addrs[addr_used]->ai_family);
379 		if (retval == 0) {
380 		    int i;
381 		    for (i = 0; i < addrs3.naddrs; i++) {
382 			if (addrs.addrs[addr_used]->ai_addrlen ==
383 			    addrs3.addrs[i]->ai_addrlen &&
384 			    memcmp(addrs.addrs[addr_used]->ai_addr,
385 				addrs3.addrs[i]->ai_addr,
386 				addrs.addrs[addr_used]->ai_addrlen) == 0) {
387 				*use_master = 1;
388 				break;
389 			}
390 		    }
391 		    krb5int_free_addrlist (&addrs3);
392 		}
393 	    }
394 	    krb5int_free_addrlist (&addrs);
395 	    return 0;
396 	}
397 	krb5int_free_addrlist (&addrs);
398     }
399     return retval;
400 }
401 
402 
403 /*
404  * Notes:
405  *
406  * Getting "connection refused" on a connected UDP socket causes
407  * select to indicate write capability on UNIX, but only shows up
408  * as an exception on Windows.  (I don't think any UNIX system flags
409  * the error as an exception.)  So we check for both, or make it
410  * system-specific.
411  *
412  * Always watch for responses from *any* of the servers.  Eventually
413  * fix the UDP code to do the same.
414  *
415  * To do:
416  * - TCP NOPUSH/CORK socket options?
417  * - error codes that don't suck
418  * - getsockopt(SO_ERROR) to check connect status
419  * - handle error RESPONSE_TOO_BIG from UDP server and use TCP
420  *   connections already in progress
421  */
422 
423 #include <cm.h>
424 
/* Printable names for enum conn_states, in the same order as the enum. */
static const char *const state_strings[] = {
    "INITIALIZING", "CONNECTING", "WRITING", "READING", "FAILED"
};
/* Life cycle of one connection attempt; used to index state_strings. */
enum conn_states { INITIALIZING, CONNECTING, WRITING, READING, FAILED };
/* Receive-side bookkeeping for one reply. */
struct incoming_krb5_message {
    size_t bufsizebytes_read;	/* how many of the 4 length bytes are in */
    size_t bufsize;		/* size of buf in bytes */
    char *buf;			/* reply buffer */
    char *pos;			/* next write position inside buf */
    unsigned char bufsizebytes[4]; /* TCP length prefix, high byte first */
    size_t n_left;		/* reply bytes still expected (TCP) */
};
/* Everything tracked for one candidate KDC address. */
struct conn_state {
    SOCKET fd;			/* INVALID_SOCKET until a socket is opened */
    krb5_error_code err;	/* last error recorded for this connection */
    enum conn_states state;
    unsigned int is_udp : 1;	/* 1 for datagram, 0 for stream */
    /* Handler called by service_fds with the SSF_* flags that fired. */
    int (*service)(struct conn_state *, struct select_state *, int);
    struct addrinfo *addr;	/* address this connection targets */
    struct {
	struct {
	    sg_buf sgbuf[2];	/* TCP: length prefix + message; UDP: message */
	    sg_buf *sgp;	/* next buffer still to be written */
	    int sg_count;	/* buffers remaining from sgp */
	} out;
	struct incoming_krb5_message in;
    } x;
};
453 
/*
 * Store the current time of day in *tvp.
 *
 * Returns 0 on success, or the errno value left by gettimeofday() on
 * failure.  errno is captured before dperror() runs because the debug
 * build routes dperror() through formatting and syslog calls that may
 * overwrite it.
 */
static int getcurtime (struct timeval *tvp)
{
    if (gettimeofday(tvp, 0)) {
	int saved_errno = errno;

	dperror("gettimeofday");
	return saved_errno;
    }
    return 0;
}
462 
463 /*
464  * Call select and return results.
465  * Input: interesting file descriptors and absolute timeout
466  * Output: select return value (-1 or num fds ready) and fd_sets
467  * Return: 0 (for i/o available or timeout) or error code.
468  */
469 krb5_error_code
470 krb5int_cm_call_select (const struct select_state *in,
471 			struct select_state *out, int *sret)
472 {
473     struct timeval now, *timo;
474     krb5_error_code e;
475 
476     *out = *in;
477     e = getcurtime(&now);
478     if (e)
479 	return e;
480     if (out->end_time.tv_sec == 0)
481 	timo = 0;
482     else {
483 	timo = &out->end_time;
484 	out->end_time.tv_sec -= now.tv_sec;
485 	out->end_time.tv_usec -= now.tv_usec;
486 	if (out->end_time.tv_usec < 0) {
487 	    out->end_time.tv_usec += 1000000;
488 	    out->end_time.tv_sec--;
489 	}
490 	if (out->end_time.tv_sec < 0) {
491 	    *sret = 0;
492 	    return 0;
493 	}
494     }
495     /*LINTED*/
496     dprint("selecting on max=%d sockets [%F] timeout %t\n",
497 	    /*LINTED*/
498 	   out->max, &out->rfds, &out->wfds, &out->xfds, out->max, timo);
499     *sret = select(out->max, &out->rfds, &out->wfds, &out->xfds, timo);
500     e = SOCKET_ERRNO;
501 
502 #ifdef DEBUG
503     /*LINTED*/
504     dprint("select returns %d", *sret);
505     if (*sret < 0)
506 	/*LINTED*/
507 	dprint(", error = %E\n", e);
508     else if (*sret == 0)
509 	/*LINTED*/
510 	dprint(" (timeout)\n");
511     else
512 	/*LINTED*/
513 	dprint(":%F\n", &out->rfds, &out->wfds, &out->xfds, out->max);
514 #endif
515 
516     if (*sret < 0)
517 	return e;
518     return 0;
519 }
520 
521 static int service_tcp_fd (struct conn_state *conn,
522 			   struct select_state *selstate, int ssflags);
523 static int service_udp_fd (struct conn_state *conn,
524 			   struct select_state *selstate, int ssflags);
525 
526 
527 static int
528 setup_connection (struct conn_state *state, struct addrinfo *ai,
529 		  const krb5_data *message, unsigned char *message_len_buf,
530 		  char **udpbufp)
531 {
532     state->state = INITIALIZING;
533     state->err = 0;
534     state->x.out.sgp = state->x.out.sgbuf;
535     state->addr = ai;
536     state->fd = INVALID_SOCKET;
537     SG_SET(&state->x.out.sgbuf[1], 0, 0);
538     if (ai->ai_socktype == SOCK_STREAM) {
539 	SG_SET(&state->x.out.sgbuf[0], message_len_buf, 4);
540 	SG_SET(&state->x.out.sgbuf[1], message->data, message->length);
541 	state->x.out.sg_count = 2;
542 	state->is_udp = 0;
543 	state->service = service_tcp_fd;
544     } else {
545 	SG_SET(&state->x.out.sgbuf[0], message->data, message->length);
546 	SG_SET(&state->x.out.sgbuf[1], 0, 0);
547 	state->x.out.sg_count = 1;
548 	state->is_udp = 1;
549 	state->service = service_udp_fd;
550 
551 	if (*udpbufp == 0) {
552 	    *udpbufp = malloc(krb5_max_dgram_size);
553 	    if (*udpbufp == 0) {
554 		dperror("malloc(krb5_max_dgram_size)");
555 		(void) closesocket(state->fd);
556 		state->fd = INVALID_SOCKET;
557 		state->state = FAILED;
558 		return 1;
559 	    }
560 	}
561 	state->x.in.buf = *udpbufp;
562 	state->x.in.bufsize = krb5_max_dgram_size;
563     }
564     return 0;
565 }
566 
567 static int
568 start_connection (struct conn_state *state, struct select_state *selstate)
569 {
570     int fd, e;
571     struct addrinfo *ai = state->addr;
572 
573     /*LINTED*/
574     dprint("start_connection(@%p)\ngetting %s socket in family %d...", state,
575 	   /*LINTED*/
576 	   ai->ai_socktype == SOCK_STREAM ? "stream" : "dgram", ai->ai_family);
577     fd = socket(ai->ai_family, ai->ai_socktype, 0);
578     if (fd == INVALID_SOCKET) {
579 	state->err = SOCKET_ERRNO;
580 	/*LINTED*/
581 	dprint("socket: %m creating with af %d\n", state->err, ai->ai_family);
582 	return -1;		/* try other hosts */
583     }
584     /* Make it non-blocking.  */
585     if (ai->ai_socktype == SOCK_STREAM) {
586 	static const int one = 1;
587 	static const struct linger lopt = { 0, 0 };
588 
589 	if (ioctlsocket(fd, FIONBIO, (const void *) &one))
590 	    dperror("sendto_kdc: ioctl(FIONBIO)");
591 	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lopt, sizeof(lopt)))
592 	    dperror("sendto_kdc: setsockopt(SO_LINGER)");
593     }
594 
595     /* Start connecting to KDC.  */
596     /*LINTED*/
597     dprint(" fd %d; connecting to %A...\n", fd, ai);
598     e = connect(fd, ai->ai_addr, ai->ai_addrlen);
599     if (e != 0) {
600 	/*
601 	 * This is the path that should be followed for non-blocking
602 	 * connections.
603 	 */
604 	if (SOCKET_ERRNO == EINPROGRESS || SOCKET_ERRNO == EWOULDBLOCK) {
605 	    state->state = CONNECTING;
606 	} else {
607 	    /*LINTED*/
608 	    dprint("connect failed: %m\n", SOCKET_ERRNO);
609 	    state->err = SOCKET_ERRNO;
610 	    state->state = FAILED;
611 	    return -2;
612 	}
613     } else {
614 	/*
615 	 * Connect returned zero even though we tried to make it
616 	 * non-blocking, which should have caused it to return before
617 	 * finishing the connection.  Oh well.  Someone's network
618 	 * stack is broken, but if they gave us a connection, use it.
619 	 */
620 	state->state = WRITING;
621     }
622     /*LINTED*/
623     dprint("new state = %s\n", state_strings[state->state]);
624 
625     state->fd = fd;
626 
627     if (ai->ai_socktype == SOCK_DGRAM) {
628 	/* Send it now.  */
629 	int ret;
630 	sg_buf *sg = &state->x.out.sgbuf[0];
631 
632 	/*LINTED*/
633 	dprint("sending %d bytes on fd %d\n", SG_LEN(sg), state->fd);
634 	ret = send(state->fd, SG_BUF(sg), SG_LEN(sg), 0);
635 	if (ret != SG_LEN(sg)) {
636 	    dperror("sendto");
637 	    (void) closesocket(state->fd);
638 	    state->fd = INVALID_SOCKET;
639 	    state->state = FAILED;
640 	    return -3;
641 	} else {
642 	    state->state = READING;
643 	}
644     }
645 
646     FD_SET(state->fd, &selstate->rfds);
647     if (state->state == CONNECTING || state->state == WRITING)
648 	FD_SET(state->fd, &selstate->wfds);
649     FD_SET(state->fd, &selstate->xfds);
650     if (selstate->max <= state->fd)
651 	selstate->max = state->fd + 1;
652     selstate->nfds++;
653 
654     /*LINTED*/
655     dprint("new select vectors: %F\n",
656 	   /*LINTED*/
657 	   &selstate->rfds, &selstate->wfds, &selstate->xfds, selstate->max);
658 
659     return 0;
660 }
661 
662 /* Return 0 if we sent something, non-0 otherwise.
663    If 0 is returned, the caller should delay waiting for a response.
664    Otherwise, the caller should immediately move on to process the
665    next connection.  */
666 static int
667 maybe_send (struct conn_state *conn, struct select_state *selstate)
668 {
669     sg_buf *sg;
670 
671     /*LINTED*/
672     dprint("maybe_send(@%p) state=%s type=%s\n", conn,
673 	   /*LINTED*/
674 	   state_strings[conn->state], conn->is_udp ? "udp" : "tcp");
675     if (conn->state == INITIALIZING)
676 	return start_connection(conn, selstate);
677 
678     /* Did we already shut down this channel?  */
679     if (conn->state == FAILED) {
680 	dprint("connection already closed\n");
681 	return -1;
682     }
683 
684     if (conn->addr->ai_socktype == SOCK_STREAM) {
685 	dprint("skipping stream socket\n");
686 	/* The select callback will handle flushing any data we
687 	   haven't written yet, and we only write it once.  */
688 	return -1;
689     }
690 
691     /* UDP - Send message, possibly for the first time, possibly a
692        retransmit if a previous attempt timed out.  */
693     sg = &conn->x.out.sgbuf[0];
694     /*LINTED*/
695     dprint("sending %d bytes on fd %d\n", SG_LEN(sg), conn->fd);
696     if (send(conn->fd, SG_BUF(sg), SG_LEN(sg), 0) != SG_LEN(sg)) {
697 	dperror("send");
698 	/* Keep connection alive, we'll try again next pass.
699 
700 	   Is this likely to catch any errors we didn't get from the
701 	   select callbacks?  */
702 	return -1;
703     }
704     /* Yay, it worked.  */
705     return 0;
706 }
707 
708 static void
709 kill_conn(struct conn_state *conn, struct select_state *selstate, int err)
710 {
711     conn->state = FAILED;
712     shutdown(conn->fd, SHUTDOWN_BOTH);
713     FD_CLR(conn->fd, &selstate->rfds);
714     FD_CLR(conn->fd, &selstate->wfds);
715     FD_CLR(conn->fd, &selstate->xfds);
716     conn->err = err;
717     /*LINTED*/
718     dprint("abandoning connection %d: %m\n", conn->fd, err);
719     /* Fix up max fd for next select call.  */
720     if (selstate->max == 1 + conn->fd) {
721 	while (selstate->max > 0
722 	       && ! FD_ISSET(selstate->max-1, &selstate->rfds)
723 	       && ! FD_ISSET(selstate->max-1, &selstate->wfds)
724 	       && ! FD_ISSET(selstate->max-1, &selstate->xfds))
725 	    selstate->max--;
726 	/*LINTED*/
727 	dprint("new max_fd + 1 is %d\n", selstate->max);
728     }
729     selstate->nfds--;
730 }
731 
732 /* Return nonzero only if we're finished and the caller should exit
733    its loop.  This happens in two cases: We have a complete message,
734    or the socket has closed and no others are open.  */
735 
736 static int
737 service_tcp_fd (struct conn_state *conn, struct select_state *selstate,
738 		int ssflags)
739 {
740     krb5_error_code e = 0;
741     int nwritten, nread;
742 
743     if (!(ssflags & (SSF_READ|SSF_WRITE|SSF_EXCEPTION)))
744 	abort();
745     switch (conn->state) {
746 	SOCKET_WRITEV_TEMP tmp;
747 
748     case CONNECTING:
749 	if (ssflags & SSF_READ) {
750 	    /* Bad -- the KDC shouldn't be sending to us first.  */
751 	    e = EINVAL /* ?? */;
752 	kill_conn:
753 	    kill_conn(conn, selstate, e);
754 	    if (e == EINVAL) {
755 		closesocket(conn->fd);
756 		conn->fd = INVALID_SOCKET;
757 	    }
758 	    return e == 0;
759 	}
760 	if (ssflags & SSF_EXCEPTION) {
761 	handle_exception:
762 	    e = 1;		/* need only be non-zero */
763 	    goto kill_conn;
764 	}
765 
766 	/*
767 	 * Connect finished -- but did it succeed or fail?
768 	 * UNIX sets can_write if failed.
769 	 * Try writing, I guess, and find out.
770 	 */
771 	conn->state = WRITING;
772 	goto try_writing;
773 
774     case WRITING:
775 	if (ssflags & SSF_READ) {
776 	    e = E2BIG;
777 	    /* Bad -- the KDC shouldn't be sending anything yet.  */
778 	    goto kill_conn;
779 	}
780 	if (ssflags & SSF_EXCEPTION)
781 	    goto handle_exception;
782 
783     try_writing:
784 	/*LINTED*/
785 	dprint("trying to writev %d (%d bytes) to fd %d\n",
786 		/*LINTED*/
787 	       conn->x.out.sg_count,
788 	       ((conn->x.out.sg_count == 2 ? SG_LEN(&conn->x.out.sgp[1]) : 0)
789 		/*LINTED*/
790 		+ SG_LEN(&conn->x.out.sgp[0])),
791 	       conn->fd);
792 	nwritten = SOCKET_WRITEV(conn->fd, conn->x.out.sgp,
793 				 conn->x.out.sg_count, tmp);
794 	if (nwritten < 0) {
795 	    e = SOCKET_ERRNO;
796 	    /*LINTED*/
797 	    dprint("failed: %m\n", e);
798 	    goto kill_conn;
799 	}
800 	/*LINTED*/
801 	dprint("wrote %d bytes\n", nwritten);
802 	while (nwritten) {
803 	    sg_buf *sgp = conn->x.out.sgp;
804 	    if (nwritten < SG_LEN(sgp)) {
805 		/*LINTED*/
806 		SG_ADVANCE(sgp, nwritten);
807 		nwritten = 0;
808 	    } else {
809 		nwritten -= SG_LEN(conn->x.out.sgp);
810 		conn->x.out.sgp++;
811 		conn->x.out.sg_count--;
812 		if (conn->x.out.sg_count == 0 && nwritten != 0)
813 		    /* Wrote more than we wanted to?  */
814 		    abort();
815 	    }
816 	}
817 	if (conn->x.out.sg_count == 0) {
818 	    /* Done writing, switch to reading.  */
819 	    /* Don't call shutdown at this point because
820 	     * some implementations cannot deal with half-closed connections.*/
821 	    FD_CLR(conn->fd, &selstate->wfds);
822 	    /* Q: How do we detect failures to send the remaining data
823 	       to the remote side, since we're in non-blocking mode?
824 	       Will we always get errors on the reading side?  */
825 	    /*LINTED*/
826 	    dprint("switching fd %d to READING\n", conn->fd);
827 	    conn->state = READING;
828 	    conn->x.in.bufsizebytes_read = 0;
829 	    conn->x.in.bufsize = 0;
830 	    conn->x.in.buf = 0;
831 	    conn->x.in.pos = 0;
832 	    conn->x.in.n_left = 0;
833 	}
834 	return 0;
835 
836     case READING:
837 	if (ssflags & SSF_EXCEPTION) {
838 	    if (conn->x.in.buf) {
839 		free(conn->x.in.buf);
840 		conn->x.in.buf = 0;
841 	    }
842 	    goto handle_exception;
843 	}
844 
845 	if (conn->x.in.bufsizebytes_read == 4) {
846 	    /* Reading data.  */
847 	    /*LINTED*/
848 	    dprint("reading %d bytes of data from fd %d\n",
849 		   (int) conn->x.in.n_left, conn->fd);
850 	    nread = SOCKET_READ(conn->fd, conn->x.in.pos, conn->x.in.n_left);
851 	    if (nread <= 0) {
852 		e = nread ? SOCKET_ERRNO : ECONNRESET;
853 		free(conn->x.in.buf);
854 		conn->x.in.buf = 0;
855 		goto kill_conn;
856 	    }
857 	    conn->x.in.n_left -= nread;
858 	    conn->x.in.pos += nread;
859 	    if ((long)conn->x.in.n_left <= 0) {
860 		/* We win!  */
861 		return 1;
862 	    }
863 	} else {
864 	    /* Reading length.  */
865 	    nread = SOCKET_READ(conn->fd,
866 				conn->x.in.bufsizebytes + conn->x.in.bufsizebytes_read,
867 				4 - conn->x.in.bufsizebytes_read);
868 	    if (nread < 0) {
869 		e = SOCKET_ERRNO;
870 		goto kill_conn;
871 	    }
872 	    conn->x.in.bufsizebytes_read += nread;
873 	    if (conn->x.in.bufsizebytes_read == 4) {
874 		unsigned long len;
875 		len = conn->x.in.bufsizebytes[0];
876 		len = (len << 8) + conn->x.in.bufsizebytes[1];
877 		len = (len << 8) + conn->x.in.bufsizebytes[2];
878 		len = (len << 8) + conn->x.in.bufsizebytes[3];
879 		/*LINTED*/
880 		dprint("received length on fd %d is %d\n", conn->fd, (int)len);
881 		/* Arbitrary 1M cap.  */
882 		if (len > 1 * 1024 * 1024) {
883 		    e = E2BIG;
884 		    goto kill_conn;
885 		}
886 		conn->x.in.bufsize = conn->x.in.n_left = len;
887 		conn->x.in.buf = conn->x.in.pos = malloc(len);
888 		/*LINTED*/
889 		dprint("allocated %d byte buffer at %p\n", (int) len,
890 		       conn->x.in.buf);
891 		if (conn->x.in.buf == 0) {
892 		    /* allocation failure */
893 		    e = errno;
894 		    goto kill_conn;
895 		}
896 	    }
897 	}
898 	break;
899 
900     default:
901 	abort();
902     }
903     return 0;
904 }
905 
906 static int
907 service_udp_fd(struct conn_state *conn, struct select_state *selstate,
908 	       int ssflags)
909 {
910     int nread;
911 
912     if (!(ssflags & (SSF_READ|SSF_EXCEPTION)))
913 	abort();
914     if (conn->state != READING)
915 	abort();
916 
917     nread = recv(conn->fd, conn->x.in.buf, conn->x.in.bufsize, 0);
918     if (nread < 0) {
919 	kill_conn(conn, selstate, SOCKET_ERRNO);
920 	return 0;
921     }
922     conn->x.in.pos = conn->x.in.buf + nread;
923     return 1;
924 }
925 
926 static int
927 service_fds (struct select_state *selstate,
928 	     struct conn_state *conns, size_t n_conns, int *winning_conn)
929 {
930     int e, selret;
931     struct select_state sel_results;
932 
933     e = 0;
934     while (selstate->nfds > 0
935 	   && (e = krb5int_cm_call_select(selstate, &sel_results, &selret)) == 0) {
936 	int i;
937 
938 	/*LINTED*/
939 	dprint("service_fds examining results, selret=%d\n", selret);
940 
941 	if (selret == 0)
942 	    /* Timeout, return to caller.  */
943 	    return 0;
944 
945 	/* Got something on a socket, process it.  */
946 	for (i = 0; i <= selstate->max && selret > 0 && i < n_conns; i++) {
947 	    int ssflags;
948 
949 	    if (conns[i].fd == INVALID_SOCKET)
950 		continue;
951 	    ssflags = 0;
952 	    if (FD_ISSET(conns[i].fd, &sel_results.rfds))
953 		ssflags |= SSF_READ, selret--;
954 	    if (FD_ISSET(conns[i].fd, &sel_results.wfds))
955 		ssflags |= SSF_WRITE, selret--;
956 	    if (FD_ISSET(conns[i].fd, &sel_results.xfds))
957 		ssflags |= SSF_EXCEPTION, selret--;
958 	    if (!ssflags)
959 		continue;
960 
961 	    /*LINTED*/
962 	    dprint("handling flags '%s%s%s' on fd %d (%A) in state %s\n",
963 		    /*LINTED*/
964 		   (ssflags & SSF_READ) ? "r" : "",
965 		    /*LINTED*/
966 		   (ssflags & SSF_WRITE) ? "w" : "",
967 		    /*LINTED*/
968 		   (ssflags & SSF_EXCEPTION) ? "x" : "",
969 		    /*LINTED*/
970 		   conns[i].fd, conns[i].addr,
971 		   state_strings[(int) conns[i].state]);
972 
973 	    if (conns[i].service (&conns[i], selstate, ssflags)) {
974 		dprint("fd service routine says we're done\n");
975 		*winning_conn = i;
976 		return 1;
977 	    }
978 	}
979     }
980     if (e != 0) {
981 	/*LINTED*/
982 	dprint("select returned %m\n", e);
983 	*winning_conn = -1;
984 	return 1;
985     }
986     return 0;
987 }
988 
989 /*
990  * Current worst-case timeout behavior:
991  *
992  * First pass, 1s per udp or tcp server, plus 2s at end.
993  * Second pass, 1s per udp server, plus 4s.
994  * Third pass, 1s per udp server, plus 8s.
995  * Fourth => 16s, etc.
996  *
997  * Restated:
998  * Per UDP server, 1s per pass.
999  * Per TCP server, 1s.
1000  * Backoff delay, 2**(P+1) - 2, where P is total number of passes.
1001  *
1002  * Total = 2**(P+1) + U*P + T - 2.
1003  *
1004  * If P=3, Total = 3*U + T + 14.
1005  * If P=4, Total = 4*U + T + 30.
1006  *
1007  * Note that if you try to reach two ports (e.g., both 88 and 750) on
1008  * one server, it counts as two.
1009  */
1010 
1011 krb5_error_code
1012 /*ARGSUSED*/
1013 krb5int_sendto (krb5_context context, const krb5_data *message,
1014 		const struct addrlist *addrs, krb5_data *reply,
1015 		struct sockaddr_storage *localaddr, socklen_t *localaddrlen,
1016 		int *addr_used)
1017 {
1018     int i, pass;
1019     int delay_this_pass = 2;
1020     krb5_error_code retval;
1021     struct conn_state *conns;
1022     size_t n_conns, host;
1023     struct select_state select_state;
1024     struct timeval now;
1025     int winning_conn = -1, e = 0;
1026     unsigned char message_len_buf[4];
1027     char *udpbuf = 0;
1028 
1029     /*LINTED*/
1030     dprint("krb5int_sendto(message=%d@%p)\n", message->length, message->data);
1031 
1032     reply->data = 0;
1033     reply->length = 0;
1034 
1035     n_conns = addrs->naddrs;
1036     conns = malloc(n_conns * sizeof(struct conn_state));
1037     if (conns == NULL) {
1038 	return ENOMEM;
1039     }
1040     memset(conns, 0, n_conns * sizeof(conns[i]));
1041     for (i = 0; i < n_conns; i++) {
1042 	conns[i].fd = INVALID_SOCKET;
1043     }
1044 
1045     select_state.max = 0;
1046     select_state.nfds = 0;
1047     FD_ZERO(&select_state.rfds);
1048     FD_ZERO(&select_state.wfds);
1049     FD_ZERO(&select_state.xfds);
1050 
1051     message_len_buf[0] = (message->length >> 24) & 0xff;
1052     message_len_buf[1] = (message->length >> 16) & 0xff;
1053     message_len_buf[2] = (message->length >>  8) & 0xff;
1054     message_len_buf[3] =  message->length        & 0xff;
1055 
1056     /* Set up connections.  */
1057     for (host = 0; host < n_conns; host++) {
1058 	retval = setup_connection (&conns[host], addrs->addrs[host],
1059 				   message, message_len_buf, &udpbuf);
1060 	if (retval)
1061 	    continue;
1062     }
1063     for (pass = 0; pass < MAX_PASS; pass++) {
1064 	/* Possible optimization: Make only one pass if TCP only.
1065 	   Stop making passes if all UDP ports are closed down.  */
1066 	/*LINTED*/
1067 	dprint("pass %d delay=%d\n", pass, delay_this_pass);
1068 	for (host = 0; host < n_conns; host++) {
1069 	    /*LINTED*/
1070 	    dprint("host %d\n", host);
1071 
1072 	    /* Send to the host, wait for a response, then move on. */
1073 	    if (maybe_send(&conns[host], &select_state))
1074 		continue;
1075 
1076 	    retval = getcurtime(&now);
1077 	    if (retval)
1078 		goto egress;
1079 	    select_state.end_time = now;
1080 	    select_state.end_time.tv_sec += 1;
1081 	    e = service_fds(&select_state, conns, host+1, &winning_conn);
1082 	    if (e)
1083 		break;
1084 	    if (pass > 0 && select_state.nfds == 0)
1085 		/*
1086 		 * After the first pass, if we close all fds, break
1087 		 * out right away.  During the first pass, it's okay,
1088 		 * we're probably about to open another connection.
1089 		 */
1090 		break;
1091 	}
1092 	if (e)
1093 	    break;
1094 	retval = getcurtime(&now);
1095 	if (retval)
1096 	    goto egress;
1097 	/* Possible optimization: Find a way to integrate this select
1098 	   call with the last one from the above loop, if the loop
1099 	   actually calls select.  */
1100 	select_state.end_time.tv_sec += delay_this_pass;
1101 	e = service_fds(&select_state, conns, host+1, &winning_conn);
1102 	if (e)
1103 	    break;
1104 	if (select_state.nfds == 0)
1105 	    break;
1106 	delay_this_pass *= 2;
1107     }
1108 
1109     if (select_state.nfds == 0) {
1110 	/* No addresses?  */
1111 	retval = KRB5_KDC_UNREACH;
1112 	goto egress;
1113     }
1114     if (e == 0 || winning_conn < 0) {
1115 	retval = KRB5_KDC_UNREACH;
1116 	goto egress;
1117     }
1118     /* Success!  */
1119     reply->data = conns[winning_conn].x.in.buf;
1120     reply->length = (conns[winning_conn].x.in.pos
1121 		     - conns[winning_conn].x.in.buf);
1122     /*LINTED*/
1123     dprint("returning %d bytes in buffer %p (winning_conn=%d)\n",
1124 	(int) reply->length, reply->data, winning_conn);
1125     retval = 0;
1126     conns[winning_conn].x.in.buf = 0;
1127     if (addr_used)
1128 	    *addr_used = winning_conn;
1129     if (localaddr != 0 && localaddrlen != 0 && *localaddrlen > 0)
1130 	(void) getsockname(conns[winning_conn].fd, (struct sockaddr *)localaddr,
1131 			   localaddrlen);
1132 egress:
1133     for (i = 0; i < n_conns; i++) {
1134 	if (conns[i].fd != INVALID_SOCKET)
1135 	    close(conns[i].fd);
1136 	if (conns[i].state == READING
1137 	    && conns[i].x.in.buf != 0
1138 	    && conns[i].x.in.buf != udpbuf)
1139 	    free(conns[i].x.in.buf);
1140     }
1141     free(conns);
1142     if (reply->data != udpbuf)
1143 	free(udpbuf);
1144     return retval;
1145 }
1146