xref: /freebsd/contrib/unbound/util/netevent.c (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/tcp_conn_limit.h"
47 #include "util/fptr_wlist.h"
48 #include "sldns/pkthdr.h"
49 #include "sldns/sbuffer.h"
50 #include "sldns/str2wire.h"
51 #include "dnstap/dnstap.h"
52 #include "dnscrypt/dnscrypt.h"
53 #include "services/listen_dnsport.h"
54 #ifdef HAVE_OPENSSL_SSL_H
55 #include <openssl/ssl.h>
56 #endif
57 #ifdef HAVE_OPENSSL_ERR_H
58 #include <openssl/err.h>
59 #endif
60 
61 /* -------- Start of local definitions -------- */
/**
 * Fallback for platforms whose headers do not provide CMSG_ALIGN.
 * Tried in order: the __CMSG_ALIGN helper, the _CMSG_DATA_ALIGN helper,
 * and finally rounding up to a multiple of sizeof(long).
 * The second test names the same macro that the definition uses, so the
 * branch can only be taken when that macro really exists.
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/**
 * Fallback for platforms without CMSG_LEN: the aligned cmsghdr size plus
 * the (unpadded) payload length.
 */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/**
 * Fallback for platforms without CMSG_SPACE: the aligned payload length
 * plus the aligned cmsghdr size (using _CMSG_HDR_ALIGN when available).
 */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
86 
/** Timeout, in milliseconds, for writing a query over TCP */
#define TCP_QUERY_TIMEOUT 120000
/** Lower bound, in milliseconds, on the TCP timeout that is really used,
 * whatever value is advertised */
#define TCP_QUERY_TIMEOUT_MINIMUM 200

/** How many UDP reads are attempted for one read indication from select;
 * only a single read when nonblocking sockets are broken */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
99 
/**
 * Per-event bookkeeping: ties a ub_event to the comm base it belongs to.
 * May additionally be a member of other structures (list, tree).
 */
struct internal_event {
	/** comm base that this event is registered with */
	struct comm_base *base;
	/** the underlying ub_event */
	struct ub_event *ev;
};
110 
/**
 * Per-base bookkeeping; one of these exists for each comm base so that
 * every thread has its own set of events.
 */
struct internal_base {
	/** the underlying ub_event event base */
	struct ub_event_base *base;
	/** current time in seconds; the seconds time pointer refers to this */
	time_t secs;
	/** current time as a timeval */
	struct timeval now;
	/** timer event used for the slow_accept timeout */
	struct ub_event *slow_accept;
	/** nonzero while slow_accept is enabled */
	int slow_accept_enabled;
};
126 
127 /**
128  * Internal timer structure, to store timer event in.
129  */
130 struct internal_timer {
131 	/** the super struct from which derived */
132 	struct comm_timer super;
133 	/** the comm base */
134 	struct comm_base* base;
135 	/** ub_event event type */
136 	struct ub_event* ev;
137 	/** is timer enabled */
138 	uint8_t enabled;
139 };
140 
/**
 * Signal bookkeeping; one node of a singly linked list of signal events.
 */
struct internal_signal {
	/** the underlying ub_event */
	struct ub_event *ev;
	/** following entry in the signal list */
	struct internal_signal *next;
};
150 
151 /** create a tcp handler with a parent */
152 static struct comm_point* comm_point_create_tcp_handler(
153 	struct comm_base *base, struct comm_point* parent, size_t bufsize,
154 	struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
155 	void* callback_arg);
156 
157 /* -------- End of local definitions -------- */
158 
159 struct comm_base*
160 comm_base_create(int sigs)
161 {
162 	struct comm_base* b = (struct comm_base*)calloc(1,
163 		sizeof(struct comm_base));
164 	const char *evnm="event", *evsys="", *evmethod="";
165 
166 	if(!b)
167 		return NULL;
168 	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
169 	if(!b->eb) {
170 		free(b);
171 		return NULL;
172 	}
173 	b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
174 	if(!b->eb->base) {
175 		free(b->eb);
176 		free(b);
177 		return NULL;
178 	}
179 	ub_comm_base_now(b);
180 	ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
181 	verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
182 	return b;
183 }
184 
185 struct comm_base*
186 comm_base_create_event(struct ub_event_base* base)
187 {
188 	struct comm_base* b = (struct comm_base*)calloc(1,
189 		sizeof(struct comm_base));
190 	if(!b)
191 		return NULL;
192 	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
193 	if(!b->eb) {
194 		free(b);
195 		return NULL;
196 	}
197 	b->eb->base = base;
198 	ub_comm_base_now(b);
199 	return b;
200 }
201 
202 void
203 comm_base_delete(struct comm_base* b)
204 {
205 	if(!b)
206 		return;
207 	if(b->eb->slow_accept_enabled) {
208 		if(ub_event_del(b->eb->slow_accept) != 0) {
209 			log_err("could not event_del slow_accept");
210 		}
211 		ub_event_free(b->eb->slow_accept);
212 	}
213 	ub_event_base_free(b->eb->base);
214 	b->eb->base = NULL;
215 	free(b->eb);
216 	free(b);
217 }
218 
219 void
220 comm_base_delete_no_base(struct comm_base* b)
221 {
222 	if(!b)
223 		return;
224 	if(b->eb->slow_accept_enabled) {
225 		if(ub_event_del(b->eb->slow_accept) != 0) {
226 			log_err("could not event_del slow_accept");
227 		}
228 		ub_event_free(b->eb->slow_accept);
229 	}
230 	b->eb->base = NULL;
231 	free(b->eb);
232 	free(b);
233 }
234 
235 void
236 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
237 {
238 	*tt = &b->eb->secs;
239 	*tv = &b->eb->now;
240 }
241 
242 void
243 comm_base_dispatch(struct comm_base* b)
244 {
245 	int retval;
246 	retval = ub_event_base_dispatch(b->eb->base);
247 	if(retval < 0) {
248 		fatal_exit("event_dispatch returned error %d, "
249 			"errno is %s", retval, strerror(errno));
250 	}
251 }
252 
253 void comm_base_exit(struct comm_base* b)
254 {
255 	if(ub_event_base_loopexit(b->eb->base) != 0) {
256 		log_err("Could not loopexit");
257 	}
258 }
259 
260 void comm_base_set_slow_accept_handlers(struct comm_base* b,
261 	void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
262 {
263 	b->stop_accept = stop_acc;
264 	b->start_accept = start_acc;
265 	b->cb_arg = arg;
266 }
267 
268 struct ub_event_base* comm_base_internal(struct comm_base* b)
269 {
270 	return b->eb->base;
271 }
272 
273 /** see if errno for udp has to be logged or not uses globals */
274 static int
275 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
276 {
277 	/* do not log transient errors (unless high verbosity) */
278 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
279 	switch(errno) {
280 #  ifdef ENETUNREACH
281 		case ENETUNREACH:
282 #  endif
283 #  ifdef EHOSTDOWN
284 		case EHOSTDOWN:
285 #  endif
286 #  ifdef EHOSTUNREACH
287 		case EHOSTUNREACH:
288 #  endif
289 #  ifdef ENETDOWN
290 		case ENETDOWN:
291 #  endif
292 			if(verbosity < VERB_ALGO)
293 				return 0;
294 		default:
295 			break;
296 	}
297 #endif
298 	/* permission denied is gotten for every send if the
299 	 * network is disconnected (on some OS), squelch it */
300 	if( ((errno == EPERM)
301 #  ifdef EADDRNOTAVAIL
302 		/* 'Cannot assign requested address' also when disconnected */
303 		|| (errno == EADDRNOTAVAIL)
304 #  endif
305 		) && verbosity < VERB_DETAIL)
306 		return 0;
307 #  ifdef EADDRINUSE
308 	/* If SO_REUSEADDR is set, we could try to connect to the same server
309 	 * from the same source port twice. */
310 	if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
311 		return 0;
312 #  endif
313 	/* squelch errors where people deploy AAAA ::ffff:bla for
314 	 * authority servers, which we try for intranets. */
315 	if(errno == EINVAL && addr_is_ip4mapped(
316 		(struct sockaddr_storage*)addr, addrlen) &&
317 		verbosity < VERB_DETAIL)
318 		return 0;
319 	/* SO_BROADCAST sockopt can give access to 255.255.255.255,
320 	 * but a dns cache does not need it. */
321 	if(errno == EACCES && addr_is_broadcast(
322 		(struct sockaddr_storage*)addr, addrlen) &&
323 		verbosity < VERB_DETAIL)
324 		return 0;
325 	return 1;
326 }
327 
/**
 * Tell whether the errno of a failed tcp connect has to be logged; the
 * decision is the one made by udp_send_errno_needs_log.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int wanted = udp_send_errno_needs_log(addr, addrlen);

	return wanted;
}
332 
333 /* send a UDP reply */
334 int
335 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
336 	struct sockaddr* addr, socklen_t addrlen)
337 {
338 	ssize_t sent;
339 	log_assert(c->fd != -1);
340 #ifdef UNBOUND_DEBUG
341 	if(sldns_buffer_remaining(packet) == 0)
342 		log_err("error: send empty UDP packet");
343 #endif
344 	log_assert(addr && addrlen > 0);
345 	sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
346 		sldns_buffer_remaining(packet), 0,
347 		addr, addrlen);
348 	if(sent == -1) {
349 		/* try again and block, waiting for IO to complete,
350 		 * we want to send the answer, and we will wait for
351 		 * the ethernet interface buffer to have space. */
352 #ifndef USE_WINSOCK
353 		if(errno == EAGAIN ||
354 #  ifdef EWOULDBLOCK
355 			errno == EWOULDBLOCK ||
356 #  endif
357 			errno == ENOBUFS) {
358 #else
359 		if(WSAGetLastError() == WSAEINPROGRESS ||
360 			WSAGetLastError() == WSAENOBUFS ||
361 			WSAGetLastError() == WSAEWOULDBLOCK) {
362 #endif
363 			int e;
364 			fd_set_block(c->fd);
365 			sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
366 				sldns_buffer_remaining(packet), 0,
367 				addr, addrlen);
368 			e = errno;
369 			fd_set_nonblock(c->fd);
370 			errno = e;
371 		}
372 	}
373 	if(sent == -1) {
374 		if(!udp_send_errno_needs_log(addr, addrlen))
375 			return 0;
376 #ifndef USE_WINSOCK
377 		verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
378 #else
379 		verbose(VERB_OPS, "sendto failed: %s",
380 			wsa_strerror(WSAGetLastError()));
381 #endif
382 		log_addr(VERB_OPS, "remote address is",
383 			(struct sockaddr_storage*)addr, addrlen);
384 		return 0;
385 	} else if((size_t)sent != sldns_buffer_remaining(packet)) {
386 		log_err("sent %d in place of %d bytes",
387 			(int)sent, (int)sldns_buffer_remaining(packet));
388 		return 0;
389 	}
390 	return 1;
391 }
392 
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Log the ancillary (packet info) data of a reply for debugging.
 * @param str: text that prefixes the log line.
 * @param r: reply whose srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	if(r->srctype == 6) {
		char addr6[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			addr6, (socklen_t)sizeof(addr6)) == 0) {
			(void)strlcpy(addr6, "(inet_ntop error)",
				sizeof(addr6));
		}
		addr6[sizeof(addr6)-1]=0;
		log_info("%s: %s %d", str, addr6,
			r->pktinfo.v6info.ipi6_ifindex);
		return;
	}
	if(r->srctype == 4) {
#ifdef IP_PKTINFO
		char dst[1024], spec[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			dst, (socklen_t)sizeof(dst)) == 0) {
			(void)strlcpy(dst, "(inet_ntop error)", sizeof(dst));
		}
		dst[sizeof(dst)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			spec, (socklen_t)sizeof(spec)) == 0) {
			(void)strlcpy(spec, "(inet_ntop error)", sizeof(spec));
		}
		spec[sizeof(spec)-1]=0;
#else
		/* no ipi_spec_dst member: print an empty string instead */
		spec[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			dst, spec);
#elif defined(IP_RECVDSTADDR)
		char dst[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
			dst, (socklen_t)sizeof(dst)) == 0) {
			(void)strlcpy(dst, "(inet_ntop error)", sizeof(dst));
		}
		dst[sizeof(dst)-1]=0;
		log_info("%s: %s", str, dst);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		return;
	}
	/* neither an IPv4 nor an IPv6 source was recorded */
	log_info("%s: unknown srctype %d", str, r->srctype);
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
440 
441 /** send a UDP reply over specified interface*/
442 static int
443 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
444 	struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
445 {
446 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
447 	ssize_t sent;
448 	struct msghdr msg;
449 	struct iovec iov[1];
450 	union {
451 		struct cmsghdr hdr;
452 		char buf[256];
453 	} control;
454 #ifndef S_SPLINT_S
455 	struct cmsghdr *cmsg;
456 #endif /* S_SPLINT_S */
457 
458 	log_assert(c->fd != -1);
459 #ifdef UNBOUND_DEBUG
460 	if(sldns_buffer_remaining(packet) == 0)
461 		log_err("error: send empty UDP packet");
462 #endif
463 	log_assert(addr && addrlen > 0);
464 
465 	msg.msg_name = addr;
466 	msg.msg_namelen = addrlen;
467 	iov[0].iov_base = sldns_buffer_begin(packet);
468 	iov[0].iov_len = sldns_buffer_remaining(packet);
469 	msg.msg_iov = iov;
470 	msg.msg_iovlen = 1;
471 	msg.msg_control = control.buf;
472 #ifndef S_SPLINT_S
473 	msg.msg_controllen = sizeof(control.buf);
474 #endif /* S_SPLINT_S */
475 	msg.msg_flags = 0;
476 
477 #ifndef S_SPLINT_S
478 	cmsg = CMSG_FIRSTHDR(&msg);
479 	if(r->srctype == 4) {
480 #ifdef IP_PKTINFO
481 		void* cmsg_data;
482 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
483 		log_assert(msg.msg_controllen <= sizeof(control.buf));
484 		cmsg->cmsg_level = IPPROTO_IP;
485 		cmsg->cmsg_type = IP_PKTINFO;
486 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
487 			sizeof(struct in_pktinfo));
488 		/* unset the ifindex to not bypass the routing tables */
489 		cmsg_data = CMSG_DATA(cmsg);
490 		((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
491 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
492 #elif defined(IP_SENDSRCADDR)
493 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
494 		log_assert(msg.msg_controllen <= sizeof(control.buf));
495 		cmsg->cmsg_level = IPPROTO_IP;
496 		cmsg->cmsg_type = IP_SENDSRCADDR;
497 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
498 			sizeof(struct in_addr));
499 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
500 #else
501 		verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
502 		msg.msg_control = NULL;
503 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
504 	} else if(r->srctype == 6) {
505 		void* cmsg_data;
506 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
507 		log_assert(msg.msg_controllen <= sizeof(control.buf));
508 		cmsg->cmsg_level = IPPROTO_IPV6;
509 		cmsg->cmsg_type = IPV6_PKTINFO;
510 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
511 			sizeof(struct in6_pktinfo));
512 		/* unset the ifindex to not bypass the routing tables */
513 		cmsg_data = CMSG_DATA(cmsg);
514 		((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
515 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
516 	} else {
517 		/* try to pass all 0 to use default route */
518 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
519 		log_assert(msg.msg_controllen <= sizeof(control.buf));
520 		cmsg->cmsg_level = IPPROTO_IPV6;
521 		cmsg->cmsg_type = IPV6_PKTINFO;
522 		memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
523 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
524 	}
525 #endif /* S_SPLINT_S */
526 	if(verbosity >= VERB_ALGO)
527 		p_ancil("send_udp over interface", r);
528 	sent = sendmsg(c->fd, &msg, 0);
529 	if(sent == -1) {
530 		/* try again and block, waiting for IO to complete,
531 		 * we want to send the answer, and we will wait for
532 		 * the ethernet interface buffer to have space. */
533 #ifndef USE_WINSOCK
534 		if(errno == EAGAIN ||
535 #  ifdef EWOULDBLOCK
536 			errno == EWOULDBLOCK ||
537 #  endif
538 			errno == ENOBUFS) {
539 #else
540 		if(WSAGetLastError() == WSAEINPROGRESS ||
541 			WSAGetLastError() == WSAENOBUFS ||
542 			WSAGetLastError() == WSAEWOULDBLOCK) {
543 #endif
544 			int e;
545 			fd_set_block(c->fd);
546 			sent = sendmsg(c->fd, &msg, 0);
547 			e = errno;
548 			fd_set_nonblock(c->fd);
549 			errno = e;
550 		}
551 	}
552 	if(sent == -1) {
553 		if(!udp_send_errno_needs_log(addr, addrlen))
554 			return 0;
555 		verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
556 		log_addr(VERB_OPS, "remote address is",
557 			(struct sockaddr_storage*)addr, addrlen);
558 #ifdef __NetBSD__
559 		/* netbsd 7 has IP_PKTINFO for recv but not send */
560 		if(errno == EINVAL && r->srctype == 4)
561 			log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
562 				"Please disable interface-automatic");
563 #endif
564 		return 0;
565 	} else if((size_t)sent != sldns_buffer_remaining(packet)) {
566 		log_err("sent %d in place of %d bytes",
567 			(int)sent, (int)sldns_buffer_remaining(packet));
568 		return 0;
569 	}
570 	return 1;
571 #else
572 	(void)c;
573 	(void)packet;
574 	(void)addr;
575 	(void)addrlen;
576 	(void)r;
577 	log_err("sendmsg: IPV6_PKTINFO not supported");
578 	return 0;
579 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
580 }
581 
/**
 * Event callback for a UDP comm point that uses ancillary data.
 * On a read event it performs up to NUM_UDP_PER_SELECT recvmsg calls.
 * For every datagram the packet info found in the control messages is
 * stored in the reply, the comm point callback is invoked, and when that
 * returns nonzero a reply is sent back with comm_point_send_udp_msg_if.
 * The loop ends early when recvmsg fails or when the comm point no
 * longer uses the fd that this callback was invoked for.
 * Without IPV6_PKTINFO and recvmsg support this function exits fatally.
 * @param fd: the socket that became readable.
 * @param event: event flags; anything without UB_EV_READ is ignored.
 * @param arg: the comm point.
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
	struct sldns_buffer *buffer;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		rep.srctype = 0;
#ifndef S_SPLINT_S
		/* pick up the first packet info control message */
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply; the buffer is selected
			 * in the same way as comm_point_udp_callback does */
#ifdef USE_DNSCRYPT
			buffer = rep.c->dnscrypt_buffer;
#else
			buffer = rep.c->buffer;
#endif
			(void)comm_point_send_udp_msg_if(rep.c, buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or
			reused for another UDP port; fd is stale then */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
679 
680 void
681 comm_point_udp_callback(int fd, short event, void* arg)
682 {
683 	struct comm_reply rep;
684 	ssize_t rcv;
685 	int i;
686 	struct sldns_buffer *buffer;
687 
688 	rep.c = (struct comm_point*)arg;
689 	log_assert(rep.c->type == comm_udp);
690 
691 	if(!(event&UB_EV_READ))
692 		return;
693 	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
694 	ub_comm_base_now(rep.c->ev->base);
695 	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
696 		sldns_buffer_clear(rep.c->buffer);
697 		rep.addrlen = (socklen_t)sizeof(rep.addr);
698 		log_assert(fd != -1);
699 		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
700 		rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
701 			sldns_buffer_remaining(rep.c->buffer), 0,
702 			(struct sockaddr*)&rep.addr, &rep.addrlen);
703 		if(rcv == -1) {
704 #ifndef USE_WINSOCK
705 			if(errno != EAGAIN && errno != EINTR)
706 				log_err("recvfrom %d failed: %s",
707 					fd, strerror(errno));
708 #else
709 			if(WSAGetLastError() != WSAEINPROGRESS &&
710 				WSAGetLastError() != WSAECONNRESET &&
711 				WSAGetLastError()!= WSAEWOULDBLOCK)
712 				log_err("recvfrom failed: %s",
713 					wsa_strerror(WSAGetLastError()));
714 #endif
715 			return;
716 		}
717 		sldns_buffer_skip(rep.c->buffer, rcv);
718 		sldns_buffer_flip(rep.c->buffer);
719 		rep.srctype = 0;
720 		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
721 		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
722 			/* send back immediate reply */
723 #ifdef USE_DNSCRYPT
724 			buffer = rep.c->dnscrypt_buffer;
725 #else
726 			buffer = rep.c->buffer;
727 #endif
728 			(void)comm_point_send_udp_msg(rep.c, buffer,
729 				(struct sockaddr*)&rep.addr, rep.addrlen);
730 		}
731 		if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
732 		another UDP port. Note rep.c cannot be reused with TCP fd. */
733 			break;
734 	}
735 }
736 
737 /** Use a new tcp handler for new query fd, set to read query */
738 static void
739 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
740 {
741 	int handler_usage;
742 	log_assert(c->type == comm_tcp);
743 	log_assert(c->fd == -1);
744 	sldns_buffer_clear(c->buffer);
745 #ifdef USE_DNSCRYPT
746 	if (c->dnscrypt)
747 		sldns_buffer_clear(c->dnscrypt_buffer);
748 #endif
749 	c->tcp_is_reading = 1;
750 	c->tcp_byte_count = 0;
751 	/* if more than half the tcp handlers are in use, use a shorter
752 	 * timeout for this TCP connection, we need to make space for
753 	 * other connections to be able to get attention */
754 	/* If > 50% TCP handler structures in use, set timeout to 1/100th
755 	 * 	configured value.
756 	 * If > 65%TCP handler structures in use, set to 1/500th configured
757 	 * 	value.
758 	 * If > 80% TCP handler structures in use, set to 0.
759 	 *
760 	 * If the timeout to use falls below 200 milliseconds, an actual
761 	 * timeout of 200ms is used.
762 	 */
763 	handler_usage = (cur * 100) / max;
764 	if(handler_usage > 50 && handler_usage <= 65)
765 		c->tcp_timeout_msec /= 100;
766 	else if (handler_usage > 65 && handler_usage <= 80)
767 		c->tcp_timeout_msec /= 500;
768 	else if (handler_usage > 80)
769 		c->tcp_timeout_msec = 0;
770 	comm_point_start_listening(c, fd,
771 		c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM
772 			? TCP_QUERY_TIMEOUT_MINIMUM
773 			: c->tcp_timeout_msec);
774 }
775 
776 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
777 	short ATTR_UNUSED(event), void* arg)
778 {
779 	struct comm_base* b = (struct comm_base*)arg;
780 	/* timeout for the slow accept, re-enable accepts again */
781 	if(b->start_accept) {
782 		verbose(VERB_ALGO, "wait is over, slow accept disabled");
783 		fptr_ok(fptr_whitelist_start_accept(b->start_accept));
784 		(*b->start_accept)(b->cb_arg);
785 		b->eb->slow_accept_enabled = 0;
786 	}
787 }
788 
789 int comm_point_perform_accept(struct comm_point* c,
790 	struct sockaddr_storage* addr, socklen_t* addrlen)
791 {
792 	int new_fd;
793 	*addrlen = (socklen_t)sizeof(*addr);
794 #ifndef HAVE_ACCEPT4
795 	new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
796 #else
797 	/* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
798 	new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
799 #endif
800 	if(new_fd == -1) {
801 #ifndef USE_WINSOCK
802 		/* EINTR is signal interrupt. others are closed connection. */
803 		if(	errno == EINTR || errno == EAGAIN
804 #ifdef EWOULDBLOCK
805 			|| errno == EWOULDBLOCK
806 #endif
807 #ifdef ECONNABORTED
808 			|| errno == ECONNABORTED
809 #endif
810 #ifdef EPROTO
811 			|| errno == EPROTO
812 #endif /* EPROTO */
813 			)
814 			return -1;
815 #if defined(ENFILE) && defined(EMFILE)
816 		if(errno == ENFILE || errno == EMFILE) {
817 			/* out of file descriptors, likely outside of our
818 			 * control. stop accept() calls for some time */
819 			if(c->ev->base->stop_accept) {
820 				struct comm_base* b = c->ev->base;
821 				struct timeval tv;
822 				verbose(VERB_ALGO, "out of file descriptors: "
823 					"slow accept");
824 				b->eb->slow_accept_enabled = 1;
825 				fptr_ok(fptr_whitelist_stop_accept(
826 					b->stop_accept));
827 				(*b->stop_accept)(b->cb_arg);
828 				/* set timeout, no mallocs */
829 				tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
830 				tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
831 				b->eb->slow_accept = ub_event_new(b->eb->base,
832 					-1, UB_EV_TIMEOUT,
833 					comm_base_handle_slow_accept, b);
834 				if(b->eb->slow_accept == NULL) {
835 					/* we do not want to log here, because
836 					 * that would spam the logfiles.
837 					 * error: "event_base_set failed." */
838 				}
839 				else if(ub_event_add(b->eb->slow_accept, &tv)
840 					!= 0) {
841 					/* we do not want to log here,
842 					 * error: "event_add failed." */
843 				}
844 			}
845 			return -1;
846 		}
847 #endif
848 		log_err_addr("accept failed", strerror(errno), addr, *addrlen);
849 #else /* USE_WINSOCK */
850 		if(WSAGetLastError() == WSAEINPROGRESS ||
851 			WSAGetLastError() == WSAECONNRESET)
852 			return -1;
853 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
854 			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
855 			return -1;
856 		}
857 		log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
858 			addr, *addrlen);
859 #endif
860 		return -1;
861 	}
862 	if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
863 		c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
864 		if(!tcl_new_connection(c->tcl_addr)) {
865 			if(verbosity >= 3)
866 				log_err_addr("accept rejected",
867 				"connection limit exceeded", addr, *addrlen);
868 			close(new_fd);
869 			return -1;
870 		}
871 	}
872 #ifndef HAVE_ACCEPT4
873 	fd_set_nonblock(new_fd);
874 #endif
875 	return new_fd;
876 }
877 
878 #ifdef USE_WINSOCK
879 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
880         int ATTR_UNUSED(argi), long argl, long retvalue)
881 {
882 	int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
883 	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
884 		(oper&BIO_CB_RETURN)?"return":"before",
885 		(oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
886 		wsa_err==WSAEWOULDBLOCK?"wsawb":"");
887 	/* on windows, check if previous operation caused EWOULDBLOCK */
888 	if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
889 		(oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
890 		if(wsa_err == WSAEWOULDBLOCK)
891 			ub_winsock_tcp_wouldblock((struct ub_event*)
892 				BIO_get_callback_arg(b), UB_EV_READ);
893 	}
894 	if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
895 		(oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
896 		if(wsa_err == WSAEWOULDBLOCK)
897 			ub_winsock_tcp_wouldblock((struct ub_event*)
898 				BIO_get_callback_arg(b), UB_EV_WRITE);
899 	}
900 	/* return original return value */
901 	return retvalue;
902 }
903 
904 /** set win bio callbacks for nonblocking operations */
905 void
906 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
907 {
908 	SSL* ssl = (SSL*)thessl;
909 	/* set them both just in case, but usually they are the same BIO */
910 	BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
911 	BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
912 	BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
913 	BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
914 }
915 #endif
916 
917 void
918 comm_point_tcp_accept_callback(int fd, short event, void* arg)
919 {
920 	struct comm_point* c = (struct comm_point*)arg, *c_hdl;
921 	int new_fd;
922 	log_assert(c->type == comm_tcp_accept);
923 	if(!(event & UB_EV_READ)) {
924 		log_info("ignoring tcp accept event %d", (int)event);
925 		return;
926 	}
927 	ub_comm_base_now(c->ev->base);
928 	/* find free tcp handler. */
929 	if(!c->tcp_free) {
930 		log_warn("accepted too many tcp, connections full");
931 		return;
932 	}
933 	/* accept incoming connection. */
934 	c_hdl = c->tcp_free;
935 	/* clear leftover flags from previous use, and then set the
936 	 * correct event base for the event structure for libevent */
937 	ub_event_free(c_hdl->ev->ev);
938 	c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, comm_point_tcp_handle_callback, c_hdl);
939 	if(!c_hdl->ev->ev) {
940 		log_warn("could not ub_event_new, dropped tcp");
941 		return;
942 	}
943 	log_assert(fd != -1);
944 	(void)fd;
945 	new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
946 		&c_hdl->repinfo.addrlen);
947 	if(new_fd == -1)
948 		return;
949 	if(c->ssl) {
950 		c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
951 		if(!c_hdl->ssl) {
952 			c_hdl->fd = new_fd;
953 			comm_point_close(c_hdl);
954 			return;
955 		}
956 		c_hdl->ssl_shake_state = comm_ssl_shake_read;
957 #ifdef USE_WINSOCK
958 		comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
959 #endif
960 	}
961 
962 	/* grab the tcp handler buffers */
963 	c->cur_tcp_count++;
964 	c->tcp_free = c_hdl->tcp_free;
965 	if(!c->tcp_free) {
966 		/* stop accepting incoming queries for now. */
967 		comm_point_stop_listening(c);
968 	}
969 	setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
970 }
971 
972 /** Make tcp handler free for next assignment */
973 static void
974 reclaim_tcp_handler(struct comm_point* c)
975 {
976 	log_assert(c->type == comm_tcp);
977 	if(c->ssl) {
978 #ifdef HAVE_SSL
979 		SSL_shutdown(c->ssl);
980 		SSL_free(c->ssl);
981 		c->ssl = NULL;
982 #endif
983 	}
984 	comm_point_close(c);
985 	if(c->tcp_parent) {
986 		c->tcp_parent->cur_tcp_count--;
987 		c->tcp_free = c->tcp_parent->tcp_free;
988 		c->tcp_parent->tcp_free = c;
989 		if(!c->tcp_free) {
990 			/* re-enable listening on accept socket */
991 			comm_point_start_listening(c->tcp_parent, -1, -1);
992 		}
993 	}
994 }
995 
996 /** do the callback when writing is done */
997 static void
998 tcp_callback_writer(struct comm_point* c)
999 {
1000 	log_assert(c->type == comm_tcp);
1001 	sldns_buffer_clear(c->buffer);
1002 	if(c->tcp_do_toggle_rw)
1003 		c->tcp_is_reading = 1;
1004 	c->tcp_byte_count = 0;
1005 	/* switch from listening(write) to listening(read) */
1006 	if(c->tcp_req_info) {
1007 		tcp_req_info_handle_writedone(c->tcp_req_info);
1008 	} else {
1009 		comm_point_stop_listening(c);
1010 		comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1011 	}
1012 }
1013 
1014 /** do the callback when reading is done */
1015 static void
1016 tcp_callback_reader(struct comm_point* c)
1017 {
1018 	log_assert(c->type == comm_tcp || c->type == comm_local);
1019 	sldns_buffer_flip(c->buffer);
1020 	if(c->tcp_do_toggle_rw)
1021 		c->tcp_is_reading = 0;
1022 	c->tcp_byte_count = 0;
1023 	if(c->tcp_req_info) {
1024 		tcp_req_info_handle_readdone(c->tcp_req_info);
1025 	} else {
1026 		if(c->type == comm_tcp)
1027 			comm_point_stop_listening(c);
1028 		fptr_ok(fptr_whitelist_comm_point(c->callback));
1029 		if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1030 			comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1031 		}
1032 	}
1033 }
1034 
#ifdef HAVE_SSL
/**
 * True if the ssl handshake error has to be squelched from the logs.
 * @param err: the packed error code to test.
 * @return 1 if the error is one of the listed handshake errors and the
 *	verbosity is below VERB_QUERY, 0 otherwise.
 */
int
squelch_err_ssl_handshake(unsigned long err)
{
	/* only squelch on low verbosity */
	if(verbosity >= VERB_QUERY)
		return 0;

	/* the comparisons are on the complete packed code, which is very
	 * specific; filtering on ERR_GET_REASON() (the third element in
	 * ERR_PACK) would be a possibility too */
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST))
		return 1;
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST))
		return 1;
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER))
		return 1;
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE))
		return 1;
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER))
		return 1;
#endif
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL))
		return 1;
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL))
		return 1;
#  ifdef SSL_R_VERSION_TOO_LOW
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW))
		return 1;
#  endif
#endif
	return 0;
}
#endif /* HAVE_SSL */
1063 
#ifdef HAVE_SSL
/**
 * Continue ssl handshake.
 * If the shake state says a read or write was waiting for the opposite
 * condition (hs_read, hs_write), the listen direction is restored and the
 * state is cleared. Otherwise SSL_do_handshake is called; when it wants
 * more I/O the comm point listens for that and the state is updated.
 * After a completed handshake the peer is checked if SSL_VERIFY_PEER is
 * set for the connection, and the listen direction is set up for the
 * data phase.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection has to be closed, 1 to continue. On return
 *	1 the handshake is complete only if c->ssl_shake_state is
 *	comm_ssl_shake_none.
 */
static int
ssl_handshake(struct comm_point* c)
{
	int hs;
	X509* cert;

	if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}
	if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	ERR_clear_error();
	hs = SSL_do_handshake(c->ssl);
	if(hs != 1) {
		unsigned long err;
		int want = SSL_get_error(c->ssl, hs);

		switch(want) {
		case SSL_ERROR_WANT_READ:
			/* only change the event if not already reading */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		case SSL_ERROR_WANT_WRITE:
			/* only change the event if not already writing */
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		default:
			break;
		}

		if(hs == 0)
			return 0; /* closed */
		if(want == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
#ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		err = ERR_get_error();
		if(!squelch_err_ssl_handshake(err)) {
			log_crypto_err_code("ssl handshake failed", err);
			log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr,
				c->repinfo.addrlen);
		}
		return 0;
	}

	/* the handshake is done; this is where peer verification happens */
	if(!(SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
		/* unauthenticated, the verify peer flag was not set
		 * in c->ssl when the ssl object was created from ssl_ctx */
		log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr,
			c->repinfo.addrlen);
	} else if(SSL_get_verify_result(c->ssl) != X509_V_OK) {
		/* verification failed; log the certificate if there is one */
		cert = SSL_get_peer_certificate(c->ssl);
		if(cert) {
			log_cert(VERB_ALGO, "peer certificate", cert);
			X509_free(cert);
		}
		log_addr(VERB_ALGO, "SSL connection failed: "
			"failed to authenticate",
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	} else {
		/* verification succeeded, but a certificate is required */
		cert = SSL_get_peer_certificate(c->ssl);
		if(!cert) {
			log_addr(VERB_ALGO, "SSL connection failed: "
				"no certificate",
				&c->repinfo.addr, c->repinfo.addrlen);
			return 0;
		}
		log_cert(VERB_ALGO, "peer certificate", cert);
#ifdef HAVE_SSL_GET0_PEERNAME
		if(SSL_get0_peername(c->ssl)) {
			char msg[255];
			snprintf(msg, sizeof(msg), "SSL connection "
				"to %s authenticated",
				SSL_get0_peername(c->ssl));
			log_addr(VERB_ALGO, msg, &c->repinfo.addr,
				c->repinfo.addrlen);
		} else
#endif
		{
			log_addr(VERB_ALGO, "SSL connection "
				"authenticated", &c->repinfo.addr,
				c->repinfo.addrlen);
		}
		X509_free(cert);
	}

	/* setup listen rw correctly */
	if(!c->tcp_is_reading) {
		comm_point_listen_for_rw(c, 1, 1);
	} else if(c->ssl_shake_state != comm_ssl_shake_read) {
		comm_point_listen_for_rw(c, 1, 0);
	}
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
1183 
#ifdef HAVE_SSL
/**
 * Handle an SSL_read on a dns stream that returned zero or less.
 * Has to be called directly after the failed SSL_read, because it uses
 * SSL_get_error and errno.
 * @param c: comm point with c->ssl set.
 * @param r: the return value of SSL_read.
 * @return the value that ssl_handle_read has to return: 0 to close
 *	the connection, 1 to wait for the next event.
 */
static int
ssl_handle_read_failure(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		/* shutdown, closed */
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0;
	case SSL_ERROR_WANT_READ:
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* the read continues once the socket is writable */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * Ssl read callback on TCP.
 * Continues the handshake if that is not finished. Then reads the two
 * byte length prefix, checks it against the buffer capacity and the
 * minimum of LDNS_HEADER_SIZE, and reads the message itself. When the
 * buffer is full the read-done callback is made.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection has to be closed, 1 to continue. Without
 *	HAVE_SSL the return value is always 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		size_t msglen;
		/* read length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0)
			return ssl_handle_read_failure(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;

		msglen = sldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer, msglen);
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}

	if(sldns_buffer_remaining(c->buffer) > 0) {
		/* read (more of) the message */
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(r <= 0)
			return ssl_handle_read_failure(c, r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}

	/* the message is complete when the buffer is filled up */
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1286 
#ifdef HAVE_SSL
/**
 * Handle an SSL_write on a dns stream that returned zero or less.
 * Has to be called directly after the failed SSL_write, because it uses
 * SSL_get_error and errno.
 * @param c: comm point with c->ssl set.
 * @param r: the return value of SSL_write.
 * @return the value that ssl_handle_write has to return: 0 to close
 *	the connection, 1 to wait for the next event.
 */
static int
ssl_handle_write_failure(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		/* the write continues once the socket is readable */
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * Ssl write callback on TCP.
 * Continues the handshake if that is not finished. Then writes the two
 * byte length prefix, together with the message when both fit in a
 * buffer of LDNS_RR_BUF_SIZE, and after that the remainder of the
 * message. When everything is written the write-done callback is made.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection has to be closed, 1 to continue. Without
 *	HAVE_SSL the return value is always 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		uint16_t len = htons(sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) >=
			LDNS_RR_BUF_SIZE) {
			/* too large to combine, write the length only */
			r = SSL_write(c->ssl,
				(void*)(((uint8_t*)&len)+c->tcp_byte_count),
				(int)(sizeof(uint16_t)-c->tcp_byte_count));
		} else {
			/* combine the tcp length and the query for write,
			 * this emulates writev */
			uint8_t combined[LDNS_RR_BUF_SIZE];
			size_t payload = sldns_buffer_remaining(c->buffer);
			memmove(combined, &len, sizeof(uint16_t));
			memmove(combined+sizeof(uint16_t),
				sldns_buffer_current(c->buffer), payload);
			r = SSL_write(c->ssl,
				(void*)(combined+c->tcp_byte_count),
				(int)(sizeof(uint16_t)+payload
				- c->tcp_byte_count));
		}
		if(r <= 0)
			return ssl_handle_write_failure(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* bytes written beyond the length prefix are message bytes */
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}

	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_handle_write_failure(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1395 
1396 /** handle ssl tcp connection with dns contents */
1397 static int
1398 ssl_handle_it(struct comm_point* c)
1399 {
1400 	if(c->tcp_is_reading)
1401 		return ssl_handle_read(c);
1402 	return ssl_handle_write(c);
1403 }
1404 
1405 /** Handle tcp reading callback.
1406  * @param fd: file descriptor of socket.
1407  * @param c: comm point to read from into buffer.
1408  * @param short_ok: if true, very short packets are OK (for comm_local).
1409  * @return: 0 on error
1410  */
1411 static int
1412 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1413 {
1414 	ssize_t r;
1415 	log_assert(c->type == comm_tcp || c->type == comm_local);
1416 	if(c->ssl)
1417 		return ssl_handle_it(c);
1418 	if(!c->tcp_is_reading)
1419 		return 0;
1420 
1421 	log_assert(fd != -1);
1422 	if(c->tcp_byte_count < sizeof(uint16_t)) {
1423 		/* read length bytes */
1424 		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1425 			sizeof(uint16_t)-c->tcp_byte_count, 0);
1426 		if(r == 0) {
1427 			if(c->tcp_req_info)
1428 				return tcp_req_info_handle_read_close(c->tcp_req_info);
1429 			return 0;
1430 		} else if(r == -1) {
1431 #ifndef USE_WINSOCK
1432 			if(errno == EINTR || errno == EAGAIN)
1433 				return 1;
1434 #ifdef ECONNRESET
1435 			if(errno == ECONNRESET && verbosity < 2)
1436 				return 0; /* silence reset by peer */
1437 #endif
1438 			log_err_addr("read (in tcp s)", strerror(errno),
1439 				&c->repinfo.addr, c->repinfo.addrlen);
1440 #else /* USE_WINSOCK */
1441 			if(WSAGetLastError() == WSAECONNRESET)
1442 				return 0;
1443 			if(WSAGetLastError() == WSAEINPROGRESS)
1444 				return 1;
1445 			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1446 				ub_winsock_tcp_wouldblock(c->ev->ev,
1447 					UB_EV_READ);
1448 				return 1;
1449 			}
1450 			log_err_addr("read (in tcp s)",
1451 				wsa_strerror(WSAGetLastError()),
1452 				&c->repinfo.addr, c->repinfo.addrlen);
1453 #endif
1454 			return 0;
1455 		}
1456 		c->tcp_byte_count += r;
1457 		if(c->tcp_byte_count != sizeof(uint16_t))
1458 			return 1;
1459 		if(sldns_buffer_read_u16_at(c->buffer, 0) >
1460 			sldns_buffer_capacity(c->buffer)) {
1461 			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1462 			return 0;
1463 		}
1464 		sldns_buffer_set_limit(c->buffer,
1465 			sldns_buffer_read_u16_at(c->buffer, 0));
1466 		if(!short_ok &&
1467 			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1468 			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1469 			return 0;
1470 		}
1471 		verbose(VERB_ALGO, "Reading tcp query of length %d",
1472 			(int)sldns_buffer_limit(c->buffer));
1473 	}
1474 
1475 	log_assert(sldns_buffer_remaining(c->buffer) > 0);
1476 	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
1477 		sldns_buffer_remaining(c->buffer), 0);
1478 	if(r == 0) {
1479 		if(c->tcp_req_info)
1480 			return tcp_req_info_handle_read_close(c->tcp_req_info);
1481 		return 0;
1482 	} else if(r == -1) {
1483 #ifndef USE_WINSOCK
1484 		if(errno == EINTR || errno == EAGAIN)
1485 			return 1;
1486 		log_err_addr("read (in tcp r)", strerror(errno),
1487 			&c->repinfo.addr, c->repinfo.addrlen);
1488 #else /* USE_WINSOCK */
1489 		if(WSAGetLastError() == WSAECONNRESET)
1490 			return 0;
1491 		if(WSAGetLastError() == WSAEINPROGRESS)
1492 			return 1;
1493 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1494 			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1495 			return 1;
1496 		}
1497 		log_err_addr("read (in tcp r)",
1498 			wsa_strerror(WSAGetLastError()),
1499 			&c->repinfo.addr, c->repinfo.addrlen);
1500 #endif
1501 		return 0;
1502 	}
1503 	sldns_buffer_skip(c->buffer, r);
1504 	if(sldns_buffer_remaining(c->buffer) <= 0) {
1505 		tcp_callback_reader(c);
1506 	}
1507 	return 1;
1508 }
1509 
/**
 * Handle tcp writing callback.
 * On the first write of a connection made with a nonblocking connect
 * (tcp_check_nb_connect) the pending socket error is examined first.
 * Comm points with ssl are passed to ssl_handle_it. Otherwise the two
 * byte length prefix is written (with sendmsg for a first TCP fast open
 * write, with writev together with the message if available, or with
 * send), followed by the remainder of the message. When the buffer is
 * completely written the write-done callback is made.
 * With USE_DNSCRYPT the data is taken from c->dnscrypt_buffer, otherwise
 * from c->buffer.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write buffer out of.
 * @return: 0 on error
 */
static int
comm_point_tcp_handle_write(int fd, struct comm_point* c)
{
	ssize_t r;
	struct sldns_buffer *buffer;
	log_assert(c->type == comm_tcp);
#ifdef USE_DNSCRYPT
	buffer = c->dnscrypt_buffer;
#else
	buffer = c->buffer;
#endif
	/* a write event while reading is an error, except for ssl, where
	 * the ssl routines decide what to do with the event */
	if(c->tcp_is_reading && !c->ssl)
		return 0;
	log_assert(fd != -1);
	if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
		/* check for pending error from nonblocking connect */
		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
		int error = 0;
		socklen_t len = (socklen_t)sizeof(error);
		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
			&len) < 0){
#ifndef USE_WINSOCK
			error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
			error = WSAGetLastError();
#endif
		}
		/* note: the block opened by the last 'else if(error != 0) {'
		 * in each of the two branches below is closed after the
		 * #endif, by the shared 'return 0; }' */
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
		if(error == EINPROGRESS || error == EWOULDBLOCK)
			return 1; /* try again later */
		else
#endif
		if(error != 0 && verbosity < 2)
			return 0; /* silence lots of chatter in the logs */
                else if(error != 0) {
			log_err_addr("tcp connect", strerror(error),
				&c->repinfo.addr, c->repinfo.addrlen);
#else /* USE_WINSOCK */
		/* examine error */
		if(error == WSAEINPROGRESS)
			return 1;
		else if(error == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		} else if(error != 0 && verbosity < 2)
			return 0;
		else if(error != 0) {
			log_err_addr("tcp connect", wsa_strerror(error),
				&c->repinfo.addr, c->repinfo.addrlen);
#endif /* USE_WINSOCK */
			return 0;
		}
	}
	if(c->ssl)
		return ssl_handle_it(c);

#ifdef USE_MSG_FASTOPEN
	/* Only try this on first use of a connection that uses tfo,
	   otherwise fall through to normal write */
	/* Also, TFO support on WINDOWS not implemented at the moment */
	if(c->tcp_do_fastopen == 1) {
		/* this form of sendmsg() does both a connect() and send() so need to
		   look for various flavours of error*/
		uint16_t len = htons(sldns_buffer_limit(buffer));
		struct msghdr msg;
		struct iovec iov[2];
		/* cleared here, so that the fast open attempt is made once */
		c->tcp_do_fastopen = 0;
		memset(&msg, 0, sizeof(msg));
		/* first element: the (rest of the) length prefix,
		 * second element: the whole message */
		iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
		iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
		iov[1].iov_base = sldns_buffer_begin(buffer);
		iov[1].iov_len = sldns_buffer_limit(buffer);
		log_assert(iov[0].iov_len > 0);
		msg.msg_name = &c->repinfo.addr;
		msg.msg_namelen = c->repinfo.addrlen;
		msg.msg_iov = iov;
		msg.msg_iovlen = 2;
		r = sendmsg(fd, &msg, MSG_FASTOPEN);
		if (r == -1) {
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
			/* Handshake is underway, maybe because no TFO cookie available.
			   Come back to write the message*/
			if(errno == EINPROGRESS || errno == EWOULDBLOCK)
				return 1;
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
			/* Not handling EISCONN here as shouldn't ever hit that case.*/
			if(errno != EPIPE && errno != 0 && verbosity < 2)
				return 0; /* silence lots of chatter in the logs */
			if(errno != EPIPE && errno != 0) {
				log_err_addr("tcp sendmsg", strerror(errno),
					&c->repinfo.addr, c->repinfo.addrlen);
				return 0;
			}
			/* fallthrough to nonFASTOPEN
			 * (MSG_FASTOPEN on Linux 3 produces EPIPE)
			 * we need to perform connect() */
			if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
#ifdef EINPROGRESS
				if(errno == EINPROGRESS)
					return 1; /* wait until connect done*/
#endif
#ifdef USE_WINSOCK
				if(WSAGetLastError() == WSAEINPROGRESS ||
					WSAGetLastError() == WSAEWOULDBLOCK)
					return 1; /* wait until connect done*/
#endif
				if(tcp_connect_errno_needs_log(
					(struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
					log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
						strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
				}
				return 0;
			}
			/* the connect succeeded at once, continue below with
			 * the normal write */

		} else {
			/* account for what sendmsg wrote: the length prefix
			 * first, the remainder is message data */
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			sldns_buffer_set_position(buffer, c->tcp_byte_count -
				sizeof(uint16_t));
			if(sldns_buffer_remaining(buffer) == 0) {
				tcp_callback_writer(c);
				return 1;
			}
		}
	}
#endif /* USE_MSG_FASTOPEN */

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* the length prefix has not been written completely */
		uint16_t len = htons(sldns_buffer_limit(buffer));
#ifdef HAVE_WRITEV
		/* write the length prefix and the message in one call */
		struct iovec iov[2];
		iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
		iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
		iov[1].iov_base = sldns_buffer_begin(buffer);
		iov[1].iov_len = sldns_buffer_limit(buffer);
		log_assert(iov[0].iov_len > 0);
		r = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		/* write the length prefix only */
		r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
			sizeof(uint16_t)-c->tcp_byte_count, 0);
#endif /* HAVE_WRITEV */
		if(r == -1) {
#ifndef USE_WINSOCK
#  ifdef EPIPE
                	if(errno == EPIPE && verbosity < 2)
                        	return 0; /* silence 'broken pipe' */
  #endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
#  ifdef HAVE_WRITEV
			log_err_addr("tcp writev", strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
#  else /* HAVE_WRITEV */
			log_err_addr("tcp send s", strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
#  endif /* HAVE_WRITEV */
#else
			if(WSAGetLastError() == WSAENOTCONN)
				return 1;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_WRITE);
				return 1;
			}
			if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
			log_err_addr("tcp send s",
				wsa_strerror(WSAGetLastError()),
				&c->repinfo.addr, c->repinfo.addrlen);
#endif
			return 0;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* bytes written beyond the length prefix are message bytes */
		sldns_buffer_set_position(buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* write (more of) the message */
	log_assert(sldns_buffer_remaining(buffer) > 0);
	r = send(fd, (void*)sldns_buffer_current(buffer),
		sldns_buffer_remaining(buffer), 0);
	if(r == -1) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		log_err_addr("tcp send r", strerror(errno),
			&c->repinfo.addr, c->repinfo.addrlen);
#else
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
		log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
			&c->repinfo.addr, c->repinfo.addrlen);
#endif
		return 0;
	}
	sldns_buffer_skip(buffer, r);

	if(sldns_buffer_remaining(buffer) == 0) {
		tcp_callback_writer(c);
	}

	return 1;
}
1743 
1744 /** read again to drain buffers when there could be more to read */
1745 static void
1746 tcp_req_info_read_again(int fd, struct comm_point* c)
1747 {
1748 	while(c->tcp_req_info->read_again) {
1749 		int r;
1750 		c->tcp_req_info->read_again = 0;
1751 		if(c->tcp_is_reading)
1752 			r = comm_point_tcp_handle_read(fd, c, 0);
1753 		else 	r = comm_point_tcp_handle_write(fd, c);
1754 		if(!r) {
1755 			reclaim_tcp_handler(c);
1756 			if(!c->tcp_do_close) {
1757 				fptr_ok(fptr_whitelist_comm_point(
1758 					c->callback));
1759 				(void)(*c->callback)(c, c->cb_arg,
1760 					NETEVENT_CLOSED, NULL);
1761 			}
1762 			return;
1763 		}
1764 	}
1765 }
1766 
1767 void
1768 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1769 {
1770 	struct comm_point* c = (struct comm_point*)arg;
1771 	log_assert(c->type == comm_tcp);
1772 	ub_comm_base_now(c->ev->base);
1773 
1774 #ifdef USE_DNSCRYPT
1775 	/* Initialize if this is a dnscrypt socket */
1776 	if(c->tcp_parent) {
1777 		c->dnscrypt = c->tcp_parent->dnscrypt;
1778 	}
1779 	if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
1780 		c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
1781 		if(!c->dnscrypt_buffer) {
1782 			log_err("Could not allocate dnscrypt buffer");
1783 			reclaim_tcp_handler(c);
1784 			if(!c->tcp_do_close) {
1785 				fptr_ok(fptr_whitelist_comm_point(
1786 					c->callback));
1787 				(void)(*c->callback)(c, c->cb_arg,
1788 					NETEVENT_CLOSED, NULL);
1789 			}
1790 			return;
1791 		}
1792 	}
1793 #endif
1794 
1795 	if(event&UB_EV_TIMEOUT) {
1796 		verbose(VERB_QUERY, "tcp took too long, dropped");
1797 		reclaim_tcp_handler(c);
1798 		if(!c->tcp_do_close) {
1799 			fptr_ok(fptr_whitelist_comm_point(c->callback));
1800 			(void)(*c->callback)(c, c->cb_arg,
1801 				NETEVENT_TIMEOUT, NULL);
1802 		}
1803 		return;
1804 	}
1805 	if(event&UB_EV_READ) {
1806 		int has_tcpq = (c->tcp_req_info != NULL);
1807 		if(!comm_point_tcp_handle_read(fd, c, 0)) {
1808 			reclaim_tcp_handler(c);
1809 			if(!c->tcp_do_close) {
1810 				fptr_ok(fptr_whitelist_comm_point(
1811 					c->callback));
1812 				(void)(*c->callback)(c, c->cb_arg,
1813 					NETEVENT_CLOSED, NULL);
1814 			}
1815 		}
1816 		if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
1817 			tcp_req_info_read_again(fd, c);
1818 		return;
1819 	}
1820 	if(event&UB_EV_WRITE) {
1821 		int has_tcpq = (c->tcp_req_info != NULL);
1822 		if(!comm_point_tcp_handle_write(fd, c)) {
1823 			reclaim_tcp_handler(c);
1824 			if(!c->tcp_do_close) {
1825 				fptr_ok(fptr_whitelist_comm_point(
1826 					c->callback));
1827 				(void)(*c->callback)(c, c->cb_arg,
1828 					NETEVENT_CLOSED, NULL);
1829 			}
1830 		}
1831 		if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
1832 			tcp_req_info_read_again(fd, c);
1833 		return;
1834 	}
1835 	log_err("Ignored event %d for tcphdl.", event);
1836 }
1837 
1838 /** Make http handler free for next assignment */
1839 static void
1840 reclaim_http_handler(struct comm_point* c)
1841 {
1842 	log_assert(c->type == comm_http);
1843 	if(c->ssl) {
1844 #ifdef HAVE_SSL
1845 		SSL_shutdown(c->ssl);
1846 		SSL_free(c->ssl);
1847 		c->ssl = NULL;
1848 #endif
1849 	}
1850 	comm_point_close(c);
1851 	if(c->tcp_parent) {
1852 		c->tcp_parent->cur_tcp_count--;
1853 		c->tcp_free = c->tcp_parent->tcp_free;
1854 		c->tcp_parent->tcp_free = c;
1855 		if(!c->tcp_free) {
1856 			/* re-enable listening on accept socket */
1857 			comm_point_start_listening(c->tcp_parent, -1, -1);
1858 		}
1859 	}
1860 }
1861 
/**
 * Read more data for http (with ssl).
 * Reads with SSL_read into the buffer at its current position and
 * advances the position by the number of bytes that were read.
 * @param c: comm point with c->ssl set and room left in c->buffer.
 * @return 0 if the connection has to be closed, 1 to continue. Without
 *	HAVE_SSL the return value is always 0.
 */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r > 0) {
		sldns_buffer_skip(c->buffer, (ssize_t)r);
		return 1;
	}

	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* the read continues once the socket is writable */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1902 
1903 /** read more data for http */
1904 static int
1905 http_read_more(int fd, struct comm_point* c)
1906 {
1907 	ssize_t r;
1908 	log_assert(sldns_buffer_remaining(c->buffer) > 0);
1909 	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
1910 		sldns_buffer_remaining(c->buffer), 0);
1911 	if(r == 0) {
1912 		return 0;
1913 	} else if(r == -1) {
1914 #ifndef USE_WINSOCK
1915 		if(errno == EINTR || errno == EAGAIN)
1916 			return 1;
1917 		log_err_addr("read (in http r)", strerror(errno),
1918 			&c->repinfo.addr, c->repinfo.addrlen);
1919 #else /* USE_WINSOCK */
1920 		if(WSAGetLastError() == WSAECONNRESET)
1921 			return 0;
1922 		if(WSAGetLastError() == WSAEINPROGRESS)
1923 			return 1;
1924 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1925 			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1926 			return 1;
1927 		}
1928 		log_err_addr("read (in http r)",
1929 			wsa_strerror(WSAGetLastError()),
1930 			&c->repinfo.addr, c->repinfo.addrlen);
1931 #endif
1932 		return 0;
1933 	}
1934 	sldns_buffer_skip(c->buffer, r);
1935 	return 1;
1936 }
1937 
1938 /** return true if http header has been read (one line complete) */
1939 static int
1940 http_header_done(sldns_buffer* buf)
1941 {
1942 	size_t i;
1943 	for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1944 		/* there was a \r before the \n, but we ignore that */
1945 		if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
1946 			return 1;
1947 	}
1948 	return 0;
1949 }
1950 
1951 /** return character string into buffer for header line, moves buffer
1952  * past that line and puts zero terminator into linefeed-newline */
1953 static char*
1954 http_header_line(sldns_buffer* buf)
1955 {
1956 	char* result = (char*)sldns_buffer_current(buf);
1957 	size_t i;
1958 	for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1959 		/* terminate the string on the \r */
1960 		if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
1961 			sldns_buffer_write_u8_at(buf, i, 0);
1962 		/* terminate on the \n and skip past the it and done */
1963 		if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
1964 			sldns_buffer_write_u8_at(buf, i, 0);
1965 			sldns_buffer_set_position(buf, i+1);
1966 			return result;
1967 		}
1968 	}
1969 	return NULL;
1970 }
1971 
1972 /** move unread buffer to start and clear rest for putting the rest into it */
1973 static void
1974 http_moveover_buffer(sldns_buffer* buf)
1975 {
1976 	size_t pos = sldns_buffer_position(buf);
1977 	size_t len = sldns_buffer_remaining(buf);
1978 	sldns_buffer_clear(buf);
1979 	memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
1980 	sldns_buffer_set_position(buf, len);
1981 }
1982 
1983 /** a http header is complete, process it */
1984 static int
1985 http_process_initial_header(struct comm_point* c)
1986 {
1987 	char* line = http_header_line(c->buffer);
1988 	if(!line) return 1;
1989 	verbose(VERB_ALGO, "http header: %s", line);
1990 	if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
1991 		/* check returncode */
1992 		if(line[9] != '2') {
1993 			verbose(VERB_ALGO, "http bad status %s", line+9);
1994 			return 0;
1995 		}
1996 	} else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
1997 		if(!c->http_is_chunked)
1998 			c->tcp_byte_count = (size_t)atoi(line+16);
1999 	} else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
2000 		c->tcp_byte_count = 0;
2001 		c->http_is_chunked = 1;
2002 	} else if(line[0] == 0) {
2003 		/* end of initial headers */
2004 		c->http_in_headers = 0;
2005 		if(c->http_is_chunked)
2006 			c->http_in_chunk_headers = 1;
2007 		/* remove header text from front of buffer
2008 		 * the buffer is going to be used to return the data segment
2009 		 * itself and we don't want the header to get returned
2010 		 * prepended with it */
2011 		http_moveover_buffer(c->buffer);
2012 		sldns_buffer_flip(c->buffer);
2013 		return 1;
2014 	}
2015 	/* ignore other headers */
2016 	return 1;
2017 }
2018 
2019 /** a chunk header is complete, process it, return 0=fail, 1=continue next
2020  * header line, 2=done with chunked transfer*/
2021 static int
2022 http_process_chunk_header(struct comm_point* c)
2023 {
2024 	char* line = http_header_line(c->buffer);
2025 	if(!line) return 1;
2026 	if(c->http_in_chunk_headers == 3) {
2027 		verbose(VERB_ALGO, "http chunk trailer: %s", line);
2028 		/* are we done ? */
2029 		if(line[0] == 0 && c->tcp_byte_count == 0) {
2030 			/* callback of http reader when NETEVENT_DONE,
2031 			 * end of data, with no data in buffer */
2032 			sldns_buffer_set_position(c->buffer, 0);
2033 			sldns_buffer_set_limit(c->buffer, 0);
2034 			fptr_ok(fptr_whitelist_comm_point(c->callback));
2035 			(void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2036 			/* return that we are done */
2037 			return 2;
2038 		}
2039 		if(line[0] == 0) {
2040 			/* continue with header of the next chunk */
2041 			c->http_in_chunk_headers = 1;
2042 			/* remove header text from front of buffer */
2043 			http_moveover_buffer(c->buffer);
2044 			sldns_buffer_flip(c->buffer);
2045 			return 1;
2046 		}
2047 		/* ignore further trail headers */
2048 		return 1;
2049 	}
2050 	verbose(VERB_ALGO, "http chunk header: %s", line);
2051 	if(c->http_in_chunk_headers == 1) {
2052 		/* read chunked start line */
2053 		char* end = NULL;
2054 		c->tcp_byte_count = (size_t)strtol(line, &end, 16);
2055 		if(end == line)
2056 			return 0;
2057 		c->http_in_chunk_headers = 0;
2058 		/* remove header text from front of buffer */
2059 		http_moveover_buffer(c->buffer);
2060 		sldns_buffer_flip(c->buffer);
2061 		if(c->tcp_byte_count == 0) {
2062 			/* done with chunks, process chunk_trailer lines */
2063 			c->http_in_chunk_headers = 3;
2064 		}
2065 		return 1;
2066 	}
2067 	/* ignore other headers */
2068 	return 1;
2069 }
2070 
2071 /** handle nonchunked data segment */
2072 static int
2073 http_nonchunk_segment(struct comm_point* c)
2074 {
2075 	/* c->buffer at position..limit has new data we read in.
2076 	 * the buffer itself is full of nonchunked data.
2077 	 * we are looking to read tcp_byte_count more data
2078 	 * and then the transfer is done. */
2079 	size_t remainbufferlen;
2080 	size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2081 	if(c->tcp_byte_count <= got_now) {
2082 		/* done, this is the last data fragment */
2083 		c->http_stored = 0;
2084 		sldns_buffer_set_position(c->buffer, 0);
2085 		fptr_ok(fptr_whitelist_comm_point(c->callback));
2086 		(void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2087 		return 1;
2088 	}
2089 	c->tcp_byte_count -= got_now;
2090 	/* if we have the buffer space,
2091 	 * read more data collected into the buffer */
2092 	remainbufferlen = sldns_buffer_capacity(c->buffer) -
2093 		sldns_buffer_limit(c->buffer);
2094 	if(remainbufferlen >= c->tcp_byte_count ||
2095 		remainbufferlen >= 2048) {
2096 		size_t total = sldns_buffer_limit(c->buffer);
2097 		sldns_buffer_clear(c->buffer);
2098 		sldns_buffer_set_position(c->buffer, total);
2099 		c->http_stored = total;
2100 		/* return and wait to read more */
2101 		return 1;
2102 	}
2103 	/* call callback with this data amount, then
2104 	 * wait for more */
2105 	c->http_stored = 0;
2106 	sldns_buffer_set_position(c->buffer, 0);
2107 	fptr_ok(fptr_whitelist_comm_point(c->callback));
2108 	(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2109 	/* c->callback has to buffer_clear(c->buffer). */
2110 	/* return and wait to read more */
2111 	return 1;
2112 }
2113 
2114 /** handle nonchunked data segment, return 0=fail, 1=wait, 2=process more */
2115 static int
2116 http_chunked_segment(struct comm_point* c)
2117 {
2118 	/* the c->buffer has from position..limit new data we read. */
2119 	/* the current chunk has length tcp_byte_count.
2120 	 * once we read that read more chunk headers.
2121 	 */
2122 	size_t remainbufferlen;
2123 	size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2124 	if(c->tcp_byte_count <= got_now) {
2125 		/* the chunk has completed (with perhaps some extra data
2126 		 * from next chunk header and next chunk) */
2127 		/* save too much info into temp buffer */
2128 		size_t fraglen;
2129 		struct comm_reply repinfo;
2130 		c->http_stored = 0;
2131 		sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
2132 		sldns_buffer_clear(c->http_temp);
2133 		sldns_buffer_write(c->http_temp,
2134 			sldns_buffer_current(c->buffer),
2135 			sldns_buffer_remaining(c->buffer));
2136 		sldns_buffer_flip(c->http_temp);
2137 
2138 		/* callback with this fragment */
2139 		fraglen = sldns_buffer_position(c->buffer);
2140 		sldns_buffer_set_position(c->buffer, 0);
2141 		sldns_buffer_set_limit(c->buffer, fraglen);
2142 		repinfo = c->repinfo;
2143 		fptr_ok(fptr_whitelist_comm_point(c->callback));
2144 		(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
2145 		/* c->callback has to buffer_clear(). */
2146 
2147 		/* is commpoint deleted? */
2148 		if(!repinfo.c) {
2149 			return 1;
2150 		}
2151 		/* copy waiting info */
2152 		sldns_buffer_clear(c->buffer);
2153 		sldns_buffer_write(c->buffer,
2154 			sldns_buffer_begin(c->http_temp),
2155 			sldns_buffer_remaining(c->http_temp));
2156 		sldns_buffer_flip(c->buffer);
2157 		/* process end of chunk trailer header lines, until
2158 		 * an empty line */
2159 		c->http_in_chunk_headers = 3;
2160 		/* process more data in buffer (if any) */
2161 		return 2;
2162 	}
2163 	c->tcp_byte_count -= got_now;
2164 
2165 	/* if we have the buffer space,
2166 	 * read more data collected into the buffer */
2167 	remainbufferlen = sldns_buffer_capacity(c->buffer) -
2168 		sldns_buffer_limit(c->buffer);
2169 	if(remainbufferlen >= c->tcp_byte_count ||
2170 		remainbufferlen >= 2048) {
2171 		size_t total = sldns_buffer_limit(c->buffer);
2172 		sldns_buffer_clear(c->buffer);
2173 		sldns_buffer_set_position(c->buffer, total);
2174 		c->http_stored = total;
2175 		/* return and wait to read more */
2176 		return 1;
2177 	}
2178 
2179 	/* callback of http reader for a new part of the data */
2180 	c->http_stored = 0;
2181 	sldns_buffer_set_position(c->buffer, 0);
2182 	fptr_ok(fptr_whitelist_comm_point(c->callback));
2183 	(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2184 	/* c->callback has to buffer_clear(c->buffer). */
2185 	/* return and wait to read more */
2186 	return 1;
2187 }
2188 
2189 /**
2190  * Handle http reading callback.
2191  * @param fd: file descriptor of socket.
2192  * @param c: comm point to read from into buffer.
2193  * @return: 0 on error
2194  */
2195 static int
2196 comm_point_http_handle_read(int fd, struct comm_point* c)
2197 {
2198 	log_assert(c->type == comm_http);
2199 	log_assert(fd != -1);
2200 
2201 	/* if we are in ssl handshake, handle SSL handshake */
2202 #ifdef HAVE_SSL
2203 	if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2204 		if(!ssl_handshake(c))
2205 			return 0;
2206 		if(c->ssl_shake_state != comm_ssl_shake_none)
2207 			return 1;
2208 	}
2209 #endif /* HAVE_SSL */
2210 
2211 	if(!c->tcp_is_reading)
2212 		return 1;
2213 	/* read more data */
2214 	if(c->ssl) {
2215 		if(!ssl_http_read_more(c))
2216 			return 0;
2217 	} else {
2218 		if(!http_read_more(fd, c))
2219 			return 0;
2220 	}
2221 
2222 	sldns_buffer_flip(c->buffer);
2223 	while(sldns_buffer_remaining(c->buffer) > 0) {
2224 		/* if we are reading headers, read more headers */
2225 		if(c->http_in_headers || c->http_in_chunk_headers) {
2226 			/* if header is done, process the header */
2227 			if(!http_header_done(c->buffer)) {
2228 				/* copy remaining data to front of buffer
2229 				 * and set rest for writing into it */
2230 				http_moveover_buffer(c->buffer);
2231 				/* return and wait to read more */
2232 				return 1;
2233 			}
2234 			if(!c->http_in_chunk_headers) {
2235 				/* process initial headers */
2236 				if(!http_process_initial_header(c))
2237 					return 0;
2238 			} else {
2239 				/* process chunk headers */
2240 				int r = http_process_chunk_header(c);
2241 				if(r == 0) return 0;
2242 				if(r == 2) return 1; /* done */
2243 				/* r == 1, continue */
2244 			}
2245 			/* see if we have more to process */
2246 			continue;
2247 		}
2248 
2249 		if(!c->http_is_chunked) {
2250 			/* if we are reading nonchunks, process that*/
2251 			return http_nonchunk_segment(c);
2252 		} else {
2253 			/* if we are reading chunks, read the chunk */
2254 			int r = http_chunked_segment(c);
2255 			if(r == 0) return 0;
2256 			if(r == 1) return 1;
2257 			continue;
2258 		}
2259 	}
2260 	/* broke out of the loop; could not process header instead need
2261 	 * to read more */
2262 	/* moveover any remaining data and read more data */
2263 	http_moveover_buffer(c->buffer);
2264 	/* return and wait to read more */
2265 	return 1;
2266 }
2267 
2268 /** check pending connect for http */
2269 static int
2270 http_check_connect(int fd, struct comm_point* c)
2271 {
2272 	/* check for pending error from nonblocking connect */
2273 	/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
2274 	int error = 0;
2275 	socklen_t len = (socklen_t)sizeof(error);
2276 	if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
2277 		&len) < 0){
2278 #ifndef USE_WINSOCK
2279 		error = errno; /* on solaris errno is error */
2280 #else /* USE_WINSOCK */
2281 		error = WSAGetLastError();
2282 #endif
2283 	}
2284 #ifndef USE_WINSOCK
2285 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
2286 	if(error == EINPROGRESS || error == EWOULDBLOCK)
2287 		return 1; /* try again later */
2288 	else
2289 #endif
2290 	if(error != 0 && verbosity < 2)
2291 		return 0; /* silence lots of chatter in the logs */
2292 	else if(error != 0) {
2293 		log_err_addr("http connect", strerror(error),
2294 			&c->repinfo.addr, c->repinfo.addrlen);
2295 #else /* USE_WINSOCK */
2296 	/* examine error */
2297 	if(error == WSAEINPROGRESS)
2298 		return 1;
2299 	else if(error == WSAEWOULDBLOCK) {
2300 		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2301 		return 1;
2302 	} else if(error != 0 && verbosity < 2)
2303 		return 0;
2304 	else if(error != 0) {
2305 		log_err_addr("http connect", wsa_strerror(error),
2306 			&c->repinfo.addr, c->repinfo.addrlen);
2307 #endif /* USE_WINSOCK */
2308 		return 0;
2309 	}
2310 	/* keep on processing this socket */
2311 	return 2;
2312 }
2313 
/**
 * Write more http data from c->buffer over ssl.
 * @param c: comm point with the ssl connection; the buffer position is
 *	advanced past the bytes that were written.
 * @return 0 when the connection is closed or failed, 1 when data was
 *	written or when the write must be retried on a later event.
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int nwritten;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	nwritten = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(nwritten > 0) {
		/* wrote data, account for it in the buffer */
		sldns_buffer_skip(c->buffer, (ssize_t)nwritten);
		return 1;
	}
	switch(SSL_get_error(c->ssl, nwritten)) {
	case SSL_ERROR_ZERO_RETURN:
		/* closed */
		return 0;
	case SSL_ERROR_WANT_READ:
		/* ssl wants to read first; wait for read condition */
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* write more later */
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef EPIPE
		/* silence 'broken pipe' */
		if(errno == EPIPE && verbosity < 2)
			return 0;
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
2354 
2355 /** write more data for http */
2356 static int
2357 http_write_more(int fd, struct comm_point* c)
2358 {
2359 	ssize_t r;
2360 	log_assert(sldns_buffer_remaining(c->buffer) > 0);
2361 	r = send(fd, (void*)sldns_buffer_current(c->buffer),
2362 		sldns_buffer_remaining(c->buffer), 0);
2363 	if(r == -1) {
2364 #ifndef USE_WINSOCK
2365 		if(errno == EINTR || errno == EAGAIN)
2366 			return 1;
2367 		log_err_addr("http send r", strerror(errno),
2368 			&c->repinfo.addr, c->repinfo.addrlen);
2369 #else
2370 		if(WSAGetLastError() == WSAEINPROGRESS)
2371 			return 1;
2372 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
2373 			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2374 			return 1;
2375 		}
2376 		log_err_addr("http send r", wsa_strerror(WSAGetLastError()),
2377 			&c->repinfo.addr, c->repinfo.addrlen);
2378 #endif
2379 		return 0;
2380 	}
2381 	sldns_buffer_skip(c->buffer, r);
2382 	return 1;
2383 }
2384 
2385 /**
2386  * Handle http writing callback.
2387  * @param fd: file descriptor of socket.
2388  * @param c: comm point to write buffer out of.
2389  * @return: 0 on error
2390  */
2391 static int
2392 comm_point_http_handle_write(int fd, struct comm_point* c)
2393 {
2394 	log_assert(c->type == comm_http);
2395 	log_assert(fd != -1);
2396 
2397 	/* check pending connect errors, if that fails, we wait for more,
2398 	 * or we can continue to write contents */
2399 	if(c->tcp_check_nb_connect) {
2400 		int r = http_check_connect(fd, c);
2401 		if(r == 0) return 0;
2402 		if(r == 1) return 1;
2403 		c->tcp_check_nb_connect = 0;
2404 	}
2405 	/* if we are in ssl handshake, handle SSL handshake */
2406 #ifdef HAVE_SSL
2407 	if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2408 		if(!ssl_handshake(c))
2409 			return 0;
2410 		if(c->ssl_shake_state != comm_ssl_shake_none)
2411 			return 1;
2412 	}
2413 #endif /* HAVE_SSL */
2414 	if(c->tcp_is_reading)
2415 		return 1;
2416 	/* if we are writing, write more */
2417 	if(c->ssl) {
2418 		if(!ssl_http_write_more(c))
2419 			return 0;
2420 	} else {
2421 		if(!http_write_more(fd, c))
2422 			return 0;
2423 	}
2424 
2425 	/* we write a single buffer contents, that can contain
2426 	 * the http request, and then flip to read the results */
2427 	/* see if write is done */
2428 	if(sldns_buffer_remaining(c->buffer) == 0) {
2429 		sldns_buffer_clear(c->buffer);
2430 		if(c->tcp_do_toggle_rw)
2431 			c->tcp_is_reading = 1;
2432 		c->tcp_byte_count = 0;
2433 		/* switch from listening(write) to listening(read) */
2434 		comm_point_stop_listening(c);
2435 		comm_point_start_listening(c, -1, -1);
2436 	}
2437 	return 1;
2438 }
2439 
2440 void
2441 comm_point_http_handle_callback(int fd, short event, void* arg)
2442 {
2443 	struct comm_point* c = (struct comm_point*)arg;
2444 	log_assert(c->type == comm_http);
2445 	ub_comm_base_now(c->ev->base);
2446 
2447 	if(event&UB_EV_TIMEOUT) {
2448 		verbose(VERB_QUERY, "http took too long, dropped");
2449 		reclaim_http_handler(c);
2450 		if(!c->tcp_do_close) {
2451 			fptr_ok(fptr_whitelist_comm_point(c->callback));
2452 			(void)(*c->callback)(c, c->cb_arg,
2453 				NETEVENT_TIMEOUT, NULL);
2454 		}
2455 		return;
2456 	}
2457 	if(event&UB_EV_READ) {
2458 		if(!comm_point_http_handle_read(fd, c)) {
2459 			reclaim_http_handler(c);
2460 			if(!c->tcp_do_close) {
2461 				fptr_ok(fptr_whitelist_comm_point(
2462 					c->callback));
2463 				(void)(*c->callback)(c, c->cb_arg,
2464 					NETEVENT_CLOSED, NULL);
2465 			}
2466 		}
2467 		return;
2468 	}
2469 	if(event&UB_EV_WRITE) {
2470 		if(!comm_point_http_handle_write(fd, c)) {
2471 			reclaim_http_handler(c);
2472 			if(!c->tcp_do_close) {
2473 				fptr_ok(fptr_whitelist_comm_point(
2474 					c->callback));
2475 				(void)(*c->callback)(c, c->cb_arg,
2476 					NETEVENT_CLOSED, NULL);
2477 			}
2478 		}
2479 		return;
2480 	}
2481 	log_err("Ignored event %d for httphdl.", event);
2482 }
2483 
2484 void comm_point_local_handle_callback(int fd, short event, void* arg)
2485 {
2486 	struct comm_point* c = (struct comm_point*)arg;
2487 	log_assert(c->type == comm_local);
2488 	ub_comm_base_now(c->ev->base);
2489 
2490 	if(event&UB_EV_READ) {
2491 		if(!comm_point_tcp_handle_read(fd, c, 1)) {
2492 			fptr_ok(fptr_whitelist_comm_point(c->callback));
2493 			(void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
2494 				NULL);
2495 		}
2496 		return;
2497 	}
2498 	log_err("Ignored event %d for localhdl.", event);
2499 }
2500 
2501 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
2502 	short event, void* arg)
2503 {
2504 	struct comm_point* c = (struct comm_point*)arg;
2505 	int err = NETEVENT_NOERROR;
2506 	log_assert(c->type == comm_raw);
2507 	ub_comm_base_now(c->ev->base);
2508 
2509 	if(event&UB_EV_TIMEOUT)
2510 		err = NETEVENT_TIMEOUT;
2511 	fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
2512 	(void)(*c->callback)(c, c->cb_arg, err, NULL);
2513 }
2514 
2515 struct comm_point*
2516 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
2517 	comm_point_callback_type* callback, void* callback_arg)
2518 {
2519 	struct comm_point* c = (struct comm_point*)calloc(1,
2520 		sizeof(struct comm_point));
2521 	short evbits;
2522 	if(!c)
2523 		return NULL;
2524 	c->ev = (struct internal_event*)calloc(1,
2525 		sizeof(struct internal_event));
2526 	if(!c->ev) {
2527 		free(c);
2528 		return NULL;
2529 	}
2530 	c->ev->base = base;
2531 	c->fd = fd;
2532 	c->buffer = buffer;
2533 	c->timeout = NULL;
2534 	c->tcp_is_reading = 0;
2535 	c->tcp_byte_count = 0;
2536 	c->tcp_parent = NULL;
2537 	c->max_tcp_count = 0;
2538 	c->cur_tcp_count = 0;
2539 	c->tcp_handlers = NULL;
2540 	c->tcp_free = NULL;
2541 	c->type = comm_udp;
2542 	c->tcp_do_close = 0;
2543 	c->do_not_close = 0;
2544 	c->tcp_do_toggle_rw = 0;
2545 	c->tcp_check_nb_connect = 0;
2546 #ifdef USE_MSG_FASTOPEN
2547 	c->tcp_do_fastopen = 0;
2548 #endif
2549 #ifdef USE_DNSCRYPT
2550 	c->dnscrypt = 0;
2551 	c->dnscrypt_buffer = buffer;
2552 #endif
2553 	c->inuse = 0;
2554 	c->callback = callback;
2555 	c->cb_arg = callback_arg;
2556 	evbits = UB_EV_READ | UB_EV_PERSIST;
2557 	/* ub_event stuff */
2558 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2559 		comm_point_udp_callback, c);
2560 	if(c->ev->ev == NULL) {
2561 		log_err("could not baseset udp event");
2562 		comm_point_delete(c);
2563 		return NULL;
2564 	}
2565 	if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2566 		log_err("could not add udp event");
2567 		comm_point_delete(c);
2568 		return NULL;
2569 	}
2570 	return c;
2571 }
2572 
2573 struct comm_point*
2574 comm_point_create_udp_ancil(struct comm_base *base, int fd,
2575 	sldns_buffer* buffer,
2576 	comm_point_callback_type* callback, void* callback_arg)
2577 {
2578 	struct comm_point* c = (struct comm_point*)calloc(1,
2579 		sizeof(struct comm_point));
2580 	short evbits;
2581 	if(!c)
2582 		return NULL;
2583 	c->ev = (struct internal_event*)calloc(1,
2584 		sizeof(struct internal_event));
2585 	if(!c->ev) {
2586 		free(c);
2587 		return NULL;
2588 	}
2589 	c->ev->base = base;
2590 	c->fd = fd;
2591 	c->buffer = buffer;
2592 	c->timeout = NULL;
2593 	c->tcp_is_reading = 0;
2594 	c->tcp_byte_count = 0;
2595 	c->tcp_parent = NULL;
2596 	c->max_tcp_count = 0;
2597 	c->cur_tcp_count = 0;
2598 	c->tcp_handlers = NULL;
2599 	c->tcp_free = NULL;
2600 	c->type = comm_udp;
2601 	c->tcp_do_close = 0;
2602 	c->do_not_close = 0;
2603 #ifdef USE_DNSCRYPT
2604 	c->dnscrypt = 0;
2605 	c->dnscrypt_buffer = buffer;
2606 #endif
2607 	c->inuse = 0;
2608 	c->tcp_do_toggle_rw = 0;
2609 	c->tcp_check_nb_connect = 0;
2610 #ifdef USE_MSG_FASTOPEN
2611 	c->tcp_do_fastopen = 0;
2612 #endif
2613 	c->callback = callback;
2614 	c->cb_arg = callback_arg;
2615 	evbits = UB_EV_READ | UB_EV_PERSIST;
2616 	/* ub_event stuff */
2617 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2618 		comm_point_udp_ancil_callback, c);
2619 	if(c->ev->ev == NULL) {
2620 		log_err("could not baseset udp event");
2621 		comm_point_delete(c);
2622 		return NULL;
2623 	}
2624 	if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2625 		log_err("could not add udp event");
2626 		comm_point_delete(c);
2627 		return NULL;
2628 	}
2629 	return c;
2630 }
2631 
2632 static struct comm_point*
2633 comm_point_create_tcp_handler(struct comm_base *base,
2634 	struct comm_point* parent, size_t bufsize,
2635 	struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
2636 	void* callback_arg)
2637 {
2638 	struct comm_point* c = (struct comm_point*)calloc(1,
2639 		sizeof(struct comm_point));
2640 	short evbits;
2641 	if(!c)
2642 		return NULL;
2643 	c->ev = (struct internal_event*)calloc(1,
2644 		sizeof(struct internal_event));
2645 	if(!c->ev) {
2646 		free(c);
2647 		return NULL;
2648 	}
2649 	c->ev->base = base;
2650 	c->fd = -1;
2651 	c->buffer = sldns_buffer_new(bufsize);
2652 	if(!c->buffer) {
2653 		free(c->ev);
2654 		free(c);
2655 		return NULL;
2656 	}
2657 	c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
2658 	if(!c->timeout) {
2659 		sldns_buffer_free(c->buffer);
2660 		free(c->ev);
2661 		free(c);
2662 		return NULL;
2663 	}
2664 	c->tcp_is_reading = 0;
2665 	c->tcp_byte_count = 0;
2666 	c->tcp_parent = parent;
2667 	c->tcp_timeout_msec = parent->tcp_timeout_msec;
2668 	c->tcp_conn_limit = parent->tcp_conn_limit;
2669 	c->tcl_addr = NULL;
2670 	c->tcp_keepalive = 0;
2671 	c->max_tcp_count = 0;
2672 	c->cur_tcp_count = 0;
2673 	c->tcp_handlers = NULL;
2674 	c->tcp_free = NULL;
2675 	c->type = comm_tcp;
2676 	c->tcp_do_close = 0;
2677 	c->do_not_close = 0;
2678 	c->tcp_do_toggle_rw = 1;
2679 	c->tcp_check_nb_connect = 0;
2680 #ifdef USE_MSG_FASTOPEN
2681 	c->tcp_do_fastopen = 0;
2682 #endif
2683 #ifdef USE_DNSCRYPT
2684 	c->dnscrypt = 0;
2685 	/* We don't know just yet if this is a dnscrypt channel. Allocation
2686 	 * will be done when handling the callback. */
2687 	c->dnscrypt_buffer = c->buffer;
2688 #endif
2689 	c->repinfo.c = c;
2690 	c->callback = callback;
2691 	c->cb_arg = callback_arg;
2692 	if(spoolbuf) {
2693 		c->tcp_req_info = tcp_req_info_create(spoolbuf);
2694 		if(!c->tcp_req_info) {
2695 			log_err("could not create tcp commpoint");
2696 			sldns_buffer_free(c->buffer);
2697 			free(c->timeout);
2698 			free(c->ev);
2699 			free(c);
2700 			return NULL;
2701 		}
2702 		c->tcp_req_info->cp = c;
2703 		c->tcp_do_close = 1;
2704 		c->tcp_do_toggle_rw = 0;
2705 	}
2706 	/* add to parent free list */
2707 	c->tcp_free = parent->tcp_free;
2708 	parent->tcp_free = c;
2709 	/* ub_event stuff */
2710 	evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
2711 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2712 		comm_point_tcp_handle_callback, c);
2713 	if(c->ev->ev == NULL)
2714 	{
2715 		log_err("could not basetset tcphdl event");
2716 		parent->tcp_free = c->tcp_free;
2717 		tcp_req_info_delete(c->tcp_req_info);
2718 		sldns_buffer_free(c->buffer);
2719 		free(c->timeout);
2720 		free(c->ev);
2721 		free(c);
2722 		return NULL;
2723 	}
2724 	return c;
2725 }
2726 
2727 struct comm_point*
2728 comm_point_create_tcp(struct comm_base *base, int fd, int num,
2729 	int idle_timeout, struct tcl_list* tcp_conn_limit, size_t bufsize,
2730 	struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
2731 	void* callback_arg)
2732 {
2733 	struct comm_point* c = (struct comm_point*)calloc(1,
2734 		sizeof(struct comm_point));
2735 	short evbits;
2736 	int i;
2737 	/* first allocate the TCP accept listener */
2738 	if(!c)
2739 		return NULL;
2740 	c->ev = (struct internal_event*)calloc(1,
2741 		sizeof(struct internal_event));
2742 	if(!c->ev) {
2743 		free(c);
2744 		return NULL;
2745 	}
2746 	c->ev->base = base;
2747 	c->fd = fd;
2748 	c->buffer = NULL;
2749 	c->timeout = NULL;
2750 	c->tcp_is_reading = 0;
2751 	c->tcp_byte_count = 0;
2752 	c->tcp_timeout_msec = idle_timeout;
2753 	c->tcp_conn_limit = tcp_conn_limit;
2754 	c->tcl_addr = NULL;
2755 	c->tcp_keepalive = 0;
2756 	c->tcp_parent = NULL;
2757 	c->max_tcp_count = num;
2758 	c->cur_tcp_count = 0;
2759 	c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
2760 		sizeof(struct comm_point*));
2761 	if(!c->tcp_handlers) {
2762 		free(c->ev);
2763 		free(c);
2764 		return NULL;
2765 	}
2766 	c->tcp_free = NULL;
2767 	c->type = comm_tcp_accept;
2768 	c->tcp_do_close = 0;
2769 	c->do_not_close = 0;
2770 	c->tcp_do_toggle_rw = 0;
2771 	c->tcp_check_nb_connect = 0;
2772 #ifdef USE_MSG_FASTOPEN
2773 	c->tcp_do_fastopen = 0;
2774 #endif
2775 #ifdef USE_DNSCRYPT
2776 	c->dnscrypt = 0;
2777 	c->dnscrypt_buffer = NULL;
2778 #endif
2779 	c->callback = NULL;
2780 	c->cb_arg = NULL;
2781 	evbits = UB_EV_READ | UB_EV_PERSIST;
2782 	/* ub_event stuff */
2783 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2784 		comm_point_tcp_accept_callback, c);
2785 	if(c->ev->ev == NULL) {
2786 		log_err("could not baseset tcpacc event");
2787 		comm_point_delete(c);
2788 		return NULL;
2789 	}
2790 	if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2791 		log_err("could not add tcpacc event");
2792 		comm_point_delete(c);
2793 		return NULL;
2794 	}
2795 	/* now prealloc the tcp handlers */
2796 	for(i=0; i<num; i++) {
2797 		c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
2798 			c, bufsize, spoolbuf, callback, callback_arg);
2799 		if(!c->tcp_handlers[i]) {
2800 			comm_point_delete(c);
2801 			return NULL;
2802 		}
2803 	}
2804 
2805 	return c;
2806 }
2807 
2808 struct comm_point*
2809 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
2810         comm_point_callback_type* callback, void* callback_arg)
2811 {
2812 	struct comm_point* c = (struct comm_point*)calloc(1,
2813 		sizeof(struct comm_point));
2814 	short evbits;
2815 	if(!c)
2816 		return NULL;
2817 	c->ev = (struct internal_event*)calloc(1,
2818 		sizeof(struct internal_event));
2819 	if(!c->ev) {
2820 		free(c);
2821 		return NULL;
2822 	}
2823 	c->ev->base = base;
2824 	c->fd = -1;
2825 	c->buffer = sldns_buffer_new(bufsize);
2826 	if(!c->buffer) {
2827 		free(c->ev);
2828 		free(c);
2829 		return NULL;
2830 	}
2831 	c->timeout = NULL;
2832 	c->tcp_is_reading = 0;
2833 	c->tcp_byte_count = 0;
2834 	c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
2835 	c->tcp_conn_limit = NULL;
2836 	c->tcl_addr = NULL;
2837 	c->tcp_keepalive = 0;
2838 	c->tcp_parent = NULL;
2839 	c->max_tcp_count = 0;
2840 	c->cur_tcp_count = 0;
2841 	c->tcp_handlers = NULL;
2842 	c->tcp_free = NULL;
2843 	c->type = comm_tcp;
2844 	c->tcp_do_close = 0;
2845 	c->do_not_close = 0;
2846 	c->tcp_do_toggle_rw = 1;
2847 	c->tcp_check_nb_connect = 1;
2848 #ifdef USE_MSG_FASTOPEN
2849 	c->tcp_do_fastopen = 1;
2850 #endif
2851 #ifdef USE_DNSCRYPT
2852 	c->dnscrypt = 0;
2853 	c->dnscrypt_buffer = c->buffer;
2854 #endif
2855 	c->repinfo.c = c;
2856 	c->callback = callback;
2857 	c->cb_arg = callback_arg;
2858 	evbits = UB_EV_PERSIST | UB_EV_WRITE;
2859 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2860 		comm_point_tcp_handle_callback, c);
2861 	if(c->ev->ev == NULL)
2862 	{
2863 		log_err("could not baseset tcpout event");
2864 		sldns_buffer_free(c->buffer);
2865 		free(c->ev);
2866 		free(c);
2867 		return NULL;
2868 	}
2869 
2870 	return c;
2871 }
2872 
2873 struct comm_point*
2874 comm_point_create_http_out(struct comm_base *base, size_t bufsize,
2875         comm_point_callback_type* callback, void* callback_arg,
2876 	sldns_buffer* temp)
2877 {
2878 	struct comm_point* c = (struct comm_point*)calloc(1,
2879 		sizeof(struct comm_point));
2880 	short evbits;
2881 	if(!c)
2882 		return NULL;
2883 	c->ev = (struct internal_event*)calloc(1,
2884 		sizeof(struct internal_event));
2885 	if(!c->ev) {
2886 		free(c);
2887 		return NULL;
2888 	}
2889 	c->ev->base = base;
2890 	c->fd = -1;
2891 	c->buffer = sldns_buffer_new(bufsize);
2892 	if(!c->buffer) {
2893 		free(c->ev);
2894 		free(c);
2895 		return NULL;
2896 	}
2897 	c->timeout = NULL;
2898 	c->tcp_is_reading = 0;
2899 	c->tcp_byte_count = 0;
2900 	c->tcp_parent = NULL;
2901 	c->max_tcp_count = 0;
2902 	c->cur_tcp_count = 0;
2903 	c->tcp_handlers = NULL;
2904 	c->tcp_free = NULL;
2905 	c->type = comm_http;
2906 	c->tcp_do_close = 0;
2907 	c->do_not_close = 0;
2908 	c->tcp_do_toggle_rw = 1;
2909 	c->tcp_check_nb_connect = 1;
2910 	c->http_in_headers = 1;
2911 	c->http_in_chunk_headers = 0;
2912 	c->http_is_chunked = 0;
2913 	c->http_temp = temp;
2914 #ifdef USE_MSG_FASTOPEN
2915 	c->tcp_do_fastopen = 1;
2916 #endif
2917 #ifdef USE_DNSCRYPT
2918 	c->dnscrypt = 0;
2919 	c->dnscrypt_buffer = c->buffer;
2920 #endif
2921 	c->repinfo.c = c;
2922 	c->callback = callback;
2923 	c->cb_arg = callback_arg;
2924 	evbits = UB_EV_PERSIST | UB_EV_WRITE;
2925 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2926 		comm_point_http_handle_callback, c);
2927 	if(c->ev->ev == NULL)
2928 	{
2929 		log_err("could not baseset tcpout event");
2930 #ifdef HAVE_SSL
2931 		SSL_free(c->ssl);
2932 #endif
2933 		sldns_buffer_free(c->buffer);
2934 		free(c->ev);
2935 		free(c);
2936 		return NULL;
2937 	}
2938 
2939 	return c;
2940 }
2941 
2942 struct comm_point*
2943 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
2944         comm_point_callback_type* callback, void* callback_arg)
2945 {
2946 	struct comm_point* c = (struct comm_point*)calloc(1,
2947 		sizeof(struct comm_point));
2948 	short evbits;
2949 	if(!c)
2950 		return NULL;
2951 	c->ev = (struct internal_event*)calloc(1,
2952 		sizeof(struct internal_event));
2953 	if(!c->ev) {
2954 		free(c);
2955 		return NULL;
2956 	}
2957 	c->ev->base = base;
2958 	c->fd = fd;
2959 	c->buffer = sldns_buffer_new(bufsize);
2960 	if(!c->buffer) {
2961 		free(c->ev);
2962 		free(c);
2963 		return NULL;
2964 	}
2965 	c->timeout = NULL;
2966 	c->tcp_is_reading = 1;
2967 	c->tcp_byte_count = 0;
2968 	c->tcp_parent = NULL;
2969 	c->max_tcp_count = 0;
2970 	c->cur_tcp_count = 0;
2971 	c->tcp_handlers = NULL;
2972 	c->tcp_free = NULL;
2973 	c->type = comm_local;
2974 	c->tcp_do_close = 0;
2975 	c->do_not_close = 1;
2976 	c->tcp_do_toggle_rw = 0;
2977 	c->tcp_check_nb_connect = 0;
2978 #ifdef USE_MSG_FASTOPEN
2979 	c->tcp_do_fastopen = 0;
2980 #endif
2981 #ifdef USE_DNSCRYPT
2982 	c->dnscrypt = 0;
2983 	c->dnscrypt_buffer = c->buffer;
2984 #endif
2985 	c->callback = callback;
2986 	c->cb_arg = callback_arg;
2987 	/* ub_event stuff */
2988 	evbits = UB_EV_PERSIST | UB_EV_READ;
2989 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2990 		comm_point_local_handle_callback, c);
2991 	if(c->ev->ev == NULL) {
2992 		log_err("could not baseset localhdl event");
2993 		free(c->ev);
2994 		free(c);
2995 		return NULL;
2996 	}
2997 	if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2998 		log_err("could not add localhdl event");
2999 		ub_event_free(c->ev->ev);
3000 		free(c->ev);
3001 		free(c);
3002 		return NULL;
3003 	}
3004 	return c;
3005 }
3006 
3007 struct comm_point*
3008 comm_point_create_raw(struct comm_base* base, int fd, int writing,
3009 	comm_point_callback_type* callback, void* callback_arg)
3010 {
3011 	struct comm_point* c = (struct comm_point*)calloc(1,
3012 		sizeof(struct comm_point));
3013 	short evbits;
3014 	if(!c)
3015 		return NULL;
3016 	c->ev = (struct internal_event*)calloc(1,
3017 		sizeof(struct internal_event));
3018 	if(!c->ev) {
3019 		free(c);
3020 		return NULL;
3021 	}
3022 	c->ev->base = base;
3023 	c->fd = fd;
3024 	c->buffer = NULL;
3025 	c->timeout = NULL;
3026 	c->tcp_is_reading = 0;
3027 	c->tcp_byte_count = 0;
3028 	c->tcp_parent = NULL;
3029 	c->max_tcp_count = 0;
3030 	c->cur_tcp_count = 0;
3031 	c->tcp_handlers = NULL;
3032 	c->tcp_free = NULL;
3033 	c->type = comm_raw;
3034 	c->tcp_do_close = 0;
3035 	c->do_not_close = 1;
3036 	c->tcp_do_toggle_rw = 0;
3037 	c->tcp_check_nb_connect = 0;
3038 #ifdef USE_MSG_FASTOPEN
3039 	c->tcp_do_fastopen = 0;
3040 #endif
3041 #ifdef USE_DNSCRYPT
3042 	c->dnscrypt = 0;
3043 	c->dnscrypt_buffer = c->buffer;
3044 #endif
3045 	c->callback = callback;
3046 	c->cb_arg = callback_arg;
3047 	/* ub_event stuff */
3048 	if(writing)
3049 		evbits = UB_EV_PERSIST | UB_EV_WRITE;
3050 	else 	evbits = UB_EV_PERSIST | UB_EV_READ;
3051 	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3052 		comm_point_raw_handle_callback, c);
3053 	if(c->ev->ev == NULL) {
3054 		log_err("could not baseset rawhdl event");
3055 		free(c->ev);
3056 		free(c);
3057 		return NULL;
3058 	}
3059 	if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3060 		log_err("could not add rawhdl event");
3061 		ub_event_free(c->ev->ev);
3062 		free(c->ev);
3063 		free(c);
3064 		return NULL;
3065 	}
3066 	return c;
3067 }
3068 
3069 void
3070 comm_point_close(struct comm_point* c)
3071 {
3072 	if(!c)
3073 		return;
3074 	if(c->fd != -1) {
3075 		if(ub_event_del(c->ev->ev) != 0) {
3076 			log_err("could not event_del on close");
3077 		}
3078 	}
3079 	tcl_close_connection(c->tcl_addr);
3080 	if(c->tcp_req_info)
3081 		tcp_req_info_clear(c->tcp_req_info);
3082 	/* close fd after removing from event lists, or epoll.. is messed up */
3083 	if(c->fd != -1 && !c->do_not_close) {
3084 		if(c->type == comm_tcp || c->type == comm_http) {
3085 			/* delete sticky events for the fd, it gets closed */
3086 			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3087 			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3088 		}
3089 		verbose(VERB_ALGO, "close fd %d", c->fd);
3090 #ifndef USE_WINSOCK
3091 		close(c->fd);
3092 #else
3093 		closesocket(c->fd);
3094 #endif
3095 	}
3096 	c->fd = -1;
3097 }
3098 
3099 void
3100 comm_point_delete(struct comm_point* c)
3101 {
3102 	if(!c)
3103 		return;
3104 	if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
3105 #ifdef HAVE_SSL
3106 		SSL_shutdown(c->ssl);
3107 		SSL_free(c->ssl);
3108 #endif
3109 	}
3110 	comm_point_close(c);
3111 	if(c->tcp_handlers) {
3112 		int i;
3113 		for(i=0; i<c->max_tcp_count; i++)
3114 			comm_point_delete(c->tcp_handlers[i]);
3115 		free(c->tcp_handlers);
3116 	}
3117 	free(c->timeout);
3118 	if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
3119 		sldns_buffer_free(c->buffer);
3120 #ifdef USE_DNSCRYPT
3121 		if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
3122 			sldns_buffer_free(c->dnscrypt_buffer);
3123 		}
3124 #endif
3125 		if(c->tcp_req_info) {
3126 			tcp_req_info_delete(c->tcp_req_info);
3127 		}
3128 	}
3129 	ub_event_free(c->ev->ev);
3130 	free(c->ev);
3131 	free(c);
3132 }
3133 
3134 void
3135 comm_point_send_reply(struct comm_reply *repinfo)
3136 {
3137 	struct sldns_buffer* buffer;
3138 	log_assert(repinfo && repinfo->c);
3139 #ifdef USE_DNSCRYPT
3140 	buffer = repinfo->c->dnscrypt_buffer;
3141 	if(!dnsc_handle_uncurved_request(repinfo)) {
3142 		return;
3143 	}
3144 #else
3145 	buffer = repinfo->c->buffer;
3146 #endif
3147 	if(repinfo->c->type == comm_udp) {
3148 		if(repinfo->srctype)
3149 			comm_point_send_udp_msg_if(repinfo->c,
3150 			buffer, (struct sockaddr*)&repinfo->addr,
3151 			repinfo->addrlen, repinfo);
3152 		else
3153 			comm_point_send_udp_msg(repinfo->c, buffer,
3154 			(struct sockaddr*)&repinfo->addr, repinfo->addrlen);
3155 #ifdef USE_DNSTAP
3156 		if(repinfo->c->dtenv != NULL &&
3157 		   repinfo->c->dtenv->log_client_response_messages)
3158 			dt_msg_send_client_response(repinfo->c->dtenv,
3159 			&repinfo->addr, repinfo->c->type, repinfo->c->buffer);
3160 #endif
3161 	} else {
3162 #ifdef USE_DNSTAP
3163 		if(repinfo->c->tcp_parent->dtenv != NULL &&
3164 		   repinfo->c->tcp_parent->dtenv->log_client_response_messages)
3165 			dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
3166 			&repinfo->addr, repinfo->c->type,
3167 			( repinfo->c->tcp_req_info
3168 			? repinfo->c->tcp_req_info->spool_buffer
3169 			: repinfo->c->buffer ));
3170 #endif
3171 		if(repinfo->c->tcp_req_info) {
3172 			tcp_req_info_send_reply(repinfo->c->tcp_req_info);
3173 		} else {
3174 			comm_point_start_listening(repinfo->c, -1,
3175 				repinfo->c->tcp_timeout_msec);
3176 		}
3177 	}
3178 }
3179 
3180 void
3181 comm_point_drop_reply(struct comm_reply* repinfo)
3182 {
3183 	if(!repinfo)
3184 		return;
3185 	log_assert(repinfo->c);
3186 	log_assert(repinfo->c->type != comm_tcp_accept);
3187 	if(repinfo->c->type == comm_udp)
3188 		return;
3189 	if(repinfo->c->tcp_req_info)
3190 		repinfo->c->tcp_req_info->is_drop = 1;
3191 	reclaim_tcp_handler(repinfo->c);
3192 }
3193 
3194 void
3195 comm_point_stop_listening(struct comm_point* c)
3196 {
3197 	verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
3198 	if(ub_event_del(c->ev->ev) != 0) {
3199 		log_err("event_del error to stoplisten");
3200 	}
3201 }
3202 
3203 void
3204 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
3205 {
3206 	verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
3207 		c->fd==-1?newfd:c->fd, msec);
3208 	if(c->type == comm_tcp_accept && !c->tcp_free) {
3209 		/* no use to start listening no free slots. */
3210 		return;
3211 	}
3212 	if(msec != -1 && msec != 0) {
3213 		if(!c->timeout) {
3214 			c->timeout = (struct timeval*)malloc(sizeof(
3215 				struct timeval));
3216 			if(!c->timeout) {
3217 				log_err("cpsl: malloc failed. No net read.");
3218 				return;
3219 			}
3220 		}
3221 		ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
3222 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
3223 		c->timeout->tv_sec = msec/1000;
3224 		c->timeout->tv_usec = (msec%1000)*1000;
3225 #endif /* S_SPLINT_S */
3226 	}
3227 	if(c->type == comm_tcp || c->type == comm_http) {
3228 		ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3229 		if(c->tcp_is_reading)
3230 			ub_event_add_bits(c->ev->ev, UB_EV_READ);
3231 		else	ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3232 	}
3233 	if(newfd != -1) {
3234 		if(c->fd != -1) {
3235 #ifndef USE_WINSOCK
3236 			close(c->fd);
3237 #else
3238 			closesocket(c->fd);
3239 #endif
3240 		}
3241 		c->fd = newfd;
3242 		ub_event_set_fd(c->ev->ev, c->fd);
3243 	}
3244 	if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
3245 		log_err("event_add failed. in cpsl.");
3246 	}
3247 }
3248 
3249 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
3250 {
3251 	verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
3252 	if(ub_event_del(c->ev->ev) != 0) {
3253 		log_err("event_del error to cplf");
3254 	}
3255 	ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3256 	if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
3257 	if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3258 	if(ub_event_add(c->ev->ev, c->timeout) != 0) {
3259 		log_err("event_add failed. in cplf.");
3260 	}
3261 }
3262 
3263 size_t comm_point_get_mem(struct comm_point* c)
3264 {
3265 	size_t s;
3266 	if(!c)
3267 		return 0;
3268 	s = sizeof(*c) + sizeof(*c->ev);
3269 	if(c->timeout)
3270 		s += sizeof(*c->timeout);
3271 	if(c->type == comm_tcp || c->type == comm_local) {
3272 		s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
3273 #ifdef USE_DNSCRYPT
3274 		s += sizeof(*c->dnscrypt_buffer);
3275 		if(c->buffer != c->dnscrypt_buffer) {
3276 			s += sldns_buffer_capacity(c->dnscrypt_buffer);
3277 		}
3278 #endif
3279 	}
3280 	if(c->type == comm_tcp_accept) {
3281 		int i;
3282 		for(i=0; i<c->max_tcp_count; i++)
3283 			s += comm_point_get_mem(c->tcp_handlers[i]);
3284 	}
3285 	return s;
3286 }
3287 
3288 struct comm_timer*
3289 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
3290 {
3291 	struct internal_timer *tm = (struct internal_timer*)calloc(1,
3292 		sizeof(struct internal_timer));
3293 	if(!tm) {
3294 		log_err("malloc failed");
3295 		return NULL;
3296 	}
3297 	tm->super.ev_timer = tm;
3298 	tm->base = base;
3299 	tm->super.callback = cb;
3300 	tm->super.cb_arg = cb_arg;
3301 	tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
3302 		comm_timer_callback, &tm->super);
3303 	if(tm->ev == NULL) {
3304 		log_err("timer_create: event_base_set failed.");
3305 		free(tm);
3306 		return NULL;
3307 	}
3308 	return &tm->super;
3309 }
3310 
3311 void
3312 comm_timer_disable(struct comm_timer* timer)
3313 {
3314 	if(!timer)
3315 		return;
3316 	ub_timer_del(timer->ev_timer->ev);
3317 	timer->ev_timer->enabled = 0;
3318 }
3319 
3320 void
3321 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
3322 {
3323 	log_assert(tv);
3324 	if(timer->ev_timer->enabled)
3325 		comm_timer_disable(timer);
3326 	if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
3327 		comm_timer_callback, timer, tv) != 0)
3328 		log_err("comm_timer_set: evtimer_add failed.");
3329 	timer->ev_timer->enabled = 1;
3330 }
3331 
3332 void
3333 comm_timer_delete(struct comm_timer* timer)
3334 {
3335 	if(!timer)
3336 		return;
3337 	comm_timer_disable(timer);
3338 	/* Free the sub struct timer->ev_timer derived from the super struct timer.
3339 	 * i.e. assert(timer == timer->ev_timer)
3340 	 */
3341 	ub_event_free(timer->ev_timer->ev);
3342 	free(timer->ev_timer);
3343 }
3344 
3345 void
3346 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
3347 {
3348 	struct comm_timer* tm = (struct comm_timer*)arg;
3349 	if(!(event&UB_EV_TIMEOUT))
3350 		return;
3351 	ub_comm_base_now(tm->ev_timer->base);
3352 	tm->ev_timer->enabled = 0;
3353 	fptr_ok(fptr_whitelist_comm_timer(tm->callback));
3354 	(*tm->callback)(tm->cb_arg);
3355 }
3356 
3357 int
3358 comm_timer_is_set(struct comm_timer* timer)
3359 {
3360 	return (int)timer->ev_timer->enabled;
3361 }
3362 
3363 size_t
3364 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
3365 {
3366 	return sizeof(struct internal_timer);
3367 }
3368 
3369 struct comm_signal*
3370 comm_signal_create(struct comm_base* base,
3371         void (*callback)(int, void*), void* cb_arg)
3372 {
3373 	struct comm_signal* com = (struct comm_signal*)malloc(
3374 		sizeof(struct comm_signal));
3375 	if(!com) {
3376 		log_err("malloc failed");
3377 		return NULL;
3378 	}
3379 	com->base = base;
3380 	com->callback = callback;
3381 	com->cb_arg = cb_arg;
3382 	com->ev_signal = NULL;
3383 	return com;
3384 }
3385 
3386 void
3387 comm_signal_callback(int sig, short event, void* arg)
3388 {
3389 	struct comm_signal* comsig = (struct comm_signal*)arg;
3390 	if(!(event & UB_EV_SIGNAL))
3391 		return;
3392 	ub_comm_base_now(comsig->base);
3393 	fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
3394 	(*comsig->callback)(sig, comsig->cb_arg);
3395 }
3396 
3397 int
3398 comm_signal_bind(struct comm_signal* comsig, int sig)
3399 {
3400 	struct internal_signal* entry = (struct internal_signal*)calloc(1,
3401 		sizeof(struct internal_signal));
3402 	if(!entry) {
3403 		log_err("malloc failed");
3404 		return 0;
3405 	}
3406 	log_assert(comsig);
3407 	/* add signal event */
3408 	entry->ev = ub_signal_new(comsig->base->eb->base, sig,
3409 		comm_signal_callback, comsig);
3410 	if(entry->ev == NULL) {
3411 		log_err("Could not create signal event");
3412 		free(entry);
3413 		return 0;
3414 	}
3415 	if(ub_signal_add(entry->ev, NULL) != 0) {
3416 		log_err("Could not add signal handler");
3417 		ub_event_free(entry->ev);
3418 		free(entry);
3419 		return 0;
3420 	}
3421 	/* link into list */
3422 	entry->next = comsig->ev_signal;
3423 	comsig->ev_signal = entry;
3424 	return 1;
3425 }
3426 
3427 void
3428 comm_signal_delete(struct comm_signal* comsig)
3429 {
3430 	struct internal_signal* p, *np;
3431 	if(!comsig)
3432 		return;
3433 	p=comsig->ev_signal;
3434 	while(p) {
3435 		np = p->next;
3436 		ub_signal_del(p->ev);
3437 		ub_event_free(p->ev);
3438 		free(p);
3439 		p = np;
3440 	}
3441 	free(comsig);
3442 }
3443