xref: /freebsd/contrib/unbound/services/outside_network.c (revision 361e428888e630eb708c72cf31579a25ba5d4f03)
1 /*
2  * services/outside_network.c - implement sending of queries and wait answer.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to send queries to authoritative servers and
40  * wait for the pending answer events.
41  */
42 #include "config.h"
43 #include <ctype.h>
44 #ifdef HAVE_SYS_TYPES_H
45 #  include <sys/types.h>
46 #endif
47 #include <sys/time.h>
48 #include "services/outside_network.h"
49 #include "services/listen_dnsport.h"
50 #include "services/cache/infra.h"
51 #include "util/data/msgparse.h"
52 #include "util/data/msgreply.h"
53 #include "util/data/msgencode.h"
54 #include "util/data/dname.h"
55 #include "util/netevent.h"
56 #include "util/log.h"
57 #include "util/net_help.h"
58 #include "util/random.h"
59 #include "util/fptr_wlist.h"
60 #include "sldns/sbuffer.h"
61 #include "dnstap/dnstap.h"
62 #ifdef HAVE_OPENSSL_SSL_H
63 #include <openssl/ssl.h>
64 #endif
65 
66 #ifdef HAVE_NETDB_H
67 #include <netdb.h>
68 #endif
69 #include <fcntl.h>
70 
/** Maximum number of attempts at drawing a random query ID that is not
 * already present in the pending tree, before the message is dropped. */
#define MAX_ID_RETRY 1000
/** Maximum number of attempts at finding an interface and port that can
 * be opened, before the message is dropped. */
#define MAX_PORT_RETRY 10000
/** number of retries on outgoing UDP queries */
#define OUTBOUND_UDP_RETRY 1
77 
/** initiate TCP transaction for serviced query */
static void serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff);
/**
 * With a fd available, randomize and send UDP.
 * Declared here because outnet_send_wait_udp() calls it before it is defined.
 * @param pend: pending entry to send.
 * @param packet: buffer with the query; its ID field is overwritten.
 * @param timeout: reply timeout in milliseconds.
 * @return false on failure.
 */
static int randomize_and_send_udp(struct pending* pend, sldns_buffer* packet,
	int timeout);

/** remove waiting tcp from the outnet waiting list */
static void waiting_list_remove(struct outside_network* outnet,
	struct waiting_tcp* w);
87 
88 int
89 pending_cmp(const void* key1, const void* key2)
90 {
91 	struct pending *p1 = (struct pending*)key1;
92 	struct pending *p2 = (struct pending*)key2;
93 	if(p1->id < p2->id)
94 		return -1;
95 	if(p1->id > p2->id)
96 		return 1;
97 	log_assert(p1->id == p2->id);
98 	return sockaddr_cmp(&p1->addr, p1->addrlen, &p2->addr, p2->addrlen);
99 }
100 
101 int
102 serviced_cmp(const void* key1, const void* key2)
103 {
104 	struct serviced_query* q1 = (struct serviced_query*)key1;
105 	struct serviced_query* q2 = (struct serviced_query*)key2;
106 	int r;
107 	if(q1->qbuflen < q2->qbuflen)
108 		return -1;
109 	if(q1->qbuflen > q2->qbuflen)
110 		return 1;
111 	log_assert(q1->qbuflen == q2->qbuflen);
112 	log_assert(q1->qbuflen >= 15 /* 10 header, root, type, class */);
113 	/* alternate casing of qname is still the same query */
114 	if((r = memcmp(q1->qbuf, q2->qbuf, 10)) != 0)
115 		return r;
116 	if((r = memcmp(q1->qbuf+q1->qbuflen-4, q2->qbuf+q2->qbuflen-4, 4)) != 0)
117 		return r;
118 	if(q1->dnssec != q2->dnssec) {
119 		if(q1->dnssec < q2->dnssec)
120 			return -1;
121 		return 1;
122 	}
123 	if((r = query_dname_compare(q1->qbuf+10, q2->qbuf+10)) != 0)
124 		return r;
125 	return sockaddr_cmp(&q1->addr, q1->addrlen, &q2->addr, q2->addrlen);
126 }
127 
128 /** delete waiting_tcp entry. Does not unlink from waiting list.
129  * @param w: to delete.
130  */
131 static void
132 waiting_tcp_delete(struct waiting_tcp* w)
133 {
134 	if(!w) return;
135 	if(w->timer)
136 		comm_timer_delete(w->timer);
137 	free(w);
138 }
139 
140 /**
141  * Pick random outgoing-interface of that family, and bind it.
142  * port set to 0 so OS picks a port number for us.
143  * if it is the ANY address, do not bind.
144  * @param w: tcp structure with destination address.
145  * @param s: socket fd.
146  * @return false on error, socket closed.
147  */
148 static int
149 pick_outgoing_tcp(struct waiting_tcp* w, int s)
150 {
151 	struct port_if* pi = NULL;
152 	int num;
153 #ifdef INET6
154 	if(addr_is_ip6(&w->addr, w->addrlen))
155 		num = w->outnet->num_ip6;
156 	else
157 #endif
158 		num = w->outnet->num_ip4;
159 	if(num == 0) {
160 		log_err("no TCP outgoing interfaces of family");
161 		log_addr(VERB_OPS, "for addr", &w->addr, w->addrlen);
162 #ifndef USE_WINSOCK
163 		close(s);
164 #else
165 		closesocket(s);
166 #endif
167 		return 0;
168 	}
169 #ifdef INET6
170 	if(addr_is_ip6(&w->addr, w->addrlen))
171 		pi = &w->outnet->ip6_ifs[ub_random_max(w->outnet->rnd, num)];
172 	else
173 #endif
174 		pi = &w->outnet->ip4_ifs[ub_random_max(w->outnet->rnd, num)];
175 	log_assert(pi);
176 	if(addr_is_any(&pi->addr, pi->addrlen)) {
177 		/* binding to the ANY interface is for listening sockets */
178 		return 1;
179 	}
180 	/* set port to 0 */
181 	if(addr_is_ip6(&pi->addr, pi->addrlen))
182 		((struct sockaddr_in6*)&pi->addr)->sin6_port = 0;
183 	else	((struct sockaddr_in*)&pi->addr)->sin_port = 0;
184 	if(bind(s, (struct sockaddr*)&pi->addr, pi->addrlen) != 0) {
185 #ifndef USE_WINSOCK
186 		log_err("outgoing tcp: bind: %s", strerror(errno));
187 		close(s);
188 #else
189 		log_err("outgoing tcp: bind: %s",
190 			wsa_strerror(WSAGetLastError()));
191 		closesocket(s);
192 #endif
193 		return 0;
194 	}
195 	log_addr(VERB_ALGO, "tcp bound to src", &pi->addr, pi->addrlen);
196 	return 1;
197 }
198 
/**
 * Use the next free pending_tcp buffer to service a tcp query.
 * Opens a socket for the destination family, binds it via
 * pick_outgoing_tcp(), starts a nonblocking connect, optionally sets up
 * SSL, and then copies the packet into the commpoint buffer and starts
 * listening on the socket.
 * @param w: waiting tcp entry with destination address and outnet.
 *	The outnet must have a free pending_tcp (tcp_free is not NULL).
 * @param pkt: wireformat query to send.
 * @param pkt_len: length of pkt.
 * @return false on failure; the socket has been closed in that case and
 *	the free list is left unchanged.
 */
static int
outnet_tcp_take_into_use(struct waiting_tcp* w, uint8_t* pkt, size_t pkt_len)
{
	struct pending_tcp* pend = w->outnet->tcp_free;
	int s;
	log_assert(pend);
	log_assert(pkt);
	log_assert(w->addrlen > 0);
	/* open socket */
#ifdef INET6
	if(addr_is_ip6(&w->addr, w->addrlen))
		s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
	else
#endif
		s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if(s == -1) {
#ifndef USE_WINSOCK
		log_err_addr("outgoing tcp: socket", strerror(errno),
			&w->addr, w->addrlen);
#else
		log_err_addr("outgoing tcp: socket",
			wsa_strerror(WSAGetLastError()), &w->addr, w->addrlen);
#endif
		return 0;
	}
	/* on failure pick_outgoing_tcp has already closed the socket */
	if(!pick_outgoing_tcp(w, s))
		return 0;

	fd_set_nonblock(s);
	/* nonblocking connect: "in progress" is not an error here */
	if(connect(s, (struct sockaddr*)&w->addr, w->addrlen) == -1) {
#ifndef USE_WINSOCK
#ifdef EINPROGRESS
		if(errno != EINPROGRESS) {
#else
		if(1) {
#endif
			if(tcp_connect_errno_needs_log(
				(struct sockaddr*)&w->addr, w->addrlen))
				log_err_addr("outgoing tcp: connect",
					strerror(errno), &w->addr, w->addrlen);
			close(s);
#else /* USE_WINSOCK */
		if(WSAGetLastError() != WSAEINPROGRESS &&
			WSAGetLastError() != WSAEWOULDBLOCK) {
			closesocket(s);
#endif
			return 0;
		}
	}
	if(w->outnet->sslctx && w->ssl_upstream) {
		pend->c->ssl = outgoing_ssl_fd(w->outnet->sslctx, s);
		if(!pend->c->ssl) {
			/* hand the fd to the commpoint so that closing the
			 * commpoint also closes the socket */
			pend->c->fd = s;
			comm_point_close(pend->c);
			return 0;
		}
#ifdef USE_WINSOCK
		comm_point_tcp_win_bio_cb(pend->c, pend->c->ssl);
#endif
		pend->c->ssl_shake_state = comm_ssl_shake_write;
	}
	/* the entry is no longer waiting: clear its packet pointer and store
	 * the pending_tcp that services it in the next_waiting field */
	w->pkt = NULL;
	w->next_waiting = (void*)pend;
	pend->id = LDNS_ID_WIRE(pkt);
	w->outnet->num_tcp_outgoing++;
	/* take pend off the free list */
	w->outnet->tcp_free = pend->next_free;
	pend->next_free = NULL;
	pend->query = w;
	pend->c->repinfo.addrlen = w->addrlen;
	memcpy(&pend->c->repinfo.addr, &w->addr, w->addrlen);
	/* copy the query into the commpoint buffer, ready for writing */
	sldns_buffer_clear(pend->c->buffer);
	sldns_buffer_write(pend->c->buffer, pkt, pkt_len);
	sldns_buffer_flip(pend->c->buffer);
	pend->c->tcp_is_reading = 0;
	pend->c->tcp_byte_count = 0;
	comm_point_start_listening(pend->c, s, -1);
	return 1;
}
278 
279 /** see if buffers can be used to service TCP queries */
280 static void
281 use_free_buffer(struct outside_network* outnet)
282 {
283 	struct waiting_tcp* w;
284 	while(outnet->tcp_free && outnet->tcp_wait_first
285 		&& !outnet->want_to_quit) {
286 		w = outnet->tcp_wait_first;
287 		outnet->tcp_wait_first = w->next_waiting;
288 		if(outnet->tcp_wait_last == w)
289 			outnet->tcp_wait_last = NULL;
290 		if(!outnet_tcp_take_into_use(w, w->pkt, w->pkt_len)) {
291 			comm_point_callback_t* cb = w->cb;
292 			void* cb_arg = w->cb_arg;
293 			waiting_tcp_delete(w);
294 			fptr_ok(fptr_whitelist_pending_tcp(cb));
295 			(void)(*cb)(NULL, cb_arg, NETEVENT_CLOSED, NULL);
296 		}
297 	}
298 }
299 
300 /** decomission a tcp buffer, closes commpoint and frees waiting_tcp entry */
301 static void
302 decomission_pending_tcp(struct outside_network* outnet,
303 	struct pending_tcp* pend)
304 {
305 	if(pend->c->ssl) {
306 #ifdef HAVE_SSL
307 		SSL_shutdown(pend->c->ssl);
308 		SSL_free(pend->c->ssl);
309 		pend->c->ssl = NULL;
310 #endif
311 	}
312 	comm_point_close(pend->c);
313 	pend->next_free = outnet->tcp_free;
314 	outnet->tcp_free = pend;
315 	waiting_tcp_delete(pend->query);
316 	pend->query = NULL;
317 	use_free_buffer(outnet);
318 }
319 
320 int
321 outnet_tcp_cb(struct comm_point* c, void* arg, int error,
322 	struct comm_reply *reply_info)
323 {
324 	struct pending_tcp* pend = (struct pending_tcp*)arg;
325 	struct outside_network* outnet = pend->query->outnet;
326 	verbose(VERB_ALGO, "outnettcp cb");
327 	if(error != NETEVENT_NOERROR) {
328 		verbose(VERB_QUERY, "outnettcp got tcp error %d", error);
329 		/* pass error below and exit */
330 	} else {
331 		/* check ID */
332 		if(sldns_buffer_limit(c->buffer) < sizeof(uint16_t) ||
333 			LDNS_ID_WIRE(sldns_buffer_begin(c->buffer))!=pend->id) {
334 			log_addr(VERB_QUERY,
335 				"outnettcp: bad ID in reply, from:",
336 				&pend->query->addr, pend->query->addrlen);
337 			error = NETEVENT_CLOSED;
338 		}
339 	}
340 	fptr_ok(fptr_whitelist_pending_tcp(pend->query->cb));
341 	(void)(*pend->query->cb)(c, pend->query->cb_arg, error, reply_info);
342 	decomission_pending_tcp(outnet, pend);
343 	return 0;
344 }
345 
346 /** lower use count on pc, see if it can be closed */
347 static void
348 portcomm_loweruse(struct outside_network* outnet, struct port_comm* pc)
349 {
350 	struct port_if* pif;
351 	pc->num_outstanding--;
352 	if(pc->num_outstanding > 0) {
353 		return;
354 	}
355 	/* close it and replace in unused list */
356 	verbose(VERB_ALGO, "close of port %d", pc->number);
357 	comm_point_close(pc->cp);
358 	pif = pc->pif;
359 	log_assert(pif->inuse > 0);
360 	pif->avail_ports[pif->avail_total - pif->inuse] = pc->number;
361 	pif->inuse--;
362 	pif->out[pc->index] = pif->out[pif->inuse];
363 	pif->out[pc->index]->index = pc->index;
364 	pc->next = outnet->unused_fds;
365 	outnet->unused_fds = pc;
366 }
367 
368 /** try to send waiting UDP queries */
369 static void
370 outnet_send_wait_udp(struct outside_network* outnet)
371 {
372 	struct pending* pend;
373 	/* process waiting queries */
374 	while(outnet->udp_wait_first && outnet->unused_fds
375 		&& !outnet->want_to_quit) {
376 		pend = outnet->udp_wait_first;
377 		outnet->udp_wait_first = pend->next_waiting;
378 		if(!pend->next_waiting) outnet->udp_wait_last = NULL;
379 		sldns_buffer_clear(outnet->udp_buff);
380 		sldns_buffer_write(outnet->udp_buff, pend->pkt, pend->pkt_len);
381 		sldns_buffer_flip(outnet->udp_buff);
382 		free(pend->pkt); /* freeing now makes get_mem correct */
383 		pend->pkt = NULL;
384 		pend->pkt_len = 0;
385 		if(!randomize_and_send_udp(pend, outnet->udp_buff,
386 			pend->timeout)) {
387 			/* callback error on pending */
388 			if(pend->cb) {
389 				fptr_ok(fptr_whitelist_pending_udp(pend->cb));
390 				(void)(*pend->cb)(outnet->unused_fds->cp, pend->cb_arg,
391 					NETEVENT_CLOSED, NULL);
392 			}
393 			pending_delete(outnet, pend);
394 		}
395 	}
396 }
397 
398 int
399 outnet_udp_cb(struct comm_point* c, void* arg, int error,
400 	struct comm_reply *reply_info)
401 {
402 	struct outside_network* outnet = (struct outside_network*)arg;
403 	struct pending key;
404 	struct pending* p;
405 	verbose(VERB_ALGO, "answer cb");
406 
407 	if(error != NETEVENT_NOERROR) {
408 		verbose(VERB_QUERY, "outnetudp got udp error %d", error);
409 		return 0;
410 	}
411 	if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
412 		verbose(VERB_QUERY, "outnetudp udp too short");
413 		return 0;
414 	}
415 	log_assert(reply_info);
416 
417 	/* setup lookup key */
418 	key.id = (unsigned)LDNS_ID_WIRE(sldns_buffer_begin(c->buffer));
419 	memcpy(&key.addr, &reply_info->addr, reply_info->addrlen);
420 	key.addrlen = reply_info->addrlen;
421 	verbose(VERB_ALGO, "Incoming reply id = %4.4x", key.id);
422 	log_addr(VERB_ALGO, "Incoming reply addr =",
423 		&reply_info->addr, reply_info->addrlen);
424 
425 	/* find it, see if this thing is a valid query response */
426 	verbose(VERB_ALGO, "lookup size is %d entries", (int)outnet->pending->count);
427 	p = (struct pending*)rbtree_search(outnet->pending, &key);
428 	if(!p) {
429 		verbose(VERB_QUERY, "received unwanted or unsolicited udp reply dropped.");
430 		log_buf(VERB_ALGO, "dropped message", c->buffer);
431 		outnet->unwanted_replies++;
432 		if(outnet->unwanted_threshold && ++outnet->unwanted_total
433 			>= outnet->unwanted_threshold) {
434 			log_warn("unwanted reply total reached threshold (%u)"
435 				" you may be under attack."
436 				" defensive action: clearing the cache",
437 				(unsigned)outnet->unwanted_threshold);
438 			fptr_ok(fptr_whitelist_alloc_cleanup(
439 				outnet->unwanted_action));
440 			(*outnet->unwanted_action)(outnet->unwanted_param);
441 			outnet->unwanted_total = 0;
442 		}
443 		return 0;
444 	}
445 
446 	verbose(VERB_ALGO, "received udp reply.");
447 	log_buf(VERB_ALGO, "udp message", c->buffer);
448 	if(p->pc->cp != c) {
449 		verbose(VERB_QUERY, "received reply id,addr on wrong port. "
450 			"dropped.");
451 		outnet->unwanted_replies++;
452 		if(outnet->unwanted_threshold && ++outnet->unwanted_total
453 			>= outnet->unwanted_threshold) {
454 			log_warn("unwanted reply total reached threshold (%u)"
455 				" you may be under attack."
456 				" defensive action: clearing the cache",
457 				(unsigned)outnet->unwanted_threshold);
458 			fptr_ok(fptr_whitelist_alloc_cleanup(
459 				outnet->unwanted_action));
460 			(*outnet->unwanted_action)(outnet->unwanted_param);
461 			outnet->unwanted_total = 0;
462 		}
463 		return 0;
464 	}
465 	comm_timer_disable(p->timer);
466 	verbose(VERB_ALGO, "outnet handle udp reply");
467 	/* delete from tree first in case callback creates a retry */
468 	(void)rbtree_delete(outnet->pending, p->node.key);
469 	if(p->cb) {
470 		fptr_ok(fptr_whitelist_pending_udp(p->cb));
471 		(void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_NOERROR, reply_info);
472 	}
473 	portcomm_loweruse(outnet, p->pc);
474 	pending_delete(NULL, p);
475 	outnet_send_wait_udp(outnet);
476 	return 0;
477 }
478 
/**
 * Count the number of ip4 and ip6 outgoing interfaces.
 * Without an interface list, one interface is counted for every enabled
 * family. With a list, every entry is counted for its family if that
 * family is enabled.
 * @param ifs: interface address strings.
 * @param num_ifs: number of entries in ifs.
 * @param do_ip4: if ip4 is enabled.
 * @param do_ip6: if ip6 is enabled.
 * @param num_ip4: returns the ip4 count.
 * @param num_ip6: returns the ip6 count.
 */
static void
calc_num46(char** ifs, int num_ifs, int do_ip4, int do_ip6,
	int* num_ip4, int* num_ip6)
{
	int idx;
	*num_ip4 = 0;
	*num_ip6 = 0;
	if(num_ifs <= 0) {
		/* no explicit list: one default interface per family */
		*num_ip4 = do_ip4 ? 1 : 0;
		*num_ip6 = do_ip6 ? 1 : 0;
		return;
	}
	for(idx = 0; idx < num_ifs; idx++) {
		if(str_is_ip6(ifs[idx])) {
			if(do_ip6)
				(*num_ip6)++;
		} else if(do_ip4) {
			(*num_ip4)++;
		}
	}
}
506 
507 void
508 pending_udp_timer_delay_cb(void* arg)
509 {
510 	struct pending* p = (struct pending*)arg;
511 	struct outside_network* outnet = p->outnet;
512 	verbose(VERB_ALGO, "timeout udp with delay");
513 	portcomm_loweruse(outnet, p->pc);
514 	pending_delete(outnet, p);
515 	outnet_send_wait_udp(outnet);
516 }
517 
518 void
519 pending_udp_timer_cb(void *arg)
520 {
521 	struct pending* p = (struct pending*)arg;
522 	struct outside_network* outnet = p->outnet;
523 	/* it timed out */
524 	verbose(VERB_ALGO, "timeout udp");
525 	if(p->cb) {
526 		fptr_ok(fptr_whitelist_pending_udp(p->cb));
527 		(void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_TIMEOUT, NULL);
528 	}
529 	/* if delayclose, keep port open for a longer time.
530 	 * But if the udpwaitlist exists, then we are struggling to
531 	 * keep up with demand for sockets, so do not wait, but service
532 	 * the customer (customer service more important than portICMPs) */
533 	if(outnet->delayclose && !outnet->udp_wait_first) {
534 		p->cb = NULL;
535 		p->timer->callback = &pending_udp_timer_delay_cb;
536 		comm_timer_set(p->timer, &outnet->delay_tv);
537 		return;
538 	}
539 	portcomm_loweruse(outnet, p->pc);
540 	pending_delete(outnet, p);
541 	outnet_send_wait_udp(outnet);
542 }
543 
544 /** create pending_tcp buffers */
545 static int
546 create_pending_tcp(struct outside_network* outnet, size_t bufsize)
547 {
548 	size_t i;
549 	if(outnet->num_tcp == 0)
550 		return 1; /* no tcp needed, nothing to do */
551 	if(!(outnet->tcp_conns = (struct pending_tcp **)calloc(
552 			outnet->num_tcp, sizeof(struct pending_tcp*))))
553 		return 0;
554 	for(i=0; i<outnet->num_tcp; i++) {
555 		if(!(outnet->tcp_conns[i] = (struct pending_tcp*)calloc(1,
556 			sizeof(struct pending_tcp))))
557 			return 0;
558 		outnet->tcp_conns[i]->next_free = outnet->tcp_free;
559 		outnet->tcp_free = outnet->tcp_conns[i];
560 		outnet->tcp_conns[i]->c = comm_point_create_tcp_out(
561 			outnet->base, bufsize, outnet_tcp_cb,
562 			outnet->tcp_conns[i]);
563 		if(!outnet->tcp_conns[i]->c)
564 			return 0;
565 	}
566 	return 1;
567 }
568 
569 /** setup an outgoing interface, ready address */
570 static int setup_if(struct port_if* pif, const char* addrstr,
571 	int* avail, int numavail, size_t numfd)
572 {
573 	pif->avail_total = numavail;
574 	pif->avail_ports = (int*)memdup(avail, (size_t)numavail*sizeof(int));
575 	if(!pif->avail_ports)
576 		return 0;
577 	if(!ipstrtoaddr(addrstr, UNBOUND_DNS_PORT, &pif->addr, &pif->addrlen))
578 		return 0;
579 	pif->maxout = (int)numfd;
580 	pif->inuse = 0;
581 	pif->out = (struct port_comm**)calloc(numfd,
582 		sizeof(struct port_comm*));
583 	if(!pif->out)
584 		return 0;
585 	return 1;
586 }
587 
588 struct outside_network*
589 outside_network_create(struct comm_base *base, size_t bufsize,
590 	size_t num_ports, char** ifs, int num_ifs, int do_ip4,
591 	int do_ip6, size_t num_tcp, struct infra_cache* infra,
592 	struct ub_randstate* rnd, int use_caps_for_id, int* availports,
593 	int numavailports, size_t unwanted_threshold,
594 	void (*unwanted_action)(void*), void* unwanted_param, int do_udp,
595 	void* sslctx, int delayclose, struct dt_env* dtenv)
596 {
597 	struct outside_network* outnet = (struct outside_network*)
598 		calloc(1, sizeof(struct outside_network));
599 	size_t k;
600 	if(!outnet) {
601 		log_err("malloc failed");
602 		return NULL;
603 	}
604 	comm_base_timept(base, &outnet->now_secs, &outnet->now_tv);
605 	outnet->base = base;
606 	outnet->num_tcp = num_tcp;
607 	outnet->num_tcp_outgoing = 0;
608 	outnet->infra = infra;
609 	outnet->rnd = rnd;
610 	outnet->sslctx = sslctx;
611 #ifdef USE_DNSTAP
612 	outnet->dtenv = dtenv;
613 #else
614 	(void)dtenv;
615 #endif
616 	outnet->svcd_overhead = 0;
617 	outnet->want_to_quit = 0;
618 	outnet->unwanted_threshold = unwanted_threshold;
619 	outnet->unwanted_action = unwanted_action;
620 	outnet->unwanted_param = unwanted_param;
621 	outnet->use_caps_for_id = use_caps_for_id;
622 	outnet->do_udp = do_udp;
623 #ifndef S_SPLINT_S
624 	if(delayclose) {
625 		outnet->delayclose = 1;
626 		outnet->delay_tv.tv_sec = delayclose/1000;
627 		outnet->delay_tv.tv_usec = (delayclose%1000)*1000;
628 	}
629 #endif
630 	if(numavailports == 0) {
631 		log_err("no outgoing ports available");
632 		outside_network_delete(outnet);
633 		return NULL;
634 	}
635 #ifndef INET6
636 	do_ip6 = 0;
637 #endif
638 	calc_num46(ifs, num_ifs, do_ip4, do_ip6,
639 		&outnet->num_ip4, &outnet->num_ip6);
640 	if(outnet->num_ip4 != 0) {
641 		if(!(outnet->ip4_ifs = (struct port_if*)calloc(
642 			(size_t)outnet->num_ip4, sizeof(struct port_if)))) {
643 			log_err("malloc failed");
644 			outside_network_delete(outnet);
645 			return NULL;
646 		}
647 	}
648 	if(outnet->num_ip6 != 0) {
649 		if(!(outnet->ip6_ifs = (struct port_if*)calloc(
650 			(size_t)outnet->num_ip6, sizeof(struct port_if)))) {
651 			log_err("malloc failed");
652 			outside_network_delete(outnet);
653 			return NULL;
654 		}
655 	}
656 	if(	!(outnet->udp_buff = sldns_buffer_new(bufsize)) ||
657 		!(outnet->pending = rbtree_create(pending_cmp)) ||
658 		!(outnet->serviced = rbtree_create(serviced_cmp)) ||
659 		!create_pending_tcp(outnet, bufsize)) {
660 		log_err("malloc failed");
661 		outside_network_delete(outnet);
662 		return NULL;
663 	}
664 
665 	/* allocate commpoints */
666 	for(k=0; k<num_ports; k++) {
667 		struct port_comm* pc;
668 		pc = (struct port_comm*)calloc(1, sizeof(*pc));
669 		if(!pc) {
670 			log_err("malloc failed");
671 			outside_network_delete(outnet);
672 			return NULL;
673 		}
674 		pc->cp = comm_point_create_udp(outnet->base, -1,
675 			outnet->udp_buff, outnet_udp_cb, outnet);
676 		if(!pc->cp) {
677 			log_err("malloc failed");
678 			free(pc);
679 			outside_network_delete(outnet);
680 			return NULL;
681 		}
682 		pc->next = outnet->unused_fds;
683 		outnet->unused_fds = pc;
684 	}
685 
686 	/* allocate interfaces */
687 	if(num_ifs == 0) {
688 		if(do_ip4 && !setup_if(&outnet->ip4_ifs[0], "0.0.0.0",
689 			availports, numavailports, num_ports)) {
690 			log_err("malloc failed");
691 			outside_network_delete(outnet);
692 			return NULL;
693 		}
694 		if(do_ip6 && !setup_if(&outnet->ip6_ifs[0], "::",
695 			availports, numavailports, num_ports)) {
696 			log_err("malloc failed");
697 			outside_network_delete(outnet);
698 			return NULL;
699 		}
700 	} else {
701 		size_t done_4 = 0, done_6 = 0;
702 		int i;
703 		for(i=0; i<num_ifs; i++) {
704 			if(str_is_ip6(ifs[i]) && do_ip6) {
705 				if(!setup_if(&outnet->ip6_ifs[done_6], ifs[i],
706 					availports, numavailports, num_ports)){
707 					log_err("malloc failed");
708 					outside_network_delete(outnet);
709 					return NULL;
710 				}
711 				done_6++;
712 			}
713 			if(!str_is_ip6(ifs[i]) && do_ip4) {
714 				if(!setup_if(&outnet->ip4_ifs[done_4], ifs[i],
715 					availports, numavailports, num_ports)){
716 					log_err("malloc failed");
717 					outside_network_delete(outnet);
718 					return NULL;
719 				}
720 				done_4++;
721 			}
722 		}
723 	}
724 	return outnet;
725 }
726 
727 /** helper pending delete */
728 static void
729 pending_node_del(rbnode_t* node, void* arg)
730 {
731 	struct pending* pend = (struct pending*)node;
732 	struct outside_network* outnet = (struct outside_network*)arg;
733 	pending_delete(outnet, pend);
734 }
735 
736 /** helper serviced delete */
737 static void
738 serviced_node_del(rbnode_t* node, void* ATTR_UNUSED(arg))
739 {
740 	struct serviced_query* sq = (struct serviced_query*)node;
741 	struct service_callback* p = sq->cblist, *np;
742 	free(sq->qbuf);
743 	free(sq->zone);
744 	while(p) {
745 		np = p->next;
746 		free(p);
747 		p = np;
748 	}
749 	free(sq);
750 }
751 
752 void
753 outside_network_quit_prepare(struct outside_network* outnet)
754 {
755 	if(!outnet)
756 		return;
757 	/* prevent queued items from being sent */
758 	outnet->want_to_quit = 1;
759 }
760 
/**
 * Delete the outside network and everything it owns: pending and
 * serviced queries, the udp buffer, unused and open udp ports, the
 * interface arrays, tcp buffers and the tcp and udp waiting lists.
 * Can be called on a partially created structure.
 * @param outnet: outside network to delete, may be NULL.
 */
void
outside_network_delete(struct outside_network* outnet)
{
	if(!outnet)
		return;
	outnet->want_to_quit = 1;
	/* check every element, since we can be called on malloc error */
	if(outnet->pending) {
		/* free pending elements, but do no unlink from tree. */
		/* the NULL argument makes pending_delete skip the unlinking */
		traverse_postorder(outnet->pending, pending_node_del, NULL);
		free(outnet->pending);
	}
	if(outnet->serviced) {
		traverse_postorder(outnet->serviced, serviced_node_del, NULL);
		free(outnet->serviced);
	}
	if(outnet->udp_buff)
		sldns_buffer_free(outnet->udp_buff);
	/* port commpoints that are not open */
	if(outnet->unused_fds) {
		struct port_comm* p = outnet->unused_fds, *np;
		while(p) {
			np = p->next;
			comm_point_delete(p->cp);
			free(p);
			p = np;
		}
		outnet->unused_fds = NULL;
	}
	/* ip4 interfaces, with the port commpoints that are open on them */
	if(outnet->ip4_ifs) {
		int i, k;
		for(i=0; i<outnet->num_ip4; i++) {
			for(k=0; k<outnet->ip4_ifs[i].inuse; k++) {
				struct port_comm* pc = outnet->ip4_ifs[i].
					out[k];
				comm_point_delete(pc->cp);
				free(pc);
			}
			free(outnet->ip4_ifs[i].avail_ports);
			free(outnet->ip4_ifs[i].out);
		}
		free(outnet->ip4_ifs);
	}
	/* ip6 interfaces, with the port commpoints that are open on them */
	if(outnet->ip6_ifs) {
		int i, k;
		for(i=0; i<outnet->num_ip6; i++) {
			for(k=0; k<outnet->ip6_ifs[i].inuse; k++) {
				struct port_comm* pc = outnet->ip6_ifs[i].
					out[k];
				comm_point_delete(pc->cp);
				free(pc);
			}
			free(outnet->ip6_ifs[i].avail_ports);
			free(outnet->ip6_ifs[i].out);
		}
		free(outnet->ip6_ifs);
	}
	/* tcp buffers, and the query each of them is servicing, if any */
	if(outnet->tcp_conns) {
		size_t i;
		for(i=0; i<outnet->num_tcp; i++)
			if(outnet->tcp_conns[i]) {
				comm_point_delete(outnet->tcp_conns[i]->c);
				waiting_tcp_delete(outnet->tcp_conns[i]->query);
				free(outnet->tcp_conns[i]);
			}
		free(outnet->tcp_conns);
	}
	/* tcp queries that are still waiting for a buffer */
	if(outnet->tcp_wait_first) {
		struct waiting_tcp* p = outnet->tcp_wait_first, *np;
		while(p) {
			np = p->next_waiting;
			waiting_tcp_delete(p);
			p = np;
		}
	}
	/* udp queries that are still waiting for a port */
	if(outnet->udp_wait_first) {
		struct pending* p = outnet->udp_wait_first, *np;
		while(p) {
			np = p->next_waiting;
			/* NULL: no unlinking from the list or the tree */
			pending_delete(NULL, p);
			p = np;
		}
	}
	free(outnet);
}
845 
846 void
847 pending_delete(struct outside_network* outnet, struct pending* p)
848 {
849 	if(!p)
850 		return;
851 	if(outnet && outnet->udp_wait_first &&
852 		(p->next_waiting || p == outnet->udp_wait_last) ) {
853 		/* delete from waiting list, if it is in the waiting list */
854 		struct pending* prev = NULL, *x = outnet->udp_wait_first;
855 		while(x && x != p) {
856 			prev = x;
857 			x = x->next_waiting;
858 		}
859 		if(x) {
860 			log_assert(x == p);
861 			if(prev)
862 				prev->next_waiting = p->next_waiting;
863 			else	outnet->udp_wait_first = p->next_waiting;
864 			if(outnet->udp_wait_last == p)
865 				outnet->udp_wait_last = prev;
866 		}
867 	}
868 	if(outnet) {
869 		(void)rbtree_delete(outnet->pending, p->node.key);
870 	}
871 	if(p->timer)
872 		comm_timer_delete(p->timer);
873 	free(p->pkt);
874 	free(p);
875 }
876 
/**
 * Try to open a UDP socket for outgoing communication on the given port.
 * The port is written into addr before the socket is created.
 * @param addr: socket address; its port field is overwritten.
 * @param addrlen: length of address.
 * @param port: port override for addr.
 * @param inuse: if -1 is returned, this bool means the port was in use.
 * @return fd or -1
 */
static int
udp_sockport(struct sockaddr_storage* addr, socklen_t addrlen, int port,
	int* inuse)
{
	int family, noproto;
	in_port_t netport = (in_port_t)htons((uint16_t)port);
	if(addr_is_ip6(addr, addrlen)) {
		((struct sockaddr_in6*)addr)->sin6_port = netport;
		family = AF_INET6;
	} else {
		((struct sockaddr_in*)addr)->sin_port = netport;
		family = AF_INET;
	}
	return create_udp_sock(family, SOCK_DGRAM,
		(struct sockaddr*)addr, addrlen, 1, inuse, &noproto,
		0, 0, 0, NULL, 0);
}
906 
907 /** Select random ID */
908 static int
909 select_id(struct outside_network* outnet, struct pending* pend,
910 	sldns_buffer* packet)
911 {
912 	int id_tries = 0;
913 	pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
914 	LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
915 
916 	/* insert in tree */
917 	pend->node.key = pend;
918 	while(!rbtree_insert(outnet->pending, &pend->node)) {
919 		/* change ID to avoid collision */
920 		pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
921 		LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
922 		id_tries++;
923 		if(id_tries == MAX_ID_RETRY) {
924 			pend->id=99999; /* non existant ID */
925 			log_err("failed to generate unique ID, drop msg");
926 			return 0;
927 		}
928 	}
929 	verbose(VERB_ALGO, "inserted new pending reply id=%4.4x", pend->id);
930 	return 1;
931 }
932 
/**
 * Select a random interface and port for a pending query.
 * A random index into all ports of the chosen interface is drawn. An
 * index below the number of ports in use selects a port that is already
 * open; otherwise an attempt is made to open the corresponding port from
 * the list of available ports. If that port turns out to be in use,
 * another draw is made, up to MAX_PORT_RETRY times.
 * @param outnet: outside network; must have an entry in unused_fds.
 * @param pend: pending entry; its pc is set on success.
 * @param num_if: number of interfaces in ifs.
 * @param ifs: interfaces of the address family of the query.
 * @return false if there are no interfaces, on a socket error, or if no
 *	port could be opened. On success the use count of pend->pc has
 *	been incremented.
 */
static int
select_ifport(struct outside_network* outnet, struct pending* pend,
	int num_if, struct port_if* ifs)
{
	int my_if, my_port, fd, portno, inuse, tries=0;
	struct port_if* pif;
	/* randomly select interface and port */
	if(num_if == 0) {
		verbose(VERB_QUERY, "Need to send query but have no "
			"outgoing interfaces of that family");
		return 0;
	}
	log_assert(outnet->unused_fds);
	tries = 0;
	while(1) {
		my_if = ub_random_max(outnet->rnd, num_if);
		pif = &ifs[my_if];
		/* my_port indexes open ports first (below inuse), and then
		 * the entries of avail_ports */
		my_port = ub_random_max(outnet->rnd, pif->avail_total);
		if(my_port < pif->inuse) {
			/* port already open */
			pend->pc = pif->out[my_port];
			verbose(VERB_ALGO, "using UDP if=%d port=%d",
				my_if, pend->pc->number);
			break;
		}
		/* try to open new port, if fails, loop to try again */
		log_assert(pif->inuse < pif->maxout);
		portno = pif->avail_ports[my_port - pif->inuse];
		fd = udp_sockport(&pif->addr, pif->addrlen, portno, &inuse);
		if(fd == -1 && !inuse) {
			/* nonrecoverable error making socket */
			return 0;
		}
		if(fd != -1) {
			verbose(VERB_ALGO, "opened UDP if=%d port=%d",
				my_if, portno);
			/* grab fd */
			pend->pc = outnet->unused_fds;
			outnet->unused_fds = pend->pc->next;

			/* setup portcomm */
			pend->pc->next = NULL;
			pend->pc->number = portno;
			pend->pc->pif = pif;
			pend->pc->index = pif->inuse;
			pend->pc->num_outstanding = 0;
			comm_point_start_listening(pend->pc->cp, fd, -1);

			/* grab port in interface */
			/* the last available port moves into the slot of the
			 * port that was just opened */
			pif->out[pif->inuse] = pend->pc;
			pif->avail_ports[my_port - pif->inuse] =
				pif->avail_ports[pif->avail_total-pif->inuse-1];
			pif->inuse++;
			break;
		}
		/* failed, already in use */
		verbose(VERB_QUERY, "port %d in use, trying another", portno);
		tries++;
		if(tries == MAX_PORT_RETRY) {
			log_err("failed to find an open port, drop msg");
			return 0;
		}
	}
	log_assert(pend->pc);
	pend->pc->num_outstanding++;

	return 1;
}
1002 
1003 static int
1004 randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout)
1005 {
1006 	struct timeval tv;
1007 	struct outside_network* outnet = pend->sq->outnet;
1008 
1009 	/* select id */
1010 	if(!select_id(outnet, pend, packet)) {
1011 		return 0;
1012 	}
1013 
1014 	/* select src_if, port */
1015 	if(addr_is_ip6(&pend->addr, pend->addrlen)) {
1016 		if(!select_ifport(outnet, pend,
1017 			outnet->num_ip6, outnet->ip6_ifs))
1018 			return 0;
1019 	} else {
1020 		if(!select_ifport(outnet, pend,
1021 			outnet->num_ip4, outnet->ip4_ifs))
1022 			return 0;
1023 	}
1024 	log_assert(pend->pc && pend->pc->cp);
1025 
1026 	/* send it over the commlink */
1027 	if(!comm_point_send_udp_msg(pend->pc->cp, packet,
1028 		(struct sockaddr*)&pend->addr, pend->addrlen)) {
1029 		portcomm_loweruse(outnet, pend->pc);
1030 		return 0;
1031 	}
1032 
1033 	/* system calls to set timeout after sending UDP to make roundtrip
1034 	   smaller. */
1035 #ifndef S_SPLINT_S
1036 	tv.tv_sec = timeout/1000;
1037 	tv.tv_usec = (timeout%1000)*1000;
1038 #endif
1039 	comm_timer_set(pend->timer, &tv);
1040 
1041 #ifdef USE_DNSTAP
1042 	if(outnet->dtenv &&
1043 	   (outnet->dtenv->log_resolver_query_messages ||
1044 	    outnet->dtenv->log_forwarder_query_messages))
1045 		dt_msg_send_outside_query(outnet->dtenv, &pend->addr, comm_udp,
1046 		pend->sq->zone, pend->sq->zonelen, packet);
1047 #endif
1048 	return 1;
1049 }
1050 
1051 struct pending*
1052 pending_udp_query(struct serviced_query* sq, struct sldns_buffer* packet,
1053 	int timeout, comm_point_callback_t* cb, void* cb_arg)
1054 {
1055 	struct pending* pend = (struct pending*)calloc(1, sizeof(*pend));
1056 	if(!pend) return NULL;
1057 	pend->outnet = sq->outnet;
1058 	pend->sq = sq;
1059 	pend->addrlen = sq->addrlen;
1060 	memmove(&pend->addr, &sq->addr, sq->addrlen);
1061 	pend->cb = cb;
1062 	pend->cb_arg = cb_arg;
1063 	pend->node.key = pend;
1064 	pend->timer = comm_timer_create(sq->outnet->base, pending_udp_timer_cb,
1065 		pend);
1066 	if(!pend->timer) {
1067 		free(pend);
1068 		return NULL;
1069 	}
1070 
1071 	if(sq->outnet->unused_fds == NULL) {
1072 		/* no unused fd, cannot create a new port (randomly) */
1073 		verbose(VERB_ALGO, "no fds available, udp query waiting");
1074 		pend->timeout = timeout;
1075 		pend->pkt_len = sldns_buffer_limit(packet);
1076 		pend->pkt = (uint8_t*)memdup(sldns_buffer_begin(packet),
1077 			pend->pkt_len);
1078 		if(!pend->pkt) {
1079 			comm_timer_delete(pend->timer);
1080 			free(pend);
1081 			return NULL;
1082 		}
1083 		/* put at end of waiting list */
1084 		if(sq->outnet->udp_wait_last)
1085 			sq->outnet->udp_wait_last->next_waiting = pend;
1086 		else
1087 			sq->outnet->udp_wait_first = pend;
1088 		sq->outnet->udp_wait_last = pend;
1089 		return pend;
1090 	}
1091 	if(!randomize_and_send_udp(pend, packet, timeout)) {
1092 		pending_delete(sq->outnet, pend);
1093 		return NULL;
1094 	}
1095 	return pend;
1096 }
1097 
1098 void
1099 outnet_tcptimer(void* arg)
1100 {
1101 	struct waiting_tcp* w = (struct waiting_tcp*)arg;
1102 	struct outside_network* outnet = w->outnet;
1103 	comm_point_callback_t* cb;
1104 	void* cb_arg;
1105 	if(w->pkt) {
1106 		/* it is on the waiting list */
1107 		waiting_list_remove(outnet, w);
1108 	} else {
1109 		/* it was in use */
1110 		struct pending_tcp* pend=(struct pending_tcp*)w->next_waiting;
1111 		comm_point_close(pend->c);
1112 		pend->query = NULL;
1113 		pend->next_free = outnet->tcp_free;
1114 		outnet->tcp_free = pend;
1115 	}
1116 	cb = w->cb;
1117 	cb_arg = w->cb_arg;
1118 	waiting_tcp_delete(w);
1119 	fptr_ok(fptr_whitelist_pending_tcp(cb));
1120 	(void)(*cb)(NULL, cb_arg, NETEVENT_TIMEOUT, NULL);
1121 	use_free_buffer(outnet);
1122 }
1123 
1124 struct waiting_tcp*
1125 pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet,
1126 	int timeout, comm_point_callback_t* callback, void* callback_arg)
1127 {
1128 	struct pending_tcp* pend = sq->outnet->tcp_free;
1129 	struct waiting_tcp* w;
1130 	struct timeval tv;
1131 	uint16_t id;
1132 	/* if no buffer is free allocate space to store query */
1133 	w = (struct waiting_tcp*)malloc(sizeof(struct waiting_tcp)
1134 		+ (pend?0:sldns_buffer_limit(packet)));
1135 	if(!w) {
1136 		return NULL;
1137 	}
1138 	if(!(w->timer = comm_timer_create(sq->outnet->base, outnet_tcptimer, w))) {
1139 		free(w);
1140 		return NULL;
1141 	}
1142 	w->pkt = NULL;
1143 	w->pkt_len = 0;
1144 	id = ((unsigned)ub_random(sq->outnet->rnd)>>8) & 0xffff;
1145 	LDNS_ID_SET(sldns_buffer_begin(packet), id);
1146 	memcpy(&w->addr, &sq->addr, sq->addrlen);
1147 	w->addrlen = sq->addrlen;
1148 	w->outnet = sq->outnet;
1149 	w->cb = callback;
1150 	w->cb_arg = callback_arg;
1151 	w->ssl_upstream = sq->ssl_upstream;
1152 #ifndef S_SPLINT_S
1153 	tv.tv_sec = timeout;
1154 	tv.tv_usec = 0;
1155 #endif
1156 	comm_timer_set(w->timer, &tv);
1157 	if(pend) {
1158 		/* we have a buffer available right now */
1159 		if(!outnet_tcp_take_into_use(w, sldns_buffer_begin(packet),
1160 			sldns_buffer_limit(packet))) {
1161 			waiting_tcp_delete(w);
1162 			return NULL;
1163 		}
1164 #ifdef USE_DNSTAP
1165 		if(sq->outnet->dtenv &&
1166 		   (sq->outnet->dtenv->log_resolver_query_messages ||
1167 		    sq->outnet->dtenv->log_forwarder_query_messages))
1168 		dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr,
1169 		comm_tcp, sq->zone, sq->zonelen, packet);
1170 #endif
1171 	} else {
1172 		/* queue up */
1173 		w->pkt = (uint8_t*)w + sizeof(struct waiting_tcp);
1174 		w->pkt_len = sldns_buffer_limit(packet);
1175 		memmove(w->pkt, sldns_buffer_begin(packet), w->pkt_len);
1176 		w->next_waiting = NULL;
1177 		if(sq->outnet->tcp_wait_last)
1178 			sq->outnet->tcp_wait_last->next_waiting = w;
1179 		else	sq->outnet->tcp_wait_first = w;
1180 		sq->outnet->tcp_wait_last = w;
1181 	}
1182 	return w;
1183 }
1184 
1185 /** create query for serviced queries */
1186 static void
1187 serviced_gen_query(sldns_buffer* buff, uint8_t* qname, size_t qnamelen,
1188 	uint16_t qtype, uint16_t qclass, uint16_t flags)
1189 {
1190 	sldns_buffer_clear(buff);
1191 	/* skip id */
1192 	sldns_buffer_write_u16(buff, flags);
1193 	sldns_buffer_write_u16(buff, 1); /* qdcount */
1194 	sldns_buffer_write_u16(buff, 0); /* ancount */
1195 	sldns_buffer_write_u16(buff, 0); /* nscount */
1196 	sldns_buffer_write_u16(buff, 0); /* arcount */
1197 	sldns_buffer_write(buff, qname, qnamelen);
1198 	sldns_buffer_write_u16(buff, qtype);
1199 	sldns_buffer_write_u16(buff, qclass);
1200 	sldns_buffer_flip(buff);
1201 }
1202 
1203 /** lookup serviced query in serviced query rbtree */
1204 static struct serviced_query*
1205 lookup_serviced(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
1206 	struct sockaddr_storage* addr, socklen_t addrlen)
1207 {
1208 	struct serviced_query key;
1209 	key.node.key = &key;
1210 	key.qbuf = sldns_buffer_begin(buff);
1211 	key.qbuflen = sldns_buffer_limit(buff);
1212 	key.dnssec = dnssec;
1213 	memcpy(&key.addr, addr, addrlen);
1214 	key.addrlen = addrlen;
1215 	key.outnet = outnet;
1216 	return (struct serviced_query*)rbtree_search(outnet->serviced, &key);
1217 }
1218 
1219 /** Create new serviced entry */
1220 static struct serviced_query*
1221 serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
1222 	int want_dnssec, int nocaps, int tcp_upstream, int ssl_upstream,
1223 	struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone,
1224 	size_t zonelen, int qtype)
1225 {
1226 	struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq));
1227 #ifdef UNBOUND_DEBUG
1228 	rbnode_t* ins;
1229 #endif
1230 	if(!sq)
1231 		return NULL;
1232 	sq->node.key = sq;
1233 	sq->qbuf = memdup(sldns_buffer_begin(buff), sldns_buffer_limit(buff));
1234 	if(!sq->qbuf) {
1235 		free(sq);
1236 		return NULL;
1237 	}
1238 	sq->qbuflen = sldns_buffer_limit(buff);
1239 	sq->zone = memdup(zone, zonelen);
1240 	if(!sq->zone) {
1241 		free(sq->qbuf);
1242 		free(sq);
1243 		return NULL;
1244 	}
1245 	sq->zonelen = zonelen;
1246 	sq->qtype = qtype;
1247 	sq->dnssec = dnssec;
1248 	sq->want_dnssec = want_dnssec;
1249 	sq->nocaps = nocaps;
1250 	sq->tcp_upstream = tcp_upstream;
1251 	sq->ssl_upstream = ssl_upstream;
1252 	memcpy(&sq->addr, addr, addrlen);
1253 	sq->addrlen = addrlen;
1254 	sq->outnet = outnet;
1255 	sq->cblist = NULL;
1256 	sq->pending = NULL;
1257 	sq->status = serviced_initial;
1258 	sq->retry = 0;
1259 	sq->to_be_deleted = 0;
1260 #ifdef UNBOUND_DEBUG
1261 	ins =
1262 #else
1263 	(void)
1264 #endif
1265 	rbtree_insert(outnet->serviced, &sq->node);
1266 	log_assert(ins != NULL); /* must not be already present */
1267 	return sq;
1268 }
1269 
1270 /** remove waiting tcp from the outnet waiting list */
1271 static void
1272 waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w)
1273 {
1274 	struct waiting_tcp* p = outnet->tcp_wait_first, *prev = NULL;
1275 	while(p) {
1276 		if(p == w) {
1277 			/* remove w */
1278 			if(prev)
1279 				prev->next_waiting = w->next_waiting;
1280 			else	outnet->tcp_wait_first = w->next_waiting;
1281 			if(outnet->tcp_wait_last == w)
1282 				outnet->tcp_wait_last = prev;
1283 			return;
1284 		}
1285 		prev = p;
1286 		p = p->next_waiting;
1287 	}
1288 }
1289 
1290 /** cleanup serviced query entry */
1291 static void
1292 serviced_delete(struct serviced_query* sq)
1293 {
1294 	if(sq->pending) {
1295 		/* clear up the pending query */
1296 		if(sq->status == serviced_query_UDP_EDNS ||
1297 			sq->status == serviced_query_UDP ||
1298 			sq->status == serviced_query_PROBE_EDNS ||
1299 			sq->status == serviced_query_UDP_EDNS_FRAG ||
1300 			sq->status == serviced_query_UDP_EDNS_fallback) {
1301 			struct pending* p = (struct pending*)sq->pending;
1302 			if(p->pc)
1303 				portcomm_loweruse(sq->outnet, p->pc);
1304 			pending_delete(sq->outnet, p);
1305 			/* this call can cause reentrant calls back into the
1306 			 * mesh */
1307 			outnet_send_wait_udp(sq->outnet);
1308 		} else {
1309 			struct waiting_tcp* p = (struct waiting_tcp*)
1310 				sq->pending;
1311 			if(p->pkt == NULL) {
1312 				decomission_pending_tcp(sq->outnet,
1313 					(struct pending_tcp*)p->next_waiting);
1314 			} else {
1315 				waiting_list_remove(sq->outnet, p);
1316 				waiting_tcp_delete(p);
1317 			}
1318 		}
1319 	}
1320 	/* does not delete from tree, caller has to do that */
1321 	serviced_node_del(&sq->node, NULL);
1322 }
1323 
1324 /** perturb a dname capitalization randomly */
1325 static void
1326 serviced_perturb_qname(struct ub_randstate* rnd, uint8_t* qbuf, size_t len)
1327 {
1328 	uint8_t lablen;
1329 	uint8_t* d = qbuf + 10;
1330 	long int random = 0;
1331 	int bits = 0;
1332 	log_assert(len >= 10 + 5 /* offset qname, root, qtype, qclass */);
1333 	lablen = *d++;
1334 	while(lablen) {
1335 		while(lablen--) {
1336 			/* only perturb A-Z, a-z */
1337 			if(isalpha((unsigned char)*d)) {
1338 				/* get a random bit */
1339 				if(bits == 0) {
1340 					random = ub_random(rnd);
1341 					bits = 30;
1342 				}
1343 				if(random & 0x1) {
1344 					*d = (uint8_t)toupper((unsigned char)*d);
1345 				} else {
1346 					*d = (uint8_t)tolower((unsigned char)*d);
1347 				}
1348 				random >>= 1;
1349 				bits--;
1350 			}
1351 			d++;
1352 		}
1353 		lablen = *d++;
1354 	}
1355 	if(verbosity >= VERB_ALGO) {
1356 		char buf[LDNS_MAX_DOMAINLEN+1];
1357 		dname_str(qbuf+10, buf);
1358 		verbose(VERB_ALGO, "qname perturbed to %s", buf);
1359 	}
1360 }
1361 
1362 /** put serviced query into a buffer */
1363 static void
1364 serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns)
1365 {
1366 	/* if we are using 0x20 bits for ID randomness, perturb them */
1367 	if(sq->outnet->use_caps_for_id && !sq->nocaps) {
1368 		serviced_perturb_qname(sq->outnet->rnd, sq->qbuf, sq->qbuflen);
1369 	}
1370 	/* generate query */
1371 	sldns_buffer_clear(buff);
1372 	sldns_buffer_write_u16(buff, 0); /* id placeholder */
1373 	sldns_buffer_write(buff, sq->qbuf, sq->qbuflen);
1374 	sldns_buffer_flip(buff);
1375 	if(with_edns) {
1376 		/* add edns section */
1377 		struct edns_data edns;
1378 		edns.edns_present = 1;
1379 		edns.ext_rcode = 0;
1380 		edns.edns_version = EDNS_ADVERTISED_VERSION;
1381 		if(sq->status == serviced_query_UDP_EDNS_FRAG) {
1382 			if(addr_is_ip6(&sq->addr, sq->addrlen)) {
1383 				if(EDNS_FRAG_SIZE_IP6 < EDNS_ADVERTISED_SIZE)
1384 					edns.udp_size = EDNS_FRAG_SIZE_IP6;
1385 				else	edns.udp_size = EDNS_ADVERTISED_SIZE;
1386 			} else {
1387 				if(EDNS_FRAG_SIZE_IP4 < EDNS_ADVERTISED_SIZE)
1388 					edns.udp_size = EDNS_FRAG_SIZE_IP4;
1389 				else	edns.udp_size = EDNS_ADVERTISED_SIZE;
1390 			}
1391 		} else {
1392 			edns.udp_size = EDNS_ADVERTISED_SIZE;
1393 		}
1394 		edns.bits = 0;
1395 		if(sq->dnssec & EDNS_DO)
1396 			edns.bits = EDNS_DO;
1397 		if(sq->dnssec & BIT_CD)
1398 			LDNS_CD_SET(sldns_buffer_begin(buff));
1399 		attach_edns_record(buff, &edns);
1400 	}
1401 }
1402 
1403 /**
1404  * Perform serviced query UDP sending operation.
1405  * Sends UDP with EDNS, unless infra host marked non EDNS.
1406  * @param sq: query to send.
1407  * @param buff: buffer scratch space.
1408  * @return 0 on error.
1409  */
1410 static int
1411 serviced_udp_send(struct serviced_query* sq, sldns_buffer* buff)
1412 {
1413 	int rtt, vs;
1414 	uint8_t edns_lame_known;
1415 	time_t now = *sq->outnet->now_secs;
1416 
1417 	if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
1418 		sq->zonelen, now, &vs, &edns_lame_known, &rtt))
1419 		return 0;
1420 	sq->last_rtt = rtt;
1421 	verbose(VERB_ALGO, "EDNS lookup known=%d vs=%d", edns_lame_known, vs);
1422 	if(sq->status == serviced_initial) {
1423 		if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) {
1424 			/* perform EDNS lame probe - check if server is
1425 			 * EDNS lame (EDNS queries to it are dropped) */
1426 			verbose(VERB_ALGO, "serviced query: send probe to see "
1427 				" if use of EDNS causes timeouts");
1428 			/* even 700 msec may be too small */
1429 			rtt = 1000;
1430 			sq->status = serviced_query_PROBE_EDNS;
1431 		} else if(vs != -1) {
1432 			sq->status = serviced_query_UDP_EDNS;
1433 		} else {
1434 			sq->status = serviced_query_UDP;
1435 		}
1436 	}
1437 	serviced_encode(sq, buff, (sq->status == serviced_query_UDP_EDNS) ||
1438 		(sq->status == serviced_query_UDP_EDNS_FRAG));
1439 	sq->last_sent_time = *sq->outnet->now_tv;
1440 	sq->edns_lame_known = (int)edns_lame_known;
1441 	verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt);
1442 	sq->pending = pending_udp_query(sq, buff, rtt,
1443 		serviced_udp_callback, sq);
1444 	if(!sq->pending)
1445 		return 0;
1446 	return 1;
1447 }
1448 
1449 /** check that perturbed qname is identical */
1450 static int
1451 serviced_check_qname(sldns_buffer* pkt, uint8_t* qbuf, size_t qbuflen)
1452 {
1453 	uint8_t* d1 = sldns_buffer_at(pkt, 12);
1454 	uint8_t* d2 = qbuf+10;
1455 	uint8_t len1, len2;
1456 	int count = 0;
1457 	log_assert(qbuflen >= 15 /* 10 header, root, type, class */);
1458 	len1 = *d1++;
1459 	len2 = *d2++;
1460 	if(sldns_buffer_limit(pkt) < 12+1+4) /* packet too small for qname */
1461 		return 0;
1462 	while(len1 != 0 || len2 != 0) {
1463 		if(LABEL_IS_PTR(len1)) {
1464 			d1 = sldns_buffer_at(pkt, PTR_OFFSET(len1, *d1));
1465 			if(d1 >= sldns_buffer_at(pkt, sldns_buffer_limit(pkt)))
1466 				return 0;
1467 			len1 = *d1++;
1468 			if(count++ > MAX_COMPRESS_PTRS)
1469 				return 0;
1470 			continue;
1471 		}
1472 		if(d2 > qbuf+qbuflen)
1473 			return 0;
1474 		if(len1 != len2)
1475 			return 0;
1476 		if(len1 > LDNS_MAX_LABELLEN)
1477 			return 0;
1478 		log_assert(len1 <= LDNS_MAX_LABELLEN);
1479 		log_assert(len2 <= LDNS_MAX_LABELLEN);
1480 		log_assert(len1 == len2 && len1 != 0);
1481 		/* compare the labels - bitwise identical */
1482 		if(memcmp(d1, d2, len1) != 0)
1483 			return 0;
1484 		d1 += len1;
1485 		d2 += len2;
1486 		len1 = *d1++;
1487 		len2 = *d2++;
1488 	}
1489 	return 1;
1490 }
1491 
1492 /** call the callbacks for a serviced query */
1493 static void
1494 serviced_callbacks(struct serviced_query* sq, int error, struct comm_point* c,
1495 	struct comm_reply* rep)
1496 {
1497 	struct service_callback* p;
1498 	int dobackup = (sq->cblist && sq->cblist->next); /* >1 cb*/
1499 	uint8_t *backup_p = NULL;
1500 	size_t backlen = 0;
1501 #ifdef UNBOUND_DEBUG
1502 	rbnode_t* rem =
1503 #else
1504 	(void)
1505 #endif
1506 	/* remove from tree, and schedule for deletion, so that callbacks
1507 	 * can safely deregister themselves and even create new serviced
1508 	 * queries that are identical to this one. */
1509 	rbtree_delete(sq->outnet->serviced, sq);
1510 	log_assert(rem); /* should have been present */
1511 	sq->to_be_deleted = 1;
1512 	verbose(VERB_ALGO, "svcd callbacks start");
1513 	if(sq->outnet->use_caps_for_id && error == NETEVENT_NOERROR && c &&
1514 		!sq->nocaps) {
1515 		/* noerror and nxdomain must have a qname in reply */
1516 		if(sldns_buffer_read_u16_at(c->buffer, 4) == 0 &&
1517 			(LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1518 				== LDNS_RCODE_NOERROR ||
1519 			 LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1520 				== LDNS_RCODE_NXDOMAIN)) {
1521 			verbose(VERB_DETAIL, "no qname in reply to check 0x20ID");
1522 			log_addr(VERB_DETAIL, "from server",
1523 				&sq->addr, sq->addrlen);
1524 			log_buf(VERB_DETAIL, "for packet", c->buffer);
1525 			error = NETEVENT_CLOSED;
1526 			c = NULL;
1527 		} else if(sldns_buffer_read_u16_at(c->buffer, 4) > 0 &&
1528 			!serviced_check_qname(c->buffer, sq->qbuf,
1529 			sq->qbuflen)) {
1530 			verbose(VERB_DETAIL, "wrong 0x20-ID in reply qname");
1531 			log_addr(VERB_DETAIL, "from server",
1532 				&sq->addr, sq->addrlen);
1533 			log_buf(VERB_DETAIL, "for packet", c->buffer);
1534 			error = NETEVENT_CAPSFAIL;
1535 			/* and cleanup too */
1536 			pkt_dname_tolower(c->buffer,
1537 				sldns_buffer_at(c->buffer, 12));
1538 		} else {
1539 			verbose(VERB_ALGO, "good 0x20-ID in reply qname");
1540 			/* cleanup caps, prettier cache contents. */
1541 			pkt_dname_tolower(c->buffer,
1542 				sldns_buffer_at(c->buffer, 12));
1543 		}
1544 	}
1545 	if(dobackup && c) {
1546 		/* make a backup of the query, since the querystate processing
1547 		 * may send outgoing queries that overwrite the buffer.
1548 		 * use secondary buffer to store the query.
1549 		 * This is a data copy, but faster than packet to server */
1550 		backlen = sldns_buffer_limit(c->buffer);
1551 		backup_p = memdup(sldns_buffer_begin(c->buffer), backlen);
1552 		if(!backup_p) {
1553 			log_err("malloc failure in serviced query callbacks");
1554 			error = NETEVENT_CLOSED;
1555 			c = NULL;
1556 		}
1557 		sq->outnet->svcd_overhead = backlen;
1558 	}
1559 	/* test the actual sq->cblist, because the next elem could be deleted*/
1560 	while((p=sq->cblist) != NULL) {
1561 		sq->cblist = p->next; /* remove this element */
1562 		if(dobackup && c) {
1563 			sldns_buffer_clear(c->buffer);
1564 			sldns_buffer_write(c->buffer, backup_p, backlen);
1565 			sldns_buffer_flip(c->buffer);
1566 		}
1567 		fptr_ok(fptr_whitelist_serviced_query(p->cb));
1568 		(void)(*p->cb)(c, p->cb_arg, error, rep);
1569 		free(p);
1570 	}
1571 	if(backup_p) {
1572 		free(backup_p);
1573 		sq->outnet->svcd_overhead = 0;
1574 	}
1575 	verbose(VERB_ALGO, "svcd callbacks end");
1576 	log_assert(sq->cblist == NULL);
1577 	serviced_delete(sq);
1578 }
1579 
1580 int
1581 serviced_tcp_callback(struct comm_point* c, void* arg, int error,
1582         struct comm_reply* rep)
1583 {
1584 	struct serviced_query* sq = (struct serviced_query*)arg;
1585 	struct comm_reply r2;
1586 	sq->pending = NULL; /* removed after this callback */
1587 	if(error != NETEVENT_NOERROR)
1588 		log_addr(VERB_QUERY, "tcp error for address",
1589 			&sq->addr, sq->addrlen);
1590 	if(error==NETEVENT_NOERROR)
1591 		infra_update_tcp_works(sq->outnet->infra, &sq->addr,
1592 			sq->addrlen, sq->zone, sq->zonelen);
1593 #ifdef USE_DNSTAP
1594 	if(error==NETEVENT_NOERROR && sq->outnet->dtenv &&
1595 	   (sq->outnet->dtenv->log_resolver_response_messages ||
1596 	    sq->outnet->dtenv->log_forwarder_response_messages))
1597 		dt_msg_send_outside_response(sq->outnet->dtenv, &sq->addr,
1598 		c->type, sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
1599 		&sq->last_sent_time, sq->outnet->now_tv, c->buffer);
1600 #endif
1601 	if(error==NETEVENT_NOERROR && sq->status == serviced_query_TCP_EDNS &&
1602 		(LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) ==
1603 		LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(sldns_buffer_begin(
1604 		c->buffer)) == LDNS_RCODE_NOTIMPL) ) {
1605 		/* attempt to fallback to nonEDNS */
1606 		sq->status = serviced_query_TCP_EDNS_fallback;
1607 		serviced_tcp_initiate(sq, c->buffer);
1608 		return 0;
1609 	} else if(error==NETEVENT_NOERROR &&
1610 		sq->status == serviced_query_TCP_EDNS_fallback &&
1611 			(LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) ==
1612 			LDNS_RCODE_NOERROR || LDNS_RCODE_WIRE(
1613 			sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NXDOMAIN
1614 			|| LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1615 			== LDNS_RCODE_YXDOMAIN)) {
1616 		/* the fallback produced a result that looks promising, note
1617 		 * that this server should be approached without EDNS */
1618 		/* only store noEDNS in cache if domain is noDNSSEC */
1619 		if(!sq->want_dnssec)
1620 		  if(!infra_edns_update(sq->outnet->infra, &sq->addr,
1621 			sq->addrlen, sq->zone, sq->zonelen, -1,
1622 			*sq->outnet->now_secs))
1623 			log_err("Out of memory caching no edns for host");
1624 		sq->status = serviced_query_TCP;
1625 	}
1626 	if(sq->tcp_upstream || sq->ssl_upstream) {
1627 	    struct timeval now = *sq->outnet->now_tv;
1628 	    if(now.tv_sec > sq->last_sent_time.tv_sec ||
1629 		(now.tv_sec == sq->last_sent_time.tv_sec &&
1630 		now.tv_usec > sq->last_sent_time.tv_usec)) {
1631 		/* convert from microseconds to milliseconds */
1632 		int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
1633 		  + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
1634 		verbose(VERB_ALGO, "measured TCP-time at %d msec", roundtime);
1635 		log_assert(roundtime >= 0);
1636 		/* only store if less then AUTH_TIMEOUT seconds, it could be
1637 		 * huge due to system-hibernated and we woke up */
1638 		if(roundtime < TCP_AUTH_QUERY_TIMEOUT*1000) {
1639 		    if(!infra_rtt_update(sq->outnet->infra, &sq->addr,
1640 			sq->addrlen, sq->zone, sq->zonelen, sq->qtype,
1641 			roundtime, sq->last_rtt, (time_t)now.tv_sec))
1642 			log_err("out of memory noting rtt.");
1643 		}
1644 	    }
1645 	}
1646 	/* insert address into reply info */
1647 	if(!rep) {
1648 		/* create one if there isn't (on errors) */
1649 		rep = &r2;
1650 		r2.c = c;
1651 	}
1652 	memcpy(&rep->addr, &sq->addr, sq->addrlen);
1653 	rep->addrlen = sq->addrlen;
1654 	serviced_callbacks(sq, error, c, rep);
1655 	return 0;
1656 }
1657 
1658 static void
1659 serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff)
1660 {
1661 	verbose(VERB_ALGO, "initiate TCP query %s",
1662 		sq->status==serviced_query_TCP_EDNS?"EDNS":"");
1663 	serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
1664 	sq->last_sent_time = *sq->outnet->now_tv;
1665 	sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
1666 		serviced_tcp_callback, sq);
1667 	if(!sq->pending) {
1668 		/* delete from tree so that a retry by above layer does not
1669 		 * clash with this entry */
1670 		log_err("serviced_tcp_initiate: failed to send tcp query");
1671 		serviced_callbacks(sq, NETEVENT_CLOSED, NULL, NULL);
1672 	}
1673 }
1674 
1675 /** Send serviced query over TCP return false on initial failure */
1676 static int
1677 serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff)
1678 {
1679 	int vs, rtt;
1680 	uint8_t edns_lame_known;
1681 	if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
1682 		sq->zonelen, *sq->outnet->now_secs, &vs, &edns_lame_known,
1683 		&rtt))
1684 		return 0;
1685 	if(vs != -1)
1686 		sq->status = serviced_query_TCP_EDNS;
1687 	else 	sq->status = serviced_query_TCP;
1688 	serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
1689 	sq->last_sent_time = *sq->outnet->now_tv;
1690 	sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
1691 		serviced_tcp_callback, sq);
1692 	return sq->pending != NULL;
1693 }
1694 
1695 int
1696 serviced_udp_callback(struct comm_point* c, void* arg, int error,
1697         struct comm_reply* rep)
1698 {
1699 	struct serviced_query* sq = (struct serviced_query*)arg;
1700 	struct outside_network* outnet = sq->outnet;
1701 	struct timeval now = *sq->outnet->now_tv;
1702 	int fallback_tcp = 0;
1703 
1704 	sq->pending = NULL; /* removed after callback */
1705 	if(error == NETEVENT_TIMEOUT) {
1706 		int rto = 0;
1707 		if(sq->status == serviced_query_PROBE_EDNS) {
1708 			/* non-EDNS probe failed; we do not know its status,
1709 			 * keep trying with EDNS, timeout may not be caused
1710 			 * by EDNS. */
1711 			sq->status = serviced_query_UDP_EDNS;
1712 		}
1713 		if(sq->status == serviced_query_UDP_EDNS && sq->last_rtt < 5000) {
1714 			/* fallback to 1480/1280 */
1715 			sq->status = serviced_query_UDP_EDNS_FRAG;
1716 			log_name_addr(VERB_ALGO, "try edns1xx0", sq->qbuf+10,
1717 				&sq->addr, sq->addrlen);
1718 			if(!serviced_udp_send(sq, c->buffer)) {
1719 				serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1720 			}
1721 			return 0;
1722 		}
1723 		if(sq->status == serviced_query_UDP_EDNS_FRAG) {
1724 			/* fragmentation size did not fix it */
1725 			sq->status = serviced_query_UDP_EDNS;
1726 		}
1727 		sq->retry++;
1728 		if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
1729 			sq->zone, sq->zonelen, sq->qtype, -1, sq->last_rtt,
1730 			(time_t)now.tv_sec)))
1731 			log_err("out of memory in UDP exponential backoff");
1732 		if(sq->retry < OUTBOUND_UDP_RETRY) {
1733 			log_name_addr(VERB_ALGO, "retry query", sq->qbuf+10,
1734 				&sq->addr, sq->addrlen);
1735 			if(!serviced_udp_send(sq, c->buffer)) {
1736 				serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1737 			}
1738 			return 0;
1739 		}
1740 		if(rto >= RTT_MAX_TIMEOUT) {
1741 			fallback_tcp = 1;
1742 			/* UDP does not work, fallback to TCP below */
1743 		} else {
1744 			serviced_callbacks(sq, NETEVENT_TIMEOUT, c, rep);
1745 			return 0;
1746 		}
1747 	} else if(error != NETEVENT_NOERROR) {
1748 		/* udp returns error (due to no ID or interface available) */
1749 		serviced_callbacks(sq, error, c, rep);
1750 		return 0;
1751 	}
1752 #ifdef USE_DNSTAP
1753 	if(outnet->dtenv &&
1754 	   (outnet->dtenv->log_resolver_response_messages ||
1755 	    outnet->dtenv->log_forwarder_response_messages))
1756 		dt_msg_send_outside_response(outnet->dtenv, &sq->addr, c->type,
1757 		sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
1758 		&sq->last_sent_time, sq->outnet->now_tv, c->buffer);
1759 #endif
1760 	if(!fallback_tcp) {
1761 	    if( (sq->status == serviced_query_UDP_EDNS
1762 	        ||sq->status == serviced_query_UDP_EDNS_FRAG)
1763 		&& (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1764 			== LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(
1765 			sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL)) {
1766 		/* try to get an answer by falling back without EDNS */
1767 		verbose(VERB_ALGO, "serviced query: attempt without EDNS");
1768 		sq->status = serviced_query_UDP_EDNS_fallback;
1769 		sq->retry = 0;
1770 		if(!serviced_udp_send(sq, c->buffer)) {
1771 			serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1772 		}
1773 		return 0;
1774 	    } else if(sq->status == serviced_query_PROBE_EDNS) {
1775 		/* probe without EDNS succeeds, so we conclude that this
1776 		 * host likely has EDNS packets dropped */
1777 		log_addr(VERB_DETAIL, "timeouts, concluded that connection to "
1778 			"host drops EDNS packets", &sq->addr, sq->addrlen);
1779 		/* only store noEDNS in cache if domain is noDNSSEC */
1780 		if(!sq->want_dnssec)
1781 		  if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
1782 			sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
1783 			log_err("Out of memory caching no edns for host");
1784 		  }
1785 		sq->status = serviced_query_UDP;
1786 	    } else if(sq->status == serviced_query_UDP_EDNS &&
1787 		!sq->edns_lame_known) {
1788 		/* now we know that edns queries received answers store that */
1789 		log_addr(VERB_ALGO, "serviced query: EDNS works for",
1790 			&sq->addr, sq->addrlen);
1791 		if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
1792 			sq->zone, sq->zonelen, 0, (time_t)now.tv_sec)) {
1793 			log_err("Out of memory caching edns works");
1794 		}
1795 		sq->edns_lame_known = 1;
1796 	    } else if(sq->status == serviced_query_UDP_EDNS_fallback &&
1797 		!sq->edns_lame_known && (LDNS_RCODE_WIRE(
1798 		sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR ||
1799 		LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) ==
1800 		LDNS_RCODE_NXDOMAIN || LDNS_RCODE_WIRE(sldns_buffer_begin(
1801 		c->buffer)) == LDNS_RCODE_YXDOMAIN)) {
1802 		/* the fallback produced a result that looks promising, note
1803 		 * that this server should be approached without EDNS */
1804 		/* only store noEDNS in cache if domain is noDNSSEC */
1805 		if(!sq->want_dnssec) {
1806 		  log_addr(VERB_ALGO, "serviced query: EDNS fails for",
1807 			&sq->addr, sq->addrlen);
1808 		  if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
1809 			sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
1810 			log_err("Out of memory caching no edns for host");
1811 		  }
1812 		} else {
1813 		  log_addr(VERB_ALGO, "serviced query: EDNS fails, but "
1814 		  	"not stored because need DNSSEC for", &sq->addr,
1815 			sq->addrlen);
1816 		}
1817 		sq->status = serviced_query_UDP;
1818 	    }
1819 	    if(now.tv_sec > sq->last_sent_time.tv_sec ||
1820 		(now.tv_sec == sq->last_sent_time.tv_sec &&
1821 		now.tv_usec > sq->last_sent_time.tv_usec)) {
1822 		/* convert from microseconds to milliseconds */
1823 		int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
1824 		  + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
1825 		verbose(VERB_ALGO, "measured roundtrip at %d msec", roundtime);
1826 		log_assert(roundtime >= 0);
1827 		/* in case the system hibernated, do not enter a huge value,
1828 		 * above this value gives trouble with server selection */
1829 		if(roundtime < 60000) {
1830 		    if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
1831 			sq->zone, sq->zonelen, sq->qtype, roundtime,
1832 			sq->last_rtt, (time_t)now.tv_sec))
1833 			log_err("out of memory noting rtt.");
1834 		}
1835 	    }
1836 	} /* end of if_!fallback_tcp */
1837 	/* perform TC flag check and TCP fallback after updating our
1838 	 * cache entries for EDNS status and RTT times */
1839 	if(LDNS_TC_WIRE(sldns_buffer_begin(c->buffer)) || fallback_tcp) {
1840 		/* fallback to TCP */
1841 		/* this discards partial UDP contents */
1842 		if(sq->status == serviced_query_UDP_EDNS ||
1843 			sq->status == serviced_query_UDP_EDNS_FRAG ||
1844 			sq->status == serviced_query_UDP_EDNS_fallback)
1845 			/* if we have unfinished EDNS_fallback, start again */
1846 			sq->status = serviced_query_TCP_EDNS;
1847 		else	sq->status = serviced_query_TCP;
1848 		serviced_tcp_initiate(sq, c->buffer);
1849 		return 0;
1850 	}
1851 	/* yay! an answer */
1852 	serviced_callbacks(sq, error, c, rep);
1853 	return 0;
1854 }
1855 
1856 struct serviced_query*
1857 outnet_serviced_query(struct outside_network* outnet,
1858 	uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
1859 	uint16_t flags, int dnssec, int want_dnssec, int nocaps,
1860 	int tcp_upstream, int ssl_upstream, struct sockaddr_storage* addr,
1861 	socklen_t addrlen, uint8_t* zone, size_t zonelen,
1862 	comm_point_callback_t* callback, void* callback_arg,
1863 	sldns_buffer* buff)
1864 {
1865 	struct serviced_query* sq;
1866 	struct service_callback* cb;
1867 	serviced_gen_query(buff, qname, qnamelen, qtype, qclass, flags);
1868 	sq = lookup_serviced(outnet, buff, dnssec, addr, addrlen);
1869 	/* duplicate entries are included in the callback list, because
1870 	 * there is a counterpart registration by our caller that needs to
1871 	 * be doubly-removed (with callbacks perhaps). */
1872 	if(!(cb = (struct service_callback*)malloc(sizeof(*cb))))
1873 		return NULL;
1874 	if(!sq) {
1875 		/* make new serviced query entry */
1876 		sq = serviced_create(outnet, buff, dnssec, want_dnssec, nocaps,
1877 			tcp_upstream, ssl_upstream, addr, addrlen, zone,
1878 			zonelen, (int)qtype);
1879 		if(!sq) {
1880 			free(cb);
1881 			return NULL;
1882 		}
1883 		/* perform first network action */
1884 		if(outnet->do_udp && !(tcp_upstream || ssl_upstream)) {
1885 			if(!serviced_udp_send(sq, buff)) {
1886 				(void)rbtree_delete(outnet->serviced, sq);
1887 				free(sq->qbuf);
1888 				free(sq->zone);
1889 				free(sq);
1890 				free(cb);
1891 				return NULL;
1892 			}
1893 		} else {
1894 			if(!serviced_tcp_send(sq, buff)) {
1895 				(void)rbtree_delete(outnet->serviced, sq);
1896 				free(sq->qbuf);
1897 				free(sq->zone);
1898 				free(sq);
1899 				free(cb);
1900 				return NULL;
1901 			}
1902 		}
1903 	}
1904 	/* add callback to list of callbacks */
1905 	cb->cb = callback;
1906 	cb->cb_arg = callback_arg;
1907 	cb->next = sq->cblist;
1908 	sq->cblist = cb;
1909 	return sq;
1910 }
1911 
1912 /** remove callback from list */
1913 static void
1914 callback_list_remove(struct serviced_query* sq, void* cb_arg)
1915 {
1916 	struct service_callback** pp = &sq->cblist;
1917 	while(*pp) {
1918 		if((*pp)->cb_arg == cb_arg) {
1919 			struct service_callback* del = *pp;
1920 			*pp = del->next;
1921 			free(del);
1922 			return;
1923 		}
1924 		pp = &(*pp)->next;
1925 	}
1926 }
1927 
1928 void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg)
1929 {
1930 	if(!sq)
1931 		return;
1932 	callback_list_remove(sq, cb_arg);
1933 	/* if callbacks() routine scheduled deletion, let it do that */
1934 	if(!sq->cblist && !sq->to_be_deleted) {
1935 #ifdef UNBOUND_DEBUG
1936 		rbnode_t* rem =
1937 #else
1938 		(void)
1939 #endif
1940 		rbtree_delete(sq->outnet->serviced, sq);
1941 		log_assert(rem); /* should be present */
1942 		serviced_delete(sq);
1943 	}
1944 }
1945 
1946 /** get memory used by waiting tcp entry (in use or not) */
1947 static size_t
1948 waiting_tcp_get_mem(struct waiting_tcp* w)
1949 {
1950 	size_t s;
1951 	if(!w) return 0;
1952 	s = sizeof(*w) + w->pkt_len;
1953 	if(w->timer)
1954 		s += comm_timer_get_mem(w->timer);
1955 	return s;
1956 }
1957 
1958 /** get memory used by port if */
1959 static size_t
1960 if_get_mem(struct port_if* pif)
1961 {
1962 	size_t s;
1963 	int i;
1964 	s = sizeof(*pif) + sizeof(int)*pif->avail_total +
1965 		sizeof(struct port_comm*)*pif->maxout;
1966 	for(i=0; i<pif->inuse; i++)
1967 		s += sizeof(*pif->out[i]) +
1968 			comm_point_get_mem(pif->out[i]->cp);
1969 	return s;
1970 }
1971 
1972 /** get memory used by waiting udp */
1973 static size_t
1974 waiting_udp_get_mem(struct pending* w)
1975 {
1976 	size_t s;
1977 	s = sizeof(*w) + comm_timer_get_mem(w->timer) + w->pkt_len;
1978 	return s;
1979 }
1980 
1981 size_t outnet_get_mem(struct outside_network* outnet)
1982 {
1983 	size_t i;
1984 	int k;
1985 	struct waiting_tcp* w;
1986 	struct pending* u;
1987 	struct serviced_query* sq;
1988 	struct service_callback* sb;
1989 	struct port_comm* pc;
1990 	size_t s = sizeof(*outnet) + sizeof(*outnet->base) +
1991 		sizeof(*outnet->udp_buff) +
1992 		sldns_buffer_capacity(outnet->udp_buff);
1993 	/* second buffer is not ours */
1994 	for(pc = outnet->unused_fds; pc; pc = pc->next) {
1995 		s += sizeof(*pc) + comm_point_get_mem(pc->cp);
1996 	}
1997 	for(k=0; k<outnet->num_ip4; k++)
1998 		s += if_get_mem(&outnet->ip4_ifs[k]);
1999 	for(k=0; k<outnet->num_ip6; k++)
2000 		s += if_get_mem(&outnet->ip6_ifs[k]);
2001 	for(u=outnet->udp_wait_first; u; u=u->next_waiting)
2002 		s += waiting_udp_get_mem(u);
2003 
2004 	s += sizeof(struct pending_tcp*)*outnet->num_tcp;
2005 	for(i=0; i<outnet->num_tcp; i++) {
2006 		s += sizeof(struct pending_tcp);
2007 		s += comm_point_get_mem(outnet->tcp_conns[i]->c);
2008 		if(outnet->tcp_conns[i]->query)
2009 			s += waiting_tcp_get_mem(outnet->tcp_conns[i]->query);
2010 	}
2011 	for(w=outnet->tcp_wait_first; w; w = w->next_waiting)
2012 		s += waiting_tcp_get_mem(w);
2013 	s += sizeof(*outnet->pending);
2014 	s += (sizeof(struct pending) + comm_timer_get_mem(NULL)) *
2015 		outnet->pending->count;
2016 	s += sizeof(*outnet->serviced);
2017 	s += outnet->svcd_overhead;
2018 	RBTREE_FOR(sq, struct serviced_query*, outnet->serviced) {
2019 		s += sizeof(*sq) + sq->qbuflen;
2020 		for(sb = sq->cblist; sb; sb = sb->next)
2021 			s += sizeof(*sb);
2022 	}
2023 	return s;
2024 }
2025 
2026 size_t
2027 serviced_get_mem(struct serviced_query* sq)
2028 {
2029 	struct service_callback* sb;
2030 	size_t s;
2031 	s = sizeof(*sq) + sq->qbuflen;
2032 	for(sb = sq->cblist; sb; sb = sb->next)
2033 		s += sizeof(*sb);
2034 	if(sq->status == serviced_query_UDP_EDNS ||
2035 		sq->status == serviced_query_UDP ||
2036 		sq->status == serviced_query_PROBE_EDNS ||
2037 		sq->status == serviced_query_UDP_EDNS_FRAG ||
2038 		sq->status == serviced_query_UDP_EDNS_fallback) {
2039 		s += sizeof(struct pending);
2040 		s += comm_timer_get_mem(NULL);
2041 	} else {
2042 		/* does not have size of the pkt pointer */
2043 		/* always has a timer except on malloc failures */
2044 
2045 		/* these sizes are part of the main outside network mem */
2046 		/*
2047 		s += sizeof(struct waiting_tcp);
2048 		s += comm_timer_get_mem(NULL);
2049 		*/
2050 	}
2051 	return s;
2052 }
2053 
2054