xref: /freebsd/contrib/unbound/edns-subnet/subnetmod.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*
2  * edns-subnet/subnetmod.c - edns subnet module. Must be called before validator
3  * and iterator.
4  *
5  * Copyright (c) 2013, NLnet Labs. All rights reserved.
6  *
7  * This software is open source.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  *
16  * Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  *
20  * Neither the name of the NLNET LABS nor the names of its contributors may
21  * be used to endorse or promote products derived from this software without
22  * specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  */
36  /**
37  * \file
38  * subnet module for unbound.
39  */
40 
41 #include "config.h"
42 
43 #ifdef CLIENT_SUBNET /* keeps splint happy */
44 
45 #include "edns-subnet/subnetmod.h"
46 #include "edns-subnet/edns-subnet.h"
47 #include "edns-subnet/addrtree.h"
48 #include "edns-subnet/subnet-whitelist.h"
49 
50 #include "services/mesh.h"
51 #include "services/cache/dns.h"
52 #include "util/module.h"
53 #include "util/regional.h"
54 #include "util/storage/slabhash.h"
55 #include "util/config_file.h"
56 #include "util/data/msgreply.h"
57 #include "sldns/sbuffer.h"
58 #include "sldns/wire2str.h"
59 #include "iterator/iter_utils.h"
60 #ifdef USE_CACHEDB
61 #include "cachedb/cachedb.h"
62 #endif
63 
64 /** externally called */
65 void
66 subnet_data_delete(void *d, void *ATTR_UNUSED(arg))
67 {
68 	struct subnet_msg_cache_data *r;
69 	r = (struct subnet_msg_cache_data*)d;
70 	addrtree_delete(r->tree4);
71 	addrtree_delete(r->tree6);
72 	free(r);
73 }
74 
75 /** externally called */
76 size_t
77 msg_cache_sizefunc(void *k, void *d)
78 {
79 	struct msgreply_entry *q = (struct msgreply_entry*)k;
80 	struct subnet_msg_cache_data *r = (struct subnet_msg_cache_data*)d;
81 	size_t s = sizeof(struct msgreply_entry)
82 		+ sizeof(struct subnet_msg_cache_data)
83 		+ q->key.qname_len + lock_get_mem(&q->entry.lock);
84 	s += addrtree_size(r->tree4);
85 	s += addrtree_size(r->tree6);
86 	return s;
87 }
88 
89 /** new query for ecs module */
90 static int
91 subnet_new_qstate(struct module_qstate *qstate, int id)
92 {
93 	struct subnet_qstate *sq = (struct subnet_qstate*)regional_alloc(
94 		qstate->region, sizeof(struct subnet_qstate));
95 	if(!sq)
96 		return 0;
97 	qstate->minfo[id] = sq;
98 	memset(sq, 0, sizeof(*sq));
99 	sq->started_no_cache_store = qstate->no_cache_store;
100 	sq->started_no_cache_lookup = qstate->no_cache_lookup;
101 	return 1;
102 }
103 
104 /** Add ecs struct to edns list, after parsing it to wire format. */
105 void
106 subnet_ecs_opt_list_append(struct ecs_data* ecs, struct edns_option** list,
107 	struct module_qstate *qstate, struct regional *region)
108 {
109 	size_t sn_octs, sn_octs_remainder;
110 	sldns_buffer* buf = qstate->env->scratch_buffer;
111 
112 	if(ecs->subnet_validdata) {
113 		log_assert(ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 ||
114 			ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6);
115 		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP4 ||
116 			ecs->subnet_source_mask <=  INET_SIZE*8);
117 		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP6 ||
118 			ecs->subnet_source_mask <= INET6_SIZE*8);
119 
120 		sn_octs = ecs->subnet_source_mask / 8;
121 		sn_octs_remainder =
122 			(size_t)((ecs->subnet_source_mask % 8)>0?1:0);
123 
124 		log_assert(sn_octs + sn_octs_remainder <= INET6_SIZE);
125 
126 		sldns_buffer_clear(buf);
127 		sldns_buffer_write_u16(buf, ecs->subnet_addr_fam);
128 		sldns_buffer_write_u8(buf, ecs->subnet_source_mask);
129 		sldns_buffer_write_u8(buf, ecs->subnet_scope_mask);
130 		sldns_buffer_write(buf, ecs->subnet_addr, sn_octs);
131 		if(sn_octs_remainder)
132 			sldns_buffer_write_u8(buf, ecs->subnet_addr[sn_octs] &
133 				~(0xFF >> (ecs->subnet_source_mask % 8)));
134 		sldns_buffer_flip(buf);
135 
136 		edns_opt_list_append(list,
137 				qstate->env->cfg->client_subnet_opcode,
138 				sn_octs + sn_octs_remainder + 4,
139 				sldns_buffer_begin(buf), region);
140 	}
141 }
142 
143 int ecs_whitelist_check(struct query_info* qinfo,
144 	uint16_t ATTR_UNUSED(flags), struct module_qstate* qstate,
145 	struct sockaddr_storage* addr, socklen_t addrlen,
146 	uint8_t* ATTR_UNUSED(zone), size_t ATTR_UNUSED(zonelen),
147 	struct regional *region, int id, void* ATTR_UNUSED(cbargs))
148 {
149 	struct subnet_qstate *sq;
150 	struct subnet_env *sn_env;
151 
152 	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
153 		return 1;
154 	sn_env = (struct subnet_env*)qstate->env->modinfo[id];
155 
156 	/* Cache by default, might be disabled after parsing EDNS option
157 	 * received from nameserver. */
158 	if(!iter_stub_fwd_no_cache(qstate, &qstate->qinfo, NULL, NULL, NULL, 0)) {
159 		qstate->no_cache_store = 0;
160 	}
161 
162 	sq->subnet_sent_no_subnet = 0;
163 	if(sq->ecs_server_out.subnet_validdata && ((sq->subnet_downstream &&
164 		qstate->env->cfg->client_subnet_always_forward) ||
165 		ecs_is_whitelisted(sn_env->whitelist,
166 		addr, addrlen, qinfo->qname, qinfo->qname_len,
167 		qinfo->qclass))) {
168 		/* Address on whitelist or client query contains ECS option, we
169 		 * want to sent out ECS. Only add option if it is not already
170 		 * set. */
171 		if(!edns_opt_list_find(qstate->edns_opts_back_out,
172 			qstate->env->cfg->client_subnet_opcode)) {
173 			/* if the client is not wanting an EDNS subnet option,
174 			 * omit it and store that we omitted it but actually
175 			 * are doing EDNS subnet to the server. */
176 			if(sq->ecs_server_out.subnet_source_mask == 0) {
177 				sq->subnet_sent_no_subnet = 1;
178 				sq->subnet_sent = 0;
179 				return 1;
180 			}
181 			subnet_ecs_opt_list_append(&sq->ecs_server_out,
182 				&qstate->edns_opts_back_out, qstate, region);
183 		}
184 		sq->subnet_sent = 1;
185 	}
186 	else {
187 		/* Outgoing ECS option is set, but we don't want to sent it to
188 		 * this address, remove option. */
189 		if(edns_opt_list_find(qstate->edns_opts_back_out,
190 			qstate->env->cfg->client_subnet_opcode)) {
191 			edns_opt_list_remove(&qstate->edns_opts_back_out,
192 				qstate->env->cfg->client_subnet_opcode);
193 		}
194 		sq->subnet_sent = 0;
195 	}
196 	return 1;
197 }
198 
199 
200 void
201 subnet_markdel(void* key)
202 {
203 	struct msgreply_entry *e = (struct msgreply_entry*)key;
204 	e->key.qtype = 0;
205 	e->key.qclass = 0;
206 }
207 
208 int
209 subnetmod_init(struct module_env *env, int id)
210 {
211 	struct subnet_env *sn_env = (struct subnet_env*)calloc(1,
212 		sizeof(struct subnet_env));
213 	if(!sn_env) {
214 		log_err("malloc failure");
215 		return 0;
216 	}
217 	alloc_init(&sn_env->alloc, NULL, 0);
218 	env->modinfo[id] = (void*)sn_env;
219 
220 	/* Warn that serve-expired and prefetch do not work with the subnet
221 	 * module cache. */
222 	if(env->cfg->serve_expired)
223 		log_warn(
224 			"subnetcache: serve-expired is set but not working "
225 			"for data originating from the subnet module cache.");
226 	if(env->cfg->prefetch)
227 		log_warn(
228 			"subnetcache: prefetch is set but not working "
229 			"for data originating from the subnet module cache.");
230 	/* Copy msg_cache settings */
231 	sn_env->subnet_msg_cache = slabhash_create(env->cfg->msg_cache_slabs,
232 		HASH_DEFAULT_STARTARRAY, env->cfg->msg_cache_size,
233 		msg_cache_sizefunc, query_info_compare, query_entry_delete,
234 		subnet_data_delete, NULL);
235 	slabhash_setmarkdel(sn_env->subnet_msg_cache, &subnet_markdel);
236 	if(!sn_env->subnet_msg_cache) {
237 		log_err("subnetcache: could not create cache");
238 		free(sn_env);
239 		env->modinfo[id] = NULL;
240 		return 0;
241 	}
242 	/* whitelist for edns subnet capable servers */
243 	sn_env->whitelist = ecs_whitelist_create();
244 	if(!sn_env->whitelist ||
245 		!ecs_whitelist_apply_cfg(sn_env->whitelist, env->cfg)) {
246 		log_err("subnetcache: could not create ECS whitelist");
247 		slabhash_delete(sn_env->subnet_msg_cache);
248 		free(sn_env);
249 		env->modinfo[id] = NULL;
250 		return 0;
251 	}
252 
253 	verbose(VERB_QUERY, "subnetcache: option registered (%d)",
254 		env->cfg->client_subnet_opcode);
255 	/* Create new mesh state for all queries. */
256 	env->unique_mesh = 1;
257 	if(!edns_register_option(env->cfg->client_subnet_opcode,
258 		env->cfg->client_subnet_always_forward /* bypass cache */,
259 		1 /* no aggregation */, env)) {
260 		log_err("subnetcache: could not register opcode");
261 		ecs_whitelist_delete(sn_env->whitelist);
262 		slabhash_delete(sn_env->subnet_msg_cache);
263 		free(sn_env);
264 		env->modinfo[id] = NULL;
265 		return 0;
266 	}
267 	inplace_cb_register((void*)ecs_whitelist_check, inplace_cb_query, NULL,
268 		env, id);
269 	inplace_cb_register((void*)ecs_edns_back_parsed,
270 		inplace_cb_edns_back_parsed, NULL, env, id);
271 	inplace_cb_register((void*)ecs_query_response,
272 		inplace_cb_query_response, NULL, env, id);
273 	lock_rw_init(&sn_env->biglock);
274 	return 1;
275 }
276 
277 void
278 subnetmod_deinit(struct module_env *env, int id)
279 {
280 	struct subnet_env *sn_env;
281 	if(!env || !env->modinfo[id])
282 		return;
283 	sn_env = (struct subnet_env*)env->modinfo[id];
284 	lock_rw_destroy(&sn_env->biglock);
285 	inplace_cb_delete(env, inplace_cb_edns_back_parsed, id);
286 	inplace_cb_delete(env, inplace_cb_query, id);
287 	inplace_cb_delete(env, inplace_cb_query_response, id);
288 	ecs_whitelist_delete(sn_env->whitelist);
289 	slabhash_delete(sn_env->subnet_msg_cache);
290 	alloc_clear(&sn_env->alloc);
291 	free(sn_env);
292 	env->modinfo[id] = NULL;
293 }
294 
295 /** Tells client that upstream has no/improper support */
296 static void
297 cp_edns_bad_response(struct ecs_data *target, struct ecs_data *source)
298 {
299 	target->subnet_scope_mask  = 0;
300 	target->subnet_source_mask = source->subnet_source_mask;
301 	target->subnet_addr_fam    = source->subnet_addr_fam;
302 	memcpy(target->subnet_addr, source->subnet_addr, INET6_SIZE);
303 	target->subnet_validdata = 1;
304 }
305 
306 static void
307 delfunc(void *envptr, void *elemptr) {
308 	struct reply_info *elem = (struct reply_info *)elemptr;
309 	struct subnet_env *env = (struct subnet_env *)envptr;
310 	reply_info_parsedelete(elem, &env->alloc);
311 }
312 
313 static size_t
314 sizefunc(void *elemptr) {
315 	struct reply_info *elem  = (struct reply_info *)elemptr;
316 	size_t s = sizeof (struct reply_info) - sizeof (struct rrset_ref)
317 		+ elem->rrset_count * sizeof (struct rrset_ref)
318 		+ elem->rrset_count * sizeof (struct ub_packed_rrset_key *);
319 	size_t i;
320 	for (i = 0; i < elem->rrset_count; i++) {
321 		struct ub_packed_rrset_key *key = elem->rrsets[i];
322 		struct packed_rrset_data *data = key->entry.data;
323 		s += ub_rrset_sizefunc(key, data);
324 	}
325 	if(elem->reason_bogus_str)
326 		s += strlen(elem->reason_bogus_str)+1;
327 	return s;
328 }
329 
330 /**
331  * Select tree from cache entry based on edns data.
332  * If for address family not present it will create a new one.
333  * NULL on failure to create. */
334 static struct addrtree*
335 get_tree(struct subnet_msg_cache_data *data, struct ecs_data *edns,
336 	struct subnet_env *env, struct config_file* cfg)
337 {
338 	struct addrtree *tree;
339 	if (edns->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
340 		if (!data->tree4)
341 			data->tree4 = addrtree_create(
342 				cfg->max_client_subnet_ipv4, &delfunc,
343 				&sizefunc, env, cfg->max_ecs_tree_size_ipv4);
344 		tree = data->tree4;
345 	} else {
346 		if (!data->tree6)
347 			data->tree6 = addrtree_create(
348 				cfg->max_client_subnet_ipv6, &delfunc,
349 				&sizefunc, env, cfg->max_ecs_tree_size_ipv6);
350 		tree = data->tree6;
351 	}
352 	return tree;
353 }
354 
355 static void
356 update_cache(struct module_qstate *qstate, int id)
357 {
358 	struct msgreply_entry *mrep_entry;
359 	struct addrtree *tree;
360 	struct reply_info *rep;
361 	struct query_info qinf;
362 	struct subnet_env *sne = qstate->env->modinfo[id];
363 	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
364 	struct slabhash *subnet_msg_cache = sne->subnet_msg_cache;
365 	struct ecs_data *edns = &sq->ecs_client_in;
366 	size_t i;
367 	int only_match_scope_zero, diff_size;
368 
369 	/* We already calculated hash upon lookup (lookup_and_reply) if we were
370 	 * allowed to look in the ECS cache */
371 	hashvalue_type h = qstate->minfo[id] &&
372 		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash_calculated?
373 		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash :
374 		query_info_hash(&qstate->qinfo, qstate->query_flags);
375 	/* Step 1, general qinfo lookup */
376 	struct lruhash_entry* lru_entry = slabhash_lookup(subnet_msg_cache, h,
377 		&qstate->qinfo, 1);
378 	int need_to_insert = (lru_entry == NULL);
379 	if (!lru_entry) {
380 		void* data = calloc(1,
381 			sizeof(struct subnet_msg_cache_data));
382 		if(!data) {
383 			log_err("malloc failed");
384 			return;
385 		}
386 		qinf = qstate->qinfo;
387 		qinf.qname = memdup(qstate->qinfo.qname,
388 			qstate->qinfo.qname_len);
389 		if(!qinf.qname) {
390 			free(data);
391 			log_err("memdup failed");
392 			return;
393 		}
394 		mrep_entry = query_info_entrysetup(&qinf, data, h);
395 		free(qinf.qname); /* if qname 'consumed', it is set to NULL */
396 		if (!mrep_entry) {
397 			free(data);
398 			log_err("query_info_entrysetup failed");
399 			return;
400 		}
401 		lru_entry = &mrep_entry->entry;
402 		lock_rw_wrlock(&lru_entry->lock);
403 	}
404 	/* lru_entry->lock is locked regardless of how we got here,
405 	 * either from the slabhash_lookup, or above in the new allocated */
406 	/* Step 2, find the correct tree */
407 	if (!(tree = get_tree(lru_entry->data, edns, sne, qstate->env->cfg))) {
408 		lock_rw_unlock(&lru_entry->lock);
409 		log_err("subnetcache: cache insertion failed");
410 		return;
411 	}
412 	lock_quick_lock(&sne->alloc.lock);
413 	rep = reply_info_copy(qstate->return_msg->rep, &sne->alloc, NULL);
414 	lock_quick_unlock(&sne->alloc.lock);
415 	if (!rep) {
416 		lock_rw_unlock(&lru_entry->lock);
417 		log_err("subnetcache: cache insertion failed");
418 		return;
419 	}
420 
421 	/* store RRsets */
422 	for(i=0; i<rep->rrset_count; i++) {
423 		rep->ref[i].key = rep->rrsets[i];
424 		rep->ref[i].id = rep->rrsets[i]->id;
425 	}
426 	reply_info_set_ttls(rep, *qstate->env->now);
427 	reply_info_sortref(rep);
428 	rep->flags |= (BIT_RA | BIT_QR); /* fix flags to be sensible for */
429 	rep->flags &= ~(BIT_AA | BIT_CD);/* a reply based on the cache   */
430 	if(edns->subnet_source_mask == 0 && edns->subnet_scope_mask == 0)
431 		only_match_scope_zero = 1;
432 	else only_match_scope_zero = 0;
433 	diff_size = (int)tree->size_bytes;
434 	addrtree_insert(tree, (addrkey_t*)edns->subnet_addr,
435 		edns->subnet_source_mask, sq->max_scope, rep,
436 		rep->ttl, *qstate->env->now, only_match_scope_zero);
437 	diff_size = (int)tree->size_bytes - diff_size;
438 
439 	lock_rw_unlock(&lru_entry->lock);
440 	if (need_to_insert) {
441 		slabhash_insert(subnet_msg_cache, h, lru_entry, lru_entry->data,
442 			NULL);
443 	} else {
444 		slabhash_update_space_used(subnet_msg_cache, h, NULL,
445 			diff_size);
446 	}
447 }
448 
449 /** Lookup in cache and reply true iff reply is sent. */
450 static int
451 lookup_and_reply(struct module_qstate *qstate, int id, struct subnet_qstate *sq, int prefetch)
452 {
453 	struct lruhash_entry *e;
454 	struct module_env *env = qstate->env;
455 	struct subnet_env *sne = (struct subnet_env*)env->modinfo[id];
456 	hashvalue_type h = query_info_hash(&qstate->qinfo, qstate->query_flags);
457 	struct subnet_msg_cache_data *data;
458 	struct ecs_data *ecs = &sq->ecs_client_in;
459 	struct addrtree *tree;
460 	struct addrnode *node;
461 	uint8_t scope;
462 
463 	memset(&sq->ecs_client_out, 0, sizeof(sq->ecs_client_out));
464 
465 	if (sq) {
466 		sq->qinfo_hash = h; /* Might be useful on cache miss */
467 		sq->qinfo_hash_calculated = 1;
468 	}
469 	e = slabhash_lookup(sne->subnet_msg_cache, h, &qstate->qinfo, 1);
470 	if (!e) return 0; /* qinfo not in cache */
471 	data = e->data;
472 	tree = (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4)?
473 		data->tree4 : data->tree6;
474 	if (!tree) { /* qinfo in cache but not for this family */
475 		lock_rw_unlock(&e->lock);
476 		return 0;
477 	}
478 	node = addrtree_find(tree, (addrkey_t*)ecs->subnet_addr,
479 		ecs->subnet_source_mask, *env->now);
480 	if (!node) { /* plain old cache miss */
481 		lock_rw_unlock(&e->lock);
482 		return 0;
483 	}
484 
485 	qstate->return_msg = tomsg(NULL, &qstate->qinfo,
486 		(struct reply_info *)node->elem, qstate->region, *env->now, 0,
487 		env->scratch);
488 	scope = (uint8_t)node->scope;
489 	lock_rw_unlock(&e->lock);
490 
491 	if (!qstate->return_msg) { /* Failed allocation or expired TTL */
492 		return 0;
493 	}
494 
495 	if (sq->subnet_downstream) { /* relay to interested client */
496 		sq->ecs_client_out.subnet_scope_mask = scope;
497 		sq->ecs_client_out.subnet_addr_fam = ecs->subnet_addr_fam;
498 		sq->ecs_client_out.subnet_source_mask = ecs->subnet_source_mask;
499 		memcpy(&sq->ecs_client_out.subnet_addr, &ecs->subnet_addr,
500 			INET6_SIZE);
501 		sq->ecs_client_out.subnet_validdata = 1;
502 	}
503 
504 	if (prefetch && *qstate->env->now >= ((struct reply_info *)node->elem)->prefetch_ttl) {
505 		qstate->need_refetch = 1;
506 	}
507 	return 1;
508 }
509 
/**
 * Test first bits of addresses for equality. Caller is responsible
 * for making sure that both a and b are at least (net+7)/8 octets long.
 * Only the leading net bits take part in the comparison: when net is
 * not a multiple of 8, the bits of the last octet that lie beyond the
 * prefix are ignored.
 * @param a: first address.
 * @param b: second address.
 * @param net: Number of bits to test.
 * @return: 1 if equal, 0 otherwise.
 */
static int
common_prefix(uint8_t *a, uint8_t *b, uint8_t net)
{
	size_t n = (size_t)net / 8;
	unsigned int rem = (unsigned int)net % 8;
	uint8_t mask;

	if(memcmp(a, b, n) != 0)
		return 0;
	if(rem == 0)
		return 1; /* prefix ends on an octet boundary; a[n] not read */
	/* high 'rem' bits of the partially covered octet */
	mask = (uint8_t)(0xFF << (8 - rem));
	return ((a[n] ^ b[n]) & mask) == 0;
}
524 
525 static enum module_ext_state
526 eval_response(struct module_qstate *qstate, int id, struct subnet_qstate *sq)
527 {
528 	struct subnet_env *sne = qstate->env->modinfo[id];
529 
530 	struct ecs_data *c_in  = &sq->ecs_client_in; /* rcvd from client */
531 	struct ecs_data *c_out = &sq->ecs_client_out;/* will send to client */
532 	struct ecs_data *s_in  = &sq->ecs_server_in; /* rcvd from auth */
533 	struct ecs_data *s_out = &sq->ecs_server_out;/* sent to auth */
534 
535 	memset(c_out, 0, sizeof(*c_out));
536 
537 	if (!qstate->return_msg) {
538 		/* already an answer and its not a message, but retain
539 		 * the actual rcode, instead of module_error, so send
540 		 * module_finished */
541 		return module_finished;
542 	}
543 
544 	/* We have not asked for subnet data */
545 	if (!sq->subnet_sent && !sq->subnet_sent_no_subnet) {
546 		if (s_in->subnet_validdata)
547 			verbose(VERB_QUERY, "subnetcache: received spurious data");
548 		if (sq->subnet_downstream) /* Copy back to client */
549 			cp_edns_bad_response(c_out, c_in);
550 		return module_finished;
551 	}
552 
553 	/* subnet sent but nothing came back */
554 	if (!s_in->subnet_validdata && !sq->subnet_sent_no_subnet) {
555 		/* The authority indicated no support for edns subnet. As a
556 		 * consequence the answer ended up in the regular cache. It
557 		 * is still useful to put it in the edns subnet cache for
558 		 * when a client explicitly asks for subnet specific answer. */
559 		verbose(VERB_QUERY, "subnetcache: Authority indicates no support");
560 		if(!sq->started_no_cache_store) {
561 			lock_rw_wrlock(&sne->biglock);
562 			update_cache(qstate, id);
563 			lock_rw_unlock(&sne->biglock);
564 		}
565 		if (sq->subnet_downstream)
566 			cp_edns_bad_response(c_out, c_in);
567 		return module_finished;
568 	}
569 
570 	/* Purposefully there was no sent subnet, and there is consequently
571 	 * no subnet in the answer. If there was, use the subnet in the answer
572 	 * anyway. But if there is not, treat it as a prefix 0 answer. */
573 	if(sq->subnet_sent_no_subnet && !s_in->subnet_validdata) {
574 		/* Fill in 0.0.0.0/0 scope 0, or ::0/0 scope 0, for caching. */
575 		s_in->subnet_addr_fam = s_out->subnet_addr_fam;
576 		s_in->subnet_source_mask = 0;
577 		s_in->subnet_scope_mask = 0;
578 		memset(s_in->subnet_addr, 0, INET6_SIZE);
579 		s_in->subnet_validdata = 1;
580 	}
581 
582 	/* Being here means we have asked for and got a subnet specific
583 	 * answer. Also, the answer from the authority is not yet cached
584 	 * anywhere. */
585 
586 	/* can we accept response? */
587 	if(s_out->subnet_addr_fam != s_in->subnet_addr_fam ||
588 		s_out->subnet_source_mask != s_in->subnet_source_mask ||
589 		!common_prefix(s_out->subnet_addr, s_in->subnet_addr,
590 			s_out->subnet_source_mask))
591 	{
592 		/* we can not accept, restart query without option */
593 		verbose(VERB_QUERY, "subnetcache: forged data");
594 		s_out->subnet_validdata = 0;
595 		(void)edns_opt_list_remove(&qstate->edns_opts_back_out,
596 			qstate->env->cfg->client_subnet_opcode);
597 		sq->subnet_sent = 0;
598 		sq->subnet_sent_no_subnet = 0;
599 		return module_restart_next;
600 	}
601 
602 	lock_rw_wrlock(&sne->biglock);
603 	if(!sq->started_no_cache_store) {
604 		update_cache(qstate, id);
605 	}
606 	sne->num_msg_nocache++;
607 	lock_rw_unlock(&sne->biglock);
608 
609 	/* If there is an expired answer in the global cache, remove that,
610 	 * because expired answers would otherwise resurface once the ecs data
611 	 * expires, giving once in a while global data responses for ecs
612 	 * domains, with serve expired enabled. */
613 	if(qstate->env->cfg->serve_expired) {
614 		msg_cache_remove(qstate->env, qstate->qinfo.qname,
615 			qstate->qinfo.qname_len, qstate->qinfo.qtype,
616 			qstate->qinfo.qclass, 0);
617 #ifdef USE_CACHEDB
618 		if(qstate->env->cachedb_enabled)
619 			cachedb_msg_remove(qstate);
620 #endif
621 	}
622 
623 	if (sq->subnet_downstream) {
624 		/* Client wants to see the answer, echo option back
625 		 * and adjust the scope. */
626 		c_out->subnet_addr_fam = c_in->subnet_addr_fam;
627 		c_out->subnet_source_mask = c_in->subnet_source_mask;
628 		memcpy(&c_out->subnet_addr, &c_in->subnet_addr, INET6_SIZE);
629 		c_out->subnet_scope_mask = sq->max_scope;
630 		/* Limit scope returned to client to scope used for caching. */
631 		if(c_out->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
632 			if(c_out->subnet_scope_mask >
633 				qstate->env->cfg->max_client_subnet_ipv4) {
634 				c_out->subnet_scope_mask =
635 					qstate->env->cfg->max_client_subnet_ipv4;
636 			}
637 		}
638 		else if(c_out->subnet_scope_mask >
639 				qstate->env->cfg->max_client_subnet_ipv6) {
640 				c_out->subnet_scope_mask =
641 					qstate->env->cfg->max_client_subnet_ipv6;
642 		}
643 		c_out->subnet_validdata = 1;
644 	}
645 	return module_finished;
646 }
647 
648 /** Parse EDNS opt data containing ECS */
649 static int
650 parse_subnet_option(struct edns_option* ecs_option, struct ecs_data* ecs)
651 {
652 	memset(ecs, 0, sizeof(*ecs));
653 	if (ecs_option->opt_len < 4)
654 		return 0;
655 
656 	ecs->subnet_addr_fam = sldns_read_uint16(ecs_option->opt_data);
657 	ecs->subnet_source_mask = ecs_option->opt_data[2];
658 	ecs->subnet_scope_mask = ecs_option->opt_data[3];
659 	/* remaining bytes indicate address */
660 
661 	/* validate input*/
662 	/* option length matches calculated length? */
663 	if (ecs_option->opt_len != (size_t)((ecs->subnet_source_mask+7)/8 + 4))
664 		return 0;
665 	if (ecs_option->opt_len - 4 > INET6_SIZE || ecs_option->opt_len == 0)
666 		return 0;
667 	if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
668 		if (ecs->subnet_source_mask > 32 || ecs->subnet_scope_mask > 32)
669 			return 0;
670 	} else if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6) {
671 		if (ecs->subnet_source_mask > 128 ||
672 			ecs->subnet_scope_mask > 128)
673 			return 0;
674 	} else
675 		return 0;
676 
677 	/* valid ECS data, write to ecs_data */
678 	if (copy_clear(ecs->subnet_addr, INET6_SIZE, ecs_option->opt_data + 4,
679 		ecs_option->opt_len - 4, ecs->subnet_source_mask))
680 		return 0;
681 	ecs->subnet_validdata = 1;
682 	return 1;
683 }
684 
685 void
686 subnet_option_from_ss(struct sockaddr_storage *ss, struct ecs_data* ecs,
687 	struct config_file* cfg)
688 {
689 	void* sinaddr;
690 
691 	/* Construct subnet option from original query */
692 	if(((struct sockaddr_in*)ss)->sin_family == AF_INET) {
693 		ecs->subnet_source_mask = cfg->max_client_subnet_ipv4;
694 		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP4;
695 		sinaddr = &((struct sockaddr_in*)ss)->sin_addr;
696 		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
697 			(uint8_t *)sinaddr, INET_SIZE,
698 			ecs->subnet_source_mask)) {
699 			ecs->subnet_validdata = 1;
700 		}
701 	}
702 #ifdef INET6
703 	else {
704 		ecs->subnet_source_mask = cfg->max_client_subnet_ipv6;
705 		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP6;
706 		sinaddr = &((struct sockaddr_in6*)ss)->sin6_addr;
707 		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
708 			(uint8_t *)sinaddr, INET6_SIZE,
709 			ecs->subnet_source_mask)) {
710 			ecs->subnet_validdata = 1;
711 		}
712 	}
713 #else
714 			/* We don't know how to handle ip6, just pass */
715 #endif /* INET6 */
716 }
717 
718 int
719 ecs_query_response(struct module_qstate* qstate, struct dns_msg* response,
720 	int id, void* ATTR_UNUSED(cbargs))
721 {
722 	struct subnet_qstate *sq;
723 
724 	if(!response || !(sq=(struct subnet_qstate*)qstate->minfo[id]))
725 		return 1;
726 
727 	if(sq->subnet_sent &&
728 		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_REFUSED) {
729 		/* REFUSED response to ECS query, remove ECS option. */
730 		edns_opt_list_remove(&qstate->edns_opts_back_out,
731 			qstate->env->cfg->client_subnet_opcode);
732 		sq->subnet_sent = 0;
733 		sq->subnet_sent_no_subnet = 0;
734 		memset(&sq->ecs_server_out, 0, sizeof(sq->ecs_server_out));
735 	} else if (!sq->track_max_scope &&
736 		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_NOERROR &&
737 		response->rep->an_numrrsets > 0
738 		) {
739 		struct ub_packed_rrset_key* s = response->rep->rrsets[0];
740 		if(ntohs(s->rk.type) == LDNS_RR_TYPE_CNAME &&
741 			query_dname_compare(qstate->qinfo.qname,
742 			s->rk.dname) == 0) {
743 			/* CNAME response for QNAME. From now on keep track of
744 			 * longest received ECS prefix for all queries on this
745 			 * qstate. */
746 			sq->track_max_scope = 1;
747 		}
748 	}
749 	return 1;
750 }
751 
752 /** verbose print edns subnet option in pretty print */
753 static void
754 subnet_log_print(const char* s, struct edns_option* ecs_opt)
755 {
756 	if(verbosity >= VERB_ALGO) {
757 		char buf[256];
758 		char* str = buf;
759 		size_t str_len = sizeof(buf);
760 		if(!ecs_opt) {
761 			verbose(VERB_ALGO, "%s (null)", s);
762 			return;
763 		}
764 		(void)sldns_wire2str_edns_subnet_print(&str, &str_len,
765 			ecs_opt->opt_data, ecs_opt->opt_len);
766 		verbose(VERB_ALGO, "%s %s", s, buf);
767 	}
768 }
769 
770 int
771 ecs_edns_back_parsed(struct module_qstate* qstate, int id,
772 	void* ATTR_UNUSED(cbargs))
773 {
774 	struct subnet_qstate *sq;
775 	struct edns_option* ecs_opt;
776 
777 	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
778 		return 1;
779 	if((ecs_opt = edns_opt_list_find(
780 		qstate->edns_opts_back_in,
781 		qstate->env->cfg->client_subnet_opcode)) &&
782 		parse_subnet_option(ecs_opt, &sq->ecs_server_in) &&
783 		sq->subnet_sent && sq->ecs_server_in.subnet_validdata) {
784 			subnet_log_print("answer has edns subnet", ecs_opt);
785 			/* Only skip global cache store if we sent an ECS option
786 			 * and received one back. Answers from non-whitelisted
787 			 * servers will end up in global cache. Answers for
788 			 * queries with 0 source will not (unless nameserver
789 			 * does not support ECS). */
790 			qstate->no_cache_store = 1;
791 			if(!sq->track_max_scope || (sq->track_max_scope &&
792 				sq->ecs_server_in.subnet_scope_mask >
793 				sq->max_scope))
794 				sq->max_scope = sq->ecs_server_in.subnet_scope_mask;
795 	} else if(sq->subnet_sent_no_subnet) {
796 		/* The answer can be stored as scope 0, not in global cache. */
797 		qstate->no_cache_store = 1;
798 	}
799 
800 	return 1;
801 }
802 
803 void
804 subnetmod_operate(struct module_qstate *qstate, enum module_ev event,
805 	int id, struct outbound_entry* outbound)
806 {
807 	struct subnet_env *sne = qstate->env->modinfo[id];
808 	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
809 
810 	verbose(VERB_QUERY, "subnetcache[module %d] operate: extstate:%s "
811 		"event:%s", id, strextstate(qstate->ext_state[id]),
812 		strmodulevent(event));
813 	log_query_info(VERB_QUERY, "subnetcache operate: query", &qstate->qinfo);
814 
815 	if((event == module_event_new || event == module_event_pass) &&
816 		sq == NULL) {
817 		struct edns_option* ecs_opt;
818 		if(!subnet_new_qstate(qstate, id)) {
819 			qstate->return_msg = NULL;
820 			qstate->ext_state[id] = module_finished;
821 			return;
822 		}
823 
824 		sq = (struct subnet_qstate*)qstate->minfo[id];
825 
826 		if((ecs_opt = edns_opt_list_find(
827 			qstate->edns_opts_front_in,
828 			qstate->env->cfg->client_subnet_opcode))) {
829 			if(!parse_subnet_option(ecs_opt, &sq->ecs_client_in)) {
830 				/* Wrongly formatted ECS option. RFC mandates to
831 				 * return FORMERROR. */
832 				qstate->return_rcode = LDNS_RCODE_FORMERR;
833 				qstate->ext_state[id] = module_finished;
834 				return;
835 			}
836 			subnet_log_print("query has edns subnet", ecs_opt);
837 			sq->subnet_downstream = 1;
838 		}
839 		else if(qstate->mesh_info->reply_list) {
840 			subnet_option_from_ss(
841 				&qstate->mesh_info->reply_list->query_reply.client_addr,
842 				&sq->ecs_client_in, qstate->env->cfg);
843 		}
844 		else if(qstate->client_addr.ss_family != AF_UNSPEC) {
845 			subnet_option_from_ss(
846 				&qstate->client_addr,
847 				&sq->ecs_client_in, qstate->env->cfg);
848 		}
849 
850 		if(sq->ecs_client_in.subnet_validdata == 0) {
851 			/* No clients are interested in result or we could not
852 			 * parse it, we don't do client subnet */
853 			sq->ecs_server_out.subnet_validdata = 0;
854 			verbose(VERB_ALGO, "subnetcache: pass to next module");
855 			qstate->ext_state[id] = module_wait_module;
856 			return;
857 		}
858 
859 		/* Limit to minimum allowed source mask */
860 		if(sq->ecs_client_in.subnet_source_mask != 0 && (
861 			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 &&
862 			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv4) ||
863 			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6 &&
864 			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv6))) {
865 				qstate->return_rcode = LDNS_RCODE_REFUSED;
866 				qstate->ext_state[id] = module_finished;
867 				return;
868 		}
869 
870 		if(!sq->started_no_cache_lookup && !qstate->blacklist) {
871 			lock_rw_wrlock(&sne->biglock);
872 			if(qstate->mesh_info->reply_list &&
873 				lookup_and_reply(qstate, id, sq,
874 				qstate->env->cfg->prefetch)) {
875 				sne->num_msg_cache++;
876 				lock_rw_unlock(&sne->biglock);
877 				verbose(VERB_QUERY, "subnetcache: answered from cache");
878 				qstate->ext_state[id] = module_finished;
879 
880 				subnet_ecs_opt_list_append(&sq->ecs_client_out,
881 					&qstate->edns_opts_front_out, qstate,
882 					qstate->region);
883 				if(verbosity >= VERB_ALGO) {
884 					subnet_log_print("reply has edns subnet",
885 						edns_opt_list_find(
886 						qstate->edns_opts_front_out,
887 						qstate->env->cfg->
888 						client_subnet_opcode));
889 				}
890 				return;
891 			}
892 			lock_rw_unlock(&sne->biglock);
893 		}
894 
895 		sq->ecs_server_out.subnet_addr_fam =
896 			sq->ecs_client_in.subnet_addr_fam;
897 		sq->ecs_server_out.subnet_source_mask =
898 			sq->ecs_client_in.subnet_source_mask;
899 		/* Limit source prefix to configured maximum */
900 		if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4
901 			&& sq->ecs_server_out.subnet_source_mask >
902 			qstate->env->cfg->max_client_subnet_ipv4)
903 			sq->ecs_server_out.subnet_source_mask =
904 				qstate->env->cfg->max_client_subnet_ipv4;
905 		else if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6
906 			&& sq->ecs_server_out.subnet_source_mask >
907 			qstate->env->cfg->max_client_subnet_ipv6)
908 			sq->ecs_server_out.subnet_source_mask =
909 				qstate->env->cfg->max_client_subnet_ipv6;
910 		/* Safe to copy completely, even if the source is limited by the
911 		 * configuration. subnet_ecs_opt_list_append() will limit the address.
912 		 * */
913 		memcpy(&sq->ecs_server_out.subnet_addr,
914 			sq->ecs_client_in.subnet_addr, INET6_SIZE);
915 		sq->ecs_server_out.subnet_scope_mask = 0;
916 		sq->ecs_server_out.subnet_validdata = 1;
917 		if(sq->ecs_server_out.subnet_source_mask != 0 &&
918 			qstate->env->cfg->client_subnet_always_forward &&
919 			sq->subnet_downstream)
920 			/* ECS specific data required, do not look at the global
921 			 * cache in other modules. */
922 			qstate->no_cache_lookup = 1;
923 
924 		/* pass request to next module */
925 		verbose(VERB_ALGO,
926 			"subnetcache: not found in cache. pass to next module");
927 		qstate->ext_state[id] = module_wait_module;
928 		return;
929 	}
930 	/* Query handed back by next module, we have a 'final' answer */
931 	if(sq && event == module_event_moddone) {
932 		qstate->ext_state[id] = eval_response(qstate, id, sq);
933 		if(qstate->ext_state[id] == module_finished &&
934 			qstate->return_msg) {
935 			subnet_ecs_opt_list_append(&sq->ecs_client_out,
936 				&qstate->edns_opts_front_out, qstate,
937 				qstate->region);
938 			if(verbosity >= VERB_ALGO) {
939 				subnet_log_print("reply has edns subnet",
940 					edns_opt_list_find(
941 					qstate->edns_opts_front_out,
942 					qstate->env->cfg->
943 					client_subnet_opcode));
944 			}
945 		}
946 		qstate->no_cache_store = sq->started_no_cache_store;
947 		qstate->no_cache_lookup = sq->started_no_cache_lookup;
948 		return;
949 	}
950 	if(sq && outbound) {
951 		return;
952 	}
953 	/* We are being revisited */
954 	if(event == module_event_pass || event == module_event_new) {
955 		/* Just pass it on, we already did the work */
956 		verbose(VERB_ALGO, "subnetcache: pass to next module");
957 		qstate->ext_state[id] = module_wait_module;
958 		return;
959 	}
960 	if(!sq && (event == module_event_moddone)) {
961 		/* during priming, module done but we never started */
962 		qstate->ext_state[id] = module_finished;
963 		return;
964 	}
965 	log_err("subnetcache: bad event %s", strmodulevent(event));
966 	qstate->ext_state[id] = module_error;
967 	return;
968 }
969 
/**
 * Clear callback of the subnet module (registered in subnetmod_block).
 * Intentionally a no-op; both parameters are unused.
 * @param qstate: query state (unused).
 * @param id: module id (unused).
 */
void
subnetmod_clear(struct module_qstate *ATTR_UNUSED(qstate),
	int ATTR_UNUSED(id))
{
	/* Nothing to release here: qstate has no data outside region */
}
976 
/**
 * Inform-super callback of the subnet module (registered in
 * subnetmod_block). The subnet module takes no action here; all
 * parameters are unused.
 * @param qstate: query state (unused).
 * @param id: module id (unused).
 * @param super: the super query state (unused).
 */
void
subnetmod_inform_super(struct module_qstate *ATTR_UNUSED(qstate),
	int ATTR_UNUSED(id), struct module_qstate *ATTR_UNUSED(super))
{
	/* Not used */
}
983 
984 size_t
985 subnetmod_get_mem(struct module_env *env, int id)
986 {
987 	struct subnet_env *sn_env = env->modinfo[id];
988 	if (!sn_env) return 0;
989 	return sizeof(*sn_env) +
990 		slabhash_get_mem(sn_env->subnet_msg_cache) +
991 		ecs_whitelist_get_mem(sn_env->whitelist);
992 }
993 
994 /**
995  * The module function block
996  */
997 static struct module_func_block subnetmod_block = {
998 	"subnetcache",
999 	NULL, NULL, &subnetmod_init, &subnetmod_deinit, &subnetmod_operate,
1000 	&subnetmod_inform_super, &subnetmod_clear, &subnetmod_get_mem
1001 };
1002 
1003 struct module_func_block*
1004 subnetmod_get_funcblock(void)
1005 {
1006 	return &subnetmod_block;
1007 }
1008 
/**
 * Wrapper that exposes the static sizefunc() to the unit tests.
 * @param elemptr: passed unchanged to sizefunc().
 * @return the value returned by sizefunc(elemptr).
 */
size_t
unittest_wrapper_subnetmod_sizefunc(void *elemptr)
{
	size_t elemsize = sizefunc(elemptr);
	return elemsize;
}
1015 
1016 #endif  /* CLIENT_SUBNET */
1017