/*
 * services/mesh.c - deal with mesh of query states and handle events for that.
 *
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
 *
 * This software is open source.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the NLNET LABS nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 *
 * This file contains functions to assist in dealing with a mesh of
 * query states. This mesh is supposed to be thread-specific.
 * It consists of query states (per qname, qtype, qclass) and connections
 * between query states and the super and subquery states, and replies to
 * send back to clients.
 */
#include "config.h"
#include "services/mesh.h"
#include "services/outbound_list.h"
#include "services/cache/dns.h"
#include "services/cache/rrset.h"
#include "util/log.h"
#include "util/net_help.h"
#include "util/module.h"
#include "util/regional.h"
#include "util/data/msgencode.h"
#include "util/timehist.h"
#include "util/fptr_wlist.h"
#include "util/alloc.h"
#include "util/config_file.h"
#include "util/edns.h"
#include "sldns/sbuffer.h"
#include "sldns/wire2str.h"
#include "services/localzone.h"
#include "util/data/dname.h"
#include "respip/respip.h"
#include "services/listen_dnsport.h"

/** subtract timers, so that the values do not overflow or become negative */
static void
timeval_subtract(struct timeval* d, const struct timeval* end,
	const struct timeval* start)
{
#ifndef S_SPLINT_S
	time_t end_usec = end->tv_usec;
	d->tv_sec = end->tv_sec - start->tv_sec;
	if(end_usec < start->tv_usec) {
		end_usec += 1000000;
		d->tv_sec--;
	}
	d->tv_usec = end_usec - start->tv_usec;
#endif
}

/** add timers, so that the values do not overflow or become negative */
static void
timeval_add(struct timeval* d, const struct timeval* add)
{
#ifndef S_SPLINT_S
	d->tv_sec += add->tv_sec;
	d->tv_usec += add->tv_usec;
	if(d->tv_usec >= 1000000) {
		d->tv_usec -= 1000000;
		d->tv_sec++;
	}
#endif
}

/** divide sum of timers to get average */
static void
timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d)
{
#ifndef S_SPLINT_S
	size_t leftover;
	if(d == 0) {
		avg->tv_sec = 0;
		avg->tv_usec = 0;
		return;
	}
	avg->tv_sec = sum->tv_sec / d;
	avg->tv_usec = sum->tv_usec / d;
	/* handle fraction from seconds divide */
	leftover = sum->tv_sec - avg->tv_sec*d;
	avg->tv_usec += (leftover*1000000)/d;
#endif
}

/** compare time values; returns true if x is smaller than or equal to y */
static int
timeval_smaller(const struct timeval* x, const struct timeval* y)
{
#ifndef S_SPLINT_S
	if(x->tv_sec < y->tv_sec)
		return 1;
	else if(x->tv_sec == y->tv_sec) {
		if(x->tv_usec <= y->tv_usec)
			return 1;
		else return 0;
	}
	else return 0;
#endif
}
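
/*
 * A small worked example for the helpers above; the values are
 * hypothetical. Subtraction borrows a second when the microseconds go
 * negative, and division keeps the fraction that the integer seconds
 * divide would lose:
 *
 *	struct timeval d, end = { 2, 300 }, start = { 1, 700 };
 *	timeval_subtract(&d, &end, &start);
 *	// d = { 0, 999600 }: one second borrowed into microseconds
 *
 *	struct timeval avg, sum = { 3, 0 };
 *	timeval_divide(&avg, &sum, 2);
 *	// avg = { 1, 500000 }: the leftover second becomes 500000 usec
 */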

/**
 * Compare two response-ip client info entries for the purpose of mesh state
 * compare. It returns 0 if ci_a and ci_b are considered equal; otherwise
 * 1 or -1 (they mean 'ci_a is larger/smaller than ci_b', respectively, but
 * in practice it should only be used to mean they are different).
 * We cannot share the mesh state for two queries if different response-ip
 * actions can apply in the end, even if those queries are otherwise identical.
 * For this purpose we compare tag lists and tag action lists; they should be
 * identical to share the same state.
 * For tag data, we don't look into the data content, as it can be
 * expensive; unless tag data are not defined for both or they point to the
 * exact same data in memory (i.e., they come from the same ACL entry), we
 * consider these data different.
 * Likewise, if the client info is associated with views, we don't look into
 * the views. They are considered different unless they are exactly the same
 * view object, even if two views differ only in their names.
 */
static int
client_info_compare(const struct respip_client_info* ci_a,
	const struct respip_client_info* ci_b)
{
	int cmp;

	if(!ci_a && !ci_b)
		return 0;
	if(ci_a && !ci_b)
		return -1;
	if(!ci_a && ci_b)
		return 1;
	if(ci_a->taglen != ci_b->taglen)
		return (ci_a->taglen < ci_b->taglen) ? -1 : 1;
	cmp = memcmp(ci_a->taglist, ci_b->taglist, ci_a->taglen);
	if(cmp != 0)
		return cmp;
	if(ci_a->tag_actions_size != ci_b->tag_actions_size)
		return (ci_a->tag_actions_size < ci_b->tag_actions_size) ?
			-1 : 1;
	cmp = memcmp(ci_a->tag_actions, ci_b->tag_actions,
		ci_a->tag_actions_size);
	if(cmp != 0)
		return cmp;
	if(ci_a->tag_datas != ci_b->tag_datas)
		return ci_a->tag_datas < ci_b->tag_datas ? -1 : 1;
	if(ci_a->view != ci_b->view)
		return ci_a->view < ci_b->view ? -1 : 1;
	/* For the unbound daemon these should be non-NULL and identical,
	 * but we check that just in case. */
	if(ci_a->respip_set != ci_b->respip_set)
		return ci_a->respip_set < ci_b->respip_set ? -1 : 1;
	return 0;
}

int
mesh_state_compare(const void* ap, const void* bp)
{
	struct mesh_state* a = (struct mesh_state*)ap;
	struct mesh_state* b = (struct mesh_state*)bp;
	int cmp;

	if(a->unique < b->unique)
		return -1;
	if(a->unique > b->unique)
		return 1;

	if(a->s.is_priming && !b->s.is_priming)
		return -1;
	if(!a->s.is_priming && b->s.is_priming)
		return 1;

	if(a->s.is_valrec && !b->s.is_valrec)
		return -1;
	if(!a->s.is_valrec && b->s.is_valrec)
		return 1;

	if((a->s.query_flags&BIT_RD) && !(b->s.query_flags&BIT_RD))
		return -1;
	if(!(a->s.query_flags&BIT_RD) && (b->s.query_flags&BIT_RD))
		return 1;

	if((a->s.query_flags&BIT_CD) && !(b->s.query_flags&BIT_CD))
		return -1;
	if(!(a->s.query_flags&BIT_CD) && (b->s.query_flags&BIT_CD))
		return 1;

	cmp = query_info_compare(&a->s.qinfo, &b->s.qinfo);
	if(cmp != 0)
		return cmp;
	return client_info_compare(a->s.client_info, b->s.client_info);
}

int
mesh_state_ref_compare(const void* ap, const void* bp)
{
	struct mesh_state_ref* a = (struct mesh_state_ref*)ap;
	struct mesh_state_ref* b = (struct mesh_state_ref*)bp;
	return mesh_state_compare(a->s, b->s);
}

struct mesh_area*
mesh_create(struct module_stack* stack, struct module_env* env)
{
	struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area));
	if(!mesh) {
		log_err("mesh area alloc: out of memory");
		return NULL;
	}
	mesh->histogram = timehist_setup();
	mesh->qbuf_bak = sldns_buffer_new(env->cfg->msg_buffer_size);
	if(!mesh->histogram || !mesh->qbuf_bak) {
		timehist_delete(mesh->histogram);
		sldns_buffer_free(mesh->qbuf_bak);
		free(mesh);
		log_err("mesh area alloc: out of memory");
		return NULL;
	}
	mesh->mods = *stack;
	mesh->env = env;
	rbtree_init(&mesh->run, &mesh_state_compare);
	rbtree_init(&mesh->all, &mesh_state_compare);
	mesh->num_reply_addrs = 0;
	mesh->num_reply_states = 0;
	mesh->num_detached_states = 0;
	mesh->num_forever_states = 0;
	mesh->stats_jostled = 0;
	mesh->stats_dropped = 0;
	mesh->ans_expired = 0;
	mesh->max_reply_states = env->cfg->num_queries_per_thread;
	mesh->max_forever_states = (mesh->max_reply_states+1)/2;
#ifndef S_SPLINT_S
	mesh->jostle_max.tv_sec = (time_t)(env->cfg->jostle_time / 1000);
	mesh->jostle_max.tv_usec = (time_t)((env->cfg->jostle_time % 1000)
		*1000);
#endif
	return mesh;
}
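
/*
 * A minimal lifecycle sketch for the mesh area, with a hypothetical
 * caller; the daemon does the equivalent once per worker thread, since
 * the mesh is thread-specific (see the file comment):
 *
 *	struct mesh_area* mesh = mesh_create(&modstack, &env);
 *	if(!mesh)
 *		fatal_exit("out of memory");
 *	// mesh_new_client()/mesh_new_callback() feed it queries and
 *	// mesh_run() drives the module states ...
 *	mesh_delete(mesh);
 */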

/** helper for mesh_delete: delete one mesh state */
static void
mesh_delete_helper(rbnode_type* n)
{
	struct mesh_state* mstate = (struct mesh_state*)n->key;
	/* perform a full delete, not only the 'cleanup' routine,
	 * because other callbacks expect a clean state in the mesh.
	 * For 're-entrant' calls */
	mesh_state_delete(&mstate->s);
	/* but because these delete the items from the tree, postorder
	 * traversal and rbtree rebalancing do not work together */
}

void
mesh_delete(struct mesh_area* mesh)
{
	if(!mesh)
		return;
	/* free all query states */
	while(mesh->all.count)
		mesh_delete_helper(mesh->all.root);
	timehist_delete(mesh->histogram);
	sldns_buffer_free(mesh->qbuf_bak);
	free(mesh);
}

void
mesh_delete_all(struct mesh_area* mesh)
{
	/* free all query states */
	while(mesh->all.count)
		mesh_delete_helper(mesh->all.root);
	mesh->stats_dropped += mesh->num_reply_addrs;
	/* clear mesh area references */
	rbtree_init(&mesh->run, &mesh_state_compare);
	rbtree_init(&mesh->all, &mesh_state_compare);
	mesh->num_reply_addrs = 0;
	mesh->num_reply_states = 0;
	mesh->num_detached_states = 0;
	mesh->num_forever_states = 0;
	mesh->forever_first = NULL;
	mesh->forever_last = NULL;
	mesh->jostle_first = NULL;
	mesh->jostle_last = NULL;
}
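
/*
 * A worked example of the eviction rule in mesh_make_new_space() below,
 * assuming jostle-timeout is at its 200 msec default, so jostle_max is
 * { 0, 200000 }: when the mesh is full, a jostle-list state whose reply
 * started 250 msec ago is older than jostle_max and gets evicted to make
 * room; if the oldest jostle-list state started only 50 msec ago, nothing
 * is evicted and the incoming query is dropped instead.
 */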

int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf)
{
	struct mesh_state* m = mesh->jostle_first;
	/* free space is available */
	if(mesh->num_reply_states < mesh->max_reply_states)
		return 1;
	/* try to kick out a jostle-list item */
	if(m && m->reply_list && m->list_select == mesh_jostle_list) {
		/* how old is it? */
		struct timeval age;
		timeval_subtract(&age, mesh->env->now_tv,
			&m->reply_list->start_time);
		if(timeval_smaller(&mesh->jostle_max, &age)) {
			/* it's a goner */
			log_nametypeclass(VERB_ALGO, "query jostled out to "
				"make space for a new one",
				m->s.qinfo.qname, m->s.qinfo.qtype,
				m->s.qinfo.qclass);
			/* backup the query */
			if(qbuf) sldns_buffer_copy(mesh->qbuf_bak, qbuf);
			/* notify supers */
			if(m->super_set.count > 0) {
				verbose(VERB_ALGO, "notify supers of failure");
				m->s.return_msg = NULL;
				m->s.return_rcode = LDNS_RCODE_SERVFAIL;
				mesh_walk_supers(mesh, m);
			}
			mesh->stats_jostled ++;
			mesh_state_delete(&m->s);
			/* restore the query - note that the qinfo ptr to
			 * the querybuffer is then correct again. */
			if(qbuf) sldns_buffer_copy(qbuf, mesh->qbuf_bak);
			return 1;
		}
	}
	/* no space for new item */
	return 0;
}

struct dns_msg*
mesh_serve_expired_lookup(struct module_qstate* qstate,
	struct query_info* lookup_qinfo)
{
	hashvalue_type h;
	struct lruhash_entry* e;
	struct dns_msg* msg;
	struct reply_info* data;
	struct msgreply_entry* key;
	time_t timenow = *qstate->env->now;
	int must_validate = (!(qstate->query_flags&BIT_CD)
		|| qstate->env->cfg->ignore_cd) && qstate->env->need_to_validate;
	/* Lookup cache */
	h = query_info_hash(lookup_qinfo, qstate->query_flags);
	e = slabhash_lookup(qstate->env->msg_cache, h, lookup_qinfo, 0);
	if(!e) return NULL;

	key = (struct msgreply_entry*)e->key;
	data = (struct reply_info*)e->data;
	msg = tomsg(qstate->env, &key->key, data, qstate->region, timenow,
		qstate->env->cfg->serve_expired, qstate->env->scratch);
	if(!msg)
		goto bail_out;

	/* Check CNAME chain (if any)
	 * This is part of tomsg above; no need to check now. */

	/* Check security status of the cached answer.
	 * tomsg above has a subset of these checks, so we are leaving
	 * these as is.
	 * In case of bogus or revalidation we don't care to reply here. */
	if(must_validate && (msg->rep->security == sec_status_bogus ||
		msg->rep->security == sec_status_secure_sentinel_fail)) {
		verbose(VERB_ALGO, "Serve expired: bogus answer found in cache");
		goto bail_out;
	} else if(msg->rep->security == sec_status_unchecked && must_validate) {
		verbose(VERB_ALGO, "Serve expired: unchecked entry needs "
			"validation");
		goto bail_out; /* need to validate cache entry first */
	} else if(msg->rep->security == sec_status_secure &&
		!reply_all_rrsets_secure(msg->rep) && must_validate) {
		verbose(VERB_ALGO, "Serve expired: secure entry"
			" changed status");
		goto bail_out; /* rrset changed, re-verify */
	}

	lock_rw_unlock(&e->lock);
	return msg;

bail_out:
	lock_rw_unlock(&e->lock);
	return NULL;
}
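
/*
 * A small worked example for the timer arithmetic in
 * mesh_serve_expired_init() below, assuming a (hypothetical)
 * serve-expired-client-timeout of 1800 msec:
 *
 *	t.tv_sec  = 1800/1000        = 1
 *	t.tv_usec = (1800%1000)*1000 = 800000
 *
 * so mesh_serve_expired_callback fires for this state after 1.8 seconds,
 * unless the real answer arrives first and mesh_query_done removes the
 * timer.
 */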

/** Init the serve expired data structure */
static int
mesh_serve_expired_init(struct mesh_state* mstate, int timeout)
{
	struct timeval t;

	/* Create serve_expired_data if not there yet */
	if(!mstate->s.serve_expired_data) {
		mstate->s.serve_expired_data = (struct serve_expired_data*)
			regional_alloc_zero(
				mstate->s.region, sizeof(struct serve_expired_data));
		if(!mstate->s.serve_expired_data)
			return 0;
	}

	/* Don't overwrite the function if already set */
	mstate->s.serve_expired_data->get_cached_answer =
		mstate->s.serve_expired_data->get_cached_answer?
		mstate->s.serve_expired_data->get_cached_answer:
		mesh_serve_expired_lookup;

	/* In case this timer already popped, start it again */
	if(!mstate->s.serve_expired_data->timer) {
		mstate->s.serve_expired_data->timer = comm_timer_create(
			mstate->s.env->worker_base, mesh_serve_expired_callback, mstate);
		if(!mstate->s.serve_expired_data->timer)
			return 0;
#ifndef S_SPLINT_S
		t.tv_sec = timeout/1000;
		t.tv_usec = (timeout%1000)*1000;
#endif
		comm_timer_set(mstate->s.serve_expired_data->timer, &t);
	}
	return 1;
}

void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
	struct respip_client_info* cinfo, uint16_t qflags,
	struct edns_data* edns, struct comm_reply* rep, uint16_t qid)
{
	struct mesh_state* s = NULL;
	int unique = unique_mesh_state(edns->opt_list, mesh->env);
	int was_detached = 0;
	int was_noreply = 0;
	int added = 0;
	int timeout = mesh->env->cfg->serve_expired?
		mesh->env->cfg->serve_expired_client_timeout:0;
	struct sldns_buffer* r_buffer = rep->c->buffer;
	if(rep->c->tcp_req_info) {
		r_buffer = rep->c->tcp_req_info->spool_buffer;
	}
	if(!unique)
		s = mesh_area_find(mesh, cinfo, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0);
	/* does this create a new reply state? */
	if(!s || s->list_select == mesh_no_list) {
		if(!mesh_make_new_space(mesh, rep->c->buffer)) {
			verbose(VERB_ALGO, "Too many queries. dropping "
				"incoming query.");
			comm_point_drop_reply(rep);
			mesh->stats_dropped++;
			return;
		}
		/* for this new reply state, the reply address is free,
		 * so the limit of reply addresses does not stop reply states*/
	} else {
		/* protect our memory usage from storing reply addresses */
		if(mesh->num_reply_addrs > mesh->max_reply_states*16) {
			verbose(VERB_ALGO, "Too many requests queued. "
				"dropping incoming query.");
" 473 "dropping incoming query."); 474 comm_point_drop_reply(rep); 475 mesh->stats_dropped++; 476 return; 477 } 478 } 479 /* see if it already exists, if not, create one */ 480 if(!s) { 481 #ifdef UNBOUND_DEBUG 482 struct rbnode_type* n; 483 #endif 484 s = mesh_state_create(mesh->env, qinfo, cinfo, 485 qflags&(BIT_RD|BIT_CD), 0, 0); 486 if(!s) { 487 log_err("mesh_state_create: out of memory; SERVFAIL"); 488 if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, NULL, NULL, 489 LDNS_RCODE_SERVFAIL, edns, rep, mesh->env->scratch)) 490 edns->opt_list = NULL; 491 error_encode(r_buffer, LDNS_RCODE_SERVFAIL, 492 qinfo, qid, qflags, edns); 493 comm_point_send_reply(rep); 494 return; 495 } 496 if(unique) 497 mesh_state_make_unique(s); 498 /* copy the edns options we got from the front */ 499 if(edns->opt_list) { 500 s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list, 501 s->s.region); 502 if(!s->s.edns_opts_front_in) { 503 log_err("mesh_state_create: out of memory; SERVFAIL"); 504 if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, NULL, 505 NULL, LDNS_RCODE_SERVFAIL, edns, rep, mesh->env->scratch)) 506 edns->opt_list = NULL; 507 error_encode(r_buffer, LDNS_RCODE_SERVFAIL, 508 qinfo, qid, qflags, edns); 509 comm_point_send_reply(rep); 510 return; 511 } 512 } 513 514 #ifdef UNBOUND_DEBUG 515 n = 516 #else 517 (void) 518 #endif 519 rbtree_insert(&mesh->all, &s->node); 520 log_assert(n != NULL); 521 /* set detached (it is now) */ 522 mesh->num_detached_states++; 523 added = 1; 524 } 525 if(!s->reply_list && !s->cb_list) { 526 was_noreply = 1; 527 if(s->super_set.count == 0) { 528 was_detached = 1; 529 } 530 } 531 /* add reply to s */ 532 if(!mesh_state_add_reply(s, edns, rep, qid, qflags, qinfo)) { 533 log_err("mesh_new_client: out of memory; SERVFAIL"); 534 goto servfail_mem; 535 } 536 if(rep->c->tcp_req_info) { 537 if(!tcp_req_info_add_meshstate(rep->c->tcp_req_info, mesh, s)) { 538 log_err("mesh_new_client: out of memory add tcpreqinfo"); 539 goto servfail_mem; 540 } 541 } 542 /* add serve expired timer if required and not already there */ 543 if(timeout && !mesh_serve_expired_init(s, timeout)) { 544 log_err("mesh_new_client: out of memory initializing serve expired"); 545 goto servfail_mem; 546 } 547 /* update statistics */ 548 if(was_detached) { 549 log_assert(mesh->num_detached_states > 0); 550 mesh->num_detached_states--; 551 } 552 if(was_noreply) { 553 mesh->num_reply_states ++; 554 } 555 mesh->num_reply_addrs++; 556 if(s->list_select == mesh_no_list) { 557 /* move to either the forever or the jostle_list */ 558 if(mesh->num_forever_states < mesh->max_forever_states) { 559 mesh->num_forever_states ++; 560 mesh_list_insert(s, &mesh->forever_first, 561 &mesh->forever_last); 562 s->list_select = mesh_forever_list; 563 } else { 564 mesh_list_insert(s, &mesh->jostle_first, 565 &mesh->jostle_last); 566 s->list_select = mesh_jostle_list; 567 } 568 } 569 if(added) 570 mesh_run(mesh, s, module_event_new, NULL); 571 return; 572 573 servfail_mem: 574 if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, &s->s, 575 NULL, LDNS_RCODE_SERVFAIL, edns, rep, mesh->env->scratch)) 576 edns->opt_list = NULL; 577 error_encode(r_buffer, LDNS_RCODE_SERVFAIL, 578 qinfo, qid, qflags, edns); 579 comm_point_send_reply(rep); 580 if(added) 581 mesh_state_delete(&s->s); 582 return; 583 } 584 585 int 586 mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, 587 uint16_t qflags, struct edns_data* edns, sldns_buffer* buf, 588 uint16_t qid, mesh_cb_func_type cb, void* cb_arg) 589 { 590 struct mesh_state* 

int
mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo,
	uint16_t qflags, struct edns_data* edns, sldns_buffer* buf,
	uint16_t qid, mesh_cb_func_type cb, void* cb_arg)
{
	struct mesh_state* s = NULL;
	int unique = unique_mesh_state(edns->opt_list, mesh->env);
	int timeout = mesh->env->cfg->serve_expired?
		mesh->env->cfg->serve_expired_client_timeout:0;
	int was_detached = 0;
	int was_noreply = 0;
	int added = 0;
	if(!unique)
		s = mesh_area_find(mesh, NULL, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0);

	/* there are no limits on the number of callbacks */

	/* see if it already exists, if not, create one */
	if(!s) {
#ifdef UNBOUND_DEBUG
		struct rbnode_type* n;
#endif
		s = mesh_state_create(mesh->env, qinfo, NULL,
			qflags&(BIT_RD|BIT_CD), 0, 0);
		if(!s) {
			return 0;
		}
		if(unique)
			mesh_state_make_unique(s);
		if(edns->opt_list) {
			s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list,
				s->s.region);
			if(!s->s.edns_opts_front_in) {
				return 0;
			}
		}
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->all, &s->node);
		log_assert(n != NULL);
		/* set detached (it is now) */
		mesh->num_detached_states++;
		added = 1;
	}
	if(!s->reply_list && !s->cb_list) {
		was_noreply = 1;
		if(s->super_set.count == 0) {
			was_detached = 1;
		}
	}
	/* add reply to s */
	if(!mesh_state_add_cb(s, edns, buf, cb, cb_arg, qid, qflags)) {
		if(added)
			mesh_state_delete(&s->s);
		return 0;
	}
	/* add serve expired timer if not already there */
	if(timeout && !mesh_serve_expired_init(s, timeout)) {
		return 0;
	}
	/* update statistics */
	if(was_detached) {
		log_assert(mesh->num_detached_states > 0);
		mesh->num_detached_states--;
	}
	if(was_noreply) {
		mesh->num_reply_states ++;
	}
	mesh->num_reply_addrs++;
	if(added)
		mesh_run(mesh, s, module_event_new, NULL);
	return 1;
}

/* Internal backend routine of mesh_new_prefetch(). It takes one additional
 * parameter, 'run', which controls whether to run the prefetch state
 * immediately. When this function is called internally 'run' could be
 * 0 (false), in which case the new state is only made runnable so it
 * will not be run recursively on top of the current state. */
static void mesh_schedule_prefetch(struct mesh_area* mesh,
	struct query_info* qinfo, uint16_t qflags, time_t leeway, int run)
{
	struct mesh_state* s = mesh_area_find(mesh, NULL, qinfo,
		qflags&(BIT_RD|BIT_CD), 0, 0);
#ifdef UNBOUND_DEBUG
	struct rbnode_type* n;
#endif
	/* already exists, and for a different purpose perhaps.
	 * if mesh_no_list, keep it that way. */
	if(s) {
		/* make it ignore the cache from now on */
		if(!s->s.blacklist)
			sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
		if(s->s.prefetch_leeway < leeway)
			s->s.prefetch_leeway = leeway;
		return;
	}
	if(!mesh_make_new_space(mesh, NULL)) {
		verbose(VERB_ALGO, "Too many queries. dropped prefetch.");
		mesh->stats_dropped ++;
		return;
	}

	s = mesh_state_create(mesh->env, qinfo, NULL,
		qflags&(BIT_RD|BIT_CD), 0, 0);
	if(!s) {
		log_err("prefetch mesh_state_create: out of memory");
		return;
	}
#ifdef UNBOUND_DEBUG
	n =
#else
	(void)
#endif
	rbtree_insert(&mesh->all, &s->node);
	log_assert(n != NULL);
	/* set detached (it is now) */
	mesh->num_detached_states++;
	/* make it ignore the cache */
	sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
	s->s.prefetch_leeway = leeway;

	if(s->list_select == mesh_no_list) {
		/* move to either the forever or the jostle_list */
		if(mesh->num_forever_states < mesh->max_forever_states) {
			mesh->num_forever_states ++;
			mesh_list_insert(s, &mesh->forever_first,
				&mesh->forever_last);
			s->list_select = mesh_forever_list;
		} else {
			mesh_list_insert(s, &mesh->jostle_first,
				&mesh->jostle_last);
			s->list_select = mesh_jostle_list;
		}
	}

	if(!run) {
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->run, &s->run_node);
		log_assert(n != NULL);
		return;
	}

	mesh_run(mesh, s, module_event_new, NULL);
}

void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo,
	uint16_t qflags, time_t leeway)
{
	mesh_schedule_prefetch(mesh, qinfo, qflags, leeway, 1);
}

void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e,
	struct comm_reply* reply, int what)
{
	enum module_ev event = module_event_reply;
	e->qstate->reply = reply;
	if(what != NETEVENT_NOERROR) {
		event = module_event_noreply;
		if(what == NETEVENT_CAPSFAIL)
			event = module_event_capsfail;
	}
	mesh_run(mesh, e->qstate->mesh_info, event, e);
}
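
/*
 * A minimal usage sketch for mesh_new_prefetch() above, with
 * hypothetical values: a cache hit whose TTL is about to expire can be
 * refreshed in the background by
 *
 *	mesh_new_prefetch(mesh, &qinfo, qflags & (BIT_RD|BIT_CD), 10);
 *
 * where 10 is the prefetch leeway in seconds. The new state gets an
 * empty entry on its sock_list blacklist so it ignores the cache and
 * refetches from the network; no client reply is attached to it.
 */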

struct mesh_state*
mesh_state_create(struct module_env* env, struct query_info* qinfo,
	struct respip_client_info* cinfo, uint16_t qflags, int prime,
	int valrec)
{
	struct regional* region = alloc_reg_obtain(env->alloc);
	struct mesh_state* mstate;
	int i;
	if(!region)
		return NULL;
	mstate = (struct mesh_state*)regional_alloc(region,
		sizeof(struct mesh_state));
	if(!mstate) {
		alloc_reg_release(env->alloc, region);
		return NULL;
	}
	memset(mstate, 0, sizeof(*mstate));
	mstate->node = *RBTREE_NULL;
	mstate->run_node = *RBTREE_NULL;
	mstate->node.key = mstate;
	mstate->run_node.key = mstate;
	mstate->reply_list = NULL;
	mstate->list_select = mesh_no_list;
	mstate->replies_sent = 0;
	rbtree_init(&mstate->super_set, &mesh_state_ref_compare);
	rbtree_init(&mstate->sub_set, &mesh_state_ref_compare);
	mstate->num_activated = 0;
	mstate->unique = NULL;
	/* init module qstate */
	mstate->s.qinfo.qtype = qinfo->qtype;
	mstate->s.qinfo.qclass = qinfo->qclass;
	mstate->s.qinfo.local_alias = NULL;
	mstate->s.qinfo.qname_len = qinfo->qname_len;
	mstate->s.qinfo.qname = regional_alloc_init(region, qinfo->qname,
		qinfo->qname_len);
	if(!mstate->s.qinfo.qname) {
		alloc_reg_release(env->alloc, region);
		return NULL;
	}
	if(cinfo) {
		mstate->s.client_info = regional_alloc_init(region, cinfo,
			sizeof(*cinfo));
		if(!mstate->s.client_info) {
			alloc_reg_release(env->alloc, region);
			return NULL;
		}
	}
	/* remove all weird bits from qflags */
	mstate->s.query_flags = (qflags & (BIT_RD|BIT_CD));
	mstate->s.is_priming = prime;
	mstate->s.is_valrec = valrec;
	mstate->s.reply = NULL;
	mstate->s.region = region;
	mstate->s.curmod = 0;
	mstate->s.return_msg = 0;
	mstate->s.return_rcode = LDNS_RCODE_NOERROR;
	mstate->s.env = env;
	mstate->s.mesh_info = mstate;
	mstate->s.prefetch_leeway = 0;
	mstate->s.serve_expired_data = NULL;
	mstate->s.no_cache_lookup = 0;
	mstate->s.no_cache_store = 0;
	mstate->s.need_refetch = 0;
	mstate->s.was_ratelimited = 0;

	/* init modules */
	for(i=0; i<env->mesh->mods.num; i++) {
		mstate->s.minfo[i] = NULL;
		mstate->s.ext_state[i] = module_state_initial;
	}
	/* init edns option lists */
	mstate->s.edns_opts_front_in = NULL;
	mstate->s.edns_opts_back_out = NULL;
	mstate->s.edns_opts_back_in = NULL;
	mstate->s.edns_opts_front_out = NULL;

	return mstate;
}

int
mesh_state_is_unique(struct mesh_state* mstate)
{
	return mstate->unique != NULL;
}

void
mesh_state_make_unique(struct mesh_state* mstate)
{
	mstate->unique = mstate;
}

void
mesh_state_cleanup(struct mesh_state* mstate)
{
	struct mesh_area* mesh;
	int i;
	if(!mstate)
		return;
	mesh = mstate->s.env->mesh;
	/* Stop and delete the serve expired timer */
	if(mstate->s.serve_expired_data && mstate->s.serve_expired_data->timer) {
		comm_timer_delete(mstate->s.serve_expired_data->timer);
		mstate->s.serve_expired_data->timer = NULL;
	}
	/* drop unsent replies */
	if(!mstate->replies_sent) {
		struct mesh_reply* rep = mstate->reply_list;
		struct mesh_cb* cb;
		/* in tcp_req_info, the mstates linked are removed, but
		 * the reply_list is now NULL, so the remove-from-empty-list
		 * takes no time and also it does not do the mesh accounting */
		mstate->reply_list = NULL;
		for(; rep; rep=rep->next) {
			comm_point_drop_reply(&rep->query_reply);
			log_assert(mesh->num_reply_addrs > 0);
			mesh->num_reply_addrs--;
		}
		while((cb = mstate->cb_list)!=NULL) {
			mstate->cb_list = cb->next;
			fptr_ok(fptr_whitelist_mesh_cb(cb->cb));
			(*cb->cb)(cb->cb_arg, LDNS_RCODE_SERVFAIL, NULL,
				sec_status_unchecked, NULL, 0);
			log_assert(mesh->num_reply_addrs > 0);
			mesh->num_reply_addrs--;
		}
	}

	/* de-init modules */
	for(i=0; i<mesh->mods.num; i++) {
		fptr_ok(fptr_whitelist_mod_clear(mesh->mods.mod[i]->clear));
		(*mesh->mods.mod[i]->clear)(&mstate->s, i);
		mstate->s.minfo[i] = NULL;
		mstate->s.ext_state[i] = module_finished;
	}
	alloc_reg_release(mstate->s.env->alloc, mstate->s.region);
}

void
mesh_state_delete(struct module_qstate* qstate)
{
	struct mesh_area* mesh;
	struct mesh_state_ref* super, ref;
	struct mesh_state* mstate;
	if(!qstate)
		return;
	mstate = qstate->mesh_info;
	mesh = mstate->s.env->mesh;
	mesh_detach_subs(&mstate->s);
	if(mstate->list_select == mesh_forever_list) {
		mesh->num_forever_states --;
		mesh_list_remove(mstate, &mesh->forever_first,
			&mesh->forever_last);
	} else if(mstate->list_select == mesh_jostle_list) {
		mesh_list_remove(mstate, &mesh->jostle_first,
			&mesh->jostle_last);
	}
	if(!mstate->reply_list && !mstate->cb_list
		&& mstate->super_set.count == 0) {
		log_assert(mesh->num_detached_states > 0);
		mesh->num_detached_states--;
	}
	if(mstate->reply_list || mstate->cb_list) {
		log_assert(mesh->num_reply_states > 0);
		mesh->num_reply_states--;
	}
	ref.node.key = &ref;
	ref.s = mstate;
	RBTREE_FOR(super, struct mesh_state_ref*, &mstate->super_set) {
		(void)rbtree_delete(&super->s->sub_set, &ref);
	}
	(void)rbtree_delete(&mesh->run, mstate);
	(void)rbtree_delete(&mesh->all, mstate);
	mesh_state_cleanup(mstate);
}

/** helper recursive rbtree find routine */
static int
find_in_subsub(struct mesh_state* m, struct mesh_state* tofind, size_t *c)
{
	struct mesh_state_ref* r;
	if((*c)++ > MESH_MAX_SUBSUB)
		return 1;
	RBTREE_FOR(r, struct mesh_state_ref*, &m->sub_set) {
		if(r->s == tofind || find_in_subsub(r->s, tofind, c))
			return 1;
	}
	return 0;
}

/** find cycle for already looked up mesh_state */
static int
mesh_detect_cycle_found(struct module_qstate* qstate, struct mesh_state* dep_m)
{
	struct mesh_state* cyc_m = qstate->mesh_info;
	size_t counter = 0;
	if(!dep_m)
		return 0;
	if(dep_m == cyc_m || find_in_subsub(dep_m, cyc_m, &counter)) {
		if(counter > MESH_MAX_SUBSUB)
			return 2;
		return 1;
	}
	return 0;
}

void mesh_detach_subs(struct module_qstate* qstate)
{
	struct mesh_area* mesh = qstate->env->mesh;
	struct mesh_state_ref* ref, lookup;
#ifdef UNBOUND_DEBUG
	struct rbnode_type* n;
#endif
	lookup.node.key = &lookup;
	lookup.s = qstate->mesh_info;
	RBTREE_FOR(ref, struct mesh_state_ref*, &qstate->mesh_info->sub_set) {
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_delete(&ref->s->super_set, &lookup);
		log_assert(n != NULL); /* must have been present */
		if(!ref->s->reply_list && !ref->s->cb_list
			&& ref->s->super_set.count == 0) {
			mesh->num_detached_states++;
			log_assert(mesh->num_detached_states +
				mesh->num_reply_states <= mesh->all.count);
		}
	}
	rbtree_init(&qstate->mesh_info->sub_set, &mesh_state_ref_compare);
}

int mesh_add_sub(struct module_qstate* qstate, struct query_info* qinfo,
	uint16_t qflags, int prime, int valrec, struct module_qstate** newq,
	struct mesh_state** sub)
{
	/* find it, if not, create it */
	struct mesh_area* mesh = qstate->env->mesh;
	*sub = mesh_area_find(mesh, NULL, qinfo, qflags,
		prime, valrec);
	if(mesh_detect_cycle_found(qstate, *sub)) {
		verbose(VERB_ALGO, "attach failed, cycle detected");
		return 0;
	}
	if(!*sub) {
#ifdef UNBOUND_DEBUG
		struct rbnode_type* n;
#endif
		/* create a new one */
		*sub = mesh_state_create(qstate->env, qinfo, NULL, qflags, prime,
			valrec);
		if(!*sub) {
			log_err("mesh_attach_sub: out of memory");
			return 0;
		}
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->all, &(*sub)->node);
		log_assert(n != NULL);
		/* set detached (it is now) */
		mesh->num_detached_states++;
		/* set new query state to run */
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->run, &(*sub)->run_node);
		log_assert(n != NULL);
		*newq = &(*sub)->s;
	} else
		*newq = NULL;
	return 1;
}
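
/*
 * A minimal sketch of how a module attaches a sub-query, with
 * hypothetical names; for example, resolving a nameserver address from
 * within the state 'qstate':
 *
 *	struct module_qstate* newq = NULL;
 *	if(!mesh_attach_sub(qstate, &ns_a_qinfo, BIT_RD, 0, 0, &newq))
 *		return 0; // cycle detected or out of memory
 *	// newq != NULL: a fresh sub-state was created and will run after
 *	// the current module yields; newq == NULL: an existing state was
 *	// joined. Either way the result arrives via inform_super.
 */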

int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo,
	uint16_t qflags, int prime, int valrec, struct module_qstate** newq)
{
	struct mesh_area* mesh = qstate->env->mesh;
	struct mesh_state* sub = NULL;
	int was_detached;
	if(!mesh_add_sub(qstate, qinfo, qflags, prime, valrec, newq, &sub))
		return 0;
	was_detached = (sub->super_set.count == 0);
	if(!mesh_state_attachment(qstate->mesh_info, sub))
		return 0;
	/* if it was a duplicate attachment, the count was not zero before */
	if(!sub->reply_list && !sub->cb_list && was_detached &&
		sub->super_set.count == 1) {
		/* it used to be detached, before this one got added */
		log_assert(mesh->num_detached_states > 0);
		mesh->num_detached_states--;
	}
	/* *newq will be run when inited after the current module stops */
	return 1;
}

int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub)
{
#ifdef UNBOUND_DEBUG
	struct rbnode_type* n;
#endif
	struct mesh_state_ref* subref; /* points to sub, inserted in super */
	struct mesh_state_ref* superref; /* points to super, inserted in sub */
	if( !(subref = regional_alloc(super->s.region,
		sizeof(struct mesh_state_ref))) ||
		!(superref = regional_alloc(sub->s.region,
		sizeof(struct mesh_state_ref))) ) {
		log_err("mesh_state_attachment: out of memory");
		return 0;
	}
	superref->node.key = superref;
	superref->s = super;
	subref->node.key = subref;
	subref->s = sub;
	if(!rbtree_insert(&sub->super_set, &superref->node)) {
		/* this should not happen, iterator and validator do not
		 * attach subqueries that are identical. */
		/* already attached, we are done, nothing to do.
		 * since superref and subref are already allocated in the
		 * region, we cannot free them */
		return 1;
	}
#ifdef UNBOUND_DEBUG
	n =
#else
	(void)
#endif
	rbtree_insert(&super->sub_set, &subref->node);
	log_assert(n != NULL); /* we checked the above if statement; the
		reverse administration should not fail now, unless they
		are out of sync */
	return 1;
}

/**
 * callback results to mesh cb entry
 * @param m: mesh state to send it for.
 * @param rcode: if not 0, error code.
 * @param rep: reply to send (or NULL if rcode is set).
 * @param r: callback entry
 */
static void
mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep,
	struct mesh_cb* r)
{
	int secure;
	char* reason = NULL;
	int was_ratelimited = m->s.was_ratelimited;
	/* bogus messages are not made into servfail, sec_status passed
	 * to the callback function */
	if(rep && rep->security == sec_status_secure)
		secure = 1;
	else secure = 0;
	if(!rep && rcode == LDNS_RCODE_NOERROR)
		rcode = LDNS_RCODE_SERVFAIL;
	if(!rcode && (rep->security == sec_status_bogus ||
		rep->security == sec_status_secure_sentinel_fail)) {
		if(!(reason = errinf_to_str_bogus(&m->s)))
			rcode = LDNS_RCODE_SERVFAIL;
	}
	/* send the reply */
	if(rcode) {
		if(rcode == LDNS_RCODE_SERVFAIL) {
			if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s,
				rep, rcode, &r->edns, NULL, m->s.region))
				r->edns.opt_list = NULL;
		} else {
			if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode,
				&r->edns, NULL, m->s.region))
				r->edns.opt_list = NULL;
		}
		fptr_ok(fptr_whitelist_mesh_cb(r->cb));
		(*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL,
			was_ratelimited);
	} else {
		size_t udp_size = r->edns.udp_size;
		sldns_buffer_clear(r->buf);
		r->edns.edns_version = EDNS_ADVERTISED_VERSION;
		r->edns.udp_size = EDNS_ADVERTISED_SIZE;
		r->edns.ext_rcode = 0;
		r->edns.bits &= EDNS_DO;

		if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep,
			LDNS_RCODE_NOERROR, &r->edns, NULL, m->s.region) ||
			!reply_info_answer_encode(&m->s.qinfo, rep, r->qid,
			r->qflags, r->buf, 0, 1,
			m->s.env->scratch, udp_size, &r->edns,
			(int)(r->edns.bits & EDNS_DO), secure))
		{
			fptr_ok(fptr_whitelist_mesh_cb(r->cb));
			(*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf,
				sec_status_unchecked, NULL, 0);
		} else {
			fptr_ok(fptr_whitelist_mesh_cb(r->cb));
			(*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf,
				rep->security, reason, was_ratelimited);
		}
	}
	free(reason);
	log_assert(m->s.env->mesh->num_reply_addrs > 0);
	m->s.env->mesh->num_reply_addrs--;
}

/**
 * Send reply to mesh reply entry
 * @param m: mesh state to send it for.
 * @param rcode: if not 0, error code.
 * @param rep: reply to send (or NULL if rcode is set).
 * @param r: reply entry
 * @param r_buffer: buffer to use for reply entry.
 * @param prev: previous reply, already has its answer encoded in buffer.
 * @param prev_buffer: buffer for previous reply.
 */
static void
mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep,
	struct mesh_reply* r, struct sldns_buffer* r_buffer,
	struct mesh_reply* prev, struct sldns_buffer* prev_buffer)
{
	struct timeval end_time;
	struct timeval duration;
	int secure;
	/* Copy the client's EDNS for later restore, to make sure the edns
	 * compare is with the correct edns options. */
	struct edns_data edns_bak = r->edns;
	/* examine security status */
	if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) ||
		m->s.env->cfg->ignore_cd) && rep &&
		(rep->security <= sec_status_bogus ||
		rep->security == sec_status_secure_sentinel_fail)) {
		rcode = LDNS_RCODE_SERVFAIL;
		if(m->s.env->cfg->stat_extended)
			m->s.env->mesh->ans_bogus++;
	}
	if(rep && rep->security == sec_status_secure)
		secure = 1;
	else secure = 0;
	if(!rep && rcode == LDNS_RCODE_NOERROR)
		rcode = LDNS_RCODE_SERVFAIL;
	/* send the reply */
	/* We don't reuse the encoded answer if either the previous or current
	 * response has a local alias. We could compare the alias records
	 * and still reuse the previous answer if they are the same, but that
	 * would be complicated and error prone for the relatively minor case.
	 * So we err on the side of safety. */
	if(prev && prev_buffer && prev->qflags == r->qflags &&
		!prev->local_alias && !r->local_alias &&
		prev->edns.edns_present == r->edns.edns_present &&
		prev->edns.bits == r->edns.bits &&
		prev->edns.udp_size == r->edns.udp_size &&
		edns_opt_list_compare(prev->edns.opt_list, r->edns.opt_list)
		== 0) {
		/* if the previous reply is identical to this one, fix ID */
		if(prev_buffer != r_buffer)
			sldns_buffer_copy(r_buffer, prev_buffer);
		sldns_buffer_write_at(r_buffer, 0, &r->qid, sizeof(uint16_t));
		sldns_buffer_write_at(r_buffer, 12, r->qname,
			m->s.qinfo.qname_len);
		comm_point_send_reply(&r->query_reply);
	} else if(rcode) {
		m->s.qinfo.qname = r->qname;
		m->s.qinfo.local_alias = r->local_alias;
		if(rcode == LDNS_RCODE_SERVFAIL) {
			if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s,
				rep, rcode, &r->edns, NULL, m->s.region))
				r->edns.opt_list = NULL;
		} else {
			if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode,
				&r->edns, NULL, m->s.region))
				r->edns.opt_list = NULL;
		}
		error_encode(r_buffer, rcode, &m->s.qinfo, r->qid,
			r->qflags, &r->edns);
		comm_point_send_reply(&r->query_reply);
	} else {
		size_t udp_size = r->edns.udp_size;
		r->edns.edns_version = EDNS_ADVERTISED_VERSION;
		r->edns.udp_size = EDNS_ADVERTISED_SIZE;
		r->edns.ext_rcode = 0;
		r->edns.bits &= EDNS_DO;
		m->s.qinfo.qname = r->qname;
		m->s.qinfo.local_alias = r->local_alias;
		if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep,
			LDNS_RCODE_NOERROR, &r->edns, NULL, m->s.region) ||
			!apply_edns_options(&r->edns, &edns_bak,
				m->s.env->cfg, r->query_reply.c,
				m->s.region) ||
			!reply_info_answer_encode(&m->s.qinfo, rep, r->qid,
				r->qflags, r_buffer, 0, 1, m->s.env->scratch,
				udp_size, &r->edns, (int)(r->edns.bits & EDNS_DO),
				secure))
		{
			if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s,
				rep, LDNS_RCODE_SERVFAIL, &r->edns, NULL, m->s.region))
				r->edns.opt_list = NULL;
			error_encode(r_buffer, LDNS_RCODE_SERVFAIL,
				&m->s.qinfo, r->qid, r->qflags, &r->edns);
		}
		r->edns = edns_bak;
		comm_point_send_reply(&r->query_reply);
	}
	/* account */
	log_assert(m->s.env->mesh->num_reply_addrs > 0);
	m->s.env->mesh->num_reply_addrs--;
	end_time = *m->s.env->now_tv;
	timeval_subtract(&duration, &end_time, &r->start_time);
	verbose(VERB_ALGO, "query took " ARG_LL "d.%6.6d sec",
		(long long)duration.tv_sec, (int)duration.tv_usec);
	m->s.env->mesh->replies_sent++;
	timeval_add(&m->s.env->mesh->replies_sum_wait, &duration);
	timehist_insert(m->s.env->mesh->histogram, &duration);
	if(m->s.env->cfg->stat_extended) {
		uint16_t rc = FLAGS_GET_RCODE(sldns_buffer_read_u16_at(
			r_buffer, 2));
		if(secure) m->s.env->mesh->ans_secure++;
		m->s.env->mesh->ans_rcode[ rc ] ++;
		if(rc == 0 && LDNS_ANCOUNT(sldns_buffer_begin(r_buffer)) == 0)
			m->s.env->mesh->ans_nodata++;
	}
	/* Log reply sent */
	if(m->s.env->cfg->log_replies) {
		log_reply_info(NO_VERBOSE, &m->s.qinfo, &r->query_reply.addr,
			r->query_reply.addrlen, duration, 0, r_buffer);
	}
}

void mesh_query_done(struct mesh_state* mstate)
{
	struct mesh_reply* r, *reply_list = NULL;
	struct mesh_reply* prev = NULL;
	struct sldns_buffer* prev_buffer = NULL;
	struct mesh_cb* c;
	struct reply_info* rep = (mstate->s.return_msg?
		mstate->s.return_msg->rep:NULL);
	/* No need for the serve expired timer anymore; we are going to reply. */
	if(mstate->s.serve_expired_data) {
		comm_timer_delete(mstate->s.serve_expired_data->timer);
		mstate->s.serve_expired_data->timer = NULL;
	}
	if(mstate->s.return_rcode == LDNS_RCODE_SERVFAIL ||
		(rep && FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_SERVFAIL)) {
		/* we are SERVFAILing; check for an expired answer here */
		mesh_serve_expired_callback(mstate);
		if((mstate->reply_list || mstate->cb_list)
			&& mstate->s.env->cfg->log_servfail
			&& !mstate->s.env->cfg->val_log_squelch) {
			char* err = errinf_to_str_servfail(&mstate->s);
			if(err)
				log_err("%s", err);
			free(err);
		}
	}
	if(mstate->reply_list) {
		/* set the reply_list to NULL during the mesh_query_done
		 * processing, so that calls back into the mesh from
		 * tcp_req_info (deciding to drop the reply and thus
		 * unregister the mesh_reply from the mstate) are stopped
		 * because the list is empty.
		 * The mstate is then likely not a reply_state, and maybe
		 * also a detached_state.
		 */
		reply_list = mstate->reply_list;
		mstate->reply_list = NULL;
		if(!mstate->reply_list && !mstate->cb_list) {
			/* was a reply state, not anymore */
			log_assert(mstate->s.env->mesh->num_reply_states > 0);
			mstate->s.env->mesh->num_reply_states--;
		}
		if(!mstate->reply_list && !mstate->cb_list &&
			mstate->super_set.count == 0)
			mstate->s.env->mesh->num_detached_states++;
	}
	for(r = reply_list; r; r = r->next) {
		/* if a response-ip address block has been stored the
		 * information should be logged for each client. */
		if(mstate->s.respip_action_info &&
			mstate->s.respip_action_info->addrinfo) {
			respip_inform_print(mstate->s.respip_action_info,
				r->qname, mstate->s.qinfo.qtype,
				mstate->s.qinfo.qclass, r->local_alias,
				&r->query_reply);
			if(mstate->s.env->cfg->stat_extended &&
				mstate->s.respip_action_info->rpz_used) {
				if(mstate->s.respip_action_info->rpz_disabled)
					mstate->s.env->mesh->rpz_action[RPZ_DISABLED_ACTION]++;
				if(mstate->s.respip_action_info->rpz_cname_override)
					mstate->s.env->mesh->rpz_action[RPZ_CNAME_OVERRIDE_ACTION]++;
				else
					mstate->s.env->mesh->rpz_action[respip_action_to_rpz_action(
						mstate->s.respip_action_info->action)]++;
			}
		}

		/* if this query is determined to be dropped during the
		 * mesh processing, this is the point to take that action. */
		if(mstate->s.is_drop) {
			comm_point_drop_reply(&r->query_reply);
		} else {
			struct sldns_buffer* r_buffer = r->query_reply.c->buffer;
			if(r->query_reply.c->tcp_req_info) {
				r_buffer = r->query_reply.c->tcp_req_info->spool_buffer;
				prev_buffer = NULL;
			}
			mesh_send_reply(mstate, mstate->s.return_rcode, rep,
				r, r_buffer, prev, prev_buffer);
			if(r->query_reply.c->tcp_req_info) {
				tcp_req_info_remove_mesh_state(r->query_reply.c->tcp_req_info, mstate);
				r_buffer = NULL;
			}
			prev = r;
			prev_buffer = r_buffer;
		}
	}
	mstate->replies_sent = 1;
	while((c = mstate->cb_list) != NULL) {
		/* take this cb off the list; so that the list can be
		 * changed, eg. by adds from the callback routine */
		if(!mstate->reply_list && mstate->cb_list && !c->next) {
			/* was a reply state, not anymore */
			log_assert(mstate->s.env->mesh->num_reply_states > 0);
			mstate->s.env->mesh->num_reply_states--;
		}
		mstate->cb_list = c->next;
		if(!mstate->reply_list && !mstate->cb_list &&
			mstate->super_set.count == 0)
			mstate->s.env->mesh->num_detached_states++;
		mesh_do_callback(mstate, mstate->s.return_rcode, rep, c);
	}
}

void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate)
{
	struct mesh_state_ref* ref;
	RBTREE_FOR(ref, struct mesh_state_ref*, &mstate->super_set)
	{
		/* make super runnable */
		(void)rbtree_insert(&mesh->run, &ref->s->run_node);
		/* callback the function to inform super of result */
		fptr_ok(fptr_whitelist_mod_inform_super(
			mesh->mods.mod[ref->s->s.curmod]->inform_super));
		(*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s,
			ref->s->s.curmod, &ref->s->s);
		/* copy state that is always relevant to super */
		copy_state_to_super(&mstate->s, ref->s->s.curmod, &ref->s->s);
	}
}

struct mesh_state* mesh_area_find(struct mesh_area* mesh,
	struct respip_client_info* cinfo, struct query_info* qinfo,
	uint16_t qflags, int prime, int valrec)
{
	struct mesh_state key;
	struct mesh_state* result;

	key.node.key = &key;
	key.s.is_priming = prime;
	key.s.is_valrec = valrec;
	key.s.qinfo = *qinfo;
	key.s.query_flags = qflags;
	/* We are searching for a similar mesh state when we DO want to
	 * aggregate the state. Thus unique is set to NULL (the default
	 * when we desire aggregation). */
	key.unique = NULL;
	key.s.client_info = cinfo;

	result = (struct mesh_state*)rbtree_search(&mesh->all, &key);
	return result;
}

int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns,
	sldns_buffer* buf, mesh_cb_func_type cb, void* cb_arg,
	uint16_t qid, uint16_t qflags)
{
	struct mesh_cb* r = regional_alloc(s->s.region,
		sizeof(struct mesh_cb));
	if(!r)
		return 0;
	r->buf = buf;
	log_assert(fptr_whitelist_mesh_cb(cb)); /* early failure if missing */
	r->cb = cb;
	r->cb_arg = cb_arg;
	r->edns = *edns;
	if(edns->opt_list) {
		r->edns.opt_list = edns_opt_copy_region(edns->opt_list,
			s->s.region);
		if(!r->edns.opt_list)
			return 0;
	}
	r->qid = qid;
	r->qflags = qflags;
	r->next = s->cb_list;
	s->cb_list = r;
	return 1;
}

int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns,
	struct comm_reply* rep, uint16_t qid, uint16_t qflags,
	const struct query_info* qinfo)
{
	struct mesh_reply* r = regional_alloc(s->s.region,
		sizeof(struct mesh_reply));
	if(!r)
		return 0;
	r->query_reply = *rep;
	r->edns = *edns;
	if(edns->opt_list) {
		r->edns.opt_list = edns_opt_copy_region(edns->opt_list,
			s->s.region);
		if(!r->edns.opt_list)
			return 0;
	}
	r->qid = qid;
	r->qflags = qflags;
	r->start_time = *s->s.env->now_tv;
	r->next = s->reply_list;
	r->qname = regional_alloc_init(s->s.region, qinfo->qname,
		s->s.qinfo.qname_len);
	if(!r->qname)
		return 0;

	/* Data related to local alias stored in 'qinfo' (if any) is ephemeral
	 * and can be different for different original queries (even if the
	 * replaced query name is the same). So we need to make a deep copy
	 * and store the copy for each reply info. */
	if(qinfo->local_alias) {
		struct packed_rrset_data* d;
		struct packed_rrset_data* dsrc;
		r->local_alias = regional_alloc_zero(s->s.region,
			sizeof(*qinfo->local_alias));
		if(!r->local_alias)
			return 0;
		r->local_alias->rrset = regional_alloc_init(s->s.region,
			qinfo->local_alias->rrset,
			sizeof(*qinfo->local_alias->rrset));
		if(!r->local_alias->rrset)
			return 0;
		dsrc = qinfo->local_alias->rrset->entry.data;

		/* In the current implementation, a local alias must be
		 * a single CNAME RR (see worker_handle_request()). */
		log_assert(!qinfo->local_alias->next && dsrc->count == 1 &&
			qinfo->local_alias->rrset->rk.type ==
			htons(LDNS_RR_TYPE_CNAME));
		/* we should make a local copy for the owner name of
		 * the RRset */
		r->local_alias->rrset->rk.dname_len =
			qinfo->local_alias->rrset->rk.dname_len;
		r->local_alias->rrset->rk.dname = regional_alloc_init(
			s->s.region, qinfo->local_alias->rrset->rk.dname,
			qinfo->local_alias->rrset->rk.dname_len);
		if(!r->local_alias->rrset->rk.dname)
			return 0;

		/* the rrset is not packed, like in the cache, but it is
		 * individually allocated with an allocator from localzone. */
		d = regional_alloc_zero(s->s.region, sizeof(*d));
		if(!d)
			return 0;
		r->local_alias->rrset->entry.data = d;
		if(!rrset_insert_rr(s->s.region, d, dsrc->rr_data[0],
			dsrc->rr_len[0], dsrc->rr_ttl[0], "CNAME local alias"))
			return 0;
	} else
		r->local_alias = NULL;

	s->reply_list = r;
	return 1;
}

/* Extract the query info and flags from 'mstate' into '*qinfop' and '*qflags'.
 * Since this is only used for internal refetch of an otherwise-expired answer,
 * we simply ignore the rare failure mode when memory allocation fails. */
static void
mesh_copy_qinfo(struct mesh_state* mstate, struct query_info** qinfop,
	uint16_t* qflags)
{
	struct regional* region = mstate->s.env->scratch;
	struct query_info* qinfo;

	qinfo = regional_alloc_init(region, &mstate->s.qinfo, sizeof(*qinfo));
	if(!qinfo)
		return;
	qinfo->qname = regional_alloc_init(region, qinfo->qname,
		qinfo->qname_len);
	if(!qinfo->qname)
		return;
	*qinfop = qinfo;
	*qflags = mstate->s.query_flags;
}
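
/*
 * A rough map of how module exit states drive mesh_continue() below;
 * this paraphrases the code and is not an authoritative state table:
 *
 *	module_wait_module, module_restart_next -> advance curmod, pass event
 *	module_wait_subquery with no subqueries -> treated as module_error
 *	module_error                            -> SERVFAIL reply, inform
 *	                                           supers, delete the state
 *	module_finished at curmod 0             -> reply, inform supers,
 *	                                           delete (maybe refetch)
 *	module_finished at curmod > 0           -> back to previous module
 *	                                           with module_event_moddone
 */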

/**
 * Continue processing the mesh state at another module.
 * Handles module to module transfer of control.
 * Handles module finished.
 * @param mesh: the mesh area.
 * @param mstate: currently active mesh state.
 *	Deleted if finished, calls _done and _supers to
 *	send replies to clients and inform other mesh states.
 *	This in turn may create additional runnable mesh states.
 * @param s: state at which the current module exited.
 * @param ev: the event sent to the module.
 *	returned is the event to send to the next module.
 * @return true if continue processing at the new module.
 *	false if no further processing is needed.
 */
static int
mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate,
	enum module_ext_state s, enum module_ev* ev)
{
	mstate->num_activated++;
	if(mstate->num_activated > MESH_MAX_ACTIVATION) {
		/* module is looping. Stop it. */
		log_err("internal error: looping module (%s) stopped",
			mesh->mods.mod[mstate->s.curmod]->name);
		log_query_info(NO_VERBOSE, "pass error for qstate",
			&mstate->s.qinfo);
		s = module_error;
	}
	if(s == module_wait_module || s == module_restart_next) {
		/* start next module */
		mstate->s.curmod++;
		if(mesh->mods.num == mstate->s.curmod) {
			log_err("Cannot pass to next module; at last module");
			log_query_info(VERB_QUERY, "pass error for qstate",
				&mstate->s.qinfo);
			mstate->s.curmod--;
			return mesh_continue(mesh, mstate, module_error, ev);
		}
		if(s == module_restart_next) {
			int curmod = mstate->s.curmod;
			for(; mstate->s.curmod < mesh->mods.num;
				mstate->s.curmod++) {
				fptr_ok(fptr_whitelist_mod_clear(
					mesh->mods.mod[mstate->s.curmod]->clear));
				(*mesh->mods.mod[mstate->s.curmod]->clear)
					(&mstate->s, mstate->s.curmod);
				mstate->s.minfo[mstate->s.curmod] = NULL;
			}
			mstate->s.curmod = curmod;
		}
		*ev = module_event_pass;
		return 1;
	}
	if(s == module_wait_subquery && mstate->sub_set.count == 0) {
		log_err("module cannot wait for subquery, subquery list empty");
		log_query_info(VERB_QUERY, "pass error for qstate",
			&mstate->s.qinfo);
		s = module_error;
	}
	if(s == module_error && mstate->s.return_rcode == LDNS_RCODE_NOERROR) {
		/* error is bad, handle pass back up below */
		mstate->s.return_rcode = LDNS_RCODE_SERVFAIL;
	}
	if(s == module_error) {
		mesh_query_done(mstate);
		mesh_walk_supers(mesh, mstate);
		mesh_state_delete(&mstate->s);
		return 0;
	}
	if(s == module_finished) {
		if(mstate->s.curmod == 0) {
			struct query_info* qinfo = NULL;
			uint16_t qflags;

			mesh_query_done(mstate);
			mesh_walk_supers(mesh, mstate);

			/* If the answer to the query needs to be refetched
			 * from an external DNS server, we'll need to schedule
			 * a prefetch after removing the current state, so
			 * we need to make a copy of the query info here. */
			if(mstate->s.need_refetch)
				mesh_copy_qinfo(mstate, &qinfo, &qflags);

			mesh_state_delete(&mstate->s);
			if(qinfo) {
				mesh_schedule_prefetch(mesh, qinfo, qflags,
					0, 1);
			}
			return 0;
		}
		/* pass along the locus of control */
		mstate->s.curmod --;
		*ev = module_event_moddone;
		return 1;
	}
	return 0;
}

void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate,
	enum module_ev ev, struct outbound_entry* e)
{
	enum module_ext_state s;
	verbose(VERB_ALGO, "mesh_run: start");
	while(mstate) {
		/* run the module */
		fptr_ok(fptr_whitelist_mod_operate(
			mesh->mods.mod[mstate->s.curmod]->operate));
		(*mesh->mods.mod[mstate->s.curmod]->operate)
			(&mstate->s, ev, mstate->s.curmod, e);

		/* examine results */
		mstate->s.reply = NULL;
		regional_free_all(mstate->s.env->scratch);
		s = mstate->s.ext_state[mstate->s.curmod];
		verbose(VERB_ALGO, "mesh_run: %s module exit state is %s",
			mesh->mods.mod[mstate->s.curmod]->name, strextstate(s));
		e = NULL;
		if(mesh_continue(mesh, mstate, s, &ev))
			continue;

		/* run more modules */
		ev = module_event_pass;
		if(mesh->run.count > 0) {
			/* pop random element off the runnable tree */
			mstate = (struct mesh_state*)mesh->run.root->key;
			(void)rbtree_delete(&mesh->run, mstate);
		} else mstate = NULL;
	}
	if(verbosity >= VERB_ALGO) {
		mesh_stats(mesh, "mesh_run: end");
		mesh_log_list(mesh);
	}
}

void
mesh_log_list(struct mesh_area* mesh)
{
	char buf[30];
	struct mesh_state* m;
	int num = 0;
	RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
		snprintf(buf, sizeof(buf), "%d%s%s%s%s%s%s mod%d %s%s",
			num++, (m->s.is_priming)?"p":"",  /* prime */
			(m->s.is_valrec)?"v":"",  /* valrec */
			(m->s.query_flags&BIT_RD)?"RD":"",
			(m->s.query_flags&BIT_CD)?"CD":"",
			(m->super_set.count==0)?"d":"",  /* detached */
			(m->sub_set.count!=0)?"c":"",  /* children */
			m->s.curmod, (m->reply_list)?"rep":"",  /* has reply */
			(m->cb_list)?"cb":""  /* callbacks */
			);
		log_query_info(VERB_ALGO, buf, &m->s.qinfo);
	}
}

void
mesh_stats(struct mesh_area* mesh, const char* str)
{
	verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, "
		"%u detached), %u waiting replies, %u recursion replies "
		"sent, %d replies dropped, %d states jostled out",
		str, (unsigned)mesh->all.count,
		(unsigned)mesh->num_reply_states,
		(unsigned)mesh->num_detached_states,
		(unsigned)mesh->num_reply_addrs,
		(unsigned)mesh->replies_sent,
		(unsigned)mesh->stats_dropped,
		(unsigned)mesh->stats_jostled);
	if(mesh->replies_sent > 0) {
		struct timeval avg;
		timeval_divide(&avg, &mesh->replies_sum_wait,
			mesh->replies_sent);
		log_info("average recursion processing time "
			ARG_LL "d.%6.6d sec",
			(long long)avg.tv_sec, (int)avg.tv_usec);
		log_info("histogram of recursion processing times");
		timehist_log(mesh->histogram, "recursions");
	}
}

void
mesh_stats_clear(struct mesh_area* mesh)
{
	if(!mesh)
		return;
	mesh->replies_sent = 0;
	mesh->replies_sum_wait.tv_sec = 0;
	mesh->replies_sum_wait.tv_usec = 0;
	mesh->stats_jostled = 0;
	mesh->stats_dropped = 0;
	timehist_clear(mesh->histogram);
	mesh->ans_secure = 0;
	mesh->ans_bogus = 0;
	mesh->ans_expired = 0;

void
mesh_stats_clear(struct mesh_area* mesh)
{
	if(!mesh)
		return;
	mesh->replies_sent = 0;
	mesh->replies_sum_wait.tv_sec = 0;
	mesh->replies_sum_wait.tv_usec = 0;
	mesh->stats_jostled = 0;
	mesh->stats_dropped = 0;
	timehist_clear(mesh->histogram);
	mesh->ans_secure = 0;
	mesh->ans_bogus = 0;
	mesh->ans_expired = 0;
	memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*UB_STATS_RCODE_NUM);
	memset(&mesh->rpz_action[0], 0, sizeof(size_t)*UB_STATS_RPZ_ACTION_NUM);
	mesh->ans_nodata = 0;
}

size_t
mesh_get_mem(struct mesh_area* mesh)
{
	struct mesh_state* m;
	size_t s = sizeof(*mesh) + sizeof(struct timehist) +
		sizeof(struct th_buck)*mesh->histogram->num +
		sizeof(sldns_buffer) + sldns_buffer_capacity(mesh->qbuf_bak);
	RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
		/* all, including m itself, allocated in qstate region */
		s += regional_get_mem(m->s.region);
	}
	return s;
}

int
mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo,
	uint16_t flags, int prime, int valrec)
{
	struct mesh_area* mesh = qstate->env->mesh;
	struct mesh_state* dep_m = NULL;
	if(!mesh_state_is_unique(qstate->mesh_info))
		dep_m = mesh_area_find(mesh, NULL, qinfo, flags, prime, valrec);
	return mesh_detect_cycle_found(qstate, dep_m);
}

void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp,
	struct mesh_state** lp)
{
	/* insert as last element */
	m->prev = *lp;
	m->next = NULL;
	if(*lp)
		(*lp)->next = m;
	else *fp = m;
	*lp = m;
}

void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp,
	struct mesh_state** lp)
{
	if(m->next)
		m->next->prev = m->prev;
	else *lp = m->prev;
	if(m->prev)
		m->prev->next = m->next;
	else *fp = m->next;
}

void mesh_state_remove_reply(struct mesh_area* mesh, struct mesh_state* m,
	struct comm_point* cp)
{
	struct mesh_reply* n, *prev = NULL;
	n = m->reply_list;
	/* when in mesh_cleanup, it sets the reply_list to NULL, so that
	 * there is no accounting twice */
	if(!n) return; /* nothing to remove, also no accounting needed */
	while(n) {
		if(n->query_reply.c == cp) {
			/* unlink it */
			if(prev) prev->next = n->next;
			else m->reply_list = n->next;
			/* delete it, but it is allocated in the m region */
			log_assert(mesh->num_reply_addrs > 0);
			mesh->num_reply_addrs--;

			/* prev stays the same; examine the next element */
			n = n->next;
			continue;
		}
		prev = n;
		n = n->next;
	}
	/* it was not detached (because it had a reply list), could be now */
	if(!m->reply_list && !m->cb_list
		&& m->super_set.count == 0) {
		mesh->num_detached_states++;
	}
	/* if no replies remain in mstate, it is no longer a reply_state */
	if(!m->reply_list && !m->cb_list) {
		log_assert(mesh->num_reply_states > 0);
		mesh->num_reply_states--;
	}
}
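
/* Accounting invariant used above and again in
 * mesh_serve_expired_callback() below: a mesh state is counted in
 * num_reply_states while its reply_list or cb_list is nonempty, and in
 * num_detached_states while it has no super states and nobody waiting
 * on a reply or callback.  In outline, code that empties the last list
 * of a state does:
 *
 *	if(!m->reply_list && !m->cb_list) {
 *		mesh->num_reply_states--;
 *		if(m->super_set.count == 0)
 *			mesh->num_detached_states++;
 *	}
 */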

static int
apply_respip_action(struct module_qstate* qstate,
	const struct query_info* qinfo, struct respip_client_info* cinfo,
	struct respip_action_info* actinfo, struct reply_info* rep,
	struct ub_packed_rrset_key** alias_rrset,
	struct reply_info** encode_repp, struct auth_zones* az)
{
	if(qinfo->qtype != LDNS_RR_TYPE_A &&
		qinfo->qtype != LDNS_RR_TYPE_AAAA &&
		qinfo->qtype != LDNS_RR_TYPE_ANY)
		return 1;

	if(!respip_rewrite_reply(qinfo, cinfo, rep, encode_repp, actinfo,
		alias_rrset, 0, qstate->region, az))
		return 0;

	/* xxx_deny actions mean dropping the reply, unless the original
	 * reply was redirected to response-ip data. */
	if((actinfo->action == respip_deny ||
		actinfo->action == respip_inform_deny) &&
		*encode_repp == rep)
		*encode_repp = NULL;

	return 1;
}
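
/* Sketch of the caller's contract (summary; compare the call in
 * mesh_serve_expired_callback() below): a 0 return means the rewrite
 * machinery itself failed; on a 1 return, *encode_repp holds the reply
 * to encode, or NULL when a deny action asks for the reply to be
 * dropped.  In outline:
 *
 *	if(!apply_respip_action(qstate, qinfo, cinfo, &actinfo, rep,
 *		&alias_rrset, &encode_rep, az))
 *		return;		// rewrite failed
 *	if(!encode_rep)
 *		return;		// deny action: drop the reply
 *	// otherwise encode_rep (possibly rewritten) is sent
 */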

void
mesh_serve_expired_callback(void* arg)
{
	struct mesh_state* mstate = (struct mesh_state*) arg;
	struct module_qstate* qstate = &mstate->s;
	struct mesh_reply* r;
	struct mesh_area* mesh = qstate->env->mesh;
	struct dns_msg* msg;
	struct mesh_cb* c;
	struct mesh_reply* prev = NULL;
	struct sldns_buffer* prev_buffer = NULL;
	struct sldns_buffer* r_buffer = NULL;
	struct reply_info* partial_rep = NULL;
	struct ub_packed_rrset_key* alias_rrset = NULL;
	struct reply_info* encode_rep = NULL;
	struct respip_action_info actinfo;
	struct query_info* lookup_qinfo = &qstate->qinfo;
	struct query_info qinfo_tmp;
	int must_validate = (!(qstate->query_flags&BIT_CD)
		|| qstate->env->cfg->ignore_cd) && qstate->env->need_to_validate;
	if(!qstate->serve_expired_data) return;
	verbose(VERB_ALGO, "Serve expired: Trying to reply with expired data");
	comm_timer_delete(qstate->serve_expired_data->timer);
	qstate->serve_expired_data->timer = NULL;
	if(qstate->blacklist || qstate->no_cache_lookup || qstate->is_drop) {
		verbose(VERB_ALGO,
			"Serve expired: Not allowed to look into cache for stale");
		return;
	}
	/* The following while loop is used instead of a `goto lookup_cache`
	 * label as in the worker. */
	while(1) {
		fptr_ok(fptr_whitelist_serve_expired_lookup(
			qstate->serve_expired_data->get_cached_answer));
		msg = qstate->serve_expired_data->get_cached_answer(qstate,
			lookup_qinfo);
		if(!msg)
			return;
		/* Reset these in case we pass a second time from here. */
		encode_rep = msg->rep;
		memset(&actinfo, 0, sizeof(actinfo));
		actinfo.action = respip_none;
		alias_rrset = NULL;
		if((mesh->use_response_ip || mesh->use_rpz) &&
			!partial_rep && !apply_respip_action(qstate, &qstate->qinfo,
			qstate->client_info, &actinfo, msg->rep, &alias_rrset,
			&encode_rep, qstate->env->auth_zones)) {
			return;
		} else if(partial_rep &&
			!respip_merge_cname(partial_rep, &qstate->qinfo, msg->rep,
			qstate->client_info, must_validate, &encode_rep,
			qstate->region, qstate->env->auth_zones)) {
			return;
		}
		if(!encode_rep || alias_rrset) {
			if(!encode_rep) {
				/* Needs drop */
				return;
			} else {
				/* A partial CNAME chain was found. */
				partial_rep = encode_rep;
			}
		}
		/* We've found a partial reply ending with an
		 * alias. Replace the lookup qinfo for the
		 * alias target and look up the cache again to
		 * (possibly) complete the reply. As we're
		 * passing the "base" reply, there will be no
		 * more alias chasing. */
		if(partial_rep) {
			memset(&qinfo_tmp, 0, sizeof(qinfo_tmp));
			get_cname_target(alias_rrset, &qinfo_tmp.qname,
				&qinfo_tmp.qname_len);
			if(!qinfo_tmp.qname) {
				log_err("Serve expired: unexpected: invalid answer alias");
				return;
			}
			qinfo_tmp.qtype = qstate->qinfo.qtype;
			qinfo_tmp.qclass = qstate->qinfo.qclass;
			lookup_qinfo = &qinfo_tmp;
			continue;
		}
		break;
	}

	if(verbosity >= VERB_ALGO)
		log_dns_msg("Serve expired lookup", &qstate->qinfo, msg->rep);

	r = mstate->reply_list;
	mstate->reply_list = NULL;
	if(!mstate->reply_list && !mstate->cb_list) {
		log_assert(mesh->num_reply_states > 0);
		mesh->num_reply_states--;
		if(mstate->super_set.count == 0) {
			mesh->num_detached_states++;
		}
	}
	for(; r; r = r->next) {
		/* If address info is returned, the action is an 'inform'
		 * variant and the information should be logged. */
		if(actinfo.addrinfo) {
			respip_inform_print(&actinfo, r->qname,
				qstate->qinfo.qtype, qstate->qinfo.qclass,
				r->local_alias, &r->query_reply);

			if(qstate->env->cfg->stat_extended && actinfo.rpz_used) {
				if(actinfo.rpz_disabled)
					qstate->env->mesh->rpz_action[RPZ_DISABLED_ACTION]++;
				if(actinfo.rpz_cname_override)
					qstate->env->mesh->rpz_action[RPZ_CNAME_OVERRIDE_ACTION]++;
				else
					qstate->env->mesh->rpz_action[
						respip_action_to_rpz_action(actinfo.action)]++;
			}
		}

		r_buffer = r->query_reply.c->buffer;
		if(r->query_reply.c->tcp_req_info)
			r_buffer = r->query_reply.c->tcp_req_info->spool_buffer;
		mesh_send_reply(mstate, LDNS_RCODE_NOERROR, msg->rep,
			r, r_buffer, prev, prev_buffer);
		if(r->query_reply.c->tcp_req_info)
			tcp_req_info_remove_mesh_state(
				r->query_reply.c->tcp_req_info, mstate);
		prev = r;
		prev_buffer = r_buffer;

		/* Account for each reply sent. */
		mesh->ans_expired++;
	}
	while((c = mstate->cb_list) != NULL) {
		/* take this cb off the list; so that the list can be
		 * changed, eg. by adds from the callback routine */
		if(!mstate->reply_list && mstate->cb_list && !c->next) {
			/* was a reply state, not anymore */
			log_assert(qstate->env->mesh->num_reply_states > 0);
			qstate->env->mesh->num_reply_states--;
		}
		mstate->cb_list = c->next;
		if(!mstate->reply_list && !mstate->cb_list &&
			mstate->super_set.count == 0)
			qstate->env->mesh->num_detached_states++;
		mesh_do_callback(mstate, LDNS_RCODE_NOERROR, msg->rep, c);
	}
}
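
/* Outline of the cache-lookup loop in mesh_serve_expired_callback()
 * above; lookup(), ends_in_alias() and alias_target() are illustrative
 * names, not functions in this file:
 *
 *	for(;;) {
 *		msg = lookup(lookup_qinfo);	// get_cached_answer
 *		if(!msg)
 *			return;			// no usable stale data
 *		if(ends_in_alias(msg)) {	// alias_rrset was set
 *			lookup_qinfo = alias_target(msg); // get_cname_target
 *			continue;		// chase the CNAME
 *		}
 *		break;				// complete answer found
 *	}
 */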