1 /* 2 * services/mesh.c - deal with mesh of query states and handle events for that. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains functions to assist in dealing with a mesh of 40 * query states. This mesh is supposed to be thread-specific. 41 * It consists of query states (per qname, qtype, qclass) and connections 42 * between query states and the super and subquery states, and replies to 43 * send back to clients. 44 */ 45 #include "config.h" 46 #include "services/mesh.h" 47 #include "services/outbound_list.h" 48 #include "services/cache/dns.h" 49 #include "util/log.h" 50 #include "util/net_help.h" 51 #include "util/module.h" 52 #include "util/regional.h" 53 #include "util/data/msgencode.h" 54 #include "util/timehist.h" 55 #include "util/fptr_wlist.h" 56 #include "util/alloc.h" 57 #include "util/config_file.h" 58 #include "ldns/sbuffer.h" 59 60 /** subtract timers and the values do not overflow or become negative */ 61 static void 62 timeval_subtract(struct timeval* d, const struct timeval* end, const struct timeval* start) 63 { 64 #ifndef S_SPLINT_S 65 time_t end_usec = end->tv_usec; 66 d->tv_sec = end->tv_sec - start->tv_sec; 67 if(end_usec < start->tv_usec) { 68 end_usec += 1000000; 69 d->tv_sec--; 70 } 71 d->tv_usec = end_usec - start->tv_usec; 72 #endif 73 } 74 75 /** add timers and the values do not overflow or become negative */ 76 static void 77 timeval_add(struct timeval* d, const struct timeval* add) 78 { 79 #ifndef S_SPLINT_S 80 d->tv_sec += add->tv_sec; 81 d->tv_usec += add->tv_usec; 82 if(d->tv_usec > 1000000 ) { 83 d->tv_usec -= 1000000; 84 d->tv_sec++; 85 } 86 #endif 87 } 88 89 /** divide sum of timers to get average */ 90 static void 91 timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d) 92 { 93 #ifndef S_SPLINT_S 94 size_t leftover; 95 if(d == 0) { 96 avg->tv_sec = 0; 97 avg->tv_usec = 0; 98 return; 99 } 100 avg->tv_sec = sum->tv_sec / d; 101 avg->tv_usec = sum->tv_usec / d; 102 /* handle fraction from seconds divide */ 103 leftover = sum->tv_sec - avg->tv_sec*d; 104 avg->tv_usec += (leftover*1000000)/d; 105 #endif 106 } 107 108 /** histogram compare of time values */ 109 static int 110 timeval_smaller(const struct timeval* x, const struct timeval* y) 111 { 112 #ifndef S_SPLINT_S 113 if(x->tv_sec < y->tv_sec) 114 return 1; 115 else if(x->tv_sec == y->tv_sec) { 116 if(x->tv_usec <= y->tv_usec) 117 return 1; 118 else return 0; 119 } 120 else return 0; 121 #endif 122 } 123 124 int 125 mesh_state_compare(const void* ap, const void* bp) 126 { 127 struct mesh_state* a = (struct mesh_state*)ap; 128 struct mesh_state* b = (struct mesh_state*)bp; 129 130 if(a->s.is_priming && !b->s.is_priming) 131 return -1; 132 if(!a->s.is_priming && b->s.is_priming) 133 return 1; 134 135 if(a->s.is_valrec && !b->s.is_valrec) 136 return -1; 137 if(!a->s.is_valrec && b->s.is_valrec) 138 return 1; 139 140 if((a->s.query_flags&BIT_RD) && !(b->s.query_flags&BIT_RD)) 141 return -1; 142 if(!(a->s.query_flags&BIT_RD) && (b->s.query_flags&BIT_RD)) 143 return 1; 144 145 if((a->s.query_flags&BIT_CD) && !(b->s.query_flags&BIT_CD)) 146 return -1; 147 if(!(a->s.query_flags&BIT_CD) && (b->s.query_flags&BIT_CD)) 148 return 1; 149 150 return query_info_compare(&a->s.qinfo, &b->s.qinfo); 151 } 152 153 int 154 mesh_state_ref_compare(const void* ap, const void* bp) 155 { 156 struct mesh_state_ref* a = (struct mesh_state_ref*)ap; 157 struct mesh_state_ref* b = (struct mesh_state_ref*)bp; 158 return mesh_state_compare(a->s, b->s); 159 } 160 161 struct mesh_area* 162 mesh_create(struct module_stack* stack, struct module_env* env) 163 { 164 struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area)); 165 if(!mesh) { 166 log_err("mesh area alloc: out of memory"); 167 return NULL; 168 } 169 mesh->histogram = timehist_setup(); 170 mesh->qbuf_bak = sldns_buffer_new(env->cfg->msg_buffer_size); 171 if(!mesh->histogram || !mesh->qbuf_bak) { 172 free(mesh); 173 log_err("mesh area alloc: out of memory"); 174 return NULL; 175 } 176 mesh->mods = *stack; 177 mesh->env = env; 178 rbtree_init(&mesh->run, &mesh_state_compare); 179 rbtree_init(&mesh->all, &mesh_state_compare); 180 mesh->num_reply_addrs = 0; 181 mesh->num_reply_states = 0; 182 mesh->num_detached_states = 0; 183 mesh->num_forever_states = 0; 184 mesh->stats_jostled = 0; 185 mesh->stats_dropped = 0; 186 mesh->max_reply_states = env->cfg->num_queries_per_thread; 187 mesh->max_forever_states = (mesh->max_reply_states+1)/2; 188 #ifndef S_SPLINT_S 189 mesh->jostle_max.tv_sec = (time_t)(env->cfg->jostle_time / 1000); 190 mesh->jostle_max.tv_usec = (time_t)((env->cfg->jostle_time % 1000) 191 *1000); 192 #endif 193 return mesh; 194 } 195 196 /** help mesh delete delete mesh states */ 197 static void 198 mesh_delete_helper(rbnode_t* n) 199 { 200 struct mesh_state* mstate = (struct mesh_state*)n->key; 201 /* perform a full delete, not only 'cleanup' routine, 202 * because other callbacks expect a clean state in the mesh. 203 * For 're-entrant' calls */ 204 mesh_state_delete(&mstate->s); 205 /* but because these delete the items from the tree, postorder 206 * traversal and rbtree rebalancing do not work together */ 207 } 208 209 void 210 mesh_delete(struct mesh_area* mesh) 211 { 212 if(!mesh) 213 return; 214 /* free all query states */ 215 while(mesh->all.count) 216 mesh_delete_helper(mesh->all.root); 217 timehist_delete(mesh->histogram); 218 sldns_buffer_free(mesh->qbuf_bak); 219 free(mesh); 220 } 221 222 void 223 mesh_delete_all(struct mesh_area* mesh) 224 { 225 /* free all query states */ 226 while(mesh->all.count) 227 mesh_delete_helper(mesh->all.root); 228 mesh->stats_dropped += mesh->num_reply_addrs; 229 /* clear mesh area references */ 230 rbtree_init(&mesh->run, &mesh_state_compare); 231 rbtree_init(&mesh->all, &mesh_state_compare); 232 mesh->num_reply_addrs = 0; 233 mesh->num_reply_states = 0; 234 mesh->num_detached_states = 0; 235 mesh->num_forever_states = 0; 236 mesh->forever_first = NULL; 237 mesh->forever_last = NULL; 238 mesh->jostle_first = NULL; 239 mesh->jostle_last = NULL; 240 } 241 242 int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf) 243 { 244 struct mesh_state* m = mesh->jostle_first; 245 /* free space is available */ 246 if(mesh->num_reply_states < mesh->max_reply_states) 247 return 1; 248 /* try to kick out a jostle-list item */ 249 if(m && m->reply_list && m->list_select == mesh_jostle_list) { 250 /* how old is it? */ 251 struct timeval age; 252 timeval_subtract(&age, mesh->env->now_tv, 253 &m->reply_list->start_time); 254 if(timeval_smaller(&mesh->jostle_max, &age)) { 255 /* its a goner */ 256 log_nametypeclass(VERB_ALGO, "query jostled out to " 257 "make space for a new one", 258 m->s.qinfo.qname, m->s.qinfo.qtype, 259 m->s.qinfo.qclass); 260 /* backup the query */ 261 if(qbuf) sldns_buffer_copy(mesh->qbuf_bak, qbuf); 262 /* notify supers */ 263 if(m->super_set.count > 0) { 264 verbose(VERB_ALGO, "notify supers of failure"); 265 m->s.return_msg = NULL; 266 m->s.return_rcode = LDNS_RCODE_SERVFAIL; 267 mesh_walk_supers(mesh, m); 268 } 269 mesh->stats_jostled ++; 270 mesh_state_delete(&m->s); 271 /* restore the query - note that the qinfo ptr to 272 * the querybuffer is then correct again. */ 273 if(qbuf) sldns_buffer_copy(qbuf, mesh->qbuf_bak); 274 return 1; 275 } 276 } 277 /* no space for new item */ 278 return 0; 279 } 280 281 void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, 282 uint16_t qflags, struct edns_data* edns, struct comm_reply* rep, 283 uint16_t qid) 284 { 285 struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); 286 int was_detached = 0; 287 int was_noreply = 0; 288 int added = 0; 289 /* does this create a new reply state? */ 290 if(!s || s->list_select == mesh_no_list) { 291 if(!mesh_make_new_space(mesh, rep->c->buffer)) { 292 verbose(VERB_ALGO, "Too many queries. dropping " 293 "incoming query."); 294 comm_point_drop_reply(rep); 295 mesh->stats_dropped ++; 296 return; 297 } 298 /* for this new reply state, the reply address is free, 299 * so the limit of reply addresses does not stop reply states*/ 300 } else { 301 /* protect our memory usage from storing reply addresses */ 302 if(mesh->num_reply_addrs > mesh->max_reply_states*16) { 303 verbose(VERB_ALGO, "Too many requests queued. " 304 "dropping incoming query."); 305 mesh->stats_dropped++; 306 comm_point_drop_reply(rep); 307 return; 308 } 309 } 310 /* see if it already exists, if not, create one */ 311 if(!s) { 312 #ifdef UNBOUND_DEBUG 313 struct rbnode_t* n; 314 #endif 315 s = mesh_state_create(mesh->env, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); 316 if(!s) { 317 log_err("mesh_state_create: out of memory; SERVFAIL"); 318 error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL, 319 qinfo, qid, qflags, edns); 320 comm_point_send_reply(rep); 321 return; 322 } 323 #ifdef UNBOUND_DEBUG 324 n = 325 #else 326 (void) 327 #endif 328 rbtree_insert(&mesh->all, &s->node); 329 log_assert(n != NULL); 330 /* set detached (it is now) */ 331 mesh->num_detached_states++; 332 added = 1; 333 } 334 if(!s->reply_list && !s->cb_list && s->super_set.count == 0) 335 was_detached = 1; 336 if(!s->reply_list && !s->cb_list) 337 was_noreply = 1; 338 /* add reply to s */ 339 if(!mesh_state_add_reply(s, edns, rep, qid, qflags, qinfo->qname)) { 340 log_err("mesh_new_client: out of memory; SERVFAIL"); 341 error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL, 342 qinfo, qid, qflags, edns); 343 comm_point_send_reply(rep); 344 if(added) 345 mesh_state_delete(&s->s); 346 return; 347 } 348 /* update statistics */ 349 if(was_detached) { 350 log_assert(mesh->num_detached_states > 0); 351 mesh->num_detached_states--; 352 } 353 if(was_noreply) { 354 mesh->num_reply_states ++; 355 } 356 mesh->num_reply_addrs++; 357 if(s->list_select == mesh_no_list) { 358 /* move to either the forever or the jostle_list */ 359 if(mesh->num_forever_states < mesh->max_forever_states) { 360 mesh->num_forever_states ++; 361 mesh_list_insert(s, &mesh->forever_first, 362 &mesh->forever_last); 363 s->list_select = mesh_forever_list; 364 } else { 365 mesh_list_insert(s, &mesh->jostle_first, 366 &mesh->jostle_last); 367 s->list_select = mesh_jostle_list; 368 } 369 } 370 if(added) 371 mesh_run(mesh, s, module_event_new, NULL); 372 } 373 374 int 375 mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, 376 uint16_t qflags, struct edns_data* edns, sldns_buffer* buf, 377 uint16_t qid, mesh_cb_func_t cb, void* cb_arg) 378 { 379 struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); 380 int was_detached = 0; 381 int was_noreply = 0; 382 int added = 0; 383 /* there are no limits on the number of callbacks */ 384 385 /* see if it already exists, if not, create one */ 386 if(!s) { 387 #ifdef UNBOUND_DEBUG 388 struct rbnode_t* n; 389 #endif 390 s = mesh_state_create(mesh->env, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); 391 if(!s) { 392 return 0; 393 } 394 #ifdef UNBOUND_DEBUG 395 n = 396 #else 397 (void) 398 #endif 399 rbtree_insert(&mesh->all, &s->node); 400 log_assert(n != NULL); 401 /* set detached (it is now) */ 402 mesh->num_detached_states++; 403 added = 1; 404 } 405 if(!s->reply_list && !s->cb_list && s->super_set.count == 0) 406 was_detached = 1; 407 if(!s->reply_list && !s->cb_list) 408 was_noreply = 1; 409 /* add reply to s */ 410 if(!mesh_state_add_cb(s, edns, buf, cb, cb_arg, qid, qflags)) { 411 if(added) 412 mesh_state_delete(&s->s); 413 return 0; 414 } 415 /* update statistics */ 416 if(was_detached) { 417 log_assert(mesh->num_detached_states > 0); 418 mesh->num_detached_states--; 419 } 420 if(was_noreply) { 421 mesh->num_reply_states ++; 422 } 423 mesh->num_reply_addrs++; 424 if(added) 425 mesh_run(mesh, s, module_event_new, NULL); 426 return 1; 427 } 428 429 void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo, 430 uint16_t qflags, time_t leeway) 431 { 432 struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); 433 #ifdef UNBOUND_DEBUG 434 struct rbnode_t* n; 435 #endif 436 /* already exists, and for a different purpose perhaps. 437 * if mesh_no_list, keep it that way. */ 438 if(s) { 439 /* make it ignore the cache from now on */ 440 if(!s->s.blacklist) 441 sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region); 442 if(s->s.prefetch_leeway < leeway) 443 s->s.prefetch_leeway = leeway; 444 return; 445 } 446 if(!mesh_make_new_space(mesh, NULL)) { 447 verbose(VERB_ALGO, "Too many queries. dropped prefetch."); 448 mesh->stats_dropped ++; 449 return; 450 } 451 s = mesh_state_create(mesh->env, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); 452 if(!s) { 453 log_err("prefetch mesh_state_create: out of memory"); 454 return; 455 } 456 #ifdef UNBOUND_DEBUG 457 n = 458 #else 459 (void) 460 #endif 461 rbtree_insert(&mesh->all, &s->node); 462 log_assert(n != NULL); 463 /* set detached (it is now) */ 464 mesh->num_detached_states++; 465 /* make it ignore the cache */ 466 sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region); 467 s->s.prefetch_leeway = leeway; 468 469 if(s->list_select == mesh_no_list) { 470 /* move to either the forever or the jostle_list */ 471 if(mesh->num_forever_states < mesh->max_forever_states) { 472 mesh->num_forever_states ++; 473 mesh_list_insert(s, &mesh->forever_first, 474 &mesh->forever_last); 475 s->list_select = mesh_forever_list; 476 } else { 477 mesh_list_insert(s, &mesh->jostle_first, 478 &mesh->jostle_last); 479 s->list_select = mesh_jostle_list; 480 } 481 } 482 mesh_run(mesh, s, module_event_new, NULL); 483 } 484 485 void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e, 486 struct comm_reply* reply, int what) 487 { 488 enum module_ev event = module_event_reply; 489 e->qstate->reply = reply; 490 if(what != NETEVENT_NOERROR) { 491 event = module_event_noreply; 492 if(what == NETEVENT_CAPSFAIL) 493 event = module_event_capsfail; 494 } 495 mesh_run(mesh, e->qstate->mesh_info, event, e); 496 } 497 498 struct mesh_state* 499 mesh_state_create(struct module_env* env, struct query_info* qinfo, 500 uint16_t qflags, int prime, int valrec) 501 { 502 struct regional* region = alloc_reg_obtain(env->alloc); 503 struct mesh_state* mstate; 504 int i; 505 if(!region) 506 return NULL; 507 mstate = (struct mesh_state*)regional_alloc(region, 508 sizeof(struct mesh_state)); 509 if(!mstate) { 510 alloc_reg_release(env->alloc, region); 511 return NULL; 512 } 513 memset(mstate, 0, sizeof(*mstate)); 514 mstate->node = *RBTREE_NULL; 515 mstate->run_node = *RBTREE_NULL; 516 mstate->node.key = mstate; 517 mstate->run_node.key = mstate; 518 mstate->reply_list = NULL; 519 mstate->list_select = mesh_no_list; 520 mstate->replies_sent = 0; 521 rbtree_init(&mstate->super_set, &mesh_state_ref_compare); 522 rbtree_init(&mstate->sub_set, &mesh_state_ref_compare); 523 mstate->num_activated = 0; 524 /* init module qstate */ 525 mstate->s.qinfo.qtype = qinfo->qtype; 526 mstate->s.qinfo.qclass = qinfo->qclass; 527 mstate->s.qinfo.qname_len = qinfo->qname_len; 528 mstate->s.qinfo.qname = regional_alloc_init(region, qinfo->qname, 529 qinfo->qname_len); 530 if(!mstate->s.qinfo.qname) { 531 alloc_reg_release(env->alloc, region); 532 return NULL; 533 } 534 /* remove all weird bits from qflags */ 535 mstate->s.query_flags = (qflags & (BIT_RD|BIT_CD)); 536 mstate->s.is_priming = prime; 537 mstate->s.is_valrec = valrec; 538 mstate->s.reply = NULL; 539 mstate->s.region = region; 540 mstate->s.curmod = 0; 541 mstate->s.return_msg = 0; 542 mstate->s.return_rcode = LDNS_RCODE_NOERROR; 543 mstate->s.env = env; 544 mstate->s.mesh_info = mstate; 545 mstate->s.prefetch_leeway = 0; 546 /* init modules */ 547 for(i=0; i<env->mesh->mods.num; i++) { 548 mstate->s.minfo[i] = NULL; 549 mstate->s.ext_state[i] = module_state_initial; 550 } 551 return mstate; 552 } 553 554 void 555 mesh_state_cleanup(struct mesh_state* mstate) 556 { 557 struct mesh_area* mesh; 558 int i; 559 if(!mstate) 560 return; 561 mesh = mstate->s.env->mesh; 562 /* drop unsent replies */ 563 if(!mstate->replies_sent) { 564 struct mesh_reply* rep; 565 struct mesh_cb* cb; 566 for(rep=mstate->reply_list; rep; rep=rep->next) { 567 comm_point_drop_reply(&rep->query_reply); 568 mesh->num_reply_addrs--; 569 } 570 for(cb=mstate->cb_list; cb; cb=cb->next) { 571 fptr_ok(fptr_whitelist_mesh_cb(cb->cb)); 572 (*cb->cb)(cb->cb_arg, LDNS_RCODE_SERVFAIL, NULL, 573 sec_status_unchecked, NULL); 574 mesh->num_reply_addrs--; 575 } 576 } 577 578 /* de-init modules */ 579 for(i=0; i<mesh->mods.num; i++) { 580 fptr_ok(fptr_whitelist_mod_clear(mesh->mods.mod[i]->clear)); 581 (*mesh->mods.mod[i]->clear)(&mstate->s, i); 582 mstate->s.minfo[i] = NULL; 583 mstate->s.ext_state[i] = module_finished; 584 } 585 alloc_reg_release(mstate->s.env->alloc, mstate->s.region); 586 } 587 588 void 589 mesh_state_delete(struct module_qstate* qstate) 590 { 591 struct mesh_area* mesh; 592 struct mesh_state_ref* super, ref; 593 struct mesh_state* mstate; 594 if(!qstate) 595 return; 596 mstate = qstate->mesh_info; 597 mesh = mstate->s.env->mesh; 598 mesh_detach_subs(&mstate->s); 599 if(mstate->list_select == mesh_forever_list) { 600 mesh->num_forever_states --; 601 mesh_list_remove(mstate, &mesh->forever_first, 602 &mesh->forever_last); 603 } else if(mstate->list_select == mesh_jostle_list) { 604 mesh_list_remove(mstate, &mesh->jostle_first, 605 &mesh->jostle_last); 606 } 607 if(!mstate->reply_list && !mstate->cb_list 608 && mstate->super_set.count == 0) { 609 log_assert(mesh->num_detached_states > 0); 610 mesh->num_detached_states--; 611 } 612 if(mstate->reply_list || mstate->cb_list) { 613 log_assert(mesh->num_reply_states > 0); 614 mesh->num_reply_states--; 615 } 616 ref.node.key = &ref; 617 ref.s = mstate; 618 RBTREE_FOR(super, struct mesh_state_ref*, &mstate->super_set) { 619 (void)rbtree_delete(&super->s->sub_set, &ref); 620 } 621 (void)rbtree_delete(&mesh->run, mstate); 622 (void)rbtree_delete(&mesh->all, mstate); 623 mesh_state_cleanup(mstate); 624 } 625 626 /** helper recursive rbtree find routine */ 627 static int 628 find_in_subsub(struct mesh_state* m, struct mesh_state* tofind, size_t *c) 629 { 630 struct mesh_state_ref* r; 631 if((*c)++ > MESH_MAX_SUBSUB) 632 return 1; 633 RBTREE_FOR(r, struct mesh_state_ref*, &m->sub_set) { 634 if(r->s == tofind || find_in_subsub(r->s, tofind, c)) 635 return 1; 636 } 637 return 0; 638 } 639 640 /** find cycle for already looked up mesh_state */ 641 static int 642 mesh_detect_cycle_found(struct module_qstate* qstate, struct mesh_state* dep_m) 643 { 644 struct mesh_state* cyc_m = qstate->mesh_info; 645 size_t counter = 0; 646 if(!dep_m) 647 return 0; 648 if(dep_m == cyc_m || find_in_subsub(dep_m, cyc_m, &counter)) { 649 if(counter > MESH_MAX_SUBSUB) 650 return 2; 651 return 1; 652 } 653 return 0; 654 } 655 656 void mesh_detach_subs(struct module_qstate* qstate) 657 { 658 struct mesh_area* mesh = qstate->env->mesh; 659 struct mesh_state_ref* ref, lookup; 660 #ifdef UNBOUND_DEBUG 661 struct rbnode_t* n; 662 #endif 663 lookup.node.key = &lookup; 664 lookup.s = qstate->mesh_info; 665 RBTREE_FOR(ref, struct mesh_state_ref*, &qstate->mesh_info->sub_set) { 666 #ifdef UNBOUND_DEBUG 667 n = 668 #else 669 (void) 670 #endif 671 rbtree_delete(&ref->s->super_set, &lookup); 672 log_assert(n != NULL); /* must have been present */ 673 if(!ref->s->reply_list && !ref->s->cb_list 674 && ref->s->super_set.count == 0) { 675 mesh->num_detached_states++; 676 log_assert(mesh->num_detached_states + 677 mesh->num_reply_states <= mesh->all.count); 678 } 679 } 680 rbtree_init(&qstate->mesh_info->sub_set, &mesh_state_ref_compare); 681 } 682 683 int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo, 684 uint16_t qflags, int prime, int valrec, struct module_qstate** newq) 685 { 686 /* find it, if not, create it */ 687 struct mesh_area* mesh = qstate->env->mesh; 688 struct mesh_state* sub = mesh_area_find(mesh, qinfo, qflags, prime, 689 valrec); 690 int was_detached; 691 if(mesh_detect_cycle_found(qstate, sub)) { 692 verbose(VERB_ALGO, "attach failed, cycle detected"); 693 return 0; 694 } 695 if(!sub) { 696 #ifdef UNBOUND_DEBUG 697 struct rbnode_t* n; 698 #endif 699 /* create a new one */ 700 sub = mesh_state_create(qstate->env, qinfo, qflags, prime, 701 valrec); 702 if(!sub) { 703 log_err("mesh_attach_sub: out of memory"); 704 return 0; 705 } 706 #ifdef UNBOUND_DEBUG 707 n = 708 #else 709 (void) 710 #endif 711 rbtree_insert(&mesh->all, &sub->node); 712 log_assert(n != NULL); 713 /* set detached (it is now) */ 714 mesh->num_detached_states++; 715 /* set new query state to run */ 716 #ifdef UNBOUND_DEBUG 717 n = 718 #else 719 (void) 720 #endif 721 rbtree_insert(&mesh->run, &sub->run_node); 722 log_assert(n != NULL); 723 *newq = &sub->s; 724 } else 725 *newq = NULL; 726 was_detached = (sub->super_set.count == 0); 727 if(!mesh_state_attachment(qstate->mesh_info, sub)) 728 return 0; 729 /* if it was a duplicate attachment, the count was not zero before */ 730 if(!sub->reply_list && !sub->cb_list && was_detached && 731 sub->super_set.count == 1) { 732 /* it used to be detached, before this one got added */ 733 log_assert(mesh->num_detached_states > 0); 734 mesh->num_detached_states--; 735 } 736 /* *newq will be run when inited after the current module stops */ 737 return 1; 738 } 739 740 int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub) 741 { 742 #ifdef UNBOUND_DEBUG 743 struct rbnode_t* n; 744 #endif 745 struct mesh_state_ref* subref; /* points to sub, inserted in super */ 746 struct mesh_state_ref* superref; /* points to super, inserted in sub */ 747 if( !(subref = regional_alloc(super->s.region, 748 sizeof(struct mesh_state_ref))) || 749 !(superref = regional_alloc(sub->s.region, 750 sizeof(struct mesh_state_ref))) ) { 751 log_err("mesh_state_attachment: out of memory"); 752 return 0; 753 } 754 superref->node.key = superref; 755 superref->s = super; 756 subref->node.key = subref; 757 subref->s = sub; 758 if(!rbtree_insert(&sub->super_set, &superref->node)) { 759 /* this should not happen, iterator and validator do not 760 * attach subqueries that are identical. */ 761 /* already attached, we are done, nothing todo. 762 * since superref and subref already allocated in region, 763 * we cannot free them */ 764 return 1; 765 } 766 #ifdef UNBOUND_DEBUG 767 n = 768 #else 769 (void) 770 #endif 771 rbtree_insert(&super->sub_set, &subref->node); 772 log_assert(n != NULL); /* we checked above if statement, the reverse 773 administration should not fail now, unless they are out of sync */ 774 return 1; 775 } 776 777 /** 778 * callback results to mesh cb entry 779 * @param m: mesh state to send it for. 780 * @param rcode: if not 0, error code. 781 * @param rep: reply to send (or NULL if rcode is set). 782 * @param r: callback entry 783 */ 784 static void 785 mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep, 786 struct mesh_cb* r) 787 { 788 int secure; 789 char* reason = NULL; 790 /* bogus messages are not made into servfail, sec_status passed 791 * to the callback function */ 792 if(rep && rep->security == sec_status_secure) 793 secure = 1; 794 else secure = 0; 795 if(!rep && rcode == LDNS_RCODE_NOERROR) 796 rcode = LDNS_RCODE_SERVFAIL; 797 if(!rcode && rep->security == sec_status_bogus) { 798 if(!(reason = errinf_to_str(&m->s))) 799 rcode = LDNS_RCODE_SERVFAIL; 800 } 801 /* send the reply */ 802 if(rcode) { 803 fptr_ok(fptr_whitelist_mesh_cb(r->cb)); 804 (*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL); 805 } else { 806 size_t udp_size = r->edns.udp_size; 807 sldns_buffer_clear(r->buf); 808 r->edns.edns_version = EDNS_ADVERTISED_VERSION; 809 r->edns.udp_size = EDNS_ADVERTISED_SIZE; 810 r->edns.ext_rcode = 0; 811 r->edns.bits &= EDNS_DO; 812 if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid, 813 r->qflags, r->buf, 0, 1, 814 m->s.env->scratch, udp_size, &r->edns, 815 (int)(r->edns.bits & EDNS_DO), secure)) 816 { 817 fptr_ok(fptr_whitelist_mesh_cb(r->cb)); 818 (*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf, 819 sec_status_unchecked, NULL); 820 } else { 821 fptr_ok(fptr_whitelist_mesh_cb(r->cb)); 822 (*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf, 823 rep->security, reason); 824 } 825 } 826 free(reason); 827 m->s.env->mesh->num_reply_addrs--; 828 } 829 830 /** 831 * Send reply to mesh reply entry 832 * @param m: mesh state to send it for. 833 * @param rcode: if not 0, error code. 834 * @param rep: reply to send (or NULL if rcode is set). 835 * @param r: reply entry 836 * @param prev: previous reply, already has its answer encoded in buffer. 837 */ 838 static void 839 mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep, 840 struct mesh_reply* r, struct mesh_reply* prev) 841 { 842 struct timeval end_time; 843 struct timeval duration; 844 int secure; 845 /* examine security status */ 846 if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) || 847 m->s.env->cfg->ignore_cd) && rep && 848 rep->security <= sec_status_bogus) { 849 rcode = LDNS_RCODE_SERVFAIL; 850 if(m->s.env->cfg->stat_extended) 851 m->s.env->mesh->ans_bogus++; 852 } 853 if(rep && rep->security == sec_status_secure) 854 secure = 1; 855 else secure = 0; 856 if(!rep && rcode == LDNS_RCODE_NOERROR) 857 rcode = LDNS_RCODE_SERVFAIL; 858 /* send the reply */ 859 if(prev && prev->qflags == r->qflags && 860 prev->edns.edns_present == r->edns.edns_present && 861 prev->edns.bits == r->edns.bits && 862 prev->edns.udp_size == r->edns.udp_size) { 863 /* if the previous reply is identical to this one, fix ID */ 864 if(prev->query_reply.c->buffer != r->query_reply.c->buffer) 865 sldns_buffer_copy(r->query_reply.c->buffer, 866 prev->query_reply.c->buffer); 867 sldns_buffer_write_at(r->query_reply.c->buffer, 0, 868 &r->qid, sizeof(uint16_t)); 869 sldns_buffer_write_at(r->query_reply.c->buffer, 12, 870 r->qname, m->s.qinfo.qname_len); 871 comm_point_send_reply(&r->query_reply); 872 } else if(rcode) { 873 m->s.qinfo.qname = r->qname; 874 error_encode(r->query_reply.c->buffer, rcode, &m->s.qinfo, 875 r->qid, r->qflags, &r->edns); 876 comm_point_send_reply(&r->query_reply); 877 } else { 878 size_t udp_size = r->edns.udp_size; 879 r->edns.edns_version = EDNS_ADVERTISED_VERSION; 880 r->edns.udp_size = EDNS_ADVERTISED_SIZE; 881 r->edns.ext_rcode = 0; 882 r->edns.bits &= EDNS_DO; 883 m->s.qinfo.qname = r->qname; 884 if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid, 885 r->qflags, r->query_reply.c->buffer, 0, 1, 886 m->s.env->scratch, udp_size, &r->edns, 887 (int)(r->edns.bits & EDNS_DO), secure)) 888 { 889 error_encode(r->query_reply.c->buffer, 890 LDNS_RCODE_SERVFAIL, &m->s.qinfo, r->qid, 891 r->qflags, &r->edns); 892 } 893 comm_point_send_reply(&r->query_reply); 894 } 895 /* account */ 896 m->s.env->mesh->num_reply_addrs--; 897 end_time = *m->s.env->now_tv; 898 timeval_subtract(&duration, &end_time, &r->start_time); 899 verbose(VERB_ALGO, "query took " ARG_LL "d.%6.6d sec", 900 (long long)duration.tv_sec, (int)duration.tv_usec); 901 m->s.env->mesh->replies_sent++; 902 timeval_add(&m->s.env->mesh->replies_sum_wait, &duration); 903 timehist_insert(m->s.env->mesh->histogram, &duration); 904 if(m->s.env->cfg->stat_extended) { 905 uint16_t rc = FLAGS_GET_RCODE(sldns_buffer_read_u16_at(r-> 906 query_reply.c->buffer, 2)); 907 if(secure) m->s.env->mesh->ans_secure++; 908 m->s.env->mesh->ans_rcode[ rc ] ++; 909 if(rc == 0 && LDNS_ANCOUNT(sldns_buffer_begin(r-> 910 query_reply.c->buffer)) == 0) 911 m->s.env->mesh->ans_nodata++; 912 } 913 } 914 915 void mesh_query_done(struct mesh_state* mstate) 916 { 917 struct mesh_reply* r; 918 struct mesh_reply* prev = NULL; 919 struct mesh_cb* c; 920 struct reply_info* rep = (mstate->s.return_msg? 921 mstate->s.return_msg->rep:NULL); 922 for(r = mstate->reply_list; r; r = r->next) { 923 mesh_send_reply(mstate, mstate->s.return_rcode, rep, r, prev); 924 prev = r; 925 } 926 mstate->replies_sent = 1; 927 for(c = mstate->cb_list; c; c = c->next) { 928 mesh_do_callback(mstate, mstate->s.return_rcode, rep, c); 929 } 930 } 931 932 void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate) 933 { 934 struct mesh_state_ref* ref; 935 RBTREE_FOR(ref, struct mesh_state_ref*, &mstate->super_set) 936 { 937 /* make super runnable */ 938 (void)rbtree_insert(&mesh->run, &ref->s->run_node); 939 /* callback the function to inform super of result */ 940 fptr_ok(fptr_whitelist_mod_inform_super( 941 mesh->mods.mod[ref->s->s.curmod]->inform_super)); 942 (*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s, 943 ref->s->s.curmod, &ref->s->s); 944 } 945 } 946 947 struct mesh_state* mesh_area_find(struct mesh_area* mesh, 948 struct query_info* qinfo, uint16_t qflags, int prime, int valrec) 949 { 950 struct mesh_state key; 951 struct mesh_state* result; 952 953 key.node.key = &key; 954 key.s.is_priming = prime; 955 key.s.is_valrec = valrec; 956 key.s.qinfo = *qinfo; 957 key.s.query_flags = qflags; 958 959 result = (struct mesh_state*)rbtree_search(&mesh->all, &key); 960 return result; 961 } 962 963 int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns, 964 sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg, 965 uint16_t qid, uint16_t qflags) 966 { 967 struct mesh_cb* r = regional_alloc(s->s.region, 968 sizeof(struct mesh_cb)); 969 if(!r) 970 return 0; 971 r->buf = buf; 972 log_assert(fptr_whitelist_mesh_cb(cb)); /* early failure ifmissing*/ 973 r->cb = cb; 974 r->cb_arg = cb_arg; 975 r->edns = *edns; 976 r->qid = qid; 977 r->qflags = qflags; 978 r->next = s->cb_list; 979 s->cb_list = r; 980 return 1; 981 982 } 983 984 int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns, 985 struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname) 986 { 987 struct mesh_reply* r = regional_alloc(s->s.region, 988 sizeof(struct mesh_reply)); 989 if(!r) 990 return 0; 991 r->query_reply = *rep; 992 r->edns = *edns; 993 r->qid = qid; 994 r->qflags = qflags; 995 r->start_time = *s->s.env->now_tv; 996 r->next = s->reply_list; 997 r->qname = regional_alloc_init(s->s.region, qname, 998 s->s.qinfo.qname_len); 999 if(!r->qname) 1000 return 0; 1001 s->reply_list = r; 1002 return 1; 1003 1004 } 1005 1006 /** 1007 * Continue processing the mesh state at another module. 1008 * Handles module to modules tranfer of control. 1009 * Handles module finished. 1010 * @param mesh: the mesh area. 1011 * @param mstate: currently active mesh state. 1012 * Deleted if finished, calls _done and _supers to 1013 * send replies to clients and inform other mesh states. 1014 * This in turn may create additional runnable mesh states. 1015 * @param s: state at which the current module exited. 1016 * @param ev: the event sent to the module. 1017 * returned is the event to send to the next module. 1018 * @return true if continue processing at the new module. 1019 * false if not continued processing is needed. 1020 */ 1021 static int 1022 mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate, 1023 enum module_ext_state s, enum module_ev* ev) 1024 { 1025 mstate->num_activated++; 1026 if(mstate->num_activated > MESH_MAX_ACTIVATION) { 1027 /* module is looping. Stop it. */ 1028 log_err("internal error: looping module stopped"); 1029 log_query_info(VERB_QUERY, "pass error for qstate", 1030 &mstate->s.qinfo); 1031 s = module_error; 1032 } 1033 if(s == module_wait_module || s == module_restart_next) { 1034 /* start next module */ 1035 mstate->s.curmod++; 1036 if(mesh->mods.num == mstate->s.curmod) { 1037 log_err("Cannot pass to next module; at last module"); 1038 log_query_info(VERB_QUERY, "pass error for qstate", 1039 &mstate->s.qinfo); 1040 mstate->s.curmod--; 1041 return mesh_continue(mesh, mstate, module_error, ev); 1042 } 1043 if(s == module_restart_next) { 1044 fptr_ok(fptr_whitelist_mod_clear( 1045 mesh->mods.mod[mstate->s.curmod]->clear)); 1046 (*mesh->mods.mod[mstate->s.curmod]->clear) 1047 (&mstate->s, mstate->s.curmod); 1048 mstate->s.minfo[mstate->s.curmod] = NULL; 1049 } 1050 *ev = module_event_pass; 1051 return 1; 1052 } 1053 if(s == module_error && mstate->s.return_rcode == LDNS_RCODE_NOERROR) { 1054 /* error is bad, handle pass back up below */ 1055 mstate->s.return_rcode = LDNS_RCODE_SERVFAIL; 1056 } 1057 if(s == module_error || s == module_finished) { 1058 if(mstate->s.curmod == 0) { 1059 mesh_query_done(mstate); 1060 mesh_walk_supers(mesh, mstate); 1061 mesh_state_delete(&mstate->s); 1062 return 0; 1063 } 1064 /* pass along the locus of control */ 1065 mstate->s.curmod --; 1066 *ev = module_event_moddone; 1067 return 1; 1068 } 1069 return 0; 1070 } 1071 1072 void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate, 1073 enum module_ev ev, struct outbound_entry* e) 1074 { 1075 enum module_ext_state s; 1076 verbose(VERB_ALGO, "mesh_run: start"); 1077 while(mstate) { 1078 /* run the module */ 1079 fptr_ok(fptr_whitelist_mod_operate( 1080 mesh->mods.mod[mstate->s.curmod]->operate)); 1081 (*mesh->mods.mod[mstate->s.curmod]->operate) 1082 (&mstate->s, ev, mstate->s.curmod, e); 1083 1084 /* examine results */ 1085 mstate->s.reply = NULL; 1086 regional_free_all(mstate->s.env->scratch); 1087 s = mstate->s.ext_state[mstate->s.curmod]; 1088 verbose(VERB_ALGO, "mesh_run: %s module exit state is %s", 1089 mesh->mods.mod[mstate->s.curmod]->name, strextstate(s)); 1090 e = NULL; 1091 if(mesh_continue(mesh, mstate, s, &ev)) 1092 continue; 1093 1094 /* run more modules */ 1095 ev = module_event_pass; 1096 if(mesh->run.count > 0) { 1097 /* pop random element off the runnable tree */ 1098 mstate = (struct mesh_state*)mesh->run.root->key; 1099 (void)rbtree_delete(&mesh->run, mstate); 1100 } else mstate = NULL; 1101 } 1102 if(verbosity >= VERB_ALGO) { 1103 mesh_stats(mesh, "mesh_run: end"); 1104 mesh_log_list(mesh); 1105 } 1106 } 1107 1108 void 1109 mesh_log_list(struct mesh_area* mesh) 1110 { 1111 char buf[30]; 1112 struct mesh_state* m; 1113 int num = 0; 1114 RBTREE_FOR(m, struct mesh_state*, &mesh->all) { 1115 snprintf(buf, sizeof(buf), "%d%s%s%s%s%s%s mod%d %s%s", 1116 num++, (m->s.is_priming)?"p":"", /* prime */ 1117 (m->s.is_valrec)?"v":"", /* prime */ 1118 (m->s.query_flags&BIT_RD)?"RD":"", 1119 (m->s.query_flags&BIT_CD)?"CD":"", 1120 (m->super_set.count==0)?"d":"", /* detached */ 1121 (m->sub_set.count!=0)?"c":"", /* children */ 1122 m->s.curmod, (m->reply_list)?"rep":"", /*hasreply*/ 1123 (m->cb_list)?"cb":"" /* callbacks */ 1124 ); 1125 log_query_info(VERB_ALGO, buf, &m->s.qinfo); 1126 } 1127 } 1128 1129 void 1130 mesh_stats(struct mesh_area* mesh, const char* str) 1131 { 1132 verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, " 1133 "%u detached), %u waiting replies, %u recursion replies " 1134 "sent, %d replies dropped, %d states jostled out", 1135 str, (unsigned)mesh->all.count, 1136 (unsigned)mesh->num_reply_states, 1137 (unsigned)mesh->num_detached_states, 1138 (unsigned)mesh->num_reply_addrs, 1139 (unsigned)mesh->replies_sent, 1140 (unsigned)mesh->stats_dropped, 1141 (unsigned)mesh->stats_jostled); 1142 if(mesh->replies_sent > 0) { 1143 struct timeval avg; 1144 timeval_divide(&avg, &mesh->replies_sum_wait, 1145 mesh->replies_sent); 1146 log_info("average recursion processing time " 1147 ARG_LL "d.%6.6d sec", 1148 (long long)avg.tv_sec, (int)avg.tv_usec); 1149 log_info("histogram of recursion processing times"); 1150 timehist_log(mesh->histogram, "recursions"); 1151 } 1152 } 1153 1154 void 1155 mesh_stats_clear(struct mesh_area* mesh) 1156 { 1157 if(!mesh) 1158 return; 1159 mesh->replies_sent = 0; 1160 mesh->replies_sum_wait.tv_sec = 0; 1161 mesh->replies_sum_wait.tv_usec = 0; 1162 mesh->stats_jostled = 0; 1163 mesh->stats_dropped = 0; 1164 timehist_clear(mesh->histogram); 1165 mesh->ans_secure = 0; 1166 mesh->ans_bogus = 0; 1167 memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*16); 1168 mesh->ans_nodata = 0; 1169 } 1170 1171 size_t 1172 mesh_get_mem(struct mesh_area* mesh) 1173 { 1174 struct mesh_state* m; 1175 size_t s = sizeof(*mesh) + sizeof(struct timehist) + 1176 sizeof(struct th_buck)*mesh->histogram->num + 1177 sizeof(sldns_buffer) + sldns_buffer_capacity(mesh->qbuf_bak); 1178 RBTREE_FOR(m, struct mesh_state*, &mesh->all) { 1179 /* all, including m itself allocated in qstate region */ 1180 s += regional_get_mem(m->s.region); 1181 } 1182 return s; 1183 } 1184 1185 int 1186 mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo, 1187 uint16_t flags, int prime, int valrec) 1188 { 1189 struct mesh_area* mesh = qstate->env->mesh; 1190 struct mesh_state* dep_m = mesh_area_find(mesh, qinfo, flags, prime, 1191 valrec); 1192 return mesh_detect_cycle_found(qstate, dep_m); 1193 } 1194 1195 void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp, 1196 struct mesh_state** lp) 1197 { 1198 /* insert as last element */ 1199 m->prev = *lp; 1200 m->next = NULL; 1201 if(*lp) 1202 (*lp)->next = m; 1203 else *fp = m; 1204 *lp = m; 1205 } 1206 1207 void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp, 1208 struct mesh_state** lp) 1209 { 1210 if(m->next) 1211 m->next->prev = m->prev; 1212 else *lp = m->prev; 1213 if(m->prev) 1214 m->prev->next = m->next; 1215 else *fp = m->next; 1216 } 1217