1 /* 2 * services/cache/infra.h - infrastructure cache, server rtt and capabilities 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains the infrastructure cache, as well as rate limiting. 40 * Note that there are two sorts of rate-limiting here: 41 * - Pre-cache, per-query rate limiting (query ratelimits) 42 * - Post-cache, per-domain name rate limiting (infra-ratelimits) 43 */ 44 45 #ifndef SERVICES_CACHE_INFRA_H 46 #define SERVICES_CACHE_INFRA_H 47 #include "util/storage/lruhash.h" 48 #include "util/storage/dnstree.h" 49 #include "util/rtt.h" 50 #include "util/netevent.h" 51 #include "util/data/msgreply.h" 52 struct slabhash; 53 struct config_file; 54 55 /** number of timeouts for a type when the domain can be blocked ; 56 * even if another type has completely rtt maxed it, the different type 57 * can do this number of packets (until those all timeout too) */ 58 #define TIMEOUT_COUNT_MAX 3 59 60 61 /** Timeout when only a single probe query per IP is allowed. 62 * Any RTO above this number is considered a probe. 63 * It is synchronized (caped) with USEFUL_SERVER_TOP_TIMEOUT so that probing 64 * keeps working even if that configurable number drops below the default 65 * 12000 ms of probing. */ 66 extern int PROBE_MAXRTO; 67 68 /** 69 * Host information kept for every server, per zone. 70 */ 71 struct infra_key { 72 /** the host address. */ 73 struct sockaddr_storage addr; 74 /** length of addr. */ 75 socklen_t addrlen; 76 /** zone name in wireformat */ 77 uint8_t* zonename; 78 /** length of zonename */ 79 size_t namelen; 80 /** hash table entry, data of type infra_data. */ 81 struct lruhash_entry entry; 82 }; 83 84 /** 85 * Host information encompasses host capabilities and retransmission timeouts. 86 * And lameness information (notAuthoritative, noEDNS, Recursive) 87 */ 88 struct infra_data { 89 /** TTL value for this entry. absolute time. */ 90 time_t ttl; 91 92 /** time in seconds (absolute) when probing re-commences, 0 disabled */ 93 time_t probedelay; 94 /** round trip times for timeout calculation */ 95 struct rtt_info rtt; 96 97 /** edns version that the host supports, -1 means no EDNS */ 98 int edns_version; 99 /** if the EDNS lameness is already known or not. 100 * EDNS lame is when EDNS queries or replies are dropped, 101 * and cause a timeout */ 102 uint8_t edns_lame_known; 103 104 /** is the host lame (does not serve the zone authoritatively), 105 * or is the host dnssec lame (does not serve DNSSEC data) */ 106 uint8_t isdnsseclame; 107 /** is the host recursion lame (not AA, but RA) */ 108 uint8_t rec_lame; 109 /** the host is lame (not authoritative) for A records */ 110 uint8_t lame_type_A; 111 /** the host is lame (not authoritative) for other query types */ 112 uint8_t lame_other; 113 114 /** timeouts counter for type A */ 115 uint8_t timeout_A; 116 /** timeouts counter for type AAAA */ 117 uint8_t timeout_AAAA; 118 /** timeouts counter for others */ 119 uint8_t timeout_other; 120 }; 121 122 /** 123 * Infra cache 124 */ 125 struct infra_cache { 126 /** The hash table with hosts */ 127 struct slabhash* hosts; 128 /** TTL value for host information, in seconds */ 129 int host_ttl; 130 /** the hosts that are down are kept probed for recovery */ 131 int infra_keep_probing; 132 /** hash table with query rates per name: rate_key, rate_data */ 133 struct slabhash* domain_rates; 134 /** ratelimit settings for domains, struct domain_limit_data */ 135 rbtree_type domain_limits; 136 /** hash table with query rates per client ip: ip_rate_key, ip_rate_data */ 137 struct slabhash* client_ip_rates; 138 /** tree of addr_tree_node, with wait_limit_netblock_info information */ 139 rbtree_type wait_limits_netblock; 140 /** tree of addr_tree_node, with wait_limit_netblock_info information */ 141 rbtree_type wait_limits_cookie_netblock; 142 }; 143 144 /** ratelimit, unless overridden by domain_limits, 0 is off */ 145 extern int infra_dp_ratelimit; 146 147 /** 148 * ratelimit settings for domains 149 */ 150 struct domain_limit_data { 151 /** key for rbtree, must be first in struct, name of domain */ 152 struct name_tree_node node; 153 /** ratelimit for exact match with this name, -1 if not set */ 154 int lim; 155 /** ratelimit for names below this name, -1 if not set */ 156 int below; 157 }; 158 159 /** 160 * key for ratelimit lookups, a domain name 161 */ 162 struct rate_key { 163 /** lruhash key entry */ 164 struct lruhash_entry entry; 165 /** domain name in uncompressed wireformat */ 166 uint8_t* name; 167 /** length of name */ 168 size_t namelen; 169 }; 170 171 /** ip ratelimit, 0 is off */ 172 extern int infra_ip_ratelimit; 173 /** ip ratelimit for DNS Cookie clients, 0 is off */ 174 extern int infra_ip_ratelimit_cookie; 175 176 /** 177 * key for ip_ratelimit lookups, a source IP. 178 */ 179 struct ip_rate_key { 180 /** lruhash key entry */ 181 struct lruhash_entry entry; 182 /** client ip information */ 183 struct sockaddr_storage addr; 184 /** length of address */ 185 socklen_t addrlen; 186 }; 187 188 /** number of seconds to track qps rate */ 189 #define RATE_WINDOW 2 190 191 /** 192 * Data for ratelimits per domain name 193 * It is incremented when a non-cache-lookup happens for that domain name. 194 * The name is the delegation point we have for the name. 195 * If a new delegation point is found (a referral reply), the previous 196 * delegation point is decremented, and the new one is charged with the query. 197 */ 198 struct rate_data { 199 /** queries counted, for that second. 0 if not in use. */ 200 int qps[RATE_WINDOW]; 201 /** what the timestamp is of the qps array members, counter is 202 * valid for that timestamp. Usually now and now-1. */ 203 time_t timestamp[RATE_WINDOW]; 204 /** the number of queries waiting in the mesh */ 205 int mesh_wait; 206 }; 207 208 #define ip_rate_data rate_data 209 210 /** 211 * Data to store the configuration per netblock for the wait limit 212 */ 213 struct wait_limit_netblock_info { 214 /** The addr tree node, this must be first. */ 215 struct addr_tree_node node; 216 /** the limit on the amount */ 217 int limit; 218 }; 219 220 /** infra host cache default hash lookup size */ 221 #define INFRA_HOST_STARTSIZE 32 222 /** bytes per zonename reserved in the hostcache, dnamelen(zonename.com.) */ 223 #define INFRA_BYTES_NAME 14 224 225 /** 226 * Create infra cache. 227 * @param cfg: config parameters or NULL for defaults. 228 * @return: new infra cache, or NULL. 229 */ 230 struct infra_cache* infra_create(struct config_file* cfg); 231 232 /** 233 * Delete infra cache. 234 * @param infra: infrastructure cache to delete. 235 */ 236 void infra_delete(struct infra_cache* infra); 237 238 /** 239 * Adjust infra cache to use updated configuration settings. 240 * This may clean the cache. Operates a bit like realloc. 241 * There may be no threading or use by other threads. 242 * @param infra: existing cache. If NULL a new infra cache is returned. 243 * @param cfg: config options. 244 * @return the new infra cache pointer or NULL on error. 245 */ 246 struct infra_cache* infra_adjust(struct infra_cache* infra, 247 struct config_file* cfg); 248 249 /** 250 * Plain find infra data function (used by the other functions) 251 * @param infra: infrastructure cache. 252 * @param addr: host address. 253 * @param addrlen: length of addr. 254 * @param name: domain name of zone. 255 * @param namelen: length of domain name. 256 * @param wr: if true, writelock, else readlock. 257 * @return the entry, could be expired (this is not checked) or NULL. 258 */ 259 struct lruhash_entry* infra_lookup_nottl(struct infra_cache* infra, 260 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name, 261 size_t namelen, int wr); 262 263 /** 264 * Find host information to send a packet. Creates new entry if not found. 265 * Lameness is empty. EDNS is 0 (try with first), and rtt is returned for 266 * the first message to it. 267 * Use this to send a packet only, because it also locks out others when 268 * probing is restricted. 269 * @param infra: infrastructure cache. 270 * @param addr: host address. 271 * @param addrlen: length of addr. 272 * @param name: domain name of zone. 273 * @param namelen: length of domain name. 274 * @param timenow: what time it is now. 275 * @param edns_vs: edns version it supports, is returned. 276 * @param edns_lame_known: if EDNS lame (EDNS is dropped in transit) has 277 * already been probed, is returned. 278 * @param to: timeout to use, is returned. 279 * @return: 0 on error. 280 */ 281 int infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, 282 socklen_t addrlen, uint8_t* name, size_t namelen, 283 time_t timenow, int* edns_vs, uint8_t* edns_lame_known, int* to); 284 285 /** 286 * Set a host to be lame for the given zone. 287 * @param infra: infrastructure cache. 288 * @param addr: host address. 289 * @param addrlen: length of addr. 290 * @param name: domain name of zone apex. 291 * @param namelen: length of domain name. 292 * @param timenow: what time it is now. 293 * @param dnsseclame: if true the host is set dnssec lame. 294 * if false, the host is marked lame (not serving the zone). 295 * @param reclame: if true host is a recursor not AA server. 296 * if false, dnsseclame or marked lame. 297 * @param qtype: the query type for which it is lame. 298 * @return: 0 on error. 299 */ 300 int infra_set_lame(struct infra_cache* infra, 301 struct sockaddr_storage* addr, socklen_t addrlen, 302 uint8_t* name, size_t namelen, time_t timenow, int dnsseclame, 303 int reclame, uint16_t qtype); 304 305 /** 306 * Update rtt information for the host. 307 * @param infra: infrastructure cache. 308 * @param addr: host address. 309 * @param addrlen: length of addr. 310 * @param name: zone name 311 * @param namelen: zone name length 312 * @param qtype: query type. 313 * @param roundtrip: estimate of roundtrip time in milliseconds or -1 for 314 * timeout. 315 * @param orig_rtt: original rtt for the query that timed out (roundtrip==-1). 316 * ignored if roundtrip != -1. 317 * @param timenow: what time it is now. 318 * @return: 0 on error. new rto otherwise. 319 */ 320 int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, 321 socklen_t addrlen, uint8_t* name, size_t namelen, int qtype, 322 int roundtrip, int orig_rtt, time_t timenow); 323 324 /** 325 * Update information for the host, store that a TCP transaction works. 326 * @param infra: infrastructure cache. 327 * @param addr: host address. 328 * @param addrlen: length of addr. 329 * @param name: name of zone 330 * @param namelen: length of name 331 */ 332 void infra_update_tcp_works(struct infra_cache* infra, 333 struct sockaddr_storage* addr, socklen_t addrlen, 334 uint8_t* name, size_t namelen); 335 336 /** 337 * Update edns information for the host. 338 * @param infra: infrastructure cache. 339 * @param addr: host address. 340 * @param addrlen: length of addr. 341 * @param name: name of zone 342 * @param namelen: length of name 343 * @param edns_version: the version that it publishes. 344 * If it is known to support EDNS then no-EDNS is not stored over it. 345 * @param timenow: what time it is now. 346 * @return: 0 on error. 347 */ 348 int infra_edns_update(struct infra_cache* infra, 349 struct sockaddr_storage* addr, socklen_t addrlen, 350 uint8_t* name, size_t namelen, int edns_version, time_t timenow); 351 352 /** 353 * Get Lameness information and average RTT if host is in the cache. 354 * This information is to be used for server selection. 355 * @param infra: infrastructure cache. 356 * @param addr: host address. 357 * @param addrlen: length of addr. 358 * @param name: zone name. 359 * @param namelen: zone name length. 360 * @param qtype: the query to be made. 361 * @param lame: if function returns true, this returns lameness of the zone. 362 * @param dnsseclame: if function returns true, this returns if the zone 363 * is dnssec-lame. 364 * @param reclame: if function returns true, this is if it is recursion lame. 365 * @param rtt: if function returns true, this returns avg rtt of the server. 366 * The rtt value is unclamped and reflects recent timeouts. 367 * @param timenow: what time it is now. 368 * @return if found in cache, or false if not (or TTL bad). 369 */ 370 int infra_get_lame_rtt(struct infra_cache* infra, 371 struct sockaddr_storage* addr, socklen_t addrlen, 372 uint8_t* name, size_t namelen, uint16_t qtype, 373 int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow); 374 375 /** 376 * Get additional (debug) info on timing. 377 * @param infra: infra cache. 378 * @param addr: host address. 379 * @param addrlen: length of addr. 380 * @param name: zone name 381 * @param namelen: zone name length 382 * @param rtt: the rtt_info is copied into here (caller alloced return struct). 383 * @param delay: probe delay (if any). 384 * @param timenow: what time it is now. 385 * @param tA: timeout counter on type A. 386 * @param tAAAA: timeout counter on type AAAA. 387 * @param tother: timeout counter on type other. 388 * @return TTL the infra host element is valid for. If -1: not found in cache. 389 * TTL -2: found but expired. 390 */ 391 long long infra_get_host_rto(struct infra_cache* infra, 392 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name, 393 size_t namelen, struct rtt_info* rtt, int* delay, time_t timenow, 394 int* tA, int* tAAAA, int* tother); 395 396 /** 397 * Increment the query rate counter for a delegation point. 398 * @param infra: infra cache. 399 * @param name: zone name 400 * @param namelen: zone name length 401 * @param timenow: what time it is now. 402 * @param backoff: if backoff is enabled. 403 * @param qinfo: for logging, query name. 404 * @param replylist: for logging, querier's address (if any). 405 * @return 1 if it could be incremented. 0 if the increment overshot the 406 * ratelimit or if in the previous second the ratelimit was exceeded. 407 * Failures like alloc failures are not returned (probably as 1). 408 */ 409 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name, 410 size_t namelen, time_t timenow, int backoff, struct query_info* qinfo, 411 struct comm_reply* replylist); 412 413 /** 414 * Decrement the query rate counter for a delegation point. 415 * Because the reply received for the delegation point was pleasant, 416 * we do not charge this delegation point with it (i.e. it was a referral). 417 * Should call it with same second as when inc() was called. 418 * @param infra: infra cache. 419 * @param name: zone name 420 * @param namelen: zone name length 421 * @param timenow: what time it is now. 422 */ 423 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name, 424 size_t namelen, time_t timenow); 425 426 /** 427 * See if the query rate counter for a delegation point is exceeded. 428 * So, no queries are going to be allowed. 429 * @param infra: infra cache. 430 * @param name: zone name 431 * @param namelen: zone name length 432 * @param timenow: what time it is now. 433 * @param backoff: if backoff is enabled. 434 * @return true if exceeded. 435 */ 436 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name, 437 size_t namelen, time_t timenow, int backoff); 438 439 /** find the maximum rate stored. 0 if no information. 440 * When backoff is enabled look for the maximum in the whole RATE_WINDOW. */ 441 int infra_rate_max(void* data, time_t now, int backoff); 442 443 /** find the ratelimit in qps for a domain. 0 if no limit for domain. */ 444 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name, 445 size_t namelen); 446 447 /** Update query ratelimit hash and decide 448 * whether or not a query should be dropped. 449 * @param infra: infra cache 450 * @param addr: client address 451 * @param addrlen: client address length 452 * @param timenow: what time it is now. 453 * @param has_cookie: if the request came with a DNS Cookie. 454 * @param backoff: if backoff is enabled. 455 * @param buffer: with query for logging. 456 * @return 1 if it could be incremented. 0 if the increment overshot the 457 * ratelimit and the query should be dropped. */ 458 int infra_ip_ratelimit_inc(struct infra_cache* infra, 459 struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow, 460 int has_cookie, int backoff, struct sldns_buffer* buffer); 461 462 /** 463 * Get memory used by the infra cache. 464 * @param infra: infrastructure cache. 465 * @return memory in use in bytes. 466 */ 467 size_t infra_get_mem(struct infra_cache* infra); 468 469 /** calculate size for the hashtable, does not count size of lameness, 470 * so the hashtable is a fixed number of items */ 471 size_t infra_sizefunc(void* k, void* d); 472 473 /** compare two addresses, returns -1, 0, or +1 */ 474 int infra_compfunc(void* key1, void* key2); 475 476 /** delete key, and destroy the lock */ 477 void infra_delkeyfunc(void* k, void* arg); 478 479 /** delete data and destroy the lameness hashtable */ 480 void infra_deldatafunc(void* d, void* arg); 481 482 /** calculate size for the hashtable */ 483 size_t rate_sizefunc(void* k, void* d); 484 485 /** compare two names, returns -1, 0, or +1 */ 486 int rate_compfunc(void* key1, void* key2); 487 488 /** delete key, and destroy the lock */ 489 void rate_delkeyfunc(void* k, void* arg); 490 491 /** delete data */ 492 void rate_deldatafunc(void* d, void* arg); 493 494 /* calculate size for the client ip hashtable */ 495 size_t ip_rate_sizefunc(void* k, void* d); 496 497 /* compare two addresses */ 498 int ip_rate_compfunc(void* key1, void* key2); 499 500 /* delete key, and destroy the lock */ 501 void ip_rate_delkeyfunc(void* d, void* arg); 502 503 /* delete data */ 504 #define ip_rate_deldatafunc rate_deldatafunc 505 506 /** See if the IP address can have another reply in the wait limit */ 507 int infra_wait_limit_allowed(struct infra_cache* infra, struct comm_reply* rep, 508 int cookie_valid, struct config_file* cfg); 509 510 /** Increment number of waiting replies for IP */ 511 void infra_wait_limit_inc(struct infra_cache* infra, struct comm_reply* rep, 512 time_t timenow, struct config_file* cfg); 513 514 /** Decrement number of waiting replies for IP */ 515 void infra_wait_limit_dec(struct infra_cache* infra, struct comm_reply* rep, 516 struct config_file* cfg); 517 518 /** setup wait limits tree (0 on failure) */ 519 int setup_wait_limits(struct rbtree_type* wait_limits_netblock, 520 struct rbtree_type* wait_limits_cookie_netblock, 521 struct config_file* cfg); 522 523 /** Free the wait limits and wait cookie limits tree. */ 524 void wait_limits_free(struct rbtree_type* wait_limits_tree); 525 526 /** setup domain limits tree (0 on failure) */ 527 int setup_domain_limits(struct rbtree_type* domain_limits, 528 struct config_file* cfg); 529 530 /** Free the domain limits tree. */ 531 void domain_limits_free(struct rbtree_type* domain_limits); 532 533 /** exported for unit test */ 534 int still_useful_timeout(); 535 536 #endif /* SERVICES_CACHE_INFRA_H */ 537