1 /* 2 * daemon/stats.c - collect runtime performance indicators. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file describes the data structure used to collect runtime performance 40 * numbers. These 'statistics' may be of interest to the operator. 41 */ 42 #include "config.h" 43 #ifdef HAVE_TIME_H 44 #include <time.h> 45 #endif 46 #include <sys/time.h> 47 #include <sys/types.h> 48 #include "daemon/stats.h" 49 #include "daemon/worker.h" 50 #include "daemon/daemon.h" 51 #include "services/mesh.h" 52 #include "services/outside_network.h" 53 #include "services/listen_dnsport.h" 54 #include "util/config_file.h" 55 #include "util/tube.h" 56 #include "util/timehist.h" 57 #include "util/net_help.h" 58 #include "validator/validator.h" 59 #include "iterator/iterator.h" 60 #include "sldns/sbuffer.h" 61 #include "services/cache/rrset.h" 62 #include "services/cache/infra.h" 63 #include "services/authzone.h" 64 #include "validator/val_kcache.h" 65 #include "validator/val_neg.h" 66 67 /** add timers and the values do not overflow or become negative */ 68 static void 69 stats_timeval_add(long long* d_sec, long long* d_usec, long long add_sec, long long add_usec) 70 { 71 #ifndef S_SPLINT_S 72 (*d_sec) += add_sec; 73 (*d_usec) += add_usec; 74 if((*d_usec) > 1000000) { 75 (*d_usec) -= 1000000; 76 (*d_sec)++; 77 } 78 #endif 79 } 80 81 void server_stats_init(struct ub_server_stats* stats, struct config_file* cfg) 82 { 83 memset(stats, 0, sizeof(*stats)); 84 stats->extended = cfg->stat_extended; 85 } 86 87 void server_stats_querymiss(struct ub_server_stats* stats, struct worker* worker) 88 { 89 stats->num_queries_missed_cache++; 90 stats->sum_query_list_size += worker->env.mesh->all.count; 91 if((long long)worker->env.mesh->all.count > stats->max_query_list_size) 92 stats->max_query_list_size = (long long)worker->env.mesh->all.count; 93 } 94 95 void server_stats_prefetch(struct ub_server_stats* stats, struct worker* worker) 96 { 97 stats->num_queries_prefetch++; 98 /* changes the query list size so account that, like a querymiss */ 99 stats->sum_query_list_size += worker->env.mesh->all.count; 100 if((long long)worker->env.mesh->all.count > stats->max_query_list_size) 101 stats->max_query_list_size = (long long)worker->env.mesh->all.count; 102 } 103 104 void server_stats_log(struct ub_server_stats* stats, struct worker* worker, 105 int threadnum) 106 { 107 log_info("server stats for thread %d: %u queries, " 108 "%u answers from cache, %u recursions, %u prefetch, %u rejected by " 109 "ip ratelimiting", 110 threadnum, (unsigned)stats->num_queries, 111 (unsigned)(stats->num_queries - 112 stats->num_queries_missed_cache), 113 (unsigned)stats->num_queries_missed_cache, 114 (unsigned)stats->num_queries_prefetch, 115 (unsigned)stats->num_queries_ip_ratelimited); 116 log_info("server stats for thread %d: requestlist max %u avg %g " 117 "exceeded %u jostled %u", threadnum, 118 (unsigned)stats->max_query_list_size, 119 (stats->num_queries_missed_cache+stats->num_queries_prefetch)? 120 (double)stats->sum_query_list_size/ 121 (double)(stats->num_queries_missed_cache+ 122 stats->num_queries_prefetch) : 0.0, 123 (unsigned)worker->env.mesh->stats_dropped, 124 (unsigned)worker->env.mesh->stats_jostled); 125 } 126 127 /** Set the neg cache stats. */ 128 static void 129 set_neg_cache_stats(struct worker* worker, struct ub_server_stats* svr, 130 int reset) 131 { 132 int m = modstack_find(&worker->env.mesh->mods, "validator"); 133 struct val_env* ve; 134 struct val_neg_cache* neg; 135 if(m == -1) 136 return; 137 ve = (struct val_env*)worker->env.modinfo[m]; 138 if(!ve->neg_cache) 139 return; 140 neg = ve->neg_cache; 141 lock_basic_lock(&neg->lock); 142 svr->num_neg_cache_noerror = (long long)neg->num_neg_cache_noerror; 143 svr->num_neg_cache_nxdomain = (long long)neg->num_neg_cache_nxdomain; 144 if(reset && !worker->env.cfg->stat_cumulative) { 145 neg->num_neg_cache_noerror = 0; 146 neg->num_neg_cache_nxdomain = 0; 147 } 148 lock_basic_unlock(&neg->lock); 149 } 150 151 /** get rrsets bogus number from validator */ 152 static size_t 153 get_rrset_bogus(struct worker* worker, int reset) 154 { 155 int m = modstack_find(&worker->env.mesh->mods, "validator"); 156 struct val_env* ve; 157 size_t r; 158 if(m == -1) 159 return 0; 160 ve = (struct val_env*)worker->env.modinfo[m]; 161 lock_basic_lock(&ve->bogus_lock); 162 r = ve->num_rrset_bogus; 163 if(reset && !worker->env.cfg->stat_cumulative) 164 ve->num_rrset_bogus = 0; 165 lock_basic_unlock(&ve->bogus_lock); 166 return r; 167 } 168 169 /** get number of ratelimited queries from iterator */ 170 static size_t 171 get_queries_ratelimit(struct worker* worker, int reset) 172 { 173 int m = modstack_find(&worker->env.mesh->mods, "iterator"); 174 struct iter_env* ie; 175 size_t r; 176 if(m == -1) 177 return 0; 178 ie = (struct iter_env*)worker->env.modinfo[m]; 179 lock_basic_lock(&ie->queries_ratelimit_lock); 180 r = ie->num_queries_ratelimited; 181 if(reset && !worker->env.cfg->stat_cumulative) 182 ie->num_queries_ratelimited = 0; 183 lock_basic_unlock(&ie->queries_ratelimit_lock); 184 return r; 185 } 186 187 #ifdef USE_DNSCRYPT 188 /** get the number of shared secret cache miss */ 189 static size_t 190 get_dnscrypt_cache_miss(struct worker* worker, int reset) 191 { 192 size_t r; 193 struct dnsc_env* de = worker->daemon->dnscenv; 194 if(!de) return 0; 195 196 lock_basic_lock(&de->shared_secrets_cache_lock); 197 r = de->num_query_dnscrypt_secret_missed_cache; 198 if(reset && !worker->env.cfg->stat_cumulative) 199 de->num_query_dnscrypt_secret_missed_cache = 0; 200 lock_basic_unlock(&de->shared_secrets_cache_lock); 201 return r; 202 } 203 204 /** get the number of replayed queries */ 205 static size_t 206 get_dnscrypt_replay(struct worker* worker, int reset) 207 { 208 size_t r; 209 struct dnsc_env* de = worker->daemon->dnscenv; 210 211 lock_basic_lock(&de->nonces_cache_lock); 212 r = de->num_query_dnscrypt_replay; 213 if(reset && !worker->env.cfg->stat_cumulative) 214 de->num_query_dnscrypt_replay = 0; 215 lock_basic_unlock(&de->nonces_cache_lock); 216 return r; 217 } 218 #endif /* USE_DNSCRYPT */ 219 220 void 221 server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset) 222 { 223 int i; 224 struct listen_list* lp; 225 226 s->svr = worker->stats; 227 s->mesh_num_states = (long long)worker->env.mesh->all.count; 228 s->mesh_num_reply_states = (long long)worker->env.mesh->num_reply_states; 229 s->mesh_jostled = (long long)worker->env.mesh->stats_jostled; 230 s->mesh_dropped = (long long)worker->env.mesh->stats_dropped; 231 s->mesh_replies_sent = (long long)worker->env.mesh->replies_sent; 232 s->mesh_replies_sum_wait_sec = (long long)worker->env.mesh->replies_sum_wait.tv_sec; 233 s->mesh_replies_sum_wait_usec = (long long)worker->env.mesh->replies_sum_wait.tv_usec; 234 s->mesh_time_median = timehist_quartile(worker->env.mesh->histogram, 235 0.50); 236 237 /* add in the values from the mesh */ 238 s->svr.ans_secure += (long long)worker->env.mesh->ans_secure; 239 s->svr.ans_bogus += (long long)worker->env.mesh->ans_bogus; 240 s->svr.ans_rcode_nodata += (long long)worker->env.mesh->ans_nodata; 241 for(i=0; i<16; i++) 242 s->svr.ans_rcode[i] += (long long)worker->env.mesh->ans_rcode[i]; 243 timehist_export(worker->env.mesh->histogram, s->svr.hist, 244 NUM_BUCKETS_HIST); 245 /* values from outside network */ 246 s->svr.unwanted_replies = (long long)worker->back->unwanted_replies; 247 s->svr.qtcp_outgoing = (long long)worker->back->num_tcp_outgoing; 248 249 /* get and reset validator rrset bogus number */ 250 s->svr.rrset_bogus = (long long)get_rrset_bogus(worker, reset); 251 252 /* get and reset iterator query ratelimit number */ 253 s->svr.queries_ratelimited = (long long)get_queries_ratelimit(worker, reset); 254 255 /* get cache sizes */ 256 s->svr.msg_cache_count = (long long)count_slabhash_entries(worker->env.msg_cache); 257 s->svr.rrset_cache_count = (long long)count_slabhash_entries(&worker->env.rrset_cache->table); 258 s->svr.infra_cache_count = (long long)count_slabhash_entries(worker->env.infra_cache->hosts); 259 if(worker->env.key_cache) 260 s->svr.key_cache_count = (long long)count_slabhash_entries(worker->env.key_cache->slab); 261 else s->svr.key_cache_count = 0; 262 263 #ifdef USE_DNSCRYPT 264 if(worker->daemon->dnscenv) { 265 s->svr.num_query_dnscrypt_secret_missed_cache = 266 (long long)get_dnscrypt_cache_miss(worker, reset); 267 s->svr.shared_secret_cache_count = (long long)count_slabhash_entries( 268 worker->daemon->dnscenv->shared_secrets_cache); 269 s->svr.nonce_cache_count = (long long)count_slabhash_entries( 270 worker->daemon->dnscenv->nonces_cache); 271 s->svr.num_query_dnscrypt_replay = 272 (long long)get_dnscrypt_replay(worker, reset); 273 } else { 274 s->svr.num_query_dnscrypt_secret_missed_cache = 0; 275 s->svr.shared_secret_cache_count = 0; 276 s->svr.nonce_cache_count = 0; 277 s->svr.num_query_dnscrypt_replay = 0; 278 } 279 #else 280 s->svr.num_query_dnscrypt_secret_missed_cache = 0; 281 s->svr.shared_secret_cache_count = 0; 282 s->svr.nonce_cache_count = 0; 283 s->svr.num_query_dnscrypt_replay = 0; 284 #endif /* USE_DNSCRYPT */ 285 if(worker->env.auth_zones) { 286 if(reset && !worker->env.cfg->stat_cumulative) { 287 lock_rw_wrlock(&worker->env.auth_zones->lock); 288 } else { 289 lock_rw_rdlock(&worker->env.auth_zones->lock); 290 } 291 s->svr.num_query_authzone_up = (long long)worker->env. 292 auth_zones->num_query_up; 293 s->svr.num_query_authzone_down = (long long)worker->env. 294 auth_zones->num_query_down; 295 if(reset && !worker->env.cfg->stat_cumulative) { 296 worker->env.auth_zones->num_query_up = 0; 297 worker->env.auth_zones->num_query_down = 0; 298 } 299 lock_rw_unlock(&worker->env.auth_zones->lock); 300 } 301 302 /* Set neg cache usage numbers */ 303 set_neg_cache_stats(worker, &s->svr, reset); 304 305 /* get tcp accept usage */ 306 s->svr.tcp_accept_usage = 0; 307 for(lp = worker->front->cps; lp; lp = lp->next) { 308 if(lp->com->type == comm_tcp_accept) 309 s->svr.tcp_accept_usage += (long long)lp->com->cur_tcp_count; 310 } 311 312 if(reset && !worker->env.cfg->stat_cumulative) { 313 worker_stats_clear(worker); 314 } 315 } 316 317 void server_stats_obtain(struct worker* worker, struct worker* who, 318 struct ub_stats_info* s, int reset) 319 { 320 uint8_t *reply = NULL; 321 uint32_t len = 0; 322 if(worker == who) { 323 /* just fill it in */ 324 server_stats_compile(worker, s, reset); 325 return; 326 } 327 /* communicate over tube */ 328 verbose(VERB_ALGO, "write stats cmd"); 329 if(reset) 330 worker_send_cmd(who, worker_cmd_stats); 331 else worker_send_cmd(who, worker_cmd_stats_noreset); 332 verbose(VERB_ALGO, "wait for stats reply"); 333 if(!tube_read_msg(worker->cmd, &reply, &len, 0)) 334 fatal_exit("failed to read stats over cmd channel"); 335 if(len != (uint32_t)sizeof(*s)) 336 fatal_exit("stats on cmd channel wrong length %d %d", 337 (int)len, (int)sizeof(*s)); 338 memcpy(s, reply, (size_t)len); 339 free(reply); 340 } 341 342 void server_stats_reply(struct worker* worker, int reset) 343 { 344 struct ub_stats_info s; 345 server_stats_compile(worker, &s, reset); 346 verbose(VERB_ALGO, "write stats replymsg"); 347 if(!tube_write_msg(worker->daemon->workers[0]->cmd, 348 (uint8_t*)&s, sizeof(s), 0)) 349 fatal_exit("could not write stat values over cmd channel"); 350 } 351 352 void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a) 353 { 354 total->svr.num_queries += a->svr.num_queries; 355 total->svr.num_queries_ip_ratelimited += a->svr.num_queries_ip_ratelimited; 356 total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache; 357 total->svr.num_queries_prefetch += a->svr.num_queries_prefetch; 358 total->svr.sum_query_list_size += a->svr.sum_query_list_size; 359 #ifdef USE_DNSCRYPT 360 total->svr.num_query_dnscrypt_crypted += a->svr.num_query_dnscrypt_crypted; 361 total->svr.num_query_dnscrypt_cert += a->svr.num_query_dnscrypt_cert; 362 total->svr.num_query_dnscrypt_cleartext += \ 363 a->svr.num_query_dnscrypt_cleartext; 364 total->svr.num_query_dnscrypt_crypted_malformed += \ 365 a->svr.num_query_dnscrypt_crypted_malformed; 366 #endif /* USE_DNSCRYPT */ 367 /* the max size reached is upped to higher of both */ 368 if(a->svr.max_query_list_size > total->svr.max_query_list_size) 369 total->svr.max_query_list_size = a->svr.max_query_list_size; 370 371 if(a->svr.extended) { 372 int i; 373 total->svr.qtype_big += a->svr.qtype_big; 374 total->svr.qclass_big += a->svr.qclass_big; 375 total->svr.qtcp += a->svr.qtcp; 376 total->svr.qtcp_outgoing += a->svr.qtcp_outgoing; 377 total->svr.qipv6 += a->svr.qipv6; 378 total->svr.qbit_QR += a->svr.qbit_QR; 379 total->svr.qbit_AA += a->svr.qbit_AA; 380 total->svr.qbit_TC += a->svr.qbit_TC; 381 total->svr.qbit_RD += a->svr.qbit_RD; 382 total->svr.qbit_RA += a->svr.qbit_RA; 383 total->svr.qbit_Z += a->svr.qbit_Z; 384 total->svr.qbit_AD += a->svr.qbit_AD; 385 total->svr.qbit_CD += a->svr.qbit_CD; 386 total->svr.qEDNS += a->svr.qEDNS; 387 total->svr.qEDNS_DO += a->svr.qEDNS_DO; 388 total->svr.ans_rcode_nodata += a->svr.ans_rcode_nodata; 389 total->svr.zero_ttl_responses += a->svr.zero_ttl_responses; 390 total->svr.ans_secure += a->svr.ans_secure; 391 total->svr.ans_bogus += a->svr.ans_bogus; 392 total->svr.unwanted_replies += a->svr.unwanted_replies; 393 total->svr.unwanted_queries += a->svr.unwanted_queries; 394 total->svr.tcp_accept_usage += a->svr.tcp_accept_usage; 395 for(i=0; i<UB_STATS_QTYPE_NUM; i++) 396 total->svr.qtype[i] += a->svr.qtype[i]; 397 for(i=0; i<UB_STATS_QCLASS_NUM; i++) 398 total->svr.qclass[i] += a->svr.qclass[i]; 399 for(i=0; i<UB_STATS_OPCODE_NUM; i++) 400 total->svr.qopcode[i] += a->svr.qopcode[i]; 401 for(i=0; i<UB_STATS_RCODE_NUM; i++) 402 total->svr.ans_rcode[i] += a->svr.ans_rcode[i]; 403 for(i=0; i<NUM_BUCKETS_HIST; i++) 404 total->svr.hist[i] += a->svr.hist[i]; 405 } 406 407 total->mesh_num_states += a->mesh_num_states; 408 total->mesh_num_reply_states += a->mesh_num_reply_states; 409 total->mesh_jostled += a->mesh_jostled; 410 total->mesh_dropped += a->mesh_dropped; 411 total->mesh_replies_sent += a->mesh_replies_sent; 412 stats_timeval_add(&total->mesh_replies_sum_wait_sec, &total->mesh_replies_sum_wait_usec, a->mesh_replies_sum_wait_sec, a->mesh_replies_sum_wait_usec); 413 /* the medians are averaged together, this is not as accurate as 414 * taking the median over all of the data, but is good and fast 415 * added up here, division later*/ 416 total->mesh_time_median += a->mesh_time_median; 417 } 418 419 void server_stats_insquery(struct ub_server_stats* stats, struct comm_point* c, 420 uint16_t qtype, uint16_t qclass, struct edns_data* edns, 421 struct comm_reply* repinfo) 422 { 423 uint16_t flags = sldns_buffer_read_u16_at(c->buffer, 2); 424 if(qtype < UB_STATS_QTYPE_NUM) 425 stats->qtype[qtype]++; 426 else stats->qtype_big++; 427 if(qclass < UB_STATS_QCLASS_NUM) 428 stats->qclass[qclass]++; 429 else stats->qclass_big++; 430 stats->qopcode[ LDNS_OPCODE_WIRE(sldns_buffer_begin(c->buffer)) ]++; 431 if(c->type != comm_udp) 432 stats->qtcp++; 433 if(repinfo && addr_is_ip6(&repinfo->addr, repinfo->addrlen)) 434 stats->qipv6++; 435 if( (flags&BIT_QR) ) 436 stats->qbit_QR++; 437 if( (flags&BIT_AA) ) 438 stats->qbit_AA++; 439 if( (flags&BIT_TC) ) 440 stats->qbit_TC++; 441 if( (flags&BIT_RD) ) 442 stats->qbit_RD++; 443 if( (flags&BIT_RA) ) 444 stats->qbit_RA++; 445 if( (flags&BIT_Z) ) 446 stats->qbit_Z++; 447 if( (flags&BIT_AD) ) 448 stats->qbit_AD++; 449 if( (flags&BIT_CD) ) 450 stats->qbit_CD++; 451 if(edns->edns_present) { 452 stats->qEDNS++; 453 if( (edns->bits & EDNS_DO) ) 454 stats->qEDNS_DO++; 455 } 456 } 457 458 void server_stats_insrcode(struct ub_server_stats* stats, sldns_buffer* buf) 459 { 460 if(stats->extended && sldns_buffer_limit(buf) != 0) { 461 int r = (int)LDNS_RCODE_WIRE( sldns_buffer_begin(buf) ); 462 stats->ans_rcode[r] ++; 463 if(r == 0 && LDNS_ANCOUNT( sldns_buffer_begin(buf) ) == 0) 464 stats->ans_rcode_nodata ++; 465 } 466 } 467