1 /* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains event notification functions. 
40 */ 41 #include "config.h" 42 #include "util/netevent.h" 43 #include "util/ub_event.h" 44 #include "util/log.h" 45 #include "util/net_help.h" 46 #include "util/tcp_conn_limit.h" 47 #include "util/fptr_wlist.h" 48 #include "sldns/pkthdr.h" 49 #include "sldns/sbuffer.h" 50 #include "sldns/str2wire.h" 51 #include "dnstap/dnstap.h" 52 #include "dnscrypt/dnscrypt.h" 53 #include "services/listen_dnsport.h" 54 #ifdef HAVE_SYS_TYPES_H 55 #include <sys/types.h> 56 #endif 57 #ifdef HAVE_SYS_SOCKET_H 58 #include <sys/socket.h> 59 #endif 60 #ifdef HAVE_NETDB_H 61 #include <netdb.h> 62 #endif 63 64 #ifdef HAVE_OPENSSL_SSL_H 65 #include <openssl/ssl.h> 66 #endif 67 #ifdef HAVE_OPENSSL_ERR_H 68 #include <openssl/err.h> 69 #endif 70 71 /* -------- Start of local definitions -------- */ 72 /** if CMSG_ALIGN is not defined on this platform, a workaround */ 73 #ifndef CMSG_ALIGN 74 # ifdef __CMSG_ALIGN 75 # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 76 # elif defined(CMSG_DATA_ALIGN) 77 # define CMSG_ALIGN _CMSG_DATA_ALIGN 78 # else 79 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 80 # endif 81 #endif 82 83 /** if CMSG_LEN is not defined on this platform, a workaround */ 84 #ifndef CMSG_LEN 85 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 86 #endif 87 88 /** if CMSG_SPACE is not defined on this platform, a workaround */ 89 #ifndef CMSG_SPACE 90 # ifdef _CMSG_HDR_ALIGN 91 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 92 # else 93 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 94 # endif 95 #endif 96 97 /** The TCP writing query timeout in milliseconds */ 98 #define TCP_QUERY_TIMEOUT 120000 99 /** The minimum actual TCP timeout to use, regardless of what we advertise, 100 * in msec */ 101 #define TCP_QUERY_TIMEOUT_MINIMUM 200 102 103 #ifndef NONBLOCKING_IS_BROKEN 104 /** number of UDP reads to perform per read indication from select */ 105 #define NUM_UDP_PER_SELECT 100 106 #else 107 
#define NUM_UDP_PER_SELECT 1 108 #endif 109 110 /** 111 * The internal event structure for keeping ub_event info for the event. 112 * Possibly other structures (list, tree) this is part of. 113 */ 114 struct internal_event { 115 /** the comm base */ 116 struct comm_base* base; 117 /** ub_event event type */ 118 struct ub_event* ev; 119 }; 120 121 /** 122 * Internal base structure, so that every thread has its own events. 123 */ 124 struct internal_base { 125 /** ub_event event_base type. */ 126 struct ub_event_base* base; 127 /** seconds time pointer points here */ 128 time_t secs; 129 /** timeval with current time */ 130 struct timeval now; 131 /** the event used for slow_accept timeouts */ 132 struct ub_event* slow_accept; 133 /** true if slow_accept is enabled */ 134 int slow_accept_enabled; 135 }; 136 137 /** 138 * Internal timer structure, to store timer event in. 139 */ 140 struct internal_timer { 141 /** the super struct from which derived */ 142 struct comm_timer super; 143 /** the comm base */ 144 struct comm_base* base; 145 /** ub_event event type */ 146 struct ub_event* ev; 147 /** is timer enabled */ 148 uint8_t enabled; 149 }; 150 151 /** 152 * Internal signal structure, to store signal event in. 
153 */ 154 struct internal_signal { 155 /** ub_event event type */ 156 struct ub_event* ev; 157 /** next in signal list */ 158 struct internal_signal* next; 159 }; 160 161 /** create a tcp handler with a parent */ 162 static struct comm_point* comm_point_create_tcp_handler( 163 struct comm_base *base, struct comm_point* parent, size_t bufsize, 164 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 165 void* callback_arg, struct unbound_socket* socket); 166 167 /* -------- End of local definitions -------- */ 168 169 struct comm_base* 170 comm_base_create(int sigs) 171 { 172 struct comm_base* b = (struct comm_base*)calloc(1, 173 sizeof(struct comm_base)); 174 const char *evnm="event", *evsys="", *evmethod=""; 175 176 if(!b) 177 return NULL; 178 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 179 if(!b->eb) { 180 free(b); 181 return NULL; 182 } 183 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 184 if(!b->eb->base) { 185 free(b->eb); 186 free(b); 187 return NULL; 188 } 189 ub_comm_base_now(b); 190 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 191 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 192 return b; 193 } 194 195 struct comm_base* 196 comm_base_create_event(struct ub_event_base* base) 197 { 198 struct comm_base* b = (struct comm_base*)calloc(1, 199 sizeof(struct comm_base)); 200 if(!b) 201 return NULL; 202 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 203 if(!b->eb) { 204 free(b); 205 return NULL; 206 } 207 b->eb->base = base; 208 ub_comm_base_now(b); 209 return b; 210 } 211 212 void 213 comm_base_delete(struct comm_base* b) 214 { 215 if(!b) 216 return; 217 if(b->eb->slow_accept_enabled) { 218 if(ub_event_del(b->eb->slow_accept) != 0) { 219 log_err("could not event_del slow_accept"); 220 } 221 ub_event_free(b->eb->slow_accept); 222 } 223 ub_event_base_free(b->eb->base); 224 b->eb->base = NULL; 225 free(b->eb); 226 free(b); 227 } 228 229 void 
230 comm_base_delete_no_base(struct comm_base* b) 231 { 232 if(!b) 233 return; 234 if(b->eb->slow_accept_enabled) { 235 if(ub_event_del(b->eb->slow_accept) != 0) { 236 log_err("could not event_del slow_accept"); 237 } 238 ub_event_free(b->eb->slow_accept); 239 } 240 b->eb->base = NULL; 241 free(b->eb); 242 free(b); 243 } 244 245 void 246 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 247 { 248 *tt = &b->eb->secs; 249 *tv = &b->eb->now; 250 } 251 252 void 253 comm_base_dispatch(struct comm_base* b) 254 { 255 int retval; 256 retval = ub_event_base_dispatch(b->eb->base); 257 if(retval < 0) { 258 fatal_exit("event_dispatch returned error %d, " 259 "errno is %s", retval, strerror(errno)); 260 } 261 } 262 263 void comm_base_exit(struct comm_base* b) 264 { 265 if(ub_event_base_loopexit(b->eb->base) != 0) { 266 log_err("Could not loopexit"); 267 } 268 } 269 270 void comm_base_set_slow_accept_handlers(struct comm_base* b, 271 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 272 { 273 b->stop_accept = stop_acc; 274 b->start_accept = start_acc; 275 b->cb_arg = arg; 276 } 277 278 struct ub_event_base* comm_base_internal(struct comm_base* b) 279 { 280 return b->eb->base; 281 } 282 283 /** see if errno for udp has to be logged or not uses globals */ 284 static int 285 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 286 { 287 /* do not log transient errors (unless high verbosity) */ 288 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 289 switch(errno) { 290 # ifdef ENETUNREACH 291 case ENETUNREACH: 292 # endif 293 # ifdef EHOSTDOWN 294 case EHOSTDOWN: 295 # endif 296 # ifdef EHOSTUNREACH 297 case EHOSTUNREACH: 298 # endif 299 # ifdef ENETDOWN 300 case ENETDOWN: 301 # endif 302 case EPERM: 303 case EACCES: 304 if(verbosity < VERB_ALGO) 305 return 0; 306 default: 307 break; 308 } 309 #endif 310 /* permission denied is gotten for every send if the 311 * network is disconnected (on 
some OS), squelch it */ 312 if( ((errno == EPERM) 313 # ifdef EADDRNOTAVAIL 314 /* 'Cannot assign requested address' also when disconnected */ 315 || (errno == EADDRNOTAVAIL) 316 # endif 317 ) && verbosity < VERB_ALGO) 318 return 0; 319 # ifdef EADDRINUSE 320 /* If SO_REUSEADDR is set, we could try to connect to the same server 321 * from the same source port twice. */ 322 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 323 return 0; 324 # endif 325 /* squelch errors where people deploy AAAA ::ffff:bla for 326 * authority servers, which we try for intranets. */ 327 if(errno == EINVAL && addr_is_ip4mapped( 328 (struct sockaddr_storage*)addr, addrlen) && 329 verbosity < VERB_DETAIL) 330 return 0; 331 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 332 * but a dns cache does not need it. */ 333 if(errno == EACCES && addr_is_broadcast( 334 (struct sockaddr_storage*)addr, addrlen) && 335 verbosity < VERB_DETAIL) 336 return 0; 337 return 1; 338 } 339 340 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 341 { 342 return udp_send_errno_needs_log(addr, addrlen); 343 } 344 345 /* send a UDP reply */ 346 int 347 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 348 struct sockaddr* addr, socklen_t addrlen, int is_connected) 349 { 350 ssize_t sent; 351 log_assert(c->fd != -1); 352 #ifdef UNBOUND_DEBUG 353 if(sldns_buffer_remaining(packet) == 0) 354 log_err("error: send empty UDP packet"); 355 #endif 356 log_assert(addr && addrlen > 0); 357 if(!is_connected) { 358 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 359 sldns_buffer_remaining(packet), 0, 360 addr, addrlen); 361 } else { 362 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 363 sldns_buffer_remaining(packet), 0); 364 } 365 if(sent == -1) { 366 /* try again and block, waiting for IO to complete, 367 * we want to send the answer, and we will wait for 368 * the ethernet interface buffer to have space. 
*/ 369 #ifndef USE_WINSOCK 370 if(errno == EAGAIN || 371 # ifdef EWOULDBLOCK 372 errno == EWOULDBLOCK || 373 # endif 374 errno == ENOBUFS) { 375 #else 376 if(WSAGetLastError() == WSAEINPROGRESS || 377 WSAGetLastError() == WSAENOBUFS || 378 WSAGetLastError() == WSAEWOULDBLOCK) { 379 #endif 380 int e; 381 fd_set_block(c->fd); 382 if (!is_connected) { 383 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 384 sldns_buffer_remaining(packet), 0, 385 addr, addrlen); 386 } else { 387 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 388 sldns_buffer_remaining(packet), 0); 389 } 390 e = errno; 391 fd_set_nonblock(c->fd); 392 errno = e; 393 } 394 } 395 if(sent == -1) { 396 if(!udp_send_errno_needs_log(addr, addrlen)) 397 return 0; 398 if (!is_connected) { 399 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 400 } else { 401 verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 402 } 403 if(addr) 404 log_addr(VERB_OPS, "remote address is", 405 (struct sockaddr_storage*)addr, addrlen); 406 return 0; 407 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 408 log_err("sent %d in place of %d bytes", 409 (int)sent, (int)sldns_buffer_remaining(packet)); 410 return 0; 411 } 412 return 1; 413 } 414 415 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 416 /** print debug ancillary info */ 417 static void p_ancil(const char* str, struct comm_reply* r) 418 { 419 if(r->srctype != 4 && r->srctype != 6) { 420 log_info("%s: unknown srctype %d", str, r->srctype); 421 return; 422 } 423 424 if(r->srctype == 6) { 425 #ifdef IPV6_PKTINFO 426 char buf[1024]; 427 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 428 buf, (socklen_t)sizeof(buf)) == 0) { 429 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 430 } 431 buf[sizeof(buf)-1]=0; 432 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 433 #endif 434 } else if(r->srctype == 4) { 435 #ifdef IP_PKTINFO 436 char buf1[1024], buf2[1024]; 
437 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 438 buf1, (socklen_t)sizeof(buf1)) == 0) { 439 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 440 } 441 buf1[sizeof(buf1)-1]=0; 442 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 443 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 444 buf2, (socklen_t)sizeof(buf2)) == 0) { 445 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 446 } 447 buf2[sizeof(buf2)-1]=0; 448 #else 449 buf2[0]=0; 450 #endif 451 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 452 buf1, buf2); 453 #elif defined(IP_RECVDSTADDR) 454 char buf1[1024]; 455 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 456 buf1, (socklen_t)sizeof(buf1)) == 0) { 457 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 458 } 459 buf1[sizeof(buf1)-1]=0; 460 log_info("%s: %s", str, buf1); 461 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 462 } 463 } 464 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 465 466 /** send a UDP reply over specified interface*/ 467 static int 468 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 469 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 470 { 471 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 472 ssize_t sent; 473 struct msghdr msg; 474 struct iovec iov[1]; 475 union { 476 struct cmsghdr hdr; 477 char buf[256]; 478 } control; 479 #ifndef S_SPLINT_S 480 struct cmsghdr *cmsg; 481 #endif /* S_SPLINT_S */ 482 483 log_assert(c->fd != -1); 484 #ifdef UNBOUND_DEBUG 485 if(sldns_buffer_remaining(packet) == 0) 486 log_err("error: send empty UDP packet"); 487 #endif 488 log_assert(addr && addrlen > 0); 489 490 msg.msg_name = addr; 491 msg.msg_namelen = addrlen; 492 iov[0].iov_base = sldns_buffer_begin(packet); 493 iov[0].iov_len = sldns_buffer_remaining(packet); 494 msg.msg_iov = iov; 495 msg.msg_iovlen = 1; 496 msg.msg_control = control.buf; 497 #ifndef S_SPLINT_S 498 msg.msg_controllen = sizeof(control.buf); 499 #endif /* S_SPLINT_S 
*/ 500 msg.msg_flags = 0; 501 502 #ifndef S_SPLINT_S 503 cmsg = CMSG_FIRSTHDR(&msg); 504 if(r->srctype == 4) { 505 #ifdef IP_PKTINFO 506 void* cmsg_data; 507 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 508 log_assert(msg.msg_controllen <= sizeof(control.buf)); 509 cmsg->cmsg_level = IPPROTO_IP; 510 cmsg->cmsg_type = IP_PKTINFO; 511 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 512 sizeof(struct in_pktinfo)); 513 /* unset the ifindex to not bypass the routing tables */ 514 cmsg_data = CMSG_DATA(cmsg); 515 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 516 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 517 #elif defined(IP_SENDSRCADDR) 518 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 519 log_assert(msg.msg_controllen <= sizeof(control.buf)); 520 cmsg->cmsg_level = IPPROTO_IP; 521 cmsg->cmsg_type = IP_SENDSRCADDR; 522 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 523 sizeof(struct in_addr)); 524 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 525 #else 526 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 527 msg.msg_control = NULL; 528 #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 529 } else if(r->srctype == 6) { 530 void* cmsg_data; 531 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 532 log_assert(msg.msg_controllen <= sizeof(control.buf)); 533 cmsg->cmsg_level = IPPROTO_IPV6; 534 cmsg->cmsg_type = IPV6_PKTINFO; 535 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 536 sizeof(struct in6_pktinfo)); 537 /* unset the ifindex to not bypass the routing tables */ 538 cmsg_data = CMSG_DATA(cmsg); 539 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 540 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 541 } else { 542 /* try to pass all 0 to use default route */ 543 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 544 log_assert(msg.msg_controllen <= sizeof(control.buf)); 545 cmsg->cmsg_level = IPPROTO_IPV6; 546 cmsg->cmsg_type = IPV6_PKTINFO; 547 memset(CMSG_DATA(cmsg), 0, sizeof(struct 
in6_pktinfo)); 548 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 549 } 550 #endif /* S_SPLINT_S */ 551 if(verbosity >= VERB_ALGO) 552 p_ancil("send_udp over interface", r); 553 sent = sendmsg(c->fd, &msg, 0); 554 if(sent == -1) { 555 /* try again and block, waiting for IO to complete, 556 * we want to send the answer, and we will wait for 557 * the ethernet interface buffer to have space. */ 558 #ifndef USE_WINSOCK 559 if(errno == EAGAIN || 560 # ifdef EWOULDBLOCK 561 errno == EWOULDBLOCK || 562 # endif 563 errno == ENOBUFS) { 564 #else 565 if(WSAGetLastError() == WSAEINPROGRESS || 566 WSAGetLastError() == WSAENOBUFS || 567 WSAGetLastError() == WSAEWOULDBLOCK) { 568 #endif 569 int e; 570 fd_set_block(c->fd); 571 sent = sendmsg(c->fd, &msg, 0); 572 e = errno; 573 fd_set_nonblock(c->fd); 574 errno = e; 575 } 576 } 577 if(sent == -1) { 578 if(!udp_send_errno_needs_log(addr, addrlen)) 579 return 0; 580 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 581 log_addr(VERB_OPS, "remote address is", 582 (struct sockaddr_storage*)addr, addrlen); 583 #ifdef __NetBSD__ 584 /* netbsd 7 has IP_PKTINFO for recv but not send */ 585 if(errno == EINVAL && r->srctype == 4) 586 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). 
" 587 "Please disable interface-automatic"); 588 #endif 589 return 0; 590 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 591 log_err("sent %d in place of %d bytes", 592 (int)sent, (int)sldns_buffer_remaining(packet)); 593 return 0; 594 } 595 return 1; 596 #else 597 (void)c; 598 (void)packet; 599 (void)addr; 600 (void)addrlen; 601 (void)r; 602 log_err("sendmsg: IPV6_PKTINFO not supported"); 603 return 0; 604 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 605 } 606 607 /** return true is UDP receive error needs to be logged */ 608 static int udp_recv_needs_log(int err) 609 { 610 switch(err) { 611 case EACCES: /* some hosts send ICMP 'Permission Denied' */ 612 #ifndef USE_WINSOCK 613 case ECONNREFUSED: 614 # ifdef ENETUNREACH 615 case ENETUNREACH: 616 # endif 617 # ifdef EHOSTDOWN 618 case EHOSTDOWN: 619 # endif 620 # ifdef EHOSTUNREACH 621 case EHOSTUNREACH: 622 # endif 623 # ifdef ENETDOWN 624 case ENETDOWN: 625 # endif 626 #else /* USE_WINSOCK */ 627 case WSAECONNREFUSED: 628 case WSAENETUNREACH: 629 case WSAEHOSTDOWN: 630 case WSAEHOSTUNREACH: 631 case WSAENETDOWN: 632 #endif 633 if(verbosity >= VERB_ALGO) 634 return 1; 635 return 0; 636 default: 637 break; 638 } 639 return 1; 640 } 641 642 void 643 comm_point_udp_ancil_callback(int fd, short event, void* arg) 644 { 645 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 646 struct comm_reply rep; 647 struct msghdr msg; 648 struct iovec iov[1]; 649 ssize_t rcv; 650 union { 651 struct cmsghdr hdr; 652 char buf[256]; 653 } ancil; 654 int i; 655 #ifndef S_SPLINT_S 656 struct cmsghdr* cmsg; 657 #endif /* S_SPLINT_S */ 658 659 rep.c = (struct comm_point*)arg; 660 log_assert(rep.c->type == comm_udp); 661 662 if(!(event&UB_EV_READ)) 663 return; 664 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 665 ub_comm_base_now(rep.c->ev->base); 666 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 667 sldns_buffer_clear(rep.c->buffer); 668 rep.addrlen = (socklen_t)sizeof(rep.addr); 669 
log_assert(fd != -1); 670 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 671 msg.msg_name = &rep.addr; 672 msg.msg_namelen = (socklen_t)sizeof(rep.addr); 673 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 674 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 675 msg.msg_iov = iov; 676 msg.msg_iovlen = 1; 677 msg.msg_control = ancil.buf; 678 #ifndef S_SPLINT_S 679 msg.msg_controllen = sizeof(ancil.buf); 680 #endif /* S_SPLINT_S */ 681 msg.msg_flags = 0; 682 rcv = recvmsg(fd, &msg, 0); 683 if(rcv == -1) { 684 if(errno != EAGAIN && errno != EINTR 685 && udp_recv_needs_log(errno)) { 686 log_err("recvmsg failed: %s", strerror(errno)); 687 } 688 return; 689 } 690 rep.addrlen = msg.msg_namelen; 691 sldns_buffer_skip(rep.c->buffer, rcv); 692 sldns_buffer_flip(rep.c->buffer); 693 rep.srctype = 0; 694 #ifndef S_SPLINT_S 695 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 696 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 697 if( cmsg->cmsg_level == IPPROTO_IPV6 && 698 cmsg->cmsg_type == IPV6_PKTINFO) { 699 rep.srctype = 6; 700 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 701 sizeof(struct in6_pktinfo)); 702 break; 703 #ifdef IP_PKTINFO 704 } else if( cmsg->cmsg_level == IPPROTO_IP && 705 cmsg->cmsg_type == IP_PKTINFO) { 706 rep.srctype = 4; 707 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 708 sizeof(struct in_pktinfo)); 709 break; 710 #elif defined(IP_RECVDSTADDR) 711 } else if( cmsg->cmsg_level == IPPROTO_IP && 712 cmsg->cmsg_type == IP_RECVDSTADDR) { 713 rep.srctype = 4; 714 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 715 sizeof(struct in_addr)); 716 break; 717 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 718 } 719 } 720 if(verbosity >= VERB_ALGO) 721 p_ancil("receive_udp on interface", &rep); 722 #endif /* S_SPLINT_S */ 723 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 724 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 725 /* send back immediate reply */ 726 (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer, 727 (struct 
sockaddr*)&rep.addr, rep.addrlen, &rep); 728 } 729 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 730 break; 731 } 732 #else 733 (void)fd; 734 (void)event; 735 (void)arg; 736 fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. " 737 "Please disable interface-automatic"); 738 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 739 } 740 741 void 742 comm_point_udp_callback(int fd, short event, void* arg) 743 { 744 struct comm_reply rep; 745 ssize_t rcv; 746 int i; 747 struct sldns_buffer *buffer; 748 749 rep.c = (struct comm_point*)arg; 750 log_assert(rep.c->type == comm_udp); 751 752 if(!(event&UB_EV_READ)) 753 return; 754 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 755 ub_comm_base_now(rep.c->ev->base); 756 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 757 sldns_buffer_clear(rep.c->buffer); 758 rep.addrlen = (socklen_t)sizeof(rep.addr); 759 log_assert(fd != -1); 760 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 761 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 762 sldns_buffer_remaining(rep.c->buffer), 0, 763 (struct sockaddr*)&rep.addr, &rep.addrlen); 764 if(rcv == -1) { 765 #ifndef USE_WINSOCK 766 if(errno != EAGAIN && errno != EINTR 767 && udp_recv_needs_log(errno)) 768 log_err("recvfrom %d failed: %s", 769 fd, strerror(errno)); 770 #else 771 if(WSAGetLastError() != WSAEINPROGRESS && 772 WSAGetLastError() != WSAECONNRESET && 773 WSAGetLastError()!= WSAEWOULDBLOCK && 774 udp_recv_needs_log(WSAGetLastError())) 775 log_err("recvfrom failed: %s", 776 wsa_strerror(WSAGetLastError())); 777 #endif 778 return; 779 } 780 sldns_buffer_skip(rep.c->buffer, rcv); 781 sldns_buffer_flip(rep.c->buffer); 782 rep.srctype = 0; 783 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 784 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 785 /* send back immediate reply */ 786 #ifdef USE_DNSCRYPT 787 buffer = rep.c->dnscrypt_buffer; 788 #else 789 buffer = rep.c->buffer; 790 #endif 791 
(void)comm_point_send_udp_msg(rep.c, buffer, 792 (struct sockaddr*)&rep.addr, rep.addrlen, 0); 793 } 794 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 795 another UDP port. Note rep.c cannot be reused with TCP fd. */ 796 break; 797 } 798 } 799 800 int adjusted_tcp_timeout(struct comm_point* c) 801 { 802 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) 803 return TCP_QUERY_TIMEOUT_MINIMUM; 804 return c->tcp_timeout_msec; 805 } 806 807 /** Use a new tcp handler for new query fd, set to read query */ 808 static void 809 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 810 { 811 int handler_usage; 812 log_assert(c->type == comm_tcp || c->type == comm_http); 813 log_assert(c->fd == -1); 814 sldns_buffer_clear(c->buffer); 815 #ifdef USE_DNSCRYPT 816 if (c->dnscrypt) 817 sldns_buffer_clear(c->dnscrypt_buffer); 818 #endif 819 c->tcp_is_reading = 1; 820 c->tcp_byte_count = 0; 821 c->tcp_keepalive = 0; 822 /* if more than half the tcp handlers are in use, use a shorter 823 * timeout for this TCP connection, we need to make space for 824 * other connections to be able to get attention */ 825 /* If > 50% TCP handler structures in use, set timeout to 1/100th 826 * configured value. 827 * If > 65%TCP handler structures in use, set to 1/500th configured 828 * value. 829 * If > 80% TCP handler structures in use, set to 0. 830 * 831 * If the timeout to use falls below 200 milliseconds, an actual 832 * timeout of 200ms is used. 
833 */ 834 handler_usage = (cur * 100) / max; 835 if(handler_usage > 50 && handler_usage <= 65) 836 c->tcp_timeout_msec /= 100; 837 else if (handler_usage > 65 && handler_usage <= 80) 838 c->tcp_timeout_msec /= 500; 839 else if (handler_usage > 80) 840 c->tcp_timeout_msec = 0; 841 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c)); 842 } 843 844 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 845 short ATTR_UNUSED(event), void* arg) 846 { 847 struct comm_base* b = (struct comm_base*)arg; 848 /* timeout for the slow accept, re-enable accepts again */ 849 if(b->start_accept) { 850 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 851 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 852 (*b->start_accept)(b->cb_arg); 853 b->eb->slow_accept_enabled = 0; 854 } 855 } 856 857 int comm_point_perform_accept(struct comm_point* c, 858 struct sockaddr_storage* addr, socklen_t* addrlen) 859 { 860 int new_fd; 861 *addrlen = (socklen_t)sizeof(*addr); 862 #ifndef HAVE_ACCEPT4 863 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 864 #else 865 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 866 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 867 #endif 868 if(new_fd == -1) { 869 #ifndef USE_WINSOCK 870 /* EINTR is signal interrupt. others are closed connection. */ 871 if( errno == EINTR || errno == EAGAIN 872 #ifdef EWOULDBLOCK 873 || errno == EWOULDBLOCK 874 #endif 875 #ifdef ECONNABORTED 876 || errno == ECONNABORTED 877 #endif 878 #ifdef EPROTO 879 || errno == EPROTO 880 #endif /* EPROTO */ 881 ) 882 return -1; 883 #if defined(ENFILE) && defined(EMFILE) 884 if(errno == ENFILE || errno == EMFILE) { 885 /* out of file descriptors, likely outside of our 886 * control. 
stop accept() calls for some time */ 887 if(c->ev->base->stop_accept) { 888 struct comm_base* b = c->ev->base; 889 struct timeval tv; 890 verbose(VERB_ALGO, "out of file descriptors: " 891 "slow accept"); 892 b->eb->slow_accept_enabled = 1; 893 fptr_ok(fptr_whitelist_stop_accept( 894 b->stop_accept)); 895 (*b->stop_accept)(b->cb_arg); 896 /* set timeout, no mallocs */ 897 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 898 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 899 b->eb->slow_accept = ub_event_new(b->eb->base, 900 -1, UB_EV_TIMEOUT, 901 comm_base_handle_slow_accept, b); 902 if(b->eb->slow_accept == NULL) { 903 /* we do not want to log here, because 904 * that would spam the logfiles. 905 * error: "event_base_set failed." */ 906 } 907 else if(ub_event_add(b->eb->slow_accept, &tv) 908 != 0) { 909 /* we do not want to log here, 910 * error: "event_add failed." */ 911 } 912 } 913 return -1; 914 } 915 #endif 916 #else /* USE_WINSOCK */ 917 if(WSAGetLastError() == WSAEINPROGRESS || 918 WSAGetLastError() == WSAECONNRESET) 919 return -1; 920 if(WSAGetLastError() == WSAEWOULDBLOCK) { 921 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 922 return -1; 923 } 924 #endif 925 log_err_addr("accept failed", sock_strerror(errno), addr, 926 *addrlen); 927 return -1; 928 } 929 if(c->tcp_conn_limit && c->type == comm_tcp_accept) { 930 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen); 931 if(!tcl_new_connection(c->tcl_addr)) { 932 if(verbosity >= 3) 933 log_err_addr("accept rejected", 934 "connection limit exceeded", addr, *addrlen); 935 close(new_fd); 936 return -1; 937 } 938 } 939 #ifndef HAVE_ACCEPT4 940 fd_set_nonblock(new_fd); 941 #endif 942 return new_fd; 943 } 944 945 #ifdef USE_WINSOCK 946 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp), 947 #ifdef HAVE_BIO_SET_CALLBACK_EX 948 size_t ATTR_UNUSED(len), 949 #endif 950 int ATTR_UNUSED(argi), long argl, 951 #ifndef HAVE_BIO_SET_CALLBACK_EX 952 long retvalue 953 #else 954 int 
retvalue, size_t* ATTR_UNUSED(processed) 955 #endif 956 ) 957 { 958 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */ 959 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, 960 (oper&BIO_CB_RETURN)?"return":"before", 961 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"), 962 wsa_err==WSAEWOULDBLOCK?"wsawb":""); 963 /* on windows, check if previous operation caused EWOULDBLOCK */ 964 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) || 965 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) { 966 if(wsa_err == WSAEWOULDBLOCK) 967 ub_winsock_tcp_wouldblock((struct ub_event*) 968 BIO_get_callback_arg(b), UB_EV_READ); 969 } 970 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) || 971 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) { 972 if(wsa_err == WSAEWOULDBLOCK) 973 ub_winsock_tcp_wouldblock((struct ub_event*) 974 BIO_get_callback_arg(b), UB_EV_WRITE); 975 } 976 /* return original return value */ 977 return retvalue; 978 } 979 980 /** set win bio callbacks for nonblocking operations */ 981 void 982 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl) 983 { 984 SSL* ssl = (SSL*)thessl; 985 /* set them both just in case, but usually they are the same BIO */ 986 #ifdef HAVE_BIO_SET_CALLBACK_EX 987 BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb); 988 #else 989 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb); 990 #endif 991 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev); 992 #ifdef HAVE_BIO_SET_CALLBACK_EX 993 BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb); 994 #else 995 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb); 996 #endif 997 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev); 998 } 999 #endif 1000 1001 #ifdef HAVE_NGHTTP2 1002 /** Create http2 session server. 
Per connection, after TCP accepted.*/ 1003 static int http2_session_server_create(struct http2_session* h2_session) 1004 { 1005 log_assert(h2_session->callbacks); 1006 h2_session->is_drop = 0; 1007 if(nghttp2_session_server_new(&h2_session->session, 1008 h2_session->callbacks, 1009 h2_session) == NGHTTP2_ERR_NOMEM) { 1010 log_err("failed to create nghttp2 session server"); 1011 return 0; 1012 } 1013 1014 return 1; 1015 } 1016 1017 /** Submit http2 setting to session. Once per session. */ 1018 static int http2_submit_settings(struct http2_session* h2_session) 1019 { 1020 int ret; 1021 nghttp2_settings_entry settings[1] = { 1022 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1023 h2_session->c->http2_max_streams}}; 1024 1025 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 1026 settings, 1); 1027 if(ret) { 1028 verbose(VERB_QUERY, "http2: submit_settings failed, " 1029 "error: %s", nghttp2_strerror(ret)); 1030 return 0; 1031 } 1032 return 1; 1033 } 1034 #endif /* HAVE_NGHTTP2 */ 1035 1036 1037 void 1038 comm_point_tcp_accept_callback(int fd, short event, void* arg) 1039 { 1040 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 1041 int new_fd; 1042 log_assert(c->type == comm_tcp_accept); 1043 if(!(event & UB_EV_READ)) { 1044 log_info("ignoring tcp accept event %d", (int)event); 1045 return; 1046 } 1047 ub_comm_base_now(c->ev->base); 1048 /* find free tcp handler. */ 1049 if(!c->tcp_free) { 1050 log_warn("accepted too many tcp, connections full"); 1051 return; 1052 } 1053 /* accept incoming connection. 
*/
	/* the handler that is used is the first one on the free list */
	c_hdl = c->tcp_free;
	/* clear leftover flags from previous use, and then set the
	 * correct event base for the event structure for libevent */
	ub_event_free(c_hdl->ev->ev);
	c_hdl->ev->ev = NULL;
	/* no read/write toggling for tcp handlers with tcp_req_info and
	 * for local and raw comm points, the others toggle */
	if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
		c_hdl->type == comm_local || c_hdl->type == comm_raw)
		c_hdl->tcp_do_toggle_rw = 0;
	else	c_hdl->tcp_do_toggle_rw = 1;

	if(c_hdl->type == comm_http) {
#ifdef HAVE_NGHTTP2
		/* set up the http2 session before the event is created,
		 * on failure the callback returns without accepting */
		if(!c_hdl->h2_session ||
			!http2_session_server_create(c_hdl->h2_session)) {
			log_warn("failed to create nghttp2");
			return;
		}
		if(!c_hdl->h2_session ||
			!http2_submit_settings(c_hdl->h2_session)) {
			log_warn("failed to submit http2 settings");
			return;
		}
		/* without ssl on the accept point, http2 is used
		 * right away */
		if(!c->ssl) {
			c_hdl->tcp_do_toggle_rw = 0;
			c_hdl->use_h2 = 1;
		}
#endif
		c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
			UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
			comm_point_http_handle_callback, c_hdl);
	} else {
		c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
			UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
			comm_point_tcp_handle_callback, c_hdl);
	}
	if(!c_hdl->ev->ev) {
		log_warn("could not ub_event_new, dropped tcp");
		return;
	}
	log_assert(fd != -1);
	(void)fd;
	/* the peer address is stored in the repinfo of the handler */
	new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
		&c_hdl->repinfo.addrlen);
	if(new_fd == -1)
		return;
	if(c->ssl) {
		c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
		if(!c_hdl->ssl) {
			/* store the fd in the handler and close the
			 * handler */
			c_hdl->fd = new_fd;
			comm_point_close(c_hdl);
			return;
		}
		c_hdl->ssl_shake_state = comm_ssl_shake_read;
#ifdef USE_WINSOCK
		comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
#endif
	}

	/* grab the tcp handler buffers */
	/* this takes c_hdl off the front of the free list */
	c->cur_tcp_count++;
	c->tcp_free = c_hdl->tcp_free;
	c_hdl->tcp_free = NULL;
	if(!c->tcp_free) {
		/* stop accepting incoming queries
for now. */ 1118 comm_point_stop_listening(c); 1119 } 1120 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1121 } 1122 1123 /** Make tcp handler free for next assignment */ 1124 static void 1125 reclaim_tcp_handler(struct comm_point* c) 1126 { 1127 log_assert(c->type == comm_tcp); 1128 if(c->ssl) { 1129 #ifdef HAVE_SSL 1130 SSL_shutdown(c->ssl); 1131 SSL_free(c->ssl); 1132 c->ssl = NULL; 1133 #endif 1134 } 1135 comm_point_close(c); 1136 if(c->tcp_parent) { 1137 if(c != c->tcp_parent->tcp_free) { 1138 c->tcp_parent->cur_tcp_count--; 1139 c->tcp_free = c->tcp_parent->tcp_free; 1140 c->tcp_parent->tcp_free = c; 1141 } 1142 if(!c->tcp_free) { 1143 /* re-enable listening on accept socket */ 1144 comm_point_start_listening(c->tcp_parent, -1, -1); 1145 } 1146 } 1147 c->tcp_more_read_again = NULL; 1148 c->tcp_more_write_again = NULL; 1149 c->tcp_byte_count = 0; 1150 sldns_buffer_clear(c->buffer); 1151 } 1152 1153 /** do the callback when writing is done */ 1154 static void 1155 tcp_callback_writer(struct comm_point* c) 1156 { 1157 log_assert(c->type == comm_tcp); 1158 if(!c->tcp_write_and_read) { 1159 sldns_buffer_clear(c->buffer); 1160 c->tcp_byte_count = 0; 1161 } 1162 if(c->tcp_do_toggle_rw) 1163 c->tcp_is_reading = 1; 1164 /* switch from listening(write) to listening(read) */ 1165 if(c->tcp_req_info) { 1166 tcp_req_info_handle_writedone(c->tcp_req_info); 1167 } else { 1168 comm_point_stop_listening(c); 1169 if(c->tcp_write_and_read) { 1170 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1171 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1172 &c->repinfo) ) { 1173 comm_point_start_listening(c, -1, 1174 adjusted_tcp_timeout(c)); 1175 } 1176 } else { 1177 comm_point_start_listening(c, -1, 1178 adjusted_tcp_timeout(c)); 1179 } 1180 } 1181 } 1182 1183 /** do the callback when reading is done */ 1184 static void 1185 tcp_callback_reader(struct comm_point* c) 1186 { 1187 log_assert(c->type == comm_tcp || c->type == comm_local); 1188 
sldns_buffer_flip(c->buffer); 1189 if(c->tcp_do_toggle_rw) 1190 c->tcp_is_reading = 0; 1191 c->tcp_byte_count = 0; 1192 if(c->tcp_req_info) { 1193 tcp_req_info_handle_readdone(c->tcp_req_info); 1194 } else { 1195 if(c->type == comm_tcp) 1196 comm_point_stop_listening(c); 1197 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1198 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 1199 comm_point_start_listening(c, -1, 1200 adjusted_tcp_timeout(c)); 1201 } 1202 } 1203 } 1204 1205 #ifdef HAVE_SSL 1206 /** true if the ssl handshake error has to be squelched from the logs */ 1207 int 1208 squelch_err_ssl_handshake(unsigned long err) 1209 { 1210 if(verbosity >= VERB_QUERY) 1211 return 0; /* only squelch on low verbosity */ 1212 if(ERR_GET_LIB(err) == ERR_LIB_SSL && 1213 (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST || 1214 ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST || 1215 ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER || 1216 ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE 1217 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1218 || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER 1219 #endif 1220 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1221 || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL 1222 || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL 1223 # ifdef SSL_R_VERSION_TOO_LOW 1224 || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW 1225 # endif 1226 #endif 1227 )) 1228 return 1; 1229 return 0; 1230 } 1231 #endif /* HAVE_SSL */ 1232 1233 /** continue ssl handshake */ 1234 #ifdef HAVE_SSL 1235 static int 1236 ssl_handshake(struct comm_point* c) 1237 { 1238 int r; 1239 if(c->ssl_shake_state == comm_ssl_shake_hs_read) { 1240 /* read condition satisfied back to writing */ 1241 comm_point_listen_for_rw(c, 0, 1); 1242 c->ssl_shake_state = comm_ssl_shake_none; 1243 return 1; 1244 } 1245 if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 1246 /* write condition satisfied, back to reading */ 1247 comm_point_listen_for_rw(c, 1, 0); 1248 
c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	/* clear the error queue, so that the error fetched after a failure
	 * belongs to this call */
	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r != 1) {
		/* not done; find out what the ssl library wants */
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_WANT_READ) {
			/* if already waiting for read, nothing to change */
			if(c->ssl_shake_state == comm_ssl_shake_read)
				return 1;
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1;
		} else if(want == SSL_ERROR_WANT_WRITE) {
			/* if already waiting for write, nothing to change */
			if(c->ssl_shake_state == comm_ssl_shake_write)
				return 1;
			c->ssl_shake_state = comm_ssl_shake_write;
			comm_point_listen_for_rw(c, 0, 1);
			return 1;
		} else if(r == 0) {
			return 0; /* closed */
		} else if(want == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
#ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
			if(!tcp_connect_errno_needs_log(
				(struct sockaddr*)&c->repinfo.addr,
				c->repinfo.addrlen))
				return 0; /* silence connect failures that
				show up because after connect this is the
				first system call that accesses the socket */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		} else {
			/* other failure; log the crypto error and the
			 * address, unless it has to be squelched */
			unsigned long err = ERR_get_error();
			if(!squelch_err_ssl_handshake(err)) {
				log_crypto_err_code("ssl handshake failed", err);
				log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr,
					c->repinfo.addrlen);
			}
			return 0;
		}
	}
	/* this is where peer verification could take place */
	if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
		/* verification */
		if(SSL_get_verify_result(c->ssl) == X509_V_OK) {
			/* the certificate is released with X509_free below */
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
			X509* x = SSL_get1_peer_certificate(c->ssl);
#else
			X509* x = SSL_get_peer_certificate(c->ssl);
#endif
if(!x) {
				/* verify result is OK, but there is no
				 * certificate: fail the connection */
				log_addr(VERB_ALGO, "SSL connection failed: "
					"no certificate",
					&c->repinfo.addr, c->repinfo.addrlen);
				return 0;
			}
			log_cert(VERB_ALGO, "peer certificate", x);
#ifdef HAVE_SSL_GET0_PEERNAME
			/* if a peername is available, it is part of the
			 * log message */
			if(SSL_get0_peername(c->ssl)) {
				char buf[255];
				snprintf(buf, sizeof(buf), "SSL connection "
					"to %s authenticated",
					SSL_get0_peername(c->ssl));
				log_addr(VERB_ALGO, buf, &c->repinfo.addr,
					c->repinfo.addrlen);
			} else {
#endif
				log_addr(VERB_ALGO, "SSL connection "
					"authenticated", &c->repinfo.addr,
					c->repinfo.addrlen);
#ifdef HAVE_SSL_GET0_PEERNAME
			}
#endif
			X509_free(x);
		} else {
			/* verification failed; log the certificate, if
			 * there is one, and fail the connection */
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
			X509* x = SSL_get1_peer_certificate(c->ssl);
#else
			X509* x = SSL_get_peer_certificate(c->ssl);
#endif
			if(x) {
				log_cert(VERB_ALGO, "peer certificate", x);
				X509_free(x);
			}
			log_addr(VERB_ALGO, "SSL connection failed: "
				"failed to authenticate",
				&c->repinfo.addr, c->repinfo.addrlen);
			return 0;
		}
	} else {
		/* unauthenticated, the verify peer flag was not set
		 * in c->ssl when the ssl object was created from ssl_ctx */
		log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr,
			c->repinfo.addrlen);
	}

#ifdef HAVE_SSL_GET0_ALPN_SELECTED
	/* check if http2 use is negotiated */
	if(c->type == comm_http && c->h2_session) {
		const unsigned char *alpn;
		unsigned int alpnlen = 0;
		SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
		/* the selected protocol has to be exactly "h2" */
		if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
			/* connection upgraded to HTTP2 */
			c->tcp_do_toggle_rw = 0;
			c->use_h2 = 1;
		}
	}
#endif

	/* setup listen rw correctly */
	if(c->tcp_is_reading) {
		/* if the shake state is read, the listen is not changed */
		if(c->ssl_shake_state != comm_ssl_shake_read)
			comm_point_listen_for_rw(c, 1, 0);
	} else {
		comm_point_listen_for_rw(c, 0, 1);
	}
c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues the handshake if that is still in progress, then reads the
 * two byte length followed by that many bytes into c->buffer, and calls
 * tcp_callback_reader once nothing remains to be read.
 * Without HAVE_SSL it returns 0.
 * @param c: comm point with the ssl connection.
 * @return 0 on error or close, 1 to continue.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		ERR_clear_error();
		if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
			c->tcp_byte_count), (int)(sizeof(uint16_t) -
			c->tcp_byte_count))) <= 0) {
			int want = SSL_get_error(c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				if(c->tcp_req_info)
					return tcp_req_info_handle_read_close(c->tcp_req_info);
				return 0; /* shutdown, closed */
			} else if(want == SSL_ERROR_WANT_READ) {
#ifdef USE_WINSOCK
				ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
				return 1; /* read more later */
			} else if(want == SSL_ERROR_WANT_WRITE) {
				/* the read wants a write first; listen for
				 * write and note that in the shake state */
				c->ssl_shake_state = comm_ssl_shake_hs_write;
				comm_point_listen_for_rw(c, 0, 1);
				return 1;
			} else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
				if(errno == ECONNRESET && verbosity < 2)
					return 0; /* silence reset by peer */
#endif
				if(errno != 0)
					log_err("SSL_read syscall: %s",
						strerror(errno));
				return 0;
			}
			log_crypto_err("could not SSL_read");
			return 0;
		}
		c->tcp_byte_count += r;
		/* length is not complete yet */
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* the length at the start of the buffer must fit in the
		 * buffer */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
}
		/* skip over what was read in excess of the two length bytes */
		sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	/* read the contents, as much as remains up to the buffer limit */
	if(sldns_buffer_remaining(c->buffer) > 0) {
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(r <= 0) {
			int want = SSL_get_error(c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				if(c->tcp_req_info)
					return tcp_req_info_handle_read_close(c->tcp_req_info);
				return 0; /* shutdown, closed */
			} else if(want == SSL_ERROR_WANT_READ) {
#ifdef USE_WINSOCK
				ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
				return 1; /* read more later */
			} else if(want == SSL_ERROR_WANT_WRITE) {
				/* the read wants a write first; listen for
				 * write and note that in the shake state */
				c->ssl_shake_state = comm_ssl_shake_hs_write;
				comm_point_listen_for_rw(c, 0, 1);
				return 1;
			} else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
				if(errno == ECONNRESET && verbosity < 2)
					return 0; /* silence reset by peer */
#endif
				if(errno != 0)
					log_err("SSL_read syscall: %s",
						strerror(errno));
				return 0;
			}
			log_crypto_err("could not SSL_read");
			return 0;
		}
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}
	/* nothing remains to be read, hand over the buffer */
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}

/**
 * ssl write callback on TCP.
 * Continues the handshake if that is still in progress, then writes the
 * two byte length and the packet. In write-and-read mode the packet is
 * c->tcp_write_pkt and progress is kept in c->tcp_write_byte_count,
 * otherwise the packet is in c->buffer and the length progress is kept
 * in c->tcp_byte_count. Without HAVE_SSL it returns 0.
 * @param c: comm point with the ssl connection.
 * @return 0 on error or close, 1 to continue.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		/* the two length bytes have not been written completely;
		 * len is the packet length in network byte order */
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		if(c->tcp_write_and_read) {
			if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
				/* combine the tcp length and the query for
				 * write, this emulates writev */
				uint8_t buf[LDNS_RR_BUF_SIZE];
				memmove(buf, &len, sizeof(uint16_t));
				memmove(buf+sizeof(uint16_t),
					c->tcp_write_pkt,
					c->tcp_write_pkt_len);
				/* start after the bytes already written */
				r = SSL_write(c->ssl,
					(void*)(buf+c->tcp_write_byte_count),
					c->tcp_write_pkt_len + 2 -
					c->tcp_write_byte_count);
			} else {
				/* too large to combine, write only the
				 * rest of the length bytes */
				r = SSL_write(c->ssl,
					(void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
					(int)(sizeof(uint16_t)-c->tcp_write_byte_count));
			}
		} else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
			LDNS_RR_BUF_SIZE) {
			/* combine the tcp length and the query for write,
			 * this emulates writev */
			uint8_t buf[LDNS_RR_BUF_SIZE];
			memmove(buf, &len, sizeof(uint16_t));
			memmove(buf+sizeof(uint16_t),
				sldns_buffer_current(c->buffer),
				sldns_buffer_remaining(c->buffer));
			/* start after the bytes already written */
			r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
				(int)(sizeof(uint16_t)+
				sldns_buffer_remaining(c->buffer)
				- c->tcp_byte_count));
		} else {
			/* too large to combine, write only the rest of
			 * the length bytes */
			r = SSL_write(c->ssl,
				(void*)(((uint8_t*)&len)+c->tcp_byte_count),
				(int)(sizeof(uint16_t)-c->tcp_byte_count));
		}
		if(r <= 0) {
			int want = SSL_get_error(c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				return 0; /* closed */
			} else if(want == SSL_ERROR_WANT_READ) {
				/* the write wants a read first; listen for
				 * read and note that in the shake state */
				c->ssl_shake_state = comm_ssl_shake_hs_read;
				comm_point_listen_for_rw(c, 1, 0);
				return 1; /* wait for read condition */
			} else if(want == SSL_ERROR_WANT_WRITE) {
#ifdef USE_WINSOCK
				ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
#endif
				return 1; /* write
more later */
			} else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
				if(errno == EPIPE && verbosity < 2)
					return 0; /* silence 'broken pipe' */
#endif
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return 0;
			}
			log_crypto_err("could not SSL_write");
			return 0;
		}
		/* account for the bytes that were written */
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			/* the position in the buffer is the count without
			 * the two length bytes */
			sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		/* done if the whole packet went out with the length */
		if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	/* write the remainder of the packet contents */
	ERR_clear_error();
	if(c->tcp_write_and_read) {
		/* tcp_write_byte_count includes the two length bytes, hence
		 * the -2 for the offset in the packet */
		r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			(int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
	} else {
		r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
	}
	if(r <= 0) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_ZERO_RETURN) {
			return 0; /* closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			/* the write wants a read first; listen for read
			 * and note that in the shake state */
			c->ssl_shake_state = comm_ssl_shake_hs_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1; /* wait for read condition */
		} else if(want == SSL_ERROR_WANT_WRITE) {
#ifdef USE_WINSOCK
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
#endif
			return 1; /* write more later */
		} else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
1609 return 0; /* silence 'broken pipe' */ 1610 #endif 1611 if(errno != 0) 1612 log_err("SSL_write syscall: %s", 1613 strerror(errno)); 1614 return 0; 1615 } 1616 log_crypto_err("could not SSL_write"); 1617 return 0; 1618 } 1619 if(c->tcp_write_and_read) { 1620 c->tcp_write_byte_count += r; 1621 } else { 1622 sldns_buffer_skip(c->buffer, (ssize_t)r); 1623 } 1624 1625 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1626 tcp_callback_writer(c); 1627 } 1628 return 1; 1629 #else 1630 (void)c; 1631 return 0; 1632 #endif /* HAVE_SSL */ 1633 } 1634 1635 /** handle ssl tcp connection with dns contents */ 1636 static int 1637 ssl_handle_it(struct comm_point* c, int is_write) 1638 { 1639 /* handle case where renegotiation wants read during write call 1640 * or write during read calls */ 1641 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 1642 return ssl_handle_read(c); 1643 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 1644 return ssl_handle_write(c); 1645 /* handle read events for read operation and write events for a 1646 * write operation */ 1647 else if(!is_write) 1648 return ssl_handle_read(c); 1649 return ssl_handle_write(c); 1650 } 1651 1652 /** Handle tcp reading callback. 1653 * @param fd: file descriptor of socket. 1654 * @param c: comm point to read from into buffer. 1655 * @param short_ok: if true, very short packets are OK (for comm_local). 
* @return: 0 on error
 */
static int
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
{
	ssize_t r;
	log_assert(c->type == comm_tcp || c->type == comm_local);
	/* ssl connections are handled by the ssl routines */
	if(c->ssl)
		return ssl_handle_it(c, 0);
	/* a read is only done when reading, or in write-and-read mode */
	if(!c->tcp_is_reading && !c->tcp_write_and_read)
		return 0;

	log_assert(fd != -1);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
			sizeof(uint16_t)-c->tcp_byte_count, 0);
		if(r == 0) {
			/* recv returned 0; with tcp_req_info that decides
			 * the return value, otherwise it is an error */
			if(c->tcp_req_info)
				return tcp_req_info_handle_read_close(c->tcp_req_info);
			return 0;
		} else if(r == -1) {
#ifndef USE_WINSOCK
			/* interrupted or nothing to read now, try later */
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
#ifdef ECONNREFUSED
			if(errno == ECONNREFUSED && verbosity < 2)
				return 0; /* silence connection refused */
#endif
#ifdef ENETUNREACH
			if(errno == ENETUNREACH && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef EHOSTDOWN
			if(errno == EHOSTDOWN && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef EHOSTUNREACH
			if(errno == EHOSTUNREACH && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef ENETDOWN
			if(errno == ENETDOWN && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef EACCES
			if(errno == EACCES && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef ENOTCONN
			/* this one is logged at every verbosity level */
			if(errno == ENOTCONN) {
				log_err_addr("read (in tcp s) failed and this could be because TCP Fast Open is enabled [--disable-tfo-client --disable-tfo-server] but does not work", sock_strerror(errno),
					&c->repinfo.addr, c->repinfo.addrlen);
				return 0;
			}
#endif
#else /* USE_WINSOCK */
			if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
return 0;
			if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
				return 0;
			/* reset by peer is not logged, at any verbosity */
			if(WSAGetLastError() == WSAECONNRESET)
				return 0;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_READ);
				return 1;
			}
#endif
			log_err_addr("read (in tcp s)", sock_strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
			return 0;
		}
		c->tcp_byte_count += r;
		/* wait for more if the length is not complete */
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* the length at the start of the buffer must fit in the
		 * buffer */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		/* shorter than LDNS_HEADER_SIZE is dropped, unless the
		 * caller allows short packets */
		if(!short_ok &&
			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}

	/* read the contents, up to the limit that was set from the length */
	if(sldns_buffer_remaining(c->buffer) == 0)
		log_err("in comm_point_tcp_handle_read buffer_remaining is not > 0 as expected, continuing with (harmless) 0 length recv");
	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
		sldns_buffer_remaining(c->buffer), 0);
	if(r == 0) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0;
	} else if(r == -1) {
#ifndef USE_WINSOCK
		/* interrupted or nothing to read now, try later */
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#else /* USE_WINSOCK */
		if(WSAGetLastError() == WSAECONNRESET)
			return 0;
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError()
== WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
			return 1;
		}
#endif
		log_err_addr("read (in tcp r)", sock_strerror(errno),
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	}
	sldns_buffer_skip(c->buffer, r);
	/* nothing remains to be read, hand over the buffer */
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
}

/**
 * Handle tcp writing callback.
 * Checks for a pending error from a nonblocking connect before the first
 * bytes are written, passes ssl connections to the ssl routines, and
 * writes the two byte length followed by the packet.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write buffer out of.
 * @return: 0 on error
 */
static int
comm_point_tcp_handle_write(int fd, struct comm_point* c)
{
	ssize_t r;
	struct sldns_buffer *buffer;
	log_assert(c->type == comm_tcp);
	/* with dnscrypt the contents are written from the dnscrypt buffer */
#ifdef USE_DNSCRYPT
	buffer = c->dnscrypt_buffer;
#else
	buffer = c->buffer;
#endif
	if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
		return 0;
	log_assert(fd != -1);
	/* only when nothing has been written yet */
	if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
		/* check for pending error from nonblocking connect */
		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
		int error = 0;
		socklen_t len = (socklen_t)sizeof(error);
		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
			&len) < 0){
#ifndef USE_WINSOCK
			error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
			error = WSAGetLastError();
#endif
		}
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
		if(error == EINPROGRESS || error == EWOULDBLOCK)
			return 1; /* try again later */
		else
#endif
		if(error != 0 && verbosity < 2)
			return 0; /* silence lots of chatter in the logs */
		else if(error != 0) {
			log_err_addr("tcp connect", strerror(error),
				&c->repinfo.addr, c->repinfo.addrlen);
#else /* USE_WINSOCK */
		/* examine error */
if(error == WSAEINPROGRESS)
			return 1;
		else if(error == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		} else if(error != 0 && verbosity < 2)
			return 0;
		else if(error != 0) {
			log_err_addr("tcp connect", wsa_strerror(error),
				&c->repinfo.addr, c->repinfo.addrlen);
#endif /* USE_WINSOCK */
			/* this return closes the 'else if(error != 0)' block
			 * of both the winsock and the non-winsock variant */
			return 0;
		}
	}
	/* ssl connections are handled by the ssl routines */
	if(c->ssl)
		return ssl_handle_it(c, 1);

#ifdef USE_MSG_FASTOPEN
	/* Only try this on first use of a connection that uses tfo,
	   otherwise fall through to normal write */
	/* Also, TFO support on WINDOWS not implemented at the moment */
	if(c->tcp_do_fastopen == 1) {
		/* this form of sendmsg() does both a connect() and send() so need to
		   look for various flavours of error*/
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
		struct msghdr msg;
		struct iovec iov[2];
		/* the flag is cleared, so this is attempted only once */
		c->tcp_do_fastopen = 0;
		memset(&msg, 0, sizeof(msg));
		/* iov[0] is the unwritten part of the length bytes and
		 * iov[1] is the packet */
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		/* the destination address is passed along with the message */
		msg.msg_name = &c->repinfo.addr;
		msg.msg_namelen = c->repinfo.addrlen;
		msg.msg_iov = iov;
		msg.msg_iovlen = 2;
		r = sendmsg(fd, &msg, MSG_FASTOPEN);
		if (r == -1) {
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
			/* Handshake is underway, maybe because no TFO cookie available.
Come back to write the message*/
			if(errno == EINPROGRESS || errno == EWOULDBLOCK)
				return 1;
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
			/* Not handling EISCONN here as shouldn't ever hit that case.*/
			/* errors other than EPIPE, EOPNOTSUPP and 0 fail the
			 * write, logged only at verbosity 2 and up */
			if(errno != EPIPE
#ifdef EOPNOTSUPP
				/* if /proc/sys/net/ipv4/tcp_fastopen is
				 * disabled on Linux, sendmsg may return
				 * 'Operation not supported', if so
				 * fallthrough to ordinary connect. */
				&& errno != EOPNOTSUPP
#endif
				&& errno != 0) {
				if(verbosity < 2)
					return 0; /* silence lots of chatter in the logs */
				log_err_addr("tcp sendmsg", strerror(errno),
					&c->repinfo.addr, c->repinfo.addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
			/* fallthrough to nonFASTOPEN
			 * (MSG_FASTOPEN on Linux 3 produces EPIPE)
			 * we need to perform connect() */
			if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
#ifdef EINPROGRESS
				if(errno == EINPROGRESS)
					return 1; /* wait until connect done*/
#endif
#ifdef USE_WINSOCK
				if(WSAGetLastError() == WSAEINPROGRESS ||
					WSAGetLastError() == WSAEWOULDBLOCK)
					return 1; /* wait until connect done*/
#endif
				if(tcp_connect_errno_needs_log(
					(struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
					log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
						strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
				}
				return 0;
			}
			/* connect returned success, continue below with the
			 * normal write */

		} else {
			/* sendmsg wrote r bytes, account for them */
			if(c->tcp_write_and_read) {
				c->tcp_write_byte_count += r;
				if(c->tcp_write_byte_count < sizeof(uint16_t))
					return 1;
			} else {
				c->tcp_byte_count += r;
				if(c->tcp_byte_count < sizeof(uint16_t))
					return 1;
				/* the position in the buffer is the count
				 * without the two length bytes */
				sldns_buffer_set_position(buffer, c->tcp_byte_count -
					sizeof(uint16_t));
			}
			if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0)
|| (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
				/* the whole packet has been written */
				tcp_callback_writer(c);
				return 1;
			}
		}
	}
#endif /* USE_MSG_FASTOPEN */

	if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		/* the two length bytes have not been written completely;
		 * len is the packet length in network byte order */
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
#ifdef HAVE_WRITEV
		/* with writev, the unwritten part of the length bytes and
		 * the packet are written in one call */
		struct iovec iov[2];
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		r = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		/* without writev, only the length bytes are sent here */
		if(c->tcp_write_and_read) {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
				sizeof(uint16_t)-c->tcp_write_byte_count, 0);
		} else {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
				sizeof(uint16_t)-c->tcp_byte_count, 0);
		}
#endif /* HAVE_WRITEV */
		if(r == -1) {
#ifndef USE_WINSOCK
#  ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
			/* interrupted or cannot write now, try later */
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
#  ifdef HAVE_WRITEV
			log_err_addr("tcp writev", strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
#  else /* HAVE_WRITEV */
			log_err_addr("tcp send s", strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
#  endif /* HAVE_WRITEV */
#else
			/* on winsock, not connected is not an error here,
			 * the write is tried again later */
			if(WSAGetLastError() == WSAENOTCONN)
return 1;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_WRITE);
				return 1;
			}
			if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
			log_err_addr("tcp send s",
				wsa_strerror(WSAGetLastError()),
				&c->repinfo.addr, c->repinfo.addrlen);
#endif
			return 0;
		}
		/* account for the bytes that were written */
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			/* the position in the buffer is the count without
			 * the two length bytes */
			sldns_buffer_set_position(buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		/* done if the whole packet went out with the length */
		if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	/* write the remainder of the packet contents */
	if(c->tcp_write_and_read) {
		/* tcp_write_byte_count includes the two length bytes, hence
		 * the -2 for the offset in the packet */
		r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
	} else {
		r = send(fd, (void*)sldns_buffer_current(buffer),
			sldns_buffer_remaining(buffer), 0);
	}
	if(r == -1) {
#ifndef USE_WINSOCK
		/* interrupted or cannot write now, try later */
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
#else
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
2058 log_err_addr("tcp send r", sock_strerror(errno), 2059 &c->repinfo.addr, c->repinfo.addrlen); 2060 return 0; 2061 } 2062 if(c->tcp_write_and_read) { 2063 c->tcp_write_byte_count += r; 2064 } else { 2065 sldns_buffer_skip(buffer, r); 2066 } 2067 2068 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2069 tcp_callback_writer(c); 2070 } 2071 2072 return 1; 2073 } 2074 2075 /** read again to drain buffers when there could be more to read */ 2076 static void 2077 tcp_req_info_read_again(int fd, struct comm_point* c) 2078 { 2079 while(c->tcp_req_info->read_again) { 2080 int r; 2081 c->tcp_req_info->read_again = 0; 2082 if(c->tcp_is_reading) 2083 r = comm_point_tcp_handle_read(fd, c, 0); 2084 else r = comm_point_tcp_handle_write(fd, c); 2085 if(!r) { 2086 reclaim_tcp_handler(c); 2087 if(!c->tcp_do_close) { 2088 fptr_ok(fptr_whitelist_comm_point( 2089 c->callback)); 2090 (void)(*c->callback)(c, c->cb_arg, 2091 NETEVENT_CLOSED, NULL); 2092 } 2093 return; 2094 } 2095 } 2096 } 2097 2098 /** read again to drain buffers when there could be more to read */ 2099 static void 2100 tcp_more_read_again(int fd, struct comm_point* c) 2101 { 2102 /* if the packet is done, but another one could be waiting on 2103 * the connection, the callback signals this, and we try again */ 2104 /* this continues until the read routines get EAGAIN or so, 2105 * and thus does not call the callback, and the bool is 0 */ 2106 int* moreread = c->tcp_more_read_again; 2107 while(moreread && *moreread) { 2108 *moreread = 0; 2109 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2110 reclaim_tcp_handler(c); 2111 if(!c->tcp_do_close) { 2112 fptr_ok(fptr_whitelist_comm_point( 2113 c->callback)); 2114 (void)(*c->callback)(c, c->cb_arg, 2115 NETEVENT_CLOSED, NULL); 2116 } 2117 return; 2118 } 2119 } 2120 } 2121 2122 /** write again to fill up when there could be more to write */ 2123 static void 2124 
tcp_more_write_again(int fd, struct comm_point* c) 2125 { 2126 /* if the packet is done, but another is waiting to be written, 2127 * the callback signals it and we try again. */ 2128 /* this continues until the write routines get EAGAIN or so, 2129 * and thus does not call the callback, and the bool is 0 */ 2130 int* morewrite = c->tcp_more_write_again; 2131 while(morewrite && *morewrite) { 2132 *morewrite = 0; 2133 if(!comm_point_tcp_handle_write(fd, c)) { 2134 reclaim_tcp_handler(c); 2135 if(!c->tcp_do_close) { 2136 fptr_ok(fptr_whitelist_comm_point( 2137 c->callback)); 2138 (void)(*c->callback)(c, c->cb_arg, 2139 NETEVENT_CLOSED, NULL); 2140 } 2141 return; 2142 } 2143 } 2144 } 2145 2146 void 2147 comm_point_tcp_handle_callback(int fd, short event, void* arg) 2148 { 2149 struct comm_point* c = (struct comm_point*)arg; 2150 log_assert(c->type == comm_tcp); 2151 ub_comm_base_now(c->ev->base); 2152 2153 #ifdef USE_DNSCRYPT 2154 /* Initialize if this is a dnscrypt socket */ 2155 if(c->tcp_parent) { 2156 c->dnscrypt = c->tcp_parent->dnscrypt; 2157 } 2158 if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) { 2159 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer)); 2160 if(!c->dnscrypt_buffer) { 2161 log_err("Could not allocate dnscrypt buffer"); 2162 reclaim_tcp_handler(c); 2163 if(!c->tcp_do_close) { 2164 fptr_ok(fptr_whitelist_comm_point( 2165 c->callback)); 2166 (void)(*c->callback)(c, c->cb_arg, 2167 NETEVENT_CLOSED, NULL); 2168 } 2169 return; 2170 } 2171 } 2172 #endif 2173 2174 if(event&UB_EV_TIMEOUT) { 2175 verbose(VERB_QUERY, "tcp took too long, dropped"); 2176 reclaim_tcp_handler(c); 2177 if(!c->tcp_do_close) { 2178 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2179 (void)(*c->callback)(c, c->cb_arg, 2180 NETEVENT_TIMEOUT, NULL); 2181 } 2182 return; 2183 } 2184 if(event&UB_EV_READ 2185 #ifdef USE_MSG_FASTOPEN 2186 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE)) 2187 #endif 2188 ) { 2189 int has_tcpq = (c->tcp_req_info != NULL); 2190 int* 
moreread = c->tcp_more_read_again; 2191 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2192 reclaim_tcp_handler(c); 2193 if(!c->tcp_do_close) { 2194 fptr_ok(fptr_whitelist_comm_point( 2195 c->callback)); 2196 (void)(*c->callback)(c, c->cb_arg, 2197 NETEVENT_CLOSED, NULL); 2198 } 2199 return; 2200 } 2201 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2202 tcp_req_info_read_again(fd, c); 2203 if(moreread && *moreread) 2204 tcp_more_read_again(fd, c); 2205 return; 2206 } 2207 if(event&UB_EV_WRITE) { 2208 int has_tcpq = (c->tcp_req_info != NULL); 2209 int* morewrite = c->tcp_more_write_again; 2210 if(!comm_point_tcp_handle_write(fd, c)) { 2211 reclaim_tcp_handler(c); 2212 if(!c->tcp_do_close) { 2213 fptr_ok(fptr_whitelist_comm_point( 2214 c->callback)); 2215 (void)(*c->callback)(c, c->cb_arg, 2216 NETEVENT_CLOSED, NULL); 2217 } 2218 return; 2219 } 2220 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2221 tcp_req_info_read_again(fd, c); 2222 if(morewrite && *morewrite) 2223 tcp_more_write_again(fd, c); 2224 return; 2225 } 2226 log_err("Ignored event %d for tcphdl.", event); 2227 } 2228 2229 /** Make http handler free for next assignment */ 2230 static void 2231 reclaim_http_handler(struct comm_point* c) 2232 { 2233 log_assert(c->type == comm_http); 2234 if(c->ssl) { 2235 #ifdef HAVE_SSL 2236 SSL_shutdown(c->ssl); 2237 SSL_free(c->ssl); 2238 c->ssl = NULL; 2239 #endif 2240 } 2241 comm_point_close(c); 2242 if(c->tcp_parent) { 2243 if(c != c->tcp_parent->tcp_free) { 2244 c->tcp_parent->cur_tcp_count--; 2245 c->tcp_free = c->tcp_parent->tcp_free; 2246 c->tcp_parent->tcp_free = c; 2247 } 2248 if(!c->tcp_free) { 2249 /* re-enable listening on accept socket */ 2250 comm_point_start_listening(c->tcp_parent, -1, -1); 2251 } 2252 } 2253 } 2254 2255 /** read more data for http (with ssl) */ 2256 static int 2257 ssl_http_read_more(struct comm_point* c) 2258 { 2259 #ifdef HAVE_SSL 2260 int r; 2261 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2262 
ERR_clear_error(); 2263 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2264 (int)sldns_buffer_remaining(c->buffer)); 2265 if(r <= 0) { 2266 int want = SSL_get_error(c->ssl, r); 2267 if(want == SSL_ERROR_ZERO_RETURN) { 2268 return 0; /* shutdown, closed */ 2269 } else if(want == SSL_ERROR_WANT_READ) { 2270 return 1; /* read more later */ 2271 } else if(want == SSL_ERROR_WANT_WRITE) { 2272 c->ssl_shake_state = comm_ssl_shake_hs_write; 2273 comm_point_listen_for_rw(c, 0, 1); 2274 return 1; 2275 } else if(want == SSL_ERROR_SYSCALL) { 2276 #ifdef ECONNRESET 2277 if(errno == ECONNRESET && verbosity < 2) 2278 return 0; /* silence reset by peer */ 2279 #endif 2280 if(errno != 0) 2281 log_err("SSL_read syscall: %s", 2282 strerror(errno)); 2283 return 0; 2284 } 2285 log_crypto_err("could not SSL_read"); 2286 return 0; 2287 } 2288 verbose(VERB_ALGO, "ssl http read more skip to %d + %d", 2289 (int)sldns_buffer_position(c->buffer), (int)r); 2290 sldns_buffer_skip(c->buffer, (ssize_t)r); 2291 return 1; 2292 #else 2293 (void)c; 2294 return 0; 2295 #endif /* HAVE_SSL */ 2296 } 2297 2298 /** read more data for http */ 2299 static int 2300 http_read_more(int fd, struct comm_point* c) 2301 { 2302 ssize_t r; 2303 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2304 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2305 sldns_buffer_remaining(c->buffer), 0); 2306 if(r == 0) { 2307 return 0; 2308 } else if(r == -1) { 2309 #ifndef USE_WINSOCK 2310 if(errno == EINTR || errno == EAGAIN) 2311 return 1; 2312 #else /* USE_WINSOCK */ 2313 if(WSAGetLastError() == WSAECONNRESET) 2314 return 0; 2315 if(WSAGetLastError() == WSAEINPROGRESS) 2316 return 1; 2317 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2318 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2319 return 1; 2320 } 2321 #endif 2322 log_err_addr("read (in http r)", sock_strerror(errno), 2323 &c->repinfo.addr, c->repinfo.addrlen); 2324 return 0; 2325 } 2326 verbose(VERB_ALGO, "http read more skip to %d + %d", 2327 
(int)sldns_buffer_position(c->buffer), (int)r); 2328 sldns_buffer_skip(c->buffer, r); 2329 return 1; 2330 } 2331 2332 /** return true if http header has been read (one line complete) */ 2333 static int 2334 http_header_done(sldns_buffer* buf) 2335 { 2336 size_t i; 2337 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2338 /* there was a \r before the \n, but we ignore that */ 2339 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 2340 return 1; 2341 } 2342 return 0; 2343 } 2344 2345 /** return character string into buffer for header line, moves buffer 2346 * past that line and puts zero terminator into linefeed-newline */ 2347 static char* 2348 http_header_line(sldns_buffer* buf) 2349 { 2350 char* result = (char*)sldns_buffer_current(buf); 2351 size_t i; 2352 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2353 /* terminate the string on the \r */ 2354 if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 2355 sldns_buffer_write_u8_at(buf, i, 0); 2356 /* terminate on the \n and skip past the it and done */ 2357 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 2358 sldns_buffer_write_u8_at(buf, i, 0); 2359 sldns_buffer_set_position(buf, i+1); 2360 return result; 2361 } 2362 } 2363 return NULL; 2364 } 2365 2366 /** move unread buffer to start and clear rest for putting the rest into it */ 2367 static void 2368 http_moveover_buffer(sldns_buffer* buf) 2369 { 2370 size_t pos = sldns_buffer_position(buf); 2371 size_t len = sldns_buffer_remaining(buf); 2372 sldns_buffer_clear(buf); 2373 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 2374 sldns_buffer_set_position(buf, len); 2375 } 2376 2377 /** a http header is complete, process it */ 2378 static int 2379 http_process_initial_header(struct comm_point* c) 2380 { 2381 char* line = http_header_line(c->buffer); 2382 if(!line) return 1; 2383 verbose(VERB_ALGO, "http header: %s", line); 2384 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 2385 /* check returncode */ 2386 
if(line[9] != '2') { 2387 verbose(VERB_ALGO, "http bad status %s", line+9); 2388 return 0; 2389 } 2390 } else if(strncasecmp(line, "Content-Length: ", 16) == 0) { 2391 if(!c->http_is_chunked) 2392 c->tcp_byte_count = (size_t)atoi(line+16); 2393 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 2394 c->tcp_byte_count = 0; 2395 c->http_is_chunked = 1; 2396 } else if(line[0] == 0) { 2397 /* end of initial headers */ 2398 c->http_in_headers = 0; 2399 if(c->http_is_chunked) 2400 c->http_in_chunk_headers = 1; 2401 /* remove header text from front of buffer 2402 * the buffer is going to be used to return the data segment 2403 * itself and we don't want the header to get returned 2404 * prepended with it */ 2405 http_moveover_buffer(c->buffer); 2406 sldns_buffer_flip(c->buffer); 2407 return 1; 2408 } 2409 /* ignore other headers */ 2410 return 1; 2411 } 2412 2413 /** a chunk header is complete, process it, return 0=fail, 1=continue next 2414 * header line, 2=done with chunked transfer*/ 2415 static int 2416 http_process_chunk_header(struct comm_point* c) 2417 { 2418 char* line = http_header_line(c->buffer); 2419 if(!line) return 1; 2420 if(c->http_in_chunk_headers == 3) { 2421 verbose(VERB_ALGO, "http chunk trailer: %s", line); 2422 /* are we done ? 
*/ 2423 if(line[0] == 0 && c->tcp_byte_count == 0) { 2424 /* callback of http reader when NETEVENT_DONE, 2425 * end of data, with no data in buffer */ 2426 sldns_buffer_set_position(c->buffer, 0); 2427 sldns_buffer_set_limit(c->buffer, 0); 2428 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2429 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2430 /* return that we are done */ 2431 return 2; 2432 } 2433 if(line[0] == 0) { 2434 /* continue with header of the next chunk */ 2435 c->http_in_chunk_headers = 1; 2436 /* remove header text from front of buffer */ 2437 http_moveover_buffer(c->buffer); 2438 sldns_buffer_flip(c->buffer); 2439 return 1; 2440 } 2441 /* ignore further trail headers */ 2442 return 1; 2443 } 2444 verbose(VERB_ALGO, "http chunk header: %s", line); 2445 if(c->http_in_chunk_headers == 1) { 2446 /* read chunked start line */ 2447 char* end = NULL; 2448 c->tcp_byte_count = (size_t)strtol(line, &end, 16); 2449 if(end == line) 2450 return 0; 2451 c->http_in_chunk_headers = 0; 2452 /* remove header text from front of buffer */ 2453 http_moveover_buffer(c->buffer); 2454 sldns_buffer_flip(c->buffer); 2455 if(c->tcp_byte_count == 0) { 2456 /* done with chunks, process chunk_trailer lines */ 2457 c->http_in_chunk_headers = 3; 2458 } 2459 return 1; 2460 } 2461 /* ignore other headers */ 2462 return 1; 2463 } 2464 2465 /** handle nonchunked data segment, 0=fail, 1=wait */ 2466 static int 2467 http_nonchunk_segment(struct comm_point* c) 2468 { 2469 /* c->buffer at position..limit has new data we read in. 2470 * the buffer itself is full of nonchunked data. 2471 * we are looking to read tcp_byte_count more data 2472 * and then the transfer is done. 
*/ 2473 size_t remainbufferlen; 2474 size_t got_now = sldns_buffer_limit(c->buffer); 2475 if(c->tcp_byte_count <= got_now) { 2476 /* done, this is the last data fragment */ 2477 c->http_stored = 0; 2478 sldns_buffer_set_position(c->buffer, 0); 2479 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2480 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2481 return 1; 2482 } 2483 /* if we have the buffer space, 2484 * read more data collected into the buffer */ 2485 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2486 sldns_buffer_limit(c->buffer); 2487 if(remainbufferlen+got_now >= c->tcp_byte_count || 2488 remainbufferlen >= (size_t)(c->ssl?16384:2048)) { 2489 size_t total = sldns_buffer_limit(c->buffer); 2490 sldns_buffer_clear(c->buffer); 2491 sldns_buffer_set_position(c->buffer, total); 2492 c->http_stored = total; 2493 /* return and wait to read more */ 2494 return 1; 2495 } 2496 /* call callback with this data amount, then 2497 * wait for more */ 2498 c->tcp_byte_count -= got_now; 2499 c->http_stored = 0; 2500 sldns_buffer_set_position(c->buffer, 0); 2501 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2502 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2503 /* c->callback has to buffer_clear(c->buffer). */ 2504 /* return and wait to read more */ 2505 return 1; 2506 } 2507 2508 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */ 2509 static int 2510 http_chunked_segment(struct comm_point* c) 2511 { 2512 /* the c->buffer has from position..limit new data we read. */ 2513 /* the current chunk has length tcp_byte_count. 2514 * once we read that read more chunk headers. 
2515 */ 2516 size_t remainbufferlen; 2517 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 2518 verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer)); 2519 if(c->tcp_byte_count <= got_now) { 2520 /* the chunk has completed (with perhaps some extra data 2521 * from next chunk header and next chunk) */ 2522 /* save too much info into temp buffer */ 2523 size_t fraglen; 2524 struct comm_reply repinfo; 2525 c->http_stored = 0; 2526 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 2527 sldns_buffer_clear(c->http_temp); 2528 sldns_buffer_write(c->http_temp, 2529 sldns_buffer_current(c->buffer), 2530 sldns_buffer_remaining(c->buffer)); 2531 sldns_buffer_flip(c->http_temp); 2532 2533 /* callback with this fragment */ 2534 fraglen = sldns_buffer_position(c->buffer); 2535 sldns_buffer_set_position(c->buffer, 0); 2536 sldns_buffer_set_limit(c->buffer, fraglen); 2537 repinfo = c->repinfo; 2538 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2539 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 2540 /* c->callback has to buffer_clear(). */ 2541 2542 /* is commpoint deleted? 
*/ 2543 if(!repinfo.c) { 2544 return 1; 2545 } 2546 /* copy waiting info */ 2547 sldns_buffer_clear(c->buffer); 2548 sldns_buffer_write(c->buffer, 2549 sldns_buffer_begin(c->http_temp), 2550 sldns_buffer_remaining(c->http_temp)); 2551 sldns_buffer_flip(c->buffer); 2552 /* process end of chunk trailer header lines, until 2553 * an empty line */ 2554 c->http_in_chunk_headers = 3; 2555 /* process more data in buffer (if any) */ 2556 return 2; 2557 } 2558 c->tcp_byte_count -= got_now; 2559 2560 /* if we have the buffer space, 2561 * read more data collected into the buffer */ 2562 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2563 sldns_buffer_limit(c->buffer); 2564 if(remainbufferlen >= c->tcp_byte_count || 2565 remainbufferlen >= 2048) { 2566 size_t total = sldns_buffer_limit(c->buffer); 2567 sldns_buffer_clear(c->buffer); 2568 sldns_buffer_set_position(c->buffer, total); 2569 c->http_stored = total; 2570 /* return and wait to read more */ 2571 return 1; 2572 } 2573 2574 /* callback of http reader for a new part of the data */ 2575 c->http_stored = 0; 2576 sldns_buffer_set_position(c->buffer, 0); 2577 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2578 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2579 /* c->callback has to buffer_clear(c->buffer). */ 2580 /* return and wait to read more */ 2581 return 1; 2582 } 2583 2584 #ifdef HAVE_NGHTTP2 2585 /** Create new http2 session. Called when creating handling comm point. */ 2586 static struct http2_session* http2_session_create(struct comm_point* c) 2587 { 2588 struct http2_session* session = calloc(1, sizeof(*session)); 2589 if(!session) { 2590 log_err("malloc failure while creating http2 session"); 2591 return NULL; 2592 } 2593 session->c = c; 2594 2595 return session; 2596 } 2597 #endif 2598 2599 /** Delete http2 session. 
After closing connection or on error */ 2600 static void http2_session_delete(struct http2_session* h2_session) 2601 { 2602 #ifdef HAVE_NGHTTP2 2603 if(h2_session->callbacks) 2604 nghttp2_session_callbacks_del(h2_session->callbacks); 2605 free(h2_session); 2606 #else 2607 (void)h2_session; 2608 #endif 2609 } 2610 2611 #ifdef HAVE_NGHTTP2 2612 struct http2_stream* http2_stream_create(int32_t stream_id) 2613 { 2614 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 2615 if(!h2_stream) { 2616 log_err("malloc failure while creating http2 stream"); 2617 return NULL; 2618 } 2619 h2_stream->stream_id = stream_id; 2620 return h2_stream; 2621 } 2622 2623 /** Delete http2 stream. After session delete or stream close callback */ 2624 static void http2_stream_delete(struct http2_session* h2_session, 2625 struct http2_stream* h2_stream) 2626 { 2627 if(h2_stream->mesh_state) { 2628 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 2629 h2_session->c); 2630 h2_stream->mesh_state = NULL; 2631 } 2632 http2_req_stream_clear(h2_stream); 2633 free(h2_stream); 2634 } 2635 #endif 2636 2637 void http2_stream_add_meshstate(struct http2_stream* h2_stream, 2638 struct mesh_area* mesh, struct mesh_state* m) 2639 { 2640 h2_stream->mesh = mesh; 2641 h2_stream->mesh_state = m; 2642 } 2643 2644 /** delete http2 session server. After closing connection. 
*/ 2645 static void http2_session_server_delete(struct http2_session* h2_session) 2646 { 2647 #ifdef HAVE_NGHTTP2 2648 struct http2_stream* h2_stream, *next; 2649 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 2650 h2_session->session = NULL; 2651 for(h2_stream = h2_session->first_stream; h2_stream;) { 2652 next = h2_stream->next; 2653 http2_stream_delete(h2_session, h2_stream); 2654 h2_stream = next; 2655 } 2656 h2_session->first_stream = NULL; 2657 h2_session->is_drop = 0; 2658 h2_session->postpone_drop = 0; 2659 h2_session->c->h2_stream = NULL; 2660 #endif 2661 (void)h2_session; 2662 } 2663 2664 #ifdef HAVE_NGHTTP2 2665 void http2_session_add_stream(struct http2_session* h2_session, 2666 struct http2_stream* h2_stream) 2667 { 2668 if(h2_session->first_stream) 2669 h2_session->first_stream->prev = h2_stream; 2670 h2_stream->next = h2_session->first_stream; 2671 h2_session->first_stream = h2_stream; 2672 } 2673 2674 /** remove stream from session linked list. After stream close callback or 2675 * closing connection */ 2676 static void http2_session_remove_stream(struct http2_session* h2_session, 2677 struct http2_stream* h2_stream) 2678 { 2679 if(h2_stream->prev) 2680 h2_stream->prev->next = h2_stream->next; 2681 else 2682 h2_session->first_stream = h2_stream->next; 2683 if(h2_stream->next) 2684 h2_stream->next->prev = h2_stream->prev; 2685 2686 } 2687 2688 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 2689 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 2690 { 2691 struct http2_stream* h2_stream; 2692 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2693 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2694 h2_session->session, stream_id))) { 2695 return 0; 2696 } 2697 http2_session_remove_stream(h2_session, h2_stream); 2698 http2_stream_delete(h2_session, h2_stream); 2699 return 0; 2700 } 2701 2702 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 2703 
size_t len, int ATTR_UNUSED(flags), void* cb_arg) 2704 { 2705 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2706 ssize_t ret; 2707 2708 log_assert(h2_session->c->type == comm_http); 2709 log_assert(h2_session->c->h2_session); 2710 2711 #ifdef HAVE_SSL 2712 if(h2_session->c->ssl) { 2713 int r; 2714 ERR_clear_error(); 2715 r = SSL_read(h2_session->c->ssl, buf, len); 2716 if(r <= 0) { 2717 int want = SSL_get_error(h2_session->c->ssl, r); 2718 if(want == SSL_ERROR_ZERO_RETURN) { 2719 return NGHTTP2_ERR_EOF; 2720 } else if(want == SSL_ERROR_WANT_READ) { 2721 return NGHTTP2_ERR_WOULDBLOCK; 2722 } else if(want == SSL_ERROR_WANT_WRITE) { 2723 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 2724 comm_point_listen_for_rw(h2_session->c, 0, 1); 2725 return NGHTTP2_ERR_WOULDBLOCK; 2726 } else if(want == SSL_ERROR_SYSCALL) { 2727 #ifdef ECONNRESET 2728 if(errno == ECONNRESET && verbosity < 2) 2729 return NGHTTP2_ERR_CALLBACK_FAILURE; 2730 #endif 2731 if(errno != 0) 2732 log_err("SSL_read syscall: %s", 2733 strerror(errno)); 2734 return NGHTTP2_ERR_CALLBACK_FAILURE; 2735 } 2736 log_crypto_err("could not SSL_read"); 2737 return NGHTTP2_ERR_CALLBACK_FAILURE; 2738 } 2739 return r; 2740 } 2741 #endif /* HAVE_SSL */ 2742 2743 ret = recv(h2_session->c->fd, buf, len, 0); 2744 if(ret == 0) { 2745 return NGHTTP2_ERR_EOF; 2746 } else if(ret < 0) { 2747 #ifndef USE_WINSOCK 2748 if(errno == EINTR || errno == EAGAIN) 2749 return NGHTTP2_ERR_WOULDBLOCK; 2750 #ifdef ECONNRESET 2751 if(errno == ECONNRESET && verbosity < 2) 2752 return NGHTTP2_ERR_CALLBACK_FAILURE; 2753 #endif 2754 log_err_addr("could not http2 recv: %s", strerror(errno), 2755 &h2_session->c->repinfo.addr, 2756 h2_session->c->repinfo.addrlen); 2757 #else /* USE_WINSOCK */ 2758 if(WSAGetLastError() == WSAECONNRESET) 2759 return NGHTTP2_ERR_CALLBACK_FAILURE; 2760 if(WSAGetLastError() == WSAEINPROGRESS) 2761 return NGHTTP2_ERR_WOULDBLOCK; 2762 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2763 
ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 2764 UB_EV_READ); 2765 return NGHTTP2_ERR_WOULDBLOCK; 2766 } 2767 log_err_addr("could not http2 recv: %s", 2768 wsa_strerror(WSAGetLastError()), 2769 &h2_session->c->repinfo.addr, 2770 h2_session->c->repinfo.addrlen); 2771 #endif 2772 return NGHTTP2_ERR_CALLBACK_FAILURE; 2773 } 2774 return ret; 2775 } 2776 #endif /* HAVE_NGHTTP2 */ 2777 2778 /** Handle http2 read */ 2779 static int 2780 comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 2781 { 2782 #ifdef HAVE_NGHTTP2 2783 int ret; 2784 log_assert(c->h2_session); 2785 2786 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 2787 ret = nghttp2_session_recv(c->h2_session->session); 2788 if(ret) { 2789 if(ret != NGHTTP2_ERR_EOF && 2790 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 2791 char a[256]; 2792 addr_to_str(&c->repinfo.addr, c->repinfo.addrlen, 2793 a, sizeof(a)); 2794 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 2795 "error: %s", a, nghttp2_strerror(ret)); 2796 } 2797 return 0; 2798 } 2799 if(nghttp2_session_want_write(c->h2_session->session)) { 2800 c->tcp_is_reading = 0; 2801 comm_point_stop_listening(c); 2802 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 2803 } else if(!nghttp2_session_want_read(c->h2_session->session)) 2804 return 0; /* connection can be closed */ 2805 return 1; 2806 #else 2807 (void)c; 2808 return 0; 2809 #endif 2810 } 2811 2812 /** 2813 * Handle http reading callback. 2814 * @param fd: file descriptor of socket. 2815 * @param c: comm point to read from into buffer. 
2816 * @return: 0 on error 2817 */ 2818 static int 2819 comm_point_http_handle_read(int fd, struct comm_point* c) 2820 { 2821 log_assert(c->type == comm_http); 2822 log_assert(fd != -1); 2823 2824 /* if we are in ssl handshake, handle SSL handshake */ 2825 #ifdef HAVE_SSL 2826 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 2827 if(!ssl_handshake(c)) 2828 return 0; 2829 if(c->ssl_shake_state != comm_ssl_shake_none) 2830 return 1; 2831 } 2832 #endif /* HAVE_SSL */ 2833 2834 if(!c->tcp_is_reading) 2835 return 1; 2836 2837 if(c->use_h2) { 2838 return comm_point_http2_handle_read(fd, c); 2839 } 2840 2841 /* http version is <= http/1.1 */ 2842 2843 if(c->http_min_version >= http_version_2) { 2844 /* HTTP/2 failed, not allowed to use lower version. */ 2845 return 0; 2846 } 2847 2848 /* read more data */ 2849 if(c->ssl) { 2850 if(!ssl_http_read_more(c)) 2851 return 0; 2852 } else { 2853 if(!http_read_more(fd, c)) 2854 return 0; 2855 } 2856 2857 if(c->http_stored >= sldns_buffer_position(c->buffer)) { 2858 /* read did not work but we wanted more data, there is 2859 * no bytes to process now. 
*/ 2860 return 1; 2861 } 2862 sldns_buffer_flip(c->buffer); 2863 /* if we are partway in a segment of data, position us at the point 2864 * where we left off previously */ 2865 if(c->http_stored < sldns_buffer_limit(c->buffer)) 2866 sldns_buffer_set_position(c->buffer, c->http_stored); 2867 else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 2868 2869 while(sldns_buffer_remaining(c->buffer) > 0) { 2870 /* Handle HTTP/1.x data */ 2871 /* if we are reading headers, read more headers */ 2872 if(c->http_in_headers || c->http_in_chunk_headers) { 2873 /* if header is done, process the header */ 2874 if(!http_header_done(c->buffer)) { 2875 /* copy remaining data to front of buffer 2876 * and set rest for writing into it */ 2877 http_moveover_buffer(c->buffer); 2878 /* return and wait to read more */ 2879 return 1; 2880 } 2881 if(!c->http_in_chunk_headers) { 2882 /* process initial headers */ 2883 if(!http_process_initial_header(c)) 2884 return 0; 2885 } else { 2886 /* process chunk headers */ 2887 int r = http_process_chunk_header(c); 2888 if(r == 0) return 0; 2889 if(r == 2) return 1; /* done */ 2890 /* r == 1, continue */ 2891 } 2892 /* see if we have more to process */ 2893 continue; 2894 } 2895 2896 if(!c->http_is_chunked) { 2897 /* if we are reading nonchunks, process that*/ 2898 return http_nonchunk_segment(c); 2899 } else { 2900 /* if we are reading chunks, read the chunk */ 2901 int r = http_chunked_segment(c); 2902 if(r == 0) return 0; 2903 if(r == 1) return 1; 2904 continue; 2905 } 2906 } 2907 /* broke out of the loop; could not process header instead need 2908 * to read more */ 2909 /* moveover any remaining data and read more data */ 2910 http_moveover_buffer(c->buffer); 2911 /* return and wait to read more */ 2912 return 1; 2913 } 2914 2915 /** check pending connect for http */ 2916 static int 2917 http_check_connect(int fd, struct comm_point* c) 2918 { 2919 /* check for pending error from nonblocking connect */ 2920 /* from Stevens, 
unix network programming, vol1, 3rd ed, p450*/ 2921 int error = 0; 2922 socklen_t len = (socklen_t)sizeof(error); 2923 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 2924 &len) < 0){ 2925 #ifndef USE_WINSOCK 2926 error = errno; /* on solaris errno is error */ 2927 #else /* USE_WINSOCK */ 2928 error = WSAGetLastError(); 2929 #endif 2930 } 2931 #ifndef USE_WINSOCK 2932 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2933 if(error == EINPROGRESS || error == EWOULDBLOCK) 2934 return 1; /* try again later */ 2935 else 2936 #endif 2937 if(error != 0 && verbosity < 2) 2938 return 0; /* silence lots of chatter in the logs */ 2939 else if(error != 0) { 2940 log_err_addr("http connect", strerror(error), 2941 &c->repinfo.addr, c->repinfo.addrlen); 2942 #else /* USE_WINSOCK */ 2943 /* examine error */ 2944 if(error == WSAEINPROGRESS) 2945 return 1; 2946 else if(error == WSAEWOULDBLOCK) { 2947 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2948 return 1; 2949 } else if(error != 0 && verbosity < 2) 2950 return 0; 2951 else if(error != 0) { 2952 log_err_addr("http connect", wsa_strerror(error), 2953 &c->repinfo.addr, c->repinfo.addrlen); 2954 #endif /* USE_WINSOCK */ 2955 return 0; 2956 } 2957 /* keep on processing this socket */ 2958 return 2; 2959 } 2960 2961 /** write more data for http (with ssl) */ 2962 static int 2963 ssl_http_write_more(struct comm_point* c) 2964 { 2965 #ifdef HAVE_SSL 2966 int r; 2967 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2968 ERR_clear_error(); 2969 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 2970 (int)sldns_buffer_remaining(c->buffer)); 2971 if(r <= 0) { 2972 int want = SSL_get_error(c->ssl, r); 2973 if(want == SSL_ERROR_ZERO_RETURN) { 2974 return 0; /* closed */ 2975 } else if(want == SSL_ERROR_WANT_READ) { 2976 c->ssl_shake_state = comm_ssl_shake_hs_read; 2977 comm_point_listen_for_rw(c, 1, 0); 2978 return 1; /* wait for read condition */ 2979 } else if(want == SSL_ERROR_WANT_WRITE) { 2980 return 1; /* write 
more later */ 2981 } else if(want == SSL_ERROR_SYSCALL) { 2982 #ifdef EPIPE 2983 if(errno == EPIPE && verbosity < 2) 2984 return 0; /* silence 'broken pipe' */ 2985 #endif 2986 if(errno != 0) 2987 log_err("SSL_write syscall: %s", 2988 strerror(errno)); 2989 return 0; 2990 } 2991 log_crypto_err("could not SSL_write"); 2992 return 0; 2993 } 2994 sldns_buffer_skip(c->buffer, (ssize_t)r); 2995 return 1; 2996 #else 2997 (void)c; 2998 return 0; 2999 #endif /* HAVE_SSL */ 3000 } 3001 3002 /** write more data for http */ 3003 static int 3004 http_write_more(int fd, struct comm_point* c) 3005 { 3006 ssize_t r; 3007 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3008 r = send(fd, (void*)sldns_buffer_current(c->buffer), 3009 sldns_buffer_remaining(c->buffer), 0); 3010 if(r == -1) { 3011 #ifndef USE_WINSOCK 3012 if(errno == EINTR || errno == EAGAIN) 3013 return 1; 3014 #else 3015 if(WSAGetLastError() == WSAEINPROGRESS) 3016 return 1; 3017 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3018 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3019 return 1; 3020 } 3021 #endif 3022 log_err_addr("http send r", sock_strerror(errno), 3023 &c->repinfo.addr, c->repinfo.addrlen); 3024 return 0; 3025 } 3026 sldns_buffer_skip(c->buffer, r); 3027 return 1; 3028 } 3029 3030 #ifdef HAVE_NGHTTP2 3031 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 3032 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3033 { 3034 ssize_t ret; 3035 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3036 log_assert(h2_session->c->type == comm_http); 3037 log_assert(h2_session->c->h2_session); 3038 3039 #ifdef HAVE_SSL 3040 if(h2_session->c->ssl) { 3041 int r; 3042 ERR_clear_error(); 3043 r = SSL_write(h2_session->c->ssl, buf, len); 3044 if(r <= 0) { 3045 int want = SSL_get_error(h2_session->c->ssl, r); 3046 if(want == SSL_ERROR_ZERO_RETURN) { 3047 return NGHTTP2_ERR_CALLBACK_FAILURE; 3048 } else if(want == SSL_ERROR_WANT_READ) { 3049 h2_session->c->ssl_shake_state 
= comm_ssl_shake_hs_read; 3050 comm_point_listen_for_rw(h2_session->c, 1, 0); 3051 return NGHTTP2_ERR_WOULDBLOCK; 3052 } else if(want == SSL_ERROR_WANT_WRITE) { 3053 return NGHTTP2_ERR_WOULDBLOCK; 3054 } else if(want == SSL_ERROR_SYSCALL) { 3055 #ifdef EPIPE 3056 if(errno == EPIPE && verbosity < 2) 3057 return NGHTTP2_ERR_CALLBACK_FAILURE; 3058 #endif 3059 if(errno != 0) 3060 log_err("SSL_write syscall: %s", 3061 strerror(errno)); 3062 return NGHTTP2_ERR_CALLBACK_FAILURE; 3063 } 3064 log_crypto_err("could not SSL_write"); 3065 return NGHTTP2_ERR_CALLBACK_FAILURE; 3066 } 3067 return r; 3068 } 3069 #endif /* HAVE_SSL */ 3070 3071 ret = send(h2_session->c->fd, buf, len, 0); 3072 if(ret == 0) { 3073 return NGHTTP2_ERR_CALLBACK_FAILURE; 3074 } else if(ret < 0) { 3075 #ifndef USE_WINSOCK 3076 if(errno == EINTR || errno == EAGAIN) 3077 return NGHTTP2_ERR_WOULDBLOCK; 3078 #ifdef EPIPE 3079 if(errno == EPIPE && verbosity < 2) 3080 return NGHTTP2_ERR_CALLBACK_FAILURE; 3081 #endif 3082 #ifdef ECONNRESET 3083 if(errno == ECONNRESET && verbosity < 2) 3084 return NGHTTP2_ERR_CALLBACK_FAILURE; 3085 #endif 3086 log_err_addr("could not http2 write: %s", strerror(errno), 3087 &h2_session->c->repinfo.addr, 3088 h2_session->c->repinfo.addrlen); 3089 #else /* USE_WINSOCK */ 3090 if(WSAGetLastError() == WSAENOTCONN) 3091 return NGHTTP2_ERR_WOULDBLOCK; 3092 if(WSAGetLastError() == WSAEINPROGRESS) 3093 return NGHTTP2_ERR_WOULDBLOCK; 3094 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3095 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3096 UB_EV_WRITE); 3097 return NGHTTP2_ERR_WOULDBLOCK; 3098 } 3099 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 3100 return NGHTTP2_ERR_CALLBACK_FAILURE; 3101 log_err_addr("could not http2 write: %s", 3102 wsa_strerror(WSAGetLastError()), 3103 &h2_session->c->repinfo.addr, 3104 h2_session->c->repinfo.addrlen); 3105 #endif 3106 return NGHTTP2_ERR_CALLBACK_FAILURE; 3107 } 3108 return ret; 3109 } 3110 #endif /* HAVE_NGHTTP2 */ 3111 3112 /** Handle http2 
writing */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int ret;
	log_assert(c->h2_session);

	/* any nonzero result from nghttp2 is treated as a failure of the
	 * connection: it is logged and 0 is returned */
	ret = nghttp2_session_send(c->h2_session->session);
	if(ret) {
		verbose(VERB_QUERY, "http2: session_send failed, "
			"error: %s", nghttp2_strerror(ret));
		return 0;
	}

	/* flip the comm point to read mode when the session wants to read;
	 * if it wants neither read nor write, return 0 */
	if(nghttp2_session_want_read(c->h2_session->session)) {
		c->tcp_is_reading = 1;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
	} else if(!nghttp2_session_want_write(c->h2_session->session))
		return 0; /* connection can be closed */
	return 1;
#else
	/* built without nghttp2: there is nothing to write, report failure */
	(void)c;
	return 0;
#endif
}

/**
 * Handle http writing callback.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write buffer out of.
 * @return: 0 on error (comm_point_http_handle_callback then reclaims the
 *	handler), 1 to keep the connection.
 */
static int
comm_point_http_handle_write(int fd, struct comm_point* c)
{
	log_assert(c->type == comm_http);
	log_assert(fd != -1);

	/* check pending connect errors, if that fails, we wait for more,
	 * or we can continue to write contents */
	if(c->tcp_check_nb_connect) {
		int r = http_check_connect(fd, c);
		if(r == 0) return 0;
		if(r == 1) return 1;
		/* any other result: connect check is done, do not repeat it */
		c->tcp_check_nb_connect = 0;
	}
	/* if we are in ssl handshake, handle SSL handshake */
#ifdef HAVE_SSL
	if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
#endif /* HAVE_SSL */
	/* the point is in read mode, nothing to write right now */
	if(c->tcp_is_reading)
		return 1;

	if(c->use_h2) {
		return comm_point_http2_handle_write(fd, c);
	}

	/* http version is <= http/1.1 */

	if(c->http_min_version >= http_version_2) {
		/* HTTP/2 failed, not allowed to use lower version.
*/
		return 0;
	}

	/* if we are writing, write more */
	if(c->ssl) {
		if(!ssl_http_write_more(c))
			return 0;
	} else {
		if(!http_write_more(fd, c))
			return 0;
	}

	/* we write a single buffer contents, that can contain
	 * the http request, and then flip to read the results */
	/* see if write is done */
	if(sldns_buffer_remaining(c->buffer) == 0) {
		sldns_buffer_clear(c->buffer);
		if(c->tcp_do_toggle_rw)
			c->tcp_is_reading = 1;
		c->tcp_byte_count = 0;
		/* switch from listening(write) to listening(read) */
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, -1);
	}
	return 1;
}

/**
 * Event callback for comm_http points.
 * Exactly one event class is handled per invocation, checked in the order
 * timeout, read, write. On timeout, or when the read/write handler returns
 * 0, the handler is reclaimed and, unless tcp_do_close is set, the user
 * callback is invoked with NETEVENT_TIMEOUT or NETEVENT_CLOSED.
 * @param fd: file descriptor of the socket.
 * @param event: event bits (UB_EV_TIMEOUT, UB_EV_READ, UB_EV_WRITE).
 * @param arg: the comm point (struct comm_point*).
 */
void
comm_point_http_handle_callback(int fd, short event, void* arg)
{
	struct comm_point* c = (struct comm_point*)arg;
	log_assert(c->type == comm_http);
	ub_comm_base_now(c->ev->base);

	if(event&UB_EV_TIMEOUT) {
		verbose(VERB_QUERY, "http took too long, dropped");
		reclaim_http_handler(c);
		if(!c->tcp_do_close) {
			fptr_ok(fptr_whitelist_comm_point(c->callback));
			(void)(*c->callback)(c, c->cb_arg,
				NETEVENT_TIMEOUT, NULL);
		}
		return;
	}
	if(event&UB_EV_READ) {
		if(!comm_point_http_handle_read(fd, c)) {
			reclaim_http_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
		}
		return;
	}
	if(event&UB_EV_WRITE) {
		if(!comm_point_http_handle_write(fd, c)) {
			reclaim_http_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
		}
		return;
	}
	log_err("Ignored event %d for httphdl.", event);
}

/**
 * Event callback for comm_local points. Only read events are handled;
 * when the read handler returns 0 the user callback is invoked with
 * NETEVENT_CLOSED. Other events are logged and ignored.
 */
void comm_point_local_handle_callback(int fd, short event, void* arg)
{
	struct comm_point* c =
(struct comm_point*)arg;
	log_assert(c->type == comm_local);
	ub_comm_base_now(c->ev->base);

	if(event&UB_EV_READ) {
		/* the read handler returned 0: tell the user the stream
		 * is closed */
		if(!comm_point_tcp_handle_read(fd, c, 1)) {
			fptr_ok(fptr_whitelist_comm_point(c->callback));
			(void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
				NULL);
		}
		return;
	}
	log_err("Ignored event %d for localhdl.", event);
}

/**
 * Event callback for comm_raw points. No I/O is done here; the user
 * callback is invoked with NETEVENT_TIMEOUT when the timeout bit is set
 * in event and with NETEVENT_NOERROR otherwise.
 */
void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
	short event, void* arg)
{
	struct comm_point* c = (struct comm_point*)arg;
	int err = NETEVENT_NOERROR;
	log_assert(c->type == comm_raw);
	ub_comm_base_now(c->ev->base);

	if(event&UB_EV_TIMEOUT)
		err = NETEVENT_TIMEOUT;
	fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
	(void)(*c->callback)(c, c->cb_arg, err, NULL);
}

/**
 * Create a comm point of type comm_udp with a persistent read event that
 * calls comm_point_udp_callback.
 * @param base: comm base the event is created on.
 * @param fd: socket; when -1 the event is created but not added.
 * @param buffer: buffer stored in the comm point. It is not freed by
 *	comm_point_delete for comm_udp points.
 * @param callback: user callback stored in the comm point.
 * @param callback_arg: argument stored for the user callback.
 * @param socket: stored in the comm point as c->socket.
 * @return the new comm point, or NULL on allocation or event failure.
 */
struct comm_point*
comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
	comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
{
	struct comm_point* c = (struct comm_point*)calloc(1,
		sizeof(struct comm_point));
	short evbits;
	if(!c)
		return NULL;
	c->ev = (struct internal_event*)calloc(1,
		sizeof(struct internal_event));
	if(!c->ev) {
		free(c);
		return NULL;
	}
	c->ev->base = base;
	c->fd = fd;
	c->buffer = buffer;
	c->timeout = NULL;
	c->tcp_is_reading = 0;
	c->tcp_byte_count = 0;
	c->tcp_parent = NULL;
	c->max_tcp_count = 0;
	c->cur_tcp_count = 0;
	c->tcp_handlers = NULL;
	c->tcp_free = NULL;
	c->type = comm_udp;
	c->tcp_do_close = 0;
	c->do_not_close = 0;
	c->tcp_do_toggle_rw = 0;
	c->tcp_check_nb_connect = 0;
#ifdef USE_MSG_FASTOPEN
	c->tcp_do_fastopen = 0;
#endif
#ifdef USE_DNSCRYPT
	c->dnscrypt = 0;
	c->dnscrypt_buffer = buffer;
#endif
	c->inuse = 0;
	c->callback = callback;
	c->cb_arg = callback_arg;
c->socket = socket;
	evbits = UB_EV_READ | UB_EV_PERSIST;
	/* ub_event stuff */
	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
		comm_point_udp_callback, c);
	if(c->ev->ev == NULL) {
		log_err("could not baseset udp event");
		comm_point_delete(c);
		return NULL;
	}
	/* with fd == -1 the event is not added here, but event_added is
	 * still set to 1 below */
	if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
		log_err("could not add udp event");
		comm_point_delete(c);
		return NULL;
	}
	c->event_added = 1;
	return c;
}

/**
 * Create a comm point of type comm_udp whose persistent read event calls
 * comm_point_udp_ancil_callback. The field setup is otherwise the same as
 * in comm_point_create_udp.
 * @param base: comm base the event is created on.
 * @param fd: socket; when -1 the event is created but not added.
 * @param buffer: buffer stored in the comm point. It is not freed by
 *	comm_point_delete for comm_udp points.
 * @param callback: user callback stored in the comm point.
 * @param callback_arg: argument stored for the user callback.
 * @param socket: stored in the comm point as c->socket.
 * @return the new comm point, or NULL on allocation or event failure.
 */
struct comm_point*
comm_point_create_udp_ancil(struct comm_base *base, int fd,
	sldns_buffer* buffer,
	comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
{
	struct comm_point* c = (struct comm_point*)calloc(1,
		sizeof(struct comm_point));
	short evbits;
	if(!c)
		return NULL;
	c->ev = (struct internal_event*)calloc(1,
		sizeof(struct internal_event));
	if(!c->ev) {
		free(c);
		return NULL;
	}
	c->ev->base = base;
	c->fd = fd;
	c->buffer = buffer;
	c->timeout = NULL;
	c->tcp_is_reading = 0;
	c->tcp_byte_count = 0;
	c->tcp_parent = NULL;
	c->max_tcp_count = 0;
	c->cur_tcp_count = 0;
	c->tcp_handlers = NULL;
	c->tcp_free = NULL;
	c->type = comm_udp;
	c->tcp_do_close = 0;
	c->do_not_close = 0;
#ifdef USE_DNSCRYPT
	c->dnscrypt = 0;
	c->dnscrypt_buffer = buffer;
#endif
	c->inuse = 0;
	c->tcp_do_toggle_rw = 0;
	c->tcp_check_nb_connect = 0;
#ifdef USE_MSG_FASTOPEN
	c->tcp_do_fastopen = 0;
#endif
	c->callback = callback;
	c->cb_arg = callback_arg;
	c->socket = socket;
	evbits = UB_EV_READ | UB_EV_PERSIST;
	/* ub_event stuff */
	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
		comm_point_udp_ancil_callback, c);
	if(c->ev->ev == NULL) {
		log_err("could not baseset udp event");
		comm_point_delete(c);
		return NULL;
	}
if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
		log_err("could not add udp event");
		comm_point_delete(c);
		return NULL;
	}
	c->event_added = 1;
	return c;
}

/**
 * Create one TCP handler comm point (type comm_tcp, fd -1) for an accept
 * point and push it onto the parent's tcp_free list. The event is created
 * but not added.
 * @param base: comm base the event is created on.
 * @param parent: accept comm point; its tcp_timeout_msec and
 *	tcp_conn_limit are copied.
 * @param bufsize: size of the buffer allocated for the handler.
 * @param spoolbuf: if not NULL, a tcp_req_info is created with it and the
 *	handler gets tcp_do_close = 1 and tcp_do_toggle_rw = 0.
 * @param callback: user callback stored in the comm point.
 * @param callback_arg: argument stored for the user callback.
 * @param socket: stored in the comm point as c->socket.
 * @return the new handler, or NULL on failure (everything allocated here
 *	is released and the parent free list is restored).
 */
static struct comm_point*
comm_point_create_tcp_handler(struct comm_base *base,
	struct comm_point* parent, size_t bufsize,
	struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
	void* callback_arg, struct unbound_socket* socket)
{
	struct comm_point* c = (struct comm_point*)calloc(1,
		sizeof(struct comm_point));
	short evbits;
	if(!c)
		return NULL;
	c->ev = (struct internal_event*)calloc(1,
		sizeof(struct internal_event));
	if(!c->ev) {
		free(c);
		return NULL;
	}
	c->ev->base = base;
	c->fd = -1;
	c->buffer = sldns_buffer_new(bufsize);
	if(!c->buffer) {
		free(c->ev);
		free(c);
		return NULL;
	}
	c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
	if(!c->timeout) {
		sldns_buffer_free(c->buffer);
		free(c->ev);
		free(c);
		return NULL;
	}
	c->tcp_is_reading = 0;
	c->tcp_byte_count = 0;
	c->tcp_parent = parent;
	c->tcp_timeout_msec = parent->tcp_timeout_msec;
	c->tcp_conn_limit = parent->tcp_conn_limit;
	c->tcl_addr = NULL;
	c->tcp_keepalive = 0;
	c->max_tcp_count = 0;
	c->cur_tcp_count = 0;
	c->tcp_handlers = NULL;
	c->tcp_free = NULL;
	c->type = comm_tcp;
	c->tcp_do_close = 0;
	c->do_not_close = 0;
	c->tcp_do_toggle_rw = 1;
	c->tcp_check_nb_connect = 0;
#ifdef USE_MSG_FASTOPEN
	c->tcp_do_fastopen = 0;
#endif
#ifdef USE_DNSCRYPT
	c->dnscrypt = 0;
	/* We don't know just yet if this is a dnscrypt channel. Allocation
	 * will be done when handling the callback.
*/
	c->dnscrypt_buffer = c->buffer;
#endif
	c->repinfo.c = c;
	c->callback = callback;
	c->cb_arg = callback_arg;
	c->socket = socket;
	if(spoolbuf) {
		c->tcp_req_info = tcp_req_info_create(spoolbuf);
		if(!c->tcp_req_info) {
			log_err("could not create tcp commpoint");
			sldns_buffer_free(c->buffer);
			free(c->timeout);
			free(c->ev);
			free(c);
			return NULL;
		}
		c->tcp_req_info->cp = c;
		c->tcp_do_close = 1;
		c->tcp_do_toggle_rw = 0;
	}
	/* add to parent free list */
	c->tcp_free = parent->tcp_free;
	parent->tcp_free = c;
	/* ub_event stuff */
	evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
		comm_point_tcp_handle_callback, c);
	if(c->ev->ev == NULL)
	{
		log_err("could not basetset tcphdl event");
		/* unlink from the parent free list again */
		parent->tcp_free = c->tcp_free;
		/* NOTE(review): tcp_req_info is NULL here when spoolbuf was
		 * NULL; presumably tcp_req_info_delete accepts NULL - confirm */
		tcp_req_info_delete(c->tcp_req_info);
		sldns_buffer_free(c->buffer);
		free(c->timeout);
		free(c->ev);
		free(c);
		return NULL;
	}
	return c;
}

/**
 * Create one HTTP handler comm point (type comm_http, fd -1) for an
 * accept point and push it onto the parent's tcp_free list. The event is
 * created but not added.
 * @param base: comm base the event is created on.
 * @param parent: accept comm point; its tcp_timeout_msec and
 *	tcp_conn_limit are copied.
 * @param bufsize: size of the buffer allocated for the handler; also the
 *	default http2_stream_max_qbuffer_size.
 * @param harden_large_queries: if nonzero and bufsize > 512, the http2
 *	stream query buffer size is limited to 512.
 * @param http_max_streams: stored as http2_max_streams.
 * @param http_endpoint: copied with strdup into the comm point.
 * @param callback: user callback stored in the comm point.
 * @param callback_arg: argument stored for the user callback.
 * @param socket: stored in the comm point as c->socket.
 * @return the new handler, or NULL on failure.
 */
static struct comm_point*
comm_point_create_http_handler(struct comm_base *base,
	struct comm_point* parent, size_t bufsize, int harden_large_queries,
	uint32_t http_max_streams, char* http_endpoint,
	comm_point_callback_type* callback, void* callback_arg,
	struct unbound_socket* socket)
{
	struct comm_point* c = (struct comm_point*)calloc(1,
		sizeof(struct comm_point));
	short evbits;
	if(!c)
		return NULL;
	c->ev = (struct internal_event*)calloc(1,
		sizeof(struct internal_event));
	if(!c->ev) {
		free(c);
		return NULL;
	}
	c->ev->base = base;
	c->fd = -1;
	c->buffer = sldns_buffer_new(bufsize);
	if(!c->buffer) {
		free(c->ev);
		free(c);
		return NULL;
	}
	c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
	if(!c->timeout) {
sldns_buffer_free(c->buffer);
		free(c->ev);
		free(c);
		return NULL;
	}
	c->tcp_is_reading = 0;
	c->tcp_byte_count = 0;
	c->tcp_parent = parent;
	c->tcp_timeout_msec = parent->tcp_timeout_msec;
	c->tcp_conn_limit = parent->tcp_conn_limit;
	c->tcl_addr = NULL;
	c->tcp_keepalive = 0;
	c->max_tcp_count = 0;
	c->cur_tcp_count = 0;
	c->tcp_handlers = NULL;
	c->tcp_free = NULL;
	c->type = comm_http;
	c->tcp_do_close = 1;
	c->do_not_close = 0;
	c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
	c->tcp_check_nb_connect = 0;
#ifdef USE_MSG_FASTOPEN
	c->tcp_do_fastopen = 0;
#endif
#ifdef USE_DNSCRYPT
	c->dnscrypt = 0;
	c->dnscrypt_buffer = NULL;
#endif
	c->repinfo.c = c;
	c->callback = callback;
	c->cb_arg = callback_arg;
	c->socket = socket;

	c->http_min_version = http_version_2;
	c->http2_stream_max_qbuffer_size = bufsize;
	/* with harden_large_queries the per-stream query buffer is capped
	 * at 512 bytes */
	if(harden_large_queries && bufsize > 512)
		c->http2_stream_max_qbuffer_size = 512;
	c->http2_max_streams = http_max_streams;
	/* the comm point keeps its own copy of the endpoint string */
	if(!(c->http_endpoint = strdup(http_endpoint))) {
		log_err("could not strdup http_endpoint");
		sldns_buffer_free(c->buffer);
		free(c->timeout);
		free(c->ev);
		free(c);
		return NULL;
	}
	c->use_h2 = 0;
#ifdef HAVE_NGHTTP2
	if(!(c->h2_session = http2_session_create(c))) {
		log_err("could not create http2 session");
		free(c->http_endpoint);
		sldns_buffer_free(c->buffer);
		free(c->timeout);
		free(c->ev);
		free(c);
		return NULL;
	}
	if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
		log_err("could not create http2 callbacks");
		http2_session_delete(c->h2_session);
		free(c->http_endpoint);
		sldns_buffer_free(c->buffer);
		free(c->timeout);
		free(c->ev);
		free(c);
		return NULL;
	}
#endif

	/* add to parent free list */
c->tcp_free = parent->tcp_free; 3598 parent->tcp_free = c; 3599 /* ub_event stuff */ 3600 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3601 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3602 comm_point_http_handle_callback, c); 3603 if(c->ev->ev == NULL) 3604 { 3605 log_err("could not set http handler event"); 3606 parent->tcp_free = c->tcp_free; 3607 http2_session_delete(c->h2_session); 3608 sldns_buffer_free(c->buffer); 3609 free(c->timeout); 3610 free(c->ev); 3611 free(c); 3612 return NULL; 3613 } 3614 return c; 3615 } 3616 3617 struct comm_point* 3618 comm_point_create_tcp(struct comm_base *base, int fd, int num, 3619 int idle_timeout, int harden_large_queries, 3620 uint32_t http_max_streams, char* http_endpoint, 3621 struct tcl_list* tcp_conn_limit, size_t bufsize, 3622 struct sldns_buffer* spoolbuf, enum listen_type port_type, 3623 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3624 { 3625 struct comm_point* c = (struct comm_point*)calloc(1, 3626 sizeof(struct comm_point)); 3627 short evbits; 3628 int i; 3629 /* first allocate the TCP accept listener */ 3630 if(!c) 3631 return NULL; 3632 c->ev = (struct internal_event*)calloc(1, 3633 sizeof(struct internal_event)); 3634 if(!c->ev) { 3635 free(c); 3636 return NULL; 3637 } 3638 c->ev->base = base; 3639 c->fd = fd; 3640 c->buffer = NULL; 3641 c->timeout = NULL; 3642 c->tcp_is_reading = 0; 3643 c->tcp_byte_count = 0; 3644 c->tcp_timeout_msec = idle_timeout; 3645 c->tcp_conn_limit = tcp_conn_limit; 3646 c->tcl_addr = NULL; 3647 c->tcp_keepalive = 0; 3648 c->tcp_parent = NULL; 3649 c->max_tcp_count = num; 3650 c->cur_tcp_count = 0; 3651 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 3652 sizeof(struct comm_point*)); 3653 if(!c->tcp_handlers) { 3654 free(c->ev); 3655 free(c); 3656 return NULL; 3657 } 3658 c->tcp_free = NULL; 3659 c->type = comm_tcp_accept; 3660 c->tcp_do_close = 0; 3661 c->do_not_close = 0; 3662 c->tcp_do_toggle_rw = 0; 3663 
c->tcp_check_nb_connect = 0; 3664 #ifdef USE_MSG_FASTOPEN 3665 c->tcp_do_fastopen = 0; 3666 #endif 3667 #ifdef USE_DNSCRYPT 3668 c->dnscrypt = 0; 3669 c->dnscrypt_buffer = NULL; 3670 #endif 3671 c->callback = NULL; 3672 c->cb_arg = NULL; 3673 c->socket = socket; 3674 evbits = UB_EV_READ | UB_EV_PERSIST; 3675 /* ub_event stuff */ 3676 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3677 comm_point_tcp_accept_callback, c); 3678 if(c->ev->ev == NULL) { 3679 log_err("could not baseset tcpacc event"); 3680 comm_point_delete(c); 3681 return NULL; 3682 } 3683 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3684 log_err("could not add tcpacc event"); 3685 comm_point_delete(c); 3686 return NULL; 3687 } 3688 c->event_added = 1; 3689 /* now prealloc the handlers */ 3690 for(i=0; i<num; i++) { 3691 if(port_type == listen_type_tcp || 3692 port_type == listen_type_ssl || 3693 port_type == listen_type_tcp_dnscrypt) { 3694 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 3695 c, bufsize, spoolbuf, callback, callback_arg, socket); 3696 } else if(port_type == listen_type_http) { 3697 c->tcp_handlers[i] = comm_point_create_http_handler( 3698 base, c, bufsize, harden_large_queries, 3699 http_max_streams, http_endpoint, 3700 callback, callback_arg, socket); 3701 } 3702 else { 3703 log_err("could not create tcp handler, unknown listen " 3704 "type"); 3705 return NULL; 3706 } 3707 if(!c->tcp_handlers[i]) { 3708 comm_point_delete(c); 3709 return NULL; 3710 } 3711 } 3712 3713 return c; 3714 } 3715 3716 struct comm_point* 3717 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 3718 comm_point_callback_type* callback, void* callback_arg) 3719 { 3720 struct comm_point* c = (struct comm_point*)calloc(1, 3721 sizeof(struct comm_point)); 3722 short evbits; 3723 if(!c) 3724 return NULL; 3725 c->ev = (struct internal_event*)calloc(1, 3726 sizeof(struct internal_event)); 3727 if(!c->ev) { 3728 free(c); 3729 return NULL; 3730 } 3731 c->ev->base = base; 3732 c->fd = -1; 
c->buffer = sldns_buffer_new(bufsize);
	if(!c->buffer) {
		free(c->ev);
		free(c);
		return NULL;
	}
	c->timeout = NULL;
	c->tcp_is_reading = 0;
	c->tcp_byte_count = 0;
	c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
	c->tcp_conn_limit = NULL;
	c->tcl_addr = NULL;
	c->tcp_keepalive = 0;
	c->tcp_parent = NULL;
	c->max_tcp_count = 0;
	c->cur_tcp_count = 0;
	c->tcp_handlers = NULL;
	c->tcp_free = NULL;
	c->type = comm_tcp;
	c->tcp_do_close = 0;
	c->do_not_close = 0;
	c->tcp_do_toggle_rw = 1;
	/* outgoing connection: the connect result is checked on first
	 * write event */
	c->tcp_check_nb_connect = 1;
#ifdef USE_MSG_FASTOPEN
	c->tcp_do_fastopen = 1;
#endif
#ifdef USE_DNSCRYPT
	c->dnscrypt = 0;
	c->dnscrypt_buffer = c->buffer;
#endif
	c->repinfo.c = c;
	c->callback = callback;
	c->cb_arg = callback_arg;
	/* outgoing points start by waiting for writability */
	evbits = UB_EV_PERSIST | UB_EV_WRITE;
	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
		comm_point_tcp_handle_callback, c);
	if(c->ev->ev == NULL)
	{
		log_err("could not baseset tcpout event");
		sldns_buffer_free(c->buffer);
		free(c->ev);
		free(c);
		return NULL;
	}

	return c;
}

/**
 * Create an outgoing HTTP comm point (type comm_http, fd -1). The point
 * starts in write mode with a pending nonblocking-connect check and with
 * http_in_headers set. The event is created but not added.
 * @param base: comm base the event is created on.
 * @param bufsize: size of the buffer allocated for the comm point.
 * @param callback: user callback stored in the comm point.
 * @param callback_arg: argument stored for the user callback.
 * @param temp: buffer stored as c->http_temp.
 * @return the new comm point, or NULL on failure.
 */
struct comm_point*
comm_point_create_http_out(struct comm_base *base, size_t bufsize,
	comm_point_callback_type* callback, void* callback_arg,
	sldns_buffer* temp)
{
	struct comm_point* c = (struct comm_point*)calloc(1,
		sizeof(struct comm_point));
	short evbits;
	if(!c)
		return NULL;
	c->ev = (struct internal_event*)calloc(1,
		sizeof(struct internal_event));
	if(!c->ev) {
		free(c);
		return NULL;
	}
	c->ev->base = base;
	c->fd = -1;
	c->buffer = sldns_buffer_new(bufsize);
	if(!c->buffer) {
		free(c->ev);
		free(c);
		return NULL;
	}
	c->timeout = NULL;
	c->tcp_is_reading = 0;
	c->tcp_byte_count = 0;
	c->tcp_parent = NULL;
	c->max_tcp_count = 0;
	c->cur_tcp_count
= 0;
	c->tcp_handlers = NULL;
	c->tcp_free = NULL;
	c->type = comm_http;
	c->tcp_do_close = 0;
	c->do_not_close = 0;
	c->tcp_do_toggle_rw = 1;
	c->tcp_check_nb_connect = 1;
	/* http parse state for the reply */
	c->http_in_headers = 1;
	c->http_in_chunk_headers = 0;
	c->http_is_chunked = 0;
	c->http_temp = temp;
#ifdef USE_MSG_FASTOPEN
	c->tcp_do_fastopen = 1;
#endif
#ifdef USE_DNSCRYPT
	c->dnscrypt = 0;
	c->dnscrypt_buffer = c->buffer;
#endif
	c->repinfo.c = c;
	c->callback = callback;
	c->cb_arg = callback_arg;
	evbits = UB_EV_PERSIST | UB_EV_WRITE;
	c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
		comm_point_http_handle_callback, c);
	if(c->ev->ev == NULL)
	{
		log_err("could not baseset tcpout event");
#ifdef HAVE_SSL
		/* NOTE(review): c->ssl is never assigned in this function,
		 * so it is still NULL from calloc here */
		SSL_free(c->ssl);
#endif
		sldns_buffer_free(c->buffer);
		free(c->ev);
		free(c);
		return NULL;
	}

	return c;
}

/**
 * Create a comm point of type comm_local on fd with a persistent read
 * event that calls comm_point_local_handle_callback, and add the event.
 * The point is created in read mode with do_not_close set.
 * @param base: comm base the event is created on.
 * @param fd: descriptor to read from.
 * @param bufsize: size of the buffer allocated for the comm point.
 * @param callback: user callback stored in the comm point.
 * @param callback_arg: argument stored for the user callback.
 * @return the new comm point, or NULL on failure.
 */
struct comm_point*
comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
	comm_point_callback_type* callback, void* callback_arg)
{
	struct comm_point* c = (struct comm_point*)calloc(1,
		sizeof(struct comm_point));
	short evbits;
	if(!c)
		return NULL;
	c->ev = (struct internal_event*)calloc(1,
		sizeof(struct internal_event));
	if(!c->ev) {
		free(c);
		return NULL;
	}
	c->ev->base = base;
	c->fd = fd;
	c->buffer = sldns_buffer_new(bufsize);
	if(!c->buffer) {
		free(c->ev);
		free(c);
		return NULL;
	}
	c->timeout = NULL;
	c->tcp_is_reading = 1;
	c->tcp_byte_count = 0;
	c->tcp_parent = NULL;
	c->max_tcp_count = 0;
	c->cur_tcp_count = 0;
	c->tcp_handlers = NULL;
	c->tcp_free = NULL;
	c->type = comm_local;
	c->tcp_do_close = 0;
	c->do_not_close = 1;
	c->tcp_do_toggle_rw = 0;
	c->tcp_check_nb_connect = 0;
#ifdef USE_MSG_FASTOPEN
c->tcp_do_fastopen = 0; 3888 #endif 3889 #ifdef USE_DNSCRYPT 3890 c->dnscrypt = 0; 3891 c->dnscrypt_buffer = c->buffer; 3892 #endif 3893 c->callback = callback; 3894 c->cb_arg = callback_arg; 3895 /* ub_event stuff */ 3896 evbits = UB_EV_PERSIST | UB_EV_READ; 3897 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3898 comm_point_local_handle_callback, c); 3899 if(c->ev->ev == NULL) { 3900 log_err("could not baseset localhdl event"); 3901 free(c->ev); 3902 free(c); 3903 return NULL; 3904 } 3905 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3906 log_err("could not add localhdl event"); 3907 ub_event_free(c->ev->ev); 3908 free(c->ev); 3909 free(c); 3910 return NULL; 3911 } 3912 c->event_added = 1; 3913 return c; 3914 } 3915 3916 struct comm_point* 3917 comm_point_create_raw(struct comm_base* base, int fd, int writing, 3918 comm_point_callback_type* callback, void* callback_arg) 3919 { 3920 struct comm_point* c = (struct comm_point*)calloc(1, 3921 sizeof(struct comm_point)); 3922 short evbits; 3923 if(!c) 3924 return NULL; 3925 c->ev = (struct internal_event*)calloc(1, 3926 sizeof(struct internal_event)); 3927 if(!c->ev) { 3928 free(c); 3929 return NULL; 3930 } 3931 c->ev->base = base; 3932 c->fd = fd; 3933 c->buffer = NULL; 3934 c->timeout = NULL; 3935 c->tcp_is_reading = 0; 3936 c->tcp_byte_count = 0; 3937 c->tcp_parent = NULL; 3938 c->max_tcp_count = 0; 3939 c->cur_tcp_count = 0; 3940 c->tcp_handlers = NULL; 3941 c->tcp_free = NULL; 3942 c->type = comm_raw; 3943 c->tcp_do_close = 0; 3944 c->do_not_close = 1; 3945 c->tcp_do_toggle_rw = 0; 3946 c->tcp_check_nb_connect = 0; 3947 #ifdef USE_MSG_FASTOPEN 3948 c->tcp_do_fastopen = 0; 3949 #endif 3950 #ifdef USE_DNSCRYPT 3951 c->dnscrypt = 0; 3952 c->dnscrypt_buffer = c->buffer; 3953 #endif 3954 c->callback = callback; 3955 c->cb_arg = callback_arg; 3956 /* ub_event stuff */ 3957 if(writing) 3958 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3959 else evbits = UB_EV_PERSIST | UB_EV_READ; 3960 c->ev->ev = 
ub_event_new(base->eb->base, c->fd, evbits,
		comm_point_raw_handle_callback, c);
	if(c->ev->ev == NULL) {
		log_err("could not baseset rawhdl event");
		free(c->ev);
		free(c);
		return NULL;
	}
	if (ub_event_add(c->ev->ev, c->timeout) != 0) {
		log_err("could not add rawhdl event");
		ub_event_free(c->ev->ev);
		free(c->ev);
		free(c);
		return NULL;
	}
	c->event_added = 1;
	return c;
}

/**
 * Close a comm point: remove its event (when added and fd is set), close
 * the tcp connection limit entry, clear tcp_req_info and the http2 server
 * session if present, and close the fd unless do_not_close is set.
 * Afterwards c->fd is -1. The comm point itself is not freed.
 * @param c: comm point, may be NULL.
 */
void
comm_point_close(struct comm_point* c)
{
	if(!c)
		return;
	if(c->fd != -1) {
		verbose(5, "comm_point_close of %d: event_del", c->fd);
		if(c->event_added) {
			if(ub_event_del(c->ev->ev) != 0) {
				log_err("could not event_del on close");
			}
			c->event_added = 0;
		}
	}
	tcl_close_connection(c->tcl_addr);
	if(c->tcp_req_info)
		tcp_req_info_clear(c->tcp_req_info);
	if(c->h2_session)
		http2_session_server_delete(c->h2_session);

	/* close fd after removing from event lists, or epoll..
	 * is messed up */
	if(c->fd != -1 && !c->do_not_close) {
#ifdef USE_WINSOCK
		if(c->type == comm_tcp || c->type == comm_http) {
			/* delete sticky events for the fd, it gets closed */
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
		}
#endif
		verbose(VERB_ALGO, "close fd %d", c->fd);
		sock_close(c->fd);
	}
	c->fd = -1;
}

/**
 * Close and free a comm point, including its ssl object, http endpoint
 * copy, preallocated tcp handlers, timeout, event, and (for comm_tcp,
 * comm_local and comm_http points) its buffers, tcp_req_info and http2
 * session.
 * @param c: comm point, may be NULL.
 */
void
comm_point_delete(struct comm_point* c)
{
	if(!c)
		return;
	if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
#ifdef HAVE_SSL
		SSL_shutdown(c->ssl);
		SSL_free(c->ssl);
#endif
	}
	if(c->type == comm_http && c->http_endpoint) {
		free(c->http_endpoint);
		c->http_endpoint = NULL;
	}
	comm_point_close(c);
	if(c->tcp_handlers) {
		int i;
		/* entries may be NULL; the recursive call handles that */
		for(i=0; i<c->max_tcp_count; i++)
			comm_point_delete(c->tcp_handlers[i]);
		free(c->tcp_handlers);
	}
	free(c->timeout);
	/* only these types own their buffer; other types do not free it */
	if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
		sldns_buffer_free(c->buffer);
#ifdef USE_DNSCRYPT
		if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
			sldns_buffer_free(c->dnscrypt_buffer);
		}
#endif
		if(c->tcp_req_info) {
			tcp_req_info_delete(c->tcp_req_info);
		}
		if(c->h2_session) {
			http2_session_delete(c->h2_session);
		}
	}
	ub_event_free(c->ev->ev);
	free(c->ev);
	free(c);
}

/**
 * Send the reply for repinfo. For comm_udp points the message is sent
 * right away; for stream points the reply is handed to tcp_req_info, to
 * the http2 session, or the point starts listening again so that the
 * event handlers send it.
 * @param repinfo: reply info; repinfo->c must be set.
 */
void
comm_point_send_reply(struct comm_reply *repinfo)
{
	struct sldns_buffer* buffer;
	log_assert(repinfo && repinfo->c);
#ifdef USE_DNSCRYPT
	buffer = repinfo->c->dnscrypt_buffer;
	if(!dnsc_handle_uncurved_request(repinfo)) {
		return;
	}
#else
	buffer = repinfo->c->buffer;
#endif
	if(repinfo->c->type == comm_udp) {
		if(repinfo->srctype)
			comm_point_send_udp_msg_if(repinfo->c,
			buffer, (struct sockaddr*)&repinfo->addr,
repinfo->addrlen, repinfo);
		else
			comm_point_send_udp_msg(repinfo->c, buffer,
			(struct sockaddr*)&repinfo->addr, repinfo->addrlen, 0);
#ifdef USE_DNSTAP
		/*
		 * sending src (client)/dst (local service) addresses over DNSTAP from udp callback
		 */
		if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
			log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen);
			log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen);
			dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, repinfo->c->buffer);
		}
#endif
	} else {
#ifdef USE_DNSTAP
		/*
		 * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback
		 */
		/* NOTE(review): tcp_parent and socket are dereferenced without
		 * a NULL check here - confirm every caller guarantees them */
		if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) {
			log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen);
			log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen);
			dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type,
				( repinfo->c->tcp_req_info?
				repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer ));
		}
#endif
		if(repinfo->c->tcp_req_info) {
			tcp_req_info_send_reply(repinfo->c->tcp_req_info);
		} else if(repinfo->c->use_h2) {
			if(!http2_submit_dns_response(repinfo->c->h2_session)) {
				comm_point_drop_reply(repinfo);
				return;
			}
			/* response submitted: detach the stream and switch
			 * the point to write mode */
			repinfo->c->h2_stream = NULL;
			repinfo->c->tcp_is_reading = 0;
			comm_point_stop_listening(repinfo->c);
			comm_point_start_listening(repinfo->c, -1,
				adjusted_tcp_timeout(repinfo->c));
			return;
		} else {
			comm_point_start_listening(repinfo->c, -1,
				adjusted_tcp_timeout(repinfo->c));
		}
	}
}

/**
 * Drop a reply instead of sending it. Nothing is done for comm_udp
 * points. For comm_http points the handler is reclaimed, unless an http2
 * session has postpone_drop set, in which case only is_drop is recorded.
 * Other stream points are reclaimed with reclaim_tcp_handler.
 * @param repinfo: reply info, may be NULL.
 */
void
comm_point_drop_reply(struct comm_reply* repinfo)
{
	if(!repinfo)
		return;
	log_assert(repinfo->c);
	log_assert(repinfo->c->type != comm_tcp_accept);
	if(repinfo->c->type == comm_udp)
		return;
	if(repinfo->c->tcp_req_info)
		repinfo->c->tcp_req_info->is_drop = 1;
	if(repinfo->c->type == comm_http) {
		if(repinfo->c->h2_session) {
			repinfo->c->h2_session->is_drop = 1;
			if(!repinfo->c->h2_session->postpone_drop)
				reclaim_http_handler(repinfo->c);
			return;
		}
		reclaim_http_handler(repinfo->c);
		return;
	}
	reclaim_tcp_handler(repinfo->c);
}

/**
 * Stop listening on a comm point: delete its event if it was added.
 * The fd is left open.
 * @param c: comm point.
 */
void
comm_point_stop_listening(struct comm_point* c)
{
	verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
	if(c->event_added) {
		if(ub_event_del(c->ev->ev) != 0) {
			log_err("event_del error to stoplisten");
		}
		c->event_added = 0;
	}
}

/**
 * (Re)start listening on a comm point.
 * @param c: comm point.
 * @param newfd: if not -1, the point switches to this fd; a different fd
 *	that was open before is closed.
 * @param msec: timeout in milliseconds; 0 disables the timeout, -1 keeps
 *	the existing timeout (if any).
 */
void
comm_point_start_listening(struct comm_point* c, int newfd, int msec)
{
	verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
		c->fd==-1?newfd:c->fd, msec);
	if(c->type == comm_tcp_accept && !c->tcp_free) {
		/* no use to start listening no free slots.
*/
		return;
	}
	/* remove the event first, so the bits can be changed */
	if(c->event_added) {
		if(ub_event_del(c->ev->ev) != 0) {
			log_err("event_del error to startlisten");
		}
		c->event_added = 0;
	}
	if(msec != -1 && msec != 0) {
		/* the timeval is allocated on first use */
		if(!c->timeout) {
			c->timeout = (struct timeval*)malloc(sizeof(
				struct timeval));
			if(!c->timeout) {
				log_err("cpsl: malloc failed. No net read.");
				return;
			}
		}
		ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
#ifndef S_SPLINT_S /* splint fails on struct timeval. */
		c->timeout->tv_sec = msec/1000;
		c->timeout->tv_usec = (msec%1000)*1000;
#endif /* S_SPLINT_S */
	} else {
		if(msec == 0 || !c->timeout) {
			ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
		}
	}
	/* stream points select read and/or write from their state flags */
	if(c->type == comm_tcp || c->type == comm_http) {
		ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
		if(c->tcp_write_and_read) {
			verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
			ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
		} else if(c->tcp_is_reading) {
			verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
			ub_event_add_bits(c->ev->ev, UB_EV_READ);
		} else	{
			verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
			ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
		}
	}
	if(newfd != -1) {
		if(c->fd != -1 && c->fd != newfd) {
			verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
			sock_close(c->fd);
		}
		c->fd = newfd;
		ub_event_set_fd(c->ev->ev, c->fd);
	}
	if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
		log_err("event_add failed. in cpsl.");
		return;
	}
	c->event_added = 1;
}

/**
 * Re-add the event of a comm point with exactly the requested read/write
 * bits, keeping the point's current timeout (if it has one).
 * @param c: comm point.
 * @param rd: if nonzero listen for read events.
 * @param wr: if nonzero listen for write events.
 */
void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
{
	verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
	if(c->event_added) {
		if(ub_event_del(c->ev->ev) != 0) {
			log_err("event_del error to cplf");
		}
		c->event_added = 0;
	}
	if(!c->timeout) {
		ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
	}
	ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
	if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
	if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
	if(ub_event_add(c->ev->ev, c->timeout) != 0) {
		log_err("event_add failed. in cplf.");
		return;
	}
	c->event_added = 1;
}

/**
 * Estimate the memory in use by a comm point: the structure, its internal
 * event, its timeout, the buffers of comm_tcp and comm_local points, and
 * recursively the handlers of a comm_tcp_accept point.
 * NOTE(review): buffers of comm_http points are not counted, although
 * comm_point_delete frees them - confirm whether that is intended.
 * @param c: comm point, may be NULL.
 * @return size in bytes, 0 for NULL.
 */
size_t comm_point_get_mem(struct comm_point* c)
{
	size_t s;
	if(!c)
		return 0;
	s = sizeof(*c) + sizeof(*c->ev);
	if(c->timeout)
		s += sizeof(*c->timeout);
	if(c->type == comm_tcp || c->type == comm_local) {
		s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
#ifdef USE_DNSCRYPT
		s += sizeof(*c->dnscrypt_buffer);
		/* count the capacity only once when both are the same buffer */
		if(c->buffer != c->dnscrypt_buffer) {
			s += sldns_buffer_capacity(c->dnscrypt_buffer);
		}
#endif
	}
	if(c->type == comm_tcp_accept) {
		int i;
		for(i=0; i<c->max_tcp_count; i++)
			s += comm_point_get_mem(c->tcp_handlers[i]);
	}
	return s;
}

/**
 * Create a timer. The timer is not set; use comm_timer_set.
 * @param base: comm base the timer event is created on.
 * @param cb: callback invoked when the timer expires.
 * @param cb_arg: argument passed to cb.
 * @return the timer, or NULL on failure.
 */
struct comm_timer*
comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
{
	struct internal_timer *tm = (struct internal_timer*)calloc(1,
		sizeof(struct internal_timer));
	if(!tm) {
		log_err("malloc failed");
		return NULL;
	}
	/* the public struct is embedded in the internal one and points back
	 * to it */
	tm->super.ev_timer = tm;
	tm->base = base;
	tm->super.callback = cb;
	tm->super.cb_arg = cb_arg;
	tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
		comm_timer_callback, &tm->super);
	if(tm->ev == NULL) {
		log_err("timer_create: event_base_set failed.");
		free(tm);
		return NULL;
	}
	return &tm->super;
}

/**
 * Disable a timer: delete its event and mark it not enabled.
 * @param timer: the timer, may be NULL.
 */
void
comm_timer_disable(struct comm_timer* timer)
{
	if(!timer)
		return;
	ub_timer_del(timer->ev_timer->ev);
	timer->ev_timer->enabled = 0;
}

/**
 * Set a timer to expire after tv. A timer that is already enabled is
 * disabled first.
 * NOTE(review): the timer is marked enabled even when ub_timer_add
 * reports failure - confirm that is intended.
 * @param timer: the timer.
 * @param tv: relative timeout, must not be NULL.
 */
void
comm_timer_set(struct comm_timer* timer, struct timeval* tv)
{
	log_assert(tv);
	if(timer->ev_timer->enabled)
		comm_timer_disable(timer);
	if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
		comm_timer_callback, timer, tv) != 0)
		log_err("comm_timer_set: evtimer_add failed.");
	timer->ev_timer->enabled = 1;
}

/**
 * Disable and free a timer.
 * @param timer: the timer, may be NULL.
 */
void
comm_timer_delete(struct comm_timer* timer)
{
	if(!timer)
		return;
	comm_timer_disable(timer);
	/* Free the sub struct timer->ev_timer derived from the super struct timer.
	 * i.e. assert(timer == timer->ev_timer)
	 */
	ub_event_free(timer->ev_timer->ev);
	free(timer->ev_timer);
}

/**
 * Event callback for timers. Events without the timeout bit are ignored;
 * otherwise the timer is marked not enabled and the user callback runs.
 */
void
comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
{
	struct comm_timer* tm = (struct comm_timer*)arg;
	if(!(event&UB_EV_TIMEOUT))
		return;
	ub_comm_base_now(tm->ev_timer->base);
	tm->ev_timer->enabled = 0;
	fptr_ok(fptr_whitelist_comm_timer(tm->callback));
	(*tm->callback)(tm->cb_arg);
}

/**
 * @param timer: the timer.
 * @return the enabled flag of the timer, as an int.
 */
int
comm_timer_is_set(struct comm_timer* timer)
{
	return (int)timer->ev_timer->enabled;
}

/**
 * @return the size of struct internal_timer; the argument is not used.
 */
size_t
comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
{
	return sizeof(struct internal_timer);
}

struct comm_signal*
comm_signal_create(struct comm_base* base,
	void (*callback)(int, void*), void* cb_arg)
{
	struct comm_signal* com = (struct comm_signal*)malloc(
		sizeof(struct comm_signal));
	if(!com) {
		log_err("malloc failed");
		return NULL;
	}
	com->base = base;
	com->callback = callback;
	com->cb_arg
= cb_arg; 4358 com->ev_signal = NULL; 4359 return com; 4360 } 4361 4362 void 4363 comm_signal_callback(int sig, short event, void* arg) 4364 { 4365 struct comm_signal* comsig = (struct comm_signal*)arg; 4366 if(!(event & UB_EV_SIGNAL)) 4367 return; 4368 ub_comm_base_now(comsig->base); 4369 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 4370 (*comsig->callback)(sig, comsig->cb_arg); 4371 } 4372 4373 int 4374 comm_signal_bind(struct comm_signal* comsig, int sig) 4375 { 4376 struct internal_signal* entry = (struct internal_signal*)calloc(1, 4377 sizeof(struct internal_signal)); 4378 if(!entry) { 4379 log_err("malloc failed"); 4380 return 0; 4381 } 4382 log_assert(comsig); 4383 /* add signal event */ 4384 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 4385 comm_signal_callback, comsig); 4386 if(entry->ev == NULL) { 4387 log_err("Could not create signal event"); 4388 free(entry); 4389 return 0; 4390 } 4391 if(ub_signal_add(entry->ev, NULL) != 0) { 4392 log_err("Could not add signal handler"); 4393 ub_event_free(entry->ev); 4394 free(entry); 4395 return 0; 4396 } 4397 /* link into list */ 4398 entry->next = comsig->ev_signal; 4399 comsig->ev_signal = entry; 4400 return 1; 4401 } 4402 4403 void 4404 comm_signal_delete(struct comm_signal* comsig) 4405 { 4406 struct internal_signal* p, *np; 4407 if(!comsig) 4408 return; 4409 p=comsig->ev_signal; 4410 while(p) { 4411 np = p->next; 4412 ub_signal_del(p->ev); 4413 ub_event_free(p->ev); 4414 free(p); 4415 p = np; 4416 } 4417 free(comsig); 4418 } 4419