1 /* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains event notification functions. 
40 */ 41 #include "config.h" 42 #include "util/netevent.h" 43 #include "util/ub_event.h" 44 #include "util/log.h" 45 #include "util/net_help.h" 46 #include "util/tcp_conn_limit.h" 47 #include "util/fptr_wlist.h" 48 #include "util/proxy_protocol.h" 49 #include "sldns/pkthdr.h" 50 #include "sldns/sbuffer.h" 51 #include "sldns/str2wire.h" 52 #include "dnstap/dnstap.h" 53 #include "dnscrypt/dnscrypt.h" 54 #include "services/listen_dnsport.h" 55 #ifdef HAVE_SYS_TYPES_H 56 #include <sys/types.h> 57 #endif 58 #ifdef HAVE_SYS_SOCKET_H 59 #include <sys/socket.h> 60 #endif 61 #ifdef HAVE_NETDB_H 62 #include <netdb.h> 63 #endif 64 #ifdef HAVE_POLL_H 65 #include <poll.h> 66 #endif 67 68 #ifdef HAVE_OPENSSL_SSL_H 69 #include <openssl/ssl.h> 70 #endif 71 #ifdef HAVE_OPENSSL_ERR_H 72 #include <openssl/err.h> 73 #endif 74 75 /* -------- Start of local definitions -------- */ 76 /** if CMSG_ALIGN is not defined on this platform, a workaround */ 77 #ifndef CMSG_ALIGN 78 # ifdef __CMSG_ALIGN 79 # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 80 # elif defined(CMSG_DATA_ALIGN) 81 # define CMSG_ALIGN _CMSG_DATA_ALIGN 82 # else 83 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 84 # endif 85 #endif 86 87 /** if CMSG_LEN is not defined on this platform, a workaround */ 88 #ifndef CMSG_LEN 89 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 90 #endif 91 92 /** if CMSG_SPACE is not defined on this platform, a workaround */ 93 #ifndef CMSG_SPACE 94 # ifdef _CMSG_HDR_ALIGN 95 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 96 # else 97 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 98 # endif 99 #endif 100 101 /** The TCP writing query timeout in milliseconds */ 102 #define TCP_QUERY_TIMEOUT 120000 103 /** The minimum actual TCP timeout to use, regardless of what we advertise, 104 * in msec */ 105 #define TCP_QUERY_TIMEOUT_MINIMUM 200 106 107 #ifndef NONBLOCKING_IS_BROKEN 108 /** number of UDP reads to 
perform per read indication from select */ 109 #define NUM_UDP_PER_SELECT 100 110 #else 111 #define NUM_UDP_PER_SELECT 1 112 #endif 113 114 /** timeout in millisec to wait for write to unblock, packets dropped after.*/ 115 #define SEND_BLOCKED_WAIT_TIMEOUT 200 116 117 /** 118 * The internal event structure for keeping ub_event info for the event. 119 * Possibly other structures (list, tree) this is part of. 120 */ 121 struct internal_event { 122 /** the comm base */ 123 struct comm_base* base; 124 /** ub_event event type */ 125 struct ub_event* ev; 126 }; 127 128 /** 129 * Internal base structure, so that every thread has its own events. 130 */ 131 struct internal_base { 132 /** ub_event event_base type. */ 133 struct ub_event_base* base; 134 /** seconds time pointer points here */ 135 time_t secs; 136 /** timeval with current time */ 137 struct timeval now; 138 /** the event used for slow_accept timeouts */ 139 struct ub_event* slow_accept; 140 /** true if slow_accept is enabled */ 141 int slow_accept_enabled; 142 /** last log time for slow logging of file descriptor errors */ 143 time_t last_slow_log; 144 /** last log time for slow logging of write wait failures */ 145 time_t last_writewait_log; 146 }; 147 148 /** 149 * Internal timer structure, to store timer event in. 150 */ 151 struct internal_timer { 152 /** the super struct from which derived */ 153 struct comm_timer super; 154 /** the comm base */ 155 struct comm_base* base; 156 /** ub_event event type */ 157 struct ub_event* ev; 158 /** is timer enabled */ 159 uint8_t enabled; 160 }; 161 162 /** 163 * Internal signal structure, to store signal event in. 
164 */ 165 struct internal_signal { 166 /** ub_event event type */ 167 struct ub_event* ev; 168 /** next in signal list */ 169 struct internal_signal* next; 170 }; 171 172 /** create a tcp handler with a parent */ 173 static struct comm_point* comm_point_create_tcp_handler( 174 struct comm_base *base, struct comm_point* parent, size_t bufsize, 175 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 176 void* callback_arg, struct unbound_socket* socket); 177 178 /* -------- End of local definitions -------- */ 179 180 struct comm_base* 181 comm_base_create(int sigs) 182 { 183 struct comm_base* b = (struct comm_base*)calloc(1, 184 sizeof(struct comm_base)); 185 const char *evnm="event", *evsys="", *evmethod=""; 186 187 if(!b) 188 return NULL; 189 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 190 if(!b->eb) { 191 free(b); 192 return NULL; 193 } 194 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 195 if(!b->eb->base) { 196 free(b->eb); 197 free(b); 198 return NULL; 199 } 200 ub_comm_base_now(b); 201 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 202 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 203 return b; 204 } 205 206 struct comm_base* 207 comm_base_create_event(struct ub_event_base* base) 208 { 209 struct comm_base* b = (struct comm_base*)calloc(1, 210 sizeof(struct comm_base)); 211 if(!b) 212 return NULL; 213 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 214 if(!b->eb) { 215 free(b); 216 return NULL; 217 } 218 b->eb->base = base; 219 ub_comm_base_now(b); 220 return b; 221 } 222 223 void 224 comm_base_delete(struct comm_base* b) 225 { 226 if(!b) 227 return; 228 if(b->eb->slow_accept_enabled) { 229 if(ub_event_del(b->eb->slow_accept) != 0) { 230 log_err("could not event_del slow_accept"); 231 } 232 ub_event_free(b->eb->slow_accept); 233 } 234 ub_event_base_free(b->eb->base); 235 b->eb->base = NULL; 236 free(b->eb); 237 free(b); 238 } 239 240 void 
241 comm_base_delete_no_base(struct comm_base* b) 242 { 243 if(!b) 244 return; 245 if(b->eb->slow_accept_enabled) { 246 if(ub_event_del(b->eb->slow_accept) != 0) { 247 log_err("could not event_del slow_accept"); 248 } 249 ub_event_free(b->eb->slow_accept); 250 } 251 b->eb->base = NULL; 252 free(b->eb); 253 free(b); 254 } 255 256 void 257 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 258 { 259 *tt = &b->eb->secs; 260 *tv = &b->eb->now; 261 } 262 263 void 264 comm_base_dispatch(struct comm_base* b) 265 { 266 int retval; 267 retval = ub_event_base_dispatch(b->eb->base); 268 if(retval < 0) { 269 fatal_exit("event_dispatch returned error %d, " 270 "errno is %s", retval, strerror(errno)); 271 } 272 } 273 274 void comm_base_exit(struct comm_base* b) 275 { 276 if(ub_event_base_loopexit(b->eb->base) != 0) { 277 log_err("Could not loopexit"); 278 } 279 } 280 281 void comm_base_set_slow_accept_handlers(struct comm_base* b, 282 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 283 { 284 b->stop_accept = stop_acc; 285 b->start_accept = start_acc; 286 b->cb_arg = arg; 287 } 288 289 struct ub_event_base* comm_base_internal(struct comm_base* b) 290 { 291 return b->eb->base; 292 } 293 294 /** see if errno for udp has to be logged or not uses globals */ 295 static int 296 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 297 { 298 /* do not log transient errors (unless high verbosity) */ 299 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 300 switch(errno) { 301 # ifdef ENETUNREACH 302 case ENETUNREACH: 303 # endif 304 # ifdef EHOSTDOWN 305 case EHOSTDOWN: 306 # endif 307 # ifdef EHOSTUNREACH 308 case EHOSTUNREACH: 309 # endif 310 # ifdef ENETDOWN 311 case ENETDOWN: 312 # endif 313 case EPERM: 314 case EACCES: 315 if(verbosity < VERB_ALGO) 316 return 0; 317 default: 318 break; 319 } 320 #endif 321 /* permission denied is gotten for every send if the 322 * network is disconnected (on 
some OS), squelch it */ 323 if( ((errno == EPERM) 324 # ifdef EADDRNOTAVAIL 325 /* 'Cannot assign requested address' also when disconnected */ 326 || (errno == EADDRNOTAVAIL) 327 # endif 328 ) && verbosity < VERB_ALGO) 329 return 0; 330 # ifdef EADDRINUSE 331 /* If SO_REUSEADDR is set, we could try to connect to the same server 332 * from the same source port twice. */ 333 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 334 return 0; 335 # endif 336 /* squelch errors where people deploy AAAA ::ffff:bla for 337 * authority servers, which we try for intranets. */ 338 if(errno == EINVAL && addr_is_ip4mapped( 339 (struct sockaddr_storage*)addr, addrlen) && 340 verbosity < VERB_DETAIL) 341 return 0; 342 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 343 * but a dns cache does not need it. */ 344 if(errno == EACCES && addr_is_broadcast( 345 (struct sockaddr_storage*)addr, addrlen) && 346 verbosity < VERB_DETAIL) 347 return 0; 348 return 1; 349 } 350 351 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 352 { 353 return udp_send_errno_needs_log(addr, addrlen); 354 } 355 356 /* send a UDP reply */ 357 int 358 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 359 struct sockaddr* addr, socklen_t addrlen, int is_connected) 360 { 361 ssize_t sent; 362 log_assert(c->fd != -1); 363 #ifdef UNBOUND_DEBUG 364 if(sldns_buffer_remaining(packet) == 0) 365 log_err("error: send empty UDP packet"); 366 #endif 367 log_assert(addr && addrlen > 0); 368 if(!is_connected) { 369 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 370 sldns_buffer_remaining(packet), 0, 371 addr, addrlen); 372 } else { 373 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 374 sldns_buffer_remaining(packet), 0); 375 } 376 if(sent == -1) { 377 /* try again and block, waiting for IO to complete, 378 * we want to send the answer, and we will wait for 379 * the ethernet interface buffer to have space. 
*/ 380 #ifndef USE_WINSOCK 381 if(errno == EAGAIN || errno == EINTR || 382 # ifdef EWOULDBLOCK 383 errno == EWOULDBLOCK || 384 # endif 385 errno == ENOBUFS) { 386 #else 387 if(WSAGetLastError() == WSAEINPROGRESS || 388 WSAGetLastError() == WSAEINTR || 389 WSAGetLastError() == WSAENOBUFS || 390 WSAGetLastError() == WSAEWOULDBLOCK) { 391 #endif 392 /* if we set the fd blocking, other threads suddenly 393 * have a blocking fd that they operate on */ 394 while(sent == -1 && ( 395 #ifndef USE_WINSOCK 396 errno == EAGAIN || errno == EINTR || 397 # ifdef EWOULDBLOCK 398 errno == EWOULDBLOCK || 399 # endif 400 errno == ENOBUFS 401 #else 402 WSAGetLastError() == WSAEINPROGRESS || 403 WSAGetLastError() == WSAEINTR || 404 WSAGetLastError() == WSAENOBUFS || 405 WSAGetLastError() == WSAEWOULDBLOCK 406 #endif 407 )) { 408 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 409 struct pollfd p; 410 int pret; 411 memset(&p, 0, sizeof(p)); 412 p.fd = c->fd; 413 p.events = POLLOUT | POLLERR | POLLHUP; 414 # ifndef USE_WINSOCK 415 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 416 # else 417 pret = WSAPoll(&p, 1, 418 SEND_BLOCKED_WAIT_TIMEOUT); 419 # endif 420 if(pret == 0) { 421 /* timer expired */ 422 struct comm_base* b = c->ev->base; 423 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 424 b->eb->secs) { 425 b->eb->last_writewait_log = b->eb->secs; 426 verbose(VERB_OPS, "send udp blocked " 427 "for long, dropping packet."); 428 } 429 return 0; 430 } else if(pret < 0 && 431 #ifndef USE_WINSOCK 432 errno != EAGAIN && errno != EINTR && 433 # ifdef EWOULDBLOCK 434 errno != EWOULDBLOCK && 435 # endif 436 errno != ENOBUFS 437 #else 438 WSAGetLastError() != WSAEINPROGRESS && 439 WSAGetLastError() != WSAEINTR && 440 WSAGetLastError() != WSAENOBUFS && 441 WSAGetLastError() != WSAEWOULDBLOCK 442 #endif 443 ) { 444 log_err("poll udp out failed: %s", 445 sock_strerror(errno)); 446 return 0; 447 } 448 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 449 if (!is_connected) { 450 sent = 
sendto(c->fd, (void*)sldns_buffer_begin(packet), 451 sldns_buffer_remaining(packet), 0, 452 addr, addrlen); 453 } else { 454 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 455 sldns_buffer_remaining(packet), 0); 456 } 457 } 458 } 459 } 460 if(sent == -1) { 461 if(!udp_send_errno_needs_log(addr, addrlen)) 462 return 0; 463 if (!is_connected) { 464 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 465 } else { 466 verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 467 } 468 if(addr) 469 log_addr(VERB_OPS, "remote address is", 470 (struct sockaddr_storage*)addr, addrlen); 471 return 0; 472 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 473 log_err("sent %d in place of %d bytes", 474 (int)sent, (int)sldns_buffer_remaining(packet)); 475 return 0; 476 } 477 return 1; 478 } 479 480 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 481 /** print debug ancillary info */ 482 static void p_ancil(const char* str, struct comm_reply* r) 483 { 484 if(r->srctype != 4 && r->srctype != 6) { 485 log_info("%s: unknown srctype %d", str, r->srctype); 486 return; 487 } 488 489 if(r->srctype == 6) { 490 #ifdef IPV6_PKTINFO 491 char buf[1024]; 492 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 493 buf, (socklen_t)sizeof(buf)) == 0) { 494 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 495 } 496 buf[sizeof(buf)-1]=0; 497 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 498 #endif 499 } else if(r->srctype == 4) { 500 #ifdef IP_PKTINFO 501 char buf1[1024], buf2[1024]; 502 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 503 buf1, (socklen_t)sizeof(buf1)) == 0) { 504 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 505 } 506 buf1[sizeof(buf1)-1]=0; 507 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 508 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 509 buf2, (socklen_t)sizeof(buf2)) == 0) { 510 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 511 } 512 
buf2[sizeof(buf2)-1]=0; 513 #else 514 buf2[0]=0; 515 #endif 516 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 517 buf1, buf2); 518 #elif defined(IP_RECVDSTADDR) 519 char buf1[1024]; 520 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 521 buf1, (socklen_t)sizeof(buf1)) == 0) { 522 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 523 } 524 buf1[sizeof(buf1)-1]=0; 525 log_info("%s: %s", str, buf1); 526 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 527 } 528 } 529 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 530 531 /** send a UDP reply over specified interface*/ 532 static int 533 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 534 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 535 { 536 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 537 ssize_t sent; 538 struct msghdr msg; 539 struct iovec iov[1]; 540 union { 541 struct cmsghdr hdr; 542 char buf[256]; 543 } control; 544 #ifndef S_SPLINT_S 545 struct cmsghdr *cmsg; 546 #endif /* S_SPLINT_S */ 547 548 log_assert(c->fd != -1); 549 #ifdef UNBOUND_DEBUG 550 if(sldns_buffer_remaining(packet) == 0) 551 log_err("error: send empty UDP packet"); 552 #endif 553 log_assert(addr && addrlen > 0); 554 555 msg.msg_name = addr; 556 msg.msg_namelen = addrlen; 557 iov[0].iov_base = sldns_buffer_begin(packet); 558 iov[0].iov_len = sldns_buffer_remaining(packet); 559 msg.msg_iov = iov; 560 msg.msg_iovlen = 1; 561 msg.msg_control = control.buf; 562 #ifndef S_SPLINT_S 563 msg.msg_controllen = sizeof(control.buf); 564 #endif /* S_SPLINT_S */ 565 msg.msg_flags = 0; 566 567 #ifndef S_SPLINT_S 568 cmsg = CMSG_FIRSTHDR(&msg); 569 if(r->srctype == 4) { 570 #ifdef IP_PKTINFO 571 void* cmsg_data; 572 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 573 log_assert(msg.msg_controllen <= sizeof(control.buf)); 574 cmsg->cmsg_level = IPPROTO_IP; 575 cmsg->cmsg_type = IP_PKTINFO; 576 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 577 
sizeof(struct in_pktinfo)); 578 /* unset the ifindex to not bypass the routing tables */ 579 cmsg_data = CMSG_DATA(cmsg); 580 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 581 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 582 #elif defined(IP_SENDSRCADDR) 583 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 584 log_assert(msg.msg_controllen <= sizeof(control.buf)); 585 cmsg->cmsg_level = IPPROTO_IP; 586 cmsg->cmsg_type = IP_SENDSRCADDR; 587 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 588 sizeof(struct in_addr)); 589 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 590 #else 591 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 592 msg.msg_control = NULL; 593 #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 594 } else if(r->srctype == 6) { 595 void* cmsg_data; 596 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 597 log_assert(msg.msg_controllen <= sizeof(control.buf)); 598 cmsg->cmsg_level = IPPROTO_IPV6; 599 cmsg->cmsg_type = IPV6_PKTINFO; 600 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 601 sizeof(struct in6_pktinfo)); 602 /* unset the ifindex to not bypass the routing tables */ 603 cmsg_data = CMSG_DATA(cmsg); 604 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 605 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 606 } else { 607 /* try to pass all 0 to use default route */ 608 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 609 log_assert(msg.msg_controllen <= sizeof(control.buf)); 610 cmsg->cmsg_level = IPPROTO_IPV6; 611 cmsg->cmsg_type = IPV6_PKTINFO; 612 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo)); 613 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 614 } 615 #endif /* S_SPLINT_S */ 616 if(verbosity >= VERB_ALGO) 617 p_ancil("send_udp over interface", r); 618 sent = sendmsg(c->fd, &msg, 0); 619 if(sent == -1) { 620 /* try again and block, waiting for IO to complete, 621 * we want to send the answer, and we will wait for 622 * the ethernet interface buffer to have space. 
*/ 623 #ifndef USE_WINSOCK 624 if(errno == EAGAIN || errno == EINTR || 625 # ifdef EWOULDBLOCK 626 errno == EWOULDBLOCK || 627 # endif 628 errno == ENOBUFS) { 629 #else 630 if(WSAGetLastError() == WSAEINPROGRESS || 631 WSAGetLastError() == WSAEINTR || 632 WSAGetLastError() == WSAENOBUFS || 633 WSAGetLastError() == WSAEWOULDBLOCK) { 634 #endif 635 while(sent == -1 && ( 636 #ifndef USE_WINSOCK 637 errno == EAGAIN || errno == EINTR || 638 # ifdef EWOULDBLOCK 639 errno == EWOULDBLOCK || 640 # endif 641 errno == ENOBUFS 642 #else 643 WSAGetLastError() == WSAEINPROGRESS || 644 WSAGetLastError() == WSAEINTR || 645 WSAGetLastError() == WSAENOBUFS || 646 WSAGetLastError() == WSAEWOULDBLOCK 647 #endif 648 )) { 649 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 650 struct pollfd p; 651 int pret; 652 memset(&p, 0, sizeof(p)); 653 p.fd = c->fd; 654 p.events = POLLOUT | POLLERR | POLLHUP; 655 # ifndef USE_WINSOCK 656 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 657 # else 658 pret = WSAPoll(&p, 1, 659 SEND_BLOCKED_WAIT_TIMEOUT); 660 # endif 661 if(pret == 0) { 662 /* timer expired */ 663 struct comm_base* b = c->ev->base; 664 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 665 b->eb->secs) { 666 b->eb->last_writewait_log = b->eb->secs; 667 verbose(VERB_OPS, "send udp blocked " 668 "for long, dropping packet."); 669 } 670 return 0; 671 } else if(pret < 0 && 672 #ifndef USE_WINSOCK 673 errno != EAGAIN && errno != EINTR && 674 # ifdef EWOULDBLOCK 675 errno != EWOULDBLOCK && 676 # endif 677 errno != ENOBUFS 678 #else 679 WSAGetLastError() != WSAEINPROGRESS && 680 WSAGetLastError() != WSAEINTR && 681 WSAGetLastError() != WSAENOBUFS && 682 WSAGetLastError() != WSAEWOULDBLOCK 683 #endif 684 ) { 685 log_err("poll udp out failed: %s", 686 sock_strerror(errno)); 687 return 0; 688 } 689 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 690 sent = sendmsg(c->fd, &msg, 0); 691 } 692 } 693 } 694 if(sent == -1) { 695 if(!udp_send_errno_needs_log(addr, addrlen)) 696 return 0; 697 
verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 698 log_addr(VERB_OPS, "remote address is", 699 (struct sockaddr_storage*)addr, addrlen); 700 #ifdef __NetBSD__ 701 /* netbsd 7 has IP_PKTINFO for recv but not send */ 702 if(errno == EINVAL && r->srctype == 4) 703 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). " 704 "Please disable interface-automatic"); 705 #endif 706 return 0; 707 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 708 log_err("sent %d in place of %d bytes", 709 (int)sent, (int)sldns_buffer_remaining(packet)); 710 return 0; 711 } 712 return 1; 713 #else 714 (void)c; 715 (void)packet; 716 (void)addr; 717 (void)addrlen; 718 (void)r; 719 log_err("sendmsg: IPV6_PKTINFO not supported"); 720 return 0; 721 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 722 } 723 724 /** return true is UDP receive error needs to be logged */ 725 static int udp_recv_needs_log(int err) 726 { 727 switch(err) { 728 case EACCES: /* some hosts send ICMP 'Permission Denied' */ 729 #ifndef USE_WINSOCK 730 case ECONNREFUSED: 731 # ifdef ENETUNREACH 732 case ENETUNREACH: 733 # endif 734 # ifdef EHOSTDOWN 735 case EHOSTDOWN: 736 # endif 737 # ifdef EHOSTUNREACH 738 case EHOSTUNREACH: 739 # endif 740 # ifdef ENETDOWN 741 case ENETDOWN: 742 # endif 743 #else /* USE_WINSOCK */ 744 case WSAECONNREFUSED: 745 case WSAENETUNREACH: 746 case WSAEHOSTDOWN: 747 case WSAEHOSTUNREACH: 748 case WSAENETDOWN: 749 #endif 750 if(verbosity >= VERB_ALGO) 751 return 1; 752 return 0; 753 default: 754 break; 755 } 756 return 1; 757 } 758 759 /** Parses the PROXYv2 header from buf and updates the comm_reply struct. 760 * Returns 1 on success, 0 on failure. 
*/ 761 static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep, 762 int stream) { 763 size_t size; 764 struct pp2_header *header = pp2_read_header(buf); 765 if(header == NULL) return 0; 766 size = PP2_HEADER_SIZE + ntohs(header->len); 767 if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) { 768 /* A connection from the proxy itself. 769 * No need to do anything with addresses. */ 770 goto done; 771 } 772 if(header->fam_prot == 0x00) { 773 /* Unspecified family and protocol. This could be used for 774 * health checks by proxies. 775 * No need to do anything with addresses. */ 776 goto done; 777 } 778 /* Read the proxied address */ 779 switch(header->fam_prot) { 780 case 0x11: /* AF_INET|STREAM */ 781 case 0x12: /* AF_INET|DGRAM */ 782 { 783 struct sockaddr_in* addr = 784 (struct sockaddr_in*)&rep->client_addr; 785 addr->sin_family = AF_INET; 786 addr->sin_addr.s_addr = header->addr.addr4.src_addr; 787 addr->sin_port = header->addr.addr4.src_port; 788 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in); 789 } 790 /* Ignore the destination address; it should be us. */ 791 break; 792 case 0x21: /* AF_INET6|STREAM */ 793 case 0x22: /* AF_INET6|DGRAM */ 794 { 795 struct sockaddr_in6* addr = 796 (struct sockaddr_in6*)&rep->client_addr; 797 memset(addr, 0, sizeof(*addr)); 798 addr->sin6_family = AF_INET6; 799 memcpy(&addr->sin6_addr, 800 header->addr.addr6.src_addr, 16); 801 addr->sin6_port = header->addr.addr6.src_port; 802 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6); 803 } 804 /* Ignore the destination address; it should be us. */ 805 break; 806 } 807 rep->is_proxied = 1; 808 done: 809 if(!stream) { 810 /* We are reading a whole packet; 811 * Move the rest of the data to overwrite the PROXYv2 header */ 812 /* XXX can we do better to avoid memmove? 
*/ 813 memmove(header, ((void*)header)+size, 814 sldns_buffer_limit(buf)-size); 815 sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size); 816 } 817 return 1; 818 } 819 820 void 821 comm_point_udp_ancil_callback(int fd, short event, void* arg) 822 { 823 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 824 struct comm_reply rep; 825 struct msghdr msg; 826 struct iovec iov[1]; 827 ssize_t rcv; 828 union { 829 struct cmsghdr hdr; 830 char buf[256]; 831 } ancil; 832 int i; 833 #ifndef S_SPLINT_S 834 struct cmsghdr* cmsg; 835 #endif /* S_SPLINT_S */ 836 837 rep.c = (struct comm_point*)arg; 838 log_assert(rep.c->type == comm_udp); 839 840 if(!(event&UB_EV_READ)) 841 return; 842 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 843 ub_comm_base_now(rep.c->ev->base); 844 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 845 sldns_buffer_clear(rep.c->buffer); 846 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 847 log_assert(fd != -1); 848 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 849 msg.msg_name = &rep.remote_addr; 850 msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr); 851 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 852 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 853 msg.msg_iov = iov; 854 msg.msg_iovlen = 1; 855 msg.msg_control = ancil.buf; 856 #ifndef S_SPLINT_S 857 msg.msg_controllen = sizeof(ancil.buf); 858 #endif /* S_SPLINT_S */ 859 msg.msg_flags = 0; 860 rcv = recvmsg(fd, &msg, MSG_DONTWAIT); 861 if(rcv == -1) { 862 if(errno != EAGAIN && errno != EINTR 863 && udp_recv_needs_log(errno)) { 864 log_err("recvmsg failed: %s", strerror(errno)); 865 } 866 return; 867 } 868 rep.remote_addrlen = msg.msg_namelen; 869 sldns_buffer_skip(rep.c->buffer, rcv); 870 sldns_buffer_flip(rep.c->buffer); 871 rep.srctype = 0; 872 rep.is_proxied = 0; 873 #ifndef S_SPLINT_S 874 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 875 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 876 if( cmsg->cmsg_level == IPPROTO_IPV6 && 877 cmsg->cmsg_type 
== IPV6_PKTINFO) { 878 rep.srctype = 6; 879 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 880 sizeof(struct in6_pktinfo)); 881 break; 882 #ifdef IP_PKTINFO 883 } else if( cmsg->cmsg_level == IPPROTO_IP && 884 cmsg->cmsg_type == IP_PKTINFO) { 885 rep.srctype = 4; 886 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 887 sizeof(struct in_pktinfo)); 888 break; 889 #elif defined(IP_RECVDSTADDR) 890 } else if( cmsg->cmsg_level == IPPROTO_IP && 891 cmsg->cmsg_type == IP_RECVDSTADDR) { 892 rep.srctype = 4; 893 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 894 sizeof(struct in_addr)); 895 break; 896 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 897 } 898 } 899 if(verbosity >= VERB_ALGO) 900 p_ancil("receive_udp on interface", &rep); 901 #endif /* S_SPLINT_S */ 902 903 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 904 &rep, 0)) { 905 log_err("proxy_protocol: could not consume PROXYv2 header"); 906 return; 907 } 908 if(!rep.is_proxied) { 909 rep.client_addrlen = rep.remote_addrlen; 910 memmove(&rep.client_addr, &rep.remote_addr, 911 rep.remote_addrlen); 912 } 913 914 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 915 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 916 /* send back immediate reply */ 917 (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer, 918 (struct sockaddr*)&rep.remote_addr, 919 rep.remote_addrlen, &rep); 920 } 921 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 922 break; 923 } 924 #else 925 (void)fd; 926 (void)event; 927 (void)arg; 928 fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. 
" 929 "Please disable interface-automatic"); 930 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 931 } 932 933 void 934 comm_point_udp_callback(int fd, short event, void* arg) 935 { 936 struct comm_reply rep; 937 ssize_t rcv; 938 int i; 939 struct sldns_buffer *buffer; 940 941 rep.c = (struct comm_point*)arg; 942 log_assert(rep.c->type == comm_udp); 943 944 if(!(event&UB_EV_READ)) 945 return; 946 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 947 ub_comm_base_now(rep.c->ev->base); 948 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 949 sldns_buffer_clear(rep.c->buffer); 950 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 951 log_assert(fd != -1); 952 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 953 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 954 sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT, 955 (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen); 956 if(rcv == -1) { 957 #ifndef USE_WINSOCK 958 if(errno != EAGAIN && errno != EINTR 959 && udp_recv_needs_log(errno)) 960 log_err("recvfrom %d failed: %s", 961 fd, strerror(errno)); 962 #else 963 if(WSAGetLastError() != WSAEINPROGRESS && 964 WSAGetLastError() != WSAECONNRESET && 965 WSAGetLastError()!= WSAEWOULDBLOCK && 966 udp_recv_needs_log(WSAGetLastError())) 967 log_err("recvfrom failed: %s", 968 wsa_strerror(WSAGetLastError())); 969 #endif 970 return; 971 } 972 sldns_buffer_skip(rep.c->buffer, rcv); 973 sldns_buffer_flip(rep.c->buffer); 974 rep.srctype = 0; 975 rep.is_proxied = 0; 976 977 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 978 &rep, 0)) { 979 log_err("proxy_protocol: could not consume PROXYv2 header"); 980 return; 981 } 982 if(!rep.is_proxied) { 983 rep.client_addrlen = rep.remote_addrlen; 984 memmove(&rep.client_addr, &rep.remote_addr, 985 rep.remote_addrlen); 986 } 987 988 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 989 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 990 /* send back immediate reply */ 991 
#ifdef USE_DNSCRYPT 992 buffer = rep.c->dnscrypt_buffer; 993 #else 994 buffer = rep.c->buffer; 995 #endif 996 (void)comm_point_send_udp_msg(rep.c, buffer, 997 (struct sockaddr*)&rep.remote_addr, 998 rep.remote_addrlen, 0); 999 } 1000 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 1001 another UDP port. Note rep.c cannot be reused with TCP fd. */ 1002 break; 1003 } 1004 } 1005 1006 int adjusted_tcp_timeout(struct comm_point* c) 1007 { 1008 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) 1009 return TCP_QUERY_TIMEOUT_MINIMUM; 1010 return c->tcp_timeout_msec; 1011 } 1012 1013 /** Use a new tcp handler for new query fd, set to read query */ 1014 static void 1015 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 1016 { 1017 int handler_usage; 1018 log_assert(c->type == comm_tcp || c->type == comm_http); 1019 log_assert(c->fd == -1); 1020 sldns_buffer_clear(c->buffer); 1021 #ifdef USE_DNSCRYPT 1022 if (c->dnscrypt) 1023 sldns_buffer_clear(c->dnscrypt_buffer); 1024 #endif 1025 c->tcp_is_reading = 1; 1026 c->tcp_byte_count = 0; 1027 c->tcp_keepalive = 0; 1028 /* if more than half the tcp handlers are in use, use a shorter 1029 * timeout for this TCP connection, we need to make space for 1030 * other connections to be able to get attention */ 1031 /* If > 50% TCP handler structures in use, set timeout to 1/100th 1032 * configured value. 1033 * If > 65%TCP handler structures in use, set to 1/500th configured 1034 * value. 1035 * If > 80% TCP handler structures in use, set to 0. 1036 * 1037 * If the timeout to use falls below 200 milliseconds, an actual 1038 * timeout of 200ms is used. 
1039 */ 1040 handler_usage = (cur * 100) / max; 1041 if(handler_usage > 50 && handler_usage <= 65) 1042 c->tcp_timeout_msec /= 100; 1043 else if (handler_usage > 65 && handler_usage <= 80) 1044 c->tcp_timeout_msec /= 500; 1045 else if (handler_usage > 80) 1046 c->tcp_timeout_msec = 0; 1047 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c)); 1048 } 1049 1050 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 1051 short ATTR_UNUSED(event), void* arg) 1052 { 1053 struct comm_base* b = (struct comm_base*)arg; 1054 /* timeout for the slow accept, re-enable accepts again */ 1055 if(b->start_accept) { 1056 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 1057 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 1058 (*b->start_accept)(b->cb_arg); 1059 b->eb->slow_accept_enabled = 0; 1060 } 1061 } 1062 1063 int comm_point_perform_accept(struct comm_point* c, 1064 struct sockaddr_storage* addr, socklen_t* addrlen) 1065 { 1066 int new_fd; 1067 *addrlen = (socklen_t)sizeof(*addr); 1068 #ifndef HAVE_ACCEPT4 1069 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 1070 #else 1071 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 1072 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 1073 #endif 1074 if(new_fd == -1) { 1075 #ifndef USE_WINSOCK 1076 /* EINTR is signal interrupt. others are closed connection. */ 1077 if( errno == EINTR || errno == EAGAIN 1078 #ifdef EWOULDBLOCK 1079 || errno == EWOULDBLOCK 1080 #endif 1081 #ifdef ECONNABORTED 1082 || errno == ECONNABORTED 1083 #endif 1084 #ifdef EPROTO 1085 || errno == EPROTO 1086 #endif /* EPROTO */ 1087 ) 1088 return -1; 1089 #if defined(ENFILE) && defined(EMFILE) 1090 if(errno == ENFILE || errno == EMFILE) { 1091 /* out of file descriptors, likely outside of our 1092 * control. 
stop accept() calls for some time */ 1093 if(c->ev->base->stop_accept) { 1094 struct comm_base* b = c->ev->base; 1095 struct timeval tv; 1096 verbose(VERB_ALGO, "out of file descriptors: " 1097 "slow accept"); 1098 ub_comm_base_now(b); 1099 if(b->eb->last_slow_log+SLOW_LOG_TIME <= 1100 b->eb->secs) { 1101 b->eb->last_slow_log = b->eb->secs; 1102 verbose(VERB_OPS, "accept failed, " 1103 "slow down accept for %d " 1104 "msec: %s", 1105 NETEVENT_SLOW_ACCEPT_TIME, 1106 sock_strerror(errno)); 1107 } 1108 b->eb->slow_accept_enabled = 1; 1109 fptr_ok(fptr_whitelist_stop_accept( 1110 b->stop_accept)); 1111 (*b->stop_accept)(b->cb_arg); 1112 /* set timeout, no mallocs */ 1113 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 1114 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 1115 b->eb->slow_accept = ub_event_new(b->eb->base, 1116 -1, UB_EV_TIMEOUT, 1117 comm_base_handle_slow_accept, b); 1118 if(b->eb->slow_accept == NULL) { 1119 /* we do not want to log here, because 1120 * that would spam the logfiles. 1121 * error: "event_base_set failed." */ 1122 } 1123 else if(ub_event_add(b->eb->slow_accept, &tv) 1124 != 0) { 1125 /* we do not want to log here, 1126 * error: "event_add failed." 
#ifdef USE_WINSOCK
/**
 * BIO callback for nonblocking operation on windows.
 * When a read, gets, write or puts on the BIO has returned (with argl 0)
 * and winsock reports WSAEWOULDBLOCK, the event stored as the callback
 * argument is told that the read or write would block.
 * @return the retvalue that was passed in, unchanged.
 */
static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
#ifdef HAVE_BIO_SET_CALLBACK_EX
	size_t ATTR_UNUSED(len),
#endif
	int ATTR_UNUSED(argi), long argl,
#ifndef HAVE_BIO_SET_CALLBACK_EX
	long retvalue
#else
	int retvalue, size_t* ATTR_UNUSED(processed)
#endif
	)
{
	int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
		(oper&BIO_CB_RETURN)?"return":"before",
		(oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
		wsa_err==WSAEWOULDBLOCK?"wsawb":"");
	/* on windows, check if previous operation caused EWOULDBLOCK */
	if(argl == 0 && wsa_err == WSAEWOULDBLOCK) {
		if(oper == (BIO_CB_READ|BIO_CB_RETURN) ||
			oper == (BIO_CB_GETS|BIO_CB_RETURN))
			ub_winsock_tcp_wouldblock((struct ub_event*)
				BIO_get_callback_arg(b), UB_EV_READ);
		if(oper == (BIO_CB_WRITE|BIO_CB_RETURN) ||
			oper == (BIO_CB_PUTS|BIO_CB_RETURN))
			ub_winsock_tcp_wouldblock((struct ub_event*)
				BIO_get_callback_arg(b), UB_EV_WRITE);
	}
	/* return original return value */
	return retvalue;
}

/**
 * Install win_bio_cb on the read BIO and the write BIO of an SSL object,
 * with the event of the comm point as callback argument.
 * @param c: comm point whose event gets the wouldblock notifications.
 * @param thessl: the SSL*, passed as void*.
 */
void
comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
{
	SSL* ssl = (SSL*)thessl;
	BIO* bios[2];
	int i;
	/* set them both just in case, but usually they are the same BIO */
	bios[0] = SSL_get_rbio(ssl);
	bios[1] = SSL_get_wbio(ssl);
	for(i=0; i<2; i++) {
#ifdef HAVE_BIO_SET_CALLBACK_EX
		BIO_set_callback_ex(bios[i], &win_bio_cb);
#else
		BIO_set_callback(bios[i], &win_bio_cb);
#endif
		BIO_set_callback_arg(bios[i], (char*)c->ev->ev);
	}
}
#endif /* USE_WINSOCK */

#ifdef HAVE_NGHTTP2
/**
 * Create the nghttp2 server session for an http2 session structure.
 * Done per connection, after the TCP connection is accepted.
 * @return 0 if nghttp2 reports it is out of memory, otherwise 1.
 */
static int http2_session_server_create(struct http2_session* h2_session)
{
	int ret;
	log_assert(h2_session->callbacks);
	h2_session->is_drop = 0;
	ret = nghttp2_session_server_new(&h2_session->session,
		h2_session->callbacks, h2_session);
	if(ret == NGHTTP2_ERR_NOMEM) {
		log_err("failed to create nghttp2 session server");
		return 0;
	}
	return 1;
}
#endif /* HAVE_NGHTTP2 */
*/ 1237 static int http2_submit_settings(struct http2_session* h2_session) 1238 { 1239 int ret; 1240 nghttp2_settings_entry settings[1] = { 1241 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1242 h2_session->c->http2_max_streams}}; 1243 1244 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 1245 settings, 1); 1246 if(ret) { 1247 verbose(VERB_QUERY, "http2: submit_settings failed, " 1248 "error: %s", nghttp2_strerror(ret)); 1249 return 0; 1250 } 1251 return 1; 1252 } 1253 #endif /* HAVE_NGHTTP2 */ 1254 1255 1256 void 1257 comm_point_tcp_accept_callback(int fd, short event, void* arg) 1258 { 1259 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 1260 int new_fd; 1261 log_assert(c->type == comm_tcp_accept); 1262 if(!(event & UB_EV_READ)) { 1263 log_info("ignoring tcp accept event %d", (int)event); 1264 return; 1265 } 1266 ub_comm_base_now(c->ev->base); 1267 /* find free tcp handler. */ 1268 if(!c->tcp_free) { 1269 log_warn("accepted too many tcp, connections full"); 1270 return; 1271 } 1272 /* accept incoming connection. 
*/ 1273 c_hdl = c->tcp_free; 1274 /* clear leftover flags from previous use, and then set the 1275 * correct event base for the event structure for libevent */ 1276 ub_event_free(c_hdl->ev->ev); 1277 c_hdl->ev->ev = NULL; 1278 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 1279 c_hdl->type == comm_local || c_hdl->type == comm_raw) 1280 c_hdl->tcp_do_toggle_rw = 0; 1281 else c_hdl->tcp_do_toggle_rw = 1; 1282 1283 if(c_hdl->type == comm_http) { 1284 #ifdef HAVE_NGHTTP2 1285 if(!c_hdl->h2_session || 1286 !http2_session_server_create(c_hdl->h2_session)) { 1287 log_warn("failed to create nghttp2"); 1288 return; 1289 } 1290 if(!c_hdl->h2_session || 1291 !http2_submit_settings(c_hdl->h2_session)) { 1292 log_warn("failed to submit http2 settings"); 1293 return; 1294 } 1295 if(!c->ssl) { 1296 c_hdl->tcp_do_toggle_rw = 0; 1297 c_hdl->use_h2 = 1; 1298 } 1299 #endif 1300 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1301 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1302 comm_point_http_handle_callback, c_hdl); 1303 } else { 1304 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1305 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1306 comm_point_tcp_handle_callback, c_hdl); 1307 } 1308 if(!c_hdl->ev->ev) { 1309 log_warn("could not ub_event_new, dropped tcp"); 1310 return; 1311 } 1312 log_assert(fd != -1); 1313 (void)fd; 1314 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr, 1315 &c_hdl->repinfo.remote_addrlen); 1316 if(new_fd == -1) 1317 return; 1318 /* Copy remote_address to client_address. 1319 * Simplest way/time for streams to do that. 
*/ 1320 c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen; 1321 memmove(&c_hdl->repinfo.client_addr, 1322 &c_hdl->repinfo.remote_addr, 1323 c_hdl->repinfo.remote_addrlen); 1324 if(c->ssl) { 1325 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 1326 if(!c_hdl->ssl) { 1327 c_hdl->fd = new_fd; 1328 comm_point_close(c_hdl); 1329 return; 1330 } 1331 c_hdl->ssl_shake_state = comm_ssl_shake_read; 1332 #ifdef USE_WINSOCK 1333 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 1334 #endif 1335 } 1336 1337 /* grab the tcp handler buffers */ 1338 c->cur_tcp_count++; 1339 c->tcp_free = c_hdl->tcp_free; 1340 c_hdl->tcp_free = NULL; 1341 if(!c->tcp_free) { 1342 /* stop accepting incoming queries for now. */ 1343 comm_point_stop_listening(c); 1344 } 1345 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1346 } 1347 1348 /** Make tcp handler free for next assignment */ 1349 static void 1350 reclaim_tcp_handler(struct comm_point* c) 1351 { 1352 log_assert(c->type == comm_tcp); 1353 if(c->ssl) { 1354 #ifdef HAVE_SSL 1355 SSL_shutdown(c->ssl); 1356 SSL_free(c->ssl); 1357 c->ssl = NULL; 1358 #endif 1359 } 1360 comm_point_close(c); 1361 if(c->tcp_parent) { 1362 if(c != c->tcp_parent->tcp_free) { 1363 c->tcp_parent->cur_tcp_count--; 1364 c->tcp_free = c->tcp_parent->tcp_free; 1365 c->tcp_parent->tcp_free = c; 1366 } 1367 if(!c->tcp_free) { 1368 /* re-enable listening on accept socket */ 1369 comm_point_start_listening(c->tcp_parent, -1, -1); 1370 } 1371 } 1372 c->tcp_more_read_again = NULL; 1373 c->tcp_more_write_again = NULL; 1374 c->tcp_byte_count = 0; 1375 c->pp2_header_state = pp2_header_none; 1376 sldns_buffer_clear(c->buffer); 1377 } 1378 1379 /** do the callback when writing is done */ 1380 static void 1381 tcp_callback_writer(struct comm_point* c) 1382 { 1383 log_assert(c->type == comm_tcp); 1384 if(!c->tcp_write_and_read) { 1385 sldns_buffer_clear(c->buffer); 1386 c->tcp_byte_count = 0; 1387 } 1388 if(c->tcp_do_toggle_rw) 1389 c->tcp_is_reading = 1; 1390 
/* switch from listening(write) to listening(read) */ 1391 if(c->tcp_req_info) { 1392 tcp_req_info_handle_writedone(c->tcp_req_info); 1393 } else { 1394 comm_point_stop_listening(c); 1395 if(c->tcp_write_and_read) { 1396 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1397 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1398 &c->repinfo) ) { 1399 comm_point_start_listening(c, -1, 1400 adjusted_tcp_timeout(c)); 1401 } 1402 } else { 1403 comm_point_start_listening(c, -1, 1404 adjusted_tcp_timeout(c)); 1405 } 1406 } 1407 } 1408 1409 /** do the callback when reading is done */ 1410 static void 1411 tcp_callback_reader(struct comm_point* c) 1412 { 1413 log_assert(c->type == comm_tcp || c->type == comm_local); 1414 sldns_buffer_flip(c->buffer); 1415 if(c->tcp_do_toggle_rw) 1416 c->tcp_is_reading = 0; 1417 c->tcp_byte_count = 0; 1418 if(c->tcp_req_info) { 1419 tcp_req_info_handle_readdone(c->tcp_req_info); 1420 } else { 1421 if(c->type == comm_tcp) 1422 comm_point_stop_listening(c); 1423 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1424 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 1425 comm_point_start_listening(c, -1, 1426 adjusted_tcp_timeout(c)); 1427 } 1428 } 1429 } 1430 1431 #ifdef HAVE_SSL 1432 /** true if the ssl handshake error has to be squelched from the logs */ 1433 int 1434 squelch_err_ssl_handshake(unsigned long err) 1435 { 1436 if(verbosity >= VERB_QUERY) 1437 return 0; /* only squelch on low verbosity */ 1438 if(ERR_GET_LIB(err) == ERR_LIB_SSL && 1439 (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST || 1440 ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST || 1441 ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER || 1442 ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE 1443 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1444 || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER 1445 #endif 1446 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1447 || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL 1448 || ERR_GET_REASON(err) == 
#ifdef HAVE_SSL
/**
 * Continue the ssl handshake.
 * Also finishes a pending hs_read/hs_write state by switching the event
 * back to the other direction. After a completed handshake the peer is
 * checked when SSL_VERIFY_PEER is set, and the use of http2 is picked up
 * from ALPN for http comm points.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection has to be closed, 1 to continue. On 1 the
 *	handshake is only complete when c->ssl_shake_state is
 *	comm_ssl_shake_none.
 */
static int
ssl_handshake(struct comm_point* c)
{
	int rc;
	if(c->ssl_shake_state == comm_ssl_shake_hs_read ||
		c->ssl_shake_state == comm_ssl_shake_hs_write) {
		if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
			/* read condition satisfied back to writing */
			comm_point_listen_for_rw(c, 0, 1);
		} else {
			/* write condition satisfied, back to reading */
			comm_point_listen_for_rw(c, 1, 0);
		}
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	ERR_clear_error();
	rc = SSL_do_handshake(c->ssl);
	if(rc != 1) {
		unsigned long err;
		int sslerr = SSL_get_error(c->ssl, rc);
		if(sslerr == SSL_ERROR_WANT_READ) {
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		}
		if(sslerr == SSL_ERROR_WANT_WRITE) {
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		}
		if(rc == 0)
			return 0; /* closed */
		if(sslerr == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
#ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
			/* silence connect failures that show up because
			 * after connect this is the first system call
			 * that accesses the socket */
			if(!tcp_connect_errno_needs_log(
				(struct sockaddr*)&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen))
				return 0;
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		err = ERR_get_error();
		if(!squelch_err_ssl_handshake(err)) {
			log_crypto_err_code("ssl handshake failed", err);
			log_addr(VERB_OPS, "ssl handshake failed",
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
		}
		return 0;
	}

	/* this is where peer verification could take place */
	if(!(SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
		/* unauthenticated, the verify peer flag was not set
		 * in c->ssl when the ssl object was created from ssl_ctx */
		log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr,
			c->repinfo.remote_addrlen);
	} else {
		long verified = SSL_get_verify_result(c->ssl);
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
		X509* cert = SSL_get1_peer_certificate(c->ssl);
#else
		X509* cert = SSL_get_peer_certificate(c->ssl);
#endif
		if(verified != X509_V_OK) {
			if(cert) {
				log_cert(VERB_ALGO, "peer certificate", cert);
				X509_free(cert);
			}
			log_addr(VERB_ALGO, "SSL connection failed: "
				"failed to authenticate",
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		if(!cert) {
			log_addr(VERB_ALGO, "SSL connection failed: "
				"no certificate",
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		log_cert(VERB_ALGO, "peer certificate", cert);
#ifdef HAVE_SSL_GET0_PEERNAME
		if(SSL_get0_peername(c->ssl)) {
			char msg[255];
			snprintf(msg, sizeof(msg), "SSL connection "
				"to %s authenticated",
				SSL_get0_peername(c->ssl));
			log_addr(VERB_ALGO, msg, &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
		} else
#endif
		{
			log_addr(VERB_ALGO, "SSL connection "
				"authenticated", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
		}
		X509_free(cert);
	}

#ifdef HAVE_SSL_GET0_ALPN_SELECTED
	/* check if http2 use is negotiated */
	if(c->type == comm_http && c->h2_session) {
		const unsigned char *alpn;
		unsigned int alpnlen = 0;
		SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
		if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
			/* connection upgraded to HTTP2 */
			c->tcp_do_toggle_rw = 0;
			c->use_h2 = 1;
		}
	}
#endif

	/* setup listen rw correctly */
	if(!c->tcp_is_reading)
		comm_point_listen_for_rw(c, 0, 1);
	else if(c->ssl_shake_state != comm_ssl_shake_read)
		comm_point_listen_for_rw(c, 1, 0);
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
#ifdef HAVE_SSL
/**
 * Work out what to do after SSL_read returned a value <= 0.
 * Shared by all the SSL_read calls of ssl_handle_read.
 * @param c: comm point the read was done for.
 * @param r: the return value of SSL_read.
 * @return the value for ssl_handle_read to return: 1 to keep the
 *	connection and try again later, 0 to close it. On a clean TLS
 *	shutdown with a tcp_req_info, the result of
 *	tcp_req_info_handle_read_close() is returned.
 */
static int
ssl_read_failed(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0; /* shutdown, closed */
	} else if(want == SSL_ERROR_WANT_READ) {
#ifdef USE_WINSOCK
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
		return 1; /* read more later */
	} else if(want == SSL_ERROR_WANT_WRITE) {
		/* the read continues once the socket is writable */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	} else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first. If PROXYv2 is enabled and its
 * header is not done yet, this call reads (part of) that header and
 * returns. Otherwise it reads the two length bytes and then the DNS
 * message, and calls tcp_callback_reader() once the message is complete.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection has to be closed, 1 to continue.
 *	Without HAVE_SSL it always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
		struct pp2_header* header = NULL;
		size_t want_read_size = 0;
		size_t current_read_size = 0;
		if(c->pp2_header_state == pp2_header_none) {
			want_read_size = PP2_HEADER_SIZE;
			if(sldns_buffer_remaining(c->buffer)<want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading fixed "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = want_read_size;
			if(c->tcp_byte_count < current_read_size) {
				ERR_clear_error();
				if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(
					c->buffer, c->tcp_byte_count),
					(int)(current_read_size -
					c->tcp_byte_count))) <= 0)
					return ssl_read_failed(c, r);
				c->tcp_byte_count += r;
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_init;
			}
		}
		if(c->pp2_header_state == pp2_header_init) {
			header = pp2_read_header(c->buffer);
			if(!header) {
				log_err("proxy_protocol: could not parse "
					"PROXYv2 header");
				return 0;
			}
			/* the fixed part tells how long the rest is */
			want_read_size = ntohs(header->len);
			if(sldns_buffer_remaining(c->buffer) <
				PP2_HEADER_SIZE + want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading variable "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = PP2_HEADER_SIZE + want_read_size;
			if(want_read_size == 0) {
				/* nothing more to read; header is complete */
				c->pp2_header_state = pp2_header_done;
			} else if(c->tcp_byte_count < current_read_size) {
				ERR_clear_error();
				if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(
					c->buffer, c->tcp_byte_count),
					(int)(current_read_size -
					c->tcp_byte_count))) <= 0)
					return ssl_read_failed(c, r);
				c->tcp_byte_count += r;
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_done;
			}
		}
		if(c->pp2_header_state != pp2_header_done || !header) {
			log_err_addr("proxy_protocol: wrong state for the "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
			log_err_addr("proxy_protocol: could not consume "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		verbose(VERB_ALGO, "proxy_protocol: successful read of "
			"PROXYv2 header");
		/* Clear and reset the buffer to read the following
		 * DNS packet(s). */
		sldns_buffer_clear(c->buffer);
		c->tcp_byte_count = 0;
		return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		ERR_clear_error();
		if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
			c->tcp_byte_count), (int)(sizeof(uint16_t) -
			c->tcp_byte_count))) <= 0)
			return ssl_read_failed(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	if(sldns_buffer_remaining(c->buffer) > 0) {
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(r <= 0)
			return ssl_read_failed(c, r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
#ifdef HAVE_SSL
/**
 * Work out what to do after SSL_write returned a value <= 0.
 * Shared by both SSL_write stages of ssl_handle_write.
 * @param c: comm point the write was done for.
 * @param r: the return value of SSL_write.
 * @return the value for ssl_handle_write to return: 1 to keep the
 *	connection and try again later, 0 to close it.
 */
static int
ssl_write_failed(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		return 0; /* closed */
	} else if(want == SSL_ERROR_WANT_READ) {
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	} else if(want == SSL_ERROR_WANT_WRITE) {
#ifdef USE_WINSOCK
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
#endif
		return 1; /* write more later */
	} else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}

/**
 * See if the whole message, length prefix included, has been written.
 * In tcp_write_and_read mode the write byte count is compared with the
 * packet length plus 2; otherwise the buffer must have nothing remaining.
 */
static int
ssl_write_is_complete(struct comm_point* c)
{
	if(c->tcp_write_and_read)
		return c->tcp_write_byte_count == c->tcp_write_pkt_len + 2;
	return sldns_buffer_remaining(c->buffer) == 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first. Then writes the two byte length
 * prefix, together with the message when both fit in a buffer of
 * LDNS_RR_BUF_SIZE, followed by whatever is left of the message.
 * tcp_callback_writer() is called once everything is written.
 * @param c: comm point with c->ssl set. The data is c->tcp_write_pkt in
 *	tcp_write_and_read mode and c->buffer otherwise.
 * @return 0 if the connection has to be closed, 1 to continue.
 *	Without HAVE_SSL it always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
	if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		if(c->tcp_write_and_read) {
			if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
				/* combine the tcp length and the query for
				 * write, this emulates writev */
				uint8_t buf[LDNS_RR_BUF_SIZE];
				memmove(buf, &len, sizeof(uint16_t));
				memmove(buf+sizeof(uint16_t),
					c->tcp_write_pkt,
					c->tcp_write_pkt_len);
				r = SSL_write(c->ssl,
					(void*)(buf+c->tcp_write_byte_count),
					(int)(c->tcp_write_pkt_len + 2 -
					c->tcp_write_byte_count));
			} else {
				r = SSL_write(c->ssl,
					(void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
					(int)(sizeof(uint16_t)-c->tcp_write_byte_count));
			}
		} else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
			LDNS_RR_BUF_SIZE) {
			/* combine the tcp length and the query for write,
			 * this emulates writev */
			uint8_t buf[LDNS_RR_BUF_SIZE];
			memmove(buf, &len, sizeof(uint16_t));
			memmove(buf+sizeof(uint16_t),
				sldns_buffer_current(c->buffer),
				sldns_buffer_remaining(c->buffer));
			r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
				(int)(sizeof(uint16_t)+
				sldns_buffer_remaining(c->buffer)
				- c->tcp_byte_count));
		} else {
			r = SSL_write(c->ssl,
				(void*)(((uint8_t*)&len)+c->tcp_byte_count),
				(int)(sizeof(uint16_t)-c->tcp_byte_count));
		}
		if(r <= 0)
			return ssl_write_failed(c, r);
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			/* account for message bytes that went out together
			 * with the length prefix */
			sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		if(ssl_write_is_complete(c)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	ERR_clear_error();
	if(c->tcp_write_and_read) {
		/* the byte count includes the 2 length bytes */
		r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			(int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
	} else {
		r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
	}
	if(r <= 0)
		return ssl_write_failed(c, r);
	if(c->tcp_write_and_read) {
		c->tcp_write_byte_count += r;
	} else {
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}

	if(ssl_write_is_complete(c)) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
(ssize_t)r); 1988 } 1989 1990 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1991 tcp_callback_writer(c); 1992 } 1993 return 1; 1994 #else 1995 (void)c; 1996 return 0; 1997 #endif /* HAVE_SSL */ 1998 } 1999 2000 /** handle ssl tcp connection with dns contents */ 2001 static int 2002 ssl_handle_it(struct comm_point* c, int is_write) 2003 { 2004 /* handle case where renegotiation wants read during write call 2005 * or write during read calls */ 2006 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 2007 return ssl_handle_read(c); 2008 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 2009 return ssl_handle_write(c); 2010 /* handle read events for read operation and write events for a 2011 * write operation */ 2012 else if(!is_write) 2013 return ssl_handle_read(c); 2014 return ssl_handle_write(c); 2015 } 2016 2017 /** 2018 * Handle tcp reading callback. 2019 * @param fd: file descriptor of socket. 2020 * @param c: comm point to read from into buffer. 2021 * @param short_ok: if true, very short packets are OK (for comm_local). 
* @return: 0 on error or when the connection has to be closed, 1 when
 *	the read was handled, also when it has to be retried later. When the
 *	peer closed and c->tcp_req_info is set, the result of
 *	tcp_req_info_handle_read_close is returned.
 */
static int
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
{
	ssize_t r;
	/* set via the recv_error_initial label; enables extra silencing of
	 * connection errors in the error exit below */
	int recv_initial = 0;
	log_assert(c->type == comm_tcp || c->type == comm_local);
	/* connections with ssl are handled by the ssl routines */
	if(c->ssl)
		return ssl_handle_it(c, 0);
	if(!c->tcp_is_reading && !c->tcp_write_and_read)
		return 0;

	log_assert(fd != -1);
	/* A PROXYv2 header, if enabled, is read in two steps before any DNS
	 * data: the fixed part of PP2_HEADER_SIZE bytes and then the
	 * variable part whose length is in the fixed part. During this,
	 * tcp_byte_count counts the header bytes received so far. */
	if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
		struct pp2_header* header = NULL;
		size_t want_read_size = 0;
		size_t current_read_size = 0;
		if(c->pp2_header_state == pp2_header_none) {
			want_read_size = PP2_HEADER_SIZE;
			if(sldns_buffer_remaining(c->buffer)<want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading fixed "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = want_read_size;
			if(c->tcp_byte_count < current_read_size) {
				r = recv(fd, (void*)sldns_buffer_at(c->buffer,
					c->tcp_byte_count),
					current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
				if(r == 0) {
					/* peer closed the connection */
					if(c->tcp_req_info)
						return tcp_req_info_handle_read_close(c->tcp_req_info);
					return 0;
				} else if(r == -1) {
					/* first read on the connection */
					goto recv_error_initial;
				}
				c->tcp_byte_count += r;
				/* partial header, wait for the rest */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_init;
			}
		}
		if(c->pp2_header_state == pp2_header_init) {
			header = pp2_read_header(c->buffer);
			if(!header) {
				log_err("proxy_protocol: could not parse "
					"PROXYv2 header");
				return 0;
			}
			/* length of the variable part, from the fixed part */
			want_read_size = ntohs(header->len);
			if(sldns_buffer_remaining(c->buffer) <
				PP2_HEADER_SIZE + want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading variable "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = PP2_HEADER_SIZE + want_read_size;
			if(want_read_size == 0) {
				/* nothing more to read; header is complete */
				c->pp2_header_state = pp2_header_done;
			} else if(c->tcp_byte_count < current_read_size) {
				r = recv(fd, (void*)sldns_buffer_at(c->buffer,
					c->tcp_byte_count),
					current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
				if(r == 0) {
					if(c->tcp_req_info)
						return tcp_req_info_handle_read_close(c->tcp_req_info);
					return 0;
				} else if(r == -1) {
					goto recv_error;
				}
				c->tcp_byte_count += r;
				/* partial header, wait for the rest */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_done;
			}
		}
		/* header is only set by the pp2_header_init step above, so a
		 * call that did not pass through that step ends here */
		if(c->pp2_header_state != pp2_header_done || !header) {
			log_err_addr("proxy_protocol: wrong state for the "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
			log_err_addr("proxy_protocol: could not consume "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		verbose(VERB_ALGO, "proxy_protocol: successful read of "
			"PROXYv2 header");
		/* Clear and reset the buffer to read the following
		 * DNS packet(s). */
		sldns_buffer_clear(c->buffer);
		c->tcp_byte_count = 0;
		return 1;
	}

	/* the message is preceded by a two byte length; tcp_byte_count
	 * counts how many of those length bytes have arrived */
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
			sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT);
		if(r == 0) {
			if(c->tcp_req_info)
				return tcp_req_info_handle_read_close(c->tcp_req_info);
			return 0;
		} else if(r == -1) {
			/* with PROXYv2 enabled this is not treated as the
			 * initial read of the connection */
			if(c->pp2_enabled) goto recv_error;
			goto recv_error_initial;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* the announced length has to fit in the buffer */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
			return 0;
		}
		/* the limit of the buffer becomes the message length */
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(!short_ok &&
			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}

	/* read (more of) the message itself, up to the buffer limit */
	if(sldns_buffer_remaining(c->buffer) == 0)
		log_err("in comm_point_tcp_handle_read buffer_remaining is "
			"not > 0 as expected, continuing with (harmless) 0 "
			"length recv");
	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
		sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
	if(r == 0) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0;
	} else if(r == -1) {
		goto recv_error;
	}
	sldns_buffer_skip(c->buffer, r);
	/* the message is complete when nothing remains up to the limit */
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;

	/* error exit for a failed recv; entering at recv_error_initial also
	 * silences a number of connection errors when verbosity < 2 */
recv_error_initial:
	recv_initial = 1;
recv_error:
#ifndef USE_WINSOCK
	/* interrupted or no data available right now: try again later */
	if(errno == EINTR || errno == EAGAIN)
		return 1;
	if(recv_initial) {
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
#ifdef ECONNREFUSED
		if(errno == ECONNREFUSED && verbosity < 2)
			return 0; /* silence connection refused */
#endif
#ifdef ENETUNREACH
		if(errno == ENETUNREACH && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EHOSTDOWN
		if(errno == EHOSTDOWN && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EHOSTUNREACH
		if(errno == EHOSTUNREACH && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef ENETDOWN
		if(errno == ENETDOWN && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EACCES
		if(errno == EACCES && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef ENOTCONN
		/* logged at every verbosity level, with a hint */
		if(errno == ENOTCONN) {
			log_err_addr("read (in tcp s) failed and this "
				"could be because TCP Fast Open is "
				"enabled [--disable-tfo-client "
				"--disable-tfo-server] but does not "
				"work", sock_strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
#endif
	}
#else /* USE_WINSOCK */
	if(recv_initial) {
		if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
			return 0;
	}
	/* reset by peer is never logged here */
	if(WSAGetLastError() == WSAECONNRESET)
		return 0;
	if(WSAGetLastError() == WSAEINPROGRESS)
		return 1;
	if(WSAGetLastError() == WSAEWOULDBLOCK) {
		ub_winsock_tcp_wouldblock(c->ev->ev,
			UB_EV_READ);
		return 1;
	}
#endif
	log_err_addr("read (in tcp s)", sock_strerror(errno),
		&c->repinfo.remote_addr, c->repinfo.remote_addrlen);
	return 0;
}

/**
 * Handle tcp writing
callback.
 * Writes a two byte length followed by the message. In write-and-read
 * mode the message is c->tcp_write_pkt and progress is kept in
 * c->tcp_write_byte_count; otherwise the message is in the buffer and
 * progress is kept in c->tcp_byte_count. Both counters include the two
 * length bytes.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write buffer out of.
 * @return: 0 on error, 1 if the write was handled (also when it has to
 *	be continued later).
 */
static int
comm_point_tcp_handle_write(int fd, struct comm_point* c)
{
	ssize_t r;
	struct sldns_buffer *buffer;
	log_assert(c->type == comm_tcp);
	/* with dnscrypt the data to send is in the dnscrypt buffer */
#ifdef USE_DNSCRYPT
	buffer = c->dnscrypt_buffer;
#else
	buffer = c->buffer;
#endif
	if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
		return 0;
	log_assert(fd != -1);
	/* nothing was written yet, so this may be the first write event
	 * after a nonblocking connect */
	if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
		/* check for pending error from nonblocking connect */
		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
		int error = 0;
		socklen_t len = (socklen_t)sizeof(error);
		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
			&len) < 0){
#ifndef USE_WINSOCK
			error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
			error = WSAGetLastError();
#endif
		}
		/* NOTE: both preprocessor branches below open the same
		 * 'else if(error != 0) {' block, which is closed after the
		 * #endif */
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
		if(error == EINPROGRESS || error == EWOULDBLOCK)
			return 1; /* try again later */
		else
#endif
		if(error != 0 && verbosity < 2)
			return 0; /* silence lots of chatter in the logs */
		else if(error != 0) {
			log_err_addr("tcp connect", strerror(error),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
		/* examine error */
		if(error == WSAEINPROGRESS)
			return 1;
		else if(error == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		} else if(error != 0 && verbosity < 2)
			return 0;
		else if(error != 0) {
			log_err_addr("tcp connect", wsa_strerror(error),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#endif /* USE_WINSOCK */
			return 0;
		}
	}
	/* connections with ssl are handled by the ssl routines */
	if(c->ssl)
		return ssl_handle_it(c, 1);

#ifdef USE_MSG_FASTOPEN
	/* Only try this on first use of a connection that uses tfo,
	   otherwise fall through to normal write */
	/* Also, TFO support on WINDOWS not implemented at the moment */
	if(c->tcp_do_fastopen == 1) {
		/* this form of sendmsg() does both a connect() and send() so need to
		   look for various flavours of error*/
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
		struct msghdr msg;
		struct iovec iov[2];
		/* the fastopen attempt is made only once */
		c->tcp_do_fastopen = 0;
		memset(&msg, 0, sizeof(msg));
		/* iov[0] is the (rest of the) length, iov[1] the message */
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		msg.msg_name = &c->repinfo.remote_addr;
		msg.msg_namelen = c->repinfo.remote_addrlen;
		msg.msg_iov = iov;
		msg.msg_iovlen = 2;
		r = sendmsg(fd, &msg, MSG_FASTOPEN);
		if (r == -1) {
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
			/* Handshake is underway, maybe because no TFO cookie available.
			   Come back to write the message*/
			if(errno == EINPROGRESS || errno == EWOULDBLOCK)
				return 1;
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
			/* Not handling EISCONN here as shouldn't ever hit that case.*/
			if(errno != EPIPE
#ifdef EOPNOTSUPP
				/* if /proc/sys/net/ipv4/tcp_fastopen is
				 * disabled on Linux, sendmsg may return
				 * 'Operation not supported', if so
				 * fallthrough to ordinary connect. */
				&& errno != EOPNOTSUPP
#endif
				&& errno != 0) {
				if(verbosity < 2)
					return 0; /* silence lots of chatter in the logs */
				log_err_addr("tcp sendmsg", strerror(errno),
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
			/* fallthrough to nonFASTOPEN
			 * (MSG_FASTOPEN on Linux 3 produces EPIPE)
			 * we need to perform connect() */
			if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen) == -1) {
#ifdef EINPROGRESS
				if(errno == EINPROGRESS)
					return 1; /* wait until connect done*/
#endif
#ifdef USE_WINSOCK
				if(WSAGetLastError() == WSAEINPROGRESS ||
					WSAGetLastError() == WSAEWOULDBLOCK)
					return 1; /* wait until connect done*/
#endif
				if(tcp_connect_errno_needs_log(
					(struct sockaddr *)&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen)) {
					log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
						strerror(errno),
						&c->repinfo.remote_addr,
						c->repinfo.remote_addrlen);
				}
				return 0;
			}
			/* connect succeeded at once: continue below with
			 * the ordinary write */

		} else {
			/* sendmsg wrote r bytes, account for them */
			if(c->tcp_write_and_read) {
				c->tcp_write_byte_count += r;
				if(c->tcp_write_byte_count < sizeof(uint16_t))
					return 1;
			} else {
				c->tcp_byte_count += r;
				if(c->tcp_byte_count < sizeof(uint16_t))
					return 1;
				/* position in the message excludes the
				 * two length bytes */
				sldns_buffer_set_position(buffer, c->tcp_byte_count -
					sizeof(uint16_t));
			}
			if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
				tcp_callback_writer(c);
				return 1;
			}
		}
	}
#endif /* USE_MSG_FASTOPEN */

	/* the two length bytes have not been written completely yet */
	if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
#ifdef HAVE_WRITEV
		/* with writev the length and the message go out in one call */
		struct iovec iov[2];
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		r = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		/* without writev only the length bytes are sent here */
		if(c->tcp_write_and_read) {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
				sizeof(uint16_t)-c->tcp_write_byte_count, 0);
		} else {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
				sizeof(uint16_t)-c->tcp_byte_count, 0);
		}
#endif /* HAVE_WRITEV */
		if(r == -1) {
#ifndef USE_WINSOCK
# ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
# ifdef HAVE_WRITEV
			log_err_addr("tcp writev", strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
# else /* HAVE_WRITEV */
			log_err_addr("tcp send s", strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
# endif /* HAVE_WRITEV */
#else
			if(WSAGetLastError() == WSAENOTCONN)
				return 1;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_WRITE);
				return 1;
			}
			if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
			log_err_addr("tcp send s",
				wsa_strerror(WSAGetLastError()),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#endif
			return 0;
		}
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			/* position in the message excludes the two length
			 * bytes */
			sldns_buffer_set_position(buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		/* everything was written in this one call */
		if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* the length has been sent, write (the rest of) the message */
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	if(c->tcp_write_and_read) {
		/* the counter includes the two length bytes, hence the -2
		 * for the offset into the packet */
		r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
	} else {
		r = send(fd, (void*)sldns_buffer_current(buffer),
			sldns_buffer_remaining(buffer), 0);
	}
	if(r == -1) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
#else
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		log_err_addr("tcp send r", sock_strerror(errno),
			&c->repinfo.remote_addr,
			c->repinfo.remote_addrlen);
		return 0;
	}
	if(c->tcp_write_and_read) {
		c->tcp_write_byte_count += r;
	} else {
		sldns_buffer_skip(buffer, r);
	}

	/* signal the writer callback once the whole message is out */
	if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
		tcp_callback_writer(c);
	}

	return 1;
}

/**
 * Read again to drain buffers when there could be more to read.
 * Loops for as long as c->tcp_req_info->read_again is set; the flag is
 * cleared before every attempt. Reads or writes depending on
 * c->tcp_is_reading. On failure the handler is reclaimed and, unless
 * c->tcp_do_close is set, the callback gets NETEVENT_CLOSED.
 * @param fd: file descriptor of socket.
 * @param c: comm point; c->tcp_req_info is dereferenced without a check.
 */
static void
tcp_req_info_read_again(int fd, struct comm_point* c)
{
	while(c->tcp_req_info->read_again) {
		int r;
		c->tcp_req_info->read_again = 0;
		if(c->tcp_is_reading)
			r = comm_point_tcp_handle_read(fd, c, 0);
		else 	r = comm_point_tcp_handle_write(fd, c);
		if(!r) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
}

/**
 * Read again to drain buffers when there could be more to read.
 * Uses the flag that c->tcp_more_read_again points to (may be NULL).
 * @param fd: file descriptor of socket.
 * @param c: comm point to read on.
 */
static void
tcp_more_read_again(int fd, struct comm_point* c)
{
	/* if the packet is done, but another one could be waiting on
	 * the connection, the callback signals this, and we try again */
	/* this continues until the read routines get EAGAIN or so,
	 * and thus does not call the callback, and the bool is 0 */
	int* moreread = c->tcp_more_read_again;
	while(moreread && *moreread) {
		*moreread = 0;
		if(!comm_point_tcp_handle_read(fd, c, 0)) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
}

/** write again to fill up when there could be more to write */
static void
tcp_more_write_again(int fd, struct comm_point* c)
{
	/* if the packet is done, but another is waiting to be written,
	 * the callback signals it and we try again.
*/ 2601 /* this continues until the write routines get EAGAIN or so, 2602 * and thus does not call the callback, and the bool is 0 */ 2603 int* morewrite = c->tcp_more_write_again; 2604 while(morewrite && *morewrite) { 2605 *morewrite = 0; 2606 if(!comm_point_tcp_handle_write(fd, c)) { 2607 reclaim_tcp_handler(c); 2608 if(!c->tcp_do_close) { 2609 fptr_ok(fptr_whitelist_comm_point( 2610 c->callback)); 2611 (void)(*c->callback)(c, c->cb_arg, 2612 NETEVENT_CLOSED, NULL); 2613 } 2614 return; 2615 } 2616 } 2617 } 2618 2619 void 2620 comm_point_tcp_handle_callback(int fd, short event, void* arg) 2621 { 2622 struct comm_point* c = (struct comm_point*)arg; 2623 log_assert(c->type == comm_tcp); 2624 ub_comm_base_now(c->ev->base); 2625 2626 #ifdef USE_DNSCRYPT 2627 /* Initialize if this is a dnscrypt socket */ 2628 if(c->tcp_parent) { 2629 c->dnscrypt = c->tcp_parent->dnscrypt; 2630 } 2631 if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) { 2632 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer)); 2633 if(!c->dnscrypt_buffer) { 2634 log_err("Could not allocate dnscrypt buffer"); 2635 reclaim_tcp_handler(c); 2636 if(!c->tcp_do_close) { 2637 fptr_ok(fptr_whitelist_comm_point( 2638 c->callback)); 2639 (void)(*c->callback)(c, c->cb_arg, 2640 NETEVENT_CLOSED, NULL); 2641 } 2642 return; 2643 } 2644 } 2645 #endif 2646 2647 if(event&UB_EV_TIMEOUT) { 2648 verbose(VERB_QUERY, "tcp took too long, dropped"); 2649 reclaim_tcp_handler(c); 2650 if(!c->tcp_do_close) { 2651 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2652 (void)(*c->callback)(c, c->cb_arg, 2653 NETEVENT_TIMEOUT, NULL); 2654 } 2655 return; 2656 } 2657 if(event&UB_EV_READ 2658 #ifdef USE_MSG_FASTOPEN 2659 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE)) 2660 #endif 2661 ) { 2662 int has_tcpq = (c->tcp_req_info != NULL); 2663 int* moreread = c->tcp_more_read_again; 2664 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2665 reclaim_tcp_handler(c); 2666 if(!c->tcp_do_close) { 2667 
fptr_ok(fptr_whitelist_comm_point( 2668 c->callback)); 2669 (void)(*c->callback)(c, c->cb_arg, 2670 NETEVENT_CLOSED, NULL); 2671 } 2672 return; 2673 } 2674 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2675 tcp_req_info_read_again(fd, c); 2676 if(moreread && *moreread) 2677 tcp_more_read_again(fd, c); 2678 return; 2679 } 2680 if(event&UB_EV_WRITE) { 2681 int has_tcpq = (c->tcp_req_info != NULL); 2682 int* morewrite = c->tcp_more_write_again; 2683 if(!comm_point_tcp_handle_write(fd, c)) { 2684 reclaim_tcp_handler(c); 2685 if(!c->tcp_do_close) { 2686 fptr_ok(fptr_whitelist_comm_point( 2687 c->callback)); 2688 (void)(*c->callback)(c, c->cb_arg, 2689 NETEVENT_CLOSED, NULL); 2690 } 2691 return; 2692 } 2693 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2694 tcp_req_info_read_again(fd, c); 2695 if(morewrite && *morewrite) 2696 tcp_more_write_again(fd, c); 2697 return; 2698 } 2699 log_err("Ignored event %d for tcphdl.", event); 2700 } 2701 2702 /** Make http handler free for next assignment */ 2703 static void 2704 reclaim_http_handler(struct comm_point* c) 2705 { 2706 log_assert(c->type == comm_http); 2707 if(c->ssl) { 2708 #ifdef HAVE_SSL 2709 SSL_shutdown(c->ssl); 2710 SSL_free(c->ssl); 2711 c->ssl = NULL; 2712 #endif 2713 } 2714 comm_point_close(c); 2715 if(c->tcp_parent) { 2716 if(c != c->tcp_parent->tcp_free) { 2717 c->tcp_parent->cur_tcp_count--; 2718 c->tcp_free = c->tcp_parent->tcp_free; 2719 c->tcp_parent->tcp_free = c; 2720 } 2721 if(!c->tcp_free) { 2722 /* re-enable listening on accept socket */ 2723 comm_point_start_listening(c->tcp_parent, -1, -1); 2724 } 2725 } 2726 } 2727 2728 /** read more data for http (with ssl) */ 2729 static int 2730 ssl_http_read_more(struct comm_point* c) 2731 { 2732 #ifdef HAVE_SSL 2733 int r; 2734 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2735 ERR_clear_error(); 2736 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2737 (int)sldns_buffer_remaining(c->buffer)); 2738 if(r <= 0) { 
2739 int want = SSL_get_error(c->ssl, r); 2740 if(want == SSL_ERROR_ZERO_RETURN) { 2741 return 0; /* shutdown, closed */ 2742 } else if(want == SSL_ERROR_WANT_READ) { 2743 return 1; /* read more later */ 2744 } else if(want == SSL_ERROR_WANT_WRITE) { 2745 c->ssl_shake_state = comm_ssl_shake_hs_write; 2746 comm_point_listen_for_rw(c, 0, 1); 2747 return 1; 2748 } else if(want == SSL_ERROR_SYSCALL) { 2749 #ifdef ECONNRESET 2750 if(errno == ECONNRESET && verbosity < 2) 2751 return 0; /* silence reset by peer */ 2752 #endif 2753 if(errno != 0) 2754 log_err("SSL_read syscall: %s", 2755 strerror(errno)); 2756 return 0; 2757 } 2758 log_crypto_err("could not SSL_read"); 2759 return 0; 2760 } 2761 verbose(VERB_ALGO, "ssl http read more skip to %d + %d", 2762 (int)sldns_buffer_position(c->buffer), (int)r); 2763 sldns_buffer_skip(c->buffer, (ssize_t)r); 2764 return 1; 2765 #else 2766 (void)c; 2767 return 0; 2768 #endif /* HAVE_SSL */ 2769 } 2770 2771 /** read more data for http */ 2772 static int 2773 http_read_more(int fd, struct comm_point* c) 2774 { 2775 ssize_t r; 2776 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2777 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2778 sldns_buffer_remaining(c->buffer), MSG_DONTWAIT); 2779 if(r == 0) { 2780 return 0; 2781 } else if(r == -1) { 2782 #ifndef USE_WINSOCK 2783 if(errno == EINTR || errno == EAGAIN) 2784 return 1; 2785 #else /* USE_WINSOCK */ 2786 if(WSAGetLastError() == WSAECONNRESET) 2787 return 0; 2788 if(WSAGetLastError() == WSAEINPROGRESS) 2789 return 1; 2790 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2791 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2792 return 1; 2793 } 2794 #endif 2795 log_err_addr("read (in http r)", sock_strerror(errno), 2796 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 2797 return 0; 2798 } 2799 verbose(VERB_ALGO, "http read more skip to %d + %d", 2800 (int)sldns_buffer_position(c->buffer), (int)r); 2801 sldns_buffer_skip(c->buffer, r); 2802 return 1; 2803 } 2804 2805 /** return 
true if http header has been read (one line complete) */ 2806 static int 2807 http_header_done(sldns_buffer* buf) 2808 { 2809 size_t i; 2810 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2811 /* there was a \r before the \n, but we ignore that */ 2812 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 2813 return 1; 2814 } 2815 return 0; 2816 } 2817 2818 /** return character string into buffer for header line, moves buffer 2819 * past that line and puts zero terminator into linefeed-newline */ 2820 static char* 2821 http_header_line(sldns_buffer* buf) 2822 { 2823 char* result = (char*)sldns_buffer_current(buf); 2824 size_t i; 2825 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2826 /* terminate the string on the \r */ 2827 if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 2828 sldns_buffer_write_u8_at(buf, i, 0); 2829 /* terminate on the \n and skip past the it and done */ 2830 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 2831 sldns_buffer_write_u8_at(buf, i, 0); 2832 sldns_buffer_set_position(buf, i+1); 2833 return result; 2834 } 2835 } 2836 return NULL; 2837 } 2838 2839 /** move unread buffer to start and clear rest for putting the rest into it */ 2840 static void 2841 http_moveover_buffer(sldns_buffer* buf) 2842 { 2843 size_t pos = sldns_buffer_position(buf); 2844 size_t len = sldns_buffer_remaining(buf); 2845 sldns_buffer_clear(buf); 2846 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 2847 sldns_buffer_set_position(buf, len); 2848 } 2849 2850 /** a http header is complete, process it */ 2851 static int 2852 http_process_initial_header(struct comm_point* c) 2853 { 2854 char* line = http_header_line(c->buffer); 2855 if(!line) return 1; 2856 verbose(VERB_ALGO, "http header: %s", line); 2857 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 2858 /* check returncode */ 2859 if(line[9] != '2') { 2860 verbose(VERB_ALGO, "http bad status %s", line+9); 2861 return 0; 2862 } 2863 } else if(strncasecmp(line, 
"Content-Length: ", 16) == 0) { 2864 if(!c->http_is_chunked) 2865 c->tcp_byte_count = (size_t)atoi(line+16); 2866 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 2867 c->tcp_byte_count = 0; 2868 c->http_is_chunked = 1; 2869 } else if(line[0] == 0) { 2870 /* end of initial headers */ 2871 c->http_in_headers = 0; 2872 if(c->http_is_chunked) 2873 c->http_in_chunk_headers = 1; 2874 /* remove header text from front of buffer 2875 * the buffer is going to be used to return the data segment 2876 * itself and we don't want the header to get returned 2877 * prepended with it */ 2878 http_moveover_buffer(c->buffer); 2879 sldns_buffer_flip(c->buffer); 2880 return 1; 2881 } 2882 /* ignore other headers */ 2883 return 1; 2884 } 2885 2886 /** a chunk header is complete, process it, return 0=fail, 1=continue next 2887 * header line, 2=done with chunked transfer*/ 2888 static int 2889 http_process_chunk_header(struct comm_point* c) 2890 { 2891 char* line = http_header_line(c->buffer); 2892 if(!line) return 1; 2893 if(c->http_in_chunk_headers == 3) { 2894 verbose(VERB_ALGO, "http chunk trailer: %s", line); 2895 /* are we done ? 
*/ 2896 if(line[0] == 0 && c->tcp_byte_count == 0) { 2897 /* callback of http reader when NETEVENT_DONE, 2898 * end of data, with no data in buffer */ 2899 sldns_buffer_set_position(c->buffer, 0); 2900 sldns_buffer_set_limit(c->buffer, 0); 2901 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2902 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2903 /* return that we are done */ 2904 return 2; 2905 } 2906 if(line[0] == 0) { 2907 /* continue with header of the next chunk */ 2908 c->http_in_chunk_headers = 1; 2909 /* remove header text from front of buffer */ 2910 http_moveover_buffer(c->buffer); 2911 sldns_buffer_flip(c->buffer); 2912 return 1; 2913 } 2914 /* ignore further trail headers */ 2915 return 1; 2916 } 2917 verbose(VERB_ALGO, "http chunk header: %s", line); 2918 if(c->http_in_chunk_headers == 1) { 2919 /* read chunked start line */ 2920 char* end = NULL; 2921 c->tcp_byte_count = (size_t)strtol(line, &end, 16); 2922 if(end == line) 2923 return 0; 2924 c->http_in_chunk_headers = 0; 2925 /* remove header text from front of buffer */ 2926 http_moveover_buffer(c->buffer); 2927 sldns_buffer_flip(c->buffer); 2928 if(c->tcp_byte_count == 0) { 2929 /* done with chunks, process chunk_trailer lines */ 2930 c->http_in_chunk_headers = 3; 2931 } 2932 return 1; 2933 } 2934 /* ignore other headers */ 2935 return 1; 2936 } 2937 2938 /** handle nonchunked data segment, 0=fail, 1=wait */ 2939 static int 2940 http_nonchunk_segment(struct comm_point* c) 2941 { 2942 /* c->buffer at position..limit has new data we read in. 2943 * the buffer itself is full of nonchunked data. 2944 * we are looking to read tcp_byte_count more data 2945 * and then the transfer is done. 
*/ 2946 size_t remainbufferlen; 2947 size_t got_now = sldns_buffer_limit(c->buffer); 2948 if(c->tcp_byte_count <= got_now) { 2949 /* done, this is the last data fragment */ 2950 c->http_stored = 0; 2951 sldns_buffer_set_position(c->buffer, 0); 2952 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2953 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2954 return 1; 2955 } 2956 /* if we have the buffer space, 2957 * read more data collected into the buffer */ 2958 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2959 sldns_buffer_limit(c->buffer); 2960 if(remainbufferlen+got_now >= c->tcp_byte_count || 2961 remainbufferlen >= (size_t)(c->ssl?16384:2048)) { 2962 size_t total = sldns_buffer_limit(c->buffer); 2963 sldns_buffer_clear(c->buffer); 2964 sldns_buffer_set_position(c->buffer, total); 2965 c->http_stored = total; 2966 /* return and wait to read more */ 2967 return 1; 2968 } 2969 /* call callback with this data amount, then 2970 * wait for more */ 2971 c->tcp_byte_count -= got_now; 2972 c->http_stored = 0; 2973 sldns_buffer_set_position(c->buffer, 0); 2974 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2975 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2976 /* c->callback has to buffer_clear(c->buffer). */ 2977 /* return and wait to read more */ 2978 return 1; 2979 } 2980 2981 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */ 2982 static int 2983 http_chunked_segment(struct comm_point* c) 2984 { 2985 /* the c->buffer has from position..limit new data we read. */ 2986 /* the current chunk has length tcp_byte_count. 2987 * once we read that read more chunk headers. 
2988 */ 2989 size_t remainbufferlen; 2990 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 2991 verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer)); 2992 if(c->tcp_byte_count <= got_now) { 2993 /* the chunk has completed (with perhaps some extra data 2994 * from next chunk header and next chunk) */ 2995 /* save too much info into temp buffer */ 2996 size_t fraglen; 2997 struct comm_reply repinfo; 2998 c->http_stored = 0; 2999 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 3000 sldns_buffer_clear(c->http_temp); 3001 sldns_buffer_write(c->http_temp, 3002 sldns_buffer_current(c->buffer), 3003 sldns_buffer_remaining(c->buffer)); 3004 sldns_buffer_flip(c->http_temp); 3005 3006 /* callback with this fragment */ 3007 fraglen = sldns_buffer_position(c->buffer); 3008 sldns_buffer_set_position(c->buffer, 0); 3009 sldns_buffer_set_limit(c->buffer, fraglen); 3010 repinfo = c->repinfo; 3011 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3012 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 3013 /* c->callback has to buffer_clear(). */ 3014 3015 /* is commpoint deleted? 
*/ 3016 if(!repinfo.c) { 3017 return 1; 3018 } 3019 /* copy waiting info */ 3020 sldns_buffer_clear(c->buffer); 3021 sldns_buffer_write(c->buffer, 3022 sldns_buffer_begin(c->http_temp), 3023 sldns_buffer_remaining(c->http_temp)); 3024 sldns_buffer_flip(c->buffer); 3025 /* process end of chunk trailer header lines, until 3026 * an empty line */ 3027 c->http_in_chunk_headers = 3; 3028 /* process more data in buffer (if any) */ 3029 return 2; 3030 } 3031 c->tcp_byte_count -= got_now; 3032 3033 /* if we have the buffer space, 3034 * read more data collected into the buffer */ 3035 remainbufferlen = sldns_buffer_capacity(c->buffer) - 3036 sldns_buffer_limit(c->buffer); 3037 if(remainbufferlen >= c->tcp_byte_count || 3038 remainbufferlen >= 2048) { 3039 size_t total = sldns_buffer_limit(c->buffer); 3040 sldns_buffer_clear(c->buffer); 3041 sldns_buffer_set_position(c->buffer, total); 3042 c->http_stored = total; 3043 /* return and wait to read more */ 3044 return 1; 3045 } 3046 3047 /* callback of http reader for a new part of the data */ 3048 c->http_stored = 0; 3049 sldns_buffer_set_position(c->buffer, 0); 3050 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3051 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 3052 /* c->callback has to buffer_clear(c->buffer). */ 3053 /* return and wait to read more */ 3054 return 1; 3055 } 3056 3057 #ifdef HAVE_NGHTTP2 3058 /** Create new http2 session. Called when creating handling comm point. */ 3059 static struct http2_session* http2_session_create(struct comm_point* c) 3060 { 3061 struct http2_session* session = calloc(1, sizeof(*session)); 3062 if(!session) { 3063 log_err("malloc failure while creating http2 session"); 3064 return NULL; 3065 } 3066 session->c = c; 3067 3068 return session; 3069 } 3070 #endif 3071 3072 /** Delete http2 session. 
After closing connection or on error */ 3073 static void http2_session_delete(struct http2_session* h2_session) 3074 { 3075 #ifdef HAVE_NGHTTP2 3076 if(h2_session->callbacks) 3077 nghttp2_session_callbacks_del(h2_session->callbacks); 3078 free(h2_session); 3079 #else 3080 (void)h2_session; 3081 #endif 3082 } 3083 3084 #ifdef HAVE_NGHTTP2 3085 struct http2_stream* http2_stream_create(int32_t stream_id) 3086 { 3087 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 3088 if(!h2_stream) { 3089 log_err("malloc failure while creating http2 stream"); 3090 return NULL; 3091 } 3092 h2_stream->stream_id = stream_id; 3093 return h2_stream; 3094 } 3095 3096 /** Delete http2 stream. After session delete or stream close callback */ 3097 static void http2_stream_delete(struct http2_session* h2_session, 3098 struct http2_stream* h2_stream) 3099 { 3100 if(h2_stream->mesh_state) { 3101 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 3102 h2_session->c); 3103 h2_stream->mesh_state = NULL; 3104 } 3105 http2_req_stream_clear(h2_stream); 3106 free(h2_stream); 3107 } 3108 #endif 3109 3110 void http2_stream_add_meshstate(struct http2_stream* h2_stream, 3111 struct mesh_area* mesh, struct mesh_state* m) 3112 { 3113 h2_stream->mesh = mesh; 3114 h2_stream->mesh_state = m; 3115 } 3116 3117 /** delete http2 session server. After closing connection. 
*/ 3118 static void http2_session_server_delete(struct http2_session* h2_session) 3119 { 3120 #ifdef HAVE_NGHTTP2 3121 struct http2_stream* h2_stream, *next; 3122 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 3123 h2_session->session = NULL; 3124 for(h2_stream = h2_session->first_stream; h2_stream;) { 3125 next = h2_stream->next; 3126 http2_stream_delete(h2_session, h2_stream); 3127 h2_stream = next; 3128 } 3129 h2_session->first_stream = NULL; 3130 h2_session->is_drop = 0; 3131 h2_session->postpone_drop = 0; 3132 h2_session->c->h2_stream = NULL; 3133 #endif 3134 (void)h2_session; 3135 } 3136 3137 #ifdef HAVE_NGHTTP2 3138 void http2_session_add_stream(struct http2_session* h2_session, 3139 struct http2_stream* h2_stream) 3140 { 3141 if(h2_session->first_stream) 3142 h2_session->first_stream->prev = h2_stream; 3143 h2_stream->next = h2_session->first_stream; 3144 h2_session->first_stream = h2_stream; 3145 } 3146 3147 /** remove stream from session linked list. After stream close callback or 3148 * closing connection */ 3149 static void http2_session_remove_stream(struct http2_session* h2_session, 3150 struct http2_stream* h2_stream) 3151 { 3152 if(h2_stream->prev) 3153 h2_stream->prev->next = h2_stream->next; 3154 else 3155 h2_session->first_stream = h2_stream->next; 3156 if(h2_stream->next) 3157 h2_stream->next->prev = h2_stream->prev; 3158 3159 } 3160 3161 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 3162 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 3163 { 3164 struct http2_stream* h2_stream; 3165 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3166 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3167 h2_session->session, stream_id))) { 3168 return 0; 3169 } 3170 http2_session_remove_stream(h2_session, h2_stream); 3171 http2_stream_delete(h2_session, h2_stream); 3172 return 0; 3173 } 3174 3175 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 3176 
size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3177 { 3178 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3179 ssize_t ret; 3180 3181 log_assert(h2_session->c->type == comm_http); 3182 log_assert(h2_session->c->h2_session); 3183 3184 #ifdef HAVE_SSL 3185 if(h2_session->c->ssl) { 3186 int r; 3187 ERR_clear_error(); 3188 r = SSL_read(h2_session->c->ssl, buf, len); 3189 if(r <= 0) { 3190 int want = SSL_get_error(h2_session->c->ssl, r); 3191 if(want == SSL_ERROR_ZERO_RETURN) { 3192 return NGHTTP2_ERR_EOF; 3193 } else if(want == SSL_ERROR_WANT_READ) { 3194 return NGHTTP2_ERR_WOULDBLOCK; 3195 } else if(want == SSL_ERROR_WANT_WRITE) { 3196 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 3197 comm_point_listen_for_rw(h2_session->c, 0, 1); 3198 return NGHTTP2_ERR_WOULDBLOCK; 3199 } else if(want == SSL_ERROR_SYSCALL) { 3200 #ifdef ECONNRESET 3201 if(errno == ECONNRESET && verbosity < 2) 3202 return NGHTTP2_ERR_CALLBACK_FAILURE; 3203 #endif 3204 if(errno != 0) 3205 log_err("SSL_read syscall: %s", 3206 strerror(errno)); 3207 return NGHTTP2_ERR_CALLBACK_FAILURE; 3208 } 3209 log_crypto_err("could not SSL_read"); 3210 return NGHTTP2_ERR_CALLBACK_FAILURE; 3211 } 3212 return r; 3213 } 3214 #endif /* HAVE_SSL */ 3215 3216 ret = recv(h2_session->c->fd, buf, len, MSG_DONTWAIT); 3217 if(ret == 0) { 3218 return NGHTTP2_ERR_EOF; 3219 } else if(ret < 0) { 3220 #ifndef USE_WINSOCK 3221 if(errno == EINTR || errno == EAGAIN) 3222 return NGHTTP2_ERR_WOULDBLOCK; 3223 #ifdef ECONNRESET 3224 if(errno == ECONNRESET && verbosity < 2) 3225 return NGHTTP2_ERR_CALLBACK_FAILURE; 3226 #endif 3227 log_err_addr("could not http2 recv: %s", strerror(errno), 3228 &h2_session->c->repinfo.remote_addr, 3229 h2_session->c->repinfo.remote_addrlen); 3230 #else /* USE_WINSOCK */ 3231 if(WSAGetLastError() == WSAECONNRESET) 3232 return NGHTTP2_ERR_CALLBACK_FAILURE; 3233 if(WSAGetLastError() == WSAEINPROGRESS) 3234 return NGHTTP2_ERR_WOULDBLOCK; 3235 if(WSAGetLastError() == 
WSAEWOULDBLOCK) { 3236 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3237 UB_EV_READ); 3238 return NGHTTP2_ERR_WOULDBLOCK; 3239 } 3240 log_err_addr("could not http2 recv: %s", 3241 wsa_strerror(WSAGetLastError()), 3242 &h2_session->c->repinfo.remote_addr, 3243 h2_session->c->repinfo.remote_addrlen); 3244 #endif 3245 return NGHTTP2_ERR_CALLBACK_FAILURE; 3246 } 3247 return ret; 3248 } 3249 #endif /* HAVE_NGHTTP2 */ 3250 3251 /** Handle http2 read */ 3252 static int 3253 comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 3254 { 3255 #ifdef HAVE_NGHTTP2 3256 int ret; 3257 log_assert(c->h2_session); 3258 3259 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 3260 ret = nghttp2_session_recv(c->h2_session->session); 3261 if(ret) { 3262 if(ret != NGHTTP2_ERR_EOF && 3263 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 3264 char a[256]; 3265 addr_to_str(&c->repinfo.remote_addr, 3266 c->repinfo.remote_addrlen, a, sizeof(a)); 3267 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 3268 "error: %s", a, nghttp2_strerror(ret)); 3269 } 3270 return 0; 3271 } 3272 if(nghttp2_session_want_write(c->h2_session->session)) { 3273 c->tcp_is_reading = 0; 3274 comm_point_stop_listening(c); 3275 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3276 } else if(!nghttp2_session_want_read(c->h2_session->session)) 3277 return 0; /* connection can be closed */ 3278 return 1; 3279 #else 3280 (void)c; 3281 return 0; 3282 #endif 3283 } 3284 3285 /** 3286 * Handle http reading callback. 3287 * @param fd: file descriptor of socket. 3288 * @param c: comm point to read from into buffer. 
3289 * @return: 0 on error 3290 */ 3291 static int 3292 comm_point_http_handle_read(int fd, struct comm_point* c) 3293 { 3294 log_assert(c->type == comm_http); 3295 log_assert(fd != -1); 3296 3297 /* if we are in ssl handshake, handle SSL handshake */ 3298 #ifdef HAVE_SSL 3299 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3300 if(!ssl_handshake(c)) 3301 return 0; 3302 if(c->ssl_shake_state != comm_ssl_shake_none) 3303 return 1; 3304 } 3305 #endif /* HAVE_SSL */ 3306 3307 if(!c->tcp_is_reading) 3308 return 1; 3309 3310 if(c->use_h2) { 3311 return comm_point_http2_handle_read(fd, c); 3312 } 3313 3314 /* http version is <= http/1.1 */ 3315 3316 if(c->http_min_version >= http_version_2) { 3317 /* HTTP/2 failed, not allowed to use lower version. */ 3318 return 0; 3319 } 3320 3321 /* read more data */ 3322 if(c->ssl) { 3323 if(!ssl_http_read_more(c)) 3324 return 0; 3325 } else { 3326 if(!http_read_more(fd, c)) 3327 return 0; 3328 } 3329 3330 if(c->http_stored >= sldns_buffer_position(c->buffer)) { 3331 /* read did not work but we wanted more data, there is 3332 * no bytes to process now. 
*/ 3333 return 1; 3334 } 3335 sldns_buffer_flip(c->buffer); 3336 /* if we are partway in a segment of data, position us at the point 3337 * where we left off previously */ 3338 if(c->http_stored < sldns_buffer_limit(c->buffer)) 3339 sldns_buffer_set_position(c->buffer, c->http_stored); 3340 else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 3341 3342 while(sldns_buffer_remaining(c->buffer) > 0) { 3343 /* Handle HTTP/1.x data */ 3344 /* if we are reading headers, read more headers */ 3345 if(c->http_in_headers || c->http_in_chunk_headers) { 3346 /* if header is done, process the header */ 3347 if(!http_header_done(c->buffer)) { 3348 /* copy remaining data to front of buffer 3349 * and set rest for writing into it */ 3350 http_moveover_buffer(c->buffer); 3351 /* return and wait to read more */ 3352 return 1; 3353 } 3354 if(!c->http_in_chunk_headers) { 3355 /* process initial headers */ 3356 if(!http_process_initial_header(c)) 3357 return 0; 3358 } else { 3359 /* process chunk headers */ 3360 int r = http_process_chunk_header(c); 3361 if(r == 0) return 0; 3362 if(r == 2) return 1; /* done */ 3363 /* r == 1, continue */ 3364 } 3365 /* see if we have more to process */ 3366 continue; 3367 } 3368 3369 if(!c->http_is_chunked) { 3370 /* if we are reading nonchunks, process that*/ 3371 return http_nonchunk_segment(c); 3372 } else { 3373 /* if we are reading chunks, read the chunk */ 3374 int r = http_chunked_segment(c); 3375 if(r == 0) return 0; 3376 if(r == 1) return 1; 3377 continue; 3378 } 3379 } 3380 /* broke out of the loop; could not process header instead need 3381 * to read more */ 3382 /* moveover any remaining data and read more data */ 3383 http_moveover_buffer(c->buffer); 3384 /* return and wait to read more */ 3385 return 1; 3386 } 3387 3388 /** check pending connect for http */ 3389 static int 3390 http_check_connect(int fd, struct comm_point* c) 3391 { 3392 /* check for pending error from nonblocking connect */ 3393 /* from Stevens, 
unix network programming, vol1, 3rd ed, p450*/ 3394 int error = 0; 3395 socklen_t len = (socklen_t)sizeof(error); 3396 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 3397 &len) < 0){ 3398 #ifndef USE_WINSOCK 3399 error = errno; /* on solaris errno is error */ 3400 #else /* USE_WINSOCK */ 3401 error = WSAGetLastError(); 3402 #endif 3403 } 3404 #ifndef USE_WINSOCK 3405 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 3406 if(error == EINPROGRESS || error == EWOULDBLOCK) 3407 return 1; /* try again later */ 3408 else 3409 #endif 3410 if(error != 0 && verbosity < 2) 3411 return 0; /* silence lots of chatter in the logs */ 3412 else if(error != 0) { 3413 log_err_addr("http connect", strerror(error), 3414 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3415 #else /* USE_WINSOCK */ 3416 /* examine error */ 3417 if(error == WSAEINPROGRESS) 3418 return 1; 3419 else if(error == WSAEWOULDBLOCK) { 3420 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3421 return 1; 3422 } else if(error != 0 && verbosity < 2) 3423 return 0; 3424 else if(error != 0) { 3425 log_err_addr("http connect", wsa_strerror(error), 3426 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3427 #endif /* USE_WINSOCK */ 3428 return 0; 3429 } 3430 /* keep on processing this socket */ 3431 return 2; 3432 } 3433 3434 /** write more data for http (with ssl) */ 3435 static int 3436 ssl_http_write_more(struct comm_point* c) 3437 { 3438 #ifdef HAVE_SSL 3439 int r; 3440 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3441 ERR_clear_error(); 3442 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 3443 (int)sldns_buffer_remaining(c->buffer)); 3444 if(r <= 0) { 3445 int want = SSL_get_error(c->ssl, r); 3446 if(want == SSL_ERROR_ZERO_RETURN) { 3447 return 0; /* closed */ 3448 } else if(want == SSL_ERROR_WANT_READ) { 3449 c->ssl_shake_state = comm_ssl_shake_hs_read; 3450 comm_point_listen_for_rw(c, 1, 0); 3451 return 1; /* wait for read condition */ 3452 } else if(want == 
SSL_ERROR_WANT_WRITE) { 3453 return 1; /* write more later */ 3454 } else if(want == SSL_ERROR_SYSCALL) { 3455 #ifdef EPIPE 3456 if(errno == EPIPE && verbosity < 2) 3457 return 0; /* silence 'broken pipe' */ 3458 #endif 3459 if(errno != 0) 3460 log_err("SSL_write syscall: %s", 3461 strerror(errno)); 3462 return 0; 3463 } 3464 log_crypto_err("could not SSL_write"); 3465 return 0; 3466 } 3467 sldns_buffer_skip(c->buffer, (ssize_t)r); 3468 return 1; 3469 #else 3470 (void)c; 3471 return 0; 3472 #endif /* HAVE_SSL */ 3473 } 3474 3475 /** write more data for http */ 3476 static int 3477 http_write_more(int fd, struct comm_point* c) 3478 { 3479 ssize_t r; 3480 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3481 r = send(fd, (void*)sldns_buffer_current(c->buffer), 3482 sldns_buffer_remaining(c->buffer), 0); 3483 if(r == -1) { 3484 #ifndef USE_WINSOCK 3485 if(errno == EINTR || errno == EAGAIN) 3486 return 1; 3487 #else 3488 if(WSAGetLastError() == WSAEINPROGRESS) 3489 return 1; 3490 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3491 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3492 return 1; 3493 } 3494 #endif 3495 log_err_addr("http send r", sock_strerror(errno), 3496 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3497 return 0; 3498 } 3499 sldns_buffer_skip(c->buffer, r); 3500 return 1; 3501 } 3502 3503 #ifdef HAVE_NGHTTP2 3504 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 3505 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3506 { 3507 ssize_t ret; 3508 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3509 log_assert(h2_session->c->type == comm_http); 3510 log_assert(h2_session->c->h2_session); 3511 3512 #ifdef HAVE_SSL 3513 if(h2_session->c->ssl) { 3514 int r; 3515 ERR_clear_error(); 3516 r = SSL_write(h2_session->c->ssl, buf, len); 3517 if(r <= 0) { 3518 int want = SSL_get_error(h2_session->c->ssl, r); 3519 if(want == SSL_ERROR_ZERO_RETURN) { 3520 return NGHTTP2_ERR_CALLBACK_FAILURE; 3521 } else if(want 
== SSL_ERROR_WANT_READ) { 3522 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read; 3523 comm_point_listen_for_rw(h2_session->c, 1, 0); 3524 return NGHTTP2_ERR_WOULDBLOCK; 3525 } else if(want == SSL_ERROR_WANT_WRITE) { 3526 return NGHTTP2_ERR_WOULDBLOCK; 3527 } else if(want == SSL_ERROR_SYSCALL) { 3528 #ifdef EPIPE 3529 if(errno == EPIPE && verbosity < 2) 3530 return NGHTTP2_ERR_CALLBACK_FAILURE; 3531 #endif 3532 if(errno != 0) 3533 log_err("SSL_write syscall: %s", 3534 strerror(errno)); 3535 return NGHTTP2_ERR_CALLBACK_FAILURE; 3536 } 3537 log_crypto_err("could not SSL_write"); 3538 return NGHTTP2_ERR_CALLBACK_FAILURE; 3539 } 3540 return r; 3541 } 3542 #endif /* HAVE_SSL */ 3543 3544 ret = send(h2_session->c->fd, buf, len, 0); 3545 if(ret == 0) { 3546 return NGHTTP2_ERR_CALLBACK_FAILURE; 3547 } else if(ret < 0) { 3548 #ifndef USE_WINSOCK 3549 if(errno == EINTR || errno == EAGAIN) 3550 return NGHTTP2_ERR_WOULDBLOCK; 3551 #ifdef EPIPE 3552 if(errno == EPIPE && verbosity < 2) 3553 return NGHTTP2_ERR_CALLBACK_FAILURE; 3554 #endif 3555 #ifdef ECONNRESET 3556 if(errno == ECONNRESET && verbosity < 2) 3557 return NGHTTP2_ERR_CALLBACK_FAILURE; 3558 #endif 3559 log_err_addr("could not http2 write: %s", strerror(errno), 3560 &h2_session->c->repinfo.remote_addr, 3561 h2_session->c->repinfo.remote_addrlen); 3562 #else /* USE_WINSOCK */ 3563 if(WSAGetLastError() == WSAENOTCONN) 3564 return NGHTTP2_ERR_WOULDBLOCK; 3565 if(WSAGetLastError() == WSAEINPROGRESS) 3566 return NGHTTP2_ERR_WOULDBLOCK; 3567 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3568 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3569 UB_EV_WRITE); 3570 return NGHTTP2_ERR_WOULDBLOCK; 3571 } 3572 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 3573 return NGHTTP2_ERR_CALLBACK_FAILURE; 3574 log_err_addr("could not http2 write: %s", 3575 wsa_strerror(WSAGetLastError()), 3576 &h2_session->c->repinfo.remote_addr, 3577 h2_session->c->repinfo.remote_addrlen); 3578 #endif 3579 return 
NGHTTP2_ERR_CALLBACK_FAILURE; 3580 } 3581 return ret; 3582 } 3583 #endif /* HAVE_NGHTTP2 */ 3584 3585 /** Handle http2 writing */ 3586 static int 3587 comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 3588 { 3589 #ifdef HAVE_NGHTTP2 3590 int ret; 3591 log_assert(c->h2_session); 3592 3593 ret = nghttp2_session_send(c->h2_session->session); 3594 if(ret) { 3595 verbose(VERB_QUERY, "http2: session_send failed, " 3596 "error: %s", nghttp2_strerror(ret)); 3597 return 0; 3598 } 3599 3600 if(nghttp2_session_want_read(c->h2_session->session)) { 3601 c->tcp_is_reading = 1; 3602 comm_point_stop_listening(c); 3603 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3604 } else if(!nghttp2_session_want_write(c->h2_session->session)) 3605 return 0; /* connection can be closed */ 3606 return 1; 3607 #else 3608 (void)c; 3609 return 0; 3610 #endif 3611 } 3612 3613 /** 3614 * Handle http writing callback. 3615 * @param fd: file descriptor of socket. 3616 * @param c: comm point to write buffer out of. 
3617 * @return: 0 on error 3618 */ 3619 static int 3620 comm_point_http_handle_write(int fd, struct comm_point* c) 3621 { 3622 log_assert(c->type == comm_http); 3623 log_assert(fd != -1); 3624 3625 /* check pending connect errors, if that fails, we wait for more, 3626 * or we can continue to write contents */ 3627 if(c->tcp_check_nb_connect) { 3628 int r = http_check_connect(fd, c); 3629 if(r == 0) return 0; 3630 if(r == 1) return 1; 3631 c->tcp_check_nb_connect = 0; 3632 } 3633 /* if we are in ssl handshake, handle SSL handshake */ 3634 #ifdef HAVE_SSL 3635 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3636 if(!ssl_handshake(c)) 3637 return 0; 3638 if(c->ssl_shake_state != comm_ssl_shake_none) 3639 return 1; 3640 } 3641 #endif /* HAVE_SSL */ 3642 if(c->tcp_is_reading) 3643 return 1; 3644 3645 if(c->use_h2) { 3646 return comm_point_http2_handle_write(fd, c); 3647 } 3648 3649 /* http version is <= http/1.1 */ 3650 3651 if(c->http_min_version >= http_version_2) { 3652 /* HTTP/2 failed, not allowed to use lower version. 
*/ 3653 return 0; 3654 } 3655 3656 /* if we are writing, write more */ 3657 if(c->ssl) { 3658 if(!ssl_http_write_more(c)) 3659 return 0; 3660 } else { 3661 if(!http_write_more(fd, c)) 3662 return 0; 3663 } 3664 3665 /* we write a single buffer contents, that can contain 3666 * the http request, and then flip to read the results */ 3667 /* see if write is done */ 3668 if(sldns_buffer_remaining(c->buffer) == 0) { 3669 sldns_buffer_clear(c->buffer); 3670 if(c->tcp_do_toggle_rw) 3671 c->tcp_is_reading = 1; 3672 c->tcp_byte_count = 0; 3673 /* switch from listening(write) to listening(read) */ 3674 comm_point_stop_listening(c); 3675 comm_point_start_listening(c, -1, -1); 3676 } 3677 return 1; 3678 } 3679 3680 void 3681 comm_point_http_handle_callback(int fd, short event, void* arg) 3682 { 3683 struct comm_point* c = (struct comm_point*)arg; 3684 log_assert(c->type == comm_http); 3685 ub_comm_base_now(c->ev->base); 3686 3687 if(event&UB_EV_TIMEOUT) { 3688 verbose(VERB_QUERY, "http took too long, dropped"); 3689 reclaim_http_handler(c); 3690 if(!c->tcp_do_close) { 3691 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3692 (void)(*c->callback)(c, c->cb_arg, 3693 NETEVENT_TIMEOUT, NULL); 3694 } 3695 return; 3696 } 3697 if(event&UB_EV_READ) { 3698 if(!comm_point_http_handle_read(fd, c)) { 3699 reclaim_http_handler(c); 3700 if(!c->tcp_do_close) { 3701 fptr_ok(fptr_whitelist_comm_point( 3702 c->callback)); 3703 (void)(*c->callback)(c, c->cb_arg, 3704 NETEVENT_CLOSED, NULL); 3705 } 3706 } 3707 return; 3708 } 3709 if(event&UB_EV_WRITE) { 3710 if(!comm_point_http_handle_write(fd, c)) { 3711 reclaim_http_handler(c); 3712 if(!c->tcp_do_close) { 3713 fptr_ok(fptr_whitelist_comm_point( 3714 c->callback)); 3715 (void)(*c->callback)(c, c->cb_arg, 3716 NETEVENT_CLOSED, NULL); 3717 } 3718 } 3719 return; 3720 } 3721 log_err("Ignored event %d for httphdl.", event); 3722 } 3723 3724 void comm_point_local_handle_callback(int fd, short event, void* arg) 3725 { 3726 struct comm_point* c = 
(struct comm_point*)arg; 3727 log_assert(c->type == comm_local); 3728 ub_comm_base_now(c->ev->base); 3729 3730 if(event&UB_EV_READ) { 3731 if(!comm_point_tcp_handle_read(fd, c, 1)) { 3732 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3733 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 3734 NULL); 3735 } 3736 return; 3737 } 3738 log_err("Ignored event %d for localhdl.", event); 3739 } 3740 3741 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 3742 short event, void* arg) 3743 { 3744 struct comm_point* c = (struct comm_point*)arg; 3745 int err = NETEVENT_NOERROR; 3746 log_assert(c->type == comm_raw); 3747 ub_comm_base_now(c->ev->base); 3748 3749 if(event&UB_EV_TIMEOUT) 3750 err = NETEVENT_TIMEOUT; 3751 fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 3752 (void)(*c->callback)(c, c->cb_arg, err, NULL); 3753 } 3754 3755 struct comm_point* 3756 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 3757 int pp2_enabled, comm_point_callback_type* callback, 3758 void* callback_arg, struct unbound_socket* socket) 3759 { 3760 struct comm_point* c = (struct comm_point*)calloc(1, 3761 sizeof(struct comm_point)); 3762 short evbits; 3763 if(!c) 3764 return NULL; 3765 c->ev = (struct internal_event*)calloc(1, 3766 sizeof(struct internal_event)); 3767 if(!c->ev) { 3768 free(c); 3769 return NULL; 3770 } 3771 c->ev->base = base; 3772 c->fd = fd; 3773 c->buffer = buffer; 3774 c->timeout = NULL; 3775 c->tcp_is_reading = 0; 3776 c->tcp_byte_count = 0; 3777 c->tcp_parent = NULL; 3778 c->max_tcp_count = 0; 3779 c->cur_tcp_count = 0; 3780 c->tcp_handlers = NULL; 3781 c->tcp_free = NULL; 3782 c->type = comm_udp; 3783 c->tcp_do_close = 0; 3784 c->do_not_close = 0; 3785 c->tcp_do_toggle_rw = 0; 3786 c->tcp_check_nb_connect = 0; 3787 #ifdef USE_MSG_FASTOPEN 3788 c->tcp_do_fastopen = 0; 3789 #endif 3790 #ifdef USE_DNSCRYPT 3791 c->dnscrypt = 0; 3792 c->dnscrypt_buffer = buffer; 3793 #endif 3794 c->inuse = 0; 3795 c->callback = callback; 3796 c->cb_arg 
= callback_arg; 3797 c->socket = socket; 3798 c->pp2_enabled = pp2_enabled; 3799 c->pp2_header_state = pp2_header_none; 3800 evbits = UB_EV_READ | UB_EV_PERSIST; 3801 /* ub_event stuff */ 3802 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3803 comm_point_udp_callback, c); 3804 if(c->ev->ev == NULL) { 3805 log_err("could not baseset udp event"); 3806 comm_point_delete(c); 3807 return NULL; 3808 } 3809 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3810 log_err("could not add udp event"); 3811 comm_point_delete(c); 3812 return NULL; 3813 } 3814 c->event_added = 1; 3815 return c; 3816 } 3817 3818 struct comm_point* 3819 comm_point_create_udp_ancil(struct comm_base *base, int fd, 3820 sldns_buffer* buffer, int pp2_enabled, 3821 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3822 { 3823 struct comm_point* c = (struct comm_point*)calloc(1, 3824 sizeof(struct comm_point)); 3825 short evbits; 3826 if(!c) 3827 return NULL; 3828 c->ev = (struct internal_event*)calloc(1, 3829 sizeof(struct internal_event)); 3830 if(!c->ev) { 3831 free(c); 3832 return NULL; 3833 } 3834 c->ev->base = base; 3835 c->fd = fd; 3836 c->buffer = buffer; 3837 c->timeout = NULL; 3838 c->tcp_is_reading = 0; 3839 c->tcp_byte_count = 0; 3840 c->tcp_parent = NULL; 3841 c->max_tcp_count = 0; 3842 c->cur_tcp_count = 0; 3843 c->tcp_handlers = NULL; 3844 c->tcp_free = NULL; 3845 c->type = comm_udp; 3846 c->tcp_do_close = 0; 3847 c->do_not_close = 0; 3848 #ifdef USE_DNSCRYPT 3849 c->dnscrypt = 0; 3850 c->dnscrypt_buffer = buffer; 3851 #endif 3852 c->inuse = 0; 3853 c->tcp_do_toggle_rw = 0; 3854 c->tcp_check_nb_connect = 0; 3855 #ifdef USE_MSG_FASTOPEN 3856 c->tcp_do_fastopen = 0; 3857 #endif 3858 c->callback = callback; 3859 c->cb_arg = callback_arg; 3860 c->socket = socket; 3861 c->pp2_enabled = pp2_enabled; 3862 c->pp2_header_state = pp2_header_none; 3863 evbits = UB_EV_READ | UB_EV_PERSIST; 3864 /* ub_event stuff */ 3865 c->ev->ev = 
ub_event_new(base->eb->base, c->fd, evbits, 3866 comm_point_udp_ancil_callback, c); 3867 if(c->ev->ev == NULL) { 3868 log_err("could not baseset udp event"); 3869 comm_point_delete(c); 3870 return NULL; 3871 } 3872 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3873 log_err("could not add udp event"); 3874 comm_point_delete(c); 3875 return NULL; 3876 } 3877 c->event_added = 1; 3878 return c; 3879 } 3880 3881 static struct comm_point* 3882 comm_point_create_tcp_handler(struct comm_base *base, 3883 struct comm_point* parent, size_t bufsize, 3884 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 3885 void* callback_arg, struct unbound_socket* socket) 3886 { 3887 struct comm_point* c = (struct comm_point*)calloc(1, 3888 sizeof(struct comm_point)); 3889 short evbits; 3890 if(!c) 3891 return NULL; 3892 c->ev = (struct internal_event*)calloc(1, 3893 sizeof(struct internal_event)); 3894 if(!c->ev) { 3895 free(c); 3896 return NULL; 3897 } 3898 c->ev->base = base; 3899 c->fd = -1; 3900 c->buffer = sldns_buffer_new(bufsize); 3901 if(!c->buffer) { 3902 free(c->ev); 3903 free(c); 3904 return NULL; 3905 } 3906 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3907 if(!c->timeout) { 3908 sldns_buffer_free(c->buffer); 3909 free(c->ev); 3910 free(c); 3911 return NULL; 3912 } 3913 c->tcp_is_reading = 0; 3914 c->tcp_byte_count = 0; 3915 c->tcp_parent = parent; 3916 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3917 c->tcp_conn_limit = parent->tcp_conn_limit; 3918 c->tcl_addr = NULL; 3919 c->tcp_keepalive = 0; 3920 c->max_tcp_count = 0; 3921 c->cur_tcp_count = 0; 3922 c->tcp_handlers = NULL; 3923 c->tcp_free = NULL; 3924 c->type = comm_tcp; 3925 c->tcp_do_close = 0; 3926 c->do_not_close = 0; 3927 c->tcp_do_toggle_rw = 1; 3928 c->tcp_check_nb_connect = 0; 3929 #ifdef USE_MSG_FASTOPEN 3930 c->tcp_do_fastopen = 0; 3931 #endif 3932 #ifdef USE_DNSCRYPT 3933 c->dnscrypt = 0; 3934 /* We don't know just yet if this is a dnscrypt channel. 
Allocation 3935 * will be done when handling the callback. */ 3936 c->dnscrypt_buffer = c->buffer; 3937 #endif 3938 c->repinfo.c = c; 3939 c->callback = callback; 3940 c->cb_arg = callback_arg; 3941 c->socket = socket; 3942 c->pp2_enabled = parent->pp2_enabled; 3943 c->pp2_header_state = pp2_header_none; 3944 if(spoolbuf) { 3945 c->tcp_req_info = tcp_req_info_create(spoolbuf); 3946 if(!c->tcp_req_info) { 3947 log_err("could not create tcp commpoint"); 3948 sldns_buffer_free(c->buffer); 3949 free(c->timeout); 3950 free(c->ev); 3951 free(c); 3952 return NULL; 3953 } 3954 c->tcp_req_info->cp = c; 3955 c->tcp_do_close = 1; 3956 c->tcp_do_toggle_rw = 0; 3957 } 3958 /* add to parent free list */ 3959 c->tcp_free = parent->tcp_free; 3960 parent->tcp_free = c; 3961 /* ub_event stuff */ 3962 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3963 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3964 comm_point_tcp_handle_callback, c); 3965 if(c->ev->ev == NULL) 3966 { 3967 log_err("could not basetset tcphdl event"); 3968 parent->tcp_free = c->tcp_free; 3969 tcp_req_info_delete(c->tcp_req_info); 3970 sldns_buffer_free(c->buffer); 3971 free(c->timeout); 3972 free(c->ev); 3973 free(c); 3974 return NULL; 3975 } 3976 return c; 3977 } 3978 3979 static struct comm_point* 3980 comm_point_create_http_handler(struct comm_base *base, 3981 struct comm_point* parent, size_t bufsize, int harden_large_queries, 3982 uint32_t http_max_streams, char* http_endpoint, 3983 comm_point_callback_type* callback, void* callback_arg, 3984 struct unbound_socket* socket) 3985 { 3986 struct comm_point* c = (struct comm_point*)calloc(1, 3987 sizeof(struct comm_point)); 3988 short evbits; 3989 if(!c) 3990 return NULL; 3991 c->ev = (struct internal_event*)calloc(1, 3992 sizeof(struct internal_event)); 3993 if(!c->ev) { 3994 free(c); 3995 return NULL; 3996 } 3997 c->ev->base = base; 3998 c->fd = -1; 3999 c->buffer = sldns_buffer_new(bufsize); 4000 if(!c->buffer) { 4001 free(c->ev); 4002 free(c); 
4003 return NULL; 4004 } 4005 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 4006 if(!c->timeout) { 4007 sldns_buffer_free(c->buffer); 4008 free(c->ev); 4009 free(c); 4010 return NULL; 4011 } 4012 c->tcp_is_reading = 0; 4013 c->tcp_byte_count = 0; 4014 c->tcp_parent = parent; 4015 c->tcp_timeout_msec = parent->tcp_timeout_msec; 4016 c->tcp_conn_limit = parent->tcp_conn_limit; 4017 c->tcl_addr = NULL; 4018 c->tcp_keepalive = 0; 4019 c->max_tcp_count = 0; 4020 c->cur_tcp_count = 0; 4021 c->tcp_handlers = NULL; 4022 c->tcp_free = NULL; 4023 c->type = comm_http; 4024 c->tcp_do_close = 1; 4025 c->do_not_close = 0; 4026 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 4027 c->tcp_check_nb_connect = 0; 4028 #ifdef USE_MSG_FASTOPEN 4029 c->tcp_do_fastopen = 0; 4030 #endif 4031 #ifdef USE_DNSCRYPT 4032 c->dnscrypt = 0; 4033 c->dnscrypt_buffer = NULL; 4034 #endif 4035 c->repinfo.c = c; 4036 c->callback = callback; 4037 c->cb_arg = callback_arg; 4038 c->socket = socket; 4039 c->pp2_enabled = 0; 4040 c->pp2_header_state = pp2_header_none; 4041 4042 c->http_min_version = http_version_2; 4043 c->http2_stream_max_qbuffer_size = bufsize; 4044 if(harden_large_queries && bufsize > 512) 4045 c->http2_stream_max_qbuffer_size = 512; 4046 c->http2_max_streams = http_max_streams; 4047 if(!(c->http_endpoint = strdup(http_endpoint))) { 4048 log_err("could not strdup http_endpoint"); 4049 sldns_buffer_free(c->buffer); 4050 free(c->timeout); 4051 free(c->ev); 4052 free(c); 4053 return NULL; 4054 } 4055 c->use_h2 = 0; 4056 #ifdef HAVE_NGHTTP2 4057 if(!(c->h2_session = http2_session_create(c))) { 4058 log_err("could not create http2 session"); 4059 free(c->http_endpoint); 4060 sldns_buffer_free(c->buffer); 4061 free(c->timeout); 4062 free(c->ev); 4063 free(c); 4064 return NULL; 4065 } 4066 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 4067 log_err("could not create http2 callbacks"); 4068 http2_session_delete(c->h2_session); 4069 
free(c->http_endpoint); 4070 sldns_buffer_free(c->buffer); 4071 free(c->timeout); 4072 free(c->ev); 4073 free(c); 4074 return NULL; 4075 } 4076 #endif 4077 4078 /* add to parent free list */ 4079 c->tcp_free = parent->tcp_free; 4080 parent->tcp_free = c; 4081 /* ub_event stuff */ 4082 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 4083 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4084 comm_point_http_handle_callback, c); 4085 if(c->ev->ev == NULL) 4086 { 4087 log_err("could not set http handler event"); 4088 parent->tcp_free = c->tcp_free; 4089 http2_session_delete(c->h2_session); 4090 sldns_buffer_free(c->buffer); 4091 free(c->timeout); 4092 free(c->ev); 4093 free(c); 4094 return NULL; 4095 } 4096 return c; 4097 } 4098 4099 struct comm_point* 4100 comm_point_create_tcp(struct comm_base *base, int fd, int num, 4101 int idle_timeout, int harden_large_queries, 4102 uint32_t http_max_streams, char* http_endpoint, 4103 struct tcl_list* tcp_conn_limit, size_t bufsize, 4104 struct sldns_buffer* spoolbuf, enum listen_type port_type, 4105 int pp2_enabled, comm_point_callback_type* callback, 4106 void* callback_arg, struct unbound_socket* socket) 4107 { 4108 struct comm_point* c = (struct comm_point*)calloc(1, 4109 sizeof(struct comm_point)); 4110 short evbits; 4111 int i; 4112 /* first allocate the TCP accept listener */ 4113 if(!c) 4114 return NULL; 4115 c->ev = (struct internal_event*)calloc(1, 4116 sizeof(struct internal_event)); 4117 if(!c->ev) { 4118 free(c); 4119 return NULL; 4120 } 4121 c->ev->base = base; 4122 c->fd = fd; 4123 c->buffer = NULL; 4124 c->timeout = NULL; 4125 c->tcp_is_reading = 0; 4126 c->tcp_byte_count = 0; 4127 c->tcp_timeout_msec = idle_timeout; 4128 c->tcp_conn_limit = tcp_conn_limit; 4129 c->tcl_addr = NULL; 4130 c->tcp_keepalive = 0; 4131 c->tcp_parent = NULL; 4132 c->max_tcp_count = num; 4133 c->cur_tcp_count = 0; 4134 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 4135 sizeof(struct comm_point*)); 4136 
if(!c->tcp_handlers) { 4137 free(c->ev); 4138 free(c); 4139 return NULL; 4140 } 4141 c->tcp_free = NULL; 4142 c->type = comm_tcp_accept; 4143 c->tcp_do_close = 0; 4144 c->do_not_close = 0; 4145 c->tcp_do_toggle_rw = 0; 4146 c->tcp_check_nb_connect = 0; 4147 #ifdef USE_MSG_FASTOPEN 4148 c->tcp_do_fastopen = 0; 4149 #endif 4150 #ifdef USE_DNSCRYPT 4151 c->dnscrypt = 0; 4152 c->dnscrypt_buffer = NULL; 4153 #endif 4154 c->callback = NULL; 4155 c->cb_arg = NULL; 4156 c->socket = socket; 4157 c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled); 4158 c->pp2_header_state = pp2_header_none; 4159 evbits = UB_EV_READ | UB_EV_PERSIST; 4160 /* ub_event stuff */ 4161 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4162 comm_point_tcp_accept_callback, c); 4163 if(c->ev->ev == NULL) { 4164 log_err("could not baseset tcpacc event"); 4165 comm_point_delete(c); 4166 return NULL; 4167 } 4168 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4169 log_err("could not add tcpacc event"); 4170 comm_point_delete(c); 4171 return NULL; 4172 } 4173 c->event_added = 1; 4174 /* now prealloc the handlers */ 4175 for(i=0; i<num; i++) { 4176 if(port_type == listen_type_tcp || 4177 port_type == listen_type_ssl || 4178 port_type == listen_type_tcp_dnscrypt) { 4179 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 4180 c, bufsize, spoolbuf, callback, callback_arg, socket); 4181 } else if(port_type == listen_type_http) { 4182 c->tcp_handlers[i] = comm_point_create_http_handler( 4183 base, c, bufsize, harden_large_queries, 4184 http_max_streams, http_endpoint, 4185 callback, callback_arg, socket); 4186 } 4187 else { 4188 log_err("could not create tcp handler, unknown listen " 4189 "type"); 4190 return NULL; 4191 } 4192 if(!c->tcp_handlers[i]) { 4193 comm_point_delete(c); 4194 return NULL; 4195 } 4196 } 4197 4198 return c; 4199 } 4200 4201 struct comm_point* 4202 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 4203 comm_point_callback_type* callback, void* 
callback_arg) 4204 { 4205 struct comm_point* c = (struct comm_point*)calloc(1, 4206 sizeof(struct comm_point)); 4207 short evbits; 4208 if(!c) 4209 return NULL; 4210 c->ev = (struct internal_event*)calloc(1, 4211 sizeof(struct internal_event)); 4212 if(!c->ev) { 4213 free(c); 4214 return NULL; 4215 } 4216 c->ev->base = base; 4217 c->fd = -1; 4218 c->buffer = sldns_buffer_new(bufsize); 4219 if(!c->buffer) { 4220 free(c->ev); 4221 free(c); 4222 return NULL; 4223 } 4224 c->timeout = NULL; 4225 c->tcp_is_reading = 0; 4226 c->tcp_byte_count = 0; 4227 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 4228 c->tcp_conn_limit = NULL; 4229 c->tcl_addr = NULL; 4230 c->tcp_keepalive = 0; 4231 c->tcp_parent = NULL; 4232 c->max_tcp_count = 0; 4233 c->cur_tcp_count = 0; 4234 c->tcp_handlers = NULL; 4235 c->tcp_free = NULL; 4236 c->type = comm_tcp; 4237 c->tcp_do_close = 0; 4238 c->do_not_close = 0; 4239 c->tcp_do_toggle_rw = 1; 4240 c->tcp_check_nb_connect = 1; 4241 #ifdef USE_MSG_FASTOPEN 4242 c->tcp_do_fastopen = 1; 4243 #endif 4244 #ifdef USE_DNSCRYPT 4245 c->dnscrypt = 0; 4246 c->dnscrypt_buffer = c->buffer; 4247 #endif 4248 c->repinfo.c = c; 4249 c->callback = callback; 4250 c->cb_arg = callback_arg; 4251 c->pp2_enabled = 0; 4252 c->pp2_header_state = pp2_header_none; 4253 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4254 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4255 comm_point_tcp_handle_callback, c); 4256 if(c->ev->ev == NULL) 4257 { 4258 log_err("could not baseset tcpout event"); 4259 sldns_buffer_free(c->buffer); 4260 free(c->ev); 4261 free(c); 4262 return NULL; 4263 } 4264 4265 return c; 4266 } 4267 4268 struct comm_point* 4269 comm_point_create_http_out(struct comm_base *base, size_t bufsize, 4270 comm_point_callback_type* callback, void* callback_arg, 4271 sldns_buffer* temp) 4272 { 4273 struct comm_point* c = (struct comm_point*)calloc(1, 4274 sizeof(struct comm_point)); 4275 short evbits; 4276 if(!c) 4277 return NULL; 4278 c->ev = (struct internal_event*)calloc(1, 
4279 sizeof(struct internal_event)); 4280 if(!c->ev) { 4281 free(c); 4282 return NULL; 4283 } 4284 c->ev->base = base; 4285 c->fd = -1; 4286 c->buffer = sldns_buffer_new(bufsize); 4287 if(!c->buffer) { 4288 free(c->ev); 4289 free(c); 4290 return NULL; 4291 } 4292 c->timeout = NULL; 4293 c->tcp_is_reading = 0; 4294 c->tcp_byte_count = 0; 4295 c->tcp_parent = NULL; 4296 c->max_tcp_count = 0; 4297 c->cur_tcp_count = 0; 4298 c->tcp_handlers = NULL; 4299 c->tcp_free = NULL; 4300 c->type = comm_http; 4301 c->tcp_do_close = 0; 4302 c->do_not_close = 0; 4303 c->tcp_do_toggle_rw = 1; 4304 c->tcp_check_nb_connect = 1; 4305 c->http_in_headers = 1; 4306 c->http_in_chunk_headers = 0; 4307 c->http_is_chunked = 0; 4308 c->http_temp = temp; 4309 #ifdef USE_MSG_FASTOPEN 4310 c->tcp_do_fastopen = 1; 4311 #endif 4312 #ifdef USE_DNSCRYPT 4313 c->dnscrypt = 0; 4314 c->dnscrypt_buffer = c->buffer; 4315 #endif 4316 c->repinfo.c = c; 4317 c->callback = callback; 4318 c->cb_arg = callback_arg; 4319 c->pp2_enabled = 0; 4320 c->pp2_header_state = pp2_header_none; 4321 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4322 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4323 comm_point_http_handle_callback, c); 4324 if(c->ev->ev == NULL) 4325 { 4326 log_err("could not baseset tcpout event"); 4327 #ifdef HAVE_SSL 4328 SSL_free(c->ssl); 4329 #endif 4330 sldns_buffer_free(c->buffer); 4331 free(c->ev); 4332 free(c); 4333 return NULL; 4334 } 4335 4336 return c; 4337 } 4338 4339 struct comm_point* 4340 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 4341 comm_point_callback_type* callback, void* callback_arg) 4342 { 4343 struct comm_point* c = (struct comm_point*)calloc(1, 4344 sizeof(struct comm_point)); 4345 short evbits; 4346 if(!c) 4347 return NULL; 4348 c->ev = (struct internal_event*)calloc(1, 4349 sizeof(struct internal_event)); 4350 if(!c->ev) { 4351 free(c); 4352 return NULL; 4353 } 4354 c->ev->base = base; 4355 c->fd = fd; 4356 c->buffer = sldns_buffer_new(bufsize); 
4357 if(!c->buffer) { 4358 free(c->ev); 4359 free(c); 4360 return NULL; 4361 } 4362 c->timeout = NULL; 4363 c->tcp_is_reading = 1; 4364 c->tcp_byte_count = 0; 4365 c->tcp_parent = NULL; 4366 c->max_tcp_count = 0; 4367 c->cur_tcp_count = 0; 4368 c->tcp_handlers = NULL; 4369 c->tcp_free = NULL; 4370 c->type = comm_local; 4371 c->tcp_do_close = 0; 4372 c->do_not_close = 1; 4373 c->tcp_do_toggle_rw = 0; 4374 c->tcp_check_nb_connect = 0; 4375 #ifdef USE_MSG_FASTOPEN 4376 c->tcp_do_fastopen = 0; 4377 #endif 4378 #ifdef USE_DNSCRYPT 4379 c->dnscrypt = 0; 4380 c->dnscrypt_buffer = c->buffer; 4381 #endif 4382 c->callback = callback; 4383 c->cb_arg = callback_arg; 4384 c->pp2_enabled = 0; 4385 c->pp2_header_state = pp2_header_none; 4386 /* ub_event stuff */ 4387 evbits = UB_EV_PERSIST | UB_EV_READ; 4388 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4389 comm_point_local_handle_callback, c); 4390 if(c->ev->ev == NULL) { 4391 log_err("could not baseset localhdl event"); 4392 free(c->ev); 4393 free(c); 4394 return NULL; 4395 } 4396 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4397 log_err("could not add localhdl event"); 4398 ub_event_free(c->ev->ev); 4399 free(c->ev); 4400 free(c); 4401 return NULL; 4402 } 4403 c->event_added = 1; 4404 return c; 4405 } 4406 4407 struct comm_point* 4408 comm_point_create_raw(struct comm_base* base, int fd, int writing, 4409 comm_point_callback_type* callback, void* callback_arg) 4410 { 4411 struct comm_point* c = (struct comm_point*)calloc(1, 4412 sizeof(struct comm_point)); 4413 short evbits; 4414 if(!c) 4415 return NULL; 4416 c->ev = (struct internal_event*)calloc(1, 4417 sizeof(struct internal_event)); 4418 if(!c->ev) { 4419 free(c); 4420 return NULL; 4421 } 4422 c->ev->base = base; 4423 c->fd = fd; 4424 c->buffer = NULL; 4425 c->timeout = NULL; 4426 c->tcp_is_reading = 0; 4427 c->tcp_byte_count = 0; 4428 c->tcp_parent = NULL; 4429 c->max_tcp_count = 0; 4430 c->cur_tcp_count = 0; 4431 c->tcp_handlers = NULL; 4432 c->tcp_free = 
NULL; 4433 c->type = comm_raw; 4434 c->tcp_do_close = 0; 4435 c->do_not_close = 1; 4436 c->tcp_do_toggle_rw = 0; 4437 c->tcp_check_nb_connect = 0; 4438 #ifdef USE_MSG_FASTOPEN 4439 c->tcp_do_fastopen = 0; 4440 #endif 4441 #ifdef USE_DNSCRYPT 4442 c->dnscrypt = 0; 4443 c->dnscrypt_buffer = c->buffer; 4444 #endif 4445 c->callback = callback; 4446 c->cb_arg = callback_arg; 4447 c->pp2_enabled = 0; 4448 c->pp2_header_state = pp2_header_none; 4449 /* ub_event stuff */ 4450 if(writing) 4451 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4452 else evbits = UB_EV_PERSIST | UB_EV_READ; 4453 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4454 comm_point_raw_handle_callback, c); 4455 if(c->ev->ev == NULL) { 4456 log_err("could not baseset rawhdl event"); 4457 free(c->ev); 4458 free(c); 4459 return NULL; 4460 } 4461 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4462 log_err("could not add rawhdl event"); 4463 ub_event_free(c->ev->ev); 4464 free(c->ev); 4465 free(c); 4466 return NULL; 4467 } 4468 c->event_added = 1; 4469 return c; 4470 } 4471 4472 void 4473 comm_point_close(struct comm_point* c) 4474 { 4475 if(!c) 4476 return; 4477 if(c->fd != -1) { 4478 verbose(5, "comm_point_close of %d: event_del", c->fd); 4479 if(c->event_added) { 4480 if(ub_event_del(c->ev->ev) != 0) { 4481 log_err("could not event_del on close"); 4482 } 4483 c->event_added = 0; 4484 } 4485 } 4486 tcl_close_connection(c->tcl_addr); 4487 if(c->tcp_req_info) 4488 tcp_req_info_clear(c->tcp_req_info); 4489 if(c->h2_session) 4490 http2_session_server_delete(c->h2_session); 4491 4492 /* close fd after removing from event lists, or epoll.. 
is messed up */ 4493 if(c->fd != -1 && !c->do_not_close) { 4494 #ifdef USE_WINSOCK 4495 if(c->type == comm_tcp || c->type == comm_http) { 4496 /* delete sticky events for the fd, it gets closed */ 4497 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 4498 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 4499 } 4500 #endif 4501 verbose(VERB_ALGO, "close fd %d", c->fd); 4502 sock_close(c->fd); 4503 } 4504 c->fd = -1; 4505 } 4506 4507 void 4508 comm_point_delete(struct comm_point* c) 4509 { 4510 if(!c) 4511 return; 4512 if((c->type == comm_tcp || c->type == comm_http) && c->ssl) { 4513 #ifdef HAVE_SSL 4514 SSL_shutdown(c->ssl); 4515 SSL_free(c->ssl); 4516 #endif 4517 } 4518 if(c->type == comm_http && c->http_endpoint) { 4519 free(c->http_endpoint); 4520 c->http_endpoint = NULL; 4521 } 4522 comm_point_close(c); 4523 if(c->tcp_handlers) { 4524 int i; 4525 for(i=0; i<c->max_tcp_count; i++) 4526 comm_point_delete(c->tcp_handlers[i]); 4527 free(c->tcp_handlers); 4528 } 4529 free(c->timeout); 4530 if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) { 4531 sldns_buffer_free(c->buffer); 4532 #ifdef USE_DNSCRYPT 4533 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) { 4534 sldns_buffer_free(c->dnscrypt_buffer); 4535 } 4536 #endif 4537 if(c->tcp_req_info) { 4538 tcp_req_info_delete(c->tcp_req_info); 4539 } 4540 if(c->h2_session) { 4541 http2_session_delete(c->h2_session); 4542 } 4543 } 4544 ub_event_free(c->ev->ev); 4545 free(c->ev); 4546 free(c); 4547 } 4548 4549 void 4550 comm_point_send_reply(struct comm_reply *repinfo) 4551 { 4552 struct sldns_buffer* buffer; 4553 log_assert(repinfo && repinfo->c); 4554 #ifdef USE_DNSCRYPT 4555 buffer = repinfo->c->dnscrypt_buffer; 4556 if(!dnsc_handle_uncurved_request(repinfo)) { 4557 return; 4558 } 4559 #else 4560 buffer = repinfo->c->buffer; 4561 #endif 4562 if(repinfo->c->type == comm_udp) { 4563 if(repinfo->srctype) 4564 comm_point_send_udp_msg_if(repinfo->c, buffer, 4565 (struct 
sockaddr*)&repinfo->remote_addr, 4566 repinfo->remote_addrlen, repinfo); 4567 else 4568 comm_point_send_udp_msg(repinfo->c, buffer, 4569 (struct sockaddr*)&repinfo->remote_addr, 4570 repinfo->remote_addrlen, 0); 4571 #ifdef USE_DNSTAP 4572 /* 4573 * sending src (client)/dst (local service) addresses over DNSTAP from udp callback 4574 */ 4575 if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) { 4576 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen); 4577 log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 4578 dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, repinfo->c->buffer); 4579 } 4580 #endif 4581 } else { 4582 #ifdef USE_DNSTAP 4583 /* 4584 * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback 4585 */ 4586 if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) { 4587 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen); 4588 log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 4589 dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, 4590 ( repinfo->c->tcp_req_info? 
repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer )); 4591 } 4592 #endif 4593 if(repinfo->c->tcp_req_info) { 4594 tcp_req_info_send_reply(repinfo->c->tcp_req_info); 4595 } else if(repinfo->c->use_h2) { 4596 if(!http2_submit_dns_response(repinfo->c->h2_session)) { 4597 comm_point_drop_reply(repinfo); 4598 return; 4599 } 4600 repinfo->c->h2_stream = NULL; 4601 repinfo->c->tcp_is_reading = 0; 4602 comm_point_stop_listening(repinfo->c); 4603 comm_point_start_listening(repinfo->c, -1, 4604 adjusted_tcp_timeout(repinfo->c)); 4605 return; 4606 } else { 4607 comm_point_start_listening(repinfo->c, -1, 4608 adjusted_tcp_timeout(repinfo->c)); 4609 } 4610 } 4611 } 4612 4613 void 4614 comm_point_drop_reply(struct comm_reply* repinfo) 4615 { 4616 if(!repinfo) 4617 return; 4618 log_assert(repinfo->c); 4619 log_assert(repinfo->c->type != comm_tcp_accept); 4620 if(repinfo->c->type == comm_udp) 4621 return; 4622 if(repinfo->c->tcp_req_info) 4623 repinfo->c->tcp_req_info->is_drop = 1; 4624 if(repinfo->c->type == comm_http) { 4625 if(repinfo->c->h2_session) { 4626 repinfo->c->h2_session->is_drop = 1; 4627 if(!repinfo->c->h2_session->postpone_drop) 4628 reclaim_http_handler(repinfo->c); 4629 return; 4630 } 4631 reclaim_http_handler(repinfo->c); 4632 return; 4633 } 4634 reclaim_tcp_handler(repinfo->c); 4635 } 4636 4637 void 4638 comm_point_stop_listening(struct comm_point* c) 4639 { 4640 verbose(VERB_ALGO, "comm point stop listening %d", c->fd); 4641 if(c->event_added) { 4642 if(ub_event_del(c->ev->ev) != 0) { 4643 log_err("event_del error to stoplisten"); 4644 } 4645 c->event_added = 0; 4646 } 4647 } 4648 4649 void 4650 comm_point_start_listening(struct comm_point* c, int newfd, int msec) 4651 { 4652 verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 4653 c->fd==-1?newfd:c->fd, msec); 4654 if(c->type == comm_tcp_accept && !c->tcp_free) { 4655 /* no use to start listening no free slots. 
*/ 4656 return; 4657 } 4658 if(c->event_added) { 4659 if(ub_event_del(c->ev->ev) != 0) { 4660 log_err("event_del error to startlisten"); 4661 } 4662 c->event_added = 0; 4663 } 4664 if(msec != -1 && msec != 0) { 4665 if(!c->timeout) { 4666 c->timeout = (struct timeval*)malloc(sizeof( 4667 struct timeval)); 4668 if(!c->timeout) { 4669 log_err("cpsl: malloc failed. No net read."); 4670 return; 4671 } 4672 } 4673 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 4674 #ifndef S_SPLINT_S /* splint fails on struct timeval. */ 4675 c->timeout->tv_sec = msec/1000; 4676 c->timeout->tv_usec = (msec%1000)*1000; 4677 #endif /* S_SPLINT_S */ 4678 } else { 4679 if(msec == 0 || !c->timeout) { 4680 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4681 } 4682 } 4683 if(c->type == comm_tcp || c->type == comm_http) { 4684 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4685 if(c->tcp_write_and_read) { 4686 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 4687 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4688 } else if(c->tcp_is_reading) { 4689 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 4690 ub_event_add_bits(c->ev->ev, UB_EV_READ); 4691 } else { 4692 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 4693 ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4694 } 4695 } 4696 if(newfd != -1) { 4697 if(c->fd != -1 && c->fd != newfd) { 4698 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 4699 sock_close(c->fd); 4700 } 4701 c->fd = newfd; 4702 ub_event_set_fd(c->ev->ev, c->fd); 4703 } 4704 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 4705 log_err("event_add failed. 
in cpsl."); 4706 return; 4707 } 4708 c->event_added = 1; 4709 } 4710 4711 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 4712 { 4713 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 4714 if(c->event_added) { 4715 if(ub_event_del(c->ev->ev) != 0) { 4716 log_err("event_del error to cplf"); 4717 } 4718 c->event_added = 0; 4719 } 4720 if(!c->timeout) { 4721 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4722 } 4723 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4724 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 4725 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4726 if(ub_event_add(c->ev->ev, c->timeout) != 0) { 4727 log_err("event_add failed. in cplf."); 4728 return; 4729 } 4730 c->event_added = 1; 4731 } 4732 4733 size_t comm_point_get_mem(struct comm_point* c) 4734 { 4735 size_t s; 4736 if(!c) 4737 return 0; 4738 s = sizeof(*c) + sizeof(*c->ev); 4739 if(c->timeout) 4740 s += sizeof(*c->timeout); 4741 if(c->type == comm_tcp || c->type == comm_local) { 4742 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 4743 #ifdef USE_DNSCRYPT 4744 s += sizeof(*c->dnscrypt_buffer); 4745 if(c->buffer != c->dnscrypt_buffer) { 4746 s += sldns_buffer_capacity(c->dnscrypt_buffer); 4747 } 4748 #endif 4749 } 4750 if(c->type == comm_tcp_accept) { 4751 int i; 4752 for(i=0; i<c->max_tcp_count; i++) 4753 s += comm_point_get_mem(c->tcp_handlers[i]); 4754 } 4755 return s; 4756 } 4757 4758 struct comm_timer* 4759 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 4760 { 4761 struct internal_timer *tm = (struct internal_timer*)calloc(1, 4762 sizeof(struct internal_timer)); 4763 if(!tm) { 4764 log_err("malloc failed"); 4765 return NULL; 4766 } 4767 tm->super.ev_timer = tm; 4768 tm->base = base; 4769 tm->super.callback = cb; 4770 tm->super.cb_arg = cb_arg; 4771 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 4772 comm_timer_callback, &tm->super); 4773 if(tm->ev == NULL) { 4774 log_err("timer_create: 
event_base_set failed."); 4775 free(tm); 4776 return NULL; 4777 } 4778 return &tm->super; 4779 } 4780 4781 void 4782 comm_timer_disable(struct comm_timer* timer) 4783 { 4784 if(!timer) 4785 return; 4786 ub_timer_del(timer->ev_timer->ev); 4787 timer->ev_timer->enabled = 0; 4788 } 4789 4790 void 4791 comm_timer_set(struct comm_timer* timer, struct timeval* tv) 4792 { 4793 log_assert(tv); 4794 if(timer->ev_timer->enabled) 4795 comm_timer_disable(timer); 4796 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 4797 comm_timer_callback, timer, tv) != 0) 4798 log_err("comm_timer_set: evtimer_add failed."); 4799 timer->ev_timer->enabled = 1; 4800 } 4801 4802 void 4803 comm_timer_delete(struct comm_timer* timer) 4804 { 4805 if(!timer) 4806 return; 4807 comm_timer_disable(timer); 4808 /* Free the sub struct timer->ev_timer derived from the super struct timer. 4809 * i.e. assert(timer == timer->ev_timer) 4810 */ 4811 ub_event_free(timer->ev_timer->ev); 4812 free(timer->ev_timer); 4813 } 4814 4815 void 4816 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 4817 { 4818 struct comm_timer* tm = (struct comm_timer*)arg; 4819 if(!(event&UB_EV_TIMEOUT)) 4820 return; 4821 ub_comm_base_now(tm->ev_timer->base); 4822 tm->ev_timer->enabled = 0; 4823 fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 4824 (*tm->callback)(tm->cb_arg); 4825 } 4826 4827 int 4828 comm_timer_is_set(struct comm_timer* timer) 4829 { 4830 return (int)timer->ev_timer->enabled; 4831 } 4832 4833 size_t 4834 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 4835 { 4836 return sizeof(struct internal_timer); 4837 } 4838 4839 struct comm_signal* 4840 comm_signal_create(struct comm_base* base, 4841 void (*callback)(int, void*), void* cb_arg) 4842 { 4843 struct comm_signal* com = (struct comm_signal*)malloc( 4844 sizeof(struct comm_signal)); 4845 if(!com) { 4846 log_err("malloc failed"); 4847 return NULL; 4848 } 4849 com->base = base; 4850 com->callback = callback; 4851 com->cb_arg 
= cb_arg; 4852 com->ev_signal = NULL; 4853 return com; 4854 } 4855 4856 void 4857 comm_signal_callback(int sig, short event, void* arg) 4858 { 4859 struct comm_signal* comsig = (struct comm_signal*)arg; 4860 if(!(event & UB_EV_SIGNAL)) 4861 return; 4862 ub_comm_base_now(comsig->base); 4863 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 4864 (*comsig->callback)(sig, comsig->cb_arg); 4865 } 4866 4867 int 4868 comm_signal_bind(struct comm_signal* comsig, int sig) 4869 { 4870 struct internal_signal* entry = (struct internal_signal*)calloc(1, 4871 sizeof(struct internal_signal)); 4872 if(!entry) { 4873 log_err("malloc failed"); 4874 return 0; 4875 } 4876 log_assert(comsig); 4877 /* add signal event */ 4878 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 4879 comm_signal_callback, comsig); 4880 if(entry->ev == NULL) { 4881 log_err("Could not create signal event"); 4882 free(entry); 4883 return 0; 4884 } 4885 if(ub_signal_add(entry->ev, NULL) != 0) { 4886 log_err("Could not add signal handler"); 4887 ub_event_free(entry->ev); 4888 free(entry); 4889 return 0; 4890 } 4891 /* link into list */ 4892 entry->next = comsig->ev_signal; 4893 comsig->ev_signal = entry; 4894 return 1; 4895 } 4896 4897 void 4898 comm_signal_delete(struct comm_signal* comsig) 4899 { 4900 struct internal_signal* p, *np; 4901 if(!comsig) 4902 return; 4903 p=comsig->ev_signal; 4904 while(p) { 4905 np = p->next; 4906 ub_signal_del(p->ev); 4907 ub_event_free(p->ev); 4908 free(p); 4909 p = np; 4910 } 4911 free(comsig); 4912 } 4913