1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2019 Isilon Systems, LLC. 5 * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. 6 * Copyright (c) 2000 Darrell Anderson 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_ddb.h" 35 #include "opt_inet.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/endian.h> 40 #include <sys/errno.h> 41 #include <sys/eventhandler.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/mutex.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 48 #ifdef DDB 49 #include <ddb/ddb.h> 50 #include <ddb/db_lex.h> 51 #endif 52 53 #include <net/ethernet.h> 54 #include <net/if.h> 55 #include <net/if_arp.h> 56 #include <net/if_dl.h> 57 #include <net/if_types.h> 58 #include <net/if_var.h> 59 #include <net/if_private.h> 60 #include <net/vnet.h> 61 #include <net/route.h> 62 #include <net/route/nhop.h> 63 64 #include <netinet/in.h> 65 #include <netinet/in_fib.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_var.h> 68 #include <netinet/ip.h> 69 #include <netinet/ip_var.h> 70 #include <netinet/ip_options.h> 71 #include <netinet/udp.h> 72 #include <netinet/udp_var.h> 73 74 #include <machine/in_cksum.h> 75 #include <machine/pcb.h> 76 77 #include <net/debugnet.h> 78 #define DEBUGNET_INTERNAL 79 #include <net/debugnet_int.h> 80 81 FEATURE(debugnet, "Debugnet support"); 82 83 SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 84 "debugnet parameters"); 85 86 unsigned debugnet_debug; 87 SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN, 88 &debugnet_debug, 0, 89 "Debug message verbosity (0: off; 1: on; 2: verbose)"); 90 91 int debugnet_npolls = 2000; 92 SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN, 93 &debugnet_npolls, 0, 94 "Number of times to poll before assuming packet loss (0.5ms per poll)"); 95 int debugnet_nretries = 10; 96 SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN, 97 &debugnet_nretries, 0, 98 "Number of retransmit attempts before giving up"); 99 int debugnet_fib = RT_DEFAULT_FIB; 100 SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN, 101 &debugnet_fib, 0, 102 "Fib to use when sending dump"); 103 104 static bool g_debugnet_pcb_inuse; 105 static struct debugnet_pcb g_dnet_pcb; 106 107 /* 108 * Simple accessors for opaque PCB. 109 */ 110 const unsigned char * 111 debugnet_get_gw_mac(const struct debugnet_pcb *pcb) 112 { 113 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && 114 pcb->dp_state >= DN_STATE_HAVE_GW_MAC); 115 return (pcb->dp_gw_mac.octet); 116 } 117 118 const in_addr_t * 119 debugnet_get_server_addr(const struct debugnet_pcb *pcb) 120 { 121 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && 122 pcb->dp_state >= DN_STATE_GOT_HERALD_PORT); 123 return (&pcb->dp_server); 124 } 125 126 const uint16_t 127 debugnet_get_server_port(const struct debugnet_pcb *pcb) 128 { 129 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && 130 pcb->dp_state >= DN_STATE_GOT_HERALD_PORT); 131 return (pcb->dp_server_port); 132 } 133 134 /* 135 * Start of network primitives, beginning with output primitives. 136 */ 137 138 /* 139 * Handles creation of the ethernet header, then places outgoing packets into 140 * the tx buffer for the NIC 141 * 142 * Parameters: 143 * m The mbuf containing the packet to be sent (will be freed by 144 * this function or the NIC driver) 145 * ifp The interface to send on 146 * dst The destination ethernet address (source address will be looked 147 * up using ifp) 148 * etype The ETHERTYPE_* value for the protocol that is being sent 149 * 150 * Returns: 151 * int see errno.h, 0 for success 152 */ 153 int 154 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, 155 u_short etype) 156 { 157 struct ether_header *eh; 158 159 if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || 160 (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { 161 if_printf(ifp, "%s: interface isn't up\n", __func__); 162 m_freem(m); 163 return (ENETDOWN); 164 } 165 166 /* Fill in the ethernet header. */ 167 M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); 168 if (m == NULL) { 169 printf("%s: out of mbufs\n", __func__); 170 return (ENOBUFS); 171 } 172 eh = mtod(m, struct ether_header *); 173 memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); 174 memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); 175 eh->ether_type = htons(etype); 176 return (ifp->if_debugnet_methods->dn_transmit(ifp, m)); 177 } 178 179 /* 180 * Unreliable transmission of an mbuf chain to the debugnet server 181 * Note: can't handle fragmentation; fails if the packet is larger than 182 * ifp->if_mtu after adding the UDP/IP headers 183 * 184 * Parameters: 185 * pcb The debugnet context block 186 * m mbuf chain 187 * 188 * Returns: 189 * int see errno.h, 0 for success 190 */ 191 static int 192 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m) 193 { 194 struct udphdr *udp; 195 196 MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC); 197 198 M_PREPEND(m, sizeof(*udp), M_NOWAIT); 199 if (m == NULL) { 200 printf("%s: out of mbufs\n", __func__); 201 return (ENOBUFS); 202 } 203 204 udp = mtod(m, void *); 205 udp->uh_ulen = htons(m->m_pkthdr.len); 206 /* Use this src port so that the server can connect() the socket */ 207 udp->uh_sport = htons(pcb->dp_client_port); 208 udp->uh_dport = htons(pcb->dp_server_port); 209 /* Computed later (protocol-dependent). */ 210 udp->uh_sum = 0; 211 212 return (debugnet_ip_output(pcb, m)); 213 } 214 215 int 216 debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */) 217 { 218 struct debugnet_ack *dn_ack; 219 struct mbuf *m; 220 221 DNETDEBUG("Acking with seqno %u\n", ntohl(seqno)); 222 223 m = m_gethdr(M_NOWAIT, MT_DATA); 224 if (m == NULL) { 225 printf("%s: Out of mbufs\n", __func__); 226 return (ENOBUFS); 227 } 228 m->m_len = sizeof(*dn_ack); 229 m->m_pkthdr.len = sizeof(*dn_ack); 230 MH_ALIGN(m, sizeof(*dn_ack)); 231 dn_ack = mtod(m, void *); 232 dn_ack->da_seqno = seqno; 233 234 return (debugnet_udp_output(pcb, m)); 235 } 236 237 /* 238 * Dummy free function for debugnet clusters. 239 */ 240 static void 241 debugnet_mbuf_free(struct mbuf *m __unused) 242 { 243 } 244 245 /* 246 * Construct and reliably send a debugnet packet. May fail from a resource 247 * shortage or extreme number of unacknowledged retransmissions. Wait for 248 * an acknowledgement before returning. Splits packets into chunks small 249 * enough to be sent without fragmentation (looks up the interface MTU) 250 * 251 * Parameters: 252 * type debugnet packet type (HERALD, FINISHED, ...) 253 * data data 254 * datalen data size (bytes) 255 * auxdata optional auxiliary information 256 * 257 * Returns: 258 * int see errno.h, 0 for success 259 */ 260 int 261 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data, 262 uint32_t datalen, const struct debugnet_proto_aux *auxdata) 263 { 264 struct debugnet_msg_hdr *dn_msg_hdr; 265 struct mbuf *m, *m2; 266 uint64_t want_acks; 267 uint32_t i, pktlen, sent_so_far; 268 int retries, polls, error; 269 270 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) 271 return (ECONNRESET); 272 273 want_acks = 0; 274 pcb->dp_rcvd_acks = 0; 275 retries = 0; 276 277 retransmit: 278 /* Chunks can be too big to fit in packets. */ 279 for (i = sent_so_far = 0; sent_so_far < datalen || 280 (i == 0 && datalen == 0); i++) { 281 pktlen = datalen - sent_so_far; 282 283 /* Bound: the interface MTU (assume no IP options). */ 284 pktlen = min(pktlen, pcb->dp_ifp->if_mtu - 285 sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr)); 286 287 /* 288 * Check if it is retransmitting and this has been ACKed 289 * already. 290 */ 291 if ((pcb->dp_rcvd_acks & (1 << i)) != 0) { 292 sent_so_far += pktlen; 293 continue; 294 } 295 296 /* 297 * Get and fill a header mbuf, then chain data as an extended 298 * mbuf. 299 */ 300 m = m_gethdr(M_NOWAIT, MT_DATA); 301 if (m == NULL) { 302 printf("%s: Out of mbufs\n", __func__); 303 return (ENOBUFS); 304 } 305 m->m_len = sizeof(struct debugnet_msg_hdr); 306 m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr); 307 MH_ALIGN(m, sizeof(struct debugnet_msg_hdr)); 308 dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *); 309 dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i); 310 dn_msg_hdr->mh_type = htonl(type); 311 dn_msg_hdr->mh_len = htonl(pktlen); 312 313 if (auxdata != NULL) { 314 dn_msg_hdr->mh_offset = 315 htobe64(auxdata->dp_offset_start + sent_so_far); 316 dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2); 317 } else { 318 dn_msg_hdr->mh_offset = htobe64(sent_so_far); 319 dn_msg_hdr->mh_aux2 = 0; 320 } 321 322 if (pktlen != 0) { 323 m2 = m_get(M_NOWAIT, MT_DATA); 324 if (m2 == NULL) { 325 m_freem(m); 326 printf("%s: Out of mbufs\n", __func__); 327 return (ENOBUFS); 328 } 329 MEXTADD(m2, __DECONST(char *, data) + sent_so_far, 330 pktlen, debugnet_mbuf_free, NULL, NULL, 0, 331 EXT_DISPOSABLE); 332 m2->m_len = pktlen; 333 334 m_cat(m, m2); 335 m->m_pkthdr.len += pktlen; 336 } 337 error = debugnet_udp_output(pcb, m); 338 if (error != 0) 339 return (error); 340 341 /* Note that we're waiting for this packet in the bitfield. */ 342 want_acks |= (1 << i); 343 sent_so_far += pktlen; 344 } 345 if (i >= DEBUGNET_MAX_IN_FLIGHT) 346 printf("Warning: Sent more than %d packets (%d). " 347 "Acknowledgements will fail unless the size of " 348 "rcvd_acks/want_acks is increased.\n", 349 DEBUGNET_MAX_IN_FLIGHT, i); 350 351 /* 352 * Wait for acks. A *real* window would speed things up considerably. 353 */ 354 polls = 0; 355 while (pcb->dp_rcvd_acks != want_acks) { 356 if (polls++ > debugnet_npolls) { 357 if (retries++ > debugnet_nretries) 358 return (ETIMEDOUT); 359 printf(". "); 360 goto retransmit; 361 } 362 debugnet_network_poll(pcb); 363 DELAY(500); 364 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) 365 return (ECONNRESET); 366 } 367 pcb->dp_seqno += i; 368 return (0); 369 } 370 371 /* 372 * Network input primitives. 373 */ 374 375 /* 376 * Just introspect the header enough to fire off a seqno ack and validate 377 * length fits. 378 */ 379 static void 380 debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb) 381 { 382 const struct debugnet_msg_hdr *dnh; 383 struct mbuf *m; 384 uint32_t hdr_type; 385 uint32_t seqno; 386 int error; 387 388 m = *mb; 389 390 if (m->m_pkthdr.len < sizeof(*dnh)) { 391 DNETDEBUG("ignoring small debugnet_msg packet\n"); 392 return; 393 } 394 395 /* Get ND header. */ 396 if (m->m_len < sizeof(*dnh)) { 397 m = m_pullup(m, sizeof(*dnh)); 398 *mb = m; 399 if (m == NULL) { 400 DNETDEBUG("m_pullup failed\n"); 401 return; 402 } 403 } 404 405 dnh = mtod(m, const void *); 406 if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) { 407 DNETDEBUG("Dropping short packet.\n"); 408 return; 409 } 410 411 hdr_type = ntohl(dnh->mh_type); 412 if (hdr_type != DEBUGNET_DATA) { 413 if (hdr_type == DEBUGNET_FINISHED) { 414 printf("Remote shut down the connection on us!\n"); 415 pcb->dp_state = DN_STATE_REMOTE_CLOSED; 416 if (pcb->dp_finish_handler != NULL) { 417 pcb->dp_finish_handler(); 418 } 419 } else { 420 DNETDEBUG("Got unexpected debugnet message %u\n", hdr_type); 421 } 422 return; 423 } 424 425 /* 426 * If the issue is transient (ENOBUFS), sender should resend. If 427 * non-transient (like driver objecting to rx -> tx from the same 428 * thread), not much else we can do. 429 */ 430 seqno = dnh->mh_seqno; /* net endian */ 431 m_adj(m, sizeof(*dnh)); 432 dnh = NULL; 433 error = pcb->dp_rx_handler(m); 434 if (error != 0) { 435 DNETDEBUG("RX handler was not able to accept message, error %d. " 436 "Skipping ack.\n", error); 437 return; 438 } 439 440 error = debugnet_ack_output(pcb, seqno); 441 if (error != 0) { 442 DNETDEBUG("Couldn't ACK rx packet %u; %d\n", ntohl(seqno), error); 443 } 444 } 445 446 static void 447 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport) 448 { 449 const struct debugnet_ack *dn_ack; 450 struct mbuf *m; 451 uint32_t rcv_ackno; 452 453 m = *mb; 454 455 /* Get Ack. */ 456 if (m->m_len < sizeof(*dn_ack)) { 457 m = m_pullup(m, sizeof(*dn_ack)); 458 *mb = m; 459 if (m == NULL) { 460 DNETDEBUG("m_pullup failed\n"); 461 return; 462 } 463 } 464 dn_ack = mtod(m, const void *); 465 466 /* Debugnet processing. */ 467 /* 468 * Packet is meant for us. Extract the ack sequence number and the 469 * port number if necessary. 470 */ 471 rcv_ackno = ntohl(dn_ack->da_seqno); 472 if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) { 473 pcb->dp_server_port = sport; 474 pcb->dp_state = DN_STATE_GOT_HERALD_PORT; 475 } 476 if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT) 477 printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno); 478 else if (rcv_ackno >= pcb->dp_seqno) { 479 /* We're interested in this ack. Record it. */ 480 pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno); 481 } 482 } 483 484 void 485 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb) 486 { 487 const struct udphdr *udp; 488 struct mbuf *m; 489 uint16_t sport, ulen; 490 491 /* UDP processing. */ 492 493 m = *mb; 494 if (m->m_pkthdr.len < sizeof(*udp)) { 495 DNETDEBUG("ignoring small UDP packet\n"); 496 return; 497 } 498 499 /* Get UDP headers. */ 500 if (m->m_len < sizeof(*udp)) { 501 m = m_pullup(m, sizeof(*udp)); 502 *mb = m; 503 if (m == NULL) { 504 DNETDEBUG("m_pullup failed\n"); 505 return; 506 } 507 } 508 udp = mtod(m, const void *); 509 510 /* We expect to receive UDP packets on the configured client port. */ 511 if (ntohs(udp->uh_dport) != pcb->dp_client_port) { 512 DNETDEBUG("not on the expected port.\n"); 513 return; 514 } 515 516 /* Check that ulen does not exceed actual size of data. */ 517 ulen = ntohs(udp->uh_ulen); 518 if (m->m_pkthdr.len < ulen) { 519 DNETDEBUG("ignoring runt UDP packet\n"); 520 return; 521 } 522 523 sport = ntohs(udp->uh_sport); 524 525 m_adj(m, sizeof(*udp)); 526 ulen -= sizeof(*udp); 527 528 if (ulen == sizeof(struct debugnet_ack)) { 529 debugnet_handle_ack(pcb, mb, sport); 530 return; 531 } 532 533 if (pcb->dp_rx_handler == NULL) { 534 if (ulen < sizeof(struct debugnet_ack)) 535 DNETDEBUG("ignoring small ACK packet\n"); 536 else 537 DNETDEBUG("ignoring unexpected non-ACK packet on " 538 "half-duplex connection.\n"); 539 return; 540 } 541 542 debugnet_handle_rx_msg(pcb, mb); 543 } 544 545 /* 546 * Handler for incoming packets directly from the network adapter 547 * Identifies the packet type (IP or ARP) and passes it along to one of the 548 * helper functions debugnet_handle_ip or debugnet_handle_arp. 549 * 550 * It needs to partially replicate the behaviour of ether_input() and 551 * ether_demux(). 552 * 553 * Parameters: 554 * ifp the interface the packet came from 555 * m an mbuf containing the packet received 556 */ 557 static void 558 debugnet_input_one(struct ifnet *ifp, struct mbuf *m) 559 { 560 struct ifreq ifr; 561 struct ether_header *eh; 562 u_short etype; 563 564 /* Ethernet processing. */ 565 if ((m->m_flags & M_PKTHDR) == 0) { 566 DNETDEBUG_IF(ifp, "discard frame without packet header\n"); 567 goto done; 568 } 569 if (m->m_len < ETHER_HDR_LEN) { 570 DNETDEBUG_IF(ifp, 571 "discard frame without leading eth header (len %u pktlen %u)\n", 572 m->m_len, m->m_pkthdr.len); 573 goto done; 574 } 575 if ((m->m_flags & M_HASFCS) != 0) { 576 m_adj(m, -ETHER_CRC_LEN); 577 m->m_flags &= ~M_HASFCS; 578 } 579 eh = mtod(m, struct ether_header *); 580 etype = ntohs(eh->ether_type); 581 if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { 582 DNETDEBUG_IF(ifp, "ignoring vlan packets\n"); 583 goto done; 584 } 585 if (if_gethwaddr(ifp, &ifr) != 0) { 586 DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n"); 587 goto done; 588 } 589 if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost, 590 ETHER_ADDR_LEN) != 0 && 591 (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) { 592 DNETDEBUG_IF(ifp, 593 "discard frame with incorrect destination addr\n"); 594 goto done; 595 } 596 597 MPASS(g_debugnet_pcb_inuse); 598 599 /* Done ethernet processing. Strip off the ethernet header. */ 600 m_adj(m, ETHER_HDR_LEN); 601 switch (etype) { 602 case ETHERTYPE_ARP: 603 debugnet_handle_arp(&g_dnet_pcb, &m); 604 break; 605 case ETHERTYPE_IP: 606 debugnet_handle_ip(&g_dnet_pcb, &m); 607 break; 608 default: 609 DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); 610 break; 611 } 612 done: 613 if (m != NULL) 614 m_freem(m); 615 } 616 617 static void 618 debugnet_input(struct ifnet *ifp, struct mbuf *m) 619 { 620 struct mbuf *n; 621 622 do { 623 n = m->m_nextpkt; 624 m->m_nextpkt = NULL; 625 debugnet_input_one(ifp, m); 626 m = n; 627 } while (m != NULL); 628 } 629 630 /* 631 * Network polling primitive. 632 * 633 * Instead of assuming that most of the network stack is sane, we just poll the 634 * driver directly for packets. 635 */ 636 void 637 debugnet_network_poll(struct debugnet_pcb *pcb) 638 { 639 struct ifnet *ifp; 640 641 ifp = pcb->dp_ifp; 642 ifp->if_debugnet_methods->dn_poll(ifp, 1000); 643 } 644 645 /* 646 * Start of consumer API surface. 647 */ 648 void 649 debugnet_free(struct debugnet_pcb *pcb) 650 { 651 struct ifnet *ifp; 652 653 MPASS(pcb == &g_dnet_pcb); 654 MPASS(pcb->dp_drv_input == NULL || g_debugnet_pcb_inuse); 655 656 ifp = pcb->dp_ifp; 657 if (ifp != NULL) { 658 if (pcb->dp_drv_input != NULL) 659 ifp->if_input = pcb->dp_drv_input; 660 if (pcb->dp_event_started) 661 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END); 662 } 663 debugnet_mbuf_finish(); 664 665 g_debugnet_pcb_inuse = false; 666 memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb)); 667 } 668 669 int 670 debugnet_connect(const struct debugnet_conn_params *dcp, 671 struct debugnet_pcb **pcb_out) 672 { 673 struct debugnet_proto_aux herald_auxdata; 674 struct debugnet_pcb *pcb; 675 struct ifnet *ifp; 676 int error; 677 678 if (g_debugnet_pcb_inuse) { 679 printf("%s: Only one connection at a time.\n", __func__); 680 return (EBUSY); 681 } 682 683 pcb = &g_dnet_pcb; 684 *pcb = (struct debugnet_pcb) { 685 .dp_state = DN_STATE_INIT, 686 .dp_client = dcp->dc_client, 687 .dp_server = dcp->dc_server, 688 .dp_gateway = dcp->dc_gateway, 689 .dp_server_port = dcp->dc_herald_port, /* Initially */ 690 .dp_client_port = dcp->dc_client_port, 691 .dp_seqno = 1, 692 .dp_ifp = dcp->dc_ifp, 693 .dp_rx_handler = dcp->dc_rx_handler, 694 .dp_drv_input = NULL, 695 }; 696 697 /* Switch to the debugnet mbuf zones. */ 698 debugnet_mbuf_start(); 699 700 /* At least one needed parameter is missing; infer it. */ 701 if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY || 702 pcb->dp_ifp == NULL) { 703 struct sockaddr_in dest_sin, *gw_sin, *local_sin; 704 struct ifnet *rt_ifp; 705 struct nhop_object *nh; 706 707 memset(&dest_sin, 0, sizeof(dest_sin)); 708 dest_sin = (struct sockaddr_in) { 709 .sin_len = sizeof(dest_sin), 710 .sin_family = AF_INET, 711 .sin_addr.s_addr = pcb->dp_server, 712 }; 713 714 CURVNET_SET(vnet0); 715 nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0, 716 NHR_NONE); 717 CURVNET_RESTORE(); 718 719 if (nh == NULL) { 720 printf("%s: Could not get route for that server.\n", 721 __func__); 722 error = ENOENT; 723 goto cleanup; 724 } 725 726 /* TODO support AF_INET6 */ 727 if (nh->gw_sa.sa_family == AF_INET) 728 gw_sin = &nh->gw4_sa; 729 else { 730 if (nh->gw_sa.sa_family == AF_LINK) 731 DNETDEBUG("Destination address is on link.\n"); 732 gw_sin = NULL; 733 } 734 735 MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET); 736 local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr; 737 738 rt_ifp = nh->nh_ifp; 739 740 if (pcb->dp_client == INADDR_ANY) 741 pcb->dp_client = local_sin->sin_addr.s_addr; 742 if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL) 743 pcb->dp_gateway = gw_sin->sin_addr.s_addr; 744 if (pcb->dp_ifp == NULL) 745 pcb->dp_ifp = rt_ifp; 746 } 747 748 ifp = pcb->dp_ifp; 749 750 if (debugnet_debug > 0) { 751 char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN], 752 gwbuf[INET_ADDRSTRLEN]; 753 inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf)); 754 inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf)); 755 if (pcb->dp_gateway != INADDR_ANY) 756 inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf)); 757 DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n", 758 serbuf, pcb->dp_server_port, 759 (pcb->dp_gateway == INADDR_ANY) ? "" : " via ", 760 (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf, 761 clibuf, pcb->dp_client_port, if_name(ifp)); 762 } 763 764 /* Validate iface is online and supported. */ 765 if (!DEBUGNET_SUPPORTED_NIC(ifp)) { 766 printf("%s: interface '%s' does not support debugnet\n", 767 __func__, if_name(ifp)); 768 error = ENODEV; 769 goto cleanup; 770 } 771 if ((if_getflags(ifp) & IFF_UP) == 0) { 772 printf("%s: interface '%s' link is down\n", __func__, 773 if_name(ifp)); 774 error = ENXIO; 775 goto cleanup; 776 } 777 778 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START); 779 pcb->dp_event_started = true; 780 781 /* 782 * We maintain the invariant that g_debugnet_pcb_inuse is always true 783 * while the debugnet ifp's if_input is overridden with 784 * debugnet_input(). 785 */ 786 g_debugnet_pcb_inuse = true; 787 788 /* Make the card use *our* receive callback. */ 789 pcb->dp_drv_input = ifp->if_input; 790 ifp->if_input = debugnet_input; 791 792 printf("%s: searching for %s MAC...\n", __func__, 793 (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway"); 794 795 error = debugnet_arp_gw(pcb); 796 if (error != 0) { 797 printf("%s: failed to locate MAC address\n", __func__); 798 goto cleanup; 799 } 800 MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC); 801 802 herald_auxdata = (struct debugnet_proto_aux) { 803 .dp_offset_start = dcp->dc_herald_offset, 804 .dp_aux2 = dcp->dc_herald_aux2, 805 }; 806 error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data, 807 dcp->dc_herald_datalen, &herald_auxdata); 808 if (error != 0) { 809 printf("%s: failed to herald debugnet server\n", __func__); 810 goto cleanup; 811 } 812 813 *pcb_out = pcb; 814 return (0); 815 816 cleanup: 817 debugnet_free(pcb); 818 return (error); 819 } 820 821 /* 822 * Pre-allocated dump-time mbuf tracking. 823 * 824 * We just track the high water mark we've ever seen and allocate appropriately 825 * for that iface/mtu combo. 826 */ 827 static struct { 828 int nmbuf; 829 int ncl; 830 int clsize; 831 } dn_hwm; 832 static struct mtx dn_hwm_lk; 833 MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF); 834 835 static void 836 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize) 837 { 838 bool any; 839 840 any = false; 841 mtx_lock(&dn_hwm_lk); 842 843 if (nmbuf > dn_hwm.nmbuf) { 844 any = true; 845 dn_hwm.nmbuf = nmbuf; 846 } else 847 nmbuf = dn_hwm.nmbuf; 848 849 if (ncl > dn_hwm.ncl) { 850 any = true; 851 dn_hwm.ncl = ncl; 852 } else 853 ncl = dn_hwm.ncl; 854 855 if (clsize > dn_hwm.clsize) { 856 any = true; 857 dn_hwm.clsize = clsize; 858 } else 859 clsize = dn_hwm.clsize; 860 861 mtx_unlock(&dn_hwm_lk); 862 863 if (any) 864 debugnet_mbuf_reinit(nmbuf, ncl, clsize); 865 } 866 867 void 868 debugnet_any_ifnet_update(struct ifnet *ifp) 869 { 870 int clsize, nmbuf, ncl, nrxr; 871 872 if (!DEBUGNET_SUPPORTED_NIC(ifp)) 873 return; 874 875 ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize); 876 KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); 877 878 /* 879 * We need two headers per message on the transmit side. Multiply by 880 * four to give us some breathing room. 881 */ 882 nmbuf = ncl * (4 + nrxr); 883 ncl *= nrxr; 884 885 /* 886 * Bandaid for drivers that (incorrectly) advertise LinkUp before their 887 * dn_init method is available. 888 */ 889 if (nmbuf == 0 || ncl == 0 || clsize == 0) { 890 #ifndef INVARIANTS 891 if (bootverbose) 892 #endif 893 printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n", 894 __func__, if_name(ifp), ifp); 895 return; 896 } 897 dn_maybe_reinit_mbufs(nmbuf, ncl, clsize); 898 } 899 900 /* 901 * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless 902 * for us because drivers tend to if_attach before invoking DEBUGNET_SET(). 903 * 904 * On the other hand, hooking DEBUGNET_SET() itself may still be too early, 905 * because the driver is still in attach. Since we cannot use down interfaces, 906 * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? ... Nope, at least 907 * with vtnet and dhcpclient that event just never occurs. 908 * 909 * So that's how I've landed on the lower level ifnet_link_event. 910 */ 911 912 static void 913 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state) 914 { 915 if (link_state == LINK_STATE_UP) 916 debugnet_any_ifnet_update(ifp); 917 } 918 919 static eventhandler_tag dn_attach_cookie; 920 static void 921 dn_evh_init(void *ctx __unused) 922 { 923 dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event, 924 dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); 925 } 926 SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL); 927 928 /* 929 * DDB parsing helpers for debugnet(4) consumers. 930 */ 931 #ifdef DDB 932 struct my_inet_opt { 933 bool has_opt; 934 const char *printname; 935 in_addr_t *result; 936 }; 937 938 static int 939 dn_parse_optarg_ipv4(struct my_inet_opt *opt) 940 { 941 in_addr_t tmp; 942 unsigned octet; 943 int t; 944 945 tmp = 0; 946 for (octet = 0; octet < 4; octet++) { 947 t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL); 948 if (t != tNUMBER) { 949 db_printf("%s:%s: octet %u expected number; found %d\n", 950 __func__, opt->printname, octet, t); 951 return (EINVAL); 952 } 953 /* 954 * db_lex lexes '-' distinctly from the number itself, but 955 * let's document that invariant. 956 */ 957 MPASS(db_tok_number >= 0); 958 959 if (db_tok_number > UINT8_MAX) { 960 db_printf("%s:%s: octet %u out of range: %jd\n", __func__, 961 opt->printname, octet, (intmax_t)db_tok_number); 962 return (EDOM); 963 } 964 965 /* Constructed host-endian and converted to network later. */ 966 tmp = (tmp << 8) | db_tok_number; 967 968 if (octet < 3) { 969 t = db_read_token_flags(DRT_WSPACE); 970 if (t != tDOT) { 971 db_printf("%s:%s: octet %u expected '.'; found" 972 " %d\n", __func__, opt->printname, octet, 973 t); 974 return (EINVAL); 975 } 976 } 977 } 978 979 *opt->result = htonl(tmp); 980 opt->has_opt = true; 981 return (0); 982 } 983 984 int 985 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result) 986 { 987 struct ifnet *ifp; 988 int t, error; 989 bool want_ifp; 990 char ch; 991 992 struct my_inet_opt opt_client = { 993 .printname = "client", 994 .result = &result->dd_client, 995 }, 996 opt_server = { 997 .printname = "server", 998 .result = &result->dd_server, 999 }, 1000 opt_gateway = { 1001 .printname = "gateway", 1002 .result = &result->dd_gateway, 1003 }, 1004 *cur_inet_opt; 1005 1006 ifp = NULL; 1007 memset(result, 0, sizeof(*result)); 1008 1009 /* 1010 * command [space] [-] [opt] [[space] [optarg]] ... 1011 * 1012 * db_command has already lexed 'command' for us. 1013 */ 1014 t = db_read_token_flags(DRT_WSPACE); 1015 if (t == tWSPACE) 1016 t = db_read_token_flags(DRT_WSPACE); 1017 1018 while (t != tEOL) { 1019 if (t != tMINUS) { 1020 db_printf("%s: Bad syntax; expected '-', got %d\n", 1021 cmd, t); 1022 goto usage; 1023 } 1024 1025 t = db_read_token_flags(DRT_WSPACE); 1026 if (t != tIDENT) { 1027 db_printf("%s: Bad syntax; expected tIDENT, got %d\n", 1028 cmd, t); 1029 goto usage; 1030 } 1031 1032 if (strlen(db_tok_string) > 1) { 1033 db_printf("%s: Bad syntax; expected single option " 1034 "flag, got '%s'\n", cmd, db_tok_string); 1035 goto usage; 1036 } 1037 1038 want_ifp = false; 1039 cur_inet_opt = NULL; 1040 switch ((ch = db_tok_string[0])) { 1041 default: 1042 DNETDEBUG("Unexpected: '%c'\n", ch); 1043 /* FALLTHROUGH */ 1044 case 'h': 1045 goto usage; 1046 case 'c': 1047 cur_inet_opt = &opt_client; 1048 break; 1049 case 'g': 1050 cur_inet_opt = &opt_gateway; 1051 break; 1052 case 's': 1053 cur_inet_opt = &opt_server; 1054 break; 1055 case 'i': 1056 want_ifp = true; 1057 break; 1058 } 1059 1060 t = db_read_token_flags(DRT_WSPACE); 1061 if (t != tWSPACE) { 1062 db_printf("%s: Bad syntax; expected space after " 1063 "flag %c, got %d\n", cmd, ch, t); 1064 goto usage; 1065 } 1066 1067 if (want_ifp) { 1068 t = db_read_token_flags(DRT_WSPACE); 1069 if (t != tIDENT) { 1070 db_printf("%s: Expected interface but got %d\n", 1071 cmd, t); 1072 goto usage; 1073 } 1074 1075 CURVNET_SET(vnet0); 1076 /* 1077 * We *don't* take a ref here because the only current 1078 * consumer, db_netdump_cmd, does not need it. It 1079 * (somewhat redundantly) extracts the if_name(), 1080 * re-lookups the ifp, and takes its own reference. 1081 */ 1082 ifp = ifunit(db_tok_string); 1083 CURVNET_RESTORE(); 1084 if (ifp == NULL) { 1085 db_printf("Could not locate interface %s\n", 1086 db_tok_string); 1087 error = ENOENT; 1088 goto cleanup; 1089 } 1090 } else { 1091 MPASS(cur_inet_opt != NULL); 1092 /* Assume IPv4 for now. */ 1093 error = dn_parse_optarg_ipv4(cur_inet_opt); 1094 if (error != 0) 1095 goto cleanup; 1096 } 1097 1098 /* Skip (mandatory) whitespace after option, if not EOL. */ 1099 t = db_read_token_flags(DRT_WSPACE); 1100 if (t == tEOL) 1101 break; 1102 if (t != tWSPACE) { 1103 db_printf("%s: Bad syntax; expected space after " 1104 "flag %c option; got %d\n", cmd, ch, t); 1105 goto usage; 1106 } 1107 t = db_read_token_flags(DRT_WSPACE); 1108 } 1109 1110 if (!opt_server.has_opt) { 1111 db_printf("%s: need a destination server address\n", cmd); 1112 goto usage; 1113 } 1114 1115 result->dd_has_client = opt_client.has_opt; 1116 result->dd_has_gateway = opt_gateway.has_opt; 1117 result->dd_ifp = ifp; 1118 1119 /* We parsed the full line to tEOL already, or bailed with an error. */ 1120 return (0); 1121 1122 usage: 1123 db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> " 1124 "-i <interface>]\n", cmd); 1125 error = EINVAL; 1126 /* FALLTHROUGH */ 1127 cleanup: 1128 db_skip_to_eol(); 1129 return (error); 1130 } 1131 #endif /* DDB */ 1132