/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2019 Isilon Systems, LLC.
 * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
 * Copyright (c) 2000 Darrell Anderson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/endian.h>
#include <sys/errno.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/nhop.h>

#include <netinet/in.h>
#include <netinet/in_fib.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#include <machine/in_cksum.h>
#include <machine/pcb.h>

#include <net/debugnet.h>
/* Defined before including debugnet_int.h, the internal header. */
#define	DEBUGNET_INTERNAL
#include <net/debugnet_int.h>

FEATURE(debugnet, "Debugnet support");

/* Parent node for the net.debugnet.* knobs declared below. */
SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "debugnet parameters");

/*
 * Verbosity knob.  debugnet_connect() prints the connection parameters
 * when this is greater than zero.
 */
unsigned debugnet_debug;
SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debugnet_debug, 0,
    "Debug message verbosity (0: off; 1: on; 2: verbose)");

/*
 * Number of poll iterations debugnet_send() waits for outstanding ACKs
 * before it retransmits; each iteration is followed by DELAY(500).
 */
int debugnet_npolls = 2000;
SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
    &debugnet_npolls, 0,
    "Number of times to poll before assuming packet loss (0.5ms per poll)");
/* Retransmission budget; debugnet_send() fails with ETIMEDOUT beyond it. */
int debugnet_nretries = 10;
SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
    &debugnet_nretries, 0,
    "Number of retransmit attempts before giving up");
/* FIB handed to fib4_lookup_debugnet() by debugnet_connect(). */
int debugnet_fib = RT_DEFAULT_FIB;
SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN,
    &debugnet_fib, 0,
    "Fib to use when sending dump");
101 102 static bool g_debugnet_pcb_inuse; 103 static struct debugnet_pcb g_dnet_pcb; 104 105 /* 106 * Simple accessors for opaque PCB. 107 */ 108 const unsigned char * 109 debugnet_get_gw_mac(const struct debugnet_pcb *pcb) 110 { 111 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && 112 pcb->dp_state >= DN_STATE_HAVE_GW_MAC); 113 return (pcb->dp_gw_mac.octet); 114 } 115 116 const in_addr_t * 117 debugnet_get_server_addr(const struct debugnet_pcb *pcb) 118 { 119 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && 120 pcb->dp_state >= DN_STATE_GOT_HERALD_PORT); 121 return (&pcb->dp_server); 122 } 123 124 const uint16_t 125 debugnet_get_server_port(const struct debugnet_pcb *pcb) 126 { 127 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && 128 pcb->dp_state >= DN_STATE_GOT_HERALD_PORT); 129 return (pcb->dp_server_port); 130 } 131 132 /* 133 * Start of network primitives, beginning with output primitives. 134 */ 135 136 /* 137 * Handles creation of the ethernet header, then places outgoing packets into 138 * the tx buffer for the NIC 139 * 140 * Parameters: 141 * m The mbuf containing the packet to be sent (will be freed by 142 * this function or the NIC driver) 143 * ifp The interface to send on 144 * dst The destination ethernet address (source address will be looked 145 * up using ifp) 146 * etype The ETHERTYPE_* value for the protocol that is being sent 147 * 148 * Returns: 149 * int see errno.h, 0 for success 150 */ 151 int 152 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, 153 u_short etype) 154 { 155 struct ether_header *eh; 156 157 if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || 158 (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { 159 if_printf(ifp, "%s: interface isn't up\n", __func__); 160 m_freem(m); 161 return (ENETDOWN); 162 } 163 164 /* Fill in the ethernet header. 
*/ 165 M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); 166 if (m == NULL) { 167 printf("%s: out of mbufs\n", __func__); 168 return (ENOBUFS); 169 } 170 eh = mtod(m, struct ether_header *); 171 memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); 172 memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); 173 eh->ether_type = htons(etype); 174 return (ifp->if_debugnet_methods->dn_transmit(ifp, m)); 175 } 176 177 /* 178 * Unreliable transmission of an mbuf chain to the debugnet server 179 * Note: can't handle fragmentation; fails if the packet is larger than 180 * ifp->if_mtu after adding the UDP/IP headers 181 * 182 * Parameters: 183 * pcb The debugnet context block 184 * m mbuf chain 185 * 186 * Returns: 187 * int see errno.h, 0 for success 188 */ 189 static int 190 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m) 191 { 192 struct udphdr *udp; 193 194 MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC); 195 196 M_PREPEND(m, sizeof(*udp), M_NOWAIT); 197 if (m == NULL) { 198 printf("%s: out of mbufs\n", __func__); 199 return (ENOBUFS); 200 } 201 202 udp = mtod(m, void *); 203 udp->uh_ulen = htons(m->m_pkthdr.len); 204 /* Use this src port so that the server can connect() the socket */ 205 udp->uh_sport = htons(pcb->dp_client_port); 206 udp->uh_dport = htons(pcb->dp_server_port); 207 /* Computed later (protocol-dependent). 
*/ 208 udp->uh_sum = 0; 209 210 return (debugnet_ip_output(pcb, m)); 211 } 212 213 int 214 debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */) 215 { 216 struct debugnet_ack *dn_ack; 217 struct mbuf *m; 218 219 DNETDEBUG("Acking with seqno %u\n", ntohl(seqno)); 220 221 m = m_gethdr(M_NOWAIT, MT_DATA); 222 if (m == NULL) { 223 printf("%s: Out of mbufs\n", __func__); 224 return (ENOBUFS); 225 } 226 m->m_len = sizeof(*dn_ack); 227 m->m_pkthdr.len = sizeof(*dn_ack); 228 MH_ALIGN(m, sizeof(*dn_ack)); 229 dn_ack = mtod(m, void *); 230 dn_ack->da_seqno = seqno; 231 232 return (debugnet_udp_output(pcb, m)); 233 } 234 235 /* 236 * Dummy free function for debugnet clusters. 237 */ 238 static void 239 debugnet_mbuf_free(struct mbuf *m __unused) 240 { 241 } 242 243 /* 244 * Construct and reliably send a debugnet packet. May fail from a resource 245 * shortage or extreme number of unacknowledged retransmissions. Wait for 246 * an acknowledgement before returning. Splits packets into chunks small 247 * enough to be sent without fragmentation (looks up the interface MTU) 248 * 249 * Parameters: 250 * type debugnet packet type (HERALD, FINISHED, ...) 251 * data data 252 * datalen data size (bytes) 253 * auxdata optional auxiliary information 254 * 255 * Returns: 256 * int see errno.h, 0 for success 257 */ 258 int 259 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data, 260 uint32_t datalen, const struct debugnet_proto_aux *auxdata) 261 { 262 struct debugnet_msg_hdr *dn_msg_hdr; 263 struct mbuf *m, *m2; 264 uint64_t want_acks; 265 uint32_t i, pktlen, sent_so_far; 266 int retries, polls, error; 267 268 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) 269 return (ECONNRESET); 270 271 want_acks = 0; 272 pcb->dp_rcvd_acks = 0; 273 retries = 0; 274 275 retransmit: 276 /* Chunks can be too big to fit in packets. 
*/ 277 for (i = sent_so_far = 0; sent_so_far < datalen || 278 (i == 0 && datalen == 0); i++) { 279 pktlen = datalen - sent_so_far; 280 281 /* Bound: the interface MTU (assume no IP options). */ 282 pktlen = min(pktlen, pcb->dp_ifp->if_mtu - 283 sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr)); 284 285 /* 286 * Check if it is retransmitting and this has been ACKed 287 * already. 288 */ 289 if ((pcb->dp_rcvd_acks & (1 << i)) != 0) { 290 sent_so_far += pktlen; 291 continue; 292 } 293 294 /* 295 * Get and fill a header mbuf, then chain data as an extended 296 * mbuf. 297 */ 298 m = m_gethdr(M_NOWAIT, MT_DATA); 299 if (m == NULL) { 300 printf("%s: Out of mbufs\n", __func__); 301 return (ENOBUFS); 302 } 303 m->m_len = sizeof(struct debugnet_msg_hdr); 304 m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr); 305 MH_ALIGN(m, sizeof(struct debugnet_msg_hdr)); 306 dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *); 307 dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i); 308 dn_msg_hdr->mh_type = htonl(type); 309 dn_msg_hdr->mh_len = htonl(pktlen); 310 311 if (auxdata != NULL) { 312 dn_msg_hdr->mh_offset = 313 htobe64(auxdata->dp_offset_start + sent_so_far); 314 dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2); 315 } else { 316 dn_msg_hdr->mh_offset = htobe64(sent_so_far); 317 dn_msg_hdr->mh_aux2 = 0; 318 } 319 320 if (pktlen != 0) { 321 m2 = m_get(M_NOWAIT, MT_DATA); 322 if (m2 == NULL) { 323 m_freem(m); 324 printf("%s: Out of mbufs\n", __func__); 325 return (ENOBUFS); 326 } 327 MEXTADD(m2, __DECONST(char *, data) + sent_so_far, 328 pktlen, debugnet_mbuf_free, NULL, NULL, 0, 329 EXT_DISPOSABLE); 330 m2->m_len = pktlen; 331 332 m_cat(m, m2); 333 m->m_pkthdr.len += pktlen; 334 } 335 error = debugnet_udp_output(pcb, m); 336 if (error != 0) 337 return (error); 338 339 /* Note that we're waiting for this packet in the bitfield. */ 340 want_acks |= (1 << i); 341 sent_so_far += pktlen; 342 } 343 if (i >= DEBUGNET_MAX_IN_FLIGHT) 344 printf("Warning: Sent more than %d packets (%d). 
" 345 "Acknowledgements will fail unless the size of " 346 "rcvd_acks/want_acks is increased.\n", 347 DEBUGNET_MAX_IN_FLIGHT, i); 348 349 /* 350 * Wait for acks. A *real* window would speed things up considerably. 351 */ 352 polls = 0; 353 while (pcb->dp_rcvd_acks != want_acks) { 354 if (polls++ > debugnet_npolls) { 355 if (retries++ > debugnet_nretries) 356 return (ETIMEDOUT); 357 printf(". "); 358 goto retransmit; 359 } 360 debugnet_network_poll(pcb); 361 DELAY(500); 362 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) 363 return (ECONNRESET); 364 } 365 pcb->dp_seqno += i; 366 return (0); 367 } 368 369 /* 370 * Network input primitives. 371 */ 372 373 /* 374 * Just introspect the header enough to fire off a seqno ack and validate 375 * length fits. 376 */ 377 static void 378 debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb) 379 { 380 const struct debugnet_msg_hdr *dnh; 381 struct mbuf *m; 382 uint32_t hdr_type; 383 uint32_t seqno; 384 int error; 385 386 m = *mb; 387 388 if (m->m_pkthdr.len < sizeof(*dnh)) { 389 DNETDEBUG("ignoring small debugnet_msg packet\n"); 390 return; 391 } 392 393 /* Get ND header. */ 394 if (m->m_len < sizeof(*dnh)) { 395 m = m_pullup(m, sizeof(*dnh)); 396 *mb = m; 397 if (m == NULL) { 398 DNETDEBUG("m_pullup failed\n"); 399 return; 400 } 401 } 402 403 dnh = mtod(m, const void *); 404 if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) { 405 DNETDEBUG("Dropping short packet.\n"); 406 return; 407 } 408 409 hdr_type = ntohl(dnh->mh_type); 410 if (hdr_type != DEBUGNET_DATA) { 411 if (hdr_type == DEBUGNET_FINISHED) { 412 printf("Remote shut down the connection on us!\n"); 413 pcb->dp_state = DN_STATE_REMOTE_CLOSED; 414 if (pcb->dp_finish_handler != NULL) { 415 pcb->dp_finish_handler(); 416 } 417 } else { 418 DNETDEBUG("Got unexpected debugnet message %u\n", hdr_type); 419 } 420 return; 421 } 422 423 /* 424 * If the issue is transient (ENOBUFS), sender should resend. 
If 425 * non-transient (like driver objecting to rx -> tx from the same 426 * thread), not much else we can do. 427 */ 428 seqno = dnh->mh_seqno; /* net endian */ 429 m_adj(m, sizeof(*dnh)); 430 dnh = NULL; 431 error = pcb->dp_rx_handler(m); 432 if (error != 0) { 433 DNETDEBUG("RX handler was not able to accept message, error %d. " 434 "Skipping ack.\n", error); 435 return; 436 } 437 438 error = debugnet_ack_output(pcb, seqno); 439 if (error != 0) { 440 DNETDEBUG("Couldn't ACK rx packet %u; %d\n", ntohl(seqno), error); 441 } 442 } 443 444 static void 445 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport) 446 { 447 const struct debugnet_ack *dn_ack; 448 struct mbuf *m; 449 uint32_t rcv_ackno; 450 451 m = *mb; 452 453 /* Get Ack. */ 454 if (m->m_len < sizeof(*dn_ack)) { 455 m = m_pullup(m, sizeof(*dn_ack)); 456 *mb = m; 457 if (m == NULL) { 458 DNETDEBUG("m_pullup failed\n"); 459 return; 460 } 461 } 462 dn_ack = mtod(m, const void *); 463 464 /* Debugnet processing. */ 465 /* 466 * Packet is meant for us. Extract the ack sequence number and the 467 * port number if necessary. 468 */ 469 rcv_ackno = ntohl(dn_ack->da_seqno); 470 if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) { 471 pcb->dp_server_port = sport; 472 pcb->dp_state = DN_STATE_GOT_HERALD_PORT; 473 } 474 if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT) 475 printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno); 476 else if (rcv_ackno >= pcb->dp_seqno) { 477 /* We're interested in this ack. Record it. */ 478 pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno); 479 } 480 } 481 482 void 483 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb) 484 { 485 const struct udphdr *udp; 486 struct mbuf *m; 487 uint16_t sport, ulen; 488 489 /* UDP processing. */ 490 491 m = *mb; 492 if (m->m_pkthdr.len < sizeof(*udp)) { 493 DNETDEBUG("ignoring small UDP packet\n"); 494 return; 495 } 496 497 /* Get UDP headers. 
*/ 498 if (m->m_len < sizeof(*udp)) { 499 m = m_pullup(m, sizeof(*udp)); 500 *mb = m; 501 if (m == NULL) { 502 DNETDEBUG("m_pullup failed\n"); 503 return; 504 } 505 } 506 udp = mtod(m, const void *); 507 508 /* We expect to receive UDP packets on the configured client port. */ 509 if (ntohs(udp->uh_dport) != pcb->dp_client_port) { 510 DNETDEBUG("not on the expected port.\n"); 511 return; 512 } 513 514 /* Check that ulen does not exceed actual size of data. */ 515 ulen = ntohs(udp->uh_ulen); 516 if (m->m_pkthdr.len < ulen) { 517 DNETDEBUG("ignoring runt UDP packet\n"); 518 return; 519 } 520 521 sport = ntohs(udp->uh_sport); 522 523 m_adj(m, sizeof(*udp)); 524 ulen -= sizeof(*udp); 525 526 if (ulen == sizeof(struct debugnet_ack)) { 527 debugnet_handle_ack(pcb, mb, sport); 528 return; 529 } 530 531 if (pcb->dp_rx_handler == NULL) { 532 if (ulen < sizeof(struct debugnet_ack)) 533 DNETDEBUG("ignoring small ACK packet\n"); 534 else 535 DNETDEBUG("ignoring unexpected non-ACK packet on " 536 "half-duplex connection.\n"); 537 return; 538 } 539 540 debugnet_handle_rx_msg(pcb, mb); 541 } 542 543 /* 544 * Handler for incoming packets directly from the network adapter 545 * Identifies the packet type (IP or ARP) and passes it along to one of the 546 * helper functions debugnet_handle_ip or debugnet_handle_arp. 547 * 548 * It needs to partially replicate the behaviour of ether_input() and 549 * ether_demux(). 550 * 551 * Parameters: 552 * ifp the interface the packet came from 553 * m an mbuf containing the packet received 554 */ 555 static void 556 debugnet_input_one(struct ifnet *ifp, struct mbuf *m) 557 { 558 struct ifreq ifr; 559 struct ether_header *eh; 560 u_short etype; 561 562 /* Ethernet processing. 
*/ 563 if ((m->m_flags & M_PKTHDR) == 0) { 564 DNETDEBUG_IF(ifp, "discard frame without packet header\n"); 565 goto done; 566 } 567 if (m->m_len < ETHER_HDR_LEN) { 568 DNETDEBUG_IF(ifp, 569 "discard frame without leading eth header (len %d pktlen %d)\n", 570 m->m_len, m->m_pkthdr.len); 571 goto done; 572 } 573 if ((m->m_flags & M_HASFCS) != 0) { 574 m_adj(m, -ETHER_CRC_LEN); 575 m->m_flags &= ~M_HASFCS; 576 } 577 eh = mtod(m, struct ether_header *); 578 etype = ntohs(eh->ether_type); 579 if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { 580 DNETDEBUG_IF(ifp, "ignoring vlan packets\n"); 581 goto done; 582 } 583 if (if_gethwaddr(ifp, &ifr) != 0) { 584 DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n"); 585 goto done; 586 } 587 if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost, 588 ETHER_ADDR_LEN) != 0 && 589 (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) { 590 DNETDEBUG_IF(ifp, 591 "discard frame with incorrect destination addr\n"); 592 goto done; 593 } 594 595 MPASS(g_debugnet_pcb_inuse); 596 597 /* Done ethernet processing. Strip off the ethernet header. */ 598 m_adj(m, ETHER_HDR_LEN); 599 switch (etype) { 600 case ETHERTYPE_ARP: 601 debugnet_handle_arp(&g_dnet_pcb, &m); 602 break; 603 case ETHERTYPE_IP: 604 debugnet_handle_ip(&g_dnet_pcb, &m); 605 break; 606 default: 607 DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); 608 break; 609 } 610 done: 611 if (m != NULL) 612 m_freem(m); 613 } 614 615 static void 616 debugnet_input(struct ifnet *ifp, struct mbuf *m) 617 { 618 struct mbuf *n; 619 620 do { 621 n = m->m_nextpkt; 622 m->m_nextpkt = NULL; 623 debugnet_input_one(ifp, m); 624 m = n; 625 } while (m != NULL); 626 } 627 628 /* 629 * Network polling primitive. 630 * 631 * Instead of assuming that most of the network stack is sane, we just poll the 632 * driver directly for packets. 
633 */ 634 void 635 debugnet_network_poll(struct debugnet_pcb *pcb) 636 { 637 struct ifnet *ifp; 638 639 ifp = pcb->dp_ifp; 640 ifp->if_debugnet_methods->dn_poll(ifp, 1000); 641 } 642 643 /* 644 * Start of consumer API surface. 645 */ 646 void 647 debugnet_free(struct debugnet_pcb *pcb) 648 { 649 struct ifnet *ifp; 650 651 MPASS(pcb == &g_dnet_pcb); 652 MPASS(pcb->dp_drv_input == NULL || g_debugnet_pcb_inuse); 653 654 ifp = pcb->dp_ifp; 655 if (ifp != NULL) { 656 if (pcb->dp_drv_input != NULL) 657 ifp->if_input = pcb->dp_drv_input; 658 if (pcb->dp_event_started) 659 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END); 660 } 661 debugnet_mbuf_finish(); 662 663 g_debugnet_pcb_inuse = false; 664 memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb)); 665 } 666 667 int 668 debugnet_connect(const struct debugnet_conn_params *dcp, 669 struct debugnet_pcb **pcb_out) 670 { 671 struct debugnet_proto_aux herald_auxdata; 672 struct debugnet_pcb *pcb; 673 struct ifnet *ifp; 674 int error; 675 676 if (g_debugnet_pcb_inuse) { 677 printf("%s: Only one connection at a time.\n", __func__); 678 return (EBUSY); 679 } 680 681 pcb = &g_dnet_pcb; 682 *pcb = (struct debugnet_pcb) { 683 .dp_state = DN_STATE_INIT, 684 .dp_client = dcp->dc_client, 685 .dp_server = dcp->dc_server, 686 .dp_gateway = dcp->dc_gateway, 687 .dp_server_port = dcp->dc_herald_port, /* Initially */ 688 .dp_client_port = dcp->dc_client_port, 689 .dp_seqno = 1, 690 .dp_ifp = dcp->dc_ifp, 691 .dp_rx_handler = dcp->dc_rx_handler, 692 .dp_drv_input = NULL, 693 }; 694 695 /* Switch to the debugnet mbuf zones. */ 696 debugnet_mbuf_start(); 697 698 /* At least one needed parameter is missing; infer it. 
*/ 699 if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY || 700 pcb->dp_ifp == NULL) { 701 struct sockaddr_in dest_sin, *gw_sin, *local_sin; 702 struct ifnet *rt_ifp; 703 struct nhop_object *nh; 704 705 memset(&dest_sin, 0, sizeof(dest_sin)); 706 dest_sin = (struct sockaddr_in) { 707 .sin_len = sizeof(dest_sin), 708 .sin_family = AF_INET, 709 .sin_addr.s_addr = pcb->dp_server, 710 }; 711 712 CURVNET_SET(vnet0); 713 nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0, 714 NHR_NONE); 715 CURVNET_RESTORE(); 716 717 if (nh == NULL) { 718 printf("%s: Could not get route for that server.\n", 719 __func__); 720 error = ENOENT; 721 goto cleanup; 722 } 723 724 /* TODO support AF_INET6 */ 725 if (nh->gw_sa.sa_family == AF_INET) 726 gw_sin = &nh->gw4_sa; 727 else { 728 if (nh->gw_sa.sa_family == AF_LINK) 729 DNETDEBUG("Destination address is on link.\n"); 730 gw_sin = NULL; 731 } 732 733 MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET); 734 local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr; 735 736 rt_ifp = nh->nh_ifp; 737 738 if (pcb->dp_client == INADDR_ANY) 739 pcb->dp_client = local_sin->sin_addr.s_addr; 740 if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL) 741 pcb->dp_gateway = gw_sin->sin_addr.s_addr; 742 if (pcb->dp_ifp == NULL) 743 pcb->dp_ifp = rt_ifp; 744 } 745 746 ifp = pcb->dp_ifp; 747 748 if (debugnet_debug > 0) { 749 char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN], 750 gwbuf[INET_ADDRSTRLEN]; 751 inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf)); 752 inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf)); 753 if (pcb->dp_gateway != INADDR_ANY) 754 inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf)); 755 DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n", 756 serbuf, pcb->dp_server_port, 757 (pcb->dp_gateway == INADDR_ANY) ? "" : " via ", 758 (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf, 759 clibuf, pcb->dp_client_port, if_name(ifp)); 760 } 761 762 /* Validate iface is online and supported. 
*/ 763 if (!DEBUGNET_SUPPORTED_NIC(ifp)) { 764 printf("%s: interface '%s' does not support debugnet\n", 765 __func__, if_name(ifp)); 766 error = ENODEV; 767 goto cleanup; 768 } 769 if ((if_getflags(ifp) & IFF_UP) == 0) { 770 printf("%s: interface '%s' link is down\n", __func__, 771 if_name(ifp)); 772 error = ENXIO; 773 goto cleanup; 774 } 775 776 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START); 777 pcb->dp_event_started = true; 778 779 /* 780 * We maintain the invariant that g_debugnet_pcb_inuse is always true 781 * while the debugnet ifp's if_input is overridden with 782 * debugnet_input(). 783 */ 784 g_debugnet_pcb_inuse = true; 785 786 /* Make the card use *our* receive callback. */ 787 pcb->dp_drv_input = ifp->if_input; 788 ifp->if_input = debugnet_input; 789 790 printf("%s: searching for %s MAC...\n", __func__, 791 (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway"); 792 793 error = debugnet_arp_gw(pcb); 794 if (error != 0) { 795 printf("%s: failed to locate MAC address\n", __func__); 796 goto cleanup; 797 } 798 MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC); 799 800 herald_auxdata = (struct debugnet_proto_aux) { 801 .dp_offset_start = dcp->dc_herald_offset, 802 .dp_aux2 = dcp->dc_herald_aux2, 803 }; 804 error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data, 805 dcp->dc_herald_datalen, &herald_auxdata); 806 if (error != 0) { 807 printf("%s: failed to herald debugnet server\n", __func__); 808 goto cleanup; 809 } 810 811 *pcb_out = pcb; 812 return (0); 813 814 cleanup: 815 debugnet_free(pcb); 816 return (error); 817 } 818 819 /* 820 * Pre-allocated dump-time mbuf tracking. 821 * 822 * We just track the high water mark we've ever seen and allocate appropriately 823 * for that iface/mtu combo. 
824 */ 825 static struct { 826 int nmbuf; 827 int ncl; 828 int clsize; 829 } dn_hwm; 830 static struct mtx dn_hwm_lk; 831 MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF); 832 833 static void 834 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize) 835 { 836 bool any; 837 838 any = false; 839 mtx_lock(&dn_hwm_lk); 840 841 if (nmbuf > dn_hwm.nmbuf) { 842 any = true; 843 dn_hwm.nmbuf = nmbuf; 844 } else 845 nmbuf = dn_hwm.nmbuf; 846 847 if (ncl > dn_hwm.ncl) { 848 any = true; 849 dn_hwm.ncl = ncl; 850 } else 851 ncl = dn_hwm.ncl; 852 853 if (clsize > dn_hwm.clsize) { 854 any = true; 855 dn_hwm.clsize = clsize; 856 } else 857 clsize = dn_hwm.clsize; 858 859 mtx_unlock(&dn_hwm_lk); 860 861 if (any) 862 debugnet_mbuf_reinit(nmbuf, ncl, clsize); 863 } 864 865 void 866 debugnet_any_ifnet_update(struct ifnet *ifp) 867 { 868 int clsize, nmbuf, ncl, nrxr; 869 870 if (!DEBUGNET_SUPPORTED_NIC(ifp)) 871 return; 872 873 ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize); 874 KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); 875 876 /* 877 * We need two headers per message on the transmit side. Multiply by 878 * four to give us some breathing room. 879 */ 880 nmbuf = ncl * (4 + nrxr); 881 ncl *= nrxr; 882 883 /* 884 * Bandaid for drivers that (incorrectly) advertise LinkUp before their 885 * dn_init method is available. 886 */ 887 if (nmbuf == 0 || ncl == 0 || clsize == 0) { 888 #ifndef INVARIANTS 889 if (bootverbose) 890 #endif 891 printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n", 892 __func__, if_name(ifp), ifp); 893 return; 894 } 895 dn_maybe_reinit_mbufs(nmbuf, ncl, clsize); 896 } 897 898 /* 899 * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless 900 * for us because drivers tend to if_attach before invoking DEBUGNET_SET(). 901 * 902 * On the other hand, hooking DEBUGNET_SET() itself may still be too early, 903 * because the driver is still in attach. 
Since we cannot use down interfaces, 904 * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? ... Nope, at least 905 * with vtnet and dhcpclient that event just never occurs. 906 * 907 * So that's how I've landed on the lower level ifnet_link_event. 908 */ 909 910 static void 911 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state) 912 { 913 if (link_state == LINK_STATE_UP) 914 debugnet_any_ifnet_update(ifp); 915 } 916 917 static eventhandler_tag dn_attach_cookie; 918 static void 919 dn_evh_init(void *ctx __unused) 920 { 921 dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event, 922 dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); 923 } 924 SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL); 925 926 /* 927 * DDB parsing helpers for debugnet(4) consumers. 928 */ 929 #ifdef DDB 930 struct my_inet_opt { 931 bool has_opt; 932 const char *printname; 933 in_addr_t *result; 934 }; 935 936 static int 937 dn_parse_optarg_ipv4(struct my_inet_opt *opt) 938 { 939 in_addr_t tmp; 940 unsigned octet; 941 int t; 942 943 tmp = 0; 944 for (octet = 0; octet < 4; octet++) { 945 t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL); 946 if (t != tNUMBER) { 947 db_printf("%s:%s: octet %u expected number; found %d\n", 948 __func__, opt->printname, octet, t); 949 return (EINVAL); 950 } 951 /* 952 * db_lex lexes '-' distinctly from the number itself, but 953 * let's document that invariant. 954 */ 955 MPASS(db_tok_number >= 0); 956 957 if (db_tok_number > UINT8_MAX) { 958 db_printf("%s:%s: octet %u out of range: %jd\n", __func__, 959 opt->printname, octet, (intmax_t)db_tok_number); 960 return (EDOM); 961 } 962 963 /* Constructed host-endian and converted to network later. 
*/ 964 tmp = (tmp << 8) | db_tok_number; 965 966 if (octet < 3) { 967 t = db_read_token_flags(DRT_WSPACE); 968 if (t != tDOT) { 969 db_printf("%s:%s: octet %u expected '.'; found" 970 " %d\n", __func__, opt->printname, octet, 971 t); 972 return (EINVAL); 973 } 974 } 975 } 976 977 *opt->result = htonl(tmp); 978 opt->has_opt = true; 979 return (0); 980 } 981 982 int 983 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result) 984 { 985 struct ifnet *ifp; 986 int t, error; 987 bool want_ifp; 988 char ch; 989 990 struct my_inet_opt opt_client = { 991 .printname = "client", 992 .result = &result->dd_client, 993 }, 994 opt_server = { 995 .printname = "server", 996 .result = &result->dd_server, 997 }, 998 opt_gateway = { 999 .printname = "gateway", 1000 .result = &result->dd_gateway, 1001 }, 1002 *cur_inet_opt; 1003 1004 ifp = NULL; 1005 memset(result, 0, sizeof(*result)); 1006 1007 /* 1008 * command [space] [-] [opt] [[space] [optarg]] ... 1009 * 1010 * db_command has already lexed 'command' for us. 
1011 */ 1012 t = db_read_token_flags(DRT_WSPACE); 1013 if (t == tWSPACE) 1014 t = db_read_token_flags(DRT_WSPACE); 1015 1016 while (t != tEOL) { 1017 if (t != tMINUS) { 1018 db_printf("%s: Bad syntax; expected '-', got %d\n", 1019 cmd, t); 1020 goto usage; 1021 } 1022 1023 t = db_read_token_flags(DRT_WSPACE); 1024 if (t != tIDENT) { 1025 db_printf("%s: Bad syntax; expected tIDENT, got %d\n", 1026 cmd, t); 1027 goto usage; 1028 } 1029 1030 if (strlen(db_tok_string) > 1) { 1031 db_printf("%s: Bad syntax; expected single option " 1032 "flag, got '%s'\n", cmd, db_tok_string); 1033 goto usage; 1034 } 1035 1036 want_ifp = false; 1037 cur_inet_opt = NULL; 1038 switch ((ch = db_tok_string[0])) { 1039 default: 1040 DNETDEBUG("Unexpected: '%c'\n", ch); 1041 /* FALLTHROUGH */ 1042 case 'h': 1043 goto usage; 1044 case 'c': 1045 cur_inet_opt = &opt_client; 1046 break; 1047 case 'g': 1048 cur_inet_opt = &opt_gateway; 1049 break; 1050 case 's': 1051 cur_inet_opt = &opt_server; 1052 break; 1053 case 'i': 1054 want_ifp = true; 1055 break; 1056 } 1057 1058 t = db_read_token_flags(DRT_WSPACE); 1059 if (t != tWSPACE) { 1060 db_printf("%s: Bad syntax; expected space after " 1061 "flag %c, got %d\n", cmd, ch, t); 1062 goto usage; 1063 } 1064 1065 if (want_ifp) { 1066 t = db_read_token_flags(DRT_WSPACE); 1067 if (t != tIDENT) { 1068 db_printf("%s: Expected interface but got %d\n", 1069 cmd, t); 1070 goto usage; 1071 } 1072 1073 CURVNET_SET(vnet0); 1074 /* 1075 * We *don't* take a ref here because the only current 1076 * consumer, db_netdump_cmd, does not need it. It 1077 * (somewhat redundantly) extracts the if_name(), 1078 * re-lookups the ifp, and takes its own reference. 1079 */ 1080 ifp = ifunit(db_tok_string); 1081 CURVNET_RESTORE(); 1082 if (ifp == NULL) { 1083 db_printf("Could not locate interface %s\n", 1084 db_tok_string); 1085 error = ENOENT; 1086 goto cleanup; 1087 } 1088 } else { 1089 MPASS(cur_inet_opt != NULL); 1090 /* Assume IPv4 for now. 
*/ 1091 error = dn_parse_optarg_ipv4(cur_inet_opt); 1092 if (error != 0) 1093 goto cleanup; 1094 } 1095 1096 /* Skip (mandatory) whitespace after option, if not EOL. */ 1097 t = db_read_token_flags(DRT_WSPACE); 1098 if (t == tEOL) 1099 break; 1100 if (t != tWSPACE) { 1101 db_printf("%s: Bad syntax; expected space after " 1102 "flag %c option; got %d\n", cmd, ch, t); 1103 goto usage; 1104 } 1105 t = db_read_token_flags(DRT_WSPACE); 1106 } 1107 1108 if (!opt_server.has_opt) { 1109 db_printf("%s: need a destination server address\n", cmd); 1110 goto usage; 1111 } 1112 1113 result->dd_has_client = opt_client.has_opt; 1114 result->dd_has_gateway = opt_gateway.has_opt; 1115 result->dd_ifp = ifp; 1116 1117 /* We parsed the full line to tEOL already, or bailed with an error. */ 1118 return (0); 1119 1120 usage: 1121 db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> " 1122 "-i <interface>]\n", cmd); 1123 error = EINVAL; 1124 /* FALLTHROUGH */ 1125 cleanup: 1126 db_skip_to_eol(); 1127 return (error); 1128 } 1129 #endif /* DDB */ 1130