1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2019 Isilon Systems, LLC. 5 * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. 6 * Copyright (c) 2000 Darrell Anderson 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/endian.h>
#include <sys/errno.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/nhop.h>

#include <netinet/in.h>
#include <netinet/in_fib.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#include <machine/in_cksum.h>
#include <machine/pcb.h>

#include <net/debugnet.h>
#define DEBUGNET_INTERNAL
#include <net/debugnet_int.h>

FEATURE(debugnet, "Debugnet support");

/* Parent node for the net.debugnet.* tunables declared below. */
SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "debugnet parameters");

/*
 * Debug verbosity.  debugnet_connect() prints the connection parameters
 * when this is non-zero.
 */
unsigned debugnet_debug;
SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debugnet_debug, 0,
    "Debug message verbosity (0: off; 1: on; 2: verbose)");

/*
 * Retransmission tuning consumed by debugnet_send(): how many poll
 * iterations to wait for acknowledgements before retransmitting, and how
 * many retransmissions to attempt before giving up with ETIMEDOUT.
 */
int debugnet_npolls = 2000;
SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
    &debugnet_npolls, 0,
    "Number of times to poll before assuming packet loss (0.5ms per poll)");
int debugnet_nretries = 10;
SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
    &debugnet_nretries, 0,
    "Number of retransmit attempts before giving up");

/* FIB passed to the route lookup performed by debugnet_connect(). */
int debugnet_fib = RT_DEFAULT_FIB;
SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN,
    &debugnet_fib, 0,
    "Fib to use when sending dump");
102 103 static bool g_debugnet_pcb_inuse; 104 static struct debugnet_pcb g_dnet_pcb; 105 106 /* 107 * Simple accessors for opaque PCB. 108 */ 109 const unsigned char * 110 debugnet_get_gw_mac(const struct debugnet_pcb *pcb) 111 { 112 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && 113 pcb->dp_state >= DN_STATE_HAVE_GW_MAC); 114 return (pcb->dp_gw_mac.octet); 115 } 116 117 /* 118 * Start of network primitives, beginning with output primitives. 119 */ 120 121 /* 122 * Handles creation of the ethernet header, then places outgoing packets into 123 * the tx buffer for the NIC 124 * 125 * Parameters: 126 * m The mbuf containing the packet to be sent (will be freed by 127 * this function or the NIC driver) 128 * ifp The interface to send on 129 * dst The destination ethernet address (source address will be looked 130 * up using ifp) 131 * etype The ETHERTYPE_* value for the protocol that is being sent 132 * 133 * Returns: 134 * int see errno.h, 0 for success 135 */ 136 int 137 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, 138 u_short etype) 139 { 140 struct ether_header *eh; 141 142 if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || 143 (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { 144 if_printf(ifp, "%s: interface isn't up\n", __func__); 145 m_freem(m); 146 return (ENETDOWN); 147 } 148 149 /* Fill in the ethernet header. 
*/ 150 M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); 151 if (m == NULL) { 152 printf("%s: out of mbufs\n", __func__); 153 return (ENOBUFS); 154 } 155 eh = mtod(m, struct ether_header *); 156 memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); 157 memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); 158 eh->ether_type = htons(etype); 159 return (ifp->if_debugnet_methods->dn_transmit(ifp, m)); 160 } 161 162 /* 163 * Unreliable transmission of an mbuf chain to the debugnet server 164 * Note: can't handle fragmentation; fails if the packet is larger than 165 * ifp->if_mtu after adding the UDP/IP headers 166 * 167 * Parameters: 168 * pcb The debugnet context block 169 * m mbuf chain 170 * 171 * Returns: 172 * int see errno.h, 0 for success 173 */ 174 static int 175 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m) 176 { 177 struct udphdr *udp; 178 179 MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC); 180 181 M_PREPEND(m, sizeof(*udp), M_NOWAIT); 182 if (m == NULL) { 183 printf("%s: out of mbufs\n", __func__); 184 return (ENOBUFS); 185 } 186 187 udp = mtod(m, void *); 188 udp->uh_ulen = htons(m->m_pkthdr.len); 189 /* Use this src port so that the server can connect() the socket */ 190 udp->uh_sport = htons(pcb->dp_client_port); 191 udp->uh_dport = htons(pcb->dp_server_port); 192 /* Computed later (protocol-dependent). 
*/ 193 udp->uh_sum = 0; 194 195 return (debugnet_ip_output(pcb, m)); 196 } 197 198 int 199 debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */) 200 { 201 struct debugnet_ack *dn_ack; 202 struct mbuf *m; 203 204 DNETDEBUG("Acking with seqno %u\n", ntohl(seqno)); 205 206 m = m_gethdr(M_NOWAIT, MT_DATA); 207 if (m == NULL) { 208 printf("%s: Out of mbufs\n", __func__); 209 return (ENOBUFS); 210 } 211 m->m_len = sizeof(*dn_ack); 212 m->m_pkthdr.len = sizeof(*dn_ack); 213 MH_ALIGN(m, sizeof(*dn_ack)); 214 dn_ack = mtod(m, void *); 215 dn_ack->da_seqno = seqno; 216 217 return (debugnet_udp_output(pcb, m)); 218 } 219 220 /* 221 * Dummy free function for debugnet clusters. 222 */ 223 static void 224 debugnet_mbuf_free(struct mbuf *m __unused) 225 { 226 } 227 228 /* 229 * Construct and reliably send a debugnet packet. May fail from a resource 230 * shortage or extreme number of unacknowledged retransmissions. Wait for 231 * an acknowledgement before returning. Splits packets into chunks small 232 * enough to be sent without fragmentation (looks up the interface MTU) 233 * 234 * Parameters: 235 * type debugnet packet type (HERALD, FINISHED, ...) 236 * data data 237 * datalen data size (bytes) 238 * auxdata optional auxiliary information 239 * 240 * Returns: 241 * int see errno.h, 0 for success 242 */ 243 int 244 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data, 245 uint32_t datalen, const struct debugnet_proto_aux *auxdata) 246 { 247 struct debugnet_msg_hdr *dn_msg_hdr; 248 struct mbuf *m, *m2; 249 uint64_t want_acks; 250 uint32_t i, pktlen, sent_so_far; 251 int retries, polls, error; 252 253 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) 254 return (ECONNRESET); 255 256 want_acks = 0; 257 pcb->dp_rcvd_acks = 0; 258 retries = 0; 259 260 retransmit: 261 /* Chunks can be too big to fit in packets. 
*/ 262 for (i = sent_so_far = 0; sent_so_far < datalen || 263 (i == 0 && datalen == 0); i++) { 264 pktlen = datalen - sent_so_far; 265 266 /* Bound: the interface MTU (assume no IP options). */ 267 pktlen = min(pktlen, pcb->dp_ifp->if_mtu - 268 sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr)); 269 270 /* 271 * Check if it is retransmitting and this has been ACKed 272 * already. 273 */ 274 if ((pcb->dp_rcvd_acks & (1 << i)) != 0) { 275 sent_so_far += pktlen; 276 continue; 277 } 278 279 /* 280 * Get and fill a header mbuf, then chain data as an extended 281 * mbuf. 282 */ 283 m = m_gethdr(M_NOWAIT, MT_DATA); 284 if (m == NULL) { 285 printf("%s: Out of mbufs\n", __func__); 286 return (ENOBUFS); 287 } 288 m->m_len = sizeof(struct debugnet_msg_hdr); 289 m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr); 290 MH_ALIGN(m, sizeof(struct debugnet_msg_hdr)); 291 dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *); 292 dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i); 293 dn_msg_hdr->mh_type = htonl(type); 294 dn_msg_hdr->mh_len = htonl(pktlen); 295 296 if (auxdata != NULL) { 297 dn_msg_hdr->mh_offset = 298 htobe64(auxdata->dp_offset_start + sent_so_far); 299 dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2); 300 } else { 301 dn_msg_hdr->mh_offset = htobe64(sent_so_far); 302 dn_msg_hdr->mh_aux2 = 0; 303 } 304 305 if (pktlen != 0) { 306 m2 = m_get(M_NOWAIT, MT_DATA); 307 if (m2 == NULL) { 308 m_freem(m); 309 printf("%s: Out of mbufs\n", __func__); 310 return (ENOBUFS); 311 } 312 MEXTADD(m2, __DECONST(char *, data) + sent_so_far, 313 pktlen, debugnet_mbuf_free, NULL, NULL, 0, 314 EXT_DISPOSABLE); 315 m2->m_len = pktlen; 316 317 m_cat(m, m2); 318 m->m_pkthdr.len += pktlen; 319 } 320 error = debugnet_udp_output(pcb, m); 321 if (error != 0) 322 return (error); 323 324 /* Note that we're waiting for this packet in the bitfield. */ 325 want_acks |= (1 << i); 326 sent_so_far += pktlen; 327 } 328 if (i >= DEBUGNET_MAX_IN_FLIGHT) 329 printf("Warning: Sent more than %d packets (%d). 
" 330 "Acknowledgements will fail unless the size of " 331 "rcvd_acks/want_acks is increased.\n", 332 DEBUGNET_MAX_IN_FLIGHT, i); 333 334 /* 335 * Wait for acks. A *real* window would speed things up considerably. 336 */ 337 polls = 0; 338 while (pcb->dp_rcvd_acks != want_acks) { 339 if (polls++ > debugnet_npolls) { 340 if (retries++ > debugnet_nretries) 341 return (ETIMEDOUT); 342 printf(". "); 343 goto retransmit; 344 } 345 debugnet_network_poll(pcb); 346 DELAY(500); 347 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) 348 return (ECONNRESET); 349 } 350 pcb->dp_seqno += i; 351 return (0); 352 } 353 354 /* 355 * Network input primitives. 356 */ 357 358 /* 359 * Just introspect the header enough to fire off a seqno ack and validate 360 * length fits. 361 */ 362 static void 363 debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb) 364 { 365 const struct debugnet_msg_hdr *dnh; 366 struct mbuf *m; 367 int error; 368 369 m = *mb; 370 371 if (m->m_pkthdr.len < sizeof(*dnh)) { 372 DNETDEBUG("ignoring small debugnet_msg packet\n"); 373 return; 374 } 375 376 /* Get ND header. */ 377 if (m->m_len < sizeof(*dnh)) { 378 m = m_pullup(m, sizeof(*dnh)); 379 *mb = m; 380 if (m == NULL) { 381 DNETDEBUG("m_pullup failed\n"); 382 return; 383 } 384 } 385 dnh = mtod(m, const void *); 386 387 if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) { 388 DNETDEBUG("Dropping short packet.\n"); 389 return; 390 } 391 392 /* 393 * If the issue is transient (ENOBUFS), sender should resend. If 394 * non-transient (like driver objecting to rx -> tx from the same 395 * thread), not much else we can do. 396 */ 397 error = debugnet_ack_output(pcb, dnh->mh_seqno); 398 if (error != 0) 399 return; 400 401 if (ntohl(dnh->mh_type) == DEBUGNET_FINISHED) { 402 printf("Remote shut down the connection on us!\n"); 403 pcb->dp_state = DN_STATE_REMOTE_CLOSED; 404 405 /* 406 * Continue through to the user handler so they are signalled 407 * not to wait for further rx. 
408 */ 409 } 410 411 pcb->dp_rx_handler(pcb, mb); 412 } 413 414 static void 415 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport) 416 { 417 const struct debugnet_ack *dn_ack; 418 struct mbuf *m; 419 uint32_t rcv_ackno; 420 421 m = *mb; 422 423 /* Get Ack. */ 424 if (m->m_len < sizeof(*dn_ack)) { 425 m = m_pullup(m, sizeof(*dn_ack)); 426 *mb = m; 427 if (m == NULL) { 428 DNETDEBUG("m_pullup failed\n"); 429 return; 430 } 431 } 432 dn_ack = mtod(m, const void *); 433 434 /* Debugnet processing. */ 435 /* 436 * Packet is meant for us. Extract the ack sequence number and the 437 * port number if necessary. 438 */ 439 rcv_ackno = ntohl(dn_ack->da_seqno); 440 if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) { 441 pcb->dp_server_port = sport; 442 pcb->dp_state = DN_STATE_GOT_HERALD_PORT; 443 } 444 if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT) 445 printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno); 446 else if (rcv_ackno >= pcb->dp_seqno) { 447 /* We're interested in this ack. Record it. */ 448 pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno); 449 } 450 } 451 452 void 453 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb) 454 { 455 const struct udphdr *udp; 456 struct mbuf *m; 457 uint16_t sport, ulen; 458 459 /* UDP processing. */ 460 461 m = *mb; 462 if (m->m_pkthdr.len < sizeof(*udp)) { 463 DNETDEBUG("ignoring small UDP packet\n"); 464 return; 465 } 466 467 /* Get UDP headers. */ 468 if (m->m_len < sizeof(*udp)) { 469 m = m_pullup(m, sizeof(*udp)); 470 *mb = m; 471 if (m == NULL) { 472 DNETDEBUG("m_pullup failed\n"); 473 return; 474 } 475 } 476 udp = mtod(m, const void *); 477 478 /* We expect to receive UDP packets on the configured client port. */ 479 if (ntohs(udp->uh_dport) != pcb->dp_client_port) { 480 DNETDEBUG("not on the expected port.\n"); 481 return; 482 } 483 484 /* Check that ulen does not exceed actual size of data. 
*/ 485 ulen = ntohs(udp->uh_ulen); 486 if (m->m_pkthdr.len < ulen) { 487 DNETDEBUG("ignoring runt UDP packet\n"); 488 return; 489 } 490 491 sport = ntohs(udp->uh_sport); 492 493 m_adj(m, sizeof(*udp)); 494 ulen -= sizeof(*udp); 495 496 if (ulen == sizeof(struct debugnet_ack)) { 497 debugnet_handle_ack(pcb, mb, sport); 498 return; 499 } 500 501 if (pcb->dp_rx_handler == NULL) { 502 if (ulen < sizeof(struct debugnet_ack)) 503 DNETDEBUG("ignoring small ACK packet\n"); 504 else 505 DNETDEBUG("ignoring unexpected non-ACK packet on " 506 "half-duplex connection.\n"); 507 return; 508 } 509 510 debugnet_handle_rx_msg(pcb, mb); 511 } 512 513 /* 514 * Handler for incoming packets directly from the network adapter 515 * Identifies the packet type (IP or ARP) and passes it along to one of the 516 * helper functions debugnet_handle_ip or debugnet_handle_arp. 517 * 518 * It needs to partially replicate the behaviour of ether_input() and 519 * ether_demux(). 520 * 521 * Parameters: 522 * ifp the interface the packet came from 523 * m an mbuf containing the packet received 524 */ 525 static void 526 debugnet_input_one(struct ifnet *ifp, struct mbuf *m) 527 { 528 struct ifreq ifr; 529 struct ether_header *eh; 530 u_short etype; 531 532 /* Ethernet processing. 
*/ 533 if ((m->m_flags & M_PKTHDR) == 0) { 534 DNETDEBUG_IF(ifp, "discard frame without packet header\n"); 535 goto done; 536 } 537 if (m->m_len < ETHER_HDR_LEN) { 538 DNETDEBUG_IF(ifp, 539 "discard frame without leading eth header (len %u pktlen %u)\n", 540 m->m_len, m->m_pkthdr.len); 541 goto done; 542 } 543 if ((m->m_flags & M_HASFCS) != 0) { 544 m_adj(m, -ETHER_CRC_LEN); 545 m->m_flags &= ~M_HASFCS; 546 } 547 eh = mtod(m, struct ether_header *); 548 etype = ntohs(eh->ether_type); 549 if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { 550 DNETDEBUG_IF(ifp, "ignoring vlan packets\n"); 551 goto done; 552 } 553 if (if_gethwaddr(ifp, &ifr) != 0) { 554 DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n"); 555 goto done; 556 } 557 if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost, 558 ETHER_ADDR_LEN) != 0 && 559 (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) { 560 DNETDEBUG_IF(ifp, 561 "discard frame with incorrect destination addr\n"); 562 goto done; 563 } 564 565 MPASS(g_debugnet_pcb_inuse); 566 567 /* Done ethernet processing. Strip off the ethernet header. */ 568 m_adj(m, ETHER_HDR_LEN); 569 switch (etype) { 570 case ETHERTYPE_ARP: 571 debugnet_handle_arp(&g_dnet_pcb, &m); 572 break; 573 case ETHERTYPE_IP: 574 debugnet_handle_ip(&g_dnet_pcb, &m); 575 break; 576 default: 577 DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); 578 break; 579 } 580 done: 581 if (m != NULL) 582 m_freem(m); 583 } 584 585 static void 586 debugnet_input(struct ifnet *ifp, struct mbuf *m) 587 { 588 struct mbuf *n; 589 590 do { 591 n = m->m_nextpkt; 592 m->m_nextpkt = NULL; 593 debugnet_input_one(ifp, m); 594 m = n; 595 } while (m != NULL); 596 } 597 598 /* 599 * Network polling primitive. 600 * 601 * Instead of assuming that most of the network stack is sane, we just poll the 602 * driver directly for packets. 
603 */ 604 void 605 debugnet_network_poll(struct debugnet_pcb *pcb) 606 { 607 struct ifnet *ifp; 608 609 ifp = pcb->dp_ifp; 610 ifp->if_debugnet_methods->dn_poll(ifp, 1000); 611 } 612 613 /* 614 * Start of consumer API surface. 615 */ 616 void 617 debugnet_free(struct debugnet_pcb *pcb) 618 { 619 struct ifnet *ifp; 620 621 MPASS(pcb == &g_dnet_pcb); 622 MPASS(pcb->dp_drv_input == NULL || g_debugnet_pcb_inuse); 623 624 ifp = pcb->dp_ifp; 625 if (ifp != NULL) { 626 if (pcb->dp_drv_input != NULL) 627 ifp->if_input = pcb->dp_drv_input; 628 if (pcb->dp_event_started) 629 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END); 630 } 631 debugnet_mbuf_finish(); 632 633 g_debugnet_pcb_inuse = false; 634 memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb)); 635 } 636 637 int 638 debugnet_connect(const struct debugnet_conn_params *dcp, 639 struct debugnet_pcb **pcb_out) 640 { 641 struct debugnet_proto_aux herald_auxdata; 642 struct debugnet_pcb *pcb; 643 struct ifnet *ifp; 644 int error; 645 646 if (g_debugnet_pcb_inuse) { 647 printf("%s: Only one connection at a time.\n", __func__); 648 return (EBUSY); 649 } 650 651 pcb = &g_dnet_pcb; 652 *pcb = (struct debugnet_pcb) { 653 .dp_state = DN_STATE_INIT, 654 .dp_client = dcp->dc_client, 655 .dp_server = dcp->dc_server, 656 .dp_gateway = dcp->dc_gateway, 657 .dp_server_port = dcp->dc_herald_port, /* Initially */ 658 .dp_client_port = dcp->dc_client_port, 659 .dp_seqno = 1, 660 .dp_ifp = dcp->dc_ifp, 661 .dp_rx_handler = dcp->dc_rx_handler, 662 .dp_drv_input = NULL, 663 }; 664 665 /* Switch to the debugnet mbuf zones. */ 666 debugnet_mbuf_start(); 667 668 /* At least one needed parameter is missing; infer it. 
*/ 669 if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY || 670 pcb->dp_ifp == NULL) { 671 struct sockaddr_in dest_sin, *gw_sin, *local_sin; 672 struct ifnet *rt_ifp; 673 struct nhop_object *nh; 674 675 memset(&dest_sin, 0, sizeof(dest_sin)); 676 dest_sin = (struct sockaddr_in) { 677 .sin_len = sizeof(dest_sin), 678 .sin_family = AF_INET, 679 .sin_addr.s_addr = pcb->dp_server, 680 }; 681 682 CURVNET_SET(vnet0); 683 nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0, 684 NHR_NONE); 685 CURVNET_RESTORE(); 686 687 if (nh == NULL) { 688 printf("%s: Could not get route for that server.\n", 689 __func__); 690 error = ENOENT; 691 goto cleanup; 692 } 693 694 /* TODO support AF_INET6 */ 695 if (nh->gw_sa.sa_family == AF_INET) 696 gw_sin = &nh->gw4_sa; 697 else { 698 if (nh->gw_sa.sa_family == AF_LINK) 699 DNETDEBUG("Destination address is on link.\n"); 700 gw_sin = NULL; 701 } 702 703 MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET); 704 local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr; 705 706 rt_ifp = nh->nh_ifp; 707 708 if (pcb->dp_client == INADDR_ANY) 709 pcb->dp_client = local_sin->sin_addr.s_addr; 710 if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL) 711 pcb->dp_gateway = gw_sin->sin_addr.s_addr; 712 if (pcb->dp_ifp == NULL) 713 pcb->dp_ifp = rt_ifp; 714 } 715 716 ifp = pcb->dp_ifp; 717 718 if (debugnet_debug > 0) { 719 char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN], 720 gwbuf[INET_ADDRSTRLEN]; 721 inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf)); 722 inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf)); 723 if (pcb->dp_gateway != INADDR_ANY) 724 inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf)); 725 DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n", 726 serbuf, pcb->dp_server_port, 727 (pcb->dp_gateway == INADDR_ANY) ? "" : " via ", 728 (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf, 729 clibuf, pcb->dp_client_port, if_name(ifp)); 730 } 731 732 /* Validate iface is online and supported. 
*/ 733 if (!DEBUGNET_SUPPORTED_NIC(ifp)) { 734 printf("%s: interface '%s' does not support debugnet\n", 735 __func__, if_name(ifp)); 736 error = ENODEV; 737 goto cleanup; 738 } 739 if ((if_getflags(ifp) & IFF_UP) == 0) { 740 printf("%s: interface '%s' link is down\n", __func__, 741 if_name(ifp)); 742 error = ENXIO; 743 goto cleanup; 744 } 745 746 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START); 747 pcb->dp_event_started = true; 748 749 /* 750 * We maintain the invariant that g_debugnet_pcb_inuse is always true 751 * while the debugnet ifp's if_input is overridden with 752 * debugnet_input(). 753 */ 754 g_debugnet_pcb_inuse = true; 755 756 /* Make the card use *our* receive callback. */ 757 pcb->dp_drv_input = ifp->if_input; 758 ifp->if_input = debugnet_input; 759 760 printf("%s: searching for %s MAC...\n", __func__, 761 (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway"); 762 763 error = debugnet_arp_gw(pcb); 764 if (error != 0) { 765 printf("%s: failed to locate MAC address\n", __func__); 766 goto cleanup; 767 } 768 MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC); 769 770 herald_auxdata = (struct debugnet_proto_aux) { 771 .dp_offset_start = dcp->dc_herald_offset, 772 .dp_aux2 = dcp->dc_herald_aux2, 773 }; 774 error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data, 775 dcp->dc_herald_datalen, &herald_auxdata); 776 if (error != 0) { 777 printf("%s: failed to herald debugnet server\n", __func__); 778 goto cleanup; 779 } 780 781 *pcb_out = pcb; 782 return (0); 783 784 cleanup: 785 debugnet_free(pcb); 786 return (error); 787 } 788 789 /* 790 * Pre-allocated dump-time mbuf tracking. 791 * 792 * We just track the high water mark we've ever seen and allocate appropriately 793 * for that iface/mtu combo. 
794 */ 795 static struct { 796 int nmbuf; 797 int ncl; 798 int clsize; 799 } dn_hwm; 800 static struct mtx dn_hwm_lk; 801 MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF); 802 803 static void 804 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize) 805 { 806 bool any; 807 808 any = false; 809 mtx_lock(&dn_hwm_lk); 810 811 if (nmbuf > dn_hwm.nmbuf) { 812 any = true; 813 dn_hwm.nmbuf = nmbuf; 814 } else 815 nmbuf = dn_hwm.nmbuf; 816 817 if (ncl > dn_hwm.ncl) { 818 any = true; 819 dn_hwm.ncl = ncl; 820 } else 821 ncl = dn_hwm.ncl; 822 823 if (clsize > dn_hwm.clsize) { 824 any = true; 825 dn_hwm.clsize = clsize; 826 } else 827 clsize = dn_hwm.clsize; 828 829 mtx_unlock(&dn_hwm_lk); 830 831 if (any) 832 debugnet_mbuf_reinit(nmbuf, ncl, clsize); 833 } 834 835 void 836 debugnet_any_ifnet_update(struct ifnet *ifp) 837 { 838 int clsize, nmbuf, ncl, nrxr; 839 840 if (!DEBUGNET_SUPPORTED_NIC(ifp)) 841 return; 842 843 ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize); 844 KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); 845 846 /* 847 * We need two headers per message on the transmit side. Multiply by 848 * four to give us some breathing room. 849 */ 850 nmbuf = ncl * (4 + nrxr); 851 ncl *= nrxr; 852 853 /* 854 * Bandaid for drivers that (incorrectly) advertise LinkUp before their 855 * dn_init method is available. 856 */ 857 if (nmbuf == 0 || ncl == 0 || clsize == 0) { 858 printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n", 859 __func__, if_name(ifp), ifp); 860 return; 861 } 862 dn_maybe_reinit_mbufs(nmbuf, ncl, clsize); 863 } 864 865 /* 866 * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless 867 * for us because drivers tend to if_attach before invoking DEBUGNET_SET(). 868 * 869 * On the other hand, hooking DEBUGNET_SET() itself may still be too early, 870 * because the driver is still in attach. Since we cannot use down interfaces, 871 * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? 
... Nope, at least 872 * with vtnet and dhcpclient that event just never occurs. 873 * 874 * So that's how I've landed on the lower level ifnet_link_event. 875 */ 876 877 static void 878 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state) 879 { 880 if (link_state == LINK_STATE_UP) 881 debugnet_any_ifnet_update(ifp); 882 } 883 884 static eventhandler_tag dn_attach_cookie; 885 static void 886 dn_evh_init(void *ctx __unused) 887 { 888 dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event, 889 dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); 890 } 891 SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL); 892 893 /* 894 * DDB parsing helpers for debugnet(4) consumers. 895 */ 896 #ifdef DDB 897 struct my_inet_opt { 898 bool has_opt; 899 const char *printname; 900 in_addr_t *result; 901 }; 902 903 static int 904 dn_parse_optarg_ipv4(struct my_inet_opt *opt) 905 { 906 in_addr_t tmp; 907 unsigned octet; 908 int t; 909 910 tmp = 0; 911 for (octet = 0; octet < 4; octet++) { 912 t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL); 913 if (t != tNUMBER) { 914 db_printf("%s:%s: octet %u expected number; found %d\n", 915 __func__, opt->printname, octet, t); 916 return (EINVAL); 917 } 918 /* 919 * db_lex lexes '-' distinctly from the number itself, but 920 * let's document that invariant. 921 */ 922 MPASS(db_tok_number >= 0); 923 924 if (db_tok_number > UINT8_MAX) { 925 db_printf("%s:%s: octet %u out of range: %jd\n", __func__, 926 opt->printname, octet, (intmax_t)db_tok_number); 927 return (EDOM); 928 } 929 930 /* Constructed host-endian and converted to network later. 
*/ 931 tmp = (tmp << 8) | db_tok_number; 932 933 if (octet < 3) { 934 t = db_read_token_flags(DRT_WSPACE); 935 if (t != tDOT) { 936 db_printf("%s:%s: octet %u expected '.'; found" 937 " %d\n", __func__, opt->printname, octet, 938 t); 939 return (EINVAL); 940 } 941 } 942 } 943 944 *opt->result = htonl(tmp); 945 opt->has_opt = true; 946 return (0); 947 } 948 949 int 950 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result) 951 { 952 struct ifnet *ifp; 953 int t, error; 954 bool want_ifp; 955 char ch; 956 957 struct my_inet_opt opt_client = { 958 .printname = "client", 959 .result = &result->dd_client, 960 }, 961 opt_server = { 962 .printname = "server", 963 .result = &result->dd_server, 964 }, 965 opt_gateway = { 966 .printname = "gateway", 967 .result = &result->dd_gateway, 968 }, 969 *cur_inet_opt; 970 971 ifp = NULL; 972 memset(result, 0, sizeof(*result)); 973 974 /* 975 * command [space] [-] [opt] [[space] [optarg]] ... 976 * 977 * db_command has already lexed 'command' for us. 
978 */ 979 t = db_read_token_flags(DRT_WSPACE); 980 if (t == tWSPACE) 981 t = db_read_token_flags(DRT_WSPACE); 982 983 while (t != tEOL) { 984 if (t != tMINUS) { 985 db_printf("%s: Bad syntax; expected '-', got %d\n", 986 cmd, t); 987 goto usage; 988 } 989 990 t = db_read_token_flags(DRT_WSPACE); 991 if (t != tIDENT) { 992 db_printf("%s: Bad syntax; expected tIDENT, got %d\n", 993 cmd, t); 994 goto usage; 995 } 996 997 if (strlen(db_tok_string) > 1) { 998 db_printf("%s: Bad syntax; expected single option " 999 "flag, got '%s'\n", cmd, db_tok_string); 1000 goto usage; 1001 } 1002 1003 want_ifp = false; 1004 cur_inet_opt = NULL; 1005 switch ((ch = db_tok_string[0])) { 1006 default: 1007 DNETDEBUG("Unexpected: '%c'\n", ch); 1008 /* FALLTHROUGH */ 1009 case 'h': 1010 goto usage; 1011 case 'c': 1012 cur_inet_opt = &opt_client; 1013 break; 1014 case 'g': 1015 cur_inet_opt = &opt_gateway; 1016 break; 1017 case 's': 1018 cur_inet_opt = &opt_server; 1019 break; 1020 case 'i': 1021 want_ifp = true; 1022 break; 1023 } 1024 1025 t = db_read_token_flags(DRT_WSPACE); 1026 if (t != tWSPACE) { 1027 db_printf("%s: Bad syntax; expected space after " 1028 "flag %c, got %d\n", cmd, ch, t); 1029 goto usage; 1030 } 1031 1032 if (want_ifp) { 1033 t = db_read_token_flags(DRT_WSPACE); 1034 if (t != tIDENT) { 1035 db_printf("%s: Expected interface but got %d\n", 1036 cmd, t); 1037 goto usage; 1038 } 1039 1040 CURVNET_SET(vnet0); 1041 /* 1042 * We *don't* take a ref here because the only current 1043 * consumer, db_netdump_cmd, does not need it. It 1044 * (somewhat redundantly) extracts the if_name(), 1045 * re-lookups the ifp, and takes its own reference. 1046 */ 1047 ifp = ifunit(db_tok_string); 1048 CURVNET_RESTORE(); 1049 if (ifp == NULL) { 1050 db_printf("Could not locate interface %s\n", 1051 db_tok_string); 1052 error = ENOENT; 1053 goto cleanup; 1054 } 1055 } else { 1056 MPASS(cur_inet_opt != NULL); 1057 /* Assume IPv4 for now. 
*/ 1058 error = dn_parse_optarg_ipv4(cur_inet_opt); 1059 if (error != 0) 1060 goto cleanup; 1061 } 1062 1063 /* Skip (mandatory) whitespace after option, if not EOL. */ 1064 t = db_read_token_flags(DRT_WSPACE); 1065 if (t == tEOL) 1066 break; 1067 if (t != tWSPACE) { 1068 db_printf("%s: Bad syntax; expected space after " 1069 "flag %c option; got %d\n", cmd, ch, t); 1070 goto usage; 1071 } 1072 t = db_read_token_flags(DRT_WSPACE); 1073 } 1074 1075 if (!opt_server.has_opt) { 1076 db_printf("%s: need a destination server address\n", cmd); 1077 goto usage; 1078 } 1079 1080 result->dd_has_client = opt_client.has_opt; 1081 result->dd_has_gateway = opt_gateway.has_opt; 1082 result->dd_ifp = ifp; 1083 1084 /* We parsed the full line to tEOL already, or bailed with an error. */ 1085 return (0); 1086 1087 usage: 1088 db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> " 1089 "-i <interface>]\n", cmd); 1090 error = EINVAL; 1091 /* FALLTHROUGH */ 1092 cleanup: 1093 db_skip_to_eol(); 1094 return (error); 1095 } 1096 #endif /* DDB */ 1097