xref: /freebsd/sys/net/debugnet.c (revision 0634308df224aee0a0a5ce66f52910787221f378)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2019 Isilon Systems, LLC.
5  * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
6  * Copyright (c) 2000 Darrell Anderson
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_ddb.h"
35 #include "opt_inet.h"
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/endian.h>
40 #include <sys/errno.h>
41 #include <sys/socket.h>
42 #include <sys/sysctl.h>
43 
44 #ifdef DDB
45 #include <ddb/ddb.h>
46 #include <ddb/db_lex.h>
47 #endif
48 
49 #include <net/ethernet.h>
50 #include <net/if.h>
51 #include <net/if_arp.h>
52 #include <net/if_dl.h>
53 #include <net/if_types.h>
54 #include <net/if_var.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip.h>
60 #include <netinet/ip_var.h>
61 #include <netinet/ip_options.h>
62 #include <netinet/udp.h>
63 #include <netinet/udp_var.h>
64 
65 #include <machine/in_cksum.h>
66 #include <machine/pcb.h>
67 
68 #include <net/debugnet.h>
69 #define	DEBUGNET_INTERNAL
70 #include <net/debugnet_int.h>
71 
72 FEATURE(debugnet, "Debugnet support");
73 
74 SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD, NULL,
75     "debugnet parameters");
76 
77 unsigned debugnet_debug;
78 SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
79     &debugnet_debug, 0,
80     "Debug message verbosity (0: off; 1: on; 2: verbose)");
81 
82 int debugnet_npolls = 2000;
83 SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
84     &debugnet_npolls, 0,
85     "Number of times to poll before assuming packet loss (0.5ms per poll)");
86 int debugnet_nretries = 10;
87 SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
88     &debugnet_nretries, 0,
89     "Number of retransmit attempts before giving up");
90 
91 static bool g_debugnet_pcb_inuse;
92 static struct debugnet_pcb g_dnet_pcb;
93 
94 /*
95  * Simple accessors for opaque PCB.
96  */
97 const unsigned char *
98 debugnet_get_gw_mac(const struct debugnet_pcb *pcb)
99 {
100 	MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
101 	    pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
102 	return (pcb->dp_gw_mac.octet);
103 }
104 
105 /*
106  * Start of network primitives, beginning with output primitives.
107  */
108 
109 /*
110  * Handles creation of the ethernet header, then places outgoing packets into
111  * the tx buffer for the NIC
112  *
113  * Parameters:
114  *	m	The mbuf containing the packet to be sent (will be freed by
115  *		this function or the NIC driver)
116  *	ifp	The interface to send on
117  *	dst	The destination ethernet address (source address will be looked
118  *		up using ifp)
119  *	etype	The ETHERTYPE_* value for the protocol that is being sent
120  *
121  * Returns:
122  *	int	see errno.h, 0 for success
123  */
124 int
125 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
126     u_short etype)
127 {
128 	struct ether_header *eh;
129 
130 	if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) ||
131 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) {
132 		if_printf(ifp, "%s: interface isn't up\n", __func__);
133 		m_freem(m);
134 		return (ENETDOWN);
135 	}
136 
137 	/* Fill in the ethernet header. */
138 	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
139 	if (m == NULL) {
140 		printf("%s: out of mbufs\n", __func__);
141 		return (ENOBUFS);
142 	}
143 	eh = mtod(m, struct ether_header *);
144 	memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
145 	memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN);
146 	eh->ether_type = htons(etype);
147 	return (ifp->if_debugnet_methods->dn_transmit(ifp, m));
148 }
149 
150 /*
151  * Unreliable transmission of an mbuf chain to the debugnet server
152  * Note: can't handle fragmentation; fails if the packet is larger than
153  *	 ifp->if_mtu after adding the UDP/IP headers
154  *
155  * Parameters:
156  *	pcb	The debugnet context block
157  *	m	mbuf chain
158  *
159  * Returns:
160  *	int	see errno.h, 0 for success
161  */
162 static int
163 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m)
164 {
165 	struct udphdr *udp;
166 
167 	MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
168 
169 	M_PREPEND(m, sizeof(*udp), M_NOWAIT);
170 	if (m == NULL) {
171 		printf("%s: out of mbufs\n", __func__);
172 		return (ENOBUFS);
173 	}
174 
175 	udp = mtod(m, void *);
176 	udp->uh_ulen = htons(m->m_pkthdr.len);
177 	/* Use this src port so that the server can connect() the socket */
178 	udp->uh_sport = htons(pcb->dp_client_port);
179 	udp->uh_dport = htons(pcb->dp_server_port);
180 	/* Computed later (protocol-dependent). */
181 	udp->uh_sum = 0;
182 
183 	return (debugnet_ip_output(pcb, m));
184 }
185 
186 int
187 debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */)
188 {
189 	struct debugnet_ack *dn_ack;
190 	struct mbuf *m;
191 
192 	DNETDEBUG("Acking with seqno %u\n", ntohl(seqno));
193 
194 	m = m_gethdr(M_NOWAIT, MT_DATA);
195 	if (m == NULL) {
196 		printf("%s: Out of mbufs\n", __func__);
197 		return (ENOBUFS);
198 	}
199 	m->m_len = sizeof(*dn_ack);
200 	m->m_pkthdr.len = sizeof(*dn_ack);
201 	MH_ALIGN(m, sizeof(*dn_ack));
202 	dn_ack = mtod(m, void *);
203 	dn_ack->da_seqno = seqno;
204 
205 	return (debugnet_udp_output(pcb, m));
206 }
207 
208 /*
209  * Dummy free function for debugnet clusters.
210  */
211 static void
212 debugnet_mbuf_free(struct mbuf *m __unused)
213 {
214 }
215 
216 /*
217  * Construct and reliably send a debugnet packet.  May fail from a resource
218  * shortage or extreme number of unacknowledged retransmissions.  Wait for
219  * an acknowledgement before returning.  Splits packets into chunks small
220  * enough to be sent without fragmentation (looks up the interface MTU)
221  *
222  * Parameters:
223  *	type	debugnet packet type (HERALD, FINISHED, ...)
224  *	data	data
225  *	datalen	data size (bytes)
226  *	auxdata	optional auxiliary information
227  *
228  * Returns:
229  *	int see errno.h, 0 for success
230  */
231 int
232 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data,
233     uint32_t datalen, const struct debugnet_proto_aux *auxdata)
234 {
235 	struct debugnet_msg_hdr *dn_msg_hdr;
236 	struct mbuf *m, *m2;
237 	uint64_t want_acks;
238 	uint32_t i, pktlen, sent_so_far;
239 	int retries, polls, error;
240 
241 	if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
242 		return (ECONNRESET);
243 
244 	want_acks = 0;
245 	pcb->dp_rcvd_acks = 0;
246 	retries = 0;
247 
248 retransmit:
249 	/* Chunks can be too big to fit in packets. */
250 	for (i = sent_so_far = 0; sent_so_far < datalen ||
251 	    (i == 0 && datalen == 0); i++) {
252 		pktlen = datalen - sent_so_far;
253 
254 		/* Bound: the interface MTU (assume no IP options). */
255 		pktlen = min(pktlen, pcb->dp_ifp->if_mtu -
256 		    sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr));
257 
258 		/*
259 		 * Check if it is retransmitting and this has been ACKed
260 		 * already.
261 		 */
262 		if ((pcb->dp_rcvd_acks & (1 << i)) != 0) {
263 			sent_so_far += pktlen;
264 			continue;
265 		}
266 
267 		/*
268 		 * Get and fill a header mbuf, then chain data as an extended
269 		 * mbuf.
270 		 */
271 		m = m_gethdr(M_NOWAIT, MT_DATA);
272 		if (m == NULL) {
273 			printf("%s: Out of mbufs\n", __func__);
274 			return (ENOBUFS);
275 		}
276 		m->m_len = sizeof(struct debugnet_msg_hdr);
277 		m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr);
278 		MH_ALIGN(m, sizeof(struct debugnet_msg_hdr));
279 		dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *);
280 		dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i);
281 		dn_msg_hdr->mh_type = htonl(type);
282 		dn_msg_hdr->mh_len = htonl(pktlen);
283 
284 		if (auxdata != NULL) {
285 			dn_msg_hdr->mh_offset =
286 			    htobe64(auxdata->dp_offset_start + sent_so_far);
287 			dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2);
288 		} else {
289 			dn_msg_hdr->mh_offset = htobe64(sent_so_far);
290 			dn_msg_hdr->mh_aux2 = 0;
291 		}
292 
293 		if (pktlen != 0) {
294 			m2 = m_get(M_NOWAIT, MT_DATA);
295 			if (m2 == NULL) {
296 				m_freem(m);
297 				printf("%s: Out of mbufs\n", __func__);
298 				return (ENOBUFS);
299 			}
300 			MEXTADD(m2, __DECONST(char *, data) + sent_so_far,
301 			    pktlen, debugnet_mbuf_free, NULL, NULL, 0,
302 			    EXT_DISPOSABLE);
303 			m2->m_len = pktlen;
304 
305 			m_cat(m, m2);
306 			m->m_pkthdr.len += pktlen;
307 		}
308 		error = debugnet_udp_output(pcb, m);
309 		if (error != 0)
310 			return (error);
311 
312 		/* Note that we're waiting for this packet in the bitfield. */
313 		want_acks |= (1 << i);
314 		sent_so_far += pktlen;
315 	}
316 	if (i >= DEBUGNET_MAX_IN_FLIGHT)
317 		printf("Warning: Sent more than %d packets (%d). "
318 		    "Acknowledgements will fail unless the size of "
319 		    "rcvd_acks/want_acks is increased.\n",
320 		    DEBUGNET_MAX_IN_FLIGHT, i);
321 
322 	/*
323 	 * Wait for acks.  A *real* window would speed things up considerably.
324 	 */
325 	polls = 0;
326 	while (pcb->dp_rcvd_acks != want_acks) {
327 		if (polls++ > debugnet_npolls) {
328 			if (retries++ > debugnet_nretries)
329 				return (ETIMEDOUT);
330 			printf(". ");
331 			goto retransmit;
332 		}
333 		debugnet_network_poll(pcb);
334 		DELAY(500);
335 		if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
336 			return (ECONNRESET);
337 	}
338 	pcb->dp_seqno += i;
339 	return (0);
340 }
341 
342 /*
343  * Network input primitives.
344  */
345 
346 /*
347  * Just introspect the header enough to fire off a seqno ack and validate
348  * length fits.
349  */
350 static void
351 debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb)
352 {
353 	const struct debugnet_msg_hdr *dnh;
354 	struct mbuf *m;
355 	int error;
356 
357 	m = *mb;
358 
359 	if (m->m_pkthdr.len < sizeof(*dnh)) {
360 		DNETDEBUG("ignoring small debugnet_msg packet\n");
361 		return;
362 	}
363 
364 	/* Get ND header. */
365 	if (m->m_len < sizeof(*dnh)) {
366 		m = m_pullup(m, sizeof(*dnh));
367 		*mb = m;
368 		if (m == NULL) {
369 			DNETDEBUG("m_pullup failed\n");
370 			return;
371 		}
372 	}
373 	dnh = mtod(m, const void *);
374 
375 	if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) {
376 		DNETDEBUG("Dropping short packet.\n");
377 		return;
378 	}
379 
380 	/*
381 	 * If the issue is transient (ENOBUFS), sender should resend.  If
382 	 * non-transient (like driver objecting to rx -> tx from the same
383 	 * thread), not much else we can do.
384 	 */
385 	error = debugnet_ack_output(pcb, dnh->mh_seqno);
386 	if (error != 0)
387 		return;
388 
389 	if (ntohl(dnh->mh_type) == DEBUGNET_FINISHED) {
390 		printf("Remote shut down the connection on us!\n");
391 		pcb->dp_state = DN_STATE_REMOTE_CLOSED;
392 
393 		/*
394 		 * Continue through to the user handler so they are signalled
395 		 * not to wait for further rx.
396 		 */
397 	}
398 
399 	pcb->dp_rx_handler(pcb, mb);
400 }
401 
402 static void
403 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport)
404 {
405 	const struct debugnet_ack *dn_ack;
406 	struct mbuf *m;
407 	uint32_t rcv_ackno;
408 
409 	m = *mb;
410 
411 	/* Get Ack. */
412 	if (m->m_len < sizeof(*dn_ack)) {
413 		m = m_pullup(m, sizeof(*dn_ack));
414 		*mb = m;
415 		if (m == NULL) {
416 			DNETDEBUG("m_pullup failed\n");
417 			return;
418 		}
419 	}
420 	dn_ack = mtod(m, const void *);
421 
422 	/* Debugnet processing. */
423 	/*
424 	 * Packet is meant for us.  Extract the ack sequence number and the
425 	 * port number if necessary.
426 	 */
427 	rcv_ackno = ntohl(dn_ack->da_seqno);
428 	if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) {
429 		pcb->dp_server_port = sport;
430 		pcb->dp_state = DN_STATE_GOT_HERALD_PORT;
431 	}
432 	if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT)
433 		printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno);
434 	else if (rcv_ackno >= pcb->dp_seqno) {
435 		/* We're interested in this ack. Record it. */
436 		pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno);
437 	}
438 }
439 
440 void
441 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb)
442 {
443 	const struct udphdr *udp;
444 	struct mbuf *m;
445 	uint16_t sport, ulen;
446 
447 	/* UDP processing. */
448 
449 	m = *mb;
450 	if (m->m_pkthdr.len < sizeof(*udp)) {
451 		DNETDEBUG("ignoring small UDP packet\n");
452 		return;
453 	}
454 
455 	/* Get UDP headers. */
456 	if (m->m_len < sizeof(*udp)) {
457 		m = m_pullup(m, sizeof(*udp));
458 		*mb = m;
459 		if (m == NULL) {
460 			DNETDEBUG("m_pullup failed\n");
461 			return;
462 		}
463 	}
464 	udp = mtod(m, const void *);
465 
466 	/* We expect to receive UDP packets on the configured client port. */
467 	if (ntohs(udp->uh_dport) != pcb->dp_client_port) {
468 		DNETDEBUG("not on the expected port.\n");
469 		return;
470 	}
471 
472 	/* Check that ulen does not exceed actual size of data. */
473 	ulen = ntohs(udp->uh_ulen);
474 	if (m->m_pkthdr.len < ulen) {
475 		DNETDEBUG("ignoring runt UDP packet\n");
476 		return;
477 	}
478 
479 	sport = ntohs(udp->uh_sport);
480 
481 	m_adj(m, sizeof(*udp));
482 	ulen -= sizeof(*udp);
483 
484 	if (ulen == sizeof(struct debugnet_ack)) {
485 		debugnet_handle_ack(pcb, mb, sport);
486 		return;
487 	}
488 
489 	if (pcb->dp_rx_handler == NULL) {
490 		if (ulen < sizeof(struct debugnet_ack))
491 			DNETDEBUG("ignoring small ACK packet\n");
492 		else
493 			DNETDEBUG("ignoring unexpected non-ACK packet on "
494 			    "half-duplex connection.\n");
495 		return;
496 	}
497 
498 	debugnet_handle_rx_msg(pcb, mb);
499 }
500 
501 /*
502  * Handler for incoming packets directly from the network adapter
503  * Identifies the packet type (IP or ARP) and passes it along to one of the
504  * helper functions debugnet_handle_ip or debugnet_handle_arp.
505  *
506  * It needs to partially replicate the behaviour of ether_input() and
507  * ether_demux().
508  *
509  * Parameters:
510  *	ifp	the interface the packet came from
511  *	m	an mbuf containing the packet received
512  */
513 static void
514 debugnet_pkt_in(struct ifnet *ifp, struct mbuf *m)
515 {
516 	struct ifreq ifr;
517 	struct ether_header *eh;
518 	u_short etype;
519 
520 	/* Ethernet processing. */
521 	if ((m->m_flags & M_PKTHDR) == 0) {
522 		DNETDEBUG_IF(ifp, "discard frame without packet header\n");
523 		goto done;
524 	}
525 	if (m->m_len < ETHER_HDR_LEN) {
526 		DNETDEBUG_IF(ifp,
527 	    "discard frame without leading eth header (len %u pktlen %u)\n",
528 		    m->m_len, m->m_pkthdr.len);
529 		goto done;
530 	}
531 	if ((m->m_flags & M_HASFCS) != 0) {
532 		m_adj(m, -ETHER_CRC_LEN);
533 		m->m_flags &= ~M_HASFCS;
534 	}
535 	eh = mtod(m, struct ether_header *);
536 	etype = ntohs(eh->ether_type);
537 	if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
538 		DNETDEBUG_IF(ifp, "ignoring vlan packets\n");
539 		goto done;
540 	}
541 	if (if_gethwaddr(ifp, &ifr) != 0) {
542 		DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n");
543 		goto done;
544 	}
545 	if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
546 	    ETHER_ADDR_LEN) != 0 &&
547 	    (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) {
548 		DNETDEBUG_IF(ifp,
549 		    "discard frame with incorrect destination addr\n");
550 		goto done;
551 	}
552 
553 	MPASS(g_debugnet_pcb_inuse);
554 
555 	/* Done ethernet processing. Strip off the ethernet header. */
556 	m_adj(m, ETHER_HDR_LEN);
557 	switch (etype) {
558 	case ETHERTYPE_ARP:
559 		debugnet_handle_arp(&g_dnet_pcb, &m);
560 		break;
561 	case ETHERTYPE_IP:
562 		debugnet_handle_ip(&g_dnet_pcb, &m);
563 		break;
564 	default:
565 		DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
566 		break;
567 	}
568 done:
569 	if (m != NULL)
570 		m_freem(m);
571 }
572 
573 /*
574  * Network polling primitive.
575  *
576  * Instead of assuming that most of the network stack is sane, we just poll the
577  * driver directly for packets.
578  */
579 void
580 debugnet_network_poll(struct debugnet_pcb *pcb)
581 {
582 	struct ifnet *ifp;
583 
584 	ifp = pcb->dp_ifp;
585 	ifp->if_debugnet_methods->dn_poll(ifp, 1000);
586 }
587 
588 /*
589  * Start of consumer API surface.
590  */
591 void
592 debugnet_free(struct debugnet_pcb *pcb)
593 {
594 	struct ifnet *ifp;
595 
596 	MPASS(g_debugnet_pcb_inuse);
597 	MPASS(pcb == &g_dnet_pcb);
598 
599 	ifp = pcb->dp_ifp;
600 	if (ifp != NULL) {
601 		if (pcb->dp_drv_input != NULL)
602 			ifp->if_input = pcb->dp_drv_input;
603 		if (pcb->dp_event_started)
604 			ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END);
605 	}
606 	debugnet_mbuf_finish();
607 
608 	g_debugnet_pcb_inuse = false;
609 	memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb));
610 }
611 
612 int
613 debugnet_connect(const struct debugnet_conn_params *dcp,
614     struct debugnet_pcb **pcb_out)
615 {
616 	struct debugnet_proto_aux herald_auxdata;
617 	struct debugnet_pcb *pcb;
618 	struct ifnet *ifp;
619 	int error;
620 
621 	if (g_debugnet_pcb_inuse) {
622 		printf("%s: Only one connection at a time.\n", __func__);
623 		return (EBUSY);
624 	}
625 
626 	pcb = &g_dnet_pcb;
627 	*pcb = (struct debugnet_pcb) {
628 		.dp_state = DN_STATE_INIT,
629 		.dp_client = dcp->dc_client,
630 		.dp_server = dcp->dc_server,
631 		.dp_gateway = dcp->dc_gateway,
632 		.dp_server_port = dcp->dc_herald_port,	/* Initially */
633 		.dp_client_port = dcp->dc_client_port,
634 		.dp_seqno = 1,
635 		.dp_ifp = dcp->dc_ifp,
636 		.dp_rx_handler = dcp->dc_rx_handler,
637 	};
638 
639 	/* Switch to the debugnet mbuf zones. */
640 	debugnet_mbuf_start();
641 
642 	/* At least one needed parameter is missing; infer it. */
643 	if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY ||
644 	    pcb->dp_ifp == NULL) {
645 		struct sockaddr_in dest_sin, *gw_sin, *local_sin;
646 		struct rtentry *dest_rt;
647 		struct ifnet *rt_ifp;
648 
649 		memset(&dest_sin, 0, sizeof(dest_sin));
650 		dest_sin = (struct sockaddr_in) {
651 			.sin_len = sizeof(dest_sin),
652 			.sin_family = AF_INET,
653 			.sin_addr.s_addr = pcb->dp_server,
654 		};
655 
656 		CURVNET_SET(vnet0);
657 		dest_rt = rtalloc1((struct sockaddr *)&dest_sin, 0,
658 		    RTF_RNH_LOCKED);
659 		CURVNET_RESTORE();
660 
661 		if (dest_rt == NULL) {
662 			printf("%s: Could not get route for that server.\n",
663 			    __func__);
664 			error = ENOENT;
665 			goto cleanup;
666 		}
667 
668 		if (dest_rt->rt_gateway->sa_family == AF_INET)
669 			gw_sin = (struct sockaddr_in *)dest_rt->rt_gateway;
670 		else {
671 			if (dest_rt->rt_gateway->sa_family == AF_LINK)
672 				DNETDEBUG("Destination address is on link.\n");
673 			gw_sin = NULL;
674 		}
675 
676 		MPASS(dest_rt->rt_ifa->ifa_addr->sa_family == AF_INET);
677 		local_sin = (struct sockaddr_in *)dest_rt->rt_ifa->ifa_addr;
678 
679 		rt_ifp = dest_rt->rt_ifp;
680 
681 		if (pcb->dp_client == INADDR_ANY)
682 			pcb->dp_client = local_sin->sin_addr.s_addr;
683 		if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL)
684 			pcb->dp_gateway = gw_sin->sin_addr.s_addr;
685 		if (pcb->dp_ifp == NULL)
686 			pcb->dp_ifp = rt_ifp;
687 
688 		RTFREE_LOCKED(dest_rt);
689 	}
690 
691 	ifp = pcb->dp_ifp;
692 
693 	if (debugnet_debug > 0) {
694 		char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN],
695 		    gwbuf[INET_ADDRSTRLEN];
696 		inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf));
697 		inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf));
698 		if (pcb->dp_gateway != INADDR_ANY)
699 			inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf));
700 		DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n",
701 		    serbuf, pcb->dp_server_port,
702 		    (pcb->dp_gateway == INADDR_ANY) ? "" : " via ",
703 		    (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf,
704 		    clibuf, pcb->dp_client_port, if_name(ifp));
705 	}
706 
707 	/* Validate iface is online and supported. */
708 	if (!DEBUGNET_SUPPORTED_NIC(ifp)) {
709 		printf("%s: interface '%s' does not support debugnet\n",
710 		    __func__, if_name(ifp));
711 		error = ENODEV;
712 		goto cleanup;
713 	}
714 	if ((if_getflags(ifp) & IFF_UP) == 0) {
715 		printf("%s: interface '%s' link is down\n", __func__,
716 		    if_name(ifp));
717 		error = ENXIO;
718 		goto cleanup;
719 	}
720 
721 	ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START);
722 	pcb->dp_event_started = true;
723 
724 	/*
725 	 * We maintain the invariant that g_debugnet_pcb_inuse is always true
726 	 * while the debugnet ifp's if_input is overridden with
727 	 * debugnet_pkt_in.
728 	 */
729 	g_debugnet_pcb_inuse = true;
730 
731 	/* Make the card use *our* receive callback. */
732 	pcb->dp_drv_input = ifp->if_input;
733 	ifp->if_input = debugnet_pkt_in;
734 
735 	printf("%s: searching for %s MAC...\n", __func__,
736 	    (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway");
737 
738 	error = debugnet_arp_gw(pcb);
739 	if (error != 0) {
740 		printf("%s: failed to locate MAC address\n", __func__);
741 		goto cleanup;
742 	}
743 	MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC);
744 
745 	herald_auxdata = (struct debugnet_proto_aux) {
746 		.dp_offset_start = dcp->dc_herald_offset,
747 		.dp_aux2 = dcp->dc_herald_aux2,
748 	};
749 	error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data,
750 	    dcp->dc_herald_datalen, &herald_auxdata);
751 	if (error != 0) {
752 		printf("%s: failed to herald debugnet server\n", __func__);
753 		goto cleanup;
754 	}
755 
756 	*pcb_out = pcb;
757 	return (0);
758 
759 cleanup:
760 	debugnet_free(pcb);
761 	return (error);
762 }
763 
764 /*
765  * Pre-allocated dump-time mbuf tracking.
766  *
767  * We just track the high water mark we've ever seen and allocate appropriately
768  * for that iface/mtu combo.
769  */
770 static struct {
771 	int nmbuf;
772 	int ncl;
773 	int clsize;
774 } dn_hwm;
775 static struct mtx dn_hwm_lk;
776 MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF);
777 
778 static void
779 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize)
780 {
781 	bool any;
782 
783 	any = false;
784 	mtx_lock(&dn_hwm_lk);
785 
786 	if (nmbuf > dn_hwm.nmbuf) {
787 		any = true;
788 		dn_hwm.nmbuf = nmbuf;
789 	} else
790 		nmbuf = dn_hwm.nmbuf;
791 
792 	if (ncl > dn_hwm.ncl) {
793 		any = true;
794 		dn_hwm.ncl = ncl;
795 	} else
796 		ncl = dn_hwm.ncl;
797 
798 	if (clsize > dn_hwm.clsize) {
799 		any = true;
800 		dn_hwm.clsize = clsize;
801 	} else
802 		clsize = dn_hwm.clsize;
803 
804 	mtx_unlock(&dn_hwm_lk);
805 
806 	if (any)
807 		debugnet_mbuf_reinit(nmbuf, ncl, clsize);
808 }
809 
810 void
811 debugnet_any_ifnet_update(struct ifnet *ifp)
812 {
813 	int clsize, nmbuf, ncl, nrxr;
814 
815 	if (!DEBUGNET_SUPPORTED_NIC(ifp))
816 		return;
817 
818 	ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize);
819 	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
820 
821 	/*
822 	 * We need two headers per message on the transmit side. Multiply by
823 	 * four to give us some breathing room.
824 	 */
825 	nmbuf = ncl * (4 + nrxr);
826 	ncl *= nrxr;
827 
828 	dn_maybe_reinit_mbufs(nmbuf, ncl, clsize);
829 }
830 
831 /*
832  * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless
833  * for us because drivers tend to if_attach before invoking DEBUGNET_SET().
834  *
835  * On the other hand, hooking DEBUGNET_SET() itself may still be too early,
836  * because the driver is still in attach.  Since we cannot use down interfaces,
837  * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient?  ... Nope, at least
838  * with vtnet and dhcpclient that event just never occurs.
839  *
840  * So that's how I've landed on the lower level ifnet_link_event.
841  */
842 
843 static void
844 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state)
845 {
846 	if (link_state == LINK_STATE_UP)
847 		debugnet_any_ifnet_update(ifp);
848 }
849 
850 static eventhandler_tag dn_attach_cookie;
851 static void
852 dn_evh_init(void *ctx __unused)
853 {
854 	dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event,
855 	    dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
856 }
857 SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL);
858 
859 /*
860  * DDB parsing helpers for debugnet(4) consumers.
861  */
862 #ifdef DDB
863 struct my_inet_opt {
864 	bool has_opt;
865 	const char *printname;
866 	in_addr_t *result;
867 };
868 
869 static int
870 dn_parse_optarg_ipv4(struct my_inet_opt *opt)
871 {
872 	in_addr_t tmp;
873 	unsigned octet;
874 	int t;
875 
876 	tmp = 0;
877 	for (octet = 0; octet < 4; octet++) {
878 		t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL);
879 		if (t != tNUMBER) {
880 			db_printf("%s:%s: octet %u expected number; found %d\n",
881 			    __func__, opt->printname, octet, t);
882 			return (EINVAL);
883 		}
884 		/*
885 		 * db_lex lexes '-' distinctly from the number itself, but
886 		 * let's document that invariant.
887 		 */
888 		MPASS(db_tok_number >= 0);
889 
890 		if (db_tok_number > UINT8_MAX) {
891 			db_printf("%s:%s: octet %u out of range: %jd\n", __func__,
892 			    opt->printname, octet, (intmax_t)db_tok_number);
893 			return (EDOM);
894 		}
895 
896 		/* Constructed host-endian and converted to network later. */
897 		tmp = (tmp << 8) | db_tok_number;
898 
899 		if (octet < 3) {
900 			t = db_read_token_flags(DRT_WSPACE);
901 			if (t != tDOT) {
902 				db_printf("%s:%s: octet %u expected '.'; found"
903 				    " %d\n", __func__, opt->printname, octet,
904 				    t);
905 				return (EINVAL);
906 			}
907 		}
908 	}
909 
910 	*opt->result = htonl(tmp);
911 	opt->has_opt = true;
912 	return (0);
913 }
914 
915 int
916 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result)
917 {
918 	struct ifnet *ifp;
919 	int t, error;
920 	bool want_ifp;
921 	char ch;
922 
923 	struct my_inet_opt opt_client = {
924 		.printname = "client",
925 		.result = &result->dd_client,
926 	},
927 	opt_server = {
928 		.printname = "server",
929 		.result = &result->dd_server,
930 	},
931 	opt_gateway = {
932 		.printname = "gateway",
933 		.result = &result->dd_gateway,
934 	},
935 	*cur_inet_opt;
936 
937 	ifp = NULL;
938 	memset(result, 0, sizeof(*result));
939 
940 	/*
941 	 * command [space] [-] [opt] [[space] [optarg]] ...
942 	 *
943 	 * db_command has already lexed 'command' for us.
944 	 */
945 	t = db_read_token_flags(DRT_WSPACE);
946 	if (t == tWSPACE)
947 		t = db_read_token_flags(DRT_WSPACE);
948 
949 	while (t != tEOL) {
950 		if (t != tMINUS) {
951 			db_printf("%s: Bad syntax; expected '-', got %d\n",
952 			    cmd, t);
953 			goto usage;
954 		}
955 
956 		t = db_read_token_flags(DRT_WSPACE);
957 		if (t != tIDENT) {
958 			db_printf("%s: Bad syntax; expected tIDENT, got %d\n",
959 			    cmd, t);
960 			goto usage;
961 		}
962 
963 		if (strlen(db_tok_string) > 1) {
964 			db_printf("%s: Bad syntax; expected single option "
965 			    "flag, got '%s'\n", cmd, db_tok_string);
966 			goto usage;
967 		}
968 
969 		want_ifp = false;
970 		cur_inet_opt = NULL;
971 		switch ((ch = db_tok_string[0])) {
972 		default:
973 			DNETDEBUG("Unexpected: '%c'\n", ch);
974 			/* FALLTHROUGH */
975 		case 'h':
976 			goto usage;
977 		case 'c':
978 			cur_inet_opt = &opt_client;
979 			break;
980 		case 'g':
981 			cur_inet_opt = &opt_gateway;
982 			break;
983 		case 's':
984 			cur_inet_opt = &opt_server;
985 			break;
986 		case 'i':
987 			want_ifp = true;
988 			break;
989 		}
990 
991 		t = db_read_token_flags(DRT_WSPACE);
992 		if (t != tWSPACE) {
993 			db_printf("%s: Bad syntax; expected space after "
994 			    "flag %c, got %d\n", cmd, ch, t);
995 			goto usage;
996 		}
997 
998 		if (want_ifp) {
999 			t = db_read_token_flags(DRT_WSPACE);
1000 			if (t != tIDENT) {
1001 				db_printf("%s: Expected interface but got %d\n",
1002 				    cmd, t);
1003 				goto usage;
1004 			}
1005 
1006 			CURVNET_SET(vnet0);
1007 			/*
1008 			 * We *don't* take a ref here because the only current
1009 			 * consumer, db_netdump_cmd, does not need it.  It
1010 			 * (somewhat redundantly) extracts the if_name(),
1011 			 * re-lookups the ifp, and takes its own reference.
1012 			 */
1013 			ifp = ifunit(db_tok_string);
1014 			CURVNET_RESTORE();
1015 			if (ifp == NULL) {
1016 				db_printf("Could not locate interface %s\n",
1017 				    db_tok_string);
1018 				goto cleanup;
1019 			}
1020 		} else {
1021 			MPASS(cur_inet_opt != NULL);
1022 			/* Assume IPv4 for now. */
1023 			error = dn_parse_optarg_ipv4(cur_inet_opt);
1024 			if (error != 0)
1025 				goto cleanup;
1026 		}
1027 
1028 		/* Skip (mandatory) whitespace after option, if not EOL. */
1029 		t = db_read_token_flags(DRT_WSPACE);
1030 		if (t == tEOL)
1031 			break;
1032 		if (t != tWSPACE) {
1033 			db_printf("%s: Bad syntax; expected space after "
1034 			    "flag %c option; got %d\n", cmd, ch, t);
1035 			goto usage;
1036 		}
1037 		t = db_read_token_flags(DRT_WSPACE);
1038 	}
1039 
1040 	if (!opt_server.has_opt) {
1041 		db_printf("%s: need a destination server address\n", cmd);
1042 		goto usage;
1043 	}
1044 
1045 	result->dd_has_client = opt_client.has_opt;
1046 	result->dd_has_gateway = opt_gateway.has_opt;
1047 	result->dd_ifp = ifp;
1048 
1049 	/* We parsed the full line to tEOL already, or bailed with an error. */
1050 	return (0);
1051 
1052 usage:
1053 	db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> "
1054 	    "-i <interface>]\n", cmd);
1055 	error = EINVAL;
1056 	/* FALLTHROUGH */
1057 cleanup:
1058 	db_skip_to_eol();
1059 	return (error);
1060 }
1061 #endif /* DDB */
1062