xref: /freebsd/sys/net/debugnet.c (revision 6966ac055c3b7a39266fb982493330df7a097997)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2019 Isilon Systems, LLC.
5  * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
6  * Copyright (c) 2000 Darrell Anderson
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_ddb.h"
35 #include "opt_inet.h"
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/endian.h>
40 #include <sys/errno.h>
41 #include <sys/eventhandler.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 
45 #ifdef DDB
46 #include <ddb/ddb.h>
47 #include <ddb/db_lex.h>
48 #endif
49 
50 #include <net/ethernet.h>
51 #include <net/if.h>
52 #include <net/if_arp.h>
53 #include <net/if_dl.h>
54 #include <net/if_types.h>
55 #include <net/if_var.h>
56 
57 #include <netinet/in.h>
58 #include <netinet/in_systm.h>
59 #include <netinet/in_var.h>
60 #include <netinet/ip.h>
61 #include <netinet/ip_var.h>
62 #include <netinet/ip_options.h>
63 #include <netinet/udp.h>
64 #include <netinet/udp_var.h>
65 
66 #include <machine/in_cksum.h>
67 #include <machine/pcb.h>
68 
69 #include <net/debugnet.h>
70 #define	DEBUGNET_INTERNAL
71 #include <net/debugnet_int.h>
72 
73 FEATURE(debugnet, "Debugnet support");
74 
75 SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD, NULL,
76     "debugnet parameters");
77 
78 unsigned debugnet_debug;
79 SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
80     &debugnet_debug, 0,
81     "Debug message verbosity (0: off; 1: on; 2: verbose)");
82 
83 int debugnet_npolls = 2000;
84 SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
85     &debugnet_npolls, 0,
86     "Number of times to poll before assuming packet loss (0.5ms per poll)");
87 int debugnet_nretries = 10;
88 SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
89     &debugnet_nretries, 0,
90     "Number of retransmit attempts before giving up");
91 
92 static bool g_debugnet_pcb_inuse;
93 static struct debugnet_pcb g_dnet_pcb;
94 
95 /*
96  * Simple accessors for opaque PCB.
97  */
98 const unsigned char *
99 debugnet_get_gw_mac(const struct debugnet_pcb *pcb)
100 {
101 	MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
102 	    pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
103 	return (pcb->dp_gw_mac.octet);
104 }
105 
106 /*
107  * Start of network primitives, beginning with output primitives.
108  */
109 
110 /*
111  * Handles creation of the ethernet header, then places outgoing packets into
112  * the tx buffer for the NIC
113  *
114  * Parameters:
115  *	m	The mbuf containing the packet to be sent (will be freed by
116  *		this function or the NIC driver)
117  *	ifp	The interface to send on
118  *	dst	The destination ethernet address (source address will be looked
119  *		up using ifp)
120  *	etype	The ETHERTYPE_* value for the protocol that is being sent
121  *
122  * Returns:
123  *	int	see errno.h, 0 for success
124  */
125 int
126 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
127     u_short etype)
128 {
129 	struct ether_header *eh;
130 
131 	if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) ||
132 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) {
133 		if_printf(ifp, "%s: interface isn't up\n", __func__);
134 		m_freem(m);
135 		return (ENETDOWN);
136 	}
137 
138 	/* Fill in the ethernet header. */
139 	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
140 	if (m == NULL) {
141 		printf("%s: out of mbufs\n", __func__);
142 		return (ENOBUFS);
143 	}
144 	eh = mtod(m, struct ether_header *);
145 	memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
146 	memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN);
147 	eh->ether_type = htons(etype);
148 	return (ifp->if_debugnet_methods->dn_transmit(ifp, m));
149 }
150 
151 /*
152  * Unreliable transmission of an mbuf chain to the debugnet server
153  * Note: can't handle fragmentation; fails if the packet is larger than
154  *	 ifp->if_mtu after adding the UDP/IP headers
155  *
156  * Parameters:
157  *	pcb	The debugnet context block
158  *	m	mbuf chain
159  *
160  * Returns:
161  *	int	see errno.h, 0 for success
162  */
163 static int
164 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m)
165 {
166 	struct udphdr *udp;
167 
168 	MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
169 
170 	M_PREPEND(m, sizeof(*udp), M_NOWAIT);
171 	if (m == NULL) {
172 		printf("%s: out of mbufs\n", __func__);
173 		return (ENOBUFS);
174 	}
175 
176 	udp = mtod(m, void *);
177 	udp->uh_ulen = htons(m->m_pkthdr.len);
178 	/* Use this src port so that the server can connect() the socket */
179 	udp->uh_sport = htons(pcb->dp_client_port);
180 	udp->uh_dport = htons(pcb->dp_server_port);
181 	/* Computed later (protocol-dependent). */
182 	udp->uh_sum = 0;
183 
184 	return (debugnet_ip_output(pcb, m));
185 }
186 
187 int
188 debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */)
189 {
190 	struct debugnet_ack *dn_ack;
191 	struct mbuf *m;
192 
193 	DNETDEBUG("Acking with seqno %u\n", ntohl(seqno));
194 
195 	m = m_gethdr(M_NOWAIT, MT_DATA);
196 	if (m == NULL) {
197 		printf("%s: Out of mbufs\n", __func__);
198 		return (ENOBUFS);
199 	}
200 	m->m_len = sizeof(*dn_ack);
201 	m->m_pkthdr.len = sizeof(*dn_ack);
202 	MH_ALIGN(m, sizeof(*dn_ack));
203 	dn_ack = mtod(m, void *);
204 	dn_ack->da_seqno = seqno;
205 
206 	return (debugnet_udp_output(pcb, m));
207 }
208 
209 /*
210  * Dummy free function for debugnet clusters.
211  */
212 static void
213 debugnet_mbuf_free(struct mbuf *m __unused)
214 {
215 }
216 
217 /*
218  * Construct and reliably send a debugnet packet.  May fail from a resource
219  * shortage or extreme number of unacknowledged retransmissions.  Wait for
220  * an acknowledgement before returning.  Splits packets into chunks small
221  * enough to be sent without fragmentation (looks up the interface MTU)
222  *
223  * Parameters:
224  *	type	debugnet packet type (HERALD, FINISHED, ...)
225  *	data	data
226  *	datalen	data size (bytes)
227  *	auxdata	optional auxiliary information
228  *
229  * Returns:
230  *	int see errno.h, 0 for success
231  */
232 int
233 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data,
234     uint32_t datalen, const struct debugnet_proto_aux *auxdata)
235 {
236 	struct debugnet_msg_hdr *dn_msg_hdr;
237 	struct mbuf *m, *m2;
238 	uint64_t want_acks;
239 	uint32_t i, pktlen, sent_so_far;
240 	int retries, polls, error;
241 
242 	if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
243 		return (ECONNRESET);
244 
245 	want_acks = 0;
246 	pcb->dp_rcvd_acks = 0;
247 	retries = 0;
248 
249 retransmit:
250 	/* Chunks can be too big to fit in packets. */
251 	for (i = sent_so_far = 0; sent_so_far < datalen ||
252 	    (i == 0 && datalen == 0); i++) {
253 		pktlen = datalen - sent_so_far;
254 
255 		/* Bound: the interface MTU (assume no IP options). */
256 		pktlen = min(pktlen, pcb->dp_ifp->if_mtu -
257 		    sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr));
258 
259 		/*
260 		 * Check if it is retransmitting and this has been ACKed
261 		 * already.
262 		 */
263 		if ((pcb->dp_rcvd_acks & (1 << i)) != 0) {
264 			sent_so_far += pktlen;
265 			continue;
266 		}
267 
268 		/*
269 		 * Get and fill a header mbuf, then chain data as an extended
270 		 * mbuf.
271 		 */
272 		m = m_gethdr(M_NOWAIT, MT_DATA);
273 		if (m == NULL) {
274 			printf("%s: Out of mbufs\n", __func__);
275 			return (ENOBUFS);
276 		}
277 		m->m_len = sizeof(struct debugnet_msg_hdr);
278 		m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr);
279 		MH_ALIGN(m, sizeof(struct debugnet_msg_hdr));
280 		dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *);
281 		dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i);
282 		dn_msg_hdr->mh_type = htonl(type);
283 		dn_msg_hdr->mh_len = htonl(pktlen);
284 
285 		if (auxdata != NULL) {
286 			dn_msg_hdr->mh_offset =
287 			    htobe64(auxdata->dp_offset_start + sent_so_far);
288 			dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2);
289 		} else {
290 			dn_msg_hdr->mh_offset = htobe64(sent_so_far);
291 			dn_msg_hdr->mh_aux2 = 0;
292 		}
293 
294 		if (pktlen != 0) {
295 			m2 = m_get(M_NOWAIT, MT_DATA);
296 			if (m2 == NULL) {
297 				m_freem(m);
298 				printf("%s: Out of mbufs\n", __func__);
299 				return (ENOBUFS);
300 			}
301 			MEXTADD(m2, __DECONST(char *, data) + sent_so_far,
302 			    pktlen, debugnet_mbuf_free, NULL, NULL, 0,
303 			    EXT_DISPOSABLE);
304 			m2->m_len = pktlen;
305 
306 			m_cat(m, m2);
307 			m->m_pkthdr.len += pktlen;
308 		}
309 		error = debugnet_udp_output(pcb, m);
310 		if (error != 0)
311 			return (error);
312 
313 		/* Note that we're waiting for this packet in the bitfield. */
314 		want_acks |= (1 << i);
315 		sent_so_far += pktlen;
316 	}
317 	if (i >= DEBUGNET_MAX_IN_FLIGHT)
318 		printf("Warning: Sent more than %d packets (%d). "
319 		    "Acknowledgements will fail unless the size of "
320 		    "rcvd_acks/want_acks is increased.\n",
321 		    DEBUGNET_MAX_IN_FLIGHT, i);
322 
323 	/*
324 	 * Wait for acks.  A *real* window would speed things up considerably.
325 	 */
326 	polls = 0;
327 	while (pcb->dp_rcvd_acks != want_acks) {
328 		if (polls++ > debugnet_npolls) {
329 			if (retries++ > debugnet_nretries)
330 				return (ETIMEDOUT);
331 			printf(". ");
332 			goto retransmit;
333 		}
334 		debugnet_network_poll(pcb);
335 		DELAY(500);
336 		if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
337 			return (ECONNRESET);
338 	}
339 	pcb->dp_seqno += i;
340 	return (0);
341 }
342 
343 /*
344  * Network input primitives.
345  */
346 
347 /*
348  * Just introspect the header enough to fire off a seqno ack and validate
349  * length fits.
350  */
351 static void
352 debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb)
353 {
354 	const struct debugnet_msg_hdr *dnh;
355 	struct mbuf *m;
356 	int error;
357 
358 	m = *mb;
359 
360 	if (m->m_pkthdr.len < sizeof(*dnh)) {
361 		DNETDEBUG("ignoring small debugnet_msg packet\n");
362 		return;
363 	}
364 
365 	/* Get ND header. */
366 	if (m->m_len < sizeof(*dnh)) {
367 		m = m_pullup(m, sizeof(*dnh));
368 		*mb = m;
369 		if (m == NULL) {
370 			DNETDEBUG("m_pullup failed\n");
371 			return;
372 		}
373 	}
374 	dnh = mtod(m, const void *);
375 
376 	if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) {
377 		DNETDEBUG("Dropping short packet.\n");
378 		return;
379 	}
380 
381 	/*
382 	 * If the issue is transient (ENOBUFS), sender should resend.  If
383 	 * non-transient (like driver objecting to rx -> tx from the same
384 	 * thread), not much else we can do.
385 	 */
386 	error = debugnet_ack_output(pcb, dnh->mh_seqno);
387 	if (error != 0)
388 		return;
389 
390 	if (ntohl(dnh->mh_type) == DEBUGNET_FINISHED) {
391 		printf("Remote shut down the connection on us!\n");
392 		pcb->dp_state = DN_STATE_REMOTE_CLOSED;
393 
394 		/*
395 		 * Continue through to the user handler so they are signalled
396 		 * not to wait for further rx.
397 		 */
398 	}
399 
400 	pcb->dp_rx_handler(pcb, mb);
401 }
402 
403 static void
404 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport)
405 {
406 	const struct debugnet_ack *dn_ack;
407 	struct mbuf *m;
408 	uint32_t rcv_ackno;
409 
410 	m = *mb;
411 
412 	/* Get Ack. */
413 	if (m->m_len < sizeof(*dn_ack)) {
414 		m = m_pullup(m, sizeof(*dn_ack));
415 		*mb = m;
416 		if (m == NULL) {
417 			DNETDEBUG("m_pullup failed\n");
418 			return;
419 		}
420 	}
421 	dn_ack = mtod(m, const void *);
422 
423 	/* Debugnet processing. */
424 	/*
425 	 * Packet is meant for us.  Extract the ack sequence number and the
426 	 * port number if necessary.
427 	 */
428 	rcv_ackno = ntohl(dn_ack->da_seqno);
429 	if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) {
430 		pcb->dp_server_port = sport;
431 		pcb->dp_state = DN_STATE_GOT_HERALD_PORT;
432 	}
433 	if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT)
434 		printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno);
435 	else if (rcv_ackno >= pcb->dp_seqno) {
436 		/* We're interested in this ack. Record it. */
437 		pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno);
438 	}
439 }
440 
441 void
442 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb)
443 {
444 	const struct udphdr *udp;
445 	struct mbuf *m;
446 	uint16_t sport, ulen;
447 
448 	/* UDP processing. */
449 
450 	m = *mb;
451 	if (m->m_pkthdr.len < sizeof(*udp)) {
452 		DNETDEBUG("ignoring small UDP packet\n");
453 		return;
454 	}
455 
456 	/* Get UDP headers. */
457 	if (m->m_len < sizeof(*udp)) {
458 		m = m_pullup(m, sizeof(*udp));
459 		*mb = m;
460 		if (m == NULL) {
461 			DNETDEBUG("m_pullup failed\n");
462 			return;
463 		}
464 	}
465 	udp = mtod(m, const void *);
466 
467 	/* We expect to receive UDP packets on the configured client port. */
468 	if (ntohs(udp->uh_dport) != pcb->dp_client_port) {
469 		DNETDEBUG("not on the expected port.\n");
470 		return;
471 	}
472 
473 	/* Check that ulen does not exceed actual size of data. */
474 	ulen = ntohs(udp->uh_ulen);
475 	if (m->m_pkthdr.len < ulen) {
476 		DNETDEBUG("ignoring runt UDP packet\n");
477 		return;
478 	}
479 
480 	sport = ntohs(udp->uh_sport);
481 
482 	m_adj(m, sizeof(*udp));
483 	ulen -= sizeof(*udp);
484 
485 	if (ulen == sizeof(struct debugnet_ack)) {
486 		debugnet_handle_ack(pcb, mb, sport);
487 		return;
488 	}
489 
490 	if (pcb->dp_rx_handler == NULL) {
491 		if (ulen < sizeof(struct debugnet_ack))
492 			DNETDEBUG("ignoring small ACK packet\n");
493 		else
494 			DNETDEBUG("ignoring unexpected non-ACK packet on "
495 			    "half-duplex connection.\n");
496 		return;
497 	}
498 
499 	debugnet_handle_rx_msg(pcb, mb);
500 }
501 
502 /*
503  * Handler for incoming packets directly from the network adapter
504  * Identifies the packet type (IP or ARP) and passes it along to one of the
505  * helper functions debugnet_handle_ip or debugnet_handle_arp.
506  *
507  * It needs to partially replicate the behaviour of ether_input() and
508  * ether_demux().
509  *
510  * Parameters:
511  *	ifp	the interface the packet came from
512  *	m	an mbuf containing the packet received
513  */
514 static void
515 debugnet_pkt_in(struct ifnet *ifp, struct mbuf *m)
516 {
517 	struct ifreq ifr;
518 	struct ether_header *eh;
519 	u_short etype;
520 
521 	/* Ethernet processing. */
522 	if ((m->m_flags & M_PKTHDR) == 0) {
523 		DNETDEBUG_IF(ifp, "discard frame without packet header\n");
524 		goto done;
525 	}
526 	if (m->m_len < ETHER_HDR_LEN) {
527 		DNETDEBUG_IF(ifp,
528 	    "discard frame without leading eth header (len %u pktlen %u)\n",
529 		    m->m_len, m->m_pkthdr.len);
530 		goto done;
531 	}
532 	if ((m->m_flags & M_HASFCS) != 0) {
533 		m_adj(m, -ETHER_CRC_LEN);
534 		m->m_flags &= ~M_HASFCS;
535 	}
536 	eh = mtod(m, struct ether_header *);
537 	etype = ntohs(eh->ether_type);
538 	if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
539 		DNETDEBUG_IF(ifp, "ignoring vlan packets\n");
540 		goto done;
541 	}
542 	if (if_gethwaddr(ifp, &ifr) != 0) {
543 		DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n");
544 		goto done;
545 	}
546 	if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
547 	    ETHER_ADDR_LEN) != 0 &&
548 	    (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) {
549 		DNETDEBUG_IF(ifp,
550 		    "discard frame with incorrect destination addr\n");
551 		goto done;
552 	}
553 
554 	MPASS(g_debugnet_pcb_inuse);
555 
556 	/* Done ethernet processing. Strip off the ethernet header. */
557 	m_adj(m, ETHER_HDR_LEN);
558 	switch (etype) {
559 	case ETHERTYPE_ARP:
560 		debugnet_handle_arp(&g_dnet_pcb, &m);
561 		break;
562 	case ETHERTYPE_IP:
563 		debugnet_handle_ip(&g_dnet_pcb, &m);
564 		break;
565 	default:
566 		DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
567 		break;
568 	}
569 done:
570 	if (m != NULL)
571 		m_freem(m);
572 }
573 
574 /*
575  * Network polling primitive.
576  *
577  * Instead of assuming that most of the network stack is sane, we just poll the
578  * driver directly for packets.
579  */
580 void
581 debugnet_network_poll(struct debugnet_pcb *pcb)
582 {
583 	struct ifnet *ifp;
584 
585 	ifp = pcb->dp_ifp;
586 	ifp->if_debugnet_methods->dn_poll(ifp, 1000);
587 }
588 
589 /*
590  * Start of consumer API surface.
591  */
592 void
593 debugnet_free(struct debugnet_pcb *pcb)
594 {
595 	struct ifnet *ifp;
596 
597 	MPASS(g_debugnet_pcb_inuse);
598 	MPASS(pcb == &g_dnet_pcb);
599 
600 	ifp = pcb->dp_ifp;
601 	if (ifp != NULL) {
602 		if (pcb->dp_drv_input != NULL)
603 			ifp->if_input = pcb->dp_drv_input;
604 		if (pcb->dp_event_started)
605 			ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END);
606 	}
607 	debugnet_mbuf_finish();
608 
609 	g_debugnet_pcb_inuse = false;
610 	memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb));
611 }
612 
613 int
614 debugnet_connect(const struct debugnet_conn_params *dcp,
615     struct debugnet_pcb **pcb_out)
616 {
617 	struct debugnet_proto_aux herald_auxdata;
618 	struct debugnet_pcb *pcb;
619 	struct ifnet *ifp;
620 	int error;
621 
622 	if (g_debugnet_pcb_inuse) {
623 		printf("%s: Only one connection at a time.\n", __func__);
624 		return (EBUSY);
625 	}
626 
627 	pcb = &g_dnet_pcb;
628 	*pcb = (struct debugnet_pcb) {
629 		.dp_state = DN_STATE_INIT,
630 		.dp_client = dcp->dc_client,
631 		.dp_server = dcp->dc_server,
632 		.dp_gateway = dcp->dc_gateway,
633 		.dp_server_port = dcp->dc_herald_port,	/* Initially */
634 		.dp_client_port = dcp->dc_client_port,
635 		.dp_seqno = 1,
636 		.dp_ifp = dcp->dc_ifp,
637 		.dp_rx_handler = dcp->dc_rx_handler,
638 	};
639 
640 	/* Switch to the debugnet mbuf zones. */
641 	debugnet_mbuf_start();
642 
643 	/* At least one needed parameter is missing; infer it. */
644 	if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY ||
645 	    pcb->dp_ifp == NULL) {
646 		struct sockaddr_in dest_sin, *gw_sin, *local_sin;
647 		struct rtentry *dest_rt;
648 		struct ifnet *rt_ifp;
649 
650 		memset(&dest_sin, 0, sizeof(dest_sin));
651 		dest_sin = (struct sockaddr_in) {
652 			.sin_len = sizeof(dest_sin),
653 			.sin_family = AF_INET,
654 			.sin_addr.s_addr = pcb->dp_server,
655 		};
656 
657 		CURVNET_SET(vnet0);
658 		dest_rt = rtalloc1((struct sockaddr *)&dest_sin, 0,
659 		    RTF_RNH_LOCKED);
660 		CURVNET_RESTORE();
661 
662 		if (dest_rt == NULL) {
663 			printf("%s: Could not get route for that server.\n",
664 			    __func__);
665 			error = ENOENT;
666 			goto cleanup;
667 		}
668 
669 		if (dest_rt->rt_gateway->sa_family == AF_INET)
670 			gw_sin = (struct sockaddr_in *)dest_rt->rt_gateway;
671 		else {
672 			if (dest_rt->rt_gateway->sa_family == AF_LINK)
673 				DNETDEBUG("Destination address is on link.\n");
674 			gw_sin = NULL;
675 		}
676 
677 		MPASS(dest_rt->rt_ifa->ifa_addr->sa_family == AF_INET);
678 		local_sin = (struct sockaddr_in *)dest_rt->rt_ifa->ifa_addr;
679 
680 		rt_ifp = dest_rt->rt_ifp;
681 
682 		if (pcb->dp_client == INADDR_ANY)
683 			pcb->dp_client = local_sin->sin_addr.s_addr;
684 		if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL)
685 			pcb->dp_gateway = gw_sin->sin_addr.s_addr;
686 		if (pcb->dp_ifp == NULL)
687 			pcb->dp_ifp = rt_ifp;
688 
689 		RTFREE_LOCKED(dest_rt);
690 	}
691 
692 	ifp = pcb->dp_ifp;
693 
694 	if (debugnet_debug > 0) {
695 		char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN],
696 		    gwbuf[INET_ADDRSTRLEN];
697 		inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf));
698 		inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf));
699 		if (pcb->dp_gateway != INADDR_ANY)
700 			inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf));
701 		DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n",
702 		    serbuf, pcb->dp_server_port,
703 		    (pcb->dp_gateway == INADDR_ANY) ? "" : " via ",
704 		    (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf,
705 		    clibuf, pcb->dp_client_port, if_name(ifp));
706 	}
707 
708 	/* Validate iface is online and supported. */
709 	if (!DEBUGNET_SUPPORTED_NIC(ifp)) {
710 		printf("%s: interface '%s' does not support debugnet\n",
711 		    __func__, if_name(ifp));
712 		error = ENODEV;
713 		goto cleanup;
714 	}
715 	if ((if_getflags(ifp) & IFF_UP) == 0) {
716 		printf("%s: interface '%s' link is down\n", __func__,
717 		    if_name(ifp));
718 		error = ENXIO;
719 		goto cleanup;
720 	}
721 
722 	ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START);
723 	pcb->dp_event_started = true;
724 
725 	/*
726 	 * We maintain the invariant that g_debugnet_pcb_inuse is always true
727 	 * while the debugnet ifp's if_input is overridden with
728 	 * debugnet_pkt_in.
729 	 */
730 	g_debugnet_pcb_inuse = true;
731 
732 	/* Make the card use *our* receive callback. */
733 	pcb->dp_drv_input = ifp->if_input;
734 	ifp->if_input = debugnet_pkt_in;
735 
736 	printf("%s: searching for %s MAC...\n", __func__,
737 	    (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway");
738 
739 	error = debugnet_arp_gw(pcb);
740 	if (error != 0) {
741 		printf("%s: failed to locate MAC address\n", __func__);
742 		goto cleanup;
743 	}
744 	MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC);
745 
746 	herald_auxdata = (struct debugnet_proto_aux) {
747 		.dp_offset_start = dcp->dc_herald_offset,
748 		.dp_aux2 = dcp->dc_herald_aux2,
749 	};
750 	error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data,
751 	    dcp->dc_herald_datalen, &herald_auxdata);
752 	if (error != 0) {
753 		printf("%s: failed to herald debugnet server\n", __func__);
754 		goto cleanup;
755 	}
756 
757 	*pcb_out = pcb;
758 	return (0);
759 
760 cleanup:
761 	debugnet_free(pcb);
762 	return (error);
763 }
764 
765 /*
766  * Pre-allocated dump-time mbuf tracking.
767  *
768  * We just track the high water mark we've ever seen and allocate appropriately
769  * for that iface/mtu combo.
770  */
771 static struct {
772 	int nmbuf;
773 	int ncl;
774 	int clsize;
775 } dn_hwm;
776 static struct mtx dn_hwm_lk;
777 MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF);
778 
779 static void
780 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize)
781 {
782 	bool any;
783 
784 	any = false;
785 	mtx_lock(&dn_hwm_lk);
786 
787 	if (nmbuf > dn_hwm.nmbuf) {
788 		any = true;
789 		dn_hwm.nmbuf = nmbuf;
790 	} else
791 		nmbuf = dn_hwm.nmbuf;
792 
793 	if (ncl > dn_hwm.ncl) {
794 		any = true;
795 		dn_hwm.ncl = ncl;
796 	} else
797 		ncl = dn_hwm.ncl;
798 
799 	if (clsize > dn_hwm.clsize) {
800 		any = true;
801 		dn_hwm.clsize = clsize;
802 	} else
803 		clsize = dn_hwm.clsize;
804 
805 	mtx_unlock(&dn_hwm_lk);
806 
807 	if (any)
808 		debugnet_mbuf_reinit(nmbuf, ncl, clsize);
809 }
810 
811 void
812 debugnet_any_ifnet_update(struct ifnet *ifp)
813 {
814 	int clsize, nmbuf, ncl, nrxr;
815 
816 	if (!DEBUGNET_SUPPORTED_NIC(ifp))
817 		return;
818 
819 	ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize);
820 	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
821 
822 	/*
823 	 * We need two headers per message on the transmit side. Multiply by
824 	 * four to give us some breathing room.
825 	 */
826 	nmbuf = ncl * (4 + nrxr);
827 	ncl *= nrxr;
828 
829 	/*
830 	 * Bandaid for drivers that (incorrectly) advertise LinkUp before their
831 	 * dn_init method is available.
832 	 */
833 	if (nmbuf == 0 || ncl == 0 || clsize == 0) {
834 		printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n",
835 		    __func__, if_name(ifp), ifp);
836 		return;
837 	}
838 	dn_maybe_reinit_mbufs(nmbuf, ncl, clsize);
839 }
840 
841 /*
842  * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless
843  * for us because drivers tend to if_attach before invoking DEBUGNET_SET().
844  *
845  * On the other hand, hooking DEBUGNET_SET() itself may still be too early,
846  * because the driver is still in attach.  Since we cannot use down interfaces,
847  * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient?  ... Nope, at least
848  * with vtnet and dhcpclient that event just never occurs.
849  *
850  * So that's how I've landed on the lower level ifnet_link_event.
851  */
852 
853 static void
854 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state)
855 {
856 	if (link_state == LINK_STATE_UP)
857 		debugnet_any_ifnet_update(ifp);
858 }
859 
860 static eventhandler_tag dn_attach_cookie;
861 static void
862 dn_evh_init(void *ctx __unused)
863 {
864 	dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event,
865 	    dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
866 }
867 SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL);
868 
869 /*
870  * DDB parsing helpers for debugnet(4) consumers.
871  */
872 #ifdef DDB
873 struct my_inet_opt {
874 	bool has_opt;
875 	const char *printname;
876 	in_addr_t *result;
877 };
878 
879 static int
880 dn_parse_optarg_ipv4(struct my_inet_opt *opt)
881 {
882 	in_addr_t tmp;
883 	unsigned octet;
884 	int t;
885 
886 	tmp = 0;
887 	for (octet = 0; octet < 4; octet++) {
888 		t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL);
889 		if (t != tNUMBER) {
890 			db_printf("%s:%s: octet %u expected number; found %d\n",
891 			    __func__, opt->printname, octet, t);
892 			return (EINVAL);
893 		}
894 		/*
895 		 * db_lex lexes '-' distinctly from the number itself, but
896 		 * let's document that invariant.
897 		 */
898 		MPASS(db_tok_number >= 0);
899 
900 		if (db_tok_number > UINT8_MAX) {
901 			db_printf("%s:%s: octet %u out of range: %jd\n", __func__,
902 			    opt->printname, octet, (intmax_t)db_tok_number);
903 			return (EDOM);
904 		}
905 
906 		/* Constructed host-endian and converted to network later. */
907 		tmp = (tmp << 8) | db_tok_number;
908 
909 		if (octet < 3) {
910 			t = db_read_token_flags(DRT_WSPACE);
911 			if (t != tDOT) {
912 				db_printf("%s:%s: octet %u expected '.'; found"
913 				    " %d\n", __func__, opt->printname, octet,
914 				    t);
915 				return (EINVAL);
916 			}
917 		}
918 	}
919 
920 	*opt->result = htonl(tmp);
921 	opt->has_opt = true;
922 	return (0);
923 }
924 
925 int
926 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result)
927 {
928 	struct ifnet *ifp;
929 	int t, error;
930 	bool want_ifp;
931 	char ch;
932 
933 	struct my_inet_opt opt_client = {
934 		.printname = "client",
935 		.result = &result->dd_client,
936 	},
937 	opt_server = {
938 		.printname = "server",
939 		.result = &result->dd_server,
940 	},
941 	opt_gateway = {
942 		.printname = "gateway",
943 		.result = &result->dd_gateway,
944 	},
945 	*cur_inet_opt;
946 
947 	ifp = NULL;
948 	memset(result, 0, sizeof(*result));
949 
950 	/*
951 	 * command [space] [-] [opt] [[space] [optarg]] ...
952 	 *
953 	 * db_command has already lexed 'command' for us.
954 	 */
955 	t = db_read_token_flags(DRT_WSPACE);
956 	if (t == tWSPACE)
957 		t = db_read_token_flags(DRT_WSPACE);
958 
959 	while (t != tEOL) {
960 		if (t != tMINUS) {
961 			db_printf("%s: Bad syntax; expected '-', got %d\n",
962 			    cmd, t);
963 			goto usage;
964 		}
965 
966 		t = db_read_token_flags(DRT_WSPACE);
967 		if (t != tIDENT) {
968 			db_printf("%s: Bad syntax; expected tIDENT, got %d\n",
969 			    cmd, t);
970 			goto usage;
971 		}
972 
973 		if (strlen(db_tok_string) > 1) {
974 			db_printf("%s: Bad syntax; expected single option "
975 			    "flag, got '%s'\n", cmd, db_tok_string);
976 			goto usage;
977 		}
978 
979 		want_ifp = false;
980 		cur_inet_opt = NULL;
981 		switch ((ch = db_tok_string[0])) {
982 		default:
983 			DNETDEBUG("Unexpected: '%c'\n", ch);
984 			/* FALLTHROUGH */
985 		case 'h':
986 			goto usage;
987 		case 'c':
988 			cur_inet_opt = &opt_client;
989 			break;
990 		case 'g':
991 			cur_inet_opt = &opt_gateway;
992 			break;
993 		case 's':
994 			cur_inet_opt = &opt_server;
995 			break;
996 		case 'i':
997 			want_ifp = true;
998 			break;
999 		}
1000 
1001 		t = db_read_token_flags(DRT_WSPACE);
1002 		if (t != tWSPACE) {
1003 			db_printf("%s: Bad syntax; expected space after "
1004 			    "flag %c, got %d\n", cmd, ch, t);
1005 			goto usage;
1006 		}
1007 
1008 		if (want_ifp) {
1009 			t = db_read_token_flags(DRT_WSPACE);
1010 			if (t != tIDENT) {
1011 				db_printf("%s: Expected interface but got %d\n",
1012 				    cmd, t);
1013 				goto usage;
1014 			}
1015 
1016 			CURVNET_SET(vnet0);
1017 			/*
1018 			 * We *don't* take a ref here because the only current
1019 			 * consumer, db_netdump_cmd, does not need it.  It
1020 			 * (somewhat redundantly) extracts the if_name(),
1021 			 * re-lookups the ifp, and takes its own reference.
1022 			 */
1023 			ifp = ifunit(db_tok_string);
1024 			CURVNET_RESTORE();
1025 			if (ifp == NULL) {
1026 				db_printf("Could not locate interface %s\n",
1027 				    db_tok_string);
1028 				goto cleanup;
1029 			}
1030 		} else {
1031 			MPASS(cur_inet_opt != NULL);
1032 			/* Assume IPv4 for now. */
1033 			error = dn_parse_optarg_ipv4(cur_inet_opt);
1034 			if (error != 0)
1035 				goto cleanup;
1036 		}
1037 
1038 		/* Skip (mandatory) whitespace after option, if not EOL. */
1039 		t = db_read_token_flags(DRT_WSPACE);
1040 		if (t == tEOL)
1041 			break;
1042 		if (t != tWSPACE) {
1043 			db_printf("%s: Bad syntax; expected space after "
1044 			    "flag %c option; got %d\n", cmd, ch, t);
1045 			goto usage;
1046 		}
1047 		t = db_read_token_flags(DRT_WSPACE);
1048 	}
1049 
1050 	if (!opt_server.has_opt) {
1051 		db_printf("%s: need a destination server address\n", cmd);
1052 		goto usage;
1053 	}
1054 
1055 	result->dd_has_client = opt_client.has_opt;
1056 	result->dd_has_gateway = opt_gateway.has_opt;
1057 	result->dd_ifp = ifp;
1058 
1059 	/* We parsed the full line to tEOL already, or bailed with an error. */
1060 	return (0);
1061 
1062 usage:
1063 	db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> "
1064 	    "-i <interface>]\n", cmd);
1065 	error = EINVAL;
1066 	/* FALLTHROUGH */
1067 cleanup:
1068 	db_skip_to_eol();
1069 	return (error);
1070 }
1071 #endif /* DDB */
1072