xref: /freebsd/sys/net/debugnet.c (revision fde2cf65ce9bcc407bad46acad5fe16e93d16c52)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2019 Isilon Systems, LLC.
5  * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
6  * Copyright (c) 2000 Darrell Anderson
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_ddb.h"
35 #include "opt_inet.h"
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/endian.h>
40 #include <sys/errno.h>
41 #include <sys/socket.h>
42 #include <sys/sysctl.h>
43 
44 #ifdef DDB
45 #include <ddb/ddb.h>
46 #include <ddb/db_lex.h>
47 #endif
48 
49 #include <net/ethernet.h>
50 #include <net/if.h>
51 #include <net/if_arp.h>
52 #include <net/if_dl.h>
53 #include <net/if_types.h>
54 #include <net/if_var.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip.h>
60 #include <netinet/ip_var.h>
61 #include <netinet/ip_options.h>
62 #include <netinet/udp.h>
63 #include <netinet/udp_var.h>
64 
65 #include <machine/in_cksum.h>
66 #include <machine/pcb.h>
67 
68 #include <net/debugnet.h>
69 #define	DEBUGNET_INTERNAL
70 #include <net/debugnet_int.h>
71 
72 FEATURE(debugnet, "Debugnet support");
73 
74 SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD, NULL,
75     "debugnet parameters");
76 
77 unsigned debugnet_debug;
78 SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
79     &debugnet_debug, 0,
80     "Debug message verbosity (0: off; 1: on; 2: verbose)");
81 
82 int debugnet_npolls = 2000;
83 SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
84     &debugnet_npolls, 0,
85     "Number of times to poll before assuming packet loss (0.5ms per poll)");
86 int debugnet_nretries = 10;
87 SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
88     &debugnet_nretries, 0,
89     "Number of retransmit attempts before giving up");
90 
91 static bool g_debugnet_pcb_inuse;
92 static struct debugnet_pcb g_dnet_pcb;
93 
94 /*
95  * Simple accessors for opaque PCB.
96  */
97 const unsigned char *
98 debugnet_get_gw_mac(const struct debugnet_pcb *pcb)
99 {
100 	MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
101 	    pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
102 	return (pcb->dp_gw_mac.octet);
103 }
104 
105 /*
106  * Start of network primitives, beginning with output primitives.
107  */
108 
109 /*
110  * Handles creation of the ethernet header, then places outgoing packets into
111  * the tx buffer for the NIC
112  *
113  * Parameters:
114  *	m	The mbuf containing the packet to be sent (will be freed by
115  *		this function or the NIC driver)
116  *	ifp	The interface to send on
117  *	dst	The destination ethernet address (source address will be looked
118  *		up using ifp)
119  *	etype	The ETHERTYPE_* value for the protocol that is being sent
120  *
121  * Returns:
122  *	int	see errno.h, 0 for success
123  */
124 int
125 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
126     u_short etype)
127 {
128 	struct ether_header *eh;
129 
130 	if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) ||
131 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) {
132 		if_printf(ifp, "%s: interface isn't up\n", __func__);
133 		m_freem(m);
134 		return (ENETDOWN);
135 	}
136 
137 	/* Fill in the ethernet header. */
138 	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
139 	if (m == NULL) {
140 		printf("%s: out of mbufs\n", __func__);
141 		return (ENOBUFS);
142 	}
143 	eh = mtod(m, struct ether_header *);
144 	memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
145 	memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN);
146 	eh->ether_type = htons(etype);
147 	return (ifp->if_debugnet_methods->dn_transmit(ifp, m));
148 }
149 
150 /*
151  * Unreliable transmission of an mbuf chain to the debugnet server
152  * Note: can't handle fragmentation; fails if the packet is larger than
153  *	 ifp->if_mtu after adding the UDP/IP headers
154  *
155  * Parameters:
156  *	pcb	The debugnet context block
157  *	m	mbuf chain
158  *
159  * Returns:
160  *	int	see errno.h, 0 for success
161  */
162 static int
163 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m)
164 {
165 	struct udphdr *udp;
166 
167 	MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
168 
169 	M_PREPEND(m, sizeof(*udp), M_NOWAIT);
170 	if (m == NULL) {
171 		printf("%s: out of mbufs\n", __func__);
172 		return (ENOBUFS);
173 	}
174 
175 	udp = mtod(m, void *);
176 	udp->uh_ulen = htons(m->m_pkthdr.len);
177 	/* Use this src port so that the server can connect() the socket */
178 	udp->uh_sport = htons(pcb->dp_client_ack_port);
179 	udp->uh_dport = htons(pcb->dp_server_port);
180 	/* Computed later (protocol-dependent). */
181 	udp->uh_sum = 0;
182 
183 	return (debugnet_ip_output(pcb, m));
184 }
185 
186 /*
187  * Dummy free function for debugnet clusters.
188  */
189 static void
190 debugnet_mbuf_free(struct mbuf *m __unused)
191 {
192 }
193 
194 /*
195  * Construct and reliably send a debugnet packet.  May fail from a resource
196  * shortage or extreme number of unacknowledged retransmissions.  Wait for
197  * an acknowledgement before returning.  Splits packets into chunks small
198  * enough to be sent without fragmentation (looks up the interface MTU)
199  *
200  * Parameters:
201  *	type	debugnet packet type (HERALD, FINISHED, ...)
202  *	data	data
203  *	datalen	data size (bytes)
204  *	auxdata	optional auxiliary information
205  *
206  * Returns:
207  *	int see errno.h, 0 for success
208  */
209 int
210 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data,
211     uint32_t datalen, const struct debugnet_proto_aux *auxdata)
212 {
213 	struct debugnet_msg_hdr *dn_msg_hdr;
214 	struct mbuf *m, *m2;
215 	uint64_t want_acks;
216 	uint32_t i, pktlen, sent_so_far;
217 	int retries, polls, error;
218 
219 	want_acks = 0;
220 	pcb->dp_rcvd_acks = 0;
221 	retries = 0;
222 
223 retransmit:
224 	/* Chunks can be too big to fit in packets. */
225 	for (i = sent_so_far = 0; sent_so_far < datalen ||
226 	    (i == 0 && datalen == 0); i++) {
227 		pktlen = datalen - sent_so_far;
228 
229 		/* Bound: the interface MTU (assume no IP options). */
230 		pktlen = min(pktlen, pcb->dp_ifp->if_mtu -
231 		    sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr));
232 
233 		/*
234 		 * Check if it is retransmitting and this has been ACKed
235 		 * already.
236 		 */
237 		if ((pcb->dp_rcvd_acks & (1 << i)) != 0) {
238 			sent_so_far += pktlen;
239 			continue;
240 		}
241 
242 		/*
243 		 * Get and fill a header mbuf, then chain data as an extended
244 		 * mbuf.
245 		 */
246 		m = m_gethdr(M_NOWAIT, MT_DATA);
247 		if (m == NULL) {
248 			printf("%s: Out of mbufs\n", __func__);
249 			return (ENOBUFS);
250 		}
251 		m->m_len = sizeof(struct debugnet_msg_hdr);
252 		m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr);
253 		MH_ALIGN(m, sizeof(struct debugnet_msg_hdr));
254 		dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *);
255 		dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i);
256 		dn_msg_hdr->mh_type = htonl(type);
257 		dn_msg_hdr->mh_len = htonl(pktlen);
258 
259 		if (auxdata != NULL) {
260 			dn_msg_hdr->mh_offset =
261 			    htobe64(auxdata->dp_offset_start + sent_so_far);
262 			dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2);
263 		} else {
264 			dn_msg_hdr->mh_offset = htobe64(sent_so_far);
265 			dn_msg_hdr->mh_aux2 = 0;
266 		}
267 
268 		if (pktlen != 0) {
269 			m2 = m_get(M_NOWAIT, MT_DATA);
270 			if (m2 == NULL) {
271 				m_freem(m);
272 				printf("%s: Out of mbufs\n", __func__);
273 				return (ENOBUFS);
274 			}
275 			MEXTADD(m2, __DECONST(char *, data) + sent_so_far,
276 			    pktlen, debugnet_mbuf_free, NULL, NULL, 0,
277 			    EXT_DISPOSABLE);
278 			m2->m_len = pktlen;
279 
280 			m_cat(m, m2);
281 			m->m_pkthdr.len += pktlen;
282 		}
283 		error = debugnet_udp_output(pcb, m);
284 		if (error != 0)
285 			return (error);
286 
287 		/* Note that we're waiting for this packet in the bitfield. */
288 		want_acks |= (1 << i);
289 		sent_so_far += pktlen;
290 	}
291 	if (i >= DEBUGNET_MAX_IN_FLIGHT)
292 		printf("Warning: Sent more than %d packets (%d). "
293 		    "Acknowledgements will fail unless the size of "
294 		    "rcvd_acks/want_acks is increased.\n",
295 		    DEBUGNET_MAX_IN_FLIGHT, i);
296 
297 	/*
298 	 * Wait for acks.  A *real* window would speed things up considerably.
299 	 */
300 	polls = 0;
301 	while (pcb->dp_rcvd_acks != want_acks) {
302 		if (polls++ > debugnet_npolls) {
303 			if (retries++ > debugnet_nretries)
304 				return (ETIMEDOUT);
305 			printf(". ");
306 			goto retransmit;
307 		}
308 		debugnet_network_poll(pcb->dp_ifp);
309 		DELAY(500);
310 	}
311 	pcb->dp_seqno += i;
312 	return (0);
313 }
314 
315 /*
316  * Network input primitives.
317  */
318 
319 static void
320 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport)
321 {
322 	const struct debugnet_ack *dn_ack;
323 	struct mbuf *m;
324 	uint32_t rcv_ackno;
325 
326 	m = *mb;
327 
328 	if (m->m_pkthdr.len < sizeof(*dn_ack)) {
329 		DNETDEBUG("ignoring small ACK packet\n");
330 		return;
331 	}
332 	/* Get Ack. */
333 	if (m->m_len < sizeof(*dn_ack)) {
334 		m = m_pullup(m, sizeof(*dn_ack));
335 		*mb = m;
336 		if (m == NULL) {
337 			DNETDEBUG("m_pullup failed\n");
338 			return;
339 		}
340 	}
341 	dn_ack = mtod(m, const void *);
342 
343 	/* Debugnet processing. */
344 	/*
345 	 * Packet is meant for us.  Extract the ack sequence number and the
346 	 * port number if necessary.
347 	 */
348 	rcv_ackno = ntohl(dn_ack->da_seqno);
349 	if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) {
350 		pcb->dp_server_port = sport;
351 		pcb->dp_state = DN_STATE_GOT_HERALD_PORT;
352 	}
353 	if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT)
354 		printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno);
355 	else if (rcv_ackno >= pcb->dp_seqno) {
356 		/* We're interested in this ack. Record it. */
357 		pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno);
358 	}
359 }
360 
361 void
362 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb)
363 {
364 	const struct udphdr *udp;
365 	struct mbuf *m;
366 	uint16_t sport;
367 
368 	/* UDP processing. */
369 
370 	m = *mb;
371 	if (m->m_pkthdr.len < sizeof(*udp)) {
372 		DNETDEBUG("ignoring small UDP packet\n");
373 		return;
374 	}
375 
376 	/* Get UDP headers. */
377 	if (m->m_len < sizeof(*udp)) {
378 		m = m_pullup(m, sizeof(*udp));
379 		*mb = m;
380 		if (m == NULL) {
381 			DNETDEBUG("m_pullup failed\n");
382 			return;
383 		}
384 	}
385 	udp = mtod(m, const void *);
386 
387 	/* For now, the only UDP packets we expect to receive are acks. */
388 	if (ntohs(udp->uh_dport) != pcb->dp_client_ack_port) {
389 		DNETDEBUG("not on the expected ACK port.\n");
390 		return;
391 	}
392 	sport = ntohs(udp->uh_sport);
393 
394 	m_adj(m, sizeof(*udp));
395 	debugnet_handle_ack(pcb, mb, sport);
396 }
397 
398 /*
399  * Handler for incoming packets directly from the network adapter
400  * Identifies the packet type (IP or ARP) and passes it along to one of the
401  * helper functions debugnet_handle_ip or debugnet_handle_arp.
402  *
403  * It needs to partially replicate the behaviour of ether_input() and
404  * ether_demux().
405  *
406  * Parameters:
407  *	ifp	the interface the packet came from
408  *	m	an mbuf containing the packet received
409  */
410 static void
411 debugnet_pkt_in(struct ifnet *ifp, struct mbuf *m)
412 {
413 	struct ifreq ifr;
414 	struct ether_header *eh;
415 	u_short etype;
416 
417 	/* Ethernet processing. */
418 	if ((m->m_flags & M_PKTHDR) == 0) {
419 		DNETDEBUG_IF(ifp, "discard frame without packet header\n");
420 		goto done;
421 	}
422 	if (m->m_len < ETHER_HDR_LEN) {
423 		DNETDEBUG_IF(ifp,
424 	    "discard frame without leading eth header (len %u pktlen %u)\n",
425 		    m->m_len, m->m_pkthdr.len);
426 		goto done;
427 	}
428 	if ((m->m_flags & M_HASFCS) != 0) {
429 		m_adj(m, -ETHER_CRC_LEN);
430 		m->m_flags &= ~M_HASFCS;
431 	}
432 	eh = mtod(m, struct ether_header *);
433 	etype = ntohs(eh->ether_type);
434 	if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
435 		DNETDEBUG_IF(ifp, "ignoring vlan packets\n");
436 		goto done;
437 	}
438 	if (if_gethwaddr(ifp, &ifr) != 0) {
439 		DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n");
440 		goto done;
441 	}
442 	if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
443 	    ETHER_ADDR_LEN) != 0 &&
444 	    (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) {
445 		DNETDEBUG_IF(ifp,
446 		    "discard frame with incorrect destination addr\n");
447 		goto done;
448 	}
449 
450 	MPASS(g_debugnet_pcb_inuse);
451 
452 	/* Done ethernet processing. Strip off the ethernet header. */
453 	m_adj(m, ETHER_HDR_LEN);
454 	switch (etype) {
455 	case ETHERTYPE_ARP:
456 		debugnet_handle_arp(&g_dnet_pcb, &m);
457 		break;
458 	case ETHERTYPE_IP:
459 		debugnet_handle_ip(&g_dnet_pcb, &m);
460 		break;
461 	default:
462 		DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
463 		break;
464 	}
465 done:
466 	if (m != NULL)
467 		m_freem(m);
468 }
469 
470 /*
471  * Network polling primitive.
472  *
473  * Instead of assuming that most of the network stack is sane, we just poll the
474  * driver directly for packets.
475  */
476 void
477 debugnet_network_poll(struct ifnet *ifp)
478 {
479 	ifp->if_debugnet_methods->dn_poll(ifp, 1000);
480 }
481 
482 /*
483  * Start of consumer API surface.
484  */
485 void
486 debugnet_free(struct debugnet_pcb *pcb)
487 {
488 	struct ifnet *ifp;
489 
490 	MPASS(g_debugnet_pcb_inuse);
491 	MPASS(pcb == &g_dnet_pcb);
492 
493 	ifp = pcb->dp_ifp;
494 	if (ifp != NULL) {
495 		if (pcb->dp_drv_input != NULL)
496 			ifp->if_input = pcb->dp_drv_input;
497 		if (pcb->dp_event_started)
498 			ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END);
499 	}
500 	debugnet_mbuf_finish();
501 
502 	g_debugnet_pcb_inuse = false;
503 	memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb));
504 }
505 
506 int
507 debugnet_connect(const struct debugnet_conn_params *dcp,
508     struct debugnet_pcb **pcb_out)
509 {
510 	struct debugnet_pcb *pcb;
511 	struct ifnet *ifp;
512 	int error;
513 
514 	if (g_debugnet_pcb_inuse) {
515 		printf("%s: Only one connection at a time.\n", __func__);
516 		return (EBUSY);
517 	}
518 
519 	pcb = &g_dnet_pcb;
520 	*pcb = (struct debugnet_pcb) {
521 		.dp_state = DN_STATE_INIT,
522 		.dp_client = dcp->dc_client,
523 		.dp_server = dcp->dc_server,
524 		.dp_gateway = dcp->dc_gateway,
525 		.dp_server_port = dcp->dc_herald_port,	/* Initially */
526 		.dp_client_ack_port = dcp->dc_client_ack_port,
527 		.dp_seqno = 1,
528 		.dp_ifp = dcp->dc_ifp,
529 	};
530 
531 	/* Switch to the debugnet mbuf zones. */
532 	debugnet_mbuf_start();
533 
534 	/* At least one needed parameter is missing; infer it. */
535 	if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY ||
536 	    pcb->dp_ifp == NULL) {
537 		struct sockaddr_in dest_sin, *gw_sin, *local_sin;
538 		struct rtentry *dest_rt;
539 		struct ifnet *rt_ifp;
540 
541 		memset(&dest_sin, 0, sizeof(dest_sin));
542 		dest_sin = (struct sockaddr_in) {
543 			.sin_len = sizeof(dest_sin),
544 			.sin_family = AF_INET,
545 			.sin_addr.s_addr = pcb->dp_server,
546 		};
547 
548 		CURVNET_SET(vnet0);
549 		dest_rt = rtalloc1((struct sockaddr *)&dest_sin, 0,
550 		    RTF_RNH_LOCKED);
551 		CURVNET_RESTORE();
552 
553 		if (dest_rt == NULL) {
554 			db_printf("%s: Could not get route for that server.\n",
555 			    __func__);
556 			error = ENOENT;
557 			goto cleanup;
558 		}
559 
560 		if (dest_rt->rt_gateway->sa_family == AF_INET)
561 			gw_sin = (struct sockaddr_in *)dest_rt->rt_gateway;
562 		else {
563 			if (dest_rt->rt_gateway->sa_family == AF_LINK)
564 				DNETDEBUG("Destination address is on link.\n");
565 			gw_sin = NULL;
566 		}
567 
568 		MPASS(dest_rt->rt_ifa->ifa_addr->sa_family == AF_INET);
569 		local_sin = (struct sockaddr_in *)dest_rt->rt_ifa->ifa_addr;
570 
571 		rt_ifp = dest_rt->rt_ifp;
572 
573 		if (pcb->dp_client == INADDR_ANY)
574 			pcb->dp_client = local_sin->sin_addr.s_addr;
575 		if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL)
576 			pcb->dp_gateway = gw_sin->sin_addr.s_addr;
577 		if (pcb->dp_ifp == NULL)
578 			pcb->dp_ifp = rt_ifp;
579 
580 		RTFREE_LOCKED(dest_rt);
581 	}
582 
583 	ifp = pcb->dp_ifp;
584 
585 	if (debugnet_debug > 0) {
586 		char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN],
587 		    gwbuf[INET_ADDRSTRLEN];
588 		inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf));
589 		inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf));
590 		if (pcb->dp_gateway != INADDR_ANY)
591 			inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf));
592 		DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n",
593 		    serbuf, pcb->dp_server_port,
594 		    (pcb->dp_gateway == INADDR_ANY) ? "" : " via ",
595 		    (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf,
596 		    clibuf, pcb->dp_client_ack_port, if_name(ifp));
597 	}
598 
599 	/* Validate iface is online and supported. */
600 	if (!DEBUGNET_SUPPORTED_NIC(ifp)) {
601 		printf("%s: interface '%s' does not support debugnet\n",
602 		    __func__, if_name(ifp));
603 		error = ENODEV;
604 		goto cleanup;
605 	}
606 	if ((if_getflags(ifp) & IFF_UP) == 0) {
607 		printf("%s: interface '%s' link is down\n", __func__,
608 		    if_name(ifp));
609 		error = ENXIO;
610 		goto cleanup;
611 	}
612 
613 	ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START);
614 	pcb->dp_event_started = true;
615 
616 	/*
617 	 * We maintain the invariant that g_debugnet_pcb_inuse is always true
618 	 * while the debugnet ifp's if_input is overridden with
619 	 * debugnet_pkt_in.
620 	 */
621 	g_debugnet_pcb_inuse = true;
622 
623 	/* Make the card use *our* receive callback. */
624 	pcb->dp_drv_input = ifp->if_input;
625 	ifp->if_input = debugnet_pkt_in;
626 
627 	printf("%s: searching for %s MAC...\n", __func__,
628 	    (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway");
629 
630 	error = debugnet_arp_gw(pcb);
631 	if (error != 0) {
632 		printf("%s: failed to locate MAC address\n", __func__);
633 		goto cleanup;
634 	}
635 	MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC);
636 
637 	error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data,
638 	    dcp->dc_herald_datalen, NULL);
639 	if (error != 0) {
640 		printf("%s: failed to herald debugnet server\n", __func__);
641 		goto cleanup;
642 	}
643 
644 	*pcb_out = pcb;
645 	return (0);
646 
647 cleanup:
648 	debugnet_free(pcb);
649 	return (error);
650 }
651 
652 /*
653  * Pre-allocated dump-time mbuf tracking.
654  *
655  * We just track the high water mark we've ever seen and allocate appropriately
656  * for that iface/mtu combo.
657  */
658 static struct {
659 	int nmbuf;
660 	int ncl;
661 	int clsize;
662 } dn_hwm;
663 static struct mtx dn_hwm_lk;
664 MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF);
665 
666 static void
667 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize)
668 {
669 	bool any;
670 
671 	any = false;
672 	mtx_lock(&dn_hwm_lk);
673 
674 	if (nmbuf > dn_hwm.nmbuf) {
675 		any = true;
676 		dn_hwm.nmbuf = nmbuf;
677 	} else
678 		nmbuf = dn_hwm.nmbuf;
679 
680 	if (ncl > dn_hwm.ncl) {
681 		any = true;
682 		dn_hwm.ncl = ncl;
683 	} else
684 		ncl = dn_hwm.ncl;
685 
686 	if (clsize > dn_hwm.clsize) {
687 		any = true;
688 		dn_hwm.clsize = clsize;
689 	} else
690 		clsize = dn_hwm.clsize;
691 
692 	mtx_unlock(&dn_hwm_lk);
693 
694 	if (any)
695 		debugnet_mbuf_reinit(nmbuf, ncl, clsize);
696 }
697 
698 void
699 debugnet_any_ifnet_update(struct ifnet *ifp)
700 {
701 	int clsize, nmbuf, ncl, nrxr;
702 
703 	if (!DEBUGNET_SUPPORTED_NIC(ifp))
704 		return;
705 
706 	ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize);
707 	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
708 
709 	/*
710 	 * We need two headers per message on the transmit side. Multiply by
711 	 * four to give us some breathing room.
712 	 */
713 	nmbuf = ncl * (4 + nrxr);
714 	ncl *= nrxr;
715 
716 	dn_maybe_reinit_mbufs(nmbuf, ncl, clsize);
717 }
718 
719 /*
720  * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless
721  * for us because drivers tend to if_attach before invoking DEBUGNET_SET().
722  *
723  * On the other hand, hooking DEBUGNET_SET() itself may still be too early,
724  * because the driver is still in attach.  Since we cannot use down interfaces,
725  * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient?  ... Nope, at least
726  * with vtnet and dhcpclient that event just never occurs.
727  *
728  * So that's how I've landed on the lower level ifnet_link_event.
729  */
730 
731 static void
732 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state)
733 {
734 	if (link_state == LINK_STATE_UP)
735 		debugnet_any_ifnet_update(ifp);
736 }
737 
738 static eventhandler_tag dn_attach_cookie;
739 static void
740 dn_evh_init(void *ctx __unused)
741 {
742 	dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event,
743 	    dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
744 }
745 SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL);
746 
747 /*
748  * DDB parsing helpers for debugnet(4) consumers.
749  */
750 #ifdef DDB
751 struct my_inet_opt {
752 	bool has_opt;
753 	const char *printname;
754 	in_addr_t *result;
755 };
756 
757 static int
758 dn_parse_optarg_ipv4(struct my_inet_opt *opt)
759 {
760 	in_addr_t tmp;
761 	unsigned octet;
762 	int t;
763 
764 	tmp = 0;
765 	for (octet = 0; octet < 4; octet++) {
766 		t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL);
767 		if (t != tNUMBER) {
768 			db_printf("%s:%s: octet %u expected number; found %d\n",
769 			    __func__, opt->printname, octet, t);
770 			return (EINVAL);
771 		}
772 		/*
773 		 * db_lex lexes '-' distinctly from the number itself, but
774 		 * let's document that invariant.
775 		 */
776 		MPASS(db_tok_number >= 0);
777 
778 		if (db_tok_number > UINT8_MAX) {
779 			db_printf("%s:%s: octet %u out of range: %jd\n", __func__,
780 			    opt->printname, octet, (intmax_t)db_tok_number);
781 			return (EDOM);
782 		}
783 
784 		/* Constructed host-endian and converted to network later. */
785 		tmp = (tmp << 8) | db_tok_number;
786 
787 		if (octet < 3) {
788 			t = db_read_token_flags(DRT_WSPACE);
789 			if (t != tDOT) {
790 				db_printf("%s:%s: octet %u expected '.'; found"
791 				    " %d\n", __func__, opt->printname, octet,
792 				    t);
793 				return (EINVAL);
794 			}
795 		}
796 	}
797 
798 	*opt->result = htonl(tmp);
799 	opt->has_opt = true;
800 	return (0);
801 }
802 
803 int
804 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result)
805 {
806 	struct ifnet *ifp;
807 	int t, error;
808 	bool want_ifp;
809 	char ch;
810 
811 	struct my_inet_opt opt_client = {
812 		.printname = "client",
813 		.result = &result->dd_client,
814 	},
815 	opt_server = {
816 		.printname = "server",
817 		.result = &result->dd_server,
818 	},
819 	opt_gateway = {
820 		.printname = "gateway",
821 		.result = &result->dd_gateway,
822 	},
823 	*cur_inet_opt;
824 
825 	ifp = NULL;
826 	memset(result, 0, sizeof(*result));
827 
828 	/*
829 	 * command [space] [-] [opt] [[space] [optarg]] ...
830 	 *
831 	 * db_command has already lexed 'command' for us.
832 	 */
833 	t = db_read_token_flags(DRT_WSPACE);
834 	if (t == tWSPACE)
835 		t = db_read_token_flags(DRT_WSPACE);
836 
837 	while (t != tEOL) {
838 		if (t != tMINUS) {
839 			db_printf("%s: Bad syntax; expected '-', got %d\n",
840 			    cmd, t);
841 			goto usage;
842 		}
843 
844 		t = db_read_token_flags(DRT_WSPACE);
845 		if (t != tIDENT) {
846 			db_printf("%s: Bad syntax; expected tIDENT, got %d\n",
847 			    cmd, t);
848 			goto usage;
849 		}
850 
851 		if (strlen(db_tok_string) > 1) {
852 			db_printf("%s: Bad syntax; expected single option "
853 			    "flag, got '%s'\n", cmd, db_tok_string);
854 			goto usage;
855 		}
856 
857 		want_ifp = false;
858 		cur_inet_opt = NULL;
859 		switch ((ch = db_tok_string[0])) {
860 		default:
861 			DNETDEBUG("Unexpected: '%c'\n", ch);
862 			/* FALLTHROUGH */
863 		case 'h':
864 			goto usage;
865 		case 'c':
866 			cur_inet_opt = &opt_client;
867 			break;
868 		case 'g':
869 			cur_inet_opt = &opt_gateway;
870 			break;
871 		case 's':
872 			cur_inet_opt = &opt_server;
873 			break;
874 		case 'i':
875 			want_ifp = true;
876 			break;
877 		}
878 
879 		t = db_read_token_flags(DRT_WSPACE);
880 		if (t != tWSPACE) {
881 			db_printf("%s: Bad syntax; expected space after "
882 			    "flag %c, got %d\n", cmd, ch, t);
883 			goto usage;
884 		}
885 
886 		if (want_ifp) {
887 			t = db_read_token_flags(DRT_WSPACE);
888 			if (t != tIDENT) {
889 				db_printf("%s: Expected interface but got %d\n",
890 				    cmd, t);
891 				goto usage;
892 			}
893 
894 			CURVNET_SET(vnet0);
895 			/*
896 			 * We *don't* take a ref here because the only current
897 			 * consumer, db_netdump_cmd, does not need it.  It
898 			 * (somewhat redundantly) extracts the if_name(),
899 			 * re-lookups the ifp, and takes its own reference.
900 			 */
901 			ifp = ifunit(db_tok_string);
902 			CURVNET_RESTORE();
903 			if (ifp == NULL) {
904 				db_printf("Could not locate interface %s\n",
905 				    db_tok_string);
906 				goto cleanup;
907 			}
908 		} else {
909 			MPASS(cur_inet_opt != NULL);
910 			/* Assume IPv4 for now. */
911 			error = dn_parse_optarg_ipv4(cur_inet_opt);
912 			if (error != 0)
913 				goto cleanup;
914 		}
915 
916 		/* Skip (mandatory) whitespace after option, if not EOL. */
917 		t = db_read_token_flags(DRT_WSPACE);
918 		if (t == tEOL)
919 			break;
920 		if (t != tWSPACE) {
921 			db_printf("%s: Bad syntax; expected space after "
922 			    "flag %c option; got %d\n", cmd, ch, t);
923 			goto usage;
924 		}
925 		t = db_read_token_flags(DRT_WSPACE);
926 	}
927 
928 	if (!opt_server.has_opt) {
929 		db_printf("%s: need a destination server address\n", cmd);
930 		goto usage;
931 	}
932 
933 	result->dd_has_client = opt_client.has_opt;
934 	result->dd_has_gateway = opt_gateway.has_opt;
935 	result->dd_ifp = ifp;
936 
937 	/* We parsed the full line to tEOL already, or bailed with an error. */
938 	return (0);
939 
940 usage:
941 	db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> "
942 	    "-i <interface>]\n", cmd);
943 	error = EINVAL;
944 	/* FALLTHROUGH */
945 cleanup:
946 	db_skip_to_eol();
947 	return (error);
948 }
949 #endif /* DDB */
950