xref: /freebsd/sys/net/if.c (revision 9517e866259191fcd39434a97ad849a9b59b9b9f)
1 /*-
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)if.c	8.5 (Berkeley) 1/9/95
30  * $FreeBSD$
31  */
32 
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36 #include "opt_carp.h"
37 
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/conf.h>
41 #include <sys/malloc.h>
42 #include <sys/sbuf.h>
43 #include <sys/bus.h>
44 #include <sys/mbuf.h>
45 #include <sys/systm.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/protosw.h>
51 #include <sys/kernel.h>
52 #include <sys/lock.h>
53 #include <sys/refcount.h>
54 #include <sys/module.h>
55 #include <sys/rwlock.h>
56 #include <sys/sockio.h>
57 #include <sys/syslog.h>
58 #include <sys/sysctl.h>
59 #include <sys/taskqueue.h>
60 #include <sys/domain.h>
61 #include <sys/jail.h>
62 #include <machine/stdarg.h>
63 #include <vm/uma.h>
64 
65 #include <net/if.h>
66 #include <net/if_arp.h>
67 #include <net/if_clone.h>
68 #include <net/if_dl.h>
69 #include <net/if_types.h>
70 #include <net/if_var.h>
71 #include <net/radix.h>
72 #include <net/route.h>
73 #include <net/vnet.h>
74 
75 #if defined(INET) || defined(INET6)
76 /*XXX*/
77 #include <netinet/in.h>
78 #include <netinet/in_var.h>
79 #ifdef INET6
80 #include <netinet6/in6_var.h>
81 #include <netinet6/in6_ifattach.h>
82 #endif
83 #endif
84 #ifdef INET
85 #include <netinet/if_ether.h>
86 #endif
87 #if defined(INET) || defined(INET6)
88 #ifdef DEV_CARP
89 #include <netinet/ip_carp.h>
90 #endif
91 #endif
92 
93 #include <security/mac/mac_framework.h>
94 
/*
 * One slot of the per-vnet interface index table (V_ifindex_table).
 * Slot N holds the ifnet registered under index N, or NULL when that
 * index is not in use.
 */
struct ifindex_entry {
	struct  ifnet *ife_ifnet;	/* interface at this index, or NULL */
};
98 
/*
 * Set to 1 (atomically, in if_attach_internal()) the first time an
 * interface with an if_watchdog routine attaches; if_check() consults
 * it to decide whether to start if_slowtimo().
 */
static int slowtimo_started;

/* Sysctl tree roots: net.link and net.link.generic. */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Log link state change events */
static int log_link_state_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

/*
 * Link-state notification hooks.  They are left uninitialized (NULL)
 * here; presumably the bridge STP, netgraph ether and lagg code fill
 * them in when present -- confirm against those modules.
 */
void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);

/*
 * Dequeue hook taking an ifaltq, NULL by default.
 * NOTE(review): presumably installed by ALTQ's token bucket regulator;
 * verify.
 */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
116 
/*
 * Prototypes for the file-local (static) functions.
 *
 * XXX: Style; these should be sorted alphabetically, and unprototyped
 * static functions should be prototyped. Currently they are sorted by
 * declaration order.
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t);
static void	if_freemulti(struct ifmultiaddr *);
static void	if_init(void *);
static void	if_check(void *);
static void	if_route(struct ifnet *, int flag, int fam);
static int	if_setflag(struct ifnet *, int, int, int *, int);
static void	if_slowtimo(void *);
static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
static void	if_unroute(struct ifnet *, int flag, int fam);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);
static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void	do_link_state_change(void *, int);
static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
static int	if_getgroupmembers(struct ifgroupreq *);
static void	if_delgroups(struct ifnet *);
static void	if_attach_internal(struct ifnet *, int);
static void	if_detach_internal(struct ifnet *, int);

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related header files.
 * Should this be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif
151 
/*
 * Per-vnet state: the list of attached interfaces, the list of interface
 * groups, the highest interface index in use, and the index table limit.
 * if_indexlim is doubled by every if_grow() call, including the first
 * one made from vnet_if_init().
 */
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);
VNET_DEFINE(int, if_index);
static VNET_DEFINE(int, if_indexlim) = 8;

/* Table of ifnet by index.  Locked with ifnet_lock. */
static VNET_DEFINE(struct ifindex_entry *, ifindex_table);

/* Accessors for the file-private per-vnet variables above. */
#define	V_if_indexlim		VNET(if_indexlim)
#define	V_ifindex_table		VNET(ifindex_table)

/* Default send queue length applied by ifq_init() when none is set. */
int	ifqmaxlen = IFQ_MAXLEN;
struct rwlock ifnet_lock;
/*
 * Layer 2 common-structure allocators and destructors, indexed by the
 * interface type passed to if_alloc(); a NULL entry means the type has
 * no layer 2 common structure.
 */
static	if_com_alloc_t *if_com_alloc[256];
static	if_com_free_t *if_com_free[256];

/*
 * System initialization
 */
SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL);

/* malloc(9) types used for interface-related allocations. */
MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
176 
177 struct ifnet *
178 ifnet_byindex_locked(u_short idx)
179 {
180 
181 	if (idx > V_if_index)
182 		return (NULL);
183 	return (V_ifindex_table[idx].ife_ifnet);
184 }
185 
186 struct ifnet *
187 ifnet_byindex(u_short idx)
188 {
189 	struct ifnet *ifp;
190 
191 	IFNET_RLOCK();
192 	ifp = ifnet_byindex_locked(idx);
193 	IFNET_RUNLOCK();
194 	return (ifp);
195 }
196 
197 struct ifnet *
198 ifnet_byindex_ref(u_short idx)
199 {
200 	struct ifnet *ifp;
201 
202 	IFNET_RLOCK();
203 	ifp = ifnet_byindex_locked(idx);
204 	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
205 		IFNET_RUNLOCK();
206 		return (NULL);
207 	}
208 	if_ref(ifp);
209 	IFNET_RUNLOCK();
210 	return (ifp);
211 }
212 
213 static void
214 ifnet_setbyindex(u_short idx, struct ifnet *ifp)
215 {
216 
217 	IFNET_WLOCK_ASSERT();
218 
219 	V_ifindex_table[idx].ife_ifnet = ifp;
220 }
221 
222 struct ifaddr *
223 ifaddr_byindex(u_short idx)
224 {
225 	struct ifaddr *ifa;
226 
227 	IFNET_RLOCK();
228 	ifa = ifnet_byindex_locked(idx)->if_addr;
229 	if (ifa != NULL)
230 		ifa_ref(ifa);
231 	IFNET_RUNLOCK();
232 	return (ifa);
233 }
234 
235 /*
236  * Network interface utility routines.
237  *
238  * Routines with ifa_ifwith* names take sockaddr *'s as
239  * parameters.
240  */
241 
242 static void
243 vnet_if_init(const void *unused __unused)
244 {
245 
246 	TAILQ_INIT(&V_ifnet);
247 	TAILQ_INIT(&V_ifg_head);
248 	if_grow();				/* create initial table */
249 	vnet_if_clone_init();
250 }
251 VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_init,
252     NULL);
253 
/*
 * Global (not per-vnet) initialization: set up the ifnet lock and the
 * interface cloning framework.  Registered at SI_SUB_INIT_IF with
 * SI_ORDER_SECOND.
 */
/* ARGSUSED*/
static void
if_init(void *dummy __unused)
{

	IFNET_LOCK_INIT();
	if_clone_init();
}
SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
263 
264 
265 #ifdef VIMAGE
266 static void
267 vnet_if_uninit(const void *unused __unused)
268 {
269 
270 	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet));
271 	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head));
272 
273 	free((caddr_t)V_ifindex_table, M_IFNET);
274 }
275 VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
276     vnet_if_uninit, NULL);
277 #endif
278 
279 void
280 if_grow(void)
281 {
282 	u_int n;
283 	struct ifindex_entry *e;
284 
285 	V_if_indexlim <<= 1;
286 	n = V_if_indexlim * sizeof(*e);
287 	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
288 	if (V_ifindex_table != NULL) {
289 		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
290 		free((caddr_t)V_ifindex_table, M_IFNET);
291 	}
292 	V_ifindex_table = e;
293 }
294 
295 static void
296 if_check(void *dummy __unused)
297 {
298 
299 	/*
300 	 * If at least one interface added during boot uses
301 	 * if_watchdog then start the timer.
302 	 */
303 	if (slowtimo_started)
304 		if_slowtimo(0);
305 }
306 
/*
 * Allocate a struct ifnet and an index for an interface.  A layer 2
 * common structure will also be allocated if an allocation routine is
 * registered for the passed type.
 *
 * Returns the new ifnet with its reference count initialized to 1 and
 * its slot in the index table filled in, or NULL when the index search
 * overflows or the layer 2 allocation routine fails.
 */
struct ifnet *
if_alloc(u_char type)
{
	struct ifnet *ifp;

	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);

	/*
	 * Try to find an empty slot below if_index.  If we fail, take
	 * the next slot.
	 *
	 * XXX: should be locked!
	 */
	for (ifp->if_index = 1; ifp->if_index <= V_if_index; ifp->if_index++) {
		if (ifnet_byindex(ifp->if_index) == NULL)
			break;
	}
	/*
	 * Catch if_index overflow (presumably the counter wrapping back to
	 * 0; depends on the width of if_index).
	 */
	if (ifp->if_index < 1) {
		free(ifp, M_IFNET);
		return (NULL);
	}
	/* Taking a fresh slot past the end raises the high-water mark. */
	if (ifp->if_index > V_if_index)
		V_if_index = ifp->if_index;
	/* Keep the table strictly larger than the highest index in use. */
	if (V_if_index >= V_if_indexlim)
		if_grow();

	ifp->if_type = type;
	ifp->if_alloctype = type;

	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			/*
			 * NOTE(review): V_if_index may already have been
			 * raised above and is not lowered again here.
			 */
			free(ifp, M_IFNET);
			return (NULL);
		}
	}

	IF_ADDR_LOCK_INIT(ifp);
	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
	ifp->if_afdata_initialized = 0;
	IF_AFDATA_LOCK_INIT(ifp);
	TAILQ_INIT(&ifp->if_addrhead);
	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
#ifdef MAC
	mac_ifnet_init(ifp);
#endif
	ifq_init(&ifp->if_snd, ifp);

	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
	/* Publish the interface in the index table last. */
	IFNET_WLOCK();
	ifnet_setbyindex(ifp->if_index, ifp);
	IFNET_WUNLOCK();
	return (ifp);
}
369 
370 /*
371  * Do the actual work of freeing a struct ifnet, associated index, and layer
372  * 2 common structure.  This call is made when the last reference to an
373  * interface is released.
374  */
375 static void
376 if_free_internal(struct ifnet *ifp)
377 {
378 
379 	KASSERT((ifp->if_flags & IFF_DYING),
380 	    ("if_free_internal: interface not dying"));
381 
382 	IFNET_WLOCK();
383 	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
384 	    ("%s: freeing unallocated ifnet", ifp->if_xname));
385 
386 	ifnet_setbyindex(ifp->if_index, NULL);
387 	while (V_if_index > 0 && ifnet_byindex_locked(V_if_index) == NULL)
388 		V_if_index--;
389 	IFNET_WUNLOCK();
390 
391 	if (if_com_free[ifp->if_alloctype] != NULL)
392 		if_com_free[ifp->if_alloctype](ifp->if_l2com,
393 		    ifp->if_alloctype);
394 
395 #ifdef MAC
396 	mac_ifnet_destroy(ifp);
397 #endif /* MAC */
398 	IF_AFDATA_DESTROY(ifp);
399 	IF_ADDR_LOCK_DESTROY(ifp);
400 	ifq_delete(&ifp->if_snd);
401 	free(ifp, M_IFNET);
402 }
403 
404 /*
405  * This version should only be called by intefaces that switch their type
406  * after calling if_alloc().  if_free_type() will go away again now that we
407  * have if_alloctype to cache the original allocation type.  For now, assert
408  * that they match, since we require that in practice.
409  */
410 void
411 if_free_type(struct ifnet *ifp, u_char type)
412 {
413 
414 	KASSERT(ifp->if_alloctype == type,
415 	    ("if_free_type: type (%d) != alloctype (%d)", type,
416 	    ifp->if_alloctype));
417 
418 	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
419 	if (!refcount_release(&ifp->if_refcount))
420 		return;
421 	if_free_internal(ifp);
422 }
423 
/*
 * This is the normal version of if_free(), used by device drivers to free a
 * detached network interface.  The contents of if_free_type() will move into
 * here when if_free_type() goes away.
 *
 * Currently a thin wrapper that passes the cached allocation type
 * (if_alloctype) to if_free_type().
 */
void
if_free(struct ifnet *ifp)
{

	if_free_type(ifp, ifp->if_alloctype);
}
435 
/*
 * Interfaces to keep an ifnet type-stable despite the possibility of the
 * driver calling if_free().  If there are additional references, we defer
 * freeing the underlying data structure.
 *
 * if_ref() acquires one additional reference on ifp.
 */
void
if_ref(struct ifnet *ifp)
{

	/* We don't assert the ifnet list lock here, but arguably should. */
	refcount_acquire(&ifp->if_refcount);
}
448 
449 void
450 if_rele(struct ifnet *ifp)
451 {
452 
453 	if (!refcount_release(&ifp->if_refcount))
454 		return;
455 	if_free_internal(ifp);
456 }
457 
458 void
459 ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
460 {
461 
462 	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
463 
464 	if (ifq->ifq_maxlen == 0)
465 		ifq->ifq_maxlen = ifqmaxlen;
466 
467 	ifq->altq_type = 0;
468 	ifq->altq_disc = NULL;
469 	ifq->altq_flags &= ALTQF_CANTCHANGE;
470 	ifq->altq_tbr  = NULL;
471 	ifq->altq_ifp  = ifp;
472 }
473 
/*
 * Tear down a send queue set up by ifq_init(): destroys the queue mutex.
 * Nothing else is released here.
 */
void
ifq_delete(struct ifaltq *ifq)
{
	mtx_destroy(&ifq->ifq_mtx);
}
479 
/*
 * Perform generic interface initialization tasks and attach the interface
 * to the list of "active" interfaces.  If the vmove flag is set on entry
 * to if_attach_internal(), perform only a limited subset of initialization
 * tasks, given that we are moving from one vnet to another an ifnet which
 * has already been fully initialized.
 *
 * XXX:
 *  - The decision to return void and thus require this function to
 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
 */
void
if_attach(struct ifnet *ifp)
{

	/* Full attach: vmove == 0. */
	if_attach_internal(ifp, 0);
}
499 
/*
 * Worker for if_attach() and if_vmove().
 *
 * ifp must have been set up by if_alloc() (non-zero index registered in
 * the index table), otherwise this panics.  With vmove == 0 the
 * link-level ifaddr is built and the obsolete if_watchdog warning is
 * issued; with vmove != 0 both steps are skipped because the ifnet was
 * fully initialized in its previous vnet.
 */
static void
if_attach_internal(struct ifnet *ifp, int vmove)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
		    ifp->if_xname);

#ifdef VIMAGE
	/* The first vnet an interface attaches to becomes its home vnet. */
	ifp->if_vnet = curvnet;
	if (ifp->if_home_vnet == NULL)
		ifp->if_home_vnet = curvnet;
#endif

	/* Every interface is a member of the IFG_ALL group. */
	if_addgroup(ifp, IFG_ALL);

	getmicrotime(&ifp->if_lastchange);
	ifp->if_data.ifi_epoch = time_uptime;
	ifp->if_data.ifi_datalen = sizeof(struct if_data);

	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	    ("transmit and qflush must both either be set or both be NULL"));
	/* Drivers that supply neither get the generic implementations. */
	if (ifp->if_transmit == NULL) {
		ifp->if_transmit = if_transmit;
		ifp->if_qflush = if_qflush;
	}

	if (!vmove) {
#ifdef MAC
		mac_ifnet_create(ifp);
#endif

		/*
		 * Create a Link Level name for this device.
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we
		 * can do a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/*
		 * One allocation holds the ifaddr followed by two
		 * sockaddr_dl areas of socksize bytes each: the address
		 * and then the netmask.
		 */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
		ifa_init(ifa);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_rtrequest = link_rtrequest;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* Step to the second sockaddr_dl area: the netmask. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* The mask covers exactly the bytes of the interface name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		/* Reliably crash if used uninitialized. */
		ifp->if_broadcastaddr = NULL;
	}

	/* Make the interface visible on the current vnet's list. */
	IFNET_WLOCK();
	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
#ifdef VIMAGE
	curvnet->vnet_ifcnt++;
#endif
	IFNET_WUNLOCK();

	/*
	 * Attach per-domain data now only when domain_init_status is at
	 * least 2; otherwise the if_attachdomain() SYSINIT walks the
	 * interface list later.
	 */
	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	if (!vmove && ifp->if_watchdog != NULL) {
		if_printf(ifp,
		    "WARNING: using obsoleted if_watchdog interface\n");

		/*
		 * Note that we need if_slowtimo().  If this happens after
		 * boot, then call if_slowtimo() directly.
		 */
		if (atomic_cmpset_int(&slowtimo_started, 0, 1) && !cold)
			if_slowtimo(0);
	}
}
603 
604 static void
605 if_attachdomain(void *dummy)
606 {
607 	struct ifnet *ifp;
608 	int s;
609 
610 	s = splnet();
611 	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
612 		if_attachdomain1(ifp);
613 	splx(s);
614 }
615 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
616     if_attachdomain, NULL);
617 
/*
 * Attach the per-domain (address family) data for one interface by
 * calling every domain's dom_ifattach hook and storing the result in
 * ifp->if_afdata[], indexed by dom_family.
 *
 * if_afdata_initialized records the domain_init_status the data was
 * built for; a second call at the same or a lower status only prints a
 * message.  If the afdata lock cannot be taken without blocking, the
 * function returns without doing anything.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;
	int s;

	s = splnet();

	/*
	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
	 * cannot lock ifp->if_afdata initialization, entirely.
	 */
	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
		splx(s);
		return;
	}
	if (ifp->if_afdata_initialized >= domain_init_status) {
		IF_AFDATA_UNLOCK(ifp);
		splx(s);
		printf("if_attachdomain called more than once on %s\n",
		    ifp->if_xname);
		return;
	}
	/* Claim the initialization under the lock, then drop the lock. */
	ifp->if_afdata_initialized = domain_init_status;
	IF_AFDATA_UNLOCK(ifp);

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
654 
/*
 * Remove any unicast or broadcast network addresses from an interface.
 *
 * Link-level (AF_LINK) addresses are left in place.  AF_INET addresses
 * are deleted through in_control(SIOCDIFADDR) and AF_INET6 addresses
 * through in6_purgeaddr(); anything else, including an AF_INET address
 * for which in_control() failed, is unlinked from the address list and
 * released with ifa_free().
 */
void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa, *next;

	/* The _SAFE variant is required: entries are removed as we go. */
	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/* On failure fall through to the generic removal. */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		ifa_free(ifa);
	}
}
691 
692 /*
693  * Remove any multicast network addresses from an interface.
694  */
695 void
696 if_purgemaddrs(struct ifnet *ifp)
697 {
698 	struct ifmultiaddr *ifma;
699 	struct ifmultiaddr *next;
700 
701 	IF_ADDR_LOCK(ifp);
702 	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
703 		if_delmulti_locked(ifp, ifma, 1);
704 	IF_ADDR_UNLOCK(ifp);
705 }
706 
/*
 * Detach an interface, removing it from the list of "active" interfaces.
 * If the vmove flag is set on entry to if_detach_internal(), perform only
 * a limited subset of cleanup tasks, given that we are moving an ifnet
 * from one vnet to another, where it must be fully operational.
 *
 * XXXRW: There are some significant questions about event ordering, and
 * how to prevent things from starting to use the interface during detach.
 */
void
if_detach(struct ifnet *ifp)
{

	/* Full detach: vmove == 0. */
	if_detach_internal(ifp, 0);
}
722 
/*
 * Worker for if_detach() and if_vmove().
 *
 * Unlinks ifp from the current vnet's interface list, drains the pending
 * link-state task, brings the interface down, purges its addresses and
 * routes, announces the departure, removes it from all groups and
 * detaches the per-domain data.  With vmove == 0 the interface is
 * additionally marked dead (if_dead()) and its link-level ifaddr is
 * freed; with vmove != 0 those are kept so the ifnet can be re-attached.
 *
 * If ifp is not on the interface list this panics when vmove is set and
 * silently returns otherwise.
 */
static void
if_detach_internal(struct ifnet *ifp, int vmove)
{
	struct ifaddr *ifa;
	struct radix_node_head	*rnh;
	int i, j;
	struct domain *dp;
	struct ifnet *iter;
	int found = 0;

	/* Only unlink the interface if it really is on the list. */
	IFNET_WLOCK();
	TAILQ_FOREACH(iter, &V_ifnet, if_link)
		if (iter == ifp) {
			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
			found = 1;
			break;
		}
#ifdef VIMAGE
	if (found)
		curvnet->vnet_ifcnt--;
#endif
	IFNET_WUNLOCK();
	if (!found) {
		if (vmove)
			panic("interface not in it's own ifnet list");
		else
			return; /* XXX this should panic as well? */
	}

	/*
	 * Remove/wait for pending events.
	 */
	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);

	/*
	 * Remove routes and flush queues.
	 */
	if_down(ifp);
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	if_purgeaddrs(ifp);

#ifdef INET
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif
	if_purgemaddrs(ifp);

	if (!vmove) {
		/*
		 * Prevent further calls into the device driver via ifnet.
		 */
		if_dead(ifp);

		/*
		 * Remove link ifaddr pointer and clean up the remaining
		 * address.
		 */
		ifp->if_addr = NULL;

		/* We can now free link ifaddr. */
		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
			ifa = TAILQ_FIRST(&ifp->if_addrhead);
			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
			ifa_free(ifa);
		}
	}

	/*
	 * Delete all remaining routes using this interface.
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	for (i = 1; i <= AF_MAX; i++) {
		for (j = 0; j < rt_numfibs; j++) {
			rnh = rt_tables_get_rnh(j, i);
			if (rnh == NULL)
				continue;
			RADIX_NODE_HEAD_LOCK(rnh);
			(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
			RADIX_NODE_HEAD_UNLOCK(rnh);
		}
	}

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
	if_delgroups(ifp);

	/*
	 * Let each domain release the data it attached, then mark the
	 * afdata as uninitialized so a later attach can rebuild it.
	 */
	IF_AFDATA_LOCK(ifp);
	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}
	ifp->if_afdata_initialized = 0;
	IF_AFDATA_UNLOCK(ifp);
}
838 
839 #ifdef VIMAGE
/*
 * if_vmove() performs a limited version of if_detach() in the current
 * vnet and then if_attach()es the ifnet to the vnet given as the second
 * argument.  It shrinks if_index in the current vnet where possible,
 * finds an unused if_index in the target vnet, and calls if_grow() there
 * if necessary.
 *
 * NOTE(review): earlier documentation also promised selection of an
 * unused if_xname for the target vnet; no such step is visible in this
 * function.
 */
void
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{

	/*
	 * Detach from current vnet, but preserve LLADDR info, do not
	 * mark as dead etc. so that the ifnet can be reattached later.
	 */
	if_detach_internal(ifp, 1);

	/*
	 * Unlink the ifnet from ifindex_table[] in current vnet,
	 * and shrink the if_index for that vnet if possible.
	 */
	IFNET_WLOCK();
	ifnet_setbyindex(ifp->if_index, NULL);
	while (V_if_index > 0 && ifnet_byindex_locked(V_if_index) == NULL)
		V_if_index--;
	IFNET_WUNLOCK();

	/*
	 * Switch to the context of the target vnet.
	 */
	CURVNET_SET_QUIET(new_vnet);

	/*
	 * Try to find an empty slot below if_index.  If we fail, take
	 * the next slot.
	 */
	IFNET_WLOCK();
	for (ifp->if_index = 1; ifp->if_index <= V_if_index; ifp->if_index++) {
		if (ifnet_byindex_locked(ifp->if_index) == NULL)
			break;
	}
	/* Catch if_index overflow. */
	if (ifp->if_index < 1)
		panic("if_index overflow");

	if (ifp->if_index > V_if_index)
		V_if_index = ifp->if_index;
	/*
	 * NOTE(review): if_grow() allocates with M_WAITOK and is called
	 * here with the ifnet write lock held; confirm that a sleeping
	 * allocation is acceptable in this context.
	 */
	if (V_if_index >= V_if_indexlim)
		if_grow();
	ifnet_setbyindex(ifp->if_index, ifp);
	IFNET_WUNLOCK();

	/* Limited attach in the target vnet (vmove == 1). */
	if_attach_internal(ifp, 1);

	CURVNET_RESTORE();
}
896 
897 /*
898  * Move an ifnet to or from another child prison/vnet, specified by the jail id.
899  */
900 static int
901 if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
902 {
903 	struct prison *pr;
904 	struct ifnet *difp;
905 
906 	/* Try to find the prison within our visibility. */
907 	sx_slock(&allprison_lock);
908 	pr = prison_find_child(td->td_ucred->cr_prison, jid);
909 	sx_sunlock(&allprison_lock);
910 	if (pr == NULL)
911 		return (ENXIO);
912 	prison_hold_locked(pr);
913 	mtx_unlock(&pr->pr_mtx);
914 
915 	/* Do not try to move the iface from and to the same prison. */
916 	if (pr->pr_vnet == ifp->if_vnet) {
917 		prison_free(pr);
918 		return (EEXIST);
919 	}
920 
921 	/* Make sure the named iface does not exists in the dst. prison/vnet. */
922 	/* XXX Lock interfaces to avoid races. */
923 	CURVNET_SET(pr->pr_vnet);
924 	difp = ifunit(ifname);
925 	CURVNET_RESTORE();
926 	if (difp != NULL) {
927 		prison_free(pr);
928 		return (EEXIST);
929 	}
930 
931 	/* Move the interface into the child jail/vnet. */
932 	if_vmove(ifp, pr->pr_vnet);
933 
934 	/* Report the new if_xname back to the userland. */
935 	sprintf(ifname, "%s", ifp->if_xname);
936 
937 	prison_free(pr);
938 	return (0);
939 }
940 
941 static int
942 if_vmove_reclaim(struct thread *td, char *ifname, int jid)
943 {
944 	struct prison *pr;
945 	struct vnet *vnet_dst;
946 	struct ifnet *ifp;
947 
948 	/* Try to find the prison within our visibility. */
949 	sx_slock(&allprison_lock);
950 	pr = prison_find_child(td->td_ucred->cr_prison, jid);
951 	sx_sunlock(&allprison_lock);
952 	if (pr == NULL)
953 		return (ENXIO);
954 	prison_hold_locked(pr);
955 	mtx_unlock(&pr->pr_mtx);
956 
957 	/* Make sure the named iface exists in the source prison/vnet. */
958 	CURVNET_SET(pr->pr_vnet);
959 	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
960 	if (ifp == NULL) {
961 		CURVNET_RESTORE();
962 		prison_free(pr);
963 		return (ENXIO);
964 	}
965 
966 	/* Do not try to move the iface from and to the same prison. */
967 	vnet_dst = TD_TO_VNET(td);
968 	if (vnet_dst == ifp->if_vnet) {
969 		CURVNET_RESTORE();
970 		prison_free(pr);
971 		return (EEXIST);
972 	}
973 
974 	/* Get interface back from child jail/vnet. */
975 	if_vmove(ifp, vnet_dst);
976 	CURVNET_RESTORE();
977 
978 	/* Report the new if_xname back to the userland. */
979 	sprintf(ifname, "%s", ifp->if_xname);
980 
981 	prison_free(pr);
982 	return (0);
983 }
984 #endif /* VIMAGE */
985 
986 /*
987  * Add a group to an interface
988  */
989 int
990 if_addgroup(struct ifnet *ifp, const char *groupname)
991 {
992 	struct ifg_list		*ifgl;
993 	struct ifg_group	*ifg = NULL;
994 	struct ifg_member	*ifgm;
995 
996 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
997 	    groupname[strlen(groupname) - 1] <= '9')
998 		return (EINVAL);
999 
1000 	IFNET_WLOCK();
1001 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1002 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1003 			IFNET_WUNLOCK();
1004 			return (EEXIST);
1005 		}
1006 
1007 	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1008 	    M_NOWAIT)) == NULL) {
1009 	    	IFNET_WUNLOCK();
1010 		return (ENOMEM);
1011 	}
1012 
1013 	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1014 	    M_TEMP, M_NOWAIT)) == NULL) {
1015 		free(ifgl, M_TEMP);
1016 		IFNET_WUNLOCK();
1017 		return (ENOMEM);
1018 	}
1019 
1020 	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1021 		if (!strcmp(ifg->ifg_group, groupname))
1022 			break;
1023 
1024 	if (ifg == NULL) {
1025 		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1026 		    M_TEMP, M_NOWAIT)) == NULL) {
1027 			free(ifgl, M_TEMP);
1028 			free(ifgm, M_TEMP);
1029 			IFNET_WUNLOCK();
1030 			return (ENOMEM);
1031 		}
1032 		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1033 		ifg->ifg_refcnt = 0;
1034 		TAILQ_INIT(&ifg->ifg_members);
1035 		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1036 		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1037 	}
1038 
1039 	ifg->ifg_refcnt++;
1040 	ifgl->ifgl_group = ifg;
1041 	ifgm->ifgm_ifp = ifp;
1042 
1043 	IF_ADDR_LOCK(ifp);
1044 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1045 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1046 	IF_ADDR_UNLOCK(ifp);
1047 
1048 	IFNET_WUNLOCK();
1049 
1050 	EVENTHANDLER_INVOKE(group_change_event, groupname);
1051 
1052 	return (0);
1053 }
1054 
1055 /*
1056  * Remove a group from an interface
1057  */
1058 int
1059 if_delgroup(struct ifnet *ifp, const char *groupname)
1060 {
1061 	struct ifg_list		*ifgl;
1062 	struct ifg_member	*ifgm;
1063 
1064 	IFNET_WLOCK();
1065 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1066 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1067 			break;
1068 	if (ifgl == NULL) {
1069 		IFNET_WUNLOCK();
1070 		return (ENOENT);
1071 	}
1072 
1073 	IF_ADDR_LOCK(ifp);
1074 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1075 	IF_ADDR_UNLOCK(ifp);
1076 
1077 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1078 		if (ifgm->ifgm_ifp == ifp)
1079 			break;
1080 
1081 	if (ifgm != NULL) {
1082 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1083 		free(ifgm, M_TEMP);
1084 	}
1085 
1086 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1087 		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1088 		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1089 		free(ifgl->ifgl_group, M_TEMP);
1090 	}
1091 	IFNET_WUNLOCK();
1092 
1093 	free(ifgl, M_TEMP);
1094 
1095 	EVENTHANDLER_INVOKE(group_change_event, groupname);
1096 
1097 	return (0);
1098 }
1099 
1100 /*
1101  * Remove an interface from all groups
1102  */
1103 static void
1104 if_delgroups(struct ifnet *ifp)
1105 {
1106 	struct ifg_list		*ifgl;
1107 	struct ifg_member	*ifgm;
1108 	char groupname[IFNAMSIZ];
1109 
1110 	IFNET_WLOCK();
1111 	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1112 		ifgl = TAILQ_FIRST(&ifp->if_groups);
1113 
1114 		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1115 
1116 		IF_ADDR_LOCK(ifp);
1117 		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1118 		IF_ADDR_UNLOCK(ifp);
1119 
1120 		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1121 			if (ifgm->ifgm_ifp == ifp)
1122 				break;
1123 
1124 		if (ifgm != NULL) {
1125 			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1126 			    ifgm_next);
1127 			free(ifgm, M_TEMP);
1128 		}
1129 
1130 		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1131 			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1132 			EVENTHANDLER_INVOKE(group_detach_event,
1133 			    ifgl->ifgl_group);
1134 			free(ifgl->ifgl_group, M_TEMP);
1135 		}
1136 		IFNET_WUNLOCK();
1137 
1138 		free(ifgl, M_TEMP);
1139 
1140 		EVENTHANDLER_INVOKE(group_change_event, groupname);
1141 
1142 		IFNET_WLOCK();
1143 	}
1144 	IFNET_WUNLOCK();
1145 }
1146 
1147 /*
1148  * Stores all groups from an interface in memory pointed
1149  * to by data
1150  */
1151 static int
1152 if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
1153 {
1154 	int			 len, error;
1155 	struct ifg_list		*ifgl;
1156 	struct ifg_req		 ifgrq, *ifgp;
1157 	struct ifgroupreq	*ifgr = data;
1158 
1159 	if (ifgr->ifgr_len == 0) {
1160 		IF_ADDR_LOCK(ifp);
1161 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1162 			ifgr->ifgr_len += sizeof(struct ifg_req);
1163 		IF_ADDR_UNLOCK(ifp);
1164 		return (0);
1165 	}
1166 
1167 	len = ifgr->ifgr_len;
1168 	ifgp = ifgr->ifgr_groups;
1169 	/* XXX: wire */
1170 	IF_ADDR_LOCK(ifp);
1171 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1172 		if (len < sizeof(ifgrq)) {
1173 			IF_ADDR_UNLOCK(ifp);
1174 			return (EINVAL);
1175 		}
1176 		bzero(&ifgrq, sizeof ifgrq);
1177 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1178 		    sizeof(ifgrq.ifgrq_group));
1179 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1180 		    	IF_ADDR_UNLOCK(ifp);
1181 			return (error);
1182 		}
1183 		len -= sizeof(ifgrq);
1184 		ifgp++;
1185 	}
1186 	IF_ADDR_UNLOCK(ifp);
1187 
1188 	return (0);
1189 }
1190 
1191 /*
1192  * Stores all members of a group in memory pointed to by data
1193  */
1194 static int
1195 if_getgroupmembers(struct ifgroupreq *data)
1196 {
1197 	struct ifgroupreq	*ifgr = data;
1198 	struct ifg_group	*ifg;
1199 	struct ifg_member	*ifgm;
1200 	struct ifg_req		 ifgrq, *ifgp;
1201 	int			 len, error;
1202 
1203 	IFNET_RLOCK();
1204 	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1205 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1206 			break;
1207 	if (ifg == NULL) {
1208 		IFNET_RUNLOCK();
1209 		return (ENOENT);
1210 	}
1211 
1212 	if (ifgr->ifgr_len == 0) {
1213 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1214 			ifgr->ifgr_len += sizeof(ifgrq);
1215 		IFNET_RUNLOCK();
1216 		return (0);
1217 	}
1218 
1219 	len = ifgr->ifgr_len;
1220 	ifgp = ifgr->ifgr_groups;
1221 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1222 		if (len < sizeof(ifgrq)) {
1223 			IFNET_RUNLOCK();
1224 			return (EINVAL);
1225 		}
1226 		bzero(&ifgrq, sizeof ifgrq);
1227 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1228 		    sizeof(ifgrq.ifgrq_member));
1229 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1230 			IFNET_RUNLOCK();
1231 			return (error);
1232 		}
1233 		len -= sizeof(ifgrq);
1234 		ifgp++;
1235 	}
1236 	IFNET_RUNLOCK();
1237 
1238 	return (0);
1239 }
1240 
1241 /*
1242  * Delete Routes for a Network Interface
1243  *
1244  * Called for each routing entry via the rnh->rnh_walktree() call above
1245  * to delete all route entries referencing a detaching network interface.
1246  *
1247  * Arguments:
1248  *	rn	pointer to node in the routing table
1249  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
1250  *
1251  * Returns:
1252  *	0	successful
1253  *	errno	failed - reason indicated
1254  *
1255  */
1256 static int
1257 if_rtdel(struct radix_node *rn, void *arg)
1258 {
1259 	struct rtentry	*rt = (struct rtentry *)rn;
1260 	struct ifnet	*ifp = arg;
1261 	int		err;
1262 
1263 	if (rt->rt_ifp == ifp) {
1264 
1265 		/*
1266 		 * Protect (sorta) against walktree recursion problems
1267 		 * with cloned routes
1268 		 */
1269 		if ((rt->rt_flags & RTF_UP) == 0)
1270 			return (0);
1271 
1272 		err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1273 				rt_mask(rt), rt->rt_flags|RTF_RNH_LOCKED,
1274 				(struct rtentry **) NULL, rt->rt_fibnum);
1275 		if (err) {
1276 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
1277 		}
1278 	}
1279 
1280 	return (0);
1281 }
1282 
/*
 * Function wrappers around the struct ifnet address list locking macros.
 * Kernel modules call these instead of using the macros directly, so
 * that they do not encode programming interface or binary interface
 * assumptions that may be violated when kernel-internal locking
 * approaches change.
 */

/* Acquire the address list lock of ifp (IF_ADDR_LOCK). */
void
if_addr_rlock(struct ifnet *ifp)
{

	IF_ADDR_LOCK(ifp);
}
1295 
/* Release the address list lock of ifp (IF_ADDR_UNLOCK). */
void
if_addr_runlock(struct ifnet *ifp)
{

	IF_ADDR_UNLOCK(ifp);
}
1302 
/*
 * Lock wrapper for multicast address users; it takes the same
 * IF_ADDR_LOCK as if_addr_rlock().
 */
void
if_maddr_rlock(struct ifnet *ifp)
{

	IF_ADDR_LOCK(ifp);
}
1309 
/*
 * Unlock wrapper for multicast address users; it releases the same
 * IF_ADDR_LOCK as if_addr_runlock().
 */
void
if_maddr_runlock(struct ifnet *ifp)
{

	IF_ADDR_UNLOCK(ifp);
}
1316 
1317 /*
1318  * Reference count functions for ifaddrs.
1319  */
1320 void
1321 ifa_init(struct ifaddr *ifa)
1322 {
1323 
1324 	mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF);
1325 	refcount_init(&ifa->ifa_refcnt, 1);
1326 }
1327 
1328 void
1329 ifa_ref(struct ifaddr *ifa)
1330 {
1331 
1332 	refcount_acquire(&ifa->ifa_refcnt);
1333 }
1334 
1335 void
1336 ifa_free(struct ifaddr *ifa)
1337 {
1338 
1339 	if (refcount_release(&ifa->ifa_refcnt)) {
1340 		mtx_destroy(&ifa->ifa_mtx);
1341 		free(ifa, M_IFADDR);
1342 	}
1343 }
1344 
/*
 * Socket address comparison helpers.
 *
 * sa_equal() compares the first (a1)->sa_len bytes of both addresses.
 *
 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
 * structs used to represent other address families, it is necessary
 * to perform a different comparison: sa_dl_equal() requires equal
 * sdl_len values and then compares only the sdl_alen bytes found at
 * LLADDR().
 */

#define	sa_equal(a1, a2)						\
	(bcmp((a1), (a2), (a1)->sa_len) == 0)

#define	sa_dl_equal(a1, a2)						\
	(((struct sockaddr_dl *)(a1))->sdl_len ==			\
	    ((struct sockaddr_dl *)(a2))->sdl_len &&			\
	 bcmp(LLADDR((struct sockaddr_dl *)(a1)),			\
	    LLADDR((struct sockaddr_dl *)(a2)),				\
	    ((struct sockaddr_dl *)(a1))->sdl_alen) == 0)
1360 
1361 /*
1362  * Locate an interface based on a complete address.
1363  */
1364 /*ARGSUSED*/
1365 static struct ifaddr *
1366 ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
1367 {
1368 	struct ifnet *ifp;
1369 	struct ifaddr *ifa;
1370 
1371 	IFNET_RLOCK();
1372 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1373 		IF_ADDR_LOCK(ifp);
1374 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1375 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1376 				continue;
1377 			if (sa_equal(addr, ifa->ifa_addr)) {
1378 				if (getref)
1379 					ifa_ref(ifa);
1380 				IF_ADDR_UNLOCK(ifp);
1381 				goto done;
1382 			}
1383 			/* IP6 doesn't have broadcast */
1384 			if ((ifp->if_flags & IFF_BROADCAST) &&
1385 			    ifa->ifa_broadaddr &&
1386 			    ifa->ifa_broadaddr->sa_len != 0 &&
1387 			    sa_equal(ifa->ifa_broadaddr, addr)) {
1388 				if (getref)
1389 					ifa_ref(ifa);
1390 				IF_ADDR_UNLOCK(ifp);
1391 				goto done;
1392 			}
1393 		}
1394 		IF_ADDR_UNLOCK(ifp);
1395 	}
1396 	ifa = NULL;
1397 done:
1398 	IFNET_RUNLOCK();
1399 	return (ifa);
1400 }
1401 
/*
 * Look up the ifaddr matching a complete address and return it with a
 * reference held (getref = 1), or NULL when there is none.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{

	return (ifa_ifwithaddr_internal(addr, 1));
}
1408 
1409 int
1410 ifa_ifwithaddr_check(struct sockaddr *addr)
1411 {
1412 
1413 	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
1414 }
1415 
1416 /*
1417  * Locate an interface based on the broadcast address.
1418  */
1419 /* ARGSUSED */
1420 struct ifaddr *
1421 ifa_ifwithbroadaddr(struct sockaddr *addr)
1422 {
1423 	struct ifnet *ifp;
1424 	struct ifaddr *ifa;
1425 
1426 	IFNET_RLOCK();
1427 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1428 		IF_ADDR_LOCK(ifp);
1429 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1430 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1431 				continue;
1432 			if ((ifp->if_flags & IFF_BROADCAST) &&
1433 			    ifa->ifa_broadaddr &&
1434 			    ifa->ifa_broadaddr->sa_len != 0 &&
1435 			    sa_equal(ifa->ifa_broadaddr, addr)) {
1436 				ifa_ref(ifa);
1437 				IF_ADDR_UNLOCK(ifp);
1438 				goto done;
1439 			}
1440 		}
1441 		IF_ADDR_UNLOCK(ifp);
1442 	}
1443 	ifa = NULL;
1444 done:
1445 	IFNET_RUNLOCK();
1446 	return (ifa);
1447 }
1448 
1449 /*
1450  * Locate the point to point interface with a given destination address.
1451  */
1452 /*ARGSUSED*/
1453 struct ifaddr *
1454 ifa_ifwithdstaddr(struct sockaddr *addr)
1455 {
1456 	struct ifnet *ifp;
1457 	struct ifaddr *ifa;
1458 
1459 	IFNET_RLOCK();
1460 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1461 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1462 			continue;
1463 		IF_ADDR_LOCK(ifp);
1464 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1465 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1466 				continue;
1467 			if (ifa->ifa_dstaddr != NULL &&
1468 			    sa_equal(addr, ifa->ifa_dstaddr)) {
1469 				ifa_ref(ifa);
1470 				IF_ADDR_UNLOCK(ifp);
1471 				goto done;
1472 			}
1473 		}
1474 		IF_ADDR_UNLOCK(ifp);
1475 	}
1476 	ifa = NULL;
1477 done:
1478 	IFNET_RUNLOCK();
1479 	return (ifa);
1480 }
1481 
1482 /*
1483  * Find an interface on a specific network.  If many, choice
1484  * is most specific found.
1485  */
1486 struct ifaddr *
1487 ifa_ifwithnet(struct sockaddr *addr)
1488 {
1489 	struct ifnet *ifp;
1490 	struct ifaddr *ifa;
1491 	struct ifaddr *ifa_maybe = NULL;
1492 	u_int af = addr->sa_family;
1493 	char *addr_data = addr->sa_data, *cplim;
1494 
1495 	/*
1496 	 * AF_LINK addresses can be looked up directly by their index number,
1497 	 * so do that if we can.
1498 	 */
1499 	if (af == AF_LINK) {
1500 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1501 	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
1502 		return (ifaddr_byindex(sdl->sdl_index));
1503 	}
1504 
1505 	/*
1506 	 * Scan though each interface, looking for ones that have addresses
1507 	 * in this address family.  Maintain a reference on ifa_maybe once
1508 	 * we find one, as we release the IF_ADDR_LOCK() that kept it stable
1509 	 * when we move onto the next interface.
1510 	 */
1511 	IFNET_RLOCK();
1512 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1513 		IF_ADDR_LOCK(ifp);
1514 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1515 			char *cp, *cp2, *cp3;
1516 
1517 			if (ifa->ifa_addr->sa_family != af)
1518 next:				continue;
1519 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1520 				/*
1521 				 * This is a bit broken as it doesn't
1522 				 * take into account that the remote end may
1523 				 * be a single node in the network we are
1524 				 * looking for.
1525 				 * The trouble is that we don't know the
1526 				 * netmask for the remote end.
1527 				 */
1528 				if (ifa->ifa_dstaddr != NULL &&
1529 				    sa_equal(addr, ifa->ifa_dstaddr)) {
1530 					ifa_ref(ifa);
1531 					IF_ADDR_UNLOCK(ifp);
1532 					goto done;
1533 				}
1534 			} else {
1535 				/*
1536 				 * if we have a special address handler,
1537 				 * then use it instead of the generic one.
1538 				 */
1539 				if (ifa->ifa_claim_addr) {
1540 					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1541 						ifa_ref(ifa);
1542 						IF_ADDR_UNLOCK(ifp);
1543 						goto done;
1544 					}
1545 					continue;
1546 				}
1547 
1548 				/*
1549 				 * Scan all the bits in the ifa's address.
1550 				 * If a bit dissagrees with what we are
1551 				 * looking for, mask it with the netmask
1552 				 * to see if it really matters.
1553 				 * (A byte at a time)
1554 				 */
1555 				if (ifa->ifa_netmask == 0)
1556 					continue;
1557 				cp = addr_data;
1558 				cp2 = ifa->ifa_addr->sa_data;
1559 				cp3 = ifa->ifa_netmask->sa_data;
1560 				cplim = ifa->ifa_netmask->sa_len
1561 					+ (char *)ifa->ifa_netmask;
1562 				while (cp3 < cplim)
1563 					if ((*cp++ ^ *cp2++) & *cp3++)
1564 						goto next; /* next address! */
1565 				/*
1566 				 * If the netmask of what we just found
1567 				 * is more specific than what we had before
1568 				 * (if we had one) then remember the new one
1569 				 * before continuing to search
1570 				 * for an even better one.
1571 				 */
1572 				if (ifa_maybe == NULL ||
1573 				    rn_refines((caddr_t)ifa->ifa_netmask,
1574 				    (caddr_t)ifa_maybe->ifa_netmask)) {
1575 					if (ifa_maybe != NULL)
1576 						ifa_free(ifa_maybe);
1577 					ifa_maybe = ifa;
1578 					ifa_ref(ifa_maybe);
1579 				}
1580 			}
1581 		}
1582 		IF_ADDR_UNLOCK(ifp);
1583 	}
1584 	ifa = ifa_maybe;
1585 	ifa_maybe = NULL;
1586 done:
1587 	IFNET_RUNLOCK();
1588 	if (ifa_maybe != NULL)
1589 		ifa_free(ifa_maybe);
1590 	return (ifa);
1591 }
1592 
1593 /*
1594  * Find an interface address specific to an interface best matching
1595  * a given address.
1596  */
1597 struct ifaddr *
1598 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1599 {
1600 	struct ifaddr *ifa;
1601 	char *cp, *cp2, *cp3;
1602 	char *cplim;
1603 	struct ifaddr *ifa_maybe = NULL;
1604 	u_int af = addr->sa_family;
1605 
1606 	if (af >= AF_MAX)
1607 		return (0);
1608 	IF_ADDR_LOCK(ifp);
1609 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1610 		if (ifa->ifa_addr->sa_family != af)
1611 			continue;
1612 		if (ifa_maybe == NULL)
1613 			ifa_maybe = ifa;
1614 		if (ifa->ifa_netmask == 0) {
1615 			if (sa_equal(addr, ifa->ifa_addr) ||
1616 			    (ifa->ifa_dstaddr &&
1617 			    sa_equal(addr, ifa->ifa_dstaddr)))
1618 				goto done;
1619 			continue;
1620 		}
1621 		if (ifp->if_flags & IFF_POINTOPOINT) {
1622 			if (sa_equal(addr, ifa->ifa_dstaddr))
1623 				goto done;
1624 		} else {
1625 			cp = addr->sa_data;
1626 			cp2 = ifa->ifa_addr->sa_data;
1627 			cp3 = ifa->ifa_netmask->sa_data;
1628 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1629 			for (; cp3 < cplim; cp3++)
1630 				if ((*cp++ ^ *cp2++) & *cp3)
1631 					break;
1632 			if (cp3 == cplim)
1633 				goto done;
1634 		}
1635 	}
1636 	ifa = ifa_maybe;
1637 done:
1638 	if (ifa != NULL)
1639 		ifa_ref(ifa);
1640 	IF_ADDR_UNLOCK(ifp);
1641 	return (ifa);
1642 }
1643 
1644 #include <net/if_llatbl.h>
1645 
1646 /*
1647  * Default action when installing a route with a Link Level gateway.
1648  * Lookup an appropriate real ifa to point to.
1649  * This should be moved to /sys/net/link.c eventually.
1650  */
1651 static void
1652 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1653 {
1654 	struct ifaddr *ifa, *oifa;
1655 	struct sockaddr *dst;
1656 	struct ifnet *ifp;
1657 
1658 	RT_LOCK_ASSERT(rt);
1659 
1660 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1661 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1662 		return;
1663 	ifa = ifaof_ifpforaddr(dst, ifp);
1664 	if (ifa) {
1665 		oifa = rt->rt_ifa;
1666 		rt->rt_ifa = ifa;
1667 		ifa_free(oifa);
1668 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1669 			ifa->ifa_rtrequest(cmd, rt, info);
1670 	}
1671 }
1672 
1673 /*
1674  * Mark an interface down and notify protocols of
1675  * the transition.
1676  * NOTE: must be called at splnet or eqivalent.
1677  */
1678 static void
1679 if_unroute(struct ifnet *ifp, int flag, int fam)
1680 {
1681 	struct ifaddr *ifa;
1682 
1683 	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
1684 
1685 	ifp->if_flags &= ~flag;
1686 	getmicrotime(&ifp->if_lastchange);
1687 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1688 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1689 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1690 	ifp->if_qflush(ifp);
1691 
1692 #if defined(INET) || defined(INET6)
1693 #ifdef DEV_CARP
1694 	if (ifp->if_carp)
1695 		carp_carpdev_state(ifp->if_carp);
1696 #endif
1697 #endif
1698 	rt_ifmsg(ifp);
1699 }
1700 
1701 /*
1702  * Mark an interface up and notify protocols of
1703  * the transition.
1704  * NOTE: must be called at splnet or eqivalent.
1705  */
1706 static void
1707 if_route(struct ifnet *ifp, int flag, int fam)
1708 {
1709 	struct ifaddr *ifa;
1710 
1711 	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
1712 
1713 	ifp->if_flags |= flag;
1714 	getmicrotime(&ifp->if_lastchange);
1715 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1716 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1717 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1718 #if defined(INET) || defined(INET6)
1719 #ifdef DEV_CARP
1720 	if (ifp->if_carp)
1721 		carp_carpdev_state(ifp->if_carp);
1722 #endif
1723 #endif
1724 	rt_ifmsg(ifp);
1725 #ifdef INET6
1726 	in6_if_up(ifp);
1727 #endif
1728 }
1729 
/*
 * Function pointer hooks for vlan support.  vlan_link_state_p is called
 * from do_link_state_change() when an interface has a vlan trunk
 * attached (if_vlantrunk != NULL).  Neither pointer is assigned in this
 * part of the file; presumably if_vlan fills them in -- confirm there.
 */
void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
1732 
1733 /*
1734  * Handle a change in the interface link state. To avoid LORs
1735  * between driver lock and upper layer locks, as well as possible
1736  * recursions, we post event to taskqueue, and all job
1737  * is done in static do_link_state_change().
1738  */
1739 void
1740 if_link_state_change(struct ifnet *ifp, int link_state)
1741 {
1742 	/* Return if state hasn't changed. */
1743 	if (ifp->if_link_state == link_state)
1744 		return;
1745 
1746 	ifp->if_link_state = link_state;
1747 
1748 	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
1749 }
1750 
1751 static void
1752 do_link_state_change(void *arg, int pending)
1753 {
1754 	struct ifnet *ifp = (struct ifnet *)arg;
1755 	int link_state = ifp->if_link_state;
1756 	int link;
1757 	CURVNET_SET(ifp->if_vnet);
1758 
1759 	/* Notify that the link state has changed. */
1760 	rt_ifmsg(ifp);
1761 	if (link_state == LINK_STATE_UP)
1762 		link = NOTE_LINKUP;
1763 	else if (link_state == LINK_STATE_DOWN)
1764 		link = NOTE_LINKDOWN;
1765 	else
1766 		link = NOTE_LINKINV;
1767 	if (ifp->if_vlantrunk != NULL)
1768 		(*vlan_link_state_p)(ifp, link);
1769 
1770 	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
1771 	    IFP2AC(ifp)->ac_netgraph != NULL)
1772 		(*ng_ether_link_state_p)(ifp, link_state);
1773 #if defined(INET) || defined(INET6)
1774 #ifdef DEV_CARP
1775 	if (ifp->if_carp)
1776 		carp_carpdev_state(ifp->if_carp);
1777 #endif
1778 #endif
1779 	if (ifp->if_bridge) {
1780 		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
1781 		(*bstp_linkstate_p)(ifp, link_state);
1782 	}
1783 	if (ifp->if_lagg) {
1784 		KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
1785 		(*lagg_linkstate_p)(ifp, link_state);
1786 	}
1787 
1788 	if (IS_DEFAULT_VNET(curvnet))
1789 		devctl_notify("IFNET", ifp->if_xname,
1790 		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
1791 		    NULL);
1792 	if (pending > 1)
1793 		if_printf(ifp, "%d link states coalesced\n", pending);
1794 	if (log_link_state_change)
1795 		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
1796 		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
1797 	CURVNET_RESTORE();
1798 }
1799 
1800 /*
1801  * Mark an interface down and notify protocols of
1802  * the transition.
1803  * NOTE: must be called at splnet or eqivalent.
1804  */
1805 void
1806 if_down(struct ifnet *ifp)
1807 {
1808 
1809 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1810 }
1811 
1812 /*
1813  * Mark an interface up and notify protocols of
1814  * the transition.
1815  * NOTE: must be called at splnet or eqivalent.
1816  */
1817 void
1818 if_up(struct ifnet *ifp)
1819 {
1820 
1821 	if_route(ifp, IFF_UP, AF_UNSPEC);
1822 }
1823 
1824 /*
1825  * Flush an interface queue.
1826  */
1827 void
1828 if_qflush(struct ifnet *ifp)
1829 {
1830 	struct mbuf *m, *n;
1831 	struct ifaltq *ifq;
1832 
1833 	ifq = &ifp->if_snd;
1834 	IFQ_LOCK(ifq);
1835 #ifdef ALTQ
1836 	if (ALTQ_IS_ENABLED(ifq))
1837 		ALTQ_PURGE(ifq);
1838 #endif
1839 	n = ifq->ifq_head;
1840 	while ((m = n) != 0) {
1841 		n = m->m_act;
1842 		m_freem(m);
1843 	}
1844 	ifq->ifq_head = 0;
1845 	ifq->ifq_tail = 0;
1846 	ifq->ifq_len = 0;
1847 	IFQ_UNLOCK(ifq);
1848 }
1849 
1850 /*
1851  * Handle interface watchdog timer routines.  Called
1852  * from softclock, we decrement timers (if set) and
1853  * call the appropriate interface routine on expiration.
1854  *
1855  * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1856  * holding Giant.
1857  */
1858 static void
1859 if_slowtimo(void *arg)
1860 {
1861 	VNET_ITERATOR_DECL(vnet_iter);
1862 	struct ifnet *ifp;
1863 	int s = splimp();
1864 
1865 	VNET_LIST_RLOCK_NOSLEEP();
1866 	IFNET_RLOCK();
1867 	VNET_FOREACH(vnet_iter) {
1868 		CURVNET_SET(vnet_iter);
1869 		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1870 			if (ifp->if_timer == 0 || --ifp->if_timer)
1871 				continue;
1872 			if (ifp->if_watchdog)
1873 				(*ifp->if_watchdog)(ifp);
1874 		}
1875 		CURVNET_RESTORE();
1876 	}
1877 	IFNET_RUNLOCK();
1878 	VNET_LIST_RUNLOCK_NOSLEEP();
1879 	splx(s);
1880 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1881 }
1882 
1883 /*
1884  * Map interface name to interface structure pointer, with or without
1885  * returning a reference.
1886  */
1887 struct ifnet *
1888 ifunit_ref(const char *name)
1889 {
1890 	struct ifnet *ifp;
1891 
1892 	IFNET_RLOCK();
1893 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1894 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
1895 		    !(ifp->if_flags & IFF_DYING))
1896 			break;
1897 	}
1898 	if (ifp != NULL)
1899 		if_ref(ifp);
1900 	IFNET_RUNLOCK();
1901 	return (ifp);
1902 }
1903 
1904 struct ifnet *
1905 ifunit(const char *name)
1906 {
1907 	struct ifnet *ifp;
1908 
1909 	IFNET_RLOCK();
1910 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1911 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1912 			break;
1913 	}
1914 	IFNET_RUNLOCK();
1915 	return (ifp);
1916 }
1917 
1918 /*
1919  * Hardware specific interface ioctls.
1920  */
1921 static int
1922 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1923 {
1924 	struct ifreq *ifr;
1925 	struct ifstat *ifs;
1926 	int error = 0;
1927 	int new_flags, temp_flags;
1928 	size_t namelen, onamelen;
1929 	char new_name[IFNAMSIZ];
1930 	struct ifaddr *ifa;
1931 	struct sockaddr_dl *sdl;
1932 
1933 	ifr = (struct ifreq *)data;
1934 	switch (cmd) {
1935 	case SIOCGIFINDEX:
1936 		ifr->ifr_index = ifp->if_index;
1937 		break;
1938 
1939 	case SIOCGIFFLAGS:
1940 		temp_flags = ifp->if_flags | ifp->if_drv_flags;
1941 		ifr->ifr_flags = temp_flags & 0xffff;
1942 		ifr->ifr_flagshigh = temp_flags >> 16;
1943 		break;
1944 
1945 	case SIOCGIFCAP:
1946 		ifr->ifr_reqcap = ifp->if_capabilities;
1947 		ifr->ifr_curcap = ifp->if_capenable;
1948 		break;
1949 
1950 #ifdef MAC
1951 	case SIOCGIFMAC:
1952 		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
1953 		break;
1954 #endif
1955 
1956 	case SIOCGIFMETRIC:
1957 		ifr->ifr_metric = ifp->if_metric;
1958 		break;
1959 
1960 	case SIOCGIFMTU:
1961 		ifr->ifr_mtu = ifp->if_mtu;
1962 		break;
1963 
1964 	case SIOCGIFPHYS:
1965 		ifr->ifr_phys = ifp->if_physical;
1966 		break;
1967 
1968 	case SIOCSIFFLAGS:
1969 		error = priv_check(td, PRIV_NET_SETIFFLAGS);
1970 		if (error)
1971 			return (error);
1972 		/*
1973 		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
1974 		 * check, so we don't need special handling here yet.
1975 		 */
1976 		new_flags = (ifr->ifr_flags & 0xffff) |
1977 		    (ifr->ifr_flagshigh << 16);
1978 		if (ifp->if_flags & IFF_SMART) {
1979 			/* Smart drivers twiddle their own routes */
1980 		} else if (ifp->if_flags & IFF_UP &&
1981 		    (new_flags & IFF_UP) == 0) {
1982 			int s = splimp();
1983 			if_down(ifp);
1984 			splx(s);
1985 		} else if (new_flags & IFF_UP &&
1986 		    (ifp->if_flags & IFF_UP) == 0) {
1987 			int s = splimp();
1988 			if_up(ifp);
1989 			splx(s);
1990 		}
1991 		/* See if permanently promiscuous mode bit is about to flip */
1992 		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
1993 			if (new_flags & IFF_PPROMISC)
1994 				ifp->if_flags |= IFF_PROMISC;
1995 			else if (ifp->if_pcount == 0)
1996 				ifp->if_flags &= ~IFF_PROMISC;
1997 			log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
1998 			    ifp->if_xname,
1999 			    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
2000 		}
2001 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2002 			(new_flags &~ IFF_CANTCHANGE);
2003 		if (ifp->if_ioctl) {
2004 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2005 		}
2006 		getmicrotime(&ifp->if_lastchange);
2007 		break;
2008 
2009 	case SIOCSIFCAP:
2010 		error = priv_check(td, PRIV_NET_SETIFCAP);
2011 		if (error)
2012 			return (error);
2013 		if (ifp->if_ioctl == NULL)
2014 			return (EOPNOTSUPP);
2015 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2016 			return (EINVAL);
2017 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2018 		if (error == 0)
2019 			getmicrotime(&ifp->if_lastchange);
2020 		break;
2021 
2022 #ifdef MAC
2023 	case SIOCSIFMAC:
2024 		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2025 		break;
2026 #endif
2027 
2028 	case SIOCSIFNAME:
2029 		error = priv_check(td, PRIV_NET_SETIFNAME);
2030 		if (error)
2031 			return (error);
2032 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
2033 		if (error != 0)
2034 			return (error);
2035 		if (new_name[0] == '\0')
2036 			return (EINVAL);
2037 		if (ifunit(new_name) != NULL)
2038 			return (EEXIST);
2039 
2040 		/* Announce the departure of the interface. */
2041 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2042 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2043 
2044 		log(LOG_INFO, "%s: changing name to '%s'\n",
2045 		    ifp->if_xname, new_name);
2046 
2047 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
2048 		ifa = ifp->if_addr;
2049 		IFA_LOCK(ifa);
2050 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2051 		namelen = strlen(new_name);
2052 		onamelen = sdl->sdl_nlen;
2053 		/*
2054 		 * Move the address if needed.  This is safe because we
2055 		 * allocate space for a name of length IFNAMSIZ when we
2056 		 * create this in if_attach().
2057 		 */
2058 		if (namelen != onamelen) {
2059 			bcopy(sdl->sdl_data + onamelen,
2060 			    sdl->sdl_data + namelen, sdl->sdl_alen);
2061 		}
2062 		bcopy(new_name, sdl->sdl_data, namelen);
2063 		sdl->sdl_nlen = namelen;
2064 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2065 		bzero(sdl->sdl_data, onamelen);
2066 		while (namelen != 0)
2067 			sdl->sdl_data[--namelen] = 0xff;
2068 		IFA_UNLOCK(ifa);
2069 
2070 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2071 		/* Announce the return of the interface. */
2072 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2073 		break;
2074 
2075 #ifdef VIMAGE
2076 	case SIOCSIFVNET:
2077 		error = priv_check(td, PRIV_NET_SETIFVNET);
2078 		if (error)
2079 			return (error);
2080 		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2081 		break;
2082 #endif
2083 
2084 	case SIOCSIFMETRIC:
2085 		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2086 		if (error)
2087 			return (error);
2088 		ifp->if_metric = ifr->ifr_metric;
2089 		getmicrotime(&ifp->if_lastchange);
2090 		break;
2091 
2092 	case SIOCSIFPHYS:
2093 		error = priv_check(td, PRIV_NET_SETIFPHYS);
2094 		if (error)
2095 			return (error);
2096 		if (ifp->if_ioctl == NULL)
2097 			return (EOPNOTSUPP);
2098 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2099 		if (error == 0)
2100 			getmicrotime(&ifp->if_lastchange);
2101 		break;
2102 
2103 	case SIOCSIFMTU:
2104 	{
2105 		u_long oldmtu = ifp->if_mtu;
2106 
2107 		error = priv_check(td, PRIV_NET_SETIFMTU);
2108 		if (error)
2109 			return (error);
2110 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2111 			return (EINVAL);
2112 		if (ifp->if_ioctl == NULL)
2113 			return (EOPNOTSUPP);
2114 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2115 		if (error == 0) {
2116 			getmicrotime(&ifp->if_lastchange);
2117 			rt_ifmsg(ifp);
2118 		}
2119 		/*
2120 		 * If the link MTU changed, do network layer specific procedure.
2121 		 */
2122 		if (ifp->if_mtu != oldmtu) {
2123 #ifdef INET6
2124 			nd6_setmtu(ifp);
2125 #endif
2126 		}
2127 		break;
2128 	}
2129 
2130 	case SIOCADDMULTI:
2131 	case SIOCDELMULTI:
2132 		if (cmd == SIOCADDMULTI)
2133 			error = priv_check(td, PRIV_NET_ADDMULTI);
2134 		else
2135 			error = priv_check(td, PRIV_NET_DELMULTI);
2136 		if (error)
2137 			return (error);
2138 
2139 		/* Don't allow group membership on non-multicast interfaces. */
2140 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2141 			return (EOPNOTSUPP);
2142 
2143 		/* Don't let users screw up protocols' entries. */
2144 		if (ifr->ifr_addr.sa_family != AF_LINK)
2145 			return (EINVAL);
2146 
2147 		if (cmd == SIOCADDMULTI) {
2148 			struct ifmultiaddr *ifma;
2149 
2150 			/*
2151 			 * Userland is only permitted to join groups once
2152 			 * via the if_addmulti() KPI, because it cannot hold
2153 			 * struct ifmultiaddr * between calls. It may also
2154 			 * lose a race while we check if the membership
2155 			 * already exists.
2156 			 */
2157 			IF_ADDR_LOCK(ifp);
2158 			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2159 			IF_ADDR_UNLOCK(ifp);
2160 			if (ifma != NULL)
2161 				error = EADDRINUSE;
2162 			else
2163 				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2164 		} else {
2165 			error = if_delmulti(ifp, &ifr->ifr_addr);
2166 		}
2167 		if (error == 0)
2168 			getmicrotime(&ifp->if_lastchange);
2169 		break;
2170 
2171 	case SIOCSIFPHYADDR:
2172 	case SIOCDIFPHYADDR:
2173 #ifdef INET6
2174 	case SIOCSIFPHYADDR_IN6:
2175 #endif
2176 	case SIOCSLIFPHYADDR:
2177 	case SIOCSIFMEDIA:
2178 	case SIOCSIFGENERIC:
2179 		error = priv_check(td, PRIV_NET_HWIOCTL);
2180 		if (error)
2181 			return (error);
2182 		if (ifp->if_ioctl == NULL)
2183 			return (EOPNOTSUPP);
2184 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2185 		if (error == 0)
2186 			getmicrotime(&ifp->if_lastchange);
2187 		break;
2188 
2189 	case SIOCGIFSTATUS:
2190 		ifs = (struct ifstat *)data;
2191 		ifs->ascii[0] = '\0';
2192 
2193 	case SIOCGIFPSRCADDR:
2194 	case SIOCGIFPDSTADDR:
2195 	case SIOCGLIFPHYADDR:
2196 	case SIOCGIFMEDIA:
2197 	case SIOCGIFGENERIC:
2198 		if (ifp->if_ioctl == NULL)
2199 			return (EOPNOTSUPP);
2200 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2201 		break;
2202 
2203 	case SIOCSIFLLADDR:
2204 		error = priv_check(td, PRIV_NET_SETLLADDR);
2205 		if (error)
2206 			return (error);
2207 		error = if_setlladdr(ifp,
2208 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2209 		break;
2210 
2211 	case SIOCAIFGROUP:
2212 	{
2213 		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2214 
2215 		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2216 		if (error)
2217 			return (error);
2218 		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2219 			return (error);
2220 		break;
2221 	}
2222 
2223 	case SIOCGIFGROUP:
2224 		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
2225 			return (error);
2226 		break;
2227 
2228 	case SIOCDIFGROUP:
2229 	{
2230 		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2231 
2232 		error = priv_check(td, PRIV_NET_DELIFGROUP);
2233 		if (error)
2234 			return (error);
2235 		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2236 			return (error);
2237 		break;
2238 	}
2239 
2240 	default:
2241 		error = ENOIOCTL;
2242 		break;
2243 	}
2244 	return (error);
2245 }
2246 
2247 /*
2248  * Interface ioctls.
2249  */
2250 int
2251 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2252 {
2253 	struct ifnet *ifp;
2254 	struct ifreq *ifr;
2255 	int error;
2256 	int oif_flags;
2257 
2258 	switch (cmd) {
2259 	case SIOCGIFCONF:
2260 	case OSIOCGIFCONF:
2261 #ifdef __amd64__
2262 	case SIOCGIFCONF32:
2263 #endif
2264 		return (ifconf(cmd, data));
2265 	}
2266 	ifr = (struct ifreq *)data;
2267 
2268 	switch (cmd) {
2269 #ifdef VIMAGE
2270 	case SIOCSIFRVNET:
2271 		error = priv_check(td, PRIV_NET_SETIFVNET);
2272 		if (error)
2273 			return (error);
2274 		return (if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid));
2275 #endif
2276 	case SIOCIFCREATE:
2277 	case SIOCIFCREATE2:
2278 		error = priv_check(td, PRIV_NET_IFCREATE);
2279 		if (error)
2280 			return (error);
2281 		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
2282 			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
2283 	case SIOCIFDESTROY:
2284 		error = priv_check(td, PRIV_NET_IFDESTROY);
2285 		if (error)
2286 			return (error);
2287 		return if_clone_destroy(ifr->ifr_name);
2288 
2289 	case SIOCIFGCLONERS:
2290 		return (if_clone_list((struct if_clonereq *)data));
2291 	case SIOCGIFGMEMB:
2292 		return (if_getgroupmembers((struct ifgroupreq *)data));
2293 	}
2294 
2295 	ifp = ifunit_ref(ifr->ifr_name);
2296 	if (ifp == NULL)
2297 		return (ENXIO);
2298 
2299 	error = ifhwioctl(cmd, ifp, data, td);
2300 	if (error != ENOIOCTL) {
2301 		if_rele(ifp);
2302 		return (error);
2303 	}
2304 
2305 	oif_flags = ifp->if_flags;
2306 	if (so->so_proto == NULL) {
2307 		if_rele(ifp);
2308 		return (EOPNOTSUPP);
2309 	}
2310 #ifndef COMPAT_43
2311 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
2312 								 data,
2313 								 ifp, td));
2314 	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
2315 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2316 #else
2317 	{
2318 		u_long ocmd = cmd;
2319 
2320 		switch (cmd) {
2321 
2322 		case SIOCSIFDSTADDR:
2323 		case SIOCSIFADDR:
2324 		case SIOCSIFBRDADDR:
2325 		case SIOCSIFNETMASK:
2326 #if BYTE_ORDER != BIG_ENDIAN
2327 			if (ifr->ifr_addr.sa_family == 0 &&
2328 			    ifr->ifr_addr.sa_len < 16) {
2329 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
2330 				ifr->ifr_addr.sa_len = 16;
2331 			}
2332 #else
2333 			if (ifr->ifr_addr.sa_len == 0)
2334 				ifr->ifr_addr.sa_len = 16;
2335 #endif
2336 			break;
2337 
2338 		case OSIOCGIFADDR:
2339 			cmd = SIOCGIFADDR;
2340 			break;
2341 
2342 		case OSIOCGIFDSTADDR:
2343 			cmd = SIOCGIFDSTADDR;
2344 			break;
2345 
2346 		case OSIOCGIFBRDADDR:
2347 			cmd = SIOCGIFBRDADDR;
2348 			break;
2349 
2350 		case OSIOCGIFNETMASK:
2351 			cmd = SIOCGIFNETMASK;
2352 		}
2353 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
2354 								   cmd,
2355 								   data,
2356 								   ifp, td));
2357 		if (error == EOPNOTSUPP && ifp != NULL &&
2358 		    ifp->if_ioctl != NULL)
2359 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2360 		switch (ocmd) {
2361 
2362 		case OSIOCGIFADDR:
2363 		case OSIOCGIFDSTADDR:
2364 		case OSIOCGIFBRDADDR:
2365 		case OSIOCGIFNETMASK:
2366 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
2367 
2368 		}
2369 	}
2370 #endif /* COMPAT_43 */
2371 
2372 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
2373 #ifdef INET6
2374 		if (ifp->if_flags & IFF_UP) {
2375 			int s = splimp();
2376 			in6_if_up(ifp);
2377 			splx(s);
2378 		}
2379 #endif
2380 	}
2381 	if_rele(ifp);
2382 	return (error);
2383 }
2384 
2385 /*
2386  * The code common to handling reference counted flags,
2387  * e.g., in ifpromisc() and if_allmulti().
2388  * The "pflag" argument can specify a permanent mode flag to check,
2389  * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
2390  *
2391  * Only to be used on stack-owned flags, not driver-owned flags.
2392  */
2393 static int
2394 if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
2395 {
2396 	struct ifreq ifr;
2397 	int error;
2398 	int oldflags, oldcount;
2399 
2400 	/* Sanity checks to catch programming errors */
2401 	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
2402 	    ("%s: setting driver-owned flag %d", __func__, flag));
2403 
2404 	if (onswitch)
2405 		KASSERT(*refcount >= 0,
2406 		    ("%s: increment negative refcount %d for flag %d",
2407 		    __func__, *refcount, flag));
2408 	else
2409 		KASSERT(*refcount > 0,
2410 		    ("%s: decrement non-positive refcount %d for flag %d",
2411 		    __func__, *refcount, flag));
2412 
2413 	/* In case this mode is permanent, just touch refcount */
2414 	if (ifp->if_flags & pflag) {
2415 		*refcount += onswitch ? 1 : -1;
2416 		return (0);
2417 	}
2418 
2419 	/* Save ifnet parameters for if_ioctl() may fail */
2420 	oldcount = *refcount;
2421 	oldflags = ifp->if_flags;
2422 
2423 	/*
2424 	 * See if we aren't the only and touching refcount is enough.
2425 	 * Actually toggle interface flag if we are the first or last.
2426 	 */
2427 	if (onswitch) {
2428 		if ((*refcount)++)
2429 			return (0);
2430 		ifp->if_flags |= flag;
2431 	} else {
2432 		if (--(*refcount))
2433 			return (0);
2434 		ifp->if_flags &= ~flag;
2435 	}
2436 
2437 	/* Call down the driver since we've changed interface flags */
2438 	if (ifp->if_ioctl == NULL) {
2439 		error = EOPNOTSUPP;
2440 		goto recover;
2441 	}
2442 	ifr.ifr_flags = ifp->if_flags & 0xffff;
2443 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
2444 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2445 	if (error)
2446 		goto recover;
2447 	/* Notify userland that interface flags have changed */
2448 	rt_ifmsg(ifp);
2449 	return (0);
2450 
2451 recover:
2452 	/* Recover after driver error */
2453 	*refcount = oldcount;
2454 	ifp->if_flags = oldflags;
2455 	return (error);
2456 }
2457 
2458 /*
2459  * Set/clear promiscuous mode on interface ifp based on the truth value
2460  * of pswitch.  The calls are reference counted so that only the first
2461  * "on" request actually has an effect, as does the final "off" request.
2462  * Results are undefined if the "off" and "on" requests are not matched.
2463  */
2464 int
2465 ifpromisc(struct ifnet *ifp, int pswitch)
2466 {
2467 	int error;
2468 	int oldflags = ifp->if_flags;
2469 
2470 	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
2471 			   &ifp->if_pcount, pswitch);
2472 	/* If promiscuous mode status has changed, log a message */
2473 	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
2474 		log(LOG_INFO, "%s: promiscuous mode %s\n",
2475 		    ifp->if_xname,
2476 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
2477 	return (error);
2478 }
2479 
2480 /*
2481  * Return interface configuration
2482  * of system.  List may be used
2483  * in later ioctl's (above) to get
2484  * other information.
2485  */
2486 /*ARGSUSED*/
2487 static int
2488 ifconf(u_long cmd, caddr_t data)
2489 {
2490 	struct ifconf *ifc = (struct ifconf *)data;
2491 #ifdef __amd64__
2492 	struct ifconf32 *ifc32 = (struct ifconf32 *)data;
2493 	struct ifconf ifc_swab;
2494 #endif
2495 	struct ifnet *ifp;
2496 	struct ifaddr *ifa;
2497 	struct ifreq ifr;
2498 	struct sbuf *sb;
2499 	int error, full = 0, valid_len, max_len;
2500 
2501 #ifdef __amd64__
2502 	if (cmd == SIOCGIFCONF32) {
2503 		ifc_swab.ifc_len = ifc32->ifc_len;
2504 		ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
2505 		ifc = &ifc_swab;
2506 	}
2507 #endif
2508 	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
2509 	max_len = MAXPHYS - 1;
2510 
2511 	/* Prevent hostile input from being able to crash the system */
2512 	if (ifc->ifc_len <= 0)
2513 		return (EINVAL);
2514 
2515 again:
2516 	if (ifc->ifc_len <= max_len) {
2517 		max_len = ifc->ifc_len;
2518 		full = 1;
2519 	}
2520 	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
2521 	max_len = 0;
2522 	valid_len = 0;
2523 
2524 	IFNET_RLOCK();		/* could sleep XXX */
2525 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2526 		int addrs;
2527 
2528 		/*
2529 		 * Zero the ifr_name buffer to make sure we don't
2530 		 * disclose the contents of the stack.
2531 		 */
2532 		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
2533 
2534 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
2535 		    >= sizeof(ifr.ifr_name)) {
2536 			sbuf_delete(sb);
2537 			IFNET_RUNLOCK();
2538 			return (ENAMETOOLONG);
2539 		}
2540 
2541 		addrs = 0;
2542 		IF_ADDR_LOCK(ifp);
2543 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2544 			struct sockaddr *sa = ifa->ifa_addr;
2545 
2546 			if (prison_if(curthread->td_ucred, sa) != 0)
2547 				continue;
2548 			addrs++;
2549 #ifdef COMPAT_43
2550 			if (cmd == OSIOCGIFCONF) {
2551 				struct osockaddr *osa =
2552 					 (struct osockaddr *)&ifr.ifr_addr;
2553 				ifr.ifr_addr = *sa;
2554 				osa->sa_family = sa->sa_family;
2555 				sbuf_bcat(sb, &ifr, sizeof(ifr));
2556 				max_len += sizeof(ifr);
2557 			} else
2558 #endif
2559 			if (sa->sa_len <= sizeof(*sa)) {
2560 				ifr.ifr_addr = *sa;
2561 				sbuf_bcat(sb, &ifr, sizeof(ifr));
2562 				max_len += sizeof(ifr);
2563 			} else {
2564 				sbuf_bcat(sb, &ifr,
2565 				    offsetof(struct ifreq, ifr_addr));
2566 				max_len += offsetof(struct ifreq, ifr_addr);
2567 				sbuf_bcat(sb, sa, sa->sa_len);
2568 				max_len += sa->sa_len;
2569 			}
2570 
2571 			if (!sbuf_overflowed(sb))
2572 				valid_len = sbuf_len(sb);
2573 		}
2574 		IF_ADDR_UNLOCK(ifp);
2575 		if (addrs == 0) {
2576 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2577 			sbuf_bcat(sb, &ifr, sizeof(ifr));
2578 			max_len += sizeof(ifr);
2579 
2580 			if (!sbuf_overflowed(sb))
2581 				valid_len = sbuf_len(sb);
2582 		}
2583 	}
2584 	IFNET_RUNLOCK();
2585 
2586 	/*
2587 	 * If we didn't allocate enough space (uncommon), try again.  If
2588 	 * we have already allocated as much space as we are allowed,
2589 	 * return what we've got.
2590 	 */
2591 	if (valid_len != max_len && !full) {
2592 		sbuf_delete(sb);
2593 		goto again;
2594 	}
2595 
2596 	ifc->ifc_len = valid_len;
2597 #ifdef __amd64__
2598 	if (cmd == SIOCGIFCONF32)
2599 		ifc32->ifc_len = valid_len;
2600 #endif
2601 	sbuf_finish(sb);
2602 	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
2603 	sbuf_delete(sb);
2604 	return (error);
2605 }
2606 
2607 /*
2608  * Just like ifpromisc(), but for all-multicast-reception mode.
2609  */
2610 int
2611 if_allmulti(struct ifnet *ifp, int onswitch)
2612 {
2613 
2614 	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
2615 }
2616 
2617 struct ifmultiaddr *
2618 if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
2619 {
2620 	struct ifmultiaddr *ifma;
2621 
2622 	IF_ADDR_LOCK_ASSERT(ifp);
2623 
2624 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2625 		if (sa->sa_family == AF_LINK) {
2626 			if (sa_dl_equal(ifma->ifma_addr, sa))
2627 				break;
2628 		} else {
2629 			if (sa_equal(ifma->ifma_addr, sa))
2630 				break;
2631 		}
2632 	}
2633 
2634 	return ifma;
2635 }
2636 
2637 /*
2638  * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
2639  * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
2640  * the ifnet multicast address list here, so the caller must do that and
2641  * other setup work (such as notifying the device driver).  The reference
2642  * count is initialized to 1.
2643  */
2644 static struct ifmultiaddr *
2645 if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
2646     int mflags)
2647 {
2648 	struct ifmultiaddr *ifma;
2649 	struct sockaddr *dupsa;
2650 
2651 	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
2652 	    M_ZERO);
2653 	if (ifma == NULL)
2654 		return (NULL);
2655 
2656 	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
2657 	if (dupsa == NULL) {
2658 		free(ifma, M_IFMADDR);
2659 		return (NULL);
2660 	}
2661 	bcopy(sa, dupsa, sa->sa_len);
2662 	ifma->ifma_addr = dupsa;
2663 
2664 	ifma->ifma_ifp = ifp;
2665 	ifma->ifma_refcount = 1;
2666 	ifma->ifma_protospec = NULL;
2667 
2668 	if (llsa == NULL) {
2669 		ifma->ifma_lladdr = NULL;
2670 		return (ifma);
2671 	}
2672 
2673 	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
2674 	if (dupsa == NULL) {
2675 		free(ifma->ifma_addr, M_IFMADDR);
2676 		free(ifma, M_IFMADDR);
2677 		return (NULL);
2678 	}
2679 	bcopy(llsa, dupsa, llsa->sa_len);
2680 	ifma->ifma_lladdr = dupsa;
2681 
2682 	return (ifma);
2683 }
2684 
2685 /*
2686  * if_freemulti: free ifmultiaddr structure and possibly attached related
2687  * addresses.  The caller is responsible for implementing reference
2688  * counting, notifying the driver, handling routing messages, and releasing
2689  * any dependent link layer state.
2690  */
2691 static void
2692 if_freemulti(struct ifmultiaddr *ifma)
2693 {
2694 
2695 	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
2696 	    ifma->ifma_refcount));
2697 	KASSERT(ifma->ifma_protospec == NULL,
2698 	    ("if_freemulti: protospec not NULL"));
2699 
2700 	if (ifma->ifma_lladdr != NULL)
2701 		free(ifma->ifma_lladdr, M_IFMADDR);
2702 	free(ifma->ifma_addr, M_IFMADDR);
2703 	free(ifma, M_IFMADDR);
2704 }
2705 
2706 /*
2707  * Register an additional multicast address with a network interface.
2708  *
2709  * - If the address is already present, bump the reference count on the
2710  *   address and return.
2711  * - If the address is not link-layer, look up a link layer address.
2712  * - Allocate address structures for one or both addresses, and attach to the
2713  *   multicast address list on the interface.  If automatically adding a link
2714  *   layer address, the protocol address will own a reference to the link
2715  *   layer address, to be freed when it is freed.
2716  * - Notify the network device driver of an addition to the multicast address
2717  *   list.
2718  *
2719  * 'sa' points to caller-owned memory with the desired multicast address.
2720  *
2721  * 'retifma' will be used to return a pointer to the resulting multicast
2722  * address reference, if desired.
2723  */
2724 int
2725 if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
2726     struct ifmultiaddr **retifma)
2727 {
2728 	struct ifmultiaddr *ifma, *ll_ifma;
2729 	struct sockaddr *llsa;
2730 	int error;
2731 
2732 	/*
2733 	 * If the address is already present, return a new reference to it;
2734 	 * otherwise, allocate storage and set up a new address.
2735 	 */
2736 	IF_ADDR_LOCK(ifp);
2737 	ifma = if_findmulti(ifp, sa);
2738 	if (ifma != NULL) {
2739 		ifma->ifma_refcount++;
2740 		if (retifma != NULL)
2741 			*retifma = ifma;
2742 		IF_ADDR_UNLOCK(ifp);
2743 		return (0);
2744 	}
2745 
2746 	/*
2747 	 * The address isn't already present; resolve the protocol address
2748 	 * into a link layer address, and then look that up, bump its
2749 	 * refcount or allocate an ifma for that also.  If 'llsa' was
2750 	 * returned, we will need to free it later.
2751 	 */
2752 	llsa = NULL;
2753 	ll_ifma = NULL;
2754 	if (ifp->if_resolvemulti != NULL) {
2755 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2756 		if (error)
2757 			goto unlock_out;
2758 	}
2759 
2760 	/*
2761 	 * Allocate the new address.  Don't hook it up yet, as we may also
2762 	 * need to allocate a link layer multicast address.
2763 	 */
2764 	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
2765 	if (ifma == NULL) {
2766 		error = ENOMEM;
2767 		goto free_llsa_out;
2768 	}
2769 
2770 	/*
2771 	 * If a link layer address is found, we'll need to see if it's
2772 	 * already present in the address list, or allocate is as well.
2773 	 * When this block finishes, the link layer address will be on the
2774 	 * list.
2775 	 */
2776 	if (llsa != NULL) {
2777 		ll_ifma = if_findmulti(ifp, llsa);
2778 		if (ll_ifma == NULL) {
2779 			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
2780 			if (ll_ifma == NULL) {
2781 				--ifma->ifma_refcount;
2782 				if_freemulti(ifma);
2783 				error = ENOMEM;
2784 				goto free_llsa_out;
2785 			}
2786 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
2787 			    ifma_link);
2788 		} else
2789 			ll_ifma->ifma_refcount++;
2790 		ifma->ifma_llifma = ll_ifma;
2791 	}
2792 
2793 	/*
2794 	 * We now have a new multicast address, ifma, and possibly a new or
2795 	 * referenced link layer address.  Add the primary address to the
2796 	 * ifnet address list.
2797 	 */
2798 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2799 
2800 	if (retifma != NULL)
2801 		*retifma = ifma;
2802 
2803 	/*
2804 	 * Must generate the message while holding the lock so that 'ifma'
2805 	 * pointer is still valid.
2806 	 */
2807 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2808 	IF_ADDR_UNLOCK(ifp);
2809 
2810 	/*
2811 	 * We are certain we have added something, so call down to the
2812 	 * interface to let them know about it.
2813 	 */
2814 	if (ifp->if_ioctl != NULL) {
2815 		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
2816 	}
2817 
2818 	if (llsa != NULL)
2819 		free(llsa, M_IFMADDR);
2820 
2821 	return (0);
2822 
2823 free_llsa_out:
2824 	if (llsa != NULL)
2825 		free(llsa, M_IFMADDR);
2826 
2827 unlock_out:
2828 	IF_ADDR_UNLOCK(ifp);
2829 	return (error);
2830 }
2831 
2832 /*
2833  * Delete a multicast group membership by network-layer group address.
2834  *
2835  * Returns ENOENT if the entry could not be found. If ifp no longer
2836  * exists, results are undefined. This entry point should only be used
2837  * from subsystems which do appropriate locking to hold ifp for the
2838  * duration of the call.
2839  * Network-layer protocol domains must use if_delmulti_ifma().
2840  */
2841 int
2842 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2843 {
2844 	struct ifmultiaddr *ifma;
2845 	int lastref;
2846 #ifdef INVARIANTS
2847 	struct ifnet *oifp;
2848 
2849 	IFNET_RLOCK();
2850 	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
2851 		if (ifp == oifp)
2852 			break;
2853 	if (ifp != oifp)
2854 		ifp = NULL;
2855 	IFNET_RUNLOCK();
2856 
2857 	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
2858 #endif
2859 	if (ifp == NULL)
2860 		return (ENOENT);
2861 
2862 	IF_ADDR_LOCK(ifp);
2863 	lastref = 0;
2864 	ifma = if_findmulti(ifp, sa);
2865 	if (ifma != NULL)
2866 		lastref = if_delmulti_locked(ifp, ifma, 0);
2867 	IF_ADDR_UNLOCK(ifp);
2868 
2869 	if (ifma == NULL)
2870 		return (ENOENT);
2871 
2872 	if (lastref && ifp->if_ioctl != NULL) {
2873 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
2874 	}
2875 
2876 	return (0);
2877 }
2878 
2879 /*
2880  * Delete a multicast group membership by group membership pointer.
2881  * Network-layer protocol domains must use this routine.
2882  *
2883  * It is safe to call this routine if the ifp disappeared.
2884  */
2885 void
2886 if_delmulti_ifma(struct ifmultiaddr *ifma)
2887 {
2888 	struct ifnet *ifp;
2889 	int lastref;
2890 
2891 	ifp = ifma->ifma_ifp;
2892 #ifdef DIAGNOSTIC
2893 	if (ifp == NULL) {
2894 		printf("%s: ifma_ifp seems to be detached\n", __func__);
2895 	} else {
2896 		struct ifnet *oifp;
2897 
2898 		IFNET_RLOCK();
2899 		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
2900 			if (ifp == oifp)
2901 				break;
2902 		if (ifp != oifp) {
2903 			printf("%s: ifnet %p disappeared\n", __func__, ifp);
2904 			ifp = NULL;
2905 		}
2906 		IFNET_RUNLOCK();
2907 	}
2908 #endif
2909 	/*
2910 	 * If and only if the ifnet instance exists: Acquire the address lock.
2911 	 */
2912 	if (ifp != NULL)
2913 		IF_ADDR_LOCK(ifp);
2914 
2915 	lastref = if_delmulti_locked(ifp, ifma, 0);
2916 
2917 	if (ifp != NULL) {
2918 		/*
2919 		 * If and only if the ifnet instance exists:
2920 		 *  Release the address lock.
2921 		 *  If the group was left: update the hardware hash filter.
2922 		 */
2923 		IF_ADDR_UNLOCK(ifp);
2924 		if (lastref && ifp->if_ioctl != NULL) {
2925 			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
2926 		}
2927 	}
2928 }
2929 
2930 /*
2931  * Perform deletion of network-layer and/or link-layer multicast address.
2932  *
2933  * Return 0 if the reference count was decremented.
2934  * Return 1 if the final reference was released, indicating that the
2935  * hardware hash filter should be reprogrammed.
2936  */
2937 static int
2938 if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
2939 {
2940 	struct ifmultiaddr *ll_ifma;
2941 
2942 	if (ifp != NULL && ifma->ifma_ifp != NULL) {
2943 		KASSERT(ifma->ifma_ifp == ifp,
2944 		    ("%s: inconsistent ifp %p", __func__, ifp));
2945 		IF_ADDR_LOCK_ASSERT(ifp);
2946 	}
2947 
2948 	ifp = ifma->ifma_ifp;
2949 
2950 	/*
2951 	 * If the ifnet is detaching, null out references to ifnet,
2952 	 * so that upper protocol layers will notice, and not attempt
2953 	 * to obtain locks for an ifnet which no longer exists. The
2954 	 * routing socket announcement must happen before the ifnet
2955 	 * instance is detached from the system.
2956 	 */
2957 	if (detaching) {
2958 #ifdef DIAGNOSTIC
2959 		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
2960 #endif
2961 		/*
2962 		 * ifp may already be nulled out if we are being reentered
2963 		 * to delete the ll_ifma.
2964 		 */
2965 		if (ifp != NULL) {
2966 			rt_newmaddrmsg(RTM_DELMADDR, ifma);
2967 			ifma->ifma_ifp = NULL;
2968 		}
2969 	}
2970 
2971 	if (--ifma->ifma_refcount > 0)
2972 		return 0;
2973 
2974 	/*
2975 	 * If this ifma is a network-layer ifma, a link-layer ifma may
2976 	 * have been associated with it. Release it first if so.
2977 	 */
2978 	ll_ifma = ifma->ifma_llifma;
2979 	if (ll_ifma != NULL) {
2980 		KASSERT(ifma->ifma_lladdr != NULL,
2981 		    ("%s: llifma w/o lladdr", __func__));
2982 		if (detaching)
2983 			ll_ifma->ifma_ifp = NULL;	/* XXX */
2984 		if (--ll_ifma->ifma_refcount == 0) {
2985 			if (ifp != NULL) {
2986 				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
2987 				    ifma_link);
2988 			}
2989 			if_freemulti(ll_ifma);
2990 		}
2991 	}
2992 
2993 	if (ifp != NULL)
2994 		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2995 
2996 	if_freemulti(ifma);
2997 
2998 	/*
2999 	 * The last reference to this instance of struct ifmultiaddr
3000 	 * was released; the hardware should be notified of this change.
3001 	 */
3002 	return 1;
3003 }
3004 
3005 /*
3006  * Set the link layer address on an interface.
3007  *
3008  * At this time we only support certain types of interfaces,
3009  * and we don't allow the length of the address to change.
3010  */
3011 int
3012 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3013 {
3014 	struct sockaddr_dl *sdl;
3015 	struct ifaddr *ifa;
3016 	struct ifreq ifr;
3017 
3018 	IF_ADDR_LOCK(ifp);
3019 	ifa = ifp->if_addr;
3020 	if (ifa == NULL) {
3021 		IF_ADDR_UNLOCK(ifp);
3022 		return (EINVAL);
3023 	}
3024 	ifa_ref(ifa);
3025 	IF_ADDR_UNLOCK(ifp);
3026 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3027 	if (sdl == NULL) {
3028 		ifa_free(ifa);
3029 		return (EINVAL);
3030 	}
3031 	if (len != sdl->sdl_alen) {	/* don't allow length to change */
3032 		ifa_free(ifa);
3033 		return (EINVAL);
3034 	}
3035 	switch (ifp->if_type) {
3036 	case IFT_ETHER:
3037 	case IFT_FDDI:
3038 	case IFT_XETHER:
3039 	case IFT_ISO88025:
3040 	case IFT_L2VLAN:
3041 	case IFT_BRIDGE:
3042 	case IFT_ARCNET:
3043 	case IFT_IEEE8023ADLAG:
3044 	case IFT_IEEE80211:
3045 		bcopy(lladdr, LLADDR(sdl), len);
3046 		ifa_free(ifa);
3047 		break;
3048 	default:
3049 		ifa_free(ifa);
3050 		return (ENODEV);
3051 	}
3052 
3053 	/*
3054 	 * If the interface is already up, we need
3055 	 * to re-init it in order to reprogram its
3056 	 * address filter.
3057 	 */
3058 	if ((ifp->if_flags & IFF_UP) != 0) {
3059 		if (ifp->if_ioctl) {
3060 			ifp->if_flags &= ~IFF_UP;
3061 			ifr.ifr_flags = ifp->if_flags & 0xffff;
3062 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3063 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3064 			ifp->if_flags |= IFF_UP;
3065 			ifr.ifr_flags = ifp->if_flags & 0xffff;
3066 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3067 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3068 		}
3069 #ifdef INET
3070 		/*
3071 		 * Also send gratuitous ARPs to notify other nodes about
3072 		 * the address change.
3073 		 */
3074 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3075 			if (ifa->ifa_addr->sa_family == AF_INET)
3076 				arp_ifinit(ifp, ifa);
3077 		}
3078 #endif
3079 	}
3080 	return (0);
3081 }
3082 
3083 /*
3084  * The name argument must be a pointer to storage which will last as
3085  * long as the interface does.  For physical devices, the result of
3086  * device_get_name(dev) is a good choice and for pseudo-devices a
3087  * static string works well.
3088  */
3089 void
3090 if_initname(struct ifnet *ifp, const char *name, int unit)
3091 {
3092 	ifp->if_dname = name;
3093 	ifp->if_dunit = unit;
3094 	if (unit != IF_DUNIT_NONE)
3095 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3096 	else
3097 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3098 }
3099 
3100 int
3101 if_printf(struct ifnet *ifp, const char * fmt, ...)
3102 {
3103 	va_list ap;
3104 	int retval;
3105 
3106 	retval = printf("%s: ", ifp->if_xname);
3107 	va_start(ap, fmt);
3108 	retval += vprintf(fmt, ap);
3109 	va_end(ap);
3110 	return (retval);
3111 }
3112 
3113 void
3114 if_start(struct ifnet *ifp)
3115 {
3116 
3117 	(*(ifp)->if_start)(ifp);
3118 }
3119 
/*
 * Backwards compatibility transmit routine for drivers that have not
 * implemented their own: hands the mbuf over with IFQ_HANDOFF() and
 * returns the error value that macro produces.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int status;

	IFQ_HANDOFF(ifp, m, status);
	return (status);
}
3132 
3133 int
3134 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3135 {
3136 	int active = 0;
3137 
3138 	IF_LOCK(ifq);
3139 	if (_IF_QFULL(ifq)) {
3140 		_IF_DROP(ifq);
3141 		IF_UNLOCK(ifq);
3142 		m_freem(m);
3143 		return (0);
3144 	}
3145 	if (ifp != NULL) {
3146 		ifp->if_obytes += m->m_pkthdr.len + adjust;
3147 		if (m->m_flags & (M_BCAST|M_MCAST))
3148 			ifp->if_omcasts++;
3149 		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3150 	}
3151 	_IF_ENQUEUE(ifq, m);
3152 	IF_UNLOCK(ifq);
3153 	if (ifp != NULL && !active)
3154 		(*(ifp)->if_start)(ifp);
3155 	return (1);
3156 }
3157 
3158 void
3159 if_register_com_alloc(u_char type,
3160     if_com_alloc_t *a, if_com_free_t *f)
3161 {
3162 
3163 	KASSERT(if_com_alloc[type] == NULL,
3164 	    ("if_register_com_alloc: %d already registered", type));
3165 	KASSERT(if_com_free[type] == NULL,
3166 	    ("if_register_com_alloc: %d free already registered", type));
3167 
3168 	if_com_alloc[type] = a;
3169 	if_com_free[type] = f;
3170 }
3171 
3172 void
3173 if_deregister_com_alloc(u_char type)
3174 {
3175 
3176 	KASSERT(if_com_alloc[type] != NULL,
3177 	    ("if_deregister_com_alloc: %d not registered", type));
3178 	KASSERT(if_com_free[type] != NULL,
3179 	    ("if_deregister_com_alloc: %d free not registered", type));
3180 	if_com_alloc[type] = NULL;
3181 	if_com_free[type] = NULL;
3182 }
3183