xref: /freebsd/sys/net/if.c (revision a4eb85b6acb49cb60c72c2cab0d0d3f00eaa6d46)
1 /*-
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)if.c	8.5 (Berkeley) 1/9/95
30  * $FreeBSD$
31  */
32 
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36 #include "opt_mac.h"
37 #include "opt_carp.h"
38 
39 #include <sys/param.h>
40 #include <sys/types.h>
41 #include <sys/conf.h>
42 #include <sys/mac.h>
43 #include <sys/malloc.h>
44 #include <sys/sbuf.h>
45 #include <sys/bus.h>
46 #include <sys/mbuf.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/protosw.h>
52 #include <sys/kernel.h>
53 #include <sys/sockio.h>
54 #include <sys/syslog.h>
55 #include <sys/sysctl.h>
56 #include <sys/taskqueue.h>
57 #include <sys/domain.h>
58 #include <sys/jail.h>
59 #include <machine/stdarg.h>
60 
61 #include <net/if.h>
62 #include <net/if_clone.h>
63 #include <net/if_dl.h>
64 #include <net/if_types.h>
65 #include <net/if_var.h>
66 #include <net/radix.h>
67 #include <net/route.h>
68 
69 #if defined(INET) || defined(INET6)
70 /*XXX*/
71 #include <netinet/in.h>
72 #include <netinet/in_var.h>
73 #ifdef INET6
74 #include <netinet6/in6_var.h>
75 #include <netinet6/in6_ifattach.h>
76 #endif
77 #endif
78 #ifdef INET
79 #include <netinet/if_ether.h>
80 #endif
81 #ifdef DEV_CARP
82 #include <netinet/ip_carp.h>
83 #endif
84 
85 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
86 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
87 
88 /* Log link state change events */
89 static int log_link_state_change = 1;
90 
91 SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
92 	&log_link_state_change, 0,
93 	"log interface link state change events");
94 
95 void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
96 void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
97 
98 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
99 
100 static void	if_attachdomain(void *);
101 static void	if_attachdomain1(struct ifnet *);
102 static int	ifconf(u_long, caddr_t);
103 static void	if_grow(void);
104 static void	if_init(void *);
105 static void	if_check(void *);
106 static void	if_qflush(struct ifaltq *);
107 static void	if_route(struct ifnet *, int flag, int fam);
108 static int	if_setflag(struct ifnet *, int, int, int *, int);
109 static void	if_slowtimo(void *);
110 static void	if_unroute(struct ifnet *, int flag, int fam);
111 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
112 static int	if_rtdel(struct radix_node *, void *);
113 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
114 static void	if_start_deferred(void *context, int pending);
115 static void	do_link_state_change(void *, int);
116 static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
117 static int	if_getgroupmembers(struct ifgroupreq *);
118 #ifdef INET6
119 /*
 * XXX: declared here to avoid including many inet6-related files;
 * should this be generalized?
122  */
123 extern void	nd6_setmtu(struct ifnet *);
124 #endif
125 
/*
 * Highest interface index currently handed out.  if_alloc() raises it
 * when it takes a new slot and if_free_type() lowers it past trailing
 * empty slots.
 */
int	if_index = 0;
/* Table indexed by interface index; (re)allocated by if_grow(). */
struct	ifindex_entry *ifindex_table = NULL;
/* Queue length if_check() assigns when a driver left ifq_maxlen at 0. */
int	ifqmaxlen = IFQ_MAXLEN;
/* List of attached interfaces; see if_attach() and if_detach(). */
struct	ifnethead ifnet;	/* depend on static init XXX */
/* List of all interface groups; see if_addgroup() and if_delgroup(). */
struct	ifgrouphead ifg_head;
/* NOTE(review): presumably the mutex behind the IFNET_*LOCK macros -- confirm. */
struct	mtx ifnet_lock;
/*
 * Optional per-interface-type hooks, indexed by if_type, that if_alloc()
 * and if_free_type() call to create and release ifp->if_l2com.
 */
static	if_com_alloc_t *if_com_alloc[256];
static	if_com_free_t *if_com_free[256];

/*
 * Entry count used to size ifindex_table.  if_grow() doubles it before
 * each allocation, so the first table holds twice this initial value.
 */
static int	if_indexlim = 8;
/* knotes registered on the minor-0 "network" device (see netkqfilter()). */
static struct	knlist ifklist;
137 
138 static void	filt_netdetach(struct knote *kn);
139 static int	filt_netdev(struct knote *kn, long hint);
140 
141 static struct filterops netdev_filtops =
142     { 1, NULL, filt_netdetach, filt_netdev };
143 
144 /*
145  * System initialization
146  */
147 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
148 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
149 
150 MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
151 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
152 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
153 
154 static d_open_t		netopen;
155 static d_close_t	netclose;
156 static d_ioctl_t	netioctl;
157 static d_kqfilter_t	netkqfilter;
158 
/*
 * Character-device switch used both for the "network" control device
 * created with minor 0 in if_init() and for the per-interface device
 * nodes created in if_attach().
 */
static struct cdevsw net_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	netopen,
	.d_close =	netclose,
	.d_ioctl =	netioctl,
	.d_name =	"net",
	.d_kqfilter =	netkqfilter,
};
168 
/*
 * Open handler for the network devices.  No per-open state is kept, so
 * every open succeeds and all arguments are ignored.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	const int error = 0;		/* opening never fails */

	return (error);
}
174 
/*
 * Close handler for the network devices.  There is nothing to release,
 * so every close succeeds and all arguments are ignored.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	const int error = 0;		/* closing never fails */

	return (error);
}
180 
181 static int
182 netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
183 {
184 	struct ifnet *ifp;
185 	int error, idx;
186 
187 	/* only support interface specific ioctls */
188 	if (IOCGROUP(cmd) != 'i')
189 		return (EOPNOTSUPP);
190 	idx = minor(dev);
191 	if (idx == 0) {
192 		/*
193 		 * special network device, not interface.
194 		 */
195 		if (cmd == SIOCGIFCONF)
196 			return (ifconf(cmd, data));	/* XXX remove cmd */
197 #ifdef __amd64__
198 		if (cmd == SIOCGIFCONF32)
199 			return (ifconf(cmd, data));	/* XXX remove cmd */
200 #endif
201 		return (EOPNOTSUPP);
202 	}
203 
204 	ifp = ifnet_byindex(idx);
205 	if (ifp == NULL)
206 		return (ENXIO);
207 
208 	error = ifhwioctl(cmd, ifp, data, td);
209 	if (error == ENOIOCTL)
210 		error = EOPNOTSUPP;
211 	return (error);
212 }
213 
214 static int
215 netkqfilter(struct cdev *dev, struct knote *kn)
216 {
217 	struct knlist *klist;
218 	struct ifnet *ifp;
219 	int idx;
220 
221 	switch (kn->kn_filter) {
222 	case EVFILT_NETDEV:
223 		kn->kn_fop = &netdev_filtops;
224 		break;
225 	default:
226 		return (EINVAL);
227 	}
228 
229 	idx = minor(dev);
230 	if (idx == 0) {
231 		klist = &ifklist;
232 	} else {
233 		ifp = ifnet_byindex(idx);
234 		if (ifp == NULL)
235 			return (1);
236 		klist = &ifp->if_klist;
237 	}
238 
239 	kn->kn_hook = (caddr_t)klist;
240 
241 	knlist_add(klist, kn, 0);
242 
243 	return (0);
244 }
245 
246 static void
247 filt_netdetach(struct knote *kn)
248 {
249 	struct knlist *klist = (struct knlist *)kn->kn_hook;
250 
251 	knlist_remove(klist, kn, 0);
252 }
253 
254 static int
255 filt_netdev(struct knote *kn, long hint)
256 {
257 	struct knlist *klist = (struct knlist *)kn->kn_hook;
258 
259 	/*
260 	 * Currently NOTE_EXIT is abused to indicate device detach.
261 	 */
262 	if (hint == NOTE_EXIT) {
263 		kn->kn_data = NOTE_LINKINV;
264 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
265 		knlist_remove_inevent(klist, kn);
266 		return (1);
267 	}
268 	if (hint != 0)
269 		kn->kn_data = hint;			/* current status */
270 	if (kn->kn_sfflags & hint)
271 		kn->kn_fflags |= hint;
272 	return (kn->kn_fflags != 0);
273 }
274 
275 /*
276  * Network interface utility routines.
277  *
278  * Routines with ifa_ifwith* names take sockaddr *'s as
279  * parameters.
280  */
281 /* ARGSUSED*/
282 static void
283 if_init(void *dummy __unused)
284 {
285 
286 	IFNET_LOCK_INIT();
287 	TAILQ_INIT(&ifnet);
288 	TAILQ_INIT(&ifg_head);
289 	knlist_init(&ifklist, NULL, NULL, NULL, NULL);
290 	if_grow();				/* create initial table */
291 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
292 	    UID_ROOT, GID_WHEEL, 0600, "network");
293 	if_clone_init();
294 }
295 
296 static void
297 if_grow(void)
298 {
299 	u_int n;
300 	struct ifindex_entry *e;
301 
302 	if_indexlim <<= 1;
303 	n = if_indexlim * sizeof(*e);
304 	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
305 	if (ifindex_table != NULL) {
306 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
307 		free((caddr_t)ifindex_table, M_IFNET);
308 	}
309 	ifindex_table = e;
310 }
311 
312 /* ARGSUSED*/
313 static void
314 if_check(void *dummy __unused)
315 {
316 	struct ifnet *ifp;
317 	int s;
318 
319 	s = splimp();
320 	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
321 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
322 		if (ifp->if_snd.ifq_maxlen == 0) {
323 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
324 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
325 		}
326 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
327 			if_printf(ifp,
328 			    "XXX: driver didn't initialize queue mtx\n");
329 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
330 			    MTX_NETWORK_LOCK, MTX_DEF);
331 		}
332 	}
333 	IFNET_RUNLOCK();
334 	splx(s);
335 	if_slowtimo(0);
336 }
337 
338 /*
339  * Allocate a struct ifnet and in index for an interface.
340  */
341 struct ifnet*
342 if_alloc(u_char type)
343 {
344 	struct ifnet *ifp;
345 
346 	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
347 
348 	/*
349 	 * Try to find an empty slot below if_index.  If we fail, take
350 	 * the next slot.
351 	 *
352 	 * XXX: should be locked!
353 	 */
354 	for (ifp->if_index = 1; ifp->if_index <= if_index; ifp->if_index++) {
355 		if (ifnet_byindex(ifp->if_index) == NULL)
356 			break;
357 	}
358 	/* Catch if_index overflow. */
359 	if (ifp->if_index < 1) {
360 		free(ifp, M_IFNET);
361 		return (NULL);
362 	}
363 	if (ifp->if_index > if_index)
364 		if_index = ifp->if_index;
365 	if (if_index >= if_indexlim)
366 		if_grow();
367 	ifnet_byindex(ifp->if_index) = ifp;
368 
369 	ifp->if_type = type;
370 
371 	if (if_com_alloc[type] != NULL) {
372 		ifp->if_l2com = if_com_alloc[type](type, ifp);
373 		if (ifp->if_l2com == NULL) {
374 			free(ifp, M_IFNET);
375 			return (NULL);
376 		}
377 	}
378 	IF_ADDR_LOCK_INIT(ifp);
379 
380 	return (ifp);
381 }
382 
383 void
384 if_free(struct ifnet *ifp)
385 {
386 
387 	/* Do not add code to this function!  Add it to if_free_type(). */
388 	if_free_type(ifp, ifp->if_type);
389 }
390 
391 void
392 if_free_type(struct ifnet *ifp, u_char type)
393 {
394 
395 	if (ifp != ifnet_byindex(ifp->if_index)) {
396 		if_printf(ifp, "%s: value was not if_alloced, skipping\n",
397 		    __func__);
398 		return;
399 	}
400 
401 	IF_ADDR_LOCK_DESTROY(ifp);
402 
403 	ifnet_byindex(ifp->if_index) = NULL;
404 
405 	/* XXX: should be locked with if_findindex() */
406 	while (if_index > 0 && ifnet_byindex(if_index) == NULL)
407 		if_index--;
408 
409 	if (if_com_free[type] != NULL)
410 		if_com_free[type](ifp->if_l2com, type);
411 
412 	free(ifp, M_IFNET);
413 };
414 
415 /*
416  * Attach an interface to the
417  * list of "active" interfaces.
418  */
419 void
420 if_attach(struct ifnet *ifp)
421 {
422 	unsigned socksize, ifasize;
423 	int namelen, masklen;
424 	struct sockaddr_dl *sdl;
425 	struct ifaddr *ifa;
426 
427 	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
428 		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
429 		    ifp->if_xname);
430 
431 	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
432 	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
433 	IF_AFDATA_LOCK_INIT(ifp);
434 	ifp->if_afdata_initialized = 0;
435 	/*
436 	 * XXX -
437 	 * The old code would work if the interface passed a pre-existing
438 	 * chain of ifaddrs to this code.  We don't trust our callers to
439 	 * properly initialize the tailq, however, so we no longer allow
440 	 * this unlikely case.
441 	 */
442 	TAILQ_INIT(&ifp->if_addrhead);
443 	TAILQ_INIT(&ifp->if_prefixhead);
444 	TAILQ_INIT(&ifp->if_multiaddrs);
445 	TAILQ_INIT(&ifp->if_groups);
446 
447 	if_addgroup(ifp, IFG_ALL);
448 
449 	knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
450 	getmicrotime(&ifp->if_lastchange);
451 	ifp->if_data.ifi_epoch = time_uptime;
452 	ifp->if_data.ifi_datalen = sizeof(struct if_data);
453 
454 #ifdef MAC
455 	mac_init_ifnet(ifp);
456 	mac_create_ifnet(ifp);
457 #endif
458 
459 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
460 	    unit2minor(ifp->if_index),
461 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
462 	    net_cdevsw.d_name, ifp->if_xname);
463 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
464 	    net_cdevsw.d_name, ifp->if_index);
465 
466 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
467 
468 	/*
469 	 * create a Link Level name for this device
470 	 */
471 	namelen = strlen(ifp->if_xname);
472 	/*
473 	 * Always save enough space for any possiable name so we can do
474 	 * a rename in place later.
475 	 */
476 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
477 	socksize = masklen + ifp->if_addrlen;
478 	if (socksize < sizeof(*sdl))
479 		socksize = sizeof(*sdl);
480 	socksize = roundup2(socksize, sizeof(long));
481 	ifasize = sizeof(*ifa) + 2 * socksize;
482 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
483 	IFA_LOCK_INIT(ifa);
484 	sdl = (struct sockaddr_dl *)(ifa + 1);
485 	sdl->sdl_len = socksize;
486 	sdl->sdl_family = AF_LINK;
487 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
488 	sdl->sdl_nlen = namelen;
489 	sdl->sdl_index = ifp->if_index;
490 	sdl->sdl_type = ifp->if_type;
491 	ifp->if_addr = ifa;
492 	ifa->ifa_ifp = ifp;
493 	ifa->ifa_rtrequest = link_rtrequest;
494 	ifa->ifa_addr = (struct sockaddr *)sdl;
495 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
496 	ifa->ifa_netmask = (struct sockaddr *)sdl;
497 	sdl->sdl_len = masklen;
498 	while (namelen != 0)
499 		sdl->sdl_data[--namelen] = 0xff;
500 	ifa->ifa_refcnt = 1;
501 	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
502 	ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
503 	ifp->if_snd.altq_type = 0;
504 	ifp->if_snd.altq_disc = NULL;
505 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
506 	ifp->if_snd.altq_tbr  = NULL;
507 	ifp->if_snd.altq_ifp  = ifp;
508 
509 	IFNET_WLOCK();
510 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
511 	IFNET_WUNLOCK();
512 
513 	if (domain_init_status >= 2)
514 		if_attachdomain1(ifp);
515 
516 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
517 	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
518 
519 	/* Announce the interface. */
520 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
521 }
522 
523 static void
524 if_attachdomain(void *dummy)
525 {
526 	struct ifnet *ifp;
527 	int s;
528 
529 	s = splnet();
530 	TAILQ_FOREACH(ifp, &ifnet, if_link)
531 		if_attachdomain1(ifp);
532 	splx(s);
533 }
534 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
535     if_attachdomain, NULL);
536 
537 static void
538 if_attachdomain1(struct ifnet *ifp)
539 {
540 	struct domain *dp;
541 	int s;
542 
543 	s = splnet();
544 
545 	/*
546 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
547 	 * cannot lock ifp->if_afdata initialization, entirely.
548 	 */
549 	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
550 		splx(s);
551 		return;
552 	}
553 	if (ifp->if_afdata_initialized >= domain_init_status) {
554 		IF_AFDATA_UNLOCK(ifp);
555 		splx(s);
556 		printf("if_attachdomain called more than once on %s\n",
557 		    ifp->if_xname);
558 		return;
559 	}
560 	ifp->if_afdata_initialized = domain_init_status;
561 	IF_AFDATA_UNLOCK(ifp);
562 
563 	/* address family dependent data region */
564 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
565 	for (dp = domains; dp; dp = dp->dom_next) {
566 		if (dp->dom_ifattach)
567 			ifp->if_afdata[dp->dom_family] =
568 			    (*dp->dom_ifattach)(ifp);
569 	}
570 
571 	splx(s);
572 }
573 
574 /*
575  * Remove any network addresses from an interface.
576  */
577 
578 void
579 if_purgeaddrs(struct ifnet *ifp)
580 {
581 	struct ifaddr *ifa, *next;
582 
583 	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
584 		if (ifa->ifa_addr->sa_family == AF_LINK)
585 			continue;
586 #ifdef INET
587 		/* XXX: Ugly!! ad hoc just for INET */
588 		if (ifa->ifa_addr->sa_family == AF_INET) {
589 			struct ifaliasreq ifr;
590 
591 			bzero(&ifr, sizeof(ifr));
592 			ifr.ifra_addr = *ifa->ifa_addr;
593 			if (ifa->ifa_dstaddr)
594 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
595 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
596 			    NULL) == 0)
597 				continue;
598 		}
599 #endif /* INET */
600 #ifdef INET6
601 		if (ifa->ifa_addr->sa_family == AF_INET6) {
602 			in6_purgeaddr(ifa);
603 			/* ifp_addrhead is already updated */
604 			continue;
605 		}
606 #endif /* INET6 */
607 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
608 		IFAFREE(ifa);
609 	}
610 }
611 
612 /*
613  * Detach an interface, removing it from the
614  * list of "active" interfaces.
615  *
616  * XXXRW: There are some significant questions about event ordering, and
617  * how to prevent things from starting to use the interface during detach.
618  */
619 void
620 if_detach(struct ifnet *ifp)
621 {
622 	struct ifaddr *ifa;
623 	struct radix_node_head	*rnh;
624 	int s;
625 	int i;
626 	struct domain *dp;
627  	struct ifnet *iter;
628  	int found = 0;
629 
630 	IFNET_WLOCK();
631 	TAILQ_FOREACH(iter, &ifnet, if_link)
632 		if (iter == ifp) {
633 			TAILQ_REMOVE(&ifnet, ifp, if_link);
634 			found = 1;
635 			break;
636 		}
637 	IFNET_WUNLOCK();
638 	if (!found)
639 		return;
640 
641 	/*
642 	 * Remove/wait for pending events.
643 	 */
644 	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
645 
646 	/*
647 	 * Remove routes and flush queues.
648 	 */
649 	s = splnet();
650 	if_down(ifp);
651 #ifdef ALTQ
652 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
653 		altq_disable(&ifp->if_snd);
654 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
655 		altq_detach(&ifp->if_snd);
656 #endif
657 
658 	if_purgeaddrs(ifp);
659 
660 #ifdef INET
661 	in_ifdetach(ifp);
662 #endif
663 
664 #ifdef INET6
665 	/*
666 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
667 	 * before removing routing entries below, since IPv6 interface direct
668 	 * routes are expected to be removed by the IPv6-specific kernel API.
669 	 * Otherwise, the kernel will detect some inconsistency and bark it.
670 	 */
671 	in6_ifdetach(ifp);
672 #endif
673 	/*
674 	 * Remove link ifaddr pointer and maybe decrement if_index.
675 	 * Clean up all addresses.
676 	 */
677 	ifp->if_addr = NULL;
678 	destroy_dev(ifdev_byindex(ifp->if_index));
679 	ifdev_byindex(ifp->if_index) = NULL;
680 
681 	/* We can now free link ifaddr. */
682 	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
683 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
684 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
685 		IFAFREE(ifa);
686 	}
687 
688 	/*
689 	 * Delete all remaining routes using this interface
690 	 * Unfortuneatly the only way to do this is to slog through
691 	 * the entire routing table looking for routes which point
692 	 * to this interface...oh well...
693 	 */
694 	for (i = 1; i <= AF_MAX; i++) {
695 		if ((rnh = rt_tables[i]) == NULL)
696 			continue;
697 		RADIX_NODE_HEAD_LOCK(rnh);
698 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
699 		RADIX_NODE_HEAD_UNLOCK(rnh);
700 	}
701 
702 	/* Announce that the interface is gone. */
703 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
704 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
705 	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
706 
707 	IF_AFDATA_LOCK(ifp);
708 	for (dp = domains; dp; dp = dp->dom_next) {
709 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
710 			(*dp->dom_ifdetach)(ifp,
711 			    ifp->if_afdata[dp->dom_family]);
712 	}
713 	IF_AFDATA_UNLOCK(ifp);
714 
715 #ifdef MAC
716 	mac_destroy_ifnet(ifp);
717 #endif /* MAC */
718 	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
719 	knlist_clear(&ifp->if_klist, 0);
720 	knlist_destroy(&ifp->if_klist);
721 	mtx_destroy(&ifp->if_snd.ifq_mtx);
722 	IF_AFDATA_DESTROY(ifp);
723 	splx(s);
724 }
725 
726 /*
727  * Add a group to an interface
728  */
729 int
730 if_addgroup(struct ifnet *ifp, const char *groupname)
731 {
732 	struct ifg_list		*ifgl;
733 	struct ifg_group	*ifg = NULL;
734 	struct ifg_member	*ifgm;
735 
736 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
737 	    groupname[strlen(groupname) - 1] <= '9')
738 		return (EINVAL);
739 
740 	IFNET_WLOCK();
741 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
742 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
743 			IFNET_WUNLOCK();
744 			return (EEXIST);
745 		}
746 
747 	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
748 	    M_NOWAIT)) == NULL) {
749 	    	IFNET_WUNLOCK();
750 		return (ENOMEM);
751 	}
752 
753 	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
754 	    M_TEMP, M_NOWAIT)) == NULL) {
755 		free(ifgl, M_TEMP);
756 		IFNET_WUNLOCK();
757 		return (ENOMEM);
758 	}
759 
760 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
761 		if (!strcmp(ifg->ifg_group, groupname))
762 			break;
763 
764 	if (ifg == NULL) {
765 		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
766 		    M_TEMP, M_NOWAIT)) == NULL) {
767 			free(ifgl, M_TEMP);
768 			free(ifgm, M_TEMP);
769 			IFNET_WUNLOCK();
770 			return (ENOMEM);
771 		}
772 		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
773 		ifg->ifg_refcnt = 0;
774 		TAILQ_INIT(&ifg->ifg_members);
775 		EVENTHANDLER_INVOKE(group_attach_event, ifg);
776 		TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
777 	}
778 
779 	ifg->ifg_refcnt++;
780 	ifgl->ifgl_group = ifg;
781 	ifgm->ifgm_ifp = ifp;
782 
783 	IF_ADDR_LOCK(ifp);
784 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
785 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
786 	IF_ADDR_UNLOCK(ifp);
787 
788 	IFNET_WUNLOCK();
789 
790 	EVENTHANDLER_INVOKE(group_change_event, groupname);
791 
792 	return (0);
793 }
794 
795 /*
796  * Remove a group from an interface
797  */
798 int
799 if_delgroup(struct ifnet *ifp, const char *groupname)
800 {
801 	struct ifg_list		*ifgl;
802 	struct ifg_member	*ifgm;
803 
804 	IFNET_WLOCK();
805 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
806 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
807 			break;
808 	if (ifgl == NULL) {
809 		IFNET_WUNLOCK();
810 		return (ENOENT);
811 	}
812 
813 	IF_ADDR_LOCK(ifp);
814 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
815 	IF_ADDR_UNLOCK(ifp);
816 
817 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
818 		if (ifgm->ifgm_ifp == ifp)
819 			break;
820 
821 	if (ifgm != NULL) {
822 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
823 		free(ifgm, M_TEMP);
824 	}
825 
826 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
827 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
828 		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
829 		free(ifgl->ifgl_group, M_TEMP);
830 	}
831 	IFNET_WUNLOCK();
832 
833 	free(ifgl, M_TEMP);
834 
835 	EVENTHANDLER_INVOKE(group_change_event, groupname);
836 
837 	return (0);
838 }
839 
840 /*
841  * Stores all groups from an interface in memory pointed
842  * to by data
843  */
844 static int
845 if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
846 {
847 	int			 len, error;
848 	struct ifg_list		*ifgl;
849 	struct ifg_req		 ifgrq, *ifgp;
850 	struct ifgroupreq	*ifgr = data;
851 
852 	if (ifgr->ifgr_len == 0) {
853 		IF_ADDR_LOCK(ifp);
854 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
855 			ifgr->ifgr_len += sizeof(struct ifg_req);
856 		IF_ADDR_UNLOCK(ifp);
857 		return (0);
858 	}
859 
860 	len = ifgr->ifgr_len;
861 	ifgp = ifgr->ifgr_groups;
862 	/* XXX: wire */
863 	IF_ADDR_LOCK(ifp);
864 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
865 		if (len < sizeof(ifgrq)) {
866 			IF_ADDR_UNLOCK(ifp);
867 			return (EINVAL);
868 		}
869 		bzero(&ifgrq, sizeof ifgrq);
870 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
871 		    sizeof(ifgrq.ifgrq_group));
872 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
873 		    	IF_ADDR_UNLOCK(ifp);
874 			return (error);
875 		}
876 		len -= sizeof(ifgrq);
877 		ifgp++;
878 	}
879 	IF_ADDR_UNLOCK(ifp);
880 
881 	return (0);
882 }
883 
884 /*
885  * Stores all members of a group in memory pointed to by data
886  */
887 static int
888 if_getgroupmembers(struct ifgroupreq *data)
889 {
890 	struct ifgroupreq	*ifgr = data;
891 	struct ifg_group	*ifg;
892 	struct ifg_member	*ifgm;
893 	struct ifg_req		 ifgrq, *ifgp;
894 	int			 len, error;
895 
896 	IFNET_RLOCK();
897 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
898 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
899 			break;
900 	if (ifg == NULL) {
901 		IFNET_RUNLOCK();
902 		return (ENOENT);
903 	}
904 
905 	if (ifgr->ifgr_len == 0) {
906 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
907 			ifgr->ifgr_len += sizeof(ifgrq);
908 		IFNET_RUNLOCK();
909 		return (0);
910 	}
911 
912 	len = ifgr->ifgr_len;
913 	ifgp = ifgr->ifgr_groups;
914 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
915 		if (len < sizeof(ifgrq)) {
916 			IFNET_RUNLOCK();
917 			return (EINVAL);
918 		}
919 		bzero(&ifgrq, sizeof ifgrq);
920 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
921 		    sizeof(ifgrq.ifgrq_member));
922 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
923 			IFNET_RUNLOCK();
924 			return (error);
925 		}
926 		len -= sizeof(ifgrq);
927 		ifgp++;
928 	}
929 	IFNET_RUNLOCK();
930 
931 	return (0);
932 }
933 
934 /*
935  * Delete Routes for a Network Interface
936  *
937  * Called for each routing entry via the rnh->rnh_walktree() call above
938  * to delete all route entries referencing a detaching network interface.
939  *
940  * Arguments:
941  *	rn	pointer to node in the routing table
942  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
943  *
944  * Returns:
945  *	0	successful
946  *	errno	failed - reason indicated
947  *
948  */
949 static int
950 if_rtdel(struct radix_node *rn, void *arg)
951 {
952 	struct rtentry	*rt = (struct rtentry *)rn;
953 	struct ifnet	*ifp = arg;
954 	int		err;
955 
956 	if (rt->rt_ifp == ifp) {
957 
958 		/*
959 		 * Protect (sorta) against walktree recursion problems
960 		 * with cloned routes
961 		 */
962 		if ((rt->rt_flags & RTF_UP) == 0)
963 			return (0);
964 
965 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
966 				rt_mask(rt), rt->rt_flags,
967 				(struct rtentry **) NULL);
968 		if (err) {
969 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
970 		}
971 	}
972 
973 	return (0);
974 }
975 
/*
 * True when two socket addresses are bytewise identical over the length
 * (sa_len) of the FIRST argument; the second argument's own length is
 * not consulted.
 */
#define	sa_equal(a1, a2)	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
977 
978 /*
979  * Locate an interface based on a complete address.
980  */
981 /*ARGSUSED*/
982 struct ifaddr *
983 ifa_ifwithaddr(struct sockaddr *addr)
984 {
985 	struct ifnet *ifp;
986 	struct ifaddr *ifa;
987 
988 	IFNET_RLOCK();
989 	TAILQ_FOREACH(ifp, &ifnet, if_link)
990 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
991 			if (ifa->ifa_addr->sa_family != addr->sa_family)
992 				continue;
993 			if (sa_equal(addr, ifa->ifa_addr))
994 				goto done;
995 			/* IP6 doesn't have broadcast */
996 			if ((ifp->if_flags & IFF_BROADCAST) &&
997 			    ifa->ifa_broadaddr &&
998 			    ifa->ifa_broadaddr->sa_len != 0 &&
999 			    sa_equal(ifa->ifa_broadaddr, addr))
1000 				goto done;
1001 		}
1002 	ifa = NULL;
1003 done:
1004 	IFNET_RUNLOCK();
1005 	return (ifa);
1006 }
1007 
1008 /*
1009  * Locate the point to point interface with a given destination address.
1010  */
1011 /*ARGSUSED*/
1012 struct ifaddr *
1013 ifa_ifwithdstaddr(struct sockaddr *addr)
1014 {
1015 	struct ifnet *ifp;
1016 	struct ifaddr *ifa;
1017 
1018 	IFNET_RLOCK();
1019 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1020 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1021 			continue;
1022 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1023 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1024 				continue;
1025 			if (ifa->ifa_dstaddr &&
1026 			    sa_equal(addr, ifa->ifa_dstaddr))
1027 				goto done;
1028 		}
1029 	}
1030 	ifa = NULL;
1031 done:
1032 	IFNET_RUNLOCK();
1033 	return (ifa);
1034 }
1035 
1036 /*
1037  * Find an interface on a specific network.  If many, choice
1038  * is most specific found.
1039  */
1040 struct ifaddr *
1041 ifa_ifwithnet(struct sockaddr *addr)
1042 {
1043 	struct ifnet *ifp;
1044 	struct ifaddr *ifa;
1045 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
1046 	u_int af = addr->sa_family;
1047 	char *addr_data = addr->sa_data, *cplim;
1048 
1049 	/*
1050 	 * AF_LINK addresses can be looked up directly by their index number,
1051 	 * so do that if we can.
1052 	 */
1053 	if (af == AF_LINK) {
1054 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1055 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
1056 		return (ifaddr_byindex(sdl->sdl_index));
1057 	}
1058 
1059 	/*
1060 	 * Scan though each interface, looking for ones that have
1061 	 * addresses in this address family.
1062 	 */
1063 	IFNET_RLOCK();
1064 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1065 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1066 			char *cp, *cp2, *cp3;
1067 
1068 			if (ifa->ifa_addr->sa_family != af)
1069 next:				continue;
1070 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1071 				/*
1072 				 * This is a bit broken as it doesn't
1073 				 * take into account that the remote end may
1074 				 * be a single node in the network we are
1075 				 * looking for.
1076 				 * The trouble is that we don't know the
1077 				 * netmask for the remote end.
1078 				 */
1079 				if (ifa->ifa_dstaddr != 0 &&
1080 				    sa_equal(addr, ifa->ifa_dstaddr))
1081 					goto done;
1082 			} else {
1083 				/*
1084 				 * if we have a special address handler,
1085 				 * then use it instead of the generic one.
1086 				 */
1087 				if (ifa->ifa_claim_addr) {
1088 					if ((*ifa->ifa_claim_addr)(ifa, addr))
1089 						goto done;
1090 					continue;
1091 				}
1092 
1093 				/*
1094 				 * Scan all the bits in the ifa's address.
1095 				 * If a bit dissagrees with what we are
1096 				 * looking for, mask it with the netmask
1097 				 * to see if it really matters.
1098 				 * (A byte at a time)
1099 				 */
1100 				if (ifa->ifa_netmask == 0)
1101 					continue;
1102 				cp = addr_data;
1103 				cp2 = ifa->ifa_addr->sa_data;
1104 				cp3 = ifa->ifa_netmask->sa_data;
1105 				cplim = ifa->ifa_netmask->sa_len
1106 					+ (char *)ifa->ifa_netmask;
1107 				while (cp3 < cplim)
1108 					if ((*cp++ ^ *cp2++) & *cp3++)
1109 						goto next; /* next address! */
1110 				/*
1111 				 * If the netmask of what we just found
1112 				 * is more specific than what we had before
1113 				 * (if we had one) then remember the new one
1114 				 * before continuing to search
1115 				 * for an even better one.
1116 				 */
1117 				if (ifa_maybe == 0 ||
1118 				    rn_refines((caddr_t)ifa->ifa_netmask,
1119 				    (caddr_t)ifa_maybe->ifa_netmask))
1120 					ifa_maybe = ifa;
1121 			}
1122 		}
1123 	}
1124 	ifa = ifa_maybe;
1125 done:
1126 	IFNET_RUNLOCK();
1127 	return (ifa);
1128 }
1129 
1130 /*
1131  * Find an interface address specific to an interface best matching
1132  * a given address.
1133  */
1134 struct ifaddr *
1135 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1136 {
1137 	struct ifaddr *ifa;
1138 	char *cp, *cp2, *cp3;
1139 	char *cplim;
1140 	struct ifaddr *ifa_maybe = 0;
1141 	u_int af = addr->sa_family;
1142 
1143 	if (af >= AF_MAX)
1144 		return (0);
1145 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1146 		if (ifa->ifa_addr->sa_family != af)
1147 			continue;
1148 		if (ifa_maybe == 0)
1149 			ifa_maybe = ifa;
1150 		if (ifa->ifa_netmask == 0) {
1151 			if (sa_equal(addr, ifa->ifa_addr) ||
1152 			    (ifa->ifa_dstaddr &&
1153 			    sa_equal(addr, ifa->ifa_dstaddr)))
1154 				goto done;
1155 			continue;
1156 		}
1157 		if (ifp->if_flags & IFF_POINTOPOINT) {
1158 			if (sa_equal(addr, ifa->ifa_dstaddr))
1159 				goto done;
1160 		} else {
1161 			cp = addr->sa_data;
1162 			cp2 = ifa->ifa_addr->sa_data;
1163 			cp3 = ifa->ifa_netmask->sa_data;
1164 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1165 			for (; cp3 < cplim; cp3++)
1166 				if ((*cp++ ^ *cp2++) & *cp3)
1167 					break;
1168 			if (cp3 == cplim)
1169 				goto done;
1170 		}
1171 	}
1172 	ifa = ifa_maybe;
1173 done:
1174 	return (ifa);
1175 }
1176 
1177 #include <net/route.h>
1178 
1179 /*
1180  * Default action when installing a route with a Link Level gateway.
1181  * Lookup an appropriate real ifa to point to.
1182  * This should be moved to /sys/net/link.c eventually.
1183  */
1184 static void
1185 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1186 {
1187 	struct ifaddr *ifa, *oifa;
1188 	struct sockaddr *dst;
1189 	struct ifnet *ifp;
1190 
1191 	RT_LOCK_ASSERT(rt);
1192 
1193 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1194 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1195 		return;
1196 	ifa = ifaof_ifpforaddr(dst, ifp);
1197 	if (ifa) {
1198 		IFAREF(ifa);		/* XXX */
1199 		oifa = rt->rt_ifa;
1200 		rt->rt_ifa = ifa;
1201 		IFAFREE(oifa);
1202 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1203 			ifa->ifa_rtrequest(cmd, rt, info);
1204 	}
1205 }
1206 
1207 /*
1208  * Mark an interface down and notify protocols of
1209  * the transition.
1210  * NOTE: must be called at splnet or eqivalent.
1211  */
1212 static void
1213 if_unroute(struct ifnet *ifp, int flag, int fam)
1214 {
1215 	struct ifaddr *ifa;
1216 
1217 	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
1218 
1219 	ifp->if_flags &= ~flag;
1220 	getmicrotime(&ifp->if_lastchange);
1221 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1222 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1223 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1224 	if_qflush(&ifp->if_snd);
1225 #ifdef DEV_CARP
1226 	if (ifp->if_carp)
1227 		carp_carpdev_state(ifp->if_carp);
1228 #endif
1229 	rt_ifmsg(ifp);
1230 }
1231 
1232 /*
1233  * Mark an interface up and notify protocols of
1234  * the transition.
1235  * NOTE: must be called at splnet or eqivalent.
1236  */
1237 static void
1238 if_route(struct ifnet *ifp, int flag, int fam)
1239 {
1240 	struct ifaddr *ifa;
1241 
1242 	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
1243 
1244 	ifp->if_flags |= flag;
1245 	getmicrotime(&ifp->if_lastchange);
1246 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1247 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1248 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1249 #ifdef DEV_CARP
1250 	if (ifp->if_carp)
1251 		carp_carpdev_state(ifp->if_carp);
1252 #endif
1253 	rt_ifmsg(ifp);
1254 #ifdef INET6
1255 	in6_if_up(ifp);
1256 #endif
1257 }
1258 
1259 void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
1260 void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
1261 
1262 /*
1263  * Handle a change in the interface link state. To avoid LORs
1264  * between driver lock and upper layer locks, as well as possible
1265  * recursions, we post event to taskqueue, and all job
1266  * is done in static do_link_state_change().
1267  */
1268 void
1269 if_link_state_change(struct ifnet *ifp, int link_state)
1270 {
1271 	/* Return if state hasn't changed. */
1272 	if (ifp->if_link_state == link_state)
1273 		return;
1274 
1275 	ifp->if_link_state = link_state;
1276 
1277 	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
1278 }
1279 
1280 static void
1281 do_link_state_change(void *arg, int pending)
1282 {
1283 	struct ifnet *ifp = (struct ifnet *)arg;
1284 	int link_state = ifp->if_link_state;
1285 	int link;
1286 
1287 	/* Notify that the link state has changed. */
1288 	rt_ifmsg(ifp);
1289 	if (link_state == LINK_STATE_UP)
1290 		link = NOTE_LINKUP;
1291 	else if (link_state == LINK_STATE_DOWN)
1292 		link = NOTE_LINKDOWN;
1293 	else
1294 		link = NOTE_LINKINV;
1295 	KNOTE_UNLOCKED(&ifp->if_klist, link);
1296 	if (ifp->if_vlantrunk != NULL)
1297 		(*vlan_link_state_p)(ifp, link);
1298 
1299 	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
1300 	    IFP2AC(ifp)->ac_netgraph != NULL)
1301 		(*ng_ether_link_state_p)(ifp, link_state);
1302 #ifdef DEV_CARP
1303 	if (ifp->if_carp)
1304 		carp_carpdev_state(ifp->if_carp);
1305 #endif
1306 	if (ifp->if_bridge) {
1307 		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
1308 		(*bstp_linkstate_p)(ifp, link_state);
1309 	}
1310 
1311 	devctl_notify("IFNET", ifp->if_xname,
1312 	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1313 	if (pending > 1)
1314 		if_printf(ifp, "%d link states coalesced\n", pending);
1315 	if (log_link_state_change)
1316 		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
1317 		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
1318 }
1319 
1320 /*
1321  * Mark an interface down and notify protocols of
1322  * the transition.
1323  * NOTE: must be called at splnet or eqivalent.
1324  */
1325 void
1326 if_down(struct ifnet *ifp)
1327 {
1328 
1329 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1330 }
1331 
1332 /*
1333  * Mark an interface up and notify protocols of
1334  * the transition.
1335  * NOTE: must be called at splnet or eqivalent.
1336  */
1337 void
1338 if_up(struct ifnet *ifp)
1339 {
1340 
1341 	if_route(ifp, IFF_UP, AF_UNSPEC);
1342 }
1343 
1344 /*
1345  * Flush an interface queue.
1346  */
1347 static void
1348 if_qflush(struct ifaltq *ifq)
1349 {
1350 	struct mbuf *m, *n;
1351 
1352 	IFQ_LOCK(ifq);
1353 #ifdef ALTQ
1354 	if (ALTQ_IS_ENABLED(ifq))
1355 		ALTQ_PURGE(ifq);
1356 #endif
1357 	n = ifq->ifq_head;
1358 	while ((m = n) != 0) {
1359 		n = m->m_act;
1360 		m_freem(m);
1361 	}
1362 	ifq->ifq_head = 0;
1363 	ifq->ifq_tail = 0;
1364 	ifq->ifq_len = 0;
1365 	IFQ_UNLOCK(ifq);
1366 }
1367 
1368 /*
1369  * Handle interface watchdog timer routines.  Called
1370  * from softclock, we decrement timers (if set) and
1371  * call the appropriate interface routine on expiration.
1372  *
1373  * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1374  * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1375  * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1376  */
1377 static void
1378 if_slowtimo(void *arg)
1379 {
1380 	struct ifnet *ifp;
1381 	int s = splimp();
1382 
1383 	IFNET_RLOCK();
1384 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1385 		if (ifp->if_timer == 0 || --ifp->if_timer)
1386 			continue;
1387 		if (ifp->if_watchdog)
1388 			(*ifp->if_watchdog)(ifp);
1389 	}
1390 	IFNET_RUNLOCK();
1391 	splx(s);
1392 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1393 }
1394 
1395 /*
1396  * Map interface name to
1397  * interface structure pointer.
1398  */
1399 struct ifnet *
1400 ifunit(const char *name)
1401 {
1402 	struct ifnet *ifp;
1403 
1404 	IFNET_RLOCK();
1405 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1406 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1407 			break;
1408 	}
1409 	IFNET_RUNLOCK();
1410 	return (ifp);
1411 }
1412 
/*
 * Hardware specific interface ioctls.
 *
 * Handles the requests that act on the interface itself.  'data' is
 * interpreted as a struct ifreq, except that SIOCGIFSTATUS views it as a
 * struct ifstat and the group requests view it as a struct ifgroupreq.
 *
 * All modifying cases except SIOCSIFMAC call suser(td) before doing anything
 * else and return its error; the read-only cases do not.
 *
 * Returns 0 or an errno value.  ENOIOCTL is returned for a command that is
 * not handled here.
 */
static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error = 0;
	int new_flags, temp_flags;
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/*
		 * Stack-owned and driver-owned flags are reported together,
		 * split into a low and a high 16-bit half.
		 */
		temp_flags = ifp->if_flags | ifp->if_drv_flags;
		ifr->ifr_flags = temp_flags & 0xffff;
		ifr->ifr_flagshigh = temp_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

#ifdef MAC
	case SIOCGIFMAC:
		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCSIFFLAGS:
		error = suser(td);
		if (error)
			return (error);
		/*
		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
		 * check, so we don't need special handling here yet.
		 */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			/* IFF_UP is being cleared. */
			int s = splimp();
			if_down(ifp);
			splx(s);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			/* IFF_UP is being set. */
			int s = splimp();
			if_up(ifp);
			splx(s);
		}
		/* See if permanently promiscuous mode bit is about to flip */
		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
			if (new_flags & IFF_PPROMISC)
				ifp->if_flags |= IFF_PROMISC;
			else if (ifp->if_pcount == 0)
				ifp->if_flags &= ~IFF_PROMISC;
			log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
			    ifp->if_xname,
			    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
		}
		/*
		 * Keep the current value of every IFF_CANTCHANGE bit and
		 * take all other bits from the request.
		 */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		/* The driver's return value is deliberately ignored here. */
		if (ifp->if_ioctl) {
			IFF_LOCKGIANT(ifp);
			(void) (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
		}
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Reject any requested bit the interface does not advertise. */
		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
			return (EINVAL);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

#ifdef MAC
	case SIOCSIFMAC:
		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCSIFNAME:
		error = suser(td);
		if (error != 0)
			return (error);
		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
		if (error != 0)
			return (error);
		if (new_name[0] == '\0')
			return (EINVAL);
		/* The new name must not already be in use. */
		if (ifunit(new_name) != NULL)
			return (EEXIST);

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		log(LOG_INFO, "%s: changing name to '%s'\n",
		    ifp->if_xname, new_name);

		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		ifa = ifp->if_addr;
		IFA_LOCK(ifa);
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			    sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		/*
		 * Rebuild the link-level netmask: clear the old name's
		 * bytes, then set 0xff over the new name's bytes.
		 */
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		IFA_UNLOCK(ifa);

		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
		break;

	case SIOCSIFMETRIC:
		error = suser(td);
		if (error)
			return (error);
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		/* Remembered so a change made by the driver can be detected. */
		u_long oldmtu = ifp->if_mtu;

		error = suser(td);
		if (error)
			return (error);
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
			return (EINVAL);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = suser(td);
		if (error)
			return (error);

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK)
			return (EINVAL);

		if (cmd == SIOCADDMULTI) {
			/* The returned membership pointer is not used here. */
			struct ifmultiaddr *ifma;
			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* Privileged requests that are simply handed to the driver. */
	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
		/* Start from an empty status string. */
		ifs = (struct ifstat *)data;
		ifs->ascii[0] = '\0';
		/* FALLTHROUGH: the driver handles it like the requests below. */

	/* Unprivileged requests that are simply handed to the driver. */
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGLIFPHYADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFGENERIC:
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		break;

	case SIOCSIFLLADDR:
		error = suser(td);
		if (error)
			return (error);
		error = if_setlladdr(ifp,
		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
		break;

	case SIOCAIFGROUP:
	{
		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

		error = suser(td);
		if (error)
			return (error);
		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
			return (error);
		break;
	}

	case SIOCGIFGROUP:
		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
			return (error);
		break;

	case SIOCDIFGROUP:
	{
		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

		error = suser(td);
		if (error)
			return (error);
		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
			return (error);
		break;
	}

	default:
		/* Not an interface-level request. */
		error = ENOIOCTL;
		break;
	}
	return (error);
}
1727 
1728 /*
1729  * Interface ioctls.
1730  */
1731 int
1732 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1733 {
1734 	struct ifnet *ifp;
1735 	struct ifreq *ifr;
1736 	int error;
1737 	int oif_flags;
1738 
1739 	switch (cmd) {
1740 	case SIOCGIFCONF:
1741 	case OSIOCGIFCONF:
1742 #ifdef __amd64__
1743 	case SIOCGIFCONF32:
1744 #endif
1745 		return (ifconf(cmd, data));
1746 	}
1747 	ifr = (struct ifreq *)data;
1748 
1749 	switch (cmd) {
1750 	case SIOCIFCREATE:
1751 	case SIOCIFCREATE2:
1752 		if ((error = suser(td)) != 0)
1753 			return (error);
1754 		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1755 			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1756 	case SIOCIFDESTROY:
1757 		if ((error = suser(td)) != 0)
1758 			return (error);
1759 		return if_clone_destroy(ifr->ifr_name);
1760 
1761 	case SIOCIFGCLONERS:
1762 		return (if_clone_list((struct if_clonereq *)data));
1763 	case SIOCGIFGMEMB:
1764 		return (if_getgroupmembers((struct ifgroupreq *)data));
1765 	}
1766 
1767 	ifp = ifunit(ifr->ifr_name);
1768 	if (ifp == 0)
1769 		return (ENXIO);
1770 
1771 	error = ifhwioctl(cmd, ifp, data, td);
1772 	if (error != ENOIOCTL)
1773 		return (error);
1774 
1775 	oif_flags = ifp->if_flags;
1776 	if (so->so_proto == 0)
1777 		return (EOPNOTSUPP);
1778 #ifndef COMPAT_43
1779 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1780 								 data,
1781 								 ifp, td));
1782 #else
1783 	{
1784 		int ocmd = cmd;
1785 
1786 		switch (cmd) {
1787 
1788 		case SIOCSIFDSTADDR:
1789 		case SIOCSIFADDR:
1790 		case SIOCSIFBRDADDR:
1791 		case SIOCSIFNETMASK:
1792 #if BYTE_ORDER != BIG_ENDIAN
1793 			if (ifr->ifr_addr.sa_family == 0 &&
1794 			    ifr->ifr_addr.sa_len < 16) {
1795 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1796 				ifr->ifr_addr.sa_len = 16;
1797 			}
1798 #else
1799 			if (ifr->ifr_addr.sa_len == 0)
1800 				ifr->ifr_addr.sa_len = 16;
1801 #endif
1802 			break;
1803 
1804 		case OSIOCGIFADDR:
1805 			cmd = SIOCGIFADDR;
1806 			break;
1807 
1808 		case OSIOCGIFDSTADDR:
1809 			cmd = SIOCGIFDSTADDR;
1810 			break;
1811 
1812 		case OSIOCGIFBRDADDR:
1813 			cmd = SIOCGIFBRDADDR;
1814 			break;
1815 
1816 		case OSIOCGIFNETMASK:
1817 			cmd = SIOCGIFNETMASK;
1818 		}
1819 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1820 								   cmd,
1821 								   data,
1822 								   ifp, td));
1823 		switch (ocmd) {
1824 
1825 		case OSIOCGIFADDR:
1826 		case OSIOCGIFDSTADDR:
1827 		case OSIOCGIFBRDADDR:
1828 		case OSIOCGIFNETMASK:
1829 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1830 
1831 		}
1832 	}
1833 #endif /* COMPAT_43 */
1834 
1835 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1836 #ifdef INET6
1837 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1838 		if (ifp->if_flags & IFF_UP) {
1839 			int s = splimp();
1840 			in6_if_up(ifp);
1841 			splx(s);
1842 		}
1843 #endif
1844 	}
1845 	return (error);
1846 }
1847 
1848 /*
1849  * The code common to handling reference counted flags,
1850  * e.g., in ifpromisc() and if_allmulti().
1851  * The "pflag" argument can specify a permanent mode flag to check,
1852  * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
1853  *
1854  * Only to be used on stack-owned flags, not driver-owned flags.
1855  */
1856 static int
1857 if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
1858 {
1859 	struct ifreq ifr;
1860 	int error;
1861 	int oldflags, oldcount;
1862 
1863 	/* Sanity checks to catch programming errors */
1864 	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
1865 	    ("%s: setting driver-owned flag %d", __func__, flag));
1866 
1867 	if (onswitch)
1868 		KASSERT(*refcount >= 0,
1869 		    ("%s: increment negative refcount %d for flag %d",
1870 		    __func__, *refcount, flag));
1871 	else
1872 		KASSERT(*refcount > 0,
1873 		    ("%s: decrement non-positive refcount %d for flag %d",
1874 		    __func__, *refcount, flag));
1875 
1876 	/* In case this mode is permanent, just touch refcount */
1877 	if (ifp->if_flags & pflag) {
1878 		*refcount += onswitch ? 1 : -1;
1879 		return (0);
1880 	}
1881 
1882 	/* Save ifnet parameters for if_ioctl() may fail */
1883 	oldcount = *refcount;
1884 	oldflags = ifp->if_flags;
1885 
1886 	/*
1887 	 * See if we aren't the only and touching refcount is enough.
1888 	 * Actually toggle interface flag if we are the first or last.
1889 	 */
1890 	if (onswitch) {
1891 		if ((*refcount)++)
1892 			return (0);
1893 		ifp->if_flags |= flag;
1894 	} else {
1895 		if (--(*refcount))
1896 			return (0);
1897 		ifp->if_flags &= ~flag;
1898 	}
1899 
1900 	/* Call down the driver since we've changed interface flags */
1901 	if (ifp->if_ioctl == NULL) {
1902 		error = EOPNOTSUPP;
1903 		goto recover;
1904 	}
1905 	ifr.ifr_flags = ifp->if_flags & 0xffff;
1906 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1907 	IFF_LOCKGIANT(ifp);
1908 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1909 	IFF_UNLOCKGIANT(ifp);
1910 	if (error)
1911 		goto recover;
1912 	/* Notify userland that interface flags have changed */
1913 	rt_ifmsg(ifp);
1914 	return (0);
1915 
1916 recover:
1917 	/* Recover after driver error */
1918 	*refcount = oldcount;
1919 	ifp->if_flags = oldflags;
1920 	return (error);
1921 }
1922 
1923 /*
1924  * Set/clear promiscuous mode on interface ifp based on the truth value
1925  * of pswitch.  The calls are reference counted so that only the first
1926  * "on" request actually has an effect, as does the final "off" request.
1927  * Results are undefined if the "off" and "on" requests are not matched.
1928  */
1929 int
1930 ifpromisc(struct ifnet *ifp, int pswitch)
1931 {
1932 	int error;
1933 	int oldflags = ifp->if_flags;
1934 
1935 	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
1936 			   &ifp->if_pcount, pswitch);
1937 	/* If promiscuous mode status has changed, log a message */
1938 	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
1939 		log(LOG_INFO, "%s: promiscuous mode %s\n",
1940 		    ifp->if_xname,
1941 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1942 	return (error);
1943 }
1944 
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * 'data' is a struct ifconf (a struct ifconf32 for SIOCGIFCONF32 on amd64).
 * The records are first assembled in a fixed-length sbuf and then copied
 * out to ifc_req; ifc_len is updated to the number of bytes produced.
 * One record is emitted per interface address, or a single record with a
 * zeroed address for an interface that contributed no address.
 *
 * Returns EINVAL if ifc_len is not positive, ENAMETOOLONG if an interface
 * name does not fit in ifr_name, and otherwise the result of copyout().
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
#ifdef __amd64__
	struct ifconf32 *ifc32 = (struct ifconf32 *)data;
	struct ifconf ifc_swab;
#endif
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

#ifdef __amd64__
	/* Widen the 32-bit request into a native one and work on the copy. */
	if (cmd == SIOCGIFCONF32) {
		ifc_swab.ifc_len = ifc32->ifc_len;
		ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
		ifc = &ifc_swab;
	}
#endif
	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
	max_len = MAXPHYS - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	/*
	 * On entry max_len is the size to try.  Once the caller's buffer is
	 * no larger than that, the sbuf is sized to the caller's buffer and
	 * 'full' stops any further retry.
	 */
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	/*
	 * From here on max_len counts the bytes that were offered to the
	 * sbuf, and valid_len the sbuf length after the last record that
	 * was appended without overflowing it.
	 */
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();		/* could sleep XXX */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		int addrs;

		/*
		 * Zero the ifr_name buffer to make sure we don't
		 * disclose the contents of the stack.
		 */
		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));

		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		addrs = 0;
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			/* Skip addresses that prison_if() rejects for a jailed caller. */
			if (jailed(curthread->td_ucred) &&
			    prison_if(curthread->td_ucred, sa))
				continue;
			addrs++;
#ifdef COMPAT_43
			if (cmd == OSIOCGIFCONF) {
				/* Old layout: store the family via struct osockaddr. */
				struct osockaddr *osa =
					 (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else
#endif
			if (sa->sa_len <= sizeof(*sa)) {
				/* The address fits inside a plain struct ifreq. */
				ifr.ifr_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/*
				 * Oversized address: emit the ifreq up to
				 * ifr_addr, then the full-length sockaddr.
				 */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			if (!sbuf_overflowed(sb))
				valid_len = sbuf_len(sb);
		}
		if (addrs == 0) {
			/* No address was emitted: one record, zeroed address. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (!sbuf_overflowed(sb))
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	ifc->ifc_len = valid_len;
#ifdef __amd64__
	/* 'ifc' is the local copy here; also update the caller's structure. */
	if (cmd == SIOCGIFCONF32)
		ifc32->ifc_len = valid_len;
#endif
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}
2070 
2071 /*
2072  * Just like ifpromisc(), but for all-multicast-reception mode.
2073  */
2074 int
2075 if_allmulti(struct ifnet *ifp, int onswitch)
2076 {
2077 
2078 	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
2079 }
2080 
2081 static struct ifmultiaddr *
2082 if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
2083 {
2084 	struct ifmultiaddr *ifma;
2085 
2086 	IF_ADDR_LOCK_ASSERT(ifp);
2087 
2088 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2089 		if (sa_equal(ifma->ifma_addr, sa))
2090 			break;
2091 	}
2092 
2093 	return ifma;
2094 }
2095 
2096 /*
2097  * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
2098  * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
2099  * the ifnet multicast address list here, so the caller must do that and
2100  * other setup work (such as notifying the device driver).  The reference
2101  * count is initialized to 1.
2102  */
2103 static struct ifmultiaddr *
2104 if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
2105     int mflags)
2106 {
2107 	struct ifmultiaddr *ifma;
2108 	struct sockaddr *dupsa;
2109 
2110 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, mflags |
2111 	    M_ZERO);
2112 	if (ifma == NULL)
2113 		return (NULL);
2114 
2115 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, mflags);
2116 	if (dupsa == NULL) {
2117 		FREE(ifma, M_IFMADDR);
2118 		return (NULL);
2119 	}
2120 	bcopy(sa, dupsa, sa->sa_len);
2121 	ifma->ifma_addr = dupsa;
2122 
2123 	ifma->ifma_ifp = ifp;
2124 	ifma->ifma_refcount = 1;
2125 	ifma->ifma_protospec = NULL;
2126 
2127 	if (llsa == NULL) {
2128 		ifma->ifma_lladdr = NULL;
2129 		return (ifma);
2130 	}
2131 
2132 	MALLOC(dupsa, struct sockaddr *, llsa->sa_len, M_IFMADDR, mflags);
2133 	if (dupsa == NULL) {
2134 		FREE(ifma->ifma_addr, M_IFMADDR);
2135 		FREE(ifma, M_IFMADDR);
2136 		return (NULL);
2137 	}
2138 	bcopy(llsa, dupsa, llsa->sa_len);
2139 	ifma->ifma_lladdr = dupsa;
2140 
2141 	return (ifma);
2142 }
2143 
2144 /*
2145  * if_freemulti: free ifmultiaddr structure and possibly attached related
2146  * addresses.  The caller is responsible for implementing reference
2147  * counting, notifying the driver, handling routing messages, and releasing
2148  * any dependent link layer state.
2149  */
2150 static void
2151 if_freemulti(struct ifmultiaddr *ifma)
2152 {
2153 
2154 	KASSERT(ifma->ifma_refcount == 1, ("if_freemulti: refcount %d",
2155 	    ifma->ifma_refcount));
2156 	KASSERT(ifma->ifma_protospec == NULL,
2157 	    ("if_freemulti: protospec not NULL"));
2158 
2159 	if (ifma->ifma_lladdr != NULL)
2160 		FREE(ifma->ifma_lladdr, M_IFMADDR);
2161 	FREE(ifma->ifma_addr, M_IFMADDR);
2162 	FREE(ifma, M_IFMADDR);
2163 }
2164 
2165 /*
2166  * Register an additional multicast address with a network interface.
2167  *
2168  * - If the address is already present, bump the reference count on the
2169  *   address and return.
2170  * - If the address is not link-layer, look up a link layer address.
2171  * - Allocate address structures for one or both addresses, and attach to the
2172  *   multicast address list on the interface.  If automatically adding a link
2173  *   layer address, the protocol address will own a reference to the link
2174  *   layer address, to be freed when it is freed.
2175  * - Notify the network device driver of an addition to the multicast address
2176  *   list.
2177  *
2178  * 'sa' points to caller-owned memory with the desired multicast address.
2179  *
2180  * 'retifma' will be used to return a pointer to the resulting multicast
2181  * address reference, if desired.
2182  */
2183 int
2184 if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
2185     struct ifmultiaddr **retifma)
2186 {
2187 	struct ifmultiaddr *ifma, *ll_ifma;
2188 	struct sockaddr *llsa;
2189 	int error;
2190 
2191 	/*
2192 	 * If the address is already present, return a new reference to it;
2193 	 * otherwise, allocate storage and set up a new address.
2194 	 */
2195 	IF_ADDR_LOCK(ifp);
2196 	ifma = if_findmulti(ifp, sa);
2197 	if (ifma != NULL) {
2198 		ifma->ifma_refcount++;
2199 		if (retifma != NULL)
2200 			*retifma = ifma;
2201 		IF_ADDR_UNLOCK(ifp);
2202 		return (0);
2203 	}
2204 
2205 	/*
2206 	 * The address isn't already present; resolve the protocol address
2207 	 * into a link layer address, and then look that up, bump its
2208 	 * refcount or allocate an ifma for that also.  If 'llsa' was
2209 	 * returned, we will need to free it later.
2210 	 */
2211 	llsa = NULL;
2212 	ll_ifma = NULL;
2213 	if (ifp->if_resolvemulti != NULL) {
2214 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2215 		if (error)
2216 			goto unlock_out;
2217 	}
2218 
2219 	/*
2220 	 * Allocate the new address.  Don't hook it up yet, as we may also
2221 	 * need to allocate a link layer multicast address.
2222 	 */
2223 	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
2224 	if (ifma == NULL) {
2225 		error = ENOMEM;
2226 		goto free_llsa_out;
2227 	}
2228 
2229 	/*
2230 	 * If a link layer address is found, we'll need to see if it's
2231 	 * already present in the address list, or allocate is as well.
2232 	 * When this block finishes, the link layer address will be on the
2233 	 * list.
2234 	 */
2235 	if (llsa != NULL) {
2236 		ll_ifma = if_findmulti(ifp, llsa);
2237 		if (ll_ifma == NULL) {
2238 			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
2239 			if (ll_ifma == NULL) {
2240 				if_freemulti(ifma);
2241 				error = ENOMEM;
2242 				goto free_llsa_out;
2243 			}
2244 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
2245 			    ifma_link);
2246 		} else
2247 			ll_ifma->ifma_refcount++;
2248 	}
2249 
2250 	/*
2251 	 * We now have a new multicast address, ifma, and possibly a new or
2252 	 * referenced link layer address.  Add the primary address to the
2253 	 * ifnet address list.
2254 	 */
2255 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2256 
2257 	if (retifma != NULL)
2258 		*retifma = ifma;
2259 
2260 	/*
2261 	 * Must generate the message while holding the lock so that 'ifma'
2262 	 * pointer is still valid.
2263 	 *
2264 	 * XXXRW: How come we don't announce ll_ifma?
2265 	 */
2266 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2267 	IF_ADDR_UNLOCK(ifp);
2268 
2269 	/*
2270 	 * We are certain we have added something, so call down to the
2271 	 * interface to let them know about it.
2272 	 */
2273 	if (ifp->if_ioctl != NULL) {
2274 		IFF_LOCKGIANT(ifp);
2275 		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
2276 		IFF_UNLOCKGIANT(ifp);
2277 	}
2278 
2279 	if (llsa != NULL)
2280 		FREE(llsa, M_IFMADDR);
2281 
2282 	return (0);
2283 
2284 free_llsa_out:
2285 	if (llsa != NULL)
2286 		FREE(llsa, M_IFMADDR);
2287 
2288 unlock_out:
2289 	IF_ADDR_UNLOCK(ifp);
2290 	return (error);
2291 }
2292 
2293 /*
2294  * Remove a reference to a multicast address on this interface.  Yell
2295  * if the request does not match an existing membership.
2296  */
2297 int
2298 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2299 {
2300 	struct ifmultiaddr *ifma, *ll_ifma;
2301 
2302 	IF_ADDR_LOCK(ifp);
2303 	ifma = if_findmulti(ifp, sa);
2304 	if (ifma == NULL) {
2305 		IF_ADDR_UNLOCK(ifp);
2306 		return ENOENT;
2307 	}
2308 
2309 	if (ifma->ifma_refcount > 1) {
2310 		ifma->ifma_refcount--;
2311 		IF_ADDR_UNLOCK(ifp);
2312 		return 0;
2313 	}
2314 
2315 	sa = ifma->ifma_lladdr;
2316 	if (sa != NULL)
2317 		ll_ifma = if_findmulti(ifp, sa);
2318 	else
2319 		ll_ifma = NULL;
2320 
2321 	/*
2322 	 * XXXRW: How come we don't announce ll_ifma?
2323 	 */
2324 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
2325 
2326 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2327 	if_freemulti(ifma);
2328 
2329 	if (ll_ifma != NULL) {
2330 		if (ll_ifma->ifma_refcount == 1) {
2331 			TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link);
2332 			if_freemulti(ll_ifma);
2333 		} else
2334 			ll_ifma->ifma_refcount--;
2335 	}
2336 	IF_ADDR_UNLOCK(ifp);
2337 
2338 	/*
2339 	 * Make sure the interface driver is notified
2340 	 * in the case of a link layer mcast group being left.
2341 	 */
2342 	if (ifp->if_ioctl) {
2343 		IFF_LOCKGIANT(ifp);
2344 		(void) (*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
2345 		IFF_UNLOCKGIANT(ifp);
2346 	}
2347 
2348 	return 0;
2349 }
2350 
2351 /*
2352  * Set the link layer address on an interface.
2353  *
2354  * At this time we only support certain types of interfaces,
2355  * and we don't allow the length of the address to change.
2356  */
2357 int
2358 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2359 {
2360 	struct sockaddr_dl *sdl;
2361 	struct ifaddr *ifa;
2362 	struct ifreq ifr;
2363 
2364 	ifa = ifp->if_addr;
2365 	if (ifa == NULL)
2366 		return (EINVAL);
2367 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2368 	if (sdl == NULL)
2369 		return (EINVAL);
2370 	if (len != sdl->sdl_alen)	/* don't allow length to change */
2371 		return (EINVAL);
2372 	switch (ifp->if_type) {
2373 	case IFT_ETHER:
2374 	case IFT_FDDI:
2375 	case IFT_XETHER:
2376 	case IFT_ISO88025:
2377 	case IFT_L2VLAN:
2378 	case IFT_BRIDGE:
2379 	case IFT_ARCNET:
2380 		bcopy(lladdr, LLADDR(sdl), len);
2381 		break;
2382 	default:
2383 		return (ENODEV);
2384 	}
2385 	/*
2386 	 * If the interface is already up, we need
2387 	 * to re-init it in order to reprogram its
2388 	 * address filter.
2389 	 */
2390 	if ((ifp->if_flags & IFF_UP) != 0) {
2391 		if (ifp->if_ioctl) {
2392 			IFF_LOCKGIANT(ifp);
2393 			ifp->if_flags &= ~IFF_UP;
2394 			ifr.ifr_flags = ifp->if_flags & 0xffff;
2395 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2396 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2397 			ifp->if_flags |= IFF_UP;
2398 			ifr.ifr_flags = ifp->if_flags & 0xffff;
2399 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2400 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2401 			IFF_UNLOCKGIANT(ifp);
2402 		}
2403 #ifdef INET
2404 		/*
2405 		 * Also send gratuitous ARPs to notify other nodes about
2406 		 * the address change.
2407 		 */
2408 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2409 			if (ifa->ifa_addr->sa_family == AF_INET)
2410 				arp_ifinit(ifp, ifa);
2411 		}
2412 #endif
2413 	}
2414 	return (0);
2415 }
2416 
2417 /*
2418  * The name argument must be a pointer to storage which will last as
2419  * long as the interface does.  For physical devices, the result of
2420  * device_get_name(dev) is a good choice and for pseudo-devices a
2421  * static string works well.
2422  */
2423 void
2424 if_initname(struct ifnet *ifp, const char *name, int unit)
2425 {
2426 	ifp->if_dname = name;
2427 	ifp->if_dunit = unit;
2428 	if (unit != IF_DUNIT_NONE)
2429 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2430 	else
2431 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2432 }
2433 
2434 int
2435 if_printf(struct ifnet *ifp, const char * fmt, ...)
2436 {
2437 	va_list ap;
2438 	int retval;
2439 
2440 	retval = printf("%s: ", ifp->if_xname);
2441 	va_start(ap, fmt);
2442 	retval += vprintf(fmt, ap);
2443 	va_end(ap);
2444 	return (retval);
2445 }
2446 
2447 /*
2448  * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
2449  * be called without Giant.  However, we often can't acquire the Giant lock
2450  * at those points; instead, we run it via a task queue that holds Giant via
2451  * if_start_deferred.
2452  *
2453  * XXXRW: We need to make sure that the ifnet isn't fully detached until any
2454  * outstanding if_start_deferred() tasks that will run after the free.  This
2455  * probably means waiting in if_detach().
2456  */
2457 void
2458 if_start(struct ifnet *ifp)
2459 {
2460 
2461 	NET_ASSERT_GIANT();
2462 
2463 	if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
2464 		if (mtx_owned(&Giant))
2465 			(*(ifp)->if_start)(ifp);
2466 		else
2467 			taskqueue_enqueue(taskqueue_swi_giant,
2468 			    &ifp->if_starttask);
2469 	} else
2470 		(*(ifp)->if_start)(ifp);
2471 }
2472 
2473 static void
2474 if_start_deferred(void *context, int pending)
2475 {
2476 	struct ifnet *ifp;
2477 
2478 	/*
2479 	 * This code must be entered with Giant, and should never run if
2480 	 * we're not running with debug.mpsafenet.
2481 	 */
2482 	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
2483 	GIANT_REQUIRED;
2484 
2485 	ifp = context;
2486 	(ifp->if_start)(ifp);
2487 }
2488 
2489 int
2490 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
2491 {
2492 	int active = 0;
2493 
2494 	IF_LOCK(ifq);
2495 	if (_IF_QFULL(ifq)) {
2496 		_IF_DROP(ifq);
2497 		IF_UNLOCK(ifq);
2498 		m_freem(m);
2499 		return (0);
2500 	}
2501 	if (ifp != NULL) {
2502 		ifp->if_obytes += m->m_pkthdr.len + adjust;
2503 		if (m->m_flags & (M_BCAST|M_MCAST))
2504 			ifp->if_omcasts++;
2505 		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
2506 	}
2507 	_IF_ENQUEUE(ifq, m);
2508 	IF_UNLOCK(ifq);
2509 	if (ifp != NULL && !active)
2510 		if_start(ifp);
2511 	return (1);
2512 }
2513 
2514 void
2515 if_register_com_alloc(u_char type,
2516     if_com_alloc_t *a, if_com_free_t *f)
2517 {
2518 
2519 	KASSERT(if_com_alloc[type] == NULL,
2520 	    ("if_register_com_alloc: %d already registered", type));
2521 	KASSERT(if_com_free[type] == NULL,
2522 	    ("if_register_com_alloc: %d free already registered", type));
2523 
2524 	if_com_alloc[type] = a;
2525 	if_com_free[type] = f;
2526 }
2527 
2528 void
2529 if_deregister_com_alloc(u_char type)
2530 {
2531 
2532 	KASSERT(if_com_alloc[type] != NULL,
2533 	    ("if_deregister_com_alloc: %d not registered", type));
2534 	KASSERT(if_com_free[type] != NULL,
2535 	    ("if_deregister_com_alloc: %d free not registered", type));
2536 	if_com_alloc[type] = NULL;
2537 	if_com_free[type] = NULL;
2538 }
2539