xref: /freebsd/sys/net/if.c (revision 74bf4e164ba5851606a27d4feff27717452583e5)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)if.c	8.5 (Berkeley) 1/9/95
30  * $FreeBSD$
31  */
32 
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36 #include "opt_mac.h"
37 
38 #include <sys/param.h>
39 #include <sys/conf.h>
40 #include <sys/mac.h>
41 #include <sys/malloc.h>
42 #include <sys/bus.h>
43 #include <sys/mbuf.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/protosw.h>
49 #include <sys/kernel.h>
50 #include <sys/sockio.h>
51 #include <sys/syslog.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54 #include <sys/domain.h>
55 #include <sys/jail.h>
56 #include <machine/stdarg.h>
57 
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_clone.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/if_var.h>
64 #include <net/radix.h>
65 #include <net/route.h>
66 
67 #if defined(INET) || defined(INET6)
68 /*XXX*/
69 #include <netinet/in.h>
70 #include <netinet/in_var.h>
71 #ifdef INET6
72 #include <netinet6/in6_var.h>
73 #include <netinet6/in6_ifattach.h>
74 #endif
75 #endif
76 #ifdef INET
77 #include <netinet/if_ether.h>
78 #endif
79 
/*
 * Optional dequeue hook taking an ifaltq and an int; it starts out NULL
 * and is never assigned in the code visible here.
 * NOTE(review): presumably installed by ALTQ's token-bucket regulator --
 * confirm against the ALTQ sources.
 */
80 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
81 
/*
 * Forward declarations for the file-local helpers defined further down,
 * so they can be referenced (e.g. from SYSINIT and the cdevsw handlers)
 * before their definitions.
 */
82 static void	if_attachdomain(void *);
83 static void	if_attachdomain1(struct ifnet *);
84 static int	ifconf(u_long, caddr_t);
85 static void	if_grow(void);
86 static void	if_init(void *);
87 static void	if_check(void *);
88 static int	if_findindex(struct ifnet *);
89 static void	if_qflush(struct ifaltq *);
90 static void	if_route(struct ifnet *, int flag, int fam);
91 static void	if_slowtimo(void *);
92 static void	if_unroute(struct ifnet *, int flag, int fam);
93 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
94 static int	if_rtdel(struct radix_node *, void *);
95 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
96 static void	if_start_deferred(void *context, int pending);
97 #ifdef INET6
98 /*
99  * XXX: declare here to avoid to include many inet6 related files..
100  * should be more generalized?
101  */
102 extern void	nd6_setmtu(struct ifnet *);
103 #endif
104 
/* Highest interface index in use: raised in if_attach(), trimmed in if_detach(). */
105 int	if_index = 0;
/* Index-keyed table of per-interface entries; allocated and enlarged by if_grow(). */
106 struct	ifindex_entry *ifindex_table = NULL;
/* Send-queue length that if_check() applies when a driver left ifq_maxlen at 0. */
107 int	ifqmaxlen = IFQ_MAXLEN;
/* List of all attached interfaces; set up with TAILQ_INIT() in if_init(). */
108 struct	ifnethead ifnet;	/* depend on static init XXX */
/* NOTE(review): presumably the mutex behind the IFNET_*LOCK() macros -- confirm in if_var.h. */
109 struct	mtx ifnet_lock;
110 
/* Capacity of ifindex_table in entries; doubled by every if_grow() call. */
111 static int	if_indexlim = 8;
/* knote list used by netkqfilter() for the minor-0 "network" device. */
112 static struct	knlist ifklist;
113 
114 static void	filt_netdetach(struct knote *kn);
115 static int	filt_netdev(struct knote *kn, long hint);
116 
/*
 * Filter operations that netkqfilter() installs on EVFILT_NETDEV knotes.
 * NOTE(review): positional initializer; presumably
 * { f_isfd, f_attach, f_detach, f_event } -- confirm against struct filterops.
 */
117 static struct filterops netdev_filtops =
118     { 1, NULL, filt_netdetach, filt_netdev };
119 
120 /*
121  * System initialization
122  */
/*
 * Register if_init() and if_check() as boot-time initializers, and
 * define the malloc types used for interface and multicast addresses.
 * NOTE(review): relative ordering of the SI_SUB_* stages is defined in
 * sys/kernel.h, not here -- confirm before relying on it.
 */
123 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
124 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
125 
126 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
127 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
128 
/* Entry points of the "net" character device, defined below. */
129 static d_open_t		netopen;
130 static d_close_t	netclose;
131 static d_ioctl_t	netioctl;
132 static d_kqfilter_t	netkqfilter;
133 
/*
 * Character-device switch shared by the minor-0 "network" control node
 * (created in if_init()) and the per-interface nodes (created in
 * if_attach()).  Its d_name also serves as the device-hint name looked
 * up by if_findindex().
 */
134 static struct cdevsw net_cdevsw = {
135 	.d_version =	D_VERSION,
136 	.d_flags =	D_NEEDGIANT,
137 	.d_open =	netopen,
138 	.d_close =	netclose,
139 	.d_ioctl =	netioctl,
140 	.d_name =	"net",
141 	.d_kqfilter =	netkqfilter,
142 };
143 
/*
 * Open entry point of the "net" devices.  There is no per-open state to
 * set up, so every open succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	const int error = 0;

	return (error);
}
149 
/*
 * Close entry point of the "net" devices.  Nothing was acquired in
 * netopen(), so there is nothing to release; always succeeds.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	const int error = 0;

	return (error);
}
155 
156 static int
157 netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
158 {
159 	struct ifnet *ifp;
160 	int error, idx;
161 
162 	/* only support interface specific ioctls */
163 	if (IOCGROUP(cmd) != 'i')
164 		return (EOPNOTSUPP);
165 	idx = minor(dev);
166 	if (idx == 0) {
167 		/*
168 		 * special network device, not interface.
169 		 */
170 		if (cmd == SIOCGIFCONF)
171 			return (ifconf(cmd, data));	/* XXX remove cmd */
172 		return (EOPNOTSUPP);
173 	}
174 
175 	ifp = ifnet_byindex(idx);
176 	if (ifp == NULL)
177 		return (ENXIO);
178 
179 	error = ifhwioctl(cmd, ifp, data, td);
180 	if (error == ENOIOCTL)
181 		error = EOPNOTSUPP;
182 	return (error);
183 }
184 
185 static int
186 netkqfilter(struct cdev *dev, struct knote *kn)
187 {
188 	struct knlist *klist;
189 	struct ifnet *ifp;
190 	int idx;
191 
192 	switch (kn->kn_filter) {
193 	case EVFILT_NETDEV:
194 		kn->kn_fop = &netdev_filtops;
195 		break;
196 	default:
197 		return (1);
198 	}
199 
200 	idx = minor(dev);
201 	if (idx == 0) {
202 		klist = &ifklist;
203 	} else {
204 		ifp = ifnet_byindex(idx);
205 		if (ifp == NULL)
206 			return (1);
207 		klist = &ifp->if_klist;
208 	}
209 
210 	kn->kn_hook = (caddr_t)klist;
211 
212 	knlist_add(klist, kn, 0);
213 
214 	return (0);
215 }
216 
217 static void
218 filt_netdetach(struct knote *kn)
219 {
220 	struct knlist *klist = (struct knlist *)kn->kn_hook;
221 
222 	if (kn->kn_status & KN_DETACHED)
223 		return;
224 
225 	knlist_remove(klist, kn, 0);
226 }
227 
228 static int
229 filt_netdev(struct knote *kn, long hint)
230 {
231 	struct knlist *klist = (struct knlist *)kn->kn_hook;
232 
233 	/*
234 	 * Currently NOTE_EXIT is abused to indicate device detach.
235 	 */
236 	if (hint == NOTE_EXIT) {
237 		kn->kn_data = NOTE_LINKINV;
238 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
239 		knlist_remove_inevent(klist, kn);
240 		return (1);
241 	}
242 	if (hint != 0)
243 		kn->kn_data = hint;			/* current status */
244 	if (kn->kn_sfflags & hint)
245 		kn->kn_fflags |= hint;
246 	return (kn->kn_fflags != 0);
247 }
248 
249 /*
250  * Network interface utility routines.
251  *
252  * Routines with ifa_ifwith* names take sockaddr *'s as
253  * parameters.
254  */
255 /* ARGSUSED*/
256 static void
257 if_init(void *dummy __unused)
258 {
259 
260 	IFNET_LOCK_INIT();
261 	TAILQ_INIT(&ifnet);
262 	knlist_init(&ifklist, NULL);
263 	if_grow();				/* create initial table */
264 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
265 	    UID_ROOT, GID_WHEEL, 0600, "network");
266 	if_clone_init();
267 }
268 
269 static void
270 if_grow(void)
271 {
272 	u_int n;
273 	struct ifindex_entry *e;
274 
275 	if_indexlim <<= 1;
276 	n = if_indexlim * sizeof(*e);
277 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
278 	if (ifindex_table != NULL) {
279 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
280 		free((caddr_t)ifindex_table, M_IFADDR);
281 	}
282 	ifindex_table = e;
283 }
284 
285 /* ARGSUSED*/
286 static void
287 if_check(void *dummy __unused)
288 {
289 	struct ifnet *ifp;
290 	int s;
291 
292 	s = splimp();
293 	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
294 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
295 		if (ifp->if_snd.ifq_maxlen == 0) {
296 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
297 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
298 		}
299 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
300 			if_printf(ifp,
301 			    "XXX: driver didn't initialize queue mtx\n");
302 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
303 			    MTX_NETWORK_LOCK, MTX_DEF);
304 		}
305 	}
306 	IFNET_RUNLOCK();
307 	splx(s);
308 	if_slowtimo(0);
309 }
310 
311 static int
312 if_findindex(struct ifnet *ifp)
313 {
314 	int i, unit;
315 	char eaddr[18], devname[32];
316 	const char *name, *p;
317 
318 	switch (ifp->if_type) {
319 	case IFT_ETHER:			/* these types use struct arpcom */
320 	case IFT_FDDI:
321 	case IFT_XETHER:
322 	case IFT_ISO88025:
323 	case IFT_L2VLAN:
324 		snprintf(eaddr, 18, "%6D", IFP2AC(ifp)->ac_enaddr, ":");
325 		break;
326 	default:
327 		eaddr[0] = '\0';
328 		break;
329 	}
330 	strlcpy(devname, ifp->if_xname, sizeof(devname));
331 	name = net_cdevsw.d_name;
332 	i = 0;
333 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
334 		if (resource_string_value(name, unit, "ether", &p) == 0)
335 			if (strcmp(p, eaddr) == 0)
336 				goto found;
337 		if (resource_string_value(name, unit, "dev", &p) == 0)
338 			if (strcmp(p, devname) == 0)
339 				goto found;
340 	}
341 	unit = 0;
342 found:
343 	if (unit != 0) {
344 		if (ifaddr_byindex(unit) == NULL)
345 			return (unit);
346 		printf("%s%d in use, cannot hardwire it to %s.\n",
347 		    name, unit, devname);
348 	}
349 	for (unit = 1; ; unit++) {
350 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
351 			continue;
352 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
353 		    resource_string_value(name, unit, "dev", &p) == 0)
354 			continue;
355 		break;
356 	}
357 	return (unit);
358 }
359 
360 /*
361  * Attach an interface to the
362  * list of "active" interfaces.
363  */
364 void
365 if_attach(struct ifnet *ifp)
366 {
367 	unsigned socksize, ifasize;
368 	int namelen, masklen;
369 	struct sockaddr_dl *sdl;
370 	struct ifaddr *ifa;
371 
372 	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
373 	IF_AFDATA_LOCK_INIT(ifp);
374 	ifp->if_afdata_initialized = 0;
375 	IFNET_WLOCK();
376 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
377 	IFNET_WUNLOCK();
378 	/*
379 	 * XXX -
380 	 * The old code would work if the interface passed a pre-existing
381 	 * chain of ifaddrs to this code.  We don't trust our callers to
382 	 * properly initialize the tailq, however, so we no longer allow
383 	 * this unlikely case.
384 	 */
385 	TAILQ_INIT(&ifp->if_addrhead);
386 	TAILQ_INIT(&ifp->if_prefixhead);
387 	TAILQ_INIT(&ifp->if_multiaddrs);
388 	knlist_init(&ifp->if_klist, NULL);
389 	getmicrotime(&ifp->if_lastchange);
390 	getmicrotime(&ifp->if_data.ifi_epoch);
391 
392 #ifdef MAC
393 	mac_init_ifnet(ifp);
394 	mac_create_ifnet(ifp);
395 #endif
396 
397 	ifp->if_index = if_findindex(ifp);
398 	if (ifp->if_index > if_index)
399 		if_index = ifp->if_index;
400 	if (if_index >= if_indexlim)
401 		if_grow();
402 
403 	ifnet_byindex(ifp->if_index) = ifp;
404 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
405 	    unit2minor(ifp->if_index),
406 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
407 	    net_cdevsw.d_name, ifp->if_xname);
408 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
409 	    net_cdevsw.d_name, ifp->if_index);
410 
411 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
412 
413 	/*
414 	 * create a Link Level name for this device
415 	 */
416 	namelen = strlen(ifp->if_xname);
417 	/*
418 	 * Always save enough space for any possiable name so we can do
419 	 * a rename in place later.
420 	 */
421 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
422 	socksize = masklen + ifp->if_addrlen;
423 	if (socksize < sizeof(*sdl))
424 		socksize = sizeof(*sdl);
425 	socksize = roundup2(socksize, sizeof(long));
426 	ifasize = sizeof(*ifa) + 2 * socksize;
427 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
428 	IFA_LOCK_INIT(ifa);
429 	sdl = (struct sockaddr_dl *)(ifa + 1);
430 	sdl->sdl_len = socksize;
431 	sdl->sdl_family = AF_LINK;
432 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
433 	sdl->sdl_nlen = namelen;
434 	sdl->sdl_index = ifp->if_index;
435 	sdl->sdl_type = ifp->if_type;
436 	ifaddr_byindex(ifp->if_index) = ifa;
437 	ifa->ifa_ifp = ifp;
438 	ifa->ifa_rtrequest = link_rtrequest;
439 	ifa->ifa_addr = (struct sockaddr *)sdl;
440 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
441 	ifa->ifa_netmask = (struct sockaddr *)sdl;
442 	sdl->sdl_len = masklen;
443 	while (namelen != 0)
444 		sdl->sdl_data[--namelen] = 0xff;
445 	ifa->ifa_refcnt = 1;
446 	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
447 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
448 	ifp->if_snd.altq_type = 0;
449 	ifp->if_snd.altq_disc = NULL;
450 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
451 	ifp->if_snd.altq_tbr  = NULL;
452 	ifp->if_snd.altq_ifp  = ifp;
453 
454 	if (domains)
455 		if_attachdomain1(ifp);
456 
457 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
458 
459 	/* Announce the interface. */
460 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
461 }
462 
463 static void
464 if_attachdomain(void *dummy)
465 {
466 	struct ifnet *ifp;
467 	int s;
468 
469 	s = splnet();
470 	TAILQ_FOREACH(ifp, &ifnet, if_link)
471 		if_attachdomain1(ifp);
472 	splx(s);
473 }
474 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
475     if_attachdomain, NULL);
476 
477 static void
478 if_attachdomain1(struct ifnet *ifp)
479 {
480 	struct domain *dp;
481 	int s;
482 
483 	s = splnet();
484 
485 	/*
486 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
487 	 * cannot lock ifp->if_afdata initialization, entirely.
488 	 */
489 	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
490 		splx(s);
491 		return;
492 	}
493 	if (ifp->if_afdata_initialized) {
494 		IF_AFDATA_UNLOCK(ifp);
495 		splx(s);
496 		return;
497 	}
498 	ifp->if_afdata_initialized = 1;
499 	IF_AFDATA_UNLOCK(ifp);
500 
501 	/* address family dependent data region */
502 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
503 	for (dp = domains; dp; dp = dp->dom_next) {
504 		if (dp->dom_ifattach)
505 			ifp->if_afdata[dp->dom_family] =
506 			    (*dp->dom_ifattach)(ifp);
507 	}
508 
509 	splx(s);
510 }
511 
512 /*
513  * Detach an interface, removing it from the
514  * list of "active" interfaces.
515  */
516 void
517 if_detach(struct ifnet *ifp)
518 {
519 	struct ifaddr *ifa, *next;
520 	struct radix_node_head	*rnh;
521 	int s;
522 	int i;
523 	struct domain *dp;
524  	struct ifnet *iter;
525  	int found;
526 
527 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
528 	/*
529 	 * Remove routes and flush queues.
530 	 */
531 	s = splnet();
532 	if_down(ifp);
533 #ifdef ALTQ
534 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
535 		altq_disable(&ifp->if_snd);
536 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
537 		altq_detach(&ifp->if_snd);
538 #endif
539 
540 	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = next) {
541 		next = TAILQ_NEXT(ifa, ifa_link);
542 
543 		if (ifa->ifa_addr->sa_family == AF_LINK)
544 			continue;
545 #ifdef INET
546 		/* XXX: Ugly!! ad hoc just for INET */
547 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
548 			struct ifaliasreq ifr;
549 
550 			bzero(&ifr, sizeof(ifr));
551 			ifr.ifra_addr = *ifa->ifa_addr;
552 			if (ifa->ifa_dstaddr)
553 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
554 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
555 			    NULL) == 0)
556 				continue;
557 		}
558 #endif /* INET */
559 #ifdef INET6
560 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
561 			in6_purgeaddr(ifa);
562 			/* ifp_addrhead is already updated */
563 			continue;
564 		}
565 #endif /* INET6 */
566 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
567 		IFAFREE(ifa);
568 	}
569 
570 #ifdef INET6
571 	/*
572 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
573 	 * before removing routing entries below, since IPv6 interface direct
574 	 * routes are expected to be removed by the IPv6-specific kernel API.
575 	 * Otherwise, the kernel will detect some inconsistency and bark it.
576 	 */
577 	in6_ifdetach(ifp);
578 #endif
579 	/*
580 	 * Remove address from ifindex_table[] and maybe decrement if_index.
581 	 * Clean up all addresses.
582 	 */
583 	ifnet_byindex(ifp->if_index) = NULL;
584 	ifaddr_byindex(ifp->if_index) = NULL;
585 	destroy_dev(ifdev_byindex(ifp->if_index));
586 	ifdev_byindex(ifp->if_index) = NULL;
587 
588 	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
589 		if_index--;
590 
591 
592 	/* We can now free link ifaddr. */
593 	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
594 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
595 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
596 		IFAFREE(ifa);
597 	}
598 
599 	/*
600 	 * Delete all remaining routes using this interface
601 	 * Unfortuneatly the only way to do this is to slog through
602 	 * the entire routing table looking for routes which point
603 	 * to this interface...oh well...
604 	 */
605 	for (i = 1; i <= AF_MAX; i++) {
606 		if ((rnh = rt_tables[i]) == NULL)
607 			continue;
608 		RADIX_NODE_HEAD_LOCK(rnh);
609 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
610 		RADIX_NODE_HEAD_UNLOCK(rnh);
611 	}
612 
613 	/* Announce that the interface is gone. */
614 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
615 
616 	IF_AFDATA_LOCK(ifp);
617 	for (dp = domains; dp; dp = dp->dom_next) {
618 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
619 			(*dp->dom_ifdetach)(ifp,
620 			    ifp->if_afdata[dp->dom_family]);
621 	}
622 	IF_AFDATA_UNLOCK(ifp);
623 
624 #ifdef MAC
625 	mac_destroy_ifnet(ifp);
626 #endif /* MAC */
627 	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
628 	knlist_clear(&ifp->if_klist, 0);
629 	knlist_destroy(&ifp->if_klist);
630 	IFNET_WLOCK();
631  	found = 0;
632  	TAILQ_FOREACH(iter, &ifnet, if_link)
633  		if (iter == ifp) {
634  			found = 1;
635  			break;
636  		}
637  	if (found)
638  		TAILQ_REMOVE(&ifnet, ifp, if_link);
639 	IFNET_WUNLOCK();
640 	mtx_destroy(&ifp->if_snd.ifq_mtx);
641 	IF_AFDATA_DESTROY(ifp);
642 	splx(s);
643 }
644 
645 /*
646  * Delete Routes for a Network Interface
647  *
648  * Called for each routing entry via the rnh->rnh_walktree() call above
649  * to delete all route entries referencing a detaching network interface.
650  *
651  * Arguments:
652  *	rn	pointer to node in the routing table
653  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
654  *
655  * Returns:
656  *	0	successful
657  *	errno	failed - reason indicated
658  *
659  */
660 static int
661 if_rtdel(struct radix_node *rn, void *arg)
662 {
663 	struct rtentry	*rt = (struct rtentry *)rn;
664 	struct ifnet	*ifp = arg;
665 	int		err;
666 
667 	if (rt->rt_ifp == ifp) {
668 
669 		/*
670 		 * Protect (sorta) against walktree recursion problems
671 		 * with cloned routes
672 		 */
673 		if ((rt->rt_flags & RTF_UP) == 0)
674 			return (0);
675 
676 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
677 				rt_mask(rt), rt->rt_flags,
678 				(struct rtentry **) NULL);
679 		if (err) {
680 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
681 		}
682 	}
683 
684 	return (0);
685 }
686 
/* True when the first a1->sa_len bytes of the two sockaddrs match. */
#define	equal(a1, a2)	(!bcmp((a1), (a2), (a1)->sa_len))
688 
689 /*
690  * Locate an interface based on a complete address.
691  */
692 /*ARGSUSED*/
693 struct ifaddr *
694 ifa_ifwithaddr(struct sockaddr *addr)
695 {
696 	struct ifnet *ifp;
697 	struct ifaddr *ifa;
698 
699 	IFNET_RLOCK();
700 	TAILQ_FOREACH(ifp, &ifnet, if_link)
701 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
702 			if (ifa->ifa_addr->sa_family != addr->sa_family)
703 				continue;
704 			if (equal(addr, ifa->ifa_addr))
705 				goto done;
706 			/* IP6 doesn't have broadcast */
707 			if ((ifp->if_flags & IFF_BROADCAST) &&
708 			    ifa->ifa_broadaddr &&
709 			    ifa->ifa_broadaddr->sa_len != 0 &&
710 			    equal(ifa->ifa_broadaddr, addr))
711 				goto done;
712 		}
713 	ifa = NULL;
714 done:
715 	IFNET_RUNLOCK();
716 	return (ifa);
717 }
718 
719 /*
720  * Locate the point to point interface with a given destination address.
721  */
722 /*ARGSUSED*/
723 struct ifaddr *
724 ifa_ifwithdstaddr(struct sockaddr *addr)
725 {
726 	struct ifnet *ifp;
727 	struct ifaddr *ifa;
728 
729 	IFNET_RLOCK();
730 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
731 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
732 			continue;
733 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
734 			if (ifa->ifa_addr->sa_family != addr->sa_family)
735 				continue;
736 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
737 				goto done;
738 		}
739 	}
740 	ifa = NULL;
741 done:
742 	IFNET_RUNLOCK();
743 	return (ifa);
744 }
745 
746 /*
747  * Find an interface on a specific network.  If many, choice
748  * is most specific found.
749  */
750 struct ifaddr *
751 ifa_ifwithnet(struct sockaddr *addr)
752 {
753 	struct ifnet *ifp;
754 	struct ifaddr *ifa;
755 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
756 	u_int af = addr->sa_family;
757 	char *addr_data = addr->sa_data, *cplim;
758 
759 	/*
760 	 * AF_LINK addresses can be looked up directly by their index number,
761 	 * so do that if we can.
762 	 */
763 	if (af == AF_LINK) {
764 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
765 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
766 		return (ifaddr_byindex(sdl->sdl_index));
767 	}
768 
769 	/*
770 	 * Scan though each interface, looking for ones that have
771 	 * addresses in this address family.
772 	 */
773 	IFNET_RLOCK();
774 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
775 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
776 			char *cp, *cp2, *cp3;
777 
778 			if (ifa->ifa_addr->sa_family != af)
779 next:				continue;
780 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
781 				/*
782 				 * This is a bit broken as it doesn't
783 				 * take into account that the remote end may
784 				 * be a single node in the network we are
785 				 * looking for.
786 				 * The trouble is that we don't know the
787 				 * netmask for the remote end.
788 				 */
789 				if (ifa->ifa_dstaddr != 0
790 				    && equal(addr, ifa->ifa_dstaddr))
791 					goto done;
792 			} else {
793 				/*
794 				 * if we have a special address handler,
795 				 * then use it instead of the generic one.
796 				 */
797 				if (ifa->ifa_claim_addr) {
798 					if ((*ifa->ifa_claim_addr)(ifa, addr))
799 						goto done;
800 					continue;
801 				}
802 
803 				/*
804 				 * Scan all the bits in the ifa's address.
805 				 * If a bit dissagrees with what we are
806 				 * looking for, mask it with the netmask
807 				 * to see if it really matters.
808 				 * (A byte at a time)
809 				 */
810 				if (ifa->ifa_netmask == 0)
811 					continue;
812 				cp = addr_data;
813 				cp2 = ifa->ifa_addr->sa_data;
814 				cp3 = ifa->ifa_netmask->sa_data;
815 				cplim = ifa->ifa_netmask->sa_len
816 					+ (char *)ifa->ifa_netmask;
817 				while (cp3 < cplim)
818 					if ((*cp++ ^ *cp2++) & *cp3++)
819 						goto next; /* next address! */
820 				/*
821 				 * If the netmask of what we just found
822 				 * is more specific than what we had before
823 				 * (if we had one) then remember the new one
824 				 * before continuing to search
825 				 * for an even better one.
826 				 */
827 				if (ifa_maybe == 0 ||
828 				    rn_refines((caddr_t)ifa->ifa_netmask,
829 				    (caddr_t)ifa_maybe->ifa_netmask))
830 					ifa_maybe = ifa;
831 			}
832 		}
833 	}
834 	ifa = ifa_maybe;
835 done:
836 	IFNET_RUNLOCK();
837 	return (ifa);
838 }
839 
840 /*
841  * Find an interface address specific to an interface best matching
842  * a given address.
843  */
844 struct ifaddr *
845 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
846 {
847 	struct ifaddr *ifa;
848 	char *cp, *cp2, *cp3;
849 	char *cplim;
850 	struct ifaddr *ifa_maybe = 0;
851 	u_int af = addr->sa_family;
852 
853 	if (af >= AF_MAX)
854 		return (0);
855 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
856 		if (ifa->ifa_addr->sa_family != af)
857 			continue;
858 		if (ifa_maybe == 0)
859 			ifa_maybe = ifa;
860 		if (ifa->ifa_netmask == 0) {
861 			if (equal(addr, ifa->ifa_addr) ||
862 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
863 				goto done;
864 			continue;
865 		}
866 		if (ifp->if_flags & IFF_POINTOPOINT) {
867 			if (equal(addr, ifa->ifa_dstaddr))
868 				goto done;
869 		} else {
870 			cp = addr->sa_data;
871 			cp2 = ifa->ifa_addr->sa_data;
872 			cp3 = ifa->ifa_netmask->sa_data;
873 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
874 			for (; cp3 < cplim; cp3++)
875 				if ((*cp++ ^ *cp2++) & *cp3)
876 					break;
877 			if (cp3 == cplim)
878 				goto done;
879 		}
880 	}
881 	ifa = ifa_maybe;
882 done:
883 	return (ifa);
884 }
885 
886 #include <net/route.h>
887 
888 /*
889  * Default action when installing a route with a Link Level gateway.
890  * Lookup an appropriate real ifa to point to.
891  * This should be moved to /sys/net/link.c eventually.
892  */
893 static void
894 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
895 {
896 	struct ifaddr *ifa, *oifa;
897 	struct sockaddr *dst;
898 	struct ifnet *ifp;
899 
900 	RT_LOCK_ASSERT(rt);
901 
902 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
903 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
904 		return;
905 	ifa = ifaof_ifpforaddr(dst, ifp);
906 	if (ifa) {
907 		IFAREF(ifa);		/* XXX */
908 		oifa = rt->rt_ifa;
909 		rt->rt_ifa = ifa;
910 		IFAFREE(oifa);
911 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
912 			ifa->ifa_rtrequest(cmd, rt, info);
913 	}
914 }
915 
916 /*
917  * Mark an interface down and notify protocols of
918  * the transition.
919  * NOTE: must be called at splnet or eqivalent.
920  */
921 static void
922 if_unroute(struct ifnet *ifp, int flag, int fam)
923 {
924 	struct ifaddr *ifa;
925 
926 	ifp->if_flags &= ~flag;
927 	getmicrotime(&ifp->if_lastchange);
928 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
929 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
930 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
931 	if_qflush(&ifp->if_snd);
932 	rt_ifmsg(ifp);
933 }
934 
935 /*
936  * Mark an interface up and notify protocols of
937  * the transition.
938  * NOTE: must be called at splnet or eqivalent.
939  */
940 static void
941 if_route(struct ifnet *ifp, int flag, int fam)
942 {
943 	struct ifaddr *ifa;
944 
945 	ifp->if_flags |= flag;
946 	getmicrotime(&ifp->if_lastchange);
947 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
948 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
949 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
950 	rt_ifmsg(ifp);
951 #ifdef INET6
952 	in6_if_up(ifp);
953 #endif
954 }
955 
956 /*
957  * Mark an interface down and notify protocols of
958  * the transition.
959  * NOTE: must be called at splnet or eqivalent.
960  */
961 void
962 if_down(struct ifnet *ifp)
963 {
964 
965 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
966 }
967 
968 /*
969  * Mark an interface up and notify protocols of
970  * the transition.
971  * NOTE: must be called at splnet or eqivalent.
972  */
973 void
974 if_up(struct ifnet *ifp)
975 {
976 
977 	if_route(ifp, IFF_UP, AF_UNSPEC);
978 }
979 
980 /*
981  * Flush an interface queue.
982  */
983 static void
984 if_qflush(struct ifaltq *ifq)
985 {
986 	struct mbuf *m, *n;
987 
988 #ifdef ALTQ
989 	if (ALTQ_IS_ENABLED(ifq))
990 		ALTQ_PURGE(ifq);
991 #endif
992 	n = ifq->ifq_head;
993 	while ((m = n) != 0) {
994 		n = m->m_act;
995 		m_freem(m);
996 	}
997 	ifq->ifq_head = 0;
998 	ifq->ifq_tail = 0;
999 	ifq->ifq_len = 0;
1000 }
1001 
1002 /*
1003  * Handle interface watchdog timer routines.  Called
1004  * from softclock, we decrement timers (if set) and
1005  * call the appropriate interface routine on expiration.
1006  *
1007  * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1008  * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1009  * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1010  */
1011 static void
1012 if_slowtimo(void *arg)
1013 {
1014 	struct ifnet *ifp;
1015 	int s = splimp();
1016 
1017 	IFNET_RLOCK();
1018 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1019 		if (ifp->if_timer == 0 || --ifp->if_timer)
1020 			continue;
1021 		if (ifp->if_watchdog)
1022 			(*ifp->if_watchdog)(ifp);
1023 	}
1024 	IFNET_RUNLOCK();
1025 	splx(s);
1026 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1027 }
1028 
1029 /*
1030  * Map interface name to
1031  * interface structure pointer.
1032  */
1033 struct ifnet *
1034 ifunit(const char *name)
1035 {
1036 	struct ifnet *ifp;
1037 
1038 	IFNET_RLOCK();
1039 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1040 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1041 			break;
1042 	}
1043 	IFNET_RUNLOCK();
1044 	return (ifp);
1045 }
1046 
1047 /*
1048  * Hardware specific interface ioctls.
1049  */
1050 static int
1051 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1052 {
1053 	struct ifreq *ifr;
1054 	struct ifstat *ifs;
1055 	int error = 0;
1056 	int new_flags;
1057 	size_t namelen, onamelen;
1058 	char new_name[IFNAMSIZ];
1059 	struct ifaddr *ifa;
1060 	struct sockaddr_dl *sdl;
1061 
1062 	ifr = (struct ifreq *)data;
1063 	switch (cmd) {
1064 	case SIOCGIFINDEX:
1065 		ifr->ifr_index = ifp->if_index;
1066 		break;
1067 
1068 	case SIOCGIFFLAGS:
1069 		ifr->ifr_flags = ifp->if_flags & 0xffff;
1070 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1071 		break;
1072 
1073 	case SIOCGIFCAP:
1074 		ifr->ifr_reqcap = ifp->if_capabilities;
1075 		ifr->ifr_curcap = ifp->if_capenable;
1076 		break;
1077 
1078 #ifdef MAC
1079 	case SIOCGIFMAC:
1080 		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1081 		break;
1082 #endif
1083 
1084 	case SIOCGIFMETRIC:
1085 		ifr->ifr_metric = ifp->if_metric;
1086 		break;
1087 
1088 	case SIOCGIFMTU:
1089 		ifr->ifr_mtu = ifp->if_mtu;
1090 		break;
1091 
1092 	case SIOCGIFPHYS:
1093 		ifr->ifr_phys = ifp->if_physical;
1094 		break;
1095 
1096 	case SIOCSIFFLAGS:
1097 		error = suser(td);
1098 		if (error)
1099 			return (error);
1100 		new_flags = (ifr->ifr_flags & 0xffff) |
1101 		    (ifr->ifr_flagshigh << 16);
1102 		if (ifp->if_flags & IFF_SMART) {
1103 			/* Smart drivers twiddle their own routes */
1104 		} else if (ifp->if_flags & IFF_UP &&
1105 		    (new_flags & IFF_UP) == 0) {
1106 			int s = splimp();
1107 			if_down(ifp);
1108 			splx(s);
1109 		} else if (new_flags & IFF_UP &&
1110 		    (ifp->if_flags & IFF_UP) == 0) {
1111 			int s = splimp();
1112 			if_up(ifp);
1113 			splx(s);
1114 		}
1115 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1116 			(new_flags &~ IFF_CANTCHANGE);
1117 		if (new_flags & IFF_PPROMISC) {
1118 			/* Permanently promiscuous mode requested */
1119 			ifp->if_flags |= IFF_PROMISC;
1120 		} else if (ifp->if_pcount == 0) {
1121 			ifp->if_flags &= ~IFF_PROMISC;
1122 		}
1123 		if (ifp->if_ioctl)
1124 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1125 		getmicrotime(&ifp->if_lastchange);
1126 		break;
1127 
1128 	case SIOCSIFCAP:
1129 		error = suser(td);
1130 		if (error)
1131 			return (error);
1132 		if (ifp->if_ioctl == NULL)
1133 			return (EOPNOTSUPP);
1134 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1135 			return (EINVAL);
1136 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1137 		if (error == 0)
1138 			getmicrotime(&ifp->if_lastchange);
1139 		break;
1140 
1141 #ifdef MAC
1142 	case SIOCSIFMAC:
1143 		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1144 		break;
1145 #endif
1146 
1147 	case SIOCSIFNAME:
1148 		error = suser(td);
1149 		if (error != 0)
1150 			return (error);
1151 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1152 		if (error != 0)
1153 			return (error);
1154 		if (new_name[0] == '\0')
1155 			return (EINVAL);
1156 		if (ifunit(new_name) != NULL)
1157 			return (EEXIST);
1158 
1159 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1160 		/* Announce the departure of the interface. */
1161 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1162 
1163 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1164 		ifa = ifaddr_byindex(ifp->if_index);
1165 		IFA_LOCK(ifa);
1166 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1167 		namelen = strlen(new_name);
1168 		onamelen = sdl->sdl_nlen;
1169 		/*
1170 		 * Move the address if needed.  This is safe because we
1171 		 * allocate space for a name of length IFNAMSIZ when we
1172 		 * create this in if_attach().
1173 		 */
1174 		if (namelen != onamelen) {
1175 			bcopy(sdl->sdl_data + onamelen,
1176 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1177 		}
1178 		bcopy(new_name, sdl->sdl_data, namelen);
1179 		sdl->sdl_nlen = namelen;
1180 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1181 		bzero(sdl->sdl_data, onamelen);
1182 		while (namelen != 0)
1183 			sdl->sdl_data[--namelen] = 0xff;
1184 		IFA_UNLOCK(ifa);
1185 
1186 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1187 		/* Announce the return of the interface. */
1188 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1189 		break;
1190 
1191 	case SIOCSIFMETRIC:
1192 		error = suser(td);
1193 		if (error)
1194 			return (error);
1195 		ifp->if_metric = ifr->ifr_metric;
1196 		getmicrotime(&ifp->if_lastchange);
1197 		break;
1198 
1199 	case SIOCSIFPHYS:
1200 		error = suser(td);
1201 		if (error)
1202 			return (error);
1203 		if (ifp->if_ioctl == NULL)
1204 			return (EOPNOTSUPP);
1205 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1206 		if (error == 0)
1207 			getmicrotime(&ifp->if_lastchange);
1208 		break;
1209 
1210 	case SIOCSIFMTU:
1211 	{
1212 		u_long oldmtu = ifp->if_mtu;
1213 
1214 		error = suser(td);
1215 		if (error)
1216 			return (error);
1217 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1218 			return (EINVAL);
1219 		if (ifp->if_ioctl == NULL)
1220 			return (EOPNOTSUPP);
1221 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1222 		if (error == 0) {
1223 			getmicrotime(&ifp->if_lastchange);
1224 			rt_ifmsg(ifp);
1225 		}
1226 		/*
1227 		 * If the link MTU changed, do network layer specific procedure.
1228 		 */
1229 		if (ifp->if_mtu != oldmtu) {
1230 #ifdef INET6
1231 			nd6_setmtu(ifp);
1232 #endif
1233 		}
1234 		break;
1235 	}
1236 
1237 	case SIOCADDMULTI:
1238 	case SIOCDELMULTI:
1239 		error = suser(td);
1240 		if (error)
1241 			return (error);
1242 
1243 		/* Don't allow group membership on non-multicast interfaces. */
1244 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1245 			return (EOPNOTSUPP);
1246 
1247 		/* Don't let users screw up protocols' entries. */
1248 		if (ifr->ifr_addr.sa_family != AF_LINK)
1249 			return (EINVAL);
1250 
1251 		if (cmd == SIOCADDMULTI) {
1252 			struct ifmultiaddr *ifma;
1253 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1254 		} else {
1255 			error = if_delmulti(ifp, &ifr->ifr_addr);
1256 		}
1257 		if (error == 0)
1258 			getmicrotime(&ifp->if_lastchange);
1259 		break;
1260 
1261 	case SIOCSIFPHYADDR:
1262 	case SIOCDIFPHYADDR:
1263 #ifdef INET6
1264 	case SIOCSIFPHYADDR_IN6:
1265 #endif
1266 	case SIOCSLIFPHYADDR:
1267 	case SIOCSIFMEDIA:
1268 	case SIOCSIFGENERIC:
1269 		error = suser(td);
1270 		if (error)
1271 			return (error);
1272 		if (ifp->if_ioctl == NULL)
1273 			return (EOPNOTSUPP);
1274 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1275 		if (error == 0)
1276 			getmicrotime(&ifp->if_lastchange);
1277 		break;
1278 
1279 	case SIOCGIFSTATUS:
1280 		ifs = (struct ifstat *)data;
1281 		ifs->ascii[0] = '\0';
1282 
1283 	case SIOCGIFPSRCADDR:
1284 	case SIOCGIFPDSTADDR:
1285 	case SIOCGLIFPHYADDR:
1286 	case SIOCGIFMEDIA:
1287 	case SIOCGIFGENERIC:
1288 		if (ifp->if_ioctl == NULL)
1289 			return (EOPNOTSUPP);
1290 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1291 		break;
1292 
1293 	case SIOCSIFLLADDR:
1294 		error = suser(td);
1295 		if (error)
1296 			return (error);
1297 		error = if_setlladdr(ifp,
1298 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1299 		break;
1300 
1301 	default:
1302 		error = ENOIOCTL;
1303 		break;
1304 	}
1305 	return (error);
1306 }
1307 
1308 /*
1309  * Interface ioctls.
1310  */
1311 int
1312 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1313 {
1314 	struct ifnet *ifp;
1315 	struct ifreq *ifr;
1316 	int error;
1317 	int oif_flags;
1318 
1319 	switch (cmd) {
1320 	case SIOCGIFCONF:
1321 	case OSIOCGIFCONF:
1322 		return (ifconf(cmd, data));
1323 	}
1324 	ifr = (struct ifreq *)data;
1325 
1326 	switch (cmd) {
1327 	case SIOCIFCREATE:
1328 	case SIOCIFDESTROY:
1329 		if ((error = suser(td)) != 0)
1330 			return (error);
1331 		return ((cmd == SIOCIFCREATE) ?
1332 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1333 			if_clone_destroy(ifr->ifr_name));
1334 
1335 	case SIOCIFGCLONERS:
1336 		return (if_clone_list((struct if_clonereq *)data));
1337 	}
1338 
1339 	ifp = ifunit(ifr->ifr_name);
1340 	if (ifp == 0)
1341 		return (ENXIO);
1342 
1343 	error = ifhwioctl(cmd, ifp, data, td);
1344 	if (error != ENOIOCTL)
1345 		return (error);
1346 
1347 	oif_flags = ifp->if_flags;
1348 	if (so->so_proto == 0)
1349 		return (EOPNOTSUPP);
1350 #ifndef COMPAT_43
1351 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1352 								 data,
1353 								 ifp, td));
1354 #else
1355 	{
1356 		int ocmd = cmd;
1357 
1358 		switch (cmd) {
1359 
1360 		case SIOCSIFDSTADDR:
1361 		case SIOCSIFADDR:
1362 		case SIOCSIFBRDADDR:
1363 		case SIOCSIFNETMASK:
1364 #if BYTE_ORDER != BIG_ENDIAN
1365 			if (ifr->ifr_addr.sa_family == 0 &&
1366 			    ifr->ifr_addr.sa_len < 16) {
1367 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1368 				ifr->ifr_addr.sa_len = 16;
1369 			}
1370 #else
1371 			if (ifr->ifr_addr.sa_len == 0)
1372 				ifr->ifr_addr.sa_len = 16;
1373 #endif
1374 			break;
1375 
1376 		case OSIOCGIFADDR:
1377 			cmd = SIOCGIFADDR;
1378 			break;
1379 
1380 		case OSIOCGIFDSTADDR:
1381 			cmd = SIOCGIFDSTADDR;
1382 			break;
1383 
1384 		case OSIOCGIFBRDADDR:
1385 			cmd = SIOCGIFBRDADDR;
1386 			break;
1387 
1388 		case OSIOCGIFNETMASK:
1389 			cmd = SIOCGIFNETMASK;
1390 		}
1391 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1392 								   cmd,
1393 								   data,
1394 								   ifp, td));
1395 		switch (ocmd) {
1396 
1397 		case OSIOCGIFADDR:
1398 		case OSIOCGIFDSTADDR:
1399 		case OSIOCGIFBRDADDR:
1400 		case OSIOCGIFNETMASK:
1401 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1402 
1403 		}
1404 	}
1405 #endif /* COMPAT_43 */
1406 
1407 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1408 #ifdef INET6
1409 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1410 		if (ifp->if_flags & IFF_UP) {
1411 			int s = splimp();
1412 			in6_if_up(ifp);
1413 			splx(s);
1414 		}
1415 #endif
1416 	}
1417 	return (error);
1418 }
1419 
1420 /*
1421  * Set/clear promiscuous mode on interface ifp based on the truth value
1422  * of pswitch.  The calls are reference counted so that only the first
1423  * "on" request actually has an effect, as does the final "off" request.
1424  * Results are undefined if the "off" and "on" requests are not matched.
1425  */
1426 int
1427 ifpromisc(struct ifnet *ifp, int pswitch)
1428 {
1429 	struct ifreq ifr;
1430 	int error;
1431 	int oldflags, oldpcount;
1432 
1433 	oldpcount = ifp->if_pcount;
1434 	oldflags = ifp->if_flags;
1435 	if (ifp->if_flags & IFF_PPROMISC) {
1436 		/* Do nothing if device is in permanently promiscuous mode */
1437 		ifp->if_pcount += pswitch ? 1 : -1;
1438 		return (0);
1439 	}
1440 	if (pswitch) {
1441 		/*
1442 		 * If the device is not configured up, we cannot put it in
1443 		 * promiscuous mode.
1444 		 */
1445 		if ((ifp->if_flags & IFF_UP) == 0)
1446 			return (ENETDOWN);
1447 		if (ifp->if_pcount++ != 0)
1448 			return (0);
1449 		ifp->if_flags |= IFF_PROMISC;
1450 	} else {
1451 		if (--ifp->if_pcount > 0)
1452 			return (0);
1453 		ifp->if_flags &= ~IFF_PROMISC;
1454 	}
1455 	ifr.ifr_flags = ifp->if_flags & 0xffff;
1456 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1457 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1458 	if (error == 0) {
1459 		log(LOG_INFO, "%s: promiscuous mode %s\n",
1460 		    ifp->if_xname,
1461 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1462 		rt_ifmsg(ifp);
1463 	} else {
1464 		ifp->if_pcount = oldpcount;
1465 		ifp->if_flags = oldflags;
1466 	}
1467 	return error;
1468 }
1469 
1470 /*
1471  * Return interface configuration
1472  * of system.  List may be used
1473  * in later ioctl's (above) to get
1474  * other information.
1475  */
1476 /*ARGSUSED*/
1477 static int
1478 ifconf(u_long cmd, caddr_t data)
1479 {
1480 	struct ifconf *ifc = (struct ifconf *)data;
1481 	struct ifnet *ifp;
1482 	struct ifaddr *ifa;
1483 	struct ifreq ifr, *ifrp;
1484 	int space = ifc->ifc_len, error = 0;
1485 
1486 	ifrp = ifc->ifc_req;
1487 	IFNET_RLOCK();		/* could sleep XXX */
1488 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1489 		int addrs;
1490 
1491 		if (space < sizeof(ifr))
1492 			break;
1493 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1494 		    >= sizeof(ifr.ifr_name)) {
1495 			error = ENAMETOOLONG;
1496 			break;
1497 		}
1498 
1499 		addrs = 0;
1500 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1501 			struct sockaddr *sa = ifa->ifa_addr;
1502 
1503 			if (space < sizeof(ifr))
1504 				break;
1505 			if (jailed(curthread->td_ucred) &&
1506 			    prison_if(curthread->td_ucred, sa))
1507 				continue;
1508 			addrs++;
1509 #ifdef COMPAT_43
1510 			if (cmd == OSIOCGIFCONF) {
1511 				struct osockaddr *osa =
1512 					 (struct osockaddr *)&ifr.ifr_addr;
1513 				ifr.ifr_addr = *sa;
1514 				osa->sa_family = sa->sa_family;
1515 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1516 						sizeof (ifr));
1517 				ifrp++;
1518 			} else
1519 #endif
1520 			if (sa->sa_len <= sizeof(*sa)) {
1521 				ifr.ifr_addr = *sa;
1522 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1523 						sizeof (ifr));
1524 				ifrp++;
1525 			} else {
1526 				if (space < sizeof (ifr) + sa->sa_len -
1527 					    sizeof(*sa))
1528 					break;
1529 				space -= sa->sa_len - sizeof(*sa);
1530 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1531 						sizeof (ifr.ifr_name));
1532 				if (error == 0)
1533 				    error = copyout((caddr_t)sa,
1534 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1535 				ifrp = (struct ifreq *)
1536 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1537 			}
1538 			if (error)
1539 				break;
1540 			space -= sizeof (ifr);
1541 		}
1542 		if (error)
1543 			break;
1544 		if (!addrs) {
1545 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1546 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1547 			    sizeof (ifr));
1548 			if (error)
1549 				break;
1550 			space -= sizeof (ifr);
1551 			ifrp++;
1552 		}
1553 	}
1554 	IFNET_RUNLOCK();
1555 	ifc->ifc_len -= space;
1556 	return (error);
1557 }
1558 
1559 /*
1560  * Just like if_promisc(), but for all-multicast-reception mode.
1561  */
1562 int
1563 if_allmulti(struct ifnet *ifp, int onswitch)
1564 {
1565 	int error = 0;
1566 	int s = splimp();
1567 	struct ifreq ifr;
1568 
1569 	if (onswitch) {
1570 		if (ifp->if_amcount++ == 0) {
1571 			ifp->if_flags |= IFF_ALLMULTI;
1572 			ifr.ifr_flags = ifp->if_flags & 0xffff;
1573 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1574 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1575 		}
1576 	} else {
1577 		if (ifp->if_amcount > 1) {
1578 			ifp->if_amcount--;
1579 		} else {
1580 			ifp->if_amcount = 0;
1581 			ifp->if_flags &= ~IFF_ALLMULTI;
1582 			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1583 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1584 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1585 		}
1586 	}
1587 	splx(s);
1588 
1589 	if (error == 0)
1590 		rt_ifmsg(ifp);
1591 	return error;
1592 }
1593 
1594 /*
1595  * Add a multicast listenership to the interface in question.
1596  * The link layer provides a routine which converts
1597  */
1598 int
1599 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1600 {
1601 	struct sockaddr *llsa, *dupsa;
1602 	int error, s;
1603 	struct ifmultiaddr *ifma;
1604 
1605 	/*
1606 	 * If the matching multicast address already exists
1607 	 * then don't add a new one, just add a reference
1608 	 */
1609 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1610 		if (equal(sa, ifma->ifma_addr)) {
1611 			ifma->ifma_refcount++;
1612 			if (retifma)
1613 				*retifma = ifma;
1614 			return 0;
1615 		}
1616 	}
1617 
1618 	/*
1619 	 * Give the link layer a chance to accept/reject it, and also
1620 	 * find out which AF_LINK address this maps to, if it isn't one
1621 	 * already.
1622 	 */
1623 	if (ifp->if_resolvemulti) {
1624 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1625 		if (error) return error;
1626 	} else {
1627 		llsa = 0;
1628 	}
1629 
1630 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1631 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1632 	bcopy(sa, dupsa, sa->sa_len);
1633 
1634 	ifma->ifma_addr = dupsa;
1635 	ifma->ifma_lladdr = llsa;
1636 	ifma->ifma_ifp = ifp;
1637 	ifma->ifma_refcount = 1;
1638 	ifma->ifma_protospec = 0;
1639 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1640 
1641 	/*
1642 	 * Some network interfaces can scan the address list at
1643 	 * interrupt time; lock them out.
1644 	 */
1645 	s = splimp();
1646 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1647 	splx(s);
1648 	if (retifma != NULL)
1649 		*retifma = ifma;
1650 
1651 	if (llsa != 0) {
1652 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1653 			if (equal(ifma->ifma_addr, llsa))
1654 				break;
1655 		}
1656 		if (ifma) {
1657 			ifma->ifma_refcount++;
1658 		} else {
1659 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1660 			       M_IFMADDR, M_WAITOK);
1661 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1662 			       M_IFMADDR, M_WAITOK);
1663 			bcopy(llsa, dupsa, llsa->sa_len);
1664 			ifma->ifma_addr = dupsa;
1665 			ifma->ifma_lladdr = NULL;
1666 			ifma->ifma_ifp = ifp;
1667 			ifma->ifma_refcount = 1;
1668 			ifma->ifma_protospec = 0;
1669 			s = splimp();
1670 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1671 			splx(s);
1672 		}
1673 	}
1674 	/*
1675 	 * We are certain we have added something, so call down to the
1676 	 * interface to let them know about it.
1677 	 */
1678 	s = splimp();
1679 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1680 	splx(s);
1681 
1682 	return 0;
1683 }
1684 
1685 /*
1686  * Remove a reference to a multicast address on this interface.  Yell
1687  * if the request does not match an existing membership.
1688  */
1689 int
1690 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1691 {
1692 	struct ifmultiaddr *ifma;
1693 	int s;
1694 
1695 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1696 		if (equal(sa, ifma->ifma_addr))
1697 			break;
1698 	if (ifma == 0)
1699 		return ENOENT;
1700 
1701 	if (ifma->ifma_refcount > 1) {
1702 		ifma->ifma_refcount--;
1703 		return 0;
1704 	}
1705 
1706 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1707 	sa = ifma->ifma_lladdr;
1708 	s = splimp();
1709 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1710 	/*
1711 	 * Make sure the interface driver is notified
1712 	 * in the case of a link layer mcast group being left.
1713 	 */
1714 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1715 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1716 	splx(s);
1717 	free(ifma->ifma_addr, M_IFMADDR);
1718 	free(ifma, M_IFMADDR);
1719 	if (sa == 0)
1720 		return 0;
1721 
1722 	/*
1723 	 * Now look for the link-layer address which corresponds to
1724 	 * this network address.  It had been squirreled away in
1725 	 * ifma->ifma_lladdr for this purpose (so we don't have
1726 	 * to call ifp->if_resolvemulti() again), and we saved that
1727 	 * value in sa above.  If some nasty deleted the
1728 	 * link-layer address out from underneath us, we can deal because
1729 	 * the address we stored was is not the same as the one which was
1730 	 * in the record for the link-layer address.  (So we don't complain
1731 	 * in that case.)
1732 	 */
1733 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1734 		if (equal(sa, ifma->ifma_addr))
1735 			break;
1736 	if (ifma == 0)
1737 		return 0;
1738 
1739 	if (ifma->ifma_refcount > 1) {
1740 		ifma->ifma_refcount--;
1741 		return 0;
1742 	}
1743 
1744 	s = splimp();
1745 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1746 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1747 	splx(s);
1748 	free(ifma->ifma_addr, M_IFMADDR);
1749 	free(sa, M_IFMADDR);
1750 	free(ifma, M_IFMADDR);
1751 
1752 	return 0;
1753 }
1754 
1755 /*
1756  * Set the link layer address on an interface.
1757  *
1758  * At this time we only support certain types of interfaces,
1759  * and we don't allow the length of the address to change.
1760  */
1761 int
1762 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1763 {
1764 	struct sockaddr_dl *sdl;
1765 	struct ifaddr *ifa;
1766 	struct ifreq ifr;
1767 
1768 	ifa = ifaddr_byindex(ifp->if_index);
1769 	if (ifa == NULL)
1770 		return (EINVAL);
1771 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1772 	if (sdl == NULL)
1773 		return (EINVAL);
1774 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1775 		return (EINVAL);
1776 	switch (ifp->if_type) {
1777 	case IFT_ETHER:			/* these types use struct arpcom */
1778 	case IFT_FDDI:
1779 	case IFT_XETHER:
1780 	case IFT_ISO88025:
1781 	case IFT_L2VLAN:
1782 		bcopy(lladdr, IFP2AC(ifp)->ac_enaddr, len);
1783 		/*
1784 		 * XXX We also need to store the lladdr in LLADDR(sdl),
1785 		 * which is done below. This is a pain because we must
1786 		 * remember to keep the info in sync.
1787 		 */
1788 		/* FALLTHROUGH */
1789 	case IFT_ARCNET:
1790 		bcopy(lladdr, LLADDR(sdl), len);
1791 		break;
1792 	default:
1793 		return (ENODEV);
1794 	}
1795 	/*
1796 	 * If the interface is already up, we need
1797 	 * to re-init it in order to reprogram its
1798 	 * address filter.
1799 	 */
1800 	if ((ifp->if_flags & IFF_UP) != 0) {
1801 		ifp->if_flags &= ~IFF_UP;
1802 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1803 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1804 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1805 		ifp->if_flags |= IFF_UP;
1806 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1807 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1808 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1809 #ifdef INET
1810 		/*
1811 		 * Also send gratuitous ARPs to notify other nodes about
1812 		 * the address change.
1813 		 */
1814 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1815 			if (ifa->ifa_addr != NULL &&
1816 			    ifa->ifa_addr->sa_family == AF_INET)
1817 				arp_ifinit(ifp, ifa);
1818 		}
1819 #endif
1820 	}
1821 	return (0);
1822 }
1823 
1824 struct ifmultiaddr *
1825 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
1826 {
1827 	struct ifmultiaddr *ifma;
1828 
1829 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1830 		if (equal(ifma->ifma_addr, sa))
1831 			break;
1832 
1833 	return ifma;
1834 }
1835 
1836 /*
1837  * The name argument must be a pointer to storage which will last as
1838  * long as the interface does.  For physical devices, the result of
1839  * device_get_name(dev) is a good choice and for pseudo-devices a
1840  * static string works well.
1841  */
1842 void
1843 if_initname(struct ifnet *ifp, const char *name, int unit)
1844 {
1845 	ifp->if_dname = name;
1846 	ifp->if_dunit = unit;
1847 	if (unit != IF_DUNIT_NONE)
1848 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1849 	else
1850 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
1851 }
1852 
1853 int
1854 if_printf(struct ifnet *ifp, const char * fmt, ...)
1855 {
1856 	va_list ap;
1857 	int retval;
1858 
1859 	retval = printf("%s: ", ifp->if_xname);
1860 	va_start(ap, fmt);
1861 	retval += vprintf(fmt, ap);
1862 	va_end(ap);
1863 	return (retval);
1864 }
1865 
1866 /*
1867  * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
1868  * be called without Giant.  However, we often can't acquire the Giant lock
1869  * at those points; instead, we run it via a task queue that holds Giant via
1870  * if_start_deferred.
1871  *
1872  * XXXRW: We need to make sure that the ifnet isn't fully detached until any
1873  * outstanding if_start_deferred() tasks that will run after the free.  This
1874  * probably means waiting in if_detach().
1875  */
1876 void
1877 if_start(struct ifnet *ifp)
1878 {
1879 
1880 	NET_ASSERT_GIANT();
1881 
1882         if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
1883                 if (mtx_owned(&Giant))
1884                         (*(ifp)->if_start)(ifp);
1885                 else
1886 			taskqueue_enqueue(taskqueue_swi_giant,
1887 			    &ifp->if_starttask);
1888         } else
1889                 (*(ifp)->if_start)(ifp);
1890 }
1891 
1892 static void
1893 if_start_deferred(void *context, int pending)
1894 {
1895 	struct ifnet *ifp;
1896 
1897 	/*
1898 	 * This code must be entered with Giant, and should never run if
1899 	 * we're not running with debug.mpsafenet.
1900 	 */
1901 	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
1902 	GIANT_REQUIRED;
1903 
1904 	ifp = (struct ifnet *)context;
1905 	(ifp->if_start)(ifp);
1906 }
1907 
/*
 * sysctl nodes for the link layer: "link" is declared under _net (using
 * PF_LINK as its number) and "generic" is declared beneath it.
 */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1910