xref: /freebsd/sys/net/if.c (revision c37420b0d5b3b6ef875fbf0b84a13f6f09be56d6)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)if.c	8.5 (Berkeley) 1/9/95
30  * $FreeBSD$
31  */
32 
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36 #include "opt_mac.h"
37 
38 #include <sys/param.h>
39 #include <sys/conf.h>
40 #include <sys/mac.h>
41 #include <sys/malloc.h>
42 #include <sys/bus.h>
43 #include <sys/mbuf.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/protosw.h>
49 #include <sys/kernel.h>
50 #include <sys/sockio.h>
51 #include <sys/syslog.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54 #include <sys/domain.h>
55 #include <sys/jail.h>
56 #include <machine/stdarg.h>
57 
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_clone.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/if_var.h>
64 #include <net/radix.h>
65 #include <net/route.h>
66 
67 #if defined(INET) || defined(INET6)
68 /*XXX*/
69 #include <netinet/in.h>
70 #include <netinet/in_var.h>
71 #ifdef INET6
72 #include <netinet6/in6_var.h>
73 #include <netinet6/in6_ifattach.h>
74 #endif
75 #endif
76 #ifdef INET
77 #include <netinet/if_ether.h>
78 #endif
79 
/*
 * Optional dequeue hook; NULL until something installs it.
 * NOTE(review): presumably set by the ALTQ token bucket regulator -- confirm.
 */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;

/* Forward declarations of the file-local helpers defined further down. */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t);
static void	if_grow(void);
static void	if_init(void *);
static void	if_check(void *);
static int	if_findindex(struct ifnet *);
static void	if_qflush(struct ifaltq *);
static void	if_route(struct ifnet *, int flag, int fam);
static void	if_slowtimo(void *);
static void	if_unroute(struct ifnet *, int flag, int fam);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);
static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
static void	if_start_deferred(void *context, int pending);
#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related files;
 * should this be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

/* Highest interface index in use; raised in if_attach(), lowered in if_detach(). */
int	if_index = 0;
/* Per-index table (ifnet, link-level ifaddr, cdev); allocated and grown by if_grow(). */
struct	ifindex_entry *ifindex_table = NULL;
/* Default send queue length applied by if_check() when a driver left it at 0. */
int	ifqmaxlen = IFQ_MAXLEN;
struct	ifnethead ifnet;	/* depend on static init XXX */
/* Lock behind the IFNET_*LOCK() macros used around walks of the ifnet list. */
struct	mtx ifnet_lock;

/* Entry capacity of ifindex_table; doubled on every if_grow() call. */
static int	if_indexlim = 8;
/* Knote list for the global (minor 0) network device; see netkqfilter(). */
static struct	knlist ifklist;

/* kqueue filter callbacks wired into netdev_filtops below. */
static void	filt_netdetach(struct knote *kn);
static int	filt_netdev(struct knote *kn, long hint);

/* Filter operations installed on knotes by netkqfilter() for EVFILT_NETDEV. */
static struct filterops netdev_filtops =
    { 1, NULL, filt_netdetach, filt_netdev };
119 
/*
 * System initialization: if_init() is registered for SI_SUB_INIT_IF and
 * if_check() for SI_SUB_PROTO_IF.
 */
SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)

/* malloc(9) types used for interface addresses and link-level multicast. */
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");

/* Character-device entry points implemented below. */
static d_open_t		netopen;
static d_close_t	netclose;
static d_ioctl_t	netioctl;
static d_kqfilter_t	netkqfilter;

/*
 * Device switch shared by the global "network" device (minor 0, created
 * in if_init()) and the per-interface devices created in if_attach().
 */
static struct cdevsw net_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	netopen,
	.d_close =	netclose,
	.d_ioctl =	netioctl,
	.d_name =	"net",
	.d_kqfilter =	netkqfilter,
};
143 
/*
 * Open entry point of the network devices.  No per-open state is kept,
 * so every open succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	int error = 0;

	return (error);
}
149 
/*
 * Close entry point of the network devices.  There is nothing to tear
 * down, so every close succeeds.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error = 0;

	return (error);
}
155 
156 static int
157 netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
158 {
159 	struct ifnet *ifp;
160 	int error, idx;
161 
162 	/* only support interface specific ioctls */
163 	if (IOCGROUP(cmd) != 'i')
164 		return (EOPNOTSUPP);
165 	idx = minor(dev);
166 	if (idx == 0) {
167 		/*
168 		 * special network device, not interface.
169 		 */
170 		if (cmd == SIOCGIFCONF)
171 			return (ifconf(cmd, data));	/* XXX remove cmd */
172 		return (EOPNOTSUPP);
173 	}
174 
175 	ifp = ifnet_byindex(idx);
176 	if (ifp == NULL)
177 		return (ENXIO);
178 
179 	error = ifhwioctl(cmd, ifp, data, td);
180 	if (error == ENOIOCTL)
181 		error = EOPNOTSUPP;
182 	return (error);
183 }
184 
185 static int
186 netkqfilter(struct cdev *dev, struct knote *kn)
187 {
188 	struct knlist *klist;
189 	struct ifnet *ifp;
190 	int idx;
191 
192 	switch (kn->kn_filter) {
193 	case EVFILT_NETDEV:
194 		kn->kn_fop = &netdev_filtops;
195 		break;
196 	default:
197 		return (1);
198 	}
199 
200 	idx = minor(dev);
201 	if (idx == 0) {
202 		klist = &ifklist;
203 	} else {
204 		ifp = ifnet_byindex(idx);
205 		if (ifp == NULL)
206 			return (1);
207 		klist = &ifp->if_klist;
208 	}
209 
210 	kn->kn_hook = (caddr_t)klist;
211 
212 	knlist_add(klist, kn, 0);
213 
214 	return (0);
215 }
216 
217 static void
218 filt_netdetach(struct knote *kn)
219 {
220 	struct knlist *klist = (struct knlist *)kn->kn_hook;
221 
222 	if (kn->kn_status & KN_DETACHED)
223 		return;
224 
225 	knlist_remove(klist, kn, 0);
226 }
227 
228 static int
229 filt_netdev(struct knote *kn, long hint)
230 {
231 	struct knlist *klist = (struct knlist *)kn->kn_hook;
232 
233 	/*
234 	 * Currently NOTE_EXIT is abused to indicate device detach.
235 	 */
236 	if (hint == NOTE_EXIT) {
237 		kn->kn_data = NOTE_LINKINV;
238 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
239 		knlist_remove_inevent(klist, kn);
240 		return (1);
241 	}
242 	if (hint != 0)
243 		kn->kn_data = hint;			/* current status */
244 	if (kn->kn_sfflags & hint)
245 		kn->kn_fflags |= hint;
246 	return (kn->kn_fflags != 0);
247 }
248 
249 /*
250  * Network interface utility routines.
251  *
252  * Routines with ifa_ifwith* names take sockaddr *'s as
253  * parameters.
254  */
255 /* ARGSUSED*/
256 static void
257 if_init(void *dummy __unused)
258 {
259 
260 	IFNET_LOCK_INIT();
261 	TAILQ_INIT(&ifnet);
262 	knlist_init(&ifklist, NULL);
263 	if_grow();				/* create initial table */
264 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
265 	    UID_ROOT, GID_WHEEL, 0600, "network");
266 	if_clone_init();
267 }
268 
269 static void
270 if_grow(void)
271 {
272 	u_int n;
273 	struct ifindex_entry *e;
274 
275 	if_indexlim <<= 1;
276 	n = if_indexlim * sizeof(*e);
277 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
278 	if (ifindex_table != NULL) {
279 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
280 		free((caddr_t)ifindex_table, M_IFADDR);
281 	}
282 	ifindex_table = e;
283 }
284 
285 /* ARGSUSED*/
286 static void
287 if_check(void *dummy __unused)
288 {
289 	struct ifnet *ifp;
290 	int s;
291 
292 	s = splimp();
293 	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
294 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
295 		if (ifp->if_snd.ifq_maxlen == 0) {
296 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
297 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
298 		}
299 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
300 			if_printf(ifp,
301 			    "XXX: driver didn't initialize queue mtx\n");
302 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
303 			    MTX_NETWORK_LOCK, MTX_DEF);
304 		}
305 	}
306 	IFNET_RUNLOCK();
307 	splx(s);
308 	if_slowtimo(0);
309 }
310 
311 static int
312 if_findindex(struct ifnet *ifp)
313 {
314 	int i, unit;
315 	char eaddr[18], devname[32];
316 	const char *name, *p;
317 
318 	switch (ifp->if_type) {
319 	case IFT_ETHER:			/* these types use struct arpcom */
320 	case IFT_FDDI:
321 	case IFT_XETHER:
322 	case IFT_ISO88025:
323 	case IFT_L2VLAN:
324 		snprintf(eaddr, 18, "%6D", IFP2AC(ifp)->ac_enaddr, ":");
325 		break;
326 	default:
327 		eaddr[0] = '\0';
328 		break;
329 	}
330 	strlcpy(devname, ifp->if_xname, sizeof(devname));
331 	name = net_cdevsw.d_name;
332 	i = 0;
333 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
334 		if (resource_string_value(name, unit, "ether", &p) == 0)
335 			if (strcmp(p, eaddr) == 0)
336 				goto found;
337 		if (resource_string_value(name, unit, "dev", &p) == 0)
338 			if (strcmp(p, devname) == 0)
339 				goto found;
340 	}
341 	unit = 0;
342 found:
343 	if (unit != 0) {
344 		if (ifaddr_byindex(unit) == NULL)
345 			return (unit);
346 		printf("%s%d in use, cannot hardwire it to %s.\n",
347 		    name, unit, devname);
348 	}
349 	for (unit = 1; ; unit++) {
350 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
351 			continue;
352 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
353 		    resource_string_value(name, unit, "dev", &p) == 0)
354 			continue;
355 		break;
356 	}
357 	return (unit);
358 }
359 
360 /*
361  * Attach an interface to the
362  * list of "active" interfaces.
363  */
364 void
365 if_attach(struct ifnet *ifp)
366 {
367 	unsigned socksize, ifasize;
368 	int namelen, masklen;
369 	struct sockaddr_dl *sdl;
370 	struct ifaddr *ifa;
371 
372 	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
373 	IF_AFDATA_LOCK_INIT(ifp);
374 	ifp->if_afdata_initialized = 0;
375 	IFNET_WLOCK();
376 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
377 	IFNET_WUNLOCK();
378 	/*
379 	 * XXX -
380 	 * The old code would work if the interface passed a pre-existing
381 	 * chain of ifaddrs to this code.  We don't trust our callers to
382 	 * properly initialize the tailq, however, so we no longer allow
383 	 * this unlikely case.
384 	 */
385 	TAILQ_INIT(&ifp->if_addrhead);
386 	TAILQ_INIT(&ifp->if_prefixhead);
387 	TAILQ_INIT(&ifp->if_multiaddrs);
388 	knlist_init(&ifp->if_klist, NULL);
389 	getmicrotime(&ifp->if_lastchange);
390 
391 #ifdef MAC
392 	mac_init_ifnet(ifp);
393 	mac_create_ifnet(ifp);
394 #endif
395 
396 	ifp->if_index = if_findindex(ifp);
397 	if (ifp->if_index > if_index)
398 		if_index = ifp->if_index;
399 	if (if_index >= if_indexlim)
400 		if_grow();
401 
402 	ifnet_byindex(ifp->if_index) = ifp;
403 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
404 	    unit2minor(ifp->if_index),
405 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
406 	    net_cdevsw.d_name, ifp->if_xname);
407 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
408 	    net_cdevsw.d_name, ifp->if_index);
409 
410 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
411 
412 	/*
413 	 * create a Link Level name for this device
414 	 */
415 	namelen = strlen(ifp->if_xname);
416 	/*
417 	 * Always save enough space for any possiable name so we can do
418 	 * a rename in place later.
419 	 */
420 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
421 	socksize = masklen + ifp->if_addrlen;
422 	if (socksize < sizeof(*sdl))
423 		socksize = sizeof(*sdl);
424 	socksize = roundup2(socksize, sizeof(long));
425 	ifasize = sizeof(*ifa) + 2 * socksize;
426 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
427 	IFA_LOCK_INIT(ifa);
428 	sdl = (struct sockaddr_dl *)(ifa + 1);
429 	sdl->sdl_len = socksize;
430 	sdl->sdl_family = AF_LINK;
431 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
432 	sdl->sdl_nlen = namelen;
433 	sdl->sdl_index = ifp->if_index;
434 	sdl->sdl_type = ifp->if_type;
435 	ifaddr_byindex(ifp->if_index) = ifa;
436 	ifa->ifa_ifp = ifp;
437 	ifa->ifa_rtrequest = link_rtrequest;
438 	ifa->ifa_addr = (struct sockaddr *)sdl;
439 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
440 	ifa->ifa_netmask = (struct sockaddr *)sdl;
441 	sdl->sdl_len = masklen;
442 	while (namelen != 0)
443 		sdl->sdl_data[--namelen] = 0xff;
444 	ifa->ifa_refcnt = 1;
445 	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
446 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
447 	ifp->if_snd.altq_type = 0;
448 	ifp->if_snd.altq_disc = NULL;
449 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
450 	ifp->if_snd.altq_tbr  = NULL;
451 	ifp->if_snd.altq_ifp  = ifp;
452 
453 	if (domains)
454 		if_attachdomain1(ifp);
455 
456 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
457 
458 	/* Announce the interface. */
459 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
460 }
461 
462 static void
463 if_attachdomain(void *dummy)
464 {
465 	struct ifnet *ifp;
466 	int s;
467 
468 	s = splnet();
469 	TAILQ_FOREACH(ifp, &ifnet, if_link)
470 		if_attachdomain1(ifp);
471 	splx(s);
472 }
473 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
474     if_attachdomain, NULL);
475 
476 static void
477 if_attachdomain1(struct ifnet *ifp)
478 {
479 	struct domain *dp;
480 	int s;
481 
482 	s = splnet();
483 
484 	/*
485 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
486 	 * cannot lock ifp->if_afdata initialization, entirely.
487 	 */
488 	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
489 		splx(s);
490 		return;
491 	}
492 	if (ifp->if_afdata_initialized) {
493 		IF_AFDATA_UNLOCK(ifp);
494 		splx(s);
495 		return;
496 	}
497 	ifp->if_afdata_initialized = 1;
498 	IF_AFDATA_UNLOCK(ifp);
499 
500 	/* address family dependent data region */
501 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
502 	for (dp = domains; dp; dp = dp->dom_next) {
503 		if (dp->dom_ifattach)
504 			ifp->if_afdata[dp->dom_family] =
505 			    (*dp->dom_ifattach)(ifp);
506 	}
507 
508 	splx(s);
509 }
510 
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 *
 * In order: fire the departure event, mark the interface down, detach
 * ALTQ, purge all non-link addresses, release the index-table slot and
 * the /dev/net device, free the link-level address, delete every route
 * through the interface, announce the departure, run the per-domain
 * detach hooks, drop the knotes, unlink the ifnet from the global list
 * and destroy its locks.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa, *next;
	struct radix_node_head	*rnh;
	int s;
	int i;
	struct domain *dp;
 	struct ifnet *iter;
 	int found;

	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	/*
	 * Remove routes and flush queues.
	 */
	s = splnet();
	if_down(ifp);
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Purge every address except the link-level one, which is freed
	 * further down.  'next' is fetched first because the current
	 * entry may be unlinked during the iteration.
	 */
	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = next) {
		next = TAILQ_NEXT(ifa, ifa_link);

		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/* On failure fall through to the generic removal. */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IFAFREE(ifa);
	}

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif
	/*
	 * Remove address from ifindex_table[] and maybe decrement if_index.
	 * Clean up all addresses.
	 */
	ifaddr_byindex(ifp->if_index) = NULL;
	destroy_dev(ifdev_byindex(ifp->if_index));
	ifdev_byindex(ifp->if_index) = NULL;

	/* Shrink if_index past any now-empty slots at the top. */
	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
		if_index--;


	/* We can now free link ifaddr. */
	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
		ifa = TAILQ_FIRST(&ifp->if_addrhead);
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IFAFREE(ifa);
	}

	/*
	 * Delete all remaining routes using this interface
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	for (i = 1; i <= AF_MAX; i++) {
		if ((rnh = rt_tables[i]) == NULL)
			continue;
		RADIX_NODE_HEAD_LOCK(rnh);
		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
		RADIX_NODE_HEAD_UNLOCK(rnh);
	}

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

	/* Let each domain release the data it attached in if_attachdomain1(). */
	IF_AFDATA_LOCK(ifp);
	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}
	IF_AFDATA_UNLOCK(ifp);

#ifdef MAC
	mac_destroy_ifnet(ifp);
#endif /* MAC */
	/* NOTE_EXIT is how filt_netdev() learns about the detach. */
	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
	knlist_clear(&ifp->if_klist, 0);
	knlist_destroy(&ifp->if_klist);
	/* Only unlink the ifnet if it is really on the global list. */
	IFNET_WLOCK();
 	found = 0;
 	TAILQ_FOREACH(iter, &ifnet, if_link)
 		if (iter == ifp) {
 			found = 1;
 			break;
 		}
 	if (found)
 		TAILQ_REMOVE(&ifnet, ifp, if_link);
	IFNET_WUNLOCK();
	mtx_destroy(&ifp->if_snd.ifq_mtx);
	IF_AFDATA_DESTROY(ifp);
	splx(s);
}
642 
643 /*
644  * Delete Routes for a Network Interface
645  *
646  * Called for each routing entry via the rnh->rnh_walktree() call above
647  * to delete all route entries referencing a detaching network interface.
648  *
649  * Arguments:
650  *	rn	pointer to node in the routing table
651  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
652  *
653  * Returns:
654  *	0	successful
655  *	errno	failed - reason indicated
656  *
657  */
658 static int
659 if_rtdel(struct radix_node *rn, void *arg)
660 {
661 	struct rtentry	*rt = (struct rtentry *)rn;
662 	struct ifnet	*ifp = arg;
663 	int		err;
664 
665 	if (rt->rt_ifp == ifp) {
666 
667 		/*
668 		 * Protect (sorta) against walktree recursion problems
669 		 * with cloned routes
670 		 */
671 		if ((rt->rt_flags & RTF_UP) == 0)
672 			return (0);
673 
674 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
675 				rt_mask(rt), rt->rt_flags,
676 				(struct rtentry **) NULL);
677 		if (err) {
678 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
679 		}
680 	}
681 
682 	return (0);
683 }
684 
/*
 * True when the first a1->sa_len bytes of the two sockaddrs are identical.
 * The length is taken from a1 only, and a1 is evaluated twice.
 */
#define	equal(a1, a2)	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
686 
687 /*
688  * Locate an interface based on a complete address.
689  */
690 /*ARGSUSED*/
691 struct ifaddr *
692 ifa_ifwithaddr(struct sockaddr *addr)
693 {
694 	struct ifnet *ifp;
695 	struct ifaddr *ifa;
696 
697 	IFNET_RLOCK();
698 	TAILQ_FOREACH(ifp, &ifnet, if_link)
699 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
700 			if (ifa->ifa_addr->sa_family != addr->sa_family)
701 				continue;
702 			if (equal(addr, ifa->ifa_addr))
703 				goto done;
704 			/* IP6 doesn't have broadcast */
705 			if ((ifp->if_flags & IFF_BROADCAST) &&
706 			    ifa->ifa_broadaddr &&
707 			    ifa->ifa_broadaddr->sa_len != 0 &&
708 			    equal(ifa->ifa_broadaddr, addr))
709 				goto done;
710 		}
711 	ifa = NULL;
712 done:
713 	IFNET_RUNLOCK();
714 	return (ifa);
715 }
716 
717 /*
718  * Locate the point to point interface with a given destination address.
719  */
720 /*ARGSUSED*/
721 struct ifaddr *
722 ifa_ifwithdstaddr(struct sockaddr *addr)
723 {
724 	struct ifnet *ifp;
725 	struct ifaddr *ifa;
726 
727 	IFNET_RLOCK();
728 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
729 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
730 			continue;
731 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
732 			if (ifa->ifa_addr->sa_family != addr->sa_family)
733 				continue;
734 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
735 				goto done;
736 		}
737 	}
738 	ifa = NULL;
739 done:
740 	IFNET_RUNLOCK();
741 	return (ifa);
742 }
743 
744 /*
745  * Find an interface on a specific network.  If many, choice
746  * is most specific found.
747  */
748 struct ifaddr *
749 ifa_ifwithnet(struct sockaddr *addr)
750 {
751 	struct ifnet *ifp;
752 	struct ifaddr *ifa;
753 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
754 	u_int af = addr->sa_family;
755 	char *addr_data = addr->sa_data, *cplim;
756 
757 	/*
758 	 * AF_LINK addresses can be looked up directly by their index number,
759 	 * so do that if we can.
760 	 */
761 	if (af == AF_LINK) {
762 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
763 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
764 		return (ifaddr_byindex(sdl->sdl_index));
765 	}
766 
767 	/*
768 	 * Scan though each interface, looking for ones that have
769 	 * addresses in this address family.
770 	 */
771 	IFNET_RLOCK();
772 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
773 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
774 			char *cp, *cp2, *cp3;
775 
776 			if (ifa->ifa_addr->sa_family != af)
777 next:				continue;
778 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
779 				/*
780 				 * This is a bit broken as it doesn't
781 				 * take into account that the remote end may
782 				 * be a single node in the network we are
783 				 * looking for.
784 				 * The trouble is that we don't know the
785 				 * netmask for the remote end.
786 				 */
787 				if (ifa->ifa_dstaddr != 0
788 				    && equal(addr, ifa->ifa_dstaddr))
789 					goto done;
790 			} else {
791 				/*
792 				 * if we have a special address handler,
793 				 * then use it instead of the generic one.
794 				 */
795 				if (ifa->ifa_claim_addr) {
796 					if ((*ifa->ifa_claim_addr)(ifa, addr))
797 						goto done;
798 					continue;
799 				}
800 
801 				/*
802 				 * Scan all the bits in the ifa's address.
803 				 * If a bit dissagrees with what we are
804 				 * looking for, mask it with the netmask
805 				 * to see if it really matters.
806 				 * (A byte at a time)
807 				 */
808 				if (ifa->ifa_netmask == 0)
809 					continue;
810 				cp = addr_data;
811 				cp2 = ifa->ifa_addr->sa_data;
812 				cp3 = ifa->ifa_netmask->sa_data;
813 				cplim = ifa->ifa_netmask->sa_len
814 					+ (char *)ifa->ifa_netmask;
815 				while (cp3 < cplim)
816 					if ((*cp++ ^ *cp2++) & *cp3++)
817 						goto next; /* next address! */
818 				/*
819 				 * If the netmask of what we just found
820 				 * is more specific than what we had before
821 				 * (if we had one) then remember the new one
822 				 * before continuing to search
823 				 * for an even better one.
824 				 */
825 				if (ifa_maybe == 0 ||
826 				    rn_refines((caddr_t)ifa->ifa_netmask,
827 				    (caddr_t)ifa_maybe->ifa_netmask))
828 					ifa_maybe = ifa;
829 			}
830 		}
831 	}
832 	ifa = ifa_maybe;
833 done:
834 	IFNET_RUNLOCK();
835 	return (ifa);
836 }
837 
838 /*
839  * Find an interface address specific to an interface best matching
840  * a given address.
841  */
842 struct ifaddr *
843 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
844 {
845 	struct ifaddr *ifa;
846 	char *cp, *cp2, *cp3;
847 	char *cplim;
848 	struct ifaddr *ifa_maybe = 0;
849 	u_int af = addr->sa_family;
850 
851 	if (af >= AF_MAX)
852 		return (0);
853 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
854 		if (ifa->ifa_addr->sa_family != af)
855 			continue;
856 		if (ifa_maybe == 0)
857 			ifa_maybe = ifa;
858 		if (ifa->ifa_netmask == 0) {
859 			if (equal(addr, ifa->ifa_addr) ||
860 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
861 				goto done;
862 			continue;
863 		}
864 		if (ifp->if_flags & IFF_POINTOPOINT) {
865 			if (equal(addr, ifa->ifa_dstaddr))
866 				goto done;
867 		} else {
868 			cp = addr->sa_data;
869 			cp2 = ifa->ifa_addr->sa_data;
870 			cp3 = ifa->ifa_netmask->sa_data;
871 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
872 			for (; cp3 < cplim; cp3++)
873 				if ((*cp++ ^ *cp2++) & *cp3)
874 					break;
875 			if (cp3 == cplim)
876 				goto done;
877 		}
878 	}
879 	ifa = ifa_maybe;
880 done:
881 	return (ifa);
882 }
883 
884 #include <net/route.h>
885 
886 /*
887  * Default action when installing a route with a Link Level gateway.
888  * Lookup an appropriate real ifa to point to.
889  * This should be moved to /sys/net/link.c eventually.
890  */
891 static void
892 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
893 {
894 	struct ifaddr *ifa, *oifa;
895 	struct sockaddr *dst;
896 	struct ifnet *ifp;
897 
898 	RT_LOCK_ASSERT(rt);
899 
900 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
901 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
902 		return;
903 	ifa = ifaof_ifpforaddr(dst, ifp);
904 	if (ifa) {
905 		IFAREF(ifa);		/* XXX */
906 		oifa = rt->rt_ifa;
907 		rt->rt_ifa = ifa;
908 		IFAFREE(oifa);
909 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
910 			ifa->ifa_rtrequest(cmd, rt, info);
911 	}
912 }
913 
914 /*
915  * Mark an interface down and notify protocols of
916  * the transition.
917  * NOTE: must be called at splnet or eqivalent.
918  */
919 static void
920 if_unroute(struct ifnet *ifp, int flag, int fam)
921 {
922 	struct ifaddr *ifa;
923 
924 	ifp->if_flags &= ~flag;
925 	getmicrotime(&ifp->if_lastchange);
926 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
927 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
928 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
929 	if_qflush(&ifp->if_snd);
930 	rt_ifmsg(ifp);
931 }
932 
933 /*
934  * Mark an interface up and notify protocols of
935  * the transition.
936  * NOTE: must be called at splnet or eqivalent.
937  */
938 static void
939 if_route(struct ifnet *ifp, int flag, int fam)
940 {
941 	struct ifaddr *ifa;
942 
943 	ifp->if_flags |= flag;
944 	getmicrotime(&ifp->if_lastchange);
945 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
946 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
947 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
948 	rt_ifmsg(ifp);
949 #ifdef INET6
950 	in6_if_up(ifp);
951 #endif
952 }
953 
954 /*
955  * Mark an interface down and notify protocols of
956  * the transition.
957  * NOTE: must be called at splnet or eqivalent.
958  */
959 void
960 if_down(struct ifnet *ifp)
961 {
962 
963 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
964 }
965 
966 /*
967  * Mark an interface up and notify protocols of
968  * the transition.
969  * NOTE: must be called at splnet or eqivalent.
970  */
971 void
972 if_up(struct ifnet *ifp)
973 {
974 
975 	if_route(ifp, IFF_UP, AF_UNSPEC);
976 }
977 
978 /*
979  * Flush an interface queue.
980  */
981 static void
982 if_qflush(struct ifaltq *ifq)
983 {
984 	struct mbuf *m, *n;
985 
986 #ifdef ALTQ
987 	if (ALTQ_IS_ENABLED(ifq))
988 		ALTQ_PURGE(ifq);
989 #endif
990 	n = ifq->ifq_head;
991 	while ((m = n) != 0) {
992 		n = m->m_act;
993 		m_freem(m);
994 	}
995 	ifq->ifq_head = 0;
996 	ifq->ifq_tail = 0;
997 	ifq->ifq_len = 0;
998 }
999 
1000 /*
1001  * Handle interface watchdog timer routines.  Called
1002  * from softclock, we decrement timers (if set) and
1003  * call the appropriate interface routine on expiration.
1004  *
1005  * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1006  * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1007  * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1008  */
1009 static void
1010 if_slowtimo(void *arg)
1011 {
1012 	struct ifnet *ifp;
1013 	int s = splimp();
1014 
1015 	IFNET_RLOCK();
1016 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1017 		if (ifp->if_timer == 0 || --ifp->if_timer)
1018 			continue;
1019 		if (ifp->if_watchdog)
1020 			(*ifp->if_watchdog)(ifp);
1021 	}
1022 	IFNET_RUNLOCK();
1023 	splx(s);
1024 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1025 }
1026 
1027 /*
1028  * Map interface name to
1029  * interface structure pointer.
1030  */
1031 struct ifnet *
1032 ifunit(const char *name)
1033 {
1034 	struct ifnet *ifp;
1035 
1036 	IFNET_RLOCK();
1037 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1038 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1039 			break;
1040 	}
1041 	IFNET_RUNLOCK();
1042 	return (ifp);
1043 }
1044 
1045 /*
1046  * Hardware specific interface ioctls.
1047  */
1048 static int
1049 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1050 {
1051 	struct ifreq *ifr;
1052 	struct ifstat *ifs;
1053 	int error = 0;
1054 	int new_flags;
1055 	size_t namelen, onamelen;
1056 	char new_name[IFNAMSIZ];
1057 	struct ifaddr *ifa;
1058 	struct sockaddr_dl *sdl;
1059 
1060 	ifr = (struct ifreq *)data;
1061 	switch (cmd) {
1062 	case SIOCGIFINDEX:
1063 		ifr->ifr_index = ifp->if_index;
1064 		break;
1065 
1066 	case SIOCGIFFLAGS:
1067 		ifr->ifr_flags = ifp->if_flags & 0xffff;
1068 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1069 		break;
1070 
1071 	case SIOCGIFCAP:
1072 		ifr->ifr_reqcap = ifp->if_capabilities;
1073 		ifr->ifr_curcap = ifp->if_capenable;
1074 		break;
1075 
1076 #ifdef MAC
1077 	case SIOCGIFMAC:
1078 		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1079 		break;
1080 #endif
1081 
1082 	case SIOCGIFMETRIC:
1083 		ifr->ifr_metric = ifp->if_metric;
1084 		break;
1085 
1086 	case SIOCGIFMTU:
1087 		ifr->ifr_mtu = ifp->if_mtu;
1088 		break;
1089 
1090 	case SIOCGIFPHYS:
1091 		ifr->ifr_phys = ifp->if_physical;
1092 		break;
1093 
1094 	case SIOCSIFFLAGS:
1095 		error = suser(td);
1096 		if (error)
1097 			return (error);
1098 		new_flags = (ifr->ifr_flags & 0xffff) |
1099 		    (ifr->ifr_flagshigh << 16);
1100 		if (ifp->if_flags & IFF_SMART) {
1101 			/* Smart drivers twiddle their own routes */
1102 		} else if (ifp->if_flags & IFF_UP &&
1103 		    (new_flags & IFF_UP) == 0) {
1104 			int s = splimp();
1105 			if_down(ifp);
1106 			splx(s);
1107 		} else if (new_flags & IFF_UP &&
1108 		    (ifp->if_flags & IFF_UP) == 0) {
1109 			int s = splimp();
1110 			if_up(ifp);
1111 			splx(s);
1112 		}
1113 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1114 			(new_flags &~ IFF_CANTCHANGE);
1115 		if (new_flags & IFF_PPROMISC) {
1116 			/* Permanently promiscuous mode requested */
1117 			ifp->if_flags |= IFF_PROMISC;
1118 		} else if (ifp->if_pcount == 0) {
1119 			ifp->if_flags &= ~IFF_PROMISC;
1120 		}
1121 		if (ifp->if_ioctl)
1122 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1123 		getmicrotime(&ifp->if_lastchange);
1124 		break;
1125 
1126 	case SIOCSIFCAP:
1127 		error = suser(td);
1128 		if (error)
1129 			return (error);
1130 		if (ifp->if_ioctl == NULL)
1131 			return (EOPNOTSUPP);
1132 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1133 			return (EINVAL);
1134 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1135 		if (error == 0)
1136 			getmicrotime(&ifp->if_lastchange);
1137 		break;
1138 
1139 #ifdef MAC
1140 	case SIOCSIFMAC:
1141 		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1142 		break;
1143 #endif
1144 
1145 	case SIOCSIFNAME:
1146 		error = suser(td);
1147 		if (error != 0)
1148 			return (error);
1149 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1150 		if (error != 0)
1151 			return (error);
1152 		if (new_name[0] == '\0')
1153 			return (EINVAL);
1154 		if (ifunit(new_name) != NULL)
1155 			return (EEXIST);
1156 
1157 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1158 		/* Announce the departure of the interface. */
1159 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1160 
1161 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1162 		ifa = ifaddr_byindex(ifp->if_index);
1163 		IFA_LOCK(ifa);
1164 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1165 		namelen = strlen(new_name);
1166 		onamelen = sdl->sdl_nlen;
1167 		/*
1168 		 * Move the address if needed.  This is safe because we
1169 		 * allocate space for a name of length IFNAMSIZ when we
1170 		 * create this in if_attach().
1171 		 */
1172 		if (namelen != onamelen) {
1173 			bcopy(sdl->sdl_data + onamelen,
1174 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1175 		}
1176 		bcopy(new_name, sdl->sdl_data, namelen);
1177 		sdl->sdl_nlen = namelen;
1178 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1179 		bzero(sdl->sdl_data, onamelen);
1180 		while (namelen != 0)
1181 			sdl->sdl_data[--namelen] = 0xff;
1182 		IFA_UNLOCK(ifa);
1183 
1184 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1185 		/* Announce the return of the interface. */
1186 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1187 		break;
1188 
1189 	case SIOCSIFMETRIC:
1190 		error = suser(td);
1191 		if (error)
1192 			return (error);
1193 		ifp->if_metric = ifr->ifr_metric;
1194 		getmicrotime(&ifp->if_lastchange);
1195 		break;
1196 
1197 	case SIOCSIFPHYS:
1198 		error = suser(td);
1199 		if (error)
1200 			return (error);
1201 		if (ifp->if_ioctl == NULL)
1202 			return (EOPNOTSUPP);
1203 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1204 		if (error == 0)
1205 			getmicrotime(&ifp->if_lastchange);
1206 		break;
1207 
1208 	case SIOCSIFMTU:
1209 	{
1210 		u_long oldmtu = ifp->if_mtu;
1211 
1212 		error = suser(td);
1213 		if (error)
1214 			return (error);
1215 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1216 			return (EINVAL);
1217 		if (ifp->if_ioctl == NULL)
1218 			return (EOPNOTSUPP);
1219 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1220 		if (error == 0) {
1221 			getmicrotime(&ifp->if_lastchange);
1222 			rt_ifmsg(ifp);
1223 		}
1224 		/*
1225 		 * If the link MTU changed, do network layer specific procedure.
1226 		 */
1227 		if (ifp->if_mtu != oldmtu) {
1228 #ifdef INET6
1229 			nd6_setmtu(ifp);
1230 #endif
1231 		}
1232 		break;
1233 	}
1234 
1235 	case SIOCADDMULTI:
1236 	case SIOCDELMULTI:
1237 		error = suser(td);
1238 		if (error)
1239 			return (error);
1240 
1241 		/* Don't allow group membership on non-multicast interfaces. */
1242 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1243 			return (EOPNOTSUPP);
1244 
1245 		/* Don't let users screw up protocols' entries. */
1246 		if (ifr->ifr_addr.sa_family != AF_LINK)
1247 			return (EINVAL);
1248 
1249 		if (cmd == SIOCADDMULTI) {
1250 			struct ifmultiaddr *ifma;
1251 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1252 		} else {
1253 			error = if_delmulti(ifp, &ifr->ifr_addr);
1254 		}
1255 		if (error == 0)
1256 			getmicrotime(&ifp->if_lastchange);
1257 		break;
1258 
1259 	case SIOCSIFPHYADDR:
1260 	case SIOCDIFPHYADDR:
1261 #ifdef INET6
1262 	case SIOCSIFPHYADDR_IN6:
1263 #endif
1264 	case SIOCSLIFPHYADDR:
1265 	case SIOCSIFMEDIA:
1266 	case SIOCSIFGENERIC:
1267 		error = suser(td);
1268 		if (error)
1269 			return (error);
1270 		if (ifp->if_ioctl == NULL)
1271 			return (EOPNOTSUPP);
1272 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1273 		if (error == 0)
1274 			getmicrotime(&ifp->if_lastchange);
1275 		break;
1276 
1277 	case SIOCGIFSTATUS:
1278 		ifs = (struct ifstat *)data;
1279 		ifs->ascii[0] = '\0';
1280 
1281 	case SIOCGIFPSRCADDR:
1282 	case SIOCGIFPDSTADDR:
1283 	case SIOCGLIFPHYADDR:
1284 	case SIOCGIFMEDIA:
1285 	case SIOCGIFGENERIC:
1286 		if (ifp->if_ioctl == NULL)
1287 			return (EOPNOTSUPP);
1288 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1289 		break;
1290 
1291 	case SIOCSIFLLADDR:
1292 		error = suser(td);
1293 		if (error)
1294 			return (error);
1295 		error = if_setlladdr(ifp,
1296 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1297 		break;
1298 
1299 	default:
1300 		error = ENOIOCTL;
1301 		break;
1302 	}
1303 	return (error);
1304 }
1305 
1306 /*
1307  * Interface ioctls.
1308  */
1309 int
1310 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1311 {
1312 	struct ifnet *ifp;
1313 	struct ifreq *ifr;
1314 	int error;
1315 	int oif_flags;
1316 
1317 	switch (cmd) {
1318 	case SIOCGIFCONF:
1319 	case OSIOCGIFCONF:
1320 		return (ifconf(cmd, data));
1321 	}
1322 	ifr = (struct ifreq *)data;
1323 
1324 	switch (cmd) {
1325 	case SIOCIFCREATE:
1326 	case SIOCIFDESTROY:
1327 		if ((error = suser(td)) != 0)
1328 			return (error);
1329 		return ((cmd == SIOCIFCREATE) ?
1330 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1331 			if_clone_destroy(ifr->ifr_name));
1332 
1333 	case SIOCIFGCLONERS:
1334 		return (if_clone_list((struct if_clonereq *)data));
1335 	}
1336 
1337 	ifp = ifunit(ifr->ifr_name);
1338 	if (ifp == 0)
1339 		return (ENXIO);
1340 
1341 	error = ifhwioctl(cmd, ifp, data, td);
1342 	if (error != ENOIOCTL)
1343 		return (error);
1344 
1345 	oif_flags = ifp->if_flags;
1346 	if (so->so_proto == 0)
1347 		return (EOPNOTSUPP);
1348 #ifndef COMPAT_43
1349 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1350 								 data,
1351 								 ifp, td));
1352 #else
1353 	{
1354 		int ocmd = cmd;
1355 
1356 		switch (cmd) {
1357 
1358 		case SIOCSIFDSTADDR:
1359 		case SIOCSIFADDR:
1360 		case SIOCSIFBRDADDR:
1361 		case SIOCSIFNETMASK:
1362 #if BYTE_ORDER != BIG_ENDIAN
1363 			if (ifr->ifr_addr.sa_family == 0 &&
1364 			    ifr->ifr_addr.sa_len < 16) {
1365 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1366 				ifr->ifr_addr.sa_len = 16;
1367 			}
1368 #else
1369 			if (ifr->ifr_addr.sa_len == 0)
1370 				ifr->ifr_addr.sa_len = 16;
1371 #endif
1372 			break;
1373 
1374 		case OSIOCGIFADDR:
1375 			cmd = SIOCGIFADDR;
1376 			break;
1377 
1378 		case OSIOCGIFDSTADDR:
1379 			cmd = SIOCGIFDSTADDR;
1380 			break;
1381 
1382 		case OSIOCGIFBRDADDR:
1383 			cmd = SIOCGIFBRDADDR;
1384 			break;
1385 
1386 		case OSIOCGIFNETMASK:
1387 			cmd = SIOCGIFNETMASK;
1388 		}
1389 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1390 								   cmd,
1391 								   data,
1392 								   ifp, td));
1393 		switch (ocmd) {
1394 
1395 		case OSIOCGIFADDR:
1396 		case OSIOCGIFDSTADDR:
1397 		case OSIOCGIFBRDADDR:
1398 		case OSIOCGIFNETMASK:
1399 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1400 
1401 		}
1402 	}
1403 #endif /* COMPAT_43 */
1404 
1405 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1406 #ifdef INET6
1407 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1408 		if (ifp->if_flags & IFF_UP) {
1409 			int s = splimp();
1410 			in6_if_up(ifp);
1411 			splx(s);
1412 		}
1413 #endif
1414 	}
1415 	return (error);
1416 }
1417 
1418 /*
1419  * Set/clear promiscuous mode on interface ifp based on the truth value
1420  * of pswitch.  The calls are reference counted so that only the first
1421  * "on" request actually has an effect, as does the final "off" request.
1422  * Results are undefined if the "off" and "on" requests are not matched.
1423  */
1424 int
1425 ifpromisc(struct ifnet *ifp, int pswitch)
1426 {
1427 	struct ifreq ifr;
1428 	int error;
1429 	int oldflags, oldpcount;
1430 
1431 	oldpcount = ifp->if_pcount;
1432 	oldflags = ifp->if_flags;
1433 	if (ifp->if_flags & IFF_PPROMISC) {
1434 		/* Do nothing if device is in permanently promiscuous mode */
1435 		ifp->if_pcount += pswitch ? 1 : -1;
1436 		return (0);
1437 	}
1438 	if (pswitch) {
1439 		/*
1440 		 * If the device is not configured up, we cannot put it in
1441 		 * promiscuous mode.
1442 		 */
1443 		if ((ifp->if_flags & IFF_UP) == 0)
1444 			return (ENETDOWN);
1445 		if (ifp->if_pcount++ != 0)
1446 			return (0);
1447 		ifp->if_flags |= IFF_PROMISC;
1448 	} else {
1449 		if (--ifp->if_pcount > 0)
1450 			return (0);
1451 		ifp->if_flags &= ~IFF_PROMISC;
1452 	}
1453 	ifr.ifr_flags = ifp->if_flags & 0xffff;
1454 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1455 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1456 	if (error == 0) {
1457 		log(LOG_INFO, "%s: promiscuous mode %s\n",
1458 		    ifp->if_xname,
1459 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1460 		rt_ifmsg(ifp);
1461 	} else {
1462 		ifp->if_pcount = oldpcount;
1463 		ifp->if_flags = oldflags;
1464 	}
1465 	return error;
1466 }
1467 
1468 /*
1469  * Return interface configuration
1470  * of system.  List may be used
1471  * in later ioctl's (above) to get
1472  * other information.
1473  */
1474 /*ARGSUSED*/
1475 static int
1476 ifconf(u_long cmd, caddr_t data)
1477 {
1478 	struct ifconf *ifc = (struct ifconf *)data;
1479 	struct ifnet *ifp;
1480 	struct ifaddr *ifa;
1481 	struct ifreq ifr, *ifrp;
1482 	int space = ifc->ifc_len, error = 0;
1483 
1484 	ifrp = ifc->ifc_req;
1485 	IFNET_RLOCK();		/* could sleep XXX */
1486 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1487 		int addrs;
1488 
1489 		if (space < sizeof(ifr))
1490 			break;
1491 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1492 		    >= sizeof(ifr.ifr_name)) {
1493 			error = ENAMETOOLONG;
1494 			break;
1495 		}
1496 
1497 		addrs = 0;
1498 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1499 			struct sockaddr *sa = ifa->ifa_addr;
1500 
1501 			if (space < sizeof(ifr))
1502 				break;
1503 			if (jailed(curthread->td_ucred) &&
1504 			    prison_if(curthread->td_ucred, sa))
1505 				continue;
1506 			addrs++;
1507 #ifdef COMPAT_43
1508 			if (cmd == OSIOCGIFCONF) {
1509 				struct osockaddr *osa =
1510 					 (struct osockaddr *)&ifr.ifr_addr;
1511 				ifr.ifr_addr = *sa;
1512 				osa->sa_family = sa->sa_family;
1513 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1514 						sizeof (ifr));
1515 				ifrp++;
1516 			} else
1517 #endif
1518 			if (sa->sa_len <= sizeof(*sa)) {
1519 				ifr.ifr_addr = *sa;
1520 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1521 						sizeof (ifr));
1522 				ifrp++;
1523 			} else {
1524 				if (space < sizeof (ifr) + sa->sa_len -
1525 					    sizeof(*sa))
1526 					break;
1527 				space -= sa->sa_len - sizeof(*sa);
1528 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1529 						sizeof (ifr.ifr_name));
1530 				if (error == 0)
1531 				    error = copyout((caddr_t)sa,
1532 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1533 				ifrp = (struct ifreq *)
1534 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1535 			}
1536 			if (error)
1537 				break;
1538 			space -= sizeof (ifr);
1539 		}
1540 		if (error)
1541 			break;
1542 		if (!addrs) {
1543 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1544 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1545 			    sizeof (ifr));
1546 			if (error)
1547 				break;
1548 			space -= sizeof (ifr);
1549 			ifrp++;
1550 		}
1551 	}
1552 	IFNET_RUNLOCK();
1553 	ifc->ifc_len -= space;
1554 	return (error);
1555 }
1556 
1557 /*
1558  * Just like if_promisc(), but for all-multicast-reception mode.
1559  */
1560 int
1561 if_allmulti(struct ifnet *ifp, int onswitch)
1562 {
1563 	int error = 0;
1564 	int s = splimp();
1565 	struct ifreq ifr;
1566 
1567 	if (onswitch) {
1568 		if (ifp->if_amcount++ == 0) {
1569 			ifp->if_flags |= IFF_ALLMULTI;
1570 			ifr.ifr_flags = ifp->if_flags & 0xffff;
1571 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1572 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1573 		}
1574 	} else {
1575 		if (ifp->if_amcount > 1) {
1576 			ifp->if_amcount--;
1577 		} else {
1578 			ifp->if_amcount = 0;
1579 			ifp->if_flags &= ~IFF_ALLMULTI;
1580 			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1581 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1582 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1583 		}
1584 	}
1585 	splx(s);
1586 
1587 	if (error == 0)
1588 		rt_ifmsg(ifp);
1589 	return error;
1590 }
1591 
1592 /*
1593  * Add a multicast listenership to the interface in question.
1594  * The link layer provides a routine which converts
1595  */
1596 int
1597 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1598 {
1599 	struct sockaddr *llsa, *dupsa;
1600 	int error, s;
1601 	struct ifmultiaddr *ifma;
1602 
1603 	/*
1604 	 * If the matching multicast address already exists
1605 	 * then don't add a new one, just add a reference
1606 	 */
1607 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1608 		if (equal(sa, ifma->ifma_addr)) {
1609 			ifma->ifma_refcount++;
1610 			if (retifma)
1611 				*retifma = ifma;
1612 			return 0;
1613 		}
1614 	}
1615 
1616 	/*
1617 	 * Give the link layer a chance to accept/reject it, and also
1618 	 * find out which AF_LINK address this maps to, if it isn't one
1619 	 * already.
1620 	 */
1621 	if (ifp->if_resolvemulti) {
1622 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1623 		if (error) return error;
1624 	} else {
1625 		llsa = 0;
1626 	}
1627 
1628 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1629 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1630 	bcopy(sa, dupsa, sa->sa_len);
1631 
1632 	ifma->ifma_addr = dupsa;
1633 	ifma->ifma_lladdr = llsa;
1634 	ifma->ifma_ifp = ifp;
1635 	ifma->ifma_refcount = 1;
1636 	ifma->ifma_protospec = 0;
1637 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1638 
1639 	/*
1640 	 * Some network interfaces can scan the address list at
1641 	 * interrupt time; lock them out.
1642 	 */
1643 	s = splimp();
1644 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1645 	splx(s);
1646 	if (retifma != NULL)
1647 		*retifma = ifma;
1648 
1649 	if (llsa != 0) {
1650 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1651 			if (equal(ifma->ifma_addr, llsa))
1652 				break;
1653 		}
1654 		if (ifma) {
1655 			ifma->ifma_refcount++;
1656 		} else {
1657 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1658 			       M_IFMADDR, M_WAITOK);
1659 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1660 			       M_IFMADDR, M_WAITOK);
1661 			bcopy(llsa, dupsa, llsa->sa_len);
1662 			ifma->ifma_addr = dupsa;
1663 			ifma->ifma_lladdr = NULL;
1664 			ifma->ifma_ifp = ifp;
1665 			ifma->ifma_refcount = 1;
1666 			ifma->ifma_protospec = 0;
1667 			s = splimp();
1668 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1669 			splx(s);
1670 		}
1671 	}
1672 	/*
1673 	 * We are certain we have added something, so call down to the
1674 	 * interface to let them know about it.
1675 	 */
1676 	s = splimp();
1677 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1678 	splx(s);
1679 
1680 	return 0;
1681 }
1682 
1683 /*
1684  * Remove a reference to a multicast address on this interface.  Yell
1685  * if the request does not match an existing membership.
1686  */
1687 int
1688 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1689 {
1690 	struct ifmultiaddr *ifma;
1691 	int s;
1692 
1693 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1694 		if (equal(sa, ifma->ifma_addr))
1695 			break;
1696 	if (ifma == 0)
1697 		return ENOENT;
1698 
1699 	if (ifma->ifma_refcount > 1) {
1700 		ifma->ifma_refcount--;
1701 		return 0;
1702 	}
1703 
1704 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1705 	sa = ifma->ifma_lladdr;
1706 	s = splimp();
1707 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1708 	/*
1709 	 * Make sure the interface driver is notified
1710 	 * in the case of a link layer mcast group being left.
1711 	 */
1712 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1713 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1714 	splx(s);
1715 	free(ifma->ifma_addr, M_IFMADDR);
1716 	free(ifma, M_IFMADDR);
1717 	if (sa == 0)
1718 		return 0;
1719 
1720 	/*
1721 	 * Now look for the link-layer address which corresponds to
1722 	 * this network address.  It had been squirreled away in
1723 	 * ifma->ifma_lladdr for this purpose (so we don't have
1724 	 * to call ifp->if_resolvemulti() again), and we saved that
1725 	 * value in sa above.  If some nasty deleted the
1726 	 * link-layer address out from underneath us, we can deal because
1727 	 * the address we stored was is not the same as the one which was
1728 	 * in the record for the link-layer address.  (So we don't complain
1729 	 * in that case.)
1730 	 */
1731 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1732 		if (equal(sa, ifma->ifma_addr))
1733 			break;
1734 	if (ifma == 0)
1735 		return 0;
1736 
1737 	if (ifma->ifma_refcount > 1) {
1738 		ifma->ifma_refcount--;
1739 		return 0;
1740 	}
1741 
1742 	s = splimp();
1743 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1744 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1745 	splx(s);
1746 	free(ifma->ifma_addr, M_IFMADDR);
1747 	free(sa, M_IFMADDR);
1748 	free(ifma, M_IFMADDR);
1749 
1750 	return 0;
1751 }
1752 
1753 /*
1754  * Set the link layer address on an interface.
1755  *
1756  * At this time we only support certain types of interfaces,
1757  * and we don't allow the length of the address to change.
1758  */
1759 int
1760 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1761 {
1762 	struct sockaddr_dl *sdl;
1763 	struct ifaddr *ifa;
1764 	struct ifreq ifr;
1765 
1766 	ifa = ifaddr_byindex(ifp->if_index);
1767 	if (ifa == NULL)
1768 		return (EINVAL);
1769 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1770 	if (sdl == NULL)
1771 		return (EINVAL);
1772 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1773 		return (EINVAL);
1774 	switch (ifp->if_type) {
1775 	case IFT_ETHER:			/* these types use struct arpcom */
1776 	case IFT_FDDI:
1777 	case IFT_XETHER:
1778 	case IFT_ISO88025:
1779 	case IFT_L2VLAN:
1780 		bcopy(lladdr, IFP2AC(ifp)->ac_enaddr, len);
1781 		/*
1782 		 * XXX We also need to store the lladdr in LLADDR(sdl),
1783 		 * which is done below. This is a pain because we must
1784 		 * remember to keep the info in sync.
1785 		 */
1786 		/* FALLTHROUGH */
1787 	case IFT_ARCNET:
1788 		bcopy(lladdr, LLADDR(sdl), len);
1789 		break;
1790 	default:
1791 		return (ENODEV);
1792 	}
1793 	/*
1794 	 * If the interface is already up, we need
1795 	 * to re-init it in order to reprogram its
1796 	 * address filter.
1797 	 */
1798 	if ((ifp->if_flags & IFF_UP) != 0) {
1799 		ifp->if_flags &= ~IFF_UP;
1800 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1801 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1802 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1803 		ifp->if_flags |= IFF_UP;
1804 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1805 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1806 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1807 #ifdef INET
1808 		/*
1809 		 * Also send gratuitous ARPs to notify other nodes about
1810 		 * the address change.
1811 		 */
1812 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1813 			if (ifa->ifa_addr != NULL &&
1814 			    ifa->ifa_addr->sa_family == AF_INET)
1815 				arp_ifinit(ifp, ifa);
1816 		}
1817 #endif
1818 	}
1819 	return (0);
1820 }
1821 
1822 struct ifmultiaddr *
1823 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
1824 {
1825 	struct ifmultiaddr *ifma;
1826 
1827 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1828 		if (equal(ifma->ifma_addr, sa))
1829 			break;
1830 
1831 	return ifma;
1832 }
1833 
1834 /*
1835  * The name argument must be a pointer to storage which will last as
1836  * long as the interface does.  For physical devices, the result of
1837  * device_get_name(dev) is a good choice and for pseudo-devices a
1838  * static string works well.
1839  */
1840 void
1841 if_initname(struct ifnet *ifp, const char *name, int unit)
1842 {
1843 	ifp->if_dname = name;
1844 	ifp->if_dunit = unit;
1845 	if (unit != IF_DUNIT_NONE)
1846 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1847 	else
1848 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
1849 }
1850 
1851 int
1852 if_printf(struct ifnet *ifp, const char * fmt, ...)
1853 {
1854 	va_list ap;
1855 	int retval;
1856 
1857 	retval = printf("%s: ", ifp->if_xname);
1858 	va_start(ap, fmt);
1859 	retval += vprintf(fmt, ap);
1860 	va_end(ap);
1861 	return (retval);
1862 }
1863 
1864 /*
1865  * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
1866  * be called without Giant.  However, we often can't acquire the Giant lock
1867  * at those points; instead, we run it via a task queue that holds Giant via
1868  * if_start_deferred.
1869  *
1870  * XXXRW: We need to make sure that the ifnet isn't fully detached until any
1871  * outstanding if_start_deferred() tasks that will run after the free.  This
1872  * probably means waiting in if_detach().
1873  */
1874 void
1875 if_start(struct ifnet *ifp)
1876 {
1877 
1878 	NET_ASSERT_GIANT();
1879 
1880         if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
1881                 if (mtx_owned(&Giant))
1882                         (*(ifp)->if_start)(ifp);
1883                 else
1884 			taskqueue_enqueue(taskqueue_swi_giant,
1885 			    &ifp->if_starttask);
1886         } else
1887                 (*(ifp)->if_start)(ifp);
1888 }
1889 
1890 static void
1891 if_start_deferred(void *context, int pending)
1892 {
1893 	struct ifnet *ifp;
1894 
1895 	/*
1896 	 * This code must be entered with Giant, and should never run if
1897 	 * we're not running with debug.mpsafenet.
1898 	 */
1899 	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
1900 	GIANT_REQUIRED;
1901 
1902 	ifp = (struct ifnet *)context;
1903 	(ifp->if_start)(ifp);
1904 }
1905 
/*
 * sysctl nodes declared by this file: "link" under _net, numbered
 * PF_LINK, and "generic" beneath _net_link.
 */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1908