xref: /freebsd/sys/net/if.c (revision cec50dea12481dc578c0805c887ab2097e1c06c5)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)if.c	8.5 (Berkeley) 1/9/95
30  * $FreeBSD$
31  */
32 
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36 #include "opt_mac.h"
37 
38 #include <sys/param.h>
39 #include <sys/conf.h>
40 #include <sys/mac.h>
41 #include <sys/malloc.h>
42 #include <sys/bus.h>
43 #include <sys/mbuf.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/protosw.h>
49 #include <sys/kernel.h>
50 #include <sys/sockio.h>
51 #include <sys/syslog.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54 #include <sys/domain.h>
55 #include <sys/jail.h>
56 #include <machine/stdarg.h>
57 
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_clone.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/if_var.h>
64 #include <net/radix.h>
65 #include <net/route.h>
66 
67 #if defined(INET) || defined(INET6)
68 /*XXX*/
69 #include <netinet/in.h>
70 #include <netinet/in_var.h>
71 #ifdef INET6
72 #include <netinet6/in6_var.h>
73 #include <netinet6/in6_ifattach.h>
74 #endif
75 #endif
76 #ifdef INET
77 #include <netinet/if_ether.h>
78 #endif
79 
80 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
81 
82 static void	if_attachdomain(void *);
83 static void	if_attachdomain1(struct ifnet *);
84 static int	ifconf(u_long, caddr_t);
85 static void	if_grow(void);
86 static void	if_init(void *);
87 static void	if_check(void *);
88 static int	if_findindex(struct ifnet *);
89 static void	if_qflush(struct ifaltq *);
90 static void	if_route(struct ifnet *, int flag, int fam);
91 static void	if_slowtimo(void *);
92 static void	if_unroute(struct ifnet *, int flag, int fam);
93 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
94 static int	if_rtdel(struct radix_node *, void *);
95 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
96 static void	if_start_deferred(void *context, int pending);
97 #ifdef INET6
98 /*
99  * XXX: declare here to avoid to include many inet6 related files..
100  * should be more generalized?
101  */
102 extern void	nd6_setmtu(struct ifnet *);
103 #endif
104 
105 int	if_index = 0;
106 struct	ifindex_entry *ifindex_table = NULL;
107 int	ifqmaxlen = IFQ_MAXLEN;
108 struct	ifnethead ifnet;	/* depend on static init XXX */
109 struct	mtx ifnet_lock;
110 
111 static int	if_indexlim = 8;
112 static struct	knlist ifklist;
113 
114 static void	filt_netdetach(struct knote *kn);
115 static int	filt_netdev(struct knote *kn, long hint);
116 
117 static struct filterops netdev_filtops =
118     { 1, NULL, filt_netdetach, filt_netdev };
119 
120 /*
121  * System initialization
122  */
123 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
124 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
125 
126 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
127 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
128 
129 static d_open_t		netopen;
130 static d_close_t	netclose;
131 static d_ioctl_t	netioctl;
132 static d_kqfilter_t	netkqfilter;
133 
134 static struct cdevsw net_cdevsw = {
135 	.d_version =	D_VERSION,
136 	.d_flags =	D_NEEDGIANT,
137 	.d_open =	netopen,
138 	.d_close =	netclose,
139 	.d_ioctl =	netioctl,
140 	.d_name =	"net",
141 	.d_kqfilter =	netkqfilter,
142 };
143 
/*
 * Open handler for the net devices.  No per-open state is kept, so every
 * open succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	const int error = 0;

	return (error);
}
149 
/*
 * Close handler for the net devices.  There is nothing to release, so
 * every close succeeds.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	const int error = 0;

	return (error);
}
155 
156 static int
157 netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
158 {
159 	struct ifnet *ifp;
160 	int error, idx;
161 
162 	/* only support interface specific ioctls */
163 	if (IOCGROUP(cmd) != 'i')
164 		return (EOPNOTSUPP);
165 	idx = minor(dev);
166 	if (idx == 0) {
167 		/*
168 		 * special network device, not interface.
169 		 */
170 		if (cmd == SIOCGIFCONF)
171 			return (ifconf(cmd, data));	/* XXX remove cmd */
172 		return (EOPNOTSUPP);
173 	}
174 
175 	ifp = ifnet_byindex(idx);
176 	if (ifp == NULL)
177 		return (ENXIO);
178 
179 	error = ifhwioctl(cmd, ifp, data, td);
180 	if (error == ENOIOCTL)
181 		error = EOPNOTSUPP;
182 	return (error);
183 }
184 
185 static int
186 netkqfilter(struct cdev *dev, struct knote *kn)
187 {
188 	struct knlist *klist;
189 	struct ifnet *ifp;
190 	int idx;
191 
192 	switch (kn->kn_filter) {
193 	case EVFILT_NETDEV:
194 		kn->kn_fop = &netdev_filtops;
195 		break;
196 	default:
197 		return (1);
198 	}
199 
200 	idx = minor(dev);
201 	if (idx == 0) {
202 		klist = &ifklist;
203 	} else {
204 		ifp = ifnet_byindex(idx);
205 		if (ifp == NULL)
206 			return (1);
207 		klist = &ifp->if_klist;
208 	}
209 
210 	kn->kn_hook = (caddr_t)klist;
211 
212 	knlist_add(klist, kn, 0);
213 
214 	return (0);
215 }
216 
217 static void
218 filt_netdetach(struct knote *kn)
219 {
220 	struct knlist *klist = (struct knlist *)kn->kn_hook;
221 
222 	knlist_remove(klist, kn, 0);
223 }
224 
225 static int
226 filt_netdev(struct knote *kn, long hint)
227 {
228 	struct knlist *klist = (struct knlist *)kn->kn_hook;
229 
230 	/*
231 	 * Currently NOTE_EXIT is abused to indicate device detach.
232 	 */
233 	if (hint == NOTE_EXIT) {
234 		kn->kn_data = NOTE_LINKINV;
235 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
236 		knlist_remove_inevent(klist, kn);
237 		return (1);
238 	}
239 	if (hint != 0)
240 		kn->kn_data = hint;			/* current status */
241 	if (kn->kn_sfflags & hint)
242 		kn->kn_fflags |= hint;
243 	return (kn->kn_fflags != 0);
244 }
245 
246 /*
247  * Network interface utility routines.
248  *
249  * Routines with ifa_ifwith* names take sockaddr *'s as
250  * parameters.
251  */
252 /* ARGSUSED*/
253 static void
254 if_init(void *dummy __unused)
255 {
256 
257 	IFNET_LOCK_INIT();
258 	TAILQ_INIT(&ifnet);
259 	knlist_init(&ifklist, NULL);
260 	if_grow();				/* create initial table */
261 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
262 	    UID_ROOT, GID_WHEEL, 0600, "network");
263 	if_clone_init();
264 }
265 
266 static void
267 if_grow(void)
268 {
269 	u_int n;
270 	struct ifindex_entry *e;
271 
272 	if_indexlim <<= 1;
273 	n = if_indexlim * sizeof(*e);
274 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
275 	if (ifindex_table != NULL) {
276 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
277 		free((caddr_t)ifindex_table, M_IFADDR);
278 	}
279 	ifindex_table = e;
280 }
281 
282 /* ARGSUSED*/
283 static void
284 if_check(void *dummy __unused)
285 {
286 	struct ifnet *ifp;
287 	int s;
288 
289 	s = splimp();
290 	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
291 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
292 		if (ifp->if_snd.ifq_maxlen == 0) {
293 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
294 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
295 		}
296 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
297 			if_printf(ifp,
298 			    "XXX: driver didn't initialize queue mtx\n");
299 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
300 			    MTX_NETWORK_LOCK, MTX_DEF);
301 		}
302 	}
303 	IFNET_RUNLOCK();
304 	splx(s);
305 	if_slowtimo(0);
306 }
307 
308 static int
309 if_findindex(struct ifnet *ifp)
310 {
311 	int i, unit;
312 	char eaddr[18], devname[32];
313 	const char *name, *p;
314 
315 	switch (ifp->if_type) {
316 	case IFT_ETHER:			/* these types use struct arpcom */
317 	case IFT_FDDI:
318 	case IFT_XETHER:
319 	case IFT_ISO88025:
320 	case IFT_L2VLAN:
321 		snprintf(eaddr, 18, "%6D", IFP2AC(ifp)->ac_enaddr, ":");
322 		break;
323 	default:
324 		eaddr[0] = '\0';
325 		break;
326 	}
327 	strlcpy(devname, ifp->if_xname, sizeof(devname));
328 	name = net_cdevsw.d_name;
329 	i = 0;
330 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
331 		if (resource_string_value(name, unit, "ether", &p) == 0)
332 			if (strcmp(p, eaddr) == 0)
333 				goto found;
334 		if (resource_string_value(name, unit, "dev", &p) == 0)
335 			if (strcmp(p, devname) == 0)
336 				goto found;
337 	}
338 	unit = 0;
339 found:
340 	if (unit != 0) {
341 		if (ifaddr_byindex(unit) == NULL)
342 			return (unit);
343 		printf("%s%d in use, cannot hardwire it to %s.\n",
344 		    name, unit, devname);
345 	}
346 	for (unit = 1; ; unit++) {
347 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
348 			continue;
349 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
350 		    resource_string_value(name, unit, "dev", &p) == 0)
351 			continue;
352 		break;
353 	}
354 	return (unit);
355 }
356 
357 /*
358  * Attach an interface to the
359  * list of "active" interfaces.
360  */
361 void
362 if_attach(struct ifnet *ifp)
363 {
364 	unsigned socksize, ifasize;
365 	int namelen, masklen;
366 	struct sockaddr_dl *sdl;
367 	struct ifaddr *ifa;
368 
369 	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
370 	IF_AFDATA_LOCK_INIT(ifp);
371 	ifp->if_afdata_initialized = 0;
372 	IFNET_WLOCK();
373 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
374 	IFNET_WUNLOCK();
375 	/*
376 	 * XXX -
377 	 * The old code would work if the interface passed a pre-existing
378 	 * chain of ifaddrs to this code.  We don't trust our callers to
379 	 * properly initialize the tailq, however, so we no longer allow
380 	 * this unlikely case.
381 	 */
382 	TAILQ_INIT(&ifp->if_addrhead);
383 	TAILQ_INIT(&ifp->if_prefixhead);
384 	TAILQ_INIT(&ifp->if_multiaddrs);
385 	knlist_init(&ifp->if_klist, NULL);
386 	getmicrotime(&ifp->if_lastchange);
387 	ifp->if_data.ifi_epoch = time_second;
388 
389 #ifdef MAC
390 	mac_init_ifnet(ifp);
391 	mac_create_ifnet(ifp);
392 #endif
393 
394 	ifp->if_index = if_findindex(ifp);
395 	if (ifp->if_index > if_index)
396 		if_index = ifp->if_index;
397 	if (if_index >= if_indexlim)
398 		if_grow();
399 	ifp->if_data.ifi_datalen = sizeof(struct if_data);
400 
401 	ifnet_byindex(ifp->if_index) = ifp;
402 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
403 	    unit2minor(ifp->if_index),
404 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
405 	    net_cdevsw.d_name, ifp->if_xname);
406 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
407 	    net_cdevsw.d_name, ifp->if_index);
408 
409 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
410 
411 	/*
412 	 * create a Link Level name for this device
413 	 */
414 	namelen = strlen(ifp->if_xname);
415 	/*
416 	 * Always save enough space for any possiable name so we can do
417 	 * a rename in place later.
418 	 */
419 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
420 	socksize = masklen + ifp->if_addrlen;
421 	if (socksize < sizeof(*sdl))
422 		socksize = sizeof(*sdl);
423 	socksize = roundup2(socksize, sizeof(long));
424 	ifasize = sizeof(*ifa) + 2 * socksize;
425 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
426 	IFA_LOCK_INIT(ifa);
427 	sdl = (struct sockaddr_dl *)(ifa + 1);
428 	sdl->sdl_len = socksize;
429 	sdl->sdl_family = AF_LINK;
430 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
431 	sdl->sdl_nlen = namelen;
432 	sdl->sdl_index = ifp->if_index;
433 	sdl->sdl_type = ifp->if_type;
434 	ifaddr_byindex(ifp->if_index) = ifa;
435 	ifa->ifa_ifp = ifp;
436 	ifa->ifa_rtrequest = link_rtrequest;
437 	ifa->ifa_addr = (struct sockaddr *)sdl;
438 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
439 	ifa->ifa_netmask = (struct sockaddr *)sdl;
440 	sdl->sdl_len = masklen;
441 	while (namelen != 0)
442 		sdl->sdl_data[--namelen] = 0xff;
443 	ifa->ifa_refcnt = 1;
444 	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
445 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
446 	ifp->if_snd.altq_type = 0;
447 	ifp->if_snd.altq_disc = NULL;
448 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
449 	ifp->if_snd.altq_tbr  = NULL;
450 	ifp->if_snd.altq_ifp  = ifp;
451 
452 	if (domains)
453 		if_attachdomain1(ifp);
454 
455 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
456 
457 	/* Announce the interface. */
458 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
459 }
460 
461 static void
462 if_attachdomain(void *dummy)
463 {
464 	struct ifnet *ifp;
465 	int s;
466 
467 	s = splnet();
468 	TAILQ_FOREACH(ifp, &ifnet, if_link)
469 		if_attachdomain1(ifp);
470 	splx(s);
471 }
472 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
473     if_attachdomain, NULL);
474 
475 static void
476 if_attachdomain1(struct ifnet *ifp)
477 {
478 	struct domain *dp;
479 	int s;
480 
481 	s = splnet();
482 
483 	/*
484 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
485 	 * cannot lock ifp->if_afdata initialization, entirely.
486 	 */
487 	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
488 		splx(s);
489 		return;
490 	}
491 	if (ifp->if_afdata_initialized) {
492 		IF_AFDATA_UNLOCK(ifp);
493 		splx(s);
494 		return;
495 	}
496 	ifp->if_afdata_initialized = 1;
497 	IF_AFDATA_UNLOCK(ifp);
498 
499 	/* address family dependent data region */
500 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
501 	for (dp = domains; dp; dp = dp->dom_next) {
502 		if (dp->dom_ifattach)
503 			ifp->if_afdata[dp->dom_family] =
504 			    (*dp->dom_ifattach)(ifp);
505 	}
506 
507 	splx(s);
508 }
509 
510 /*
511  * Detach an interface, removing it from the
512  * list of "active" interfaces.
513  */
514 void
515 if_detach(struct ifnet *ifp)
516 {
517 	struct ifaddr *ifa, *next;
518 	struct radix_node_head	*rnh;
519 	int s;
520 	int i;
521 	struct domain *dp;
522  	struct ifnet *iter;
523  	int found;
524 
525 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
526 	/*
527 	 * Remove routes and flush queues.
528 	 */
529 	s = splnet();
530 	if_down(ifp);
531 #ifdef ALTQ
532 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
533 		altq_disable(&ifp->if_snd);
534 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
535 		altq_detach(&ifp->if_snd);
536 #endif
537 
538 	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = next) {
539 		next = TAILQ_NEXT(ifa, ifa_link);
540 
541 		if (ifa->ifa_addr->sa_family == AF_LINK)
542 			continue;
543 #ifdef INET
544 		/* XXX: Ugly!! ad hoc just for INET */
545 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
546 			struct ifaliasreq ifr;
547 
548 			bzero(&ifr, sizeof(ifr));
549 			ifr.ifra_addr = *ifa->ifa_addr;
550 			if (ifa->ifa_dstaddr)
551 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
552 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
553 			    NULL) == 0)
554 				continue;
555 		}
556 #endif /* INET */
557 #ifdef INET6
558 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
559 			in6_purgeaddr(ifa);
560 			/* ifp_addrhead is already updated */
561 			continue;
562 		}
563 #endif /* INET6 */
564 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
565 		IFAFREE(ifa);
566 	}
567 
568 #ifdef INET6
569 	/*
570 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
571 	 * before removing routing entries below, since IPv6 interface direct
572 	 * routes are expected to be removed by the IPv6-specific kernel API.
573 	 * Otherwise, the kernel will detect some inconsistency and bark it.
574 	 */
575 	in6_ifdetach(ifp);
576 #endif
577 	/*
578 	 * Remove address from ifindex_table[] and maybe decrement if_index.
579 	 * Clean up all addresses.
580 	 */
581 	ifnet_byindex(ifp->if_index) = NULL;
582 	ifaddr_byindex(ifp->if_index) = NULL;
583 	destroy_dev(ifdev_byindex(ifp->if_index));
584 	ifdev_byindex(ifp->if_index) = NULL;
585 
586 	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
587 		if_index--;
588 
589 
590 	/* We can now free link ifaddr. */
591 	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
592 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
593 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
594 		IFAFREE(ifa);
595 	}
596 
597 	/*
598 	 * Delete all remaining routes using this interface
599 	 * Unfortuneatly the only way to do this is to slog through
600 	 * the entire routing table looking for routes which point
601 	 * to this interface...oh well...
602 	 */
603 	for (i = 1; i <= AF_MAX; i++) {
604 		if ((rnh = rt_tables[i]) == NULL)
605 			continue;
606 		RADIX_NODE_HEAD_LOCK(rnh);
607 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
608 		RADIX_NODE_HEAD_UNLOCK(rnh);
609 	}
610 
611 	/* Announce that the interface is gone. */
612 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
613 
614 	IF_AFDATA_LOCK(ifp);
615 	for (dp = domains; dp; dp = dp->dom_next) {
616 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
617 			(*dp->dom_ifdetach)(ifp,
618 			    ifp->if_afdata[dp->dom_family]);
619 	}
620 	IF_AFDATA_UNLOCK(ifp);
621 
622 #ifdef MAC
623 	mac_destroy_ifnet(ifp);
624 #endif /* MAC */
625 	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
626 	knlist_clear(&ifp->if_klist, 0);
627 	knlist_destroy(&ifp->if_klist);
628 	IFNET_WLOCK();
629  	found = 0;
630  	TAILQ_FOREACH(iter, &ifnet, if_link)
631  		if (iter == ifp) {
632  			found = 1;
633  			break;
634  		}
635  	if (found)
636  		TAILQ_REMOVE(&ifnet, ifp, if_link);
637 	IFNET_WUNLOCK();
638 	mtx_destroy(&ifp->if_snd.ifq_mtx);
639 	IF_AFDATA_DESTROY(ifp);
640 	splx(s);
641 }
642 
643 /*
644  * Delete Routes for a Network Interface
645  *
646  * Called for each routing entry via the rnh->rnh_walktree() call above
647  * to delete all route entries referencing a detaching network interface.
648  *
649  * Arguments:
650  *	rn	pointer to node in the routing table
651  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
652  *
653  * Returns:
654  *	0	successful
655  *	errno	failed - reason indicated
656  *
657  */
658 static int
659 if_rtdel(struct radix_node *rn, void *arg)
660 {
661 	struct rtentry	*rt = (struct rtentry *)rn;
662 	struct ifnet	*ifp = arg;
663 	int		err;
664 
665 	if (rt->rt_ifp == ifp) {
666 
667 		/*
668 		 * Protect (sorta) against walktree recursion problems
669 		 * with cloned routes
670 		 */
671 		if ((rt->rt_flags & RTF_UP) == 0)
672 			return (0);
673 
674 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
675 				rt_mask(rt), rt->rt_flags,
676 				(struct rtentry **) NULL);
677 		if (err) {
678 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
679 		}
680 	}
681 
682 	return (0);
683 }
684 
/* True when the first a1->sa_len bytes of the two sockaddrs are identical. */
#define	equal(a1, a2)	(!bcmp((a1), (a2), ((a1))->sa_len))
686 
687 /*
688  * Locate an interface based on a complete address.
689  */
690 /*ARGSUSED*/
691 struct ifaddr *
692 ifa_ifwithaddr(struct sockaddr *addr)
693 {
694 	struct ifnet *ifp;
695 	struct ifaddr *ifa;
696 
697 	IFNET_RLOCK();
698 	TAILQ_FOREACH(ifp, &ifnet, if_link)
699 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
700 			if (ifa->ifa_addr->sa_family != addr->sa_family)
701 				continue;
702 			if (equal(addr, ifa->ifa_addr))
703 				goto done;
704 			/* IP6 doesn't have broadcast */
705 			if ((ifp->if_flags & IFF_BROADCAST) &&
706 			    ifa->ifa_broadaddr &&
707 			    ifa->ifa_broadaddr->sa_len != 0 &&
708 			    equal(ifa->ifa_broadaddr, addr))
709 				goto done;
710 		}
711 	ifa = NULL;
712 done:
713 	IFNET_RUNLOCK();
714 	return (ifa);
715 }
716 
717 /*
718  * Locate the point to point interface with a given destination address.
719  */
720 /*ARGSUSED*/
721 struct ifaddr *
722 ifa_ifwithdstaddr(struct sockaddr *addr)
723 {
724 	struct ifnet *ifp;
725 	struct ifaddr *ifa;
726 
727 	IFNET_RLOCK();
728 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
729 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
730 			continue;
731 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
732 			if (ifa->ifa_addr->sa_family != addr->sa_family)
733 				continue;
734 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
735 				goto done;
736 		}
737 	}
738 	ifa = NULL;
739 done:
740 	IFNET_RUNLOCK();
741 	return (ifa);
742 }
743 
744 /*
745  * Find an interface on a specific network.  If many, choice
746  * is most specific found.
747  */
748 struct ifaddr *
749 ifa_ifwithnet(struct sockaddr *addr)
750 {
751 	struct ifnet *ifp;
752 	struct ifaddr *ifa;
753 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
754 	u_int af = addr->sa_family;
755 	char *addr_data = addr->sa_data, *cplim;
756 
757 	/*
758 	 * AF_LINK addresses can be looked up directly by their index number,
759 	 * so do that if we can.
760 	 */
761 	if (af == AF_LINK) {
762 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
763 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
764 		return (ifaddr_byindex(sdl->sdl_index));
765 	}
766 
767 	/*
768 	 * Scan though each interface, looking for ones that have
769 	 * addresses in this address family.
770 	 */
771 	IFNET_RLOCK();
772 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
773 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
774 			char *cp, *cp2, *cp3;
775 
776 			if (ifa->ifa_addr->sa_family != af)
777 next:				continue;
778 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
779 				/*
780 				 * This is a bit broken as it doesn't
781 				 * take into account that the remote end may
782 				 * be a single node in the network we are
783 				 * looking for.
784 				 * The trouble is that we don't know the
785 				 * netmask for the remote end.
786 				 */
787 				if (ifa->ifa_dstaddr != 0
788 				    && equal(addr, ifa->ifa_dstaddr))
789 					goto done;
790 			} else {
791 				/*
792 				 * if we have a special address handler,
793 				 * then use it instead of the generic one.
794 				 */
795 				if (ifa->ifa_claim_addr) {
796 					if ((*ifa->ifa_claim_addr)(ifa, addr))
797 						goto done;
798 					continue;
799 				}
800 
801 				/*
802 				 * Scan all the bits in the ifa's address.
803 				 * If a bit dissagrees with what we are
804 				 * looking for, mask it with the netmask
805 				 * to see if it really matters.
806 				 * (A byte at a time)
807 				 */
808 				if (ifa->ifa_netmask == 0)
809 					continue;
810 				cp = addr_data;
811 				cp2 = ifa->ifa_addr->sa_data;
812 				cp3 = ifa->ifa_netmask->sa_data;
813 				cplim = ifa->ifa_netmask->sa_len
814 					+ (char *)ifa->ifa_netmask;
815 				while (cp3 < cplim)
816 					if ((*cp++ ^ *cp2++) & *cp3++)
817 						goto next; /* next address! */
818 				/*
819 				 * If the netmask of what we just found
820 				 * is more specific than what we had before
821 				 * (if we had one) then remember the new one
822 				 * before continuing to search
823 				 * for an even better one.
824 				 */
825 				if (ifa_maybe == 0 ||
826 				    rn_refines((caddr_t)ifa->ifa_netmask,
827 				    (caddr_t)ifa_maybe->ifa_netmask))
828 					ifa_maybe = ifa;
829 			}
830 		}
831 	}
832 	ifa = ifa_maybe;
833 done:
834 	IFNET_RUNLOCK();
835 	return (ifa);
836 }
837 
838 /*
839  * Find an interface address specific to an interface best matching
840  * a given address.
841  */
842 struct ifaddr *
843 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
844 {
845 	struct ifaddr *ifa;
846 	char *cp, *cp2, *cp3;
847 	char *cplim;
848 	struct ifaddr *ifa_maybe = 0;
849 	u_int af = addr->sa_family;
850 
851 	if (af >= AF_MAX)
852 		return (0);
853 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
854 		if (ifa->ifa_addr->sa_family != af)
855 			continue;
856 		if (ifa_maybe == 0)
857 			ifa_maybe = ifa;
858 		if (ifa->ifa_netmask == 0) {
859 			if (equal(addr, ifa->ifa_addr) ||
860 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
861 				goto done;
862 			continue;
863 		}
864 		if (ifp->if_flags & IFF_POINTOPOINT) {
865 			if (equal(addr, ifa->ifa_dstaddr))
866 				goto done;
867 		} else {
868 			cp = addr->sa_data;
869 			cp2 = ifa->ifa_addr->sa_data;
870 			cp3 = ifa->ifa_netmask->sa_data;
871 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
872 			for (; cp3 < cplim; cp3++)
873 				if ((*cp++ ^ *cp2++) & *cp3)
874 					break;
875 			if (cp3 == cplim)
876 				goto done;
877 		}
878 	}
879 	ifa = ifa_maybe;
880 done:
881 	return (ifa);
882 }
883 
884 #include <net/route.h>
885 
886 /*
887  * Default action when installing a route with a Link Level gateway.
888  * Lookup an appropriate real ifa to point to.
889  * This should be moved to /sys/net/link.c eventually.
890  */
891 static void
892 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
893 {
894 	struct ifaddr *ifa, *oifa;
895 	struct sockaddr *dst;
896 	struct ifnet *ifp;
897 
898 	RT_LOCK_ASSERT(rt);
899 
900 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
901 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
902 		return;
903 	ifa = ifaof_ifpforaddr(dst, ifp);
904 	if (ifa) {
905 		IFAREF(ifa);		/* XXX */
906 		oifa = rt->rt_ifa;
907 		rt->rt_ifa = ifa;
908 		IFAFREE(oifa);
909 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
910 			ifa->ifa_rtrequest(cmd, rt, info);
911 	}
912 }
913 
914 /*
915  * Mark an interface down and notify protocols of
916  * the transition.
917  * NOTE: must be called at splnet or eqivalent.
918  */
919 static void
920 if_unroute(struct ifnet *ifp, int flag, int fam)
921 {
922 	struct ifaddr *ifa;
923 
924 	ifp->if_flags &= ~flag;
925 	getmicrotime(&ifp->if_lastchange);
926 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
927 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
928 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
929 	if_qflush(&ifp->if_snd);
930 	rt_ifmsg(ifp);
931 }
932 
933 /*
934  * Mark an interface up and notify protocols of
935  * the transition.
936  * NOTE: must be called at splnet or eqivalent.
937  */
938 static void
939 if_route(struct ifnet *ifp, int flag, int fam)
940 {
941 	struct ifaddr *ifa;
942 
943 	ifp->if_flags |= flag;
944 	getmicrotime(&ifp->if_lastchange);
945 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
946 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
947 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
948 	rt_ifmsg(ifp);
949 #ifdef INET6
950 	in6_if_up(ifp);
951 #endif
952 }
953 
954 /*
955  * Mark an interface down and notify protocols of
956  * the transition.
957  * NOTE: must be called at splnet or eqivalent.
958  */
959 void
960 if_down(struct ifnet *ifp)
961 {
962 
963 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
964 }
965 
966 /*
967  * Mark an interface up and notify protocols of
968  * the transition.
969  * NOTE: must be called at splnet or eqivalent.
970  */
971 void
972 if_up(struct ifnet *ifp)
973 {
974 
975 	if_route(ifp, IFF_UP, AF_UNSPEC);
976 }
977 
978 /*
979  * Flush an interface queue.
980  */
981 static void
982 if_qflush(struct ifaltq *ifq)
983 {
984 	struct mbuf *m, *n;
985 
986 	IFQ_LOCK(ifq);
987 #ifdef ALTQ
988 	if (ALTQ_IS_ENABLED(ifq))
989 		ALTQ_PURGE(ifq);
990 #endif
991 	n = ifq->ifq_head;
992 	while ((m = n) != 0) {
993 		n = m->m_act;
994 		m_freem(m);
995 	}
996 	ifq->ifq_head = 0;
997 	ifq->ifq_tail = 0;
998 	ifq->ifq_len = 0;
999 	IFQ_UNLOCK(ifq);
1000 }
1001 
1002 /*
1003  * Handle interface watchdog timer routines.  Called
1004  * from softclock, we decrement timers (if set) and
1005  * call the appropriate interface routine on expiration.
1006  *
1007  * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1008  * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1009  * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1010  */
1011 static void
1012 if_slowtimo(void *arg)
1013 {
1014 	struct ifnet *ifp;
1015 	int s = splimp();
1016 
1017 	IFNET_RLOCK();
1018 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1019 		if (ifp->if_timer == 0 || --ifp->if_timer)
1020 			continue;
1021 		if (ifp->if_watchdog)
1022 			(*ifp->if_watchdog)(ifp);
1023 	}
1024 	IFNET_RUNLOCK();
1025 	splx(s);
1026 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1027 }
1028 
1029 /*
1030  * Map interface name to
1031  * interface structure pointer.
1032  */
1033 struct ifnet *
1034 ifunit(const char *name)
1035 {
1036 	struct ifnet *ifp;
1037 
1038 	IFNET_RLOCK();
1039 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1040 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1041 			break;
1042 	}
1043 	IFNET_RUNLOCK();
1044 	return (ifp);
1045 }
1046 
1047 /*
1048  * Hardware specific interface ioctls.
1049  */
1050 static int
1051 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1052 {
1053 	struct ifreq *ifr;
1054 	struct ifstat *ifs;
1055 	int error = 0;
1056 	int new_flags;
1057 	size_t namelen, onamelen;
1058 	char new_name[IFNAMSIZ];
1059 	struct ifaddr *ifa;
1060 	struct sockaddr_dl *sdl;
1061 
1062 	ifr = (struct ifreq *)data;
1063 	switch (cmd) {
1064 	case SIOCGIFINDEX:
1065 		ifr->ifr_index = ifp->if_index;
1066 		break;
1067 
1068 	case SIOCGIFFLAGS:
1069 		ifr->ifr_flags = ifp->if_flags & 0xffff;
1070 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1071 		break;
1072 
1073 	case SIOCGIFCAP:
1074 		ifr->ifr_reqcap = ifp->if_capabilities;
1075 		ifr->ifr_curcap = ifp->if_capenable;
1076 		break;
1077 
1078 #ifdef MAC
1079 	case SIOCGIFMAC:
1080 		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1081 		break;
1082 #endif
1083 
1084 	case SIOCGIFMETRIC:
1085 		ifr->ifr_metric = ifp->if_metric;
1086 		break;
1087 
1088 	case SIOCGIFMTU:
1089 		ifr->ifr_mtu = ifp->if_mtu;
1090 		break;
1091 
1092 	case SIOCGIFPHYS:
1093 		ifr->ifr_phys = ifp->if_physical;
1094 		break;
1095 
1096 	case SIOCSIFFLAGS:
1097 		error = suser(td);
1098 		if (error)
1099 			return (error);
1100 		new_flags = (ifr->ifr_flags & 0xffff) |
1101 		    (ifr->ifr_flagshigh << 16);
1102 		if (ifp->if_flags & IFF_SMART) {
1103 			/* Smart drivers twiddle their own routes */
1104 		} else if (ifp->if_flags & IFF_UP &&
1105 		    (new_flags & IFF_UP) == 0) {
1106 			int s = splimp();
1107 			if_down(ifp);
1108 			splx(s);
1109 		} else if (new_flags & IFF_UP &&
1110 		    (ifp->if_flags & IFF_UP) == 0) {
1111 			int s = splimp();
1112 			if_up(ifp);
1113 			splx(s);
1114 		}
1115 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1116 			(new_flags &~ IFF_CANTCHANGE);
1117 		if (new_flags & IFF_PPROMISC) {
1118 			/* Permanently promiscuous mode requested */
1119 			ifp->if_flags |= IFF_PROMISC;
1120 		} else if (ifp->if_pcount == 0) {
1121 			ifp->if_flags &= ~IFF_PROMISC;
1122 		}
1123 		if (ifp->if_ioctl)
1124 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1125 		getmicrotime(&ifp->if_lastchange);
1126 		break;
1127 
1128 	case SIOCSIFCAP:
1129 		error = suser(td);
1130 		if (error)
1131 			return (error);
1132 		if (ifp->if_ioctl == NULL)
1133 			return (EOPNOTSUPP);
1134 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1135 			return (EINVAL);
1136 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1137 		if (error == 0)
1138 			getmicrotime(&ifp->if_lastchange);
1139 		break;
1140 
1141 #ifdef MAC
1142 	case SIOCSIFMAC:
1143 		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1144 		break;
1145 #endif
1146 
1147 	case SIOCSIFNAME:
1148 		error = suser(td);
1149 		if (error != 0)
1150 			return (error);
1151 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1152 		if (error != 0)
1153 			return (error);
1154 		if (new_name[0] == '\0')
1155 			return (EINVAL);
1156 		if (ifunit(new_name) != NULL)
1157 			return (EEXIST);
1158 
1159 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1160 		/* Announce the departure of the interface. */
1161 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1162 
1163 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1164 		ifa = ifaddr_byindex(ifp->if_index);
1165 		IFA_LOCK(ifa);
1166 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1167 		namelen = strlen(new_name);
1168 		onamelen = sdl->sdl_nlen;
1169 		/*
1170 		 * Move the address if needed.  This is safe because we
1171 		 * allocate space for a name of length IFNAMSIZ when we
1172 		 * create this in if_attach().
1173 		 */
1174 		if (namelen != onamelen) {
1175 			bcopy(sdl->sdl_data + onamelen,
1176 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1177 		}
1178 		bcopy(new_name, sdl->sdl_data, namelen);
1179 		sdl->sdl_nlen = namelen;
1180 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1181 		bzero(sdl->sdl_data, onamelen);
1182 		while (namelen != 0)
1183 			sdl->sdl_data[--namelen] = 0xff;
1184 		IFA_UNLOCK(ifa);
1185 
1186 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1187 		/* Announce the return of the interface. */
1188 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1189 		break;
1190 
1191 	case SIOCSIFMETRIC:
1192 		error = suser(td);
1193 		if (error)
1194 			return (error);
1195 		ifp->if_metric = ifr->ifr_metric;
1196 		getmicrotime(&ifp->if_lastchange);
1197 		break;
1198 
1199 	case SIOCSIFPHYS:
1200 		error = suser(td);
1201 		if (error)
1202 			return (error);
1203 		if (ifp->if_ioctl == NULL)
1204 			return (EOPNOTSUPP);
1205 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1206 		if (error == 0)
1207 			getmicrotime(&ifp->if_lastchange);
1208 		break;
1209 
1210 	case SIOCSIFMTU:
1211 	{
1212 		u_long oldmtu = ifp->if_mtu;
1213 
1214 		error = suser(td);
1215 		if (error)
1216 			return (error);
1217 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1218 			return (EINVAL);
1219 		if (ifp->if_ioctl == NULL)
1220 			return (EOPNOTSUPP);
1221 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1222 		if (error == 0) {
1223 			getmicrotime(&ifp->if_lastchange);
1224 			rt_ifmsg(ifp);
1225 		}
1226 		/*
1227 		 * If the link MTU changed, do network layer specific procedure.
1228 		 */
1229 		if (ifp->if_mtu != oldmtu) {
1230 #ifdef INET6
1231 			nd6_setmtu(ifp);
1232 #endif
1233 		}
1234 		break;
1235 	}
1236 
1237 	case SIOCADDMULTI:
1238 	case SIOCDELMULTI:
1239 		error = suser(td);
1240 		if (error)
1241 			return (error);
1242 
1243 		/* Don't allow group membership on non-multicast interfaces. */
1244 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1245 			return (EOPNOTSUPP);
1246 
1247 		/* Don't let users screw up protocols' entries. */
1248 		if (ifr->ifr_addr.sa_family != AF_LINK)
1249 			return (EINVAL);
1250 
1251 		if (cmd == SIOCADDMULTI) {
1252 			struct ifmultiaddr *ifma;
1253 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1254 		} else {
1255 			error = if_delmulti(ifp, &ifr->ifr_addr);
1256 		}
1257 		if (error == 0)
1258 			getmicrotime(&ifp->if_lastchange);
1259 		break;
1260 
1261 	case SIOCSIFPHYADDR:
1262 	case SIOCDIFPHYADDR:
1263 #ifdef INET6
1264 	case SIOCSIFPHYADDR_IN6:
1265 #endif
1266 	case SIOCSLIFPHYADDR:
1267 	case SIOCSIFMEDIA:
1268 	case SIOCSIFGENERIC:
1269 		error = suser(td);
1270 		if (error)
1271 			return (error);
1272 		if (ifp->if_ioctl == NULL)
1273 			return (EOPNOTSUPP);
1274 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1275 		if (error == 0)
1276 			getmicrotime(&ifp->if_lastchange);
1277 		break;
1278 
1279 	case SIOCGIFSTATUS:
1280 		ifs = (struct ifstat *)data;
1281 		ifs->ascii[0] = '\0';
1282 
1283 	case SIOCGIFPSRCADDR:
1284 	case SIOCGIFPDSTADDR:
1285 	case SIOCGLIFPHYADDR:
1286 	case SIOCGIFMEDIA:
1287 	case SIOCGIFGENERIC:
1288 		if (ifp->if_ioctl == NULL)
1289 			return (EOPNOTSUPP);
1290 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1291 		break;
1292 
1293 	case SIOCSIFLLADDR:
1294 		error = suser(td);
1295 		if (error)
1296 			return (error);
1297 		error = if_setlladdr(ifp,
1298 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1299 		break;
1300 
1301 	default:
1302 		error = ENOIOCTL;
1303 		break;
1304 	}
1305 	return (error);
1306 }
1307 
1308 /*
1309  * Interface ioctls.
1310  */
1311 int
1312 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1313 {
1314 	struct ifnet *ifp;
1315 	struct ifreq *ifr;
1316 	int error;
1317 	int oif_flags;
1318 
1319 	switch (cmd) {
1320 	case SIOCGIFCONF:
1321 	case OSIOCGIFCONF:
1322 		return (ifconf(cmd, data));
1323 	}
1324 	ifr = (struct ifreq *)data;
1325 
1326 	switch (cmd) {
1327 	case SIOCIFCREATE:
1328 	case SIOCIFDESTROY:
1329 		if ((error = suser(td)) != 0)
1330 			return (error);
1331 		return ((cmd == SIOCIFCREATE) ?
1332 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1333 			if_clone_destroy(ifr->ifr_name));
1334 
1335 	case SIOCIFGCLONERS:
1336 		return (if_clone_list((struct if_clonereq *)data));
1337 	}
1338 
1339 	ifp = ifunit(ifr->ifr_name);
1340 	if (ifp == 0)
1341 		return (ENXIO);
1342 
1343 	error = ifhwioctl(cmd, ifp, data, td);
1344 	if (error != ENOIOCTL)
1345 		return (error);
1346 
1347 	oif_flags = ifp->if_flags;
1348 	if (so->so_proto == 0)
1349 		return (EOPNOTSUPP);
1350 #ifndef COMPAT_43
1351 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1352 								 data,
1353 								 ifp, td));
1354 #else
1355 	{
1356 		int ocmd = cmd;
1357 
1358 		switch (cmd) {
1359 
1360 		case SIOCSIFDSTADDR:
1361 		case SIOCSIFADDR:
1362 		case SIOCSIFBRDADDR:
1363 		case SIOCSIFNETMASK:
1364 #if BYTE_ORDER != BIG_ENDIAN
1365 			if (ifr->ifr_addr.sa_family == 0 &&
1366 			    ifr->ifr_addr.sa_len < 16) {
1367 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1368 				ifr->ifr_addr.sa_len = 16;
1369 			}
1370 #else
1371 			if (ifr->ifr_addr.sa_len == 0)
1372 				ifr->ifr_addr.sa_len = 16;
1373 #endif
1374 			break;
1375 
1376 		case OSIOCGIFADDR:
1377 			cmd = SIOCGIFADDR;
1378 			break;
1379 
1380 		case OSIOCGIFDSTADDR:
1381 			cmd = SIOCGIFDSTADDR;
1382 			break;
1383 
1384 		case OSIOCGIFBRDADDR:
1385 			cmd = SIOCGIFBRDADDR;
1386 			break;
1387 
1388 		case OSIOCGIFNETMASK:
1389 			cmd = SIOCGIFNETMASK;
1390 		}
1391 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1392 								   cmd,
1393 								   data,
1394 								   ifp, td));
1395 		switch (ocmd) {
1396 
1397 		case OSIOCGIFADDR:
1398 		case OSIOCGIFDSTADDR:
1399 		case OSIOCGIFBRDADDR:
1400 		case OSIOCGIFNETMASK:
1401 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1402 
1403 		}
1404 	}
1405 #endif /* COMPAT_43 */
1406 
1407 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1408 #ifdef INET6
1409 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1410 		if (ifp->if_flags & IFF_UP) {
1411 			int s = splimp();
1412 			in6_if_up(ifp);
1413 			splx(s);
1414 		}
1415 #endif
1416 	}
1417 	return (error);
1418 }
1419 
1420 /*
1421  * Set/clear promiscuous mode on interface ifp based on the truth value
1422  * of pswitch.  The calls are reference counted so that only the first
1423  * "on" request actually has an effect, as does the final "off" request.
1424  * Results are undefined if the "off" and "on" requests are not matched.
1425  */
1426 int
1427 ifpromisc(struct ifnet *ifp, int pswitch)
1428 {
1429 	struct ifreq ifr;
1430 	int error;
1431 	int oldflags, oldpcount;
1432 
1433 	oldpcount = ifp->if_pcount;
1434 	oldflags = ifp->if_flags;
1435 	if (ifp->if_flags & IFF_PPROMISC) {
1436 		/* Do nothing if device is in permanently promiscuous mode */
1437 		ifp->if_pcount += pswitch ? 1 : -1;
1438 		return (0);
1439 	}
1440 	if (pswitch) {
1441 		/*
1442 		 * If the device is not configured up, we cannot put it in
1443 		 * promiscuous mode.
1444 		 */
1445 		if ((ifp->if_flags & IFF_UP) == 0)
1446 			return (ENETDOWN);
1447 		if (ifp->if_pcount++ != 0)
1448 			return (0);
1449 		ifp->if_flags |= IFF_PROMISC;
1450 	} else {
1451 		if (--ifp->if_pcount > 0)
1452 			return (0);
1453 		ifp->if_flags &= ~IFF_PROMISC;
1454 	}
1455 	ifr.ifr_flags = ifp->if_flags & 0xffff;
1456 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1457 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1458 	if (error == 0) {
1459 		log(LOG_INFO, "%s: promiscuous mode %s\n",
1460 		    ifp->if_xname,
1461 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1462 		rt_ifmsg(ifp);
1463 	} else {
1464 		ifp->if_pcount = oldpcount;
1465 		ifp->if_flags = oldflags;
1466 	}
1467 	return error;
1468 }
1469 
1470 /*
1471  * Return interface configuration
1472  * of system.  List may be used
1473  * in later ioctl's (above) to get
1474  * other information.
1475  */
1476 /*ARGSUSED*/
1477 static int
1478 ifconf(u_long cmd, caddr_t data)
1479 {
1480 	struct ifconf *ifc = (struct ifconf *)data;
1481 	struct ifnet *ifp;
1482 	struct ifaddr *ifa;
1483 	struct ifreq ifr, *ifrp;
1484 	int space = ifc->ifc_len, error = 0;
1485 
1486 	ifrp = ifc->ifc_req;
1487 	IFNET_RLOCK();		/* could sleep XXX */
1488 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1489 		int addrs;
1490 
1491 		if (space < sizeof(ifr))
1492 			break;
1493 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1494 		    >= sizeof(ifr.ifr_name)) {
1495 			error = ENAMETOOLONG;
1496 			break;
1497 		}
1498 
1499 		addrs = 0;
1500 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1501 			struct sockaddr *sa = ifa->ifa_addr;
1502 
1503 			if (space < sizeof(ifr))
1504 				break;
1505 			if (jailed(curthread->td_ucred) &&
1506 			    prison_if(curthread->td_ucred, sa))
1507 				continue;
1508 			addrs++;
1509 #ifdef COMPAT_43
1510 			if (cmd == OSIOCGIFCONF) {
1511 				struct osockaddr *osa =
1512 					 (struct osockaddr *)&ifr.ifr_addr;
1513 				ifr.ifr_addr = *sa;
1514 				osa->sa_family = sa->sa_family;
1515 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1516 						sizeof (ifr));
1517 				ifrp++;
1518 			} else
1519 #endif
1520 			if (sa->sa_len <= sizeof(*sa)) {
1521 				ifr.ifr_addr = *sa;
1522 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1523 						sizeof (ifr));
1524 				ifrp++;
1525 			} else {
1526 				if (space < sizeof (ifr) + sa->sa_len -
1527 					    sizeof(*sa))
1528 					break;
1529 				space -= sa->sa_len - sizeof(*sa);
1530 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1531 						sizeof (ifr.ifr_name));
1532 				if (error == 0)
1533 				    error = copyout((caddr_t)sa,
1534 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1535 				ifrp = (struct ifreq *)
1536 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1537 			}
1538 			if (error)
1539 				break;
1540 			space -= sizeof (ifr);
1541 		}
1542 		if (error)
1543 			break;
1544 		if (!addrs) {
1545 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1546 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1547 			    sizeof (ifr));
1548 			if (error)
1549 				break;
1550 			space -= sizeof (ifr);
1551 			ifrp++;
1552 		}
1553 	}
1554 	IFNET_RUNLOCK();
1555 	ifc->ifc_len -= space;
1556 	return (error);
1557 }
1558 
1559 /*
1560  * Just like if_promisc(), but for all-multicast-reception mode.
1561  */
1562 int
1563 if_allmulti(struct ifnet *ifp, int onswitch)
1564 {
1565 	int error = 0;
1566 	int s = splimp();
1567 	struct ifreq ifr;
1568 
1569 	if (onswitch) {
1570 		if (ifp->if_amcount++ == 0) {
1571 			ifp->if_flags |= IFF_ALLMULTI;
1572 			ifr.ifr_flags = ifp->if_flags & 0xffff;
1573 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1574 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1575 		}
1576 	} else {
1577 		if (ifp->if_amcount > 1) {
1578 			ifp->if_amcount--;
1579 		} else {
1580 			ifp->if_amcount = 0;
1581 			ifp->if_flags &= ~IFF_ALLMULTI;
1582 			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1583 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1584 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1585 		}
1586 	}
1587 	splx(s);
1588 
1589 	if (error == 0)
1590 		rt_ifmsg(ifp);
1591 	return error;
1592 }
1593 
1594 /*
1595  * Add a multicast listenership to the interface in question.
1596  * The link layer provides a routine which converts
1597  */
1598 int
1599 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1600 {
1601 	struct sockaddr *llsa, *dupsa;
1602 	int error, s;
1603 	struct ifmultiaddr *ifma;
1604 
1605 	/*
1606 	 * If the matching multicast address already exists
1607 	 * then don't add a new one, just add a reference
1608 	 */
1609 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1610 		if (equal(sa, ifma->ifma_addr)) {
1611 			ifma->ifma_refcount++;
1612 			if (retifma)
1613 				*retifma = ifma;
1614 			return 0;
1615 		}
1616 	}
1617 
1618 	/*
1619 	 * Give the link layer a chance to accept/reject it, and also
1620 	 * find out which AF_LINK address this maps to, if it isn't one
1621 	 * already.
1622 	 */
1623 	if (ifp->if_resolvemulti) {
1624 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1625 		if (error) return error;
1626 	} else {
1627 		llsa = 0;
1628 	}
1629 
1630 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1631 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1632 	bcopy(sa, dupsa, sa->sa_len);
1633 
1634 	ifma->ifma_addr = dupsa;
1635 	ifma->ifma_lladdr = llsa;
1636 	ifma->ifma_ifp = ifp;
1637 	ifma->ifma_refcount = 1;
1638 	ifma->ifma_protospec = 0;
1639 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1640 
1641 	/*
1642 	 * Some network interfaces can scan the address list at
1643 	 * interrupt time; lock them out.
1644 	 */
1645 	s = splimp();
1646 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1647 	splx(s);
1648 	if (retifma != NULL)
1649 		*retifma = ifma;
1650 
1651 	if (llsa != 0) {
1652 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1653 			if (equal(ifma->ifma_addr, llsa))
1654 				break;
1655 		}
1656 		if (ifma) {
1657 			ifma->ifma_refcount++;
1658 		} else {
1659 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1660 			       M_IFMADDR, M_WAITOK);
1661 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1662 			       M_IFMADDR, M_WAITOK);
1663 			bcopy(llsa, dupsa, llsa->sa_len);
1664 			ifma->ifma_addr = dupsa;
1665 			ifma->ifma_lladdr = NULL;
1666 			ifma->ifma_ifp = ifp;
1667 			ifma->ifma_refcount = 1;
1668 			ifma->ifma_protospec = 0;
1669 			s = splimp();
1670 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1671 			splx(s);
1672 		}
1673 	}
1674 	/*
1675 	 * We are certain we have added something, so call down to the
1676 	 * interface to let them know about it.
1677 	 */
1678 	s = splimp();
1679 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1680 	splx(s);
1681 
1682 	return 0;
1683 }
1684 
1685 /*
1686  * Remove a reference to a multicast address on this interface.  Yell
1687  * if the request does not match an existing membership.
1688  */
1689 int
1690 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1691 {
1692 	struct ifmultiaddr *ifma;
1693 	int s;
1694 
1695 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1696 		if (equal(sa, ifma->ifma_addr))
1697 			break;
1698 	if (ifma == 0)
1699 		return ENOENT;
1700 
1701 	if (ifma->ifma_refcount > 1) {
1702 		ifma->ifma_refcount--;
1703 		return 0;
1704 	}
1705 
1706 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1707 	sa = ifma->ifma_lladdr;
1708 	s = splimp();
1709 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1710 	/*
1711 	 * Make sure the interface driver is notified
1712 	 * in the case of a link layer mcast group being left.
1713 	 */
1714 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1715 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1716 	splx(s);
1717 	free(ifma->ifma_addr, M_IFMADDR);
1718 	free(ifma, M_IFMADDR);
1719 	if (sa == 0)
1720 		return 0;
1721 
1722 	/*
1723 	 * Now look for the link-layer address which corresponds to
1724 	 * this network address.  It had been squirreled away in
1725 	 * ifma->ifma_lladdr for this purpose (so we don't have
1726 	 * to call ifp->if_resolvemulti() again), and we saved that
1727 	 * value in sa above.  If some nasty deleted the
1728 	 * link-layer address out from underneath us, we can deal because
1729 	 * the address we stored was is not the same as the one which was
1730 	 * in the record for the link-layer address.  (So we don't complain
1731 	 * in that case.)
1732 	 */
1733 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1734 		if (equal(sa, ifma->ifma_addr))
1735 			break;
1736 	if (ifma == 0)
1737 		return 0;
1738 
1739 	if (ifma->ifma_refcount > 1) {
1740 		ifma->ifma_refcount--;
1741 		return 0;
1742 	}
1743 
1744 	s = splimp();
1745 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1746 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1747 	splx(s);
1748 	free(ifma->ifma_addr, M_IFMADDR);
1749 	free(sa, M_IFMADDR);
1750 	free(ifma, M_IFMADDR);
1751 
1752 	return 0;
1753 }
1754 
1755 /*
1756  * Set the link layer address on an interface.
1757  *
1758  * At this time we only support certain types of interfaces,
1759  * and we don't allow the length of the address to change.
1760  */
1761 int
1762 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1763 {
1764 	struct sockaddr_dl *sdl;
1765 	struct ifaddr *ifa;
1766 	struct ifreq ifr;
1767 
1768 	ifa = ifaddr_byindex(ifp->if_index);
1769 	if (ifa == NULL)
1770 		return (EINVAL);
1771 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1772 	if (sdl == NULL)
1773 		return (EINVAL);
1774 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1775 		return (EINVAL);
1776 	switch (ifp->if_type) {
1777 	case IFT_ETHER:			/* these types use struct arpcom */
1778 	case IFT_FDDI:
1779 	case IFT_XETHER:
1780 	case IFT_ISO88025:
1781 	case IFT_L2VLAN:
1782 		bcopy(lladdr, IFP2AC(ifp)->ac_enaddr, len);
1783 		/*
1784 		 * XXX We also need to store the lladdr in LLADDR(sdl),
1785 		 * which is done below. This is a pain because we must
1786 		 * remember to keep the info in sync.
1787 		 */
1788 		/* FALLTHROUGH */
1789 	case IFT_ARCNET:
1790 		bcopy(lladdr, LLADDR(sdl), len);
1791 		break;
1792 	default:
1793 		return (ENODEV);
1794 	}
1795 	/*
1796 	 * If the interface is already up, we need
1797 	 * to re-init it in order to reprogram its
1798 	 * address filter.
1799 	 */
1800 	if ((ifp->if_flags & IFF_UP) != 0) {
1801 		ifp->if_flags &= ~IFF_UP;
1802 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1803 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1804 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1805 		ifp->if_flags |= IFF_UP;
1806 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1807 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1808 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1809 #ifdef INET
1810 		/*
1811 		 * Also send gratuitous ARPs to notify other nodes about
1812 		 * the address change.
1813 		 */
1814 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1815 			if (ifa->ifa_addr != NULL &&
1816 			    ifa->ifa_addr->sa_family == AF_INET)
1817 				arp_ifinit(ifp, ifa);
1818 		}
1819 #endif
1820 	}
1821 	return (0);
1822 }
1823 
1824 struct ifmultiaddr *
1825 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
1826 {
1827 	struct ifmultiaddr *ifma;
1828 
1829 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1830 		if (equal(ifma->ifma_addr, sa))
1831 			break;
1832 
1833 	return ifma;
1834 }
1835 
1836 /*
1837  * The name argument must be a pointer to storage which will last as
1838  * long as the interface does.  For physical devices, the result of
1839  * device_get_name(dev) is a good choice and for pseudo-devices a
1840  * static string works well.
1841  */
1842 void
1843 if_initname(struct ifnet *ifp, const char *name, int unit)
1844 {
1845 	ifp->if_dname = name;
1846 	ifp->if_dunit = unit;
1847 	if (unit != IF_DUNIT_NONE)
1848 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1849 	else
1850 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
1851 }
1852 
1853 int
1854 if_printf(struct ifnet *ifp, const char * fmt, ...)
1855 {
1856 	va_list ap;
1857 	int retval;
1858 
1859 	retval = printf("%s: ", ifp->if_xname);
1860 	va_start(ap, fmt);
1861 	retval += vprintf(fmt, ap);
1862 	va_end(ap);
1863 	return (retval);
1864 }
1865 
1866 /*
1867  * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
1868  * be called without Giant.  However, we often can't acquire the Giant lock
1869  * at those points; instead, we run it via a task queue that holds Giant via
1870  * if_start_deferred.
1871  *
1872  * XXXRW: We need to make sure that the ifnet isn't fully detached until any
1873  * outstanding if_start_deferred() tasks that will run after the free.  This
1874  * probably means waiting in if_detach().
1875  */
1876 void
1877 if_start(struct ifnet *ifp)
1878 {
1879 
1880 	NET_ASSERT_GIANT();
1881 
1882         if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
1883                 if (mtx_owned(&Giant))
1884                         (*(ifp)->if_start)(ifp);
1885                 else
1886 			taskqueue_enqueue(taskqueue_swi_giant,
1887 			    &ifp->if_starttask);
1888         } else
1889                 (*(ifp)->if_start)(ifp);
1890 }
1891 
1892 static void
1893 if_start_deferred(void *context, int pending)
1894 {
1895 	struct ifnet *ifp;
1896 
1897 	/*
1898 	 * This code must be entered with Giant, and should never run if
1899 	 * we're not running with debug.mpsafenet.
1900 	 */
1901 	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
1902 	GIANT_REQUIRED;
1903 
1904 	ifp = (struct ifnet *)context;
1905 	(ifp->if_start)(ifp);
1906 }
1907 
/*
 * sysctl nodes for the link layer: "link" under the _net parent, and
 * "generic" beneath it for generic link-management settings.
 */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1910