xref: /freebsd/sys/net/if.c (revision c4f6a2a9e1b1879b618c436ab4f56ff75c73a0f5)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if.c	8.5 (Berkeley) 1/9/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_mac.h"
41 
42 #include <sys/param.h>
43 #include <sys/conf.h>
44 #include <sys/mac.h>
45 #include <sys/malloc.h>
46 #include <sys/bus.h>
47 #include <sys/mbuf.h>
48 #include <sys/systm.h>
49 #include <sys/proc.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/protosw.h>
53 #include <sys/kernel.h>
54 #include <sys/sockio.h>
55 #include <sys/syslog.h>
56 #include <sys/sysctl.h>
57 #include <sys/jail.h>
58 
59 #include <net/if.h>
60 #include <net/if_arp.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/if_var.h>
64 #include <net/radix.h>
65 #include <net/route.h>
66 
67 #if defined(INET) || defined(INET6)
68 /*XXX*/
69 #include <netinet/in.h>
70 #include <netinet/in_var.h>
71 #ifdef INET6
72 #include <netinet6/in6_var.h>
73 #include <netinet6/in6_ifattach.h>
74 #endif
75 #endif
76 #ifdef INET
77 #include <netinet/if_ether.h>
78 #endif
79 
80 static int	ifconf(u_long, caddr_t);
81 static void	if_grow(void);
82 static void	if_init(void *);
83 static void	if_check(void *);
84 static int	if_findindex(struct ifnet *);
85 static void	if_qflush(struct ifqueue *);
86 static void	if_slowtimo(void *);
87 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
88 static int	if_rtdel(struct radix_node *, void *);
89 static struct	if_clone *if_clone_lookup(const char *, int *);
90 static int	if_clone_list(struct if_clonereq *);
91 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
92 #ifdef INET6
93 /*
94  * XXX: declare here to avoid to include many inet6 related files..
95  * should be more generalized?
96  */
97 extern void	nd6_setmtu(struct ifnet *);
98 #endif
99 
/* Highest interface index in use; raised by if_attach(), trimmed by if_detach(). */
int	if_index = 0;
/* Per-index table, allocated and enlarged by if_grow(). */
struct	ifindex_entry *ifindex_table = NULL;
/* Send queue limit that if_check() applies when a driver left ifq_maxlen at 0. */
int	ifqmaxlen = IFQ_MAXLEN;
/* List of attached interfaces; initialised in if_init(). */
struct	ifnethead ifnet;	/* depend on static init XXX */
/* Number of entries on if_cloners. */
int	if_cloners_count;
/* Registered interface cloners (see if_clone_attach()/if_clone_detach()). */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);

/* Entry count of ifindex_table; doubled on every if_grow() call. */
static int	if_indexlim = 8;
/* knotes registered through minor 0 of the network device (see netkqfilter()). */
static struct	klist ifklist;
109 
110 static void	filt_netdetach(struct knote *kn);
111 static int	filt_netdev(struct knote *kn, long hint);
112 
/*
 * Filter operations that netkqfilter() installs on EVFILT_NETDEV knotes.
 * The initializer is positional; presumably { f_isfd, f_attach, f_detach,
 * f_event } -- confirm against the struct filterops declaration.
 */
static struct filterops netdev_filtops =
    { 1, NULL, filt_netdetach, filt_netdev };
115 
116 /*
117  * System initialization
118  */
119 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
120 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
121 
122 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
123 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
124 MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
125 
126 #define CDEV_MAJOR	165
127 
128 static d_open_t		netopen;
129 static d_close_t	netclose;
130 static d_ioctl_t	netioctl;
131 static d_kqfilter_t	netkqfilter;
132 
/*
 * Character device switch for the "net" devices created in if_init() and
 * if_attach().  Only open, close, ioctl and kqfilter are implemented here;
 * every other slot is filled with the generic no-op/unsupported handler.
 * The d_name field ("net") is also used as the device-name prefix by
 * if_findindex(), if_attach() and ifunit().
 */
static struct cdevsw net_cdevsw = {
	/* open */	netopen,
	/* close */	netclose,
	/* read */	noread,
	/* write */	nowrite,
	/* ioctl */	netioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"net",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	D_KQFILTER,
	/* kqfilter */	netkqfilter,
};
149 
150 static int
151 netopen(dev_t dev, int flag, int mode, struct thread *td)
152 {
153 	return (0);
154 }
155 
156 static int
157 netclose(dev_t dev, int flags, int fmt, struct thread *td)
158 {
159 	return (0);
160 }
161 
162 static int
163 netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
164 {
165 	struct ifnet *ifp;
166 	int error, idx;
167 
168 	/* only support interface specific ioctls */
169 	if (IOCGROUP(cmd) != 'i')
170 		return (EOPNOTSUPP);
171 	idx = minor(dev);
172 	if (idx == 0) {
173 		/*
174 		 * special network device, not interface.
175 		 */
176 		if (cmd == SIOCGIFCONF)
177 			return (ifconf(cmd, data));	/* XXX remove cmd */
178 		return (EOPNOTSUPP);
179 	}
180 
181 	ifp = ifnet_byindex(idx);
182 	if (ifp == NULL)
183 		return (ENXIO);
184 
185 	error = ifhwioctl(cmd, ifp, data, td);
186 	if (error == ENOIOCTL)
187 		error = EOPNOTSUPP;
188 	return (error);
189 }
190 
191 static int
192 netkqfilter(dev_t dev, struct knote *kn)
193 {
194 	struct klist *klist;
195 	struct ifnet *ifp;
196 	int idx;
197 
198 	idx = minor(dev);
199 	if (idx == 0) {
200 		klist = &ifklist;
201 	} else {
202 		ifp = ifnet_byindex(idx);
203 		if (ifp == NULL)
204 			return (1);
205 		klist = &ifp->if_klist;
206 	}
207 
208 	switch (kn->kn_filter) {
209 	case EVFILT_NETDEV:
210 		kn->kn_fop = &netdev_filtops;
211 		break;
212 	default:
213 		return (1);
214 	}
215 
216 	kn->kn_hook = (caddr_t)klist;
217 
218 	/* XXX locking? */
219 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
220 
221 	return (0);
222 }
223 
224 static void
225 filt_netdetach(struct knote *kn)
226 {
227 	struct klist *klist = (struct klist *)kn->kn_hook;
228 
229 	if (kn->kn_status & KN_DETACHED)
230 		return;
231 	SLIST_REMOVE(klist, kn, knote, kn_selnext);
232 }
233 
234 static int
235 filt_netdev(struct knote *kn, long hint)
236 {
237 
238 	/*
239 	 * Currently NOTE_EXIT is abused to indicate device detach.
240 	 */
241 	if (hint == NOTE_EXIT) {
242 		kn->kn_data = NOTE_LINKINV;
243                 kn->kn_status |= KN_DETACHED;
244                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
245                 return (1);
246         }
247 	kn->kn_data = hint;			/* current status */
248 	if (kn->kn_sfflags & hint)
249 		kn->kn_fflags |= hint;
250 	return (kn->kn_fflags != 0);
251 }
252 
253 /*
254  * Network interface utility routines.
255  *
256  * Routines with ifa_ifwith* names take sockaddr *'s as
257  * parameters.
258  */
259 /* ARGSUSED*/
260 static void
261 if_init(dummy)
262 	void *dummy;
263 {
264 
265 	TAILQ_INIT(&ifnet);
266 	SLIST_INIT(&ifklist);
267 	if_grow();				/* create initial table */
268 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
269 	    UID_ROOT, GID_WHEEL, 0600, "network");
270 }
271 
272 static void
273 if_grow(void)
274 {
275 	u_int n;
276 	struct ifindex_entry *e;
277 
278 	if_indexlim <<= 1;
279 	n = if_indexlim * sizeof(*e);
280 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
281 	if (ifindex_table != NULL) {
282 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
283 		free((caddr_t)ifindex_table, M_IFADDR);
284 	}
285 	ifindex_table = e;
286 }
287 
288 /* ARGSUSED*/
289 static void
290 if_check(dummy)
291 	void *dummy;
292 {
293 	struct ifnet *ifp;
294 	int s;
295 
296 	s = splimp();
297 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
298 		if (ifp->if_snd.ifq_maxlen == 0) {
299 			printf("%s%d XXX: driver didn't set ifq_maxlen\n",
300 			    ifp->if_name, ifp->if_unit);
301 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
302 		}
303 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
304 			printf("%s%d XXX: driver didn't initialize queue mtx\n",
305 			    ifp->if_name, ifp->if_unit);
306 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
307 			    MTX_NETWORK_LOCK, MTX_DEF);
308 		}
309 	}
310 	splx(s);
311 	if_slowtimo(0);
312 }
313 
314 static int
315 if_findindex(struct ifnet *ifp)
316 {
317 	int i, unit;
318 	char eaddr[18], devname[32];
319 	const char *name, *p;
320 
321 	switch (ifp->if_type) {
322 	case IFT_ETHER:			/* these types use struct arpcom */
323 	case IFT_FDDI:
324 	case IFT_XETHER:
325 	case IFT_ISO88025:
326 	case IFT_L2VLAN:
327 		snprintf(eaddr, 18, "%6D",
328 		    ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":");
329 		break;
330 	default:
331 		eaddr[0] = '\0';
332 		break;
333 	}
334 	snprintf(devname, 32, "%s%d", ifp->if_name, ifp->if_unit);
335 	name = net_cdevsw.d_name;
336 	i = 0;
337 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
338 		if (resource_string_value(name, unit, "ether", &p) == 0)
339 			if (strcmp(p, eaddr) == 0)
340 				goto found;
341 		if (resource_string_value(name, unit, "dev", &p) == 0)
342 			if (strcmp(p, devname) == 0)
343 				goto found;
344 	}
345 	unit = 0;
346 found:
347 	if (unit != 0) {
348 		if (ifaddr_byindex(unit) == NULL)
349 			return (unit);
350 		printf("%s%d in use, cannot hardwire it to %s.\n",
351 		    name, unit, devname);
352 	}
353 	for (unit = 1; ; unit++) {
354 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
355 			continue;
356 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
357 		    resource_string_value(name, unit, "dev", &p) == 0)
358 			continue;
359 		break;
360 	}
361 	return (unit);
362 }
363 
364 /*
365  * Attach an interface to the
366  * list of "active" interfaces.
367  */
368 void
369 if_attach(ifp)
370 	struct ifnet *ifp;
371 {
372 	unsigned socksize, ifasize;
373 	int namelen, masklen;
374 	char workbuf[64];
375 	register struct sockaddr_dl *sdl;
376 	register struct ifaddr *ifa;
377 
378 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
379 	/*
380 	 * XXX -
381 	 * The old code would work if the interface passed a pre-existing
382 	 * chain of ifaddrs to this code.  We don't trust our callers to
383 	 * properly initialize the tailq, however, so we no longer allow
384 	 * this unlikely case.
385 	 */
386 	TAILQ_INIT(&ifp->if_addrhead);
387 	TAILQ_INIT(&ifp->if_prefixhead);
388 	TAILQ_INIT(&ifp->if_multiaddrs);
389 	SLIST_INIT(&ifp->if_klist);
390 	getmicrotime(&ifp->if_lastchange);
391 
392 #ifdef MAC
393 	mac_init_ifnet(ifp);
394 	mac_create_ifnet(ifp);
395 #endif
396 
397 	ifp->if_index = if_findindex(ifp);
398 	if (ifp->if_index > if_index)
399 		if_index = ifp->if_index;
400 	if (if_index >= if_indexlim)
401 		if_grow();
402 
403 	ifnet_byindex(ifp->if_index) = ifp;
404 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw, ifp->if_index,
405 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s%d",
406 	    net_cdevsw.d_name, ifp->if_name, ifp->if_unit);
407 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
408 	    net_cdevsw.d_name, ifp->if_index);
409 
410 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_name, "if send queue", MTX_DEF);
411 
412 	/*
413 	 * create a Link Level name for this device
414 	 */
415 	namelen = snprintf(workbuf, sizeof(workbuf),
416 	    "%s%d", ifp->if_name, ifp->if_unit);
417 #define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
418 	masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
419 	socksize = masklen + ifp->if_addrlen;
420 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
421 	if (socksize < sizeof(*sdl))
422 		socksize = sizeof(*sdl);
423 	socksize = ROUNDUP(socksize);
424 	ifasize = sizeof(*ifa) + 2 * socksize;
425 	ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
426 	if (ifa) {
427 		sdl = (struct sockaddr_dl *)(ifa + 1);
428 		sdl->sdl_len = socksize;
429 		sdl->sdl_family = AF_LINK;
430 		bcopy(workbuf, sdl->sdl_data, namelen);
431 		sdl->sdl_nlen = namelen;
432 		sdl->sdl_index = ifp->if_index;
433 		sdl->sdl_type = ifp->if_type;
434 		ifaddr_byindex(ifp->if_index) = ifa;
435 		ifa->ifa_ifp = ifp;
436 		ifa->ifa_rtrequest = link_rtrequest;
437 		ifa->ifa_addr = (struct sockaddr *)sdl;
438 		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
439 		ifa->ifa_netmask = (struct sockaddr *)sdl;
440 		sdl->sdl_len = masklen;
441 		while (namelen != 0)
442 			sdl->sdl_data[--namelen] = 0xff;
443 		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
444 	}
445 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
446 
447 	/* Announce the interface. */
448 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
449 }
450 
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 *
 * In order: the interface is marked down, its index table slots and
 * device node are released, every address on it is removed, all routes
 * that still reference it are deleted, its departure is announced on the
 * routing socket, attached knotes are notified, and finally it is taken
 * off the global ifnet list and its send queue mutex is destroyed.
 * The whole sequence runs at splnet.
 */
void
if_detach(ifp)
	struct ifnet *ifp;
{
	struct ifaddr *ifa;
	struct radix_node_head	*rnh;
	int s;
	int i;

	/*
	 * Remove routes and flush queues.
	 */
	s = splnet();
	if_down(ifp);

	/*
	 * Remove address from ifindex_table[] and maybe decrement if_index.
	 * Clean up all addresses.
	 */
	ifaddr_byindex(ifp->if_index) = NULL;
	revoke_and_destroy_dev(ifdev_byindex(ifp->if_index));
	ifdev_byindex(ifp->if_index) = NULL;

	/* Shrink if_index past any now-unused slots at the top of the table. */
	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
		if_index--;

	/*
	 * Always work on the current head of the list: each iteration is
	 * expected to remove that entry, either here or in the protocol
	 * specific code called below.
	 */
	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
	     ifa = TAILQ_FIRST(&ifp->if_addrhead)) {
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/*
			 * On success the address is presumably already gone
			 * from the list; on failure fall through and remove
			 * it by hand below.
			 */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IFAFREE(ifa);
	}

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface.
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	for (i = 1; i <= AF_MAX; i++) {
		if ((rnh = rt_tables[i]) == NULL)
			continue;
		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
	}

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

#ifdef MAC
	mac_destroy_ifnet(ifp);
#endif /* MAC */
	/* NOTE_EXIT tells filt_netdev() that the device is going away. */
	KNOTE(&ifp->if_klist, NOTE_EXIT);
	TAILQ_REMOVE(&ifnet, ifp, if_link);
	mtx_destroy(&ifp->if_snd.ifq_mtx);
	splx(s);
}
541 
542 /*
543  * Delete Routes for a Network Interface
544  *
545  * Called for each routing entry via the rnh->rnh_walktree() call above
546  * to delete all route entries referencing a detaching network interface.
547  *
548  * Arguments:
549  *	rn	pointer to node in the routing table
550  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
551  *
552  * Returns:
553  *	0	successful
554  *	errno	failed - reason indicated
555  *
556  */
557 static int
558 if_rtdel(rn, arg)
559 	struct radix_node	*rn;
560 	void			*arg;
561 {
562 	struct rtentry	*rt = (struct rtentry *)rn;
563 	struct ifnet	*ifp = arg;
564 	int		err;
565 
566 	if (rt->rt_ifp == ifp) {
567 
568 		/*
569 		 * Protect (sorta) against walktree recursion problems
570 		 * with cloned routes
571 		 */
572 		if ((rt->rt_flags & RTF_UP) == 0)
573 			return (0);
574 
575 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
576 				rt_mask(rt), rt->rt_flags,
577 				(struct rtentry **) NULL);
578 		if (err) {
579 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
580 		}
581 	}
582 
583 	return (0);
584 }
585 
586 /*
587  * Create a clone network interface.
588  */
589 int
590 if_clone_create(name, len)
591 	char *name;
592 	int len;
593 {
594 	struct if_clone *ifc;
595 	char *dp;
596 	int wildcard, bytoff, bitoff;
597 	int unit;
598 	int err;
599 
600 	ifc = if_clone_lookup(name, &unit);
601 	if (ifc == NULL)
602 		return (EINVAL);
603 
604 	if (ifunit(name) != NULL)
605 		return (EEXIST);
606 
607 	bytoff = bitoff = 0;
608 	wildcard = (unit < 0);
609 	/*
610 	 * Find a free unit if none was given.
611 	 */
612 	if (wildcard) {
613 		while ((bytoff < ifc->ifc_bmlen)
614 		    && (ifc->ifc_units[bytoff] == 0xff))
615 			bytoff++;
616 		if (bytoff >= ifc->ifc_bmlen)
617 			return (ENOSPC);
618 		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
619 			bitoff++;
620 		unit = (bytoff << 3) + bitoff;
621 	}
622 
623 	if (unit > ifc->ifc_maxunit)
624 		return (ENXIO);
625 
626 	err = (*ifc->ifc_create)(ifc, unit);
627 	if (err != 0)
628 		return (err);
629 
630 	if (!wildcard) {
631 		bytoff = unit >> 3;
632 		bitoff = unit - (bytoff << 3);
633 	}
634 
635 	/*
636 	 * Allocate the unit in the bitmap.
637 	 */
638 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
639 	    ("%s: bit is already set", __func__));
640 	ifc->ifc_units[bytoff] |= (1 << bitoff);
641 
642 	/* In the wildcard case, we need to update the name. */
643 	if (wildcard) {
644 		for (dp = name; *dp != '\0'; dp++);
645 		if (snprintf(dp, len - (dp-name), "%d", unit) >
646 		    len - (dp-name) - 1) {
647 			/*
648 			 * This can only be a programmer error and
649 			 * there's no straightforward way to recover if
650 			 * it happens.
651 			 */
652 			panic("if_clone_create(): interface name too long");
653 		}
654 
655 	}
656 
657 	return (0);
658 }
659 
660 /*
661  * Destroy a clone network interface.
662  */
663 int
664 if_clone_destroy(name)
665 	const char *name;
666 {
667 	struct if_clone *ifc;
668 	struct ifnet *ifp;
669 	int bytoff, bitoff;
670 	int unit;
671 
672 	ifc = if_clone_lookup(name, &unit);
673 	if (ifc == NULL)
674 		return (EINVAL);
675 
676 	if (unit < ifc->ifc_minifs)
677 		return (EINVAL);
678 
679 	ifp = ifunit(name);
680 	if (ifp == NULL)
681 		return (ENXIO);
682 
683 	if (ifc->ifc_destroy == NULL)
684 		return (EOPNOTSUPP);
685 
686 	(*ifc->ifc_destroy)(ifp);
687 
688 	/*
689 	 * Compute offset in the bitmap and deallocate the unit.
690 	 */
691 	bytoff = unit >> 3;
692 	bitoff = unit - (bytoff << 3);
693 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
694 	    ("%s: bit is already cleared", __func__));
695 	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
696 	return (0);
697 }
698 
699 /*
700  * Look up a network interface cloner.
701  */
702 static struct if_clone *
703 if_clone_lookup(name, unitp)
704 	const char *name;
705 	int *unitp;
706 {
707 	struct if_clone *ifc;
708 	const char *cp;
709 	int i;
710 
711 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) {
712 		for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) {
713 			if (ifc->ifc_name[i] != *cp)
714 				goto next_ifc;
715 		}
716 		goto found_name;
717  next_ifc:
718 		ifc = LIST_NEXT(ifc, ifc_list);
719 	}
720 
721 	/* No match. */
722 	return ((struct if_clone *)NULL);
723 
724  found_name:
725 	if (*cp == '\0') {
726 		i = -1;
727 	} else {
728 		for (i = 0; *cp != '\0'; cp++) {
729 			if (*cp < '0' || *cp > '9') {
730 				/* Bogus unit number. */
731 				return (NULL);
732 			}
733 			i = (i * 10) + (*cp - '0');
734 		}
735 	}
736 
737 	if (unitp != NULL)
738 		*unitp = i;
739 	return (ifc);
740 }
741 
742 /*
743  * Register a network interface cloner.
744  */
745 void
746 if_clone_attach(ifc)
747 	struct if_clone *ifc;
748 {
749 	int bytoff, bitoff;
750 	int err;
751 	int len, maxclone;
752 	int unit;
753 
754 	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
755 	    ("%s: %s requested more units then allowed (%d > %d)",
756 	    __func__, ifc->ifc_name, ifc->ifc_minifs,
757 	    ifc->ifc_maxunit + 1));
758 	/*
759 	 * Compute bitmap size and allocate it.
760 	 */
761 	maxclone = ifc->ifc_maxunit + 1;
762 	len = maxclone >> 3;
763 	if ((len << 3) < maxclone)
764 		len++;
765 	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
766 	ifc->ifc_bmlen = len;
767 
768 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
769 	if_cloners_count++;
770 
771 	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
772 		err = (*ifc->ifc_create)(ifc, unit);
773 		KASSERT(err == 0,
774 		    ("%s: failed to create required interface %s%d",
775 		    __func__, ifc->ifc_name, unit));
776 
777 		/* Allocate the unit in the bitmap. */
778 		bytoff = unit >> 3;
779 		bitoff = unit - (bytoff << 3);
780 		ifc->ifc_units[bytoff] |= (1 << bitoff);
781 	}
782 }
783 
784 /*
785  * Unregister a network interface cloner.
786  */
787 void
788 if_clone_detach(ifc)
789 	struct if_clone *ifc;
790 {
791 
792 	LIST_REMOVE(ifc, ifc_list);
793 	free(ifc->ifc_units, M_CLONE);
794 	if_cloners_count--;
795 }
796 
797 /*
798  * Provide list of interface cloners to userspace.
799  */
800 static int
801 if_clone_list(ifcr)
802 	struct if_clonereq *ifcr;
803 {
804 	char outbuf[IFNAMSIZ], *dst;
805 	struct if_clone *ifc;
806 	int count, error = 0;
807 
808 	ifcr->ifcr_total = if_cloners_count;
809 	if ((dst = ifcr->ifcr_buffer) == NULL) {
810 		/* Just asking how many there are. */
811 		return (0);
812 	}
813 
814 	if (ifcr->ifcr_count < 0)
815 		return (EINVAL);
816 
817 	count = (if_cloners_count < ifcr->ifcr_count) ?
818 	    if_cloners_count : ifcr->ifcr_count;
819 
820 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
821 	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
822 		strncpy(outbuf, ifc->ifc_name, IFNAMSIZ);
823 		outbuf[IFNAMSIZ - 1] = '\0';	/* sanity */
824 		error = copyout(outbuf, dst, IFNAMSIZ);
825 		if (error)
826 			break;
827 	}
828 
829 	return (error);
830 }
831 
832 /*
833  * Locate an interface based on a complete address.
834  */
835 /*ARGSUSED*/
836 struct ifaddr *
837 ifa_ifwithaddr(addr)
838 	struct sockaddr *addr;
839 {
840 	struct ifnet *ifp;
841 	struct ifaddr *ifa;
842 
843 #define	equal(a1, a2) \
844   (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
845 	TAILQ_FOREACH(ifp, &ifnet, if_link)
846 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
847 			if (ifa->ifa_addr->sa_family != addr->sa_family)
848 				continue;
849 			if (equal(addr, ifa->ifa_addr))
850 				goto done;
851 			/* IP6 doesn't have broadcast */
852 			if ((ifp->if_flags & IFF_BROADCAST) &&
853 			    ifa->ifa_broadaddr &&
854 			    ifa->ifa_broadaddr->sa_len != 0 &&
855 			    equal(ifa->ifa_broadaddr, addr))
856 				goto done;
857 		}
858 	ifa = NULL;
859 done:
860 	return (ifa);
861 }
862 
863 /*
864  * Locate the point to point interface with a given destination address.
865  */
866 /*ARGSUSED*/
867 struct ifaddr *
868 ifa_ifwithdstaddr(addr)
869 	struct sockaddr *addr;
870 {
871 	struct ifnet *ifp;
872 	struct ifaddr *ifa;
873 
874 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
875 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
876 			continue;
877 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
878 			if (ifa->ifa_addr->sa_family != addr->sa_family)
879 				continue;
880 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
881 				goto done;
882 		}
883 	}
884 	ifa = NULL;
885 done:
886 	return (ifa);
887 }
888 
/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 *
 * Returns the matching ifaddr, or NULL if nothing matched.  An exact
 * destination-address match on an AF_INET point-to-point interface, or an
 * address claimed by an ifa_claim_addr handler, is returned immediately;
 * netmask matches are collected and the best one is returned at the end.
 */
struct ifaddr *
ifa_ifwithnet(addr)
	struct sockaddr *addr;
{
	register struct ifnet *ifp;
	register struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;	/* best match so far */
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
	    register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
		return (ifaddr_byindex(sdl->sdl_index));
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			register char *cp, *cp2, *cp3;

			/*
			 * The "next" label sits on the continue so that the
			 * mask comparison further down can jump here to move
			 * on to the next address.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != 0
				    && equal(addr, ifa->ifa_dstaddr))
					goto done;
			} else {
				/*
				 * if we have a special address handler,
				 * then use it instead of the generic one.
				 */
	          		if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr))
						goto done;
					continue;
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				/* The netmask's own length bounds the scan. */
				cplim = ifa->ifa_netmask->sa_len
					+ (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search
				 * for an even better one.
				 */
				if (ifa_maybe == 0 ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask))
					ifa_maybe = ifa;
			}
		}
	}
	ifa = ifa_maybe;
done:
	return (ifa);
}
981 
982 /*
983  * Find an interface address specific to an interface best matching
984  * a given address.
985  */
986 struct ifaddr *
987 ifaof_ifpforaddr(addr, ifp)
988 	struct sockaddr *addr;
989 	register struct ifnet *ifp;
990 {
991 	register struct ifaddr *ifa;
992 	register char *cp, *cp2, *cp3;
993 	register char *cplim;
994 	struct ifaddr *ifa_maybe = 0;
995 	u_int af = addr->sa_family;
996 
997 	if (af >= AF_MAX)
998 		return (0);
999 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1000 		if (ifa->ifa_addr->sa_family != af)
1001 			continue;
1002 		if (ifa_maybe == 0)
1003 			ifa_maybe = ifa;
1004 		if (ifa->ifa_netmask == 0) {
1005 			if (equal(addr, ifa->ifa_addr) ||
1006 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1007 				goto done;
1008 			continue;
1009 		}
1010 		if (ifp->if_flags & IFF_POINTOPOINT) {
1011 			if (equal(addr, ifa->ifa_dstaddr))
1012 				goto done;
1013 		} else {
1014 			cp = addr->sa_data;
1015 			cp2 = ifa->ifa_addr->sa_data;
1016 			cp3 = ifa->ifa_netmask->sa_data;
1017 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1018 			for (; cp3 < cplim; cp3++)
1019 				if ((*cp++ ^ *cp2++) & *cp3)
1020 					break;
1021 			if (cp3 == cplim)
1022 				goto done;
1023 		}
1024 	}
1025 	ifa = ifa_maybe;
1026 done:
1027 	return (ifa);
1028 }
1029 
1030 #include <net/route.h>
1031 
1032 /*
1033  * Default action when installing a route with a Link Level gateway.
1034  * Lookup an appropriate real ifa to point to.
1035  * This should be moved to /sys/net/link.c eventually.
1036  */
1037 static void
1038 link_rtrequest(cmd, rt, info)
1039 	int cmd;
1040 	register struct rtentry *rt;
1041 	struct rt_addrinfo *info;
1042 {
1043 	register struct ifaddr *ifa;
1044 	struct sockaddr *dst;
1045 	struct ifnet *ifp;
1046 
1047 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1048 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1049 		return;
1050 	ifa = ifaof_ifpforaddr(dst, ifp);
1051 	if (ifa) {
1052 		IFAFREE(rt->rt_ifa);
1053 		rt->rt_ifa = ifa;
1054 		ifa->ifa_refcnt++;
1055 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1056 			ifa->ifa_rtrequest(cmd, rt, info);
1057 	}
1058 }
1059 
1060 /*
1061  * Mark an interface down and notify protocols of
1062  * the transition.
1063  * NOTE: must be called at splnet or eqivalent.
1064  */
1065 void
1066 if_unroute(ifp, flag, fam)
1067 	register struct ifnet *ifp;
1068 	int flag, fam;
1069 {
1070 	register struct ifaddr *ifa;
1071 
1072 	ifp->if_flags &= ~flag;
1073 	getmicrotime(&ifp->if_lastchange);
1074 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1075 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1076 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1077 	if_qflush(&ifp->if_snd);
1078 	rt_ifmsg(ifp);
1079 }
1080 
1081 /*
1082  * Mark an interface up and notify protocols of
1083  * the transition.
1084  * NOTE: must be called at splnet or eqivalent.
1085  */
1086 void
1087 if_route(ifp, flag, fam)
1088 	register struct ifnet *ifp;
1089 	int flag, fam;
1090 {
1091 	register struct ifaddr *ifa;
1092 
1093 	ifp->if_flags |= flag;
1094 	getmicrotime(&ifp->if_lastchange);
1095 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1096 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1097 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1098 	rt_ifmsg(ifp);
1099 #ifdef INET6
1100 	in6_if_up(ifp);
1101 #endif
1102 }
1103 
1104 /*
1105  * Mark an interface down and notify protocols of
1106  * the transition.
1107  * NOTE: must be called at splnet or eqivalent.
1108  */
1109 void
1110 if_down(ifp)
1111 	register struct ifnet *ifp;
1112 {
1113 
1114 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1115 }
1116 
1117 /*
1118  * Mark an interface up and notify protocols of
1119  * the transition.
1120  * NOTE: must be called at splnet or eqivalent.
1121  */
1122 void
1123 if_up(ifp)
1124 	register struct ifnet *ifp;
1125 {
1126 
1127 	if_route(ifp, IFF_UP, AF_UNSPEC);
1128 }
1129 
1130 /*
1131  * Flush an interface queue.
1132  */
1133 static void
1134 if_qflush(ifq)
1135 	register struct ifqueue *ifq;
1136 {
1137 	register struct mbuf *m, *n;
1138 
1139 	n = ifq->ifq_head;
1140 	while ((m = n) != 0) {
1141 		n = m->m_act;
1142 		m_freem(m);
1143 	}
1144 	ifq->ifq_head = 0;
1145 	ifq->ifq_tail = 0;
1146 	ifq->ifq_len = 0;
1147 }
1148 
1149 /*
1150  * Handle interface watchdog timer routines.  Called
1151  * from softclock, we decrement timers (if set) and
1152  * call the appropriate interface routine on expiration.
1153  */
1154 static void
1155 if_slowtimo(arg)
1156 	void *arg;
1157 {
1158 	register struct ifnet *ifp;
1159 	int s = splimp();
1160 
1161 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1162 		if (ifp->if_timer == 0 || --ifp->if_timer)
1163 			continue;
1164 		if (ifp->if_watchdog)
1165 			(*ifp->if_watchdog)(ifp);
1166 	}
1167 	splx(s);
1168 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1169 }
1170 
1171 /*
1172  * Map interface name to
1173  * interface structure pointer.
1174  */
1175 struct ifnet *
1176 ifunit(const char *name)
1177 {
1178 	char namebuf[IFNAMSIZ + 1];
1179 	struct ifnet *ifp;
1180 	dev_t dev;
1181 
1182 	/*
1183 	 * Now search all the interfaces for this name/number
1184 	 */
1185 
1186 	/*
1187 	 * XXX
1188 	 * Devices should really be known as /dev/fooN, not /dev/net/fooN.
1189 	 */
1190 	snprintf(namebuf, IFNAMSIZ, "%s/%s", net_cdevsw.d_name, name);
1191 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1192 		dev = ifdev_byindex(ifp->if_index);
1193 		if (strcmp(devtoname(dev), namebuf) == 0)
1194 			break;
1195 		if (dev_named(dev, name))
1196 			break;
1197 	}
1198 	return (ifp);
1199 }
1200 
1201 /*
1202  * Map interface name in a sockaddr_dl to
1203  * interface structure pointer.
1204  */
1205 struct ifnet *
1206 if_withname(sa)
1207 	struct sockaddr *sa;
1208 {
1209 	char ifname[IFNAMSIZ+1];
1210 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1211 
1212 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1213 	     (sdl->sdl_nlen > IFNAMSIZ) )
1214 		return NULL;
1215 
1216 	/*
1217 	 * ifunit wants a NUL-terminated string.  It may not be NUL-terminated
1218 	 * in the sockaddr, and we don't want to change the caller's sockaddr
1219 	 * (there might not be room to add the trailing NUL anyway), so we make
1220 	 * a local copy that we know we can NUL-terminate safely.
1221 	 */
1222 
1223 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1224 	ifname[sdl->sdl_nlen] = '\0';
1225 	return ifunit(ifname);
1226 }
1227 
1228 /*
1229  * Hardware specific interface ioctls.
1230  */
1231 static int
1232 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1233 {
1234 	struct ifreq *ifr;
1235 	struct ifstat *ifs;
1236 	int error = 0;
1237 	int new_flags;
1238 
1239 	ifr = (struct ifreq *)data;
1240 	switch (cmd) {
1241 	case SIOCGIFINDEX:
1242 		ifr->ifr_index = ifp->if_index;
1243 		break;
1244 
1245 	case SIOCGIFFLAGS:
1246 		ifr->ifr_flags = ifp->if_flags & 0xffff;
1247 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1248 		break;
1249 
1250 	case SIOCGIFCAP:
1251 		ifr->ifr_reqcap = ifp->if_capabilities;
1252 		ifr->ifr_curcap = ifp->if_capenable;
1253 		break;
1254 
1255 #ifdef MAC
1256 	case SIOCGIFMAC:
1257 		error = mac_ioctl_ifnet_get(td->td_proc->p_ucred, ifr, ifp);
1258 		break;
1259 #endif
1260 
1261 	case SIOCGIFMETRIC:
1262 		ifr->ifr_metric = ifp->if_metric;
1263 		break;
1264 
1265 	case SIOCGIFMTU:
1266 		ifr->ifr_mtu = ifp->if_mtu;
1267 		break;
1268 
1269 	case SIOCGIFPHYS:
1270 		ifr->ifr_phys = ifp->if_physical;
1271 		break;
1272 
1273 	case SIOCSIFFLAGS:
1274 		error = suser(td);
1275 		if (error)
1276 			return (error);
1277 		new_flags = (ifr->ifr_flags & 0xffff) |
1278 		    (ifr->ifr_flagshigh << 16);
1279 		if (ifp->if_flags & IFF_SMART) {
1280 			/* Smart drivers twiddle their own routes */
1281 		} else if (ifp->if_flags & IFF_UP &&
1282 		    (new_flags & IFF_UP) == 0) {
1283 			int s = splimp();
1284 			if_down(ifp);
1285 			splx(s);
1286 		} else if (new_flags & IFF_UP &&
1287 		    (ifp->if_flags & IFF_UP) == 0) {
1288 			int s = splimp();
1289 			if_up(ifp);
1290 			splx(s);
1291 		}
1292 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1293 			(new_flags &~ IFF_CANTCHANGE);
1294 		if (new_flags & IFF_PPROMISC) {
1295 			/* Permanently promiscuous mode requested */
1296 			ifp->if_flags |= IFF_PROMISC;
1297 		} else if (ifp->if_pcount == 0) {
1298 			ifp->if_flags &= ~IFF_PROMISC;
1299 		}
1300 		if (ifp->if_ioctl)
1301 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1302 		getmicrotime(&ifp->if_lastchange);
1303 		break;
1304 
1305 	case SIOCSIFCAP:
1306 		error = suser(td);
1307 		if (error)
1308 			return (error);
1309 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1310 			return (EINVAL);
1311 		(void) (*ifp->if_ioctl)(ifp, cmd, data);
1312 		break;
1313 
1314 #ifdef MAC
1315 	case SIOCSIFMAC:
1316 		error = mac_ioctl_ifnet_set(td->td_proc->p_ucred, ifr, ifp);
1317 		break;
1318 #endif
1319 
1320 	case SIOCSIFMETRIC:
1321 		error = suser(td);
1322 		if (error)
1323 			return (error);
1324 		ifp->if_metric = ifr->ifr_metric;
1325 		getmicrotime(&ifp->if_lastchange);
1326 		break;
1327 
1328 	case SIOCSIFPHYS:
1329 		error = suser(td);
1330 		if (error)
1331 			return error;
1332 		if (!ifp->if_ioctl)
1333 		        return EOPNOTSUPP;
1334 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1335 		if (error == 0)
1336 			getmicrotime(&ifp->if_lastchange);
1337 		return(error);
1338 
1339 	case SIOCSIFMTU:
1340 	{
1341 		u_long oldmtu = ifp->if_mtu;
1342 
1343 		error = suser(td);
1344 		if (error)
1345 			return (error);
1346 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1347 			return (EINVAL);
1348 		if (ifp->if_ioctl == NULL)
1349 			return (EOPNOTSUPP);
1350 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1351 		if (error == 0) {
1352 			getmicrotime(&ifp->if_lastchange);
1353 			rt_ifmsg(ifp);
1354 		}
1355 		/*
1356 		 * If the link MTU changed, do network layer specific procedure.
1357 		 */
1358 		if (ifp->if_mtu != oldmtu) {
1359 #ifdef INET6
1360 			nd6_setmtu(ifp);
1361 #endif
1362 		}
1363 		break;
1364 	}
1365 
1366 	case SIOCADDMULTI:
1367 	case SIOCDELMULTI:
1368 		error = suser(td);
1369 		if (error)
1370 			return (error);
1371 
1372 		/* Don't allow group membership on non-multicast interfaces. */
1373 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1374 			return (EOPNOTSUPP);
1375 
1376 		/* Don't let users screw up protocols' entries. */
1377 		if (ifr->ifr_addr.sa_family != AF_LINK)
1378 			return (EINVAL);
1379 
1380 		if (cmd == SIOCADDMULTI) {
1381 			struct ifmultiaddr *ifma;
1382 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1383 		} else {
1384 			error = if_delmulti(ifp, &ifr->ifr_addr);
1385 		}
1386 		if (error == 0)
1387 			getmicrotime(&ifp->if_lastchange);
1388 		break;
1389 
1390 	case SIOCSIFPHYADDR:
1391 	case SIOCDIFPHYADDR:
1392 #ifdef INET6
1393 	case SIOCSIFPHYADDR_IN6:
1394 #endif
1395 	case SIOCSLIFPHYADDR:
1396         case SIOCSIFMEDIA:
1397 	case SIOCSIFGENERIC:
1398 		error = suser(td);
1399 		if (error)
1400 			return (error);
1401 		if (ifp->if_ioctl == NULL)
1402 			return (EOPNOTSUPP);
1403 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1404 		if (error == 0)
1405 			getmicrotime(&ifp->if_lastchange);
1406 		break;
1407 
1408 	case SIOCGIFSTATUS:
1409 		ifs = (struct ifstat *)data;
1410 		ifs->ascii[0] = '\0';
1411 
1412 	case SIOCGIFPSRCADDR:
1413 	case SIOCGIFPDSTADDR:
1414 	case SIOCGLIFPHYADDR:
1415 	case SIOCGIFMEDIA:
1416 	case SIOCGIFGENERIC:
1417 		if (ifp->if_ioctl == 0)
1418 			return (EOPNOTSUPP);
1419 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1420 		break;
1421 
1422 	case SIOCSIFLLADDR:
1423 		error = suser(td);
1424 		if (error)
1425 			return (error);
1426 		error = if_setlladdr(ifp,
1427 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1428 		break;
1429 
1430 	default:
1431 		error = ENOIOCTL;
1432 		break;
1433 	}
1434 	return (error);
1435 }
1436 
1437 /*
1438  * Interface ioctls.
1439  */
1440 int
1441 ifioctl(so, cmd, data, td)
1442 	struct socket *so;
1443 	u_long cmd;
1444 	caddr_t data;
1445 	struct thread *td;
1446 {
1447 	struct ifnet *ifp;
1448 	struct ifreq *ifr;
1449 	int error;
1450 	int oif_flags;
1451 
1452 	switch (cmd) {
1453 	case SIOCGIFCONF:
1454 	case OSIOCGIFCONF:
1455 		return (ifconf(cmd, data));
1456 	}
1457 	ifr = (struct ifreq *)data;
1458 
1459 	switch (cmd) {
1460 	case SIOCIFCREATE:
1461 	case SIOCIFDESTROY:
1462 		if ((error = suser(td)) != 0)
1463 			return (error);
1464 		return ((cmd == SIOCIFCREATE) ?
1465 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1466 			if_clone_destroy(ifr->ifr_name));
1467 
1468 	case SIOCIFGCLONERS:
1469 		return (if_clone_list((struct if_clonereq *)data));
1470 	}
1471 
1472 	ifp = ifunit(ifr->ifr_name);
1473 	if (ifp == 0)
1474 		return (ENXIO);
1475 
1476 	error = ifhwioctl(cmd, ifp, data, td);
1477 	if (error != ENOIOCTL)
1478 		return (error);
1479 
1480 	oif_flags = ifp->if_flags;
1481 	if (so->so_proto == 0)
1482 		return (EOPNOTSUPP);
1483 #ifndef COMPAT_43
1484 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1485 								 data,
1486 								 ifp, td));
1487 #else
1488 	{
1489 		int ocmd = cmd;
1490 
1491 		switch (cmd) {
1492 
1493 		case SIOCSIFDSTADDR:
1494 		case SIOCSIFADDR:
1495 		case SIOCSIFBRDADDR:
1496 		case SIOCSIFNETMASK:
1497 #if BYTE_ORDER != BIG_ENDIAN
1498 			if (ifr->ifr_addr.sa_family == 0 &&
1499 			    ifr->ifr_addr.sa_len < 16) {
1500 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1501 				ifr->ifr_addr.sa_len = 16;
1502 			}
1503 #else
1504 			if (ifr->ifr_addr.sa_len == 0)
1505 				ifr->ifr_addr.sa_len = 16;
1506 #endif
1507 			break;
1508 
1509 		case OSIOCGIFADDR:
1510 			cmd = SIOCGIFADDR;
1511 			break;
1512 
1513 		case OSIOCGIFDSTADDR:
1514 			cmd = SIOCGIFDSTADDR;
1515 			break;
1516 
1517 		case OSIOCGIFBRDADDR:
1518 			cmd = SIOCGIFBRDADDR;
1519 			break;
1520 
1521 		case OSIOCGIFNETMASK:
1522 			cmd = SIOCGIFNETMASK;
1523 		}
1524 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1525 								   cmd,
1526 								   data,
1527 								   ifp, td));
1528 		switch (ocmd) {
1529 
1530 		case OSIOCGIFADDR:
1531 		case OSIOCGIFDSTADDR:
1532 		case OSIOCGIFBRDADDR:
1533 		case OSIOCGIFNETMASK:
1534 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1535 
1536 		}
1537 	}
1538 #endif /* COMPAT_43 */
1539 
1540 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1541 #ifdef INET6
1542 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1543 		if (ifp->if_flags & IFF_UP) {
1544 			int s = splimp();
1545 			in6_if_up(ifp);
1546 			splx(s);
1547 		}
1548 #endif
1549 	}
1550 	return (error);
1551 }
1552 
1553 /*
1554  * Set/clear promiscuous mode on interface ifp based on the truth value
1555  * of pswitch.  The calls are reference counted so that only the first
1556  * "on" request actually has an effect, as does the final "off" request.
1557  * Results are undefined if the "off" and "on" requests are not matched.
1558  */
1559 int
1560 ifpromisc(ifp, pswitch)
1561 	struct ifnet *ifp;
1562 	int pswitch;
1563 {
1564 	struct ifreq ifr;
1565 	int error;
1566 	int oldflags, oldpcount;
1567 
1568 	oldpcount = ifp->if_pcount;
1569 	oldflags = ifp->if_flags;
1570 	if (ifp->if_flags & IFF_PPROMISC) {
1571 		/* Do nothing if device is in permanently promiscuous mode */
1572 		ifp->if_pcount += pswitch ? 1 : -1;
1573 		return (0);
1574 	}
1575 	if (pswitch) {
1576 		/*
1577 		 * If the device is not configured up, we cannot put it in
1578 		 * promiscuous mode.
1579 		 */
1580 		if ((ifp->if_flags & IFF_UP) == 0)
1581 			return (ENETDOWN);
1582 		if (ifp->if_pcount++ != 0)
1583 			return (0);
1584 		ifp->if_flags |= IFF_PROMISC;
1585 	} else {
1586 		if (--ifp->if_pcount > 0)
1587 			return (0);
1588 		ifp->if_flags &= ~IFF_PROMISC;
1589 	}
1590 	ifr.ifr_flags = ifp->if_flags & 0xffff;
1591 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1592 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1593 	if (error == 0) {
1594 		log(LOG_INFO, "%s%d: promiscuous mode %s\n",
1595 		    ifp->if_name, ifp->if_unit,
1596 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1597 		rt_ifmsg(ifp);
1598 	} else {
1599 		ifp->if_pcount = oldpcount;
1600 		ifp->if_flags = oldflags;
1601 	}
1602 	return error;
1603 }
1604 
1605 /*
1606  * Return interface configuration
1607  * of system.  List may be used
1608  * in later ioctl's (above) to get
1609  * other information.
1610  */
1611 /*ARGSUSED*/
1612 static int
1613 ifconf(cmd, data)
1614 	u_long cmd;
1615 	caddr_t data;
1616 {
1617 	struct ifconf *ifc = (struct ifconf *)data;
1618 	struct ifnet *ifp;
1619 	struct ifaddr *ifa;
1620 	struct ifreq ifr, *ifrp;
1621 	int space = ifc->ifc_len, error = 0;
1622 
1623 	ifrp = ifc->ifc_req;
1624 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1625 		char workbuf[64];
1626 		int ifnlen, addrs;
1627 
1628 		if (space < sizeof(ifr))
1629 			break;
1630 		ifnlen = snprintf(workbuf, sizeof(workbuf),
1631 		    "%s%d", ifp->if_name, ifp->if_unit);
1632 		if(ifnlen + 1 > sizeof ifr.ifr_name) {
1633 			error = ENAMETOOLONG;
1634 			break;
1635 		} else {
1636 			strcpy(ifr.ifr_name, workbuf);
1637 		}
1638 
1639 		addrs = 0;
1640 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1641 			struct sockaddr *sa = ifa->ifa_addr;
1642 
1643 			if (space < sizeof(ifr))
1644 				break;
1645 			if (jailed(curthread->td_ucred) &&
1646 			    prison_if(curthread->td_ucred, sa))
1647 				continue;
1648 			addrs++;
1649 #ifdef COMPAT_43
1650 			if (cmd == OSIOCGIFCONF) {
1651 				struct osockaddr *osa =
1652 					 (struct osockaddr *)&ifr.ifr_addr;
1653 				ifr.ifr_addr = *sa;
1654 				osa->sa_family = sa->sa_family;
1655 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1656 						sizeof (ifr));
1657 				ifrp++;
1658 			} else
1659 #endif
1660 			if (sa->sa_len <= sizeof(*sa)) {
1661 				ifr.ifr_addr = *sa;
1662 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1663 						sizeof (ifr));
1664 				ifrp++;
1665 			} else {
1666 				if (space < sizeof (ifr) + sa->sa_len -
1667 					    sizeof(*sa))
1668 					break;
1669 				space -= sa->sa_len - sizeof(*sa);
1670 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1671 						sizeof (ifr.ifr_name));
1672 				if (error == 0)
1673 				    error = copyout((caddr_t)sa,
1674 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1675 				ifrp = (struct ifreq *)
1676 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1677 			}
1678 			if (error)
1679 				break;
1680 			space -= sizeof (ifr);
1681 		}
1682 		if (error)
1683 			break;
1684 		if (!addrs) {
1685 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1686 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1687 			    sizeof (ifr));
1688 			if (error)
1689 				break;
1690 			space -= sizeof (ifr);
1691 			ifrp++;
1692 		}
1693 	}
1694 	ifc->ifc_len -= space;
1695 	return (error);
1696 }
1697 
1698 /*
1699  * Just like if_promisc(), but for all-multicast-reception mode.
1700  */
1701 int
1702 if_allmulti(ifp, onswitch)
1703 	struct ifnet *ifp;
1704 	int onswitch;
1705 {
1706 	int error = 0;
1707 	int s = splimp();
1708 	struct ifreq ifr;
1709 
1710 	if (onswitch) {
1711 		if (ifp->if_amcount++ == 0) {
1712 			ifp->if_flags |= IFF_ALLMULTI;
1713 			ifr.ifr_flags = ifp->if_flags & 0xffff;
1714 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1715 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1716 		}
1717 	} else {
1718 		if (ifp->if_amcount > 1) {
1719 			ifp->if_amcount--;
1720 		} else {
1721 			ifp->if_amcount = 0;
1722 			ifp->if_flags &= ~IFF_ALLMULTI;
1723 			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1724 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1725 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1726 		}
1727 	}
1728 	splx(s);
1729 
1730 	if (error == 0)
1731 		rt_ifmsg(ifp);
1732 	return error;
1733 }
1734 
1735 /*
1736  * Add a multicast listenership to the interface in question.
1737  * The link layer provides a routine which converts
1738  */
1739 int
1740 if_addmulti(ifp, sa, retifma)
1741 	struct ifnet *ifp;	/* interface to manipulate */
1742 	struct sockaddr *sa;	/* address to add */
1743 	struct ifmultiaddr **retifma;
1744 {
1745 	struct sockaddr *llsa, *dupsa;
1746 	int error, s;
1747 	struct ifmultiaddr *ifma;
1748 
1749 	/*
1750 	 * If the matching multicast address already exists
1751 	 * then don't add a new one, just add a reference
1752 	 */
1753 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1754 		if (equal(sa, ifma->ifma_addr)) {
1755 			ifma->ifma_refcount++;
1756 			if (retifma)
1757 				*retifma = ifma;
1758 			return 0;
1759 		}
1760 	}
1761 
1762 	/*
1763 	 * Give the link layer a chance to accept/reject it, and also
1764 	 * find out which AF_LINK address this maps to, if it isn't one
1765 	 * already.
1766 	 */
1767 	if (ifp->if_resolvemulti) {
1768 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1769 		if (error) return error;
1770 	} else {
1771 		llsa = 0;
1772 	}
1773 
1774 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1775 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1776 	bcopy(sa, dupsa, sa->sa_len);
1777 
1778 	ifma->ifma_addr = dupsa;
1779 	ifma->ifma_lladdr = llsa;
1780 	ifma->ifma_ifp = ifp;
1781 	ifma->ifma_refcount = 1;
1782 	ifma->ifma_protospec = 0;
1783 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1784 
1785 	/*
1786 	 * Some network interfaces can scan the address list at
1787 	 * interrupt time; lock them out.
1788 	 */
1789 	s = splimp();
1790 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1791 	splx(s);
1792 	if (retifma != NULL)
1793 		*retifma = ifma;
1794 
1795 	if (llsa != 0) {
1796 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1797 			if (equal(ifma->ifma_addr, llsa))
1798 				break;
1799 		}
1800 		if (ifma) {
1801 			ifma->ifma_refcount++;
1802 		} else {
1803 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1804 			       M_IFMADDR, M_WAITOK);
1805 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1806 			       M_IFMADDR, M_WAITOK);
1807 			bcopy(llsa, dupsa, llsa->sa_len);
1808 			ifma->ifma_addr = dupsa;
1809 			ifma->ifma_ifp = ifp;
1810 			ifma->ifma_refcount = 1;
1811 			s = splimp();
1812 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1813 			splx(s);
1814 		}
1815 	}
1816 	/*
1817 	 * We are certain we have added something, so call down to the
1818 	 * interface to let them know about it.
1819 	 */
1820 	s = splimp();
1821 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1822 	splx(s);
1823 
1824 	return 0;
1825 }
1826 
1827 /*
1828  * Remove a reference to a multicast address on this interface.  Yell
1829  * if the request does not match an existing membership.
1830  */
1831 int
1832 if_delmulti(ifp, sa)
1833 	struct ifnet *ifp;
1834 	struct sockaddr *sa;
1835 {
1836 	struct ifmultiaddr *ifma;
1837 	int s;
1838 
1839 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1840 		if (equal(sa, ifma->ifma_addr))
1841 			break;
1842 	if (ifma == 0)
1843 		return ENOENT;
1844 
1845 	if (ifma->ifma_refcount > 1) {
1846 		ifma->ifma_refcount--;
1847 		return 0;
1848 	}
1849 
1850 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1851 	sa = ifma->ifma_lladdr;
1852 	s = splimp();
1853 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1854 	/*
1855 	 * Make sure the interface driver is notified
1856 	 * in the case of a link layer mcast group being left.
1857 	 */
1858 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1859 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1860 	splx(s);
1861 	free(ifma->ifma_addr, M_IFMADDR);
1862 	free(ifma, M_IFMADDR);
1863 	if (sa == 0)
1864 		return 0;
1865 
1866 	/*
1867 	 * Now look for the link-layer address which corresponds to
1868 	 * this network address.  It had been squirreled away in
1869 	 * ifma->ifma_lladdr for this purpose (so we don't have
1870 	 * to call ifp->if_resolvemulti() again), and we saved that
1871 	 * value in sa above.  If some nasty deleted the
1872 	 * link-layer address out from underneath us, we can deal because
1873 	 * the address we stored was is not the same as the one which was
1874 	 * in the record for the link-layer address.  (So we don't complain
1875 	 * in that case.)
1876 	 */
1877 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1878 		if (equal(sa, ifma->ifma_addr))
1879 			break;
1880 	if (ifma == 0)
1881 		return 0;
1882 
1883 	if (ifma->ifma_refcount > 1) {
1884 		ifma->ifma_refcount--;
1885 		return 0;
1886 	}
1887 
1888 	s = splimp();
1889 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1890 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1891 	splx(s);
1892 	free(ifma->ifma_addr, M_IFMADDR);
1893 	free(sa, M_IFMADDR);
1894 	free(ifma, M_IFMADDR);
1895 
1896 	return 0;
1897 }
1898 
1899 /*
1900  * Set the link layer address on an interface.
1901  *
1902  * At this time we only support certain types of interfaces,
1903  * and we don't allow the length of the address to change.
1904  */
1905 int
1906 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1907 {
1908 	struct sockaddr_dl *sdl;
1909 	struct ifaddr *ifa;
1910 	struct ifreq ifr;
1911 
1912 	ifa = ifaddr_byindex(ifp->if_index);
1913 	if (ifa == NULL)
1914 		return (EINVAL);
1915 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1916 	if (sdl == NULL)
1917 		return (EINVAL);
1918 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1919 		return (EINVAL);
1920 	switch (ifp->if_type) {
1921 	case IFT_ETHER:			/* these types use struct arpcom */
1922 	case IFT_FDDI:
1923 	case IFT_XETHER:
1924 	case IFT_ISO88025:
1925 	case IFT_L2VLAN:
1926 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1927 		bcopy(lladdr, LLADDR(sdl), len);
1928 		break;
1929 	default:
1930 		return (ENODEV);
1931 	}
1932 	/*
1933 	 * If the interface is already up, we need
1934 	 * to re-init it in order to reprogram its
1935 	 * address filter.
1936 	 */
1937 	if ((ifp->if_flags & IFF_UP) != 0) {
1938 		ifp->if_flags &= ~IFF_UP;
1939 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1940 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1941 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1942 		ifp->if_flags |= IFF_UP;
1943 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1944 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1945 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1946 #ifdef INET
1947 		/*
1948 		 * Also send gratuitous ARPs to notify other nodes about
1949 		 * the address change.
1950 		 */
1951 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1952 			if (ifa->ifa_addr != NULL &&
1953 			    ifa->ifa_addr->sa_family == AF_INET)
1954 				arp_ifinit(ifp, ifa);
1955 		}
1956 #endif
1957 	}
1958 	return (0);
1959 }
1960 
1961 struct ifmultiaddr *
1962 ifmaof_ifpforaddr(sa, ifp)
1963 	struct sockaddr *sa;
1964 	struct ifnet *ifp;
1965 {
1966 	struct ifmultiaddr *ifma;
1967 
1968 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1969 		if (equal(ifma->ifma_addr, sa))
1970 			break;
1971 
1972 	return ifma;
1973 }
1974 
1975 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
1976 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1977