xref: /freebsd/sys/net/if.c (revision ae83180158c4c937f170e31eff311b18c0286a93)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if.c	8.5 (Berkeley) 1/9/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_mac.h"
41 
42 #include <sys/param.h>
43 #include <sys/conf.h>
44 #include <sys/mac.h>
45 #include <sys/malloc.h>
46 #include <sys/bus.h>
47 #include <sys/mbuf.h>
48 #include <sys/systm.h>
49 #include <sys/proc.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/protosw.h>
53 #include <sys/kernel.h>
54 #include <sys/sockio.h>
55 #include <sys/syslog.h>
56 #include <sys/sysctl.h>
57 #include <sys/jail.h>
58 
59 #include <net/if.h>
60 #include <net/if_arp.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/if_var.h>
64 #include <net/radix.h>
65 #include <net/route.h>
66 
67 #if defined(INET) || defined(INET6)
68 /*XXX*/
69 #include <netinet/in.h>
70 #include <netinet/in_var.h>
71 #ifdef INET6
72 #include <netinet6/in6_var.h>
73 #include <netinet6/in6_ifattach.h>
74 #endif
75 #endif
76 #ifdef INET
77 #include <netinet/if_ether.h>
78 #endif
79 
80 static int	ifconf(u_long, caddr_t);
81 static void	if_grow(void);
82 static void	if_init(void *);
83 static void	if_check(void *);
84 static int	if_findindex(struct ifnet *);
85 static void	if_qflush(struct ifqueue *);
86 static void	if_slowtimo(void *);
87 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
88 static int	if_rtdel(struct radix_node *, void *);
89 static struct	if_clone *if_clone_lookup(const char *, int *);
90 static int	if_clone_list(struct if_clonereq *);
91 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
92 #ifdef INET6
93 /*
94  * XXX: declare here to avoid to include many inet6 related files..
95  * should be more generalized?
96  */
97 extern void	nd6_setmtu(struct ifnet *);
98 #endif
99 
100 int	if_index = 0;
101 struct	ifindex_entry *ifindex_table = NULL;
102 int	ifqmaxlen = IFQ_MAXLEN;
103 struct	ifnethead ifnet;	/* depend on static init XXX */
104 int	if_cloners_count;
105 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
106 
107 static int	if_indexlim = 8;
108 static struct	klist ifklist;
109 
110 static void	filt_netdetach(struct knote *kn);
111 static int	filt_netdev(struct knote *kn, long hint);
112 
113 static struct filterops netdev_filtops =
114     { 1, NULL, filt_netdetach, filt_netdev };
115 
116 /*
117  * System initialization
118  */
119 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
120 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
121 
122 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
123 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
124 MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
125 
126 #define CDEV_MAJOR	165
127 
128 static d_open_t		netopen;
129 static d_close_t	netclose;
130 static d_ioctl_t	netioctl;
131 static d_kqfilter_t	netkqfilter;
132 
133 static struct cdevsw net_cdevsw = {
134 	/* open */	netopen,
135 	/* close */	netclose,
136 	/* read */	noread,
137 	/* write */	nowrite,
138 	/* ioctl */	netioctl,
139 	/* poll */	nopoll,
140 	/* mmap */	nommap,
141 	/* strategy */	nostrategy,
142 	/* name */	"net",
143 	/* maj */	CDEV_MAJOR,
144 	/* dump */	nodump,
145 	/* psize */	nopsize,
146 	/* flags */	D_KQFILTER,
147 	/* kqfilter */	netkqfilter,
148 };
149 
150 static int
151 netopen(dev_t dev, int flag, int mode, struct thread *td)
152 {
153 	return (0);
154 }
155 
156 static int
157 netclose(dev_t dev, int flags, int fmt, struct thread *td)
158 {
159 	return (0);
160 }
161 
162 static int
163 netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
164 {
165 	struct ifnet *ifp;
166 	int error, idx;
167 
168 	/* only support interface specific ioctls */
169 	if (IOCGROUP(cmd) != 'i')
170 		return (EOPNOTSUPP);
171 	idx = minor(dev);
172 	if (idx == 0) {
173 		/*
174 		 * special network device, not interface.
175 		 */
176 		if (cmd == SIOCGIFCONF)
177 			return (ifconf(cmd, data));	/* XXX remove cmd */
178 		return (EOPNOTSUPP);
179 	}
180 
181 	ifp = ifnet_byindex(idx);
182 	if (ifp == NULL)
183 		return (ENXIO);
184 
185 	error = ifhwioctl(cmd, ifp, data, td);
186 	if (error == ENOIOCTL)
187 		error = EOPNOTSUPP;
188 	return (error);
189 }
190 
191 static int
192 netkqfilter(dev_t dev, struct knote *kn)
193 {
194 	struct klist *klist;
195 	struct ifnet *ifp;
196 	int idx;
197 
198 	idx = minor(dev);
199 	if (idx == 0) {
200 		klist = &ifklist;
201 	} else {
202 		ifp = ifnet_byindex(idx);
203 		if (ifp == NULL)
204 			return (1);
205 		klist = &ifp->if_klist;
206 	}
207 
208 	switch (kn->kn_filter) {
209 	case EVFILT_NETDEV:
210 		kn->kn_fop = &netdev_filtops;
211 		break;
212 	default:
213 		return (1);
214 	}
215 
216 	kn->kn_hook = (caddr_t)klist;
217 
218 	/* XXX locking? */
219 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
220 
221 	return (0);
222 }
223 
224 static void
225 filt_netdetach(struct knote *kn)
226 {
227 	struct klist *klist = (struct klist *)kn->kn_hook;
228 
229 	if (kn->kn_status & KN_DETACHED)
230 		return;
231 	SLIST_REMOVE(klist, kn, knote, kn_selnext);
232 }
233 
234 static int
235 filt_netdev(struct knote *kn, long hint)
236 {
237 
238 	/*
239 	 * Currently NOTE_EXIT is abused to indicate device detach.
240 	 */
241 	if (hint == NOTE_EXIT) {
242 		kn->kn_data = NOTE_LINKINV;
243                 kn->kn_status |= KN_DETACHED;
244                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
245                 return (1);
246         }
247 	kn->kn_data = hint;			/* current status */
248 	if (kn->kn_sfflags & hint)
249 		kn->kn_fflags |= hint;
250 	return (kn->kn_fflags != 0);
251 }
252 
253 /*
254  * Network interface utility routines.
255  *
256  * Routines with ifa_ifwith* names take sockaddr *'s as
257  * parameters.
258  */
259 /* ARGSUSED*/
260 static void
261 if_init(dummy)
262 	void *dummy;
263 {
264 
265 	TAILQ_INIT(&ifnet);
266 	SLIST_INIT(&ifklist);
267 	if_grow();				/* create initial table */
268 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
269 	    UID_ROOT, GID_WHEEL, 0600, "network");
270 }
271 
272 static void
273 if_grow(void)
274 {
275 	u_int n;
276 	struct ifindex_entry *e;
277 
278 	if_indexlim <<= 1;
279 	n = if_indexlim * sizeof(*e);
280 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
281 	if (ifindex_table != NULL) {
282 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
283 		free((caddr_t)ifindex_table, M_IFADDR);
284 	}
285 	ifindex_table = e;
286 }
287 
288 /* ARGSUSED*/
289 static void
290 if_check(dummy)
291 	void *dummy;
292 {
293 	struct ifnet *ifp;
294 	int s;
295 
296 	s = splimp();
297 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
298 		if (ifp->if_snd.ifq_maxlen == 0) {
299 			printf("%s%d XXX: driver didn't set ifq_maxlen\n",
300 			    ifp->if_name, ifp->if_unit);
301 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
302 		}
303 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
304 			printf("%s%d XXX: driver didn't initialize queue mtx\n",
305 			    ifp->if_name, ifp->if_unit);
306 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
307 			    MTX_NETWORK_LOCK, MTX_DEF);
308 		}
309 	}
310 	splx(s);
311 	if_slowtimo(0);
312 }
313 
314 static int
315 if_findindex(struct ifnet *ifp)
316 {
317 	int i, unit;
318 	char eaddr[18], devname[32];
319 	const char *name, *p;
320 
321 	switch (ifp->if_type) {
322 	case IFT_ETHER:			/* these types use struct arpcom */
323 	case IFT_FDDI:
324 	case IFT_XETHER:
325 	case IFT_ISO88025:
326 	case IFT_L2VLAN:
327 		snprintf(eaddr, 18, "%6D",
328 		    ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":");
329 		break;
330 	default:
331 		eaddr[0] = '\0';
332 		break;
333 	}
334 	snprintf(devname, 32, "%s%d", ifp->if_name, ifp->if_unit);
335 	name = net_cdevsw.d_name;
336 	i = 0;
337 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
338 		if (resource_string_value(name, unit, "ether", &p) == 0)
339 			if (strcmp(p, eaddr) == 0)
340 				goto found;
341 		if (resource_string_value(name, unit, "dev", &p) == 0)
342 			if (strcmp(p, devname) == 0)
343 				goto found;
344 	}
345 	unit = 0;
346 found:
347 	if (unit != 0) {
348 		if (ifaddr_byindex(unit) == NULL)
349 			return (unit);
350 		printf("%s%d in use, cannot hardwire it to %s.\n",
351 		    name, unit, devname);
352 	}
353 	for (unit = 1; ; unit++) {
354 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
355 			continue;
356 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
357 		    resource_string_value(name, unit, "dev", &p) == 0)
358 			continue;
359 		break;
360 	}
361 	return (unit);
362 }
363 
364 /*
365  * Attach an interface to the
366  * list of "active" interfaces.
367  */
368 void
369 if_attach(ifp)
370 	struct ifnet *ifp;
371 {
372 	unsigned socksize, ifasize;
373 	int namelen, masklen;
374 	char workbuf[64];
375 	register struct sockaddr_dl *sdl;
376 	register struct ifaddr *ifa;
377 
378 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
379 	/*
380 	 * XXX -
381 	 * The old code would work if the interface passed a pre-existing
382 	 * chain of ifaddrs to this code.  We don't trust our callers to
383 	 * properly initialize the tailq, however, so we no longer allow
384 	 * this unlikely case.
385 	 */
386 	TAILQ_INIT(&ifp->if_addrhead);
387 	TAILQ_INIT(&ifp->if_prefixhead);
388 	TAILQ_INIT(&ifp->if_multiaddrs);
389 	SLIST_INIT(&ifp->if_klist);
390 	getmicrotime(&ifp->if_lastchange);
391 
392 #ifdef MAC
393 	mac_init_ifnet(ifp);
394 	mac_create_ifnet(ifp);
395 #endif
396 
397 	ifp->if_index = if_findindex(ifp);
398 	if (ifp->if_index > if_index)
399 		if_index = ifp->if_index;
400 	if (if_index >= if_indexlim)
401 		if_grow();
402 
403 	ifnet_byindex(ifp->if_index) = ifp;
404 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw, ifp->if_index,
405 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s%d",
406 	    net_cdevsw.d_name, ifp->if_name, ifp->if_unit);
407 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
408 	    net_cdevsw.d_name, ifp->if_index);
409 
410 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_name, "if send queue", MTX_DEF);
411 
412 	/*
413 	 * create a Link Level name for this device
414 	 */
415 	namelen = snprintf(workbuf, sizeof(workbuf),
416 	    "%s%d", ifp->if_name, ifp->if_unit);
417 #define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
418 	masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
419 	socksize = masklen + ifp->if_addrlen;
420 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
421 	if (socksize < sizeof(*sdl))
422 		socksize = sizeof(*sdl);
423 	socksize = ROUNDUP(socksize);
424 	ifasize = sizeof(*ifa) + 2 * socksize;
425 	ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
426 	if (ifa) {
427 		sdl = (struct sockaddr_dl *)(ifa + 1);
428 		sdl->sdl_len = socksize;
429 		sdl->sdl_family = AF_LINK;
430 		bcopy(workbuf, sdl->sdl_data, namelen);
431 		sdl->sdl_nlen = namelen;
432 		sdl->sdl_index = ifp->if_index;
433 		sdl->sdl_type = ifp->if_type;
434 		ifaddr_byindex(ifp->if_index) = ifa;
435 		ifa->ifa_ifp = ifp;
436 		ifa->ifa_rtrequest = link_rtrequest;
437 		ifa->ifa_addr = (struct sockaddr *)sdl;
438 		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
439 		ifa->ifa_netmask = (struct sockaddr *)sdl;
440 		sdl->sdl_len = masklen;
441 		while (namelen != 0)
442 			sdl->sdl_data[--namelen] = 0xff;
443 		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
444 	}
445 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
446 
447 	/* Announce the interface. */
448 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
449 }
450 
451 /*
452  * Detach an interface, removing it from the
453  * list of "active" interfaces.
454  */
455 void
456 if_detach(ifp)
457 	struct ifnet *ifp;
458 {
459 	struct ifaddr *ifa;
460 	struct radix_node_head	*rnh;
461 	int s;
462 	int i;
463 
464 	/*
465 	 * Remove routes and flush queues.
466 	 */
467 	s = splnet();
468 	if_down(ifp);
469 
470 	/*
471 	 * Remove address from ifindex_table[] and maybe decrement if_index.
472 	 * Clean up all addresses.
473 	 */
474 	ifaddr_byindex(ifp->if_index) = NULL;
475 	revoke_and_destroy_dev(ifdev_byindex(ifp->if_index));
476 	ifdev_byindex(ifp->if_index) = NULL;
477 
478 	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
479 		if_index--;
480 
481 	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
482 	     ifa = TAILQ_FIRST(&ifp->if_addrhead)) {
483 #ifdef INET
484 		/* XXX: Ugly!! ad hoc just for INET */
485 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
486 			struct ifaliasreq ifr;
487 
488 			bzero(&ifr, sizeof(ifr));
489 			ifr.ifra_addr = *ifa->ifa_addr;
490 			if (ifa->ifa_dstaddr)
491 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
492 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
493 			    NULL) == 0)
494 				continue;
495 		}
496 #endif /* INET */
497 #ifdef INET6
498 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
499 			in6_purgeaddr(ifa);
500 			/* ifp_addrhead is already updated */
501 			continue;
502 		}
503 #endif /* INET6 */
504 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
505 		IFAFREE(ifa);
506 	}
507 
508 #ifdef INET6
509 	/*
510 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
511 	 * before removing routing entries below, since IPv6 interface direct
512 	 * routes are expected to be removed by the IPv6-specific kernel API.
513 	 * Otherwise, the kernel will detect some inconsistency and bark it.
514 	 */
515 	in6_ifdetach(ifp);
516 #endif
517 
518 	/*
519 	 * Delete all remaining routes using this interface
520 	 * Unfortuneatly the only way to do this is to slog through
521 	 * the entire routing table looking for routes which point
522 	 * to this interface...oh well...
523 	 */
524 	for (i = 1; i <= AF_MAX; i++) {
525 		if ((rnh = rt_tables[i]) == NULL)
526 			continue;
527 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
528 	}
529 
530 	/* Announce that the interface is gone. */
531 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
532 
533 #ifdef MAC
534 	mac_destroy_ifnet(ifp);
535 #endif /* MAC */
536 	KNOTE(&ifp->if_klist, NOTE_EXIT);
537 	TAILQ_REMOVE(&ifnet, ifp, if_link);
538 	mtx_destroy(&ifp->if_snd.ifq_mtx);
539 	splx(s);
540 }
541 
542 /*
543  * Delete Routes for a Network Interface
544  *
545  * Called for each routing entry via the rnh->rnh_walktree() call above
546  * to delete all route entries referencing a detaching network interface.
547  *
548  * Arguments:
549  *	rn	pointer to node in the routing table
550  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
551  *
552  * Returns:
553  *	0	successful
554  *	errno	failed - reason indicated
555  *
556  */
557 static int
558 if_rtdel(rn, arg)
559 	struct radix_node	*rn;
560 	void			*arg;
561 {
562 	struct rtentry	*rt = (struct rtentry *)rn;
563 	struct ifnet	*ifp = arg;
564 	int		err;
565 
566 	if (rt->rt_ifp == ifp) {
567 
568 		/*
569 		 * Protect (sorta) against walktree recursion problems
570 		 * with cloned routes
571 		 */
572 		if ((rt->rt_flags & RTF_UP) == 0)
573 			return (0);
574 
575 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
576 				rt_mask(rt), rt->rt_flags,
577 				(struct rtentry **) NULL);
578 		if (err) {
579 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
580 		}
581 	}
582 
583 	return (0);
584 }
585 
586 /*
587  * Create a clone network interface.
588  */
589 int
590 if_clone_create(name, len)
591 	char *name;
592 	int len;
593 {
594 	struct if_clone *ifc;
595 	char *dp;
596 	int wildcard, bytoff, bitoff;
597 	int unit;
598 	int err;
599 
600 	ifc = if_clone_lookup(name, &unit);
601 	if (ifc == NULL)
602 		return (EINVAL);
603 
604 	if (ifunit(name) != NULL)
605 		return (EEXIST);
606 
607 	bytoff = bitoff = 0;
608 	wildcard = (unit < 0);
609 	/*
610 	 * Find a free unit if none was given.
611 	 */
612 	if (wildcard) {
613 		while ((bytoff < ifc->ifc_bmlen)
614 		    && (ifc->ifc_units[bytoff] == 0xff))
615 			bytoff++;
616 		if (bytoff >= ifc->ifc_bmlen)
617 			return (ENOSPC);
618 		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
619 			bitoff++;
620 		unit = (bytoff << 3) + bitoff;
621 	}
622 
623 	if (unit > ifc->ifc_maxunit)
624 		return (ENXIO);
625 
626 	err = (*ifc->ifc_create)(ifc, unit);
627 	if (err != 0)
628 		return (err);
629 
630 	if (!wildcard) {
631 		bytoff = unit >> 3;
632 		bitoff = unit - (bytoff << 3);
633 	}
634 
635 	/*
636 	 * Allocate the unit in the bitmap.
637 	 */
638 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
639 	    ("%s: bit is already set", __func__));
640 	ifc->ifc_units[bytoff] |= (1 << bitoff);
641 
642 	/* In the wildcard case, we need to update the name. */
643 	if (wildcard) {
644 		for (dp = name; *dp != '\0'; dp++);
645 		if (snprintf(dp, len - (dp-name), "%d", unit) >
646 		    len - (dp-name) - 1) {
647 			/*
648 			 * This can only be a programmer error and
649 			 * there's no straightforward way to recover if
650 			 * it happens.
651 			 */
652 			panic("if_clone_create(): interface name too long");
653 		}
654 
655 	}
656 
657 	return (0);
658 }
659 
660 /*
661  * Destroy a clone network interface.
662  */
663 int
664 if_clone_destroy(name)
665 	const char *name;
666 {
667 	struct if_clone *ifc;
668 	struct ifnet *ifp;
669 	int bytoff, bitoff;
670 	int unit;
671 
672 	ifc = if_clone_lookup(name, &unit);
673 	if (ifc == NULL)
674 		return (EINVAL);
675 
676 	if (unit < ifc->ifc_minifs)
677 		return (EINVAL);
678 
679 	ifp = ifunit(name);
680 	if (ifp == NULL)
681 		return (ENXIO);
682 
683 	if (ifc->ifc_destroy == NULL)
684 		return (EOPNOTSUPP);
685 
686 	(*ifc->ifc_destroy)(ifp);
687 
688 	/*
689 	 * Compute offset in the bitmap and deallocate the unit.
690 	 */
691 	bytoff = unit >> 3;
692 	bitoff = unit - (bytoff << 3);
693 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
694 	    ("%s: bit is already cleared", __func__));
695 	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
696 	return (0);
697 }
698 
699 /*
700  * Look up a network interface cloner.
701  */
702 static struct if_clone *
703 if_clone_lookup(name, unitp)
704 	const char *name;
705 	int *unitp;
706 {
707 	struct if_clone *ifc;
708 	const char *cp;
709 	int i;
710 
711 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) {
712 		for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) {
713 			if (ifc->ifc_name[i] != *cp)
714 				goto next_ifc;
715 		}
716 		goto found_name;
717  next_ifc:
718 		ifc = LIST_NEXT(ifc, ifc_list);
719 	}
720 
721 	/* No match. */
722 	return ((struct if_clone *)NULL);
723 
724  found_name:
725 	if (*cp == '\0') {
726 		i = -1;
727 	} else {
728 		for (i = 0; *cp != '\0'; cp++) {
729 			if (*cp < '0' || *cp > '9') {
730 				/* Bogus unit number. */
731 				return (NULL);
732 			}
733 			i = (i * 10) + (*cp - '0');
734 		}
735 	}
736 
737 	if (unitp != NULL)
738 		*unitp = i;
739 	return (ifc);
740 }
741 
742 /*
743  * Register a network interface cloner.
744  */
745 void
746 if_clone_attach(ifc)
747 	struct if_clone *ifc;
748 {
749 	int bytoff, bitoff;
750 	int err;
751 	int len, maxclone;
752 	int unit;
753 
754 	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
755 	    ("%s: %s requested more units then allowed (%d > %d)",
756 	    __func__, ifc->ifc_name, ifc->ifc_minifs,
757 	    ifc->ifc_maxunit + 1));
758 	/*
759 	 * Compute bitmap size and allocate it.
760 	 */
761 	maxclone = ifc->ifc_maxunit + 1;
762 	len = maxclone >> 3;
763 	if ((len << 3) < maxclone)
764 		len++;
765 	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
766 	ifc->ifc_bmlen = len;
767 
768 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
769 	if_cloners_count++;
770 
771 	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
772 		err = (*ifc->ifc_create)(ifc, unit);
773 		KASSERT(err == 0,
774 		    ("%s: failed to create required interface %s%d",
775 		    __func__, ifc->ifc_name, unit));
776 
777 		/* Allocate the unit in the bitmap. */
778 		bytoff = unit >> 3;
779 		bitoff = unit - (bytoff << 3);
780 		ifc->ifc_units[bytoff] |= (1 << bitoff);
781 	}
782 }
783 
784 /*
785  * Unregister a network interface cloner.
786  */
787 void
788 if_clone_detach(ifc)
789 	struct if_clone *ifc;
790 {
791 
792 	LIST_REMOVE(ifc, ifc_list);
793 	free(ifc->ifc_units, M_CLONE);
794 	if_cloners_count--;
795 }
796 
797 /*
798  * Provide list of interface cloners to userspace.
799  */
800 static int
801 if_clone_list(ifcr)
802 	struct if_clonereq *ifcr;
803 {
804 	char outbuf[IFNAMSIZ], *dst;
805 	struct if_clone *ifc;
806 	int count, error = 0;
807 
808 	ifcr->ifcr_total = if_cloners_count;
809 	if ((dst = ifcr->ifcr_buffer) == NULL) {
810 		/* Just asking how many there are. */
811 		return (0);
812 	}
813 
814 	if (ifcr->ifcr_count < 0)
815 		return (EINVAL);
816 
817 	count = (if_cloners_count < ifcr->ifcr_count) ?
818 	    if_cloners_count : ifcr->ifcr_count;
819 
820 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
821 	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
822 		strncpy(outbuf, ifc->ifc_name, IFNAMSIZ);
823 		outbuf[IFNAMSIZ - 1] = '\0';	/* sanity */
824 		error = copyout(outbuf, dst, IFNAMSIZ);
825 		if (error)
826 			break;
827 	}
828 
829 	return (error);
830 }
831 
832 /*
833  * Locate an interface based on a complete address.
834  */
835 /*ARGSUSED*/
836 struct ifaddr *
837 ifa_ifwithaddr(addr)
838 	struct sockaddr *addr;
839 {
840 	struct ifnet *ifp;
841 	struct ifaddr *ifa;
842 
843 #define	equal(a1, a2) \
844   (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
845 	TAILQ_FOREACH(ifp, &ifnet, if_link)
846 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
847 			if (ifa->ifa_addr->sa_family != addr->sa_family)
848 				continue;
849 			if (equal(addr, ifa->ifa_addr))
850 				goto done;
851 			/* IP6 doesn't have broadcast */
852 			if ((ifp->if_flags & IFF_BROADCAST) &&
853 			    ifa->ifa_broadaddr &&
854 			    ifa->ifa_broadaddr->sa_len != 0 &&
855 			    equal(ifa->ifa_broadaddr, addr))
856 				goto done;
857 		}
858 	ifa = NULL;
859 done:
860 	return (ifa);
861 }
862 
863 /*
864  * Locate the point to point interface with a given destination address.
865  */
866 /*ARGSUSED*/
867 struct ifaddr *
868 ifa_ifwithdstaddr(addr)
869 	struct sockaddr *addr;
870 {
871 	struct ifnet *ifp;
872 	struct ifaddr *ifa;
873 
874 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
875 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
876 			continue;
877 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
878 			if (ifa->ifa_addr->sa_family != addr->sa_family)
879 				continue;
880 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
881 				goto done;
882 		}
883 	}
884 	ifa = NULL;
885 done:
886 	return (ifa);
887 }
888 
889 /*
890  * Find an interface on a specific network.  If many, choice
891  * is most specific found.
892  */
893 struct ifaddr *
894 ifa_ifwithnet(addr)
895 	struct sockaddr *addr;
896 {
897 	register struct ifnet *ifp;
898 	register struct ifaddr *ifa;
899 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
900 	u_int af = addr->sa_family;
901 	char *addr_data = addr->sa_data, *cplim;
902 
903 	/*
904 	 * AF_LINK addresses can be looked up directly by their index number,
905 	 * so do that if we can.
906 	 */
907 	if (af == AF_LINK) {
908 	    register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
909 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
910 		return (ifaddr_byindex(sdl->sdl_index));
911 	}
912 
913 	/*
914 	 * Scan though each interface, looking for ones that have
915 	 * addresses in this address family.
916 	 */
917 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
918 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
919 			register char *cp, *cp2, *cp3;
920 
921 			if (ifa->ifa_addr->sa_family != af)
922 next:				continue;
923 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
924 				/*
925 				 * This is a bit broken as it doesn't
926 				 * take into account that the remote end may
927 				 * be a single node in the network we are
928 				 * looking for.
929 				 * The trouble is that we don't know the
930 				 * netmask for the remote end.
931 				 */
932 				if (ifa->ifa_dstaddr != 0
933 				    && equal(addr, ifa->ifa_dstaddr))
934 					goto done;
935 			} else {
936 				/*
937 				 * if we have a special address handler,
938 				 * then use it instead of the generic one.
939 				 */
940 	          		if (ifa->ifa_claim_addr) {
941 					if ((*ifa->ifa_claim_addr)(ifa, addr))
942 						goto done;
943 					continue;
944 				}
945 
946 				/*
947 				 * Scan all the bits in the ifa's address.
948 				 * If a bit dissagrees with what we are
949 				 * looking for, mask it with the netmask
950 				 * to see if it really matters.
951 				 * (A byte at a time)
952 				 */
953 				if (ifa->ifa_netmask == 0)
954 					continue;
955 				cp = addr_data;
956 				cp2 = ifa->ifa_addr->sa_data;
957 				cp3 = ifa->ifa_netmask->sa_data;
958 				cplim = ifa->ifa_netmask->sa_len
959 					+ (char *)ifa->ifa_netmask;
960 				while (cp3 < cplim)
961 					if ((*cp++ ^ *cp2++) & *cp3++)
962 						goto next; /* next address! */
963 				/*
964 				 * If the netmask of what we just found
965 				 * is more specific than what we had before
966 				 * (if we had one) then remember the new one
967 				 * before continuing to search
968 				 * for an even better one.
969 				 */
970 				if (ifa_maybe == 0 ||
971 				    rn_refines((caddr_t)ifa->ifa_netmask,
972 				    (caddr_t)ifa_maybe->ifa_netmask))
973 					ifa_maybe = ifa;
974 			}
975 		}
976 	}
977 	ifa = ifa_maybe;
978 done:
979 	return (ifa);
980 }
981 
982 /*
983  * Find an interface address specific to an interface best matching
984  * a given address.
985  */
986 struct ifaddr *
987 ifaof_ifpforaddr(addr, ifp)
988 	struct sockaddr *addr;
989 	register struct ifnet *ifp;
990 {
991 	register struct ifaddr *ifa;
992 	register char *cp, *cp2, *cp3;
993 	register char *cplim;
994 	struct ifaddr *ifa_maybe = 0;
995 	u_int af = addr->sa_family;
996 
997 	if (af >= AF_MAX)
998 		return (0);
999 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1000 		if (ifa->ifa_addr->sa_family != af)
1001 			continue;
1002 		if (ifa_maybe == 0)
1003 			ifa_maybe = ifa;
1004 		if (ifa->ifa_netmask == 0) {
1005 			if (equal(addr, ifa->ifa_addr) ||
1006 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1007 				goto done;
1008 			continue;
1009 		}
1010 		if (ifp->if_flags & IFF_POINTOPOINT) {
1011 			if (equal(addr, ifa->ifa_dstaddr))
1012 				goto done;
1013 		} else {
1014 			cp = addr->sa_data;
1015 			cp2 = ifa->ifa_addr->sa_data;
1016 			cp3 = ifa->ifa_netmask->sa_data;
1017 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1018 			for (; cp3 < cplim; cp3++)
1019 				if ((*cp++ ^ *cp2++) & *cp3)
1020 					break;
1021 			if (cp3 == cplim)
1022 				goto done;
1023 		}
1024 	}
1025 	ifa = ifa_maybe;
1026 done:
1027 	return (ifa);
1028 }
1029 
1030 #include <net/route.h>
1031 
1032 /*
1033  * Default action when installing a route with a Link Level gateway.
1034  * Lookup an appropriate real ifa to point to.
1035  * This should be moved to /sys/net/link.c eventually.
1036  */
1037 static void
1038 link_rtrequest(cmd, rt, info)
1039 	int cmd;
1040 	register struct rtentry *rt;
1041 	struct rt_addrinfo *info;
1042 {
1043 	register struct ifaddr *ifa;
1044 	struct sockaddr *dst;
1045 	struct ifnet *ifp;
1046 
1047 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1048 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1049 		return;
1050 	ifa = ifaof_ifpforaddr(dst, ifp);
1051 	if (ifa) {
1052 		IFAFREE(rt->rt_ifa);
1053 		rt->rt_ifa = ifa;
1054 		ifa->ifa_refcnt++;
1055 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1056 			ifa->ifa_rtrequest(cmd, rt, info);
1057 	}
1058 }
1059 
1060 /*
1061  * Mark an interface down and notify protocols of
1062  * the transition.
1063  * NOTE: must be called at splnet or eqivalent.
1064  */
1065 void
1066 if_unroute(ifp, flag, fam)
1067 	register struct ifnet *ifp;
1068 	int flag, fam;
1069 {
1070 	register struct ifaddr *ifa;
1071 
1072 	ifp->if_flags &= ~flag;
1073 	getmicrotime(&ifp->if_lastchange);
1074 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1075 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1076 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1077 	if_qflush(&ifp->if_snd);
1078 	rt_ifmsg(ifp);
1079 }
1080 
1081 /*
1082  * Mark an interface up and notify protocols of
1083  * the transition.
1084  * NOTE: must be called at splnet or eqivalent.
1085  */
1086 void
1087 if_route(ifp, flag, fam)
1088 	register struct ifnet *ifp;
1089 	int flag, fam;
1090 {
1091 	register struct ifaddr *ifa;
1092 
1093 	ifp->if_flags |= flag;
1094 	getmicrotime(&ifp->if_lastchange);
1095 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1096 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1097 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1098 	rt_ifmsg(ifp);
1099 #ifdef INET6
1100 	in6_if_up(ifp);
1101 #endif
1102 }
1103 
1104 /*
1105  * Mark an interface down and notify protocols of
1106  * the transition.
1107  * NOTE: must be called at splnet or eqivalent.
1108  */
1109 void
1110 if_down(ifp)
1111 	register struct ifnet *ifp;
1112 {
1113 
1114 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1115 }
1116 
1117 /*
1118  * Mark an interface up and notify protocols of
1119  * the transition.
1120  * NOTE: must be called at splnet or eqivalent.
1121  */
1122 void
1123 if_up(ifp)
1124 	register struct ifnet *ifp;
1125 {
1126 
1127 	if_route(ifp, IFF_UP, AF_UNSPEC);
1128 }
1129 
1130 /*
1131  * Flush an interface queue.
1132  */
1133 static void
1134 if_qflush(ifq)
1135 	register struct ifqueue *ifq;
1136 {
1137 	register struct mbuf *m, *n;
1138 
1139 	n = ifq->ifq_head;
1140 	while ((m = n) != 0) {
1141 		n = m->m_act;
1142 		m_freem(m);
1143 	}
1144 	ifq->ifq_head = 0;
1145 	ifq->ifq_tail = 0;
1146 	ifq->ifq_len = 0;
1147 }
1148 
1149 /*
1150  * Handle interface watchdog timer routines.  Called
1151  * from softclock, we decrement timers (if set) and
1152  * call the appropriate interface routine on expiration.
1153  */
1154 static void
1155 if_slowtimo(arg)
1156 	void *arg;
1157 {
1158 	register struct ifnet *ifp;
1159 	int s = splimp();
1160 
1161 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1162 		if (ifp->if_timer == 0 || --ifp->if_timer)
1163 			continue;
1164 		if (ifp->if_watchdog)
1165 			(*ifp->if_watchdog)(ifp);
1166 	}
1167 	splx(s);
1168 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1169 }
1170 
1171 /*
1172  * Map interface name to
1173  * interface structure pointer.
1174  */
1175 struct ifnet *
1176 ifunit(const char *name)
1177 {
1178 	char namebuf[IFNAMSIZ + 1];
1179 	struct ifnet *ifp;
1180 	dev_t dev;
1181 
1182 	/*
1183 	 * Now search all the interfaces for this name/number
1184 	 */
1185 
1186 	/*
1187 	 * XXX
1188 	 * Devices should really be known as /dev/fooN, not /dev/net/fooN.
1189 	 */
1190 	snprintf(namebuf, IFNAMSIZ, "%s/%s", net_cdevsw.d_name, name);
1191 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1192 		dev = ifdev_byindex(ifp->if_index);
1193 		if (strcmp(devtoname(dev), namebuf) == 0)
1194 			break;
1195 		if (dev_named(dev, name))
1196 			break;
1197 	}
1198 	return (ifp);
1199 }
1200 
1201 /*
1202  * Map interface name in a sockaddr_dl to
1203  * interface structure pointer.
1204  */
1205 struct ifnet *
1206 if_withname(sa)
1207 	struct sockaddr *sa;
1208 {
1209 	char ifname[IFNAMSIZ+1];
1210 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1211 
1212 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1213 	     (sdl->sdl_nlen > IFNAMSIZ) )
1214 		return NULL;
1215 
1216 	/*
1217 	 * ifunit wants a null-terminated name.  It may not be null-terminated
1218 	 * in the sockaddr.  We don't want to change the caller's sockaddr,
1219 	 * and there might not be room to put the trailing null anyway, so we
1220 	 * make a local copy that we know we can null terminate safely.
1221 	 */
1222 
1223 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1224 	ifname[sdl->sdl_nlen] = '\0';
1225 	return ifunit(ifname);
1226 }
1227 
1228 /*
1229  * Hardware specific interface ioctls.
1230  */
1231 static int
1232 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1233 {
1234 	struct ifreq *ifr;
1235 	struct ifstat *ifs;
1236 	int error = 0;
1237 
1238 	ifr = (struct ifreq *)data;
1239 	switch (cmd) {
1240 	case SIOCGIFINDEX:
1241 		ifr->ifr_index = ifp->if_index;
1242 		break;
1243 
1244 	case SIOCGIFFLAGS:
1245 		ifr->ifr_flags = ifp->if_flags;
1246 		break;
1247 
1248 	case SIOCGIFCAP:
1249 		ifr->ifr_reqcap = ifp->if_capabilities;
1250 		ifr->ifr_curcap = ifp->if_capenable;
1251 		break;
1252 
1253 #ifdef MAC
1254 	case SIOCGIFMAC:
1255 		error = mac_ioctl_ifnet_get(td->td_proc->p_ucred, ifr, ifp);
1256 		break;
1257 #endif
1258 
1259 	case SIOCGIFMETRIC:
1260 		ifr->ifr_metric = ifp->if_metric;
1261 		break;
1262 
1263 	case SIOCGIFMTU:
1264 		ifr->ifr_mtu = ifp->if_mtu;
1265 		break;
1266 
1267 	case SIOCGIFPHYS:
1268 		ifr->ifr_phys = ifp->if_physical;
1269 		break;
1270 
1271 	case SIOCSIFFLAGS:
1272 		error = suser(td);
1273 		if (error)
1274 			return (error);
1275 		ifr->ifr_prevflags = ifp->if_flags;
1276 		if (ifp->if_flags & IFF_SMART) {
1277 			/* Smart drivers twiddle their own routes */
1278 		} else if (ifp->if_flags & IFF_UP &&
1279 		    (ifr->ifr_flags & IFF_UP) == 0) {
1280 			int s = splimp();
1281 			if_down(ifp);
1282 			splx(s);
1283 		} else if (ifr->ifr_flags & IFF_UP &&
1284 		    (ifp->if_flags & IFF_UP) == 0) {
1285 			int s = splimp();
1286 			if_up(ifp);
1287 			splx(s);
1288 		}
1289 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1290 			(ifr->ifr_flags &~ IFF_CANTCHANGE);
1291 		if (ifp->if_ioctl)
1292 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1293 		getmicrotime(&ifp->if_lastchange);
1294 		break;
1295 
1296 	case SIOCSIFCAP:
1297 		error = suser(td);
1298 		if (error)
1299 			return (error);
1300 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1301 			return (EINVAL);
1302 		(void) (*ifp->if_ioctl)(ifp, cmd, data);
1303 		break;
1304 
1305 #ifdef MAC
1306 	case SIOCSIFMAC:
1307 		error = mac_ioctl_ifnet_set(td->td_proc->p_ucred, ifr, ifp);
1308 		break;
1309 #endif
1310 
1311 	case SIOCSIFMETRIC:
1312 		error = suser(td);
1313 		if (error)
1314 			return (error);
1315 		ifp->if_metric = ifr->ifr_metric;
1316 		getmicrotime(&ifp->if_lastchange);
1317 		break;
1318 
1319 	case SIOCSIFPHYS:
1320 		error = suser(td);
1321 		if (error)
1322 			return error;
1323 		if (!ifp->if_ioctl)
1324 		        return EOPNOTSUPP;
1325 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1326 		if (error == 0)
1327 			getmicrotime(&ifp->if_lastchange);
1328 		return(error);
1329 
1330 	case SIOCSIFMTU:
1331 	{
1332 		u_long oldmtu = ifp->if_mtu;
1333 
1334 		error = suser(td);
1335 		if (error)
1336 			return (error);
1337 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1338 			return (EINVAL);
1339 		if (ifp->if_ioctl == NULL)
1340 			return (EOPNOTSUPP);
1341 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1342 		if (error == 0) {
1343 			getmicrotime(&ifp->if_lastchange);
1344 			rt_ifmsg(ifp);
1345 		}
1346 		/*
1347 		 * If the link MTU changed, do network layer specific procedure.
1348 		 */
1349 		if (ifp->if_mtu != oldmtu) {
1350 #ifdef INET6
1351 			nd6_setmtu(ifp);
1352 #endif
1353 		}
1354 		break;
1355 	}
1356 
1357 	case SIOCADDMULTI:
1358 	case SIOCDELMULTI:
1359 		error = suser(td);
1360 		if (error)
1361 			return (error);
1362 
1363 		/* Don't allow group membership on non-multicast interfaces. */
1364 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1365 			return (EOPNOTSUPP);
1366 
1367 		/* Don't let users screw up protocols' entries. */
1368 		if (ifr->ifr_addr.sa_family != AF_LINK)
1369 			return (EINVAL);
1370 
1371 		if (cmd == SIOCADDMULTI) {
1372 			struct ifmultiaddr *ifma;
1373 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1374 		} else {
1375 			error = if_delmulti(ifp, &ifr->ifr_addr);
1376 		}
1377 		if (error == 0)
1378 			getmicrotime(&ifp->if_lastchange);
1379 		break;
1380 
1381 	case SIOCSIFPHYADDR:
1382 	case SIOCDIFPHYADDR:
1383 #ifdef INET6
1384 	case SIOCSIFPHYADDR_IN6:
1385 #endif
1386 	case SIOCSLIFPHYADDR:
1387         case SIOCSIFMEDIA:
1388 	case SIOCSIFGENERIC:
1389 		error = suser(td);
1390 		if (error)
1391 			return (error);
1392 		if (ifp->if_ioctl == NULL)
1393 			return (EOPNOTSUPP);
1394 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1395 		if (error == 0)
1396 			getmicrotime(&ifp->if_lastchange);
1397 		break;
1398 
1399 	case SIOCGIFSTATUS:
1400 		ifs = (struct ifstat *)data;
1401 		ifs->ascii[0] = '\0';
1402 
1403 	case SIOCGIFPSRCADDR:
1404 	case SIOCGIFPDSTADDR:
1405 	case SIOCGLIFPHYADDR:
1406 	case SIOCGIFMEDIA:
1407 	case SIOCGIFGENERIC:
1408 		if (ifp->if_ioctl == 0)
1409 			return (EOPNOTSUPP);
1410 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1411 		break;
1412 
1413 	case SIOCSIFLLADDR:
1414 		error = suser(td);
1415 		if (error)
1416 			return (error);
1417 		error = if_setlladdr(ifp,
1418 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1419 		break;
1420 
1421 	default:
1422 		error = ENOIOCTL;
1423 		break;
1424 	}
1425 	return (error);
1426 }
1427 
1428 /*
1429  * Interface ioctls.
1430  */
1431 int
1432 ifioctl(so, cmd, data, td)
1433 	struct socket *so;
1434 	u_long cmd;
1435 	caddr_t data;
1436 	struct thread *td;
1437 {
1438 	struct ifnet *ifp;
1439 	struct ifreq *ifr;
1440 	int error;
1441 	short oif_flags;
1442 
1443 	switch (cmd) {
1444 	case SIOCGIFCONF:
1445 	case OSIOCGIFCONF:
1446 		return (ifconf(cmd, data));
1447 	}
1448 	ifr = (struct ifreq *)data;
1449 
1450 	switch (cmd) {
1451 	case SIOCIFCREATE:
1452 	case SIOCIFDESTROY:
1453 		if ((error = suser(td)) != 0)
1454 			return (error);
1455 		return ((cmd == SIOCIFCREATE) ?
1456 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1457 			if_clone_destroy(ifr->ifr_name));
1458 
1459 	case SIOCIFGCLONERS:
1460 		return (if_clone_list((struct if_clonereq *)data));
1461 	}
1462 
1463 	ifp = ifunit(ifr->ifr_name);
1464 	if (ifp == 0)
1465 		return (ENXIO);
1466 
1467 	error = ifhwioctl(cmd, ifp, data, td);
1468 	if (error != ENOIOCTL)
1469 		return (error);
1470 
1471 	oif_flags = ifp->if_flags;
1472 	if (so->so_proto == 0)
1473 		return (EOPNOTSUPP);
1474 #ifndef COMPAT_43
1475 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1476 								 data,
1477 								 ifp, td));
1478 #else
1479 	{
1480 		int ocmd = cmd;
1481 
1482 		switch (cmd) {
1483 
1484 		case SIOCSIFDSTADDR:
1485 		case SIOCSIFADDR:
1486 		case SIOCSIFBRDADDR:
1487 		case SIOCSIFNETMASK:
1488 #if BYTE_ORDER != BIG_ENDIAN
1489 			if (ifr->ifr_addr.sa_family == 0 &&
1490 			    ifr->ifr_addr.sa_len < 16) {
1491 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1492 				ifr->ifr_addr.sa_len = 16;
1493 			}
1494 #else
1495 			if (ifr->ifr_addr.sa_len == 0)
1496 				ifr->ifr_addr.sa_len = 16;
1497 #endif
1498 			break;
1499 
1500 		case OSIOCGIFADDR:
1501 			cmd = SIOCGIFADDR;
1502 			break;
1503 
1504 		case OSIOCGIFDSTADDR:
1505 			cmd = SIOCGIFDSTADDR;
1506 			break;
1507 
1508 		case OSIOCGIFBRDADDR:
1509 			cmd = SIOCGIFBRDADDR;
1510 			break;
1511 
1512 		case OSIOCGIFNETMASK:
1513 			cmd = SIOCGIFNETMASK;
1514 		}
1515 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1516 								   cmd,
1517 								   data,
1518 								   ifp, td));
1519 		switch (ocmd) {
1520 
1521 		case OSIOCGIFADDR:
1522 		case OSIOCGIFDSTADDR:
1523 		case OSIOCGIFBRDADDR:
1524 		case OSIOCGIFNETMASK:
1525 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1526 
1527 		}
1528 	}
1529 #endif /* COMPAT_43 */
1530 
1531 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1532 #ifdef INET6
1533 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1534 		if (ifp->if_flags & IFF_UP) {
1535 			int s = splimp();
1536 			in6_if_up(ifp);
1537 			splx(s);
1538 		}
1539 #endif
1540 	}
1541 	return (error);
1542 }
1543 
1544 /*
1545  * Set/clear promiscuous mode on interface ifp based on the truth value
1546  * of pswitch.  The calls are reference counted so that only the first
1547  * "on" request actually has an effect, as does the final "off" request.
1548  * Results are undefined if the "off" and "on" requests are not matched.
1549  */
1550 int
1551 ifpromisc(ifp, pswitch)
1552 	struct ifnet *ifp;
1553 	int pswitch;
1554 {
1555 	struct ifreq ifr;
1556 	int error;
1557 	int oldflags, oldpcount;
1558 
1559 	oldpcount = ifp->if_pcount;
1560 	oldflags = ifp->if_flags;
1561 	if (pswitch) {
1562 		/*
1563 		 * If the device is not configured up, we cannot put it in
1564 		 * promiscuous mode.
1565 		 */
1566 		if ((ifp->if_flags & IFF_UP) == 0)
1567 			return (ENETDOWN);
1568 		if (ifp->if_pcount++ != 0)
1569 			return (0);
1570 		ifp->if_flags |= IFF_PROMISC;
1571 	} else {
1572 		if (--ifp->if_pcount > 0)
1573 			return (0);
1574 		ifp->if_flags &= ~IFF_PROMISC;
1575 	}
1576 	ifr.ifr_flags = ifp->if_flags;
1577 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1578 	if (error == 0) {
1579 		log(LOG_INFO, "%s%d: promiscuous mode %s\n",
1580 		    ifp->if_name, ifp->if_unit,
1581 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1582 		rt_ifmsg(ifp);
1583 	} else {
1584 		ifp->if_pcount = oldpcount;
1585 		ifp->if_flags = oldflags;
1586 	}
1587 	return error;
1588 }
1589 
1590 /*
1591  * Return interface configuration
1592  * of system.  List may be used
1593  * in later ioctl's (above) to get
1594  * other information.
1595  */
1596 /*ARGSUSED*/
1597 static int
1598 ifconf(cmd, data)
1599 	u_long cmd;
1600 	caddr_t data;
1601 {
1602 	struct ifconf *ifc = (struct ifconf *)data;
1603 	struct ifnet *ifp;
1604 	struct ifaddr *ifa;
1605 	struct ifreq ifr, *ifrp;
1606 	int space = ifc->ifc_len, error = 0;
1607 
1608 	ifrp = ifc->ifc_req;
1609 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1610 		char workbuf[64];
1611 		int ifnlen, addrs;
1612 
1613 		if (space < sizeof(ifr))
1614 			break;
1615 		ifnlen = snprintf(workbuf, sizeof(workbuf),
1616 		    "%s%d", ifp->if_name, ifp->if_unit);
1617 		if(ifnlen + 1 > sizeof ifr.ifr_name) {
1618 			error = ENAMETOOLONG;
1619 			break;
1620 		} else {
1621 			strcpy(ifr.ifr_name, workbuf);
1622 		}
1623 
1624 		addrs = 0;
1625 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1626 			struct sockaddr *sa = ifa->ifa_addr;
1627 
1628 			if (space < sizeof(ifr))
1629 				break;
1630 			if (jailed(curthread->td_ucred) &&
1631 			    prison_if(curthread->td_ucred, sa))
1632 				continue;
1633 			addrs++;
1634 #ifdef COMPAT_43
1635 			if (cmd == OSIOCGIFCONF) {
1636 				struct osockaddr *osa =
1637 					 (struct osockaddr *)&ifr.ifr_addr;
1638 				ifr.ifr_addr = *sa;
1639 				osa->sa_family = sa->sa_family;
1640 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1641 						sizeof (ifr));
1642 				ifrp++;
1643 			} else
1644 #endif
1645 			if (sa->sa_len <= sizeof(*sa)) {
1646 				ifr.ifr_addr = *sa;
1647 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1648 						sizeof (ifr));
1649 				ifrp++;
1650 			} else {
1651 				if (space < sizeof (ifr) + sa->sa_len -
1652 					    sizeof(*sa))
1653 					break;
1654 				space -= sa->sa_len - sizeof(*sa);
1655 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1656 						sizeof (ifr.ifr_name));
1657 				if (error == 0)
1658 				    error = copyout((caddr_t)sa,
1659 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1660 				ifrp = (struct ifreq *)
1661 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1662 			}
1663 			if (error)
1664 				break;
1665 			space -= sizeof (ifr);
1666 		}
1667 		if (error)
1668 			break;
1669 		if (!addrs) {
1670 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1671 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1672 			    sizeof (ifr));
1673 			if (error)
1674 				break;
1675 			space -= sizeof (ifr);
1676 			ifrp++;
1677 		}
1678 	}
1679 	ifc->ifc_len -= space;
1680 	return (error);
1681 }
1682 
1683 /*
1684  * Just like if_promisc(), but for all-multicast-reception mode.
1685  */
1686 int
1687 if_allmulti(ifp, onswitch)
1688 	struct ifnet *ifp;
1689 	int onswitch;
1690 {
1691 	int error = 0;
1692 	int s = splimp();
1693 	struct ifreq ifr;
1694 
1695 	if (onswitch) {
1696 		if (ifp->if_amcount++ == 0) {
1697 			ifp->if_flags |= IFF_ALLMULTI;
1698 			ifr.ifr_flags = ifp->if_flags;
1699 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1700 		}
1701 	} else {
1702 		if (ifp->if_amcount > 1) {
1703 			ifp->if_amcount--;
1704 		} else {
1705 			ifp->if_amcount = 0;
1706 			ifp->if_flags &= ~IFF_ALLMULTI;
1707 			ifr.ifr_flags = ifp->if_flags;
1708 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1709 		}
1710 	}
1711 	splx(s);
1712 
1713 	if (error == 0)
1714 		rt_ifmsg(ifp);
1715 	return error;
1716 }
1717 
1718 /*
1719  * Add a multicast listenership to the interface in question.
1720  * The link layer provides a routine which converts
1721  */
1722 int
1723 if_addmulti(ifp, sa, retifma)
1724 	struct ifnet *ifp;	/* interface to manipulate */
1725 	struct sockaddr *sa;	/* address to add */
1726 	struct ifmultiaddr **retifma;
1727 {
1728 	struct sockaddr *llsa, *dupsa;
1729 	int error, s;
1730 	struct ifmultiaddr *ifma;
1731 
1732 	/*
1733 	 * If the matching multicast address already exists
1734 	 * then don't add a new one, just add a reference
1735 	 */
1736 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1737 		if (equal(sa, ifma->ifma_addr)) {
1738 			ifma->ifma_refcount++;
1739 			if (retifma)
1740 				*retifma = ifma;
1741 			return 0;
1742 		}
1743 	}
1744 
1745 	/*
1746 	 * Give the link layer a chance to accept/reject it, and also
1747 	 * find out which AF_LINK address this maps to, if it isn't one
1748 	 * already.
1749 	 */
1750 	if (ifp->if_resolvemulti) {
1751 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1752 		if (error) return error;
1753 	} else {
1754 		llsa = 0;
1755 	}
1756 
1757 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1758 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1759 	bcopy(sa, dupsa, sa->sa_len);
1760 
1761 	ifma->ifma_addr = dupsa;
1762 	ifma->ifma_lladdr = llsa;
1763 	ifma->ifma_ifp = ifp;
1764 	ifma->ifma_refcount = 1;
1765 	ifma->ifma_protospec = 0;
1766 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1767 
1768 	/*
1769 	 * Some network interfaces can scan the address list at
1770 	 * interrupt time; lock them out.
1771 	 */
1772 	s = splimp();
1773 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1774 	splx(s);
1775 	if (retifma != NULL)
1776 		*retifma = ifma;
1777 
1778 	if (llsa != 0) {
1779 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1780 			if (equal(ifma->ifma_addr, llsa))
1781 				break;
1782 		}
1783 		if (ifma) {
1784 			ifma->ifma_refcount++;
1785 		} else {
1786 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1787 			       M_IFMADDR, M_WAITOK);
1788 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1789 			       M_IFMADDR, M_WAITOK);
1790 			bcopy(llsa, dupsa, llsa->sa_len);
1791 			ifma->ifma_addr = dupsa;
1792 			ifma->ifma_ifp = ifp;
1793 			ifma->ifma_refcount = 1;
1794 			s = splimp();
1795 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1796 			splx(s);
1797 		}
1798 	}
1799 	/*
1800 	 * We are certain we have added something, so call down to the
1801 	 * interface to let them know about it.
1802 	 */
1803 	s = splimp();
1804 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1805 	splx(s);
1806 
1807 	return 0;
1808 }
1809 
1810 /*
1811  * Remove a reference to a multicast address on this interface.  Yell
1812  * if the request does not match an existing membership.
1813  */
1814 int
1815 if_delmulti(ifp, sa)
1816 	struct ifnet *ifp;
1817 	struct sockaddr *sa;
1818 {
1819 	struct ifmultiaddr *ifma;
1820 	int s;
1821 
1822 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1823 		if (equal(sa, ifma->ifma_addr))
1824 			break;
1825 	if (ifma == 0)
1826 		return ENOENT;
1827 
1828 	if (ifma->ifma_refcount > 1) {
1829 		ifma->ifma_refcount--;
1830 		return 0;
1831 	}
1832 
1833 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1834 	sa = ifma->ifma_lladdr;
1835 	s = splimp();
1836 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1837 	/*
1838 	 * Make sure the interface driver is notified
1839 	 * in the case of a link layer mcast group being left.
1840 	 */
1841 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1842 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1843 	splx(s);
1844 	free(ifma->ifma_addr, M_IFMADDR);
1845 	free(ifma, M_IFMADDR);
1846 	if (sa == 0)
1847 		return 0;
1848 
1849 	/*
1850 	 * Now look for the link-layer address which corresponds to
1851 	 * this network address.  It had been squirreled away in
1852 	 * ifma->ifma_lladdr for this purpose (so we don't have
1853 	 * to call ifp->if_resolvemulti() again), and we saved that
1854 	 * value in sa above.  If some nasty deleted the
1855 	 * link-layer address out from underneath us, we can deal because
1856 	 * the address we stored was is not the same as the one which was
1857 	 * in the record for the link-layer address.  (So we don't complain
1858 	 * in that case.)
1859 	 */
1860 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1861 		if (equal(sa, ifma->ifma_addr))
1862 			break;
1863 	if (ifma == 0)
1864 		return 0;
1865 
1866 	if (ifma->ifma_refcount > 1) {
1867 		ifma->ifma_refcount--;
1868 		return 0;
1869 	}
1870 
1871 	s = splimp();
1872 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1873 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1874 	splx(s);
1875 	free(ifma->ifma_addr, M_IFMADDR);
1876 	free(sa, M_IFMADDR);
1877 	free(ifma, M_IFMADDR);
1878 
1879 	return 0;
1880 }
1881 
1882 /*
1883  * Set the link layer address on an interface.
1884  *
1885  * At this time we only support certain types of interfaces,
1886  * and we don't allow the length of the address to change.
1887  */
1888 int
1889 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1890 {
1891 	struct sockaddr_dl *sdl;
1892 	struct ifaddr *ifa;
1893 	struct ifreq ifr;
1894 
1895 	ifa = ifaddr_byindex(ifp->if_index);
1896 	if (ifa == NULL)
1897 		return (EINVAL);
1898 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1899 	if (sdl == NULL)
1900 		return (EINVAL);
1901 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1902 		return (EINVAL);
1903 	switch (ifp->if_type) {
1904 	case IFT_ETHER:			/* these types use struct arpcom */
1905 	case IFT_FDDI:
1906 	case IFT_XETHER:
1907 	case IFT_ISO88025:
1908 	case IFT_L2VLAN:
1909 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1910 		bcopy(lladdr, LLADDR(sdl), len);
1911 		break;
1912 	default:
1913 		return (ENODEV);
1914 	}
1915 	/*
1916 	 * If the interface is already up, we need
1917 	 * to re-init it in order to reprogram its
1918 	 * address filter.
1919 	 */
1920 	if ((ifp->if_flags & IFF_UP) != 0) {
1921 		ifp->if_flags &= ~IFF_UP;
1922 		ifr.ifr_flags = ifp->if_flags;
1923 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1924 		ifp->if_flags |= IFF_UP;
1925 		ifr.ifr_flags = ifp->if_flags;
1926 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1927 #ifdef INET
1928 		/*
1929 		 * Also send gratuitous ARPs to notify other nodes about
1930 		 * the address change.
1931 		 */
1932 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1933 			if (ifa->ifa_addr != NULL &&
1934 			    ifa->ifa_addr->sa_family == AF_INET)
1935 				arp_ifinit(ifp, ifa);
1936 		}
1937 #endif
1938 	}
1939 	return (0);
1940 }
1941 
1942 struct ifmultiaddr *
1943 ifmaof_ifpforaddr(sa, ifp)
1944 	struct sockaddr *sa;
1945 	struct ifnet *ifp;
1946 {
1947 	struct ifmultiaddr *ifma;
1948 
1949 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1950 		if (equal(ifma->ifma_addr, sa))
1951 			break;
1952 
1953 	return ifma;
1954 }
1955 
/*
 * Sysctl tree nodes for link layers: a "link" node under _net (numbered
 * PF_LINK) and a "generic" node beneath it for generic link management.
 */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1958