xref: /freebsd/sys/net/if.c (revision b52b9d56d4e96089873a75f9e29062eec19fabba)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if.c	8.5 (Berkeley) 1/9/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/malloc.h>
44 #include <sys/bus.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/protosw.h>
51 #include <sys/kernel.h>
52 #include <sys/sockio.h>
53 #include <sys/syslog.h>
54 #include <sys/sysctl.h>
55 #include <sys/jail.h>
56 
57 #include <net/if.h>
58 #include <net/if_arp.h>
59 #include <net/if_dl.h>
60 #include <net/if_types.h>
61 #include <net/if_var.h>
62 #include <net/radix.h>
63 #include <net/route.h>
64 
65 #if defined(INET) || defined(INET6)
66 /*XXX*/
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #ifdef INET6
70 #include <netinet6/in6_var.h>
71 #include <netinet6/in6_ifattach.h>
72 #endif
73 #endif
74 #ifdef INET
75 #include <netinet/if_ether.h>
76 #endif
77 
78 static int	ifconf(u_long, caddr_t);
79 static void	if_grow(void);
80 static void	if_init(void *);
81 static void	if_check(void *);
82 static int	if_findindex(struct ifnet *);
83 static void	if_qflush(struct ifqueue *);
84 static void	if_slowtimo(void *);
85 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
86 static int	if_rtdel(struct radix_node *, void *);
87 static struct	if_clone *if_clone_lookup(const char *, int *);
88 static int	if_clone_list(struct if_clonereq *);
89 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
90 #ifdef INET6
91 /*
92  * XXX: declare here to avoid to include many inet6 related files..
93  * should be more generalized?
94  */
95 extern void	nd6_setmtu(struct ifnet *);
96 #endif
97 
98 int	if_index = 0;
99 struct	ifindex_entry *ifindex_table = NULL;
100 int	ifqmaxlen = IFQ_MAXLEN;
101 struct	ifnethead ifnet;	/* depend on static init XXX */
102 int	if_cloners_count;
103 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
104 
105 static int	if_indexlim = 8;
106 static struct	klist ifklist;
107 
108 static void	filt_netdetach(struct knote *kn);
109 static int	filt_netdev(struct knote *kn, long hint);
110 
111 static struct filterops netdev_filtops =
112     { 1, NULL, filt_netdetach, filt_netdev };
113 
114 /*
115  * System initialization
116  */
117 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
118 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
119 
120 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
121 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
122 MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
123 
124 #define CDEV_MAJOR	165
125 
126 static d_open_t		netopen;
127 static d_close_t	netclose;
128 static d_ioctl_t	netioctl;
129 static d_kqfilter_t	netkqfilter;
130 
131 static struct cdevsw net_cdevsw = {
132 	/* open */	netopen,
133 	/* close */	netclose,
134 	/* read */	noread,
135 	/* write */	nowrite,
136 	/* ioctl */	netioctl,
137 	/* poll */	nopoll,
138 	/* mmap */	nommap,
139 	/* strategy */	nostrategy,
140 	/* name */	"net",
141 	/* maj */	CDEV_MAJOR,
142 	/* dump */	nodump,
143 	/* psize */	nopsize,
144 	/* flags */	D_KQFILTER,
145 	/* kqfilter */	netkqfilter,
146 };
147 
/*
 * Open entry point of the "net" devices.  Nothing is set up per open,
 * so the call always succeeds.
 */
static int
netopen(dev_t dev, int flag, int mode, struct thread *td)
{
	int error = 0;

	return (error);
}
153 
/*
 * Close entry point of the "net" devices.  There is nothing to tear
 * down, so the call always succeeds.
 */
static int
netclose(dev_t dev, int flags, int fmt, struct thread *td)
{
	int error = 0;

	return (error);
}
159 
160 static int
161 netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
162 {
163 	struct ifnet *ifp;
164 	int error, idx;
165 
166 	/* only support interface specific ioctls */
167 	if (IOCGROUP(cmd) != 'i')
168 		return (EOPNOTSUPP);
169 	idx = minor(dev);
170 	if (idx == 0) {
171 		/*
172 		 * special network device, not interface.
173 		 */
174 		if (cmd == SIOCGIFCONF)
175 			return (ifconf(cmd, data));	/* XXX remove cmd */
176 		return (EOPNOTSUPP);
177 	}
178 
179 	ifp = ifnet_byindex(idx);
180 	if (ifp == NULL)
181 		return (ENXIO);
182 
183 	error = ifhwioctl(cmd, ifp, data, td);
184 	if (error == ENOIOCTL)
185 		error = EOPNOTSUPP;
186 	return (error);
187 }
188 
189 static int
190 netkqfilter(dev_t dev, struct knote *kn)
191 {
192 	struct klist *klist;
193 	struct ifnet *ifp;
194 	int idx;
195 
196 	idx = minor(dev);
197 	if (idx == 0) {
198 		klist = &ifklist;
199 	} else {
200 		ifp = ifnet_byindex(idx);
201 		if (ifp == NULL)
202 			return (1);
203 		klist = &ifp->if_klist;
204 	}
205 
206 	switch (kn->kn_filter) {
207 	case EVFILT_NETDEV:
208 		kn->kn_fop = &netdev_filtops;
209 		break;
210 	default:
211 		return (1);
212 	}
213 
214 	kn->kn_hook = (caddr_t)klist;
215 
216 	/* XXX locking? */
217 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
218 
219 	return (0);
220 }
221 
222 static void
223 filt_netdetach(struct knote *kn)
224 {
225 	struct klist *klist = (struct klist *)kn->kn_hook;
226 
227 	if (kn->kn_status & KN_DETACHED)
228 		return;
229 	SLIST_REMOVE(klist, kn, knote, kn_selnext);
230 }
231 
232 static int
233 filt_netdev(struct knote *kn, long hint)
234 {
235 
236 	/*
237 	 * Currently NOTE_EXIT is abused to indicate device detach.
238 	 */
239 	if (hint == NOTE_EXIT) {
240 		kn->kn_data = NOTE_LINKINV;
241                 kn->kn_status |= KN_DETACHED;
242                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
243                 return (1);
244         }
245 	kn->kn_data = hint;			/* current status */
246 	if (kn->kn_sfflags & hint)
247 		kn->kn_fflags |= hint;
248 	return (kn->kn_fflags != 0);
249 }
250 
251 /*
252  * Network interface utility routines.
253  *
254  * Routines with ifa_ifwith* names take sockaddr *'s as
255  * parameters.
256  */
257 /* ARGSUSED*/
258 static void
259 if_init(dummy)
260 	void *dummy;
261 {
262 
263 	TAILQ_INIT(&ifnet);
264 	SLIST_INIT(&ifklist);
265 	if_grow();				/* create initial table */
266 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
267 	    UID_ROOT, GID_WHEEL, 0600, "network");
268 }
269 
270 static void
271 if_grow(void)
272 {
273 	u_int n;
274 	struct ifindex_entry *e;
275 
276 	if_indexlim <<= 1;
277 	n = if_indexlim * sizeof(*e);
278 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
279 	if (ifindex_table != NULL) {
280 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
281 		free((caddr_t)ifindex_table, M_IFADDR);
282 	}
283 	ifindex_table = e;
284 }
285 
286 /* ARGSUSED*/
287 static void
288 if_check(dummy)
289 	void *dummy;
290 {
291 	struct ifnet *ifp;
292 	int s;
293 
294 	s = splimp();
295 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
296 		if (ifp->if_snd.ifq_maxlen == 0) {
297 			printf("%s%d XXX: driver didn't set ifq_maxlen\n",
298 			    ifp->if_name, ifp->if_unit);
299 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
300 		}
301 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
302 			printf("%s%d XXX: driver didn't initialize queue mtx\n",
303 			    ifp->if_name, ifp->if_unit);
304 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
305 			    MTX_NETWORK_LOCK, MTX_DEF);
306 		}
307 	}
308 	splx(s);
309 	if_slowtimo(0);
310 }
311 
312 static int
313 if_findindex(struct ifnet *ifp)
314 {
315 	int i, unit;
316 	char eaddr[18], devname[32];
317 	const char *name, *p;
318 
319 	switch (ifp->if_type) {
320 	case IFT_ETHER:			/* these types use struct arpcom */
321 	case IFT_FDDI:
322 	case IFT_XETHER:
323 	case IFT_ISO88025:
324 	case IFT_L2VLAN:
325 		snprintf(eaddr, 18, "%6D",
326 		    ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":");
327 		break;
328 	default:
329 		eaddr[0] = '\0';
330 		break;
331 	}
332 	snprintf(devname, 32, "%s%d", ifp->if_name, ifp->if_unit);
333 	name = net_cdevsw.d_name;
334 	i = 0;
335 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
336 		if (resource_string_value(name, unit, "ether", &p) == 0)
337 			if (strcmp(p, eaddr) == 0)
338 				goto found;
339 		if (resource_string_value(name, unit, "dev", &p) == 0)
340 			if (strcmp(p, devname) == 0)
341 				goto found;
342 	}
343 	unit = 0;
344 found:
345 	if (unit != 0) {
346 		if (ifaddr_byindex(unit) == NULL)
347 			return (unit);
348 		printf("%s%d in use, cannot hardwire it to %s.\n",
349 		    name, unit, devname);
350 	}
351 	for (unit = 1; ; unit++) {
352 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
353 			continue;
354 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
355 		    resource_string_value(name, unit, "dev", &p) == 0)
356 			continue;
357 		break;
358 	}
359 	return (unit);
360 }
361 
362 /*
363  * Attach an interface to the
364  * list of "active" interfaces.
365  */
366 void
367 if_attach(ifp)
368 	struct ifnet *ifp;
369 {
370 	unsigned socksize, ifasize;
371 	int namelen, masklen;
372 	char workbuf[64];
373 	register struct sockaddr_dl *sdl;
374 	register struct ifaddr *ifa;
375 
376 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
377 	/*
378 	 * XXX -
379 	 * The old code would work if the interface passed a pre-existing
380 	 * chain of ifaddrs to this code.  We don't trust our callers to
381 	 * properly initialize the tailq, however, so we no longer allow
382 	 * this unlikely case.
383 	 */
384 	TAILQ_INIT(&ifp->if_addrhead);
385 	TAILQ_INIT(&ifp->if_prefixhead);
386 	TAILQ_INIT(&ifp->if_multiaddrs);
387 	SLIST_INIT(&ifp->if_klist);
388 	getmicrotime(&ifp->if_lastchange);
389 	ifp->if_index = if_findindex(ifp);
390 	if (ifp->if_index > if_index)
391 		if_index = ifp->if_index;
392 	if (if_index >= if_indexlim)
393 		if_grow();
394 
395 	ifnet_byindex(ifp->if_index) = ifp;
396 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw, ifp->if_index,
397 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s%d",
398 	    net_cdevsw.d_name, ifp->if_name, ifp->if_unit);
399 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
400 	    net_cdevsw.d_name, ifp->if_index);
401 
402 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_name, "if send queue", MTX_DEF);
403 
404 	/*
405 	 * create a Link Level name for this device
406 	 */
407 	namelen = snprintf(workbuf, sizeof(workbuf),
408 	    "%s%d", ifp->if_name, ifp->if_unit);
409 #define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
410 	masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
411 	socksize = masklen + ifp->if_addrlen;
412 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
413 	if (socksize < sizeof(*sdl))
414 		socksize = sizeof(*sdl);
415 	socksize = ROUNDUP(socksize);
416 	ifasize = sizeof(*ifa) + 2 * socksize;
417 	ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
418 	if (ifa) {
419 		sdl = (struct sockaddr_dl *)(ifa + 1);
420 		sdl->sdl_len = socksize;
421 		sdl->sdl_family = AF_LINK;
422 		bcopy(workbuf, sdl->sdl_data, namelen);
423 		sdl->sdl_nlen = namelen;
424 		sdl->sdl_index = ifp->if_index;
425 		sdl->sdl_type = ifp->if_type;
426 		ifaddr_byindex(ifp->if_index) = ifa;
427 		ifa->ifa_ifp = ifp;
428 		ifa->ifa_rtrequest = link_rtrequest;
429 		ifa->ifa_addr = (struct sockaddr *)sdl;
430 		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
431 		ifa->ifa_netmask = (struct sockaddr *)sdl;
432 		sdl->sdl_len = masklen;
433 		while (namelen != 0)
434 			sdl->sdl_data[--namelen] = 0xff;
435 		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
436 	}
437 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
438 
439 	/* Announce the interface. */
440 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
441 }
442 
443 /*
444  * Detach an interface, removing it from the
445  * list of "active" interfaces.
446  */
447 void
448 if_detach(ifp)
449 	struct ifnet *ifp;
450 {
451 	struct ifaddr *ifa;
452 	struct radix_node_head	*rnh;
453 	int s;
454 	int i;
455 
456 	/*
457 	 * Remove routes and flush queues.
458 	 */
459 	s = splnet();
460 	if_down(ifp);
461 
462 	/*
463 	 * Remove address from ifindex_table[] and maybe decrement if_index.
464 	 * Clean up all addresses.
465 	 */
466 	ifaddr_byindex(ifp->if_index) = NULL;
467 	revoke_and_destroy_dev(ifdev_byindex(ifp->if_index));
468 	ifdev_byindex(ifp->if_index) = NULL;
469 
470 	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
471 		if_index--;
472 
473 	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
474 	     ifa = TAILQ_FIRST(&ifp->if_addrhead)) {
475 #ifdef INET
476 		/* XXX: Ugly!! ad hoc just for INET */
477 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
478 			struct ifaliasreq ifr;
479 
480 			bzero(&ifr, sizeof(ifr));
481 			ifr.ifra_addr = *ifa->ifa_addr;
482 			if (ifa->ifa_dstaddr)
483 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
484 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
485 			    NULL) == 0)
486 				continue;
487 		}
488 #endif /* INET */
489 #ifdef INET6
490 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
491 			in6_purgeaddr(ifa);
492 			/* ifp_addrhead is already updated */
493 			continue;
494 		}
495 #endif /* INET6 */
496 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
497 		IFAFREE(ifa);
498 	}
499 
500 #ifdef INET6
501 	/*
502 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
503 	 * before removing routing entries below, since IPv6 interface direct
504 	 * routes are expected to be removed by the IPv6-specific kernel API.
505 	 * Otherwise, the kernel will detect some inconsistency and bark it.
506 	 */
507 	in6_ifdetach(ifp);
508 #endif
509 
510 	/*
511 	 * Delete all remaining routes using this interface
512 	 * Unfortuneatly the only way to do this is to slog through
513 	 * the entire routing table looking for routes which point
514 	 * to this interface...oh well...
515 	 */
516 	for (i = 1; i <= AF_MAX; i++) {
517 		if ((rnh = rt_tables[i]) == NULL)
518 			continue;
519 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
520 	}
521 
522 	/* Announce that the interface is gone. */
523 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
524 
525 	KNOTE(&ifp->if_klist, NOTE_EXIT);
526 	TAILQ_REMOVE(&ifnet, ifp, if_link);
527 	mtx_destroy(&ifp->if_snd.ifq_mtx);
528 	splx(s);
529 }
530 
531 /*
532  * Delete Routes for a Network Interface
533  *
534  * Called for each routing entry via the rnh->rnh_walktree() call above
535  * to delete all route entries referencing a detaching network interface.
536  *
537  * Arguments:
538  *	rn	pointer to node in the routing table
539  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
540  *
541  * Returns:
542  *	0	successful
543  *	errno	failed - reason indicated
544  *
545  */
546 static int
547 if_rtdel(rn, arg)
548 	struct radix_node	*rn;
549 	void			*arg;
550 {
551 	struct rtentry	*rt = (struct rtentry *)rn;
552 	struct ifnet	*ifp = arg;
553 	int		err;
554 
555 	if (rt->rt_ifp == ifp) {
556 
557 		/*
558 		 * Protect (sorta) against walktree recursion problems
559 		 * with cloned routes
560 		 */
561 		if ((rt->rt_flags & RTF_UP) == 0)
562 			return (0);
563 
564 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
565 				rt_mask(rt), rt->rt_flags,
566 				(struct rtentry **) NULL);
567 		if (err) {
568 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
569 		}
570 	}
571 
572 	return (0);
573 }
574 
575 /*
576  * Create a clone network interface.
577  */
578 int
579 if_clone_create(name, len)
580 	char *name;
581 	int len;
582 {
583 	struct if_clone *ifc;
584 	char *dp;
585 	int wildcard, bytoff, bitoff;
586 	int unit;
587 	int err;
588 
589 	ifc = if_clone_lookup(name, &unit);
590 	if (ifc == NULL)
591 		return (EINVAL);
592 
593 	if (ifunit(name) != NULL)
594 		return (EEXIST);
595 
596 	bytoff = bitoff = 0;
597 	wildcard = (unit < 0);
598 	/*
599 	 * Find a free unit if none was given.
600 	 */
601 	if (wildcard) {
602 		while ((bytoff < ifc->ifc_bmlen)
603 		    && (ifc->ifc_units[bytoff] == 0xff))
604 			bytoff++;
605 		if (bytoff >= ifc->ifc_bmlen)
606 			return (ENOSPC);
607 		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
608 			bitoff++;
609 		unit = (bytoff << 3) + bitoff;
610 	}
611 
612 	if (unit > ifc->ifc_maxunit)
613 		return (ENXIO);
614 
615 	err = (*ifc->ifc_create)(ifc, unit);
616 	if (err != 0)
617 		return (err);
618 
619 	if (!wildcard) {
620 		bytoff = unit >> 3;
621 		bitoff = unit - (bytoff << 3);
622 	}
623 
624 	/*
625 	 * Allocate the unit in the bitmap.
626 	 */
627 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
628 	    ("%s: bit is already set", __func__));
629 	ifc->ifc_units[bytoff] |= (1 << bitoff);
630 
631 	/* In the wildcard case, we need to update the name. */
632 	if (wildcard) {
633 		for (dp = name; *dp != '\0'; dp++);
634 		if (snprintf(dp, len - (dp-name), "%d", unit) >
635 		    len - (dp-name) - 1) {
636 			/*
637 			 * This can only be a programmer error and
638 			 * there's no straightforward way to recover if
639 			 * it happens.
640 			 */
641 			panic("if_clone_create(): interface name too long");
642 		}
643 
644 	}
645 
646 	return (0);
647 }
648 
649 /*
650  * Destroy a clone network interface.
651  */
652 int
653 if_clone_destroy(name)
654 	const char *name;
655 {
656 	struct if_clone *ifc;
657 	struct ifnet *ifp;
658 	int bytoff, bitoff;
659 	int unit;
660 
661 	ifc = if_clone_lookup(name, &unit);
662 	if (ifc == NULL)
663 		return (EINVAL);
664 
665 	if (unit < ifc->ifc_minifs)
666 		return (EINVAL);
667 
668 	ifp = ifunit(name);
669 	if (ifp == NULL)
670 		return (ENXIO);
671 
672 	if (ifc->ifc_destroy == NULL)
673 		return (EOPNOTSUPP);
674 
675 	(*ifc->ifc_destroy)(ifp);
676 
677 	/*
678 	 * Compute offset in the bitmap and deallocate the unit.
679 	 */
680 	bytoff = unit >> 3;
681 	bitoff = unit - (bytoff << 3);
682 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
683 	    ("%s: bit is already cleared", __func__));
684 	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
685 	return (0);
686 }
687 
688 /*
689  * Look up a network interface cloner.
690  */
691 static struct if_clone *
692 if_clone_lookup(name, unitp)
693 	const char *name;
694 	int *unitp;
695 {
696 	struct if_clone *ifc;
697 	const char *cp;
698 	int i;
699 
700 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) {
701 		for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) {
702 			if (ifc->ifc_name[i] != *cp)
703 				goto next_ifc;
704 		}
705 		goto found_name;
706  next_ifc:
707 		ifc = LIST_NEXT(ifc, ifc_list);
708 	}
709 
710 	/* No match. */
711 	return ((struct if_clone *)NULL);
712 
713  found_name:
714 	if (*cp == '\0') {
715 		i = -1;
716 	} else {
717 		for (i = 0; *cp != '\0'; cp++) {
718 			if (*cp < '0' || *cp > '9') {
719 				/* Bogus unit number. */
720 				return (NULL);
721 			}
722 			i = (i * 10) + (*cp - '0');
723 		}
724 	}
725 
726 	if (unitp != NULL)
727 		*unitp = i;
728 	return (ifc);
729 }
730 
731 /*
732  * Register a network interface cloner.
733  */
734 void
735 if_clone_attach(ifc)
736 	struct if_clone *ifc;
737 {
738 	int bytoff, bitoff;
739 	int err;
740 	int len, maxclone;
741 	int unit;
742 
743 	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
744 	    ("%s: %s requested more units then allowed (%d > %d)",
745 	    __func__, ifc->ifc_name, ifc->ifc_minifs,
746 	    ifc->ifc_maxunit + 1));
747 	/*
748 	 * Compute bitmap size and allocate it.
749 	 */
750 	maxclone = ifc->ifc_maxunit + 1;
751 	len = maxclone >> 3;
752 	if ((len << 3) < maxclone)
753 		len++;
754 	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
755 	ifc->ifc_bmlen = len;
756 
757 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
758 	if_cloners_count++;
759 
760 	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
761 		err = (*ifc->ifc_create)(ifc, unit);
762 		KASSERT(err == 0,
763 		    ("%s: failed to create required interface %s%d",
764 		    __func__, ifc->ifc_name, unit));
765 
766 		/* Allocate the unit in the bitmap. */
767 		bytoff = unit >> 3;
768 		bitoff = unit - (bytoff << 3);
769 		ifc->ifc_units[bytoff] |= (1 << bitoff);
770 	}
771 }
772 
773 /*
774  * Unregister a network interface cloner.
775  */
776 void
777 if_clone_detach(ifc)
778 	struct if_clone *ifc;
779 {
780 
781 	LIST_REMOVE(ifc, ifc_list);
782 	free(ifc->ifc_units, M_CLONE);
783 	if_cloners_count--;
784 }
785 
786 /*
787  * Provide list of interface cloners to userspace.
788  */
789 static int
790 if_clone_list(ifcr)
791 	struct if_clonereq *ifcr;
792 {
793 	char outbuf[IFNAMSIZ], *dst;
794 	struct if_clone *ifc;
795 	int count, error = 0;
796 
797 	ifcr->ifcr_total = if_cloners_count;
798 	if ((dst = ifcr->ifcr_buffer) == NULL) {
799 		/* Just asking how many there are. */
800 		return (0);
801 	}
802 
803 	if (ifcr->ifcr_count < 0)
804 		return (EINVAL);
805 
806 	count = (if_cloners_count < ifcr->ifcr_count) ?
807 	    if_cloners_count : ifcr->ifcr_count;
808 
809 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
810 	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
811 		strncpy(outbuf, ifc->ifc_name, IFNAMSIZ);
812 		outbuf[IFNAMSIZ - 1] = '\0';	/* sanity */
813 		error = copyout(outbuf, dst, IFNAMSIZ);
814 		if (error)
815 			break;
816 	}
817 
818 	return (error);
819 }
820 
821 /*
822  * Locate an interface based on a complete address.
823  */
824 /*ARGSUSED*/
825 struct ifaddr *
826 ifa_ifwithaddr(addr)
827 	struct sockaddr *addr;
828 {
829 	struct ifnet *ifp;
830 	struct ifaddr *ifa;
831 
832 #define	equal(a1, a2) \
833   (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
834 	TAILQ_FOREACH(ifp, &ifnet, if_link)
835 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
836 			if (ifa->ifa_addr->sa_family != addr->sa_family)
837 				continue;
838 			if (equal(addr, ifa->ifa_addr))
839 				goto done;
840 			/* IP6 doesn't have broadcast */
841 			if ((ifp->if_flags & IFF_BROADCAST) &&
842 			    ifa->ifa_broadaddr &&
843 			    ifa->ifa_broadaddr->sa_len != 0 &&
844 			    equal(ifa->ifa_broadaddr, addr))
845 				goto done;
846 		}
847 	ifa = NULL;
848 done:
849 	return (ifa);
850 }
851 
852 /*
853  * Locate the point to point interface with a given destination address.
854  */
855 /*ARGSUSED*/
856 struct ifaddr *
857 ifa_ifwithdstaddr(addr)
858 	struct sockaddr *addr;
859 {
860 	struct ifnet *ifp;
861 	struct ifaddr *ifa;
862 
863 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
864 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
865 			continue;
866 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
867 			if (ifa->ifa_addr->sa_family != addr->sa_family)
868 				continue;
869 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
870 				goto done;
871 		}
872 	}
873 	ifa = NULL;
874 done:
875 	return (ifa);
876 }
877 
878 /*
879  * Find an interface on a specific network.  If many, choice
880  * is most specific found.
881  */
882 struct ifaddr *
883 ifa_ifwithnet(addr)
884 	struct sockaddr *addr;
885 {
886 	register struct ifnet *ifp;
887 	register struct ifaddr *ifa;
888 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
889 	u_int af = addr->sa_family;
890 	char *addr_data = addr->sa_data, *cplim;
891 
892 	/*
893 	 * AF_LINK addresses can be looked up directly by their index number,
894 	 * so do that if we can.
895 	 */
896 	if (af == AF_LINK) {
897 	    register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
898 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
899 		return (ifaddr_byindex(sdl->sdl_index));
900 	}
901 
902 	/*
903 	 * Scan though each interface, looking for ones that have
904 	 * addresses in this address family.
905 	 */
906 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
907 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
908 			register char *cp, *cp2, *cp3;
909 
910 			if (ifa->ifa_addr->sa_family != af)
911 next:				continue;
912 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
913 				/*
914 				 * This is a bit broken as it doesn't
915 				 * take into account that the remote end may
916 				 * be a single node in the network we are
917 				 * looking for.
918 				 * The trouble is that we don't know the
919 				 * netmask for the remote end.
920 				 */
921 				if (ifa->ifa_dstaddr != 0
922 				    && equal(addr, ifa->ifa_dstaddr))
923 					goto done;
924 			} else {
925 				/*
926 				 * if we have a special address handler,
927 				 * then use it instead of the generic one.
928 				 */
929 	          		if (ifa->ifa_claim_addr) {
930 					if ((*ifa->ifa_claim_addr)(ifa, addr))
931 						goto done;
932 					continue;
933 				}
934 
935 				/*
936 				 * Scan all the bits in the ifa's address.
937 				 * If a bit dissagrees with what we are
938 				 * looking for, mask it with the netmask
939 				 * to see if it really matters.
940 				 * (A byte at a time)
941 				 */
942 				if (ifa->ifa_netmask == 0)
943 					continue;
944 				cp = addr_data;
945 				cp2 = ifa->ifa_addr->sa_data;
946 				cp3 = ifa->ifa_netmask->sa_data;
947 				cplim = ifa->ifa_netmask->sa_len
948 					+ (char *)ifa->ifa_netmask;
949 				while (cp3 < cplim)
950 					if ((*cp++ ^ *cp2++) & *cp3++)
951 						goto next; /* next address! */
952 				/*
953 				 * If the netmask of what we just found
954 				 * is more specific than what we had before
955 				 * (if we had one) then remember the new one
956 				 * before continuing to search
957 				 * for an even better one.
958 				 */
959 				if (ifa_maybe == 0 ||
960 				    rn_refines((caddr_t)ifa->ifa_netmask,
961 				    (caddr_t)ifa_maybe->ifa_netmask))
962 					ifa_maybe = ifa;
963 			}
964 		}
965 	}
966 	ifa = ifa_maybe;
967 done:
968 	return (ifa);
969 }
970 
971 /*
972  * Find an interface address specific to an interface best matching
973  * a given address.
974  */
975 struct ifaddr *
976 ifaof_ifpforaddr(addr, ifp)
977 	struct sockaddr *addr;
978 	register struct ifnet *ifp;
979 {
980 	register struct ifaddr *ifa;
981 	register char *cp, *cp2, *cp3;
982 	register char *cplim;
983 	struct ifaddr *ifa_maybe = 0;
984 	u_int af = addr->sa_family;
985 
986 	if (af >= AF_MAX)
987 		return (0);
988 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
989 		if (ifa->ifa_addr->sa_family != af)
990 			continue;
991 		if (ifa_maybe == 0)
992 			ifa_maybe = ifa;
993 		if (ifa->ifa_netmask == 0) {
994 			if (equal(addr, ifa->ifa_addr) ||
995 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
996 				goto done;
997 			continue;
998 		}
999 		if (ifp->if_flags & IFF_POINTOPOINT) {
1000 			if (equal(addr, ifa->ifa_dstaddr))
1001 				goto done;
1002 		} else {
1003 			cp = addr->sa_data;
1004 			cp2 = ifa->ifa_addr->sa_data;
1005 			cp3 = ifa->ifa_netmask->sa_data;
1006 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1007 			for (; cp3 < cplim; cp3++)
1008 				if ((*cp++ ^ *cp2++) & *cp3)
1009 					break;
1010 			if (cp3 == cplim)
1011 				goto done;
1012 		}
1013 	}
1014 	ifa = ifa_maybe;
1015 done:
1016 	return (ifa);
1017 }
1018 
1019 #include <net/route.h>
1020 
1021 /*
1022  * Default action when installing a route with a Link Level gateway.
1023  * Lookup an appropriate real ifa to point to.
1024  * This should be moved to /sys/net/link.c eventually.
1025  */
1026 static void
1027 link_rtrequest(cmd, rt, info)
1028 	int cmd;
1029 	register struct rtentry *rt;
1030 	struct rt_addrinfo *info;
1031 {
1032 	register struct ifaddr *ifa;
1033 	struct sockaddr *dst;
1034 	struct ifnet *ifp;
1035 
1036 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1037 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1038 		return;
1039 	ifa = ifaof_ifpforaddr(dst, ifp);
1040 	if (ifa) {
1041 		IFAFREE(rt->rt_ifa);
1042 		rt->rt_ifa = ifa;
1043 		ifa->ifa_refcnt++;
1044 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1045 			ifa->ifa_rtrequest(cmd, rt, info);
1046 	}
1047 }
1048 
1049 /*
1050  * Mark an interface down and notify protocols of
1051  * the transition.
1052  * NOTE: must be called at splnet or eqivalent.
1053  */
1054 void
1055 if_unroute(ifp, flag, fam)
1056 	register struct ifnet *ifp;
1057 	int flag, fam;
1058 {
1059 	register struct ifaddr *ifa;
1060 
1061 	ifp->if_flags &= ~flag;
1062 	getmicrotime(&ifp->if_lastchange);
1063 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1064 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1065 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1066 	if_qflush(&ifp->if_snd);
1067 	rt_ifmsg(ifp);
1068 }
1069 
1070 /*
1071  * Mark an interface up and notify protocols of
1072  * the transition.
1073  * NOTE: must be called at splnet or eqivalent.
1074  */
1075 void
1076 if_route(ifp, flag, fam)
1077 	register struct ifnet *ifp;
1078 	int flag, fam;
1079 {
1080 	register struct ifaddr *ifa;
1081 
1082 	ifp->if_flags |= flag;
1083 	getmicrotime(&ifp->if_lastchange);
1084 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1085 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1086 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1087 	rt_ifmsg(ifp);
1088 #ifdef INET6
1089 	in6_if_up(ifp);
1090 #endif
1091 }
1092 
1093 /*
1094  * Mark an interface down and notify protocols of
1095  * the transition.
1096  * NOTE: must be called at splnet or eqivalent.
1097  */
1098 void
1099 if_down(ifp)
1100 	register struct ifnet *ifp;
1101 {
1102 
1103 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1104 }
1105 
1106 /*
1107  * Mark an interface up and notify protocols of
1108  * the transition.
1109  * NOTE: must be called at splnet or eqivalent.
1110  */
1111 void
1112 if_up(ifp)
1113 	register struct ifnet *ifp;
1114 {
1115 
1116 	if_route(ifp, IFF_UP, AF_UNSPEC);
1117 }
1118 
1119 /*
1120  * Flush an interface queue.
1121  */
1122 static void
1123 if_qflush(ifq)
1124 	register struct ifqueue *ifq;
1125 {
1126 	register struct mbuf *m, *n;
1127 
1128 	n = ifq->ifq_head;
1129 	while ((m = n) != 0) {
1130 		n = m->m_act;
1131 		m_freem(m);
1132 	}
1133 	ifq->ifq_head = 0;
1134 	ifq->ifq_tail = 0;
1135 	ifq->ifq_len = 0;
1136 }
1137 
1138 /*
1139  * Handle interface watchdog timer routines.  Called
1140  * from softclock, we decrement timers (if set) and
1141  * call the appropriate interface routine on expiration.
1142  */
1143 static void
1144 if_slowtimo(arg)
1145 	void *arg;
1146 {
1147 	register struct ifnet *ifp;
1148 	int s = splimp();
1149 
1150 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1151 		if (ifp->if_timer == 0 || --ifp->if_timer)
1152 			continue;
1153 		if (ifp->if_watchdog)
1154 			(*ifp->if_watchdog)(ifp);
1155 	}
1156 	splx(s);
1157 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1158 }
1159 
1160 /*
1161  * Map interface name to
1162  * interface structure pointer.
1163  */
1164 struct ifnet *
1165 ifunit(const char *name)
1166 {
1167 	char namebuf[IFNAMSIZ + 1];
1168 	struct ifnet *ifp;
1169 	dev_t dev;
1170 
1171 	/*
1172 	 * Now search all the interfaces for this name/number
1173 	 */
1174 
1175 	/*
1176 	 * XXX
1177 	 * Devices should really be known as /dev/fooN, not /dev/net/fooN.
1178 	 */
1179 	snprintf(namebuf, IFNAMSIZ, "%s/%s", net_cdevsw.d_name, name);
1180 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1181 		dev = ifdev_byindex(ifp->if_index);
1182 		if (strcmp(devtoname(dev), namebuf) == 0)
1183 			break;
1184 		if (dev_named(dev, name))
1185 			break;
1186 	}
1187 	return (ifp);
1188 }
1189 
1190 /*
1191  * Map interface name in a sockaddr_dl to
1192  * interface structure pointer.
1193  */
1194 struct ifnet *
1195 if_withname(sa)
1196 	struct sockaddr *sa;
1197 {
1198 	char ifname[IFNAMSIZ+1];
1199 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1200 
1201 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1202 	     (sdl->sdl_nlen > IFNAMSIZ) )
1203 		return NULL;
1204 
1205 	/*
1206 	 * ifunit wants a null-terminated name.  It may not be null-terminated
1207 	 * in the sockaddr.  We don't want to change the caller's sockaddr,
1208 	 * and there might not be room to put the trailing null anyway, so we
1209 	 * make a local copy that we know we can null terminate safely.
1210 	 */
1211 
1212 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1213 	ifname[sdl->sdl_nlen] = '\0';
1214 	return ifunit(ifname);
1215 }
1216 
1217 /*
1218  * Hardware specific interface ioctls.
1219  */
1220 static int
1221 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1222 {
1223 	struct ifreq *ifr;
1224 	struct ifstat *ifs;
1225 	int error = 0;
1226 
1227 	ifr = (struct ifreq *)data;
1228 	switch (cmd) {
1229 	case SIOCGIFINDEX:
1230 		ifr->ifr_index = ifp->if_index;
1231 		break;
1232 
1233 	case SIOCGIFFLAGS:
1234 		ifr->ifr_flags = ifp->if_flags;
1235 		break;
1236 
1237 	case SIOCGIFCAP:
1238 		ifr->ifr_reqcap = ifp->if_capabilities;
1239 		ifr->ifr_curcap = ifp->if_capenable;
1240 		break;
1241 
1242 	case SIOCGIFMETRIC:
1243 		ifr->ifr_metric = ifp->if_metric;
1244 		break;
1245 
1246 	case SIOCGIFMTU:
1247 		ifr->ifr_mtu = ifp->if_mtu;
1248 		break;
1249 
1250 	case SIOCGIFPHYS:
1251 		ifr->ifr_phys = ifp->if_physical;
1252 		break;
1253 
1254 	case SIOCSIFFLAGS:
1255 		error = suser(td);
1256 		if (error)
1257 			return (error);
1258 		ifr->ifr_prevflags = ifp->if_flags;
1259 		if (ifp->if_flags & IFF_SMART) {
1260 			/* Smart drivers twiddle their own routes */
1261 		} else if (ifp->if_flags & IFF_UP &&
1262 		    (ifr->ifr_flags & IFF_UP) == 0) {
1263 			int s = splimp();
1264 			if_down(ifp);
1265 			splx(s);
1266 		} else if (ifr->ifr_flags & IFF_UP &&
1267 		    (ifp->if_flags & IFF_UP) == 0) {
1268 			int s = splimp();
1269 			if_up(ifp);
1270 			splx(s);
1271 		}
1272 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1273 			(ifr->ifr_flags &~ IFF_CANTCHANGE);
1274 		if (ifp->if_ioctl)
1275 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1276 		getmicrotime(&ifp->if_lastchange);
1277 		break;
1278 
1279 	case SIOCSIFCAP:
1280 		error = suser(td);
1281 		if (error)
1282 			return (error);
1283 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1284 			return (EINVAL);
1285 		(void) (*ifp->if_ioctl)(ifp, cmd, data);
1286 		break;
1287 
1288 	case SIOCSIFMETRIC:
1289 		error = suser(td);
1290 		if (error)
1291 			return (error);
1292 		ifp->if_metric = ifr->ifr_metric;
1293 		getmicrotime(&ifp->if_lastchange);
1294 		break;
1295 
1296 	case SIOCSIFPHYS:
1297 		error = suser(td);
1298 		if (error)
1299 			return error;
1300 		if (!ifp->if_ioctl)
1301 		        return EOPNOTSUPP;
1302 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1303 		if (error == 0)
1304 			getmicrotime(&ifp->if_lastchange);
1305 		return(error);
1306 
1307 	case SIOCSIFMTU:
1308 	{
1309 		u_long oldmtu = ifp->if_mtu;
1310 
1311 		error = suser(td);
1312 		if (error)
1313 			return (error);
1314 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1315 			return (EINVAL);
1316 		if (ifp->if_ioctl == NULL)
1317 			return (EOPNOTSUPP);
1318 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1319 		if (error == 0) {
1320 			getmicrotime(&ifp->if_lastchange);
1321 			rt_ifmsg(ifp);
1322 		}
1323 		/*
1324 		 * If the link MTU changed, do network layer specific procedure.
1325 		 */
1326 		if (ifp->if_mtu != oldmtu) {
1327 #ifdef INET6
1328 			nd6_setmtu(ifp);
1329 #endif
1330 		}
1331 		break;
1332 	}
1333 
1334 	case SIOCADDMULTI:
1335 	case SIOCDELMULTI:
1336 		error = suser(td);
1337 		if (error)
1338 			return (error);
1339 
1340 		/* Don't allow group membership on non-multicast interfaces. */
1341 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1342 			return (EOPNOTSUPP);
1343 
1344 		/* Don't let users screw up protocols' entries. */
1345 		if (ifr->ifr_addr.sa_family != AF_LINK)
1346 			return (EINVAL);
1347 
1348 		if (cmd == SIOCADDMULTI) {
1349 			struct ifmultiaddr *ifma;
1350 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1351 		} else {
1352 			error = if_delmulti(ifp, &ifr->ifr_addr);
1353 		}
1354 		if (error == 0)
1355 			getmicrotime(&ifp->if_lastchange);
1356 		break;
1357 
1358 	case SIOCSIFPHYADDR:
1359 	case SIOCDIFPHYADDR:
1360 #ifdef INET6
1361 	case SIOCSIFPHYADDR_IN6:
1362 #endif
1363 	case SIOCSLIFPHYADDR:
1364         case SIOCSIFMEDIA:
1365 	case SIOCSIFGENERIC:
1366 		error = suser(td);
1367 		if (error)
1368 			return (error);
1369 		if (ifp->if_ioctl == NULL)
1370 			return (EOPNOTSUPP);
1371 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1372 		if (error == 0)
1373 			getmicrotime(&ifp->if_lastchange);
1374 		break;
1375 
1376 	case SIOCGIFSTATUS:
1377 		ifs = (struct ifstat *)data;
1378 		ifs->ascii[0] = '\0';
1379 
1380 	case SIOCGIFPSRCADDR:
1381 	case SIOCGIFPDSTADDR:
1382 	case SIOCGLIFPHYADDR:
1383 	case SIOCGIFMEDIA:
1384 	case SIOCGIFGENERIC:
1385 		if (ifp->if_ioctl == 0)
1386 			return (EOPNOTSUPP);
1387 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1388 		break;
1389 
1390 	case SIOCSIFLLADDR:
1391 		error = suser(td);
1392 		if (error)
1393 			return (error);
1394 		error = if_setlladdr(ifp,
1395 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1396 		break;
1397 
1398 	default:
1399 		error = ENOIOCTL;
1400 		break;
1401 	}
1402 	return (error);
1403 }
1404 
1405 /*
1406  * Interface ioctls.
1407  */
1408 int
1409 ifioctl(so, cmd, data, td)
1410 	struct socket *so;
1411 	u_long cmd;
1412 	caddr_t data;
1413 	struct thread *td;
1414 {
1415 	struct ifnet *ifp;
1416 	struct ifreq *ifr;
1417 	int error;
1418 	short oif_flags;
1419 
1420 	switch (cmd) {
1421 	case SIOCGIFCONF:
1422 	case OSIOCGIFCONF:
1423 		return (ifconf(cmd, data));
1424 	}
1425 	ifr = (struct ifreq *)data;
1426 
1427 	switch (cmd) {
1428 	case SIOCIFCREATE:
1429 	case SIOCIFDESTROY:
1430 		if ((error = suser(td)) != 0)
1431 			return (error);
1432 		return ((cmd == SIOCIFCREATE) ?
1433 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1434 			if_clone_destroy(ifr->ifr_name));
1435 
1436 	case SIOCIFGCLONERS:
1437 		return (if_clone_list((struct if_clonereq *)data));
1438 	}
1439 
1440 	ifp = ifunit(ifr->ifr_name);
1441 	if (ifp == 0)
1442 		return (ENXIO);
1443 
1444 	error = ifhwioctl(cmd, ifp, data, td);
1445 	if (error != ENOIOCTL)
1446 		return (error);
1447 
1448 	oif_flags = ifp->if_flags;
1449 	if (so->so_proto == 0)
1450 		return (EOPNOTSUPP);
1451 #ifndef COMPAT_43
1452 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1453 								 data,
1454 								 ifp, td));
1455 #else
1456 	{
1457 		int ocmd = cmd;
1458 
1459 		switch (cmd) {
1460 
1461 		case SIOCSIFDSTADDR:
1462 		case SIOCSIFADDR:
1463 		case SIOCSIFBRDADDR:
1464 		case SIOCSIFNETMASK:
1465 #if BYTE_ORDER != BIG_ENDIAN
1466 			if (ifr->ifr_addr.sa_family == 0 &&
1467 			    ifr->ifr_addr.sa_len < 16) {
1468 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1469 				ifr->ifr_addr.sa_len = 16;
1470 			}
1471 #else
1472 			if (ifr->ifr_addr.sa_len == 0)
1473 				ifr->ifr_addr.sa_len = 16;
1474 #endif
1475 			break;
1476 
1477 		case OSIOCGIFADDR:
1478 			cmd = SIOCGIFADDR;
1479 			break;
1480 
1481 		case OSIOCGIFDSTADDR:
1482 			cmd = SIOCGIFDSTADDR;
1483 			break;
1484 
1485 		case OSIOCGIFBRDADDR:
1486 			cmd = SIOCGIFBRDADDR;
1487 			break;
1488 
1489 		case OSIOCGIFNETMASK:
1490 			cmd = SIOCGIFNETMASK;
1491 		}
1492 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1493 								   cmd,
1494 								   data,
1495 								   ifp, td));
1496 		switch (ocmd) {
1497 
1498 		case OSIOCGIFADDR:
1499 		case OSIOCGIFDSTADDR:
1500 		case OSIOCGIFBRDADDR:
1501 		case OSIOCGIFNETMASK:
1502 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1503 
1504 		}
1505 	}
1506 #endif /* COMPAT_43 */
1507 
1508 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1509 #ifdef INET6
1510 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1511 		if (ifp->if_flags & IFF_UP) {
1512 			int s = splimp();
1513 			in6_if_up(ifp);
1514 			splx(s);
1515 		}
1516 #endif
1517 	}
1518 	return (error);
1519 }
1520 
1521 /*
1522  * Set/clear promiscuous mode on interface ifp based on the truth value
1523  * of pswitch.  The calls are reference counted so that only the first
1524  * "on" request actually has an effect, as does the final "off" request.
1525  * Results are undefined if the "off" and "on" requests are not matched.
1526  */
1527 int
1528 ifpromisc(ifp, pswitch)
1529 	struct ifnet *ifp;
1530 	int pswitch;
1531 {
1532 	struct ifreq ifr;
1533 	int error;
1534 	int oldflags, oldpcount;
1535 
1536 	oldpcount = ifp->if_pcount;
1537 	oldflags = ifp->if_flags;
1538 	if (pswitch) {
1539 		/*
1540 		 * If the device is not configured up, we cannot put it in
1541 		 * promiscuous mode.
1542 		 */
1543 		if ((ifp->if_flags & IFF_UP) == 0)
1544 			return (ENETDOWN);
1545 		if (ifp->if_pcount++ != 0)
1546 			return (0);
1547 		ifp->if_flags |= IFF_PROMISC;
1548 	} else {
1549 		if (--ifp->if_pcount > 0)
1550 			return (0);
1551 		ifp->if_flags &= ~IFF_PROMISC;
1552 	}
1553 	ifr.ifr_flags = ifp->if_flags;
1554 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1555 	if (error == 0) {
1556 		log(LOG_INFO, "%s%d: promiscuous mode %s\n",
1557 		    ifp->if_name, ifp->if_unit,
1558 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1559 		rt_ifmsg(ifp);
1560 	} else {
1561 		ifp->if_pcount = oldpcount;
1562 		ifp->if_flags = oldflags;
1563 	}
1564 	return error;
1565 }
1566 
1567 /*
1568  * Return interface configuration
1569  * of system.  List may be used
1570  * in later ioctl's (above) to get
1571  * other information.
1572  */
1573 /*ARGSUSED*/
1574 static int
1575 ifconf(cmd, data)
1576 	u_long cmd;
1577 	caddr_t data;
1578 {
1579 	struct ifconf *ifc = (struct ifconf *)data;
1580 	struct ifnet *ifp;
1581 	struct ifaddr *ifa;
1582 	struct ifreq ifr, *ifrp;
1583 	int space = ifc->ifc_len, error = 0;
1584 
1585 	ifrp = ifc->ifc_req;
1586 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1587 		char workbuf[64];
1588 		int ifnlen, addrs;
1589 
1590 		if (space < sizeof(ifr))
1591 			break;
1592 		ifnlen = snprintf(workbuf, sizeof(workbuf),
1593 		    "%s%d", ifp->if_name, ifp->if_unit);
1594 		if(ifnlen + 1 > sizeof ifr.ifr_name) {
1595 			error = ENAMETOOLONG;
1596 			break;
1597 		} else {
1598 			strcpy(ifr.ifr_name, workbuf);
1599 		}
1600 
1601 		addrs = 0;
1602 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1603 			struct sockaddr *sa = ifa->ifa_addr;
1604 
1605 			if (space < sizeof(ifr))
1606 				break;
1607 			if (jailed(curthread->td_ucred) &&
1608 			    prison_if(curthread->td_ucred, sa))
1609 				continue;
1610 			addrs++;
1611 #ifdef COMPAT_43
1612 			if (cmd == OSIOCGIFCONF) {
1613 				struct osockaddr *osa =
1614 					 (struct osockaddr *)&ifr.ifr_addr;
1615 				ifr.ifr_addr = *sa;
1616 				osa->sa_family = sa->sa_family;
1617 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1618 						sizeof (ifr));
1619 				ifrp++;
1620 			} else
1621 #endif
1622 			if (sa->sa_len <= sizeof(*sa)) {
1623 				ifr.ifr_addr = *sa;
1624 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1625 						sizeof (ifr));
1626 				ifrp++;
1627 			} else {
1628 				if (space < sizeof (ifr) + sa->sa_len -
1629 					    sizeof(*sa))
1630 					break;
1631 				space -= sa->sa_len - sizeof(*sa);
1632 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1633 						sizeof (ifr.ifr_name));
1634 				if (error == 0)
1635 				    error = copyout((caddr_t)sa,
1636 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1637 				ifrp = (struct ifreq *)
1638 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1639 			}
1640 			if (error)
1641 				break;
1642 			space -= sizeof (ifr);
1643 		}
1644 		if (error)
1645 			break;
1646 		if (!addrs) {
1647 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1648 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1649 			    sizeof (ifr));
1650 			if (error)
1651 				break;
1652 			space -= sizeof (ifr);
1653 			ifrp++;
1654 		}
1655 	}
1656 	ifc->ifc_len -= space;
1657 	return (error);
1658 }
1659 
1660 /*
1661  * Just like if_promisc(), but for all-multicast-reception mode.
1662  */
1663 int
1664 if_allmulti(ifp, onswitch)
1665 	struct ifnet *ifp;
1666 	int onswitch;
1667 {
1668 	int error = 0;
1669 	int s = splimp();
1670 	struct ifreq ifr;
1671 
1672 	if (onswitch) {
1673 		if (ifp->if_amcount++ == 0) {
1674 			ifp->if_flags |= IFF_ALLMULTI;
1675 			ifr.ifr_flags = ifp->if_flags;
1676 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1677 		}
1678 	} else {
1679 		if (ifp->if_amcount > 1) {
1680 			ifp->if_amcount--;
1681 		} else {
1682 			ifp->if_amcount = 0;
1683 			ifp->if_flags &= ~IFF_ALLMULTI;
1684 			ifr.ifr_flags = ifp->if_flags;
1685 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1686 		}
1687 	}
1688 	splx(s);
1689 
1690 	if (error == 0)
1691 		rt_ifmsg(ifp);
1692 	return error;
1693 }
1694 
1695 /*
1696  * Add a multicast listenership to the interface in question.
1697  * The link layer provides a routine which converts
1698  */
1699 int
1700 if_addmulti(ifp, sa, retifma)
1701 	struct ifnet *ifp;	/* interface to manipulate */
1702 	struct sockaddr *sa;	/* address to add */
1703 	struct ifmultiaddr **retifma;
1704 {
1705 	struct sockaddr *llsa, *dupsa;
1706 	int error, s;
1707 	struct ifmultiaddr *ifma;
1708 
1709 	/*
1710 	 * If the matching multicast address already exists
1711 	 * then don't add a new one, just add a reference
1712 	 */
1713 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1714 		if (equal(sa, ifma->ifma_addr)) {
1715 			ifma->ifma_refcount++;
1716 			if (retifma)
1717 				*retifma = ifma;
1718 			return 0;
1719 		}
1720 	}
1721 
1722 	/*
1723 	 * Give the link layer a chance to accept/reject it, and also
1724 	 * find out which AF_LINK address this maps to, if it isn't one
1725 	 * already.
1726 	 */
1727 	if (ifp->if_resolvemulti) {
1728 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1729 		if (error) return error;
1730 	} else {
1731 		llsa = 0;
1732 	}
1733 
1734 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1735 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1736 	bcopy(sa, dupsa, sa->sa_len);
1737 
1738 	ifma->ifma_addr = dupsa;
1739 	ifma->ifma_lladdr = llsa;
1740 	ifma->ifma_ifp = ifp;
1741 	ifma->ifma_refcount = 1;
1742 	ifma->ifma_protospec = 0;
1743 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1744 
1745 	/*
1746 	 * Some network interfaces can scan the address list at
1747 	 * interrupt time; lock them out.
1748 	 */
1749 	s = splimp();
1750 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1751 	splx(s);
1752 	if (retifma != NULL)
1753 		*retifma = ifma;
1754 
1755 	if (llsa != 0) {
1756 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1757 			if (equal(ifma->ifma_addr, llsa))
1758 				break;
1759 		}
1760 		if (ifma) {
1761 			ifma->ifma_refcount++;
1762 		} else {
1763 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1764 			       M_IFMADDR, M_WAITOK);
1765 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1766 			       M_IFMADDR, M_WAITOK);
1767 			bcopy(llsa, dupsa, llsa->sa_len);
1768 			ifma->ifma_addr = dupsa;
1769 			ifma->ifma_ifp = ifp;
1770 			ifma->ifma_refcount = 1;
1771 			s = splimp();
1772 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1773 			splx(s);
1774 		}
1775 	}
1776 	/*
1777 	 * We are certain we have added something, so call down to the
1778 	 * interface to let them know about it.
1779 	 */
1780 	s = splimp();
1781 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1782 	splx(s);
1783 
1784 	return 0;
1785 }
1786 
1787 /*
1788  * Remove a reference to a multicast address on this interface.  Yell
1789  * if the request does not match an existing membership.
1790  */
1791 int
1792 if_delmulti(ifp, sa)
1793 	struct ifnet *ifp;
1794 	struct sockaddr *sa;
1795 {
1796 	struct ifmultiaddr *ifma;
1797 	int s;
1798 
1799 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1800 		if (equal(sa, ifma->ifma_addr))
1801 			break;
1802 	if (ifma == 0)
1803 		return ENOENT;
1804 
1805 	if (ifma->ifma_refcount > 1) {
1806 		ifma->ifma_refcount--;
1807 		return 0;
1808 	}
1809 
1810 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1811 	sa = ifma->ifma_lladdr;
1812 	s = splimp();
1813 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1814 	/*
1815 	 * Make sure the interface driver is notified
1816 	 * in the case of a link layer mcast group being left.
1817 	 */
1818 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1819 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1820 	splx(s);
1821 	free(ifma->ifma_addr, M_IFMADDR);
1822 	free(ifma, M_IFMADDR);
1823 	if (sa == 0)
1824 		return 0;
1825 
1826 	/*
1827 	 * Now look for the link-layer address which corresponds to
1828 	 * this network address.  It had been squirreled away in
1829 	 * ifma->ifma_lladdr for this purpose (so we don't have
1830 	 * to call ifp->if_resolvemulti() again), and we saved that
1831 	 * value in sa above.  If some nasty deleted the
1832 	 * link-layer address out from underneath us, we can deal because
1833 	 * the address we stored was is not the same as the one which was
1834 	 * in the record for the link-layer address.  (So we don't complain
1835 	 * in that case.)
1836 	 */
1837 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1838 		if (equal(sa, ifma->ifma_addr))
1839 			break;
1840 	if (ifma == 0)
1841 		return 0;
1842 
1843 	if (ifma->ifma_refcount > 1) {
1844 		ifma->ifma_refcount--;
1845 		return 0;
1846 	}
1847 
1848 	s = splimp();
1849 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1850 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1851 	splx(s);
1852 	free(ifma->ifma_addr, M_IFMADDR);
1853 	free(sa, M_IFMADDR);
1854 	free(ifma, M_IFMADDR);
1855 
1856 	return 0;
1857 }
1858 
1859 /*
1860  * Set the link layer address on an interface.
1861  *
1862  * At this time we only support certain types of interfaces,
1863  * and we don't allow the length of the address to change.
1864  */
1865 int
1866 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1867 {
1868 	struct sockaddr_dl *sdl;
1869 	struct ifaddr *ifa;
1870 	struct ifreq ifr;
1871 
1872 	ifa = ifaddr_byindex(ifp->if_index);
1873 	if (ifa == NULL)
1874 		return (EINVAL);
1875 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1876 	if (sdl == NULL)
1877 		return (EINVAL);
1878 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1879 		return (EINVAL);
1880 	switch (ifp->if_type) {
1881 	case IFT_ETHER:			/* these types use struct arpcom */
1882 	case IFT_FDDI:
1883 	case IFT_XETHER:
1884 	case IFT_ISO88025:
1885 	case IFT_L2VLAN:
1886 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1887 		bcopy(lladdr, LLADDR(sdl), len);
1888 		break;
1889 	default:
1890 		return (ENODEV);
1891 	}
1892 	/*
1893 	 * If the interface is already up, we need
1894 	 * to re-init it in order to reprogram its
1895 	 * address filter.
1896 	 */
1897 	if ((ifp->if_flags & IFF_UP) != 0) {
1898 		ifp->if_flags &= ~IFF_UP;
1899 		ifr.ifr_flags = ifp->if_flags;
1900 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1901 		ifp->if_flags |= IFF_UP;
1902 		ifr.ifr_flags = ifp->if_flags;
1903 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1904 #ifdef INET
1905 		/*
1906 		 * Also send gratuitous ARPs to notify other nodes about
1907 		 * the address change.
1908 		 */
1909 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1910 			if (ifa->ifa_addr != NULL &&
1911 			    ifa->ifa_addr->sa_family == AF_INET)
1912 				arp_ifinit(ifp, ifa);
1913 		}
1914 #endif
1915 	}
1916 	return (0);
1917 }
1918 
1919 struct ifmultiaddr *
1920 ifmaof_ifpforaddr(sa, ifp)
1921 	struct sockaddr *sa;
1922 	struct ifnet *ifp;
1923 {
1924 	struct ifmultiaddr *ifma;
1925 
1926 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1927 		if (equal(ifma->ifma_addr, sa))
1928 			break;
1929 
1930 	return ifma;
1931 }
1932 
1933 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
1934 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1935