1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * @(#)if.c 8.5 (Berkeley) 1/9/95 32 * $FreeBSD$ 33 */ 34 35 #include "opt_bpf.h" 36 #include "opt_inet6.h" 37 #include "opt_inet.h" 38 39 #include <sys/param.h> 40 #include <sys/conf.h> 41 #include <sys/eventhandler.h> 42 #include <sys/malloc.h> 43 #include <sys/domainset.h> 44 #include <sys/sbuf.h> 45 #include <sys/bus.h> 46 #include <sys/epoch.h> 47 #include <sys/mbuf.h> 48 #include <sys/systm.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/socket.h> 52 #include <sys/socketvar.h> 53 #include <sys/protosw.h> 54 #include <sys/kernel.h> 55 #include <sys/lock.h> 56 #include <sys/refcount.h> 57 #include <sys/module.h> 58 #include <sys/rwlock.h> 59 #include <sys/sockio.h> 60 #include <sys/syslog.h> 61 #include <sys/sysctl.h> 62 #include <sys/sysent.h> 63 #include <sys/taskqueue.h> 64 #include <sys/domain.h> 65 #include <sys/jail.h> 66 #include <sys/priv.h> 67 68 #include <machine/stdarg.h> 69 #include <vm/uma.h> 70 71 #include <net/bpf.h> 72 #include <net/ethernet.h> 73 #include <net/if.h> 74 #include <net/if_arp.h> 75 #include <net/if_clone.h> 76 #include <net/if_dl.h> 77 #include <net/if_types.h> 78 #include <net/if_var.h> 79 #include <net/if_media.h> 80 #include <net/if_vlan_var.h> 81 #include <net/radix.h> 82 #include <net/route.h> 83 #include <net/route/route_ctl.h> 84 #include <net/vnet.h> 85 86 #if defined(INET) || defined(INET6) 87 #include <net/ethernet.h> 88 #include <netinet/in.h> 89 #include <netinet/in_var.h> 90 #include <netinet/ip.h> 91 #include <netinet/ip_carp.h> 92 #ifdef INET 93 #include <net/debugnet.h> 94 #include <netinet/if_ether.h> 95 #endif /* INET */ 96 #ifdef INET6 97 #include <netinet6/in6_var.h> 98 #include <netinet6/in6_ifattach.h> 99 #endif /* INET6 */ 100 #endif /* INET || INET6 */ 101 102 #include <security/mac/mac_framework.h> 103 104 /* 105 * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name 106 * and ifr_ifru when it is used in SIOCGIFCONF. 
*/
_Static_assert(sizeof(((struct ifreq *)0)->ifr_name) ==
    offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru");

/* Epoch handle; if_epochalloc() in this file assigns it from epoch_alloc(). */
__read_mostly epoch_t net_epoch_preempt;
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>

/*
 * Length/pointer pair with both members fixed at 32 bits; the member
 * annotations name the native types they stand in for.
 */
struct ifreq_buffer32 {
	uint32_t	length;		/* (size_t) */
	uint32_t	buffer;		/* (void *) */
};

/*
 * Interface request structure used for socket
 * ioctl's.  All interface ioctl's must have parameter
 * definitions which begin with ifr_name.  The
 * remainder may be interface specific.
 *
 * This is the 32-bit image: ifru_buffer and ifru_data use fixed 32-bit
 * members.
 */
struct ifreq32 {
	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
	union {
		struct sockaddr	ifru_addr;
		struct sockaddr	ifru_dstaddr;
		struct sockaddr	ifru_broadaddr;
		struct ifreq_buffer32 ifru_buffer;
		short		ifru_flags[2];
		short		ifru_index;
		int		ifru_jid;
		int		ifru_metric;
		int		ifru_mtu;
		int		ifru_phys;
		int		ifru_media;
		uint32_t	ifru_data;
		int		ifru_cap[2];
		u_int		ifru_fib;
		u_char		ifru_vlan_pcp;
	} ifr_ifru;
};
/*
 * The 32-bit image must have the same total size and the same ifr_ifru
 * offset as the native struct ifreq.
 */
CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
    __offsetof(struct ifreq32, ifr_ifru));

/*
 * 32-bit image of an interface-group request.
 * NOTE(review): ifgru_groups is presumably a user pointer squeezed into
 * 32 bits, like ifm_ulist below -- confirm against struct ifgroupreq.
 */
struct ifgroupreq32 {
	char	ifgr_name[IFNAMSIZ];
	u_int	ifgr_len;
	union {
		char		ifgru_group[IFNAMSIZ];
		uint32_t	ifgru_groups;
	} ifgr_ifgru;
};

/* 32-bit image of a media request; ifm_ulist carries an (int *). */
struct ifmediareq32 {
	char		ifm_name[IFNAMSIZ];
	int		ifm_current;
	int		ifm_mask;
	int		ifm_status;
	int		ifm_active;
	int		ifm_count;
	uint32_t	ifm_ulist;	/* (int *) */
};
/* Media ioctl commands re-derived for struct ifmediareq32. */
#define	SIOCGIFMEDIA32	_IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
#define	SIOCGIFXMEDIA32	_IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)

/*
 * Expands to the struct ifgroupreq32 variant of cmd followed by ": case",
 * so it can be spliced between a "case" keyword and a further label
 * expression.  Expands to nothing without COMPAT_FREEBSD32.
 */
#define	_CASE_IOC_IFGROUPREQ_32(cmd)				\
    _IOC_NEWTYPE((cmd), struct ifgroupreq32): case
#else /* !COMPAT_FREEBSD32 */
#define _CASE_IOC_IFGROUPREQ_32(cmd)
#endif /* !COMPAT_FREEBSD32 */

/*
 * Label expression for a switch on an interface-group ioctl: written after
 * a "case" keyword it yields (cmd) and, when _CASE_IOC_IFGROUPREQ_32 is
 * non-empty, an additional label ahead of it.
 */
#define CASE_IOC_IFGROUPREQ(cmd)	\
    _CASE_IOC_IFGROUPREQ_32(cmd)	\
    (cmd)

/* Overlay of the native request and, if compiled in, its 32-bit image. */
union ifreq_union {
	struct ifreq	ifr;
#ifdef COMPAT_FREEBSD32
	struct ifreq32	ifr32;
#endif
};

/* Overlay of the native group request and, if compiled in, its 32-bit image. */
union ifgroupreq_union {
	struct ifgroupreq ifgr;
#ifdef COMPAT_FREEBSD32
	struct ifgroupreq32 ifgr32;
#endif
};

SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Generic link-management");

/* Backing variable ifqmaxlen is defined further down in this file. */
SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
    &ifqmaxlen, 0, "max send queue size");

/* Log link state change events */
static int log_link_state_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

/* Log promiscuous mode change events */
static int log_promisc_mode_change = 1;

/*
 * NOTE(review): this knob is CTLFLAG_RDTUN with a value argument of 1,
 * whereas the link-state knob above is CTLFLAG_RW with 0 -- confirm the
 * difference is intended.
 */
SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
	&log_promisc_mode_change, 1,
	"log promiscuous mode change events");

/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
	&ifdescr_maxlen, 0,
	"administrative maximum length for interface description");

static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

/* global sx for non-critical path ifdescr */
static struct sx ifdescr_sx;
SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");

/* Link-state hook pointers; defined here without an initializer. */
void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP.
*/
/*
 * None of the carp_* pointers below has an initializer here.
 * NOTE(review): presumably they are filled in by the CARP code when it is
 * present -- confirm at the assignment sites.
 */
void	(*carp_linkstate_p)(struct ifnet *ifp);
void	(*carp_demote_adj_p)(int, char *);
int	(*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *sa);
int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
int	(*carp_attach_p)(struct ifaddr *, int);
void	(*carp_detach_p)(struct ifaddr *, bool);
#endif
#ifdef INET
int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
    const struct in6_addr *taddr);
#endif

/* Dequeue hook taking a struct ifaltq; explicitly starts out NULL. */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;

/*
 * XXX: Style; these should be sorted alphabetically, and unprototyped
 * static functions should be prototyped. Currently they are sorted by
 * declaration order.
*/
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t);
static void	*if_grow(void);
static void	if_input_default(struct ifnet *, struct mbuf *);
static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void	if_route(struct ifnet *, int flag, int fam);
static int	if_setflag(struct ifnet *, int, int, int *, int);
static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
static void	if_unroute(struct ifnet *, int flag, int fam);
static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void	do_link_state_change(void *, int);
static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
static int	if_getgroupmembers(struct ifgroupreq *);
static void	if_delgroups(struct ifnet *);
static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
static void	if_siocaddmulti(void *, int);
static void	if_link_ifnet(struct ifnet *);
static bool	if_unlink_ifnet(struct ifnet *, bool);
#ifdef VIMAGE
static int	if_vmove(struct ifnet *, struct vnet *);
#endif

#ifdef INET6
/*
 * XXX: declare here to avoid to include many inet6 related files..
 * should be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

/* ipsec helper hooks */
VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);

/* Highest index currently in use in the per-vnet ifindex table. */
VNET_DEFINE(int, if_index);
/* Default send-queue length; ifq_init() applies it when ifq_maxlen is 0. */
int	ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);

/*
 * Capacity bookkeeping for the ifindex table.  if_grow() doubles this value
 * before installing a table, so the first table has room for 16 pointers.
 */
VNET_DEFINE_STATIC(int, if_indexlim) = 8;

/* Table of ifnet by index.
*/ 304 VNET_DEFINE(struct ifnet **, ifindex_table); 305 306 #define V_if_indexlim VNET(if_indexlim) 307 #define V_ifindex_table VNET(ifindex_table) 308 309 /* 310 * The global network interface list (V_ifnet) and related state (such as 311 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock. 312 * This may be acquired to stabilise the list, or we may rely on NET_EPOCH. 313 */ 314 struct sx ifnet_sxlock; 315 SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); 316 317 /* 318 * The allocation of network interfaces is a rather non-atomic affair; we 319 * need to select an index before we are ready to expose the interface for 320 * use, so will use this pointer value to indicate reservation. 321 */ 322 #define IFNET_HOLD (void *)(uintptr_t)(-1) 323 324 #ifdef VIMAGE 325 #define VNET_IS_SHUTTING_DOWN(_vnet) \ 326 ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE) 327 #endif 328 329 static if_com_alloc_t *if_com_alloc[256]; 330 static if_com_free_t *if_com_free[256]; 331 332 static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); 333 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); 334 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); 335 336 struct ifnet * 337 ifnet_byindex(u_short idx) 338 { 339 struct ifnet *ifp; 340 341 if (__predict_false(idx > V_if_index)) 342 return (NULL); 343 344 ifp = *(struct ifnet * const volatile *)(V_ifindex_table + idx); 345 return (__predict_false(ifp == IFNET_HOLD) ? NULL : ifp); 346 } 347 348 struct ifnet * 349 ifnet_byindex_ref(u_short idx) 350 { 351 struct ifnet *ifp; 352 353 NET_EPOCH_ASSERT(); 354 355 ifp = ifnet_byindex(idx); 356 if (ifp == NULL || (ifp->if_flags & IFF_DYING)) 357 return (NULL); 358 if_ref(ifp); 359 return (ifp); 360 } 361 362 /* 363 * Allocate an ifindex array entry; return 0 on success or an error on 364 * failure. 
365 */ 366 static u_short 367 ifindex_alloc(void **old) 368 { 369 u_short idx; 370 371 IFNET_WLOCK_ASSERT(); 372 /* 373 * Try to find an empty slot below V_if_index. If we fail, take the 374 * next slot. 375 */ 376 for (idx = 1; idx <= V_if_index; idx++) { 377 if (V_ifindex_table[idx] == NULL) 378 break; 379 } 380 381 /* Catch if_index overflow. */ 382 if (idx >= V_if_indexlim) { 383 *old = if_grow(); 384 return (USHRT_MAX); 385 } 386 if (idx > V_if_index) 387 V_if_index = idx; 388 return (idx); 389 } 390 391 static void 392 ifindex_free_locked(u_short idx) 393 { 394 395 IFNET_WLOCK_ASSERT(); 396 397 V_ifindex_table[idx] = NULL; 398 while (V_if_index > 0 && 399 V_ifindex_table[V_if_index] == NULL) 400 V_if_index--; 401 } 402 403 static void 404 ifindex_free(u_short idx) 405 { 406 407 IFNET_WLOCK(); 408 ifindex_free_locked(idx); 409 IFNET_WUNLOCK(); 410 } 411 412 static void 413 ifnet_setbyindex(u_short idx, struct ifnet *ifp) 414 { 415 416 V_ifindex_table[idx] = ifp; 417 } 418 419 struct ifaddr * 420 ifaddr_byindex(u_short idx) 421 { 422 struct ifnet *ifp; 423 struct ifaddr *ifa = NULL; 424 425 NET_EPOCH_ASSERT(); 426 427 ifp = ifnet_byindex(idx); 428 if (ifp != NULL && (ifa = ifp->if_addr) != NULL) 429 ifa_ref(ifa); 430 return (ifa); 431 } 432 433 /* 434 * Network interface utility routines. 435 * 436 * Routines with ifa_ifwith* names take sockaddr *'s as 437 * parameters. 
438 */ 439 440 static void 441 vnet_if_init(const void *unused __unused) 442 { 443 void *old; 444 445 CK_STAILQ_INIT(&V_ifnet); 446 CK_STAILQ_INIT(&V_ifg_head); 447 IFNET_WLOCK(); 448 old = if_grow(); /* create initial table */ 449 IFNET_WUNLOCK(); 450 epoch_wait_preempt(net_epoch_preempt); 451 free(old, M_IFNET); 452 vnet_if_clone_init(); 453 } 454 VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, 455 NULL); 456 457 #ifdef VIMAGE 458 static void 459 vnet_if_uninit(const void *unused __unused) 460 { 461 462 VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " 463 "not empty", __func__, __LINE__, &V_ifnet)); 464 VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " 465 "not empty", __func__, __LINE__, &V_ifg_head)); 466 467 free((caddr_t)V_ifindex_table, M_IFNET); 468 } 469 VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, 470 vnet_if_uninit, NULL); 471 #endif 472 473 static void 474 if_link_ifnet(struct ifnet *ifp) 475 { 476 477 IFNET_WLOCK(); 478 CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); 479 #ifdef VIMAGE 480 curvnet->vnet_ifcnt++; 481 #endif 482 IFNET_WUNLOCK(); 483 } 484 485 static bool 486 if_unlink_ifnet(struct ifnet *ifp, bool vmove) 487 { 488 struct ifnet *iter; 489 int found = 0; 490 491 IFNET_WLOCK(); 492 CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) 493 if (iter == ifp) { 494 CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); 495 if (!vmove) 496 ifp->if_flags |= IFF_DYING; 497 found = 1; 498 break; 499 } 500 #ifdef VIMAGE 501 curvnet->vnet_ifcnt--; 502 #endif 503 IFNET_WUNLOCK(); 504 505 return (found); 506 } 507 508 #ifdef VIMAGE 509 static void 510 vnet_if_return(const void *unused __unused) 511 { 512 struct ifnet *ifp, *nifp; 513 struct ifnet **pending; 514 int found, i; 515 516 i = 0; 517 518 /* 519 * We need to protect our access to the V_ifnet tailq. 
Ordinarily we'd 520 * enter NET_EPOCH, but that's not possible, because if_vmove() calls 521 * if_detach_internal(), which waits for NET_EPOCH callbacks to 522 * complete. We can't do that from within NET_EPOCH. 523 * 524 * However, we can also use the IFNET_xLOCK, which is the V_ifnet 525 * read/write lock. We cannot hold the lock as we call if_vmove() 526 * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib 527 * ctx lock. 528 */ 529 IFNET_WLOCK(); 530 531 pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt, 532 M_IFNET, M_WAITOK | M_ZERO); 533 534 /* Return all inherited interfaces to their parent vnets. */ 535 CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { 536 if (ifp->if_home_vnet != ifp->if_vnet) { 537 found = if_unlink_ifnet(ifp, true); 538 MPASS(found); 539 540 pending[i++] = ifp; 541 } 542 } 543 IFNET_WUNLOCK(); 544 545 for (int j = 0; j < i; j++) { 546 if_vmove(pending[j], pending[j]->if_home_vnet); 547 } 548 549 free(pending, M_IFNET); 550 } 551 VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, 552 vnet_if_return, NULL); 553 #endif 554 555 static void * 556 if_grow(void) 557 { 558 int oldlim; 559 u_int n; 560 struct ifnet **e; 561 void *old; 562 563 old = NULL; 564 IFNET_WLOCK_ASSERT(); 565 oldlim = V_if_indexlim; 566 IFNET_WUNLOCK(); 567 n = (oldlim << 1) * sizeof(*e); 568 e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); 569 IFNET_WLOCK(); 570 if (V_if_indexlim != oldlim) { 571 free(e, M_IFNET); 572 return (NULL); 573 } 574 if (V_ifindex_table != NULL) { 575 memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); 576 old = V_ifindex_table; 577 } 578 V_if_indexlim <<= 1; 579 V_ifindex_table = e; 580 return (old); 581 } 582 583 /* 584 * Allocate a struct ifnet and an index for an interface. A layer 2 585 * common structure will also be allocated if an allocation routine is 586 * registered for the passed type. 
587 */ 588 struct ifnet * 589 if_alloc_domain(u_char type, int numa_domain) 590 { 591 struct ifnet *ifp; 592 u_short idx; 593 void *old; 594 595 KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); 596 if (numa_domain == IF_NODOM) 597 ifp = malloc(sizeof(struct ifnet), M_IFNET, 598 M_WAITOK | M_ZERO); 599 else 600 ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET, 601 DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO); 602 restart: 603 IFNET_WLOCK(); 604 idx = ifindex_alloc(&old); 605 if (__predict_false(idx == USHRT_MAX)) { 606 IFNET_WUNLOCK(); 607 epoch_wait_preempt(net_epoch_preempt); 608 free(old, M_IFNET); 609 goto restart; 610 } 611 ifnet_setbyindex(idx, IFNET_HOLD); 612 IFNET_WUNLOCK(); 613 ifp->if_index = idx; 614 ifp->if_type = type; 615 ifp->if_alloctype = type; 616 ifp->if_numa_domain = numa_domain; 617 #ifdef VIMAGE 618 ifp->if_vnet = curvnet; 619 #endif 620 if (if_com_alloc[type] != NULL) { 621 ifp->if_l2com = if_com_alloc[type](type, ifp); 622 if (ifp->if_l2com == NULL) { 623 free(ifp, M_IFNET); 624 ifindex_free(idx); 625 return (NULL); 626 } 627 } 628 629 IF_ADDR_LOCK_INIT(ifp); 630 TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); 631 TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp); 632 ifp->if_afdata_initialized = 0; 633 IF_AFDATA_LOCK_INIT(ifp); 634 CK_STAILQ_INIT(&ifp->if_addrhead); 635 CK_STAILQ_INIT(&ifp->if_multiaddrs); 636 CK_STAILQ_INIT(&ifp->if_groups); 637 #ifdef MAC 638 mac_ifnet_init(ifp); 639 #endif 640 ifq_init(&ifp->if_snd, ifp); 641 642 refcount_init(&ifp->if_refcount, 1); /* Index reference. 
*/ 643 for (int i = 0; i < IFCOUNTERS; i++) 644 ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); 645 ifp->if_get_counter = if_get_counter_default; 646 ifp->if_pcp = IFNET_PCP_NONE; 647 ifnet_setbyindex(ifp->if_index, ifp); 648 return (ifp); 649 } 650 651 struct ifnet * 652 if_alloc_dev(u_char type, device_t dev) 653 { 654 int numa_domain; 655 656 if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0) 657 return (if_alloc_domain(type, IF_NODOM)); 658 return (if_alloc_domain(type, numa_domain)); 659 } 660 661 struct ifnet * 662 if_alloc(u_char type) 663 { 664 665 return (if_alloc_domain(type, IF_NODOM)); 666 } 667 /* 668 * Do the actual work of freeing a struct ifnet, and layer 2 common 669 * structure. This call is made when the last reference to an 670 * interface is released. 671 */ 672 static void 673 if_free_internal(struct ifnet *ifp) 674 { 675 676 KASSERT((ifp->if_flags & IFF_DYING), 677 ("if_free_internal: interface not dying")); 678 679 if (if_com_free[ifp->if_alloctype] != NULL) 680 if_com_free[ifp->if_alloctype](ifp->if_l2com, 681 ifp->if_alloctype); 682 683 #ifdef MAC 684 mac_ifnet_destroy(ifp); 685 #endif /* MAC */ 686 IF_AFDATA_DESTROY(ifp); 687 IF_ADDR_LOCK_DESTROY(ifp); 688 ifq_delete(&ifp->if_snd); 689 690 for (int i = 0; i < IFCOUNTERS; i++) 691 counter_u64_free(ifp->if_counters[i]); 692 693 free(ifp->if_description, M_IFDESCR); 694 free(ifp->if_hw_addr, M_IFADDR); 695 free(ifp, M_IFNET); 696 } 697 698 static void 699 if_destroy(epoch_context_t ctx) 700 { 701 struct ifnet *ifp; 702 703 ifp = __containerof(ctx, struct ifnet, if_epoch_ctx); 704 if_free_internal(ifp); 705 } 706 707 /* 708 * Deregister an interface and free the associated storage. 
709 */ 710 void 711 if_free(struct ifnet *ifp) 712 { 713 714 ifp->if_flags |= IFF_DYING; /* XXX: Locking */ 715 716 CURVNET_SET_QUIET(ifp->if_vnet); 717 IFNET_WLOCK(); 718 KASSERT(ifp == ifnet_byindex(ifp->if_index), 719 ("%s: freeing unallocated ifnet", ifp->if_xname)); 720 721 ifindex_free_locked(ifp->if_index); 722 IFNET_WUNLOCK(); 723 724 if (refcount_release(&ifp->if_refcount)) 725 NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx); 726 CURVNET_RESTORE(); 727 } 728 729 /* 730 * Interfaces to keep an ifnet type-stable despite the possibility of the 731 * driver calling if_free(). If there are additional references, we defer 732 * freeing the underlying data structure. 733 */ 734 void 735 if_ref(struct ifnet *ifp) 736 { 737 738 /* We don't assert the ifnet list lock here, but arguably should. */ 739 refcount_acquire(&ifp->if_refcount); 740 } 741 742 void 743 if_rele(struct ifnet *ifp) 744 { 745 746 if (!refcount_release(&ifp->if_refcount)) 747 return; 748 NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx); 749 } 750 751 void 752 ifq_init(struct ifaltq *ifq, struct ifnet *ifp) 753 { 754 755 mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); 756 757 if (ifq->ifq_maxlen == 0) 758 ifq->ifq_maxlen = ifqmaxlen; 759 760 ifq->altq_type = 0; 761 ifq->altq_disc = NULL; 762 ifq->altq_flags &= ALTQF_CANTCHANGE; 763 ifq->altq_tbr = NULL; 764 ifq->altq_ifp = ifp; 765 } 766 767 void 768 ifq_delete(struct ifaltq *ifq) 769 { 770 mtx_destroy(&ifq->ifq_mtx); 771 } 772 773 /* 774 * Perform generic interface initialization tasks and attach the interface 775 * to the list of "active" interfaces. If vmove flag is set on entry 776 * to if_attach_internal(), perform only a limited subset of initialization 777 * tasks, given that we are moving from one vnet to another an ifnet which 778 * has already been fully initialized. 
779 * 780 * Note that if_detach_internal() removes group membership unconditionally 781 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. 782 * Thus, when if_vmove() is applied to a cloned interface, group membership 783 * is lost while a cloned one always joins a group whose name is 784 * ifc->ifc_name. To recover this after if_detach_internal() and 785 * if_attach_internal(), the cloner should be specified to 786 * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() 787 * attempts to join a group whose name is ifc->ifc_name. 788 * 789 * XXX: 790 * - The decision to return void and thus require this function to 791 * succeed is questionable. 792 * - We should probably do more sanity checking. For instance we don't 793 * do anything to insure if_xname is unique or non-empty. 794 */ 795 void 796 if_attach(struct ifnet *ifp) 797 { 798 799 if_attach_internal(ifp, 0, NULL); 800 } 801 802 /* 803 * Compute the least common TSO limit. 804 */ 805 void 806 if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) 807 { 808 /* 809 * 1) If there is no limit currently, take the limit from 810 * the network adapter. 811 * 812 * 2) If the network adapter has a limit below the current 813 * limit, apply it. 814 */ 815 if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && 816 ifp->if_hw_tsomax < pmax->tsomaxbytes)) { 817 pmax->tsomaxbytes = ifp->if_hw_tsomax; 818 } 819 if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && 820 ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { 821 pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; 822 } 823 if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && 824 ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { 825 pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; 826 } 827 } 828 829 /* 830 * Update TSO limit of a network adapter. 831 * 832 * Returns zero if no change. Else non-zero. 
*/
int
if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
{
	/* Counts how many of the three limits were overwritten. */
	int retval = 0;
	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
		ifp->if_hw_tsomax = pmax->tsomaxbytes;
		retval++;
	}
	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
		retval++;
	}
	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
		retval++;
	}
	return (retval);
}

/*
 * Worker behind if_attach().
 *
 * ifp must already occupy its own slot in the ifindex table, otherwise
 * this panics.  With vmove == 0 the AF_LINK address/netmask pair is built
 * and TSO defaults are applied; with vmove != 0 (VIMAGE builds only) the
 * index stored in existing AF_LINK addresses is refreshed instead.  If
 * vmove is set and ifc is non-NULL, the cloner group is rejoined via
 * if_clone_addgroup().
 */
static void
if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
		    ifp->if_xname);

#ifdef VIMAGE
	ifp->if_vnet = curvnet;
	/* The first vnet an interface is attached in becomes its home. */
	if (ifp->if_home_vnet == NULL)
		ifp->if_home_vnet = curvnet;
#endif

	if_addgroup(ifp, IFG_ALL);

	/* Restore group membership for cloned interfaces. */
	if (vmove && ifc != NULL)
		if_clone_addgroup(ifp, ifc);

	getmicrotime(&ifp->if_lastchange);
	ifp->if_epoch = time_uptime;

	/* Fill in default methods for anything the driver left NULL. */
	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	    ("transmit and qflush must both either be set or both be NULL"));
	if (ifp->if_transmit == NULL) {
		ifp->if_transmit = if_transmit;
		ifp->if_qflush = if_qflush;
	}
	if (ifp->if_input == NULL)
		ifp->if_input = if_input_default;

	if (ifp->if_requestencap == NULL)
		ifp->if_requestencap = if_requestencap_default;

	if (!vmove) {
#ifdef MAC
		mac_ifnet_create(ifp);
#endif

		/*
		 * Create a Link Level name for this device.
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we
		 * can do a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/*
		 * One allocation holds the ifaddr followed by two
		 * socksize-byte areas: the address, then the netmask.
		 */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = ifa_alloc(ifasize, M_WAITOK);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* Step to the second area and build the netmask there. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* The mask covers exactly the bytes of the name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		/* Reliably crash if used uninitialized. */
		ifp->if_broadcastaddr = NULL;

		if (ifp->if_type == IFT_ETHER) {
			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
			    M_WAITOK | M_ZERO);
		}

#if defined(INET) || defined(INET6)
		/* Use defaults for TSO, if nothing is set */
		if (ifp->if_hw_tsomax == 0 &&
		    ifp->if_hw_tsomaxsegcount == 0 &&
		    ifp->if_hw_tsomaxsegsize == 0) {
			/*
			 * The TSO defaults needs to be such that an
			 * NFS mbuf list of 35 mbufs totalling just
			 * below 64K works and that a chain of mbufs
			 * can be defragged into at most 32 segments:
			 */
			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			ifp->if_hw_tsomaxsegcount = 35;
			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */

			/* XXX some drivers set IFCAP_TSO after ethernet attach */
			if (ifp->if_capabilities & IFCAP_TSO) {
				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
				    ifp->if_hw_tsomax,
				    ifp->if_hw_tsomaxsegcount,
				    ifp->if_hw_tsomaxsegsize);
			}
		}
#endif
	}
#ifdef VIMAGE
	else {
		/*
		 * Update the interface index in the link layer address
		 * of the interface.
		 */
		for (ifa = ifp->if_addr; ifa != NULL;
		    ifa = CK_STAILQ_NEXT(ifa, ifa_link)) {
			if (ifa->ifa_addr->sa_family == AF_LINK) {
				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
				sdl->sdl_index = ifp->if_index;
			}
		}
	}
#endif

	/* From here on the interface is on the vnet's interface list. */
	if_link_ifnet(ifp);

	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}

/*
 * SYSINIT hook: allocate the preemptible network epoch.
 */
static void
if_epochalloc(void *dummy __unused)
{

	net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT);
}
SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL);

/*
 * SYSINIT hook: run if_attachdomain1() on every interface currently on
 * the V_ifnet list.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
		if_attachdomain1(ifp);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
    if_attachdomain, NULL);

/*
 * Let every domain that has a dom_ifattach method attach its per-interface
 * data to ifp.  A repeated call for the same domain_init_status is logged
 * and ignored.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	/*
	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
	 * cannot lock ifp->if_afdata initialization, entirely.
	 */
	IF_AFDATA_LOCK(ifp);
	if (ifp->if_afdata_initialized >= domain_init_status) {
		IF_AFDATA_UNLOCK(ifp);
		log(LOG_WARNING, "%s called more than once on %s\n",
		    __func__, ifp->if_xname);
		return;
	}
	/* Only this marker is updated under the lock; see above. */
	ifp->if_afdata_initialized = domain_init_status;
	IF_AFDATA_UNLOCK(ifp);

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}
}

/*
 * Remove any unicast or broadcast network addresses from an interface.
 *
 * Each pass picks the first address on the list whose family is not
 * AF_LINK and removes it; the loop ends when only AF_LINK entries (or
 * none) are left.
 */
void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa;

	while (1) {
		struct epoch_tracker et;

		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != AF_LINK)
				break;
		}
		NET_EPOCH_EXIT(et);
		/*
		 * NOTE(review): ifa is used below after the epoch section
		 * has been left and without a reference being taken here --
		 * confirm that callers rule out a concurrent free.
		 */

		if (ifa == NULL)
			break;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/* On failure fall through to the generic removal. */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* Generic path: unlink under the address lock, then free. */
		IF_ADDR_WLOCK(ifp);
		CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
		IF_ADDR_WUNLOCK(ifp);
		ifa_free(ifa);
	}
}

/*
 * Remove any multicast network addresses from an interface when an ifnet
 * is going away.
 */
static void
if_purgemaddrs(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;

	IF_ADDR_WLOCK(ifp);
	/* Pop entries off the head until the list is empty. */
	while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
		ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs);
		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
		if_delmulti_locked(ifp, ifma, 1);
	}
	IF_ADDR_WUNLOCK(ifp);
}

/*
 * Detach an interface, removing it from the list of "active" interfaces.
 * If vmove flag is set on entry to if_detach_internal(), perform only a
 * limited subset of cleanup tasks, given that we are moving an ifnet from
 * one vnet to another, where it must be fully operational.
 *
 * XXXRW: There are some significant questions about event ordering, and
 * how to prevent things from starting to use the interface during detach.
1113 */ 1114 void 1115 if_detach(struct ifnet *ifp) 1116 { 1117 bool found; 1118 1119 CURVNET_SET_QUIET(ifp->if_vnet); 1120 found = if_unlink_ifnet(ifp, false); 1121 if (found) 1122 if_detach_internal(ifp, 0, NULL); 1123 CURVNET_RESTORE(); 1124 } 1125 1126 /* 1127 * The vmove flag, if set, indicates that we are called from a callpath 1128 * that is moving an interface to a different vnet instance. 1129 * 1130 * The shutdown flag, if set, indicates that we are called in the 1131 * process of shutting down a vnet instance. Currently only the 1132 * vnet_if_return SYSUNINIT function sets it. Note: we can be called 1133 * on a vnet instance shutdown without this flag being set, e.g., when 1134 * the cloned interfaces are destoyed as first thing of teardown. 1135 */ 1136 static int 1137 if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) 1138 { 1139 struct ifaddr *ifa; 1140 int i; 1141 struct domain *dp; 1142 #ifdef VIMAGE 1143 bool shutdown; 1144 1145 shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); 1146 #endif 1147 1148 /* 1149 * At this point we know the interface still was on the ifnet list 1150 * and we removed it so we are in a stable state. 1151 */ 1152 epoch_wait_preempt(net_epoch_preempt); 1153 1154 /* 1155 * Ensure all pending EPOCH(9) callbacks have been executed. This 1156 * fixes issues about late destruction of multicast options 1157 * which lead to leave group calls, which in turn access the 1158 * belonging ifnet structure: 1159 */ 1160 epoch_drain_callbacks(net_epoch_preempt); 1161 1162 /* 1163 * In any case (destroy or vmove) detach us from the groups 1164 * and remove/wait for pending events on the taskq. 1165 * XXX-BZ in theory an interface could still enqueue a taskq change? 1166 */ 1167 if_delgroups(ifp); 1168 1169 taskqueue_drain(taskqueue_swi, &ifp->if_linktask); 1170 taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask); 1171 1172 /* 1173 * Check if this is a cloned interface or not. 
Must do even if 1174 * shutting down as a if_vmove_reclaim() would move the ifp and 1175 * the if_clone_addgroup() will have a corrupted string overwise 1176 * from a gibberish pointer. 1177 */ 1178 if (vmove && ifcp != NULL) 1179 *ifcp = if_clone_findifc(ifp); 1180 1181 if_down(ifp); 1182 1183 #ifdef VIMAGE 1184 /* 1185 * On VNET shutdown abort here as the stack teardown will do all 1186 * the work top-down for us. 1187 */ 1188 if (shutdown) { 1189 /* Give interface users the chance to clean up. */ 1190 EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); 1191 1192 /* 1193 * In case of a vmove we are done here without error. 1194 * If we would signal an error it would lead to the same 1195 * abort as if we did not find the ifnet anymore. 1196 * if_detach() calls us in void context and does not care 1197 * about an early abort notification, so life is splendid :) 1198 */ 1199 goto finish_vnet_shutdown; 1200 } 1201 #endif 1202 1203 /* 1204 * At this point we are not tearing down a VNET and are either 1205 * going to destroy or vmove the interface and have to cleanup 1206 * accordingly. 1207 */ 1208 1209 /* 1210 * Remove routes and flush queues. 1211 */ 1212 #ifdef ALTQ 1213 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 1214 altq_disable(&ifp->if_snd); 1215 if (ALTQ_IS_ATTACHED(&ifp->if_snd)) 1216 altq_detach(&ifp->if_snd); 1217 #endif 1218 1219 if_purgeaddrs(ifp); 1220 1221 #ifdef INET 1222 in_ifdetach(ifp); 1223 #endif 1224 1225 #ifdef INET6 1226 /* 1227 * Remove all IPv6 kernel structs related to ifp. This should be done 1228 * before removing routing entries below, since IPv6 interface direct 1229 * routes are expected to be removed by the IPv6-specific kernel API. 1230 * Otherwise, the kernel will detect some inconsistency and bark it. 1231 */ 1232 in6_ifdetach(ifp); 1233 #endif 1234 if_purgemaddrs(ifp); 1235 1236 /* Announce that the interface is gone. 
*/ 1237 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 1238 EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); 1239 if (IS_DEFAULT_VNET(curvnet)) 1240 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); 1241 1242 if (!vmove) { 1243 /* 1244 * Prevent further calls into the device driver via ifnet. 1245 */ 1246 if_dead(ifp); 1247 1248 /* 1249 * Clean up all addresses. 1250 */ 1251 IF_ADDR_WLOCK(ifp); 1252 if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) { 1253 ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); 1254 CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); 1255 IF_ADDR_WUNLOCK(ifp); 1256 ifa_free(ifa); 1257 } else 1258 IF_ADDR_WUNLOCK(ifp); 1259 } 1260 1261 rt_flushifroutes(ifp); 1262 1263 #ifdef VIMAGE 1264 finish_vnet_shutdown: 1265 #endif 1266 /* 1267 * We cannot hold the lock over dom_ifdetach calls as they might 1268 * sleep, for example trying to drain a callout, thus open up the 1269 * theoretical race with re-attaching. 1270 */ 1271 IF_AFDATA_LOCK(ifp); 1272 i = ifp->if_afdata_initialized; 1273 ifp->if_afdata_initialized = 0; 1274 IF_AFDATA_UNLOCK(ifp); 1275 for (dp = domains; i > 0 && dp; dp = dp->dom_next) { 1276 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { 1277 (*dp->dom_ifdetach)(ifp, 1278 ifp->if_afdata[dp->dom_family]); 1279 ifp->if_afdata[dp->dom_family] = NULL; 1280 } 1281 } 1282 1283 return (0); 1284 } 1285 1286 #ifdef VIMAGE 1287 /* 1288 * if_vmove() performs a limited version of if_detach() in current 1289 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. 1290 * An attempt is made to shrink if_index in current vnet, find an 1291 * unused if_index in target vnet and calls if_grow() if necessary, 1292 * and finally find an unused if_xname for the target vnet. 
1293 */ 1294 static int 1295 if_vmove(struct ifnet *ifp, struct vnet *new_vnet) 1296 { 1297 struct if_clone *ifc; 1298 #ifdef DEV_BPF 1299 u_int bif_dlt, bif_hdrlen; 1300 #endif 1301 void *old; 1302 int rc; 1303 1304 #ifdef DEV_BPF 1305 /* 1306 * if_detach_internal() will call the eventhandler to notify 1307 * interface departure. That will detach if_bpf. We need to 1308 * safe the dlt and hdrlen so we can re-attach it later. 1309 */ 1310 bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); 1311 #endif 1312 1313 /* 1314 * Detach from current vnet, but preserve LLADDR info, do not 1315 * mark as dead etc. so that the ifnet can be reattached later. 1316 * If we cannot find it, we lost the race to someone else. 1317 */ 1318 rc = if_detach_internal(ifp, 1, &ifc); 1319 if (rc != 0) 1320 return (rc); 1321 1322 /* 1323 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink 1324 * the if_index for that vnet if possible. 1325 * 1326 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, 1327 * or we'd lock on one vnet and unlock on another. 1328 */ 1329 IFNET_WLOCK(); 1330 ifindex_free_locked(ifp->if_index); 1331 IFNET_WUNLOCK(); 1332 1333 1334 /* Don't re-attach DYING interfaces. */ 1335 if (ifp->if_flags & IFF_DYING) 1336 return (0); 1337 1338 /* 1339 * Perform interface-specific reassignment tasks, if provided by 1340 * the driver. 1341 */ 1342 if (ifp->if_reassign != NULL) 1343 ifp->if_reassign(ifp, new_vnet, NULL); 1344 1345 /* 1346 * Switch to the context of the target vnet. 
1347 */ 1348 CURVNET_SET_QUIET(new_vnet); 1349 restart: 1350 IFNET_WLOCK(); 1351 ifp->if_index = ifindex_alloc(&old); 1352 if (__predict_false(ifp->if_index == USHRT_MAX)) { 1353 IFNET_WUNLOCK(); 1354 epoch_wait_preempt(net_epoch_preempt); 1355 free(old, M_IFNET); 1356 goto restart; 1357 } 1358 ifnet_setbyindex(ifp->if_index, ifp); 1359 IFNET_WUNLOCK(); 1360 1361 if_attach_internal(ifp, 1, ifc); 1362 1363 #ifdef DEV_BPF 1364 if (ifp->if_bpf == NULL) 1365 bpfattach(ifp, bif_dlt, bif_hdrlen); 1366 #endif 1367 1368 CURVNET_RESTORE(); 1369 return (0); 1370 } 1371 1372 /* 1373 * Move an ifnet to or from another child prison/vnet, specified by the jail id. 1374 */ 1375 static int 1376 if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) 1377 { 1378 struct prison *pr; 1379 struct ifnet *difp; 1380 int error; 1381 bool found; 1382 bool shutdown; 1383 1384 /* Try to find the prison within our visibility. */ 1385 sx_slock(&allprison_lock); 1386 pr = prison_find_child(td->td_ucred->cr_prison, jid); 1387 sx_sunlock(&allprison_lock); 1388 if (pr == NULL) 1389 return (ENXIO); 1390 prison_hold_locked(pr); 1391 mtx_unlock(&pr->pr_mtx); 1392 1393 /* Do not try to move the iface from and to the same prison. */ 1394 if (pr->pr_vnet == ifp->if_vnet) { 1395 prison_free(pr); 1396 return (EEXIST); 1397 } 1398 1399 /* Make sure the named iface does not exists in the dst. prison/vnet. */ 1400 /* XXX Lock interfaces to avoid races. */ 1401 CURVNET_SET_QUIET(pr->pr_vnet); 1402 difp = ifunit(ifname); 1403 if (difp != NULL) { 1404 CURVNET_RESTORE(); 1405 prison_free(pr); 1406 return (EEXIST); 1407 } 1408 1409 /* Make sure the VNET is stable. */ 1410 shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); 1411 if (shutdown) { 1412 CURVNET_RESTORE(); 1413 prison_free(pr); 1414 return (EBUSY); 1415 } 1416 CURVNET_RESTORE(); 1417 1418 found = if_unlink_ifnet(ifp, true); 1419 MPASS(found); 1420 1421 /* Move the interface into the child jail/vnet. 
*/ 1422 error = if_vmove(ifp, pr->pr_vnet); 1423 1424 /* Report the new if_xname back to the userland on success. */ 1425 if (error == 0) 1426 sprintf(ifname, "%s", ifp->if_xname); 1427 1428 prison_free(pr); 1429 return (error); 1430 } 1431 1432 static int 1433 if_vmove_reclaim(struct thread *td, char *ifname, int jid) 1434 { 1435 struct prison *pr; 1436 struct vnet *vnet_dst; 1437 struct ifnet *ifp; 1438 int error, found; 1439 bool shutdown; 1440 1441 /* Try to find the prison within our visibility. */ 1442 sx_slock(&allprison_lock); 1443 pr = prison_find_child(td->td_ucred->cr_prison, jid); 1444 sx_sunlock(&allprison_lock); 1445 if (pr == NULL) 1446 return (ENXIO); 1447 prison_hold_locked(pr); 1448 mtx_unlock(&pr->pr_mtx); 1449 1450 /* Make sure the named iface exists in the source prison/vnet. */ 1451 CURVNET_SET(pr->pr_vnet); 1452 ifp = ifunit(ifname); /* XXX Lock to avoid races. */ 1453 if (ifp == NULL) { 1454 CURVNET_RESTORE(); 1455 prison_free(pr); 1456 return (ENXIO); 1457 } 1458 1459 /* Do not try to move the iface from and to the same prison. */ 1460 vnet_dst = TD_TO_VNET(td); 1461 if (vnet_dst == ifp->if_vnet) { 1462 CURVNET_RESTORE(); 1463 prison_free(pr); 1464 return (EEXIST); 1465 } 1466 1467 /* Make sure the VNET is stable. */ 1468 shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); 1469 if (shutdown) { 1470 CURVNET_RESTORE(); 1471 prison_free(pr); 1472 return (EBUSY); 1473 } 1474 1475 /* Get interface back from child jail/vnet. */ 1476 found = if_unlink_ifnet(ifp, true); 1477 MPASS(found); 1478 error = if_vmove(ifp, vnet_dst); 1479 CURVNET_RESTORE(); 1480 1481 /* Report the new if_xname back to the userland on success. 
*/ 1482 if (error == 0) 1483 sprintf(ifname, "%s", ifp->if_xname); 1484 1485 prison_free(pr); 1486 return (error); 1487 } 1488 #endif /* VIMAGE */ 1489 1490 /* 1491 * Add a group to an interface 1492 */ 1493 int 1494 if_addgroup(struct ifnet *ifp, const char *groupname) 1495 { 1496 struct ifg_list *ifgl; 1497 struct ifg_group *ifg = NULL; 1498 struct ifg_member *ifgm; 1499 int new = 0; 1500 1501 if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && 1502 groupname[strlen(groupname) - 1] <= '9') 1503 return (EINVAL); 1504 1505 IFNET_WLOCK(); 1506 CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 1507 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { 1508 IFNET_WUNLOCK(); 1509 return (EEXIST); 1510 } 1511 1512 if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) { 1513 IFNET_WUNLOCK(); 1514 return (ENOMEM); 1515 } 1516 1517 if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) { 1518 free(ifgl, M_TEMP); 1519 IFNET_WUNLOCK(); 1520 return (ENOMEM); 1521 } 1522 1523 CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) 1524 if (!strcmp(ifg->ifg_group, groupname)) 1525 break; 1526 1527 if (ifg == NULL) { 1528 if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) { 1529 free(ifgl, M_TEMP); 1530 free(ifgm, M_TEMP); 1531 IFNET_WUNLOCK(); 1532 return (ENOMEM); 1533 } 1534 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); 1535 ifg->ifg_refcnt = 0; 1536 CK_STAILQ_INIT(&ifg->ifg_members); 1537 CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); 1538 new = 1; 1539 } 1540 1541 ifg->ifg_refcnt++; 1542 ifgl->ifgl_group = ifg; 1543 ifgm->ifgm_ifp = ifp; 1544 1545 IF_ADDR_WLOCK(ifp); 1546 CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); 1547 CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); 1548 IF_ADDR_WUNLOCK(ifp); 1549 1550 IFNET_WUNLOCK(); 1551 1552 if (new) 1553 EVENTHANDLER_INVOKE(group_attach_event, ifg); 1554 EVENTHANDLER_INVOKE(group_change_event, groupname); 1555 1556 return (0); 1557 } 1558 1559 /* 1560 * Helper 
function to remove a group out of an interface. Expects the global 1561 * ifnet lock to be write-locked, and drops it before returning. 1562 */ 1563 static void 1564 _if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl, 1565 const char *groupname) 1566 { 1567 struct ifg_member *ifgm; 1568 bool freeifgl; 1569 1570 IFNET_WLOCK_ASSERT(); 1571 1572 IF_ADDR_WLOCK(ifp); 1573 CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); 1574 IF_ADDR_WUNLOCK(ifp); 1575 1576 CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) { 1577 if (ifgm->ifgm_ifp == ifp) { 1578 CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, 1579 ifg_member, ifgm_next); 1580 break; 1581 } 1582 } 1583 1584 if (--ifgl->ifgl_group->ifg_refcnt == 0) { 1585 CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, 1586 ifg_next); 1587 freeifgl = true; 1588 } else { 1589 freeifgl = false; 1590 } 1591 IFNET_WUNLOCK(); 1592 1593 epoch_wait_preempt(net_epoch_preempt); 1594 if (freeifgl) { 1595 EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); 1596 free(ifgl->ifgl_group, M_TEMP); 1597 } 1598 free(ifgm, M_TEMP); 1599 free(ifgl, M_TEMP); 1600 1601 EVENTHANDLER_INVOKE(group_change_event, groupname); 1602 } 1603 1604 /* 1605 * Remove a group from an interface 1606 */ 1607 int 1608 if_delgroup(struct ifnet *ifp, const char *groupname) 1609 { 1610 struct ifg_list *ifgl; 1611 1612 IFNET_WLOCK(); 1613 CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 1614 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) 1615 break; 1616 if (ifgl == NULL) { 1617 IFNET_WUNLOCK(); 1618 return (ENOENT); 1619 } 1620 1621 _if_delgroup_locked(ifp, ifgl, groupname); 1622 1623 return (0); 1624 } 1625 1626 /* 1627 * Remove an interface from all groups 1628 */ 1629 static void 1630 if_delgroups(struct ifnet *ifp) 1631 { 1632 struct ifg_list *ifgl; 1633 char groupname[IFNAMSIZ]; 1634 1635 IFNET_WLOCK(); 1636 while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) { 1637 strlcpy(groupname, 
ifgl->ifgl_group->ifg_group, IFNAMSIZ); 1638 _if_delgroup_locked(ifp, ifgl, groupname); 1639 IFNET_WLOCK(); 1640 } 1641 IFNET_WUNLOCK(); 1642 } 1643 1644 static char * 1645 ifgr_group_get(void *ifgrp) 1646 { 1647 union ifgroupreq_union *ifgrup; 1648 1649 ifgrup = ifgrp; 1650 #ifdef COMPAT_FREEBSD32 1651 if (SV_CURPROC_FLAG(SV_ILP32)) 1652 return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]); 1653 #endif 1654 return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]); 1655 } 1656 1657 static struct ifg_req * 1658 ifgr_groups_get(void *ifgrp) 1659 { 1660 union ifgroupreq_union *ifgrup; 1661 1662 ifgrup = ifgrp; 1663 #ifdef COMPAT_FREEBSD32 1664 if (SV_CURPROC_FLAG(SV_ILP32)) 1665 return ((struct ifg_req *)(uintptr_t) 1666 ifgrup->ifgr32.ifgr_ifgru.ifgru_groups); 1667 #endif 1668 return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups); 1669 } 1670 1671 /* 1672 * Stores all groups from an interface in memory pointed to by ifgr. 1673 */ 1674 static int 1675 if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp) 1676 { 1677 int len, error; 1678 struct ifg_list *ifgl; 1679 struct ifg_req ifgrq, *ifgp; 1680 1681 NET_EPOCH_ASSERT(); 1682 1683 if (ifgr->ifgr_len == 0) { 1684 CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 1685 ifgr->ifgr_len += sizeof(struct ifg_req); 1686 return (0); 1687 } 1688 1689 len = ifgr->ifgr_len; 1690 ifgp = ifgr_groups_get(ifgr); 1691 /* XXX: wire */ 1692 CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1693 if (len < sizeof(ifgrq)) 1694 return (EINVAL); 1695 bzero(&ifgrq, sizeof ifgrq); 1696 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, 1697 sizeof(ifgrq.ifgrq_group)); 1698 if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) 1699 return (error); 1700 len -= sizeof(ifgrq); 1701 ifgp++; 1702 } 1703 1704 return (0); 1705 } 1706 1707 /* 1708 * Stores all members of a group in memory pointed to by igfr 1709 */ 1710 static int 1711 if_getgroupmembers(struct ifgroupreq *ifgr) 1712 { 1713 struct ifg_group *ifg; 1714 struct ifg_member *ifgm; 1715 
struct ifg_req ifgrq, *ifgp; 1716 int len, error; 1717 1718 IFNET_RLOCK(); 1719 CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) 1720 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) 1721 break; 1722 if (ifg == NULL) { 1723 IFNET_RUNLOCK(); 1724 return (ENOENT); 1725 } 1726 1727 if (ifgr->ifgr_len == 0) { 1728 CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) 1729 ifgr->ifgr_len += sizeof(ifgrq); 1730 IFNET_RUNLOCK(); 1731 return (0); 1732 } 1733 1734 len = ifgr->ifgr_len; 1735 ifgp = ifgr_groups_get(ifgr); 1736 CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { 1737 if (len < sizeof(ifgrq)) { 1738 IFNET_RUNLOCK(); 1739 return (EINVAL); 1740 } 1741 bzero(&ifgrq, sizeof ifgrq); 1742 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, 1743 sizeof(ifgrq.ifgrq_member)); 1744 if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { 1745 IFNET_RUNLOCK(); 1746 return (error); 1747 } 1748 len -= sizeof(ifgrq); 1749 ifgp++; 1750 } 1751 IFNET_RUNLOCK(); 1752 1753 return (0); 1754 } 1755 1756 /* 1757 * Return counter values from counter(9)s stored in ifnet. 1758 */ 1759 uint64_t 1760 if_get_counter_default(struct ifnet *ifp, ift_counter cnt) 1761 { 1762 1763 KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); 1764 1765 return (counter_u64_fetch(ifp->if_counters[cnt])); 1766 } 1767 1768 /* 1769 * Increase an ifnet counter. Usually used for counters shared 1770 * between the stack and a driver, but function supports them all. 1771 */ 1772 void 1773 if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) 1774 { 1775 1776 KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); 1777 1778 counter_u64_add(ifp->if_counters[cnt], inc); 1779 } 1780 1781 /* 1782 * Copy data from ifnet to userland API structure if_data. 
1783 */ 1784 void 1785 if_data_copy(struct ifnet *ifp, struct if_data *ifd) 1786 { 1787 1788 ifd->ifi_type = ifp->if_type; 1789 ifd->ifi_physical = 0; 1790 ifd->ifi_addrlen = ifp->if_addrlen; 1791 ifd->ifi_hdrlen = ifp->if_hdrlen; 1792 ifd->ifi_link_state = ifp->if_link_state; 1793 ifd->ifi_vhid = 0; 1794 ifd->ifi_datalen = sizeof(struct if_data); 1795 ifd->ifi_mtu = ifp->if_mtu; 1796 ifd->ifi_metric = ifp->if_metric; 1797 ifd->ifi_baudrate = ifp->if_baudrate; 1798 ifd->ifi_hwassist = ifp->if_hwassist; 1799 ifd->ifi_epoch = ifp->if_epoch; 1800 ifd->ifi_lastchange = ifp->if_lastchange; 1801 1802 ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); 1803 ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); 1804 ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); 1805 ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); 1806 ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); 1807 ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); 1808 ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); 1809 ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); 1810 ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); 1811 ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); 1812 ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); 1813 ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); 1814 } 1815 1816 /* 1817 * Initialization, destruction and refcounting functions for ifaddrs. 
1818 */ 1819 struct ifaddr * 1820 ifa_alloc(size_t size, int flags) 1821 { 1822 struct ifaddr *ifa; 1823 1824 KASSERT(size >= sizeof(struct ifaddr), 1825 ("%s: invalid size %zu", __func__, size)); 1826 1827 ifa = malloc(size, M_IFADDR, M_ZERO | flags); 1828 if (ifa == NULL) 1829 return (NULL); 1830 1831 if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) 1832 goto fail; 1833 if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) 1834 goto fail; 1835 if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) 1836 goto fail; 1837 if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) 1838 goto fail; 1839 1840 refcount_init(&ifa->ifa_refcnt, 1); 1841 1842 return (ifa); 1843 1844 fail: 1845 /* free(NULL) is okay */ 1846 counter_u64_free(ifa->ifa_opackets); 1847 counter_u64_free(ifa->ifa_ipackets); 1848 counter_u64_free(ifa->ifa_obytes); 1849 counter_u64_free(ifa->ifa_ibytes); 1850 free(ifa, M_IFADDR); 1851 1852 return (NULL); 1853 } 1854 1855 void 1856 ifa_ref(struct ifaddr *ifa) 1857 { 1858 1859 refcount_acquire(&ifa->ifa_refcnt); 1860 } 1861 1862 static void 1863 ifa_destroy(epoch_context_t ctx) 1864 { 1865 struct ifaddr *ifa; 1866 1867 ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx); 1868 counter_u64_free(ifa->ifa_opackets); 1869 counter_u64_free(ifa->ifa_ipackets); 1870 counter_u64_free(ifa->ifa_obytes); 1871 counter_u64_free(ifa->ifa_ibytes); 1872 free(ifa, M_IFADDR); 1873 } 1874 1875 void 1876 ifa_free(struct ifaddr *ifa) 1877 { 1878 1879 if (refcount_release(&ifa->ifa_refcnt)) 1880 NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx); 1881 } 1882 1883 /* 1884 * XXX: Because sockaddr_dl has deeper structure than the sockaddr 1885 * structs used to represent other address families, it is necessary 1886 * to perform a different comparison. 
1887 */ 1888 1889 #define sa_dl_equal(a1, a2) \ 1890 ((((const struct sockaddr_dl *)(a1))->sdl_len == \ 1891 ((const struct sockaddr_dl *)(a2))->sdl_len) && \ 1892 (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ 1893 CLLADDR((const struct sockaddr_dl *)(a2)), \ 1894 ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) 1895 1896 /* 1897 * Locate an interface based on a complete address. 1898 */ 1899 /*ARGSUSED*/ 1900 struct ifaddr * 1901 ifa_ifwithaddr(const struct sockaddr *addr) 1902 { 1903 struct ifnet *ifp; 1904 struct ifaddr *ifa; 1905 1906 NET_EPOCH_ASSERT(); 1907 1908 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 1909 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1910 if (ifa->ifa_addr->sa_family != addr->sa_family) 1911 continue; 1912 if (sa_equal(addr, ifa->ifa_addr)) { 1913 goto done; 1914 } 1915 /* IP6 doesn't have broadcast */ 1916 if ((ifp->if_flags & IFF_BROADCAST) && 1917 ifa->ifa_broadaddr && 1918 ifa->ifa_broadaddr->sa_len != 0 && 1919 sa_equal(ifa->ifa_broadaddr, addr)) { 1920 goto done; 1921 } 1922 } 1923 } 1924 ifa = NULL; 1925 done: 1926 return (ifa); 1927 } 1928 1929 int 1930 ifa_ifwithaddr_check(const struct sockaddr *addr) 1931 { 1932 struct epoch_tracker et; 1933 int rc; 1934 1935 NET_EPOCH_ENTER(et); 1936 rc = (ifa_ifwithaddr(addr) != NULL); 1937 NET_EPOCH_EXIT(et); 1938 return (rc); 1939 } 1940 1941 /* 1942 * Locate an interface based on the broadcast address. 
1943 */ 1944 /* ARGSUSED */ 1945 struct ifaddr * 1946 ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) 1947 { 1948 struct ifnet *ifp; 1949 struct ifaddr *ifa; 1950 1951 NET_EPOCH_ASSERT(); 1952 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 1953 if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) 1954 continue; 1955 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1956 if (ifa->ifa_addr->sa_family != addr->sa_family) 1957 continue; 1958 if ((ifp->if_flags & IFF_BROADCAST) && 1959 ifa->ifa_broadaddr && 1960 ifa->ifa_broadaddr->sa_len != 0 && 1961 sa_equal(ifa->ifa_broadaddr, addr)) { 1962 goto done; 1963 } 1964 } 1965 } 1966 ifa = NULL; 1967 done: 1968 return (ifa); 1969 } 1970 1971 /* 1972 * Locate the point to point interface with a given destination address. 1973 */ 1974 /*ARGSUSED*/ 1975 struct ifaddr * 1976 ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) 1977 { 1978 struct ifnet *ifp; 1979 struct ifaddr *ifa; 1980 1981 NET_EPOCH_ASSERT(); 1982 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 1983 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) 1984 continue; 1985 if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) 1986 continue; 1987 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1988 if (ifa->ifa_addr->sa_family != addr->sa_family) 1989 continue; 1990 if (ifa->ifa_dstaddr != NULL && 1991 sa_equal(addr, ifa->ifa_dstaddr)) { 1992 goto done; 1993 } 1994 } 1995 } 1996 ifa = NULL; 1997 done: 1998 return (ifa); 1999 } 2000 2001 /* 2002 * Find an interface on a specific network. If many, choice 2003 * is most specific found. 
2004 */ 2005 struct ifaddr * 2006 ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) 2007 { 2008 struct ifnet *ifp; 2009 struct ifaddr *ifa; 2010 struct ifaddr *ifa_maybe = NULL; 2011 u_int af = addr->sa_family; 2012 const char *addr_data = addr->sa_data, *cplim; 2013 2014 NET_EPOCH_ASSERT(); 2015 /* 2016 * AF_LINK addresses can be looked up directly by their index number, 2017 * so do that if we can. 2018 */ 2019 if (af == AF_LINK) { 2020 const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; 2021 if (sdl->sdl_index && sdl->sdl_index <= V_if_index) 2022 return (ifaddr_byindex(sdl->sdl_index)); 2023 } 2024 2025 /* 2026 * Scan though each interface, looking for ones that have addresses 2027 * in this address family and the requested fib. 2028 */ 2029 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 2030 if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) 2031 continue; 2032 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 2033 const char *cp, *cp2, *cp3; 2034 2035 if (ifa->ifa_addr->sa_family != af) 2036 next: continue; 2037 if (af == AF_INET && 2038 ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { 2039 /* 2040 * This is a bit broken as it doesn't 2041 * take into account that the remote end may 2042 * be a single node in the network we are 2043 * looking for. 2044 * The trouble is that we don't know the 2045 * netmask for the remote end. 2046 */ 2047 if (ifa->ifa_dstaddr != NULL && 2048 sa_equal(addr, ifa->ifa_dstaddr)) { 2049 goto done; 2050 } 2051 } else { 2052 /* 2053 * Scan all the bits in the ifa's address. 2054 * If a bit dissagrees with what we are 2055 * looking for, mask it with the netmask 2056 * to see if it really matters. 
2057 * (A byte at a time) 2058 */ 2059 if (ifa->ifa_netmask == 0) 2060 continue; 2061 cp = addr_data; 2062 cp2 = ifa->ifa_addr->sa_data; 2063 cp3 = ifa->ifa_netmask->sa_data; 2064 cplim = ifa->ifa_netmask->sa_len 2065 + (char *)ifa->ifa_netmask; 2066 while (cp3 < cplim) 2067 if ((*cp++ ^ *cp2++) & *cp3++) 2068 goto next; /* next address! */ 2069 /* 2070 * If the netmask of what we just found 2071 * is more specific than what we had before 2072 * (if we had one), or if the virtual status 2073 * of new prefix is better than of the old one, 2074 * then remember the new one before continuing 2075 * to search for an even better one. 2076 */ 2077 if (ifa_maybe == NULL || 2078 ifa_preferred(ifa_maybe, ifa) || 2079 rn_refines((caddr_t)ifa->ifa_netmask, 2080 (caddr_t)ifa_maybe->ifa_netmask)) { 2081 ifa_maybe = ifa; 2082 } 2083 } 2084 } 2085 } 2086 ifa = ifa_maybe; 2087 ifa_maybe = NULL; 2088 done: 2089 return (ifa); 2090 } 2091 2092 /* 2093 * Find an interface address specific to an interface best matching 2094 * a given address. 
2095 */ 2096 struct ifaddr * 2097 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) 2098 { 2099 struct ifaddr *ifa; 2100 const char *cp, *cp2, *cp3; 2101 char *cplim; 2102 struct ifaddr *ifa_maybe = NULL; 2103 u_int af = addr->sa_family; 2104 2105 if (af >= AF_MAX) 2106 return (NULL); 2107 2108 NET_EPOCH_ASSERT(); 2109 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 2110 if (ifa->ifa_addr->sa_family != af) 2111 continue; 2112 if (ifa_maybe == NULL) 2113 ifa_maybe = ifa; 2114 if (ifa->ifa_netmask == 0) { 2115 if (sa_equal(addr, ifa->ifa_addr) || 2116 (ifa->ifa_dstaddr && 2117 sa_equal(addr, ifa->ifa_dstaddr))) 2118 goto done; 2119 continue; 2120 } 2121 if (ifp->if_flags & IFF_POINTOPOINT) { 2122 if (sa_equal(addr, ifa->ifa_dstaddr)) 2123 goto done; 2124 } else { 2125 cp = addr->sa_data; 2126 cp2 = ifa->ifa_addr->sa_data; 2127 cp3 = ifa->ifa_netmask->sa_data; 2128 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; 2129 for (; cp3 < cplim; cp3++) 2130 if ((*cp++ ^ *cp2++) & *cp3) 2131 break; 2132 if (cp3 == cplim) 2133 goto done; 2134 } 2135 } 2136 ifa = ifa_maybe; 2137 done: 2138 return (ifa); 2139 } 2140 2141 /* 2142 * See whether new ifa is better than current one: 2143 * 1) A non-virtual one is preferred over virtual. 2144 * 2) A virtual in master state preferred over any other state. 2145 * 2146 * Used in several address selecting functions. 2147 */ 2148 int 2149 ifa_preferred(struct ifaddr *cur, struct ifaddr *next) 2150 { 2151 2152 return (cur->ifa_carp && (!next->ifa_carp || 2153 ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); 2154 } 2155 2156 struct sockaddr_dl * 2157 link_alloc_sdl(size_t size, int flags) 2158 { 2159 2160 return (malloc(size, M_TEMP, flags)); 2161 } 2162 2163 void 2164 link_free_sdl(struct sockaddr *sa) 2165 { 2166 free(sa, M_TEMP); 2167 } 2168 2169 /* 2170 * Fills in given sdl with interface basic info. 2171 * Returns pointer to filled sdl. 
2172 */ 2173 struct sockaddr_dl * 2174 link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) 2175 { 2176 struct sockaddr_dl *sdl; 2177 2178 sdl = (struct sockaddr_dl *)paddr; 2179 memset(sdl, 0, sizeof(struct sockaddr_dl)); 2180 sdl->sdl_len = sizeof(struct sockaddr_dl); 2181 sdl->sdl_family = AF_LINK; 2182 sdl->sdl_index = ifp->if_index; 2183 sdl->sdl_type = iftype; 2184 2185 return (sdl); 2186 } 2187 2188 /* 2189 * Mark an interface down and notify protocols of 2190 * the transition. 2191 */ 2192 static void 2193 if_unroute(struct ifnet *ifp, int flag, int fam) 2194 { 2195 struct ifaddr *ifa; 2196 2197 KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); 2198 2199 ifp->if_flags &= ~flag; 2200 getmicrotime(&ifp->if_lastchange); 2201 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 2202 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) 2203 pfctlinput(PRC_IFDOWN, ifa->ifa_addr); 2204 ifp->if_qflush(ifp); 2205 2206 if (ifp->if_carp) 2207 (*carp_linkstate_p)(ifp); 2208 rt_ifmsg(ifp); 2209 } 2210 2211 /* 2212 * Mark an interface up and notify protocols of 2213 * the transition. 
2214 */ 2215 static void 2216 if_route(struct ifnet *ifp, int flag, int fam) 2217 { 2218 struct ifaddr *ifa; 2219 2220 KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); 2221 2222 ifp->if_flags |= flag; 2223 getmicrotime(&ifp->if_lastchange); 2224 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 2225 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) 2226 pfctlinput(PRC_IFUP, ifa->ifa_addr); 2227 if (ifp->if_carp) 2228 (*carp_linkstate_p)(ifp); 2229 rt_ifmsg(ifp); 2230 #ifdef INET6 2231 in6_if_up(ifp); 2232 #endif 2233 } 2234 2235 void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ 2236 void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ 2237 struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); 2238 struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); 2239 int (*vlan_tag_p)(struct ifnet *, uint16_t *); 2240 int (*vlan_pcp_p)(struct ifnet *, uint16_t *); 2241 int (*vlan_setcookie_p)(struct ifnet *, void *); 2242 void *(*vlan_cookie_p)(struct ifnet *); 2243 2244 /* 2245 * Handle a change in the interface link state. To avoid LORs 2246 * between driver lock and upper layer locks, as well as possible 2247 * recursions, we post event to taskqueue, and all job 2248 * is done in static do_link_state_change(). 2249 */ 2250 void 2251 if_link_state_change(struct ifnet *ifp, int link_state) 2252 { 2253 /* Return if state hasn't changed. */ 2254 if (ifp->if_link_state == link_state) 2255 return; 2256 2257 ifp->if_link_state = link_state; 2258 2259 /* XXXGL: reference ifp? 
*/ 2260 taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); 2261 } 2262 2263 static void 2264 do_link_state_change(void *arg, int pending) 2265 { 2266 struct ifnet *ifp; 2267 int link_state; 2268 2269 ifp = arg; 2270 link_state = ifp->if_link_state; 2271 2272 CURVNET_SET(ifp->if_vnet); 2273 rt_ifmsg(ifp); 2274 if (ifp->if_vlantrunk != NULL) 2275 (*vlan_link_state_p)(ifp); 2276 2277 if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && 2278 ifp->if_l2com != NULL) 2279 (*ng_ether_link_state_p)(ifp, link_state); 2280 if (ifp->if_carp) 2281 (*carp_linkstate_p)(ifp); 2282 if (ifp->if_bridge) 2283 ifp->if_bridge_linkstate(ifp); 2284 if (ifp->if_lagg) 2285 (*lagg_linkstate_p)(ifp, link_state); 2286 2287 if (IS_DEFAULT_VNET(curvnet)) 2288 devctl_notify("IFNET", ifp->if_xname, 2289 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", 2290 NULL); 2291 if (pending > 1) 2292 if_printf(ifp, "%d link states coalesced\n", pending); 2293 if (log_link_state_change) 2294 if_printf(ifp, "link state changed to %s\n", 2295 (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); 2296 EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); 2297 CURVNET_RESTORE(); 2298 } 2299 2300 /* 2301 * Mark an interface down and notify protocols of 2302 * the transition. 2303 */ 2304 void 2305 if_down(struct ifnet *ifp) 2306 { 2307 2308 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); 2309 if_unroute(ifp, IFF_UP, AF_UNSPEC); 2310 } 2311 2312 /* 2313 * Mark an interface up and notify protocols of 2314 * the transition. 2315 */ 2316 void 2317 if_up(struct ifnet *ifp) 2318 { 2319 2320 if_route(ifp, IFF_UP, AF_UNSPEC); 2321 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); 2322 } 2323 2324 /* 2325 * Flush an interface queue. 
2326 */ 2327 void 2328 if_qflush(struct ifnet *ifp) 2329 { 2330 struct mbuf *m, *n; 2331 struct ifaltq *ifq; 2332 2333 ifq = &ifp->if_snd; 2334 IFQ_LOCK(ifq); 2335 #ifdef ALTQ 2336 if (ALTQ_IS_ENABLED(ifq)) 2337 ALTQ_PURGE(ifq); 2338 #endif 2339 n = ifq->ifq_head; 2340 while ((m = n) != NULL) { 2341 n = m->m_nextpkt; 2342 m_freem(m); 2343 } 2344 ifq->ifq_head = 0; 2345 ifq->ifq_tail = 0; 2346 ifq->ifq_len = 0; 2347 IFQ_UNLOCK(ifq); 2348 } 2349 2350 /* 2351 * Map interface name to interface structure pointer, with or without 2352 * returning a reference. 2353 */ 2354 struct ifnet * 2355 ifunit_ref(const char *name) 2356 { 2357 struct epoch_tracker et; 2358 struct ifnet *ifp; 2359 2360 NET_EPOCH_ENTER(et); 2361 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 2362 if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && 2363 !(ifp->if_flags & IFF_DYING)) 2364 break; 2365 } 2366 if (ifp != NULL) 2367 if_ref(ifp); 2368 NET_EPOCH_EXIT(et); 2369 return (ifp); 2370 } 2371 2372 struct ifnet * 2373 ifunit(const char *name) 2374 { 2375 struct epoch_tracker et; 2376 struct ifnet *ifp; 2377 2378 NET_EPOCH_ENTER(et); 2379 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 2380 if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) 2381 break; 2382 } 2383 NET_EPOCH_EXIT(et); 2384 return (ifp); 2385 } 2386 2387 void * 2388 ifr_buffer_get_buffer(void *data) 2389 { 2390 union ifreq_union *ifrup; 2391 2392 ifrup = data; 2393 #ifdef COMPAT_FREEBSD32 2394 if (SV_CURPROC_FLAG(SV_ILP32)) 2395 return ((void *)(uintptr_t) 2396 ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); 2397 #endif 2398 return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); 2399 } 2400 2401 static void 2402 ifr_buffer_set_buffer_null(void *data) 2403 { 2404 union ifreq_union *ifrup; 2405 2406 ifrup = data; 2407 #ifdef COMPAT_FREEBSD32 2408 if (SV_CURPROC_FLAG(SV_ILP32)) 2409 ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; 2410 else 2411 #endif 2412 ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; 2413 } 2414 2415 size_t 2416 ifr_buffer_get_length(void 
*data) 2417 { 2418 union ifreq_union *ifrup; 2419 2420 ifrup = data; 2421 #ifdef COMPAT_FREEBSD32 2422 if (SV_CURPROC_FLAG(SV_ILP32)) 2423 return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); 2424 #endif 2425 return (ifrup->ifr.ifr_ifru.ifru_buffer.length); 2426 } 2427 2428 static void 2429 ifr_buffer_set_length(void *data, size_t len) 2430 { 2431 union ifreq_union *ifrup; 2432 2433 ifrup = data; 2434 #ifdef COMPAT_FREEBSD32 2435 if (SV_CURPROC_FLAG(SV_ILP32)) 2436 ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; 2437 else 2438 #endif 2439 ifrup->ifr.ifr_ifru.ifru_buffer.length = len; 2440 } 2441 2442 void * 2443 ifr_data_get_ptr(void *ifrp) 2444 { 2445 union ifreq_union *ifrup; 2446 2447 ifrup = ifrp; 2448 #ifdef COMPAT_FREEBSD32 2449 if (SV_CURPROC_FLAG(SV_ILP32)) 2450 return ((void *)(uintptr_t) 2451 ifrup->ifr32.ifr_ifru.ifru_data); 2452 #endif 2453 return (ifrup->ifr.ifr_ifru.ifru_data); 2454 } 2455 2456 /* 2457 * Hardware specific interface ioctls. 2458 */ 2459 int 2460 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) 2461 { 2462 struct ifreq *ifr; 2463 int error = 0, do_ifup = 0; 2464 int new_flags, temp_flags; 2465 size_t namelen, onamelen; 2466 size_t descrlen; 2467 char *descrbuf, *odescrbuf; 2468 char new_name[IFNAMSIZ]; 2469 struct ifaddr *ifa; 2470 struct sockaddr_dl *sdl; 2471 2472 ifr = (struct ifreq *)data; 2473 switch (cmd) { 2474 case SIOCGIFINDEX: 2475 ifr->ifr_index = ifp->if_index; 2476 break; 2477 2478 case SIOCGIFFLAGS: 2479 temp_flags = ifp->if_flags | ifp->if_drv_flags; 2480 ifr->ifr_flags = temp_flags & 0xffff; 2481 ifr->ifr_flagshigh = temp_flags >> 16; 2482 break; 2483 2484 case SIOCGIFCAP: 2485 ifr->ifr_reqcap = ifp->if_capabilities; 2486 ifr->ifr_curcap = ifp->if_capenable; 2487 break; 2488 2489 case SIOCGIFDATA: 2490 { 2491 struct if_data ifd; 2492 2493 /* Ensure uninitialised padding is not leaked. 
*/ 2494 memset(&ifd, 0, sizeof(ifd)); 2495 2496 if_data_copy(ifp, &ifd); 2497 error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd)); 2498 break; 2499 } 2500 2501 #ifdef MAC 2502 case SIOCGIFMAC: 2503 error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); 2504 break; 2505 #endif 2506 2507 case SIOCGIFMETRIC: 2508 ifr->ifr_metric = ifp->if_metric; 2509 break; 2510 2511 case SIOCGIFMTU: 2512 ifr->ifr_mtu = ifp->if_mtu; 2513 break; 2514 2515 case SIOCGIFPHYS: 2516 /* XXXGL: did this ever worked? */ 2517 ifr->ifr_phys = 0; 2518 break; 2519 2520 case SIOCGIFDESCR: 2521 error = 0; 2522 sx_slock(&ifdescr_sx); 2523 if (ifp->if_description == NULL) 2524 error = ENOMSG; 2525 else { 2526 /* space for terminating nul */ 2527 descrlen = strlen(ifp->if_description) + 1; 2528 if (ifr_buffer_get_length(ifr) < descrlen) 2529 ifr_buffer_set_buffer_null(ifr); 2530 else 2531 error = copyout(ifp->if_description, 2532 ifr_buffer_get_buffer(ifr), descrlen); 2533 ifr_buffer_set_length(ifr, descrlen); 2534 } 2535 sx_sunlock(&ifdescr_sx); 2536 break; 2537 2538 case SIOCSIFDESCR: 2539 error = priv_check(td, PRIV_NET_SETIFDESCR); 2540 if (error) 2541 return (error); 2542 2543 /* 2544 * Copy only (length-1) bytes to make sure that 2545 * if_description is always nul terminated. The 2546 * length parameter is supposed to count the 2547 * terminating nul in. 
2548 */ 2549 if (ifr_buffer_get_length(ifr) > ifdescr_maxlen) 2550 return (ENAMETOOLONG); 2551 else if (ifr_buffer_get_length(ifr) == 0) 2552 descrbuf = NULL; 2553 else { 2554 descrbuf = malloc(ifr_buffer_get_length(ifr), 2555 M_IFDESCR, M_WAITOK | M_ZERO); 2556 error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, 2557 ifr_buffer_get_length(ifr) - 1); 2558 if (error) { 2559 free(descrbuf, M_IFDESCR); 2560 break; 2561 } 2562 } 2563 2564 sx_xlock(&ifdescr_sx); 2565 odescrbuf = ifp->if_description; 2566 ifp->if_description = descrbuf; 2567 sx_xunlock(&ifdescr_sx); 2568 2569 getmicrotime(&ifp->if_lastchange); 2570 free(odescrbuf, M_IFDESCR); 2571 break; 2572 2573 case SIOCGIFFIB: 2574 ifr->ifr_fib = ifp->if_fib; 2575 break; 2576 2577 case SIOCSIFFIB: 2578 error = priv_check(td, PRIV_NET_SETIFFIB); 2579 if (error) 2580 return (error); 2581 if (ifr->ifr_fib >= rt_numfibs) 2582 return (EINVAL); 2583 2584 ifp->if_fib = ifr->ifr_fib; 2585 break; 2586 2587 case SIOCSIFFLAGS: 2588 error = priv_check(td, PRIV_NET_SETIFFLAGS); 2589 if (error) 2590 return (error); 2591 /* 2592 * Currently, no driver owned flags pass the IFF_CANTCHANGE 2593 * check, so we don't need special handling here yet. 2594 */ 2595 new_flags = (ifr->ifr_flags & 0xffff) | 2596 (ifr->ifr_flagshigh << 16); 2597 if (ifp->if_flags & IFF_UP && 2598 (new_flags & IFF_UP) == 0) { 2599 if_down(ifp); 2600 } else if (new_flags & IFF_UP && 2601 (ifp->if_flags & IFF_UP) == 0) { 2602 do_ifup = 1; 2603 } 2604 /* See if permanently promiscuous mode bit is about to flip */ 2605 if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { 2606 if (new_flags & IFF_PPROMISC) 2607 ifp->if_flags |= IFF_PROMISC; 2608 else if (ifp->if_pcount == 0) 2609 ifp->if_flags &= ~IFF_PROMISC; 2610 if (log_promisc_mode_change) 2611 if_printf(ifp, "permanently promiscuous mode %s\n", 2612 ((new_flags & IFF_PPROMISC) ? 
2613 "enabled" : "disabled")); 2614 } 2615 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2616 (new_flags &~ IFF_CANTCHANGE); 2617 if (ifp->if_ioctl) { 2618 (void) (*ifp->if_ioctl)(ifp, cmd, data); 2619 } 2620 if (do_ifup) 2621 if_up(ifp); 2622 getmicrotime(&ifp->if_lastchange); 2623 break; 2624 2625 case SIOCSIFCAP: 2626 error = priv_check(td, PRIV_NET_SETIFCAP); 2627 if (error) 2628 return (error); 2629 if (ifp->if_ioctl == NULL) 2630 return (EOPNOTSUPP); 2631 if (ifr->ifr_reqcap & ~ifp->if_capabilities) 2632 return (EINVAL); 2633 error = (*ifp->if_ioctl)(ifp, cmd, data); 2634 if (error == 0) 2635 getmicrotime(&ifp->if_lastchange); 2636 break; 2637 2638 #ifdef MAC 2639 case SIOCSIFMAC: 2640 error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); 2641 break; 2642 #endif 2643 2644 case SIOCSIFNAME: 2645 error = priv_check(td, PRIV_NET_SETIFNAME); 2646 if (error) 2647 return (error); 2648 error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, 2649 NULL); 2650 if (error != 0) 2651 return (error); 2652 if (new_name[0] == '\0') 2653 return (EINVAL); 2654 if (new_name[IFNAMSIZ-1] != '\0') { 2655 new_name[IFNAMSIZ-1] = '\0'; 2656 if (strlen(new_name) == IFNAMSIZ-1) 2657 return (EINVAL); 2658 } 2659 if (strcmp(new_name, ifp->if_xname) == 0) 2660 break; 2661 if (ifunit(new_name) != NULL) 2662 return (EEXIST); 2663 2664 /* 2665 * XXX: Locking. Nothing else seems to lock if_flags, 2666 * and there are numerous other races with the 2667 * ifunit() checks not being atomic with namespace 2668 * changes (renames, vmoves, if_attach, etc). 2669 */ 2670 ifp->if_flags |= IFF_RENAMING; 2671 2672 /* Announce the departure of the interface. 
*/ 2673 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 2674 EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); 2675 2676 if_printf(ifp, "changing name to '%s'\n", new_name); 2677 2678 IF_ADDR_WLOCK(ifp); 2679 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 2680 ifa = ifp->if_addr; 2681 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 2682 namelen = strlen(new_name); 2683 onamelen = sdl->sdl_nlen; 2684 /* 2685 * Move the address if needed. This is safe because we 2686 * allocate space for a name of length IFNAMSIZ when we 2687 * create this in if_attach(). 2688 */ 2689 if (namelen != onamelen) { 2690 bcopy(sdl->sdl_data + onamelen, 2691 sdl->sdl_data + namelen, sdl->sdl_alen); 2692 } 2693 bcopy(new_name, sdl->sdl_data, namelen); 2694 sdl->sdl_nlen = namelen; 2695 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 2696 bzero(sdl->sdl_data, onamelen); 2697 while (namelen != 0) 2698 sdl->sdl_data[--namelen] = 0xff; 2699 IF_ADDR_WUNLOCK(ifp); 2700 2701 EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); 2702 /* Announce the return of the interface. 
*/ 2703 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 2704 2705 ifp->if_flags &= ~IFF_RENAMING; 2706 break; 2707 2708 #ifdef VIMAGE 2709 case SIOCSIFVNET: 2710 error = priv_check(td, PRIV_NET_SETIFVNET); 2711 if (error) 2712 return (error); 2713 error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); 2714 break; 2715 #endif 2716 2717 case SIOCSIFMETRIC: 2718 error = priv_check(td, PRIV_NET_SETIFMETRIC); 2719 if (error) 2720 return (error); 2721 ifp->if_metric = ifr->ifr_metric; 2722 getmicrotime(&ifp->if_lastchange); 2723 break; 2724 2725 case SIOCSIFPHYS: 2726 error = priv_check(td, PRIV_NET_SETIFPHYS); 2727 if (error) 2728 return (error); 2729 if (ifp->if_ioctl == NULL) 2730 return (EOPNOTSUPP); 2731 error = (*ifp->if_ioctl)(ifp, cmd, data); 2732 if (error == 0) 2733 getmicrotime(&ifp->if_lastchange); 2734 break; 2735 2736 case SIOCSIFMTU: 2737 { 2738 u_long oldmtu = ifp->if_mtu; 2739 2740 error = priv_check(td, PRIV_NET_SETIFMTU); 2741 if (error) 2742 return (error); 2743 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) 2744 return (EINVAL); 2745 if (ifp->if_ioctl == NULL) 2746 return (EOPNOTSUPP); 2747 error = (*ifp->if_ioctl)(ifp, cmd, data); 2748 if (error == 0) { 2749 getmicrotime(&ifp->if_lastchange); 2750 rt_ifmsg(ifp); 2751 #ifdef INET 2752 DEBUGNET_NOTIFY_MTU(ifp); 2753 #endif 2754 } 2755 /* 2756 * If the link MTU changed, do network layer specific procedure. 2757 */ 2758 if (ifp->if_mtu != oldmtu) { 2759 #ifdef INET6 2760 nd6_setmtu(ifp); 2761 #endif 2762 rt_updatemtu(ifp); 2763 } 2764 break; 2765 } 2766 2767 case SIOCADDMULTI: 2768 case SIOCDELMULTI: 2769 if (cmd == SIOCADDMULTI) 2770 error = priv_check(td, PRIV_NET_ADDMULTI); 2771 else 2772 error = priv_check(td, PRIV_NET_DELMULTI); 2773 if (error) 2774 return (error); 2775 2776 /* Don't allow group membership on non-multicast interfaces. */ 2777 if ((ifp->if_flags & IFF_MULTICAST) == 0) 2778 return (EOPNOTSUPP); 2779 2780 /* Don't let users screw up protocols' entries. 
*/ 2781 if (ifr->ifr_addr.sa_family != AF_LINK) 2782 return (EINVAL); 2783 2784 if (cmd == SIOCADDMULTI) { 2785 struct epoch_tracker et; 2786 struct ifmultiaddr *ifma; 2787 2788 /* 2789 * Userland is only permitted to join groups once 2790 * via the if_addmulti() KPI, because it cannot hold 2791 * struct ifmultiaddr * between calls. It may also 2792 * lose a race while we check if the membership 2793 * already exists. 2794 */ 2795 NET_EPOCH_ENTER(et); 2796 ifma = if_findmulti(ifp, &ifr->ifr_addr); 2797 NET_EPOCH_EXIT(et); 2798 if (ifma != NULL) 2799 error = EADDRINUSE; 2800 else 2801 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2802 } else { 2803 error = if_delmulti(ifp, &ifr->ifr_addr); 2804 } 2805 if (error == 0) 2806 getmicrotime(&ifp->if_lastchange); 2807 break; 2808 2809 case SIOCSIFPHYADDR: 2810 case SIOCDIFPHYADDR: 2811 #ifdef INET6 2812 case SIOCSIFPHYADDR_IN6: 2813 #endif 2814 case SIOCSIFMEDIA: 2815 case SIOCSIFGENERIC: 2816 error = priv_check(td, PRIV_NET_HWIOCTL); 2817 if (error) 2818 return (error); 2819 if (ifp->if_ioctl == NULL) 2820 return (EOPNOTSUPP); 2821 error = (*ifp->if_ioctl)(ifp, cmd, data); 2822 if (error == 0) 2823 getmicrotime(&ifp->if_lastchange); 2824 break; 2825 2826 case SIOCGIFSTATUS: 2827 case SIOCGIFPSRCADDR: 2828 case SIOCGIFPDSTADDR: 2829 case SIOCGIFMEDIA: 2830 case SIOCGIFXMEDIA: 2831 case SIOCGIFGENERIC: 2832 case SIOCGIFRSSKEY: 2833 case SIOCGIFRSSHASH: 2834 case SIOCGIFDOWNREASON: 2835 if (ifp->if_ioctl == NULL) 2836 return (EOPNOTSUPP); 2837 error = (*ifp->if_ioctl)(ifp, cmd, data); 2838 break; 2839 2840 case SIOCSIFLLADDR: 2841 error = priv_check(td, PRIV_NET_SETLLADDR); 2842 if (error) 2843 return (error); 2844 error = if_setlladdr(ifp, 2845 ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); 2846 break; 2847 2848 case SIOCGHWADDR: 2849 error = if_gethwaddr(ifp, ifr); 2850 break; 2851 2852 case CASE_IOC_IFGROUPREQ(SIOCAIFGROUP): 2853 error = priv_check(td, PRIV_NET_ADDIFGROUP); 2854 if (error) 2855 return (error); 2856 if 
((error = if_addgroup(ifp, 2857 ifgr_group_get((struct ifgroupreq *)data)))) 2858 return (error); 2859 break; 2860 2861 case CASE_IOC_IFGROUPREQ(SIOCGIFGROUP): 2862 { 2863 struct epoch_tracker et; 2864 2865 NET_EPOCH_ENTER(et); 2866 error = if_getgroup((struct ifgroupreq *)data, ifp); 2867 NET_EPOCH_EXIT(et); 2868 break; 2869 } 2870 2871 case CASE_IOC_IFGROUPREQ(SIOCDIFGROUP): 2872 error = priv_check(td, PRIV_NET_DELIFGROUP); 2873 if (error) 2874 return (error); 2875 if ((error = if_delgroup(ifp, 2876 ifgr_group_get((struct ifgroupreq *)data)))) 2877 return (error); 2878 break; 2879 2880 default: 2881 error = ENOIOCTL; 2882 break; 2883 } 2884 return (error); 2885 } 2886 2887 #ifdef COMPAT_FREEBSD32 2888 struct ifconf32 { 2889 int32_t ifc_len; 2890 union { 2891 uint32_t ifcu_buf; 2892 uint32_t ifcu_req; 2893 } ifc_ifcu; 2894 }; 2895 #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) 2896 #endif 2897 2898 #ifdef COMPAT_FREEBSD32 2899 static void 2900 ifmr_init(struct ifmediareq *ifmr, caddr_t data) 2901 { 2902 struct ifmediareq32 *ifmr32; 2903 2904 ifmr32 = (struct ifmediareq32 *)data; 2905 memcpy(ifmr->ifm_name, ifmr32->ifm_name, 2906 sizeof(ifmr->ifm_name)); 2907 ifmr->ifm_current = ifmr32->ifm_current; 2908 ifmr->ifm_mask = ifmr32->ifm_mask; 2909 ifmr->ifm_status = ifmr32->ifm_status; 2910 ifmr->ifm_active = ifmr32->ifm_active; 2911 ifmr->ifm_count = ifmr32->ifm_count; 2912 ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist; 2913 } 2914 2915 static void 2916 ifmr_update(const struct ifmediareq *ifmr, caddr_t data) 2917 { 2918 struct ifmediareq32 *ifmr32; 2919 2920 ifmr32 = (struct ifmediareq32 *)data; 2921 ifmr32->ifm_current = ifmr->ifm_current; 2922 ifmr32->ifm_mask = ifmr->ifm_mask; 2923 ifmr32->ifm_status = ifmr->ifm_status; 2924 ifmr32->ifm_active = ifmr->ifm_active; 2925 ifmr32->ifm_count = ifmr->ifm_count; 2926 } 2927 #endif 2928 2929 /* 2930 * Interface ioctls. 
2931 */ 2932 int 2933 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) 2934 { 2935 #ifdef COMPAT_FREEBSD32 2936 caddr_t saved_data = NULL; 2937 struct ifmediareq ifmr; 2938 struct ifmediareq *ifmrp = NULL; 2939 #endif 2940 struct ifnet *ifp; 2941 struct ifreq *ifr; 2942 int error; 2943 int oif_flags; 2944 #ifdef VIMAGE 2945 bool shutdown; 2946 #endif 2947 2948 CURVNET_SET(so->so_vnet); 2949 #ifdef VIMAGE 2950 /* Make sure the VNET is stable. */ 2951 shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet); 2952 if (shutdown) { 2953 CURVNET_RESTORE(); 2954 return (EBUSY); 2955 } 2956 #endif 2957 2958 switch (cmd) { 2959 case SIOCGIFCONF: 2960 error = ifconf(cmd, data); 2961 goto out_noref; 2962 2963 #ifdef COMPAT_FREEBSD32 2964 case SIOCGIFCONF32: 2965 { 2966 struct ifconf32 *ifc32; 2967 struct ifconf ifc; 2968 2969 ifc32 = (struct ifconf32 *)data; 2970 ifc.ifc_len = ifc32->ifc_len; 2971 ifc.ifc_buf = PTRIN(ifc32->ifc_buf); 2972 2973 error = ifconf(SIOCGIFCONF, (void *)&ifc); 2974 if (error == 0) 2975 ifc32->ifc_len = ifc.ifc_len; 2976 goto out_noref; 2977 } 2978 #endif 2979 } 2980 2981 #ifdef COMPAT_FREEBSD32 2982 switch (cmd) { 2983 case SIOCGIFMEDIA32: 2984 case SIOCGIFXMEDIA32: 2985 ifmrp = &ifmr; 2986 ifmr_init(ifmrp, data); 2987 cmd = _IOC_NEWTYPE(cmd, struct ifmediareq); 2988 saved_data = data; 2989 data = (caddr_t)ifmrp; 2990 } 2991 #endif 2992 2993 ifr = (struct ifreq *)data; 2994 switch (cmd) { 2995 #ifdef VIMAGE 2996 case SIOCSIFRVNET: 2997 error = priv_check(td, PRIV_NET_SETIFVNET); 2998 if (error == 0) 2999 error = if_vmove_reclaim(td, ifr->ifr_name, 3000 ifr->ifr_jid); 3001 goto out_noref; 3002 #endif 3003 case SIOCIFCREATE: 3004 case SIOCIFCREATE2: 3005 error = priv_check(td, PRIV_NET_IFCREATE); 3006 if (error == 0) 3007 error = if_clone_create(ifr->ifr_name, 3008 sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? 
3009 ifr_data_get_ptr(ifr) : NULL); 3010 goto out_noref; 3011 case SIOCIFDESTROY: 3012 error = priv_check(td, PRIV_NET_IFDESTROY); 3013 if (error == 0) 3014 error = if_clone_destroy(ifr->ifr_name); 3015 goto out_noref; 3016 3017 case SIOCIFGCLONERS: 3018 error = if_clone_list((struct if_clonereq *)data); 3019 goto out_noref; 3020 3021 case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB): 3022 error = if_getgroupmembers((struct ifgroupreq *)data); 3023 goto out_noref; 3024 3025 #if defined(INET) || defined(INET6) 3026 case SIOCSVH: 3027 case SIOCGVH: 3028 if (carp_ioctl_p == NULL) 3029 error = EPROTONOSUPPORT; 3030 else 3031 error = (*carp_ioctl_p)(ifr, cmd, td); 3032 goto out_noref; 3033 #endif 3034 } 3035 3036 ifp = ifunit_ref(ifr->ifr_name); 3037 if (ifp == NULL) { 3038 error = ENXIO; 3039 goto out_noref; 3040 } 3041 3042 error = ifhwioctl(cmd, ifp, data, td); 3043 if (error != ENOIOCTL) 3044 goto out_ref; 3045 3046 oif_flags = ifp->if_flags; 3047 if (so->so_proto == NULL) { 3048 error = EOPNOTSUPP; 3049 goto out_ref; 3050 } 3051 3052 /* 3053 * Pass the request on to the socket control method, and if the 3054 * latter returns EOPNOTSUPP, directly to the interface. 3055 * 3056 * Make an exception for the legacy SIOCSIF* requests. Drivers 3057 * trust SIOCSIFADDR et al to come from an already privileged 3058 * layer, and do not perform any credentials checks or input 3059 * validation. 
3060 */ 3061 error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, 3062 ifp, td)); 3063 if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && 3064 cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && 3065 cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) 3066 error = (*ifp->if_ioctl)(ifp, cmd, data); 3067 3068 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 3069 #ifdef INET6 3070 if (ifp->if_flags & IFF_UP) 3071 in6_if_up(ifp); 3072 #endif 3073 } 3074 3075 out_ref: 3076 if_rele(ifp); 3077 out_noref: 3078 #ifdef COMPAT_FREEBSD32 3079 if (ifmrp != NULL) { 3080 KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA), 3081 ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx", 3082 cmd)); 3083 data = saved_data; 3084 ifmr_update(ifmrp, data); 3085 } 3086 #endif 3087 CURVNET_RESTORE(); 3088 return (error); 3089 } 3090 3091 /* 3092 * The code common to handling reference counted flags, 3093 * e.g., in ifpromisc() and if_allmulti(). 3094 * The "pflag" argument can specify a permanent mode flag to check, 3095 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. 3096 * 3097 * Only to be used on stack-owned flags, not driver-owned flags. 3098 */ 3099 static int 3100 if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) 3101 { 3102 struct ifreq ifr; 3103 int error; 3104 int oldflags, oldcount; 3105 3106 /* Sanity checks to catch programming errors */ 3107 KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, 3108 ("%s: setting driver-owned flag %d", __func__, flag)); 3109 3110 if (onswitch) 3111 KASSERT(*refcount >= 0, 3112 ("%s: increment negative refcount %d for flag %d", 3113 __func__, *refcount, flag)); 3114 else 3115 KASSERT(*refcount > 0, 3116 ("%s: decrement non-positive refcount %d for flag %d", 3117 __func__, *refcount, flag)); 3118 3119 /* In case this mode is permanent, just touch refcount */ 3120 if (ifp->if_flags & pflag) { 3121 *refcount += onswitch ? 
1 : -1; 3122 return (0); 3123 } 3124 3125 /* Save ifnet parameters for if_ioctl() may fail */ 3126 oldcount = *refcount; 3127 oldflags = ifp->if_flags; 3128 3129 /* 3130 * See if we aren't the only and touching refcount is enough. 3131 * Actually toggle interface flag if we are the first or last. 3132 */ 3133 if (onswitch) { 3134 if ((*refcount)++) 3135 return (0); 3136 ifp->if_flags |= flag; 3137 } else { 3138 if (--(*refcount)) 3139 return (0); 3140 ifp->if_flags &= ~flag; 3141 } 3142 3143 /* Call down the driver since we've changed interface flags */ 3144 if (ifp->if_ioctl == NULL) { 3145 error = EOPNOTSUPP; 3146 goto recover; 3147 } 3148 ifr.ifr_flags = ifp->if_flags & 0xffff; 3149 ifr.ifr_flagshigh = ifp->if_flags >> 16; 3150 error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); 3151 if (error) 3152 goto recover; 3153 /* Notify userland that interface flags have changed */ 3154 rt_ifmsg(ifp); 3155 return (0); 3156 3157 recover: 3158 /* Recover after driver error */ 3159 *refcount = oldcount; 3160 ifp->if_flags = oldflags; 3161 return (error); 3162 } 3163 3164 /* 3165 * Set/clear promiscuous mode on interface ifp based on the truth value 3166 * of pswitch. The calls are reference counted so that only the first 3167 * "on" request actually has an effect, as does the final "off" request. 3168 * Results are undefined if the "off" and "on" requests are not matched. 3169 */ 3170 int 3171 ifpromisc(struct ifnet *ifp, int pswitch) 3172 { 3173 int error; 3174 int oldflags = ifp->if_flags; 3175 3176 error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, 3177 &ifp->if_pcount, pswitch); 3178 /* If promiscuous mode status has changed, log a message */ 3179 if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && 3180 log_promisc_mode_change) 3181 if_printf(ifp, "promiscuous mode %s\n", 3182 (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); 3183 return (error); 3184 } 3185 3186 /* 3187 * Return interface configuration 3188 * of system. 
List may be used 3189 * in later ioctl's (above) to get 3190 * other information. 3191 */ 3192 /*ARGSUSED*/ 3193 static int 3194 ifconf(u_long cmd, caddr_t data) 3195 { 3196 struct ifconf *ifc = (struct ifconf *)data; 3197 struct ifnet *ifp; 3198 struct ifaddr *ifa; 3199 struct ifreq ifr; 3200 struct sbuf *sb; 3201 int error, full = 0, valid_len, max_len; 3202 3203 /* Limit initial buffer size to maxphys to avoid DoS from userspace. */ 3204 max_len = maxphys - 1; 3205 3206 /* Prevent hostile input from being able to crash the system */ 3207 if (ifc->ifc_len <= 0) 3208 return (EINVAL); 3209 3210 again: 3211 if (ifc->ifc_len <= max_len) { 3212 max_len = ifc->ifc_len; 3213 full = 1; 3214 } 3215 sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); 3216 max_len = 0; 3217 valid_len = 0; 3218 3219 IFNET_RLOCK(); 3220 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 3221 struct epoch_tracker et; 3222 int addrs; 3223 3224 /* 3225 * Zero the ifr to make sure we don't disclose the contents 3226 * of the stack. 
3227 */ 3228 memset(&ifr, 0, sizeof(ifr)); 3229 3230 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 3231 >= sizeof(ifr.ifr_name)) { 3232 sbuf_delete(sb); 3233 IFNET_RUNLOCK(); 3234 return (ENAMETOOLONG); 3235 } 3236 3237 addrs = 0; 3238 NET_EPOCH_ENTER(et); 3239 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 3240 struct sockaddr *sa = ifa->ifa_addr; 3241 3242 if (prison_if(curthread->td_ucred, sa) != 0) 3243 continue; 3244 addrs++; 3245 if (sa->sa_len <= sizeof(*sa)) { 3246 if (sa->sa_len < sizeof(*sa)) { 3247 memset(&ifr.ifr_ifru.ifru_addr, 0, 3248 sizeof(ifr.ifr_ifru.ifru_addr)); 3249 memcpy(&ifr.ifr_ifru.ifru_addr, sa, 3250 sa->sa_len); 3251 } else 3252 ifr.ifr_ifru.ifru_addr = *sa; 3253 sbuf_bcat(sb, &ifr, sizeof(ifr)); 3254 max_len += sizeof(ifr); 3255 } else { 3256 sbuf_bcat(sb, &ifr, 3257 offsetof(struct ifreq, ifr_addr)); 3258 max_len += offsetof(struct ifreq, ifr_addr); 3259 sbuf_bcat(sb, sa, sa->sa_len); 3260 max_len += sa->sa_len; 3261 } 3262 3263 if (sbuf_error(sb) == 0) 3264 valid_len = sbuf_len(sb); 3265 } 3266 NET_EPOCH_EXIT(et); 3267 if (addrs == 0) { 3268 sbuf_bcat(sb, &ifr, sizeof(ifr)); 3269 max_len += sizeof(ifr); 3270 3271 if (sbuf_error(sb) == 0) 3272 valid_len = sbuf_len(sb); 3273 } 3274 } 3275 IFNET_RUNLOCK(); 3276 3277 /* 3278 * If we didn't allocate enough space (uncommon), try again. If 3279 * we have already allocated as much space as we are allowed, 3280 * return what we've got. 3281 */ 3282 if (valid_len != max_len && !full) { 3283 sbuf_delete(sb); 3284 goto again; 3285 } 3286 3287 ifc->ifc_len = valid_len; 3288 sbuf_finish(sb); 3289 error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); 3290 sbuf_delete(sb); 3291 return (error); 3292 } 3293 3294 /* 3295 * Just like ifpromisc(), but for all-multicast-reception mode. 
3296 */ 3297 int 3298 if_allmulti(struct ifnet *ifp, int onswitch) 3299 { 3300 3301 return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); 3302 } 3303 3304 struct ifmultiaddr * 3305 if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) 3306 { 3307 struct ifmultiaddr *ifma; 3308 3309 IF_ADDR_LOCK_ASSERT(ifp); 3310 3311 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 3312 if (sa->sa_family == AF_LINK) { 3313 if (sa_dl_equal(ifma->ifma_addr, sa)) 3314 break; 3315 } else { 3316 if (sa_equal(ifma->ifma_addr, sa)) 3317 break; 3318 } 3319 } 3320 3321 return ifma; 3322 } 3323 3324 /* 3325 * Allocate a new ifmultiaddr and initialize based on passed arguments. We 3326 * make copies of passed sockaddrs. The ifmultiaddr will not be added to 3327 * the ifnet multicast address list here, so the caller must do that and 3328 * other setup work (such as notifying the device driver). The reference 3329 * count is initialized to 1. 3330 */ 3331 static struct ifmultiaddr * 3332 if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, 3333 int mflags) 3334 { 3335 struct ifmultiaddr *ifma; 3336 struct sockaddr *dupsa; 3337 3338 ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | 3339 M_ZERO); 3340 if (ifma == NULL) 3341 return (NULL); 3342 3343 dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); 3344 if (dupsa == NULL) { 3345 free(ifma, M_IFMADDR); 3346 return (NULL); 3347 } 3348 bcopy(sa, dupsa, sa->sa_len); 3349 ifma->ifma_addr = dupsa; 3350 3351 ifma->ifma_ifp = ifp; 3352 ifma->ifma_refcount = 1; 3353 ifma->ifma_protospec = NULL; 3354 3355 if (llsa == NULL) { 3356 ifma->ifma_lladdr = NULL; 3357 return (ifma); 3358 } 3359 3360 dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); 3361 if (dupsa == NULL) { 3362 free(ifma->ifma_addr, M_IFMADDR); 3363 free(ifma, M_IFMADDR); 3364 return (NULL); 3365 } 3366 bcopy(llsa, dupsa, llsa->sa_len); 3367 ifma->ifma_lladdr = dupsa; 3368 3369 return (ifma); 3370 } 3371 3372 /* 3373 * if_freemulti: free ifmultiaddr 
structure and possibly attached related 3374 * addresses. The caller is responsible for implementing reference 3375 * counting, notifying the driver, handling routing messages, and releasing 3376 * any dependent link layer state. 3377 */ 3378 #ifdef MCAST_VERBOSE 3379 extern void kdb_backtrace(void); 3380 #endif 3381 static void 3382 if_freemulti_internal(struct ifmultiaddr *ifma) 3383 { 3384 3385 KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", 3386 ifma->ifma_refcount)); 3387 3388 if (ifma->ifma_lladdr != NULL) 3389 free(ifma->ifma_lladdr, M_IFMADDR); 3390 #ifdef MCAST_VERBOSE 3391 kdb_backtrace(); 3392 printf("%s freeing ifma: %p\n", __func__, ifma); 3393 #endif 3394 free(ifma->ifma_addr, M_IFMADDR); 3395 free(ifma, M_IFMADDR); 3396 } 3397 3398 static void 3399 if_destroymulti(epoch_context_t ctx) 3400 { 3401 struct ifmultiaddr *ifma; 3402 3403 ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx); 3404 if_freemulti_internal(ifma); 3405 } 3406 3407 void 3408 if_freemulti(struct ifmultiaddr *ifma) 3409 { 3410 KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d", 3411 ifma->ifma_refcount)); 3412 3413 NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx); 3414 } 3415 3416 /* 3417 * Register an additional multicast address with a network interface. 3418 * 3419 * - If the address is already present, bump the reference count on the 3420 * address and return. 3421 * - If the address is not link-layer, look up a link layer address. 3422 * - Allocate address structures for one or both addresses, and attach to the 3423 * multicast address list on the interface. If automatically adding a link 3424 * layer address, the protocol address will own a reference to the link 3425 * layer address, to be freed when it is freed. 3426 * - Notify the network device driver of an addition to the multicast address 3427 * list. 3428 * 3429 * 'sa' points to caller-owned memory with the desired multicast address. 
3430 * 3431 * 'retifma' will be used to return a pointer to the resulting multicast 3432 * address reference, if desired. 3433 */ 3434 int 3435 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 3436 struct ifmultiaddr **retifma) 3437 { 3438 struct ifmultiaddr *ifma, *ll_ifma; 3439 struct sockaddr *llsa; 3440 struct sockaddr_dl sdl; 3441 int error; 3442 3443 #ifdef INET 3444 IN_MULTI_LIST_UNLOCK_ASSERT(); 3445 #endif 3446 #ifdef INET6 3447 IN6_MULTI_LIST_UNLOCK_ASSERT(); 3448 #endif 3449 /* 3450 * If the address is already present, return a new reference to it; 3451 * otherwise, allocate storage and set up a new address. 3452 */ 3453 IF_ADDR_WLOCK(ifp); 3454 ifma = if_findmulti(ifp, sa); 3455 if (ifma != NULL) { 3456 ifma->ifma_refcount++; 3457 if (retifma != NULL) 3458 *retifma = ifma; 3459 IF_ADDR_WUNLOCK(ifp); 3460 return (0); 3461 } 3462 3463 /* 3464 * The address isn't already present; resolve the protocol address 3465 * into a link layer address, and then look that up, bump its 3466 * refcount or allocate an ifma for that also. 3467 * Most link layer resolving functions returns address data which 3468 * fits inside default sockaddr_dl structure. However callback 3469 * can allocate another sockaddr structure, in that case we need to 3470 * free it later. 3471 */ 3472 llsa = NULL; 3473 ll_ifma = NULL; 3474 if (ifp->if_resolvemulti != NULL) { 3475 /* Provide called function with buffer size information */ 3476 sdl.sdl_len = sizeof(sdl); 3477 llsa = (struct sockaddr *)&sdl; 3478 error = ifp->if_resolvemulti(ifp, &llsa, sa); 3479 if (error) 3480 goto unlock_out; 3481 } 3482 3483 /* 3484 * Allocate the new address. Don't hook it up yet, as we may also 3485 * need to allocate a link layer multicast address. 
3486 */ 3487 ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); 3488 if (ifma == NULL) { 3489 error = ENOMEM; 3490 goto free_llsa_out; 3491 } 3492 3493 /* 3494 * If a link layer address is found, we'll need to see if it's 3495 * already present in the address list, or allocate is as well. 3496 * When this block finishes, the link layer address will be on the 3497 * list. 3498 */ 3499 if (llsa != NULL) { 3500 ll_ifma = if_findmulti(ifp, llsa); 3501 if (ll_ifma == NULL) { 3502 ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); 3503 if (ll_ifma == NULL) { 3504 --ifma->ifma_refcount; 3505 if_freemulti(ifma); 3506 error = ENOMEM; 3507 goto free_llsa_out; 3508 } 3509 ll_ifma->ifma_flags |= IFMA_F_ENQUEUED; 3510 CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, 3511 ifma_link); 3512 } else 3513 ll_ifma->ifma_refcount++; 3514 ifma->ifma_llifma = ll_ifma; 3515 } 3516 3517 /* 3518 * We now have a new multicast address, ifma, and possibly a new or 3519 * referenced link layer address. Add the primary address to the 3520 * ifnet address list. 3521 */ 3522 ifma->ifma_flags |= IFMA_F_ENQUEUED; 3523 CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 3524 3525 if (retifma != NULL) 3526 *retifma = ifma; 3527 3528 /* 3529 * Must generate the message while holding the lock so that 'ifma' 3530 * pointer is still valid. 3531 */ 3532 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 3533 IF_ADDR_WUNLOCK(ifp); 3534 3535 /* 3536 * We are certain we have added something, so call down to the 3537 * interface to let them know about it. 
3538 */ 3539 if (ifp->if_ioctl != NULL) { 3540 if (THREAD_CAN_SLEEP()) 3541 (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); 3542 else 3543 taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask); 3544 } 3545 3546 if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) 3547 link_free_sdl(llsa); 3548 3549 return (0); 3550 3551 free_llsa_out: 3552 if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) 3553 link_free_sdl(llsa); 3554 3555 unlock_out: 3556 IF_ADDR_WUNLOCK(ifp); 3557 return (error); 3558 } 3559 3560 static void 3561 if_siocaddmulti(void *arg, int pending) 3562 { 3563 struct ifnet *ifp; 3564 3565 ifp = arg; 3566 #ifdef DIAGNOSTIC 3567 if (pending > 1) 3568 if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending); 3569 #endif 3570 CURVNET_SET(ifp->if_vnet); 3571 (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); 3572 CURVNET_RESTORE(); 3573 } 3574 3575 /* 3576 * Delete a multicast group membership by network-layer group address. 3577 * 3578 * Returns ENOENT if the entry could not be found. If ifp no longer 3579 * exists, results are undefined. This entry point should only be used 3580 * from subsystems which do appropriate locking to hold ifp for the 3581 * duration of the call. 3582 * Network-layer protocol domains must use if_delmulti_ifma(). 3583 */ 3584 int 3585 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 3586 { 3587 struct ifmultiaddr *ifma; 3588 int lastref; 3589 3590 KASSERT(ifp, ("%s: NULL ifp", __func__)); 3591 3592 IF_ADDR_WLOCK(ifp); 3593 lastref = 0; 3594 ifma = if_findmulti(ifp, sa); 3595 if (ifma != NULL) 3596 lastref = if_delmulti_locked(ifp, ifma, 0); 3597 IF_ADDR_WUNLOCK(ifp); 3598 3599 if (ifma == NULL) 3600 return (ENOENT); 3601 3602 if (lastref && ifp->if_ioctl != NULL) { 3603 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); 3604 } 3605 3606 return (0); 3607 } 3608 3609 /* 3610 * Delete all multicast group membership for an interface. 3611 * Should be used to quickly flush all multicast filters. 
3612 */ 3613 void 3614 if_delallmulti(struct ifnet *ifp) 3615 { 3616 struct ifmultiaddr *ifma; 3617 struct ifmultiaddr *next; 3618 3619 IF_ADDR_WLOCK(ifp); 3620 CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) 3621 if_delmulti_locked(ifp, ifma, 0); 3622 IF_ADDR_WUNLOCK(ifp); 3623 } 3624 3625 void 3626 if_delmulti_ifma(struct ifmultiaddr *ifma) 3627 { 3628 if_delmulti_ifma_flags(ifma, 0); 3629 } 3630 3631 /* 3632 * Delete a multicast group membership by group membership pointer. 3633 * Network-layer protocol domains must use this routine. 3634 * 3635 * It is safe to call this routine if the ifp disappeared. 3636 */ 3637 void 3638 if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags) 3639 { 3640 struct ifnet *ifp; 3641 int lastref; 3642 MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma); 3643 #ifdef INET 3644 IN_MULTI_LIST_UNLOCK_ASSERT(); 3645 #endif 3646 ifp = ifma->ifma_ifp; 3647 #ifdef DIAGNOSTIC 3648 if (ifp == NULL) { 3649 printf("%s: ifma_ifp seems to be detached\n", __func__); 3650 } else { 3651 struct epoch_tracker et; 3652 struct ifnet *oifp; 3653 3654 NET_EPOCH_ENTER(et); 3655 CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) 3656 if (ifp == oifp) 3657 break; 3658 NET_EPOCH_EXIT(et); 3659 if (ifp != oifp) 3660 ifp = NULL; 3661 } 3662 #endif 3663 /* 3664 * If and only if the ifnet instance exists: Acquire the address lock. 3665 */ 3666 if (ifp != NULL) 3667 IF_ADDR_WLOCK(ifp); 3668 3669 lastref = if_delmulti_locked(ifp, ifma, flags); 3670 3671 if (ifp != NULL) { 3672 /* 3673 * If and only if the ifnet instance exists: 3674 * Release the address lock. 3675 * If the group was left: update the hardware hash filter. 3676 */ 3677 IF_ADDR_WUNLOCK(ifp); 3678 if (lastref && ifp->if_ioctl != NULL) { 3679 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); 3680 } 3681 } 3682 } 3683 3684 /* 3685 * Perform deletion of network-layer and/or link-layer multicast address. 3686 * 3687 * Return 0 if the reference count was decremented. 
3688 * Return 1 if the final reference was released, indicating that the 3689 * hardware hash filter should be reprogrammed. 3690 */ 3691 static int 3692 if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) 3693 { 3694 struct ifmultiaddr *ll_ifma; 3695 3696 if (ifp != NULL && ifma->ifma_ifp != NULL) { 3697 KASSERT(ifma->ifma_ifp == ifp, 3698 ("%s: inconsistent ifp %p", __func__, ifp)); 3699 IF_ADDR_WLOCK_ASSERT(ifp); 3700 } 3701 3702 ifp = ifma->ifma_ifp; 3703 MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : ""); 3704 3705 /* 3706 * If the ifnet is detaching, null out references to ifnet, 3707 * so that upper protocol layers will notice, and not attempt 3708 * to obtain locks for an ifnet which no longer exists. The 3709 * routing socket announcement must happen before the ifnet 3710 * instance is detached from the system. 3711 */ 3712 if (detaching) { 3713 #ifdef DIAGNOSTIC 3714 printf("%s: detaching ifnet instance %p\n", __func__, ifp); 3715 #endif 3716 /* 3717 * ifp may already be nulled out if we are being reentered 3718 * to delete the ll_ifma. 3719 */ 3720 if (ifp != NULL) { 3721 rt_newmaddrmsg(RTM_DELMADDR, ifma); 3722 ifma->ifma_ifp = NULL; 3723 } 3724 } 3725 3726 if (--ifma->ifma_refcount > 0) 3727 return 0; 3728 3729 if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { 3730 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); 3731 ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 3732 } 3733 /* 3734 * If this ifma is a network-layer ifma, a link-layer ifma may 3735 * have been associated with it. Release it first if so. 
3736 */ 3737 ll_ifma = ifma->ifma_llifma; 3738 if (ll_ifma != NULL) { 3739 KASSERT(ifma->ifma_lladdr != NULL, 3740 ("%s: llifma w/o lladdr", __func__)); 3741 if (detaching) 3742 ll_ifma->ifma_ifp = NULL; /* XXX */ 3743 if (--ll_ifma->ifma_refcount == 0) { 3744 if (ifp != NULL) { 3745 if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { 3746 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, 3747 ifma_link); 3748 ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 3749 } 3750 } 3751 if_freemulti(ll_ifma); 3752 } 3753 } 3754 #ifdef INVARIANTS 3755 if (ifp) { 3756 struct ifmultiaddr *ifmatmp; 3757 3758 CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link) 3759 MPASS(ifma != ifmatmp); 3760 } 3761 #endif 3762 if_freemulti(ifma); 3763 /* 3764 * The last reference to this instance of struct ifmultiaddr 3765 * was released; the hardware should be notified of this change. 3766 */ 3767 return 1; 3768 } 3769 3770 /* 3771 * Set the link layer address on an interface. 3772 * 3773 * At this time we only support certain types of interfaces, 3774 * and we don't allow the length of the address to change. 3775 * 3776 * Set noinline to be dtrace-friendly 3777 */ 3778 __noinline int 3779 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 3780 { 3781 struct sockaddr_dl *sdl; 3782 struct ifaddr *ifa; 3783 struct ifreq ifr; 3784 3785 ifa = ifp->if_addr; 3786 if (ifa == NULL) 3787 return (EINVAL); 3788 3789 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 3790 if (sdl == NULL) 3791 return (EINVAL); 3792 3793 if (len != sdl->sdl_alen) /* don't allow length to change */ 3794 return (EINVAL); 3795 3796 switch (ifp->if_type) { 3797 case IFT_ETHER: 3798 case IFT_XETHER: 3799 case IFT_L2VLAN: 3800 case IFT_BRIDGE: 3801 case IFT_IEEE8023ADLAG: 3802 bcopy(lladdr, LLADDR(sdl), len); 3803 break; 3804 default: 3805 return (ENODEV); 3806 } 3807 3808 /* 3809 * If the interface is already up, we need 3810 * to re-init it in order to reprogram its 3811 * address filter. 
3812 */ 3813 if ((ifp->if_flags & IFF_UP) != 0) { 3814 if (ifp->if_ioctl) { 3815 ifp->if_flags &= ~IFF_UP; 3816 ifr.ifr_flags = ifp->if_flags & 0xffff; 3817 ifr.ifr_flagshigh = ifp->if_flags >> 16; 3818 (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); 3819 ifp->if_flags |= IFF_UP; 3820 ifr.ifr_flags = ifp->if_flags & 0xffff; 3821 ifr.ifr_flagshigh = ifp->if_flags >> 16; 3822 (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); 3823 } 3824 } 3825 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 3826 3827 return (0); 3828 } 3829 3830 /* 3831 * Compat function for handling basic encapsulation requests. 3832 * Not converted stacks (FDDI, IB, ..) supports traditional 3833 * output model: ARP (and other similar L2 protocols) are handled 3834 * inside output routine, arpresolve/nd6_resolve() returns MAC 3835 * address instead of full prepend. 3836 * 3837 * This function creates calculated header==MAC for IPv4/IPv6 and 3838 * returns EAFNOSUPPORT (which is then handled in ARP code) for other 3839 * address families. 3840 */ 3841 static int 3842 if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) 3843 { 3844 3845 if (req->rtype != IFENCAP_LL) 3846 return (EOPNOTSUPP); 3847 3848 if (req->bufsize < req->lladdr_len) 3849 return (ENOMEM); 3850 3851 switch (req->family) { 3852 case AF_INET: 3853 case AF_INET6: 3854 break; 3855 default: 3856 return (EAFNOSUPPORT); 3857 } 3858 3859 /* Copy lladdr to storage as is */ 3860 memmove(req->buf, req->lladdr, req->lladdr_len); 3861 req->bufsize = req->lladdr_len; 3862 req->lladdr_off = 0; 3863 3864 return (0); 3865 } 3866 3867 /* 3868 * Tunnel interfaces can nest, also they may cause infinite recursion 3869 * calls when misconfigured. We'll prevent this by detecting loops. 3870 * High nesting level may cause stack exhaustion. We'll prevent this 3871 * by introducing upper limit. 3872 * 3873 * Return 0, if tunnel nesting count is equal or less than limit. 
3874 */ 3875 int 3876 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, 3877 int limit) 3878 { 3879 struct m_tag *mtag; 3880 int count; 3881 3882 count = 1; 3883 mtag = NULL; 3884 while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { 3885 if (*(struct ifnet **)(mtag + 1) == ifp) { 3886 log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); 3887 return (EIO); 3888 } 3889 count++; 3890 } 3891 if (count > limit) { 3892 log(LOG_NOTICE, 3893 "%s: if_output recursively called too many times(%d)\n", 3894 if_name(ifp), count); 3895 return (EIO); 3896 } 3897 mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); 3898 if (mtag == NULL) 3899 return (ENOMEM); 3900 *(struct ifnet **)(mtag + 1) = ifp; 3901 m_tag_prepend(m, mtag); 3902 return (0); 3903 } 3904 3905 /* 3906 * Get the link layer address that was read from the hardware at attach. 3907 * 3908 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type 3909 * their component interfaces as IFT_IEEE8023ADLAG. 3910 */ 3911 int 3912 if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) 3913 { 3914 3915 if (ifp->if_hw_addr == NULL) 3916 return (ENODEV); 3917 3918 switch (ifp->if_type) { 3919 case IFT_ETHER: 3920 case IFT_IEEE8023ADLAG: 3921 bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); 3922 return (0); 3923 default: 3924 return (ENODEV); 3925 } 3926 } 3927 3928 /* 3929 * The name argument must be a pointer to storage which will last as 3930 * long as the interface does. For physical devices, the result of 3931 * device_get_name(dev) is a good choice and for pseudo-devices a 3932 * static string works well. 
3933 */ 3934 void 3935 if_initname(struct ifnet *ifp, const char *name, int unit) 3936 { 3937 ifp->if_dname = name; 3938 ifp->if_dunit = unit; 3939 if (unit != IF_DUNIT_NONE) 3940 snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 3941 else 3942 strlcpy(ifp->if_xname, name, IFNAMSIZ); 3943 } 3944 3945 int 3946 if_printf(struct ifnet *ifp, const char *fmt, ...) 3947 { 3948 char if_fmt[256]; 3949 va_list ap; 3950 3951 snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt); 3952 va_start(ap, fmt); 3953 vlog(LOG_INFO, if_fmt, ap); 3954 va_end(ap); 3955 return (0); 3956 } 3957 3958 void 3959 if_start(struct ifnet *ifp) 3960 { 3961 3962 (*(ifp)->if_start)(ifp); 3963 } 3964 3965 /* 3966 * Backwards compatibility interface for drivers 3967 * that have not implemented it 3968 */ 3969 static int 3970 if_transmit(struct ifnet *ifp, struct mbuf *m) 3971 { 3972 int error; 3973 3974 IFQ_HANDOFF(ifp, m, error); 3975 return (error); 3976 } 3977 3978 static void 3979 if_input_default(struct ifnet *ifp __unused, struct mbuf *m) 3980 { 3981 3982 m_freem(m); 3983 } 3984 3985 int 3986 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) 3987 { 3988 int active = 0; 3989 3990 IF_LOCK(ifq); 3991 if (_IF_QFULL(ifq)) { 3992 IF_UNLOCK(ifq); 3993 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 3994 m_freem(m); 3995 return (0); 3996 } 3997 if (ifp != NULL) { 3998 if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); 3999 if (m->m_flags & (M_BCAST|M_MCAST)) 4000 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 4001 active = ifp->if_drv_flags & IFF_DRV_OACTIVE; 4002 } 4003 _IF_ENQUEUE(ifq, m); 4004 IF_UNLOCK(ifq); 4005 if (ifp != NULL && !active) 4006 (*(ifp)->if_start)(ifp); 4007 return (1); 4008 } 4009 4010 void 4011 if_register_com_alloc(u_char type, 4012 if_com_alloc_t *a, if_com_free_t *f) 4013 { 4014 4015 KASSERT(if_com_alloc[type] == NULL, 4016 ("if_register_com_alloc: %d already registered", type)); 4017 KASSERT(if_com_free[type] == NULL, 4018 
("if_register_com_alloc: %d free already registered", type)); 4019 4020 if_com_alloc[type] = a; 4021 if_com_free[type] = f; 4022 } 4023 4024 void 4025 if_deregister_com_alloc(u_char type) 4026 { 4027 4028 KASSERT(if_com_alloc[type] != NULL, 4029 ("if_deregister_com_alloc: %d not registered", type)); 4030 KASSERT(if_com_free[type] != NULL, 4031 ("if_deregister_com_alloc: %d free not registered", type)); 4032 if_com_alloc[type] = NULL; 4033 if_com_free[type] = NULL; 4034 } 4035 4036 /* API for driver access to network stack owned ifnet.*/ 4037 uint64_t 4038 if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) 4039 { 4040 uint64_t oldbrate; 4041 4042 oldbrate = ifp->if_baudrate; 4043 ifp->if_baudrate = baudrate; 4044 return (oldbrate); 4045 } 4046 4047 uint64_t 4048 if_getbaudrate(if_t ifp) 4049 { 4050 4051 return (((struct ifnet *)ifp)->if_baudrate); 4052 } 4053 4054 int 4055 if_setcapabilities(if_t ifp, int capabilities) 4056 { 4057 ((struct ifnet *)ifp)->if_capabilities = capabilities; 4058 return (0); 4059 } 4060 4061 int 4062 if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) 4063 { 4064 ((struct ifnet *)ifp)->if_capabilities |= setbit; 4065 ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; 4066 4067 return (0); 4068 } 4069 4070 int 4071 if_getcapabilities(if_t ifp) 4072 { 4073 return ((struct ifnet *)ifp)->if_capabilities; 4074 } 4075 4076 int 4077 if_setcapenable(if_t ifp, int capabilities) 4078 { 4079 ((struct ifnet *)ifp)->if_capenable = capabilities; 4080 return (0); 4081 } 4082 4083 int 4084 if_setcapenablebit(if_t ifp, int setcap, int clearcap) 4085 { 4086 if(setcap) 4087 ((struct ifnet *)ifp)->if_capenable |= setcap; 4088 if(clearcap) 4089 ((struct ifnet *)ifp)->if_capenable &= ~clearcap; 4090 4091 return (0); 4092 } 4093 4094 const char * 4095 if_getdname(if_t ifp) 4096 { 4097 return ((struct ifnet *)ifp)->if_dname; 4098 } 4099 4100 int 4101 if_togglecapenable(if_t ifp, int togglecap) 4102 { 4103 ((struct ifnet *)ifp)->if_capenable ^= 
togglecap; 4104 return (0); 4105 } 4106 4107 int 4108 if_getcapenable(if_t ifp) 4109 { 4110 return ((struct ifnet *)ifp)->if_capenable; 4111 } 4112 4113 /* 4114 * This is largely undesirable because it ties ifnet to a device, but does 4115 * provide flexiblity for an embedded product vendor. Should be used with 4116 * the understanding that it violates the interface boundaries, and should be 4117 * a last resort only. 4118 */ 4119 int 4120 if_setdev(if_t ifp, void *dev) 4121 { 4122 return (0); 4123 } 4124 4125 int 4126 if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) 4127 { 4128 ((struct ifnet *)ifp)->if_drv_flags |= set_flags; 4129 ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; 4130 4131 return (0); 4132 } 4133 4134 int 4135 if_getdrvflags(if_t ifp) 4136 { 4137 return ((struct ifnet *)ifp)->if_drv_flags; 4138 } 4139 4140 int 4141 if_setdrvflags(if_t ifp, int flags) 4142 { 4143 ((struct ifnet *)ifp)->if_drv_flags = flags; 4144 return (0); 4145 } 4146 4147 int 4148 if_setflags(if_t ifp, int flags) 4149 { 4150 4151 ifp->if_flags = flags; 4152 return (0); 4153 } 4154 4155 int 4156 if_setflagbits(if_t ifp, int set, int clear) 4157 { 4158 ((struct ifnet *)ifp)->if_flags |= set; 4159 ((struct ifnet *)ifp)->if_flags &= ~clear; 4160 4161 return (0); 4162 } 4163 4164 int 4165 if_getflags(if_t ifp) 4166 { 4167 return ((struct ifnet *)ifp)->if_flags; 4168 } 4169 4170 int 4171 if_clearhwassist(if_t ifp) 4172 { 4173 ((struct ifnet *)ifp)->if_hwassist = 0; 4174 return (0); 4175 } 4176 4177 int 4178 if_sethwassistbits(if_t ifp, int toset, int toclear) 4179 { 4180 ((struct ifnet *)ifp)->if_hwassist |= toset; 4181 ((struct ifnet *)ifp)->if_hwassist &= ~toclear; 4182 4183 return (0); 4184 } 4185 4186 int 4187 if_sethwassist(if_t ifp, int hwassist_bit) 4188 { 4189 ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; 4190 return (0); 4191 } 4192 4193 int 4194 if_gethwassist(if_t ifp) 4195 { 4196 return ((struct ifnet *)ifp)->if_hwassist; 4197 } 4198 4199 int 4200 
if_setmtu(if_t ifp, int mtu) 4201 { 4202 ((struct ifnet *)ifp)->if_mtu = mtu; 4203 return (0); 4204 } 4205 4206 int 4207 if_getmtu(if_t ifp) 4208 { 4209 return ((struct ifnet *)ifp)->if_mtu; 4210 } 4211 4212 int 4213 if_getmtu_family(if_t ifp, int family) 4214 { 4215 struct domain *dp; 4216 4217 for (dp = domains; dp; dp = dp->dom_next) { 4218 if (dp->dom_family == family && dp->dom_ifmtu != NULL) 4219 return (dp->dom_ifmtu((struct ifnet *)ifp)); 4220 } 4221 4222 return (((struct ifnet *)ifp)->if_mtu); 4223 } 4224 4225 /* 4226 * Methods for drivers to access interface unicast and multicast 4227 * link level addresses. Driver shall not know 'struct ifaddr' neither 4228 * 'struct ifmultiaddr'. 4229 */ 4230 u_int 4231 if_lladdr_count(if_t ifp) 4232 { 4233 struct epoch_tracker et; 4234 struct ifaddr *ifa; 4235 u_int count; 4236 4237 count = 0; 4238 NET_EPOCH_ENTER(et); 4239 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 4240 if (ifa->ifa_addr->sa_family == AF_LINK) 4241 count++; 4242 NET_EPOCH_EXIT(et); 4243 4244 return (count); 4245 } 4246 4247 u_int 4248 if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) 4249 { 4250 struct epoch_tracker et; 4251 struct ifaddr *ifa; 4252 u_int count; 4253 4254 MPASS(cb); 4255 4256 count = 0; 4257 NET_EPOCH_ENTER(et); 4258 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 4259 if (ifa->ifa_addr->sa_family != AF_LINK) 4260 continue; 4261 count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr, 4262 count); 4263 } 4264 NET_EPOCH_EXIT(et); 4265 4266 return (count); 4267 } 4268 4269 u_int 4270 if_llmaddr_count(if_t ifp) 4271 { 4272 struct epoch_tracker et; 4273 struct ifmultiaddr *ifma; 4274 int count; 4275 4276 count = 0; 4277 NET_EPOCH_ENTER(et); 4278 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 4279 if (ifma->ifma_addr->sa_family == AF_LINK) 4280 count++; 4281 NET_EPOCH_EXIT(et); 4282 4283 return (count); 4284 } 4285 4286 u_int 4287 if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) 4288 { 4289 
struct epoch_tracker et; 4290 struct ifmultiaddr *ifma; 4291 u_int count; 4292 4293 MPASS(cb); 4294 4295 count = 0; 4296 NET_EPOCH_ENTER(et); 4297 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 4298 if (ifma->ifma_addr->sa_family != AF_LINK) 4299 continue; 4300 count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr, 4301 count); 4302 } 4303 NET_EPOCH_EXIT(et); 4304 4305 return (count); 4306 } 4307 4308 int 4309 if_setsoftc(if_t ifp, void *softc) 4310 { 4311 ((struct ifnet *)ifp)->if_softc = softc; 4312 return (0); 4313 } 4314 4315 void * 4316 if_getsoftc(if_t ifp) 4317 { 4318 return ((struct ifnet *)ifp)->if_softc; 4319 } 4320 4321 void 4322 if_setrcvif(struct mbuf *m, if_t ifp) 4323 { 4324 4325 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 4326 m->m_pkthdr.rcvif = (struct ifnet *)ifp; 4327 } 4328 4329 void 4330 if_setvtag(struct mbuf *m, uint16_t tag) 4331 { 4332 m->m_pkthdr.ether_vtag = tag; 4333 } 4334 4335 uint16_t 4336 if_getvtag(struct mbuf *m) 4337 { 4338 4339 return (m->m_pkthdr.ether_vtag); 4340 } 4341 4342 int 4343 if_sendq_empty(if_t ifp) 4344 { 4345 return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); 4346 } 4347 4348 struct ifaddr * 4349 if_getifaddr(if_t ifp) 4350 { 4351 return ((struct ifnet *)ifp)->if_addr; 4352 } 4353 4354 int 4355 if_getamcount(if_t ifp) 4356 { 4357 return ((struct ifnet *)ifp)->if_amcount; 4358 } 4359 4360 int 4361 if_setsendqready(if_t ifp) 4362 { 4363 IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); 4364 return (0); 4365 } 4366 4367 int 4368 if_setsendqlen(if_t ifp, int tx_desc_count) 4369 { 4370 IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); 4371 ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; 4372 4373 return (0); 4374 } 4375 4376 int 4377 if_vlantrunkinuse(if_t ifp) 4378 { 4379 return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; 4380 } 4381 4382 int 4383 if_input(if_t ifp, struct mbuf* sendmp) 4384 { 4385 (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, 
sendmp); 4386 return (0); 4387 4388 } 4389 4390 struct mbuf * 4391 if_dequeue(if_t ifp) 4392 { 4393 struct mbuf *m; 4394 IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); 4395 4396 return (m); 4397 } 4398 4399 int 4400 if_sendq_prepend(if_t ifp, struct mbuf *m) 4401 { 4402 IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); 4403 return (0); 4404 } 4405 4406 int 4407 if_setifheaderlen(if_t ifp, int len) 4408 { 4409 ((struct ifnet *)ifp)->if_hdrlen = len; 4410 return (0); 4411 } 4412 4413 caddr_t 4414 if_getlladdr(if_t ifp) 4415 { 4416 return (IF_LLADDR((struct ifnet *)ifp)); 4417 } 4418 4419 void * 4420 if_gethandle(u_char type) 4421 { 4422 return (if_alloc(type)); 4423 } 4424 4425 void 4426 if_bpfmtap(if_t ifh, struct mbuf *m) 4427 { 4428 struct ifnet *ifp = (struct ifnet *)ifh; 4429 4430 BPF_MTAP(ifp, m); 4431 } 4432 4433 void 4434 if_etherbpfmtap(if_t ifh, struct mbuf *m) 4435 { 4436 struct ifnet *ifp = (struct ifnet *)ifh; 4437 4438 ETHER_BPF_MTAP(ifp, m); 4439 } 4440 4441 void 4442 if_vlancap(if_t ifh) 4443 { 4444 struct ifnet *ifp = (struct ifnet *)ifh; 4445 VLAN_CAPABILITIES(ifp); 4446 } 4447 4448 int 4449 if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) 4450 { 4451 4452 ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax; 4453 return (0); 4454 } 4455 4456 int 4457 if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) 4458 { 4459 4460 ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount; 4461 return (0); 4462 } 4463 4464 int 4465 if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) 4466 { 4467 4468 ((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; 4469 return (0); 4470 } 4471 4472 u_int 4473 if_gethwtsomax(if_t ifp) 4474 { 4475 4476 return (((struct ifnet *)ifp)->if_hw_tsomax); 4477 } 4478 4479 u_int 4480 if_gethwtsomaxsegcount(if_t ifp) 4481 { 4482 4483 return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount); 4484 } 4485 4486 u_int 4487 if_gethwtsomaxsegsize(if_t ifp) 4488 { 4489 4490 return (((struct ifnet 
*)ifp)->if_hw_tsomaxsegsize); 4491 } 4492 4493 void 4494 if_setinitfn(if_t ifp, void (*init_fn)(void *)) 4495 { 4496 ((struct ifnet *)ifp)->if_init = init_fn; 4497 } 4498 4499 void 4500 if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) 4501 { 4502 ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; 4503 } 4504 4505 void 4506 if_setstartfn(if_t ifp, void (*start_fn)(if_t)) 4507 { 4508 ((struct ifnet *)ifp)->if_start = (void *)start_fn; 4509 } 4510 4511 void 4512 if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) 4513 { 4514 ((struct ifnet *)ifp)->if_transmit = start_fn; 4515 } 4516 4517 void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) 4518 { 4519 ((struct ifnet *)ifp)->if_qflush = flush_fn; 4520 4521 } 4522 4523 void 4524 if_setgetcounterfn(if_t ifp, if_get_counter_t fn) 4525 { 4526 4527 ifp->if_get_counter = fn; 4528 } 4529 4530 /* Revisit these - These are inline functions originally. */ 4531 int 4532 drbr_inuse_drv(if_t ifh, struct buf_ring *br) 4533 { 4534 return drbr_inuse(ifh, br); 4535 } 4536 4537 struct mbuf* 4538 drbr_dequeue_drv(if_t ifh, struct buf_ring *br) 4539 { 4540 return drbr_dequeue(ifh, br); 4541 } 4542 4543 int 4544 drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br) 4545 { 4546 return drbr_needs_enqueue(ifh, br); 4547 } 4548 4549 int 4550 drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m) 4551 { 4552 return drbr_enqueue(ifh, br, m); 4553 4554 } 4555