1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2010 Bjoern A. Zeeb <bz@FreeBSD.org> 5 * Copyright (c) 1980, 1986, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 */ 32 33 #include "opt_bpf.h" 34 #include "opt_inet6.h" 35 #include "opt_inet.h" 36 #include "opt_ddb.h" 37 38 #include <sys/param.h> 39 #include <sys/capsicum.h> 40 #include <sys/conf.h> 41 #include <sys/eventhandler.h> 42 #include <sys/malloc.h> 43 #include <sys/domainset.h> 44 #include <sys/sbuf.h> 45 #include <sys/bus.h> 46 #include <sys/epoch.h> 47 #include <sys/mbuf.h> 48 #include <sys/systm.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/socket.h> 52 #include <sys/socketvar.h> 53 #include <sys/protosw.h> 54 #include <sys/kernel.h> 55 #include <sys/lock.h> 56 #include <sys/refcount.h> 57 #include <sys/module.h> 58 #include <sys/nv.h> 59 #include <sys/rwlock.h> 60 #include <sys/sockio.h> 61 #include <sys/stdarg.h> 62 #include <sys/syslog.h> 63 #include <sys/sysctl.h> 64 #include <sys/sysent.h> 65 #include <sys/taskqueue.h> 66 #include <sys/domain.h> 67 #include <sys/jail.h> 68 #include <sys/priv.h> 69 70 #ifdef DDB 71 #include <ddb/ddb.h> 72 #endif 73 74 #include <vm/uma.h> 75 76 #include <net/bpf.h> 77 #include <net/if.h> 78 #include <net/if_arp.h> 79 #include <net/if_clone.h> 80 #include <net/if_dl.h> 81 #include <net/if_strings.h> 82 #include <net/if_types.h> 83 #include <net/if_var.h> 84 #include <net/if_media.h> 85 #include <net/if_mib.h> 86 #include <net/if_private.h> 87 #include <net/if_vlan_var.h> 88 #include <net/radix.h> 89 #include <net/route.h> 90 #include <net/route/route_ctl.h> 91 #include <net/vnet.h> 92 93 #if defined(INET) || defined(INET6) 94 #include <net/ethernet.h> 95 #include <netinet/in.h> 96 #include <netinet/in_var.h> 97 #include <netinet/ip.h> 98 #include <netinet/ip_carp.h> 99 #ifdef INET 100 #include <net/debugnet.h> 101 #include <netinet/if_ether.h> 102 #endif /* INET */ 103 #ifdef INET6 104 #include <netinet6/in6_var.h> 105 #endif /* INET6 */ 106 #endif /* INET || INET6 */ 107 108 #include <security/mac/mac_framework.h> 109 110 /* 111 * Consumers of struct ifreq such as tcpdump assume no pad between 
ifr_name 112 * and ifr_ifru when it is used in SIOCGIFCONF. 113 */ 114 _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) == 115 offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru"); 116 117 __read_mostly epoch_t net_epoch_preempt; 118 #ifdef COMPAT_FREEBSD32 119 #include <sys/mount.h> 120 #include <compat/freebsd32/freebsd32.h> 121 122 struct ifreq_buffer32 { 123 uint32_t length; /* (size_t) */ 124 uint32_t buffer; /* (void *) */ 125 }; 126 127 /* 128 * Interface request structure used for socket 129 * ioctl's. All interface ioctl's must have parameter 130 * definitions which begin with ifr_name. The 131 * remainder may be interface specific. 132 */ 133 struct ifreq32 { 134 char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ 135 union { 136 struct sockaddr ifru_addr; 137 struct sockaddr ifru_dstaddr; 138 struct sockaddr ifru_broadaddr; 139 struct ifreq_buffer32 ifru_buffer; 140 short ifru_flags[2]; 141 short ifru_index; 142 int ifru_jid; 143 int ifru_metric; 144 int ifru_mtu; 145 int ifru_phys; 146 int ifru_media; 147 uint32_t ifru_data; 148 int ifru_cap[2]; 149 u_int ifru_fib; 150 u_char ifru_vlan_pcp; 151 } ifr_ifru; 152 }; 153 CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); 154 CTASSERT(__offsetof(struct ifreq, ifr_ifru) == 155 __offsetof(struct ifreq32, ifr_ifru)); 156 157 struct ifconf32 { 158 int32_t ifc_len; 159 union { 160 uint32_t ifcu_buf; 161 uint32_t ifcu_req; 162 } ifc_ifcu; 163 }; 164 #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) 165 166 struct ifdrv32 { 167 char ifd_name[IFNAMSIZ]; 168 uint32_t ifd_cmd; 169 uint32_t ifd_len; 170 uint32_t ifd_data; 171 }; 172 #define SIOCSDRVSPEC32 _IOC_NEWTYPE(SIOCSDRVSPEC, struct ifdrv32) 173 #define SIOCGDRVSPEC32 _IOC_NEWTYPE(SIOCGDRVSPEC, struct ifdrv32) 174 175 struct ifgroupreq32 { 176 char ifgr_name[IFNAMSIZ]; 177 u_int ifgr_len; 178 union { 179 char ifgru_group[IFNAMSIZ]; 180 uint32_t ifgru_groups; 181 } ifgr_ifgru; 182 }; 183 #define SIOCAIFGROUP32 
_IOC_NEWTYPE(SIOCAIFGROUP, struct ifgroupreq32) 184 #define SIOCGIFGROUP32 _IOC_NEWTYPE(SIOCGIFGROUP, struct ifgroupreq32) 185 #define SIOCDIFGROUP32 _IOC_NEWTYPE(SIOCDIFGROUP, struct ifgroupreq32) 186 #define SIOCGIFGMEMB32 _IOC_NEWTYPE(SIOCGIFGMEMB, struct ifgroupreq32) 187 188 struct ifmediareq32 { 189 char ifm_name[IFNAMSIZ]; 190 int ifm_current; 191 int ifm_mask; 192 int ifm_status; 193 int ifm_active; 194 int ifm_count; 195 uint32_t ifm_ulist; /* (int *) */ 196 }; 197 #define SIOCGIFMEDIA32 _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32) 198 #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32) 199 #endif /* COMPAT_FREEBSD32 */ 200 201 union ifreq_union { 202 struct ifreq ifr; 203 #ifdef COMPAT_FREEBSD32 204 struct ifreq32 ifr32; 205 #endif 206 }; 207 208 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 209 "Link layers"); 210 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 211 "Generic link-management"); 212 213 SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, 214 &ifqmaxlen, 0, "max send queue size"); 215 216 /* Log link state change events */ 217 static int log_link_state_change = 1; 218 219 SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, 220 &log_link_state_change, 0, 221 "log interface link state change events"); 222 223 /* Log promiscuous mode change events */ 224 static int log_promisc_mode_change = 1; 225 226 SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, 227 &log_promisc_mode_change, 1, 228 "log promiscuous mode change events"); 229 230 /* Interface description */ 231 static unsigned int ifdescr_maxlen = 1024; 232 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, 233 &ifdescr_maxlen, 0, 234 "administrative maximum length for interface description"); 235 236 static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); 237 238 /* global sx for non-critical path ifdescr */ 239 static struct sx ifdescr_sx; 240 SX_SYSINIT(ifdescr_sx, 
&ifdescr_sx, "ifnet descr"); 241 242 void (*lagg_linkstate_p)(struct ifnet *ifp, int state); 243 /* These are external hooks for CARP. */ 244 void (*carp_linkstate_p)(struct ifnet *ifp); 245 void (*carp_demote_adj_p)(int, char *); 246 int (*carp_master_p)(struct ifaddr *); 247 #if defined(INET) || defined(INET6) 248 int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); 249 int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, 250 const struct sockaddr *sa); 251 int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); 252 int (*carp_attach_p)(struct ifaddr *, int); 253 void (*carp_detach_p)(struct ifaddr *, bool); 254 #endif 255 #ifdef INET 256 int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); 257 #endif 258 #ifdef INET6 259 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); 260 caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, 261 const struct in6_addr *taddr); 262 #endif 263 264 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; 265 266 /* 267 * XXX: Style; these should be sorted alphabetically, and unprototyped 268 * static functions should be prototyped. Currently they are sorted by 269 * declaration order. 
270 */ 271 static int ifconf(u_long, caddr_t); 272 static void if_input_default(struct ifnet *, struct mbuf *); 273 static int if_requestencap_default(struct ifnet *, struct if_encap_req *); 274 static int if_setflag(struct ifnet *, int, int, int *, int); 275 static int if_transmit_default(struct ifnet *ifp, struct mbuf *m); 276 static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); 277 static void do_link_state_change(void *, int); 278 static int if_getgroup(struct ifgroupreq *, struct ifnet *); 279 static int if_getgroupmembers(struct ifgroupreq *); 280 static void if_delgroups(struct ifnet *); 281 static void if_attach_internal(struct ifnet *, bool); 282 static void if_detach_internal(struct ifnet *, bool); 283 static void if_siocaddmulti(void *, int); 284 static void if_link_ifnet(struct ifnet *); 285 static bool if_unlink_ifnet(struct ifnet *, bool); 286 #ifdef VIMAGE 287 static void if_vmove(struct ifnet *, struct vnet *); 288 #endif 289 290 #ifdef INET6 291 /* 292 * XXX: declare here to avoid to include many inet6 related files.. 293 * should be more generalized? 294 */ 295 extern void nd6_setmtu(struct ifnet *); 296 #endif 297 298 /* ipsec helper hooks */ 299 VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); 300 VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); 301 302 int ifqmaxlen = IFQ_MAXLEN; 303 VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ 304 VNET_DEFINE(struct ifgrouphead, ifg_head); 305 306 /* Table of ifnet by index. 
 */
static int if_index;
static int if_indexlim = 8;
static struct ifindex_entry {
	struct ifnet	*ife_ifnet;
	uint16_t	ife_gencnt;	/* bumped on each slot reuse */
} *ifindex_table;

SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Variables global to all interfaces");

/*
 * Report the highest ifindex that is currently occupied by an interface
 * belonging to the current vnet.  NOTE(review): despite the OID name
 * "ifcount" this is an index high-water mark, not a count of interfaces
 * (matching the description string) — confirm consumers expect that.
 */
static int
sysctl_ifcount(SYSCTL_HANDLER_ARGS)
{
	int rv = 0;

	IFNET_RLOCK();
	for (int i = 1; i <= if_index; i++)
		if (ifindex_table[i].ife_ifnet != NULL &&
		    ifindex_table[i].ife_ifnet->if_vnet == curvnet)
			rv = i;
	IFNET_RUNLOCK();

	return (sysctl_handle_int(oidp, &rv, 0, req));
}
SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount,
    CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I",
    "Maximum known interface index");

/*
 * The global network interface list (V_ifnet) and related state (such as
 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock.
 * This may be acquired to stabilise the list, or we may rely on NET_EPOCH.
 */
struct sx ifnet_sxlock;
SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);

struct sx ifnet_detach_sxlock;
SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx",
    SX_RECURSE);

/* Per-iftype layer 2 common-structure allocation hooks (e.g. firewire). */
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];

static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");

/*
 * Look up an interface by its index.  Must be called inside the network
 * epoch.  Returns NULL if the index is out of range, the slot is empty,
 * or the interface belongs to a different vnet than the caller's.
 * The returned pointer is only stable for the duration of the epoch.
 */
struct ifnet *
ifnet_byindex(u_int idx)
{
	struct ifnet *ifp;

	NET_EPOCH_ASSERT();

	if (__predict_false(idx > if_index))
		return (NULL);

	/* ck_pr load pairs with the ck_pr stores done under IFNET_WLOCK. */
	ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);

	if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet)
		ifp = NULL;

	return (ifp);
}

/*
 * As ifnet_byindex(), but also acquire a reference that survives the
 * epoch section.  Returns NULL if the interface is absent, dying, or
 * its refcount already dropped to zero.
 */
struct ifnet *
ifnet_byindex_ref(u_int idx)
{
	struct ifnet *ifp;

	ifp = ifnet_byindex(idx);
	if (ifp == NULL || (ifp->if_flags & IFF_DYING))
		return (NULL);
	if (!if_try_ref(ifp))
		return (NULL);
	return (ifp);
}

/*
 * Generation-checked lookup: returns the interface at idx only if the
 * slot's generation count still matches gen, i.e. the slot has not been
 * recycled since the caller recorded (idx, gen).  Epoch required.
 */
struct ifnet *
ifnet_byindexgen(uint16_t idx, uint16_t gen)
{
	struct ifnet *ifp;

	NET_EPOCH_ASSERT();

	if (__predict_false(idx > if_index))
		return (NULL);

	ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);

	if (ifindex_table[idx].ife_gencnt == gen)
		return (ifp);
	else
		return (NULL);
}

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
408 */ 409 410 static void 411 if_init_idxtable(void *arg __unused) 412 { 413 414 ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table), 415 M_IFNET, M_WAITOK | M_ZERO); 416 } 417 SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init_idxtable, NULL); 418 419 static void 420 vnet_if_init(const void *unused __unused) 421 { 422 423 CK_STAILQ_INIT(&V_ifnet); 424 CK_STAILQ_INIT(&V_ifg_head); 425 } 426 VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, 427 NULL); 428 429 static void 430 if_link_ifnet(struct ifnet *ifp) 431 { 432 433 IFNET_WLOCK(); 434 CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); 435 #ifdef VIMAGE 436 curvnet->vnet_ifcnt++; 437 #endif 438 IFNET_WUNLOCK(); 439 } 440 441 static bool 442 if_unlink_ifnet(struct ifnet *ifp, bool vmove) 443 { 444 struct ifnet *iter; 445 int found = 0; 446 447 IFNET_WLOCK(); 448 CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) 449 if (iter == ifp) { 450 CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); 451 if (!vmove) 452 ifp->if_flags |= IFF_DYING; 453 found = 1; 454 break; 455 } 456 #ifdef VIMAGE 457 curvnet->vnet_ifcnt--; 458 #endif 459 IFNET_WUNLOCK(); 460 461 return (found); 462 } 463 464 #ifdef VIMAGE 465 static void 466 vnet_if_return(const void *unused __unused) 467 { 468 struct ifnet *ifp, *nifp; 469 struct ifnet **pending; 470 int found __diagused; 471 int i; 472 473 i = 0; 474 475 /* 476 * We need to protect our access to the V_ifnet tailq. Ordinarily we'd 477 * enter NET_EPOCH, but that's not possible, because if_vmove() calls 478 * if_detach_internal(), which waits for NET_EPOCH callbacks to 479 * complete. We can't do that from within NET_EPOCH. 480 * 481 * However, we can also use the IFNET_xLOCK, which is the V_ifnet 482 * read/write lock. We cannot hold the lock as we call if_vmove() 483 * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib 484 * ctx lock. 
	 */
	IFNET_WLOCK();

	pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt,
	    M_IFNET, M_WAITOK | M_ZERO);

	/* Return all inherited interfaces to their parent vnets. */
	CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
		if (ifp->if_home_vnet != ifp->if_vnet) {
			/* The lock is recursive (SX_RECURSE), so this nests. */
			found = if_unlink_ifnet(ifp, true);
			MPASS(found);

			pending[i++] = ifp;
		}
	}
	IFNET_WUNLOCK();

	/* Move the collected interfaces outside the IFNET lock (LOR). */
	for (int j = 0; j < i; j++) {
		sx_xlock(&ifnet_detach_sxlock);
		if_vmove(pending[j], pending[j]->if_home_vnet);
		sx_xunlock(&ifnet_detach_sxlock);
	}

	free(pending, M_IFNET);
}
VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
    vnet_if_return, NULL);
#endif

/*
 * Allocate a struct ifnet and an index for an interface.  A layer 2
 * common structure will also be allocated if an allocation routine is
 * registered for the passed type.
 */
static struct ifnet *
if_alloc_domain(u_char type, int numa_domain)
{
	struct ifnet *ifp;
	u_short idx;

	KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
	if (numa_domain == IF_NODOM)
		ifp = malloc(sizeof(struct ifnet), M_IFNET,
		    M_WAITOK | M_ZERO);
	else
		ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET,
		    DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO);
	ifp->if_type = type;
	ifp->if_alloctype = type;
	ifp->if_numa_domain = numa_domain;
#ifdef VIMAGE
	ifp->if_vnet = curvnet;
#endif
	/* Layer 2 common structure, if the type registered an allocator. */
	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		KASSERT(ifp->if_l2com, ("%s: if_com_alloc[%u] failed", __func__,
		    type));
	}

	IF_ADDR_LOCK_INIT(ifp);
	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
	TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
	CK_STAILQ_INIT(&ifp->if_addrhead);
	CK_STAILQ_INIT(&ifp->if_multiaddrs);
	CK_STAILQ_INIT(&ifp->if_groups);
#ifdef MAC
	mac_ifnet_init(ifp);
#endif
	ifq_init(&ifp->if_snd, ifp);

	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
	for (int i = 0; i < IFCOUNTERS; i++)
		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
	ifp->if_get_counter = if_get_counter_default;
	ifp->if_pcp = IFNET_PCP_NONE;

	/* Allocate an ifindex array entry. */
	IFNET_WLOCK();
	/*
	 * Try to find an empty slot below if_index.  If we fail, take the
	 * next slot.
	 */
	for (idx = 1; idx <= if_index; idx++) {
		if (ifindex_table[idx].ife_ifnet == NULL)
			break;
	}

	/* Catch if_index overflow. */
	if (idx >= if_indexlim) {
		struct ifindex_entry *new, *old;
		int newlim;

		/*
		 * Grow the table by doubling.  Epoch readers may still be
		 * dereferencing the old table, so publish the new pointer
		 * first and wait out the epoch before freeing the old one.
		 */
		newlim = if_indexlim * 2;
		new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO);
		memcpy(new, ifindex_table, if_indexlim * sizeof(*new));
		old = ifindex_table;
		ck_pr_store_ptr(&ifindex_table, new);
		if_indexlim = newlim;
		NET_EPOCH_WAIT();
		free(old, M_IFNET);
	}
	if (idx > if_index)
		if_index = idx;

	ifp->if_index = idx;
	ifp->if_idxgen = ifindex_table[idx].ife_gencnt;
	ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp);
	IFNET_WUNLOCK();

	return (ifp);
}

/*
 * Allocate an ifnet, preferring the NUMA domain of the given device
 * when one can be determined.
 */
struct ifnet *
if_alloc_dev(u_char type, device_t dev)
{
	int numa_domain;

	if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0)
		return (if_alloc_domain(type, IF_NODOM));
	return (if_alloc_domain(type, numa_domain));
}

/* Allocate an ifnet with no NUMA-domain preference. */
struct ifnet *
if_alloc(u_char type)
{

	return (if_alloc_domain(type, IF_NODOM));
}
/*
 * Do the actual work of freeing a struct ifnet, and layer 2 common
 * structure.  This call is made when the network epoch guarantees
 * us that nobody holds a pointer to the interface.
617 */ 618 static void 619 if_free_deferred(epoch_context_t ctx) 620 { 621 struct ifnet *ifp = __containerof(ctx, struct ifnet, if_epoch_ctx); 622 623 KASSERT((ifp->if_flags & IFF_DYING), 624 ("%s: interface not dying", __func__)); 625 626 if (if_com_free[ifp->if_alloctype] != NULL) 627 if_com_free[ifp->if_alloctype](ifp->if_l2com, 628 ifp->if_alloctype); 629 630 #ifdef MAC 631 mac_ifnet_destroy(ifp); 632 #endif /* MAC */ 633 IF_ADDR_LOCK_DESTROY(ifp); 634 ifq_delete(&ifp->if_snd); 635 636 for (int i = 0; i < IFCOUNTERS; i++) 637 counter_u64_free(ifp->if_counters[i]); 638 639 if_freedescr(ifp->if_description); 640 free(ifp->if_hw_addr, M_IFADDR); 641 free(ifp, M_IFNET); 642 } 643 644 /* 645 * Deregister an interface and free the associated storage. 646 */ 647 void 648 if_free(struct ifnet *ifp) 649 { 650 651 ifp->if_flags |= IFF_DYING; /* XXX: Locking */ 652 653 /* 654 * XXXGL: An interface index is really an alias to ifp pointer. 655 * Why would we clear the alias now, and not in the deferred 656 * context? Indeed there is nothing wrong with some network 657 * thread obtaining ifp via ifnet_byindex() inside the network 658 * epoch and then dereferencing ifp while we perform if_free(), 659 * and after if_free() finished, too. 660 * 661 * This early index freeing was important back when ifindex was 662 * virtualized and interface would outlive the vnet. 663 */ 664 IFNET_WLOCK(); 665 MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); 666 ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL); 667 ifindex_table[ifp->if_index].ife_gencnt++; 668 while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL) 669 if_index--; 670 IFNET_WUNLOCK(); 671 672 if (refcount_release(&ifp->if_refcount)) 673 NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx); 674 } 675 676 /* 677 * Interfaces to keep an ifnet type-stable despite the possibility of the 678 * driver calling if_free(). 
If there are additional references, we defer 679 * freeing the underlying data structure. 680 */ 681 void 682 if_ref(struct ifnet *ifp) 683 { 684 u_int old __diagused; 685 686 /* We don't assert the ifnet list lock here, but arguably should. */ 687 old = refcount_acquire(&ifp->if_refcount); 688 KASSERT(old > 0, ("%s: ifp %p has 0 refs", __func__, ifp)); 689 } 690 691 bool 692 if_try_ref(struct ifnet *ifp) 693 { 694 NET_EPOCH_ASSERT(); 695 return (refcount_acquire_if_not_zero(&ifp->if_refcount)); 696 } 697 698 void 699 if_rele(struct ifnet *ifp) 700 { 701 702 if (!refcount_release(&ifp->if_refcount)) 703 return; 704 NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx); 705 } 706 707 void 708 ifq_init(struct ifaltq *ifq, struct ifnet *ifp) 709 { 710 711 mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); 712 713 if (ifq->ifq_maxlen == 0) 714 ifq->ifq_maxlen = ifqmaxlen; 715 716 ifq->altq_type = 0; 717 ifq->altq_disc = NULL; 718 ifq->altq_flags &= ALTQF_CANTCHANGE; 719 ifq->altq_tbr = NULL; 720 ifq->altq_ifp = ifp; 721 } 722 723 void 724 ifq_delete(struct ifaltq *ifq) 725 { 726 mtx_destroy(&ifq->ifq_mtx); 727 } 728 729 /* 730 * Perform generic interface initialization tasks and attach the interface 731 * to the list of "active" interfaces. If vmove flag is set on entry 732 * to if_attach_internal(), perform only a limited subset of initialization 733 * tasks, given that we are moving from one vnet to another an ifnet which 734 * has already been fully initialized. 735 * 736 * Note that if_detach_internal() removes group membership unconditionally 737 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. 738 * Thus, when if_vmove() is applied to a cloned interface, group membership 739 * is lost while a cloned one always joins a group whose name is 740 * ifc->ifc_name. To recover this after if_detach_internal() and 741 * if_attach_internal(), the cloner should be specified to 742 * if_attach_internal() via ifc. 
If it is non-NULL, if_attach_internal() 743 * attempts to join a group whose name is ifc->ifc_name. 744 * 745 * XXX: 746 * - The decision to return void and thus require this function to 747 * succeed is questionable. 748 * - We should probably do more sanity checking. For instance we don't 749 * do anything to insure if_xname is unique or non-empty. 750 */ 751 void 752 if_attach(struct ifnet *ifp) 753 { 754 755 if_attach_internal(ifp, false); 756 } 757 758 /* 759 * Compute the least common TSO limit. 760 */ 761 void 762 if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) 763 { 764 /* 765 * 1) If there is no limit currently, take the limit from 766 * the network adapter. 767 * 768 * 2) If the network adapter has a limit below the current 769 * limit, apply it. 770 */ 771 if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && 772 ifp->if_hw_tsomax < pmax->tsomaxbytes)) { 773 pmax->tsomaxbytes = ifp->if_hw_tsomax; 774 } 775 if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && 776 ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { 777 pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; 778 } 779 if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && 780 ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { 781 pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; 782 } 783 } 784 785 /* 786 * Update TSO limit of a network adapter. 787 * 788 * Returns zero if no change. Else non-zero. 
789 */ 790 int 791 if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) 792 { 793 int retval = 0; 794 if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { 795 ifp->if_hw_tsomax = pmax->tsomaxbytes; 796 retval++; 797 } 798 if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { 799 ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; 800 retval++; 801 } 802 if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { 803 ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; 804 retval++; 805 } 806 return (retval); 807 } 808 809 static void 810 if_attach_internal(struct ifnet *ifp, bool vmove) 811 { 812 unsigned socksize, ifasize; 813 int namelen, masklen; 814 struct sockaddr_dl *sdl; 815 struct ifaddr *ifa; 816 817 MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); 818 819 #ifdef VIMAGE 820 CURVNET_ASSERT_SET(); 821 ifp->if_vnet = curvnet; 822 if (ifp->if_home_vnet == NULL) 823 ifp->if_home_vnet = curvnet; 824 #endif 825 826 if_addgroup(ifp, IFG_ALL); 827 828 #ifdef VIMAGE 829 /* Restore group membership for cloned interface. */ 830 if (vmove) 831 if_clone_restoregroup(ifp); 832 #endif 833 834 getmicrotime(&ifp->if_lastchange); 835 ifp->if_epoch = time_uptime; 836 837 KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || 838 (ifp->if_transmit != NULL && ifp->if_qflush != NULL), 839 ("transmit and qflush must both either be set or both be NULL")); 840 if (ifp->if_transmit == NULL) { 841 ifp->if_transmit = if_transmit_default; 842 ifp->if_qflush = if_qflush; 843 } 844 if (ifp->if_input == NULL) 845 ifp->if_input = if_input_default; 846 847 if (ifp->if_requestencap == NULL) 848 ifp->if_requestencap = if_requestencap_default; 849 850 if (!vmove) { 851 #ifdef MAC 852 mac_ifnet_create(ifp); 853 #endif 854 855 /* 856 * Create a Link Level name for this device. 857 */ 858 namelen = strlen(ifp->if_xname); 859 /* 860 * Always save enough space for any possible name so we 861 * can do a rename in place later. 
862 */ 863 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; 864 socksize = masklen + ifp->if_addrlen; 865 if (socksize < sizeof(*sdl)) 866 socksize = sizeof(*sdl); 867 socksize = roundup2(socksize, sizeof(long)); 868 ifasize = sizeof(*ifa) + 2 * socksize; 869 ifa = ifa_alloc(ifasize, M_WAITOK); 870 sdl = (struct sockaddr_dl *)(ifa + 1); 871 sdl->sdl_len = socksize; 872 sdl->sdl_family = AF_LINK; 873 bcopy(ifp->if_xname, sdl->sdl_data, namelen); 874 sdl->sdl_nlen = namelen; 875 sdl->sdl_index = ifp->if_index; 876 sdl->sdl_type = ifp->if_type; 877 ifp->if_addr = ifa; 878 ifa->ifa_ifp = ifp; 879 ifa->ifa_addr = (struct sockaddr *)sdl; 880 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); 881 ifa->ifa_netmask = (struct sockaddr *)sdl; 882 sdl->sdl_len = masklen; 883 while (namelen != 0) 884 sdl->sdl_data[--namelen] = 0xff; 885 CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); 886 /* Reliably crash if used uninitialized. */ 887 ifp->if_broadcastaddr = NULL; 888 889 if (ifp->if_type == IFT_ETHER) { 890 ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR, 891 M_WAITOK | M_ZERO); 892 } 893 894 #if defined(INET) || defined(INET6) 895 /* Use defaults for TSO, if nothing is set */ 896 if (ifp->if_hw_tsomax == 0 && 897 ifp->if_hw_tsomaxsegcount == 0 && 898 ifp->if_hw_tsomaxsegsize == 0) { 899 /* 900 * The TSO defaults needs to be such that an 901 * NFS mbuf list of 35 mbufs totalling just 902 * below 64K works and that a chain of mbufs 903 * can be defragged into at most 32 segments: 904 */ 905 ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - 906 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); 907 ifp->if_hw_tsomaxsegcount = 35; 908 ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ 909 910 /* XXX some drivers set IFCAP_TSO after ethernet attach */ 911 if (ifp->if_capabilities & IFCAP_TSO) { 912 if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", 913 ifp->if_hw_tsomax, 914 ifp->if_hw_tsomaxsegcount, 915 ifp->if_hw_tsomaxsegsize); 916 } 917 } 918 #endif 919 } 
920 921 EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); 922 if_link_ifnet(ifp); 923 EVENTHANDLER_INVOKE(ifnet_attached_event, ifp); 924 if (IS_DEFAULT_VNET(curvnet)) 925 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); 926 } 927 928 static void 929 if_epochalloc(void *dummy __unused) 930 { 931 932 net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT); 933 } 934 SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL); 935 936 /* 937 * Remove any unicast or broadcast network addresses from an interface. 938 */ 939 void 940 if_purgeaddrs(struct ifnet *ifp) 941 { 942 struct ifaddr *ifa; 943 944 #ifdef INET6 945 /* 946 * Need to leave multicast addresses of proxy NDP llentries 947 * before in6_purgeifaddr() because the llentries are keys 948 * for in6_multi objects of proxy NDP entries. 949 * in6_purgeifaddr()s clean up llentries including proxy NDPs 950 * then we would lose the keys if they are called earlier. 951 */ 952 in6_purge_proxy_ndp(ifp); 953 #endif 954 while (1) { 955 struct epoch_tracker et; 956 957 NET_EPOCH_ENTER(et); 958 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 959 if (ifa->ifa_addr->sa_family != AF_LINK) 960 break; 961 } 962 NET_EPOCH_EXIT(et); 963 964 if (ifa == NULL) 965 break; 966 #ifdef INET 967 /* XXX: Ugly!! 
ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifr_addr = *ifa->ifa_addr;
			/* Let the INET control path do the full teardown;
			 * on success, rescan from the head of the list. */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeifaddr((struct in6_ifaddr *)ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* Fallback for other families: unlink and drop our ref. */
		IF_ADDR_WLOCK(ifp);
		CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
		IF_ADDR_WUNLOCK(ifp);
		ifa_free(ifa);
	}
}

/*
 * Remove any multicast network addresses from an interface when an ifnet
 * is going away.
 */
static void
if_purgemaddrs(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;

	IF_ADDR_WLOCK(ifp);
	while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
		ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs);
		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
		if_delmulti_locked(ifp, ifma, 1);
	}
	IF_ADDR_WUNLOCK(ifp);
}

/*
 * Detach an interface, removing it from the list of "active" interfaces.
 * If vmove flag is set on entry to if_detach_internal(), perform only a
 * limited subset of cleanup tasks, given that we are moving an ifnet from
 * one vnet to another, where it must be fully operational.
 *
 * XXXRW: There are some significant questions about event ordering, and
 * how to prevent things from starting to use the interface during detach.
1018 */ 1019 void 1020 if_detach(struct ifnet *ifp) 1021 { 1022 bool found; 1023 1024 CURVNET_SET_QUIET(ifp->if_vnet); 1025 found = if_unlink_ifnet(ifp, false); 1026 if (found) { 1027 sx_xlock(&ifnet_detach_sxlock); 1028 if_detach_internal(ifp, false); 1029 sx_xunlock(&ifnet_detach_sxlock); 1030 } 1031 CURVNET_RESTORE(); 1032 } 1033 1034 /* 1035 * The vmove flag, if set, indicates that we are called from a callpath 1036 * that is moving an interface to a different vnet instance. 1037 * 1038 * The shutdown flag, if set, indicates that we are called in the 1039 * process of shutting down a vnet instance. Currently only the 1040 * vnet_if_return SYSUNINIT function sets it. Note: we can be called 1041 * on a vnet instance shutdown without this flag being set, e.g., when 1042 * the cloned interfaces are destoyed as first thing of teardown. 1043 */ 1044 static void 1045 if_detach_internal(struct ifnet *ifp, bool vmove) 1046 { 1047 struct ifaddr *ifa; 1048 #ifdef VIMAGE 1049 bool shutdown; 1050 1051 shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); 1052 #endif 1053 1054 sx_assert(&ifnet_detach_sxlock, SX_XLOCKED); 1055 1056 /* 1057 * At this point we know the interface still was on the ifnet list 1058 * and we removed it so we are in a stable state. 1059 */ 1060 NET_EPOCH_WAIT(); 1061 1062 /* 1063 * Ensure all pending EPOCH(9) callbacks have been executed. This 1064 * fixes issues about late destruction of multicast options 1065 * which lead to leave group calls, which in turn access the 1066 * belonging ifnet structure: 1067 */ 1068 NET_EPOCH_DRAIN_CALLBACKS(); 1069 1070 /* 1071 * In any case (destroy or vmove) detach us from the groups 1072 * and remove/wait for pending events on the taskq. 1073 * XXX-BZ in theory an interface could still enqueue a taskq change? 
1074 */ 1075 if_delgroups(ifp); 1076 1077 taskqueue_drain(taskqueue_swi, &ifp->if_linktask); 1078 taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask); 1079 1080 if_down(ifp); 1081 1082 #ifdef VIMAGE 1083 /* 1084 * On VNET shutdown abort here as the stack teardown will do all 1085 * the work top-down for us. 1086 */ 1087 if (shutdown) { 1088 /* Give interface users the chance to clean up. */ 1089 EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); 1090 1091 /* 1092 * In case of a vmove we are done here without error. 1093 * If we would signal an error it would lead to the same 1094 * abort as if we did not find the ifnet anymore. 1095 * if_detach() calls us in void context and does not care 1096 * about an early abort notification, so life is splendid :) 1097 */ 1098 return; 1099 } 1100 #endif 1101 1102 /* 1103 * At this point we are not tearing down a VNET and are either 1104 * going to destroy or vmove the interface and have to cleanup 1105 * accordingly. 1106 */ 1107 1108 /* 1109 * Remove routes and flush queues. 1110 */ 1111 #ifdef ALTQ 1112 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 1113 altq_disable(&ifp->if_snd); 1114 if (ALTQ_IS_ATTACHED(&ifp->if_snd)) 1115 altq_detach(&ifp->if_snd); 1116 #endif 1117 1118 if_purgeaddrs(ifp); 1119 EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); 1120 if_purgemaddrs(ifp); 1121 if (IS_DEFAULT_VNET(curvnet)) 1122 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); 1123 1124 if (!vmove) { 1125 /* 1126 * Prevent further calls into the device driver via ifnet. 1127 */ 1128 if_dead(ifp); 1129 1130 /* 1131 * Clean up all addresses. 
1132 */ 1133 IF_ADDR_WLOCK(ifp); 1134 if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) { 1135 ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); 1136 CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); 1137 IF_ADDR_WUNLOCK(ifp); 1138 ifa_free(ifa); 1139 } else 1140 IF_ADDR_WUNLOCK(ifp); 1141 } 1142 1143 rt_flushifroutes(ifp); 1144 } 1145 1146 #ifdef VIMAGE 1147 /* 1148 * if_vmove() performs a limited version of if_detach() in current 1149 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. 1150 */ 1151 static void 1152 if_vmove(struct ifnet *ifp, struct vnet *new_vnet) 1153 { 1154 #ifdef DEV_BPF 1155 /* 1156 * Detach BPF file descriptors from its interface. 1157 */ 1158 bpf_ifdetach(ifp); 1159 #endif 1160 1161 /* 1162 * Detach from current vnet, but preserve LLADDR info, do not 1163 * mark as dead etc. so that the ifnet can be reattached later. 1164 */ 1165 if_detach_internal(ifp, true); 1166 1167 /* 1168 * Perform interface-specific reassignment tasks, if provided by 1169 * the driver. 1170 */ 1171 if (ifp->if_reassign != NULL) 1172 ifp->if_reassign(ifp, new_vnet, NULL); 1173 1174 /* 1175 * Switch to the context of the target vnet. 1176 */ 1177 CURVNET_SET_QUIET(new_vnet); 1178 if_attach_internal(ifp, true); 1179 bpf_vmove(ifp->if_bpf); 1180 CURVNET_RESTORE(); 1181 } 1182 1183 /* 1184 * Move an ifnet to or from another child prison/vnet, specified by the jail id. 1185 */ 1186 static int 1187 if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) 1188 { 1189 struct prison *pr; 1190 struct ifnet *difp; 1191 bool found; 1192 bool shutdown; 1193 1194 MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); 1195 1196 /* Try to find the prison within our visibility. 
*/ 1197 sx_slock(&allprison_lock); 1198 pr = prison_find_child(td->td_ucred->cr_prison, jid); 1199 sx_sunlock(&allprison_lock); 1200 if (pr == NULL) 1201 return (ENXIO); 1202 prison_hold_locked(pr); 1203 mtx_unlock(&pr->pr_mtx); 1204 1205 /* Do not try to move the iface from and to the same prison. */ 1206 if (pr->pr_vnet == ifp->if_vnet) { 1207 prison_free(pr); 1208 return (EEXIST); 1209 } 1210 1211 /* Make sure the named iface does not exists in the dst. prison/vnet. */ 1212 /* XXX Lock interfaces to avoid races. */ 1213 CURVNET_SET_QUIET(pr->pr_vnet); 1214 difp = ifunit(ifname); 1215 CURVNET_RESTORE(); 1216 if (difp != NULL) { 1217 prison_free(pr); 1218 return (EEXIST); 1219 } 1220 sx_xlock(&ifnet_detach_sxlock); 1221 1222 /* Make sure the VNET is stable. */ 1223 shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); 1224 if (shutdown) { 1225 sx_xunlock(&ifnet_detach_sxlock); 1226 prison_free(pr); 1227 return (EBUSY); 1228 } 1229 1230 found = if_unlink_ifnet(ifp, true); 1231 if (! found) { 1232 sx_xunlock(&ifnet_detach_sxlock); 1233 prison_free(pr); 1234 return (ENODEV); 1235 } 1236 1237 /* Move the interface into the child jail/vnet. */ 1238 if_vmove(ifp, pr->pr_vnet); 1239 1240 /* Report the new if_xname back to the userland. */ 1241 sprintf(ifname, "%s", ifp->if_xname); 1242 1243 sx_xunlock(&ifnet_detach_sxlock); 1244 1245 prison_free(pr); 1246 return (0); 1247 } 1248 1249 static int 1250 if_vmove_reclaim(struct thread *td, char *ifname, int jid) 1251 { 1252 struct prison *pr; 1253 struct vnet *vnet_dst; 1254 struct ifnet *ifp; 1255 int found __diagused; 1256 bool shutdown; 1257 1258 /* Try to find the prison within our visibility. */ 1259 sx_slock(&allprison_lock); 1260 pr = prison_find_child(td->td_ucred->cr_prison, jid); 1261 sx_sunlock(&allprison_lock); 1262 if (pr == NULL) 1263 return (ENXIO); 1264 prison_hold_locked(pr); 1265 mtx_unlock(&pr->pr_mtx); 1266 1267 /* Make sure the named iface exists in the source prison/vnet. 
*/ 1268 CURVNET_SET(pr->pr_vnet); 1269 ifp = ifunit(ifname); /* XXX Lock to avoid races. */ 1270 if (ifp == NULL) { 1271 CURVNET_RESTORE(); 1272 prison_free(pr); 1273 return (ENXIO); 1274 } 1275 1276 /* Do not try to move the iface from and to the same prison. */ 1277 vnet_dst = TD_TO_VNET(td); 1278 if (vnet_dst == ifp->if_vnet) { 1279 CURVNET_RESTORE(); 1280 prison_free(pr); 1281 return (EEXIST); 1282 } 1283 1284 /* Make sure the VNET is stable. */ 1285 shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); 1286 if (shutdown) { 1287 CURVNET_RESTORE(); 1288 prison_free(pr); 1289 return (EBUSY); 1290 } 1291 1292 /* Get interface back from child jail/vnet. */ 1293 found = if_unlink_ifnet(ifp, true); 1294 MPASS(found); 1295 sx_xlock(&ifnet_detach_sxlock); 1296 if_vmove(ifp, vnet_dst); 1297 sx_xunlock(&ifnet_detach_sxlock); 1298 CURVNET_RESTORE(); 1299 1300 /* Report the new if_xname back to the userland. */ 1301 sprintf(ifname, "%s", ifp->if_xname); 1302 1303 prison_free(pr); 1304 return (0); 1305 } 1306 #endif /* VIMAGE */ 1307 1308 /* 1309 * Add a group to an interface 1310 */ 1311 int 1312 if_addgroup(struct ifnet *ifp, const char *groupname) 1313 { 1314 struct ifg_list *ifgl; 1315 struct ifg_group *ifg = NULL; 1316 struct ifg_member *ifgm; 1317 int new = 0; 1318 1319 if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && 1320 groupname[strlen(groupname) - 1] <= '9') 1321 return (EINVAL); 1322 1323 IFNET_WLOCK(); 1324 CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 1325 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { 1326 IFNET_WUNLOCK(); 1327 return (EEXIST); 1328 } 1329 1330 if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) { 1331 IFNET_WUNLOCK(); 1332 return (ENOMEM); 1333 } 1334 1335 if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) { 1336 free(ifgl, M_TEMP); 1337 IFNET_WUNLOCK(); 1338 return (ENOMEM); 1339 } 1340 1341 CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) 1342 if (!strcmp(ifg->ifg_group, groupname)) 1343 break; 1344 
1345 if (ifg == NULL) { 1346 if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) { 1347 free(ifgl, M_TEMP); 1348 free(ifgm, M_TEMP); 1349 IFNET_WUNLOCK(); 1350 return (ENOMEM); 1351 } 1352 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); 1353 ifg->ifg_refcnt = 0; 1354 CK_STAILQ_INIT(&ifg->ifg_members); 1355 CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); 1356 new = 1; 1357 } 1358 1359 ifg->ifg_refcnt++; 1360 ifgl->ifgl_group = ifg; 1361 ifgm->ifgm_ifp = ifp; 1362 1363 IF_ADDR_WLOCK(ifp); 1364 CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); 1365 CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); 1366 IF_ADDR_WUNLOCK(ifp); 1367 1368 IFNET_WUNLOCK(); 1369 1370 if (new) 1371 EVENTHANDLER_INVOKE(group_attach_event, ifg); 1372 EVENTHANDLER_INVOKE(group_change_event, groupname); 1373 1374 return (0); 1375 } 1376 1377 /* 1378 * Helper function to remove a group out of an interface. Expects the global 1379 * ifnet lock to be write-locked, and drops it before returning. 
 */
static void
_if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl,
    const char *groupname)
{
	struct ifg_member *ifgm;
	bool freeifgl;

	IFNET_WLOCK_ASSERT();

	IF_ADDR_WLOCK(ifp);
	CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
	IF_ADDR_WUNLOCK(ifp);

	/* Find and unlink our membership entry; NULL if not found. */
	CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == ifp) {
			CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
			    ifg_member, ifgm_next);
			break;
		}
	}

	/* Last member: take the group itself off the global list, too. */
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group,
		    ifg_next);
		freeifgl = true;
	} else {
		freeifgl = false;
	}
	IFNET_WUNLOCK();

	/* Wait for concurrent readers of the unlinked entries to drain. */
	NET_EPOCH_WAIT();
	EVENTHANDLER_INVOKE(group_change_event, groupname);
	if (freeifgl) {
		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
		free(ifgl->ifgl_group, M_TEMP);
	}
	/* ifgm may be NULL if no membership was found; free(NULL) is a no-op. */
	free(ifgm, M_TEMP);
	free(ifgl, M_TEMP);
}

/*
 * Remove a group from an interface.
 * Returns ENOENT if the interface is not a member of the group.
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;

	IFNET_WLOCK();
	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
			break;
	if (ifgl == NULL) {
		IFNET_WUNLOCK();
		return (ENOENT);
	}

	/* Drops the IFNET write lock. */
	_if_delgroup_locked(ifp, ifgl, groupname);

	return (0);
}

/*
 * Remove an interface from all groups
 */
static void
if_delgroups(struct ifnet *ifp)
{
	struct ifg_list *ifgl;
	char groupname[IFNAMSIZ];

	IFNET_WLOCK();
	while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) {
		/*
		 * Copy the name out first: the helper frees the entry and
		 * drops the lock, so it must be re-acquired each iteration.
		 */
		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
		_if_delgroup_locked(ifp, ifgl, groupname);
		IFNET_WLOCK();
	}
	IFNET_WUNLOCK();
}

/*
 * Stores
all groups from an interface in memory pointed to by ifgr.
 */
static int
if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;

	NET_EPOCH_ASSERT();

	/* Zero length is a size probe: report the space needed. */
	if (ifgr->ifgr_len == 0) {
		CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	/* XXX: wire */
	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Stores all members of a group in memory pointed to by ifgr.
 */
static int
if_getgroupmembers(struct ifgroupreq *ifgr)
{
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	IFNET_RLOCK();
	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
			break;
	if (ifg == NULL) {
		IFNET_RUNLOCK();
		return (ENOENT);
	}

	/* Zero length is a size probe: report the space needed. */
	if (ifgr->ifgr_len == 0) {
		CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		IFNET_RUNLOCK();
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq)) {
			IFNET_RUNLOCK();
			return (EINVAL);
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
			IFNET_RUNLOCK();
			return (error);
		}
		len -= sizeof(ifgrq);
		ifgp++;
	}
	IFNET_RUNLOCK();

	return (0);
}

/*
 * Return counter values from counter(9)s stored in ifnet.
 */
uint64_t
if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
{

	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));

	return (counter_u64_fetch(ifp->if_counters[cnt]));
}

/*
 * Increase an ifnet counter. Usually used for counters shared
 * between the stack and a driver, but function supports them all.
 */
void
if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
{

	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));

	counter_u64_add(ifp->if_counters[cnt], inc);
}

/*
 * Copy data from ifnet to userland API structure if_data.
 */
void
if_data_copy(struct ifnet *ifp, struct if_data *ifd)
{

	ifd->ifi_type = ifp->if_type;
	ifd->ifi_physical = 0;
	ifd->ifi_addrlen = ifp->if_addrlen;
	ifd->ifi_hdrlen = ifp->if_hdrlen;
	ifd->ifi_link_state = ifp->if_link_state;
	ifd->ifi_vhid = 0;
	ifd->ifi_datalen = sizeof(struct if_data);
	ifd->ifi_mtu = ifp->if_mtu;
	ifd->ifi_metric = ifp->if_metric;
	ifd->ifi_baudrate = ifp->if_baudrate;
	ifd->ifi_hwassist = ifp->if_hwassist;
	ifd->ifi_epoch = ifp->if_epoch;
	ifd->ifi_lastchange = ifp->if_lastchange;

	/* Counters go through the driver's if_get_counter method. */
	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
}

/*
 * Initialization, destruction and refcounting functions for ifaddrs.
 */
struct ifaddr *
ifa_alloc(size_t size, int flags)
{
	struct ifaddr *ifa;

	KASSERT(size >= sizeof(struct ifaddr),
	    ("%s: invalid size %zu", __func__, size));

	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
	if (ifa == NULL)
		return (NULL);

	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
		goto fail;
	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
		goto fail;
	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
		goto fail;
	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
		goto fail;

	refcount_init(&ifa->ifa_refcnt, 1);

	return (ifa);

fail:
	/* free(NULL) is okay */
	counter_u64_free(ifa->ifa_opackets);
	counter_u64_free(ifa->ifa_ipackets);
	counter_u64_free(ifa->ifa_obytes);
	counter_u64_free(ifa->ifa_ibytes);
	free(ifa, M_IFADDR);

	return (NULL);
}

void
ifa_ref(struct ifaddr *ifa)
{
	u_int old __diagused;

	old = refcount_acquire(&ifa->ifa_refcnt);
	KASSERT(old > 0, ("%s: ifa %p has 0 refs", __func__, ifa));
}

/*
 * Acquire a reference only if the ifaddr is still alive (refcnt > 0).
 * Callers must be within a net epoch section.
 */
int
ifa_try_ref(struct ifaddr *ifa)
{

	NET_EPOCH_ASSERT();
	return (refcount_acquire_if_not_zero(&ifa->ifa_refcnt));
}

/* Deferred destructor, run from an epoch(9) callback after readers drain. */
static void
ifa_destroy(epoch_context_t ctx)
{
	struct ifaddr *ifa;

	ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx);
	counter_u64_free(ifa->ifa_opackets);
	counter_u64_free(ifa->ifa_ipackets);
	counter_u64_free(ifa->ifa_obytes);
	counter_u64_free(ifa->ifa_ibytes);
	free(ifa, M_IFADDR);
}

void
ifa_free(struct ifaddr *ifa)
{

	/* Actual destruction is deferred past the current epoch. */
	if (refcount_release(&ifa->ifa_refcnt))
		NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx);
}

/*
 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
 * structs used to represent other address families, it is necessary
 * to perform a different comparison.
 */
static bool
sa_dl_equal(const struct sockaddr *a, const struct sockaddr *b)
{
	const struct sockaddr_dl *sdl1 = (const struct sockaddr_dl *)a;
	const struct sockaddr_dl *sdl2 = (const struct sockaddr_dl *)b;

	/* Compare only the link-level address portion, past the name. */
	return (sdl1->sdl_len == sdl2->sdl_len &&
	    bcmp(sdl1->sdl_data + sdl1->sdl_nlen,
	    sdl2->sdl_data + sdl2->sdl_nlen, sdl1->sdl_alen) == 0);
}

/*
 * Locate an interface based on a complete address.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_EPOCH_ASSERT();

	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr)) {
				goto done;
			}
			/* IP6 doesn't have broadcast */
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr)) {
				goto done;
			}
		}
	}
	ifa = NULL;
done:
	return (ifa);
}

/*
 * Epoch-entering wrapper around ifa_ifwithaddr() returning a boolean;
 * the looked-up ifa is not referenced, only its existence is reported.
 */
int
ifa_ifwithaddr_check(const struct sockaddr *addr)
{
	struct epoch_tracker et;
	int rc;

	NET_EPOCH_ENTER(et);
	rc = (ifa_ifwithaddr(addr) != NULL);
	NET_EPOCH_EXIT(et);
	return (rc);
}

/*
 * Locate an interface based on the
broadcast address.
 */
/* ARGSUSED */
struct ifaddr *
ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_EPOCH_ASSERT();
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		/* RT_ALL_FIBS matches any fib; otherwise require exact fib. */
		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
			continue;
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr)) {
				goto done;
			}
		}
	}
	ifa = NULL;
done:
	return (ifa);
}

/*
 * Locate the point to point interface with a given destination address.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_EPOCH_ASSERT();
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
			continue;
		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
			continue;
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr != NULL &&
			    sa_equal(addr, ifa->ifa_dstaddr)) {
				goto done;
			}
		}
	}
	ifa = NULL;
done:
	return (ifa);
}

/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 */
struct ifaddr *
ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	const char *addr_data = addr->sa_data, *cplim;

	NET_EPOCH_ASSERT();
	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		ifp = ifnet_byindex(
		    ((const struct sockaddr_dl *)addr)->sdl_index);
		return (ifp ? ifp->if_addr : NULL);
	}

	/*
	 * Scan though each interface, looking for ones that have addresses
	 * in this address family and the requested fib.
	 */
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
			continue;
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			const char *cp, *cp2, *cp3;

			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET &&
			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr)) {
					goto done;
				}
			} else {
				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len
				    + (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one), or if the virtual status
				 * of new prefix is better than of the old one,
				 * then remember the new one before continuing
				 * to search for an even better one.
				 */
				if (ifa_maybe == NULL ||
				    ifa_preferred(ifa_maybe, ifa) ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask)) {
					ifa_maybe = ifa;
				}
			}
		}
	}
	ifa = ifa_maybe;
	ifa_maybe = NULL;
done:
	return (ifa);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	const char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);

	NET_EPOCH_ASSERT();
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first family match as a fallback. */
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr)))
				goto done;
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))
				goto done;
		} else {
			/* Byte-wise masked comparison against the netmask. */
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				goto done;
		}
	}
	ifa = ifa_maybe;
done:
	return (ifa);
}

/*
 * See whether new ifa is better than current one:
 * 1) A non-virtual one is preferred over virtual.
 * 2) A virtual in master state preferred over any other state.
 *
 * Used in several address selecting functions.
 */
int
ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
{

	return (cur->ifa_carp && (!next->ifa_carp ||
	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
}

/* Allocate storage for a link-level sockaddr of the requested size. */
struct sockaddr_dl *
link_alloc_sdl(size_t size, int flags)
{

	return (malloc(size, M_TEMP, flags));
}

/* Release storage obtained from link_alloc_sdl(). */
void
link_free_sdl(struct sockaddr *sa)
{
	free(sa, M_TEMP);
}

/*
 * Fills in given sdl with interface basic info.
 * Returns pointer to filled sdl.
 */
struct sockaddr_dl *
link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
{
	struct sockaddr_dl *sdl;

	sdl = (struct sockaddr_dl *)paddr;
	memset(sdl, 0, sizeof(struct sockaddr_dl));
	sdl->sdl_len = sizeof(struct sockaddr_dl);
	sdl->sdl_family = AF_LINK;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = iftype;

	return (sdl);
}

/*
 * Hooks filled in by if_vlan when it is loaded; NULL otherwise.
 */
void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
void	(*vlan_trunk_cap_p)(struct ifnet *);	/* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
int	(*vlan_pcp_p)(struct ifnet *, uint16_t *);
int	(*vlan_setcookie_p)(struct ifnet *, void *);
void	*(*vlan_cookie_p)(struct ifnet *);
void	(*vlan_input_p)(struct ifnet *, struct mbuf *);

/*
 * Handle a change in the interface link state. To avoid LORs
 * between driver lock and upper layer locks, as well as possible
 * recursions, we post event to taskqueue, and all job
 * is done in static do_link_state_change().
 */
void
if_link_state_change(struct ifnet *ifp, int link_state)
{
	/* Return if state hasn't changed. */
	if (ifp->if_link_state == link_state)
		return;

	ifp->if_link_state = link_state;

	/* XXXGL: reference ifp? */
	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
}

/* Taskqueue handler: propagate a link state change to all consumers. */
static void
do_link_state_change(void *arg, int pending)
{
	struct ifnet *ifp;
	int link_state;

	ifp = arg;
	/* Re-read the state; it may have changed again since enqueue. */
	link_state = ifp->if_link_state;

	CURVNET_SET(ifp->if_vnet);
	rt_ifmsg(ifp, 0);
	/* Notify stacked pseudo-interfaces, if any are present. */
	if (ifp->if_vlantrunk != NULL)
		(*vlan_link_state_p)(ifp);
	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	if (ifp->if_bridge)
		ifp->if_bridge_linkstate(ifp);
	if (ifp->if_lagg)
		(*lagg_linkstate_p)(ifp, link_state);

	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname,
		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
		    NULL);
	if (pending > 1)
		if_printf(ifp, "%d link states coalesced\n", pending);
	if (log_link_state_change)
		if_printf(ifp, "link state changed to %s\n",
		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
	CURVNET_RESTORE();
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 */
void
if_down(struct ifnet *ifp)
{

	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	ifp->if_qflush(ifp);

	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	rt_ifmsg(ifp, IFF_UP);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	rt_ifmsg(ifp, IFF_UP);
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}

/*
 * Flush an interface queue.
 */
void
if_qflush(struct ifnet *ifp)
{
	struct mbuf *m, *n;
	struct ifaltq *ifq;

	ifq = &ifp->if_snd;
	IFQ_LOCK(ifq);
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(ifq))
		ALTQ_PURGE(ifq);
#endif
	n = ifq->ifq_head;
	while ((m = n) != NULL) {
		n = m->m_nextpkt;
		m_freem(m);
	}
	ifq->ifq_head = 0;
	ifq->ifq_tail = 0;
	ifq->ifq_len = 0;
	IFQ_UNLOCK(ifq);
}

/*
 * Map interface name to interface structure pointer, with or without
 * returning a reference.
 */
struct ifnet *
ifunit_ref(const char *name)
{
	struct epoch_tracker et;
	struct ifnet *ifp;

	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		/* Skip interfaces that are being torn down. */
		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
		    !(ifp->if_flags & IFF_DYING))
			break;
	}
	if (ifp != NULL) {
		if_ref(ifp);
		MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
	}

	NET_EPOCH_EXIT(et);
	return (ifp);
}

struct ifnet *
ifunit(const char *name)
{
	struct epoch_tracker et;
	struct ifnet *ifp;

	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
			break;
	}
	NET_EPOCH_EXIT(et);
	return (ifp);
}

/*
 * ABI-compat accessors for struct ifreq's buffer/data members: a 32-bit
 * process on a 64-bit kernel lays the union out differently, so all
 * access goes through these helpers.
 */
void *
ifr_buffer_get_buffer(void *data)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_CURPROC_FLAG(SV_ILP32))
		return ((void *)(uintptr_t)
		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
#endif
	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
}

/* Clear the user buffer pointer in an ifreq, honoring 32-bit layout. */
static void
ifr_buffer_set_buffer_null(void *data)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_CURPROC_FLAG(SV_ILP32))
		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
	else
#endif
		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
}

/* Read the user buffer length from an ifreq, honoring 32-bit layout. */
size_t
ifr_buffer_get_length(void *data)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_CURPROC_FLAG(SV_ILP32))
		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
#endif
	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
}

/* Store the user buffer length into an ifreq, honoring 32-bit layout. */
static void
ifr_buffer_set_length(void *data, size_t len)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_CURPROC_FLAG(SV_ILP32))
		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
	else
#endif
		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
}

/* Read the opaque ifru_data pointer from an ifreq, honoring 32-bit layout. */
void *
ifr_data_get_ptr(void *ifrp)
{
	union ifreq_union *ifrup;

	ifrup = ifrp;
#ifdef COMPAT_FREEBSD32
	if (SV_CURPROC_FLAG(SV_ILP32))
		return ((void *)(uintptr_t)
		    ifrup->ifr32.ifr_ifru.ifru_data);
#endif
	return (ifrup->ifr.ifr_ifru.ifru_data);
}

/*
 * Mapping between interface capability bits and their nvlist names,
 * used by the SIOC[GS]IFCAPNV ioctls.
 */
struct ifcap_nv_bit_name {
	uint64_t cap_bit;
	const char *cap_name;
};
#define CAPNV(x) {.cap_bit = IFCAP_##x, \
    .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) }
const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = {
	CAPNV(RXCSUM),
	CAPNV(TXCSUM),
	CAPNV(NETCONS),
	CAPNV(VLAN_MTU),
	CAPNV(VLAN_HWTAGGING),
	CAPNV(JUMBO_MTU),
	CAPNV(POLLING),
	CAPNV(VLAN_HWCSUM),
	CAPNV(TSO4),
	CAPNV(TSO6),
	CAPNV(LRO),
	CAPNV(WOL_UCAST),
	CAPNV(WOL_MCAST),
	CAPNV(WOL_MAGIC),
	CAPNV(TOE4),
	CAPNV(TOE6),
	CAPNV(VLAN_HWFILTER),
	CAPNV(VLAN_HWTSO),
	CAPNV(LINKSTATE),
	CAPNV(NETMAP),
	CAPNV(RXCSUM_IPV6),
	CAPNV(TXCSUM_IPV6),
	CAPNV(HWSTATS),
	CAPNV(TXRTLMT),
	CAPNV(HWRXTSTMP),
	CAPNV(MEXTPG),
	CAPNV(TXTLS4),
	CAPNV(TXTLS6),
	CAPNV(VXLAN_HWCSUM),
	CAPNV(VXLAN_HWTSO),
	CAPNV(TXTLS_RTLMT),
	{0, NULL}	/* sentinel */
};
#define CAP2NV(x) {.cap_bit = IFCAP2_BIT(IFCAP2_##x), \
    .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) }
const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
	CAP2NV(RXTLS4),
	CAP2NV(RXTLS6),
	CAP2NV(IPSEC_OFFLOAD),
	{0, NULL}	/* sentinel */
};
#undef CAPNV
#undef CAP2NV

/*
 * Convert an nvlist of capability booleans back into a capability bitmask.
 * Bits not mentioned in the nvlist keep their value from *old_cap; when
 * 'all' is set, a key's mere presence (not its value) sets the bit.
 */
int
if_capnv_to_capint(const nvlist_t *nv, int *old_cap,
    const struct ifcap_nv_bit_name *nn, bool all)
{
	int i, res;

	res = 0;
	for (i = 0; nn[i].cap_name != NULL; i++) {
		if (nvlist_exists_bool(nv, nn[i].cap_name)) {
			if (all || nvlist_get_bool(nv, nn[i].cap_name))
				res |= nn[i].cap_bit;
		} else {
			res |= *old_cap & nn[i].cap_bit;
		}
	}
	return (res);
}

/*
 * Express a capability bitmask as nvlist booleans: for every bit set in
 * ifr_cap, add a key whose value reflects whether the bit is set in ifr_req.
 */
void
if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn,
    int ifr_cap, int ifr_req)
{
	int i;

	for (i = 0; nn[i].cap_name != NULL; i++) {
		if ((nn[i].cap_bit & ifr_cap) != 0) {
			nvlist_add_bool(nv, nn[i].cap_name,
			    (nn[i].cap_bit & ifr_req) != 0);
		}
	}
}

/*
 * Hardware specific interface ioctls.
 */
/*
 * Handle the hardware-specific subset of interface ioctls on 'ifp'.
 * Returns 0 or an errno on a handled request, and ENOIOCTL when 'cmd'
 * is not one of the requests handled here (the caller then passes the
 * request on to the protocol/driver; see ifioctl()).
 */
int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
	struct ifreq *ifr;
	int error = 0, do_ifup = 0;
	int new_flags, temp_flags;
	size_t descrlen, nvbuflen;
	char *descrbuf;
	char new_name[IFNAMSIZ];
	void *buf;
	nvlist_t *nvcap;
	struct siocsifcapnv_driver_data drv_ioctl_data;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/* if_flags and if_drv_flags are packed into one 32-bit word. */
		temp_flags = ifp->if_flags | ifp->if_drv_flags;
		ifr->ifr_flags = temp_flags & 0xffff;
		ifr->ifr_flagshigh = temp_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

	case SIOCGIFCAPNV:
		if ((ifp->if_capabilities & IFCAP_NV) == 0) {
			error = EINVAL;
			break;
		}
		buf = NULL;
		nvcap = nvlist_create(0);
		/* Single-iteration loop used for common cleanup on break. */
		for (;;) {
			if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
			    ifp->if_capabilities, ifp->if_capenable);
			if_capint_to_capnv(nvcap, ifcap2_nv_bit_names,
			    ifp->if_capabilities2, ifp->if_capenable2);
			/* Let the driver add/override entries in nvcap. */
			error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
			    __DECONST(caddr_t, nvcap));
			if (error != 0) {
				if_printf(ifp,
				    "SIOCGIFCAPNV driver mistake: nvlist error %d\n",
				    error);
				break;
			}
			buf = nvlist_pack(nvcap, &nvbuflen);
			if (buf == NULL) {
				error = nvlist_error(nvcap);
				if (error == 0)
					error = EDOOFUS;
				break;
			}
			if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
				/* Report required size so userland can retry. */
				ifr->ifr_cap_nv.length = nvbuflen;
				ifr->ifr_cap_nv.buffer = NULL;
				error = EFBIG;
				break;
			}
			ifr->ifr_cap_nv.length = nvbuflen;
			error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
			break;
		}
		free(buf, M_NVLIST);
		nvlist_destroy(nvcap);
		break;

	case SIOCGIFDATA:
	{
		struct if_data ifd;

		/* Ensure uninitialised padding is not leaked. */
		memset(&ifd, 0, sizeof(ifd));

		if_data_copy(ifp, &ifd);
		error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd));
		break;
	}

#ifdef MAC
	case SIOCGIFMAC:
		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFPHYS:
		/* XXXGL: did this ever work? */
		ifr->ifr_phys = 0;
		break;

	case SIOCGIFDESCR:
		error = 0;
		sx_slock(&ifdescr_sx);
		if (ifp->if_description == NULL)
			error = ENOMSG;
		else {
			/* space for terminating nul */
			descrlen = strlen(ifp->if_description) + 1;
			if (ifr_buffer_get_length(ifr) < descrlen)
				ifr_buffer_set_buffer_null(ifr);
			else
				error = copyout(ifp->if_description,
				    ifr_buffer_get_buffer(ifr), descrlen);
			/* Always report the required length back. */
			ifr_buffer_set_length(ifr, descrlen);
		}
		sx_sunlock(&ifdescr_sx);
		break;

	case SIOCSIFDESCR:
		error = priv_check(td, PRIV_NET_SETIFDESCR);
		if (error)
			return (error);

		/*
		 * Copy only (length-1) bytes to make sure that
		 * if_description is always nul terminated.  The
		 * length parameter is supposed to count the
		 * terminating nul in.
		 */
		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
			return (ENAMETOOLONG);
		else if (ifr_buffer_get_length(ifr) == 0)
			descrbuf = NULL;
		else {
			descrbuf = if_allocdescr(ifr_buffer_get_length(ifr),
			    M_WAITOK);
			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
			    ifr_buffer_get_length(ifr) - 1);
			if (error) {
				if_freedescr(descrbuf);
				break;
			}
		}

		if_setdescr(ifp, descrbuf);
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFFIB:
		ifr->ifr_fib = ifp->if_fib;
		break;

	case SIOCSIFFIB:
		error = priv_check(td, PRIV_NET_SETIFFIB);
		if (error)
			return (error);
		if (ifr->ifr_fib >= rt_numfibs)
			return (EINVAL);

		ifp->if_fib = ifr->ifr_fib;
		break;

	case SIOCSIFFLAGS:
		error = priv_check(td, PRIV_NET_SETIFFLAGS);
		if (error)
			return (error);
		/*
		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
		 * check, so we don't need special handling here yet.
		 */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			if_down(ifp);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			/* Defer if_up() until after the flags are updated. */
			do_ifup = 1;
		}

		/*
		 * See if the promiscuous mode or allmulti bits are about to
		 * flip.  They require special handling because in-kernel
		 * consumers may independently toggle them.
		 */
		if_setppromisc(ifp, new_flags & IFF_PPROMISC);
		if ((ifp->if_flags ^ new_flags) & IFF_PALLMULTI) {
			if (new_flags & IFF_PALLMULTI)
				ifp->if_flags |= IFF_ALLMULTI;
			else if (ifp->if_amcount == 0)
				ifp->if_flags &= ~IFF_ALLMULTI;
		}
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
		    (new_flags &~ IFF_CANTCHANGE);
		if (ifp->if_ioctl) {
			(void) (*ifp->if_ioctl)(ifp, cmd, data);
		}
		if (do_ifup)
			if_up(ifp);
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = priv_check(td, PRIV_NET_SETIFCAP);
		if (error != 0)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
			return (EINVAL);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAPNV:
		error = priv_check(td, PRIV_NET_SETIFCAP);
		if (error != 0)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		if ((ifp->if_capabilities & IFCAP_NV) == 0)
			return (EINVAL);
		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
			return (EINVAL);
		nvcap = NULL;
		buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
		/* Single-iteration loop used for common cleanup on break. */
		for (;;) {
			error = copyin(ifr->ifr_cap_nv.buffer, buf,
			    ifr->ifr_cap_nv.length);
			if (error != 0)
				break;
			nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
			if (nvcap == NULL) {
				error = EINVAL;
				break;
			}
			drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap,
			    &ifp->if_capenable, ifcap_nv_bit_names, false);
			if ((drv_ioctl_data.reqcap &
			    ~ifp->if_capabilities) != 0) {
				error = EINVAL;
				break;
			}
			drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap,
			    &ifp->if_capenable2, ifcap2_nv_bit_names, false);
			if ((drv_ioctl_data.reqcap2 &
			    ~ifp->if_capabilities2) != 0) {
				error = EINVAL;
				break;
			}
			drv_ioctl_data.nvcap = nvcap;
			error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
			    (caddr_t)&drv_ioctl_data);
			break;
		}
		nvlist_destroy(nvcap);
		free(buf, M_TEMP);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

#ifdef MAC
	case SIOCSIFMAC:
		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCSIFNAME:
		error = priv_check(td, PRIV_NET_SETIFNAME);
		if (error)
			return (error);
		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
		    NULL);
		if (error != 0)
			return (error);
		error = if_rename(ifp, new_name);
		break;

#ifdef VIMAGE
	case SIOCSIFVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error)
			return (error);
		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
		break;
#endif

	case SIOCSIFMETRIC:
		error = priv_check(td, PRIV_NET_SETIFMETRIC);
		if (error)
			return (error);
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check(td, PRIV_NET_SETIFPHYS);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		u_long oldmtu = ifp->if_mtu;

		error = priv_check(td, PRIV_NET_SETIFMTU);
		if (error)
			return (error);
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
			return (EINVAL);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Disallow MTU changes on bridge member interfaces. */
		if (ifp->if_bridge)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp, 0);
#ifdef INET
			DEBUGNET_NOTIFY_MTU(ifp);
#endif
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu)
			if_notifymtu(ifp);
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (cmd == SIOCADDMULTI)
			error = priv_check(td, PRIV_NET_ADDMULTI);
		else
			error = priv_check(td, PRIV_NET_DELMULTI);
		if (error)
			return (error);

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK)
			return (EINVAL);

		if (cmd == SIOCADDMULTI) {
			struct epoch_tracker et;
			struct ifmultiaddr *ifma;

			/*
			 * Userland is only permitted to join groups once
			 * via the if_addmulti() KPI, because it cannot hold
			 * struct ifmultiaddr * between calls.  It may also
			 * lose a race while we check if the membership
			 * already exists.
			 */
			NET_EPOCH_ENTER(et);
			ifma = if_findmulti(ifp, &ifr->ifr_addr);
			NET_EPOCH_EXIT(et);
			if (ifma != NULL)
				error = EADDRINUSE;
			else
				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = priv_check(td, PRIV_NET_HWIOCTL);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
	case SIOCGIFGENERIC:
	case SIOCGIFRSSKEY:
	case SIOCGIFRSSHASH:
	case SIOCGIFDOWNREASON:
		/* Read-only requests forwarded straight to the driver. */
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		break;

	case SIOCSIFLLADDR:
		error = priv_check(td, PRIV_NET_SETLLADDR);
		if (error)
			return (error);
		error = if_setlladdr(ifp,
		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
		break;

	case SIOCGHWADDR:
		error = if_gethwaddr(ifp, ifr);
		break;

	case SIOCAIFGROUP:
	{
		const char *groupname;

		error = priv_check(td, PRIV_NET_ADDIFGROUP);
		if (error)
			return (error);
		groupname = ((struct ifgroupreq *)data)->ifgr_group;
		/* Reject unterminated group names. */
		if (strnlen(groupname, IFNAMSIZ) == IFNAMSIZ)
			return (EINVAL);
		error = if_addgroup(ifp, groupname);
		if (error != 0)
			return (error);
		break;
	}
	case SIOCGIFGROUP:
	{
		struct epoch_tracker et;

		NET_EPOCH_ENTER(et);
		error = if_getgroup((struct ifgroupreq *)data, ifp);
		NET_EPOCH_EXIT(et);
		break;
	}

	case SIOCDIFGROUP:
	{
		const char *groupname;

		error = priv_check(td, PRIV_NET_DELIFGROUP);
		if (error)
			return (error);
		groupname = ((struct ifgroupreq *)data)->ifgr_group;
		/* Reject unterminated group names. */
		if (strnlen(groupname, IFNAMSIZ) == IFNAMSIZ)
			return (EINVAL);
		error = if_delgroup(ifp, groupname);
		if (error != 0)
			return (error);
		break;
	}
	default:
		/* Not handled here; the caller tries the protocol/driver. */
		error = ENOIOCTL;
		break;
	}
	return (error);
}

/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
{
#ifdef COMPAT_FREEBSD32
	union {
		struct ifconf ifc;
		struct ifdrv ifd;
		struct ifgroupreq ifgr;
		struct ifmediareq ifmr;
	} thunk;
	u_long saved_cmd;
	struct ifconf32 *ifc32;
	struct ifdrv32 *ifd32;
	struct ifgroupreq32 *ifgr32;
	struct ifmediareq32 *ifmr32;
#endif
	struct ifnet *ifp;
	struct ifreq *ifr;
	int error;
	int oif_flags;
#ifdef VIMAGE
	bool shutdown;
#endif

	CURVNET_SET(so->so_vnet);
#ifdef VIMAGE
	/* Make sure the VNET is stable.
 */
	shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet);
	if (shutdown) {
		CURVNET_RESTORE();
		return (EBUSY);
	}
#endif

#ifdef COMPAT_FREEBSD32
	/*
	 * Translate 32-bit request structures into their native form in
	 * 'thunk' and rewrite 'cmd'; results are copied back below after
	 * the native handler has run.
	 */
	saved_cmd = cmd;
	switch (cmd) {
	case SIOCGIFCONF32:
		ifc32 = (struct ifconf32 *)data;
		thunk.ifc.ifc_len = ifc32->ifc_len;
		thunk.ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
		data = (caddr_t)&thunk.ifc;
		cmd = SIOCGIFCONF;
		break;
	case SIOCGDRVSPEC32:
	case SIOCSDRVSPEC32:
		ifd32 = (struct ifdrv32 *)data;
		memcpy(thunk.ifd.ifd_name, ifd32->ifd_name,
		    sizeof(thunk.ifd.ifd_name));
		thunk.ifd.ifd_cmd = ifd32->ifd_cmd;
		thunk.ifd.ifd_len = ifd32->ifd_len;
		thunk.ifd.ifd_data = PTRIN(ifd32->ifd_data);
		data = (caddr_t)&thunk.ifd;
		cmd = _IOC_NEWTYPE(cmd, struct ifdrv);
		break;
	case SIOCAIFGROUP32:
	case SIOCGIFGROUP32:
	case SIOCDIFGROUP32:
	case SIOCGIFGMEMB32:
		ifgr32 = (struct ifgroupreq32 *)data;
		memcpy(thunk.ifgr.ifgr_name, ifgr32->ifgr_name,
		    sizeof(thunk.ifgr.ifgr_name));
		thunk.ifgr.ifgr_len = ifgr32->ifgr_len;
		switch (cmd) {
		case SIOCAIFGROUP32:
		case SIOCDIFGROUP32:
			memcpy(thunk.ifgr.ifgr_group, ifgr32->ifgr_group,
			    sizeof(thunk.ifgr.ifgr_group));
			break;
		case SIOCGIFGROUP32:
		case SIOCGIFGMEMB32:
			thunk.ifgr.ifgr_groups = PTRIN(ifgr32->ifgr_groups);
			break;
		}
		data = (caddr_t)&thunk.ifgr;
		cmd = _IOC_NEWTYPE(cmd, struct ifgroupreq);
		break;
	case SIOCGIFMEDIA32:
	case SIOCGIFXMEDIA32:
		ifmr32 = (struct ifmediareq32 *)data;
		memcpy(thunk.ifmr.ifm_name, ifmr32->ifm_name,
		    sizeof(thunk.ifmr.ifm_name));
		thunk.ifmr.ifm_current = ifmr32->ifm_current;
		thunk.ifmr.ifm_mask = ifmr32->ifm_mask;
		thunk.ifmr.ifm_status = ifmr32->ifm_status;
		thunk.ifmr.ifm_active = ifmr32->ifm_active;
		thunk.ifmr.ifm_count = ifmr32->ifm_count;
		thunk.ifmr.ifm_ulist = PTRIN(ifmr32->ifm_ulist);
		data = (caddr_t)&thunk.ifmr;
		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
		break;
	}
#endif

	switch (cmd) {
	case SIOCGIFCONF:
		error = ifconf(cmd, data);
		goto out_noref;
	}

	/* Requests that do not need an interface reference. */
	ifr = (struct ifreq *)data;
	switch (cmd) {
#ifdef VIMAGE
	case SIOCSIFRVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error == 0)
			error = if_vmove_reclaim(td, ifr->ifr_name,
			    ifr->ifr_jid);
		goto out_noref;
#endif
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		error = priv_check(td, PRIV_NET_IFCREATE);
		if (error == 0)
			error = if_clone_create(ifr->ifr_name,
			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
			    ifr_data_get_ptr(ifr) : NULL);
		goto out_noref;
	case SIOCIFDESTROY:
		error = priv_check(td, PRIV_NET_IFDESTROY);

		if (error == 0) {
			sx_xlock(&ifnet_detach_sxlock);
			error = if_clone_destroy(ifr->ifr_name);
			sx_xunlock(&ifnet_detach_sxlock);
		}
		goto out_noref;

	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		goto out_noref;

	case SIOCGIFGMEMB:
	{
		struct ifgroupreq *req;

		req = (struct ifgroupreq *)data;
		/* Reject unterminated group names. */
		if (strnlen(req->ifgr_name, IFNAMSIZ) == IFNAMSIZ) {
			error = EINVAL;
			goto out_noref;
		}
		error = if_getgroupmembers(req);
		goto out_noref;
	}
#if defined(INET) || defined(INET6)
	case SIOCSVH:
	case SIOCGVH:
		if (carp_ioctl_p == NULL)
			error = EPROTONOSUPPORT;
		else
			error = (*carp_ioctl_p)(ifr, cmd, td);
		goto out_noref;
#endif
	}

	ifp = ifunit_ref(ifr->ifr_name);
	if (ifp == NULL) {
		error = ENXIO;
		goto out_noref;
	}

	error = ifhwioctl(cmd, ifp, data, td);
	if (error != ENOIOCTL)
		goto out_ref;

	oif_flags = ifp->if_flags;
	if (so->so_proto == NULL) {
		error = EOPNOTSUPP;
		goto out_ref;
	}

	/*
	 * Pass the request on to the socket control method, and if the
	 * latter returns EOPNOTSUPP, directly to the interface.
	 *
	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
	 * trust SIOCSIFADDR et al to come from an already privileged
	 * layer, and do not perform any credentials checks or input
	 * validation.
	 */
	error = so->so_proto->pr_control(so, cmd, data, ifp, td);
	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
		error = (*ifp->if_ioctl)(ifp, cmd, data);

	if (!(oif_flags & IFF_UP) && (ifp->if_flags & IFF_UP))
		if_up(ifp);
out_ref:
	if_rele(ifp);
out_noref:
	CURVNET_RESTORE();
#ifdef COMPAT_FREEBSD32
	/* Copy results back into the caller's 32-bit structure. */
	if (error != 0)
		return (error);
	switch (saved_cmd) {
	case SIOCGIFCONF32:
		ifc32->ifc_len = thunk.ifc.ifc_len;
		break;
	case SIOCGDRVSPEC32:
		/*
		 * SIOCGDRVSPEC is IOWR, but nothing actually touches
		 * the struct so just assert that ifd_len (the only
		 * field it might make sense to update) hasn't
		 * changed.
		 */
		KASSERT(thunk.ifd.ifd_len == ifd32->ifd_len,
		    ("ifd_len was updated %u -> %zu", ifd32->ifd_len,
		    thunk.ifd.ifd_len));
		break;
	case SIOCGIFGROUP32:
	case SIOCGIFGMEMB32:
		ifgr32->ifgr_len = thunk.ifgr.ifgr_len;
		break;
	case SIOCGIFMEDIA32:
	case SIOCGIFXMEDIA32:
		ifmr32->ifm_current = thunk.ifmr.ifm_current;
		ifmr32->ifm_mask = thunk.ifmr.ifm_mask;
		ifmr32->ifm_status = thunk.ifmr.ifm_status;
		ifmr32->ifm_active = thunk.ifmr.ifm_active;
		ifmr32->ifm_count = thunk.ifmr.ifm_count;
		break;
	}
#endif
	return (error);
}

/*
 * Rename interface 'ifp' to 'new_name', updating the embedded link-level
 * address and notifying event handlers and devctl.  Returns EINVAL for an
 * empty name, 0 if the name is unchanged, and EEXIST if the name is taken.
 */
int
if_rename(struct ifnet *ifp, char *new_name)
{
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;
	size_t namelen, onamelen;
	char old_name[IFNAMSIZ];
	char strbuf[IFNAMSIZ + 8];

	if (new_name[0] == '\0')
		return (EINVAL);
	if (strcmp(new_name, ifp->if_xname) == 0)
		return (0);
	if (ifunit(new_name) != NULL)
		return (EEXIST);

	/*
	 * XXX: Locking.  Nothing else seems to lock if_flags,
	 * and there are numerous other races with the
	 * ifunit() checks not being atomic with namespace
	 * changes (renames, vmoves, if_attach, etc).
	 */
	ifp->if_flags |= IFF_RENAMING;

	if_printf(ifp, "changing name to '%s'\n", new_name);

	IF_ADDR_WLOCK(ifp);
	strlcpy(old_name, ifp->if_xname, sizeof(old_name));
	strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
	ifa = ifp->if_addr;
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	namelen = strlen(new_name);
	onamelen = sdl->sdl_nlen;
	/*
	 * Move the address if needed.  This is safe because we
	 * allocate space for a name of length IFNAMSIZ when we
	 * create this in if_attach().
	 */
	if (namelen != onamelen) {
		bcopy(sdl->sdl_data + onamelen,
		    sdl->sdl_data + namelen, sdl->sdl_alen);
	}
	bcopy(new_name, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	/* Rebuild the name portion of the link-level netmask. */
	sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
	bzero(sdl->sdl_data, onamelen);
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	IF_ADDR_WUNLOCK(ifp);

	EVENTHANDLER_INVOKE(ifnet_rename_event, ifp, old_name);

	ifp->if_flags &= ~IFF_RENAMING;

	snprintf(strbuf, sizeof(strbuf), "name=%s", new_name);
	devctl_notify("IFNET", old_name, "RENAME", strbuf);

	return (0);
}

/*
 * The code common to handling reference counted flags,
 * e.g., in ifpromisc() and if_allmulti().
 * The "pflag" argument can specify a permanent mode flag to check,
 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
 *
 * Only to be used on stack-owned flags, not driver-owned flags.
 */
static int
if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags, oldcount;

	/* Sanity checks to catch programming errors */
	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
	    ("%s: setting driver-owned flag %d", __func__, flag));

	if (onswitch)
		KASSERT(*refcount >= 0,
		    ("%s: increment negative refcount %d for flag %d",
		    __func__, *refcount, flag));
	else
		KASSERT(*refcount > 0,
		    ("%s: decrement non-positive refcount %d for flag %d",
		    __func__, *refcount, flag));

	/* In case this mode is permanent, just touch refcount */
	if (ifp->if_flags & pflag) {
		*refcount += onswitch ? 1 : -1;
		return (0);
	}

	/* Save ifnet parameters in case if_ioctl() fails */
	oldcount = *refcount;
	oldflags = ifp->if_flags;

	/*
	 * See if we aren't the only and touching refcount is enough.
	 * Actually toggle interface flag if we are the first or last.
	 */
	if (onswitch) {
		if ((*refcount)++)
			return (0);
		ifp->if_flags |= flag;
	} else {
		if (--(*refcount))
			return (0);
		ifp->if_flags &= ~flag;
	}

	/* Call down the driver since we've changed interface flags */
	if (ifp->if_ioctl == NULL) {
		error = EOPNOTSUPP;
		goto recover;
	}
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	if (error)
		goto recover;
	/* Notify userland that interface flags have changed */
	rt_ifmsg(ifp, flag);
	return (0);

recover:
	/* Recover after driver error */
	*refcount = oldcount;
	ifp->if_flags = oldflags;
	return (error);
}

/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	int error;
	int oldflags = ifp->if_flags;

	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
	    &ifp->if_pcount, pswitch);
	/* If promiscuous mode status has changed, log a message */
	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
	    log_promisc_mode_change)
		if_printf(ifp, "promiscuous mode %s\n",
		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
	return (error);
}

/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	/* Limit initial buffer size to maxphys to avoid DoS from userspace. */
	max_len = maxphys - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		struct epoch_tracker et;
		int addrs;

		/*
		 * Zero the ifr to make sure we don't disclose the contents
		 * of the stack.
		 */
		memset(&ifr, 0, sizeof(ifr));

		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		addrs = 0;
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			/* Skip addresses not visible to the calling jail. */
			if (prison_if(curthread->td_ucred, sa) != 0)
				continue;
			addrs++;
			if (sa->sa_len <= sizeof(*sa)) {
				if (sa->sa_len < sizeof(*sa)) {
					memset(&ifr.ifr_ifru.ifru_addr, 0,
					    sizeof(ifr.ifr_ifru.ifru_addr));
					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
					    sa->sa_len);
				} else
					ifr.ifr_ifru.ifru_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/* Oversized sockaddr follows the header. */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
		NET_EPOCH_EXIT(et);
		if (addrs == 0) {
			/* Emit a record even for address-less interfaces. */
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	ifc->ifc_len = valid_len;
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}

/*
 * Just like ifpromisc(), but for all-multicast-reception mode.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{

	return (if_setflag(ifp, IFF_ALLMULTI, IFF_PALLMULTI, &ifp->if_amcount,
	    onswitch));
}

/*
 * Look up a multicast address on 'ifp'.  AF_LINK addresses are compared
 * with sa_dl_equal(), others with sa_equal().  Returns the matching
 * ifmultiaddr or NULL.  Caller must hold the interface address lock.
 */
struct ifmultiaddr *
if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	IF_ADDR_LOCK_ASSERT(ifp);

	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa->sa_family == AF_LINK) {
			if (sa_dl_equal(ifma->ifma_addr, sa))
				break;
		} else {
			if (sa_equal(ifma->ifma_addr, sa))
				break;
		}
	}

	return ifma;
}

/*
 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
 * the ifnet multicast address list here, so the caller must do that and
 * other setup work (such as notifying the device driver).  The reference
 * count is initialized to 1.
 */
static struct ifmultiaddr *
if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
    int mflags)
{
	struct ifmultiaddr *ifma;
	struct sockaddr *dupsa;

	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
	    M_ZERO);
	if (ifma == NULL)
		return (NULL);

	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
	if (dupsa == NULL) {
		free(ifma, M_IFMADDR);
		return (NULL);
	}
	bcopy(sa, dupsa, sa->sa_len);
	ifma->ifma_addr = dupsa;

	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = NULL;

	if (llsa == NULL) {
		ifma->ifma_lladdr = NULL;
		return (ifma);
	}

	/* Also copy the optional link-layer address. */
	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
	if (dupsa == NULL) {
		free(ifma->ifma_addr, M_IFMADDR);
		free(ifma, M_IFMADDR);
		return (NULL);
	}
	bcopy(llsa, dupsa, llsa->sa_len);
	ifma->ifma_lladdr = dupsa;

	return (ifma);
}

/*
 * if_freemulti: free ifmultiaddr structure and possibly attached related
 * addresses.  The caller is responsible for implementing reference
 * counting, notifying the driver, handling routing messages, and releasing
 * any dependent link layer state.
 */
#ifdef MCAST_VERBOSE
extern void kdb_backtrace(void);
#endif
static void
if_freemulti_internal(struct ifmultiaddr *ifma)
{

	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
	    ifma->ifma_refcount));

	if (ifma->ifma_lladdr != NULL)
		free(ifma->ifma_lladdr, M_IFMADDR);
#ifdef MCAST_VERBOSE
	kdb_backtrace();
	printf("%s freeing ifma: %p\n", __func__, ifma);
#endif
	free(ifma->ifma_addr, M_IFMADDR);
	free(ifma, M_IFMADDR);
}

/* Epoch callback: actually free an ifmultiaddr once readers are done. */
static void
if_destroymulti(epoch_context_t ctx)
{
	struct ifmultiaddr *ifma;

	ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx);
	if_freemulti_internal(ifma);
}

/*
 * Schedule an ifmultiaddr with refcount 0 for deferred freeing after the
 * current network epoch has drained.
 */
void
if_freemulti(struct ifmultiaddr *ifma)
{
	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d",
	    ifma->ifma_refcount));

	NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx);
}

/*
 * Register an additional multicast address with a network interface.
 *
 * - If the address is already present, bump the reference count on the
 *   address and return.
 * - If the address is not link-layer, look up a link layer address.
 * - Allocate address structures for one or both addresses, and attach to the
 *   multicast address list on the interface.  If automatically adding a link
 *   layer address, the protocol address will own a reference to the link
 *   layer address, to be freed when it is freed.
 * - Notify the network device driver of an addition to the multicast address
 *   list.
 *
 * 'sa' points to caller-owned memory with the desired multicast address.
 *
 * 'retifma' will be used to return a pointer to the resulting multicast
 * address reference, if desired.
 */
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	struct ifmultiaddr *ifma, *ll_ifma;
	struct sockaddr *llsa;
	struct sockaddr_dl sdl;	/* stack buffer for the resolved LL address */
	int error;

	/* The INET/INET6 multicast list locks must not be held here. */
#ifdef INET
	IN_MULTI_LIST_UNLOCK_ASSERT();
#endif
#ifdef INET6
	IN6_MULTI_LIST_UNLOCK_ASSERT();
#endif
	/*
	 * If the address is already present, return a new reference to it;
	 * otherwise, allocate storage and set up a new address.
	 */
	IF_ADDR_WLOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL) {
		ifma->ifma_refcount++;
		if (retifma != NULL)
			*retifma = ifma;
		IF_ADDR_WUNLOCK(ifp);
		return (0);
	}

	/*
	 * The address isn't already present; resolve the protocol address
	 * into a link layer address, and then look that up, bump its
	 * refcount or allocate an ifma for that also.
	 * Most link layer resolving functions returns address data which
	 * fits inside default sockaddr_dl structure. However callback
	 * can allocate another sockaddr structure, in that case we need to
	 * free it later.
	 */
	llsa = NULL;
	ll_ifma = NULL;
	if (ifp->if_resolvemulti != NULL) {
		/* Provide called function with buffer size information */
		sdl.sdl_len = sizeof(sdl);
		llsa = (struct sockaddr *)&sdl;
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			goto unlock_out;
	}

	/*
	 * Allocate the new address.  Don't hook it up yet, as we may also
	 * need to allocate a link layer multicast address.
	 */
	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
	if (ifma == NULL) {
		error = ENOMEM;
		goto free_llsa_out;
	}

	/*
	 * If a link layer address is found, we'll need to see if it's
	 * already present in the address list, or allocate is as well.
	 * When this block finishes, the link layer address will be on the
	 * list.
	 */
	if (llsa != NULL) {
		ll_ifma = if_findmulti(ifp, llsa);
		if (ll_ifma == NULL) {
			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
			if (ll_ifma == NULL) {
				/* Undo the reference taken by if_allocmulti(). */
				--ifma->ifma_refcount;
				if_freemulti(ifma);
				error = ENOMEM;
				goto free_llsa_out;
			}
			ll_ifma->ifma_flags |= IFMA_F_ENQUEUED;
			CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
			    ifma_link);
		} else
			ll_ifma->ifma_refcount++;
		ifma->ifma_llifma = ll_ifma;
	}

	/*
	 * We now have a new multicast address, ifma, and possibly a new or
	 * referenced link layer address.  Add the primary address to the
	 * ifnet address list.
	 */
	ifma->ifma_flags |= IFMA_F_ENQUEUED;
	CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);

	if (retifma != NULL)
		*retifma = ifma;

	/*
	 * Must generate the message while holding the lock so that 'ifma'
	 * pointer is still valid.
	 */
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IF_ADDR_WUNLOCK(ifp);

	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.  If the current thread may
	 * not sleep, defer the driver ioctl to a taskqueue context.
	 */
	if (ifp->if_ioctl != NULL) {
		if (THREAD_CAN_SLEEP())
			(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
		else
			taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask);
	}

	/* Free a resolver-allocated sockaddr; the stack buffer needs none. */
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

	return (0);

free_llsa_out:
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

unlock_out:
	IF_ADDR_WUNLOCK(ifp);
	return (error);
}

/*
 * Taskqueue handler that delivers the deferred SIOCADDMULTI driver ioctl
 * in a sleepable context (queued by if_addmulti() above when the calling
 * thread could not sleep).  Several queued additions coalesce into one
 * driver call.
 */
static void
if_siocaddmulti(void *arg, int pending)
{
	struct ifnet *ifp;

	ifp = arg;
#ifdef DIAGNOSTIC
	if (pending > 1)
		if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending);
#endif
	CURVNET_SET(ifp->if_vnet);
	(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
	CURVNET_RESTORE();
}

/*
 * Delete a multicast group membership by network-layer group address.
 *
 * Returns ENOENT if the entry could not be found. If ifp no longer
 * exists, results are undefined. This entry point should only be used
 * from subsystems which do appropriate locking to hold ifp for the
 * duration of the call.
 * Network-layer protocol domains must use if_delmulti_ifma().
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;
	int lastref;

	KASSERT(ifp, ("%s: NULL ifp", __func__));

	IF_ADDR_WLOCK(ifp);
	lastref = 0;
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL)
		lastref = if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_WUNLOCK(ifp);

	if (ifma == NULL)
		return (ENOENT);

	/* Last reference gone: let the driver reprogram its filter. */
	if (lastref && ifp->if_ioctl != NULL) {
		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
	}

	return (0);
}

/*
 * Delete all multicast group membership for an interface.
 * Should be used to quickly flush all multicast filters.
 */
void
if_delallmulti(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	IF_ADDR_WLOCK(ifp);
	/* _SAFE variant: if_delmulti_locked() may unlink the current entry. */
	CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
		if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_WUNLOCK(ifp);
}

/* Convenience wrapper: delete a membership by pointer with no flags. */
void
if_delmulti_ifma(struct ifmultiaddr *ifma)
{
	if_delmulti_ifma_flags(ifma, 0);
}

/*
 * Delete a multicast group membership by group membership pointer.
 * Network-layer protocol domains must use this routine.
 *
 * It is safe to call this routine if the ifp disappeared.
 */
void
if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags)
{
	struct ifnet *ifp;
	int lastref;
	MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma);
#ifdef INET
	IN_MULTI_LIST_UNLOCK_ASSERT();
#endif
	ifp = ifma->ifma_ifp;
#ifdef DIAGNOSTIC
	if (ifp == NULL) {
		printf("%s: ifma_ifp seems to be detached\n", __func__);
	} else {
		struct epoch_tracker et;
		struct ifnet *oifp;

		/* Verify the recorded ifp is still on the global list. */
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
			if (ifp == oifp)
				break;
		NET_EPOCH_EXIT(et);
		if (ifp != oifp)
			ifp = NULL;
	}
#endif
	/*
	 * If and only if the ifnet instance exists: Acquire the address lock.
	 */
	if (ifp != NULL)
		IF_ADDR_WLOCK(ifp);

	lastref = if_delmulti_locked(ifp, ifma, flags);

	if (ifp != NULL) {
		/*
		 * If and only if the ifnet instance exists:
		 * Release the address lock.
		 * If the group was left: update the hardware hash filter.
		 */
		IF_ADDR_WUNLOCK(ifp);
		if (lastref && ifp->if_ioctl != NULL) {
			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
		}
	}
}

/*
 * Perform deletion of network-layer and/or link-layer multicast address.
 *
 * Return 0 if the reference count was decremented.
 * Return 1 if the final reference was released, indicating that the
 * hardware hash filter should be reprogrammed.
 */
static int
if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
{
	struct ifmultiaddr *ll_ifma;

	/* When a live ifp is passed, the caller must hold its address lock. */
	if (ifp != NULL && ifma->ifma_ifp != NULL) {
		KASSERT(ifma->ifma_ifp == ifp,
		    ("%s: inconsistent ifp %p", __func__, ifp));
		IF_ADDR_WLOCK_ASSERT(ifp);
	}

	ifp = ifma->ifma_ifp;
	MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : "");

	/*
	 * If the ifnet is detaching, null out references to ifnet,
	 * so that upper protocol layers will notice, and not attempt
	 * to obtain locks for an ifnet which no longer exists. The
	 * routing socket announcement must happen before the ifnet
	 * instance is detached from the system.
	 */
	if (detaching) {
#ifdef DIAGNOSTIC
		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
#endif
		/*
		 * ifp may already be nulled out if we are being reentered
		 * to delete the ll_ifma.
		 */
		if (ifp != NULL) {
			rt_newmaddrmsg(RTM_DELMADDR, ifma);
			ifma->ifma_ifp = NULL;
		}
	}

	if (--ifma->ifma_refcount > 0)
		return 0;

	/* Last reference: unlink from the interface's list if still queued. */
	if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) {
		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
	}
	/*
	 * If this ifma is a network-layer ifma, a link-layer ifma may
	 * have been associated with it. Release it first if so.
	 */
	ll_ifma = ifma->ifma_llifma;
	if (ll_ifma != NULL) {
		KASSERT(ifma->ifma_lladdr != NULL,
		    ("%s: llifma w/o lladdr", __func__));
		if (detaching)
			ll_ifma->ifma_ifp = NULL;	/* XXX */
		if (--ll_ifma->ifma_refcount == 0) {
			if (ifp != NULL) {
				if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
					CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr,
					    ifma_link);
					ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
				}
			}
			if_freemulti(ll_ifma);
		}
	}
#ifdef INVARIANTS
	/* Sanity: the ifma being freed must no longer be on the list. */
	if (ifp) {
		struct ifmultiaddr *ifmatmp;

		CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link)
			MPASS(ifma != ifmatmp);
	}
#endif
	if_freemulti(ifma);
	/*
	 * The last reference to this instance of struct ifmultiaddr
	 * was released; the hardware should be notified of this change.
	 */
	return 1;
}

/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 *
 * Set noinline to be dtrace-friendly
 */
__noinline int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifreq ifr;	/* only the flags fields are initialized below */

	ifa = ifp->if_addr;
	if (ifa == NULL)
		return (EINVAL);

	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	if (sdl == NULL)
		return (EINVAL);

	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);

	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_XETHER:
	case IFT_L2VLAN:
	case IFT_BRIDGE:
	case IFT_IEEE8023ADLAG:
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}

	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.  Done by driving it down and back up
	 * via two SIOCSIFFLAGS ioctls.
	 */
	if ((ifp->if_flags & IFF_UP) != 0) {
		if (ifp->if_ioctl) {
			ifp->if_flags &= ~IFF_UP;
			ifr.ifr_flags = ifp->if_flags & 0xffff;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
			ifp->if_flags |= IFF_UP;
			ifr.ifr_flags = ifp->if_flags & 0xffff;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
		}
	}
	EVENTHANDLER_INVOKE(iflladdr_event, ifp);

	return (0);
}

/*
 * Compat function for handling basic encapsulation requests.
 * Not converted stacks (FDDI, IB, ..) supports traditional
 * output model: ARP (and other similar L2 protocols) are handled
 * inside output routine, arpresolve/nd6_resolve() returns MAC
 * address instead of full prepend.
 *
 * This function creates calculated header==MAC for IPv4/IPv6 and
 * returns EAFNOSUPPORT (which is then handled in ARP code) for other
 * address families.
 */
static int
if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
{
	if (req->rtype != IFENCAP_LL)
		return (EOPNOTSUPP);

	if (req->bufsize < req->lladdr_len)
		return (ENOMEM);

	switch (req->family) {
	case AF_INET:
	case AF_INET6:
		break;
	default:
		return (EAFNOSUPPORT);
	}

	/* Copy lladdr to storage as is */
	memmove(req->buf, req->lladdr, req->lladdr_len);
	req->bufsize = req->lladdr_len;
	req->lladdr_off = 0;

	return (0);
}

/*
 * Tunnel interfaces can nest, also they may cause infinite recursion
 * calls when misconfigured. We'll prevent this by detecting loops.
 * High nesting level may cause stack exhaustion. We'll prevent this
 * by introducing upper limit.
 *
 * Return 0, if tunnel nesting count is equal or less than limit.
3852 */ 3853 int 3854 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, 3855 int limit) 3856 { 3857 struct m_tag *mtag; 3858 int count; 3859 3860 count = 1; 3861 mtag = NULL; 3862 while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { 3863 if (*(struct ifnet **)(mtag + 1) == ifp) { 3864 log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); 3865 return (EIO); 3866 } 3867 count++; 3868 } 3869 if (count > limit) { 3870 log(LOG_NOTICE, 3871 "%s: if_output recursively called too many times(%d)\n", 3872 if_name(ifp), count); 3873 return (EIO); 3874 } 3875 mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); 3876 if (mtag == NULL) 3877 return (ENOMEM); 3878 *(struct ifnet **)(mtag + 1) = ifp; 3879 m_tag_prepend(m, mtag); 3880 return (0); 3881 } 3882 3883 /* 3884 * Get the link layer address that was read from the hardware at attach. 3885 * 3886 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type 3887 * their component interfaces as IFT_IEEE8023ADLAG. 3888 */ 3889 int 3890 if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) 3891 { 3892 if (ifp->if_hw_addr == NULL) 3893 return (ENODEV); 3894 3895 switch (ifp->if_type) { 3896 case IFT_ETHER: 3897 case IFT_IEEE8023ADLAG: 3898 bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); 3899 return (0); 3900 default: 3901 return (ENODEV); 3902 } 3903 } 3904 3905 /* 3906 * The name argument must be a pointer to storage which will last as 3907 * long as the interface does. For physical devices, the result of 3908 * device_get_name(dev) is a good choice and for pseudo-devices a 3909 * static string works well. 
 */
void
if_initname(struct ifnet *ifp, const char *name, int unit)
{
	ifp->if_dname = name;
	ifp->if_dunit = unit;
	/* Build if_xname as "<name><unit>", or just "<name>" when unitless. */
	if (unit != IF_DUNIT_NONE)
		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
	else
		strlcpy(ifp->if_xname, name, IFNAMSIZ);
}

/* Common helper: prefix the format string with the interface name and log. */
static int
if_vlog(struct ifnet *ifp, int pri, const char *fmt, va_list ap)
{
	char if_fmt[256];

	snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt);
	vlog(pri, if_fmt, ap);
	return (0);
}

/* printf(9)-style logging tagged with the interface name, at LOG_INFO. */
int
if_printf(struct ifnet *ifp, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if_vlog(ifp, LOG_INFO, fmt, ap);
	va_end(ap);
	return (0);
}

/* As if_printf(), but with a caller-chosen syslog priority. */
int
if_log(struct ifnet *ifp, int pri, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if_vlog(ifp, pri, fmt, ap);
	va_end(ap);
	return (0);
}

/* Invoke the driver's start routine directly. */
void
if_start(struct ifnet *ifp)
{

	(*(ifp)->if_start)(ifp);
}

/*
 * Backwards compatibility interface for drivers
 * that have not implemented it
 */
static int
if_transmit_default(struct ifnet *ifp, struct mbuf *m)
{
	int error;

	IFQ_HANDOFF(ifp, m, error);
	return (error);
}

/* Default input method: drop the packet. */
static void
if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
{
	m_freem(m);
}

/*
 * Enqueue a packet on an interface queue and kick the driver's start
 * routine if it was not already active.  Returns 1 on success, 0 if the
 * queue was full and the packet was dropped (counted as an OQDROP).
 */
int
if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
{
	int active = 0;

	IF_LOCK(ifq);
	if (_IF_QFULL(ifq)) {
		IF_UNLOCK(ifq);
		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
		m_freem(m);
		return (0);
	}
	if (ifp != NULL) {
		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
		if (m->m_flags & (M_BCAST|M_MCAST))
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
	}
	_IF_ENQUEUE(ifq, m);
	IF_UNLOCK(ifq);
	if (ifp != NULL && !active)
		(*(ifp)->if_start)(ifp);
	return (1);
}

/* Register per-iftype common-structure alloc/free hooks (one pair per type). */
void
if_register_com_alloc(u_char type,
    if_com_alloc_t *a, if_com_free_t *f)
{

	KASSERT(if_com_alloc[type] == NULL,
	    ("if_register_com_alloc: %d already registered", type));
	KASSERT(if_com_free[type] == NULL,
	    ("if_register_com_alloc: %d free already registered", type));

	if_com_alloc[type] = a;
	if_com_free[type] = f;
}

/* Unregister the alloc/free hooks for an interface type. */
void
if_deregister_com_alloc(u_char type)
{

	KASSERT(if_com_alloc[type] != NULL,
	    ("if_deregister_com_alloc: %d not registered", type));
	KASSERT(if_com_free[type] != NULL,
	    ("if_deregister_com_alloc: %d free not registered", type));

	/*
	 * Ensure all pending EPOCH(9) callbacks have been executed. This
	 * fixes issues about late invocation of if_destroy(), which leads
	 * to memory leak from if_com_alloc[type] allocated if_l2com.
	 */
	NET_EPOCH_DRAIN_CALLBACKS();

	if_com_alloc[type] = NULL;
	if_com_free[type] = NULL;
}

/* API for driver access to network stack owned ifnet.*/

/* Set the interface baudrate; returns the previous value. */
uint64_t
if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
{
	uint64_t oldbrate;

	oldbrate = ifp->if_baudrate;
	ifp->if_baudrate = baudrate;
	return (oldbrate);
}

uint64_t
if_getbaudrate(const if_t ifp)
{
	return (ifp->if_baudrate);
}

int
if_setcapabilities(if_t ifp, int capabilities)
{
	ifp->if_capabilities = capabilities;
	return (0);
}

/* Clear 'clearbit' then set 'setbit' in the capability mask. */
int
if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
{
	ifp->if_capabilities &= ~clearbit;
	ifp->if_capabilities |= setbit;
	return (0);
}

int
if_getcapabilities(const if_t ifp)
{
	return (ifp->if_capabilities);
}

int
if_setcapenable(if_t ifp, int capabilities)
{

	ifp->if_capenable = capabilities;
	return (0);
}

int
if_setcapenablebit(if_t ifp, int setcap, int clearcap)
{
	ifp->if_capenable &= ~clearcap;
	ifp->if_capenable |= setcap;
	return (0);
}

/* Second capability word: same accessors as above. */
int
if_setcapabilities2(if_t ifp, int capabilities)
{
	ifp->if_capabilities2 = capabilities;
	return (0);
}

int
if_setcapabilities2bit(if_t ifp, int setbit, int clearbit)
{
	ifp->if_capabilities2 &= ~clearbit;
	ifp->if_capabilities2 |= setbit;
	return (0);
}

int
if_getcapabilities2(const if_t ifp)
{
	return (ifp->if_capabilities2);
}

int
if_setcapenable2(if_t ifp, int capabilities2)
{
	ifp->if_capenable2 = capabilities2;
	return (0);
}

int
if_setcapenable2bit(if_t ifp, int setcap, int clearcap)
{
	ifp->if_capenable2 &= ~clearcap;
	ifp->if_capenable2 |= setcap;
	return (0);
}

const char *
if_getdname(const if_t ifp)
{
	return (ifp->if_dname);
}

void
if_setdname(if_t ifp, const char *dname)
{
	ifp->if_dname = dname;
}

const char *
if_name(if_t ifp)
{
	return (ifp->if_xname);
}

/* Replace if_xname; fails with ENAMETOOLONG rather than truncating. */
int
if_setname(if_t ifp, const char *name)
{
	if (strlen(name) > sizeof(ifp->if_xname) - 1)
		return (ENAMETOOLONG);
	strcpy(ifp->if_xname, name);

	return (0);
}

int
if_togglecapenable(if_t ifp, int togglecap)
{
	ifp->if_capenable ^= togglecap;
	return (0);
}

int
if_getcapenable(const if_t ifp)
{
	return (ifp->if_capenable);
}

int
if_togglecapenable2(if_t ifp, int togglecap)
{
	ifp->if_capenable2 ^= togglecap;
	return (0);
}

int
if_getcapenable2(const if_t ifp)
{
	return (ifp->if_capenable2);
}

int
if_getdunit(const if_t ifp)
{
	return (ifp->if_dunit);
}

int
if_getindex(const if_t ifp)
{
	return (ifp->if_index);
}

int
if_getidxgen(const if_t ifp)
{
	return (ifp->if_idxgen);
}

const char *
if_getdescr(if_t ifp)
{
	return (ifp->if_description);
}

/*
 * Swap in a new description buffer under ifdescr_sx and free the old one
 * outside the lock.  The buffer must come from if_allocdescr().
 */
void
if_setdescr(if_t ifp, char *descrbuf)
{
	sx_xlock(&ifdescr_sx);
	char *odescrbuf = ifp->if_description;
	ifp->if_description = descrbuf;
	sx_xunlock(&ifdescr_sx);

	if_freedescr(odescrbuf);
}

/* Allocate a zeroed description buffer; only M_WAITOK/M_NOWAIT are honored. */
char *
if_allocdescr(size_t sz, int malloc_flag)
{
	malloc_flag &= (M_WAITOK | M_NOWAIT);
	return (malloc(sz, M_IFDESCR, M_ZERO | malloc_flag));
}

void
if_freedescr(char *descrbuf)
{
	free(descrbuf, M_IFDESCR);
}

int
if_getalloctype(const if_t ifp)
{
	return (ifp->if_alloctype);
}

/* Stamp if_lastchange with the current time. */
void
if_setlastchange(if_t ifp)
{
	getmicrotime(&ifp->if_lastchange);
}

/*
 * This is largely undesirable because it ties ifnet to a device, but does
 * provide flexiblity for an embedded product vendor. Should be used with
 * the understanding that it violates the interface boundaries, and should be
 * a last resort only.
 */
int
if_setdev(if_t ifp, void *dev)
{
	/* Intentionally a no-op; kept for KPI compatibility. */
	return (0);
}

int
if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
{
	ifp->if_drv_flags &= ~clear_flags;
	ifp->if_drv_flags |= set_flags;

	return (0);
}

int
if_getdrvflags(const if_t ifp)
{
	return (ifp->if_drv_flags);
}

int
if_setdrvflags(if_t ifp, int flags)
{
	ifp->if_drv_flags = flags;
	return (0);
}

int
if_setflags(if_t ifp, int flags)
{
	ifp->if_flags = flags;
	return (0);
}

int
if_setflagbits(if_t ifp, int set, int clear)
{
	ifp->if_flags &= ~clear;
	ifp->if_flags |= set;
	return (0);
}

int
if_getflags(const if_t ifp)
{
	return (ifp->if_flags);
}

int
if_clearhwassist(if_t ifp)
{
	ifp->if_hwassist = 0;
	return (0);
}

int
if_sethwassistbits(if_t ifp, int toset, int toclear)
{
	ifp->if_hwassist &= ~toclear;
	ifp->if_hwassist |= toset;

	return (0);
}

int
if_sethwassist(if_t ifp, int hwassist_bit)
{
	ifp->if_hwassist = hwassist_bit;
	return (0);
}

int
if_gethwassist(const if_t ifp)
{
	return (ifp->if_hwassist);
}

int
if_togglehwassist(if_t ifp, int toggle_bits)
{
	ifp->if_hwassist ^= toggle_bits;
	return (0);
}

int
if_setmtu(if_t ifp, int mtu)
{
	ifp->if_mtu = mtu;
	return (0);
}

/* Propagate an MTU change to ND6 (if INET6) and the routing tables. */
void
if_notifymtu(if_t ifp)
{
#ifdef INET6
	nd6_setmtu(ifp);
#endif
	rt_updatemtu(ifp);
}

int
if_getmtu(const if_t ifp)
{
	return (ifp->if_mtu);
}

/*
 * Enable or disable permanently promiscuous mode (IFF_PPROMISC),
 * keeping IFF_PROMISC consistent with it and with the if_pcount
 * refcount of transient promiscuity consumers.
 */
void
if_setppromisc(if_t ifp, bool ppromisc)
{
	int new_flags;

	if (ppromisc)
		new_flags = ifp->if_flags | IFF_PPROMISC;
	else
		new_flags = ifp->if_flags & ~IFF_PPROMISC;
	if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
		if (new_flags & IFF_PPROMISC)
			new_flags |= IFF_PROMISC;
		/*
		 * Only unset IFF_PROMISC if there are no more consumers of
		 * promiscuity, i.e. the ifp->if_pcount refcount is 0.
		 */
		else if (ifp->if_pcount == 0)
			new_flags &= ~IFF_PROMISC;
		if (log_promisc_mode_change)
			if_printf(ifp, "permanently promiscuous mode %s\n",
			    ((new_flags & IFF_PPROMISC) ?
			    "enabled" : "disabled"));
	}
	ifp->if_flags = new_flags;
}

/*
 * Methods for drivers to access interface unicast and multicast
 * link level addresses. Driver shall not know 'struct ifaddr' neither
 * 'struct ifmultiaddr'.
 */
u_int
if_lladdr_count(if_t ifp)
{
	struct epoch_tracker et;
	struct ifaddr *ifa;
	u_int count;

	count = 0;
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
		if (ifa->ifa_addr->sa_family == AF_LINK)
			count++;
	NET_EPOCH_EXIT(et);

	return (count);
}

/*
 * Run @cb on every interface; stops at (and returns) the first non-zero
 * error.  Caller must already be inside a network epoch section.
 */
int
if_foreach(if_foreach_cb_t cb, void *cb_arg)
{
	if_t ifp;
	int error;

	NET_EPOCH_ASSERT();
	MPASS(cb);

	error = 0;
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		error = cb(ifp, cb_arg);
		if (error != 0)
			break;
	}

	return (error);
}

/*
 * Iterates over the list of interfaces, permitting callback function @cb to sleep.
 * Stops iteration if @cb returns non-zero error code.
 * Returns the last error code from @cb.
 * @match_cb: optional match callback limiting the iteration to only matched interfaces
 * @match_arg: argument to pass to @match_cb
 * @cb: iteration callback
 * @cb_arg: argument to pass to @cb
 *
 * Implementation: interfaces are collected (and referenced) under the
 * network epoch into a temporary array, then @cb is invoked outside the
 * epoch so it may sleep.  If the array turns out too small, it is grown
 * and the collection pass is retried.
 */
int
if_foreach_sleep(if_foreach_match_t match_cb, void *match_arg, if_foreach_cb_t cb,
    void *cb_arg)
{
	int match_count = 0, array_size = 16; /* 128 bytes for malloc */
	struct ifnet **match_array = NULL;
	int error = 0;

	MPASS(cb);

	while (true) {
		struct ifnet **new_array;
		int new_size = array_size;
		struct epoch_tracker et;
		struct ifnet *ifp;

		/* Grow geometrically until the previous pass's count fits. */
		while (new_size < match_count)
			new_size *= 2;
		new_array = malloc(new_size * sizeof(void *), M_TEMP, M_WAITOK);
		if (match_array != NULL)
			memcpy(new_array, match_array, array_size * sizeof(void *));
		free(match_array, M_TEMP);
		match_array = new_array;
		array_size = new_size;

		match_count = 0;
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			if (match_cb != NULL && !match_cb(ifp, match_arg))
				continue;
			if (match_count < array_size) {
				if (if_try_ref(ifp))
					match_array[match_count++] = ifp;
			} else
				/* Array overflow: keep counting for the retry. */
				match_count++;
		}
		NET_EPOCH_EXIT(et);

		if (match_count > array_size) {
			/* Too many matches; drop refs and retry with more room. */
			for (int i = 0; i < array_size; i++)
				if_rele(match_array[i]);
			continue;
		} else {
			/* Call back outside the epoch; keep releasing after error. */
			for (int i = 0; i < match_count; i++) {
				if (error == 0)
					error = cb(match_array[i], cb_arg);
				if_rele(match_array[i]);
			}
			free(match_array, M_TEMP);
			break;
		}
	}

	return (error);
}

/*
 * Uses just 1 pointer of the 4 available in the public struct.
 */
if_t
if_iter_start(struct if_iter *iter)
{
	if_t ifp;

	NET_EPOCH_ASSERT();

	bzero(iter, sizeof(*iter));
	ifp = CK_STAILQ_FIRST(&V_ifnet);
	/* Stash the successor so the current entry may be processed freely. */
	if (ifp != NULL)
		iter->context[0] = CK_STAILQ_NEXT(ifp, if_link);
	else
		iter->context[0] = NULL;
	return (ifp);
}

/* Return the saved interface and advance the cursor; NULL at end. */
if_t
if_iter_next(struct if_iter *iter)
{
	if_t cur_ifp = iter->context[0];

	if (cur_ifp != NULL)
		iter->context[0] = CK_STAILQ_NEXT(cur_ifp, if_link);
	return (cur_ifp);
}

void
if_iter_finish(struct if_iter *iter)
{
	/* Nothing to do here for now. */
}

/*
 * Invoke @cb for each AF_LINK (unicast) address on the interface,
 * accumulating the callback's return values.  Runs inside an epoch.
 */
u_int
if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
{
	struct epoch_tracker et;
	struct ifaddr *ifa;
	u_int count;

	MPASS(cb);

	count = 0;
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family != AF_LINK)
			continue;
		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr,
		    count);
	}
	NET_EPOCH_EXIT(et);

	return (count);
}

/* Count the AF_LINK multicast addresses on the interface. */
u_int
if_llmaddr_count(if_t ifp)
{
	struct epoch_tracker et;
	struct ifmultiaddr *ifma;
	int count;

	count = 0;
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (ifma->ifma_addr->sa_family == AF_LINK)
			count++;
	NET_EPOCH_EXIT(et);

	return (count);
}

bool
if_maddr_empty(if_t ifp)
{

	return (CK_STAILQ_EMPTY(&ifp->if_multiaddrs));
}

/* As if_foreach_lladdr(), but over the AF_LINK multicast addresses. */
u_int
if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
{
	struct epoch_tracker et;
	struct ifmultiaddr *ifma;
	u_int count;

	MPASS(cb);

	count = 0;
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr,
		    count);
	}
	NET_EPOCH_EXIT(et);

	return (count);
}

/* Invoke @cb for each interface address of the given address family. */
u_int
if_foreach_addr_type(if_t ifp, int type, if_addr_cb_t cb, void *cb_arg)
{
	struct epoch_tracker et;
	struct ifaddr *ifa;
	u_int count;

	MPASS(cb);

	count = 0;
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family != type)
			continue;
		count += (*cb)(cb_arg, ifa, count);
	}
	NET_EPOCH_EXIT(et);

	return (count);
}

/* Address-list iterator: same next-pointer scheme as if_iter_start(). */
struct ifaddr *
ifa_iter_start(if_t ifp, struct ifa_iter *iter)
{
	struct ifaddr *ifa;

	NET_EPOCH_ASSERT();

	bzero(iter, sizeof(*iter));
	ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
	if (ifa != NULL)
		iter->context[0] = CK_STAILQ_NEXT(ifa, ifa_link);
	else
		iter->context[0] = NULL;
	return (ifa);
}

struct ifaddr *
ifa_iter_next(struct ifa_iter *iter)
{
	struct ifaddr *ifa = iter->context[0];

	if (ifa != NULL)
		iter->context[0] = CK_STAILQ_NEXT(ifa, ifa_link);
	return (ifa);
}

void
ifa_iter_finish(struct ifa_iter *iter)
{
	/* Nothing to do here for now. */
}

int
if_setsoftc(if_t ifp, void *softc)
{
	ifp->if_softc = softc;
	return (0);
}

void *
if_getsoftc(const if_t ifp)
{
	return (ifp->if_softc);
}

void
if_setrcvif(struct mbuf *m, if_t ifp)
{

	/* rcvif aliases the send tag pointer; the tag flag must be clear. */
	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
}

void
if_setvtag(struct mbuf *m, uint16_t tag)
{
	m->m_pkthdr.ether_vtag = tag;
}

uint16_t
if_getvtag(struct mbuf *m)
{
	return (m->m_pkthdr.ether_vtag);
}

int
if_sendq_empty(if_t ifp)
{
	return (IFQ_DRV_IS_EMPTY(&ifp->if_snd));
}

struct ifaddr *
if_getifaddr(const if_t ifp)
{
	return (ifp->if_addr);
}

int
if_setsendqready(if_t ifp)
{
	IFQ_SET_READY(&ifp->if_snd);
	return (0);
}

/* Size the send queue for the given number of TX descriptors. */
int
if_setsendqlen(if_t ifp, int tx_desc_count)
{
	IFQ_SET_MAXLEN(&ifp->if_snd, tx_desc_count);
	ifp->if_snd.ifq_drv_maxlen = tx_desc_count;
	return (0);
}

void
if_setnetmapadapter(if_t ifp, struct netmap_adapter *na)
{
	ifp->if_netmap = na;
}

struct netmap_adapter *
if_getnetmapadapter(if_t ifp)
{
	return (ifp->if_netmap);
}

int
if_vlantrunkinuse(if_t ifp)
{
	return (ifp->if_vlantrunk != NULL);
}

/* Call the driver's init routine with its context (usually the softc). */
void
if_init(if_t ifp, void *ctx)
{
	(*ifp->if_init)(ctx);
}

void
if_input(if_t ifp, struct mbuf* sendmp)
{
	(*ifp->if_input)(ifp, sendmp);
}

int
if_transmit(if_t ifp, struct mbuf *m)
{
	return ((*ifp->if_transmit)(ifp, m));
}

int
if_resolvemulti(if_t ifp, struct sockaddr **srcs, struct sockaddr *dst)
{
	if (ifp->if_resolvemulti == NULL)
		return (EOPNOTSUPP);

	return (ifp->if_resolvemulti(ifp, srcs, dst));
}
4750 int 4751 if_ioctl(if_t ifp, u_long cmd, void *data) 4752 { 4753 if (ifp->if_ioctl == NULL) 4754 return (EOPNOTSUPP); 4755 4756 return (ifp->if_ioctl(ifp, cmd, data)); 4757 } 4758 4759 struct mbuf * 4760 if_dequeue(if_t ifp) 4761 { 4762 struct mbuf *m; 4763 4764 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 4765 return (m); 4766 } 4767 4768 int 4769 if_sendq_prepend(if_t ifp, struct mbuf *m) 4770 { 4771 IFQ_DRV_PREPEND(&ifp->if_snd, m); 4772 return (0); 4773 } 4774 4775 int 4776 if_setifheaderlen(if_t ifp, int len) 4777 { 4778 ifp->if_hdrlen = len; 4779 return (0); 4780 } 4781 4782 char * 4783 if_getlladdr(const if_t ifp) 4784 { 4785 return (IF_LLADDR(ifp)); 4786 } 4787 4788 void * 4789 if_gethandle(u_char type) 4790 { 4791 return (if_alloc(type)); 4792 } 4793 4794 void 4795 if_vlancap(if_t ifp) 4796 { 4797 VLAN_CAPABILITIES(ifp); 4798 } 4799 4800 int 4801 if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) 4802 { 4803 ifp->if_hw_tsomax = if_hw_tsomax; 4804 return (0); 4805 } 4806 4807 int 4808 if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) 4809 { 4810 ifp->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount; 4811 return (0); 4812 } 4813 4814 int 4815 if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) 4816 { 4817 ifp->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; 4818 return (0); 4819 } 4820 4821 u_int 4822 if_gethwtsomax(const if_t ifp) 4823 { 4824 return (ifp->if_hw_tsomax); 4825 } 4826 4827 u_int 4828 if_gethwtsomaxsegcount(const if_t ifp) 4829 { 4830 return (ifp->if_hw_tsomaxsegcount); 4831 } 4832 4833 u_int 4834 if_gethwtsomaxsegsize(const if_t ifp) 4835 { 4836 return (ifp->if_hw_tsomaxsegsize); 4837 } 4838 4839 void 4840 if_setinitfn(if_t ifp, if_init_fn_t init_fn) 4841 { 4842 ifp->if_init = init_fn; 4843 } 4844 4845 void 4846 if_setinputfn(if_t ifp, if_input_fn_t input_fn) 4847 { 4848 ifp->if_input = input_fn; 4849 } 4850 4851 if_input_fn_t 4852 if_getinputfn(if_t ifp) 4853 { 4854 return (ifp->if_input); 4855 } 4856 4857 void 4858 if_setioctlfn(if_t ifp, 
if_ioctl_fn_t ioctl_fn) 4859 { 4860 ifp->if_ioctl = ioctl_fn; 4861 } 4862 4863 void 4864 if_setoutputfn(if_t ifp, if_output_fn_t output_fn) 4865 { 4866 ifp->if_output = output_fn; 4867 } 4868 4869 void 4870 if_setstartfn(if_t ifp, if_start_fn_t start_fn) 4871 { 4872 ifp->if_start = start_fn; 4873 } 4874 4875 if_start_fn_t 4876 if_getstartfn(if_t ifp) 4877 { 4878 return (ifp->if_start); 4879 } 4880 4881 void 4882 if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) 4883 { 4884 ifp->if_transmit = start_fn; 4885 } 4886 4887 if_transmit_fn_t 4888 if_gettransmitfn(if_t ifp) 4889 { 4890 return (ifp->if_transmit); 4891 } 4892 4893 void 4894 if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) 4895 { 4896 ifp->if_qflush = flush_fn; 4897 } 4898 4899 void 4900 if_setsndtagallocfn(if_t ifp, if_snd_tag_alloc_t alloc_fn) 4901 { 4902 ifp->if_snd_tag_alloc = alloc_fn; 4903 } 4904 4905 int 4906 if_snd_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params, 4907 struct m_snd_tag **mstp) 4908 { 4909 if (ifp->if_snd_tag_alloc == NULL) 4910 return (EOPNOTSUPP); 4911 return (ifp->if_snd_tag_alloc(ifp, params, mstp)); 4912 } 4913 4914 void 4915 if_setgetcounterfn(if_t ifp, if_get_counter_t fn) 4916 { 4917 ifp->if_get_counter = fn; 4918 } 4919 4920 void 4921 if_setreassignfn(if_t ifp, if_reassign_fn_t fn) 4922 { 4923 ifp->if_reassign = fn; 4924 } 4925 4926 void 4927 if_setratelimitqueryfn(if_t ifp, if_ratelimit_query_t fn) 4928 { 4929 ifp->if_ratelimit_query = fn; 4930 } 4931 4932 void 4933 if_setdebugnet_methods(if_t ifp, struct debugnet_methods *m) 4934 { 4935 ifp->if_debugnet_methods = m; 4936 } 4937 4938 struct label * 4939 if_getmaclabel(if_t ifp) 4940 { 4941 return (ifp->if_label); 4942 } 4943 4944 void 4945 if_setmaclabel(if_t ifp, struct label *label) 4946 { 4947 ifp->if_label = label; 4948 } 4949 4950 int 4951 if_gettype(if_t ifp) 4952 { 4953 return (ifp->if_type); 4954 } 4955 4956 void * 4957 if_getllsoftc(if_t ifp) 4958 { 4959 return (ifp->if_llsoftc); 4960 } 4961 4962 void 
4963 if_setllsoftc(if_t ifp, void *llsoftc) 4964 { 4965 ifp->if_llsoftc = llsoftc; 4966 }; 4967 4968 int 4969 if_getlinkstate(if_t ifp) 4970 { 4971 return (ifp->if_link_state); 4972 } 4973 4974 const uint8_t * 4975 if_getbroadcastaddr(if_t ifp) 4976 { 4977 return (ifp->if_broadcastaddr); 4978 } 4979 4980 void 4981 if_setbroadcastaddr(if_t ifp, const uint8_t *addr) 4982 { 4983 ifp->if_broadcastaddr = addr; 4984 } 4985 4986 int 4987 if_getnumadomain(if_t ifp) 4988 { 4989 return (ifp->if_numa_domain); 4990 } 4991 4992 uint64_t 4993 if_getcounter(if_t ifp, ift_counter counter) 4994 { 4995 return (ifp->if_get_counter(ifp, counter)); 4996 } 4997 4998 bool 4999 if_altq_is_enabled(if_t ifp) 5000 { 5001 return (ALTQ_IS_ENABLED(&ifp->if_snd)); 5002 } 5003 5004 struct vnet * 5005 if_getvnet(if_t ifp) 5006 { 5007 return (ifp->if_vnet); 5008 } 5009 5010 struct in_ifinfo * 5011 if_getinet(if_t ifp) 5012 { 5013 return (ifp->if_inet); 5014 } 5015 5016 struct in6_ifextra * 5017 if_getinet6(if_t ifp) 5018 { 5019 return (ifp->if_inet6); 5020 } 5021 5022 u_int 5023 if_getfib(if_t ifp) 5024 { 5025 return (ifp->if_fib); 5026 } 5027 5028 uint8_t 5029 if_getaddrlen(if_t ifp) 5030 { 5031 return (ifp->if_addrlen); 5032 } 5033 5034 struct bpf_if * 5035 if_getbpf(if_t ifp) 5036 { 5037 return (ifp->if_bpf); 5038 } 5039 5040 struct ifvlantrunk * 5041 if_getvlantrunk(if_t ifp) 5042 { 5043 return (ifp->if_vlantrunk); 5044 } 5045 5046 uint8_t 5047 if_getpcp(if_t ifp) 5048 { 5049 return (ifp->if_pcp); 5050 } 5051 5052 void * 5053 if_getl2com(if_t ifp) 5054 { 5055 return (ifp->if_l2com); 5056 } 5057 5058 void 5059 if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *m) 5060 { 5061 ifp->if_ipsec_accel_m = m; 5062 } 5063 5064 #ifdef DDB 5065 static void 5066 if_show_ifnet(struct ifnet *ifp) 5067 { 5068 if (ifp == NULL) 5069 return; 5070 db_printf("%s:\n", ifp->if_xname); 5071 #define IF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, ifp->e); 5072 IF_DB_PRINTF("%s", if_dname); 5073 
IF_DB_PRINTF("%d", if_dunit); 5074 IF_DB_PRINTF("%s", if_description); 5075 IF_DB_PRINTF("%u", if_index); 5076 IF_DB_PRINTF("%d", if_idxgen); 5077 IF_DB_PRINTF("%u", if_refcount); 5078 IF_DB_PRINTF("%p", if_softc); 5079 IF_DB_PRINTF("%p", if_l2com); 5080 IF_DB_PRINTF("%p", if_llsoftc); 5081 IF_DB_PRINTF("%d", if_amcount); 5082 IF_DB_PRINTF("%p", if_addr); 5083 IF_DB_PRINTF("%p", if_broadcastaddr); 5084 IF_DB_PRINTF("%u", if_fib); 5085 IF_DB_PRINTF("%p", if_vnet); 5086 IF_DB_PRINTF("%p", if_home_vnet); 5087 IF_DB_PRINTF("%p", if_vlantrunk); 5088 IF_DB_PRINTF("%p", if_bpf); 5089 IF_DB_PRINTF("%u", if_pcount); 5090 IF_DB_PRINTF("%p", if_bridge); 5091 IF_DB_PRINTF("%p", if_lagg); 5092 IF_DB_PRINTF("%p", if_pf_kif); 5093 IF_DB_PRINTF("%p", if_carp); 5094 IF_DB_PRINTF("%p", if_label); 5095 IF_DB_PRINTF("%p", if_netmap); 5096 IF_DB_PRINTF("0x%08x", if_flags); 5097 IF_DB_PRINTF("0x%08x", if_drv_flags); 5098 IF_DB_PRINTF("0x%08x", if_capabilities); 5099 IF_DB_PRINTF("0x%08x", if_capenable); 5100 IF_DB_PRINTF("%p", if_snd.ifq_head); 5101 IF_DB_PRINTF("%p", if_snd.ifq_tail); 5102 IF_DB_PRINTF("%d", if_snd.ifq_len); 5103 IF_DB_PRINTF("%d", if_snd.ifq_maxlen); 5104 IF_DB_PRINTF("%p", if_snd.ifq_drv_head); 5105 IF_DB_PRINTF("%p", if_snd.ifq_drv_tail); 5106 IF_DB_PRINTF("%d", if_snd.ifq_drv_len); 5107 IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen); 5108 IF_DB_PRINTF("%d", if_snd.altq_type); 5109 IF_DB_PRINTF("%x", if_snd.altq_flags); 5110 #undef IF_DB_PRINTF 5111 } 5112 5113 DB_SHOW_COMMAND(ifnet, db_show_ifnet) 5114 { 5115 if (!have_addr) { 5116 db_printf("usage: show ifnet <struct ifnet *>\n"); 5117 return; 5118 } 5119 5120 if_show_ifnet((struct ifnet *)addr); 5121 } 5122 5123 DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) 5124 { 5125 struct ifnet *ifp; 5126 u_short idx; 5127 5128 for (idx = 1; idx <= if_index; idx++) { 5129 ifp = ifindex_table[idx].ife_ifnet; 5130 if (ifp == NULL) 5131 continue; 5132 db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); 5133 if (db_pager_quit) 5134 
break; 5135 } 5136 } 5137 #endif /* DDB */ 5138