1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (C) 2001 WIDE Project. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)in.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/socket.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/route/route_ctl.h>
#include <net/vnet.h>

#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_fib.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_carp.h>
#include <netinet/igmp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

/*
 * Handlers for the add / delete / get-alias address ioctls; in_control()
 * dispatches to them while holding in_control_sx.
 */
static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
static int in_difaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
static int in_gifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);

static void	in_socktrim(struct sockaddr_in *);
static void	in_purgemaddrs(struct ifnet *);

static bool	ia_need_loopback_route(const struct in_ifaddr *);

/*
 * net.inet.ip.no_same_prefix: when non-zero, in_addprefix() returns EEXIST
 * instead of accepting an address whose prefix route is already installed
 * by another address.
 */
VNET_DEFINE_STATIC(int, nosameprefix);
#define	V_nosameprefix			VNET(nosameprefix)
SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
	&VNET_NAME(nosameprefix), 0,
	"Refuse to create same prefixes on different interfaces");

/* Raw IP pcb table; purged of a departing interface in in_ifdetach(). */
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define	V_ripcbinfo			VNET(ripcbinfo)

/*
 * Held exclusively by in_control() around the SIOCGIFALIAS, SIOCDIFADDR
 * and SIOCAIFADDR handlers.
 */
static struct sx in_control_sx;
SX_SYSINIT(in_control_sx, &in_control_sx, "in_control");

/*
 * Return 1 if an
internet address is for a ``local'' host 99 * (one to which we have a connection). 100 */ 101 int 102 in_localaddr(struct in_addr in) 103 { 104 struct rm_priotracker in_ifa_tracker; 105 u_long i = ntohl(in.s_addr); 106 struct in_ifaddr *ia; 107 108 IN_IFADDR_RLOCK(&in_ifa_tracker); 109 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 110 if ((i & ia->ia_subnetmask) == ia->ia_subnet) { 111 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 112 return (1); 113 } 114 } 115 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 116 return (0); 117 } 118 119 /* 120 * Return 1 if an internet address is for the local host and configured 121 * on one of its interfaces. 122 */ 123 int 124 in_localip(struct in_addr in) 125 { 126 struct rm_priotracker in_ifa_tracker; 127 struct in_ifaddr *ia; 128 129 IN_IFADDR_RLOCK(&in_ifa_tracker); 130 LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) { 131 if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) { 132 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 133 return (1); 134 } 135 } 136 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 137 return (0); 138 } 139 140 /* 141 * Return 1 if an internet address is configured on an interface. 142 */ 143 int 144 in_ifhasaddr(struct ifnet *ifp, struct in_addr in) 145 { 146 struct ifaddr *ifa; 147 struct in_ifaddr *ia; 148 149 NET_EPOCH_ASSERT(); 150 151 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 152 if (ifa->ifa_addr->sa_family != AF_INET) 153 continue; 154 ia = (struct in_ifaddr *)ifa; 155 if (ia->ia_addr.sin_addr.s_addr == in.s_addr) 156 return (1); 157 } 158 159 return (0); 160 } 161 162 /* 163 * Return a reference to the interface address which is different to 164 * the supplied one but with same IP address value. 
165 */ 166 static struct in_ifaddr * 167 in_localip_more(struct in_ifaddr *original_ia) 168 { 169 struct rm_priotracker in_ifa_tracker; 170 in_addr_t original_addr = IA_SIN(original_ia)->sin_addr.s_addr; 171 uint32_t original_fib = original_ia->ia_ifa.ifa_ifp->if_fib; 172 struct in_ifaddr *ia; 173 174 IN_IFADDR_RLOCK(&in_ifa_tracker); 175 LIST_FOREACH(ia, INADDR_HASH(original_addr), ia_hash) { 176 in_addr_t addr = IA_SIN(ia)->sin_addr.s_addr; 177 uint32_t fib = ia->ia_ifa.ifa_ifp->if_fib; 178 if (!V_rt_add_addr_allfibs && (original_fib != fib)) 179 continue; 180 if ((original_ia != ia) && (original_addr == addr)) { 181 ifa_ref(&ia->ia_ifa); 182 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 183 return (ia); 184 } 185 } 186 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 187 188 return (NULL); 189 } 190 191 /* 192 * Determine whether an IP address is in a reserved set of addresses 193 * that may not be forwarded, or whether datagrams to that destination 194 * may be forwarded. 195 */ 196 int 197 in_canforward(struct in_addr in) 198 { 199 u_long i = ntohl(in.s_addr); 200 201 if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i) || 202 IN_ZERONET(i) || IN_LOOPBACK(i)) 203 return (0); 204 return (1); 205 } 206 207 /* 208 * Trim a mask in a sockaddr 209 */ 210 static void 211 in_socktrim(struct sockaddr_in *ap) 212 { 213 char *cplim = (char *) &ap->sin_addr; 214 char *cp = (char *) (&ap->sin_addr + 1); 215 216 ap->sin_len = 0; 217 while (--cp >= cplim) 218 if (*cp) { 219 (ap)->sin_len = cp - (char *) (ap) + 1; 220 break; 221 } 222 } 223 224 /* 225 * Generic internet control operations (ioctl's). 
226 */ 227 int 228 in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, 229 struct thread *td) 230 { 231 struct ifreq *ifr = (struct ifreq *)data; 232 struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr; 233 struct epoch_tracker et; 234 struct ifaddr *ifa; 235 struct in_ifaddr *ia; 236 int error; 237 238 if (ifp == NULL) 239 return (EADDRNOTAVAIL); 240 241 /* 242 * Filter out 4 ioctls we implement directly. Forward the rest 243 * to specific functions and ifp->if_ioctl(). 244 */ 245 switch (cmd) { 246 case SIOCGIFADDR: 247 case SIOCGIFBRDADDR: 248 case SIOCGIFDSTADDR: 249 case SIOCGIFNETMASK: 250 break; 251 case SIOCGIFALIAS: 252 sx_xlock(&in_control_sx); 253 error = in_gifaddr_ioctl(cmd, data, ifp, td); 254 sx_xunlock(&in_control_sx); 255 return (error); 256 case SIOCDIFADDR: 257 sx_xlock(&in_control_sx); 258 error = in_difaddr_ioctl(cmd, data, ifp, td); 259 sx_xunlock(&in_control_sx); 260 return (error); 261 case OSIOCAIFADDR: /* 9.x compat */ 262 case SIOCAIFADDR: 263 sx_xlock(&in_control_sx); 264 error = in_aifaddr_ioctl(cmd, data, ifp, td); 265 sx_xunlock(&in_control_sx); 266 return (error); 267 case SIOCSIFADDR: 268 case SIOCSIFBRDADDR: 269 case SIOCSIFDSTADDR: 270 case SIOCSIFNETMASK: 271 /* We no longer support that old commands. */ 272 return (EINVAL); 273 default: 274 if (ifp->if_ioctl == NULL) 275 return (EOPNOTSUPP); 276 return ((*ifp->if_ioctl)(ifp, cmd, data)); 277 } 278 279 if (addr->sin_addr.s_addr != INADDR_ANY && 280 prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0) 281 return (EADDRNOTAVAIL); 282 283 /* 284 * Find address for this interface, if it exists. If an 285 * address was specified, find that one instead of the 286 * first one on the interface, if possible. 
287 */ 288 NET_EPOCH_ENTER(et); 289 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 290 if (ifa->ifa_addr->sa_family != AF_INET) 291 continue; 292 ia = (struct in_ifaddr *)ifa; 293 if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr) 294 break; 295 } 296 if (ifa == NULL) 297 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 298 if (ifa->ifa_addr->sa_family == AF_INET) { 299 ia = (struct in_ifaddr *)ifa; 300 if (prison_check_ip4(td->td_ucred, 301 &ia->ia_addr.sin_addr) == 0) 302 break; 303 } 304 305 if (ifa == NULL) { 306 NET_EPOCH_EXIT(et); 307 return (EADDRNOTAVAIL); 308 } 309 310 error = 0; 311 switch (cmd) { 312 case SIOCGIFADDR: 313 *addr = ia->ia_addr; 314 break; 315 316 case SIOCGIFBRDADDR: 317 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 318 error = EINVAL; 319 break; 320 } 321 *addr = ia->ia_broadaddr; 322 break; 323 324 case SIOCGIFDSTADDR: 325 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { 326 error = EINVAL; 327 break; 328 } 329 *addr = ia->ia_dstaddr; 330 break; 331 332 case SIOCGIFNETMASK: 333 *addr = ia->ia_sockmask; 334 break; 335 } 336 337 NET_EPOCH_EXIT(et); 338 339 return (error); 340 } 341 342 static int 343 in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) 344 { 345 const struct in_aliasreq *ifra = (struct in_aliasreq *)data; 346 const struct sockaddr_in *addr = &ifra->ifra_addr; 347 const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr; 348 const struct sockaddr_in *mask = &ifra->ifra_mask; 349 const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr; 350 const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0; 351 struct epoch_tracker et; 352 struct ifaddr *ifa; 353 struct in_ifaddr *ia; 354 bool iaIsFirst; 355 int error = 0; 356 357 error = priv_check(td, PRIV_NET_ADDIFADDR); 358 if (error) 359 return (error); 360 361 /* 362 * ifra_addr must be present and be of INET family. 363 * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional. 
364 */ 365 if (addr->sin_len != sizeof(struct sockaddr_in) || 366 addr->sin_family != AF_INET) 367 return (EINVAL); 368 if (broadaddr->sin_len != 0 && 369 (broadaddr->sin_len != sizeof(struct sockaddr_in) || 370 broadaddr->sin_family != AF_INET)) 371 return (EINVAL); 372 if (mask->sin_len != 0 && 373 (mask->sin_len != sizeof(struct sockaddr_in) || 374 mask->sin_family != AF_INET)) 375 return (EINVAL); 376 if ((ifp->if_flags & IFF_POINTOPOINT) && 377 (dstaddr->sin_len != sizeof(struct sockaddr_in) || 378 dstaddr->sin_addr.s_addr == INADDR_ANY)) 379 return (EDESTADDRREQ); 380 if (vhid != 0 && carp_attach_p == NULL) 381 return (EPROTONOSUPPORT); 382 383 /* 384 * See whether address already exist. 385 */ 386 iaIsFirst = true; 387 ia = NULL; 388 NET_EPOCH_ENTER(et); 389 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 390 struct in_ifaddr *it; 391 392 if (ifa->ifa_addr->sa_family != AF_INET) 393 continue; 394 395 it = (struct in_ifaddr *)ifa; 396 if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && 397 prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0) 398 ia = it; 399 else 400 iaIsFirst = false; 401 } 402 NET_EPOCH_EXIT(et); 403 404 if (ia != NULL) 405 (void )in_difaddr_ioctl(cmd, data, ifp, td); 406 407 ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK); 408 ia = (struct in_ifaddr *)ifa; 409 ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; 410 ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; 411 ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; 412 callout_init_rw(&ia->ia_garp_timer, &ifp->if_addr_lock, 413 CALLOUT_RETURNUNLOCKED); 414 415 ia->ia_ifp = ifp; 416 ia->ia_addr = *addr; 417 if (mask->sin_len != 0) { 418 ia->ia_sockmask = *mask; 419 ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); 420 } else { 421 in_addr_t i = ntohl(addr->sin_addr.s_addr); 422 423 /* 424 * Be compatible with network classes, if netmask isn't 425 * supplied, guess it based on classes. 
426 */ 427 if (IN_CLASSA(i)) 428 ia->ia_subnetmask = IN_CLASSA_NET; 429 else if (IN_CLASSB(i)) 430 ia->ia_subnetmask = IN_CLASSB_NET; 431 else 432 ia->ia_subnetmask = IN_CLASSC_NET; 433 ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask); 434 } 435 ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask; 436 in_socktrim(&ia->ia_sockmask); 437 438 if (ifp->if_flags & IFF_BROADCAST) { 439 if (broadaddr->sin_len != 0) { 440 ia->ia_broadaddr = *broadaddr; 441 } else if (ia->ia_subnetmask == IN_RFC3021_MASK) { 442 ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST; 443 ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); 444 ia->ia_broadaddr.sin_family = AF_INET; 445 } else { 446 ia->ia_broadaddr.sin_addr.s_addr = 447 htonl(ia->ia_subnet | ~ia->ia_subnetmask); 448 ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); 449 ia->ia_broadaddr.sin_family = AF_INET; 450 } 451 } 452 453 if (ifp->if_flags & IFF_POINTOPOINT) 454 ia->ia_dstaddr = *dstaddr; 455 456 if (vhid != 0) { 457 error = (*carp_attach_p)(&ia->ia_ifa, vhid); 458 if (error) 459 return (error); 460 } 461 462 /* if_addrhead is already referenced by ifa_alloc() */ 463 IF_ADDR_WLOCK(ifp); 464 CK_STAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); 465 IF_ADDR_WUNLOCK(ifp); 466 467 ifa_ref(ifa); /* in_ifaddrhead */ 468 IN_IFADDR_WLOCK(); 469 CK_STAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link); 470 LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); 471 IN_IFADDR_WUNLOCK(); 472 473 /* 474 * Give the interface a chance to initialize 475 * if this is its first address, 476 * and to validate the address if necessary. 477 */ 478 if (ifp->if_ioctl != NULL) { 479 error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); 480 if (error) 481 goto fail1; 482 } 483 484 /* 485 * Add route for the network. 486 */ 487 if (vhid == 0) { 488 error = in_addprefix(ia); 489 if (error) 490 goto fail1; 491 } 492 493 /* 494 * Add a loopback route to self. 
495 */ 496 if (vhid == 0 && ia_need_loopback_route(ia)) { 497 struct in_ifaddr *eia; 498 499 eia = in_localip_more(ia); 500 501 if (eia == NULL) { 502 error = ifa_add_loopback_route((struct ifaddr *)ia, 503 (struct sockaddr *)&ia->ia_addr); 504 if (error) 505 goto fail2; 506 } else 507 ifa_free(&eia->ia_ifa); 508 } 509 510 if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) { 511 struct in_addr allhosts_addr; 512 struct in_ifinfo *ii; 513 514 ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); 515 allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); 516 517 error = in_joingroup(ifp, &allhosts_addr, NULL, 518 &ii->ii_allhosts); 519 } 520 521 /* 522 * Note: we don't need extra reference for ifa, since we called 523 * with sx lock held, and ifaddr can not be deleted in concurrent 524 * thread. 525 */ 526 EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, ifa, IFADDR_EVENT_ADD); 527 528 return (error); 529 530 fail2: 531 if (vhid == 0) 532 (void )in_scrubprefix(ia, LLE_STATIC); 533 534 fail1: 535 if (ia->ia_ifa.ifa_carp) 536 (*carp_detach_p)(&ia->ia_ifa, false); 537 538 IF_ADDR_WLOCK(ifp); 539 CK_STAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifaddr, ifa_link); 540 IF_ADDR_WUNLOCK(ifp); 541 ifa_free(&ia->ia_ifa); /* if_addrhead */ 542 543 IN_IFADDR_WLOCK(); 544 CK_STAILQ_REMOVE(&V_in_ifaddrhead, ia, in_ifaddr, ia_link); 545 LIST_REMOVE(ia, ia_hash); 546 IN_IFADDR_WUNLOCK(); 547 ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ 548 549 return (error); 550 } 551 552 static int 553 in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) 554 { 555 const struct ifreq *ifr = (struct ifreq *)data; 556 const struct sockaddr_in *addr = (const struct sockaddr_in *) 557 &ifr->ifr_addr; 558 struct ifaddr *ifa; 559 struct in_ifaddr *ia; 560 bool deleteAny, iaIsLast; 561 int error; 562 563 if (td != NULL) { 564 error = priv_check(td, PRIV_NET_DELIFADDR); 565 if (error) 566 return (error); 567 } 568 569 if (addr->sin_len != sizeof(struct sockaddr_in) || 570 addr->sin_family != 
AF_INET) 571 deleteAny = true; 572 else 573 deleteAny = false; 574 575 iaIsLast = true; 576 ia = NULL; 577 IF_ADDR_WLOCK(ifp); 578 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 579 struct in_ifaddr *it; 580 581 if (ifa->ifa_addr->sa_family != AF_INET) 582 continue; 583 584 it = (struct in_ifaddr *)ifa; 585 if (deleteAny && ia == NULL && (td == NULL || 586 prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0)) 587 ia = it; 588 589 if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && 590 (td == NULL || prison_check_ip4(td->td_ucred, 591 &addr->sin_addr) == 0)) 592 ia = it; 593 594 if (it != ia) 595 iaIsLast = false; 596 } 597 598 if (ia == NULL) { 599 IF_ADDR_WUNLOCK(ifp); 600 return (EADDRNOTAVAIL); 601 } 602 603 CK_STAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifaddr, ifa_link); 604 IF_ADDR_WUNLOCK(ifp); 605 ifa_free(&ia->ia_ifa); /* if_addrhead */ 606 607 IN_IFADDR_WLOCK(); 608 CK_STAILQ_REMOVE(&V_in_ifaddrhead, ia, in_ifaddr, ia_link); 609 LIST_REMOVE(ia, ia_hash); 610 IN_IFADDR_WUNLOCK(); 611 612 /* 613 * in_scrubprefix() kills the interface route. 614 */ 615 in_scrubprefix(ia, LLE_STATIC); 616 617 /* 618 * in_ifadown gets rid of all the rest of 619 * the routes. This is not quite the right 620 * thing to do, but at least if we are running 621 * a routing process they will come back. 622 */ 623 in_ifadown(&ia->ia_ifa, 1); 624 625 if (ia->ia_ifa.ifa_carp) 626 (*carp_detach_p)(&ia->ia_ifa, cmd == SIOCAIFADDR); 627 628 /* 629 * If this is the last IPv4 address configured on this 630 * interface, leave the all-hosts group. 631 * No state-change report need be transmitted. 
632 */ 633 if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) { 634 struct in_ifinfo *ii; 635 636 ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); 637 if (ii->ii_allhosts) { 638 (void)in_leavegroup(ii->ii_allhosts, NULL); 639 ii->ii_allhosts = NULL; 640 } 641 } 642 643 IF_ADDR_WLOCK(ifp); 644 if (callout_stop(&ia->ia_garp_timer) == 1) { 645 ifa_free(&ia->ia_ifa); 646 } 647 IF_ADDR_WUNLOCK(ifp); 648 649 EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa, 650 IFADDR_EVENT_DEL); 651 ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ 652 653 return (0); 654 } 655 656 static int 657 in_gifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) 658 { 659 struct in_aliasreq *ifra = (struct in_aliasreq *)data; 660 const struct sockaddr_in *addr = &ifra->ifra_addr; 661 struct epoch_tracker et; 662 struct ifaddr *ifa; 663 struct in_ifaddr *ia; 664 665 /* 666 * ifra_addr must be present and be of INET family. 667 */ 668 if (addr->sin_len != sizeof(struct sockaddr_in) || 669 addr->sin_family != AF_INET) 670 return (EINVAL); 671 672 /* 673 * See whether address exist. 
674 */ 675 ia = NULL; 676 NET_EPOCH_ENTER(et); 677 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 678 struct in_ifaddr *it; 679 680 if (ifa->ifa_addr->sa_family != AF_INET) 681 continue; 682 683 it = (struct in_ifaddr *)ifa; 684 if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && 685 prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0) { 686 ia = it; 687 break; 688 } 689 } 690 if (ia == NULL) { 691 NET_EPOCH_EXIT(et); 692 return (EADDRNOTAVAIL); 693 } 694 695 ifra->ifra_mask = ia->ia_sockmask; 696 if ((ifp->if_flags & IFF_POINTOPOINT) && 697 ia->ia_dstaddr.sin_family == AF_INET) 698 ifra->ifra_dstaddr = ia->ia_dstaddr; 699 else if ((ifp->if_flags & IFF_BROADCAST) && 700 ia->ia_broadaddr.sin_family == AF_INET) 701 ifra->ifra_broadaddr = ia->ia_broadaddr; 702 else 703 memset(&ifra->ifra_broadaddr, 0, 704 sizeof(ifra->ifra_broadaddr)); 705 706 NET_EPOCH_EXIT(et); 707 return (0); 708 } 709 710 static int 711 in_match_ifaddr(const struct rtentry *rt, const struct nhop_object *nh, void *arg) 712 { 713 714 if (nh->nh_ifa == (struct ifaddr *)arg) 715 return (1); 716 717 return (0); 718 } 719 720 static int 721 in_handle_prefix_route(uint32_t fibnum, int cmd, 722 struct sockaddr_in *dst, struct sockaddr_in *netmask, struct ifaddr *ifa, 723 struct ifnet *ifp) 724 { 725 726 NET_EPOCH_ASSERT(); 727 728 /* Prepare gateway */ 729 struct sockaddr_dl_short sdl = { 730 .sdl_family = AF_LINK, 731 .sdl_len = sizeof(struct sockaddr_dl_short), 732 .sdl_type = ifa->ifa_ifp->if_type, 733 .sdl_index = ifa->ifa_ifp->if_index, 734 }; 735 736 struct rt_addrinfo info = { 737 .rti_ifa = ifa, 738 .rti_ifp = ifp, 739 .rti_flags = RTF_PINNED | ((netmask != NULL) ? 
0 : RTF_HOST), 740 .rti_info = { 741 [RTAX_DST] = (struct sockaddr *)dst, 742 [RTAX_NETMASK] = (struct sockaddr *)netmask, 743 [RTAX_GATEWAY] = (struct sockaddr *)&sdl, 744 }, 745 /* Ensure we delete the prefix IFF prefix ifa matches */ 746 .rti_filter = in_match_ifaddr, 747 .rti_filterdata = ifa, 748 }; 749 750 return (rib_handle_ifaddr_info(fibnum, cmd, &info)); 751 } 752 753 /* 754 * Routing table interaction with interface addresses. 755 * 756 * In general, two types of routes needs to be installed: 757 * a) "interface" or "prefix" route, telling user that the addresses 758 * behind the ifa prefix are reached directly. 759 * b) "loopback" route installed for the ifa address, telling user that 760 * the address belongs to local system. 761 * 762 * Handling for (a) and (b) differs in multi-fib aspects, hence they 763 * are implemented in different functions below. 764 * 765 * The cases above may intersect - /32 interface aliases results in 766 * the same prefix produced by (a) and (b). This blurs the definition 767 * of the "loopback" route and complicate interactions. The interaction 768 * table is defined below. The case numbers are used in the multiple 769 * functions below to refer to the particular test case. 770 * 771 * There can be multiple options: 772 * 1) Adding address with prefix on non-p2p/non-loopback interface. 773 * Example: 192.0.2.1/24. Action: 774 * * add "prefix" route towards 192.0.2.0/24 via @ia interface, 775 * using @ia as an address source. 776 * * add "loopback" route towards 192.0.2.1 via V_loif, saving 777 * @ia ifp in the gateway and using @ia as an address source. 778 * 779 * 2) Adding address with /32 mask to non-p2p/non-loopback interface. 780 * Example: 192.0.2.2/32. Action: 781 * * add "prefix" host route via V_loif, using @ia as an address source. 782 * 783 * 3) Adding address with or without prefix to p2p interface. 784 * Example: 10.0.0.1/24->10.0.0.2. 
Action: 785 * * add "prefix" host route towards 10.0.0.2 via this interface, using @ia 786 * as an address source. Note: no sense in installing full /24 as the interface 787 * is point-to-point. 788 * * add "loopback" route towards 10.0.9.1 via V_loif, saving 789 * @ia ifp in the gateway and using @ia as an address source. 790 * 791 * 4) Adding address with or without prefix to loopback interface. 792 * Example: 192.0.2.1/24. Action: 793 * * add "prefix" host route via @ia interface, using @ia as an address source. 794 * Note: Skip installing /24 prefix as it would introduce TTL loop 795 * for the traffic destined to these addresses. 796 */ 797 798 /* 799 * Checks if @ia needs to install loopback route to @ia address via 800 * ifa_maintain_loopback_route(). 801 * 802 * Return true on success. 803 */ 804 static bool 805 ia_need_loopback_route(const struct in_ifaddr *ia) 806 { 807 struct ifnet *ifp = ia->ia_ifp; 808 809 /* Case 4: Skip loopback interfaces */ 810 if ((ifp->if_flags & IFF_LOOPBACK) || 811 (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)) 812 return (false); 813 814 /* Clash avoidance: Skip p2p interfaces with both addresses are equal */ 815 if ((ifp->if_flags & IFF_POINTOPOINT) && 816 ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) 817 return (false); 818 819 /* Case 2: skip /32 prefixes */ 820 if (!(ifp->if_flags & IFF_POINTOPOINT) && 821 (ia->ia_sockmask.sin_addr.s_addr == INADDR_BROADCAST)) 822 return (false); 823 824 return (true); 825 } 826 827 /* 828 * Calculate "prefix" route corresponding to @ia. 
829 */ 830 static void 831 ia_getrtprefix(const struct in_ifaddr *ia, struct in_addr *prefix, struct in_addr *mask) 832 { 833 834 if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) { 835 /* Case 3: return host route for dstaddr */ 836 *prefix = ia->ia_dstaddr.sin_addr; 837 mask->s_addr = INADDR_BROADCAST; 838 } else if (ia->ia_ifp->if_flags & IFF_LOOPBACK) { 839 /* Case 4: return host route for ifaddr */ 840 *prefix = ia->ia_addr.sin_addr; 841 mask->s_addr = INADDR_BROADCAST; 842 } else { 843 /* Cases 1,2: return actual ia prefix */ 844 *prefix = ia->ia_addr.sin_addr; 845 *mask = ia->ia_sockmask.sin_addr; 846 prefix->s_addr &= mask->s_addr; 847 } 848 } 849 850 /* 851 * Adds or delete interface "prefix" route corresponding to @ifa. 852 * Returns 0 on success or errno. 853 */ 854 int 855 in_handle_ifaddr_route(int cmd, struct in_ifaddr *ia) 856 { 857 struct ifaddr *ifa = &ia->ia_ifa; 858 struct in_addr daddr, maddr; 859 struct sockaddr_in *pmask; 860 struct epoch_tracker et; 861 int error; 862 863 ia_getrtprefix(ia, &daddr, &maddr); 864 865 struct sockaddr_in mask = { 866 .sin_family = AF_INET, 867 .sin_len = sizeof(struct sockaddr_in), 868 .sin_addr = maddr, 869 }; 870 871 pmask = (maddr.s_addr != INADDR_BROADCAST) ? &mask : NULL; 872 873 struct sockaddr_in dst = { 874 .sin_family = AF_INET, 875 .sin_len = sizeof(struct sockaddr_in), 876 .sin_addr.s_addr = daddr.s_addr & maddr.s_addr, 877 }; 878 879 struct ifnet *ifp = ia->ia_ifp; 880 881 if ((maddr.s_addr == INADDR_BROADCAST) && 882 (!(ia->ia_ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)))) { 883 /* Case 2: host route on broadcast interface */ 884 ifp = V_loif; 885 } 886 887 uint32_t fibnum = ifa->ifa_ifp->if_fib; 888 NET_EPOCH_ENTER(et); 889 error = in_handle_prefix_route(fibnum, cmd, &dst, pmask, ifa, ifp); 890 NET_EPOCH_EXIT(et); 891 892 return (error); 893 } 894 895 /* 896 * Check if we have a route for the given prefix already. 
897 */ 898 static bool 899 in_hasrtprefix(struct in_ifaddr *target) 900 { 901 struct rm_priotracker in_ifa_tracker; 902 struct in_ifaddr *ia; 903 struct in_addr prefix, mask, p, m; 904 bool result = false; 905 906 ia_getrtprefix(target, &prefix, &mask); 907 908 IN_IFADDR_RLOCK(&in_ifa_tracker); 909 /* Look for an existing address with the same prefix, mask, and fib */ 910 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 911 ia_getrtprefix(ia, &p, &m); 912 913 if (prefix.s_addr != p.s_addr || 914 mask.s_addr != m.s_addr) 915 continue; 916 917 if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib) 918 continue; 919 920 /* 921 * If we got a matching prefix route inserted by other 922 * interface address, we are done here. 923 */ 924 if (ia->ia_flags & IFA_ROUTE) { 925 result = true; 926 break; 927 } 928 } 929 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 930 931 return (result); 932 } 933 934 int 935 in_addprefix(struct in_ifaddr *target) 936 { 937 int error; 938 939 if (in_hasrtprefix(target)) { 940 if (V_nosameprefix) 941 return (EEXIST); 942 else { 943 rt_addrmsg(RTM_ADD, &target->ia_ifa, 944 target->ia_ifp->if_fib); 945 return (0); 946 } 947 } 948 949 /* 950 * No-one seem to have this prefix route, so we try to insert it. 951 */ 952 rt_addrmsg(RTM_ADD, &target->ia_ifa, target->ia_ifp->if_fib); 953 error = in_handle_ifaddr_route(RTM_ADD, target); 954 if (!error) 955 target->ia_flags |= IFA_ROUTE; 956 return (error); 957 } 958 959 /* 960 * Removes either all lle entries for given @ia, or lle 961 * corresponding to @ia address. 
962 */ 963 static void 964 in_scrubprefixlle(struct in_ifaddr *ia, int all, u_int flags) 965 { 966 struct sockaddr_in addr, mask; 967 struct sockaddr *saddr, *smask; 968 struct ifnet *ifp; 969 970 saddr = (struct sockaddr *)&addr; 971 bzero(&addr, sizeof(addr)); 972 addr.sin_len = sizeof(addr); 973 addr.sin_family = AF_INET; 974 smask = (struct sockaddr *)&mask; 975 bzero(&mask, sizeof(mask)); 976 mask.sin_len = sizeof(mask); 977 mask.sin_family = AF_INET; 978 mask.sin_addr.s_addr = ia->ia_subnetmask; 979 ifp = ia->ia_ifp; 980 981 if (all) { 982 /* 983 * Remove all L2 entries matching given prefix. 984 * Convert address to host representation to avoid 985 * doing this on every callback. ia_subnetmask is already 986 * stored in host representation. 987 */ 988 addr.sin_addr.s_addr = ntohl(ia->ia_addr.sin_addr.s_addr); 989 lltable_prefix_free(AF_INET, saddr, smask, flags); 990 } else { 991 /* Remove interface address only */ 992 addr.sin_addr.s_addr = ia->ia_addr.sin_addr.s_addr; 993 lltable_delete_addr(LLTABLE(ifp), LLE_IFADDR, saddr); 994 } 995 } 996 997 /* 998 * If there is no other address in the system that can serve a route to the 999 * same prefix, remove the route. Hand over the route to the new address 1000 * otherwise. 1001 */ 1002 int 1003 in_scrubprefix(struct in_ifaddr *target, u_int flags) 1004 { 1005 struct rm_priotracker in_ifa_tracker; 1006 struct in_ifaddr *ia; 1007 struct in_addr prefix, mask, p, m; 1008 int error = 0; 1009 1010 /* 1011 * Remove the loopback route to the interface address. 
1012 */ 1013 if (ia_need_loopback_route(target) && (flags & LLE_STATIC)) { 1014 struct in_ifaddr *eia; 1015 1016 eia = in_localip_more(target); 1017 1018 if (eia != NULL) { 1019 error = ifa_switch_loopback_route((struct ifaddr *)eia, 1020 (struct sockaddr *)&target->ia_addr); 1021 ifa_free(&eia->ia_ifa); 1022 } else { 1023 error = ifa_del_loopback_route((struct ifaddr *)target, 1024 (struct sockaddr *)&target->ia_addr); 1025 } 1026 } 1027 1028 ia_getrtprefix(target, &prefix, &mask); 1029 1030 if ((target->ia_flags & IFA_ROUTE) == 0) { 1031 rt_addrmsg(RTM_DELETE, &target->ia_ifa, target->ia_ifp->if_fib); 1032 1033 /* 1034 * Removing address from !IFF_UP interface or 1035 * prefix which exists on other interface (along with route). 1036 * No entries should exist here except target addr. 1037 * Given that, delete this entry only. 1038 */ 1039 in_scrubprefixlle(target, 0, flags); 1040 return (0); 1041 } 1042 1043 IN_IFADDR_RLOCK(&in_ifa_tracker); 1044 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1045 ia_getrtprefix(ia, &p, &m); 1046 1047 if (prefix.s_addr != p.s_addr || 1048 mask.s_addr != m.s_addr) 1049 continue; 1050 1051 if ((ia->ia_ifp->if_flags & IFF_UP) == 0) 1052 continue; 1053 1054 /* 1055 * If we got a matching prefix address, move IFA_ROUTE and 1056 * the route itself to it. Make sure that routing daemons 1057 * get a heads-up. 
1058 */ 1059 if ((ia->ia_flags & IFA_ROUTE) == 0) { 1060 ifa_ref(&ia->ia_ifa); 1061 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1062 error = in_handle_ifaddr_route(RTM_DELETE, target); 1063 if (error == 0) 1064 target->ia_flags &= ~IFA_ROUTE; 1065 else 1066 log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", 1067 error); 1068 /* Scrub all entries IFF interface is different */ 1069 in_scrubprefixlle(target, target->ia_ifp != ia->ia_ifp, 1070 flags); 1071 error = in_handle_ifaddr_route(RTM_ADD, ia); 1072 if (error == 0) 1073 ia->ia_flags |= IFA_ROUTE; 1074 else 1075 log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", 1076 error); 1077 ifa_free(&ia->ia_ifa); 1078 return (error); 1079 } 1080 } 1081 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1082 1083 /* 1084 * remove all L2 entries on the given prefix 1085 */ 1086 in_scrubprefixlle(target, 1, flags); 1087 1088 /* 1089 * As no-one seem to have this prefix, we can remove the route. 1090 */ 1091 rt_addrmsg(RTM_DELETE, &target->ia_ifa, target->ia_ifp->if_fib); 1092 error = in_handle_ifaddr_route(RTM_DELETE, target); 1093 if (error == 0) 1094 target->ia_flags &= ~IFA_ROUTE; 1095 else 1096 log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); 1097 return (error); 1098 } 1099 1100 void 1101 in_ifscrub_all(void) 1102 { 1103 struct ifnet *ifp; 1104 struct ifaddr *ifa, *nifa; 1105 struct ifaliasreq ifr; 1106 1107 IFNET_RLOCK(); 1108 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 1109 /* Cannot lock here - lock recursion. */ 1110 /* NET_EPOCH_ENTER(et); */ 1111 CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { 1112 if (ifa->ifa_addr->sa_family != AF_INET) 1113 continue; 1114 1115 /* 1116 * This is ugly but the only way for legacy IP to 1117 * cleanly remove addresses and everything attached. 
1118 */ 1119 bzero(&ifr, sizeof(ifr)); 1120 ifr.ifra_addr = *ifa->ifa_addr; 1121 if (ifa->ifa_dstaddr) 1122 ifr.ifra_broadaddr = *ifa->ifa_dstaddr; 1123 (void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, 1124 ifp, NULL); 1125 } 1126 /* NET_EPOCH_EXIT(et); */ 1127 in_purgemaddrs(ifp); 1128 igmp_domifdetach(ifp); 1129 } 1130 IFNET_RUNLOCK(); 1131 } 1132 1133 int 1134 in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia) 1135 { 1136 1137 return ((in.s_addr == ia->ia_broadaddr.sin_addr.s_addr || 1138 /* 1139 * Check for old-style (host 0) broadcast, but 1140 * taking into account that RFC 3021 obsoletes it. 1141 */ 1142 (ia->ia_subnetmask != IN_RFC3021_MASK && 1143 ntohl(in.s_addr) == ia->ia_subnet)) && 1144 /* 1145 * Check for an all one subnetmask. These 1146 * only exist when an interface gets a secondary 1147 * address. 1148 */ 1149 ia->ia_subnetmask != (u_long)0xffffffff); 1150 } 1151 1152 /* 1153 * Return 1 if the address might be a local broadcast address. 1154 */ 1155 int 1156 in_broadcast(struct in_addr in, struct ifnet *ifp) 1157 { 1158 struct ifaddr *ifa; 1159 int found; 1160 1161 NET_EPOCH_ASSERT(); 1162 1163 if (in.s_addr == INADDR_BROADCAST || 1164 in.s_addr == INADDR_ANY) 1165 return (1); 1166 if ((ifp->if_flags & IFF_BROADCAST) == 0) 1167 return (0); 1168 found = 0; 1169 /* 1170 * Look through the list of addresses for a match 1171 * with a broadcast address. 1172 */ 1173 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 1174 if (ifa->ifa_addr->sa_family == AF_INET && 1175 in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) { 1176 found = 1; 1177 break; 1178 } 1179 return (found); 1180 } 1181 1182 /* 1183 * On interface removal, clean up IPv4 data structures hung off of the ifnet. 
1184 */ 1185 void 1186 in_ifdetach(struct ifnet *ifp) 1187 { 1188 IN_MULTI_LOCK(); 1189 in_pcbpurgeif0(&V_ripcbinfo, ifp); 1190 in_pcbpurgeif0(&V_udbinfo, ifp); 1191 in_pcbpurgeif0(&V_ulitecbinfo, ifp); 1192 in_purgemaddrs(ifp); 1193 IN_MULTI_UNLOCK(); 1194 1195 /* 1196 * Make sure all multicast deletions invoking if_ioctl() are 1197 * completed before returning. Else we risk accessing a freed 1198 * ifnet structure pointer. 1199 */ 1200 inm_release_wait(NULL); 1201 } 1202 1203 /* 1204 * Delete all IPv4 multicast address records, and associated link-layer 1205 * multicast address records, associated with ifp. 1206 * XXX It looks like domifdetach runs AFTER the link layer cleanup. 1207 * XXX This should not race with ifma_protospec being set during 1208 * a new allocation, if it does, we have bigger problems. 1209 */ 1210 static void 1211 in_purgemaddrs(struct ifnet *ifp) 1212 { 1213 struct in_multi_head purgeinms; 1214 struct in_multi *inm; 1215 struct ifmultiaddr *ifma, *next; 1216 1217 SLIST_INIT(&purgeinms); 1218 IN_MULTI_LIST_LOCK(); 1219 1220 /* 1221 * Extract list of in_multi associated with the detaching ifp 1222 * which the PF_INET layer is about to release. 1223 * We need to do this as IF_ADDR_LOCK() may be re-acquired 1224 * by code further down. 
1225 */ 1226 IF_ADDR_WLOCK(ifp); 1227 restart: 1228 CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) { 1229 if (ifma->ifma_addr->sa_family != AF_INET || 1230 ifma->ifma_protospec == NULL) 1231 continue; 1232 inm = (struct in_multi *)ifma->ifma_protospec; 1233 inm_rele_locked(&purgeinms, inm); 1234 if (__predict_false(ifma_restart)) { 1235 ifma_restart = true; 1236 goto restart; 1237 } 1238 } 1239 IF_ADDR_WUNLOCK(ifp); 1240 1241 inm_release_list_deferred(&purgeinms); 1242 igmp_ifdetach(ifp); 1243 IN_MULTI_LIST_UNLOCK(); 1244 } 1245 1246 struct in_llentry { 1247 struct llentry base; 1248 }; 1249 1250 #define IN_LLTBL_DEFAULT_HSIZE 32 1251 #define IN_LLTBL_HASH(k, h) \ 1252 (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) 1253 1254 /* 1255 * Do actual deallocation of @lle. 1256 */ 1257 static void 1258 in_lltable_destroy_lle_unlocked(epoch_context_t ctx) 1259 { 1260 struct llentry *lle; 1261 1262 lle = __containerof(ctx, struct llentry, lle_epoch_ctx); 1263 LLE_LOCK_DESTROY(lle); 1264 LLE_REQ_DESTROY(lle); 1265 free(lle, M_LLTABLE); 1266 } 1267 1268 /* 1269 * Called by LLE_FREE_LOCKED when number of references 1270 * drops to zero. 1271 */ 1272 static void 1273 in_lltable_destroy_lle(struct llentry *lle) 1274 { 1275 1276 LLE_WUNLOCK(lle); 1277 NET_EPOCH_CALL(in_lltable_destroy_lle_unlocked, &lle->lle_epoch_ctx); 1278 } 1279 1280 static struct llentry * 1281 in_lltable_new(struct in_addr addr4, u_int flags) 1282 { 1283 struct in_llentry *lle; 1284 1285 lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); 1286 if (lle == NULL) /* NB: caller generates msg */ 1287 return NULL; 1288 1289 /* 1290 * For IPv4 this will trigger "arpresolve" to generate 1291 * an ARP request. 
1292 */ 1293 lle->base.la_expire = time_uptime; /* mark expired */ 1294 lle->base.r_l3addr.addr4 = addr4; 1295 lle->base.lle_refcnt = 1; 1296 lle->base.lle_free = in_lltable_destroy_lle; 1297 LLE_LOCK_INIT(&lle->base); 1298 LLE_REQ_INIT(&lle->base); 1299 callout_init(&lle->base.lle_timer, 1); 1300 1301 return (&lle->base); 1302 } 1303 1304 #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ 1305 ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 ) 1306 1307 static int 1308 in_lltable_match_prefix(const struct sockaddr *saddr, 1309 const struct sockaddr *smask, u_int flags, struct llentry *lle) 1310 { 1311 struct in_addr addr, mask, lle_addr; 1312 1313 addr = ((const struct sockaddr_in *)saddr)->sin_addr; 1314 mask = ((const struct sockaddr_in *)smask)->sin_addr; 1315 lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr); 1316 1317 if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) 1318 return (0); 1319 1320 if (lle->la_flags & LLE_IFADDR) { 1321 /* 1322 * Delete LLE_IFADDR records IFF address & flag matches. 1323 * Note that addr is the interface address within prefix 1324 * being matched. 1325 * Note also we should handle 'ifdown' cases without removing 1326 * ifaddr macs. 
1327 */ 1328 if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0) 1329 return (1); 1330 return (0); 1331 } 1332 1333 /* flags & LLE_STATIC means deleting both dynamic and static entries */ 1334 if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) 1335 return (1); 1336 1337 return (0); 1338 } 1339 1340 static void 1341 in_lltable_free_entry(struct lltable *llt, struct llentry *lle) 1342 { 1343 size_t pkts_dropped; 1344 1345 LLE_WLOCK_ASSERT(lle); 1346 KASSERT(llt != NULL, ("lltable is NULL")); 1347 1348 /* Unlink entry from table if not already */ 1349 if ((lle->la_flags & LLE_LINKED) != 0) { 1350 IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp); 1351 lltable_unlink_entry(llt, lle); 1352 } 1353 1354 /* Drop hold queue */ 1355 pkts_dropped = llentry_free(lle); 1356 ARPSTAT_ADD(dropped, pkts_dropped); 1357 } 1358 1359 static int 1360 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) 1361 { 1362 struct nhop_object *nh; 1363 struct in_addr addr; 1364 1365 KASSERT(l3addr->sa_family == AF_INET, 1366 ("sin_family %d", l3addr->sa_family)); 1367 1368 addr = ((const struct sockaddr_in *)l3addr)->sin_addr; 1369 1370 nh = fib4_lookup(ifp->if_fib, addr, 0, NHR_NONE, 0); 1371 if (nh == NULL) 1372 return (EINVAL); 1373 1374 /* 1375 * If the gateway for an existing host route matches the target L3 1376 * address, which is a special route inserted by some implementation 1377 * such as MANET, and the interface is of the correct type, then 1378 * allow for ARP to proceed. 1379 */ 1380 if (nh->nh_flags & NHF_GATEWAY) { 1381 if (!(nh->nh_flags & NHF_HOST) || nh->nh_ifp->if_type != IFT_ETHER || 1382 (nh->nh_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || 1383 memcmp(nh->gw_sa.sa_data, l3addr->sa_data, 1384 sizeof(in_addr_t)) != 0) { 1385 return (EINVAL); 1386 } 1387 } 1388 1389 /* 1390 * Make sure that at least the destination address is covered 1391 * by the route. 
This is for handling the case where 2 or more 1392 * interfaces have the same prefix. An incoming packet arrives 1393 * on one interface and the corresponding outgoing packet leaves 1394 * another interface. 1395 */ 1396 if ((nh->nh_ifp != ifp) && (nh->nh_flags & NHF_HOST) == 0) { 1397 struct in_ifaddr *ia = (struct in_ifaddr *)ifaof_ifpforaddr(l3addr, ifp); 1398 struct in_addr dst_addr, mask_addr; 1399 1400 if (ia == NULL) 1401 return (EINVAL); 1402 1403 /* 1404 * ifaof_ifpforaddr() returns _best matching_ IFA. 1405 * It is possible that ifa prefix does not cover our address. 1406 * Explicitly verify and fail if that's the case. 1407 */ 1408 dst_addr = IA_SIN(ia)->sin_addr; 1409 mask_addr.s_addr = htonl(ia->ia_subnetmask); 1410 1411 if (!IN_ARE_MASKED_ADDR_EQUAL(dst_addr, addr, mask_addr)) 1412 return (EINVAL); 1413 } 1414 1415 return (0); 1416 } 1417 1418 static inline uint32_t 1419 in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize) 1420 { 1421 1422 return (IN_LLTBL_HASH(dst.s_addr, hsize)); 1423 } 1424 1425 static uint32_t 1426 in_lltable_hash(const struct llentry *lle, uint32_t hsize) 1427 { 1428 1429 return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize)); 1430 } 1431 1432 static void 1433 in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) 1434 { 1435 struct sockaddr_in *sin; 1436 1437 sin = (struct sockaddr_in *)sa; 1438 bzero(sin, sizeof(*sin)); 1439 sin->sin_family = AF_INET; 1440 sin->sin_len = sizeof(*sin); 1441 sin->sin_addr = lle->r_l3addr.addr4; 1442 } 1443 1444 static inline struct llentry * 1445 in_lltable_find_dst(struct lltable *llt, struct in_addr dst) 1446 { 1447 struct llentry *lle; 1448 struct llentries *lleh; 1449 u_int hashidx; 1450 1451 hashidx = in_lltable_hash_dst(dst, llt->llt_hsize); 1452 lleh = &llt->lle_head[hashidx]; 1453 CK_LIST_FOREACH(lle, lleh, lle_next) { 1454 if (lle->la_flags & LLE_DELETED) 1455 continue; 1456 if (lle->r_l3addr.addr4.s_addr == dst.s_addr) 1457 break; 1458 } 1459 1460 return 
(lle); 1461 } 1462 1463 static void 1464 in_lltable_delete_entry(struct lltable *llt, struct llentry *lle) 1465 { 1466 1467 lle->la_flags |= LLE_DELETED; 1468 EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); 1469 #ifdef DIAGNOSTIC 1470 log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); 1471 #endif 1472 llentry_free(lle); 1473 } 1474 1475 static struct llentry * 1476 in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) 1477 { 1478 const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; 1479 struct ifnet *ifp = llt->llt_ifp; 1480 struct llentry *lle; 1481 char linkhdr[LLE_MAX_LINKHDR]; 1482 size_t linkhdrsize; 1483 int lladdr_off; 1484 1485 KASSERT(l3addr->sa_family == AF_INET, 1486 ("sin_family %d", l3addr->sa_family)); 1487 1488 /* 1489 * A route that covers the given address must have 1490 * been installed 1st because we are doing a resolution, 1491 * verify this. 1492 */ 1493 if (!(flags & LLE_IFADDR) && 1494 in_lltable_rtcheck(ifp, flags, l3addr) != 0) 1495 return (NULL); 1496 1497 lle = in_lltable_new(sin->sin_addr, flags); 1498 if (lle == NULL) { 1499 log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); 1500 return (NULL); 1501 } 1502 lle->la_flags = flags; 1503 if (flags & LLE_STATIC) 1504 lle->r_flags |= RLLE_VALID; 1505 if ((flags & LLE_IFADDR) == LLE_IFADDR) { 1506 linkhdrsize = LLE_MAX_LINKHDR; 1507 if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp), 1508 linkhdr, &linkhdrsize, &lladdr_off) != 0) { 1509 NET_EPOCH_CALL(in_lltable_destroy_lle_unlocked, &lle->lle_epoch_ctx); 1510 return (NULL); 1511 } 1512 lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, 1513 lladdr_off); 1514 lle->la_flags |= LLE_STATIC; 1515 lle->r_flags |= (RLLE_VALID | RLLE_IFADDR); 1516 } 1517 1518 return (lle); 1519 } 1520 1521 /* 1522 * Return NULL if not found or marked for deletion. 1523 * If found return lle read locked. 
1524 */ 1525 static struct llentry * 1526 in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) 1527 { 1528 const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; 1529 struct llentry *lle; 1530 1531 IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); 1532 KASSERT(l3addr->sa_family == AF_INET, 1533 ("sin_family %d", l3addr->sa_family)); 1534 KASSERT((flags & (LLE_UNLOCKED | LLE_EXCLUSIVE)) != 1535 (LLE_UNLOCKED | LLE_EXCLUSIVE), 1536 ("wrong lle request flags: %#x", flags)); 1537 1538 lle = in_lltable_find_dst(llt, sin->sin_addr); 1539 if (lle == NULL) 1540 return (NULL); 1541 if (flags & LLE_UNLOCKED) 1542 return (lle); 1543 1544 if (flags & LLE_EXCLUSIVE) 1545 LLE_WLOCK(lle); 1546 else 1547 LLE_RLOCK(lle); 1548 1549 /* 1550 * If the afdata lock is not held, the LLE may have been unlinked while 1551 * we were blocked on the LLE lock. Check for this case. 1552 */ 1553 if (__predict_false((lle->la_flags & LLE_LINKED) == 0)) { 1554 if (flags & LLE_EXCLUSIVE) 1555 LLE_WUNLOCK(lle); 1556 else 1557 LLE_RUNLOCK(lle); 1558 return (NULL); 1559 } 1560 return (lle); 1561 } 1562 1563 static int 1564 in_lltable_dump_entry(struct lltable *llt, struct llentry *lle, 1565 struct sysctl_req *wr) 1566 { 1567 struct ifnet *ifp = llt->llt_ifp; 1568 /* XXX stack use */ 1569 struct { 1570 struct rt_msghdr rtm; 1571 struct sockaddr_in sin; 1572 struct sockaddr_dl sdl; 1573 } arpc; 1574 struct sockaddr_dl *sdl; 1575 int error; 1576 1577 bzero(&arpc, sizeof(arpc)); 1578 /* skip deleted entries */ 1579 if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) 1580 return (0); 1581 /* Skip if jailed and not a valid IP of the prison. 
*/ 1582 lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin); 1583 if (prison_if(wr->td->td_ucred, (struct sockaddr *)&arpc.sin) != 0) 1584 return (0); 1585 /* 1586 * produce a msg made of: 1587 * struct rt_msghdr; 1588 * struct sockaddr_in; (IPv4) 1589 * struct sockaddr_dl; 1590 */ 1591 arpc.rtm.rtm_msglen = sizeof(arpc); 1592 arpc.rtm.rtm_version = RTM_VERSION; 1593 arpc.rtm.rtm_type = RTM_GET; 1594 arpc.rtm.rtm_flags = RTF_UP; 1595 arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; 1596 1597 /* publish */ 1598 if (lle->la_flags & LLE_PUB) 1599 arpc.rtm.rtm_flags |= RTF_ANNOUNCE; 1600 1601 sdl = &arpc.sdl; 1602 sdl->sdl_family = AF_LINK; 1603 sdl->sdl_len = sizeof(*sdl); 1604 sdl->sdl_index = ifp->if_index; 1605 sdl->sdl_type = ifp->if_type; 1606 if ((lle->la_flags & LLE_VALID) == LLE_VALID) { 1607 sdl->sdl_alen = ifp->if_addrlen; 1608 bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); 1609 } else { 1610 sdl->sdl_alen = 0; 1611 bzero(LLADDR(sdl), ifp->if_addrlen); 1612 } 1613 1614 arpc.rtm.rtm_rmx.rmx_expire = 1615 lle->la_flags & LLE_STATIC ? 
0 : lle->la_expire; 1616 arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); 1617 if (lle->la_flags & LLE_STATIC) 1618 arpc.rtm.rtm_flags |= RTF_STATIC; 1619 if (lle->la_flags & LLE_IFADDR) 1620 arpc.rtm.rtm_flags |= RTF_PINNED; 1621 arpc.rtm.rtm_index = ifp->if_index; 1622 error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); 1623 1624 return (error); 1625 } 1626 1627 static struct lltable * 1628 in_lltattach(struct ifnet *ifp) 1629 { 1630 struct lltable *llt; 1631 1632 llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE); 1633 llt->llt_af = AF_INET; 1634 llt->llt_ifp = ifp; 1635 1636 llt->llt_lookup = in_lltable_lookup; 1637 llt->llt_alloc_entry = in_lltable_alloc; 1638 llt->llt_delete_entry = in_lltable_delete_entry; 1639 llt->llt_dump_entry = in_lltable_dump_entry; 1640 llt->llt_hash = in_lltable_hash; 1641 llt->llt_fill_sa_entry = in_lltable_fill_sa_entry; 1642 llt->llt_free_entry = in_lltable_free_entry; 1643 llt->llt_match_prefix = in_lltable_match_prefix; 1644 llt->llt_mark_used = llentry_mark_used; 1645 lltable_link(llt); 1646 1647 return (llt); 1648 } 1649 1650 void * 1651 in_domifattach(struct ifnet *ifp) 1652 { 1653 struct in_ifinfo *ii; 1654 1655 ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO); 1656 1657 ii->ii_llt = in_lltattach(ifp); 1658 ii->ii_igmp = igmp_domifattach(ifp); 1659 1660 return (ii); 1661 } 1662 1663 void 1664 in_domifdetach(struct ifnet *ifp, void *aux) 1665 { 1666 struct in_ifinfo *ii = (struct in_ifinfo *)aux; 1667 1668 igmp_domifdetach(ifp); 1669 lltable_free(ii->ii_llt); 1670 free(ii, M_IFADDR); 1671 } 1672