/*-
 * Copyright (c) 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28 * 29 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 30 * $FreeBSD$ 31 */ 32 #include "opt_compat.h" 33 #include "opt_sctp.h" 34 #include "opt_mpath.h" 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 38 #include <sys/param.h> 39 #include <sys/jail.h> 40 #include <sys/kernel.h> 41 #include <sys/domain.h> 42 #include <sys/lock.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/priv.h> 46 #include <sys/proc.h> 47 #include <sys/protosw.h> 48 #include <sys/rwlock.h> 49 #include <sys/signalvar.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/sysctl.h> 53 #include <sys/systm.h> 54 55 #include <net/if.h> 56 #include <net/if_dl.h> 57 #include <net/if_llatbl.h> 58 #include <net/if_types.h> 59 #include <net/netisr.h> 60 #include <net/raw_cb.h> 61 #include <net/route.h> 62 #include <net/vnet.h> 63 64 #include <netinet/in.h> 65 #include <netinet/if_ether.h> 66 #include <netinet/ip_carp.h> 67 #ifdef INET6 68 #include <netinet6/scope6_var.h> 69 #endif 70 71 #if defined(INET) || defined(INET6) 72 #ifdef SCTP 73 extern void sctp_addr_change(struct ifaddr *ifa, int cmd); 74 #endif /* SCTP */ 75 #endif 76 77 #ifdef COMPAT_FREEBSD32 78 #include <sys/mount.h> 79 #include <compat/freebsd32/freebsd32.h> 80 81 struct if_data32 { 82 uint8_t ifi_type; 83 uint8_t ifi_physical; 84 uint8_t ifi_addrlen; 85 uint8_t ifi_hdrlen; 86 uint8_t ifi_link_state; 87 uint8_t ifi_vhid; 88 uint8_t ifi_spare_char2; 89 uint8_t ifi_datalen; 90 uint32_t ifi_mtu; 91 uint32_t ifi_metric; 92 uint32_t ifi_baudrate; 93 uint32_t ifi_ipackets; 94 uint32_t ifi_ierrors; 95 uint32_t ifi_opackets; 96 uint32_t ifi_oerrors; 97 uint32_t ifi_collisions; 98 uint32_t ifi_ibytes; 99 uint32_t ifi_obytes; 100 uint32_t ifi_imcasts; 101 uint32_t ifi_omcasts; 102 uint32_t ifi_iqdrops; 103 uint32_t ifi_noproto; 104 uint32_t ifi_hwassist; 105 int32_t ifi_epoch; 106 struct timeval32 ifi_lastchange; 107 }; 108 109 struct if_msghdr32 { 110 uint16_t ifm_msglen; 111 uint8_t ifm_version; 112 
uint8_t ifm_type; 113 int32_t ifm_addrs; 114 int32_t ifm_flags; 115 uint16_t ifm_index; 116 struct if_data32 ifm_data; 117 }; 118 #endif 119 120 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); 121 122 /* NB: these are not modified */ 123 static struct sockaddr route_src = { 2, PF_ROUTE, }; 124 static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; 125 126 /* These are external hooks for CARP. */ 127 int (*carp_get_vhid_p)(struct ifaddr *); 128 129 /* 130 * Used by rtsock/raw_input callback code to decide whether to filter the update 131 * notification to a socket bound to a particular FIB. 132 */ 133 #define RTS_FILTER_FIB M_PROTO8 134 #define RTS_ALLFIBS -1 135 136 static struct { 137 int ip_count; /* attached w/ AF_INET */ 138 int ip6_count; /* attached w/ AF_INET6 */ 139 int ipx_count; /* attached w/ AF_IPX */ 140 int any_count; /* total attached */ 141 } route_cb; 142 143 struct mtx rtsock_mtx; 144 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); 145 146 #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) 147 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) 148 #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) 149 150 static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); 151 152 struct walkarg { 153 int w_tmemsize; 154 int w_op, w_arg; 155 caddr_t w_tmem; 156 struct sysctl_req *w_req; 157 }; 158 159 static void rts_input(struct mbuf *m); 160 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo); 161 static int rt_msg2(int type, struct rt_addrinfo *rtinfo, 162 caddr_t cp, struct walkarg *w); 163 static int rt_xaddrs(caddr_t cp, caddr_t cplim, 164 struct rt_addrinfo *rtinfo); 165 static int sysctl_dumpentry(struct radix_node *rn, void *vw); 166 static int sysctl_iflist(int af, struct walkarg *w); 167 static int sysctl_ifmalist(int af, struct walkarg *w); 168 static int route_output(struct mbuf *m, struct socket *so); 169 static void rt_setmetrics(u_long which, const struct rt_metrics *in, 170 struct 
rt_metrics_lite *out); 171 static void rt_getmetrics(const struct rt_metrics_lite *in, 172 struct rt_metrics *out); 173 static void rt_dispatch(struct mbuf *, sa_family_t); 174 175 static struct netisr_handler rtsock_nh = { 176 .nh_name = "rtsock", 177 .nh_handler = rts_input, 178 .nh_proto = NETISR_ROUTE, 179 .nh_policy = NETISR_POLICY_SOURCE, 180 }; 181 182 static int 183 sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 184 { 185 int error, qlimit; 186 187 netisr_getqlimit(&rtsock_nh, &qlimit); 188 error = sysctl_handle_int(oidp, &qlimit, 0, req); 189 if (error || !req->newptr) 190 return (error); 191 if (qlimit < 1) 192 return (EINVAL); 193 return (netisr_setqlimit(&rtsock_nh, qlimit)); 194 } 195 SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 196 0, 0, sysctl_route_netisr_maxqlen, "I", 197 "maximum routing socket dispatch queue length"); 198 199 static void 200 rts_init(void) 201 { 202 int tmp; 203 204 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) 205 rtsock_nh.nh_qlimit = tmp; 206 netisr_register(&rtsock_nh); 207 } 208 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0); 209 210 static int 211 raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src, 212 struct rawcb *rp) 213 { 214 int fibnum; 215 216 KASSERT(m != NULL, ("%s: m is NULL", __func__)); 217 KASSERT(proto != NULL, ("%s: proto is NULL", __func__)); 218 KASSERT(rp != NULL, ("%s: rp is NULL", __func__)); 219 220 /* No filtering requested. */ 221 if ((m->m_flags & RTS_FILTER_FIB) == 0) 222 return (0); 223 224 /* Check if it is a rts and the fib matches the one of the socket. */ 225 fibnum = M_GETFIB(m); 226 if (proto->sp_family != PF_ROUTE || 227 rp->rcb_socket == NULL || 228 rp->rcb_socket->so_fibnum == fibnum) 229 return (0); 230 231 /* Filtering requested and no match, the socket shall be skipped. 
*/ 232 return (1); 233 } 234 235 static void 236 rts_input(struct mbuf *m) 237 { 238 struct sockproto route_proto; 239 unsigned short *family; 240 struct m_tag *tag; 241 242 route_proto.sp_family = PF_ROUTE; 243 tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); 244 if (tag != NULL) { 245 family = (unsigned short *)(tag + 1); 246 route_proto.sp_protocol = *family; 247 m_tag_delete(m, tag); 248 } else 249 route_proto.sp_protocol = 0; 250 251 raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb); 252 } 253 254 /* 255 * It really doesn't make any sense at all for this code to share much 256 * with raw_usrreq.c, since its functionality is so restricted. XXX 257 */ 258 static void 259 rts_abort(struct socket *so) 260 { 261 262 raw_usrreqs.pru_abort(so); 263 } 264 265 static void 266 rts_close(struct socket *so) 267 { 268 269 raw_usrreqs.pru_close(so); 270 } 271 272 /* pru_accept is EOPNOTSUPP */ 273 274 static int 275 rts_attach(struct socket *so, int proto, struct thread *td) 276 { 277 struct rawcb *rp; 278 int s, error; 279 280 KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL")); 281 282 /* XXX */ 283 rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO); 284 if (rp == NULL) 285 return ENOBUFS; 286 287 /* 288 * The splnet() is necessary to block protocols from sending 289 * error notifications (like RTM_REDIRECT or RTM_LOSING) while 290 * this PCB is extant but incompletely initialized. 291 * Probably we should try to do more of this work beforehand and 292 * eliminate the spl. 
293 */ 294 s = splnet(); 295 so->so_pcb = (caddr_t)rp; 296 so->so_fibnum = td->td_proc->p_fibnum; 297 error = raw_attach(so, proto); 298 rp = sotorawcb(so); 299 if (error) { 300 splx(s); 301 so->so_pcb = NULL; 302 free(rp, M_PCB); 303 return error; 304 } 305 RTSOCK_LOCK(); 306 switch(rp->rcb_proto.sp_protocol) { 307 case AF_INET: 308 route_cb.ip_count++; 309 break; 310 case AF_INET6: 311 route_cb.ip6_count++; 312 break; 313 case AF_IPX: 314 route_cb.ipx_count++; 315 break; 316 } 317 route_cb.any_count++; 318 RTSOCK_UNLOCK(); 319 soisconnected(so); 320 so->so_options |= SO_USELOOPBACK; 321 splx(s); 322 return 0; 323 } 324 325 static int 326 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 327 { 328 329 return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */ 330 } 331 332 static int 333 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 334 { 335 336 return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */ 337 } 338 339 /* pru_connect2 is EOPNOTSUPP */ 340 /* pru_control is EOPNOTSUPP */ 341 342 static void 343 rts_detach(struct socket *so) 344 { 345 struct rawcb *rp = sotorawcb(so); 346 347 KASSERT(rp != NULL, ("rts_detach: rp == NULL")); 348 349 RTSOCK_LOCK(); 350 switch(rp->rcb_proto.sp_protocol) { 351 case AF_INET: 352 route_cb.ip_count--; 353 break; 354 case AF_INET6: 355 route_cb.ip6_count--; 356 break; 357 case AF_IPX: 358 route_cb.ipx_count--; 359 break; 360 } 361 route_cb.any_count--; 362 RTSOCK_UNLOCK(); 363 raw_usrreqs.pru_detach(so); 364 } 365 366 static int 367 rts_disconnect(struct socket *so) 368 { 369 370 return (raw_usrreqs.pru_disconnect(so)); 371 } 372 373 /* pru_listen is EOPNOTSUPP */ 374 375 static int 376 rts_peeraddr(struct socket *so, struct sockaddr **nam) 377 { 378 379 return (raw_usrreqs.pru_peeraddr(so, nam)); 380 } 381 382 /* pru_rcvd is EOPNOTSUPP */ 383 /* pru_rcvoob is EOPNOTSUPP */ 384 385 static int 386 rts_send(struct socket *so, int flags, struct mbuf *m, struct 
sockaddr *nam, 387 struct mbuf *control, struct thread *td) 388 { 389 390 return (raw_usrreqs.pru_send(so, flags, m, nam, control, td)); 391 } 392 393 /* pru_sense is null */ 394 395 static int 396 rts_shutdown(struct socket *so) 397 { 398 399 return (raw_usrreqs.pru_shutdown(so)); 400 } 401 402 static int 403 rts_sockaddr(struct socket *so, struct sockaddr **nam) 404 { 405 406 return (raw_usrreqs.pru_sockaddr(so, nam)); 407 } 408 409 static struct pr_usrreqs route_usrreqs = { 410 .pru_abort = rts_abort, 411 .pru_attach = rts_attach, 412 .pru_bind = rts_bind, 413 .pru_connect = rts_connect, 414 .pru_detach = rts_detach, 415 .pru_disconnect = rts_disconnect, 416 .pru_peeraddr = rts_peeraddr, 417 .pru_send = rts_send, 418 .pru_shutdown = rts_shutdown, 419 .pru_sockaddr = rts_sockaddr, 420 .pru_close = rts_close, 421 }; 422 423 #ifndef _SOCKADDR_UNION_DEFINED 424 #define _SOCKADDR_UNION_DEFINED 425 /* 426 * The union of all possible address formats we handle. 427 */ 428 union sockaddr_union { 429 struct sockaddr sa; 430 struct sockaddr_in sin; 431 struct sockaddr_in6 sin6; 432 }; 433 #endif /* _SOCKADDR_UNION_DEFINED */ 434 435 static int 436 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, 437 struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred) 438 { 439 440 /* First, see if the returned address is part of the jail. */ 441 if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) { 442 info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 443 return (0); 444 } 445 446 switch (info->rti_info[RTAX_DST]->sa_family) { 447 #ifdef INET 448 case AF_INET: 449 { 450 struct in_addr ia; 451 struct ifaddr *ifa; 452 int found; 453 454 found = 0; 455 /* 456 * Try to find an address on the given outgoing interface 457 * that belongs to the jail. 
458 */ 459 IF_ADDR_LOCK(ifp); 460 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 461 struct sockaddr *sa; 462 sa = ifa->ifa_addr; 463 if (sa->sa_family != AF_INET) 464 continue; 465 ia = ((struct sockaddr_in *)sa)->sin_addr; 466 if (prison_check_ip4(cred, &ia) == 0) { 467 found = 1; 468 break; 469 } 470 } 471 IF_ADDR_UNLOCK(ifp); 472 if (!found) { 473 /* 474 * As a last resort return the 'default' jail address. 475 */ 476 ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)-> 477 sin_addr; 478 if (prison_get_ip4(cred, &ia) != 0) 479 return (ESRCH); 480 } 481 bzero(&saun->sin, sizeof(struct sockaddr_in)); 482 saun->sin.sin_len = sizeof(struct sockaddr_in); 483 saun->sin.sin_family = AF_INET; 484 saun->sin.sin_addr.s_addr = ia.s_addr; 485 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin; 486 break; 487 } 488 #endif 489 #ifdef INET6 490 case AF_INET6: 491 { 492 struct in6_addr ia6; 493 struct ifaddr *ifa; 494 int found; 495 496 found = 0; 497 /* 498 * Try to find an address on the given outgoing interface 499 * that belongs to the jail. 500 */ 501 IF_ADDR_LOCK(ifp); 502 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 503 struct sockaddr *sa; 504 sa = ifa->ifa_addr; 505 if (sa->sa_family != AF_INET6) 506 continue; 507 bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr, 508 &ia6, sizeof(struct in6_addr)); 509 if (prison_check_ip6(cred, &ia6) == 0) { 510 found = 1; 511 break; 512 } 513 } 514 IF_ADDR_UNLOCK(ifp); 515 if (!found) { 516 /* 517 * As a last resort return the 'default' jail address. 
518 */ 519 ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)-> 520 sin6_addr; 521 if (prison_get_ip6(cred, &ia6) != 0) 522 return (ESRCH); 523 } 524 bzero(&saun->sin6, sizeof(struct sockaddr_in6)); 525 saun->sin6.sin6_len = sizeof(struct sockaddr_in6); 526 saun->sin6.sin6_family = AF_INET6; 527 bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr)); 528 if (sa6_recoverscope(&saun->sin6) != 0) 529 return (ESRCH); 530 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6; 531 break; 532 } 533 #endif 534 default: 535 return (ESRCH); 536 } 537 return (0); 538 } 539 540 /*ARGSUSED*/ 541 static int 542 route_output(struct mbuf *m, struct socket *so) 543 { 544 #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) 545 struct rt_msghdr *rtm = NULL; 546 struct rtentry *rt = NULL; 547 struct radix_node_head *rnh; 548 struct rt_addrinfo info; 549 int len, error = 0; 550 struct ifnet *ifp = NULL; 551 union sockaddr_union saun; 552 sa_family_t saf = AF_UNSPEC; 553 554 #define senderr(e) { error = e; goto flush;} 555 if (m == NULL || ((m->m_len < sizeof(long)) && 556 (m = m_pullup(m, sizeof(long))) == NULL)) 557 return (ENOBUFS); 558 if ((m->m_flags & M_PKTHDR) == 0) 559 panic("route_output"); 560 len = m->m_pkthdr.len; 561 if (len < sizeof(*rtm) || 562 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 563 info.rti_info[RTAX_DST] = NULL; 564 senderr(EINVAL); 565 } 566 R_Malloc(rtm, struct rt_msghdr *, len); 567 if (rtm == NULL) { 568 info.rti_info[RTAX_DST] = NULL; 569 senderr(ENOBUFS); 570 } 571 m_copydata(m, 0, len, (caddr_t)rtm); 572 if (rtm->rtm_version != RTM_VERSION) { 573 info.rti_info[RTAX_DST] = NULL; 574 senderr(EPROTONOSUPPORT); 575 } 576 rtm->rtm_pid = curproc->p_pid; 577 bzero(&info, sizeof(info)); 578 info.rti_addrs = rtm->rtm_addrs; 579 if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { 580 info.rti_info[RTAX_DST] = NULL; 581 senderr(EINVAL); 582 } 583 info.rti_flags = rtm->rtm_flags; 584 if (info.rti_info[RTAX_DST] == NULL || 585 
info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 586 (info.rti_info[RTAX_GATEWAY] != NULL && 587 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) 588 senderr(EINVAL); 589 saf = info.rti_info[RTAX_DST]->sa_family; 590 /* 591 * Verify that the caller has the appropriate privilege; RTM_GET 592 * is the only operation the non-superuser is allowed. 593 */ 594 if (rtm->rtm_type != RTM_GET) { 595 error = priv_check(curthread, PRIV_NET_ROUTE); 596 if (error) 597 senderr(error); 598 } 599 600 /* 601 * The given gateway address may be an interface address. 602 * For example, issuing a "route change" command on a route 603 * entry that was created from a tunnel, and the gateway 604 * address given is the local end point. In this case the 605 * RTF_GATEWAY flag must be cleared or the destination will 606 * not be reachable even though there is no error message. 607 */ 608 if (info.rti_info[RTAX_GATEWAY] != NULL && 609 info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) { 610 struct route gw_ro; 611 612 bzero(&gw_ro, sizeof(gw_ro)); 613 gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY]; 614 rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum); 615 /* 616 * A host route through the loopback interface is 617 * installed for each interface adddress. In pre 8.0 618 * releases the interface address of a PPP link type 619 * is not reachable locally. This behavior is fixed as 620 * part of the new L2/L3 redesign and rewrite work. The 621 * signature of this interface address route is the 622 * AF_LINK sa_family type of the rt_gateway, and the 623 * rt_ifp has the IFF_LOOPBACK flag set. 
624 */ 625 if (gw_ro.ro_rt != NULL && 626 gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK && 627 gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) 628 info.rti_flags &= ~RTF_GATEWAY; 629 if (gw_ro.ro_rt != NULL) 630 RTFREE(gw_ro.ro_rt); 631 } 632 633 switch (rtm->rtm_type) { 634 struct rtentry *saved_nrt; 635 636 case RTM_ADD: 637 if (info.rti_info[RTAX_GATEWAY] == NULL) 638 senderr(EINVAL); 639 saved_nrt = NULL; 640 641 /* support for new ARP code */ 642 if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 643 (rtm->rtm_flags & RTF_LLDATA) != 0) { 644 error = lla_rt_output(rtm, &info); 645 break; 646 } 647 error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt, 648 so->so_fibnum); 649 if (error == 0 && saved_nrt) { 650 RT_LOCK(saved_nrt); 651 rt_setmetrics(rtm->rtm_inits, 652 &rtm->rtm_rmx, &saved_nrt->rt_rmx); 653 rtm->rtm_index = saved_nrt->rt_ifp->if_index; 654 RT_REMREF(saved_nrt); 655 RT_UNLOCK(saved_nrt); 656 } 657 break; 658 659 case RTM_DELETE: 660 saved_nrt = NULL; 661 /* support for new ARP code */ 662 if (info.rti_info[RTAX_GATEWAY] && 663 (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) && 664 (rtm->rtm_flags & RTF_LLDATA) != 0) { 665 error = lla_rt_output(rtm, &info); 666 break; 667 } 668 error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, 669 so->so_fibnum); 670 if (error == 0) { 671 RT_LOCK(saved_nrt); 672 rt = saved_nrt; 673 goto report; 674 } 675 break; 676 677 case RTM_GET: 678 case RTM_CHANGE: 679 case RTM_LOCK: 680 rnh = rt_tables_get_rnh(so->so_fibnum, 681 info.rti_info[RTAX_DST]->sa_family); 682 if (rnh == NULL) 683 senderr(EAFNOSUPPORT); 684 RADIX_NODE_HEAD_RLOCK(rnh); 685 rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST], 686 info.rti_info[RTAX_NETMASK], rnh); 687 if (rt == NULL) { /* XXX looks bogus */ 688 RADIX_NODE_HEAD_RUNLOCK(rnh); 689 senderr(ESRCH); 690 } 691 #ifdef RADIX_MPATH 692 /* 693 * for RTM_CHANGE/LOCK, if we got multipath routes, 694 * we require users to specify a matching RTAX_GATEWAY. 
695 * 696 * for RTM_GET, gate is optional even with multipath. 697 * if gate == NULL the first match is returned. 698 * (no need to call rt_mpath_matchgate if gate == NULL) 699 */ 700 if (rn_mpath_capable(rnh) && 701 (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { 702 rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); 703 if (!rt) { 704 RADIX_NODE_HEAD_RUNLOCK(rnh); 705 senderr(ESRCH); 706 } 707 } 708 #endif 709 /* 710 * If performing proxied L2 entry insertion, and 711 * the actual PPP host entry is found, perform 712 * another search to retrieve the prefix route of 713 * the local end point of the PPP link. 714 */ 715 if (rtm->rtm_flags & RTF_ANNOUNCE) { 716 struct sockaddr laddr; 717 718 if (rt->rt_ifp != NULL && 719 rt->rt_ifp->if_type == IFT_PROPVIRTUAL) { 720 struct ifaddr *ifa; 721 722 ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1); 723 if (ifa != NULL) 724 rt_maskedcopy(ifa->ifa_addr, 725 &laddr, 726 ifa->ifa_netmask); 727 } else 728 rt_maskedcopy(rt->rt_ifa->ifa_addr, 729 &laddr, 730 rt->rt_ifa->ifa_netmask); 731 /* 732 * refactor rt and no lock operation necessary 733 */ 734 rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh); 735 if (rt == NULL) { 736 RADIX_NODE_HEAD_RUNLOCK(rnh); 737 senderr(ESRCH); 738 } 739 } 740 RT_LOCK(rt); 741 RT_ADDREF(rt); 742 RADIX_NODE_HEAD_RUNLOCK(rnh); 743 744 /* 745 * Fix for PR: 82974 746 * 747 * RTM_CHANGE/LOCK need a perfect match, rn_lookup() 748 * returns a perfect match in case a netmask is 749 * specified. For host routes only a longest prefix 750 * match is returned so it is necessary to compare the 751 * existence of the netmask. If both have a netmask 752 * rnh_lookup() did a perfect match and if none of them 753 * have a netmask both are host routes which is also a 754 * perfect match. 
755 */ 756 757 if (rtm->rtm_type != RTM_GET && 758 (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) { 759 RT_UNLOCK(rt); 760 senderr(ESRCH); 761 } 762 763 switch(rtm->rtm_type) { 764 765 case RTM_GET: 766 report: 767 RT_LOCK_ASSERT(rt); 768 if ((rt->rt_flags & RTF_HOST) == 0 769 ? jailed_without_vnet(curthread->td_ucred) 770 : prison_if(curthread->td_ucred, 771 rt_key(rt)) != 0) { 772 RT_UNLOCK(rt); 773 senderr(ESRCH); 774 } 775 info.rti_info[RTAX_DST] = rt_key(rt); 776 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 777 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 778 info.rti_info[RTAX_GENMASK] = 0; 779 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { 780 ifp = rt->rt_ifp; 781 if (ifp) { 782 info.rti_info[RTAX_IFP] = 783 ifp->if_addr->ifa_addr; 784 error = rtm_get_jailed(&info, ifp, rt, 785 &saun, curthread->td_ucred); 786 if (error != 0) { 787 RT_UNLOCK(rt); 788 senderr(error); 789 } 790 if (ifp->if_flags & IFF_POINTOPOINT) 791 info.rti_info[RTAX_BRD] = 792 rt->rt_ifa->ifa_dstaddr; 793 rtm->rtm_index = ifp->if_index; 794 } else { 795 info.rti_info[RTAX_IFP] = NULL; 796 info.rti_info[RTAX_IFA] = NULL; 797 } 798 } else if ((ifp = rt->rt_ifp) != NULL) { 799 rtm->rtm_index = ifp->if_index; 800 } 801 len = rt_msg2(rtm->rtm_type, &info, NULL, NULL); 802 if (len > rtm->rtm_msglen) { 803 struct rt_msghdr *new_rtm; 804 R_Malloc(new_rtm, struct rt_msghdr *, len); 805 if (new_rtm == NULL) { 806 RT_UNLOCK(rt); 807 senderr(ENOBUFS); 808 } 809 bcopy(rtm, new_rtm, rtm->rtm_msglen); 810 Free(rtm); rtm = new_rtm; 811 } 812 (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL); 813 rtm->rtm_flags = rt->rt_flags; 814 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 815 rtm->rtm_addrs = info.rti_addrs; 816 break; 817 818 case RTM_CHANGE: 819 /* 820 * New gateway could require new ifaddr, ifp; 821 * flags may also be different; ifp may be specified 822 * by ll sockaddr when protocol address is ambiguous 823 */ 824 if (((rt->rt_flags & RTF_GATEWAY) && 825 info.rti_info[RTAX_GATEWAY] != NULL) || 
826 info.rti_info[RTAX_IFP] != NULL || 827 (info.rti_info[RTAX_IFA] != NULL && 828 !sa_equal(info.rti_info[RTAX_IFA], 829 rt->rt_ifa->ifa_addr))) { 830 RT_UNLOCK(rt); 831 RADIX_NODE_HEAD_LOCK(rnh); 832 error = rt_getifa_fib(&info, rt->rt_fibnum); 833 /* 834 * XXXRW: Really we should release this 835 * reference later, but this maintains 836 * historical behavior. 837 */ 838 if (info.rti_ifa != NULL) 839 ifa_free(info.rti_ifa); 840 RADIX_NODE_HEAD_UNLOCK(rnh); 841 if (error != 0) 842 senderr(error); 843 RT_LOCK(rt); 844 } 845 if (info.rti_ifa != NULL && 846 info.rti_ifa != rt->rt_ifa && 847 rt->rt_ifa != NULL && 848 rt->rt_ifa->ifa_rtrequest != NULL) { 849 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, 850 &info); 851 ifa_free(rt->rt_ifa); 852 } 853 if (info.rti_info[RTAX_GATEWAY] != NULL) { 854 RT_UNLOCK(rt); 855 RADIX_NODE_HEAD_LOCK(rnh); 856 RT_LOCK(rt); 857 858 error = rt_setgate(rt, rt_key(rt), 859 info.rti_info[RTAX_GATEWAY]); 860 RADIX_NODE_HEAD_UNLOCK(rnh); 861 if (error != 0) { 862 RT_UNLOCK(rt); 863 senderr(error); 864 } 865 rt->rt_flags |= (RTF_GATEWAY & info.rti_flags); 866 } 867 if (info.rti_ifa != NULL && 868 info.rti_ifa != rt->rt_ifa) { 869 ifa_ref(info.rti_ifa); 870 rt->rt_ifa = info.rti_ifa; 871 rt->rt_ifp = info.rti_ifp; 872 } 873 /* Allow some flags to be toggled on change. */ 874 rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) | 875 (rtm->rtm_flags & RTF_FMASK); 876 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 877 &rt->rt_rmx); 878 rtm->rtm_index = rt->rt_ifp->if_index; 879 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) 880 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); 881 /* FALLTHROUGH */ 882 case RTM_LOCK: 883 /* We don't support locks anymore */ 884 break; 885 } 886 RT_UNLOCK(rt); 887 break; 888 889 default: 890 senderr(EOPNOTSUPP); 891 } 892 893 flush: 894 if (rtm) { 895 if (error) 896 rtm->rtm_errno = error; 897 else 898 rtm->rtm_flags |= RTF_DONE; 899 } 900 if (rt) /* XXX can this be true? 
*/ 901 RTFREE(rt); 902 { 903 struct rawcb *rp = NULL; 904 /* 905 * Check to see if we don't want our own messages. 906 */ 907 if ((so->so_options & SO_USELOOPBACK) == 0) { 908 if (route_cb.any_count <= 1) { 909 if (rtm) 910 Free(rtm); 911 m_freem(m); 912 return (error); 913 } 914 /* There is another listener, so construct message */ 915 rp = sotorawcb(so); 916 } 917 if (rtm) { 918 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); 919 if (m->m_pkthdr.len < rtm->rtm_msglen) { 920 m_freem(m); 921 m = NULL; 922 } else if (m->m_pkthdr.len > rtm->rtm_msglen) 923 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); 924 } 925 if (m) { 926 M_SETFIB(m, so->so_fibnum); 927 m->m_flags |= RTS_FILTER_FIB; 928 if (rp) { 929 /* 930 * XXX insure we don't get a copy by 931 * invalidating our protocol 932 */ 933 unsigned short family = rp->rcb_proto.sp_family; 934 rp->rcb_proto.sp_family = 0; 935 rt_dispatch(m, saf); 936 rp->rcb_proto.sp_family = family; 937 } else 938 rt_dispatch(m, saf); 939 } 940 /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */ 941 if (rtm) 942 Free(rtm); 943 } 944 return (error); 945 #undef sa_equal 946 } 947 948 static void 949 rt_setmetrics(u_long which, const struct rt_metrics *in, 950 struct rt_metrics_lite *out) 951 { 952 #define metric(f, e) if (which & (f)) out->e = in->e; 953 /* 954 * Only these are stored in the routing entry since introduction 955 * of tcp hostcache. The rest is ignored. 956 */ 957 metric(RTV_MTU, rmx_mtu); 958 metric(RTV_WEIGHT, rmx_weight); 959 /* Userland -> kernel timebase conversion. */ 960 if (which & RTV_EXPIRE) 961 out->rmx_expire = in->rmx_expire ? 962 in->rmx_expire - time_second + time_uptime : 0; 963 #undef metric 964 } 965 966 static void 967 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out) 968 { 969 #define metric(e) out->e = in->e; 970 bzero(out, sizeof(*out)); 971 metric(rmx_mtu); 972 metric(rmx_weight); 973 /* Kernel -> userland timebase conversion. 
*/ 974 out->rmx_expire = in->rmx_expire ? 975 in->rmx_expire - time_uptime + time_second : 0; 976 #undef metric 977 } 978 979 /* 980 * Extract the addresses of the passed sockaddrs. 981 * Do a little sanity checking so as to avoid bad memory references. 982 * This data is derived straight from userland. 983 */ 984 static int 985 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 986 { 987 struct sockaddr *sa; 988 int i; 989 990 for (i = 0; i < RTAX_MAX && cp < cplim; i++) { 991 if ((rtinfo->rti_addrs & (1 << i)) == 0) 992 continue; 993 sa = (struct sockaddr *)cp; 994 /* 995 * It won't fit. 996 */ 997 if (cp + sa->sa_len > cplim) 998 return (EINVAL); 999 /* 1000 * there are no more.. quit now 1001 * If there are more bits, they are in error. 1002 * I've seen this. route(1) can evidently generate these. 1003 * This causes kernel to core dump. 1004 * for compatibility, If we see this, point to a safe address. 1005 */ 1006 if (sa->sa_len == 0) { 1007 rtinfo->rti_info[i] = &sa_zero; 1008 return (0); /* should be EINVAL but for compat */ 1009 } 1010 /* accept it */ 1011 rtinfo->rti_info[i] = sa; 1012 cp += SA_SIZE(sa); 1013 } 1014 return (0); 1015 } 1016 1017 static struct mbuf * 1018 rt_msg1(int type, struct rt_addrinfo *rtinfo) 1019 { 1020 struct rt_msghdr *rtm; 1021 struct mbuf *m; 1022 int i; 1023 struct sockaddr *sa; 1024 int len, dlen; 1025 1026 switch (type) { 1027 1028 case RTM_DELADDR: 1029 case RTM_NEWADDR: 1030 len = sizeof(struct ifa_msghdr); 1031 break; 1032 1033 case RTM_DELMADDR: 1034 case RTM_NEWMADDR: 1035 len = sizeof(struct ifma_msghdr); 1036 break; 1037 1038 case RTM_IFINFO: 1039 len = sizeof(struct if_msghdr); 1040 break; 1041 1042 case RTM_IFANNOUNCE: 1043 case RTM_IEEE80211: 1044 len = sizeof(struct if_announcemsghdr); 1045 break; 1046 1047 default: 1048 len = sizeof(struct rt_msghdr); 1049 } 1050 if (len > MCLBYTES) 1051 panic("rt_msg1"); 1052 m = m_gethdr(M_DONTWAIT, MT_DATA); 1053 if (m && len > MHLEN) { 1054 MCLGET(m, M_DONTWAIT); 
1055 if ((m->m_flags & M_EXT) == 0) { 1056 m_free(m); 1057 m = NULL; 1058 } 1059 } 1060 if (m == NULL) 1061 return (m); 1062 m->m_pkthdr.len = m->m_len = len; 1063 m->m_pkthdr.rcvif = NULL; 1064 rtm = mtod(m, struct rt_msghdr *); 1065 bzero((caddr_t)rtm, len); 1066 for (i = 0; i < RTAX_MAX; i++) { 1067 if ((sa = rtinfo->rti_info[i]) == NULL) 1068 continue; 1069 rtinfo->rti_addrs |= (1 << i); 1070 dlen = SA_SIZE(sa); 1071 m_copyback(m, len, dlen, (caddr_t)sa); 1072 len += dlen; 1073 } 1074 if (m->m_pkthdr.len != len) { 1075 m_freem(m); 1076 return (NULL); 1077 } 1078 rtm->rtm_msglen = len; 1079 rtm->rtm_version = RTM_VERSION; 1080 rtm->rtm_type = type; 1081 return (m); 1082 } 1083 1084 static int 1085 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) 1086 { 1087 int i; 1088 int len, dlen, second_time = 0; 1089 caddr_t cp0; 1090 1091 rtinfo->rti_addrs = 0; 1092 again: 1093 switch (type) { 1094 1095 case RTM_DELADDR: 1096 case RTM_NEWADDR: 1097 len = sizeof(struct ifa_msghdr); 1098 break; 1099 1100 case RTM_IFINFO: 1101 #ifdef COMPAT_FREEBSD32 1102 if (w != NULL && w->w_req->flags & SCTL_MASK32) { 1103 len = sizeof(struct if_msghdr32); 1104 break; 1105 } 1106 #endif 1107 len = sizeof(struct if_msghdr); 1108 break; 1109 1110 case RTM_NEWMADDR: 1111 len = sizeof(struct ifma_msghdr); 1112 break; 1113 1114 default: 1115 len = sizeof(struct rt_msghdr); 1116 } 1117 cp0 = cp; 1118 if (cp0) 1119 cp += len; 1120 for (i = 0; i < RTAX_MAX; i++) { 1121 struct sockaddr *sa; 1122 1123 if ((sa = rtinfo->rti_info[i]) == NULL) 1124 continue; 1125 rtinfo->rti_addrs |= (1 << i); 1126 dlen = SA_SIZE(sa); 1127 if (cp) { 1128 bcopy((caddr_t)sa, cp, (unsigned)dlen); 1129 cp += dlen; 1130 } 1131 len += dlen; 1132 } 1133 len = ALIGN(len); 1134 if (cp == NULL && w != NULL && !second_time) { 1135 struct walkarg *rw = w; 1136 1137 if (rw->w_req) { 1138 if (rw->w_tmemsize < len) { 1139 if (rw->w_tmem) 1140 free(rw->w_tmem, M_RTABLE); 1141 rw->w_tmem = (caddr_t) 1142 
malloc(len, M_RTABLE, M_NOWAIT); 1143 if (rw->w_tmem) 1144 rw->w_tmemsize = len; 1145 } 1146 if (rw->w_tmem) { 1147 cp = rw->w_tmem; 1148 second_time = 1; 1149 goto again; 1150 } 1151 } 1152 } 1153 if (cp) { 1154 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1155 1156 rtm->rtm_version = RTM_VERSION; 1157 rtm->rtm_type = type; 1158 rtm->rtm_msglen = len; 1159 } 1160 return (len); 1161 } 1162 1163 /* 1164 * This routine is called to generate a message from the routing 1165 * socket indicating that a redirect has occured, a routing lookup 1166 * has failed, or that a protocol has detected timeouts to a particular 1167 * destination. 1168 */ 1169 void 1170 rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error, 1171 int fibnum) 1172 { 1173 struct rt_msghdr *rtm; 1174 struct mbuf *m; 1175 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1176 1177 if (route_cb.any_count == 0) 1178 return; 1179 m = rt_msg1(type, rtinfo); 1180 if (m == NULL) 1181 return; 1182 1183 if (fibnum != RTS_ALLFIBS) { 1184 KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out " 1185 "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs)); 1186 M_SETFIB(m, fibnum); 1187 m->m_flags |= RTS_FILTER_FIB; 1188 } 1189 1190 rtm = mtod(m, struct rt_msghdr *); 1191 rtm->rtm_flags = RTF_DONE | flags; 1192 rtm->rtm_errno = error; 1193 rtm->rtm_addrs = rtinfo->rti_addrs; 1194 rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); 1195 } 1196 1197 void 1198 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) 1199 { 1200 1201 rt_missmsg_fib(type, rtinfo, flags, error, RTS_ALLFIBS); 1202 } 1203 1204 /* 1205 * This routine is called to generate a message from the routing 1206 * socket indicating that the status of a network interface has changed. 
1207 */ 1208 void 1209 rt_ifmsg(struct ifnet *ifp) 1210 { 1211 struct if_msghdr *ifm; 1212 struct mbuf *m; 1213 struct rt_addrinfo info; 1214 1215 if (route_cb.any_count == 0) 1216 return; 1217 bzero((caddr_t)&info, sizeof(info)); 1218 m = rt_msg1(RTM_IFINFO, &info); 1219 if (m == NULL) 1220 return; 1221 ifm = mtod(m, struct if_msghdr *); 1222 ifm->ifm_index = ifp->if_index; 1223 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; 1224 ifm->ifm_data = ifp->if_data; 1225 ifm->ifm_addrs = 0; 1226 rt_dispatch(m, AF_UNSPEC); 1227 } 1228 1229 /* 1230 * This is called to generate messages from the routing socket 1231 * indicating a network interface has had addresses associated with it. 1232 * if we ever reverse the logic and replace messages TO the routing 1233 * socket indicate a request to configure interfaces, then it will 1234 * be unnecessary as the routing socket will automatically generate 1235 * copies of it. 1236 */ 1237 void 1238 rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, 1239 int fibnum) 1240 { 1241 struct rt_addrinfo info; 1242 struct sockaddr *sa = NULL; 1243 int pass; 1244 struct mbuf *m = NULL; 1245 struct ifnet *ifp = ifa->ifa_ifp; 1246 1247 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 1248 ("unexpected cmd %u", cmd)); 1249 #if defined(INET) || defined(INET6) 1250 #ifdef SCTP 1251 /* 1252 * notify the SCTP stack 1253 * this will only get called when an address is added/deleted 1254 * XXX pass the ifaddr struct instead if ifa->ifa_addr... 1255 */ 1256 sctp_addr_change(ifa, cmd); 1257 #endif /* SCTP */ 1258 #endif 1259 if (route_cb.any_count == 0) 1260 return; 1261 for (pass = 1; pass < 3; pass++) { 1262 bzero((caddr_t)&info, sizeof(info)); 1263 if ((cmd == RTM_ADD && pass == 1) || 1264 (cmd == RTM_DELETE && pass == 2)) { 1265 struct ifa_msghdr *ifam; 1266 int ncmd = cmd == RTM_ADD ? 
RTM_NEWADDR : RTM_DELADDR; 1267 1268 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; 1269 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; 1270 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1271 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1272 if ((m = rt_msg1(ncmd, &info)) == NULL) 1273 continue; 1274 ifam = mtod(m, struct ifa_msghdr *); 1275 ifam->ifam_index = ifp->if_index; 1276 ifam->ifam_metric = ifa->ifa_metric; 1277 ifam->ifam_flags = ifa->ifa_flags; 1278 ifam->ifam_addrs = info.rti_addrs; 1279 } 1280 if ((cmd == RTM_ADD && pass == 2) || 1281 (cmd == RTM_DELETE && pass == 1)) { 1282 struct rt_msghdr *rtm; 1283 1284 if (rt == NULL) 1285 continue; 1286 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1287 info.rti_info[RTAX_DST] = sa = rt_key(rt); 1288 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1289 if ((m = rt_msg1(cmd, &info)) == NULL) 1290 continue; 1291 rtm = mtod(m, struct rt_msghdr *); 1292 rtm->rtm_index = ifp->if_index; 1293 rtm->rtm_flags |= rt->rt_flags; 1294 rtm->rtm_errno = error; 1295 rtm->rtm_addrs = info.rti_addrs; 1296 } 1297 if (fibnum != RTS_ALLFIBS) { 1298 KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: " 1299 "fibnum out of range 0 <= %d < %d", __func__, 1300 fibnum, rt_numfibs)); 1301 M_SETFIB(m, fibnum); 1302 m->m_flags |= RTS_FILTER_FIB; 1303 } 1304 rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); 1305 } 1306 } 1307 1308 void 1309 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) 1310 { 1311 1312 rt_newaddrmsg_fib(cmd, ifa, error, rt, RTS_ALLFIBS); 1313 } 1314 1315 /* 1316 * This is the analogue to the rt_newaddrmsg which performs the same 1317 * function but for multicast group memberhips. This is easier since 1318 * there is no route state to worry about. 
1319 */ 1320 void 1321 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) 1322 { 1323 struct rt_addrinfo info; 1324 struct mbuf *m = NULL; 1325 struct ifnet *ifp = ifma->ifma_ifp; 1326 struct ifma_msghdr *ifmam; 1327 1328 if (route_cb.any_count == 0) 1329 return; 1330 1331 bzero((caddr_t)&info, sizeof(info)); 1332 info.rti_info[RTAX_IFA] = ifma->ifma_addr; 1333 info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL; 1334 /* 1335 * If a link-layer address is present, present it as a ``gateway'' 1336 * (similarly to how ARP entries, e.g., are presented). 1337 */ 1338 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; 1339 m = rt_msg1(cmd, &info); 1340 if (m == NULL) 1341 return; 1342 ifmam = mtod(m, struct ifma_msghdr *); 1343 KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n", 1344 __func__)); 1345 ifmam->ifmam_index = ifp->if_index; 1346 ifmam->ifmam_addrs = info.rti_addrs; 1347 rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC); 1348 } 1349 1350 static struct mbuf * 1351 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, 1352 struct rt_addrinfo *info) 1353 { 1354 struct if_announcemsghdr *ifan; 1355 struct mbuf *m; 1356 1357 if (route_cb.any_count == 0) 1358 return NULL; 1359 bzero((caddr_t)info, sizeof(*info)); 1360 m = rt_msg1(type, info); 1361 if (m != NULL) { 1362 ifan = mtod(m, struct if_announcemsghdr *); 1363 ifan->ifan_index = ifp->if_index; 1364 strlcpy(ifan->ifan_name, ifp->if_xname, 1365 sizeof(ifan->ifan_name)); 1366 ifan->ifan_what = what; 1367 } 1368 return m; 1369 } 1370 1371 /* 1372 * This is called to generate routing socket messages indicating 1373 * IEEE80211 wireless events. 1374 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. 
1375 */ 1376 void 1377 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) 1378 { 1379 struct mbuf *m; 1380 struct rt_addrinfo info; 1381 1382 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); 1383 if (m != NULL) { 1384 /* 1385 * Append the ieee80211 data. Try to stick it in the 1386 * mbuf containing the ifannounce msg; otherwise allocate 1387 * a new mbuf and append. 1388 * 1389 * NB: we assume m is a single mbuf. 1390 */ 1391 if (data_len > M_TRAILINGSPACE(m)) { 1392 struct mbuf *n = m_get(M_NOWAIT, MT_DATA); 1393 if (n == NULL) { 1394 m_freem(m); 1395 return; 1396 } 1397 bcopy(data, mtod(n, void *), data_len); 1398 n->m_len = data_len; 1399 m->m_next = n; 1400 } else if (data_len > 0) { 1401 bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); 1402 m->m_len += data_len; 1403 } 1404 if (m->m_flags & M_PKTHDR) 1405 m->m_pkthdr.len += data_len; 1406 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; 1407 rt_dispatch(m, AF_UNSPEC); 1408 } 1409 } 1410 1411 /* 1412 * This is called to generate routing socket messages indicating 1413 * network interface arrival and departure. 1414 */ 1415 void 1416 rt_ifannouncemsg(struct ifnet *ifp, int what) 1417 { 1418 struct mbuf *m; 1419 struct rt_addrinfo info; 1420 1421 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); 1422 if (m != NULL) 1423 rt_dispatch(m, AF_UNSPEC); 1424 } 1425 1426 static void 1427 rt_dispatch(struct mbuf *m, sa_family_t saf) 1428 { 1429 struct m_tag *tag; 1430 1431 /* 1432 * Preserve the family from the sockaddr, if any, in an m_tag for 1433 * use when injecting the mbuf into the routing socket buffer from 1434 * the netisr. 
1435 */ 1436 if (saf != AF_UNSPEC) { 1437 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), 1438 M_NOWAIT); 1439 if (tag == NULL) { 1440 m_freem(m); 1441 return; 1442 } 1443 *(unsigned short *)(tag + 1) = saf; 1444 m_tag_prepend(m, tag); 1445 } 1446 #ifdef VIMAGE 1447 if (V_loif) 1448 m->m_pkthdr.rcvif = V_loif; 1449 else { 1450 m_freem(m); 1451 return; 1452 } 1453 #endif 1454 netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ 1455 } 1456 1457 /* 1458 * This is used in dumping the kernel table via sysctl(). 1459 */ 1460 static int 1461 sysctl_dumpentry(struct radix_node *rn, void *vw) 1462 { 1463 struct walkarg *w = vw; 1464 struct rtentry *rt = (struct rtentry *)rn; 1465 int error = 0, size; 1466 struct rt_addrinfo info; 1467 1468 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1469 return 0; 1470 if ((rt->rt_flags & RTF_HOST) == 0 1471 ? jailed_without_vnet(w->w_req->td->td_ucred) 1472 : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0) 1473 return (0); 1474 bzero((caddr_t)&info, sizeof(info)); 1475 info.rti_info[RTAX_DST] = rt_key(rt); 1476 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1477 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1478 info.rti_info[RTAX_GENMASK] = 0; 1479 if (rt->rt_ifp) { 1480 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; 1481 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1482 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) 1483 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1484 } 1485 size = rt_msg2(RTM_GET, &info, NULL, w); 1486 if (w->w_req && w->w_tmem) { 1487 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1488 1489 rtm->rtm_flags = rt->rt_flags; 1490 /* 1491 * let's be honest about this being a retarded hack 1492 */ 1493 rtm->rtm_fmask = rt->rt_rmx.rmx_pksent; 1494 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1495 rtm->rtm_index = rt->rt_ifp->if_index; 1496 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; 1497 rtm->rtm_addrs = info.rti_addrs; 1498 error = SYSCTL_OUT(w->w_req, 
(caddr_t)rtm, size); 1499 return (error); 1500 } 1501 return (error); 1502 } 1503 1504 #ifdef COMPAT_FREEBSD32 1505 static void 1506 copy_ifdata32(struct if_data *src, struct if_data32 *dst) 1507 { 1508 1509 bzero(dst, sizeof(*dst)); 1510 CP(*src, *dst, ifi_type); 1511 CP(*src, *dst, ifi_physical); 1512 CP(*src, *dst, ifi_addrlen); 1513 CP(*src, *dst, ifi_hdrlen); 1514 CP(*src, *dst, ifi_link_state); 1515 CP(*src, *dst, ifi_vhid); 1516 dst->ifi_datalen = sizeof(struct if_data32); 1517 CP(*src, *dst, ifi_mtu); 1518 CP(*src, *dst, ifi_metric); 1519 CP(*src, *dst, ifi_baudrate); 1520 CP(*src, *dst, ifi_ipackets); 1521 CP(*src, *dst, ifi_ierrors); 1522 CP(*src, *dst, ifi_opackets); 1523 CP(*src, *dst, ifi_oerrors); 1524 CP(*src, *dst, ifi_collisions); 1525 CP(*src, *dst, ifi_ibytes); 1526 CP(*src, *dst, ifi_obytes); 1527 CP(*src, *dst, ifi_imcasts); 1528 CP(*src, *dst, ifi_omcasts); 1529 CP(*src, *dst, ifi_iqdrops); 1530 CP(*src, *dst, ifi_noproto); 1531 CP(*src, *dst, ifi_hwassist); 1532 CP(*src, *dst, ifi_epoch); 1533 TV_CP(*src, *dst, ifi_lastchange); 1534 } 1535 #endif 1536 1537 static int 1538 sysctl_iflist(int af, struct walkarg *w) 1539 { 1540 struct ifnet *ifp; 1541 struct ifaddr *ifa; 1542 struct rt_addrinfo info; 1543 int len, error = 0; 1544 1545 bzero((caddr_t)&info, sizeof(info)); 1546 IFNET_RLOCK(); 1547 TAILQ_FOREACH(ifp, &V_ifnet, if_link) { 1548 if (w->w_arg && w->w_arg != ifp->if_index) 1549 continue; 1550 IF_ADDR_LOCK(ifp); 1551 ifa = ifp->if_addr; 1552 info.rti_info[RTAX_IFP] = ifa->ifa_addr; 1553 len = rt_msg2(RTM_IFINFO, &info, NULL, w); 1554 info.rti_info[RTAX_IFP] = NULL; 1555 if (w->w_req && w->w_tmem) { 1556 struct if_msghdr *ifm; 1557 1558 #ifdef COMPAT_FREEBSD32 1559 if (w->w_req->flags & SCTL_MASK32) { 1560 struct if_msghdr32 *ifm32; 1561 1562 ifm32 = (struct if_msghdr32 *)w->w_tmem; 1563 ifm32->ifm_index = ifp->if_index; 1564 ifm32->ifm_flags = ifp->if_flags | 1565 ifp->if_drv_flags; 1566 copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); 
1567 if (carp_get_vhid_p != NULL) 1568 ifm32->ifm_data.ifi_vhid = 1569 (*carp_get_vhid_p)(ifa); 1570 ifm32->ifm_addrs = info.rti_addrs; 1571 error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32, 1572 len); 1573 goto sysctl_out; 1574 } 1575 #endif 1576 ifm = (struct if_msghdr *)w->w_tmem; 1577 ifm->ifm_index = ifp->if_index; 1578 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; 1579 ifm->ifm_data = ifp->if_data; 1580 if (carp_get_vhid_p != NULL) 1581 ifm->ifm_data.ifi_vhid = 1582 (*carp_get_vhid_p)(ifa); 1583 ifm->ifm_addrs = info.rti_addrs; 1584 error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len); 1585 #ifdef COMPAT_FREEBSD32 1586 sysctl_out: 1587 #endif 1588 if (error) 1589 goto done; 1590 } 1591 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { 1592 if (af && af != ifa->ifa_addr->sa_family) 1593 continue; 1594 if (prison_if(w->w_req->td->td_ucred, 1595 ifa->ifa_addr) != 0) 1596 continue; 1597 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1598 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1599 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1600 len = rt_msg2(RTM_NEWADDR, &info, NULL, w); 1601 if (w->w_req && w->w_tmem) { 1602 struct ifa_msghdr *ifam; 1603 1604 ifam = (struct ifa_msghdr *)w->w_tmem; 1605 ifam->ifam_index = ifa->ifa_ifp->if_index; 1606 ifam->ifam_flags = ifa->ifa_flags; 1607 ifam->ifam_metric = ifa->ifa_metric; 1608 ifam->ifam_addrs = info.rti_addrs; 1609 if (carp_get_vhid_p != NULL) 1610 ifam->ifam_data.ifi_vhid = 1611 (*carp_get_vhid_p)(ifa); 1612 error = SYSCTL_OUT(w->w_req, w->w_tmem, len); 1613 if (error) 1614 goto done; 1615 } 1616 } 1617 IF_ADDR_UNLOCK(ifp); 1618 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1619 info.rti_info[RTAX_BRD] = NULL; 1620 } 1621 done: 1622 if (ifp != NULL) 1623 IF_ADDR_UNLOCK(ifp); 1624 IFNET_RUNLOCK(); 1625 return (error); 1626 } 1627 1628 static int 1629 sysctl_ifmalist(int af, struct walkarg *w) 1630 { 1631 struct ifnet *ifp; 1632 struct ifmultiaddr *ifma; 1633 struct rt_addrinfo info; 1634 int len, error = 0; 1635 
struct ifaddr *ifa; 1636 1637 bzero((caddr_t)&info, sizeof(info)); 1638 IFNET_RLOCK(); 1639 TAILQ_FOREACH(ifp, &V_ifnet, if_link) { 1640 if (w->w_arg && w->w_arg != ifp->if_index) 1641 continue; 1642 ifa = ifp->if_addr; 1643 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; 1644 IF_ADDR_LOCK(ifp); 1645 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1646 if (af && af != ifma->ifma_addr->sa_family) 1647 continue; 1648 if (prison_if(w->w_req->td->td_ucred, 1649 ifma->ifma_addr) != 0) 1650 continue; 1651 info.rti_info[RTAX_IFA] = ifma->ifma_addr; 1652 info.rti_info[RTAX_GATEWAY] = 1653 (ifma->ifma_addr->sa_family != AF_LINK) ? 1654 ifma->ifma_lladdr : NULL; 1655 len = rt_msg2(RTM_NEWMADDR, &info, NULL, w); 1656 if (w->w_req && w->w_tmem) { 1657 struct ifma_msghdr *ifmam; 1658 1659 ifmam = (struct ifma_msghdr *)w->w_tmem; 1660 ifmam->ifmam_index = ifma->ifma_ifp->if_index; 1661 ifmam->ifmam_flags = 0; 1662 ifmam->ifmam_addrs = info.rti_addrs; 1663 error = SYSCTL_OUT(w->w_req, w->w_tmem, len); 1664 if (error) { 1665 IF_ADDR_UNLOCK(ifp); 1666 goto done; 1667 } 1668 } 1669 } 1670 IF_ADDR_UNLOCK(ifp); 1671 } 1672 done: 1673 IFNET_RUNLOCK(); 1674 return (error); 1675 } 1676 1677 static int 1678 sysctl_rtsock(SYSCTL_HANDLER_ARGS) 1679 { 1680 int *name = (int *)arg1; 1681 u_int namelen = arg2; 1682 struct radix_node_head *rnh = NULL; /* silence compiler. */ 1683 int i, lim, error = EINVAL; 1684 u_char af; 1685 struct walkarg w; 1686 1687 name ++; 1688 namelen--; 1689 if (req->newptr) 1690 return (EPERM); 1691 if (namelen != 3) 1692 return ((namelen < 3) ? 
EISDIR : ENOTDIR); 1693 af = name[0]; 1694 if (af > AF_MAX) 1695 return (EINVAL); 1696 bzero(&w, sizeof(w)); 1697 w.w_op = name[1]; 1698 w.w_arg = name[2]; 1699 w.w_req = req; 1700 1701 error = sysctl_wire_old_buffer(req, 0); 1702 if (error) 1703 return (error); 1704 switch (w.w_op) { 1705 1706 case NET_RT_DUMP: 1707 case NET_RT_FLAGS: 1708 if (af == 0) { /* dump all tables */ 1709 i = 1; 1710 lim = AF_MAX; 1711 } else /* dump only one table */ 1712 i = lim = af; 1713 1714 /* 1715 * take care of llinfo entries, the caller must 1716 * specify an AF 1717 */ 1718 if (w.w_op == NET_RT_FLAGS && 1719 (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) { 1720 if (af != 0) 1721 error = lltable_sysctl_dumparp(af, w.w_req); 1722 else 1723 error = EINVAL; 1724 break; 1725 } 1726 /* 1727 * take care of routing entries 1728 */ 1729 for (error = 0; error == 0 && i <= lim; i++) { 1730 rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i); 1731 if (rnh != NULL) { 1732 RADIX_NODE_HEAD_LOCK(rnh); 1733 error = rnh->rnh_walktree(rnh, 1734 sysctl_dumpentry, &w); 1735 RADIX_NODE_HEAD_UNLOCK(rnh); 1736 } else if (af != 0) 1737 error = EAFNOSUPPORT; 1738 } 1739 break; 1740 1741 case NET_RT_IFLIST: 1742 error = sysctl_iflist(af, &w); 1743 break; 1744 1745 case NET_RT_IFMALIST: 1746 error = sysctl_ifmalist(af, &w); 1747 break; 1748 } 1749 if (w.w_tmem) 1750 free(w.w_tmem, M_RTABLE); 1751 return (error); 1752 } 1753 1754 static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); 1755 1756 /* 1757 * Definitions of protocols supported in the ROUTE domain. 
1758 */ 1759 1760 static struct domain routedomain; /* or at least forward */ 1761 1762 static struct protosw routesw[] = { 1763 { 1764 .pr_type = SOCK_RAW, 1765 .pr_domain = &routedomain, 1766 .pr_flags = PR_ATOMIC|PR_ADDR, 1767 .pr_output = route_output, 1768 .pr_ctlinput = raw_ctlinput, 1769 .pr_init = raw_init, 1770 .pr_usrreqs = &route_usrreqs 1771 } 1772 }; 1773 1774 static struct domain routedomain = { 1775 .dom_family = PF_ROUTE, 1776 .dom_name = "route", 1777 .dom_protosw = routesw, 1778 .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])] 1779 }; 1780 1781 VNET_DOMAIN_SET(route); 1782