1 /*- 2 * Copyright (c) 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 30 * $FreeBSD$ 31 */ 32 33 #include <sys/param.h> 34 #include <sys/domain.h> 35 #include <sys/kernel.h> 36 #include <sys/jail.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/proc.h> 40 #include <sys/protosw.h> 41 #include <sys/signalvar.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/sysctl.h> 45 #include <sys/systm.h> 46 47 #include <net/if.h> 48 #include <net/netisr.h> 49 #include <net/raw_cb.h> 50 #include <net/route.h> 51 52 #include <netinet/in.h> 53 54 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); 55 56 /* NB: these are not modified */ 57 static struct sockaddr route_dst = { 2, PF_ROUTE, }; 58 static struct sockaddr route_src = { 2, PF_ROUTE, }; 59 static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; 60 61 static struct { 62 int ip_count; /* attached w/ AF_INET */ 63 int ip6_count; /* attached w/ AF_INET6 */ 64 int ipx_count; /* attached w/ AF_IPX */ 65 int any_count; /* total attached */ 66 } route_cb; 67 68 struct mtx rtsock_mtx; 69 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); 70 71 #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) 72 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) 73 #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) 74 75 static struct ifqueue rtsintrq; 76 77 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); 78 SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW, 79 &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length"); 80 81 struct walkarg { 82 int w_tmemsize; 83 int w_op, w_arg; 84 caddr_t w_tmem; 85 struct sysctl_req *w_req; 86 }; 87 88 static void rts_input(struct mbuf *m); 89 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo); 90 static int rt_msg2(int type, struct rt_addrinfo *rtinfo, 91 caddr_t cp, struct walkarg *w); 92 static int rt_xaddrs(caddr_t cp, caddr_t cplim, 93 struct rt_addrinfo *rtinfo); 94 static int sysctl_dumpentry(struct 
radix_node *rn, void *vw); 95 static int sysctl_iflist(int af, struct walkarg *w); 96 static int sysctl_ifmalist(int af, struct walkarg *w); 97 static int route_output(struct mbuf *m, struct socket *so); 98 static void rt_setmetrics(u_long which, const struct rt_metrics *in, 99 struct rt_metrics_lite *out); 100 static void rt_getmetrics(const struct rt_metrics_lite *in, 101 struct rt_metrics *out); 102 static void rt_dispatch(struct mbuf *, const struct sockaddr *); 103 104 static void 105 rts_init(void) 106 { 107 int tmp; 108 109 rtsintrq.ifq_maxlen = 256; 110 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) 111 rtsintrq.ifq_maxlen = tmp; 112 mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF); 113 netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE); 114 } 115 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0) 116 117 static void 118 rts_input(struct mbuf *m) 119 { 120 struct sockproto route_proto; 121 unsigned short *family; 122 struct m_tag *tag; 123 124 route_proto.sp_family = PF_ROUTE; 125 tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); 126 if (tag != NULL) { 127 family = (unsigned short *)(tag + 1); 128 route_proto.sp_protocol = *family; 129 m_tag_delete(m, tag); 130 } else 131 route_proto.sp_protocol = 0; 132 133 raw_input(m, &route_proto, &route_src, &route_dst); 134 } 135 136 /* 137 * It really doesn't make any sense at all for this code to share much 138 * with raw_usrreq.c, since its functionality is so restricted. 
 * XXX
 */

/*
 * Abort a routing socket: delegates to the generic raw socket handler.
 */
static void
rts_abort(struct socket *so)
{

	raw_usrreqs.pru_abort(so);
}

/* pru_accept is EOPNOTSUPP */

/*
 * Attach a new routing socket.  Allocates a zeroed raw control block,
 * attaches it through raw_attach(), accounts the socket in route_cb
 * (per-protocol counter plus the total) under the rtsock lock, and
 * marks the socket connected with SO_USELOOPBACK set.
 *
 * Returns 0 on success, ENOBUFS if the control block could not be
 * allocated, or the error returned by raw_attach().
 */
static int
rts_attach(struct socket *so, int proto, struct thread *td)
{
	struct rawcb *rp;
	int s, error;

	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));

	/* XXX */
	MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
	if (rp == NULL)
		return ENOBUFS;

	/*
	 * The splnet() is necessary to block protocols from sending
	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
	 * this PCB is extant but incompletely initialized.
	 * Probably we should try to do more of this work beforehand and
	 * eliminate the spl.
	 */
	s = splnet();
	so->so_pcb = (caddr_t)rp;
	error = raw_attach(so, proto);
	rp = sotorawcb(so);
	if (error) {
		/* Undo the partial attach and release the control block. */
		splx(s);
		so->so_pcb = NULL;
		free(rp, M_PCB);
		return error;
	}
	RTSOCK_LOCK();
	switch(rp->rcb_proto.sp_protocol) {
	case AF_INET:
		route_cb.ip_count++;
		break;
	case AF_INET6:
		route_cb.ip6_count++;
		break;
	case AF_IPX:
		route_cb.ipx_count++;
		break;
	}
	rp->rcb_faddr = &route_src;
	route_cb.any_count++;
	RTSOCK_UNLOCK();
	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;
	splx(s);
	return 0;
}

/*
 * Bind: delegates to the generic raw socket handler.
 */
static int
rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
}

/*
 * Connect: delegates to the generic raw socket handler.
 */
static int
rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
}

/* pru_connect2 is EOPNOTSUPP */
/* pru_control is EOPNOTSUPP */

/*
 * Detach a routing socket: reverse the route_cb accounting done by
 * rts_attach() under the rtsock lock, then let the generic raw socket
 * code detach the control block.
 */
static void
rts_detach(struct socket *so)
{
	struct rawcb *rp = sotorawcb(so);

	KASSERT(rp != NULL, ("rts_detach: rp == NULL"));

	RTSOCK_LOCK();
	switch(rp->rcb_proto.sp_protocol) {
	case AF_INET:
		route_cb.ip_count--;
		break;
	case AF_INET6:
		route_cb.ip6_count--;
		break;
	case AF_IPX:
		route_cb.ipx_count--;
		break;
	}
	route_cb.any_count--;
	RTSOCK_UNLOCK();
	raw_usrreqs.pru_detach(so);
}

/*
 * Disconnect: delegates to the generic raw socket handler.
 */
static int
rts_disconnect(struct socket *so)
{

	return (raw_usrreqs.pru_disconnect(so));
}

/* pru_listen is EOPNOTSUPP */

/*
 * Peer address: delegates to the generic raw socket handler.
 */
static int
rts_peeraddr(struct socket *so, struct sockaddr **nam)
{

	return (raw_usrreqs.pru_peeraddr(so, nam));
}

/* pru_rcvd is EOPNOTSUPP */
/* pru_rcvoob is EOPNOTSUPP */

/*
 * Send: delegates to the generic raw socket handler.
 */
static int
rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
	 struct mbuf *control, struct thread *td)
{

	return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
}

/* pru_sense is null */

/*
 * Shutdown: delegates to the generic raw socket handler.
 */
static int
rts_shutdown(struct socket *so)
{

	return (raw_usrreqs.pru_shutdown(so));
}

/*
 * Socket address: delegates to the generic raw socket handler.
 */
static int
rts_sockaddr(struct socket *so, struct sockaddr **nam)
{

	return (raw_usrreqs.pru_sockaddr(so, nam));
}

/* User-request switch for routing sockets. */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort =		rts_abort,
	.pru_attach =		rts_attach,
	.pru_bind =		rts_bind,
	.pru_connect =		rts_connect,
	.pru_detach =		rts_detach,
	.pru_disconnect =	rts_disconnect,
	.pru_peeraddr =		rts_peeraddr,
	.pru_send =		rts_send,
	.pru_shutdown =		rts_shutdown,
	.pru_sockaddr =		rts_sockaddr,
};

/*
 * Process one routing message written to a routing socket.
 *
 * The message in mbuf chain m is validated (length, version), copied
 * into a contiguous buffer, and its trailing sockaddrs are parsed with
 * rt_xaddrs().  Every request type other than RTM_GET requires
 * suser().  RTM_ADD, RTM_DELETE, RTM_GET, RTM_CHANGE and RTM_LOCK are
 * handled; anything else fails with EOPNOTSUPP.
 *
 * On the way out (label "flush") the result is recorded in the message
 * header (rtm_errno on failure, RTF_DONE on success), the message is
 * copied back into m and handed to rt_dispatch(), unless the sender
 * cleared SO_USELOOPBACK and no other routing socket is attached.
 *
 * Returns 0 or an errno value; m is always consumed.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
#define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
	struct rt_msghdr *rtm = NULL;
	struct rtentry *rt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	int len, error = 0;
	struct ifnet *ifp = NULL;
	struct ifaddr *ifa = NULL;
	struct sockaddr_in jail;

#define senderr(e) { error = e; goto flush;}
	if (m == NULL || ((m->m_len < sizeof(long)) &&
		       (m = m_pullup(m, sizeof(long))) == NULL))
		return (ENOBUFS);
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("route_output");
	len = m->m_pkthdr.len;
	/*
	 * info is not zeroed until the message has been copied in, so each
	 * early error path clears the RTAX_DST slot that the flush code
	 * passes to rt_dispatch().
	 */
	if (len < sizeof(*rtm) ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	R_Malloc(rtm, struct rt_msghdr *, len);
	if (rtm == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	m_copydata(m, 0, len, (caddr_t)rtm);
	if (rtm->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}
	rtm->rtm_pid = curproc->p_pid;
	bzero(&info, sizeof(info));
	info.rti_addrs = rtm->rtm_addrs;
	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	info.rti_flags = rtm->rtm_flags;
	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
		senderr(EINVAL);
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		/*
		 * Replace the caller's genmask with the key of the node
		 * returned by rn_addmask(), provided the two compare equal
		 * past their first byte.
		 */
		t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
		if (t != NULL &&
		    bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1,
		    (char *)(void *)t->rn_key + 1,
		    ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0)
			info.rti_info[RTAX_GENMASK] =
			    (struct sockaddr *)t->rn_key;
		else
			senderr(ENOBUFS);
	}

	/*
	 * Verify that the caller has the appropriate privilege; RTM_GET
	 * is the only operation the non-superuser is allowed.
	 */
	if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0)
		senderr(error);

	switch (rtm->rtm_type) {
		struct rtentry *saved_nrt;

	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL)
			senderr(EINVAL);
		saved_nrt = NULL;
		error = rtrequest1(RTM_ADD, &info, &saved_nrt);
		if (error == 0 && saved_nrt) {
			RT_LOCK(saved_nrt);
			rt_setmetrics(rtm->rtm_inits,
				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
			RT_REMREF(saved_nrt);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		saved_nrt = NULL;
		error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
		if (error == 0) {
			/*
			 * Report the deleted entry through the RTM_GET
			 * code below; "report" expects rt to be locked.
			 */
			RT_LOCK(saved_nrt);
			rt = saved_nrt;
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL)
			senderr(EAFNOSUPPORT);
		RADIX_NODE_HEAD_LOCK(rnh);
		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
			info.rti_info[RTAX_NETMASK], rnh);
		if (rt == NULL) {	/* XXX looks bogus */
			RADIX_NODE_HEAD_UNLOCK(rnh);
			senderr(ESRCH);
		}
		/* Lock and reference the entry before dropping the head lock. */
		RT_LOCK(rt);
		RT_ADDREF(rt);
		RADIX_NODE_HEAD_UNLOCK(rnh);

		/* 
		 * Fix for PR: 82974
		 *
		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
		 * returns a perfect match in case a netmask is
		 * specified.  For host routes only a longest prefix
		 * match is returned so it is necessary to compare the
		 * existence of the netmask.  If both have a netmask
		 * rnh_lookup() did a perfect match and if none of them
		 * have a netmask both are host routes which is also a
		 * perfect match.
		 */

		if (rtm->rtm_type != RTM_GET && 
		    (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
			RT_UNLOCK(rt);
			senderr(ESRCH);
		}

		switch(rtm->rtm_type) {

		case RTM_GET:
		report:
			/* Also entered from RTM_DELETE above via goto. */
			RT_LOCK_ASSERT(rt);
			info.rti_info[RTAX_DST] = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp) {
					info.rti_info[RTAX_IFP] =
					    ifp->if_addr->ifa_addr;
					if (jailed(so->so_cred)) {
						/*
						 * A jailed requester is shown
						 * the address returned by
						 * prison_getip() instead of
						 * the route's interface
						 * address.
						 */
						bzero(&jail, sizeof(jail));
						jail.sin_family = PF_INET;
						jail.sin_len = sizeof(jail);
						jail.sin_addr.s_addr =
						htonl(prison_getip(so->so_cred));
						info.rti_info[RTAX_IFA] =
						    (struct sockaddr *)&jail;
					} else
						info.rti_info[RTAX_IFA] =
						    rt->rt_ifa->ifa_addr;
					if (ifp->if_flags & IFF_POINTOPOINT)
						info.rti_info[RTAX_BRD] =
						    rt->rt_ifa->ifa_dstaddr;
					rtm->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				rtm->rtm_index = ifp->if_index;
			}
			/*
			 * First call only computes the reply size; grow the
			 * message buffer if the reply does not fit, then
			 * build the reply for real.
			 */
			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
			if (len > rtm->rtm_msglen) {
				struct rt_msghdr *new_rtm;
				R_Malloc(new_rtm, struct rt_msghdr *, len);
				if (new_rtm == NULL) {
					RT_UNLOCK(rt);
					senderr(ENOBUFS);
				}
				bcopy(rtm, new_rtm, rtm->rtm_msglen);
				Free(rtm); rtm = new_rtm;
			}
			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = 0;
			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
			rtm->rtm_addrs = info.rti_addrs;
			break;

		case RTM_CHANGE:
			/*
			 * New gateway could require new ifaddr, ifp;
			 * flags may also be different; ifp may be specified
			 * by ll sockaddr when protocol address is ambiguous
			 */
			if (((rt->rt_flags & RTF_GATEWAY) &&
			     info.rti_info[RTAX_GATEWAY] != NULL) ||
			    info.rti_info[RTAX_IFP] != NULL ||
			    (info.rti_info[RTAX_IFA] != NULL &&
			     !sa_equal(info.rti_info[RTAX_IFA],
				       rt->rt_ifa->ifa_addr))) {
				/* rt_getifa() is called with rt unlocked. */
				RT_UNLOCK(rt);
				if ((error = rt_getifa(&info)) != 0)
					senderr(error);
				RT_LOCK(rt);
			}
			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
					info.rti_info[RTAX_GATEWAY])) != 0) {
				RT_UNLOCK(rt);
				senderr(error);
			}
			if ((ifa = info.rti_ifa) != NULL) {
				struct ifaddr *oifa = rt->rt_ifa;
				if (oifa != ifa) {
					/*
					 * Switch the route to the new ifaddr:
					 * notify and release the old one,
					 * then reference the new one.
					 */
					if (oifa) {
						if (oifa->ifa_rtrequest)
							oifa->ifa_rtrequest(
							    RTM_DELETE, rt,
							    &info);
						IFAFREE(oifa);
					}
				        IFAREF(ifa);
				        rt->rt_ifa = ifa;
				        rt->rt_ifp = info.rti_ifp;
				}
			}
			/* Allow some flags to be toggled on change. */
			if (rtm->rtm_fmask & RTF_FMASK)
				rt->rt_flags = (rt->rt_flags &
				    ~rtm->rtm_fmask) |
				    (rtm->rtm_flags & rtm->rtm_fmask);
			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
					&rt->rt_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
			if (info.rti_info[RTAX_GENMASK])
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			/* FALLTHROUGH */
		case RTM_LOCK:
			/* We don't support locks anymore */
			break;
		}
		RT_UNLOCK(rt);
		break;

	default:
		senderr(EOPNOTSUPP);
	}

flush:
	/* Record the outcome in the message header, if there is one. */
	if (rtm) {
		if (error)
			rtm->rtm_errno = error;
		else
			rtm->rtm_flags |= RTF_DONE;
	}
	if (rt)		/* XXX can this be true? */
		RTFREE(rt);
    {
	struct rawcb *rp = NULL;
	/*
	 * Check to see if we don't want our own messages.
	 */
	if ((so->so_options & SO_USELOOPBACK) == 0) {
		if (route_cb.any_count <= 1) {
			/* Nobody else is listening: nothing to deliver. */
			if (rtm)
				Free(rtm);
			m_freem(m);
			return (error);
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	if (rtm) {
		/* Copy the reply into m and make m exactly rtm_msglen long. */
		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
		if (m->m_pkthdr.len < rtm->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
		Free(rtm);
	}
	if (m) {
		if (rp) {
			/*
			 * XXX ensure we don't get a copy by
			 * invalidating our protocol
			 */
			unsigned short family = rp->rcb_proto.sp_family;
			rp->rcb_proto.sp_family = 0;
			rt_dispatch(m, info.rti_info[RTAX_DST]);
			rp->rcb_proto.sp_family = family;
		} else
			rt_dispatch(m, info.rti_info[RTAX_DST]);
	}
    }
	return (error);
#undef	sa_equal
}

/*
 * Copy the metrics selected by the bits in "which" from a user-format
 * rt_metrics into the in-kernel rt_metrics_lite.
 */
static void
rt_setmetrics(u_long which, const struct rt_metrics *in,
	struct rt_metrics_lite *out)
{
#define metric(f, e) if (which & (f)) out->e = in->e;
	/*
	 * Only these are stored in the routing entry since introduction
	 * of tcp hostcache. The rest is ignored.
	 */
	metric(RTV_MTU, rmx_mtu);
	metric(RTV_EXPIRE, rmx_expire);
#undef metric
}

/*
 * Export the in-kernel metrics to a user-format rt_metrics.  The output
 * is zeroed first; only rmx_mtu and rmx_expire are filled in.
 */
static void
rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
{
#define metric(e) out->e = in->e;
	bzero(out, sizeof(*out));
	metric(rmx_mtu);
	metric(rmx_expire);
#undef metric
}

/*
 * Extract the addresses of the passed sockaddrs.
 * Do a little sanity checking so as to avoid bad memory references.
 * This data is derived straight from userland.
626 */ 627 static int 628 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 629 { 630 struct sockaddr *sa; 631 int i; 632 633 for (i = 0; i < RTAX_MAX && cp < cplim; i++) { 634 if ((rtinfo->rti_addrs & (1 << i)) == 0) 635 continue; 636 sa = (struct sockaddr *)cp; 637 /* 638 * It won't fit. 639 */ 640 if (cp + sa->sa_len > cplim) 641 return (EINVAL); 642 /* 643 * there are no more.. quit now 644 * If there are more bits, they are in error. 645 * I've seen this. route(1) can evidently generate these. 646 * This causes kernel to core dump. 647 * for compatibility, If we see this, point to a safe address. 648 */ 649 if (sa->sa_len == 0) { 650 rtinfo->rti_info[i] = &sa_zero; 651 return (0); /* should be EINVAL but for compat */ 652 } 653 /* accept it */ 654 rtinfo->rti_info[i] = sa; 655 cp += SA_SIZE(sa); 656 } 657 return (0); 658 } 659 660 static struct mbuf * 661 rt_msg1(int type, struct rt_addrinfo *rtinfo) 662 { 663 struct rt_msghdr *rtm; 664 struct mbuf *m; 665 int i; 666 struct sockaddr *sa; 667 int len, dlen; 668 669 switch (type) { 670 671 case RTM_DELADDR: 672 case RTM_NEWADDR: 673 len = sizeof(struct ifa_msghdr); 674 break; 675 676 case RTM_DELMADDR: 677 case RTM_NEWMADDR: 678 len = sizeof(struct ifma_msghdr); 679 break; 680 681 case RTM_IFINFO: 682 len = sizeof(struct if_msghdr); 683 break; 684 685 case RTM_IFANNOUNCE: 686 case RTM_IEEE80211: 687 len = sizeof(struct if_announcemsghdr); 688 break; 689 690 default: 691 len = sizeof(struct rt_msghdr); 692 } 693 if (len > MCLBYTES) 694 panic("rt_msg1"); 695 m = m_gethdr(M_DONTWAIT, MT_DATA); 696 if (m && len > MHLEN) { 697 MCLGET(m, M_DONTWAIT); 698 if ((m->m_flags & M_EXT) == 0) { 699 m_free(m); 700 m = NULL; 701 } 702 } 703 if (m == NULL) 704 return (m); 705 m->m_pkthdr.len = m->m_len = len; 706 m->m_pkthdr.rcvif = NULL; 707 rtm = mtod(m, struct rt_msghdr *); 708 bzero((caddr_t)rtm, len); 709 for (i = 0; i < RTAX_MAX; i++) { 710 if ((sa = rtinfo->rti_info[i]) == NULL) 711 continue; 712 
rtinfo->rti_addrs |= (1 << i); 713 dlen = SA_SIZE(sa); 714 m_copyback(m, len, dlen, (caddr_t)sa); 715 len += dlen; 716 } 717 if (m->m_pkthdr.len != len) { 718 m_freem(m); 719 return (NULL); 720 } 721 rtm->rtm_msglen = len; 722 rtm->rtm_version = RTM_VERSION; 723 rtm->rtm_type = type; 724 return (m); 725 } 726 727 static int 728 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) 729 { 730 int i; 731 int len, dlen, second_time = 0; 732 caddr_t cp0; 733 734 rtinfo->rti_addrs = 0; 735 again: 736 switch (type) { 737 738 case RTM_DELADDR: 739 case RTM_NEWADDR: 740 len = sizeof(struct ifa_msghdr); 741 break; 742 743 case RTM_IFINFO: 744 len = sizeof(struct if_msghdr); 745 break; 746 747 case RTM_NEWMADDR: 748 len = sizeof(struct ifma_msghdr); 749 break; 750 751 default: 752 len = sizeof(struct rt_msghdr); 753 } 754 cp0 = cp; 755 if (cp0) 756 cp += len; 757 for (i = 0; i < RTAX_MAX; i++) { 758 struct sockaddr *sa; 759 760 if ((sa = rtinfo->rti_info[i]) == NULL) 761 continue; 762 rtinfo->rti_addrs |= (1 << i); 763 dlen = SA_SIZE(sa); 764 if (cp) { 765 bcopy((caddr_t)sa, cp, (unsigned)dlen); 766 cp += dlen; 767 } 768 len += dlen; 769 } 770 len = ALIGN(len); 771 if (cp == NULL && w != NULL && !second_time) { 772 struct walkarg *rw = w; 773 774 if (rw->w_req) { 775 if (rw->w_tmemsize < len) { 776 if (rw->w_tmem) 777 free(rw->w_tmem, M_RTABLE); 778 rw->w_tmem = (caddr_t) 779 malloc(len, M_RTABLE, M_NOWAIT); 780 if (rw->w_tmem) 781 rw->w_tmemsize = len; 782 } 783 if (rw->w_tmem) { 784 cp = rw->w_tmem; 785 second_time = 1; 786 goto again; 787 } 788 } 789 } 790 if (cp) { 791 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 792 793 rtm->rtm_version = RTM_VERSION; 794 rtm->rtm_type = type; 795 rtm->rtm_msglen = len; 796 } 797 return (len); 798 } 799 800 /* 801 * This routine is called to generate a message from the routing 802 * socket indicating that a redirect has occured, a routing lookup 803 * has failed, or that a protocol has detected timeouts to a 
particular 804 * destination. 805 */ 806 void 807 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) 808 { 809 struct rt_msghdr *rtm; 810 struct mbuf *m; 811 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 812 813 if (route_cb.any_count == 0) 814 return; 815 m = rt_msg1(type, rtinfo); 816 if (m == NULL) 817 return; 818 rtm = mtod(m, struct rt_msghdr *); 819 rtm->rtm_flags = RTF_DONE | flags; 820 rtm->rtm_errno = error; 821 rtm->rtm_addrs = rtinfo->rti_addrs; 822 rt_dispatch(m, sa); 823 } 824 825 /* 826 * This routine is called to generate a message from the routing 827 * socket indicating that the status of a network interface has changed. 828 */ 829 void 830 rt_ifmsg(struct ifnet *ifp) 831 { 832 struct if_msghdr *ifm; 833 struct mbuf *m; 834 struct rt_addrinfo info; 835 836 if (route_cb.any_count == 0) 837 return; 838 bzero((caddr_t)&info, sizeof(info)); 839 m = rt_msg1(RTM_IFINFO, &info); 840 if (m == NULL) 841 return; 842 ifm = mtod(m, struct if_msghdr *); 843 ifm->ifm_index = ifp->if_index; 844 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; 845 ifm->ifm_data = ifp->if_data; 846 ifm->ifm_addrs = 0; 847 rt_dispatch(m, NULL); 848 } 849 850 /* 851 * This is called to generate messages from the routing socket 852 * indicating a network interface has had addresses associated with it. 853 * if we ever reverse the logic and replace messages TO the routing 854 * socket indicate a request to configure interfaces, then it will 855 * be unnecessary as the routing socket will automatically generate 856 * copies of it. 
857 */ 858 void 859 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) 860 { 861 struct rt_addrinfo info; 862 struct sockaddr *sa = NULL; 863 int pass; 864 struct mbuf *m = NULL; 865 struct ifnet *ifp = ifa->ifa_ifp; 866 867 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 868 ("unexpected cmd %u", cmd)); 869 870 if (route_cb.any_count == 0) 871 return; 872 for (pass = 1; pass < 3; pass++) { 873 bzero((caddr_t)&info, sizeof(info)); 874 if ((cmd == RTM_ADD && pass == 1) || 875 (cmd == RTM_DELETE && pass == 2)) { 876 struct ifa_msghdr *ifam; 877 int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; 878 879 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; 880 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; 881 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 882 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 883 if ((m = rt_msg1(ncmd, &info)) == NULL) 884 continue; 885 ifam = mtod(m, struct ifa_msghdr *); 886 ifam->ifam_index = ifp->if_index; 887 ifam->ifam_metric = ifa->ifa_metric; 888 ifam->ifam_flags = ifa->ifa_flags; 889 ifam->ifam_addrs = info.rti_addrs; 890 } 891 if ((cmd == RTM_ADD && pass == 2) || 892 (cmd == RTM_DELETE && pass == 1)) { 893 struct rt_msghdr *rtm; 894 895 if (rt == NULL) 896 continue; 897 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 898 info.rti_info[RTAX_DST] = sa = rt_key(rt); 899 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 900 if ((m = rt_msg1(cmd, &info)) == NULL) 901 continue; 902 rtm = mtod(m, struct rt_msghdr *); 903 rtm->rtm_index = ifp->if_index; 904 rtm->rtm_flags |= rt->rt_flags; 905 rtm->rtm_errno = error; 906 rtm->rtm_addrs = info.rti_addrs; 907 } 908 rt_dispatch(m, sa); 909 } 910 } 911 912 /* 913 * This is the analogue to the rt_newaddrmsg which performs the same 914 * function but for multicast group memberhips. This is easier since 915 * there is no route state to worry about. 
916 */ 917 void 918 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) 919 { 920 struct rt_addrinfo info; 921 struct mbuf *m = NULL; 922 struct ifnet *ifp = ifma->ifma_ifp; 923 struct ifma_msghdr *ifmam; 924 925 if (route_cb.any_count == 0) 926 return; 927 928 bzero((caddr_t)&info, sizeof(info)); 929 info.rti_info[RTAX_IFA] = ifma->ifma_addr; 930 info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL; 931 /* 932 * If a link-layer address is present, present it as a ``gateway'' 933 * (similarly to how ARP entries, e.g., are presented). 934 */ 935 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; 936 m = rt_msg1(cmd, &info); 937 if (m == NULL) 938 return; 939 ifmam = mtod(m, struct ifma_msghdr *); 940 ifmam->ifmam_index = ifp->if_index; 941 ifmam->ifmam_addrs = info.rti_addrs; 942 rt_dispatch(m, ifma->ifma_addr); 943 } 944 945 static struct mbuf * 946 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, 947 struct rt_addrinfo *info) 948 { 949 struct if_announcemsghdr *ifan; 950 struct mbuf *m; 951 952 if (route_cb.any_count == 0) 953 return NULL; 954 bzero((caddr_t)info, sizeof(*info)); 955 m = rt_msg1(type, info); 956 if (m != NULL) { 957 ifan = mtod(m, struct if_announcemsghdr *); 958 ifan->ifan_index = ifp->if_index; 959 strlcpy(ifan->ifan_name, ifp->if_xname, 960 sizeof(ifan->ifan_name)); 961 ifan->ifan_what = what; 962 } 963 return m; 964 } 965 966 /* 967 * This is called to generate routing socket messages indicating 968 * IEEE80211 wireless events. 969 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. 970 */ 971 void 972 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) 973 { 974 struct mbuf *m; 975 struct rt_addrinfo info; 976 977 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); 978 if (m != NULL) { 979 /* 980 * Append the ieee80211 data. Try to stick it in the 981 * mbuf containing the ifannounce msg; otherwise allocate 982 * a new mbuf and append. 983 * 984 * NB: we assume m is a single mbuf. 
985 */ 986 if (data_len > M_TRAILINGSPACE(m)) { 987 struct mbuf *n = m_get(M_NOWAIT, MT_DATA); 988 if (n == NULL) { 989 m_freem(m); 990 return; 991 } 992 bcopy(data, mtod(n, void *), data_len); 993 n->m_len = data_len; 994 m->m_next = n; 995 } else if (data_len > 0) { 996 bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); 997 m->m_len += data_len; 998 } 999 if (m->m_flags & M_PKTHDR) 1000 m->m_pkthdr.len += data_len; 1001 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; 1002 rt_dispatch(m, NULL); 1003 } 1004 } 1005 1006 /* 1007 * This is called to generate routing socket messages indicating 1008 * network interface arrival and departure. 1009 */ 1010 void 1011 rt_ifannouncemsg(struct ifnet *ifp, int what) 1012 { 1013 struct mbuf *m; 1014 struct rt_addrinfo info; 1015 1016 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); 1017 if (m != NULL) 1018 rt_dispatch(m, NULL); 1019 } 1020 1021 static void 1022 rt_dispatch(struct mbuf *m, const struct sockaddr *sa) 1023 { 1024 struct m_tag *tag; 1025 1026 /* 1027 * Preserve the family from the sockaddr, if any, in an m_tag for 1028 * use when injecting the mbuf into the routing socket buffer from 1029 * the netisr. 1030 */ 1031 if (sa != NULL) { 1032 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), 1033 M_NOWAIT); 1034 if (tag == NULL) { 1035 m_freem(m); 1036 return; 1037 } 1038 *(unsigned short *)(tag + 1) = sa->sa_family; 1039 m_tag_prepend(m, tag); 1040 } 1041 netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ 1042 } 1043 1044 /* 1045 * This is used in dumping the kernel table via sysctl(). 
1046 */ 1047 static int 1048 sysctl_dumpentry(struct radix_node *rn, void *vw) 1049 { 1050 struct walkarg *w = vw; 1051 struct rtentry *rt = (struct rtentry *)rn; 1052 int error = 0, size; 1053 struct rt_addrinfo info; 1054 1055 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1056 return 0; 1057 bzero((caddr_t)&info, sizeof(info)); 1058 info.rti_info[RTAX_DST] = rt_key(rt); 1059 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1060 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1061 info.rti_info[RTAX_GENMASK] = rt->rt_genmask; 1062 if (rt->rt_ifp) { 1063 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; 1064 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1065 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) 1066 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1067 } 1068 size = rt_msg2(RTM_GET, &info, NULL, w); 1069 if (w->w_req && w->w_tmem) { 1070 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1071 1072 rtm->rtm_flags = rt->rt_flags; 1073 rtm->rtm_use = rt->rt_rmx.rmx_pksent; 1074 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1075 rtm->rtm_index = rt->rt_ifp->if_index; 1076 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; 1077 rtm->rtm_addrs = info.rti_addrs; 1078 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); 1079 return (error); 1080 } 1081 return (error); 1082 } 1083 1084 static int 1085 sysctl_iflist(int af, struct walkarg *w) 1086 { 1087 struct ifnet *ifp; 1088 struct ifaddr *ifa; 1089 struct rt_addrinfo info; 1090 int len, error = 0; 1091 1092 bzero((caddr_t)&info, sizeof(info)); 1093 IFNET_RLOCK(); 1094 TAILQ_FOREACH(ifp, &ifnet, if_link) { 1095 if (w->w_arg && w->w_arg != ifp->if_index) 1096 continue; 1097 ifa = ifp->if_addr; 1098 info.rti_info[RTAX_IFP] = ifa->ifa_addr; 1099 len = rt_msg2(RTM_IFINFO, &info, NULL, w); 1100 info.rti_info[RTAX_IFP] = NULL; 1101 if (w->w_req && w->w_tmem) { 1102 struct if_msghdr *ifm; 1103 1104 ifm = (struct if_msghdr *)w->w_tmem; 1105 ifm->ifm_index = ifp->if_index; 1106 ifm->ifm_flags = ifp->if_flags | 
ifp->if_drv_flags; 1107 ifm->ifm_data = ifp->if_data; 1108 ifm->ifm_addrs = info.rti_addrs; 1109 error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len); 1110 if (error) 1111 goto done; 1112 } 1113 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { 1114 if (af && af != ifa->ifa_addr->sa_family) 1115 continue; 1116 if (jailed(curthread->td_ucred) && 1117 prison_if(curthread->td_ucred, ifa->ifa_addr)) 1118 continue; 1119 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1120 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1121 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1122 len = rt_msg2(RTM_NEWADDR, &info, NULL, w); 1123 if (w->w_req && w->w_tmem) { 1124 struct ifa_msghdr *ifam; 1125 1126 ifam = (struct ifa_msghdr *)w->w_tmem; 1127 ifam->ifam_index = ifa->ifa_ifp->if_index; 1128 ifam->ifam_flags = ifa->ifa_flags; 1129 ifam->ifam_metric = ifa->ifa_metric; 1130 ifam->ifam_addrs = info.rti_addrs; 1131 error = SYSCTL_OUT(w->w_req, w->w_tmem, len); 1132 if (error) 1133 goto done; 1134 } 1135 } 1136 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1137 info.rti_info[RTAX_BRD] = NULL; 1138 } 1139 done: 1140 IFNET_RUNLOCK(); 1141 return (error); 1142 } 1143 1144 int 1145 sysctl_ifmalist(int af, struct walkarg *w) 1146 { 1147 struct ifnet *ifp; 1148 struct ifmultiaddr *ifma; 1149 struct rt_addrinfo info; 1150 int len, error = 0; 1151 struct ifaddr *ifa; 1152 1153 bzero((caddr_t)&info, sizeof(info)); 1154 IFNET_RLOCK(); 1155 TAILQ_FOREACH(ifp, &ifnet, if_link) { 1156 if (w->w_arg && w->w_arg != ifp->if_index) 1157 continue; 1158 ifa = ifp->if_addr; 1159 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; 1160 IF_ADDR_LOCK(ifp); 1161 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1162 if (af && af != ifma->ifma_addr->sa_family) 1163 continue; 1164 if (jailed(curproc->p_ucred) && 1165 prison_if(curproc->p_ucred, ifma->ifma_addr)) 1166 continue; 1167 info.rti_info[RTAX_IFA] = ifma->ifma_addr; 1168 info.rti_info[RTAX_GATEWAY] = 1169 (ifma->ifma_addr->sa_family != AF_LINK) ? 
1170 ifma->ifma_lladdr : NULL; 1171 len = rt_msg2(RTM_NEWMADDR, &info, NULL, w); 1172 if (w->w_req && w->w_tmem) { 1173 struct ifma_msghdr *ifmam; 1174 1175 ifmam = (struct ifma_msghdr *)w->w_tmem; 1176 ifmam->ifmam_index = ifma->ifma_ifp->if_index; 1177 ifmam->ifmam_flags = 0; 1178 ifmam->ifmam_addrs = info.rti_addrs; 1179 error = SYSCTL_OUT(w->w_req, w->w_tmem, len); 1180 if (error) { 1181 IF_ADDR_UNLOCK(ifp); 1182 goto done; 1183 } 1184 } 1185 } 1186 IF_ADDR_UNLOCK(ifp); 1187 } 1188 done: 1189 IFNET_RUNLOCK(); 1190 return (error); 1191 } 1192 1193 static int 1194 sysctl_rtsock(SYSCTL_HANDLER_ARGS) 1195 { 1196 int *name = (int *)arg1; 1197 u_int namelen = arg2; 1198 struct radix_node_head *rnh; 1199 int i, lim, error = EINVAL; 1200 u_char af; 1201 struct walkarg w; 1202 1203 name ++; 1204 namelen--; 1205 if (req->newptr) 1206 return (EPERM); 1207 if (namelen != 3) 1208 return ((namelen < 3) ? EISDIR : ENOTDIR); 1209 af = name[0]; 1210 if (af > AF_MAX) 1211 return (EINVAL); 1212 bzero(&w, sizeof(w)); 1213 w.w_op = name[1]; 1214 w.w_arg = name[2]; 1215 w.w_req = req; 1216 1217 error = sysctl_wire_old_buffer(req, 0); 1218 if (error) 1219 return (error); 1220 switch (w.w_op) { 1221 1222 case NET_RT_DUMP: 1223 case NET_RT_FLAGS: 1224 if (af == 0) { /* dump all tables */ 1225 i = 1; 1226 lim = AF_MAX; 1227 } else /* dump only one table */ 1228 i = lim = af; 1229 for (error = 0; error == 0 && i <= lim; i++) 1230 if ((rnh = rt_tables[i]) != NULL) { 1231 RADIX_NODE_HEAD_LOCK(rnh); 1232 error = rnh->rnh_walktree(rnh, 1233 sysctl_dumpentry, &w); 1234 RADIX_NODE_HEAD_UNLOCK(rnh); 1235 } else if (af != 0) 1236 error = EAFNOSUPPORT; 1237 break; 1238 1239 case NET_RT_IFLIST: 1240 error = sysctl_iflist(af, &w); 1241 break; 1242 1243 case NET_RT_IFMALIST: 1244 error = sysctl_ifmalist(af, &w); 1245 break; 1246 } 1247 if (w.w_tmem) 1248 free(w.w_tmem, M_RTABLE); 1249 return (error); 1250 } 1251 1252 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); 1253 1254 
/* 1255 * Definitions of protocols supported in the ROUTE domain. 1256 */ 1257 1258 static struct domain routedomain; /* or at least forward */ 1259 1260 static struct protosw routesw[] = { 1261 { 1262 .pr_type = SOCK_RAW, 1263 .pr_domain = &routedomain, 1264 .pr_flags = PR_ATOMIC|PR_ADDR, 1265 .pr_output = route_output, 1266 .pr_ctlinput = raw_ctlinput, 1267 .pr_init = raw_init, 1268 .pr_usrreqs = &route_usrreqs 1269 } 1270 }; 1271 1272 static struct domain routedomain = { 1273 .dom_family = PF_ROUTE, 1274 .dom_name = "route", 1275 .dom_protosw = routesw, 1276 .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])] 1277 }; 1278 1279 DOMAIN_SET(route); 1280