1 /* 2 * Copyright (c) 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 *
 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/netisr.h>
#include <net/raw_cb.h>
#include <net/route.h>

#include <netinet/in.h>

MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");

/* NB: these are not modified */
static struct sockaddr route_dst = { 2, PF_ROUTE, };
static struct sockaddr route_src = { 2, PF_ROUTE, };
/* Stand-in address handed out by rt_xaddrs() for a zero-length sockaddr. */
static struct sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };

/*
 * Counts of attached routing sockets, maintained by rts_attach() and
 * rts_detach() under RTSOCK_LOCK().  The per-family counters are keyed
 * on the protocol number the socket was created with.
 */
static struct {
	int	ip_count;	/* attached w/ AF_INET */
	int	ip6_count;	/* attached w/ AF_INET6 */
	int	ipx_count;	/* attached w/ AF_IPX */
	int	any_count;	/* total attached */
} route_cb;

/* Mutex protecting updates of route_cb. */
struct mtx rtsock_mtx;
MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);

#define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
#define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
#define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)

/* Queue that rt_dispatch() feeds and rts_input() drains via NETISR_ROUTE. */
static struct	ifqueue rtsintrq;

/*
 * State carried through the sysctl dump routines.
 */
struct walkarg {
	int	w_tmemsize;		/* size in bytes of w_tmem */
	int	w_op, w_arg;		/* sysctl name[1] and name[2] */
	caddr_t	w_tmem;			/* scratch buffer managed by rt_msg2() */
	struct sysctl_req *w_req;	/* request the output is copied to */
};

static void	rts_input(struct mbuf *m);
static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
static int	rt_msg2(int type, struct rt_addrinfo *rtinfo,
			caddr_t cp, struct walkarg *w);
static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
			struct rt_addrinfo *rtinfo);
static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
static int	sysctl_iflist(int af, struct walkarg *w);
static int	sysctl_ifmalist(int af, struct walkarg *w);
static int	route_output(struct mbuf *m, struct socket *so);
94 static void rt_setmetrics(u_long which, const struct rt_metrics *in, 95 struct rt_metrics_lite *out); 96 static void rt_getmetrics(const struct rt_metrics_lite *in, 97 struct rt_metrics *out); 98 static void rt_dispatch(struct mbuf *, const struct sockaddr *); 99 100 static void 101 rts_init(void) 102 { 103 104 rtsintrq.ifq_maxlen = IFQ_MAXLEN; 105 mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF); 106 netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE); 107 } 108 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0) 109 110 static void 111 rts_input(struct mbuf *m) 112 { 113 struct sockproto route_proto; 114 unsigned short *family; 115 struct m_tag *tag; 116 117 route_proto.sp_family = PF_ROUTE; 118 tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); 119 if (tag != NULL) { 120 family = (unsigned short *)(tag + 1); 121 route_proto.sp_protocol = *family; 122 m_tag_delete(m, tag); 123 } else 124 route_proto.sp_protocol = 0; 125 126 raw_input(m, &route_proto, &route_src, &route_dst); 127 } 128 129 /* 130 * It really doesn't make any sense at all for this code to share much 131 * with raw_usrreq.c, since its functionality is so restricted. XXX 132 */ 133 static int 134 rts_abort(struct socket *so) 135 { 136 int s, error; 137 s = splnet(); 138 error = raw_usrreqs.pru_abort(so); 139 splx(s); 140 return error; 141 } 142 143 /* pru_accept is EOPNOTSUPP */ 144 145 static int 146 rts_attach(struct socket *so, int proto, struct thread *td) 147 { 148 struct rawcb *rp; 149 int s, error; 150 151 if (sotorawcb(so) != NULL) 152 return EISCONN; /* XXX panic? */ 153 /* XXX */ 154 MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO); 155 if (rp == NULL) 156 return ENOBUFS; 157 158 /* 159 * The splnet() is necessary to block protocols from sending 160 * error notifications (like RTM_REDIRECT or RTM_LOSING) while 161 * this PCB is extant but incompletely initialized. 
162 * Probably we should try to do more of this work beforehand and 163 * eliminate the spl. 164 */ 165 s = splnet(); 166 so->so_pcb = (caddr_t)rp; 167 error = raw_attach(so, proto); 168 rp = sotorawcb(so); 169 if (error) { 170 splx(s); 171 so->so_pcb = NULL; 172 free(rp, M_PCB); 173 return error; 174 } 175 RTSOCK_LOCK(); 176 switch(rp->rcb_proto.sp_protocol) { 177 case AF_INET: 178 route_cb.ip_count++; 179 break; 180 case AF_INET6: 181 route_cb.ip6_count++; 182 break; 183 case AF_IPX: 184 route_cb.ipx_count++; 185 break; 186 } 187 rp->rcb_faddr = &route_src; 188 route_cb.any_count++; 189 RTSOCK_UNLOCK(); 190 soisconnected(so); 191 so->so_options |= SO_USELOOPBACK; 192 splx(s); 193 return 0; 194 } 195 196 static int 197 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 198 { 199 int s, error; 200 s = splnet(); 201 error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */ 202 splx(s); 203 return error; 204 } 205 206 static int 207 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 208 { 209 int s, error; 210 s = splnet(); 211 error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */ 212 splx(s); 213 return error; 214 } 215 216 /* pru_connect2 is EOPNOTSUPP */ 217 /* pru_control is EOPNOTSUPP */ 218 219 static int 220 rts_detach(struct socket *so) 221 { 222 struct rawcb *rp = sotorawcb(so); 223 int s, error; 224 225 s = splnet(); 226 if (rp != NULL) { 227 RTSOCK_LOCK(); 228 switch(rp->rcb_proto.sp_protocol) { 229 case AF_INET: 230 route_cb.ip_count--; 231 break; 232 case AF_INET6: 233 route_cb.ip6_count--; 234 break; 235 case AF_IPX: 236 route_cb.ipx_count--; 237 break; 238 } 239 route_cb.any_count--; 240 RTSOCK_UNLOCK(); 241 } 242 error = raw_usrreqs.pru_detach(so); 243 splx(s); 244 return error; 245 } 246 247 static int 248 rts_disconnect(struct socket *so) 249 { 250 int s, error; 251 s = splnet(); 252 error = raw_usrreqs.pru_disconnect(so); 253 splx(s); 254 return error; 255 } 256 257 /* pru_listen is 
EOPNOTSUPP */ 258 259 static int 260 rts_peeraddr(struct socket *so, struct sockaddr **nam) 261 { 262 int s, error; 263 s = splnet(); 264 error = raw_usrreqs.pru_peeraddr(so, nam); 265 splx(s); 266 return error; 267 } 268 269 /* pru_rcvd is EOPNOTSUPP */ 270 /* pru_rcvoob is EOPNOTSUPP */ 271 272 static int 273 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 274 struct mbuf *control, struct thread *td) 275 { 276 int s, error; 277 s = splnet(); 278 error = raw_usrreqs.pru_send(so, flags, m, nam, control, td); 279 splx(s); 280 return error; 281 } 282 283 /* pru_sense is null */ 284 285 static int 286 rts_shutdown(struct socket *so) 287 { 288 int s, error; 289 s = splnet(); 290 error = raw_usrreqs.pru_shutdown(so); 291 splx(s); 292 return error; 293 } 294 295 static int 296 rts_sockaddr(struct socket *so, struct sockaddr **nam) 297 { 298 int s, error; 299 s = splnet(); 300 error = raw_usrreqs.pru_sockaddr(so, nam); 301 splx(s); 302 return error; 303 } 304 305 static struct pr_usrreqs route_usrreqs = { 306 rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect, 307 pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect, 308 pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, 309 rts_send, pru_sense_null, rts_shutdown, rts_sockaddr, 310 sosend, soreceive, sopoll, pru_sosetlabel_null 311 }; 312 313 /*ARGSUSED*/ 314 static int 315 route_output(struct mbuf *m, struct socket *so) 316 { 317 #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) 318 struct rt_msghdr *rtm = NULL; 319 struct rtentry *rt = NULL; 320 struct radix_node_head *rnh; 321 struct rt_addrinfo info; 322 int len, error = 0; 323 struct ifnet *ifp = NULL; 324 struct ifaddr *ifa = NULL; 325 struct sockaddr_in jail; 326 327 #define senderr(e) { error = e; goto flush;} 328 if (m == NULL || ((m->m_len < sizeof(long)) && 329 (m = m_pullup(m, sizeof(long))) == NULL)) 330 return (ENOBUFS); 331 if ((m->m_flags & M_PKTHDR) == 0) 332 
panic("route_output"); 333 len = m->m_pkthdr.len; 334 if (len < sizeof(*rtm) || 335 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 336 info.rti_info[RTAX_DST] = NULL; 337 senderr(EINVAL); 338 } 339 R_Malloc(rtm, struct rt_msghdr *, len); 340 if (rtm == NULL) { 341 info.rti_info[RTAX_DST] = NULL; 342 senderr(ENOBUFS); 343 } 344 m_copydata(m, 0, len, (caddr_t)rtm); 345 if (rtm->rtm_version != RTM_VERSION) { 346 info.rti_info[RTAX_DST] = NULL; 347 senderr(EPROTONOSUPPORT); 348 } 349 rtm->rtm_pid = curproc->p_pid; 350 bzero(&info, sizeof(info)); 351 info.rti_addrs = rtm->rtm_addrs; 352 if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { 353 info.rti_info[RTAX_DST] = NULL; 354 senderr(EINVAL); 355 } 356 info.rti_flags = rtm->rtm_flags; 357 if (info.rti_info[RTAX_DST] == NULL || 358 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 359 (info.rti_info[RTAX_GATEWAY] != NULL && 360 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) 361 senderr(EINVAL); 362 if (info.rti_info[RTAX_GENMASK]) { 363 struct radix_node *t; 364 t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1); 365 if (t != NULL && 366 bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1, 367 (char *)(void *)t->rn_key + 1, 368 ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0) 369 info.rti_info[RTAX_GENMASK] = 370 (struct sockaddr *)t->rn_key; 371 else 372 senderr(ENOBUFS); 373 } 374 375 /* 376 * Verify that the caller has the appropriate privilege; RTM_GET 377 * is the only operation the non-superuser is allowed. 
378 */ 379 if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0) 380 senderr(error); 381 382 switch (rtm->rtm_type) { 383 struct rtentry *saved_nrt; 384 385 case RTM_ADD: 386 if (info.rti_info[RTAX_GATEWAY] == NULL) 387 senderr(EINVAL); 388 saved_nrt = NULL; 389 error = rtrequest1(RTM_ADD, &info, &saved_nrt); 390 if (error == 0 && saved_nrt) { 391 RT_LOCK(saved_nrt); 392 rt_setmetrics(rtm->rtm_inits, 393 &rtm->rtm_rmx, &saved_nrt->rt_rmx); 394 RT_REMREF(saved_nrt); 395 saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK]; 396 RT_UNLOCK(saved_nrt); 397 } 398 break; 399 400 case RTM_DELETE: 401 saved_nrt = NULL; 402 error = rtrequest1(RTM_DELETE, &info, &saved_nrt); 403 if (error == 0) { 404 RT_LOCK(saved_nrt); 405 rt = saved_nrt; 406 goto report; 407 } 408 break; 409 410 case RTM_GET: 411 case RTM_CHANGE: 412 case RTM_LOCK: 413 rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]; 414 if (rnh == NULL) 415 senderr(EAFNOSUPPORT); 416 RADIX_NODE_HEAD_LOCK(rnh); 417 rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST], 418 info.rti_info[RTAX_NETMASK], rnh); 419 RADIX_NODE_HEAD_UNLOCK(rnh); 420 if (rt == NULL) /* XXX looks bogus */ 421 senderr(ESRCH); 422 RT_LOCK(rt); 423 RT_ADDREF(rt); 424 425 switch(rtm->rtm_type) { 426 427 case RTM_GET: 428 report: 429 RT_LOCK_ASSERT(rt); 430 info.rti_info[RTAX_DST] = rt_key(rt); 431 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 432 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 433 info.rti_info[RTAX_GENMASK] = rt->rt_genmask; 434 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { 435 ifp = rt->rt_ifp; 436 if (ifp) { 437 info.rti_info[RTAX_IFP] = 438 ifaddr_byindex(ifp->if_index)->ifa_addr; 439 if (jailed(so->so_cred)) { 440 bzero(&jail, sizeof(jail)); 441 jail.sin_family = PF_INET; 442 jail.sin_len = sizeof(jail); 443 jail.sin_addr.s_addr = 444 htonl(prison_getip(so->so_cred)); 445 info.rti_info[RTAX_IFA] = 446 (struct sockaddr *)&jail; 447 } else 448 info.rti_info[RTAX_IFA] = 449 rt->rt_ifa->ifa_addr; 450 if (ifp->if_flags 
& IFF_POINTOPOINT) 451 info.rti_info[RTAX_BRD] = 452 rt->rt_ifa->ifa_dstaddr; 453 rtm->rtm_index = ifp->if_index; 454 } else { 455 info.rti_info[RTAX_IFP] = NULL; 456 info.rti_info[RTAX_IFA] = NULL; 457 } 458 } 459 len = rt_msg2(rtm->rtm_type, &info, NULL, NULL); 460 if (len > rtm->rtm_msglen) { 461 struct rt_msghdr *new_rtm; 462 R_Malloc(new_rtm, struct rt_msghdr *, len); 463 if (new_rtm == NULL) { 464 RT_UNLOCK(rt); 465 senderr(ENOBUFS); 466 } 467 bcopy(rtm, new_rtm, rtm->rtm_msglen); 468 Free(rtm); rtm = new_rtm; 469 } 470 (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL); 471 rtm->rtm_flags = rt->rt_flags; 472 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 473 rtm->rtm_addrs = info.rti_addrs; 474 break; 475 476 case RTM_CHANGE: 477 /* 478 * New gateway could require new ifaddr, ifp; 479 * flags may also be different; ifp may be specified 480 * by ll sockaddr when protocol address is ambiguous 481 */ 482 if (((rt->rt_flags & RTF_GATEWAY) && 483 info.rti_info[RTAX_GATEWAY] != NULL) || 484 info.rti_info[RTAX_IFP] != NULL || 485 (info.rti_info[RTAX_IFA] != NULL && 486 !sa_equal(info.rti_info[RTAX_IFA], 487 rt->rt_ifa->ifa_addr))) { 488 if ((error = rt_getifa(&info)) != 0) { 489 RT_UNLOCK(rt); 490 senderr(error); 491 } 492 } 493 if (info.rti_info[RTAX_GATEWAY] != NULL && 494 (error = rt_setgate(rt, rt_key(rt), 495 info.rti_info[RTAX_GATEWAY])) != 0) { 496 RT_UNLOCK(rt); 497 senderr(error); 498 } 499 if ((ifa = info.rti_ifa) != NULL) { 500 struct ifaddr *oifa = rt->rt_ifa; 501 if (oifa != ifa) { 502 if (oifa) { 503 if (oifa->ifa_rtrequest) 504 oifa->ifa_rtrequest( 505 RTM_DELETE, rt, 506 &info); 507 IFAFREE(oifa); 508 } 509 IFAREF(ifa); 510 rt->rt_ifa = ifa; 511 rt->rt_ifp = info.rti_ifp; 512 } 513 } 514 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 515 &rt->rt_rmx); 516 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) 517 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); 518 if (info.rti_info[RTAX_GENMASK]) 519 rt->rt_genmask = info.rti_info[RTAX_GENMASK]; 520 /* 
FALLTHROUGH */ 521 case RTM_LOCK: 522 /* We don't support locks anymore */ 523 break; 524 } 525 RT_UNLOCK(rt); 526 break; 527 528 default: 529 senderr(EOPNOTSUPP); 530 } 531 532 flush: 533 if (rtm) { 534 if (error) 535 rtm->rtm_errno = error; 536 else 537 rtm->rtm_flags |= RTF_DONE; 538 } 539 if (rt) /* XXX can this be true? */ 540 RTFREE(rt); 541 { 542 struct rawcb *rp = NULL; 543 /* 544 * Check to see if we don't want our own messages. 545 */ 546 if ((so->so_options & SO_USELOOPBACK) == 0) { 547 if (route_cb.any_count <= 1) { 548 if (rtm) 549 Free(rtm); 550 m_freem(m); 551 return (error); 552 } 553 /* There is another listener, so construct message */ 554 rp = sotorawcb(so); 555 } 556 if (rtm) { 557 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); 558 if (m->m_pkthdr.len < rtm->rtm_msglen) { 559 m_freem(m); 560 m = NULL; 561 } else if (m->m_pkthdr.len > rtm->rtm_msglen) 562 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); 563 Free(rtm); 564 } 565 if (m) { 566 if (rp) { 567 /* 568 * XXX insure we don't get a copy by 569 * invalidating our protocol 570 */ 571 unsigned short family = rp->rcb_proto.sp_family; 572 rp->rcb_proto.sp_family = 0; 573 rt_dispatch(m, info.rti_info[RTAX_DST]); 574 rp->rcb_proto.sp_family = family; 575 } else 576 rt_dispatch(m, info.rti_info[RTAX_DST]); 577 } 578 } 579 return (error); 580 #undef sa_equal 581 } 582 583 static void 584 rt_setmetrics(u_long which, const struct rt_metrics *in, 585 struct rt_metrics_lite *out) 586 { 587 #define metric(f, e) if (which & (f)) out->e = in->e; 588 /* 589 * Only these are stored in the routing entry since introduction 590 * of tcp hostcache. The rest is ignored. 
591 */ 592 metric(RTV_MTU, rmx_mtu); 593 metric(RTV_EXPIRE, rmx_expire); 594 #undef metric 595 } 596 597 static void 598 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out) 599 { 600 #define metric(e) out->e = in->e; 601 bzero(out, sizeof(*out)); 602 metric(rmx_mtu); 603 metric(rmx_expire); 604 #undef metric 605 } 606 607 /* 608 * Extract the addresses of the passed sockaddrs. 609 * Do a little sanity checking so as to avoid bad memory references. 610 * This data is derived straight from userland. 611 */ 612 static int 613 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 614 { 615 struct sockaddr *sa; 616 int i; 617 618 for (i = 0; i < RTAX_MAX && cp < cplim; i++) { 619 if ((rtinfo->rti_addrs & (1 << i)) == 0) 620 continue; 621 sa = (struct sockaddr *)cp; 622 /* 623 * It won't fit. 624 */ 625 if (cp + sa->sa_len > cplim) 626 return (EINVAL); 627 /* 628 * there are no more.. quit now 629 * If there are more bits, they are in error. 630 * I've seen this. route(1) can evidently generate these. 631 * This causes kernel to core dump. 632 * for compatibility, If we see this, point to a safe address. 
633 */ 634 if (sa->sa_len == 0) { 635 rtinfo->rti_info[i] = &sa_zero; 636 return (0); /* should be EINVAL but for compat */ 637 } 638 /* accept it */ 639 rtinfo->rti_info[i] = sa; 640 cp += SA_SIZE(sa); 641 } 642 return (0); 643 } 644 645 static struct mbuf * 646 rt_msg1(int type, struct rt_addrinfo *rtinfo) 647 { 648 struct rt_msghdr *rtm; 649 struct mbuf *m; 650 int i; 651 struct sockaddr *sa; 652 int len, dlen; 653 654 switch (type) { 655 656 case RTM_DELADDR: 657 case RTM_NEWADDR: 658 len = sizeof(struct ifa_msghdr); 659 break; 660 661 case RTM_DELMADDR: 662 case RTM_NEWMADDR: 663 len = sizeof(struct ifma_msghdr); 664 break; 665 666 case RTM_IFINFO: 667 len = sizeof(struct if_msghdr); 668 break; 669 670 case RTM_IFANNOUNCE: 671 len = sizeof(struct if_announcemsghdr); 672 break; 673 674 default: 675 len = sizeof(struct rt_msghdr); 676 } 677 if (len > MCLBYTES) 678 panic("rt_msg1"); 679 m = m_gethdr(M_DONTWAIT, MT_DATA); 680 if (m && len > MHLEN) { 681 MCLGET(m, M_DONTWAIT); 682 if ((m->m_flags & M_EXT) == 0) { 683 m_free(m); 684 m = NULL; 685 } 686 } 687 if (m == NULL) 688 return (m); 689 m->m_pkthdr.len = m->m_len = len; 690 m->m_pkthdr.rcvif = NULL; 691 rtm = mtod(m, struct rt_msghdr *); 692 bzero((caddr_t)rtm, len); 693 for (i = 0; i < RTAX_MAX; i++) { 694 if ((sa = rtinfo->rti_info[i]) == NULL) 695 continue; 696 rtinfo->rti_addrs |= (1 << i); 697 dlen = SA_SIZE(sa); 698 m_copyback(m, len, dlen, (caddr_t)sa); 699 len += dlen; 700 } 701 if (m->m_pkthdr.len != len) { 702 m_freem(m); 703 return (NULL); 704 } 705 rtm->rtm_msglen = len; 706 rtm->rtm_version = RTM_VERSION; 707 rtm->rtm_type = type; 708 return (m); 709 } 710 711 static int 712 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) 713 { 714 int i; 715 int len, dlen, second_time = 0; 716 caddr_t cp0; 717 718 rtinfo->rti_addrs = 0; 719 again: 720 switch (type) { 721 722 case RTM_DELADDR: 723 case RTM_NEWADDR: 724 len = sizeof(struct ifa_msghdr); 725 break; 726 727 case 
RTM_IFINFO: 728 len = sizeof(struct if_msghdr); 729 break; 730 731 case RTM_NEWMADDR: 732 len = sizeof(struct ifma_msghdr); 733 break; 734 735 default: 736 len = sizeof(struct rt_msghdr); 737 } 738 cp0 = cp; 739 if (cp0) 740 cp += len; 741 for (i = 0; i < RTAX_MAX; i++) { 742 struct sockaddr *sa; 743 744 if ((sa = rtinfo->rti_info[i]) == NULL) 745 continue; 746 rtinfo->rti_addrs |= (1 << i); 747 dlen = SA_SIZE(sa); 748 if (cp) { 749 bcopy((caddr_t)sa, cp, (unsigned)dlen); 750 cp += dlen; 751 } 752 len += dlen; 753 } 754 len = ALIGN(len); 755 if (cp == NULL && w != NULL && !second_time) { 756 struct walkarg *rw = w; 757 758 if (rw->w_req) { 759 if (rw->w_tmemsize < len) { 760 if (rw->w_tmem) 761 free(rw->w_tmem, M_RTABLE); 762 rw->w_tmem = (caddr_t) 763 malloc(len, M_RTABLE, M_NOWAIT); 764 if (rw->w_tmem) 765 rw->w_tmemsize = len; 766 } 767 if (rw->w_tmem) { 768 cp = rw->w_tmem; 769 second_time = 1; 770 goto again; 771 } 772 } 773 } 774 if (cp) { 775 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 776 777 rtm->rtm_version = RTM_VERSION; 778 rtm->rtm_type = type; 779 rtm->rtm_msglen = len; 780 } 781 return (len); 782 } 783 784 /* 785 * This routine is called to generate a message from the routing 786 * socket indicating that a redirect has occured, a routing lookup 787 * has failed, or that a protocol has detected timeouts to a particular 788 * destination. 
789 */ 790 void 791 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) 792 { 793 struct rt_msghdr *rtm; 794 struct mbuf *m; 795 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 796 797 if (route_cb.any_count == 0) 798 return; 799 m = rt_msg1(type, rtinfo); 800 if (m == NULL) 801 return; 802 rtm = mtod(m, struct rt_msghdr *); 803 rtm->rtm_flags = RTF_DONE | flags; 804 rtm->rtm_errno = error; 805 rtm->rtm_addrs = rtinfo->rti_addrs; 806 rt_dispatch(m, sa); 807 } 808 809 /* 810 * This routine is called to generate a message from the routing 811 * socket indicating that the status of a network interface has changed. 812 */ 813 void 814 rt_ifmsg(struct ifnet *ifp) 815 { 816 struct if_msghdr *ifm; 817 struct mbuf *m; 818 struct rt_addrinfo info; 819 820 if (route_cb.any_count == 0) 821 return; 822 bzero((caddr_t)&info, sizeof(info)); 823 m = rt_msg1(RTM_IFINFO, &info); 824 if (m == NULL) 825 return; 826 ifm = mtod(m, struct if_msghdr *); 827 ifm->ifm_index = ifp->if_index; 828 ifm->ifm_flags = ifp->if_flags; 829 ifm->ifm_data = ifp->if_data; 830 ifm->ifm_addrs = 0; 831 rt_dispatch(m, NULL); 832 } 833 834 /* 835 * This is called to generate messages from the routing socket 836 * indicating a network interface has had addresses associated with it. 837 * if we ever reverse the logic and replace messages TO the routing 838 * socket indicate a request to configure interfaces, then it will 839 * be unnecessary as the routing socket will automatically generate 840 * copies of it. 
841 */ 842 void 843 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) 844 { 845 struct rt_addrinfo info; 846 struct sockaddr *sa = NULL; 847 int pass; 848 struct mbuf *m = NULL; 849 struct ifnet *ifp = ifa->ifa_ifp; 850 851 if (route_cb.any_count == 0) 852 return; 853 for (pass = 1; pass < 3; pass++) { 854 bzero((caddr_t)&info, sizeof(info)); 855 if ((cmd == RTM_ADD && pass == 1) || 856 (cmd == RTM_DELETE && pass == 2)) { 857 struct ifa_msghdr *ifam; 858 int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; 859 860 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; 861 info.rti_info[RTAX_IFP] = 862 ifaddr_byindex(ifp->if_index)->ifa_addr; 863 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 864 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 865 if ((m = rt_msg1(ncmd, &info)) == NULL) 866 continue; 867 ifam = mtod(m, struct ifa_msghdr *); 868 ifam->ifam_index = ifp->if_index; 869 ifam->ifam_metric = ifa->ifa_metric; 870 ifam->ifam_flags = ifa->ifa_flags; 871 ifam->ifam_addrs = info.rti_addrs; 872 } 873 if ((cmd == RTM_ADD && pass == 2) || 874 (cmd == RTM_DELETE && pass == 1)) { 875 struct rt_msghdr *rtm; 876 877 if (rt == NULL) 878 continue; 879 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 880 info.rti_info[RTAX_DST] = sa = rt_key(rt); 881 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 882 if ((m = rt_msg1(cmd, &info)) == NULL) 883 continue; 884 rtm = mtod(m, struct rt_msghdr *); 885 rtm->rtm_index = ifp->if_index; 886 rtm->rtm_flags |= rt->rt_flags; 887 rtm->rtm_errno = error; 888 rtm->rtm_addrs = info.rti_addrs; 889 } 890 rt_dispatch(m, sa); 891 } 892 } 893 894 /* 895 * This is the analogue to the rt_newaddrmsg which performs the same 896 * function but for multicast group memberhips. This is easier since 897 * there is no route state to worry about. 
898 */ 899 void 900 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) 901 { 902 struct rt_addrinfo info; 903 struct mbuf *m = NULL; 904 struct ifnet *ifp = ifma->ifma_ifp; 905 struct ifma_msghdr *ifmam; 906 907 if (route_cb.any_count == 0) 908 return; 909 910 bzero((caddr_t)&info, sizeof(info)); 911 info.rti_info[RTAX_IFA] = ifma->ifma_addr; 912 info.rti_info[RTAX_IFP] = 913 ifp ? ifaddr_byindex(ifp->if_index)->ifa_addr : NULL; 914 /* 915 * If a link-layer address is present, present it as a ``gateway'' 916 * (similarly to how ARP entries, e.g., are presented). 917 */ 918 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; 919 m = rt_msg1(cmd, &info); 920 if (m == NULL) 921 return; 922 ifmam = mtod(m, struct ifma_msghdr *); 923 ifmam->ifmam_index = ifp->if_index; 924 ifmam->ifmam_addrs = info.rti_addrs; 925 rt_dispatch(m, ifma->ifma_addr); 926 } 927 928 /* 929 * This is called to generate routing socket messages indicating 930 * network interface arrival and departure. 931 */ 932 void 933 rt_ifannouncemsg(struct ifnet *ifp, int what) 934 { 935 struct if_announcemsghdr *ifan; 936 struct mbuf *m; 937 struct rt_addrinfo info; 938 939 if (route_cb.any_count == 0) 940 return; 941 bzero((caddr_t)&info, sizeof(info)); 942 m = rt_msg1(RTM_IFANNOUNCE, &info); 943 if (m == NULL) 944 return; 945 ifan = mtod(m, struct if_announcemsghdr *); 946 ifan->ifan_index = ifp->if_index; 947 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 948 ifan->ifan_what = what; 949 rt_dispatch(m, NULL); 950 } 951 952 static void 953 rt_dispatch(struct mbuf *m, const struct sockaddr *sa) 954 { 955 unsigned short *family; 956 struct m_tag *tag; 957 958 /* 959 * Preserve the family from the sockaddr, if any, in an m_tag for 960 * use when injecting the mbuf into the routing socket buffer from 961 * the netisr. 
962 */ 963 if (sa != NULL) { 964 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), 965 M_NOWAIT); 966 if (tag == NULL) { 967 m_freem(m); 968 return; 969 } 970 family = (unsigned short *)(tag + 1); 971 *family = sa ? sa->sa_family : 0; 972 m_tag_prepend(m, tag); 973 } 974 netisr_queue(NETISR_ROUTE, m); 975 } 976 977 /* 978 * This is used in dumping the kernel table via sysctl(). 979 */ 980 static int 981 sysctl_dumpentry(struct radix_node *rn, void *vw) 982 { 983 struct walkarg *w = vw; 984 struct rtentry *rt = (struct rtentry *)rn; 985 int error = 0, size; 986 struct rt_addrinfo info; 987 988 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 989 return 0; 990 bzero((caddr_t)&info, sizeof(info)); 991 info.rti_info[RTAX_DST] = rt_key(rt); 992 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 993 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 994 info.rti_info[RTAX_GENMASK] = rt->rt_genmask; 995 if (rt->rt_ifp) { 996 info.rti_info[RTAX_IFP] = 997 ifaddr_byindex(rt->rt_ifp->if_index)->ifa_addr; 998 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 999 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) 1000 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1001 } 1002 size = rt_msg2(RTM_GET, &info, NULL, w); 1003 if (w->w_req && w->w_tmem) { 1004 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1005 1006 rtm->rtm_flags = rt->rt_flags; 1007 rtm->rtm_use = rt->rt_rmx.rmx_pksent; 1008 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1009 rtm->rtm_index = rt->rt_ifp->if_index; 1010 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; 1011 rtm->rtm_addrs = info.rti_addrs; 1012 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); 1013 return (error); 1014 } 1015 return (error); 1016 } 1017 1018 static int 1019 sysctl_iflist(int af, struct walkarg *w) 1020 { 1021 struct ifnet *ifp; 1022 struct ifaddr *ifa; 1023 struct rt_addrinfo info; 1024 int len, error = 0; 1025 1026 bzero((caddr_t)&info, sizeof(info)); 1027 /* IFNET_RLOCK(); */ /* could sleep XXX */ 1028 TAILQ_FOREACH(ifp, 
&ifnet, if_link) { 1029 if (w->w_arg && w->w_arg != ifp->if_index) 1030 continue; 1031 ifa = ifaddr_byindex(ifp->if_index); 1032 info.rti_info[RTAX_IFP] = ifa->ifa_addr; 1033 len = rt_msg2(RTM_IFINFO, &info, NULL, w); 1034 info.rti_info[RTAX_IFP] = NULL; 1035 if (w->w_req && w->w_tmem) { 1036 struct if_msghdr *ifm; 1037 1038 ifm = (struct if_msghdr *)w->w_tmem; 1039 ifm->ifm_index = ifp->if_index; 1040 ifm->ifm_flags = ifp->if_flags; 1041 ifm->ifm_data = ifp->if_data; 1042 ifm->ifm_addrs = info.rti_addrs; 1043 error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len); 1044 if (error) 1045 goto done; 1046 } 1047 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { 1048 if (af && af != ifa->ifa_addr->sa_family) 1049 continue; 1050 if (jailed(curthread->td_ucred) && 1051 prison_if(curthread->td_ucred, ifa->ifa_addr)) 1052 continue; 1053 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1054 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1055 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1056 len = rt_msg2(RTM_NEWADDR, &info, NULL, w); 1057 if (w->w_req && w->w_tmem) { 1058 struct ifa_msghdr *ifam; 1059 1060 ifam = (struct ifa_msghdr *)w->w_tmem; 1061 ifam->ifam_index = ifa->ifa_ifp->if_index; 1062 ifam->ifam_flags = ifa->ifa_flags; 1063 ifam->ifam_metric = ifa->ifa_metric; 1064 ifam->ifam_addrs = info.rti_addrs; 1065 error = SYSCTL_OUT(w->w_req, w->w_tmem, len); 1066 if (error) 1067 goto done; 1068 } 1069 } 1070 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1071 info.rti_info[RTAX_BRD] = NULL; 1072 } 1073 done: 1074 /* IFNET_RUNLOCK(); */ /* XXX */ 1075 return (error); 1076 } 1077 1078 int 1079 sysctl_ifmalist(int af, struct walkarg *w) 1080 { 1081 struct ifnet *ifp; 1082 struct ifmultiaddr *ifma; 1083 struct rt_addrinfo info; 1084 int len, error = 0; 1085 struct ifaddr *ifa; 1086 1087 bzero((caddr_t)&info, sizeof(info)); 1088 /* IFNET_RLOCK(); */ /* could sleep XXX */ 1089 TAILQ_FOREACH(ifp, &ifnet, if_link) { 1090 if (w->w_arg && w->w_arg != ifp->if_index) 1091 continue; 1092 
ifa = ifaddr_byindex(ifp->if_index); 1093 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; 1094 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1095 if (af && af != ifma->ifma_addr->sa_family) 1096 continue; 1097 if (jailed(curproc->p_ucred) && 1098 prison_if(curproc->p_ucred, ifma->ifma_addr)) 1099 continue; 1100 info.rti_info[RTAX_IFA] = ifma->ifma_addr; 1101 info.rti_info[RTAX_GATEWAY] = 1102 (ifma->ifma_addr->sa_family != AF_LINK) ? 1103 ifma->ifma_lladdr : NULL; 1104 len = rt_msg2(RTM_NEWMADDR, &info, NULL, w); 1105 if (w->w_req && w->w_tmem) { 1106 struct ifma_msghdr *ifmam; 1107 1108 ifmam = (struct ifma_msghdr *)w->w_tmem; 1109 ifmam->ifmam_index = ifma->ifma_ifp->if_index; 1110 ifmam->ifmam_flags = 0; 1111 ifmam->ifmam_addrs = info.rti_addrs; 1112 error = SYSCTL_OUT(w->w_req, w->w_tmem, len); 1113 if (error) 1114 goto done; 1115 } 1116 } 1117 } 1118 done: 1119 /* IFNET_RUNLOCK(); */ /* XXX */ 1120 return (error); 1121 } 1122 1123 static int 1124 sysctl_rtsock(SYSCTL_HANDLER_ARGS) 1125 { 1126 int *name = (int *)arg1; 1127 u_int namelen = arg2; 1128 struct radix_node_head *rnh; 1129 int i, lim, s, error = EINVAL; 1130 u_char af; 1131 struct walkarg w; 1132 1133 name ++; 1134 namelen--; 1135 if (req->newptr) 1136 return (EPERM); 1137 if (namelen != 3) 1138 return ((namelen < 3) ? 
EISDIR : ENOTDIR); 1139 af = name[0]; 1140 if (af > AF_MAX) 1141 return (EINVAL); 1142 bzero(&w, sizeof(w)); 1143 w.w_op = name[1]; 1144 w.w_arg = name[2]; 1145 w.w_req = req; 1146 1147 s = splnet(); 1148 switch (w.w_op) { 1149 1150 case NET_RT_DUMP: 1151 case NET_RT_FLAGS: 1152 if (af == 0) { /* dump all tables */ 1153 i = 1; 1154 lim = AF_MAX; 1155 } else /* dump only one table */ 1156 i = lim = af; 1157 for (error = 0; error == 0 && i <= lim; i++) 1158 if ((rnh = rt_tables[i]) != NULL) { 1159 /* RADIX_NODE_HEAD_LOCK(rnh); */ 1160 error = rnh->rnh_walktree(rnh, 1161 sysctl_dumpentry, &w);/* could sleep XXX */ 1162 /* RADIX_NODE_HEAD_UNLOCK(rnh); */ 1163 } else if (af != 0) 1164 error = EAFNOSUPPORT; 1165 break; 1166 1167 case NET_RT_IFLIST: 1168 error = sysctl_iflist(af, &w); 1169 break; 1170 1171 case NET_RT_IFMALIST: 1172 error = sysctl_ifmalist(af, &w); 1173 break; 1174 } 1175 splx(s); 1176 if (w.w_tmem) 1177 free(w.w_tmem, M_RTABLE); 1178 return (error); 1179 } 1180 1181 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); 1182 1183 /* 1184 * Definitions of protocols supported in the ROUTE domain. 1185 */ 1186 1187 extern struct domain routedomain; /* or at least forward */ 1188 1189 static struct protosw routesw[] = { 1190 { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR, 1191 0, route_output, raw_ctlinput, 0, 1192 0, 1193 raw_init, 0, 0, 0, 1194 &route_usrreqs 1195 } 1196 }; 1197 1198 static struct domain routedomain = 1199 { PF_ROUTE, "route", 0, 0, 0, 1200 routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] }; 1201 1202 DOMAIN_SET(route); 1203