1 /* 2 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright (c) 1988, 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 38 */ 39 40 /* 41 * This file contains routines that processes routing socket requests. 42 */ 43 44 #include <sys/types.h> 45 #include <sys/stream.h> 46 #include <sys/stropts.h> 47 #include <sys/ddi.h> 48 #include <sys/strsubr.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/policy.h> 52 #include <sys/zone.h> 53 54 #include <sys/systm.h> 55 #include <sys/param.h> 56 #include <sys/socket.h> 57 #include <sys/strsun.h> 58 #include <net/if.h> 59 #include <net/route.h> 60 #include <netinet/in.h> 61 #include <net/if_dl.h> 62 #include <netinet/ip6.h> 63 64 #include <inet/common.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_if.h> 68 #include <inet/ip_ire.h> 69 #include <inet/ip_ftable.h> 70 #include <inet/ip_rts.h> 71 72 #include <inet/ipclassifier.h> 73 74 #include <sys/tsol/tndb.h> 75 #include <sys/tsol/tnet.h> 76 77 #define RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \ 78 (rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type)) 79 80 static size_t rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp); 81 static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, 82 ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, 83 ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, 84 const tsol_gc_t *); 85 static int rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, 86 in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp, 87 in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp, 88 sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error); 89 static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif); 90 static int rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics); 91 static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, 92 const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af); 93 static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics); 94 static ire_t *ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, 95 ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid, 96 const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, 97 ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp); 98 static ire_t *ire_lookup_v6(const in6_addr_t *dst_addr_v6, 99 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, 100 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, 101 ip_stack_t *ipst, ire_t **pifire, 102 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp); 103 104 /* 105 * Send `mp' to all eligible routing queues. A queue is ineligible if: 106 * 107 * 1. SO_USELOOPBACK is off and it is not the originating queue. 108 * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'. 109 * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'. 110 * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC. 111 */ 112 void 113 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, 114 ip_stack_t *ipst) 115 { 116 mblk_t *mp1; 117 conn_t *connp, *next_connp; 118 119 /* 120 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be 121 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point. 122 */ 123 ASSERT(!(flags & RTSQ_DEFAULT)); 124 125 mutex_enter(&ipst->ips_rts_clients->connf_lock); 126 connp = ipst->ips_rts_clients->connf_head; 127 128 for (; connp != NULL; connp = next_connp) { 129 next_connp = connp->conn_next; 130 /* 131 * If there was a family specified when this routing socket was 132 * created and it doesn't match the family of the message to 133 * copy, then continue. 134 */ 135 if ((connp->conn_proto != AF_UNSPEC) && 136 (connp->conn_proto != af)) 137 continue; 138 139 /* 140 * Queue the message only if the conn_t and flags match. 141 */ 142 if (connp->conn_rtaware & RTAW_UNDER_IPMP) { 143 if (!(flags & RTSQ_UNDER_IPMP)) 144 continue; 145 } else { 146 if (!(flags & RTSQ_NORMAL)) 147 continue; 148 } 149 /* 150 * For the originating queue, we only copy the message upstream 151 * if loopback is set. For others reading on the routing 152 * socket, we check if there is room upstream for a copy of the 153 * message. 154 */ 155 if ((o_connp == connp) && connp->conn_useloopback == 0) { 156 connp = connp->conn_next; 157 continue; 158 } 159 CONN_INC_REF(connp); 160 mutex_exit(&ipst->ips_rts_clients->connf_lock); 161 /* Pass to rts_input */ 162 if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld : 163 canputnext(connp->conn_rq)) { 164 mp1 = dupmsg(mp); 165 if (mp1 == NULL) 166 mp1 = copymsg(mp); 167 /* Note that we pass a NULL ira to rts_input */ 168 if (mp1 != NULL) 169 (connp->conn_recv)(connp, mp1, NULL, NULL); 170 } 171 172 mutex_enter(&ipst->ips_rts_clients->connf_lock); 173 /* reload next_connp since conn_next may have changed */ 174 next_connp = connp->conn_next; 175 CONN_DEC_REF(connp); 176 } 177 mutex_exit(&ipst->ips_rts_clients->connf_lock); 178 freemsg(mp); 179 } 180 181 /* 182 * Takes an ire and sends an ack to all the routing sockets. This 183 * routine is used 184 * - when a route is created/deleted through the ioctl interface. 185 * - when a stale redirect is deleted 186 */ 187 void 188 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) 189 { 190 mblk_t *mp; 191 rt_msghdr_t *rtm; 192 int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY); 193 sa_family_t af; 194 in6_addr_t gw_addr_v6; 195 196 if (ire == NULL) 197 return; 198 ASSERT(ire->ire_ipversion == IPV4_VERSION || 199 ire->ire_ipversion == IPV6_VERSION); 200 201 ASSERT(!(ire->ire_type & IRE_IF_CLONE)); 202 203 if (ire->ire_flags & RTF_SETSRC) 204 rtm_addrs |= RTA_SRC; 205 206 switch (ire->ire_ipversion) { 207 case IPV4_VERSION: 208 af = AF_INET; 209 mp = rts_alloc_msg(type, rtm_addrs, af, 0); 210 if (mp == NULL) 211 return; 212 rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask, 213 ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL, 214 mp, NULL); 215 break; 216 case IPV6_VERSION: 217 af = AF_INET6; 218 mp = rts_alloc_msg(type, rtm_addrs, af, 0); 219 if (mp == NULL) 220 return; 221 mutex_enter(&ire->ire_lock); 222 gw_addr_v6 = ire->ire_gateway_addr_v6; 223 mutex_exit(&ire->ire_lock); 224 rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6, 225 &ire->ire_mask_v6, &gw_addr_v6, 226 &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros, 227 &ipv6_all_zeros, NULL, mp, NULL); 228 break; 229 } 230 rtm = (rt_msghdr_t *)mp->b_rptr; 231 mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen]; 232 rtm->rtm_addrs = rtm_addrs; 233 rtm->rtm_flags = ire->ire_flags; 234 if (error != 0) 235 rtm->rtm_errno = error; 236 else 237 rtm->rtm_flags |= RTF_DONE; 238 rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst); 239 } 240 241 /* 242 * This is a call from the RTS module 243 * indicating that this is a Routing Socket 244 * Stream. Insert this conn_t in routing 245 * socket client list. 246 */ 247 void 248 ip_rts_register(conn_t *connp) 249 { 250 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 251 252 connp->conn_useloopback = 1; 253 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); 254 } 255 256 /* 257 * This is a call from the RTS module indicating that it is closing. 258 */ 259 void 260 ip_rts_unregister(conn_t *connp) 261 { 262 ipcl_hash_remove(connp); 263 } 264 265 /* 266 * Processes requests received on a routing socket. It extracts all the 267 * arguments and calls the appropriate function to process the request. 268 * 269 * RTA_SRC bit flag requests are sent by 'route -setsrc'. 270 * 271 * In general, this function does not consume the message supplied but rather 272 * sends the message upstream with an appropriate UNIX errno. 273 */ 274 int 275 ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr) 276 { 277 rt_msghdr_t *rtm = NULL; 278 in6_addr_t dst_addr_v6; 279 in6_addr_t src_addr_v6; 280 in6_addr_t gw_addr_v6; 281 in6_addr_t net_mask_v6; 282 in6_addr_t author_v6; 283 in6_addr_t if_addr_v6; 284 mblk_t *mp1; 285 ire_t *ire = NULL; 286 ire_t *ifire = NULL; 287 ipaddr_t v4setsrc; 288 in6_addr_t v6setsrc = ipv6_all_zeros; 289 tsol_ire_gw_secattr_t *gwattr = NULL; 290 int error = 0; 291 int match_flags = MATCH_IRE_DSTONLY; 292 int match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW; 293 int found_addrs; 294 sa_family_t af; 295 ipaddr_t dst_addr; 296 ipaddr_t gw_addr; 297 ipaddr_t src_addr; 298 ipaddr_t net_mask; 299 ushort_t index; 300 boolean_t gcgrp_xtraref = B_FALSE; 301 tsol_gcgrp_addr_t ga; 302 tsol_rtsecattr_t rtsecattr; 303 struct rtsa_s *rtsap = NULL; 304 tsol_gcgrp_t *gcgrp = NULL; 305 tsol_gc_t *gc = NULL; 306 ts_label_t *tsl = NULL; 307 zoneid_t zoneid; 308 ip_stack_t *ipst; 309 ill_t *ill = NULL; 310 311 zoneid = connp->conn_zoneid; 312 ipst = connp->conn_netstack->netstack_ip; 313 314 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { 315 freemsg(mp); 316 error = EINVAL; 317 goto done; 318 } 319 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 320 freemsg(mp); 321 error = EINVAL; 322 goto done; 323 } 324 325 /* 326 * Check the routing message for basic consistency including the 327 * version number and that the number of octets written is the same 328 * as specified by the rtm_msglen field. 329 * 330 * At this point, an error can be delivered back via rtm_errno. 331 */ 332 rtm = (rt_msghdr_t *)mp->b_rptr; 333 if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) { 334 error = EINVAL; 335 goto done; 336 } 337 if (rtm->rtm_version != RTM_VERSION) { 338 error = EPROTONOSUPPORT; 339 goto done; 340 } 341 342 /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */ 343 if (rtm->rtm_type != RTM_GET && 344 rtm->rtm_type != RTM_RESOLVE && 345 (ioc_cr == NULL || 346 secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) { 347 error = EPERM; 348 goto done; 349 } 350 351 found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6, 352 &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr, 353 &error); 354 355 if (error != 0) 356 goto done; 357 358 if ((found_addrs & RTA_DST) == 0) { 359 error = EINVAL; 360 goto done; 361 } 362 363 /* 364 * Based on the address family of the destination address, determine 365 * the destination, gateway and netmask and return the appropriate error 366 * if an unknown address family was specified (following the errno 367 * values that 4.4BSD-Lite2 returns.) 368 */ 369 switch (af) { 370 case AF_INET: 371 IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr); 372 IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr); 373 IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr); 374 if (((found_addrs & RTA_NETMASK) == 0) || 375 (rtm->rtm_flags & RTF_HOST)) 376 net_mask = IP_HOST_MASK; 377 else 378 IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask); 379 break; 380 case AF_INET6: 381 if (((found_addrs & RTA_NETMASK) == 0) || 382 (rtm->rtm_flags & RTF_HOST)) 383 net_mask_v6 = ipv6_all_ones; 384 break; 385 default: 386 /* 387 * These errno values are meant to be compatible with 388 * 4.4BSD-Lite2 for the given message types. 389 */ 390 switch (rtm->rtm_type) { 391 case RTM_ADD: 392 case RTM_DELETE: 393 error = ESRCH; 394 goto done; 395 case RTM_GET: 396 case RTM_CHANGE: 397 error = EAFNOSUPPORT; 398 goto done; 399 default: 400 error = EOPNOTSUPP; 401 goto done; 402 } 403 } 404 405 /* 406 * At this point, the address family must be something known. 407 */ 408 ASSERT(af == AF_INET || af == AF_INET6); 409 410 /* Handle RTA_IFP */ 411 if (index != 0) { 412 ipif_t *ipif; 413 lookup: 414 ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst); 415 if (ill == NULL) { 416 error = EINVAL; 417 goto done; 418 } 419 420 /* 421 * Since all interfaces in an IPMP group must be equivalent, 422 * we prevent changes to a specific underlying interface's 423 * routing configuration. However, for backward compatibility, 424 * we intepret a request to add a route on an underlying 425 * interface as a request to add a route on its IPMP interface. 426 */ 427 if (IS_UNDER_IPMP(ill)) { 428 switch (rtm->rtm_type) { 429 case RTM_CHANGE: 430 case RTM_DELETE: 431 error = EINVAL; 432 goto done; 433 case RTM_ADD: 434 index = ipmp_ill_get_ipmp_ifindex(ill); 435 ill_refrele(ill); 436 if (index == 0) { 437 ill = NULL; /* already refrele'd */ 438 error = EINVAL; 439 goto done; 440 } 441 goto lookup; 442 } 443 } 444 445 match_flags |= MATCH_IRE_ILL; 446 /* 447 * This provides the same zoneid as in Solaris 10 448 * that -ifp picks the zoneid from the first ipif on the ill. 449 * But it might not be useful since the first ipif will always 450 * have the same zoneid as the ill. 451 */ 452 ipif = ipif_get_next_ipif(NULL, ill); 453 if (ipif != NULL) { 454 zoneid = ipif->ipif_zoneid; 455 ipif_refrele(ipif); 456 } 457 } 458 459 /* 460 * If a netmask was supplied in the message, then subsequent route 461 * lookups will attempt to match on the netmask as well. 462 */ 463 if ((found_addrs & RTA_NETMASK) != 0) 464 match_flags |= MATCH_IRE_MASK; 465 466 /* 467 * We only process any passed-in route security attributes for 468 * either RTM_ADD or RTM_CHANGE message; We overload them 469 * to do an RTM_GET as a different label; ignore otherwise. 470 */ 471 if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE || 472 rtm->rtm_type == RTM_GET) { 473 ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX); 474 if (rtsecattr.rtsa_cnt > 0) 475 rtsap = &rtsecattr.rtsa_attr[0]; 476 } 477 478 switch (rtm->rtm_type) { 479 case RTM_ADD: 480 /* if we are adding a route, gateway is a must */ 481 if ((found_addrs & RTA_GATEWAY) == 0) { 482 error = EINVAL; 483 goto done; 484 } 485 486 /* Multirouting does not support net routes. */ 487 if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) == 488 RTF_MULTIRT) { 489 error = EADDRNOTAVAIL; 490 goto done; 491 } 492 493 /* 494 * Multirouting and user-specified source addresses 495 * do not support interface based routing. 496 * Assigning a source address to an interface based 497 * route is achievable by plumbing a new ipif and 498 * setting up the interface route via this ipif, 499 * though. 500 */ 501 if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) { 502 if ((rtm->rtm_flags & RTF_GATEWAY) == 0) { 503 error = EADDRNOTAVAIL; 504 goto done; 505 } 506 } 507 508 switch (af) { 509 case AF_INET: 510 if (src_addr != INADDR_ANY) { 511 uint_t type; 512 513 /* 514 * The RTF_SETSRC flag is present, check that 515 * the supplied src address is not the loopback 516 * address. This would produce martian packets. 517 */ 518 if (src_addr == htonl(INADDR_LOOPBACK)) { 519 error = EINVAL; 520 goto done; 521 } 522 /* 523 * Also check that the supplied address is a 524 * valid, local one. Only allow IFF_UP ones 525 */ 526 type = ip_type_v4(src_addr, ipst); 527 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { 528 error = EADDRNOTAVAIL; 529 goto done; 530 } 531 } else { 532 /* 533 * The RTF_SETSRC modifier must be associated 534 * to a non-null source address. 535 */ 536 if (rtm->rtm_flags & RTF_SETSRC) { 537 error = EINVAL; 538 goto done; 539 } 540 } 541 542 error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr, 543 rtm->rtm_flags, ill, &ire, B_FALSE, 544 rtsap, ipst, zoneid); 545 if (ill != NULL) 546 ASSERT(!MUTEX_HELD(&ill->ill_lock)); 547 break; 548 case AF_INET6: 549 if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) { 550 uint_t type; 551 552 /* 553 * The RTF_SETSRC flag is present, check that 554 * the supplied src address is not the loopback 555 * address. This would produce martian packets. 556 */ 557 if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) { 558 error = EINVAL; 559 goto done; 560 } 561 /* 562 * Also check that the supplied address is a 563 * valid, local one. Only allow UP ones. 564 */ 565 type = ip_type_v6(&src_addr_v6, ipst); 566 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { 567 error = EADDRNOTAVAIL; 568 goto done; 569 } 570 571 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 572 &gw_addr_v6, &src_addr_v6, rtm->rtm_flags, 573 ill, &ire, rtsap, ipst, zoneid); 574 break; 575 } 576 /* 577 * The RTF_SETSRC modifier must be associated 578 * to a non-null source address. 579 */ 580 if (rtm->rtm_flags & RTF_SETSRC) { 581 error = EINVAL; 582 goto done; 583 } 584 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 585 &gw_addr_v6, NULL, rtm->rtm_flags, 586 ill, &ire, rtsap, ipst, zoneid); 587 if (ill != NULL) 588 ASSERT(!MUTEX_HELD(&ill->ill_lock)); 589 break; 590 } 591 if (error != 0) 592 goto done; 593 ASSERT(ire != NULL); 594 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 595 break; 596 case RTM_DELETE: 597 /* if we are deleting a route, gateway is a must */ 598 if ((found_addrs & RTA_GATEWAY) == 0) { 599 error = EINVAL; 600 goto done; 601 } 602 /* 603 * The RTF_SETSRC modifier does not make sense 604 * when deleting a route. 605 */ 606 if (rtm->rtm_flags & RTF_SETSRC) { 607 error = EINVAL; 608 goto done; 609 } 610 611 switch (af) { 612 case AF_INET: 613 error = ip_rt_delete(dst_addr, net_mask, gw_addr, 614 found_addrs, rtm->rtm_flags, ill, B_FALSE, 615 ipst, zoneid); 616 break; 617 case AF_INET6: 618 error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6, 619 &gw_addr_v6, found_addrs, rtm->rtm_flags, ill, 620 ipst, zoneid); 621 break; 622 } 623 break; 624 case RTM_GET: 625 case RTM_CHANGE: 626 /* 627 * In the case of RTM_GET, the forwarding table should be 628 * searched recursively. Also, if a gateway was 629 * specified then the gateway address must also be matched. 630 * 631 * In the case of RTM_CHANGE, the gateway address (if supplied) 632 * is the new gateway address so matching on the gateway address 633 * is not done. This can lead to ambiguity when looking up the 634 * route to change as usually only the destination (and netmask, 635 * if supplied) is used for the lookup. However if a RTA_IFP 636 * sockaddr is also supplied, it can disambiguate which route to 637 * change provided the ambigous routes are tied to distinct 638 * ill's (or interface indices). If the routes are not tied to 639 * any particular interfaces (for example, with traditional 640 * gateway routes), then a RTA_IFP sockaddr will be of no use as 641 * it won't match any such routes. 642 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE, 643 * except when RTM_CHANGE is combined to RTF_SETSRC. 644 */ 645 if (((found_addrs & RTA_SRC) != 0) && 646 ((rtm->rtm_type == RTM_GET) || 647 !(rtm->rtm_flags & RTF_SETSRC))) { 648 error = EOPNOTSUPP; 649 goto done; 650 } 651 652 if (rtm->rtm_type == RTM_GET) { 653 match_flags |= MATCH_IRE_SECATTR; 654 match_flags_local |= MATCH_IRE_SECATTR; 655 if ((found_addrs & RTA_GATEWAY) != 0) 656 match_flags |= MATCH_IRE_GW; 657 if (ioc_cr) 658 tsl = crgetlabel(ioc_cr); 659 if (rtsap != NULL) { 660 if (rtsa_validate(rtsap) != 0) { 661 error = EINVAL; 662 goto done; 663 } 664 if (tsl != NULL && 665 crgetzoneid(ioc_cr) != GLOBAL_ZONEID && 666 (tsl->tsl_doi != rtsap->rtsa_doi || 667 !bldominates(&tsl->tsl_label, 668 &rtsap->rtsa_slrange.lower_bound))) { 669 error = EPERM; 670 goto done; 671 } 672 tsl = labelalloc( 673 &rtsap->rtsa_slrange.lower_bound, 674 rtsap->rtsa_doi, KM_NOSLEEP); 675 } 676 } 677 if (rtm->rtm_type == RTM_CHANGE) { 678 if ((found_addrs & RTA_GATEWAY) && 679 (rtm->rtm_flags & RTF_SETSRC)) { 680 /* 681 * Do not want to change the gateway, 682 * but rather the source address. 683 */ 684 match_flags |= MATCH_IRE_GW; 685 } 686 } 687 688 /* 689 * If the netmask is all ones (either as supplied or as derived 690 * above), then first check for an IRE_LOOPBACK or 691 * IRE_LOCAL entry. 692 * 693 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL 694 * entry, then look for any other type of IRE. 695 */ 696 switch (af) { 697 case AF_INET: 698 if (net_mask == IP_HOST_MASK) { 699 ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr, 700 IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid, 701 tsl, match_flags_local, 0, ipst, NULL); 702 } 703 if (ire == NULL) { 704 ire = ire_lookup_v4(dst_addr, net_mask, 705 gw_addr, ill, zoneid, tsl, match_flags, 706 ipst, &ifire, &v4setsrc, &gwattr); 707 IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc); 708 } 709 break; 710 case AF_INET6: 711 if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) { 712 ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL, 713 &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL, 714 zoneid, tsl, match_flags_local, 0, ipst, 715 NULL); 716 } 717 if (ire == NULL) { 718 ire = ire_lookup_v6(&dst_addr_v6, 719 &net_mask_v6, &gw_addr_v6, ill, zoneid, 720 tsl, match_flags, ipst, &ifire, &v6setsrc, 721 &gwattr); 722 } 723 break; 724 } 725 if (tsl != NULL && tsl != crgetlabel(ioc_cr)) 726 label_rele(tsl); 727 728 if (ire == NULL) { 729 error = ESRCH; 730 goto done; 731 } 732 /* 733 * Want to return failure if we get an IRE_NOROUTE from 734 * ire_route_recursive 735 */ 736 if (ire->ire_type & IRE_NOROUTE) { 737 ire_refrele(ire); 738 ire = NULL; 739 error = ESRCH; 740 goto done; 741 } 742 743 /* we know the IRE before we come here */ 744 switch (rtm->rtm_type) { 745 case RTM_GET: 746 mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af); 747 if (mp1 == NULL) { 748 error = ENOBUFS; 749 goto done; 750 } 751 freemsg(mp); 752 mp = mp1; 753 rtm = (rt_msghdr_t *)mp->b_rptr; 754 break; 755 case RTM_CHANGE: 756 /* 757 * Do not allow to the multirouting state of a route 758 * to be changed. This aims to prevent undesirable 759 * stages where both multirt and non-multirt routes 760 * for the same destination are declared. 761 */ 762 if ((ire->ire_flags & RTF_MULTIRT) != 763 (rtm->rtm_flags & RTF_MULTIRT)) { 764 error = EINVAL; 765 goto done; 766 } 767 /* 768 * Note that we do not need to do 769 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change 770 * in metrics or gateway will not affect existing 771 * routes since it does not create a more specific 772 * route. 773 */ 774 switch (af) { 775 case AF_INET: 776 if ((found_addrs & RTA_GATEWAY) != 0 && 777 (ire->ire_gateway_addr != gw_addr)) { 778 ire->ire_gateway_addr = gw_addr; 779 } 780 781 if (rtsap != NULL) { 782 ga.ga_af = AF_INET; 783 IN6_IPADDR_TO_V4MAPPED( 784 ire->ire_gateway_addr, &ga.ga_addr); 785 786 gcgrp = gcgrp_lookup(&ga, B_TRUE); 787 if (gcgrp == NULL) { 788 error = ENOMEM; 789 goto done; 790 } 791 } 792 793 if ((found_addrs & RTA_SRC) != 0 && 794 (rtm->rtm_flags & RTF_SETSRC) != 0 && 795 (ire->ire_setsrc_addr != src_addr)) { 796 if (src_addr != INADDR_ANY) { 797 uint_t type; 798 799 /* 800 * The RTF_SETSRC flag is 801 * present, check that the 802 * supplied src address is not 803 * the loopback address. This 804 * would produce martian 805 * packets. 806 */ 807 if (src_addr == 808 htonl(INADDR_LOOPBACK)) { 809 error = EINVAL; 810 goto done; 811 } 812 /* 813 * Also check that the 814 * supplied addr is a valid 815 * local address. 816 */ 817 type = ip_type_v4(src_addr, 818 ipst); 819 if (!(type & 820 (IRE_LOCAL|IRE_LOOPBACK))) { 821 error = EADDRNOTAVAIL; 822 goto done; 823 } 824 ire->ire_flags |= RTF_SETSRC; 825 ire->ire_setsrc_addr = 826 src_addr; 827 } else { 828 ire->ire_flags &= ~RTF_SETSRC; 829 ire->ire_setsrc_addr = 830 INADDR_ANY; 831 } 832 /* 833 * Let conn_ixa caching know that 834 * source address selection changed 835 */ 836 ip_update_source_selection(ipst); 837 } 838 ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE); 839 break; 840 case AF_INET6: 841 mutex_enter(&ire->ire_lock); 842 if ((found_addrs & RTA_GATEWAY) != 0 && 843 !IN6_ARE_ADDR_EQUAL( 844 &ire->ire_gateway_addr_v6, &gw_addr_v6)) { 845 ire->ire_gateway_addr_v6 = gw_addr_v6; 846 } 847 mutex_exit(&ire->ire_lock); 848 849 if (rtsap != NULL) { 850 ga.ga_af = AF_INET6; 851 mutex_enter(&ire->ire_lock); 852 ga.ga_addr = ire->ire_gateway_addr_v6; 853 mutex_exit(&ire->ire_lock); 854 855 gcgrp = gcgrp_lookup(&ga, B_TRUE); 856 if (gcgrp == NULL) { 857 error = ENOMEM; 858 goto done; 859 } 860 } 861 862 if ((found_addrs & RTA_SRC) != 0 && 863 (rtm->rtm_flags & RTF_SETSRC) != 0 && 864 !IN6_ARE_ADDR_EQUAL( 865 &ire->ire_setsrc_addr_v6, &src_addr_v6)) { 866 if (!IN6_IS_ADDR_UNSPECIFIED( 867 &src_addr_v6)) { 868 uint_t type; 869 870 /* 871 * The RTF_SETSRC flag is 872 * present, check that the 873 * supplied src address is not 874 * the loopback address. This 875 * would produce martian 876 * packets. 877 */ 878 if (IN6_IS_ADDR_LOOPBACK( 879 &src_addr_v6)) { 880 error = EINVAL; 881 goto done; 882 } 883 /* 884 * Also check that the 885 * supplied addr is a valid 886 * local address. 887 */ 888 type = ip_type_v6(&src_addr_v6, 889 ipst); 890 if (!(type & 891 (IRE_LOCAL|IRE_LOOPBACK))) { 892 error = EADDRNOTAVAIL; 893 goto done; 894 } 895 mutex_enter(&ire->ire_lock); 896 ire->ire_flags |= RTF_SETSRC; 897 ire->ire_setsrc_addr_v6 = 898 src_addr_v6; 899 mutex_exit(&ire->ire_lock); 900 } else { 901 mutex_enter(&ire->ire_lock); 902 ire->ire_flags &= ~RTF_SETSRC; 903 ire->ire_setsrc_addr_v6 = 904 ipv6_all_zeros; 905 mutex_exit(&ire->ire_lock); 906 } 907 /* 908 * Let conn_ixa caching know that 909 * source address selection changed 910 */ 911 ip_update_source_selection(ipst); 912 } 913 ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE); 914 break; 915 } 916 917 if (rtsap != NULL) { 918 ASSERT(gcgrp != NULL); 919 920 /* 921 * Create and add the security attribute to 922 * prefix IRE; it will add a reference to the 923 * group upon allocating a new entry. If it 924 * finds an already-existing entry for the 925 * security attribute, it simply returns it 926 * and no new group reference is made. 927 */ 928 gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref); 929 if (gc == NULL || 930 (error = tsol_ire_init_gwattr(ire, 931 ire->ire_ipversion, gc)) != 0) { 932 if (gc != NULL) { 933 GC_REFRELE(gc); 934 } else { 935 /* gc_create failed */ 936 error = ENOMEM; 937 } 938 goto done; 939 } 940 } 941 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 942 break; 943 } 944 break; 945 default: 946 error = EOPNOTSUPP; 947 break; 948 } 949 done: 950 if (ire != NULL) 951 ire_refrele(ire); 952 if (ifire != NULL) 953 ire_refrele(ifire); 954 if (ill != NULL) 955 ill_refrele(ill); 956 957 if (gcgrp_xtraref) 958 GCGRP_REFRELE(gcgrp); 959 960 if (rtm != NULL) { 961 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 962 if (error != 0) { 963 rtm->rtm_errno = error; 964 /* Send error ACK */ 965 ip1dbg(("ip_rts_request: error %d\n", error)); 966 } else { 967 rtm->rtm_flags |= RTF_DONE; 968 /* OK ACK already set up by caller except this */ 969 ip2dbg(("ip_rts_request: OK ACK\n")); 970 } 971 rts_queue_input(mp, connp, af, RTSQ_ALL, ipst); 972 } 973 return (error); 974 } 975 976 /* 977 * Helper function that can do recursive lookups including when 978 * MATCH_IRE_GW and/or MATCH_IRE_MASK is set. 979 */ 980 static ire_t * 981 ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr, 982 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, 983 int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp, 984 tsol_ire_gw_secattr_t **gwattrp) 985 { 986 ire_t *ire; 987 ire_t *ifire = NULL; 988 uint_t ire_type; 989 990 *pifire = NULL; 991 *v4setsrcp = INADDR_ANY; 992 *gwattrp = NULL; 993 994 /* Skip IRE_IF_CLONE */ 995 match_flags |= MATCH_IRE_TYPE; 996 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; 997 998 /* 999 * ire_route_recursive can't match gateway or mask thus if they are 1000 * set we have to do two steps of lookups 1001 */ 1002 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { 1003 ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr, 1004 ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL); 1005 1006 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) 1007 return (ire); 1008 1009 if (ire->ire_type & IRE_ONLINK) 1010 return (ire); 1011 1012 if (ire->ire_flags & RTF_SETSRC) { 1013 ASSERT(ire->ire_setsrc_addr != INADDR_ANY); 1014 *v4setsrcp = ire->ire_setsrc_addr; 1015 v4setsrcp = NULL; 1016 } 1017 1018 /* The first ire_gw_secattr is passed back */ 1019 if (ire->ire_gw_secattr != NULL) { 1020 *gwattrp = ire->ire_gw_secattr; 1021 gwattrp = NULL; 1022 } 1023 1024 /* Look for an interface ire recursively based on the gateway */ 1025 dst_addr = ire->ire_gateway_addr; 1026 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); 1027 /* 1028 * Don't allow anything unusual past the first iteration. 1029 * After the first lookup, we should no longer look for 1030 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT 1031 * routes. 1032 * 1033 * In addition, after we have found a direct IRE_OFFLINK, 1034 * we should only look for interface or clone routes. 1035 */ 1036 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */ 1037 1038 if ((ire->ire_type & IRE_OFFLINK) && 1039 !(ire->ire_flags & RTF_INDIRECT)) { 1040 ire_type = IRE_IF_ALL; 1041 } else { 1042 /* 1043 * no more local, loopback, broadcast routes 1044 */ 1045 if (!(match_flags & MATCH_IRE_TYPE)) 1046 ire_type = (IRE_OFFLINK|IRE_ONLINK); 1047 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST); 1048 } 1049 match_flags |= MATCH_IRE_TYPE; 1050 1051 ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, 1052 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, 1053 gwattrp, NULL); 1054 } else { 1055 ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, 1056 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, 1057 gwattrp, NULL); 1058 } 1059 *pifire = ifire; 1060 return (ire); 1061 } 1062 1063 static ire_t * 1064 ire_lookup_v6(const in6_addr_t *dst_addr_v6, 1065 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, 1066 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, 1067 ip_stack_t *ipst, ire_t **pifire, 1068 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp) 1069 { 1070 ire_t *ire; 1071 ire_t *ifire = NULL; 1072 uint_t ire_type; 1073 1074 *pifire = NULL; 1075 *v6setsrcp = ipv6_all_zeros; 1076 *gwattrp = NULL; 1077 1078 /* Skip IRE_IF_CLONE */ 1079 match_flags |= MATCH_IRE_TYPE; 1080 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; 1081 1082 /* 1083 * ire_route_recursive can't match gateway or mask thus if they are 1084 * set we have to do two steps of lookups 1085 */ 1086 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { 1087 in6_addr_t dst; 1088 1089 ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6, 1090 gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0, 1091 ipst, NULL); 1092 1093 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) 1094 return (ire); 1095 1096 if (ire->ire_type & IRE_ONLINK) 1097 return (ire); 1098 1099 if (ire->ire_flags & RTF_SETSRC) { 1100 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 1101 &ire->ire_setsrc_addr_v6)); 1102 *v6setsrcp = ire->ire_setsrc_addr_v6; 1103 v6setsrcp = NULL; 1104 } 1105 1106 /* The first ire_gw_secattr is passed back */ 1107 if (ire->ire_gw_secattr != NULL) { 1108 *gwattrp = ire->ire_gw_secattr; 1109 gwattrp = NULL; 1110 } 1111 1112 mutex_enter(&ire->ire_lock); 1113 dst = ire->ire_gateway_addr_v6; 1114 mutex_exit(&ire->ire_lock); 1115 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); 1116 /* 1117 * Don't allow anything unusual past the first iteration. 1118 * After the first lookup, we should no longer look for 1119 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT 1120 * routes. 1121 * 1122 * In addition, after we have found a direct IRE_OFFLINK, 1123 * we should only look for interface or clone routes. 1124 */ 1125 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */ 1126 1127 if ((ire->ire_type & IRE_OFFLINK) && 1128 !(ire->ire_flags & RTF_INDIRECT)) { 1129 ire_type = IRE_IF_ALL; 1130 } else { 1131 /* 1132 * no more local, loopback routes 1133 */ 1134 if (!(match_flags & MATCH_IRE_TYPE)) 1135 ire_type = (IRE_OFFLINK|IRE_ONLINK); 1136 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK); 1137 } 1138 match_flags |= MATCH_IRE_TYPE; 1139 1140 ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl, 1141 match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, gwattrp, 1142 NULL); 1143 } else { 1144 ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid, 1145 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, 1146 gwattrp, NULL); 1147 } 1148 *pifire = ifire; 1149 return (ire); 1150 } 1151 1152 1153 /* 1154 * Handle IP_IOC_RTS_REQUEST ioctls 1155 */ 1156 int 1157 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) 1158 { 1159 conn_t *connp = Q_TO_CONN(q); 1160 IOCP iocp = (IOCP)mp->b_rptr; 1161 mblk_t *mp1, *ioc_mp = mp; 1162 int error = 0; 1163 ip_stack_t *ipst; 1164 1165 ipst = connp->conn_netstack->netstack_ip; 1166 1167 ASSERT(mp->b_cont != NULL); 1168 /* ioc_mp holds mp */ 1169 mp = mp->b_cont; 1170 1171 /* 1172 * The Routing Socket data starts on 1173 * next block. If there is no next block 1174 * this is an indication from routing module 1175 * that it is a routing socket stream queue. 1176 * We need to support that for compatibility with SDP since 1177 * it has a contract private interface to use IP_IOC_RTS_REQUEST. 1178 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this. 1179 */ 1180 if (mp->b_cont == NULL) { 1181 /* 1182 * This is a message from SDP 1183 * indicating that this is a Routing Socket 1184 * Stream. Insert this conn_t in routing 1185 * socket client list. 1186 */ 1187 connp->conn_useloopback = 1; 1188 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); 1189 goto done; 1190 } 1191 mp1 = dupmsg(mp->b_cont); 1192 if (mp1 == NULL) { 1193 error = ENOBUFS; 1194 goto done; 1195 } 1196 mp = mp1; 1197 1198 error = ip_rts_request_common(mp, connp, ioc_cr); 1199 done: 1200 iocp->ioc_error = error; 1201 ioc_mp->b_datap->db_type = M_IOCACK; 1202 if (iocp->ioc_error != 0) 1203 iocp->ioc_count = 0; 1204 /* Note that we pass a NULL ira to rts_input */ 1205 (connp->conn_recv)(connp, ioc_mp, NULL, NULL); 1206 1207 /* conn was refheld in ip_wput_ioctl. */ 1208 CONN_DEC_IOCTLREF(connp); 1209 CONN_OPER_PENDING_DONE(connp); 1210 1211 return (error); 1212 } 1213 1214 /* 1215 * Build a reply to the RTM_GET request contained in the given message block 1216 * using the retrieved IRE of the destination address, the parent IRE (if it 1217 * exists) and the address family. 1218 * 1219 * Returns a pointer to a message block containing the reply if successful, 1220 * otherwise NULL is returned. 1221 */ 1222 static mblk_t * 1223 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc, 1224 tsol_ire_gw_secattr_t *attrp, sa_family_t af) 1225 { 1226 rt_msghdr_t *rtm; 1227 rt_msghdr_t *new_rtm; 1228 mblk_t *new_mp; 1229 int rtm_addrs; 1230 int rtm_flags; 1231 tsol_gc_t *gc = NULL; 1232 tsol_gcgrp_t *gcgrp = NULL; 1233 ill_t *ill; 1234 ipif_t *ipif = NULL; 1235 ipaddr_t brdaddr; /* IFF_POINTOPOINT destination */ 1236 ipaddr_t ifaddr; 1237 in6_addr_t brdaddr6; /* IFF_POINTOPOINT destination */ 1238 in6_addr_t ifaddr6; 1239 ipaddr_t v4setsrc; 1240 1241 rtm = (rt_msghdr_t *)mp->b_rptr; 1242 1243 /* 1244 * Find the ill used to send packets. This will be NULL in case 1245 * of a reject or blackhole. 1246 */ 1247 if (ifire != NULL) 1248 ill = ire_nexthop_ill(ifire); 1249 else 1250 ill = ire_nexthop_ill(ire); 1251 1252 if (attrp != NULL) { 1253 mutex_enter(&attrp->igsa_lock); 1254 if ((gc = attrp->igsa_gc) != NULL) { 1255 gcgrp = gc->gc_grp; 1256 ASSERT(gcgrp != NULL); 1257 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1258 } 1259 mutex_exit(&attrp->igsa_lock); 1260 } 1261 1262 /* 1263 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK. 1264 * 1265 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both 1266 * RTA_IFP and RTA_IFA if either is defined, and also 1267 * returns RTA_BRD if the appropriate interface is 1268 * point-to-point. 1269 */ 1270 rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK); 1271 if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) { 1272 rtm_addrs |= (RTA_IFP | RTA_IFA); 1273 /* 1274 * We associate an IRE with an ILL, hence we don't exactly 1275 * know what might make sense for RTA_IFA and RTA_BRD. We 1276 * pick the first ipif on the ill. 1277 */ 1278 ipif = ipif_get_next_ipif(NULL, ill); 1279 if (ipif != NULL) { 1280 if (ipif->ipif_isv6) 1281 ifaddr6 = ipif->ipif_v6lcl_addr; 1282 else 1283 ifaddr = ipif->ipif_lcl_addr; 1284 if (ipif->ipif_flags & IPIF_POINTOPOINT) { 1285 rtm_addrs |= RTA_BRD; 1286 if (ipif->ipif_isv6) 1287 brdaddr6 = ipif->ipif_v6pp_dst_addr; 1288 else 1289 brdaddr = ipif->ipif_pp_dst_addr; 1290 } 1291 ipif_refrele(ipif); 1292 } 1293 } 1294 1295 new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0); 1296 if (new_mp == NULL) { 1297 if (gcgrp != NULL) 1298 rw_exit(&gcgrp->gcgrp_rwlock); 1299 if (ill != NULL) 1300 ill_refrele(ill); 1301 return (NULL); 1302 } 1303 1304 /* 1305 * We set the destination address, gateway address, 1306 * netmask and flags in the RTM_GET response depending 1307 * on whether we found a parent IRE or not. 1308 * In particular, if we did find a parent IRE during the 1309 * recursive search, use that IRE's gateway address. 1310 * Otherwise, we use the IRE's source address for the 1311 * gateway address. 1312 */ 1313 ASSERT(af == AF_INET || af == AF_INET6); 1314 switch (af) { 1315 case AF_INET: 1316 IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc); 1317 if (v4setsrc != INADDR_ANY) 1318 rtm_addrs |= RTA_SRC; 1319 1320 rtm_flags = ire->ire_flags; 1321 rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr, 1322 ire->ire_mask, ire->ire_gateway_addr, v4setsrc, 1323 brdaddr, 0, ifaddr, ill, new_mp, gc); 1324 break; 1325 case AF_INET6: 1326 if (!IN6_IS_ADDR_UNSPECIFIED(setsrc)) 1327 rtm_addrs |= RTA_SRC; 1328 1329 rtm_flags = ire->ire_flags; 1330 rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6, 1331 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 1332 setsrc, &brdaddr6, &ipv6_all_zeros, 1333 &ifaddr6, ill, new_mp, gc); 1334 break; 1335 } 1336 1337 if (gcgrp != NULL) 1338 rw_exit(&gcgrp->gcgrp_rwlock); 1339 1340 new_rtm = (rt_msghdr_t *)new_mp->b_rptr; 1341 1342 /* 1343 * The rtm_msglen, rtm_version and rtm_type fields in 1344 * RTM_GET response are filled in by rts_fill_msg. 1345 * 1346 * rtm_addrs and rtm_flags are filled in based on what 1347 * was requested and the state of the IREs looked up 1348 * above. 1349 * 1350 * rtm_inits and rtm_rmx are filled in with metrics 1351 * based on whether a parent IRE was found or not. 1352 * 1353 * TODO: rtm_index and rtm_use should probably be 1354 * filled in with something resonable here and not just 1355 * copied from the request. 1356 */ 1357 new_rtm->rtm_index = rtm->rtm_index; 1358 new_rtm->rtm_pid = rtm->rtm_pid; 1359 new_rtm->rtm_seq = rtm->rtm_seq; 1360 new_rtm->rtm_use = rtm->rtm_use; 1361 new_rtm->rtm_addrs = rtm_addrs; 1362 new_rtm->rtm_flags = rtm_flags; 1363 new_rtm->rtm_inits = rts_getmetrics(ire, ill, &new_rtm->rtm_rmx); 1364 if (ill != NULL) 1365 ill_refrele(ill); 1366 return (new_mp); 1367 } 1368 1369 /* 1370 * Fill the given if_data_t with interface statistics. 1371 */ 1372 static void 1373 rts_getifdata(if_data_t *if_data, const ipif_t *ipif) 1374 { 1375 if_data->ifi_type = ipif->ipif_ill->ill_type; 1376 /* ethernet, tokenring, etc */ 1377 if_data->ifi_addrlen = 0; /* media address length */ 1378 if_data->ifi_hdrlen = 0; /* media header length */ 1379 if_data->ifi_mtu = ipif->ipif_ill->ill_mtu; /* mtu */ 1380 /* metric (external only) */ 1381 if_data->ifi_metric = ipif->ipif_ill->ill_metric; 1382 if_data->ifi_baudrate = 0; /* linespeed */ 1383 1384 if_data->ifi_ipackets = 0; /* packets received on if */ 1385 if_data->ifi_ierrors = 0; /* input errors on interface */ 1386 if_data->ifi_opackets = 0; /* packets sent on interface */ 1387 if_data->ifi_oerrors = 0; /* output errors on if */ 1388 if_data->ifi_collisions = 0; /* collisions on csma if */ 1389 if_data->ifi_ibytes = 0; /* total number received */ 1390 if_data->ifi_obytes = 0; /* total number sent */ 1391 if_data->ifi_imcasts = 0; /* multicast packets received */ 1392 if_data->ifi_omcasts = 0; /* multicast packets sent */ 1393 if_data->ifi_iqdrops = 0; /* dropped on input */ 1394 if_data->ifi_noproto = 0; /* destined for unsupported */ 1395 /* protocol. */ 1396 } 1397 1398 /* 1399 * Set the metrics on a forwarding table route. 1400 */ 1401 static void 1402 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) 1403 { 1404 clock_t rtt; 1405 clock_t rtt_sd; 1406 ill_t *ill; 1407 ifrt_t *ifrt; 1408 mblk_t *mp; 1409 in6_addr_t gw_addr_v6; 1410 1411 /* Need to add back some metrics to the IRE? */ 1412 /* 1413 * Bypass obtaining the lock and searching ill_saved_ire_mp in the 1414 * common case of no metrics. 1415 */ 1416 if (which == 0) 1417 return; 1418 ire->ire_metrics.iulp_set = B_TRUE; 1419 1420 /* 1421 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1422 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1423 * microseconds. 1424 */ 1425 if (which & RTV_RTT) 1426 rtt = metrics->rmx_rtt / 1000; 1427 if (which & RTV_RTTVAR) 1428 rtt_sd = metrics->rmx_rttvar / 1000; 1429 1430 /* 1431 * Update the metrics in the IRE itself. 1432 */ 1433 mutex_enter(&ire->ire_lock); 1434 if (which & RTV_MTU) 1435 ire->ire_metrics.iulp_mtu = metrics->rmx_mtu; 1436 if (which & RTV_RTT) 1437 ire->ire_metrics.iulp_rtt = rtt; 1438 if (which & RTV_SSTHRESH) 1439 ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh; 1440 if (which & RTV_RTTVAR) 1441 ire->ire_metrics.iulp_rtt_sd = rtt_sd; 1442 if (which & RTV_SPIPE) 1443 ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe; 1444 if (which & RTV_RPIPE) 1445 ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe; 1446 mutex_exit(&ire->ire_lock); 1447 1448 /* 1449 * Search through the ifrt_t chain hanging off the ILL in order to 1450 * reflect the metric change there. 1451 */ 1452 ill = ire->ire_ill; 1453 if (ill == NULL) 1454 return; 1455 ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) || 1456 ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION))); 1457 if (ill->ill_isv6) { 1458 mutex_enter(&ire->ire_lock); 1459 gw_addr_v6 = ire->ire_gateway_addr_v6; 1460 mutex_exit(&ire->ire_lock); 1461 } 1462 mutex_enter(&ill->ill_saved_ire_lock); 1463 for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) { 1464 /* 1465 * On a given ill, the tuple of address, gateway, mask, 1466 * ire_type and zoneid unique for each saved IRE. 1467 */ 1468 ifrt = (ifrt_t *)mp->b_rptr; 1469 if (ill->ill_isv6) { 1470 if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, 1471 &ire->ire_addr_v6) || 1472 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, 1473 &gw_addr_v6) || 1474 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask, 1475 &ire->ire_mask_v6)) 1476 continue; 1477 } else { 1478 if (ifrt->ifrt_addr != ire->ire_addr || 1479 ifrt->ifrt_gateway_addr != ire->ire_gateway_addr || 1480 ifrt->ifrt_mask != ire->ire_mask) 1481 continue; 1482 } 1483 if (ifrt->ifrt_zoneid != ire->ire_zoneid || 1484 ifrt->ifrt_type != ire->ire_type) 1485 continue; 1486 1487 if (which & RTV_MTU) 1488 ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu; 1489 if (which & RTV_RTT) 1490 ifrt->ifrt_metrics.iulp_rtt = rtt; 1491 if (which & RTV_SSTHRESH) { 1492 ifrt->ifrt_metrics.iulp_ssthresh = 1493 metrics->rmx_ssthresh; 1494 } 1495 if (which & RTV_RTTVAR) 1496 ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar; 1497 if (which & RTV_SPIPE) 1498 ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe; 1499 if (which & RTV_RPIPE) 1500 ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe; 1501 break; 1502 } 1503 mutex_exit(&ill->ill_saved_ire_lock); 1504 1505 /* 1506 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they 1507 * get any new iulp_mtu. 1508 * We do that by deleting them; ire_create_if_clone will pick 1509 * up the new metrics. 1510 */ 1511 if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0) 1512 ire_dep_delete_if_clone(ire); 1513 } 1514 1515 /* 1516 * Get the metrics from a forwarding table route. 1517 */ 1518 static int 1519 rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics) 1520 { 1521 int metrics_set = 0; 1522 1523 bzero(metrics, sizeof (rt_metrics_t)); 1524 1525 /* 1526 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1527 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1528 * microseconds. 1529 */ 1530 metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000; 1531 metrics_set |= RTV_RTT; 1532 if (ire->ire_metrics.iulp_mtu != 0) { 1533 metrics->rmx_mtu = ire->ire_metrics.iulp_mtu; 1534 metrics_set |= RTV_MTU; 1535 } else if (ill != NULL) { 1536 metrics->rmx_mtu = ill->ill_mtu; 1537 metrics_set |= RTV_MTU; 1538 } 1539 metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh; 1540 metrics_set |= RTV_SSTHRESH; 1541 metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000; 1542 metrics_set |= RTV_RTTVAR; 1543 metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe; 1544 metrics_set |= RTV_SPIPE; 1545 metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe; 1546 metrics_set |= RTV_RPIPE; 1547 return (metrics_set); 1548 } 1549 1550 /* 1551 * Given two sets of metrics (src and dst), use the dst values if they are 1552 * set. If a dst value is not set but the src value is set, then we use 1553 * the src value. 1554 * dst is updated with the new values. 1555 * This is used to merge information from a dce_t and ire_metrics, where the 1556 * dce values takes precedence. 1557 */ 1558 void 1559 rts_merge_metrics(iulp_t *dst, const iulp_t *src) 1560 { 1561 if (!src->iulp_set) 1562 return; 1563 1564 if (dst->iulp_ssthresh == 0) 1565 dst->iulp_ssthresh = src->iulp_ssthresh; 1566 if (dst->iulp_rtt == 0) 1567 dst->iulp_rtt = src->iulp_rtt; 1568 if (dst->iulp_rtt_sd == 0) 1569 dst->iulp_rtt_sd = src->iulp_rtt_sd; 1570 if (dst->iulp_spipe == 0) 1571 dst->iulp_spipe = src->iulp_spipe; 1572 if (dst->iulp_rpipe == 0) 1573 dst->iulp_rpipe = src->iulp_rpipe; 1574 if (dst->iulp_rtomax == 0) 1575 dst->iulp_rtomax = src->iulp_rtomax; 1576 if (dst->iulp_sack == 0) 1577 dst->iulp_sack = src->iulp_sack; 1578 if (dst->iulp_tstamp_ok == 0) 1579 dst->iulp_tstamp_ok = src->iulp_tstamp_ok; 1580 if (dst->iulp_wscale_ok == 0) 1581 dst->iulp_wscale_ok = src->iulp_wscale_ok; 1582 if (dst->iulp_ecn_ok == 0) 1583 dst->iulp_ecn_ok = src->iulp_ecn_ok; 1584 if (dst->iulp_pmtud_ok == 0) 1585 dst->iulp_pmtud_ok = src->iulp_pmtud_ok; 1586 if (dst->iulp_mtu == 0) 1587 dst->iulp_mtu = src->iulp_mtu; 1588 } 1589 1590 1591 /* 1592 * Takes a pointer to a routing message and extracts necessary info by looking 1593 * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers 1594 * passed (all of which must be valid). 1595 * 1596 * The bitmask of sockaddrs actually found in the message is returned, or zero 1597 * is returned in the case of an error. 1598 */ 1599 static int 1600 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp, 1601 in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp, 1602 in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp, 1603 tsol_rtsecattr_t *rtsecattr, int *error) 1604 { 1605 struct sockaddr *sa; 1606 int i; 1607 int addr_bits; 1608 int length; 1609 int found_addrs = 0; 1610 caddr_t cp; 1611 size_t size; 1612 struct sockaddr_dl *sdl; 1613 1614 *dst_addrp = ipv6_all_zeros; 1615 *gw_addrp = ipv6_all_zeros; 1616 *net_maskp = ipv6_all_zeros; 1617 *authorp = ipv6_all_zeros; 1618 *if_addrp = ipv6_all_zeros; 1619 *in_src_addrp = ipv6_all_zeros; 1620 *indexp = 0; 1621 *afp = AF_UNSPEC; 1622 rtsecattr->rtsa_cnt = 0; 1623 *error = 0; 1624 1625 /* 1626 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP, 1627 * RTA_IFA and RTA_AUTHOR. The rest will be added as we need them. 1628 */ 1629 cp = (caddr_t)&rtm[1]; 1630 length = rtm->rtm_msglen; 1631 for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) { 1632 /* 1633 * The address family we are working with starts out as 1634 * AF_UNSPEC, but is set to the one specified with the 1635 * destination address. 1636 * 1637 * If the "working" address family that has been set to 1638 * something other than AF_UNSPEC, then the address family of 1639 * subsequent sockaddrs must either be AF_UNSPEC (for 1640 * compatibility with older programs) or must be the same as our 1641 * "working" one. 1642 * 1643 * This code assumes that RTA_DST (1) comes first in the loop. 1644 */ 1645 sa = (struct sockaddr *)cp; 1646 addr_bits = (rtm->rtm_addrs & (1 << i)); 1647 if (addr_bits == 0) 1648 continue; 1649 switch (addr_bits) { 1650 case RTA_DST: 1651 size = rts_copyfromsockaddr(sa, dst_addrp); 1652 *afp = sa->sa_family; 1653 break; 1654 case RTA_GATEWAY: 1655 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1656 return (0); 1657 size = rts_copyfromsockaddr(sa, gw_addrp); 1658 break; 1659 case RTA_NETMASK: 1660 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1661 return (0); 1662 size = rts_copyfromsockaddr(sa, net_maskp); 1663 break; 1664 case RTA_IFP: 1665 if (sa->sa_family != AF_LINK && 1666 sa->sa_family != AF_UNSPEC) 1667 return (0); 1668 sdl = (struct sockaddr_dl *)cp; 1669 *indexp = sdl->sdl_index; 1670 size = sizeof (struct sockaddr_dl); 1671 break; 1672 case RTA_SRC: 1673 /* Source address of the incoming packet */ 1674 size = rts_copyfromsockaddr(sa, in_src_addrp); 1675 *afp = sa->sa_family; 1676 break; 1677 case RTA_IFA: 1678 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1679 return (0); 1680 size = rts_copyfromsockaddr(sa, if_addrp); 1681 break; 1682 case RTA_AUTHOR: 1683 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1684 return (0); 1685 size = rts_copyfromsockaddr(sa, authorp); 1686 break; 1687 default: 1688 return (0); 1689 } 1690 if (size == 0) 1691 return (0); 1692 cp += size; 1693 found_addrs |= addr_bits; 1694 } 1695 1696 /* 1697 * Parse the routing message and look for any security- 1698 * related attributes for the route. For each valid 1699 * attribute, allocate/obtain the corresponding kernel 1700 * route security attributes. 1701 */ 1702 if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) { 1703 *error = tsol_rtsa_init(rtm, rtsecattr, cp); 1704 ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX); 1705 } 1706 1707 return (found_addrs); 1708 } 1709 1710 /* 1711 * Fills the message with the given info. 1712 */ 1713 static void 1714 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, 1715 ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author, 1716 ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, 1717 const tsol_gc_t *gc) 1718 { 1719 rt_msghdr_t *rtm; 1720 sin_t *sin; 1721 size_t data_size, header_size; 1722 uchar_t *cp; 1723 int i; 1724 1725 ASSERT(mp != NULL); 1726 /* 1727 * First find the type of the message 1728 * and its length. 1729 */ 1730 header_size = rts_header_msg_size(type); 1731 /* 1732 * Now find the size of the data 1733 * that follows the message header. 1734 */ 1735 data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0); 1736 1737 rtm = (rt_msghdr_t *)mp->b_rptr; 1738 mp->b_wptr = &mp->b_rptr[header_size]; 1739 cp = mp->b_wptr; 1740 bzero(cp, data_size); 1741 for (i = 0; i < RTA_NUMBITS; i++) { 1742 sin = (sin_t *)cp; 1743 switch (rtm_addrs & (1 << i)) { 1744 case RTA_DST: 1745 sin->sin_addr.s_addr = dst; 1746 sin->sin_family = AF_INET; 1747 cp += sizeof (sin_t); 1748 break; 1749 case RTA_GATEWAY: 1750 sin->sin_addr.s_addr = gateway; 1751 sin->sin_family = AF_INET; 1752 cp += sizeof (sin_t); 1753 break; 1754 case RTA_NETMASK: 1755 sin->sin_addr.s_addr = mask; 1756 sin->sin_family = AF_INET; 1757 cp += sizeof (sin_t); 1758 break; 1759 case RTA_IFP: 1760 cp += ill_dls_info((struct sockaddr_dl *)cp, ill); 1761 break; 1762 case RTA_IFA: 1763 sin->sin_addr.s_addr = ifaddr; 1764 sin->sin_family = AF_INET; 1765 cp += sizeof (sin_t); 1766 break; 1767 case RTA_SRC: 1768 sin->sin_addr.s_addr = src_addr; 1769 sin->sin_family = AF_INET; 1770 cp += sizeof (sin_t); 1771 break; 1772 case RTA_AUTHOR: 1773 sin->sin_addr.s_addr = author; 1774 sin->sin_family = AF_INET; 1775 cp += sizeof (sin_t); 1776 break; 1777 case RTA_BRD: 1778 /* 1779 * RTA_BRD is used typically to specify a point-to-point 1780 * destination address. 1781 */ 1782 sin->sin_addr.s_addr = brd_addr; 1783 sin->sin_family = AF_INET; 1784 cp += sizeof (sin_t); 1785 break; 1786 } 1787 } 1788 1789 if (gc != NULL) { 1790 rtm_ext_t *rtm_ext; 1791 struct rtsa_s *rp_dst; 1792 tsol_rtsecattr_t *rsap; 1793 1794 ASSERT(gc->gc_grp != NULL); 1795 ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock)); 1796 1797 rtm_ext = (rtm_ext_t *)cp; 1798 rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR; 1799 rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1); 1800 1801 rsap = (tsol_rtsecattr_t *)(rtm_ext + 1); 1802 rsap->rtsa_cnt = 1; 1803 rp_dst = rsap->rtsa_attr; 1804 1805 ASSERT(gc->gc_db != NULL); 1806 bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst)); 1807 cp = (uchar_t *)rp_dst; 1808 } 1809 1810 mp->b_wptr = cp; 1811 mp->b_cont = NULL; 1812 /* 1813 * set the fields that are common to 1814 * to different messages. 1815 */ 1816 rtm->rtm_msglen = (short)(header_size + data_size); 1817 rtm->rtm_version = RTM_VERSION; 1818 rtm->rtm_type = (uchar_t)type; 1819 } 1820 1821 /* 1822 * Allocates and initializes a routing socket message. 1823 * Note that sacnt is either zero or one. 1824 */ 1825 mblk_t * 1826 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt) 1827 { 1828 size_t length; 1829 mblk_t *mp; 1830 1831 length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt); 1832 mp = allocb(length, BPRI_MED); 1833 if (mp == NULL) 1834 return (mp); 1835 bzero(mp->b_rptr, length); 1836 return (mp); 1837 } 1838 1839 /* 1840 * Returns the size of the routing 1841 * socket message header size. 1842 */ 1843 size_t 1844 rts_header_msg_size(int type) 1845 { 1846 switch (type) { 1847 case RTM_DELADDR: 1848 case RTM_NEWADDR: 1849 case RTM_CHGADDR: 1850 case RTM_FREEADDR: 1851 return (sizeof (ifa_msghdr_t)); 1852 case RTM_IFINFO: 1853 return (sizeof (if_msghdr_t)); 1854 default: 1855 return (sizeof (rt_msghdr_t)); 1856 } 1857 } 1858 1859 /* 1860 * Returns the size of the message needed with the given rtm_addrs and family. 1861 * 1862 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are 1863 * of the same family (currently either AF_INET or AF_INET6). 1864 */ 1865 size_t 1866 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt) 1867 { 1868 int i; 1869 size_t length = 0; 1870 1871 for (i = 0; i < RTA_NUMBITS; i++) { 1872 switch (rtm_addrs & (1 << i)) { 1873 case RTA_IFP: 1874 length += sizeof (struct sockaddr_dl); 1875 break; 1876 case RTA_DST: 1877 case RTA_GATEWAY: 1878 case RTA_NETMASK: 1879 case RTA_SRC: 1880 case RTA_IFA: 1881 case RTA_AUTHOR: 1882 case RTA_BRD: 1883 ASSERT(af == AF_INET || af == AF_INET6); 1884 switch (af) { 1885 case AF_INET: 1886 length += sizeof (sin_t); 1887 break; 1888 case AF_INET6: 1889 length += sizeof (sin6_t); 1890 break; 1891 } 1892 break; 1893 } 1894 } 1895 if (sacnt > 0) 1896 length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt); 1897 1898 return (length); 1899 } 1900 1901 /* 1902 * This routine is called to generate a message to the routing 1903 * socket indicating that a redirect has occured, a routing lookup 1904 * has failed, or that a protocol has detected timeouts to a particular 1905 * destination. This routine is called for message types RTM_LOSING, 1906 * RTM_REDIRECT, and RTM_MISS. 1907 */ 1908 void 1909 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, 1910 ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs, 1911 ip_stack_t *ipst) 1912 { 1913 rt_msghdr_t *rtm; 1914 mblk_t *mp; 1915 1916 if (rtm_addrs == 0) 1917 return; 1918 mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0); 1919 if (mp == NULL) 1920 return; 1921 rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0, 1922 author, 0, NULL, mp, NULL); 1923 rtm = (rt_msghdr_t *)mp->b_rptr; 1924 rtm->rtm_flags = flags; 1925 rtm->rtm_errno = error; 1926 rtm->rtm_flags |= RTF_DONE; 1927 rtm->rtm_addrs = rtm_addrs; 1928 rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst); 1929 } 1930 1931 /* 1932 * This routine is called to generate a message to the routing 1933 * socket indicating that the status of a network interface has changed. 1934 * Message type generated RTM_IFINFO. 1935 */ 1936 void 1937 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags) 1938 { 1939 ip_rts_xifmsg(ipif, 0, 0, flags); 1940 } 1941 1942 void 1943 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags) 1944 { 1945 if_msghdr_t *ifm; 1946 mblk_t *mp; 1947 sa_family_t af; 1948 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 1949 1950 /* 1951 * This message should be generated only 1952 * when the physical device is changing 1953 * state. 1954 */ 1955 if (ipif->ipif_id != 0) 1956 return; 1957 if (ipif->ipif_isv6) { 1958 af = AF_INET6; 1959 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); 1960 if (mp == NULL) 1961 return; 1962 rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros, 1963 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, 1964 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, 1965 ipif->ipif_ill, mp, NULL); 1966 } else { 1967 af = AF_INET; 1968 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); 1969 if (mp == NULL) 1970 return; 1971 rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0, 1972 ipif->ipif_ill, mp, NULL); 1973 } 1974 ifm = (if_msghdr_t *)mp->b_rptr; 1975 ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; 1976 ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags | 1977 ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear; 1978 rts_getifdata(&ifm->ifm_data, ipif); 1979 ifm->ifm_addrs = RTA_IFP; 1980 1981 if (flags & RTSQ_DEFAULT) { 1982 flags = RTSQ_ALL; 1983 /* 1984 * If this message is for an underlying interface, prevent 1985 * "normal" (IPMP-unaware) routing sockets from seeing it. 1986 */ 1987 if (IS_UNDER_IPMP(ipif->ipif_ill)) 1988 flags &= ~RTSQ_NORMAL; 1989 } 1990 1991 rts_queue_input(mp, NULL, af, flags, ipst); 1992 } 1993 1994 /* 1995 * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message; 1996 * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR) 1997 * generate the ifa_msghdr_t message. 1998 */ 1999 static void 2000 rts_new_rtsmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) 2001 { 2002 int rtm_addrs; 2003 mblk_t *mp; 2004 ifa_msghdr_t *ifam; 2005 rt_msghdr_t *rtm; 2006 sa_family_t af; 2007 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2008 2009 /* 2010 * Do not report unspecified address if this is the RTM_CHGADDR or 2011 * RTM_FREEADDR message. 2012 */ 2013 if (cmd == RTM_CHGADDR || cmd == RTM_FREEADDR) { 2014 if (!ipif->ipif_isv6) { 2015 if (ipif->ipif_lcl_addr == INADDR_ANY) 2016 return; 2017 } else if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) { 2018 return; 2019 } 2020 } 2021 2022 if (ipif->ipif_isv6) 2023 af = AF_INET6; 2024 else 2025 af = AF_INET; 2026 2027 if (cmd == RTM_ADD || cmd == RTM_DELETE) 2028 rtm_addrs = (RTA_DST | RTA_NETMASK); 2029 else 2030 rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP); 2031 2032 mp = rts_alloc_msg(cmd, rtm_addrs, af, 0); 2033 if (mp == NULL) 2034 return; 2035 2036 if (cmd != RTM_ADD && cmd != RTM_DELETE) { 2037 switch (af) { 2038 case AF_INET: 2039 rts_fill_msg(cmd, rtm_addrs, 0, 2040 ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr, 2041 ipif->ipif_pp_dst_addr, 0, 2042 ipif->ipif_lcl_addr, ipif->ipif_ill, 2043 mp, NULL); 2044 break; 2045 case AF_INET6: 2046 rts_fill_msg_v6(cmd, rtm_addrs, 2047 &ipv6_all_zeros, &ipif->ipif_v6net_mask, 2048 &ipv6_all_zeros, &ipif->ipif_v6lcl_addr, 2049 &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, 2050 &ipif->ipif_v6lcl_addr, ipif->ipif_ill, 2051 mp, NULL); 2052 break; 2053 } 2054 ifam = (ifa_msghdr_t *)mp->b_rptr; 2055 ifam->ifam_index = 2056 ipif->ipif_ill->ill_phyint->phyint_ifindex; 2057 ifam->ifam_metric = ipif->ipif_ill->ill_metric; 2058 ifam->ifam_flags = ((cmd == RTM_NEWADDR) ? RTF_UP : 0); 2059 ifam->ifam_addrs = rtm_addrs; 2060 } else { 2061 switch (af) { 2062 case AF_INET: 2063 rts_fill_msg(cmd, rtm_addrs, 2064 ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0, 2065 0, 0, 0, 0, NULL, mp, NULL); 2066 break; 2067 case AF_INET6: 2068 rts_fill_msg_v6(cmd, rtm_addrs, 2069 &ipif->ipif_v6lcl_addr, 2070 &ipif->ipif_v6net_mask, &ipv6_all_zeros, 2071 &ipv6_all_zeros, &ipv6_all_zeros, 2072 &ipv6_all_zeros, &ipv6_all_zeros, 2073 NULL, mp, NULL); 2074 break; 2075 } 2076 rtm = (rt_msghdr_t *)mp->b_rptr; 2077 rtm->rtm_index = 2078 ipif->ipif_ill->ill_phyint->phyint_ifindex; 2079 rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0); 2080 rtm->rtm_errno = error; 2081 if (error == 0) 2082 rtm->rtm_flags |= RTF_DONE; 2083 rtm->rtm_addrs = rtm_addrs; 2084 } 2085 rts_queue_input(mp, NULL, af, flags, ipst); 2086 } 2087 2088 /* 2089 * This is called to generate messages to the routing socket 2090 * indicating a network interface has had addresses associated with it. 2091 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>. 2092 */ 2093 void 2094 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) 2095 { 2096 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2097 2098 if (flags & RTSQ_DEFAULT) { 2099 flags = RTSQ_ALL; 2100 /* 2101 * If this message is for an underlying interface, prevent 2102 * "normal" (IPMP-unaware) routing sockets from seeing it. 2103 */ 2104 if (IS_UNDER_IPMP(ipif->ipif_ill)) 2105 flags &= ~RTSQ_NORMAL; 2106 } 2107 2108 /* 2109 * Let conn_ixa caching know that source address selection 2110 * changed 2111 */ 2112 if (cmd == RTM_ADD || cmd == RTM_DELETE) 2113 ip_update_source_selection(ipst); 2114 2115 /* 2116 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR. 2117 * if the request is ADD, send RTM_NEWADDR and RTM_ADD. 2118 * otherwise simply send the request. 2119 */ 2120 switch (cmd) { 2121 case RTM_ADD: 2122 rts_new_rtsmsg(RTM_NEWADDR, error, ipif, flags); 2123 rts_new_rtsmsg(RTM_ADD, error, ipif, flags); 2124 break; 2125 case RTM_DELETE: 2126 rts_new_rtsmsg(RTM_DELETE, error, ipif, flags); 2127 rts_new_rtsmsg(RTM_DELADDR, error, ipif, flags); 2128 break; 2129 default: 2130 rts_new_rtsmsg(cmd, error, ipif, flags); 2131 break; 2132 } 2133 } 2134 2135 /* 2136 * Based on the address family specified in a sockaddr, copy the address field 2137 * into an in6_addr_t. 2138 * 2139 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for 2140 * compatibility with programs that leave the family cleared in the sockaddr. 2141 * Callers of rts_copyfromsockaddr should check the family themselves if they 2142 * wish to verify its value. 2143 * 2144 * In the case of AF_INET6, a check is made to ensure that address is not an 2145 * IPv4-mapped address. 2146 */ 2147 size_t 2148 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp) 2149 { 2150 switch (sa->sa_family) { 2151 case AF_INET: 2152 case AF_UNSPEC: 2153 IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp); 2154 return (sizeof (sin_t)); 2155 case AF_INET6: 2156 *addrp = ((sin6_t *)sa)->sin6_addr; 2157 if (IN6_IS_ADDR_V4MAPPED(addrp)) 2158 return (0); 2159 return (sizeof (sin6_t)); 2160 default: 2161 return (0); 2162 } 2163 } 2164