1 /* 2 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright (c) 1988, 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 38 */ 39 40 /* 41 * This file contains routines that processes routing socket requests. 42 */ 43 44 #include <sys/types.h> 45 #include <sys/stream.h> 46 #include <sys/stropts.h> 47 #include <sys/ddi.h> 48 #include <sys/strsubr.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/policy.h> 52 #include <sys/zone.h> 53 54 #include <sys/systm.h> 55 #include <sys/param.h> 56 #include <sys/socket.h> 57 #include <sys/strsun.h> 58 #include <net/if.h> 59 #include <net/route.h> 60 #include <netinet/in.h> 61 #include <net/if_dl.h> 62 #include <netinet/ip6.h> 63 64 #include <inet/common.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_if.h> 68 #include <inet/ip_ire.h> 69 #include <inet/ip_ftable.h> 70 #include <inet/ip_rts.h> 71 72 #include <inet/ipclassifier.h> 73 74 #include <sys/tsol/tndb.h> 75 #include <sys/tsol/tnet.h> 76 77 #define RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \ 78 (rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type)) 79 80 static size_t rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp); 81 static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, 82 ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, 83 ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, 84 const tsol_gc_t *); 85 static int rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, 86 in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp, 87 in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp, 88 sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error); 89 static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif); 90 static int rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics); 91 static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, 92 const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af); 93 static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics); 94 static ire_t *ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, 95 ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid, 96 const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, 97 ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp); 98 static ire_t *ire_lookup_v6(const in6_addr_t *dst_addr_v6, 99 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, 100 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, 101 ip_stack_t *ipst, ire_t **pifire, 102 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp); 103 104 /* 105 * Send `mp' to all eligible routing queues. A queue is ineligible if: 106 * 107 * 1. SO_USELOOPBACK is off and it is not the originating queue. 108 * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'. 109 * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'. 110 * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC. 111 */ 112 void 113 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, 114 ip_stack_t *ipst) 115 { 116 mblk_t *mp1; 117 conn_t *connp, *next_connp; 118 119 /* 120 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be 121 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point. 122 */ 123 ASSERT(!(flags & RTSQ_DEFAULT)); 124 125 mutex_enter(&ipst->ips_rts_clients->connf_lock); 126 connp = ipst->ips_rts_clients->connf_head; 127 128 for (; connp != NULL; connp = next_connp) { 129 next_connp = connp->conn_next; 130 /* 131 * If there was a family specified when this routing socket was 132 * created and it doesn't match the family of the message to 133 * copy, then continue. 134 */ 135 if ((connp->conn_proto != AF_UNSPEC) && 136 (connp->conn_proto != af)) 137 continue; 138 139 /* 140 * Queue the message only if the conn_t and flags match. 141 */ 142 if (connp->conn_rtaware & RTAW_UNDER_IPMP) { 143 if (!(flags & RTSQ_UNDER_IPMP)) 144 continue; 145 } else { 146 if (!(flags & RTSQ_NORMAL)) 147 continue; 148 } 149 /* 150 * For the originating queue, we only copy the message upstream 151 * if loopback is set. For others reading on the routing 152 * socket, we check if there is room upstream for a copy of the 153 * message. 154 */ 155 if ((o_connp == connp) && connp->conn_useloopback == 0) { 156 connp = connp->conn_next; 157 continue; 158 } 159 CONN_INC_REF(connp); 160 mutex_exit(&ipst->ips_rts_clients->connf_lock); 161 /* Pass to rts_input */ 162 if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld : 163 canputnext(connp->conn_rq)) { 164 mp1 = dupmsg(mp); 165 if (mp1 == NULL) 166 mp1 = copymsg(mp); 167 /* Note that we pass a NULL ira to rts_input */ 168 if (mp1 != NULL) 169 (connp->conn_recv)(connp, mp1, NULL, NULL); 170 } 171 172 mutex_enter(&ipst->ips_rts_clients->connf_lock); 173 /* reload next_connp since conn_next may have changed */ 174 next_connp = connp->conn_next; 175 CONN_DEC_REF(connp); 176 } 177 mutex_exit(&ipst->ips_rts_clients->connf_lock); 178 freemsg(mp); 179 } 180 181 /* 182 * Takes an ire and sends an ack to all the routing sockets. This 183 * routine is used 184 * - when a route is created/deleted through the ioctl interface. 185 * - when a stale redirect is deleted 186 */ 187 void 188 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) 189 { 190 mblk_t *mp; 191 rt_msghdr_t *rtm; 192 int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY); 193 sa_family_t af = { 0 }; 194 in6_addr_t gw_addr_v6; 195 196 if (ire == NULL) 197 return; 198 ASSERT(ire->ire_ipversion == IPV4_VERSION || 199 ire->ire_ipversion == IPV6_VERSION); 200 201 ASSERT(!(ire->ire_type & IRE_IF_CLONE)); 202 mp = NULL; 203 204 if (ire->ire_flags & RTF_SETSRC) 205 rtm_addrs |= RTA_SRC; 206 207 switch (ire->ire_ipversion) { 208 case IPV4_VERSION: 209 af = AF_INET; 210 mp = rts_alloc_msg(type, rtm_addrs, af, 0); 211 if (mp == NULL) 212 return; 213 rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask, 214 ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL, 215 mp, NULL); 216 break; 217 case IPV6_VERSION: 218 af = AF_INET6; 219 mp = rts_alloc_msg(type, rtm_addrs, af, 0); 220 if (mp == NULL) 221 return; 222 mutex_enter(&ire->ire_lock); 223 gw_addr_v6 = ire->ire_gateway_addr_v6; 224 mutex_exit(&ire->ire_lock); 225 rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6, 226 &ire->ire_mask_v6, &gw_addr_v6, 227 &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros, 228 &ipv6_all_zeros, NULL, mp, NULL); 229 break; 230 } 231 rtm = (rt_msghdr_t *)mp->b_rptr; 232 mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen]; 233 rtm->rtm_addrs = rtm_addrs; 234 rtm->rtm_flags = ire->ire_flags; 235 if (error != 0) 236 rtm->rtm_errno = error; 237 else 238 rtm->rtm_flags |= RTF_DONE; 239 rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst); 240 } 241 242 /* 243 * This is a call from the RTS module 244 * indicating that this is a Routing Socket 245 * Stream. Insert this conn_t in routing 246 * socket client list. 247 */ 248 void 249 ip_rts_register(conn_t *connp) 250 { 251 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 252 253 connp->conn_useloopback = 1; 254 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); 255 } 256 257 /* 258 * This is a call from the RTS module indicating that it is closing. 259 */ 260 void 261 ip_rts_unregister(conn_t *connp) 262 { 263 ipcl_hash_remove(connp); 264 } 265 266 /* 267 * Processes requests received on a routing socket. It extracts all the 268 * arguments and calls the appropriate function to process the request. 269 * 270 * RTA_SRC bit flag requests are sent by 'route -setsrc'. 271 * 272 * In general, this function does not consume the message supplied but rather 273 * sends the message upstream with an appropriate UNIX errno. 274 */ 275 int 276 ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr) 277 { 278 rt_msghdr_t *rtm = NULL; 279 in6_addr_t dst_addr_v6; 280 in6_addr_t src_addr_v6; 281 in6_addr_t gw_addr_v6; 282 in6_addr_t net_mask_v6; 283 in6_addr_t author_v6; 284 in6_addr_t if_addr_v6; 285 mblk_t *mp1; 286 ire_t *ire = NULL; 287 ire_t *ifire = NULL; 288 ipaddr_t v4setsrc; 289 in6_addr_t v6setsrc = ipv6_all_zeros; 290 tsol_ire_gw_secattr_t *gwattr = NULL; 291 int error = 0; 292 int match_flags = MATCH_IRE_DSTONLY; 293 int match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW; 294 int found_addrs; 295 sa_family_t af; 296 ipaddr_t dst_addr; 297 ipaddr_t gw_addr; 298 ipaddr_t src_addr; 299 ipaddr_t net_mask; 300 ushort_t index; 301 boolean_t gcgrp_xtraref = B_FALSE; 302 tsol_gcgrp_addr_t ga; 303 tsol_rtsecattr_t rtsecattr; 304 struct rtsa_s *rtsap = NULL; 305 tsol_gcgrp_t *gcgrp = NULL; 306 tsol_gc_t *gc = NULL; 307 ts_label_t *tsl = NULL; 308 zoneid_t zoneid; 309 ip_stack_t *ipst; 310 ill_t *ill = NULL; 311 312 zoneid = connp->conn_zoneid; 313 ipst = connp->conn_netstack->netstack_ip; 314 net_mask = 0; 315 src_addr = 0; 316 dst_addr = 0; 317 gw_addr = 0; 318 319 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { 320 freemsg(mp); 321 error = EINVAL; 322 goto done; 323 } 324 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 325 freemsg(mp); 326 error = EINVAL; 327 goto done; 328 } 329 330 /* 331 * Check the routing message for basic consistency including the 332 * version number and that the number of octets written is the same 333 * as specified by the rtm_msglen field. 334 * 335 * At this point, an error can be delivered back via rtm_errno. 336 */ 337 rtm = (rt_msghdr_t *)mp->b_rptr; 338 if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) { 339 error = EINVAL; 340 goto done; 341 } 342 if (rtm->rtm_version != RTM_VERSION) { 343 error = EPROTONOSUPPORT; 344 goto done; 345 } 346 347 /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */ 348 if (rtm->rtm_type != RTM_GET && 349 rtm->rtm_type != RTM_RESOLVE && 350 (ioc_cr == NULL || 351 secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) { 352 error = EPERM; 353 goto done; 354 } 355 356 found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6, 357 &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr, 358 &error); 359 360 if (error != 0) 361 goto done; 362 363 if ((found_addrs & RTA_DST) == 0) { 364 error = EINVAL; 365 goto done; 366 } 367 368 /* 369 * Based on the address family of the destination address, determine 370 * the destination, gateway and netmask and return the appropriate error 371 * if an unknown address family was specified (following the errno 372 * values that 4.4BSD-Lite2 returns.) 373 */ 374 switch (af) { 375 case AF_INET: 376 IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr); 377 IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr); 378 IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr); 379 if (((found_addrs & RTA_NETMASK) == 0) || 380 (rtm->rtm_flags & RTF_HOST)) 381 net_mask = IP_HOST_MASK; 382 else 383 IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask); 384 break; 385 case AF_INET6: 386 if (((found_addrs & RTA_NETMASK) == 0) || 387 (rtm->rtm_flags & RTF_HOST)) 388 net_mask_v6 = ipv6_all_ones; 389 break; 390 default: 391 /* 392 * These errno values are meant to be compatible with 393 * 4.4BSD-Lite2 for the given message types. 394 */ 395 switch (rtm->rtm_type) { 396 case RTM_ADD: 397 case RTM_DELETE: 398 error = ESRCH; 399 goto done; 400 case RTM_GET: 401 case RTM_CHANGE: 402 error = EAFNOSUPPORT; 403 goto done; 404 default: 405 error = EOPNOTSUPP; 406 goto done; 407 } 408 } 409 410 /* 411 * At this point, the address family must be something known. 412 */ 413 ASSERT(af == AF_INET || af == AF_INET6); 414 415 /* Handle RTA_IFP */ 416 if (index != 0) { 417 ipif_t *ipif; 418 lookup: 419 ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst); 420 if (ill == NULL) { 421 error = EINVAL; 422 goto done; 423 } 424 425 /* 426 * Since all interfaces in an IPMP group must be equivalent, 427 * we prevent changes to a specific underlying interface's 428 * routing configuration. However, for backward compatibility, 429 * we intepret a request to add a route on an underlying 430 * interface as a request to add a route on its IPMP interface. 431 */ 432 if (IS_UNDER_IPMP(ill)) { 433 switch (rtm->rtm_type) { 434 case RTM_CHANGE: 435 case RTM_DELETE: 436 error = EINVAL; 437 goto done; 438 case RTM_ADD: 439 index = ipmp_ill_get_ipmp_ifindex(ill); 440 ill_refrele(ill); 441 if (index == 0) { 442 ill = NULL; /* already refrele'd */ 443 error = EINVAL; 444 goto done; 445 } 446 goto lookup; 447 } 448 } 449 450 match_flags |= MATCH_IRE_ILL; 451 /* 452 * This provides the same zoneid as in Solaris 10 453 * that -ifp picks the zoneid from the first ipif on the ill. 454 * But it might not be useful since the first ipif will always 455 * have the same zoneid as the ill. 456 */ 457 ipif = ipif_get_next_ipif(NULL, ill); 458 if (ipif != NULL) { 459 zoneid = ipif->ipif_zoneid; 460 ipif_refrele(ipif); 461 } 462 } 463 464 /* 465 * If a netmask was supplied in the message, then subsequent route 466 * lookups will attempt to match on the netmask as well. 467 */ 468 if ((found_addrs & RTA_NETMASK) != 0) 469 match_flags |= MATCH_IRE_MASK; 470 471 /* 472 * We only process any passed-in route security attributes for 473 * either RTM_ADD or RTM_CHANGE message; We overload them 474 * to do an RTM_GET as a different label; ignore otherwise. 475 */ 476 if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE || 477 rtm->rtm_type == RTM_GET) { 478 ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX); 479 if (rtsecattr.rtsa_cnt > 0) 480 rtsap = &rtsecattr.rtsa_attr[0]; 481 } 482 483 switch (rtm->rtm_type) { 484 case RTM_ADD: 485 /* if we are adding a route, gateway is a must */ 486 if ((found_addrs & RTA_GATEWAY) == 0) { 487 error = EINVAL; 488 goto done; 489 } 490 491 /* Multirouting does not support net routes. */ 492 if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) == 493 RTF_MULTIRT) { 494 error = EADDRNOTAVAIL; 495 goto done; 496 } 497 498 /* 499 * Multirouting and user-specified source addresses 500 * do not support interface based routing. 501 * Assigning a source address to an interface based 502 * route is achievable by plumbing a new ipif and 503 * setting up the interface route via this ipif, 504 * though. 505 */ 506 if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) { 507 if ((rtm->rtm_flags & RTF_GATEWAY) == 0) { 508 error = EADDRNOTAVAIL; 509 goto done; 510 } 511 } 512 513 switch (af) { 514 case AF_INET: 515 if (src_addr != INADDR_ANY) { 516 uint_t type; 517 518 /* 519 * The RTF_SETSRC flag is present, check that 520 * the supplied src address is not the loopback 521 * address. This would produce martian packets. 522 */ 523 if (src_addr == htonl(INADDR_LOOPBACK)) { 524 error = EINVAL; 525 goto done; 526 } 527 /* 528 * Also check that the supplied address is a 529 * valid, local one. Only allow IFF_UP ones 530 */ 531 type = ip_type_v4(src_addr, ipst); 532 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { 533 error = EADDRNOTAVAIL; 534 goto done; 535 } 536 } else { 537 /* 538 * The RTF_SETSRC modifier must be associated 539 * to a non-null source address. 540 */ 541 if (rtm->rtm_flags & RTF_SETSRC) { 542 error = EINVAL; 543 goto done; 544 } 545 } 546 547 error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr, 548 rtm->rtm_flags, ill, &ire, B_FALSE, 549 rtsap, ipst, zoneid); 550 if (ill != NULL) 551 ASSERT(!MUTEX_HELD(&ill->ill_lock)); 552 break; 553 case AF_INET6: 554 if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) { 555 uint_t type; 556 557 /* 558 * The RTF_SETSRC flag is present, check that 559 * the supplied src address is not the loopback 560 * address. This would produce martian packets. 561 */ 562 if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) { 563 error = EINVAL; 564 goto done; 565 } 566 /* 567 * Also check that the supplied address is a 568 * valid, local one. Only allow UP ones. 569 */ 570 type = ip_type_v6(&src_addr_v6, ipst); 571 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { 572 error = EADDRNOTAVAIL; 573 goto done; 574 } 575 576 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 577 &gw_addr_v6, &src_addr_v6, rtm->rtm_flags, 578 ill, &ire, rtsap, ipst, zoneid); 579 break; 580 } 581 /* 582 * The RTF_SETSRC modifier must be associated 583 * to a non-null source address. 584 */ 585 if (rtm->rtm_flags & RTF_SETSRC) { 586 error = EINVAL; 587 goto done; 588 } 589 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 590 &gw_addr_v6, NULL, rtm->rtm_flags, 591 ill, &ire, rtsap, ipst, zoneid); 592 if (ill != NULL) 593 ASSERT(!MUTEX_HELD(&ill->ill_lock)); 594 break; 595 } 596 if (error != 0) 597 goto done; 598 ASSERT(ire != NULL); 599 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 600 break; 601 case RTM_DELETE: 602 /* if we are deleting a route, gateway is a must */ 603 if ((found_addrs & RTA_GATEWAY) == 0) { 604 error = EINVAL; 605 goto done; 606 } 607 /* 608 * The RTF_SETSRC modifier does not make sense 609 * when deleting a route. 610 */ 611 if (rtm->rtm_flags & RTF_SETSRC) { 612 error = EINVAL; 613 goto done; 614 } 615 616 switch (af) { 617 case AF_INET: 618 error = ip_rt_delete(dst_addr, net_mask, gw_addr, 619 found_addrs, rtm->rtm_flags, ill, B_FALSE, 620 ipst, zoneid); 621 break; 622 case AF_INET6: 623 error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6, 624 &gw_addr_v6, found_addrs, rtm->rtm_flags, ill, 625 ipst, zoneid); 626 break; 627 } 628 break; 629 case RTM_GET: 630 case RTM_CHANGE: 631 /* 632 * In the case of RTM_GET, the forwarding table should be 633 * searched recursively. Also, if a gateway was 634 * specified then the gateway address must also be matched. 635 * 636 * In the case of RTM_CHANGE, the gateway address (if supplied) 637 * is the new gateway address so matching on the gateway address 638 * is not done. This can lead to ambiguity when looking up the 639 * route to change as usually only the destination (and netmask, 640 * if supplied) is used for the lookup. However if a RTA_IFP 641 * sockaddr is also supplied, it can disambiguate which route to 642 * change provided the ambigous routes are tied to distinct 643 * ill's (or interface indices). If the routes are not tied to 644 * any particular interfaces (for example, with traditional 645 * gateway routes), then a RTA_IFP sockaddr will be of no use as 646 * it won't match any such routes. 647 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE, 648 * except when RTM_CHANGE is combined to RTF_SETSRC. 649 */ 650 if (((found_addrs & RTA_SRC) != 0) && 651 ((rtm->rtm_type == RTM_GET) || 652 !(rtm->rtm_flags & RTF_SETSRC))) { 653 error = EOPNOTSUPP; 654 goto done; 655 } 656 657 if (rtm->rtm_type == RTM_GET) { 658 match_flags |= MATCH_IRE_SECATTR; 659 match_flags_local |= MATCH_IRE_SECATTR; 660 if ((found_addrs & RTA_GATEWAY) != 0) 661 match_flags |= MATCH_IRE_GW; 662 if (ioc_cr) 663 tsl = crgetlabel(ioc_cr); 664 if (rtsap != NULL) { 665 if (rtsa_validate(rtsap) != 0) { 666 error = EINVAL; 667 goto done; 668 } 669 if (tsl != NULL && 670 crgetzoneid(ioc_cr) != GLOBAL_ZONEID && 671 (tsl->tsl_doi != rtsap->rtsa_doi || 672 !bldominates(&tsl->tsl_label, 673 &rtsap->rtsa_slrange.lower_bound))) { 674 error = EPERM; 675 goto done; 676 } 677 tsl = labelalloc( 678 &rtsap->rtsa_slrange.lower_bound, 679 rtsap->rtsa_doi, KM_NOSLEEP); 680 } 681 } 682 if (rtm->rtm_type == RTM_CHANGE) { 683 if ((found_addrs & RTA_GATEWAY) && 684 (rtm->rtm_flags & RTF_SETSRC)) { 685 /* 686 * Do not want to change the gateway, 687 * but rather the source address. 688 */ 689 match_flags |= MATCH_IRE_GW; 690 } 691 } 692 693 /* 694 * If the netmask is all ones (either as supplied or as derived 695 * above), then first check for an IRE_LOOPBACK or 696 * IRE_LOCAL entry. 697 * 698 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL 699 * entry, then look for any other type of IRE. 700 */ 701 switch (af) { 702 case AF_INET: 703 if (net_mask == IP_HOST_MASK) { 704 ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr, 705 IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid, 706 tsl, match_flags_local, 0, ipst, NULL); 707 } 708 if (ire == NULL) { 709 ire = ire_lookup_v4(dst_addr, net_mask, 710 gw_addr, ill, zoneid, tsl, match_flags, 711 ipst, &ifire, &v4setsrc, &gwattr); 712 IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc); 713 } 714 break; 715 case AF_INET6: 716 if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) { 717 ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL, 718 &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL, 719 zoneid, tsl, match_flags_local, 0, ipst, 720 NULL); 721 } 722 if (ire == NULL) { 723 ire = ire_lookup_v6(&dst_addr_v6, 724 &net_mask_v6, &gw_addr_v6, ill, zoneid, 725 tsl, match_flags, ipst, &ifire, &v6setsrc, 726 &gwattr); 727 } 728 break; 729 } 730 if (tsl != NULL && tsl != crgetlabel(ioc_cr)) 731 label_rele(tsl); 732 733 if (ire == NULL) { 734 error = ESRCH; 735 goto done; 736 } 737 /* 738 * Want to return failure if we get an IRE_NOROUTE from 739 * ire_route_recursive 740 */ 741 if (ire->ire_type & IRE_NOROUTE) { 742 ire_refrele(ire); 743 ire = NULL; 744 error = ESRCH; 745 goto done; 746 } 747 748 /* we know the IRE before we come here */ 749 switch (rtm->rtm_type) { 750 case RTM_GET: 751 mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af); 752 if (mp1 == NULL) { 753 error = ENOBUFS; 754 goto done; 755 } 756 freemsg(mp); 757 mp = mp1; 758 rtm = (rt_msghdr_t *)mp->b_rptr; 759 break; 760 case RTM_CHANGE: 761 /* 762 * Do not allow to the multirouting state of a route 763 * to be changed. This aims to prevent undesirable 764 * stages where both multirt and non-multirt routes 765 * for the same destination are declared. 766 */ 767 if ((ire->ire_flags & RTF_MULTIRT) != 768 (rtm->rtm_flags & RTF_MULTIRT)) { 769 error = EINVAL; 770 goto done; 771 } 772 /* 773 * Note that we do not need to do 774 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change 775 * in metrics or gateway will not affect existing 776 * routes since it does not create a more specific 777 * route. 778 */ 779 switch (af) { 780 case AF_INET: 781 if ((found_addrs & RTA_GATEWAY) != 0 && 782 (ire->ire_gateway_addr != gw_addr)) { 783 ire->ire_gateway_addr = gw_addr; 784 } 785 786 if (rtsap != NULL) { 787 ga.ga_af = AF_INET; 788 IN6_IPADDR_TO_V4MAPPED( 789 ire->ire_gateway_addr, &ga.ga_addr); 790 791 gcgrp = gcgrp_lookup(&ga, B_TRUE); 792 if (gcgrp == NULL) { 793 error = ENOMEM; 794 goto done; 795 } 796 } 797 798 if ((found_addrs & RTA_SRC) != 0 && 799 (rtm->rtm_flags & RTF_SETSRC) != 0 && 800 (ire->ire_setsrc_addr != src_addr)) { 801 if (src_addr != INADDR_ANY) { 802 uint_t type; 803 804 /* 805 * The RTF_SETSRC flag is 806 * present, check that the 807 * supplied src address is not 808 * the loopback address. This 809 * would produce martian 810 * packets. 811 */ 812 if (src_addr == 813 htonl(INADDR_LOOPBACK)) { 814 error = EINVAL; 815 goto done; 816 } 817 /* 818 * Also check that the 819 * supplied addr is a valid 820 * local address. 821 */ 822 type = ip_type_v4(src_addr, 823 ipst); 824 if (!(type & 825 (IRE_LOCAL|IRE_LOOPBACK))) { 826 error = EADDRNOTAVAIL; 827 goto done; 828 } 829 ire->ire_flags |= RTF_SETSRC; 830 ire->ire_setsrc_addr = 831 src_addr; 832 } else { 833 ire->ire_flags &= ~RTF_SETSRC; 834 ire->ire_setsrc_addr = 835 INADDR_ANY; 836 } 837 /* 838 * Let conn_ixa caching know that 839 * source address selection changed 840 */ 841 ip_update_source_selection(ipst); 842 } 843 ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE); 844 break; 845 case AF_INET6: 846 mutex_enter(&ire->ire_lock); 847 if ((found_addrs & RTA_GATEWAY) != 0 && 848 !IN6_ARE_ADDR_EQUAL( 849 &ire->ire_gateway_addr_v6, &gw_addr_v6)) { 850 ire->ire_gateway_addr_v6 = gw_addr_v6; 851 } 852 mutex_exit(&ire->ire_lock); 853 854 if (rtsap != NULL) { 855 ga.ga_af = AF_INET6; 856 mutex_enter(&ire->ire_lock); 857 ga.ga_addr = ire->ire_gateway_addr_v6; 858 mutex_exit(&ire->ire_lock); 859 860 gcgrp = gcgrp_lookup(&ga, B_TRUE); 861 if (gcgrp == NULL) { 862 error = ENOMEM; 863 goto done; 864 } 865 } 866 867 if ((found_addrs & RTA_SRC) != 0 && 868 (rtm->rtm_flags & RTF_SETSRC) != 0 && 869 !IN6_ARE_ADDR_EQUAL( 870 &ire->ire_setsrc_addr_v6, &src_addr_v6)) { 871 if (!IN6_IS_ADDR_UNSPECIFIED( 872 &src_addr_v6)) { 873 uint_t type; 874 875 /* 876 * The RTF_SETSRC flag is 877 * present, check that the 878 * supplied src address is not 879 * the loopback address. This 880 * would produce martian 881 * packets. 882 */ 883 if (IN6_IS_ADDR_LOOPBACK( 884 &src_addr_v6)) { 885 error = EINVAL; 886 goto done; 887 } 888 /* 889 * Also check that the 890 * supplied addr is a valid 891 * local address. 892 */ 893 type = ip_type_v6(&src_addr_v6, 894 ipst); 895 if (!(type & 896 (IRE_LOCAL|IRE_LOOPBACK))) { 897 error = EADDRNOTAVAIL; 898 goto done; 899 } 900 mutex_enter(&ire->ire_lock); 901 ire->ire_flags |= RTF_SETSRC; 902 ire->ire_setsrc_addr_v6 = 903 src_addr_v6; 904 mutex_exit(&ire->ire_lock); 905 } else { 906 mutex_enter(&ire->ire_lock); 907 ire->ire_flags &= ~RTF_SETSRC; 908 ire->ire_setsrc_addr_v6 = 909 ipv6_all_zeros; 910 mutex_exit(&ire->ire_lock); 911 } 912 /* 913 * Let conn_ixa caching know that 914 * source address selection changed 915 */ 916 ip_update_source_selection(ipst); 917 } 918 ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE); 919 break; 920 } 921 922 if (rtsap != NULL) { 923 ASSERT(gcgrp != NULL); 924 925 /* 926 * Create and add the security attribute to 927 * prefix IRE; it will add a reference to the 928 * group upon allocating a new entry. If it 929 * finds an already-existing entry for the 930 * security attribute, it simply returns it 931 * and no new group reference is made. 932 */ 933 gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref); 934 if (gc == NULL || 935 (error = tsol_ire_init_gwattr(ire, 936 ire->ire_ipversion, gc)) != 0) { 937 if (gc != NULL) { 938 GC_REFRELE(gc); 939 } else { 940 /* gc_create failed */ 941 error = ENOMEM; 942 } 943 goto done; 944 } 945 } 946 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 947 break; 948 } 949 break; 950 default: 951 error = EOPNOTSUPP; 952 break; 953 } 954 done: 955 if (ire != NULL) 956 ire_refrele(ire); 957 if (ifire != NULL) 958 ire_refrele(ifire); 959 if (ill != NULL) 960 ill_refrele(ill); 961 962 if (gcgrp_xtraref) 963 GCGRP_REFRELE(gcgrp); 964 965 if (rtm != NULL) { 966 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 967 if (error != 0) { 968 rtm->rtm_errno = error; 969 /* Send error ACK */ 970 ip1dbg(("ip_rts_request: error %d\n", error)); 971 } else { 972 rtm->rtm_flags |= RTF_DONE; 973 /* OK ACK already set up by caller except this */ 974 ip2dbg(("ip_rts_request: OK ACK\n")); 975 } 976 rts_queue_input(mp, connp, af, RTSQ_ALL, ipst); 977 } 978 return (error); 979 } 980 981 /* 982 * Helper function that can do recursive lookups including when 983 * MATCH_IRE_GW and/or MATCH_IRE_MASK is set. 984 */ 985 static ire_t * 986 ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr, 987 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, 988 int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp, 989 tsol_ire_gw_secattr_t **gwattrp) 990 { 991 ire_t *ire; 992 ire_t *ifire = NULL; 993 uint_t ire_type; 994 995 *pifire = NULL; 996 *v4setsrcp = INADDR_ANY; 997 *gwattrp = NULL; 998 999 /* Skip IRE_IF_CLONE */ 1000 match_flags |= MATCH_IRE_TYPE; 1001 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; 1002 1003 /* 1004 * ire_route_recursive can't match gateway or mask thus if they are 1005 * set we have to do two steps of lookups 1006 */ 1007 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { 1008 ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr, 1009 ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL); 1010 1011 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) 1012 return (ire); 1013 1014 if (ire->ire_type & IRE_ONLINK) 1015 return (ire); 1016 1017 if (ire->ire_flags & RTF_SETSRC) { 1018 ASSERT(ire->ire_setsrc_addr != INADDR_ANY); 1019 *v4setsrcp = ire->ire_setsrc_addr; 1020 v4setsrcp = NULL; 1021 } 1022 1023 /* The first ire_gw_secattr is passed back */ 1024 if (ire->ire_gw_secattr != NULL) { 1025 *gwattrp = ire->ire_gw_secattr; 1026 gwattrp = NULL; 1027 } 1028 1029 /* Look for an interface ire recursively based on the gateway */ 1030 dst_addr = ire->ire_gateway_addr; 1031 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); 1032 /* 1033 * Don't allow anything unusual past the first iteration. 1034 * After the first lookup, we should no longer look for 1035 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT 1036 * routes. 1037 * 1038 * In addition, after we have found a direct IRE_OFFLINK, 1039 * we should only look for interface or clone routes. 1040 */ 1041 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */ 1042 1043 if ((ire->ire_type & IRE_OFFLINK) && 1044 !(ire->ire_flags & RTF_INDIRECT)) { 1045 ire_type = IRE_IF_ALL; 1046 } else { 1047 /* 1048 * no more local, loopback, broadcast routes 1049 */ 1050 if (!(match_flags & MATCH_IRE_TYPE)) 1051 ire_type = (IRE_OFFLINK|IRE_ONLINK); 1052 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST); 1053 } 1054 match_flags |= MATCH_IRE_TYPE; 1055 1056 ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, 1057 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, 1058 gwattrp, NULL); 1059 } else { 1060 ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, 1061 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, 1062 gwattrp, NULL); 1063 } 1064 *pifire = ifire; 1065 return (ire); 1066 } 1067 1068 static ire_t * 1069 ire_lookup_v6(const in6_addr_t *dst_addr_v6, 1070 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, 1071 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, 1072 ip_stack_t *ipst, ire_t **pifire, 1073 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp) 1074 { 1075 ire_t *ire; 1076 ire_t *ifire = NULL; 1077 uint_t ire_type; 1078 1079 *pifire = NULL; 1080 *v6setsrcp = ipv6_all_zeros; 1081 *gwattrp = NULL; 1082 1083 /* Skip IRE_IF_CLONE */ 1084 match_flags |= MATCH_IRE_TYPE; 1085 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; 1086 1087 /* 1088 * ire_route_recursive can't match gateway or mask thus if they are 1089 * set we have to do two steps of lookups 1090 */ 1091 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { 1092 in6_addr_t dst; 1093 1094 ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6, 1095 gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0, 1096 ipst, NULL); 1097 1098 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) 1099 return (ire); 1100 1101 if (ire->ire_type & IRE_ONLINK) 1102 return (ire); 1103 1104 if (ire->ire_flags & RTF_SETSRC) { 1105 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 1106 &ire->ire_setsrc_addr_v6)); 1107 *v6setsrcp = ire->ire_setsrc_addr_v6; 1108 v6setsrcp = NULL; 1109 } 1110 1111 /* The first ire_gw_secattr is passed back */ 1112 if (ire->ire_gw_secattr != NULL) { 1113 *gwattrp = ire->ire_gw_secattr; 1114 gwattrp = NULL; 1115 } 1116 1117 mutex_enter(&ire->ire_lock); 1118 dst = ire->ire_gateway_addr_v6; 1119 mutex_exit(&ire->ire_lock); 1120 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); 1121 /* 1122 * Don't allow anything unusual past the first iteration. 1123 * After the first lookup, we should no longer look for 1124 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT 1125 * routes. 1126 * 1127 * In addition, after we have found a direct IRE_OFFLINK, 1128 * we should only look for interface or clone routes. 1129 */ 1130 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */ 1131 1132 if ((ire->ire_type & IRE_OFFLINK) && 1133 !(ire->ire_flags & RTF_INDIRECT)) { 1134 ire_type = IRE_IF_ALL; 1135 } else { 1136 /* 1137 * no more local, loopback routes 1138 */ 1139 if (!(match_flags & MATCH_IRE_TYPE)) 1140 ire_type = (IRE_OFFLINK|IRE_ONLINK); 1141 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK); 1142 } 1143 match_flags |= MATCH_IRE_TYPE; 1144 1145 ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl, 1146 match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, gwattrp, 1147 NULL); 1148 } else { 1149 ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid, 1150 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, 1151 gwattrp, NULL); 1152 } 1153 *pifire = ifire; 1154 return (ire); 1155 } 1156 1157 1158 /* 1159 * Handle IP_IOC_RTS_REQUEST ioctls 1160 */ 1161 int 1162 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) 1163 { 1164 conn_t *connp = Q_TO_CONN(q); 1165 IOCP iocp = (IOCP)mp->b_rptr; 1166 mblk_t *mp1, *ioc_mp = mp; 1167 int error = 0; 1168 ip_stack_t *ipst; 1169 1170 ipst = connp->conn_netstack->netstack_ip; 1171 1172 ASSERT(mp->b_cont != NULL); 1173 /* ioc_mp holds mp */ 1174 mp = mp->b_cont; 1175 1176 /* 1177 * The Routing Socket data starts on 1178 * next block. If there is no next block 1179 * this is an indication from routing module 1180 * that it is a routing socket stream queue. 1181 * We need to support that for compatibility with SDP since 1182 * it has a contract private interface to use IP_IOC_RTS_REQUEST. 1183 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this. 1184 */ 1185 if (mp->b_cont == NULL) { 1186 /* 1187 * This is a message from SDP 1188 * indicating that this is a Routing Socket 1189 * Stream. Insert this conn_t in routing 1190 * socket client list. 1191 */ 1192 connp->conn_useloopback = 1; 1193 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); 1194 goto done; 1195 } 1196 mp1 = dupmsg(mp->b_cont); 1197 if (mp1 == NULL) { 1198 error = ENOBUFS; 1199 goto done; 1200 } 1201 mp = mp1; 1202 1203 error = ip_rts_request_common(mp, connp, ioc_cr); 1204 done: 1205 iocp->ioc_error = error; 1206 ioc_mp->b_datap->db_type = M_IOCACK; 1207 if (iocp->ioc_error != 0) 1208 iocp->ioc_count = 0; 1209 /* Note that we pass a NULL ira to rts_input */ 1210 (connp->conn_recv)(connp, ioc_mp, NULL, NULL); 1211 1212 /* conn was refheld in ip_wput_ioctl. */ 1213 CONN_DEC_IOCTLREF(connp); 1214 CONN_OPER_PENDING_DONE(connp); 1215 1216 return (error); 1217 } 1218 1219 /* 1220 * Build a reply to the RTM_GET request contained in the given message block 1221 * using the retrieved IRE of the destination address, the parent IRE (if it 1222 * exists) and the address family. 1223 * 1224 * Returns a pointer to a message block containing the reply if successful, 1225 * otherwise NULL is returned. 1226 */ 1227 static mblk_t * 1228 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc, 1229 tsol_ire_gw_secattr_t *attrp, sa_family_t af) 1230 { 1231 rt_msghdr_t *rtm; 1232 rt_msghdr_t *new_rtm; 1233 mblk_t *new_mp; 1234 int rtm_addrs; 1235 int rtm_flags; 1236 tsol_gc_t *gc = NULL; 1237 tsol_gcgrp_t *gcgrp = NULL; 1238 ill_t *ill; 1239 ipif_t *ipif = NULL; 1240 ipaddr_t brdaddr; /* IFF_POINTOPOINT destination */ 1241 ipaddr_t ifaddr; 1242 in6_addr_t brdaddr6; /* IFF_POINTOPOINT destination */ 1243 in6_addr_t ifaddr6; 1244 ipaddr_t v4setsrc; 1245 1246 rtm = (rt_msghdr_t *)mp->b_rptr; 1247 ifaddr = 0; 1248 brdaddr = 0; 1249 rtm_flags = 0; 1250 1251 /* 1252 * Find the ill used to send packets. This will be NULL in case 1253 * of a reject or blackhole. 1254 */ 1255 if (ifire != NULL) 1256 ill = ire_nexthop_ill(ifire); 1257 else 1258 ill = ire_nexthop_ill(ire); 1259 1260 if (attrp != NULL) { 1261 mutex_enter(&attrp->igsa_lock); 1262 if ((gc = attrp->igsa_gc) != NULL) { 1263 gcgrp = gc->gc_grp; 1264 ASSERT(gcgrp != NULL); 1265 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1266 } 1267 mutex_exit(&attrp->igsa_lock); 1268 } 1269 1270 /* 1271 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK. 1272 * 1273 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both 1274 * RTA_IFP and RTA_IFA if either is defined, and also 1275 * returns RTA_BRD if the appropriate interface is 1276 * point-to-point. 1277 */ 1278 rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK); 1279 if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) { 1280 rtm_addrs |= (RTA_IFP | RTA_IFA); 1281 /* 1282 * We associate an IRE with an ILL, hence we don't exactly 1283 * know what might make sense for RTA_IFA and RTA_BRD. We 1284 * pick the first ipif on the ill. 1285 */ 1286 ipif = ipif_get_next_ipif(NULL, ill); 1287 if (ipif != NULL) { 1288 if (ipif->ipif_isv6) 1289 ifaddr6 = ipif->ipif_v6lcl_addr; 1290 else 1291 ifaddr = ipif->ipif_lcl_addr; 1292 if (ipif->ipif_flags & IPIF_POINTOPOINT) { 1293 rtm_addrs |= RTA_BRD; 1294 if (ipif->ipif_isv6) 1295 brdaddr6 = ipif->ipif_v6pp_dst_addr; 1296 else 1297 brdaddr = ipif->ipif_pp_dst_addr; 1298 } 1299 ipif_refrele(ipif); 1300 } 1301 } 1302 1303 new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0); 1304 if (new_mp == NULL) { 1305 if (gcgrp != NULL) 1306 rw_exit(&gcgrp->gcgrp_rwlock); 1307 if (ill != NULL) 1308 ill_refrele(ill); 1309 return (NULL); 1310 } 1311 1312 /* 1313 * We set the destination address, gateway address, 1314 * netmask and flags in the RTM_GET response depending 1315 * on whether we found a parent IRE or not. 1316 * In particular, if we did find a parent IRE during the 1317 * recursive search, use that IRE's gateway address. 1318 * Otherwise, we use the IRE's source address for the 1319 * gateway address. 1320 */ 1321 ASSERT(af == AF_INET || af == AF_INET6); 1322 switch (af) { 1323 case AF_INET: 1324 IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc); 1325 if (v4setsrc != INADDR_ANY) 1326 rtm_addrs |= RTA_SRC; 1327 1328 rtm_flags = ire->ire_flags; 1329 rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr, 1330 ire->ire_mask, ire->ire_gateway_addr, v4setsrc, 1331 brdaddr, 0, ifaddr, ill, new_mp, gc); 1332 break; 1333 case AF_INET6: 1334 if (!IN6_IS_ADDR_UNSPECIFIED(setsrc)) 1335 rtm_addrs |= RTA_SRC; 1336 1337 rtm_flags = ire->ire_flags; 1338 rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6, 1339 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 1340 setsrc, &brdaddr6, &ipv6_all_zeros, 1341 &ifaddr6, ill, new_mp, gc); 1342 break; 1343 } 1344 1345 if (gcgrp != NULL) 1346 rw_exit(&gcgrp->gcgrp_rwlock); 1347 1348 new_rtm = (rt_msghdr_t *)new_mp->b_rptr; 1349 1350 /* 1351 * The rtm_msglen, rtm_version and rtm_type fields in 1352 * RTM_GET response are filled in by rts_fill_msg. 1353 * 1354 * rtm_addrs and rtm_flags are filled in based on what 1355 * was requested and the state of the IREs looked up 1356 * above. 1357 * 1358 * rtm_inits and rtm_rmx are filled in with metrics 1359 * based on whether a parent IRE was found or not. 1360 * 1361 * TODO: rtm_index and rtm_use should probably be 1362 * filled in with something resonable here and not just 1363 * copied from the request. 1364 */ 1365 new_rtm->rtm_index = rtm->rtm_index; 1366 new_rtm->rtm_pid = rtm->rtm_pid; 1367 new_rtm->rtm_seq = rtm->rtm_seq; 1368 new_rtm->rtm_use = rtm->rtm_use; 1369 new_rtm->rtm_addrs = rtm_addrs; 1370 new_rtm->rtm_flags = rtm_flags; 1371 new_rtm->rtm_inits = rts_getmetrics(ire, ill, &new_rtm->rtm_rmx); 1372 if (ill != NULL) 1373 ill_refrele(ill); 1374 return (new_mp); 1375 } 1376 1377 /* 1378 * Fill the given if_data_t with interface statistics. 1379 */ 1380 static void 1381 rts_getifdata(if_data_t *if_data, const ipif_t *ipif) 1382 { 1383 if_data->ifi_type = ipif->ipif_ill->ill_type; 1384 /* ethernet, tokenring, etc */ 1385 if_data->ifi_addrlen = 0; /* media address length */ 1386 if_data->ifi_hdrlen = 0; /* media header length */ 1387 if_data->ifi_mtu = ipif->ipif_ill->ill_mtu; /* mtu */ 1388 /* metric (external only) */ 1389 if_data->ifi_metric = ipif->ipif_ill->ill_metric; 1390 if_data->ifi_baudrate = 0; /* linespeed */ 1391 1392 if_data->ifi_ipackets = 0; /* packets received on if */ 1393 if_data->ifi_ierrors = 0; /* input errors on interface */ 1394 if_data->ifi_opackets = 0; /* packets sent on interface */ 1395 if_data->ifi_oerrors = 0; /* output errors on if */ 1396 if_data->ifi_collisions = 0; /* collisions on csma if */ 1397 if_data->ifi_ibytes = 0; /* total number received */ 1398 if_data->ifi_obytes = 0; /* total number sent */ 1399 if_data->ifi_imcasts = 0; /* multicast packets received */ 1400 if_data->ifi_omcasts = 0; /* multicast packets sent */ 1401 if_data->ifi_iqdrops = 0; /* dropped on input */ 1402 if_data->ifi_noproto = 0; /* destined for unsupported */ 1403 /* protocol. */ 1404 } 1405 1406 /* 1407 * Set the metrics on a forwarding table route. 1408 */ 1409 static void 1410 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) 1411 { 1412 clock_t rtt; 1413 clock_t rtt_sd; 1414 ill_t *ill; 1415 ifrt_t *ifrt; 1416 mblk_t *mp; 1417 in6_addr_t gw_addr_v6 = { 0 }; 1418 1419 /* Need to add back some metrics to the IRE? */ 1420 /* 1421 * Bypass obtaining the lock and searching ill_saved_ire_mp in the 1422 * common case of no metrics. 1423 */ 1424 if (which == 0) 1425 return; 1426 ire->ire_metrics.iulp_set = B_TRUE; 1427 1428 /* 1429 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1430 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1431 * microseconds. 1432 */ 1433 rtt = 0; 1434 if (which & RTV_RTT) 1435 rtt = metrics->rmx_rtt / 1000; 1436 if (which & RTV_RTTVAR) 1437 rtt_sd = metrics->rmx_rttvar / 1000; 1438 1439 /* 1440 * Update the metrics in the IRE itself. 1441 */ 1442 mutex_enter(&ire->ire_lock); 1443 if (which & RTV_MTU) 1444 ire->ire_metrics.iulp_mtu = metrics->rmx_mtu; 1445 if (which & RTV_RTT) 1446 ire->ire_metrics.iulp_rtt = rtt; 1447 if (which & RTV_SSTHRESH) 1448 ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh; 1449 if (which & RTV_RTTVAR) 1450 ire->ire_metrics.iulp_rtt_sd = rtt_sd; 1451 if (which & RTV_SPIPE) 1452 ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe; 1453 if (which & RTV_RPIPE) 1454 ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe; 1455 mutex_exit(&ire->ire_lock); 1456 1457 /* 1458 * Search through the ifrt_t chain hanging off the ILL in order to 1459 * reflect the metric change there. 1460 */ 1461 ill = ire->ire_ill; 1462 if (ill == NULL) 1463 return; 1464 ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) || 1465 ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION))); 1466 if (ill->ill_isv6) { 1467 mutex_enter(&ire->ire_lock); 1468 gw_addr_v6 = ire->ire_gateway_addr_v6; 1469 mutex_exit(&ire->ire_lock); 1470 } 1471 mutex_enter(&ill->ill_saved_ire_lock); 1472 for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) { 1473 /* 1474 * On a given ill, the tuple of address, gateway, mask, 1475 * ire_type and zoneid unique for each saved IRE. 1476 */ 1477 ifrt = (ifrt_t *)mp->b_rptr; 1478 if (ill->ill_isv6) { 1479 if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, 1480 &ire->ire_addr_v6) || 1481 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, 1482 &gw_addr_v6) || 1483 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask, 1484 &ire->ire_mask_v6)) 1485 continue; 1486 } else { 1487 if (ifrt->ifrt_addr != ire->ire_addr || 1488 ifrt->ifrt_gateway_addr != ire->ire_gateway_addr || 1489 ifrt->ifrt_mask != ire->ire_mask) 1490 continue; 1491 } 1492 if (ifrt->ifrt_zoneid != ire->ire_zoneid || 1493 ifrt->ifrt_type != ire->ire_type) 1494 continue; 1495 1496 if (which & RTV_MTU) 1497 ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu; 1498 if (which & RTV_RTT) 1499 ifrt->ifrt_metrics.iulp_rtt = rtt; 1500 if (which & RTV_SSTHRESH) { 1501 ifrt->ifrt_metrics.iulp_ssthresh = 1502 metrics->rmx_ssthresh; 1503 } 1504 if (which & RTV_RTTVAR) 1505 ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar; 1506 if (which & RTV_SPIPE) 1507 ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe; 1508 if (which & RTV_RPIPE) 1509 ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe; 1510 break; 1511 } 1512 mutex_exit(&ill->ill_saved_ire_lock); 1513 1514 /* 1515 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they 1516 * get any new iulp_mtu. 1517 * We do that by deleting them; ire_create_if_clone will pick 1518 * up the new metrics. 1519 */ 1520 if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0) 1521 ire_dep_delete_if_clone(ire); 1522 } 1523 1524 /* 1525 * Get the metrics from a forwarding table route. 1526 */ 1527 static int 1528 rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics) 1529 { 1530 int metrics_set = 0; 1531 1532 bzero(metrics, sizeof (rt_metrics_t)); 1533 1534 /* 1535 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1536 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1537 * microseconds. 1538 */ 1539 metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000; 1540 metrics_set |= RTV_RTT; 1541 if (ire->ire_metrics.iulp_mtu != 0) { 1542 metrics->rmx_mtu = ire->ire_metrics.iulp_mtu; 1543 metrics_set |= RTV_MTU; 1544 } else if (ill != NULL) { 1545 metrics->rmx_mtu = ill->ill_mtu; 1546 metrics_set |= RTV_MTU; 1547 } 1548 metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh; 1549 metrics_set |= RTV_SSTHRESH; 1550 metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000; 1551 metrics_set |= RTV_RTTVAR; 1552 metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe; 1553 metrics_set |= RTV_SPIPE; 1554 metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe; 1555 metrics_set |= RTV_RPIPE; 1556 return (metrics_set); 1557 } 1558 1559 /* 1560 * Given two sets of metrics (src and dst), use the dst values if they are 1561 * set. If a dst value is not set but the src value is set, then we use 1562 * the src value. 1563 * dst is updated with the new values. 1564 * This is used to merge information from a dce_t and ire_metrics, where the 1565 * dce values takes precedence. 1566 */ 1567 void 1568 rts_merge_metrics(iulp_t *dst, const iulp_t *src) 1569 { 1570 if (!src->iulp_set) 1571 return; 1572 1573 if (dst->iulp_ssthresh == 0) 1574 dst->iulp_ssthresh = src->iulp_ssthresh; 1575 if (dst->iulp_rtt == 0) 1576 dst->iulp_rtt = src->iulp_rtt; 1577 if (dst->iulp_rtt_sd == 0) 1578 dst->iulp_rtt_sd = src->iulp_rtt_sd; 1579 if (dst->iulp_spipe == 0) 1580 dst->iulp_spipe = src->iulp_spipe; 1581 if (dst->iulp_rpipe == 0) 1582 dst->iulp_rpipe = src->iulp_rpipe; 1583 if (dst->iulp_rtomax == 0) 1584 dst->iulp_rtomax = src->iulp_rtomax; 1585 if (dst->iulp_sack == 0) 1586 dst->iulp_sack = src->iulp_sack; 1587 if (dst->iulp_tstamp_ok == 0) 1588 dst->iulp_tstamp_ok = src->iulp_tstamp_ok; 1589 if (dst->iulp_wscale_ok == 0) 1590 dst->iulp_wscale_ok = src->iulp_wscale_ok; 1591 if (dst->iulp_ecn_ok == 0) 1592 dst->iulp_ecn_ok = src->iulp_ecn_ok; 1593 if (dst->iulp_pmtud_ok == 0) 1594 dst->iulp_pmtud_ok = src->iulp_pmtud_ok; 1595 if (dst->iulp_mtu == 0) 1596 dst->iulp_mtu = src->iulp_mtu; 1597 } 1598 1599 1600 /* 1601 * Takes a pointer to a routing message and extracts necessary info by looking 1602 * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers 1603 * passed (all of which must be valid). 1604 * 1605 * The bitmask of sockaddrs actually found in the message is returned, or zero 1606 * is returned in the case of an error. 1607 */ 1608 static int 1609 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp, 1610 in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp, 1611 in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp, 1612 tsol_rtsecattr_t *rtsecattr, int *error) 1613 { 1614 struct sockaddr *sa; 1615 int i; 1616 int addr_bits; 1617 int length; 1618 int found_addrs = 0; 1619 caddr_t cp; 1620 size_t size; 1621 struct sockaddr_dl *sdl; 1622 1623 *dst_addrp = ipv6_all_zeros; 1624 *gw_addrp = ipv6_all_zeros; 1625 *net_maskp = ipv6_all_zeros; 1626 *authorp = ipv6_all_zeros; 1627 *if_addrp = ipv6_all_zeros; 1628 *in_src_addrp = ipv6_all_zeros; 1629 *indexp = 0; 1630 *afp = AF_UNSPEC; 1631 rtsecattr->rtsa_cnt = 0; 1632 *error = 0; 1633 1634 /* 1635 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP, 1636 * RTA_IFA and RTA_AUTHOR. The rest will be added as we need them. 1637 */ 1638 cp = (caddr_t)&rtm[1]; 1639 length = rtm->rtm_msglen; 1640 for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) { 1641 /* 1642 * The address family we are working with starts out as 1643 * AF_UNSPEC, but is set to the one specified with the 1644 * destination address. 1645 * 1646 * If the "working" address family that has been set to 1647 * something other than AF_UNSPEC, then the address family of 1648 * subsequent sockaddrs must either be AF_UNSPEC (for 1649 * compatibility with older programs) or must be the same as our 1650 * "working" one. 1651 * 1652 * This code assumes that RTA_DST (1) comes first in the loop. 1653 */ 1654 sa = (struct sockaddr *)cp; 1655 addr_bits = (rtm->rtm_addrs & (1 << i)); 1656 if (addr_bits == 0) 1657 continue; 1658 switch (addr_bits) { 1659 case RTA_DST: 1660 size = rts_copyfromsockaddr(sa, dst_addrp); 1661 *afp = sa->sa_family; 1662 break; 1663 case RTA_GATEWAY: 1664 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1665 return (0); 1666 size = rts_copyfromsockaddr(sa, gw_addrp); 1667 break; 1668 case RTA_NETMASK: 1669 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1670 return (0); 1671 size = rts_copyfromsockaddr(sa, net_maskp); 1672 break; 1673 case RTA_IFP: 1674 if (sa->sa_family != AF_LINK && 1675 sa->sa_family != AF_UNSPEC) 1676 return (0); 1677 sdl = (struct sockaddr_dl *)cp; 1678 *indexp = sdl->sdl_index; 1679 size = sizeof (struct sockaddr_dl); 1680 break; 1681 case RTA_SRC: 1682 /* Source address of the incoming packet */ 1683 size = rts_copyfromsockaddr(sa, in_src_addrp); 1684 *afp = sa->sa_family; 1685 break; 1686 case RTA_IFA: 1687 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1688 return (0); 1689 size = rts_copyfromsockaddr(sa, if_addrp); 1690 break; 1691 case RTA_AUTHOR: 1692 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1693 return (0); 1694 size = rts_copyfromsockaddr(sa, authorp); 1695 break; 1696 default: 1697 return (0); 1698 } 1699 if (size == 0) 1700 return (0); 1701 cp += size; 1702 found_addrs |= addr_bits; 1703 } 1704 1705 /* 1706 * Parse the routing message and look for any security- 1707 * related attributes for the route. For each valid 1708 * attribute, allocate/obtain the corresponding kernel 1709 * route security attributes. 1710 */ 1711 if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) { 1712 *error = tsol_rtsa_init(rtm, rtsecattr, cp); 1713 ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX); 1714 } 1715 1716 return (found_addrs); 1717 } 1718 1719 /* 1720 * Fills the message with the given info. 1721 */ 1722 static void 1723 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, 1724 ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author, 1725 ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, 1726 const tsol_gc_t *gc) 1727 { 1728 rt_msghdr_t *rtm; 1729 sin_t *sin; 1730 size_t data_size, header_size; 1731 uchar_t *cp; 1732 int i; 1733 1734 ASSERT(mp != NULL); 1735 /* 1736 * First find the type of the message 1737 * and its length. 1738 */ 1739 header_size = rts_header_msg_size(type); 1740 /* 1741 * Now find the size of the data 1742 * that follows the message header. 1743 */ 1744 data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0); 1745 1746 rtm = (rt_msghdr_t *)mp->b_rptr; 1747 mp->b_wptr = &mp->b_rptr[header_size]; 1748 cp = mp->b_wptr; 1749 bzero(cp, data_size); 1750 for (i = 0; i < RTA_NUMBITS; i++) { 1751 sin = (sin_t *)cp; 1752 switch (rtm_addrs & (1 << i)) { 1753 case RTA_DST: 1754 sin->sin_addr.s_addr = dst; 1755 sin->sin_family = AF_INET; 1756 cp += sizeof (sin_t); 1757 break; 1758 case RTA_GATEWAY: 1759 sin->sin_addr.s_addr = gateway; 1760 sin->sin_family = AF_INET; 1761 cp += sizeof (sin_t); 1762 break; 1763 case RTA_NETMASK: 1764 sin->sin_addr.s_addr = mask; 1765 sin->sin_family = AF_INET; 1766 cp += sizeof (sin_t); 1767 break; 1768 case RTA_IFP: 1769 cp += ill_dls_info((struct sockaddr_dl *)cp, ill); 1770 break; 1771 case RTA_IFA: 1772 sin->sin_addr.s_addr = ifaddr; 1773 sin->sin_family = AF_INET; 1774 cp += sizeof (sin_t); 1775 break; 1776 case RTA_SRC: 1777 sin->sin_addr.s_addr = src_addr; 1778 sin->sin_family = AF_INET; 1779 cp += sizeof (sin_t); 1780 break; 1781 case RTA_AUTHOR: 1782 sin->sin_addr.s_addr = author; 1783 sin->sin_family = AF_INET; 1784 cp += sizeof (sin_t); 1785 break; 1786 case RTA_BRD: 1787 /* 1788 * RTA_BRD is used typically to specify a point-to-point 1789 * destination address. 1790 */ 1791 sin->sin_addr.s_addr = brd_addr; 1792 sin->sin_family = AF_INET; 1793 cp += sizeof (sin_t); 1794 break; 1795 } 1796 } 1797 1798 if (gc != NULL) { 1799 rtm_ext_t *rtm_ext; 1800 struct rtsa_s *rp_dst; 1801 tsol_rtsecattr_t *rsap; 1802 1803 ASSERT(gc->gc_grp != NULL); 1804 ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock)); 1805 1806 rtm_ext = (rtm_ext_t *)cp; 1807 rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR; 1808 rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1); 1809 1810 rsap = (tsol_rtsecattr_t *)(rtm_ext + 1); 1811 rsap->rtsa_cnt = 1; 1812 rp_dst = rsap->rtsa_attr; 1813 1814 ASSERT(gc->gc_db != NULL); 1815 bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst)); 1816 cp = (uchar_t *)rp_dst; 1817 } 1818 1819 mp->b_wptr = cp; 1820 mp->b_cont = NULL; 1821 /* 1822 * set the fields that are common to 1823 * to different messages. 1824 */ 1825 rtm->rtm_msglen = (short)(header_size + data_size); 1826 rtm->rtm_version = RTM_VERSION; 1827 rtm->rtm_type = (uchar_t)type; 1828 } 1829 1830 /* 1831 * Allocates and initializes a routing socket message. 1832 * Note that sacnt is either zero or one. 1833 */ 1834 mblk_t * 1835 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt) 1836 { 1837 size_t length; 1838 mblk_t *mp; 1839 1840 length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt); 1841 mp = allocb(length, BPRI_MED); 1842 if (mp == NULL) 1843 return (mp); 1844 bzero(mp->b_rptr, length); 1845 return (mp); 1846 } 1847 1848 /* 1849 * Returns the size of the routing 1850 * socket message header size. 1851 */ 1852 size_t 1853 rts_header_msg_size(int type) 1854 { 1855 switch (type) { 1856 case RTM_DELADDR: 1857 case RTM_NEWADDR: 1858 case RTM_CHGADDR: 1859 case RTM_FREEADDR: 1860 return (sizeof (ifa_msghdr_t)); 1861 case RTM_IFINFO: 1862 return (sizeof (if_msghdr_t)); 1863 default: 1864 return (sizeof (rt_msghdr_t)); 1865 } 1866 } 1867 1868 /* 1869 * Returns the size of the message needed with the given rtm_addrs and family. 1870 * 1871 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are 1872 * of the same family (currently either AF_INET or AF_INET6). 1873 */ 1874 size_t 1875 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt) 1876 { 1877 int i; 1878 size_t length = 0; 1879 1880 for (i = 0; i < RTA_NUMBITS; i++) { 1881 switch (rtm_addrs & (1 << i)) { 1882 case RTA_IFP: 1883 length += sizeof (struct sockaddr_dl); 1884 break; 1885 case RTA_DST: 1886 case RTA_GATEWAY: 1887 case RTA_NETMASK: 1888 case RTA_SRC: 1889 case RTA_IFA: 1890 case RTA_AUTHOR: 1891 case RTA_BRD: 1892 ASSERT(af == AF_INET || af == AF_INET6); 1893 switch (af) { 1894 case AF_INET: 1895 length += sizeof (sin_t); 1896 break; 1897 case AF_INET6: 1898 length += sizeof (sin6_t); 1899 break; 1900 } 1901 break; 1902 } 1903 } 1904 if (sacnt > 0) 1905 length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt); 1906 1907 return (length); 1908 } 1909 1910 /* 1911 * This routine is called to generate a message to the routing 1912 * socket indicating that a redirect has occured, a routing lookup 1913 * has failed, or that a protocol has detected timeouts to a particular 1914 * destination. This routine is called for message types RTM_LOSING, 1915 * RTM_REDIRECT, and RTM_MISS. 1916 */ 1917 void 1918 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, 1919 ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs, 1920 ip_stack_t *ipst) 1921 { 1922 rt_msghdr_t *rtm; 1923 mblk_t *mp; 1924 1925 if (rtm_addrs == 0) 1926 return; 1927 mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0); 1928 if (mp == NULL) 1929 return; 1930 rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0, 1931 author, 0, NULL, mp, NULL); 1932 rtm = (rt_msghdr_t *)mp->b_rptr; 1933 rtm->rtm_flags = flags; 1934 rtm->rtm_errno = error; 1935 rtm->rtm_flags |= RTF_DONE; 1936 rtm->rtm_addrs = rtm_addrs; 1937 rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst); 1938 } 1939 1940 /* 1941 * This routine is called to generate a message to the routing 1942 * socket indicating that the status of a network interface has changed. 1943 * Message type generated RTM_IFINFO. 1944 */ 1945 void 1946 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags) 1947 { 1948 ip_rts_xifmsg(ipif, 0, 0, flags); 1949 } 1950 1951 void 1952 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags) 1953 { 1954 if_msghdr_t *ifm; 1955 mblk_t *mp; 1956 sa_family_t af; 1957 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 1958 1959 /* 1960 * This message should be generated only 1961 * when the physical device is changing 1962 * state. 1963 */ 1964 if (ipif->ipif_id != 0) 1965 return; 1966 if (ipif->ipif_isv6) { 1967 af = AF_INET6; 1968 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); 1969 if (mp == NULL) 1970 return; 1971 rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros, 1972 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, 1973 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, 1974 ipif->ipif_ill, mp, NULL); 1975 } else { 1976 af = AF_INET; 1977 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); 1978 if (mp == NULL) 1979 return; 1980 rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0, 1981 ipif->ipif_ill, mp, NULL); 1982 } 1983 ifm = (if_msghdr_t *)mp->b_rptr; 1984 ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; 1985 ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags | 1986 ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear; 1987 rts_getifdata(&ifm->ifm_data, ipif); 1988 ifm->ifm_addrs = RTA_IFP; 1989 1990 if (flags & RTSQ_DEFAULT) { 1991 flags = RTSQ_ALL; 1992 /* 1993 * If this message is for an underlying interface, prevent 1994 * "normal" (IPMP-unaware) routing sockets from seeing it. 1995 */ 1996 if (IS_UNDER_IPMP(ipif->ipif_ill)) 1997 flags &= ~RTSQ_NORMAL; 1998 } 1999 2000 rts_queue_input(mp, NULL, af, flags, ipst); 2001 } 2002 2003 /* 2004 * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message; 2005 * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR) 2006 * generate the ifa_msghdr_t message. 2007 */ 2008 static void 2009 rts_new_rtsmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) 2010 { 2011 int rtm_addrs; 2012 mblk_t *mp; 2013 ifa_msghdr_t *ifam; 2014 rt_msghdr_t *rtm; 2015 sa_family_t af; 2016 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2017 2018 /* 2019 * Do not report unspecified address if this is the RTM_CHGADDR or 2020 * RTM_FREEADDR message. 2021 */ 2022 if (cmd == RTM_CHGADDR || cmd == RTM_FREEADDR) { 2023 if (!ipif->ipif_isv6) { 2024 if (ipif->ipif_lcl_addr == INADDR_ANY) 2025 return; 2026 } else if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) { 2027 return; 2028 } 2029 } 2030 2031 if (ipif->ipif_isv6) 2032 af = AF_INET6; 2033 else 2034 af = AF_INET; 2035 2036 if (cmd == RTM_ADD || cmd == RTM_DELETE) 2037 rtm_addrs = (RTA_DST | RTA_NETMASK); 2038 else 2039 rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP); 2040 2041 mp = rts_alloc_msg(cmd, rtm_addrs, af, 0); 2042 if (mp == NULL) 2043 return; 2044 2045 if (cmd != RTM_ADD && cmd != RTM_DELETE) { 2046 switch (af) { 2047 case AF_INET: 2048 rts_fill_msg(cmd, rtm_addrs, 0, 2049 ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr, 2050 ipif->ipif_pp_dst_addr, 0, 2051 ipif->ipif_lcl_addr, ipif->ipif_ill, 2052 mp, NULL); 2053 break; 2054 case AF_INET6: 2055 rts_fill_msg_v6(cmd, rtm_addrs, 2056 &ipv6_all_zeros, &ipif->ipif_v6net_mask, 2057 &ipv6_all_zeros, &ipif->ipif_v6lcl_addr, 2058 &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, 2059 &ipif->ipif_v6lcl_addr, ipif->ipif_ill, 2060 mp, NULL); 2061 break; 2062 } 2063 ifam = (ifa_msghdr_t *)mp->b_rptr; 2064 ifam->ifam_index = 2065 ipif->ipif_ill->ill_phyint->phyint_ifindex; 2066 ifam->ifam_metric = ipif->ipif_ill->ill_metric; 2067 ifam->ifam_flags = ((cmd == RTM_NEWADDR) ? RTF_UP : 0); 2068 ifam->ifam_addrs = rtm_addrs; 2069 } else { 2070 switch (af) { 2071 case AF_INET: 2072 rts_fill_msg(cmd, rtm_addrs, 2073 ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0, 2074 0, 0, 0, 0, NULL, mp, NULL); 2075 break; 2076 case AF_INET6: 2077 rts_fill_msg_v6(cmd, rtm_addrs, 2078 &ipif->ipif_v6lcl_addr, 2079 &ipif->ipif_v6net_mask, &ipv6_all_zeros, 2080 &ipv6_all_zeros, &ipv6_all_zeros, 2081 &ipv6_all_zeros, &ipv6_all_zeros, 2082 NULL, mp, NULL); 2083 break; 2084 } 2085 rtm = (rt_msghdr_t *)mp->b_rptr; 2086 rtm->rtm_index = 2087 ipif->ipif_ill->ill_phyint->phyint_ifindex; 2088 rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0); 2089 rtm->rtm_errno = error; 2090 if (error == 0) 2091 rtm->rtm_flags |= RTF_DONE; 2092 rtm->rtm_addrs = rtm_addrs; 2093 } 2094 rts_queue_input(mp, NULL, af, flags, ipst); 2095 } 2096 2097 /* 2098 * This is called to generate messages to the routing socket 2099 * indicating a network interface has had addresses associated with it. 2100 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>. 2101 */ 2102 void 2103 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) 2104 { 2105 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2106 2107 if (flags & RTSQ_DEFAULT) { 2108 flags = RTSQ_ALL; 2109 /* 2110 * If this message is for an underlying interface, prevent 2111 * "normal" (IPMP-unaware) routing sockets from seeing it. 2112 */ 2113 if (IS_UNDER_IPMP(ipif->ipif_ill)) 2114 flags &= ~RTSQ_NORMAL; 2115 } 2116 2117 /* 2118 * Let conn_ixa caching know that source address selection 2119 * changed 2120 */ 2121 if (cmd == RTM_ADD || cmd == RTM_DELETE) 2122 ip_update_source_selection(ipst); 2123 2124 /* 2125 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR. 2126 * if the request is ADD, send RTM_NEWADDR and RTM_ADD. 2127 * otherwise simply send the request. 2128 */ 2129 switch (cmd) { 2130 case RTM_ADD: 2131 rts_new_rtsmsg(RTM_NEWADDR, error, ipif, flags); 2132 rts_new_rtsmsg(RTM_ADD, error, ipif, flags); 2133 break; 2134 case RTM_DELETE: 2135 rts_new_rtsmsg(RTM_DELETE, error, ipif, flags); 2136 rts_new_rtsmsg(RTM_DELADDR, error, ipif, flags); 2137 break; 2138 default: 2139 rts_new_rtsmsg(cmd, error, ipif, flags); 2140 break; 2141 } 2142 } 2143 2144 /* 2145 * Based on the address family specified in a sockaddr, copy the address field 2146 * into an in6_addr_t. 2147 * 2148 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for 2149 * compatibility with programs that leave the family cleared in the sockaddr. 2150 * Callers of rts_copyfromsockaddr should check the family themselves if they 2151 * wish to verify its value. 2152 * 2153 * In the case of AF_INET6, a check is made to ensure that address is not an 2154 * IPv4-mapped address. 2155 */ 2156 size_t 2157 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp) 2158 { 2159 switch (sa->sa_family) { 2160 case AF_INET: 2161 case AF_UNSPEC: 2162 IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp); 2163 return (sizeof (sin_t)); 2164 case AF_INET6: 2165 *addrp = ((sin6_t *)sa)->sin6_addr; 2166 if (IN6_IS_ADDR_V4MAPPED(addrp)) 2167 return (0); 2168 return (sizeof (sin6_t)); 2169 default: 2170 return (0); 2171 } 2172 } 2173