1 /* 2 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* 7 * Copyright (c) 1988, 1991, 1993 8 * The Regents of the University of California. All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 
 *
 *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
 */

/*
 * This file contains routines that process routing socket requests.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/ddi.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/policy.h>
#include <sys/zone.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/strsun.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <net/if_dl.h>
#include <netinet/ip6.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_ftable.h>
#include <inet/ip_rts.h>

#include <inet/ipclassifier.h>

#include <sys/tsol/tndb.h>
#include <sys/tsol/tnet.h>

/*
 * Size of a routing message: the sum of rts_data_msg_size(rtm_addrs, af,
 * sacnt) and rts_header_msg_size(type).
 */
#define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
	(rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))

/* Forward declarations of the file-local helpers. */
static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
    ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
    ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
    const tsol_gc_t *);
static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
    in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
    in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
    sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
static int	rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics);
static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire,
    const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af);
static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
static ire_t *ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, 96 ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid, 97 const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, 98 ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp); 99 static ire_t *ire_lookup_v6(const in6_addr_t *dst_addr_v6, 100 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, 101 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, 102 ip_stack_t *ipst, ire_t **pifire, 103 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp); 104 105 /* 106 * Send `mp' to all eligible routing queues. A queue is ineligible if: 107 * 108 * 1. SO_USELOOPBACK is off and it is not the originating queue. 109 * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'. 110 * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'. 111 * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC. 112 */ 113 void 114 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, 115 ip_stack_t *ipst) 116 { 117 mblk_t *mp1; 118 conn_t *connp, *next_connp; 119 120 /* 121 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be 122 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point. 123 */ 124 ASSERT(!(flags & RTSQ_DEFAULT)); 125 126 mutex_enter(&ipst->ips_rts_clients->connf_lock); 127 connp = ipst->ips_rts_clients->connf_head; 128 129 for (; connp != NULL; connp = next_connp) { 130 next_connp = connp->conn_next; 131 /* 132 * If there was a family specified when this routing socket was 133 * created and it doesn't match the family of the message to 134 * copy, then continue. 135 */ 136 if ((connp->conn_proto != AF_UNSPEC) && 137 (connp->conn_proto != af)) 138 continue; 139 140 /* 141 * Queue the message only if the conn_t and flags match. 
142 */ 143 if (connp->conn_rtaware & RTAW_UNDER_IPMP) { 144 if (!(flags & RTSQ_UNDER_IPMP)) 145 continue; 146 } else { 147 if (!(flags & RTSQ_NORMAL)) 148 continue; 149 } 150 /* 151 * For the originating queue, we only copy the message upstream 152 * if loopback is set. For others reading on the routing 153 * socket, we check if there is room upstream for a copy of the 154 * message. 155 */ 156 if ((o_connp == connp) && connp->conn_useloopback == 0) { 157 connp = connp->conn_next; 158 continue; 159 } 160 CONN_INC_REF(connp); 161 mutex_exit(&ipst->ips_rts_clients->connf_lock); 162 /* Pass to rts_input */ 163 if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld : 164 canputnext(connp->conn_rq)) { 165 mp1 = dupmsg(mp); 166 if (mp1 == NULL) 167 mp1 = copymsg(mp); 168 /* Note that we pass a NULL ira to rts_input */ 169 if (mp1 != NULL) 170 (connp->conn_recv)(connp, mp1, NULL, NULL); 171 } 172 173 mutex_enter(&ipst->ips_rts_clients->connf_lock); 174 /* reload next_connp since conn_next may have changed */ 175 next_connp = connp->conn_next; 176 CONN_DEC_REF(connp); 177 } 178 mutex_exit(&ipst->ips_rts_clients->connf_lock); 179 freemsg(mp); 180 } 181 182 /* 183 * Takes an ire and sends an ack to all the routing sockets. This 184 * routine is used 185 * - when a route is created/deleted through the ioctl interface. 
186 * - when a stale redirect is deleted 187 */ 188 void 189 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) 190 { 191 mblk_t *mp; 192 rt_msghdr_t *rtm; 193 int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY); 194 sa_family_t af; 195 in6_addr_t gw_addr_v6; 196 197 if (ire == NULL) 198 return; 199 ASSERT(ire->ire_ipversion == IPV4_VERSION || 200 ire->ire_ipversion == IPV6_VERSION); 201 202 ASSERT(!(ire->ire_type & IRE_IF_CLONE)); 203 204 if (ire->ire_flags & RTF_SETSRC) 205 rtm_addrs |= RTA_SRC; 206 207 switch (ire->ire_ipversion) { 208 case IPV4_VERSION: 209 af = AF_INET; 210 mp = rts_alloc_msg(type, rtm_addrs, af, 0); 211 if (mp == NULL) 212 return; 213 rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask, 214 ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL, 215 mp, NULL); 216 break; 217 case IPV6_VERSION: 218 af = AF_INET6; 219 mp = rts_alloc_msg(type, rtm_addrs, af, 0); 220 if (mp == NULL) 221 return; 222 mutex_enter(&ire->ire_lock); 223 gw_addr_v6 = ire->ire_gateway_addr_v6; 224 mutex_exit(&ire->ire_lock); 225 rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6, 226 &ire->ire_mask_v6, &gw_addr_v6, 227 &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros, 228 &ipv6_all_zeros, NULL, mp, NULL); 229 break; 230 } 231 rtm = (rt_msghdr_t *)mp->b_rptr; 232 mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen]; 233 rtm->rtm_addrs = rtm_addrs; 234 rtm->rtm_flags = ire->ire_flags; 235 if (error != 0) 236 rtm->rtm_errno = error; 237 else 238 rtm->rtm_flags |= RTF_DONE; 239 rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst); 240 } 241 242 /* 243 * This is a call from the RTS module 244 * indicating that this is a Routing Socket 245 * Stream. Insert this conn_t in routing 246 * socket client list. 
247 */ 248 void 249 ip_rts_register(conn_t *connp) 250 { 251 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 252 253 connp->conn_useloopback = 1; 254 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); 255 } 256 257 /* 258 * This is a call from the RTS module indicating that it is closing. 259 */ 260 void 261 ip_rts_unregister(conn_t *connp) 262 { 263 ipcl_hash_remove(connp); 264 } 265 266 /* 267 * Processes requests received on a routing socket. It extracts all the 268 * arguments and calls the appropriate function to process the request. 269 * 270 * RTA_SRC bit flag requests are sent by 'route -setsrc'. 271 * 272 * In general, this function does not consume the message supplied but rather 273 * sends the message upstream with an appropriate UNIX errno. 274 */ 275 int 276 ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr) 277 { 278 rt_msghdr_t *rtm = NULL; 279 in6_addr_t dst_addr_v6; 280 in6_addr_t src_addr_v6; 281 in6_addr_t gw_addr_v6; 282 in6_addr_t net_mask_v6; 283 in6_addr_t author_v6; 284 in6_addr_t if_addr_v6; 285 mblk_t *mp1; 286 ire_t *ire = NULL; 287 ire_t *ifire = NULL; 288 ipaddr_t v4setsrc; 289 in6_addr_t v6setsrc = ipv6_all_zeros; 290 tsol_ire_gw_secattr_t *gwattr = NULL; 291 int error = 0; 292 int match_flags = MATCH_IRE_DSTONLY; 293 int match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW; 294 int found_addrs; 295 sa_family_t af; 296 ipaddr_t dst_addr; 297 ipaddr_t gw_addr; 298 ipaddr_t src_addr; 299 ipaddr_t net_mask; 300 ushort_t index; 301 boolean_t gcgrp_xtraref = B_FALSE; 302 tsol_gcgrp_addr_t ga; 303 tsol_rtsecattr_t rtsecattr; 304 struct rtsa_s *rtsap = NULL; 305 tsol_gcgrp_t *gcgrp = NULL; 306 tsol_gc_t *gc = NULL; 307 ts_label_t *tsl = NULL; 308 zoneid_t zoneid; 309 ip_stack_t *ipst; 310 ill_t *ill = NULL; 311 312 zoneid = connp->conn_zoneid; 313 ipst = connp->conn_netstack->netstack_ip; 314 315 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { 316 freemsg(mp); 317 error = EINVAL; 318 goto done; 319 } 320 if 
((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 321 freemsg(mp); 322 error = EINVAL; 323 goto done; 324 } 325 326 /* 327 * Check the routing message for basic consistency including the 328 * version number and that the number of octets written is the same 329 * as specified by the rtm_msglen field. 330 * 331 * At this point, an error can be delivered back via rtm_errno. 332 */ 333 rtm = (rt_msghdr_t *)mp->b_rptr; 334 if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) { 335 error = EINVAL; 336 goto done; 337 } 338 if (rtm->rtm_version != RTM_VERSION) { 339 error = EPROTONOSUPPORT; 340 goto done; 341 } 342 343 /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */ 344 if (rtm->rtm_type != RTM_GET && 345 rtm->rtm_type != RTM_RESOLVE && 346 (ioc_cr == NULL || 347 secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) { 348 error = EPERM; 349 goto done; 350 } 351 352 found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6, 353 &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr, 354 &error); 355 356 if (error != 0) 357 goto done; 358 359 if ((found_addrs & RTA_DST) == 0) { 360 error = EINVAL; 361 goto done; 362 } 363 364 /* 365 * Based on the address family of the destination address, determine 366 * the destination, gateway and netmask and return the appropriate error 367 * if an unknown address family was specified (following the errno 368 * values that 4.4BSD-Lite2 returns.) 
369 */ 370 switch (af) { 371 case AF_INET: 372 IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr); 373 IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr); 374 IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr); 375 if (((found_addrs & RTA_NETMASK) == 0) || 376 (rtm->rtm_flags & RTF_HOST)) 377 net_mask = IP_HOST_MASK; 378 else 379 IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask); 380 break; 381 case AF_INET6: 382 if (((found_addrs & RTA_NETMASK) == 0) || 383 (rtm->rtm_flags & RTF_HOST)) 384 net_mask_v6 = ipv6_all_ones; 385 break; 386 default: 387 /* 388 * These errno values are meant to be compatible with 389 * 4.4BSD-Lite2 for the given message types. 390 */ 391 switch (rtm->rtm_type) { 392 case RTM_ADD: 393 case RTM_DELETE: 394 error = ESRCH; 395 goto done; 396 case RTM_GET: 397 case RTM_CHANGE: 398 error = EAFNOSUPPORT; 399 goto done; 400 default: 401 error = EOPNOTSUPP; 402 goto done; 403 } 404 } 405 406 /* 407 * At this point, the address family must be something known. 408 */ 409 ASSERT(af == AF_INET || af == AF_INET6); 410 411 /* Handle RTA_IFP */ 412 if (index != 0) { 413 ipif_t *ipif; 414 lookup: 415 ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst); 416 if (ill == NULL) { 417 error = EINVAL; 418 goto done; 419 } 420 421 /* 422 * Since all interfaces in an IPMP group must be equivalent, 423 * we prevent changes to a specific underlying interface's 424 * routing configuration. However, for backward compatibility, 425 * we intepret a request to add a route on an underlying 426 * interface as a request to add a route on its IPMP interface. 
427 */ 428 if (IS_UNDER_IPMP(ill)) { 429 switch (rtm->rtm_type) { 430 case RTM_CHANGE: 431 case RTM_DELETE: 432 error = EINVAL; 433 goto done; 434 case RTM_ADD: 435 index = ipmp_ill_get_ipmp_ifindex(ill); 436 ill_refrele(ill); 437 if (index == 0) { 438 ill = NULL; /* already refrele'd */ 439 error = EINVAL; 440 goto done; 441 } 442 goto lookup; 443 } 444 } 445 446 match_flags |= MATCH_IRE_ILL; 447 /* 448 * This provides the same zoneid as in Solaris 10 449 * that -ifp picks the zoneid from the first ipif on the ill. 450 * But it might not be useful since the first ipif will always 451 * have the same zoneid as the ill. 452 */ 453 ipif = ipif_get_next_ipif(NULL, ill); 454 if (ipif != NULL) { 455 zoneid = ipif->ipif_zoneid; 456 ipif_refrele(ipif); 457 } 458 } 459 460 /* 461 * If a netmask was supplied in the message, then subsequent route 462 * lookups will attempt to match on the netmask as well. 463 */ 464 if ((found_addrs & RTA_NETMASK) != 0) 465 match_flags |= MATCH_IRE_MASK; 466 467 /* 468 * We only process any passed-in route security attributes for 469 * either RTM_ADD or RTM_CHANGE message; We overload them 470 * to do an RTM_GET as a different label; ignore otherwise. 471 */ 472 if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE || 473 rtm->rtm_type == RTM_GET) { 474 ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX); 475 if (rtsecattr.rtsa_cnt > 0) 476 rtsap = &rtsecattr.rtsa_attr[0]; 477 } 478 479 switch (rtm->rtm_type) { 480 case RTM_ADD: 481 /* if we are adding a route, gateway is a must */ 482 if ((found_addrs & RTA_GATEWAY) == 0) { 483 error = EINVAL; 484 goto done; 485 } 486 487 /* Multirouting does not support net routes. */ 488 if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) == 489 RTF_MULTIRT) { 490 error = EADDRNOTAVAIL; 491 goto done; 492 } 493 494 /* 495 * Multirouting and user-specified source addresses 496 * do not support interface based routing. 
497 * Assigning a source address to an interface based 498 * route is achievable by plumbing a new ipif and 499 * setting up the interface route via this ipif, 500 * though. 501 */ 502 if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) { 503 if ((rtm->rtm_flags & RTF_GATEWAY) == 0) { 504 error = EADDRNOTAVAIL; 505 goto done; 506 } 507 } 508 509 switch (af) { 510 case AF_INET: 511 if (src_addr != INADDR_ANY) { 512 uint_t type; 513 514 /* 515 * The RTF_SETSRC flag is present, check that 516 * the supplied src address is not the loopback 517 * address. This would produce martian packets. 518 */ 519 if (src_addr == htonl(INADDR_LOOPBACK)) { 520 error = EINVAL; 521 goto done; 522 } 523 /* 524 * Also check that the supplied address is a 525 * valid, local one. Only allow IFF_UP ones 526 */ 527 type = ip_type_v4(src_addr, ipst); 528 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { 529 error = EADDRNOTAVAIL; 530 goto done; 531 } 532 } else { 533 /* 534 * The RTF_SETSRC modifier must be associated 535 * to a non-null source address. 536 */ 537 if (rtm->rtm_flags & RTF_SETSRC) { 538 error = EINVAL; 539 goto done; 540 } 541 } 542 543 error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr, 544 rtm->rtm_flags, ill, &ire, B_FALSE, 545 rtsap, ipst, zoneid); 546 if (ill != NULL) 547 ASSERT(!MUTEX_HELD(&ill->ill_lock)); 548 break; 549 case AF_INET6: 550 if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) { 551 uint_t type; 552 553 /* 554 * The RTF_SETSRC flag is present, check that 555 * the supplied src address is not the loopback 556 * address. This would produce martian packets. 557 */ 558 if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) { 559 error = EINVAL; 560 goto done; 561 } 562 /* 563 * Also check that the supplied address is a 564 * valid, local one. Only allow UP ones. 
565 */ 566 type = ip_type_v6(&src_addr_v6, ipst); 567 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { 568 error = EADDRNOTAVAIL; 569 goto done; 570 } 571 572 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 573 &gw_addr_v6, &src_addr_v6, rtm->rtm_flags, 574 ill, &ire, rtsap, ipst, zoneid); 575 break; 576 } 577 /* 578 * The RTF_SETSRC modifier must be associated 579 * to a non-null source address. 580 */ 581 if (rtm->rtm_flags & RTF_SETSRC) { 582 error = EINVAL; 583 goto done; 584 } 585 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 586 &gw_addr_v6, NULL, rtm->rtm_flags, 587 ill, &ire, rtsap, ipst, zoneid); 588 if (ill != NULL) 589 ASSERT(!MUTEX_HELD(&ill->ill_lock)); 590 break; 591 } 592 if (error != 0) 593 goto done; 594 ASSERT(ire != NULL); 595 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 596 break; 597 case RTM_DELETE: 598 /* if we are deleting a route, gateway is a must */ 599 if ((found_addrs & RTA_GATEWAY) == 0) { 600 error = EINVAL; 601 goto done; 602 } 603 /* 604 * The RTF_SETSRC modifier does not make sense 605 * when deleting a route. 606 */ 607 if (rtm->rtm_flags & RTF_SETSRC) { 608 error = EINVAL; 609 goto done; 610 } 611 612 switch (af) { 613 case AF_INET: 614 error = ip_rt_delete(dst_addr, net_mask, gw_addr, 615 found_addrs, rtm->rtm_flags, ill, B_FALSE, 616 ipst, zoneid); 617 break; 618 case AF_INET6: 619 error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6, 620 &gw_addr_v6, found_addrs, rtm->rtm_flags, ill, 621 ipst, zoneid); 622 break; 623 } 624 break; 625 case RTM_GET: 626 case RTM_CHANGE: 627 /* 628 * In the case of RTM_GET, the forwarding table should be 629 * searched recursively. Also, if a gateway was 630 * specified then the gateway address must also be matched. 631 * 632 * In the case of RTM_CHANGE, the gateway address (if supplied) 633 * is the new gateway address so matching on the gateway address 634 * is not done. 
This can lead to ambiguity when looking up the 635 * route to change as usually only the destination (and netmask, 636 * if supplied) is used for the lookup. However if a RTA_IFP 637 * sockaddr is also supplied, it can disambiguate which route to 638 * change provided the ambigous routes are tied to distinct 639 * ill's (or interface indices). If the routes are not tied to 640 * any particular interfaces (for example, with traditional 641 * gateway routes), then a RTA_IFP sockaddr will be of no use as 642 * it won't match any such routes. 643 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE, 644 * except when RTM_CHANGE is combined to RTF_SETSRC. 645 */ 646 if (((found_addrs & RTA_SRC) != 0) && 647 ((rtm->rtm_type == RTM_GET) || 648 !(rtm->rtm_flags & RTF_SETSRC))) { 649 error = EOPNOTSUPP; 650 goto done; 651 } 652 653 if (rtm->rtm_type == RTM_GET) { 654 match_flags |= MATCH_IRE_SECATTR; 655 match_flags_local |= MATCH_IRE_SECATTR; 656 if ((found_addrs & RTA_GATEWAY) != 0) 657 match_flags |= MATCH_IRE_GW; 658 if (ioc_cr) 659 tsl = crgetlabel(ioc_cr); 660 if (rtsap != NULL) { 661 if (rtsa_validate(rtsap) != 0) { 662 error = EINVAL; 663 goto done; 664 } 665 if (tsl != NULL && 666 crgetzoneid(ioc_cr) != GLOBAL_ZONEID && 667 (tsl->tsl_doi != rtsap->rtsa_doi || 668 !bldominates(&tsl->tsl_label, 669 &rtsap->rtsa_slrange.lower_bound))) { 670 error = EPERM; 671 goto done; 672 } 673 tsl = labelalloc( 674 &rtsap->rtsa_slrange.lower_bound, 675 rtsap->rtsa_doi, KM_NOSLEEP); 676 } 677 } 678 if (rtm->rtm_type == RTM_CHANGE) { 679 if ((found_addrs & RTA_GATEWAY) && 680 (rtm->rtm_flags & RTF_SETSRC)) { 681 /* 682 * Do not want to change the gateway, 683 * but rather the source address. 684 */ 685 match_flags |= MATCH_IRE_GW; 686 } 687 } 688 689 /* 690 * If the netmask is all ones (either as supplied or as derived 691 * above), then first check for an IRE_LOOPBACK or 692 * IRE_LOCAL entry. 
693 * 694 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL 695 * entry, then look for any other type of IRE. 696 */ 697 switch (af) { 698 case AF_INET: 699 if (net_mask == IP_HOST_MASK) { 700 ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr, 701 IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid, 702 tsl, match_flags_local, 0, ipst, NULL); 703 } 704 if (ire == NULL) { 705 ire = ire_lookup_v4(dst_addr, net_mask, 706 gw_addr, ill, zoneid, tsl, match_flags, 707 ipst, &ifire, &v4setsrc, &gwattr); 708 IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc); 709 } 710 break; 711 case AF_INET6: 712 if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) { 713 ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL, 714 &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL, 715 zoneid, tsl, match_flags_local, 0, ipst, 716 NULL); 717 } 718 if (ire == NULL) { 719 ire = ire_lookup_v6(&dst_addr_v6, 720 &net_mask_v6, &gw_addr_v6, ill, zoneid, 721 tsl, match_flags, ipst, &ifire, &v6setsrc, 722 &gwattr); 723 } 724 break; 725 } 726 if (tsl != NULL && tsl != crgetlabel(ioc_cr)) 727 label_rele(tsl); 728 729 if (ire == NULL) { 730 error = ESRCH; 731 goto done; 732 } 733 /* 734 * Want to return failure if we get an IRE_NOROUTE from 735 * ire_route_recursive 736 */ 737 if (ire->ire_type & IRE_NOROUTE) { 738 ire_refrele(ire); 739 ire = NULL; 740 error = ESRCH; 741 goto done; 742 } 743 744 /* we know the IRE before we come here */ 745 switch (rtm->rtm_type) { 746 case RTM_GET: 747 mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af); 748 if (mp1 == NULL) { 749 error = ENOBUFS; 750 goto done; 751 } 752 freemsg(mp); 753 mp = mp1; 754 rtm = (rt_msghdr_t *)mp->b_rptr; 755 break; 756 case RTM_CHANGE: 757 /* 758 * Do not allow to the multirouting state of a route 759 * to be changed. This aims to prevent undesirable 760 * stages where both multirt and non-multirt routes 761 * for the same destination are declared. 
762 */ 763 if ((ire->ire_flags & RTF_MULTIRT) != 764 (rtm->rtm_flags & RTF_MULTIRT)) { 765 error = EINVAL; 766 goto done; 767 } 768 /* 769 * Note that we do not need to do 770 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change 771 * in metrics or gateway will not affect existing 772 * routes since it does not create a more specific 773 * route. 774 */ 775 switch (af) { 776 case AF_INET: 777 if ((found_addrs & RTA_GATEWAY) != 0 && 778 (ire->ire_gateway_addr != gw_addr)) { 779 ire->ire_gateway_addr = gw_addr; 780 } 781 782 if (rtsap != NULL) { 783 ga.ga_af = AF_INET; 784 IN6_IPADDR_TO_V4MAPPED( 785 ire->ire_gateway_addr, &ga.ga_addr); 786 787 gcgrp = gcgrp_lookup(&ga, B_TRUE); 788 if (gcgrp == NULL) { 789 error = ENOMEM; 790 goto done; 791 } 792 } 793 794 if ((found_addrs & RTA_SRC) != 0 && 795 (rtm->rtm_flags & RTF_SETSRC) != 0 && 796 (ire->ire_setsrc_addr != src_addr)) { 797 if (src_addr != INADDR_ANY) { 798 uint_t type; 799 800 /* 801 * The RTF_SETSRC flag is 802 * present, check that the 803 * supplied src address is not 804 * the loopback address. This 805 * would produce martian 806 * packets. 807 */ 808 if (src_addr == 809 htonl(INADDR_LOOPBACK)) { 810 error = EINVAL; 811 goto done; 812 } 813 /* 814 * Also check that the 815 * supplied addr is a valid 816 * local address. 
817 */ 818 type = ip_type_v4(src_addr, 819 ipst); 820 if (!(type & 821 (IRE_LOCAL|IRE_LOOPBACK))) { 822 error = EADDRNOTAVAIL; 823 goto done; 824 } 825 ire->ire_flags |= RTF_SETSRC; 826 ire->ire_setsrc_addr = 827 src_addr; 828 } else { 829 ire->ire_flags &= ~RTF_SETSRC; 830 ire->ire_setsrc_addr = 831 INADDR_ANY; 832 } 833 /* 834 * Let conn_ixa caching know that 835 * source address selection changed 836 */ 837 ip_update_source_selection(ipst); 838 } 839 ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE); 840 break; 841 case AF_INET6: 842 mutex_enter(&ire->ire_lock); 843 if ((found_addrs & RTA_GATEWAY) != 0 && 844 !IN6_ARE_ADDR_EQUAL( 845 &ire->ire_gateway_addr_v6, &gw_addr_v6)) { 846 ire->ire_gateway_addr_v6 = gw_addr_v6; 847 } 848 mutex_exit(&ire->ire_lock); 849 850 if (rtsap != NULL) { 851 ga.ga_af = AF_INET6; 852 mutex_enter(&ire->ire_lock); 853 ga.ga_addr = ire->ire_gateway_addr_v6; 854 mutex_exit(&ire->ire_lock); 855 856 gcgrp = gcgrp_lookup(&ga, B_TRUE); 857 if (gcgrp == NULL) { 858 error = ENOMEM; 859 goto done; 860 } 861 } 862 863 if ((found_addrs & RTA_SRC) != 0 && 864 (rtm->rtm_flags & RTF_SETSRC) != 0 && 865 !IN6_ARE_ADDR_EQUAL( 866 &ire->ire_setsrc_addr_v6, &src_addr_v6)) { 867 if (!IN6_IS_ADDR_UNSPECIFIED( 868 &src_addr_v6)) { 869 uint_t type; 870 871 /* 872 * The RTF_SETSRC flag is 873 * present, check that the 874 * supplied src address is not 875 * the loopback address. This 876 * would produce martian 877 * packets. 878 */ 879 if (IN6_IS_ADDR_LOOPBACK( 880 &src_addr_v6)) { 881 error = EINVAL; 882 goto done; 883 } 884 /* 885 * Also check that the 886 * supplied addr is a valid 887 * local address. 
888 */ 889 type = ip_type_v6(&src_addr_v6, 890 ipst); 891 if (!(type & 892 (IRE_LOCAL|IRE_LOOPBACK))) { 893 error = EADDRNOTAVAIL; 894 goto done; 895 } 896 mutex_enter(&ire->ire_lock); 897 ire->ire_flags |= RTF_SETSRC; 898 ire->ire_setsrc_addr_v6 = 899 src_addr_v6; 900 mutex_exit(&ire->ire_lock); 901 } else { 902 mutex_enter(&ire->ire_lock); 903 ire->ire_flags &= ~RTF_SETSRC; 904 ire->ire_setsrc_addr_v6 = 905 ipv6_all_zeros; 906 mutex_exit(&ire->ire_lock); 907 } 908 /* 909 * Let conn_ixa caching know that 910 * source address selection changed 911 */ 912 ip_update_source_selection(ipst); 913 } 914 ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE); 915 break; 916 } 917 918 if (rtsap != NULL) { 919 ASSERT(gcgrp != NULL); 920 921 /* 922 * Create and add the security attribute to 923 * prefix IRE; it will add a reference to the 924 * group upon allocating a new entry. If it 925 * finds an already-existing entry for the 926 * security attribute, it simply returns it 927 * and no new group reference is made. 
928 */ 929 gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref); 930 if (gc == NULL || 931 (error = tsol_ire_init_gwattr(ire, 932 ire->ire_ipversion, gc)) != 0) { 933 if (gc != NULL) { 934 GC_REFRELE(gc); 935 } else { 936 /* gc_create failed */ 937 error = ENOMEM; 938 } 939 goto done; 940 } 941 } 942 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 943 break; 944 } 945 break; 946 default: 947 error = EOPNOTSUPP; 948 break; 949 } 950 done: 951 if (ire != NULL) 952 ire_refrele(ire); 953 if (ifire != NULL) 954 ire_refrele(ifire); 955 if (ill != NULL) 956 ill_refrele(ill); 957 958 if (gcgrp_xtraref) 959 GCGRP_REFRELE(gcgrp); 960 961 if (rtm != NULL) { 962 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 963 if (error != 0) { 964 rtm->rtm_errno = error; 965 /* Send error ACK */ 966 ip1dbg(("ip_rts_request: error %d\n", error)); 967 } else { 968 rtm->rtm_flags |= RTF_DONE; 969 /* OK ACK already set up by caller except this */ 970 ip2dbg(("ip_rts_request: OK ACK\n")); 971 } 972 rts_queue_input(mp, connp, af, RTSQ_ALL, ipst); 973 } 974 return (error); 975 } 976 977 /* 978 * Helper function that can do recursive lookups including when 979 * MATCH_IRE_GW and/or MATCH_IRE_MASK is set. 
980 */ 981 static ire_t * 982 ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr, 983 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, 984 int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp, 985 tsol_ire_gw_secattr_t **gwattrp) 986 { 987 ire_t *ire; 988 ire_t *ifire = NULL; 989 uint_t ire_type; 990 991 *pifire = NULL; 992 *v4setsrcp = INADDR_ANY; 993 *gwattrp = NULL; 994 995 /* Skip IRE_IF_CLONE */ 996 match_flags |= MATCH_IRE_TYPE; 997 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; 998 999 /* 1000 * ire_route_recursive can't match gateway or mask thus if they are 1001 * set we have to do two steps of lookups 1002 */ 1003 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { 1004 ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr, 1005 ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL); 1006 1007 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) 1008 return (ire); 1009 1010 if (ire->ire_type & IRE_ONLINK) 1011 return (ire); 1012 1013 if (ire->ire_flags & RTF_SETSRC) { 1014 ASSERT(ire->ire_setsrc_addr != INADDR_ANY); 1015 *v4setsrcp = ire->ire_setsrc_addr; 1016 v4setsrcp = NULL; 1017 } 1018 1019 /* The first ire_gw_secattr is passed back */ 1020 if (ire->ire_gw_secattr != NULL) { 1021 *gwattrp = ire->ire_gw_secattr; 1022 gwattrp = NULL; 1023 } 1024 1025 /* Look for an interface ire recursively based on the gateway */ 1026 dst_addr = ire->ire_gateway_addr; 1027 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); 1028 ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, 1029 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, 1030 gwattrp, NULL); 1031 /* 1032 * Don't allow anything unusual past the first 1033 * iteration. Clearing ifire means caller will not see a 1034 * complete response - there will be no RTA_IFP returned. 
1035 */ 1036 if ((ifire->ire_type & 1037 (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST)) || 1038 ire_pref(ifire) <= ire_pref(ire)) { 1039 ire_refrele(ifire); 1040 ifire = NULL; 1041 } 1042 } else { 1043 ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, 1044 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, 1045 gwattrp, NULL); 1046 } 1047 *pifire = ifire; 1048 return (ire); 1049 } 1050 1051 static ire_t * 1052 ire_lookup_v6(const in6_addr_t *dst_addr_v6, 1053 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, 1054 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, 1055 ip_stack_t *ipst, ire_t **pifire, 1056 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp) 1057 { 1058 ire_t *ire; 1059 ire_t *ifire = NULL; 1060 uint_t ire_type; 1061 1062 *pifire = NULL; 1063 *v6setsrcp = ipv6_all_zeros; 1064 *gwattrp = NULL; 1065 1066 /* Skip IRE_IF_CLONE */ 1067 match_flags |= MATCH_IRE_TYPE; 1068 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; 1069 1070 /* 1071 * ire_route_recursive can't match gateway or mask thus if they are 1072 * set we have to do two steps of lookups 1073 */ 1074 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { 1075 in6_addr_t dst; 1076 1077 ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6, 1078 gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0, 1079 ipst, NULL); 1080 1081 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) 1082 return (ire); 1083 1084 if (ire->ire_type & IRE_ONLINK) 1085 return (ire); 1086 1087 if (ire->ire_flags & RTF_SETSRC) { 1088 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 1089 &ire->ire_setsrc_addr_v6)); 1090 *v6setsrcp = ire->ire_setsrc_addr_v6; 1091 v6setsrcp = NULL; 1092 } 1093 1094 /* The first ire_gw_secattr is passed back */ 1095 if (ire->ire_gw_secattr != NULL) { 1096 *gwattrp = ire->ire_gw_secattr; 1097 gwattrp = NULL; 1098 } 1099 1100 mutex_enter(&ire->ire_lock); 1101 dst = ire->ire_gateway_addr_v6; 1102 mutex_exit(&ire->ire_lock); 1103 match_flags &= 
~(MATCH_IRE_GW|MATCH_IRE_MASK); 1104 ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl, 1105 match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, gwattrp, 1106 NULL); 1107 /* 1108 * Don't allow anything unusual past the first 1109 * iteration. Clearing ifire means caller will not see a 1110 * complete response - there will be no RTA_IFP returned. 1111 */ 1112 if ((ifire->ire_type & 1113 (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST)) || 1114 ire_pref(ifire) <= ire_pref(ire)) { 1115 ire_refrele(ifire); 1116 ifire = NULL; 1117 } 1118 } else { 1119 ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid, 1120 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, 1121 gwattrp, NULL); 1122 } 1123 *pifire = ifire; 1124 return (ire); 1125 } 1126 1127 1128 /* 1129 * Handle IP_IOC_RTS_REQUEST ioctls 1130 */ 1131 int 1132 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) 1133 { 1134 conn_t *connp = Q_TO_CONN(q); 1135 IOCP iocp = (IOCP)mp->b_rptr; 1136 mblk_t *mp1, *ioc_mp = mp; 1137 int error = 0; 1138 ip_stack_t *ipst; 1139 1140 ipst = connp->conn_netstack->netstack_ip; 1141 1142 ASSERT(mp->b_cont != NULL); 1143 /* ioc_mp holds mp */ 1144 mp = mp->b_cont; 1145 1146 /* 1147 * The Routing Socket data starts on 1148 * next block. If there is no next block 1149 * this is an indication from routing module 1150 * that it is a routing socket stream queue. 1151 * We need to support that for compatibility with SDP since 1152 * it has a contract private interface to use IP_IOC_RTS_REQUEST. 1153 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this. 1154 */ 1155 if (mp->b_cont == NULL) { 1156 /* 1157 * This is a message from SDP 1158 * indicating that this is a Routing Socket 1159 * Stream. Insert this conn_t in routing 1160 * socket client list. 
1161 */ 1162 connp->conn_useloopback = 1; 1163 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); 1164 goto done; 1165 } 1166 mp1 = dupmsg(mp->b_cont); 1167 if (mp1 == NULL) { 1168 error = ENOBUFS; 1169 goto done; 1170 } 1171 mp = mp1; 1172 1173 error = ip_rts_request_common(mp, connp, ioc_cr); 1174 done: 1175 iocp->ioc_error = error; 1176 ioc_mp->b_datap->db_type = M_IOCACK; 1177 if (iocp->ioc_error != 0) 1178 iocp->ioc_count = 0; 1179 /* Note that we pass a NULL ira to rts_input */ 1180 (connp->conn_recv)(connp, ioc_mp, NULL, NULL); 1181 1182 /* conn was refheld in ip_wput_ioctl. */ 1183 CONN_OPER_PENDING_DONE(connp); 1184 1185 return (error); 1186 } 1187 1188 /* 1189 * Build a reply to the RTM_GET request contained in the given message block 1190 * using the retrieved IRE of the destination address, the parent IRE (if it 1191 * exists) and the address family. 1192 * 1193 * Returns a pointer to a message block containing the reply if successful, 1194 * otherwise NULL is returned. 1195 */ 1196 static mblk_t * 1197 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc, 1198 tsol_ire_gw_secattr_t *attrp, sa_family_t af) 1199 { 1200 rt_msghdr_t *rtm; 1201 rt_msghdr_t *new_rtm; 1202 mblk_t *new_mp; 1203 int rtm_addrs; 1204 int rtm_flags; 1205 tsol_gc_t *gc = NULL; 1206 tsol_gcgrp_t *gcgrp = NULL; 1207 ill_t *ill; 1208 ipif_t *ipif = NULL; 1209 ipaddr_t brdaddr; /* IFF_POINTOPOINT destination */ 1210 ipaddr_t ifaddr; 1211 in6_addr_t brdaddr6; /* IFF_POINTOPOINT destination */ 1212 in6_addr_t ifaddr6; 1213 ipaddr_t v4setsrc; 1214 1215 rtm = (rt_msghdr_t *)mp->b_rptr; 1216 1217 /* 1218 * Find the ill used to send packets. This will be NULL in case 1219 * of a reject or blackhole. 
1220 */ 1221 if (ifire != NULL) 1222 ill = ire_nexthop_ill(ifire); 1223 else 1224 ill = ire_nexthop_ill(ire); 1225 1226 if (attrp != NULL) { 1227 mutex_enter(&attrp->igsa_lock); 1228 if ((gc = attrp->igsa_gc) != NULL) { 1229 gcgrp = gc->gc_grp; 1230 ASSERT(gcgrp != NULL); 1231 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1232 } 1233 mutex_exit(&attrp->igsa_lock); 1234 } 1235 1236 /* 1237 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK. 1238 * 1239 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both 1240 * RTA_IFP and RTA_IFA if either is defined, and also 1241 * returns RTA_BRD if the appropriate interface is 1242 * point-to-point. 1243 */ 1244 rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK); 1245 if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) { 1246 rtm_addrs |= (RTA_IFP | RTA_IFA); 1247 /* 1248 * We associate an IRE with an ILL, hence we don't exactly 1249 * know what might make sense for RTA_IFA and RTA_BRD. We 1250 * pick the first ipif on the ill. 1251 */ 1252 ipif = ipif_get_next_ipif(NULL, ill); 1253 if (ipif != NULL) { 1254 if (ipif->ipif_isv6) 1255 ifaddr6 = ipif->ipif_v6lcl_addr; 1256 else 1257 ifaddr = ipif->ipif_lcl_addr; 1258 if (ipif->ipif_flags & IPIF_POINTOPOINT) { 1259 rtm_addrs |= RTA_BRD; 1260 if (ipif->ipif_isv6) 1261 brdaddr6 = ipif->ipif_v6pp_dst_addr; 1262 else 1263 brdaddr = ipif->ipif_pp_dst_addr; 1264 } 1265 ipif_refrele(ipif); 1266 } 1267 } 1268 1269 new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0); 1270 if (new_mp == NULL) { 1271 if (gcgrp != NULL) 1272 rw_exit(&gcgrp->gcgrp_rwlock); 1273 if (ill != NULL) 1274 ill_refrele(ill); 1275 return (NULL); 1276 } 1277 1278 /* 1279 * We set the destination address, gateway address, 1280 * netmask and flags in the RTM_GET response depending 1281 * on whether we found a parent IRE or not. 1282 * In particular, if we did find a parent IRE during the 1283 * recursive search, use that IRE's gateway address. 
1284 * Otherwise, we use the IRE's source address for the 1285 * gateway address. 1286 */ 1287 ASSERT(af == AF_INET || af == AF_INET6); 1288 switch (af) { 1289 case AF_INET: 1290 IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc); 1291 if (v4setsrc != INADDR_ANY) 1292 rtm_addrs |= RTA_SRC; 1293 1294 rtm_flags = ire->ire_flags; 1295 rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr, 1296 ire->ire_mask, ire->ire_gateway_addr, v4setsrc, 1297 brdaddr, 0, ifaddr, ill, new_mp, gc); 1298 break; 1299 case AF_INET6: 1300 if (!IN6_IS_ADDR_UNSPECIFIED(setsrc)) 1301 rtm_addrs |= RTA_SRC; 1302 1303 rtm_flags = ire->ire_flags; 1304 rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6, 1305 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 1306 setsrc, &brdaddr6, &ipv6_all_zeros, 1307 &ifaddr6, ill, new_mp, gc); 1308 break; 1309 } 1310 1311 if (gcgrp != NULL) 1312 rw_exit(&gcgrp->gcgrp_rwlock); 1313 1314 new_rtm = (rt_msghdr_t *)new_mp->b_rptr; 1315 1316 /* 1317 * The rtm_msglen, rtm_version and rtm_type fields in 1318 * RTM_GET response are filled in by rts_fill_msg. 1319 * 1320 * rtm_addrs and rtm_flags are filled in based on what 1321 * was requested and the state of the IREs looked up 1322 * above. 1323 * 1324 * rtm_inits and rtm_rmx are filled in with metrics 1325 * based on whether a parent IRE was found or not. 1326 * 1327 * TODO: rtm_index and rtm_use should probably be 1328 * filled in with something resonable here and not just 1329 * copied from the request. 1330 */ 1331 new_rtm->rtm_index = rtm->rtm_index; 1332 new_rtm->rtm_pid = rtm->rtm_pid; 1333 new_rtm->rtm_seq = rtm->rtm_seq; 1334 new_rtm->rtm_use = rtm->rtm_use; 1335 new_rtm->rtm_addrs = rtm_addrs; 1336 new_rtm->rtm_flags = rtm_flags; 1337 new_rtm->rtm_inits = rts_getmetrics(ire, ill, &new_rtm->rtm_rmx); 1338 if (ill != NULL) 1339 ill_refrele(ill); 1340 return (new_mp); 1341 } 1342 1343 /* 1344 * Fill the given if_data_t with interface statistics. 
1345 */ 1346 static void 1347 rts_getifdata(if_data_t *if_data, const ipif_t *ipif) 1348 { 1349 if_data->ifi_type = ipif->ipif_ill->ill_type; 1350 /* ethernet, tokenring, etc */ 1351 if_data->ifi_addrlen = 0; /* media address length */ 1352 if_data->ifi_hdrlen = 0; /* media header length */ 1353 if_data->ifi_mtu = ipif->ipif_ill->ill_mtu; /* mtu */ 1354 /* metric (external only) */ 1355 if_data->ifi_metric = ipif->ipif_ill->ill_metric; 1356 if_data->ifi_baudrate = 0; /* linespeed */ 1357 1358 if_data->ifi_ipackets = 0; /* packets received on if */ 1359 if_data->ifi_ierrors = 0; /* input errors on interface */ 1360 if_data->ifi_opackets = 0; /* packets sent on interface */ 1361 if_data->ifi_oerrors = 0; /* output errors on if */ 1362 if_data->ifi_collisions = 0; /* collisions on csma if */ 1363 if_data->ifi_ibytes = 0; /* total number received */ 1364 if_data->ifi_obytes = 0; /* total number sent */ 1365 if_data->ifi_imcasts = 0; /* multicast packets received */ 1366 if_data->ifi_omcasts = 0; /* multicast packets sent */ 1367 if_data->ifi_iqdrops = 0; /* dropped on input */ 1368 if_data->ifi_noproto = 0; /* destined for unsupported */ 1369 /* protocol. */ 1370 } 1371 1372 /* 1373 * Set the metrics on a forwarding table route. 1374 */ 1375 static void 1376 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) 1377 { 1378 clock_t rtt; 1379 clock_t rtt_sd; 1380 ill_t *ill; 1381 ifrt_t *ifrt; 1382 mblk_t *mp; 1383 in6_addr_t gw_addr_v6; 1384 1385 /* Need to add back some metrics to the IRE? */ 1386 /* 1387 * Bypass obtaining the lock and searching ill_saved_ire_mp in the 1388 * common case of no metrics. 1389 */ 1390 if (which == 0) 1391 return; 1392 ire->ire_metrics.iulp_set = B_TRUE; 1393 1394 /* 1395 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1396 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1397 * microseconds. 
1398 */ 1399 if (which & RTV_RTT) 1400 rtt = metrics->rmx_rtt / 1000; 1401 if (which & RTV_RTTVAR) 1402 rtt_sd = metrics->rmx_rttvar / 1000; 1403 1404 /* 1405 * Update the metrics in the IRE itself. 1406 */ 1407 mutex_enter(&ire->ire_lock); 1408 if (which & RTV_MTU) 1409 ire->ire_metrics.iulp_mtu = metrics->rmx_mtu; 1410 if (which & RTV_RTT) 1411 ire->ire_metrics.iulp_rtt = rtt; 1412 if (which & RTV_SSTHRESH) 1413 ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh; 1414 if (which & RTV_RTTVAR) 1415 ire->ire_metrics.iulp_rtt_sd = rtt_sd; 1416 if (which & RTV_SPIPE) 1417 ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe; 1418 if (which & RTV_RPIPE) 1419 ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe; 1420 mutex_exit(&ire->ire_lock); 1421 1422 /* 1423 * Search through the ifrt_t chain hanging off the ILL in order to 1424 * reflect the metric change there. 1425 */ 1426 ill = ire->ire_ill; 1427 if (ill == NULL) 1428 return; 1429 ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) || 1430 ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION))); 1431 if (ill->ill_isv6) { 1432 mutex_enter(&ire->ire_lock); 1433 gw_addr_v6 = ire->ire_gateway_addr_v6; 1434 mutex_exit(&ire->ire_lock); 1435 } 1436 mutex_enter(&ill->ill_saved_ire_lock); 1437 for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) { 1438 /* 1439 * On a given ill, the tuple of address, gateway, mask, 1440 * ire_type and zoneid unique for each saved IRE. 
1441 */ 1442 ifrt = (ifrt_t *)mp->b_rptr; 1443 if (ill->ill_isv6) { 1444 if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, 1445 &ire->ire_addr_v6) || 1446 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, 1447 &gw_addr_v6) || 1448 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask, 1449 &ire->ire_mask_v6)) 1450 continue; 1451 } else { 1452 if (ifrt->ifrt_addr != ire->ire_addr || 1453 ifrt->ifrt_gateway_addr != ire->ire_gateway_addr || 1454 ifrt->ifrt_mask != ire->ire_mask) 1455 continue; 1456 } 1457 if (ifrt->ifrt_zoneid != ire->ire_zoneid || 1458 ifrt->ifrt_type != ire->ire_type) 1459 continue; 1460 1461 if (which & RTV_MTU) 1462 ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu; 1463 if (which & RTV_RTT) 1464 ifrt->ifrt_metrics.iulp_rtt = rtt; 1465 if (which & RTV_SSTHRESH) { 1466 ifrt->ifrt_metrics.iulp_ssthresh = 1467 metrics->rmx_ssthresh; 1468 } 1469 if (which & RTV_RTTVAR) 1470 ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar; 1471 if (which & RTV_SPIPE) 1472 ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe; 1473 if (which & RTV_RPIPE) 1474 ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe; 1475 break; 1476 } 1477 mutex_exit(&ill->ill_saved_ire_lock); 1478 1479 /* 1480 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they 1481 * get any new iulp_mtu. 1482 * We do that by deleting them; ire_create_if_clone will pick 1483 * up the new metrics. 1484 */ 1485 if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0) 1486 ire_dep_delete_if_clone(ire); 1487 } 1488 1489 /* 1490 * Get the metrics from a forwarding table route. 1491 */ 1492 static int 1493 rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics) 1494 { 1495 int metrics_set = 0; 1496 1497 bzero(metrics, sizeof (rt_metrics_t)); 1498 1499 /* 1500 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1501 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1502 * microseconds. 
1503 */ 1504 metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000; 1505 metrics_set |= RTV_RTT; 1506 if (ire->ire_metrics.iulp_mtu != 0) { 1507 metrics->rmx_mtu = ire->ire_metrics.iulp_mtu; 1508 metrics_set |= RTV_MTU; 1509 } else if (ill != NULL) { 1510 metrics->rmx_mtu = ill->ill_mtu; 1511 metrics_set |= RTV_MTU; 1512 } 1513 metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh; 1514 metrics_set |= RTV_SSTHRESH; 1515 metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000; 1516 metrics_set |= RTV_RTTVAR; 1517 metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe; 1518 metrics_set |= RTV_SPIPE; 1519 metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe; 1520 metrics_set |= RTV_RPIPE; 1521 return (metrics_set); 1522 } 1523 1524 /* 1525 * Given two sets of metrics (src and dst), use the dst values if they are 1526 * set. If a dst value is not set but the src value is set, then we use 1527 * the src value. 1528 * dst is updated with the new values. 1529 * This is used to merge information from a dce_t and ire_metrics, where the 1530 * dce values takes precedence. 
1531 */ 1532 void 1533 rts_merge_metrics(iulp_t *dst, const iulp_t *src) 1534 { 1535 if (!src->iulp_set) 1536 return; 1537 1538 if (dst->iulp_ssthresh == 0) 1539 dst->iulp_ssthresh = src->iulp_ssthresh; 1540 if (dst->iulp_rtt == 0) 1541 dst->iulp_rtt = src->iulp_rtt; 1542 if (dst->iulp_rtt_sd == 0) 1543 dst->iulp_rtt_sd = src->iulp_rtt_sd; 1544 if (dst->iulp_spipe == 0) 1545 dst->iulp_spipe = src->iulp_spipe; 1546 if (dst->iulp_rpipe == 0) 1547 dst->iulp_rpipe = src->iulp_rpipe; 1548 if (dst->iulp_rtomax == 0) 1549 dst->iulp_rtomax = src->iulp_rtomax; 1550 if (dst->iulp_sack == 0) 1551 dst->iulp_sack = src->iulp_sack; 1552 if (dst->iulp_tstamp_ok == 0) 1553 dst->iulp_tstamp_ok = src->iulp_tstamp_ok; 1554 if (dst->iulp_wscale_ok == 0) 1555 dst->iulp_wscale_ok = src->iulp_wscale_ok; 1556 if (dst->iulp_ecn_ok == 0) 1557 dst->iulp_ecn_ok = src->iulp_ecn_ok; 1558 if (dst->iulp_pmtud_ok == 0) 1559 dst->iulp_pmtud_ok = src->iulp_pmtud_ok; 1560 if (dst->iulp_mtu == 0) 1561 dst->iulp_mtu = src->iulp_mtu; 1562 } 1563 1564 1565 /* 1566 * Takes a pointer to a routing message and extracts necessary info by looking 1567 * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers 1568 * passed (all of which must be valid). 1569 * 1570 * The bitmask of sockaddrs actually found in the message is returned, or zero 1571 * is returned in the case of an error. 
1572 */ 1573 static int 1574 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp, 1575 in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp, 1576 in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp, 1577 tsol_rtsecattr_t *rtsecattr, int *error) 1578 { 1579 struct sockaddr *sa; 1580 int i; 1581 int addr_bits; 1582 int length; 1583 int found_addrs = 0; 1584 caddr_t cp; 1585 size_t size; 1586 struct sockaddr_dl *sdl; 1587 1588 *dst_addrp = ipv6_all_zeros; 1589 *gw_addrp = ipv6_all_zeros; 1590 *net_maskp = ipv6_all_zeros; 1591 *authorp = ipv6_all_zeros; 1592 *if_addrp = ipv6_all_zeros; 1593 *in_src_addrp = ipv6_all_zeros; 1594 *indexp = 0; 1595 *afp = AF_UNSPEC; 1596 rtsecattr->rtsa_cnt = 0; 1597 *error = 0; 1598 1599 /* 1600 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP, 1601 * RTA_IFA and RTA_AUTHOR. The rest will be added as we need them. 1602 */ 1603 cp = (caddr_t)&rtm[1]; 1604 length = rtm->rtm_msglen; 1605 for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) { 1606 /* 1607 * The address family we are working with starts out as 1608 * AF_UNSPEC, but is set to the one specified with the 1609 * destination address. 1610 * 1611 * If the "working" address family that has been set to 1612 * something other than AF_UNSPEC, then the address family of 1613 * subsequent sockaddrs must either be AF_UNSPEC (for 1614 * compatibility with older programs) or must be the same as our 1615 * "working" one. 1616 * 1617 * This code assumes that RTA_DST (1) comes first in the loop. 
1618 */ 1619 sa = (struct sockaddr *)cp; 1620 addr_bits = (rtm->rtm_addrs & (1 << i)); 1621 if (addr_bits == 0) 1622 continue; 1623 switch (addr_bits) { 1624 case RTA_DST: 1625 size = rts_copyfromsockaddr(sa, dst_addrp); 1626 *afp = sa->sa_family; 1627 break; 1628 case RTA_GATEWAY: 1629 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1630 return (0); 1631 size = rts_copyfromsockaddr(sa, gw_addrp); 1632 break; 1633 case RTA_NETMASK: 1634 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1635 return (0); 1636 size = rts_copyfromsockaddr(sa, net_maskp); 1637 break; 1638 case RTA_IFP: 1639 if (sa->sa_family != AF_LINK && 1640 sa->sa_family != AF_UNSPEC) 1641 return (0); 1642 sdl = (struct sockaddr_dl *)cp; 1643 *indexp = sdl->sdl_index; 1644 size = sizeof (struct sockaddr_dl); 1645 break; 1646 case RTA_SRC: 1647 /* Source address of the incoming packet */ 1648 size = rts_copyfromsockaddr(sa, in_src_addrp); 1649 *afp = sa->sa_family; 1650 break; 1651 case RTA_IFA: 1652 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1653 return (0); 1654 size = rts_copyfromsockaddr(sa, if_addrp); 1655 break; 1656 case RTA_AUTHOR: 1657 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1658 return (0); 1659 size = rts_copyfromsockaddr(sa, authorp); 1660 break; 1661 default: 1662 return (0); 1663 } 1664 if (size == 0) 1665 return (0); 1666 cp += size; 1667 found_addrs |= addr_bits; 1668 } 1669 1670 /* 1671 * Parse the routing message and look for any security- 1672 * related attributes for the route. For each valid 1673 * attribute, allocate/obtain the corresponding kernel 1674 * route security attributes. 1675 */ 1676 if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) { 1677 *error = tsol_rtsa_init(rtm, rtsecattr, cp); 1678 ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX); 1679 } 1680 1681 return (found_addrs); 1682 } 1683 1684 /* 1685 * Fills the message with the given info. 
1686 */ 1687 static void 1688 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, 1689 ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author, 1690 ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, 1691 const tsol_gc_t *gc) 1692 { 1693 rt_msghdr_t *rtm; 1694 sin_t *sin; 1695 size_t data_size, header_size; 1696 uchar_t *cp; 1697 int i; 1698 1699 ASSERT(mp != NULL); 1700 /* 1701 * First find the type of the message 1702 * and its length. 1703 */ 1704 header_size = rts_header_msg_size(type); 1705 /* 1706 * Now find the size of the data 1707 * that follows the message header. 1708 */ 1709 data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0); 1710 1711 rtm = (rt_msghdr_t *)mp->b_rptr; 1712 mp->b_wptr = &mp->b_rptr[header_size]; 1713 cp = mp->b_wptr; 1714 bzero(cp, data_size); 1715 for (i = 0; i < RTA_NUMBITS; i++) { 1716 sin = (sin_t *)cp; 1717 switch (rtm_addrs & (1 << i)) { 1718 case RTA_DST: 1719 sin->sin_addr.s_addr = dst; 1720 sin->sin_family = AF_INET; 1721 cp += sizeof (sin_t); 1722 break; 1723 case RTA_GATEWAY: 1724 sin->sin_addr.s_addr = gateway; 1725 sin->sin_family = AF_INET; 1726 cp += sizeof (sin_t); 1727 break; 1728 case RTA_NETMASK: 1729 sin->sin_addr.s_addr = mask; 1730 sin->sin_family = AF_INET; 1731 cp += sizeof (sin_t); 1732 break; 1733 case RTA_IFP: 1734 cp += ill_dls_info((struct sockaddr_dl *)cp, ill); 1735 break; 1736 case RTA_IFA: 1737 sin->sin_addr.s_addr = ifaddr; 1738 sin->sin_family = AF_INET; 1739 cp += sizeof (sin_t); 1740 break; 1741 case RTA_SRC: 1742 sin->sin_addr.s_addr = src_addr; 1743 sin->sin_family = AF_INET; 1744 cp += sizeof (sin_t); 1745 break; 1746 case RTA_AUTHOR: 1747 sin->sin_addr.s_addr = author; 1748 sin->sin_family = AF_INET; 1749 cp += sizeof (sin_t); 1750 break; 1751 case RTA_BRD: 1752 /* 1753 * RTA_BRD is used typically to specify a point-to-point 1754 * destination address. 
1755 */ 1756 sin->sin_addr.s_addr = brd_addr; 1757 sin->sin_family = AF_INET; 1758 cp += sizeof (sin_t); 1759 break; 1760 } 1761 } 1762 1763 if (gc != NULL) { 1764 rtm_ext_t *rtm_ext; 1765 struct rtsa_s *rp_dst; 1766 tsol_rtsecattr_t *rsap; 1767 1768 ASSERT(gc->gc_grp != NULL); 1769 ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock)); 1770 1771 rtm_ext = (rtm_ext_t *)cp; 1772 rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR; 1773 rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1); 1774 1775 rsap = (tsol_rtsecattr_t *)(rtm_ext + 1); 1776 rsap->rtsa_cnt = 1; 1777 rp_dst = rsap->rtsa_attr; 1778 1779 ASSERT(gc->gc_db != NULL); 1780 bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst)); 1781 cp = (uchar_t *)rp_dst; 1782 } 1783 1784 mp->b_wptr = cp; 1785 mp->b_cont = NULL; 1786 /* 1787 * set the fields that are common to 1788 * to different messages. 1789 */ 1790 rtm->rtm_msglen = (short)(header_size + data_size); 1791 rtm->rtm_version = RTM_VERSION; 1792 rtm->rtm_type = (uchar_t)type; 1793 } 1794 1795 /* 1796 * Allocates and initializes a routing socket message. 1797 * Note that sacnt is either zero or one. 1798 */ 1799 mblk_t * 1800 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt) 1801 { 1802 size_t length; 1803 mblk_t *mp; 1804 1805 length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt); 1806 mp = allocb(length, BPRI_MED); 1807 if (mp == NULL) 1808 return (mp); 1809 bzero(mp->b_rptr, length); 1810 return (mp); 1811 } 1812 1813 /* 1814 * Returns the size of the routing 1815 * socket message header size. 1816 */ 1817 size_t 1818 rts_header_msg_size(int type) 1819 { 1820 switch (type) { 1821 case RTM_DELADDR: 1822 case RTM_NEWADDR: 1823 case RTM_CHGADDR: 1824 case RTM_FREEADDR: 1825 return (sizeof (ifa_msghdr_t)); 1826 case RTM_IFINFO: 1827 return (sizeof (if_msghdr_t)); 1828 default: 1829 return (sizeof (rt_msghdr_t)); 1830 } 1831 } 1832 1833 /* 1834 * Returns the size of the message needed with the given rtm_addrs and family. 
1835 * 1836 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are 1837 * of the same family (currently either AF_INET or AF_INET6). 1838 */ 1839 size_t 1840 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt) 1841 { 1842 int i; 1843 size_t length = 0; 1844 1845 for (i = 0; i < RTA_NUMBITS; i++) { 1846 switch (rtm_addrs & (1 << i)) { 1847 case RTA_IFP: 1848 length += sizeof (struct sockaddr_dl); 1849 break; 1850 case RTA_DST: 1851 case RTA_GATEWAY: 1852 case RTA_NETMASK: 1853 case RTA_SRC: 1854 case RTA_IFA: 1855 case RTA_AUTHOR: 1856 case RTA_BRD: 1857 ASSERT(af == AF_INET || af == AF_INET6); 1858 switch (af) { 1859 case AF_INET: 1860 length += sizeof (sin_t); 1861 break; 1862 case AF_INET6: 1863 length += sizeof (sin6_t); 1864 break; 1865 } 1866 break; 1867 } 1868 } 1869 if (sacnt > 0) 1870 length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt); 1871 1872 return (length); 1873 } 1874 1875 /* 1876 * This routine is called to generate a message to the routing 1877 * socket indicating that a redirect has occured, a routing lookup 1878 * has failed, or that a protocol has detected timeouts to a particular 1879 * destination. This routine is called for message types RTM_LOSING, 1880 * RTM_REDIRECT, and RTM_MISS. 
1881 */ 1882 void 1883 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, 1884 ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs, 1885 ip_stack_t *ipst) 1886 { 1887 rt_msghdr_t *rtm; 1888 mblk_t *mp; 1889 1890 if (rtm_addrs == 0) 1891 return; 1892 mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0); 1893 if (mp == NULL) 1894 return; 1895 rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0, 1896 author, 0, NULL, mp, NULL); 1897 rtm = (rt_msghdr_t *)mp->b_rptr; 1898 rtm->rtm_flags = flags; 1899 rtm->rtm_errno = error; 1900 rtm->rtm_flags |= RTF_DONE; 1901 rtm->rtm_addrs = rtm_addrs; 1902 rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst); 1903 } 1904 1905 /* 1906 * This routine is called to generate a message to the routing 1907 * socket indicating that the status of a network interface has changed. 1908 * Message type generated RTM_IFINFO. 1909 */ 1910 void 1911 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags) 1912 { 1913 ip_rts_xifmsg(ipif, 0, 0, flags); 1914 } 1915 1916 void 1917 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags) 1918 { 1919 if_msghdr_t *ifm; 1920 mblk_t *mp; 1921 sa_family_t af; 1922 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 1923 1924 /* 1925 * This message should be generated only 1926 * when the physical device is changing 1927 * state. 
1928 */ 1929 if (ipif->ipif_id != 0) 1930 return; 1931 if (ipif->ipif_isv6) { 1932 af = AF_INET6; 1933 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); 1934 if (mp == NULL) 1935 return; 1936 rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros, 1937 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, 1938 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, 1939 ipif->ipif_ill, mp, NULL); 1940 } else { 1941 af = AF_INET; 1942 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); 1943 if (mp == NULL) 1944 return; 1945 rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0, 1946 ipif->ipif_ill, mp, NULL); 1947 } 1948 ifm = (if_msghdr_t *)mp->b_rptr; 1949 ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; 1950 ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags | 1951 ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear; 1952 rts_getifdata(&ifm->ifm_data, ipif); 1953 ifm->ifm_addrs = RTA_IFP; 1954 1955 if (flags & RTSQ_DEFAULT) { 1956 flags = RTSQ_ALL; 1957 /* 1958 * If this message is for an underlying interface, prevent 1959 * "normal" (IPMP-unaware) routing sockets from seeing it. 1960 */ 1961 if (IS_UNDER_IPMP(ipif->ipif_ill)) 1962 flags &= ~RTSQ_NORMAL; 1963 } 1964 1965 rts_queue_input(mp, NULL, af, flags, ipst); 1966 } 1967 1968 /* 1969 * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message; 1970 * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR) 1971 * generate the ifa_msghdr_t message. 1972 */ 1973 static void 1974 rts_new_rtsmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) 1975 { 1976 int rtm_addrs; 1977 mblk_t *mp; 1978 ifa_msghdr_t *ifam; 1979 rt_msghdr_t *rtm; 1980 sa_family_t af; 1981 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 1982 1983 /* 1984 * Do not report unspecified address if this is the RTM_CHGADDR or 1985 * RTM_FREEADDR message. 
1986 */ 1987 if (cmd == RTM_CHGADDR || cmd == RTM_FREEADDR) { 1988 if (!ipif->ipif_isv6) { 1989 if (ipif->ipif_lcl_addr == INADDR_ANY) 1990 return; 1991 } else if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) { 1992 return; 1993 } 1994 } 1995 1996 if (ipif->ipif_isv6) 1997 af = AF_INET6; 1998 else 1999 af = AF_INET; 2000 2001 if (cmd == RTM_ADD || cmd == RTM_DELETE) 2002 rtm_addrs = (RTA_DST | RTA_NETMASK); 2003 else 2004 rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP); 2005 2006 mp = rts_alloc_msg(cmd, rtm_addrs, af, 0); 2007 if (mp == NULL) 2008 return; 2009 2010 if (cmd != RTM_ADD && cmd != RTM_DELETE) { 2011 switch (af) { 2012 case AF_INET: 2013 rts_fill_msg(cmd, rtm_addrs, 0, 2014 ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr, 2015 ipif->ipif_pp_dst_addr, 0, 2016 ipif->ipif_lcl_addr, ipif->ipif_ill, 2017 mp, NULL); 2018 break; 2019 case AF_INET6: 2020 rts_fill_msg_v6(cmd, rtm_addrs, 2021 &ipv6_all_zeros, &ipif->ipif_v6net_mask, 2022 &ipv6_all_zeros, &ipif->ipif_v6lcl_addr, 2023 &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, 2024 &ipif->ipif_v6lcl_addr, ipif->ipif_ill, 2025 mp, NULL); 2026 break; 2027 } 2028 ifam = (ifa_msghdr_t *)mp->b_rptr; 2029 ifam->ifam_index = 2030 ipif->ipif_ill->ill_phyint->phyint_ifindex; 2031 ifam->ifam_metric = ipif->ipif_ill->ill_metric; 2032 ifam->ifam_flags = ((cmd == RTM_NEWADDR) ? RTF_UP : 0); 2033 ifam->ifam_addrs = rtm_addrs; 2034 } else { 2035 switch (af) { 2036 case AF_INET: 2037 rts_fill_msg(cmd, rtm_addrs, 2038 ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0, 2039 0, 0, 0, 0, NULL, mp, NULL); 2040 break; 2041 case AF_INET6: 2042 rts_fill_msg_v6(cmd, rtm_addrs, 2043 &ipif->ipif_v6lcl_addr, 2044 &ipif->ipif_v6net_mask, &ipv6_all_zeros, 2045 &ipv6_all_zeros, &ipv6_all_zeros, 2046 &ipv6_all_zeros, &ipv6_all_zeros, 2047 NULL, mp, NULL); 2048 break; 2049 } 2050 rtm = (rt_msghdr_t *)mp->b_rptr; 2051 rtm->rtm_index = 2052 ipif->ipif_ill->ill_phyint->phyint_ifindex; 2053 rtm->rtm_flags = ((cmd == RTM_ADD) ? 
RTF_UP : 0); 2054 rtm->rtm_errno = error; 2055 if (error == 0) 2056 rtm->rtm_flags |= RTF_DONE; 2057 rtm->rtm_addrs = rtm_addrs; 2058 } 2059 rts_queue_input(mp, NULL, af, flags, ipst); 2060 } 2061 2062 /* 2063 * This is called to generate messages to the routing socket 2064 * indicating a network interface has had addresses associated with it. 2065 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>. 2066 */ 2067 void 2068 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) 2069 { 2070 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2071 2072 if (flags & RTSQ_DEFAULT) { 2073 flags = RTSQ_ALL; 2074 /* 2075 * If this message is for an underlying interface, prevent 2076 * "normal" (IPMP-unaware) routing sockets from seeing it. 2077 */ 2078 if (IS_UNDER_IPMP(ipif->ipif_ill)) 2079 flags &= ~RTSQ_NORMAL; 2080 } 2081 2082 /* 2083 * Let conn_ixa caching know that source address selection 2084 * changed 2085 */ 2086 if (cmd == RTM_ADD || cmd == RTM_DELETE) 2087 ip_update_source_selection(ipst); 2088 2089 /* 2090 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR. 2091 * if the request is ADD, send RTM_NEWADDR and RTM_ADD. 2092 * otherwise simply send the request. 2093 */ 2094 switch (cmd) { 2095 case RTM_ADD: 2096 rts_new_rtsmsg(RTM_NEWADDR, error, ipif, flags); 2097 rts_new_rtsmsg(RTM_ADD, error, ipif, flags); 2098 break; 2099 case RTM_DELETE: 2100 rts_new_rtsmsg(RTM_DELETE, error, ipif, flags); 2101 rts_new_rtsmsg(RTM_DELADDR, error, ipif, flags); 2102 break; 2103 default: 2104 rts_new_rtsmsg(cmd, error, ipif, flags); 2105 break; 2106 } 2107 } 2108 2109 /* 2110 * Based on the address family specified in a sockaddr, copy the address field 2111 * into an in6_addr_t. 2112 * 2113 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for 2114 * compatibility with programs that leave the family cleared in the sockaddr. 
2115 * Callers of rts_copyfromsockaddr should check the family themselves if they 2116 * wish to verify its value. 2117 * 2118 * In the case of AF_INET6, a check is made to ensure that address is not an 2119 * IPv4-mapped address. 2120 */ 2121 size_t 2122 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp) 2123 { 2124 switch (sa->sa_family) { 2125 case AF_INET: 2126 case AF_UNSPEC: 2127 IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp); 2128 return (sizeof (sin_t)); 2129 case AF_INET6: 2130 *addrp = ((sin6_t *)sa)->sin6_addr; 2131 if (IN6_IS_ADDR_V4MAPPED(addrp)) 2132 return (0); 2133 return (sizeof (sin6_t)); 2134 default: 2135 return (0); 2136 } 2137 } 2138