1 /* 2 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* 7 * Copyright (c) 1988, 1991, 1993 8 * The Regents of the University of California. All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 39 */ 40 41 #pragma ident "%Z%%M% %I% %E% SMI" 42 43 /* 44 * This file contains routines that processes routing socket requests. 45 */ 46 47 #include <sys/types.h> 48 #include <sys/stream.h> 49 #include <sys/stropts.h> 50 #include <sys/strlog.h> 51 #include <sys/dlpi.h> 52 #include <sys/ddi.h> 53 #include <sys/cmn_err.h> 54 #include <sys/debug.h> 55 #include <sys/policy.h> 56 #include <sys/zone.h> 57 58 #include <sys/systm.h> 59 #include <sys/param.h> 60 #include <sys/socket.h> 61 #define _SUN_TPI_VERSION 2 62 #include <sys/tihdr.h> 63 #include <sys/strsun.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 #include <netinet/in.h> 67 #include <net/if_dl.h> 68 #include <netinet/ip6.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/ip.h> 73 #include <inet/ip6.h> 74 #include <inet/ip_if.h> 75 #include <inet/ip_ire.h> 76 #include <inet/ip_rts.h> 77 #include <inet/ip_multi.h> 78 79 #include <inet/ipclassifier.h> 80 81 #define RTS_MSG_SIZE(type, rtm_addrs, af) \ 82 (rts_data_msg_size(rtm_addrs, af) + rts_header_msg_size(type)) 83 84 static size_t rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp); 85 static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, 86 ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, 87 ipaddr_t author, ipif_t *ipif, mblk_t *mp); 88 static int rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, 89 in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp, 90 in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp, 91 ushort_t *src_indexp, sa_family_t *afp); 92 static void rts_getifdata(if_data_t *if_data, ipif_t *ipif); 93 static int rts_getmetrics(ire_t *ire, rt_metrics_t *metrics); 94 static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, 95 sa_family_t af); 96 static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics); 97 static void ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *); 98 99 /* 100 * Send the ack to all the routing queues. In case of the originating queue, 101 * send it only if the loopback is set. 102 * 103 * Messages are sent upstream only on routing sockets that did not specify an 104 * address family when they were created or when the address family matches the 105 * one specified by the caller. 106 * 107 */ 108 void 109 rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af) 110 { 111 mblk_t *mp1; 112 int checkqfull; 113 conn_t *connp, *next_connp; 114 115 mutex_enter(&rts_clients.connf_lock); 116 connp = rts_clients.connf_head; 117 118 while (connp != NULL) { 119 /* 120 * If there was a family specified when this routing socket was 121 * created and it doesn't match the family of the message to 122 * copy, then continue. 123 */ 124 if ((connp->conn_proto != AF_UNSPEC) && 125 (connp->conn_proto != af)) { 126 connp = connp->conn_next; 127 continue; 128 } 129 /* 130 * For the originating queue, we only copy the message upstream 131 * if loopback is set. For others reading on the routing 132 * socket, we check if there is room upstream for a copy of the 133 * message. 134 */ 135 if ((q != NULL) && (CONNP_TO_RQ(connp) == RD(q))) { 136 if (connp->conn_loopback == 0) { 137 connp = connp->conn_next; 138 continue; 139 } 140 checkqfull = B_FALSE; 141 } else { 142 checkqfull = B_TRUE; 143 } 144 CONN_INC_REF(connp); 145 mutex_exit(&rts_clients.connf_lock); 146 if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) { 147 mp1 = dupmsg(mp); 148 if (mp1 == NULL) 149 mp1 = copymsg(mp); 150 if (mp1 != NULL) 151 putnext(CONNP_TO_RQ(connp), mp1); 152 } 153 154 mutex_enter(&rts_clients.connf_lock); 155 /* Follow the next pointer before releasing the conn. */ 156 next_connp = connp->conn_next; 157 CONN_DEC_REF(connp); 158 connp = next_connp; 159 } 160 mutex_exit(&rts_clients.connf_lock); 161 freemsg(mp); 162 } 163 164 /* 165 * Takes an ire and sends an ack to all the routing sockets. This 166 * routine is used 167 * - when a route is created/deleted through the ioctl interface. 168 * - when ire_expire deletes a stale redirect 169 */ 170 void 171 ip_rts_rtmsg(int type, ire_t *ire, int error) 172 { 173 mblk_t *mp; 174 rt_msghdr_t *rtm; 175 int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY); 176 sa_family_t af; 177 in6_addr_t gw_addr_v6; 178 179 if (ire == NULL) 180 return; 181 ASSERT(ire->ire_ipversion == IPV4_VERSION || 182 ire->ire_ipversion == IPV6_VERSION); 183 184 if (ire->ire_flags & RTF_SETSRC) 185 rtm_addrs |= RTA_SRC; 186 187 switch (ire->ire_ipversion) { 188 case IPV4_VERSION: 189 af = AF_INET; 190 mp = rts_alloc_msg(type, rtm_addrs, af); 191 if (mp == NULL) 192 return; 193 rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask, 194 ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp); 195 break; 196 case IPV6_VERSION: 197 af = AF_INET6; 198 mp = rts_alloc_msg(type, rtm_addrs, af); 199 if (mp == NULL) 200 return; 201 mutex_enter(&ire->ire_lock); 202 gw_addr_v6 = ire->ire_gateway_addr_v6; 203 mutex_exit(&ire->ire_lock); 204 rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6, 205 &ire->ire_mask_v6, &gw_addr_v6, 206 &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros, 207 NULL, mp); 208 break; 209 } 210 rtm = (rt_msghdr_t *)mp->b_rptr; 211 mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen]; 212 rtm->rtm_addrs = rtm_addrs; 213 rtm->rtm_flags = ire->ire_flags; 214 if (error != 0) 215 rtm->rtm_errno = error; 216 else 217 rtm->rtm_flags |= RTF_DONE; 218 rts_queue_input(mp, NULL, af); 219 } 220 221 /* ARGSUSED */ 222 static void 223 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy) 224 { 225 (void) ip_rts_request(q, mp, DB_CRED(mp)); 226 } 227 228 /* 229 * Processes requests received on a routing socket. It extracts all the 230 * arguments and calls the appropriate function to process the request. 231 * 232 * RTA_SRC bit flag requests are sent by mipagent and 'route -setsrc'. 233 * RTA_SRCIFP bit flag requests are sent by mipagent only. 234 * 235 * In general, this function does not consume the message supplied but rather 236 * sends the message upstream with an appropriate UNIX errno. 237 * 238 * We may need to restart this operation if the ipif cannot be looked up 239 * due to an exclusive operation that is currently in progress. The restart 240 * entry point is ip_rts_request_retry. While the request is enqueud in the 241 * ipsq the ioctl could be aborted and the conn close. To ensure that we don't 242 * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is 243 * released at the completion of the rts ioctl at the end of this function 244 * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and 245 * conn close occurs in conn_ioctl_cleanup. 246 */ 247 int 248 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) 249 { 250 rt_msghdr_t *rtm = NULL; 251 in6_addr_t dst_addr_v6; 252 in6_addr_t src_addr_v6; 253 in6_addr_t gw_addr_v6; 254 in6_addr_t net_mask_v6; 255 in6_addr_t author_v6; 256 in6_addr_t if_addr_v6; 257 mblk_t *mp1, *ioc_mp = mp; 258 ire_t *ire = NULL; 259 ire_t *sire = NULL; 260 int error = 0; 261 int match_flags = MATCH_IRE_DSTONLY; 262 int found_addrs; 263 sa_family_t af; 264 ipaddr_t dst_addr; 265 ipaddr_t gw_addr; 266 ipaddr_t src_addr; 267 ipaddr_t net_mask; 268 ushort_t index; 269 ushort_t src_index; 270 ipif_t *ipif = NULL; 271 ipif_t *src_ipif = NULL; 272 ipif_t *tmp_ipif = NULL; 273 IOCP iocp = (IOCP)mp->b_rptr; 274 conn_t *connp; 275 276 ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp))); 277 278 ASSERT(CONN_Q(q)); 279 connp = Q_TO_CONN(q); 280 281 ASSERT(mp->b_cont != NULL); 282 /* ioc_mp holds mp */ 283 mp = mp->b_cont; 284 285 /* 286 * The Routing Socket data starts on 287 * next block. If there is no next block 288 * this is an indication from routing module 289 * that it is a routing socket stream queue. 290 */ 291 if (mp->b_cont != NULL) { 292 mp1 = dupmsg(mp->b_cont); 293 if (mp1 == NULL) { 294 freemsg(mp); 295 error = ENOBUFS; 296 goto done; 297 } 298 mp = mp1; 299 } else { 300 /* 301 * This is a message from RTS module 302 * indicating that this is a Routing Socket 303 * Stream. Insert this conn_t in routing 304 * socket client list. 305 */ 306 307 connp->conn_loopback = 1; 308 ipcl_hash_insert_wildcard(&rts_clients, connp); 309 310 goto done; 311 } 312 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { 313 freemsg(mp); 314 error = EINVAL; 315 goto done; 316 } 317 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 318 freemsg(mp); 319 error = EINVAL; 320 goto done; 321 } 322 323 /* 324 * Check the routing message for basic consistency including the 325 * version number and that the number of octets written is the same 326 * as specified by the rtm_msglen field. 327 * 328 * At this point, an error can be delivered back via rtm_errno. 329 */ 330 rtm = (rt_msghdr_t *)mp->b_rptr; 331 if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) { 332 error = EINVAL; 333 goto done; 334 } 335 if (rtm->rtm_version != RTM_VERSION) { 336 error = EPROTONOSUPPORT; 337 goto done; 338 } 339 340 /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */ 341 if (rtm->rtm_type != RTM_GET && 342 rtm->rtm_type != RTM_RESOLVE && 343 (ioc_cr == NULL || 344 secpolicy_net_config(ioc_cr, B_FALSE) != 0)) { 345 error = EPERM; 346 goto done; 347 } 348 349 found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6, 350 &author_v6, &if_addr_v6, &src_addr_v6, &index, &src_index, &af); 351 if ((found_addrs & RTA_DST) == 0) { 352 error = EINVAL; 353 goto done; 354 } 355 356 /* 357 * Based on the address family of the destination address, determine 358 * the destination, gateway and netmask and return the appropriate error 359 * if an unknown address family was specified (following the errno 360 * values that 4.4BSD-Lite2 returns.) 361 */ 362 switch (af) { 363 case AF_INET: 364 /* 365 * RTA_SRCIFP is supported for interface route only. 366 * Thus a gateway route with srcifindex is rejected, 367 * except if it's a request to add reverse tunnel 368 * route. 369 */ 370 if ((rtm->rtm_flags & RTF_GATEWAY) && 371 (found_addrs & RTA_SRCIFP) && 372 !(found_addrs & RTA_SRC)) { 373 error = EINVAL; 374 goto done; 375 } 376 IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr); 377 IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr); 378 IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr); 379 if (((found_addrs & RTA_NETMASK) == 0) || 380 (rtm->rtm_flags & RTF_HOST)) 381 net_mask = IP_HOST_MASK; 382 else 383 IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask); 384 break; 385 case AF_INET6: 386 /* 387 * RTA_SRCIFP is not a valid flag for IPv6 routes. 388 */ 389 if (found_addrs & RTA_SRCIFP) { 390 error = EINVAL; 391 goto done; 392 } 393 if (((found_addrs & RTA_NETMASK) == 0) || 394 (rtm->rtm_flags & RTF_HOST)) 395 net_mask_v6 = ipv6_all_ones; 396 break; 397 default: 398 /* 399 * These errno values are meant to be compatible with 400 * 4.4BSD-Lite2 for the given message types. 401 */ 402 switch (rtm->rtm_type) { 403 case RTM_ADD: 404 case RTM_DELETE: 405 error = ESRCH; 406 goto done; 407 case RTM_GET: 408 case RTM_CHANGE: 409 error = EAFNOSUPPORT; 410 goto done; 411 default: 412 error = EOPNOTSUPP; 413 goto done; 414 } 415 } 416 417 /* 418 * At this point, the address family must be something known. 419 */ 420 ASSERT(af == AF_INET || af == AF_INET6); 421 422 if (index != 0) { 423 ill_t *ill; 424 425 /* 426 * IPC must be refheld somewhere in ip_wput_nondata or 427 * ip_wput_ioctl etc... and cleaned up if ioctl is killed. 428 * If ILL_CHANGING the request is queued in the ipsq. 429 */ 430 ill = ill_lookup_on_ifindex(index, af == AF_INET6, 431 CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error); 432 if (ill == NULL) { 433 if (error != EINPROGRESS) 434 error = EINVAL; 435 goto done; 436 } 437 438 ipif = ipif_get_next_ipif(NULL, ill); 439 ill_refrele(ill); 440 /* 441 * If this is replacement ipif, prevent a route from 442 * being added. 443 */ 444 if (ipif != NULL && ipif->ipif_replace_zero) { 445 error = ENETDOWN; 446 goto done; 447 } 448 match_flags |= MATCH_IRE_ILL; 449 } 450 451 /* RTA_SRCIFP is unsupported on AF_INET6. */ 452 if (af == AF_INET && src_index != 0) { 453 ill_t *ill; 454 455 /* If ILL_CHANGING the request is queued in the ipsq. */ 456 ill = ill_lookup_on_ifindex(src_index, B_FALSE, 457 CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error); 458 if (ill == NULL) { 459 if (error != EINPROGRESS) 460 error = EINVAL; 461 goto done; 462 } 463 464 src_ipif = ipif_get_next_ipif(NULL, ill); 465 ill_refrele(ill); 466 } 467 /* 468 * If a netmask was supplied in the message, then subsequent route 469 * lookups will attempt to match on the netmask as well. 470 */ 471 if ((found_addrs & RTA_NETMASK) != 0) 472 match_flags |= MATCH_IRE_MASK; 473 474 switch (rtm->rtm_type) { 475 case RTM_ADD: 476 /* if we are adding a route, gateway is a must */ 477 if ((found_addrs & RTA_GATEWAY) == 0) { 478 error = EINVAL; 479 goto done; 480 } 481 482 /* Multirouting does not support net routes. */ 483 if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) == 484 RTF_MULTIRT) { 485 error = EADDRNOTAVAIL; 486 goto done; 487 } 488 489 /* 490 * Multirouting and user-specified source addresses 491 * do not support interface based routing. 492 * Assigning a source address to an interface based 493 * route is achievable by plumbing a new ipif and 494 * setting up the interface route via this ipif, 495 * though. 496 */ 497 if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) { 498 if ((rtm->rtm_flags & RTF_GATEWAY) == 0) { 499 error = EADDRNOTAVAIL; 500 goto done; 501 } 502 } 503 504 switch (af) { 505 case AF_INET: 506 if (src_addr != INADDR_ANY) { 507 /* 508 * If there is a source address, but 509 * no RTF_SETSRC modifier, setup a MobileIP 510 * reverse tunnel. 511 */ 512 if ((rtm->rtm_flags & RTF_SETSRC) == 0) { 513 error = ip_mrtun_rt_add(src_addr, 514 rtm->rtm_flags, ipif, 515 src_ipif, &ire, CONNP_TO_WQ(connp), 516 ioc_mp, ip_rts_request_retry); 517 break; 518 } 519 /* 520 * The RTF_SETSRC flag is present, check that 521 * the supplied src address is not the loopback 522 * address. This would produce martian packets. 523 */ 524 if (src_addr == htonl(INADDR_LOOPBACK)) { 525 error = EINVAL; 526 goto done; 527 } 528 /* 529 * Also check that the supplied address is a 530 * valid, local one. 531 */ 532 tmp_ipif = ipif_lookup_addr(src_addr, NULL, 533 ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, 534 ip_rts_request_retry, &error); 535 if (tmp_ipif == NULL) { 536 if (error != EINPROGRESS) 537 error = EADDRNOTAVAIL; 538 goto done; 539 } 540 if (!(tmp_ipif->ipif_flags & IPIF_UP) || 541 (tmp_ipif->ipif_flags & 542 (IPIF_NOLOCAL | IPIF_ANYCAST))) { 543 error = EINVAL; 544 goto done; 545 } 546 } else { 547 /* 548 * The RTF_SETSRC modifier must be associated 549 * to a non-null source address. 550 */ 551 if (rtm->rtm_flags & RTF_SETSRC) { 552 error = EINVAL; 553 goto done; 554 } 555 } 556 557 error = ip_rt_add(dst_addr, net_mask, 558 gw_addr, src_addr, 559 rtm->rtm_flags, ipif, src_ipif, &ire, B_FALSE, 560 CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry); 561 if (ipif != NULL) 562 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 563 break; 564 case AF_INET6: 565 if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) { 566 /* 567 * If there is a source address, but 568 * no RTF_SETSRC modifier, reject, as 569 * MobileIP IPv6 reverse tunnels are 570 * not supported. 571 */ 572 if ((rtm->rtm_flags & RTF_SETSRC) == 0) { 573 error = EINVAL; 574 goto done; 575 } 576 /* 577 * The RTF_SETSRC flag is present, check that 578 * the supplied src address is not the loopback 579 * address. This would produce martian packets. 580 */ 581 if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) { 582 error = EINVAL; 583 goto done; 584 } 585 /* 586 * Also check that the supplied address is a 587 * valid, local one. 588 */ 589 tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6, 590 NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, 591 ip_rts_request_retry, &error); 592 if (tmp_ipif == NULL) { 593 if (error != EINPROGRESS) 594 error = EADDRNOTAVAIL; 595 goto done; 596 } 597 598 if (!(tmp_ipif->ipif_flags & IPIF_UP) || 599 (tmp_ipif->ipif_flags & 600 (IPIF_NOLOCAL | IPIF_ANYCAST))) { 601 error = EINVAL; 602 goto done; 603 } 604 605 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 606 &gw_addr_v6, &src_addr_v6, rtm->rtm_flags, 607 ipif, &ire, CONNP_TO_WQ(connp), ioc_mp, 608 ip_rts_request_retry); 609 break; 610 } 611 /* 612 * The RTF_SETSRC modifier must be associated 613 * to a non-null source address. 614 */ 615 if (rtm->rtm_flags & RTF_SETSRC) { 616 error = EINVAL; 617 goto done; 618 } 619 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, 620 &gw_addr_v6, NULL, rtm->rtm_flags, 621 ipif, &ire, CONNP_TO_WQ(connp), ioc_mp, 622 ip_rts_request_retry); 623 if (ipif != NULL) 624 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 625 break; 626 } 627 if (error != 0) 628 goto done; 629 ASSERT(ire != NULL); 630 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 631 break; 632 case RTM_DELETE: 633 /* if we are deleting a route, gateway is a must */ 634 if ((found_addrs & RTA_GATEWAY) == 0) { 635 error = EINVAL; 636 goto done; 637 } 638 /* 639 * The RTF_SETSRC modifier does not make sense 640 * when deleting a route. 641 */ 642 if (rtm->rtm_flags & RTF_SETSRC) { 643 error = EINVAL; 644 goto done; 645 } 646 647 switch (af) { 648 case AF_INET: 649 /* 650 * If there is a source address, delete 651 * a MobileIP reverse tunnel. 652 */ 653 if (src_addr != INADDR_ANY) { 654 error = ip_mrtun_rt_delete(src_addr, 655 src_ipif); 656 break; 657 } 658 error = ip_rt_delete(dst_addr, net_mask, gw_addr, 659 found_addrs, rtm->rtm_flags, ipif, src_ipif, 660 B_FALSE, CONNP_TO_WQ(connp), ioc_mp, 661 ip_rts_request_retry); 662 break; 663 case AF_INET6: 664 error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6, 665 &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif, 666 CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry); 667 break; 668 } 669 break; 670 case RTM_GET: 671 case RTM_CHANGE: 672 /* 673 * In the case of RTM_GET, the forwarding table should be 674 * searched recursively with default being matched if the 675 * specific route doesn't exist. Also, if a gateway was 676 * specified then the gateway address must also be matched. 677 * 678 * In the case of RTM_CHANGE, the gateway address (if supplied) 679 * is the new gateway address so matching on the gateway address 680 * is not done. This can lead to ambiguity when looking up the 681 * route to change as usually only the destination (and netmask, 682 * if supplied) is used for the lookup. However if a RTA_IFP 683 * sockaddr is also supplied, it can disambiguate which route to 684 * change provided the ambigous routes are tied to distinct 685 * ill's (or interface indices). If the routes are not tied to 686 * any particular interfaces (for example, with traditional 687 * gateway routes), then a RTA_IFP sockaddr will be of no use as 688 * it won't match any such routes. 689 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE, 690 * except when RTM_CHANGE is combined to RTF_SETSRC. 691 */ 692 if (((found_addrs & RTA_SRC) != 0) && 693 ((rtm->rtm_type == RTM_GET) || 694 !(rtm->rtm_flags & RTF_SETSRC))) { 695 error = EOPNOTSUPP; 696 goto done; 697 } 698 699 if (rtm->rtm_type == RTM_GET) { 700 match_flags |= 701 (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE); 702 if ((found_addrs & RTA_GATEWAY) != 0) 703 match_flags |= MATCH_IRE_GW; 704 } 705 if (rtm->rtm_type == RTM_CHANGE) { 706 if ((found_addrs & RTA_GATEWAY) && 707 (rtm->rtm_flags & RTF_SETSRC)) { 708 /* 709 * Do not want to change the gateway, 710 * but rather the source address. 711 */ 712 match_flags |= MATCH_IRE_GW; 713 } 714 } 715 716 /* 717 * If the netmask is all ones (either as supplied or as derived 718 * above), then first check for an IRE_LOOPBACK or 719 * IRE_LOCAL entry. 720 * 721 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL 722 * entry, then look in the forwarding table. 723 */ 724 switch (af) { 725 case AF_INET: 726 if (net_mask == IP_HOST_MASK) { 727 ire = ire_ctable_lookup(dst_addr, gw_addr, 728 IRE_LOCAL | IRE_LOOPBACK, NULL, ALL_ZONES, 729 MATCH_IRE_TYPE | MATCH_IRE_GW); 730 } 731 if (ire == NULL) { 732 ire = ire_ftable_lookup(dst_addr, net_mask, 733 gw_addr, 0, ipif, &sire, ALL_ZONES, 0, 734 match_flags); 735 } 736 break; 737 case AF_INET6: 738 if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) { 739 ire = ire_ctable_lookup_v6(&dst_addr_v6, 740 &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL, 741 ALL_ZONES, MATCH_IRE_TYPE | MATCH_IRE_GW); 742 } 743 if (ire == NULL) { 744 ire = ire_ftable_lookup_v6(&dst_addr_v6, 745 &net_mask_v6, &gw_addr_v6, 0, ipif, &sire, 746 ALL_ZONES, 0, match_flags); 747 } 748 break; 749 } 750 751 if (ire == NULL) { 752 error = ESRCH; 753 goto done; 754 } 755 /* we know the IRE before we come here */ 756 switch (rtm->rtm_type) { 757 case RTM_GET: 758 mp1 = rts_rtmget(mp, ire, sire, af); 759 if (mp1 == NULL) { 760 error = ENOBUFS; 761 goto done; 762 } 763 freemsg(mp); 764 mp = mp1; 765 rtm = (rt_msghdr_t *)mp->b_rptr; 766 break; 767 case RTM_CHANGE: 768 /* 769 * Do not allow to the multirouting state of a route 770 * to be changed. This aims to prevent undesirable 771 * stages where both multirt and non-multirt routes 772 * for the same destination are declared. 773 */ 774 if ((ire->ire_flags & RTF_MULTIRT) != 775 (rtm->rtm_flags & RTF_MULTIRT)) { 776 error = EINVAL; 777 goto done; 778 } 779 /* 780 * Note that we do not need to do 781 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change 782 * in metrics or gateway will not affect existing 783 * routes since it does not create a more specific 784 * route. 785 */ 786 switch (af) { 787 case AF_INET: 788 ire_flush_cache_v4(ire, IRE_FLUSH_DELETE); 789 if ((found_addrs & RTA_GATEWAY) != 0 && 790 (ire->ire_gateway_addr != gw_addr)) { 791 ire->ire_gateway_addr = gw_addr; 792 } 793 if ((found_addrs & RTA_SRC) != 0 && 794 (rtm->rtm_flags & RTF_SETSRC) != 0 && 795 (ire->ire_src_addr != src_addr)) { 796 797 if (src_addr != INADDR_ANY) { 798 /* 799 * The RTF_SETSRC flag is 800 * present, check that the 801 * supplied src address is not 802 * the loopback address. This 803 * would produce martian 804 * packets. 805 */ 806 if (src_addr == 807 htonl(INADDR_LOOPBACK)) { 808 error = EINVAL; 809 goto done; 810 } 811 /* 812 * Also check that the the 813 * supplied addr is a valid 814 * local address. 815 */ 816 tmp_ipif = ipif_lookup_addr( 817 src_addr, NULL, ALL_ZONES, 818 CONNP_TO_WQ(connp), ioc_mp, 819 ip_rts_request_retry, 820 &error); 821 if (tmp_ipif == NULL) { 822 error = (error == 823 EINPROGRESS) ? 824 error : 825 EADDRNOTAVAIL; 826 goto done; 827 } 828 829 if (!(tmp_ipif->ipif_flags & 830 IPIF_UP) || 831 (tmp_ipif->ipif_flags & 832 (IPIF_NOLOCAL | 833 IPIF_ANYCAST))) { 834 error = EINVAL; 835 goto done; 836 } 837 ire->ire_flags |= RTF_SETSRC; 838 } else { 839 ire->ire_flags &= ~RTF_SETSRC; 840 } 841 ire->ire_src_addr = src_addr; 842 } 843 break; 844 case AF_INET6: 845 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 846 mutex_enter(&ire->ire_lock); 847 if ((found_addrs & RTA_GATEWAY) != 0 && 848 !IN6_ARE_ADDR_EQUAL( 849 &ire->ire_gateway_addr_v6, &gw_addr_v6)) { 850 ire->ire_gateway_addr_v6 = gw_addr_v6; 851 } 852 if ((found_addrs & RTA_SRC) != 0 && 853 (rtm->rtm_flags & RTF_SETSRC) != 0 && 854 !IN6_ARE_ADDR_EQUAL( 855 &ire->ire_src_addr_v6, &src_addr_v6)) { 856 857 if (!IN6_IS_ADDR_UNSPECIFIED( 858 &src_addr_v6)) { 859 /* 860 * The RTF_SETSRC flag is 861 * present, check that the 862 * supplied src address is not 863 * the loopback address. This 864 * would produce martian 865 * packets. 866 */ 867 if (IN6_IS_ADDR_LOOPBACK( 868 &src_addr_v6)) { 869 mutex_exit( 870 &ire->ire_lock); 871 error = EINVAL; 872 goto done; 873 } 874 /* 875 * Also check that the the 876 * supplied addr is a valid 877 * local address. 878 */ 879 tmp_ipif = ipif_lookup_addr_v6( 880 &src_addr_v6, NULL, 881 ALL_ZONES, 882 CONNP_TO_WQ(connp), ioc_mp, 883 ip_rts_request_retry, 884 &error); 885 if (tmp_ipif == NULL) { 886 mutex_exit( 887 &ire->ire_lock); 888 error = (error == 889 EINPROGRESS) ? 890 error : 891 EADDRNOTAVAIL; 892 goto done; 893 } 894 if (!(tmp_ipif->ipif_flags & 895 IPIF_UP) || 896 (tmp_ipif->ipif_flags & 897 (IPIF_NOLOCAL | 898 IPIF_ANYCAST))) { 899 mutex_exit( 900 &ire->ire_lock); 901 error = EINVAL; 902 goto done; 903 } 904 ire->ire_flags |= RTF_SETSRC; 905 } else { 906 ire->ire_flags &= ~RTF_SETSRC; 907 } 908 ire->ire_src_addr_v6 = src_addr_v6; 909 } 910 mutex_exit(&ire->ire_lock); 911 break; 912 } 913 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); 914 break; 915 } 916 break; 917 default: 918 error = EOPNOTSUPP; 919 break; 920 } 921 done: 922 if (ire != NULL) 923 ire_refrele(ire); 924 if (sire != NULL) 925 ire_refrele(sire); 926 if (ipif != NULL) 927 ipif_refrele(ipif); 928 if (src_ipif != NULL) 929 ipif_refrele(src_ipif); 930 if (tmp_ipif != NULL) 931 ipif_refrele(tmp_ipif); 932 933 if (error == EINPROGRESS) 934 return (error); 935 if (rtm != NULL) { 936 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 937 if (error != 0) { 938 rtm->rtm_errno = error; 939 /* Send error ACK */ 940 ip1dbg(("ip_rts_request: error %d\n", error)); 941 } else { 942 rtm->rtm_flags |= RTF_DONE; 943 /* OK ACK already set up by caller except this */ 944 ip2dbg(("ip_rts_request: OK ACK\n")); 945 } 946 rts_queue_input(mp, q, af); 947 } 948 iocp->ioc_error = error; 949 ioc_mp->b_datap->db_type = M_IOCACK; 950 if (iocp->ioc_error != 0) 951 iocp->ioc_count = 0; 952 qreply(q, ioc_mp); 953 /* conn was refheld in ip_wput_ioctl. */ 954 CONN_OPER_PENDING_DONE(connp); 955 956 return (error); 957 } 958 959 /* 960 * Build a reply to the RTM_GET request contained in the given message block 961 * using the retrieved IRE of the destination address, the parent IRE (if it 962 * exists) and the address family. 963 * 964 * Returns a pointer to a message block containing the reply if successful, 965 * otherwise NULL is returned. 966 */ 967 mblk_t * 968 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) 969 { 970 rt_msghdr_t *rtm; 971 rt_msghdr_t *new_rtm; 972 mblk_t *new_mp; 973 int rtm_addrs; 974 int rtm_flags; 975 in6_addr_t gw_addr_v6; 976 977 ASSERT(ire->ire_ipif != NULL); 978 rtm = (rt_msghdr_t *)mp->b_rptr; 979 980 /* 981 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK. 982 * 983 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both 984 * RTA_IFP and RTA_IFA if either is defined, and also 985 * returns RTA_BRD if the appropriate interface is 986 * point-to-point. 987 */ 988 rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK); 989 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { 990 rtm_addrs |= (RTA_IFP | RTA_IFA); 991 if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT) 992 rtm_addrs |= RTA_BRD; 993 } 994 995 new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af); 996 if (new_mp == NULL) 997 return (NULL); 998 999 /* 1000 * We set the destination address, gateway address, 1001 * netmask and flags in the RTM_GET response depending 1002 * on whether we found a parent IRE or not. 1003 * In particular, if we did find a parent IRE during the 1004 * recursive search, use that IRE's gateway address. 1005 * Otherwise, we use the IRE's source address for the 1006 * gateway address. 1007 */ 1008 ASSERT(af == AF_INET || af == AF_INET6); 1009 switch (af) { 1010 case AF_INET: 1011 if (sire == NULL) { 1012 rtm_flags = ire->ire_flags; 1013 rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr, 1014 ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr, 1015 ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif, 1016 new_mp); 1017 } else { 1018 if (sire->ire_flags & RTF_SETSRC) 1019 rtm_addrs |= RTA_SRC; 1020 1021 rtm_flags = sire->ire_flags; 1022 rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr, 1023 sire->ire_mask, sire->ire_gateway_addr, 1024 (sire->ire_flags & RTF_SETSRC) ? 1025 sire->ire_src_addr : ire->ire_src_addr, 1026 ire->ire_ipif->ipif_pp_dst_addr, 1027 0, ire->ire_ipif, new_mp); 1028 } 1029 break; 1030 case AF_INET6: 1031 if (sire == NULL) { 1032 rtm_flags = ire->ire_flags; 1033 rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6, 1034 &ire->ire_mask_v6, &ire->ire_src_addr_v6, 1035 &ire->ire_src_addr_v6, 1036 &ire->ire_ipif->ipif_v6pp_dst_addr, 1037 &ipv6_all_zeros, ire->ire_ipif, new_mp); 1038 } else { 1039 if (sire->ire_flags & RTF_SETSRC) 1040 rtm_addrs |= RTA_SRC; 1041 1042 rtm_flags = sire->ire_flags; 1043 mutex_enter(&sire->ire_lock); 1044 gw_addr_v6 = sire->ire_gateway_addr_v6; 1045 mutex_exit(&sire->ire_lock); 1046 rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6, 1047 &sire->ire_mask_v6, &gw_addr_v6, 1048 (sire->ire_flags & RTF_SETSRC) ? 1049 &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6, 1050 &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, 1051 ire->ire_ipif, new_mp); 1052 } 1053 break; 1054 } 1055 new_rtm = (rt_msghdr_t *)new_mp->b_rptr; 1056 1057 /* 1058 * The rtm_msglen, rtm_version and rtm_type fields in 1059 * RTM_GET response are filled in by rts_fill_msg. 1060 * 1061 * rtm_addrs and rtm_flags are filled in based on what 1062 * was requested and the state of the IREs looked up 1063 * above. 1064 * 1065 * rtm_inits and rtm_rmx are filled in with metrics 1066 * based on whether a parent IRE was found or not. 1067 * 1068 * TODO: rtm_index and rtm_use should probably be 1069 * filled in with something resonable here and not just 1070 * copied from the request. 1071 */ 1072 new_rtm->rtm_index = rtm->rtm_index; 1073 new_rtm->rtm_pid = rtm->rtm_pid; 1074 new_rtm->rtm_seq = rtm->rtm_seq; 1075 new_rtm->rtm_use = rtm->rtm_use; 1076 new_rtm->rtm_addrs = rtm_addrs; 1077 new_rtm->rtm_flags = rtm_flags; 1078 if (sire == NULL) 1079 new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx); 1080 else 1081 new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx); 1082 return (new_mp); 1083 } 1084 1085 /* 1086 * Fill the given if_data_t with interface statistics. 1087 */ 1088 static void 1089 rts_getifdata(if_data_t *if_data, ipif_t *ipif) 1090 { 1091 if_data->ifi_type = ipif->ipif_type; /* ethernet, tokenring, etc */ 1092 if_data->ifi_addrlen = 0; /* media address length */ 1093 if_data->ifi_hdrlen = 0; /* media header length */ 1094 if_data->ifi_mtu = ipif->ipif_mtu; /* maximum transmission unit */ 1095 if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */ 1096 if_data->ifi_baudrate = 0; /* linespeed */ 1097 1098 if_data->ifi_ipackets = 0; /* packets received on if */ 1099 if_data->ifi_ierrors = 0; /* input errors on interface */ 1100 if_data->ifi_opackets = 0; /* packets sent on interface */ 1101 if_data->ifi_oerrors = 0; /* output errors on if */ 1102 if_data->ifi_collisions = 0; /* collisions on csma if */ 1103 if_data->ifi_ibytes = 0; /* total number received */ 1104 if_data->ifi_obytes = 0; /* total number sent */ 1105 if_data->ifi_imcasts = 0; /* multicast packets received */ 1106 if_data->ifi_omcasts = 0; /* multicast packets sent */ 1107 if_data->ifi_iqdrops = 0; /* dropped on input */ 1108 if_data->ifi_noproto = 0; /* destined for unsupported */ 1109 /* protocol. */ 1110 } 1111 1112 /* 1113 * Set the metrics on a forwarding table route. 1114 */ 1115 static void 1116 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) 1117 { 1118 clock_t rtt; 1119 clock_t rtt_sd; 1120 ipif_t *ipif; 1121 ifrt_t *ifrt; 1122 mblk_t *mp; 1123 in6_addr_t gw_addr_v6; 1124 1125 /* 1126 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the 1127 * common case of no metrics. 1128 */ 1129 if (which == 0) 1130 return; 1131 ire->ire_uinfo.iulp_set = B_TRUE; 1132 1133 /* 1134 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1135 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1136 * microseconds. 1137 */ 1138 if (which & RTV_RTT) 1139 rtt = metrics->rmx_rtt / 1000; 1140 if (which & RTV_RTTVAR) 1141 rtt_sd = metrics->rmx_rttvar / 1000; 1142 1143 /* 1144 * Update the metrics in the IRE itself. 1145 */ 1146 mutex_enter(&ire->ire_lock); 1147 if (which & RTV_MTU) 1148 ire->ire_max_frag = metrics->rmx_mtu; 1149 if (which & RTV_RTT) 1150 ire->ire_uinfo.iulp_rtt = rtt; 1151 if (which & RTV_SSTHRESH) 1152 ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh; 1153 if (which & RTV_RTTVAR) 1154 ire->ire_uinfo.iulp_rtt_sd = rtt_sd; 1155 if (which & RTV_SPIPE) 1156 ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe; 1157 if (which & RTV_RPIPE) 1158 ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe; 1159 mutex_exit(&ire->ire_lock); 1160 1161 /* 1162 * Search through the ifrt_t chain hanging off the IPIF in order to 1163 * reflect the metric change there. 1164 */ 1165 ipif = ire->ire_ipif; 1166 if (ipif == NULL) 1167 return; 1168 ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) || 1169 ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION))); 1170 if (ipif->ipif_isv6) { 1171 mutex_enter(&ire->ire_lock); 1172 gw_addr_v6 = ire->ire_gateway_addr_v6; 1173 mutex_exit(&ire->ire_lock); 1174 } 1175 mutex_enter(&ipif->ipif_saved_ire_lock); 1176 for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) { 1177 /* 1178 * On a given ipif, the triple of address, gateway and mask is 1179 * unique for each saved IRE (in the case of ordinary interface 1180 * routes, the gateway address is all-zeroes). 1181 */ 1182 ifrt = (ifrt_t *)mp->b_rptr; 1183 if (ipif->ipif_isv6) { 1184 if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, 1185 &ire->ire_addr_v6) || 1186 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, 1187 &gw_addr_v6) || 1188 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask, 1189 &ire->ire_mask_v6)) 1190 continue; 1191 } else { 1192 if (ifrt->ifrt_addr != ire->ire_addr || 1193 ifrt->ifrt_gateway_addr != ire->ire_gateway_addr || 1194 ifrt->ifrt_mask != ire->ire_mask) 1195 continue; 1196 } 1197 if (which & RTV_MTU) 1198 ifrt->ifrt_max_frag = metrics->rmx_mtu; 1199 if (which & RTV_RTT) 1200 ifrt->ifrt_iulp_info.iulp_rtt = rtt; 1201 if (which & RTV_SSTHRESH) { 1202 ifrt->ifrt_iulp_info.iulp_ssthresh = 1203 metrics->rmx_ssthresh; 1204 } 1205 if (which & RTV_RTTVAR) 1206 ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar; 1207 if (which & RTV_SPIPE) 1208 ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe; 1209 if (which & RTV_RPIPE) 1210 ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe; 1211 break; 1212 } 1213 mutex_exit(&ipif->ipif_saved_ire_lock); 1214 } 1215 1216 /* 1217 * Get the metrics from a forwarding table route. 1218 */ 1219 static int 1220 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics) 1221 { 1222 int metrics_set = 0; 1223 1224 bzero(metrics, sizeof (rt_metrics_t)); 1225 /* 1226 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's 1227 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as 1228 * microseconds. 1229 */ 1230 metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000; 1231 metrics_set |= RTV_RTT; 1232 metrics->rmx_mtu = ire->ire_max_frag; 1233 metrics_set |= RTV_MTU; 1234 metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh; 1235 metrics_set |= RTV_SSTHRESH; 1236 metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000; 1237 metrics_set |= RTV_RTTVAR; 1238 metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe; 1239 metrics_set |= RTV_SPIPE; 1240 metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe; 1241 metrics_set |= RTV_RPIPE; 1242 return (metrics_set); 1243 } 1244 1245 /* 1246 * Takes a pointer to a routing message and extracts necessary info by looking 1247 * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers 1248 * passed (all of which must be valid). 1249 * 1250 * The bitmask of sockaddrs actually found in the message is returned, or zero 1251 * is returned in the case of an error. 1252 */ 1253 static int 1254 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp, 1255 in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp, 1256 in6_addr_t *in_src_addrp, ushort_t *indexp, ushort_t *src_indexp, 1257 sa_family_t *afp) 1258 { 1259 struct sockaddr *sa; 1260 int i; 1261 int addr_bits; 1262 int length; 1263 int found_addrs = 0; 1264 caddr_t cp; 1265 size_t size; 1266 struct sockaddr_dl *sdl; 1267 1268 *dst_addrp = ipv6_all_zeros; 1269 *gw_addrp = ipv6_all_zeros; 1270 *net_maskp = ipv6_all_zeros; 1271 *authorp = ipv6_all_zeros; 1272 *if_addrp = ipv6_all_zeros; 1273 *in_src_addrp = ipv6_all_zeros; 1274 *indexp = 0; 1275 *src_indexp = 0; 1276 *afp = AF_UNSPEC; 1277 1278 /* 1279 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP, 1280 * RTA_IFA and RTA_AUTHOR. The rest will be added as we need them. 1281 */ 1282 cp = (caddr_t)&rtm[1]; 1283 length = rtm->rtm_msglen; 1284 for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) { 1285 /* 1286 * The address family we are working with starts out as 1287 * AF_UNSPEC, but is set to the one specified with the 1288 * destination address. 1289 * 1290 * If the "working" address family that has been set to 1291 * something other than AF_UNSPEC, then the address family of 1292 * subsequent sockaddrs must either be AF_UNSPEC (for 1293 * compatibility with older programs) or must be the same as our 1294 * "working" one. 1295 * 1296 * This code assumes that RTA_DST (1) comes first in the loop. 1297 */ 1298 sa = (struct sockaddr *)cp; 1299 addr_bits = (rtm->rtm_addrs & (1 << i)); 1300 if (addr_bits == 0) 1301 continue; 1302 switch (addr_bits) { 1303 case RTA_DST: 1304 size = rts_copyfromsockaddr(sa, dst_addrp); 1305 *afp = sa->sa_family; 1306 break; 1307 case RTA_GATEWAY: 1308 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1309 return (0); 1310 size = rts_copyfromsockaddr(sa, gw_addrp); 1311 break; 1312 case RTA_NETMASK: 1313 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1314 return (0); 1315 size = rts_copyfromsockaddr(sa, net_maskp); 1316 break; 1317 case RTA_IFP: 1318 if (sa->sa_family != AF_LINK && 1319 sa->sa_family != AF_UNSPEC) 1320 return (0); 1321 sdl = (struct sockaddr_dl *)cp; 1322 *indexp = sdl->sdl_index; 1323 size = sizeof (struct sockaddr_dl); 1324 break; 1325 case RTA_SRC: 1326 /* Source address of the incoming packet */ 1327 size = rts_copyfromsockaddr(sa, in_src_addrp); 1328 *afp = sa->sa_family; 1329 break; 1330 case RTA_SRCIFP: 1331 /* Return incoming interface index pointer */ 1332 if (sa->sa_family != AF_LINK && 1333 sa->sa_family != AF_UNSPEC) 1334 return (0); 1335 sdl = (struct sockaddr_dl *)cp; 1336 *src_indexp = sdl->sdl_index; 1337 size = sizeof (struct sockaddr_dl); 1338 break; 1339 case RTA_IFA: 1340 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1341 return (0); 1342 size = rts_copyfromsockaddr(sa, if_addrp); 1343 break; 1344 case RTA_AUTHOR: 1345 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC) 1346 return (0); 1347 size = rts_copyfromsockaddr(sa, authorp); 1348 break; 1349 default: 1350 return (0); 1351 } 1352 if (size == 0) 1353 return (0); 1354 cp += size; 1355 found_addrs |= addr_bits; 1356 } 1357 return (found_addrs); 1358 } 1359 1360 /* 1361 * Fills the message with the given info. 1362 */ 1363 static void 1364 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, 1365 ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author, 1366 ipif_t *ipif, mblk_t *mp) 1367 { 1368 rt_msghdr_t *rtm; 1369 sin_t *sin; 1370 size_t data_size, header_size; 1371 uchar_t *cp; 1372 int i; 1373 1374 ASSERT(mp != NULL); 1375 /* 1376 * First find the type of the message 1377 * and its length. 1378 */ 1379 header_size = rts_header_msg_size(type); 1380 /* 1381 * Now find the size of the data 1382 * that follows the message header. 1383 */ 1384 data_size = rts_data_msg_size(rtm_addrs, AF_INET); 1385 1386 rtm = (rt_msghdr_t *)mp->b_rptr; 1387 mp->b_wptr = &mp->b_rptr[header_size]; 1388 cp = mp->b_wptr; 1389 bzero(cp, data_size); 1390 for (i = 0; i < RTA_NUMBITS; i++) { 1391 sin = (sin_t *)cp; 1392 switch (rtm_addrs & (1 << i)) { 1393 case RTA_DST: 1394 sin->sin_addr.s_addr = dst; 1395 sin->sin_family = AF_INET; 1396 cp += sizeof (sin_t); 1397 break; 1398 case RTA_GATEWAY: 1399 sin->sin_addr.s_addr = gateway; 1400 sin->sin_family = AF_INET; 1401 cp += sizeof (sin_t); 1402 break; 1403 case RTA_NETMASK: 1404 sin->sin_addr.s_addr = mask; 1405 sin->sin_family = AF_INET; 1406 cp += sizeof (sin_t); 1407 break; 1408 case RTA_IFP: 1409 cp += ill_dls_info((struct sockaddr_dl *)cp, ipif); 1410 break; 1411 case RTA_SRCIFP: 1412 /* 1413 * RTA_SRCIFP is not yet supported 1414 * for RTM_GET and RTM_CHANGE 1415 */ 1416 break; 1417 case RTA_IFA: 1418 case RTA_SRC: 1419 sin->sin_addr.s_addr = src_addr; 1420 sin->sin_family = AF_INET; 1421 cp += sizeof (sin_t); 1422 break; 1423 case RTA_AUTHOR: 1424 sin->sin_addr.s_addr = author; 1425 sin->sin_family = AF_INET; 1426 cp += sizeof (sin_t); 1427 break; 1428 case RTA_BRD: 1429 /* 1430 * RTA_BRD is used typically to specify a point-to-point 1431 * destination address. 1432 */ 1433 sin->sin_addr.s_addr = brd_addr; 1434 sin->sin_family = AF_INET; 1435 cp += sizeof (sin_t); 1436 break; 1437 } 1438 } 1439 mp->b_wptr = cp; 1440 mp->b_cont = NULL; 1441 /* 1442 * set the fields that are common to 1443 * to different messages. 1444 */ 1445 rtm->rtm_msglen = (short)(header_size + data_size); 1446 rtm->rtm_version = RTM_VERSION; 1447 rtm->rtm_type = (uchar_t)type; 1448 } 1449 1450 /* 1451 * Allocates and initializes a routing socket message. 1452 */ 1453 mblk_t * 1454 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af) 1455 { 1456 size_t length; 1457 mblk_t *mp; 1458 1459 length = RTS_MSG_SIZE(type, rtm_addrs, af); 1460 mp = allocb(length, BPRI_MED); 1461 if (mp == NULL) 1462 return (mp); 1463 bzero(mp->b_rptr, length); 1464 return (mp); 1465 } 1466 1467 /* 1468 * Returns the size of the routing 1469 * socket message header size. 1470 */ 1471 size_t 1472 rts_header_msg_size(int type) 1473 { 1474 switch (type) { 1475 case RTM_DELADDR: 1476 case RTM_NEWADDR: 1477 return (sizeof (ifa_msghdr_t)); 1478 case RTM_IFINFO: 1479 return (sizeof (if_msghdr_t)); 1480 default: 1481 return (sizeof (rt_msghdr_t)); 1482 } 1483 } 1484 1485 /* 1486 * Returns the size of the message needed with the given rtm_addrs and family. 1487 * 1488 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are 1489 * of the same family (currently either AF_INET or AF_INET6). 1490 */ 1491 size_t 1492 rts_data_msg_size(int rtm_addrs, sa_family_t af) 1493 { 1494 int i; 1495 size_t length = 0; 1496 1497 for (i = 0; i < RTA_NUMBITS; i++) { 1498 switch (rtm_addrs & (1 << i)) { 1499 case RTA_IFP: 1500 length += sizeof (struct sockaddr_dl); 1501 break; 1502 case RTA_DST: 1503 case RTA_GATEWAY: 1504 case RTA_NETMASK: 1505 case RTA_SRC: 1506 case RTA_SRCIFP: 1507 case RTA_IFA: 1508 case RTA_AUTHOR: 1509 case RTA_BRD: 1510 ASSERT(af == AF_INET || af == AF_INET6); 1511 switch (af) { 1512 case AF_INET: 1513 length += sizeof (sin_t); 1514 break; 1515 case AF_INET6: 1516 length += sizeof (sin6_t); 1517 break; 1518 } 1519 break; 1520 } 1521 } 1522 return (length); 1523 } 1524 1525 /* 1526 * This routine is called to generate a message to the routing 1527 * socket indicating that a redirect has occured, a routing lookup 1528 * has failed, or that a protocol has detected timeouts to a particular 1529 * destination. This routine is called for message types RTM_LOSING, 1530 * RTM_REDIRECT, and RTM_MISS. 1531 */ 1532 void 1533 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, 1534 ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs) 1535 { 1536 rt_msghdr_t *rtm; 1537 mblk_t *mp; 1538 1539 if (rtm_addrs == 0) 1540 return; 1541 mp = rts_alloc_msg(type, rtm_addrs, AF_INET); 1542 if (mp == NULL) 1543 return; 1544 rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0, 1545 author, NULL, mp); 1546 rtm = (rt_msghdr_t *)mp->b_rptr; 1547 rtm->rtm_flags = flags; 1548 rtm->rtm_errno = error; 1549 rtm->rtm_flags |= RTF_DONE; 1550 rtm->rtm_addrs = rtm_addrs; 1551 rts_queue_input(mp, NULL, AF_INET); 1552 } 1553 1554 /* 1555 * This routine is called to generate a message to the routing 1556 * socket indicating that the status of a network interface has changed. 1557 * Message type generated RTM_IFINFO. 1558 */ 1559 void 1560 ip_rts_ifmsg(ipif_t *ipif) 1561 { 1562 if_msghdr_t *ifm; 1563 mblk_t *mp; 1564 sa_family_t af; 1565 1566 /* 1567 * This message should be generated only 1568 * when the physical device is changing 1569 * state. 1570 */ 1571 if (ipif->ipif_id != 0) 1572 return; 1573 if (ipif->ipif_isv6) { 1574 af = AF_INET6; 1575 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af); 1576 if (mp == NULL) 1577 return; 1578 rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros, 1579 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, 1580 &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp); 1581 } else { 1582 af = AF_INET; 1583 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af); 1584 if (mp == NULL) 1585 return; 1586 rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp); 1587 } 1588 ifm = (if_msghdr_t *)mp->b_rptr; 1589 ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; 1590 ifm->ifm_flags = ipif->ipif_flags | ipif->ipif_ill->ill_flags | 1591 ipif->ipif_ill->ill_phyint->phyint_flags; 1592 rts_getifdata(&ifm->ifm_data, ipif); 1593 ifm->ifm_addrs = RTA_IFP; 1594 rts_queue_input(mp, NULL, af); 1595 } 1596 1597 /* 1598 * This is called to generate messages to the routing socket 1599 * indicating a network interface has had addresses associated with it. 1600 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>. 1601 */ 1602 void 1603 ip_rts_newaddrmsg(int cmd, int error, ipif_t *ipif) 1604 { 1605 int pass; 1606 int ncmd; 1607 int rtm_addrs; 1608 mblk_t *mp; 1609 ifa_msghdr_t *ifam; 1610 rt_msghdr_t *rtm; 1611 sa_family_t af; 1612 1613 if (ipif->ipif_isv6) 1614 af = AF_INET6; 1615 else 1616 af = AF_INET; 1617 /* 1618 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR. 1619 * if the request is ADD, send RTM_NEWADDR and RTM_ADD. 1620 */ 1621 for (pass = 1; pass < 3; pass++) { 1622 if ((cmd == RTM_ADD && pass == 1) || 1623 (cmd == RTM_DELETE && pass == 2)) { 1624 ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR); 1625 1626 rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD); 1627 mp = rts_alloc_msg(ncmd, rtm_addrs, af); 1628 if (mp == NULL) 1629 continue; 1630 switch (af) { 1631 case AF_INET: 1632 rts_fill_msg(ncmd, rtm_addrs, 0, 1633 ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr, 1634 ipif->ipif_pp_dst_addr, 0, NULL, mp); 1635 break; 1636 case AF_INET6: 1637 rts_fill_msg_v6(ncmd, rtm_addrs, 1638 &ipv6_all_zeros, &ipif->ipif_v6net_mask, 1639 &ipv6_all_zeros, &ipif->ipif_v6lcl_addr, 1640 &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, 1641 NULL, mp); 1642 break; 1643 } 1644 ifam = (ifa_msghdr_t *)mp->b_rptr; 1645 ifam->ifam_index = 1646 ipif->ipif_ill->ill_phyint->phyint_ifindex; 1647 ifam->ifam_metric = ipif->ipif_metric; 1648 ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0); 1649 ifam->ifam_addrs = rtm_addrs; 1650 rts_queue_input(mp, NULL, af); 1651 } 1652 if ((cmd == RTM_ADD && pass == 2) || 1653 (cmd == RTM_DELETE && pass == 1)) { 1654 rtm_addrs = (RTA_DST | RTA_NETMASK); 1655 mp = rts_alloc_msg(cmd, rtm_addrs, af); 1656 if (mp == NULL) 1657 continue; 1658 switch (af) { 1659 case AF_INET: 1660 rts_fill_msg(cmd, rtm_addrs, 1661 ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0, 1662 0, 0, 0, NULL, mp); 1663 break; 1664 case AF_INET6: 1665 rts_fill_msg_v6(cmd, rtm_addrs, 1666 &ipif->ipif_v6lcl_addr, 1667 &ipif->ipif_v6net_mask, &ipv6_all_zeros, 1668 &ipv6_all_zeros, &ipv6_all_zeros, 1669 &ipv6_all_zeros, NULL, mp); 1670 break; 1671 } 1672 rtm = (rt_msghdr_t *)mp->b_rptr; 1673 rtm->rtm_index = 1674 ipif->ipif_ill->ill_phyint->phyint_ifindex; 1675 rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0); 1676 rtm->rtm_errno = error; 1677 if (error == 0) 1678 rtm->rtm_flags |= RTF_DONE; 1679 rtm->rtm_addrs = rtm_addrs; 1680 rts_queue_input(mp, NULL, af); 1681 } 1682 } 1683 } 1684 1685 /* 1686 * Based on the address family specified in a sockaddr, copy the address field 1687 * into an in6_addr_t. 1688 * 1689 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for 1690 * compatibility with programs that leave the family cleared in the sockaddr. 1691 * Callers of rts_copyfromsockaddr should check the family themselves if they 1692 * wish to verify its value. 1693 * 1694 * In the case of AF_INET6, a check is made to ensure that address is not an 1695 * IPv4-mapped address. 1696 */ 1697 size_t 1698 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp) 1699 { 1700 switch (sa->sa_family) { 1701 case AF_INET: 1702 case AF_UNSPEC: 1703 IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp); 1704 return (sizeof (sin_t)); 1705 case AF_INET6: 1706 *addrp = ((sin6_t *)sa)->sin6_addr; 1707 if (IN6_IS_ADDR_V4MAPPED(addrp)) 1708 return (0); 1709 return (sizeof (sin6_t)); 1710 default: 1711 return (0); 1712 } 1713 } 1714