/* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 */ /* * This file contains routines that processes routing socket requests. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \ (rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type)) static size_t rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp); static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, const tsol_gc_t *); static int rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp, sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error); static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif); static int rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics); static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af); static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics); static ire_t *ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp); static ire_t *ire_lookup_v6(const in6_addr_t *dst_addr_v6, const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp); /* * Send `mp' to all eligible routing queues. 
A queue is ineligible if: * * 1. SO_USELOOPBACK is off and it is not the originating queue. * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'. * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'. * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC. */ void rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, ip_stack_t *ipst) { mblk_t *mp1; conn_t *connp, *next_connp; /* * Since we don't have an ill_t here, RTSQ_DEFAULT must already be * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point. */ ASSERT(!(flags & RTSQ_DEFAULT)); mutex_enter(&ipst->ips_rts_clients->connf_lock); connp = ipst->ips_rts_clients->connf_head; for (; connp != NULL; connp = next_connp) { next_connp = connp->conn_next; /* * If there was a family specified when this routing socket was * created and it doesn't match the family of the message to * copy, then continue. */ if ((connp->conn_proto != AF_UNSPEC) && (connp->conn_proto != af)) continue; /* * Queue the message only if the conn_t and flags match. */ if (connp->conn_rtaware & RTAW_UNDER_IPMP) { if (!(flags & RTSQ_UNDER_IPMP)) continue; } else { if (!(flags & RTSQ_NORMAL)) continue; } /* * For the originating queue, we only copy the message upstream * if loopback is set. For others reading on the routing * socket, we check if there is room upstream for a copy of the * message. */ if ((o_connp == connp) && connp->conn_useloopback == 0) { connp = connp->conn_next; continue; } CONN_INC_REF(connp); mutex_exit(&ipst->ips_rts_clients->connf_lock); /* Pass to rts_input */ if (IPCL_IS_NONSTR(connp) ? 
!connp->conn_flow_cntrld : canputnext(connp->conn_rq)) { mp1 = dupmsg(mp); if (mp1 == NULL) mp1 = copymsg(mp); /* Note that we pass a NULL ira to rts_input */ if (mp1 != NULL) (connp->conn_recv)(connp, mp1, NULL, NULL); } mutex_enter(&ipst->ips_rts_clients->connf_lock); /* reload next_connp since conn_next may have changed */ next_connp = connp->conn_next; CONN_DEC_REF(connp); } mutex_exit(&ipst->ips_rts_clients->connf_lock); freemsg(mp); } /* * Takes an ire and sends an ack to all the routing sockets. This * routine is used * - when a route is created/deleted through the ioctl interface. * - when a stale redirect is deleted */ void ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) { mblk_t *mp; rt_msghdr_t *rtm; int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY); sa_family_t af; in6_addr_t gw_addr_v6; if (ire == NULL) return; ASSERT(ire->ire_ipversion == IPV4_VERSION || ire->ire_ipversion == IPV6_VERSION); ASSERT(!(ire->ire_type & IRE_IF_CLONE)); if (ire->ire_flags & RTF_SETSRC) rtm_addrs |= RTA_SRC; switch (ire->ire_ipversion) { case IPV4_VERSION: af = AF_INET; mp = rts_alloc_msg(type, rtm_addrs, af, 0); if (mp == NULL) return; rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask, ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL, mp, NULL); break; case IPV6_VERSION: af = AF_INET6; mp = rts_alloc_msg(type, rtm_addrs, af, 0); if (mp == NULL) return; mutex_enter(&ire->ire_lock); gw_addr_v6 = ire->ire_gateway_addr_v6; mutex_exit(&ire->ire_lock); rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6, &ire->ire_mask_v6, &gw_addr_v6, &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, NULL, mp, NULL); break; } rtm = (rt_msghdr_t *)mp->b_rptr; mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen]; rtm->rtm_addrs = rtm_addrs; rtm->rtm_flags = ire->ire_flags; if (error != 0) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst); } /* * This is a call from the RTS module * 
indicating that this is a Routing Socket * Stream. Insert this conn_t in routing * socket client list. */ void ip_rts_register(conn_t *connp) { ip_stack_t *ipst = connp->conn_netstack->netstack_ip; connp->conn_useloopback = 1; ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); } /* * This is a call from the RTS module indicating that it is closing. */ void ip_rts_unregister(conn_t *connp) { ipcl_hash_remove(connp); } /* * Processes requests received on a routing socket. It extracts all the * arguments and calls the appropriate function to process the request. * * RTA_SRC bit flag requests are sent by 'route -setsrc'. * * In general, this function does not consume the message supplied but rather * sends the message upstream with an appropriate UNIX errno. */ int ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr) { rt_msghdr_t *rtm = NULL; in6_addr_t dst_addr_v6; in6_addr_t src_addr_v6; in6_addr_t gw_addr_v6; in6_addr_t net_mask_v6; in6_addr_t author_v6; in6_addr_t if_addr_v6; mblk_t *mp1; ire_t *ire = NULL; ire_t *ifire = NULL; ipaddr_t v4setsrc; in6_addr_t v6setsrc = ipv6_all_zeros; tsol_ire_gw_secattr_t *gwattr = NULL; int error = 0; int match_flags = MATCH_IRE_DSTONLY; int match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW; int found_addrs; sa_family_t af; ipaddr_t dst_addr; ipaddr_t gw_addr; ipaddr_t src_addr; ipaddr_t net_mask; ushort_t index; boolean_t gcgrp_xtraref = B_FALSE; tsol_gcgrp_addr_t ga; tsol_rtsecattr_t rtsecattr; struct rtsa_s *rtsap = NULL; tsol_gcgrp_t *gcgrp = NULL; tsol_gc_t *gc = NULL; ts_label_t *tsl = NULL; zoneid_t zoneid; ip_stack_t *ipst; ill_t *ill = NULL; zoneid = connp->conn_zoneid; ipst = connp->conn_netstack->netstack_ip; if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { freemsg(mp); error = EINVAL; goto done; } if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { freemsg(mp); error = EINVAL; goto done; } /* * Check the routing message for basic consistency including the * version number and that the number of 
octets written is the same * as specified by the rtm_msglen field. * * At this point, an error can be delivered back via rtm_errno. */ rtm = (rt_msghdr_t *)mp->b_rptr; if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) { error = EINVAL; goto done; } if (rtm->rtm_version != RTM_VERSION) { error = EPROTONOSUPPORT; goto done; } /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */ if (rtm->rtm_type != RTM_GET && rtm->rtm_type != RTM_RESOLVE && (ioc_cr == NULL || secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) { error = EPERM; goto done; } found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6, &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr, &error); if (error != 0) goto done; if ((found_addrs & RTA_DST) == 0) { error = EINVAL; goto done; } /* * Based on the address family of the destination address, determine * the destination, gateway and netmask and return the appropriate error * if an unknown address family was specified (following the errno * values that 4.4BSD-Lite2 returns.) */ switch (af) { case AF_INET: IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr); IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr); IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr); if (((found_addrs & RTA_NETMASK) == 0) || (rtm->rtm_flags & RTF_HOST)) net_mask = IP_HOST_MASK; else IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask); break; case AF_INET6: if (((found_addrs & RTA_NETMASK) == 0) || (rtm->rtm_flags & RTF_HOST)) net_mask_v6 = ipv6_all_ones; break; default: /* * These errno values are meant to be compatible with * 4.4BSD-Lite2 for the given message types. */ switch (rtm->rtm_type) { case RTM_ADD: case RTM_DELETE: error = ESRCH; goto done; case RTM_GET: case RTM_CHANGE: error = EAFNOSUPPORT; goto done; default: error = EOPNOTSUPP; goto done; } } /* * At this point, the address family must be something known. 
*/ ASSERT(af == AF_INET || af == AF_INET6); /* Handle RTA_IFP */ if (index != 0) { ipif_t *ipif; lookup: ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst); if (ill == NULL) { error = EINVAL; goto done; } /* * Since all interfaces in an IPMP group must be equivalent, * we prevent changes to a specific underlying interface's * routing configuration. However, for backward compatibility, * we intepret a request to add a route on an underlying * interface as a request to add a route on its IPMP interface. */ if (IS_UNDER_IPMP(ill)) { switch (rtm->rtm_type) { case RTM_CHANGE: case RTM_DELETE: error = EINVAL; goto done; case RTM_ADD: index = ipmp_ill_get_ipmp_ifindex(ill); ill_refrele(ill); if (index == 0) { ill = NULL; /* already refrele'd */ error = EINVAL; goto done; } goto lookup; } } match_flags |= MATCH_IRE_ILL; /* * This provides the same zoneid as in Solaris 10 * that -ifp picks the zoneid from the first ipif on the ill. * But it might not be useful since the first ipif will always * have the same zoneid as the ill. */ ipif = ipif_get_next_ipif(NULL, ill); if (ipif != NULL) { zoneid = ipif->ipif_zoneid; ipif_refrele(ipif); } } /* * If a netmask was supplied in the message, then subsequent route * lookups will attempt to match on the netmask as well. */ if ((found_addrs & RTA_NETMASK) != 0) match_flags |= MATCH_IRE_MASK; /* * We only process any passed-in route security attributes for * either RTM_ADD or RTM_CHANGE message; We overload them * to do an RTM_GET as a different label; ignore otherwise. */ if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE || rtm->rtm_type == RTM_GET) { ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX); if (rtsecattr.rtsa_cnt > 0) rtsap = &rtsecattr.rtsa_attr[0]; } switch (rtm->rtm_type) { case RTM_ADD: /* if we are adding a route, gateway is a must */ if ((found_addrs & RTA_GATEWAY) == 0) { error = EINVAL; goto done; } /* Multirouting does not support net routes. 
*/ if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) == RTF_MULTIRT) { error = EADDRNOTAVAIL; goto done; } /* * Multirouting and user-specified source addresses * do not support interface based routing. * Assigning a source address to an interface based * route is achievable by plumbing a new ipif and * setting up the interface route via this ipif, * though. */ if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) { if ((rtm->rtm_flags & RTF_GATEWAY) == 0) { error = EADDRNOTAVAIL; goto done; } } switch (af) { case AF_INET: if (src_addr != INADDR_ANY) { uint_t type; /* * The RTF_SETSRC flag is present, check that * the supplied src address is not the loopback * address. This would produce martian packets. */ if (src_addr == htonl(INADDR_LOOPBACK)) { error = EINVAL; goto done; } /* * Also check that the supplied address is a * valid, local one. Only allow IFF_UP ones */ type = ip_type_v4(src_addr, ipst); if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { error = EADDRNOTAVAIL; goto done; } } else { /* * The RTF_SETSRC modifier must be associated * to a non-null source address. */ if (rtm->rtm_flags & RTF_SETSRC) { error = EINVAL; goto done; } } error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr, rtm->rtm_flags, ill, &ire, B_FALSE, rtsap, ipst, zoneid); if (ill != NULL) ASSERT(!MUTEX_HELD(&ill->ill_lock)); break; case AF_INET6: if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) { uint_t type; /* * The RTF_SETSRC flag is present, check that * the supplied src address is not the loopback * address. This would produce martian packets. */ if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) { error = EINVAL; goto done; } /* * Also check that the supplied address is a * valid, local one. Only allow UP ones. 
*/ type = ip_type_v6(&src_addr_v6, ipst); if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { error = EADDRNOTAVAIL; goto done; } error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, &src_addr_v6, rtm->rtm_flags, ill, &ire, rtsap, ipst, zoneid); break; } /* * The RTF_SETSRC modifier must be associated * to a non-null source address. */ if (rtm->rtm_flags & RTF_SETSRC) { error = EINVAL; goto done; } error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, NULL, rtm->rtm_flags, ill, &ire, rtsap, ipst, zoneid); if (ill != NULL) ASSERT(!MUTEX_HELD(&ill->ill_lock)); break; } if (error != 0) goto done; ASSERT(ire != NULL); rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); break; case RTM_DELETE: /* if we are deleting a route, gateway is a must */ if ((found_addrs & RTA_GATEWAY) == 0) { error = EINVAL; goto done; } /* * The RTF_SETSRC modifier does not make sense * when deleting a route. */ if (rtm->rtm_flags & RTF_SETSRC) { error = EINVAL; goto done; } switch (af) { case AF_INET: error = ip_rt_delete(dst_addr, net_mask, gw_addr, found_addrs, rtm->rtm_flags, ill, B_FALSE, ipst, zoneid); break; case AF_INET6: error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, found_addrs, rtm->rtm_flags, ill, ipst, zoneid); break; } break; case RTM_GET: case RTM_CHANGE: /* * In the case of RTM_GET, the forwarding table should be * searched recursively. Also, if a gateway was * specified then the gateway address must also be matched. * * In the case of RTM_CHANGE, the gateway address (if supplied) * is the new gateway address so matching on the gateway address * is not done. This can lead to ambiguity when looking up the * route to change as usually only the destination (and netmask, * if supplied) is used for the lookup. However if a RTA_IFP * sockaddr is also supplied, it can disambiguate which route to * change provided the ambigous routes are tied to distinct * ill's (or interface indices). 
If the routes are not tied to * any particular interfaces (for example, with traditional * gateway routes), then a RTA_IFP sockaddr will be of no use as * it won't match any such routes. * RTA_SRC is not supported for RTM_GET and RTM_CHANGE, * except when RTM_CHANGE is combined to RTF_SETSRC. */ if (((found_addrs & RTA_SRC) != 0) && ((rtm->rtm_type == RTM_GET) || !(rtm->rtm_flags & RTF_SETSRC))) { error = EOPNOTSUPP; goto done; } if (rtm->rtm_type == RTM_GET) { match_flags |= MATCH_IRE_SECATTR; match_flags_local |= MATCH_IRE_SECATTR; if ((found_addrs & RTA_GATEWAY) != 0) match_flags |= MATCH_IRE_GW; if (ioc_cr) tsl = crgetlabel(ioc_cr); if (rtsap != NULL) { if (rtsa_validate(rtsap) != 0) { error = EINVAL; goto done; } if (tsl != NULL && crgetzoneid(ioc_cr) != GLOBAL_ZONEID && (tsl->tsl_doi != rtsap->rtsa_doi || !bldominates(&tsl->tsl_label, &rtsap->rtsa_slrange.lower_bound))) { error = EPERM; goto done; } tsl = labelalloc( &rtsap->rtsa_slrange.lower_bound, rtsap->rtsa_doi, KM_NOSLEEP); } } if (rtm->rtm_type == RTM_CHANGE) { if ((found_addrs & RTA_GATEWAY) && (rtm->rtm_flags & RTF_SETSRC)) { /* * Do not want to change the gateway, * but rather the source address. */ match_flags |= MATCH_IRE_GW; } } /* * If the netmask is all ones (either as supplied or as derived * above), then first check for an IRE_LOOPBACK or * IRE_LOCAL entry. * * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL * entry, then look for any other type of IRE. 
*/ switch (af) { case AF_INET: if (net_mask == IP_HOST_MASK) { ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr, IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid, tsl, match_flags_local, 0, ipst, NULL); } if (ire == NULL) { ire = ire_lookup_v4(dst_addr, net_mask, gw_addr, ill, zoneid, tsl, match_flags, ipst, &ifire, &v4setsrc, &gwattr); IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc); } break; case AF_INET6: if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) { ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL, &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid, tsl, match_flags_local, 0, ipst, NULL); } if (ire == NULL) { ire = ire_lookup_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, ill, zoneid, tsl, match_flags, ipst, &ifire, &v6setsrc, &gwattr); } break; } if (tsl != NULL && tsl != crgetlabel(ioc_cr)) label_rele(tsl); if (ire == NULL) { error = ESRCH; goto done; } /* * Want to return failure if we get an IRE_NOROUTE from * ire_route_recursive */ if (ire->ire_type & IRE_NOROUTE) { ire_refrele(ire); ire = NULL; error = ESRCH; goto done; } /* we know the IRE before we come here */ switch (rtm->rtm_type) { case RTM_GET: mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af); if (mp1 == NULL) { error = ENOBUFS; goto done; } freemsg(mp); mp = mp1; rtm = (rt_msghdr_t *)mp->b_rptr; break; case RTM_CHANGE: /* * Do not allow to the multirouting state of a route * to be changed. This aims to prevent undesirable * stages where both multirt and non-multirt routes * for the same destination are declared. */ if ((ire->ire_flags & RTF_MULTIRT) != (rtm->rtm_flags & RTF_MULTIRT)) { error = EINVAL; goto done; } /* * Note that we do not need to do * ire_flush_cache_*(IRE_FLUSH_ADD) as a change * in metrics or gateway will not affect existing * routes since it does not create a more specific * route. 
*/ switch (af) { case AF_INET: if ((found_addrs & RTA_GATEWAY) != 0 && (ire->ire_gateway_addr != gw_addr)) { ire->ire_gateway_addr = gw_addr; } if (rtsap != NULL) { ga.ga_af = AF_INET; IN6_IPADDR_TO_V4MAPPED( ire->ire_gateway_addr, &ga.ga_addr); gcgrp = gcgrp_lookup(&ga, B_TRUE); if (gcgrp == NULL) { error = ENOMEM; goto done; } } if ((found_addrs & RTA_SRC) != 0 && (rtm->rtm_flags & RTF_SETSRC) != 0 && (ire->ire_setsrc_addr != src_addr)) { if (src_addr != INADDR_ANY) { uint_t type; /* * The RTF_SETSRC flag is * present, check that the * supplied src address is not * the loopback address. This * would produce martian * packets. */ if (src_addr == htonl(INADDR_LOOPBACK)) { error = EINVAL; goto done; } /* * Also check that the * supplied addr is a valid * local address. */ type = ip_type_v4(src_addr, ipst); if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { error = EADDRNOTAVAIL; goto done; } ire->ire_flags |= RTF_SETSRC; ire->ire_setsrc_addr = src_addr; } else { ire->ire_flags &= ~RTF_SETSRC; ire->ire_setsrc_addr = INADDR_ANY; } /* * Let conn_ixa caching know that * source address selection changed */ ip_update_source_selection(ipst); } ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE); break; case AF_INET6: mutex_enter(&ire->ire_lock); if ((found_addrs & RTA_GATEWAY) != 0 && !IN6_ARE_ADDR_EQUAL( &ire->ire_gateway_addr_v6, &gw_addr_v6)) { ire->ire_gateway_addr_v6 = gw_addr_v6; } mutex_exit(&ire->ire_lock); if (rtsap != NULL) { ga.ga_af = AF_INET6; mutex_enter(&ire->ire_lock); ga.ga_addr = ire->ire_gateway_addr_v6; mutex_exit(&ire->ire_lock); gcgrp = gcgrp_lookup(&ga, B_TRUE); if (gcgrp == NULL) { error = ENOMEM; goto done; } } if ((found_addrs & RTA_SRC) != 0 && (rtm->rtm_flags & RTF_SETSRC) != 0 && !IN6_ARE_ADDR_EQUAL( &ire->ire_setsrc_addr_v6, &src_addr_v6)) { if (!IN6_IS_ADDR_UNSPECIFIED( &src_addr_v6)) { uint_t type; /* * The RTF_SETSRC flag is * present, check that the * supplied src address is not * the loopback address. This * would produce martian * packets. 
*/ if (IN6_IS_ADDR_LOOPBACK( &src_addr_v6)) { error = EINVAL; goto done; } /* * Also check that the * supplied addr is a valid * local address. */ type = ip_type_v6(&src_addr_v6, ipst); if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { error = EADDRNOTAVAIL; goto done; } mutex_enter(&ire->ire_lock); ire->ire_flags |= RTF_SETSRC; ire->ire_setsrc_addr_v6 = src_addr_v6; mutex_exit(&ire->ire_lock); } else { mutex_enter(&ire->ire_lock); ire->ire_flags &= ~RTF_SETSRC; ire->ire_setsrc_addr_v6 = ipv6_all_zeros; mutex_exit(&ire->ire_lock); } /* * Let conn_ixa caching know that * source address selection changed */ ip_update_source_selection(ipst); } ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE); break; } if (rtsap != NULL) { ASSERT(gcgrp != NULL); /* * Create and add the security attribute to * prefix IRE; it will add a reference to the * group upon allocating a new entry. If it * finds an already-existing entry for the * security attribute, it simply returns it * and no new group reference is made. */ gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref); if (gc == NULL || (error = tsol_ire_init_gwattr(ire, ire->ire_ipversion, gc)) != 0) { if (gc != NULL) { GC_REFRELE(gc); } else { /* gc_create failed */ error = ENOMEM; } goto done; } } rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); break; } break; default: error = EOPNOTSUPP; break; } done: if (ire != NULL) ire_refrele(ire); if (ifire != NULL) ire_refrele(ifire); if (ill != NULL) ill_refrele(ill); if (gcgrp_xtraref) GCGRP_REFRELE(gcgrp); if (rtm != NULL) { ASSERT(mp->b_wptr <= mp->b_datap->db_lim); if (error != 0) { rtm->rtm_errno = error; /* Send error ACK */ ip1dbg(("ip_rts_request: error %d\n", error)); } else { rtm->rtm_flags |= RTF_DONE; /* OK ACK already set up by caller except this */ ip2dbg(("ip_rts_request: OK ACK\n")); } rts_queue_input(mp, connp, af, RTSQ_ALL, ipst); } return (error); } /* * Helper function that can do recursive lookups including when * MATCH_IRE_GW and/or MATCH_IRE_MASK is set. 
*/ static ire_t * ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp) { ire_t *ire; ire_t *ifire = NULL; uint_t ire_type; *pifire = NULL; *v4setsrcp = INADDR_ANY; *gwattrp = NULL; /* Skip IRE_IF_CLONE */ match_flags |= MATCH_IRE_TYPE; ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; /* * ire_route_recursive can't match gateway or mask thus if they are * set we have to do two steps of lookups */ if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr, ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL); if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) return (ire); if (ire->ire_type & IRE_ONLINK) return (ire); if (ire->ire_flags & RTF_SETSRC) { ASSERT(ire->ire_setsrc_addr != INADDR_ANY); *v4setsrcp = ire->ire_setsrc_addr; v4setsrcp = NULL; } /* The first ire_gw_secattr is passed back */ if (ire->ire_gw_secattr != NULL) { *gwattrp = ire->ire_gw_secattr; gwattrp = NULL; } /* Look for an interface ire recursively based on the gateway */ dst_addr = ire->ire_gateway_addr; match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); /* * Don't allow anything unusual past the first iteration. * After the first lookup, we should no longer look for * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT * routes. * * In addition, after we have found a direct IRE_OFFLINK, * we should only look for interface or clone routes. 
*/ match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */ if ((ire->ire_type & IRE_OFFLINK) && !(ire->ire_flags & RTF_INDIRECT)) { ire_type = IRE_IF_ALL; } else { /* * no more local, loopback, broadcast routes */ if (!(match_flags & MATCH_IRE_TYPE)) ire_type = (IRE_OFFLINK|IRE_ONLINK); ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST); } match_flags |= MATCH_IRE_TYPE; ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, gwattrp, NULL); } else { ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp, gwattrp, NULL); } *pifire = ifire; return (ire); } static ire_t * ire_lookup_v6(const in6_addr_t *dst_addr_v6, const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp) { ire_t *ire; ire_t *ifire = NULL; uint_t ire_type; *pifire = NULL; *v6setsrcp = ipv6_all_zeros; *gwattrp = NULL; /* Skip IRE_IF_CLONE */ match_flags |= MATCH_IRE_TYPE; ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; /* * ire_route_recursive can't match gateway or mask thus if they are * set we have to do two steps of lookups */ if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { in6_addr_t dst; ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6, gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL); if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) return (ire); if (ire->ire_type & IRE_ONLINK) return (ire); if (ire->ire_flags & RTF_SETSRC) { ASSERT(!IN6_IS_ADDR_UNSPECIFIED( &ire->ire_setsrc_addr_v6)); *v6setsrcp = ire->ire_setsrc_addr_v6; v6setsrcp = NULL; } /* The first ire_gw_secattr is passed back */ if (ire->ire_gw_secattr != NULL) { *gwattrp = ire->ire_gw_secattr; gwattrp = NULL; } mutex_enter(&ire->ire_lock); dst = ire->ire_gateway_addr_v6; mutex_exit(&ire->ire_lock); 
match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); /* * Don't allow anything unusual past the first iteration. * After the first lookup, we should no longer look for * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT * routes. * * In addition, after we have found a direct IRE_OFFLINK, * we should only look for interface or clone routes. */ match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */ if ((ire->ire_type & IRE_OFFLINK) && !(ire->ire_flags & RTF_INDIRECT)) { ire_type = IRE_IF_ALL; } else { /* * no more local, loopback routes */ if (!(match_flags & MATCH_IRE_TYPE)) ire_type = (IRE_OFFLINK|IRE_ONLINK); ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK); } match_flags |= MATCH_IRE_TYPE; ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, gwattrp, NULL); } else { ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid, tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, gwattrp, NULL); } *pifire = ifire; return (ire); } /* * Handle IP_IOC_RTS_REQUEST ioctls */ int ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) { conn_t *connp = Q_TO_CONN(q); IOCP iocp = (IOCP)mp->b_rptr; mblk_t *mp1, *ioc_mp = mp; int error = 0; ip_stack_t *ipst; ipst = connp->conn_netstack->netstack_ip; ASSERT(mp->b_cont != NULL); /* ioc_mp holds mp */ mp = mp->b_cont; /* * The Routing Socket data starts on * next block. If there is no next block * this is an indication from routing module * that it is a routing socket stream queue. * We need to support that for compatibility with SDP since * it has a contract private interface to use IP_IOC_RTS_REQUEST. * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this. */ if (mp->b_cont == NULL) { /* * This is a message from SDP * indicating that this is a Routing Socket * Stream. Insert this conn_t in routing * socket client list. 
*/ connp->conn_useloopback = 1; ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); goto done; } mp1 = dupmsg(mp->b_cont); if (mp1 == NULL) { error = ENOBUFS; goto done; } mp = mp1; error = ip_rts_request_common(mp, connp, ioc_cr); done: iocp->ioc_error = error; ioc_mp->b_datap->db_type = M_IOCACK; if (iocp->ioc_error != 0) iocp->ioc_count = 0; /* Note that we pass a NULL ira to rts_input */ (connp->conn_recv)(connp, ioc_mp, NULL, NULL); /* conn was refheld in ip_wput_ioctl. */ CONN_OPER_PENDING_DONE(connp); return (error); } /* * Build a reply to the RTM_GET request contained in the given message block * using the retrieved IRE of the destination address, the parent IRE (if it * exists) and the address family. * * Returns a pointer to a message block containing the reply if successful, * otherwise NULL is returned. */ static mblk_t * rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af) { rt_msghdr_t *rtm; rt_msghdr_t *new_rtm; mblk_t *new_mp; int rtm_addrs; int rtm_flags; tsol_gc_t *gc = NULL; tsol_gcgrp_t *gcgrp = NULL; ill_t *ill; ipif_t *ipif = NULL; ipaddr_t brdaddr; /* IFF_POINTOPOINT destination */ ipaddr_t ifaddr; in6_addr_t brdaddr6; /* IFF_POINTOPOINT destination */ in6_addr_t ifaddr6; ipaddr_t v4setsrc; rtm = (rt_msghdr_t *)mp->b_rptr; /* * Find the ill used to send packets. This will be NULL in case * of a reject or blackhole. */ if (ifire != NULL) ill = ire_nexthop_ill(ifire); else ill = ire_nexthop_ill(ire); if (attrp != NULL) { mutex_enter(&attrp->igsa_lock); if ((gc = attrp->igsa_gc) != NULL) { gcgrp = gc->gc_grp; ASSERT(gcgrp != NULL); rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); } mutex_exit(&attrp->igsa_lock); } /* * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK. * * The 4.4BSD-Lite2 code (net/rtsock.c) returns both * RTA_IFP and RTA_IFA if either is defined, and also * returns RTA_BRD if the appropriate interface is * point-to-point. 
*/ rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK); if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) { rtm_addrs |= (RTA_IFP | RTA_IFA); /* * We associate an IRE with an ILL, hence we don't exactly * know what might make sense for RTA_IFA and RTA_BRD. We * pick the first ipif on the ill. */ ipif = ipif_get_next_ipif(NULL, ill); if (ipif != NULL) { if (ipif->ipif_isv6) ifaddr6 = ipif->ipif_v6lcl_addr; else ifaddr = ipif->ipif_lcl_addr; if (ipif->ipif_flags & IPIF_POINTOPOINT) { rtm_addrs |= RTA_BRD; if (ipif->ipif_isv6) brdaddr6 = ipif->ipif_v6pp_dst_addr; else brdaddr = ipif->ipif_pp_dst_addr; } ipif_refrele(ipif); } } new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0); if (new_mp == NULL) { if (gcgrp != NULL) rw_exit(&gcgrp->gcgrp_rwlock); if (ill != NULL) ill_refrele(ill); return (NULL); } /* * We set the destination address, gateway address, * netmask and flags in the RTM_GET response depending * on whether we found a parent IRE or not. * In particular, if we did find a parent IRE during the * recursive search, use that IRE's gateway address. * Otherwise, we use the IRE's source address for the * gateway address. */ ASSERT(af == AF_INET || af == AF_INET6); switch (af) { case AF_INET: IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc); if (v4setsrc != INADDR_ANY) rtm_addrs |= RTA_SRC; rtm_flags = ire->ire_flags; rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr, ire->ire_mask, ire->ire_gateway_addr, v4setsrc, brdaddr, 0, ifaddr, ill, new_mp, gc); break; case AF_INET6: if (!IN6_IS_ADDR_UNSPECIFIED(setsrc)) rtm_addrs |= RTA_SRC; rtm_flags = ire->ire_flags; rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6, &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, setsrc, &brdaddr6, &ipv6_all_zeros, &ifaddr6, ill, new_mp, gc); break; } if (gcgrp != NULL) rw_exit(&gcgrp->gcgrp_rwlock); new_rtm = (rt_msghdr_t *)new_mp->b_rptr; /* * The rtm_msglen, rtm_version and rtm_type fields in * RTM_GET response are filled in by rts_fill_msg. 
* * rtm_addrs and rtm_flags are filled in based on what * was requested and the state of the IREs looked up * above. * * rtm_inits and rtm_rmx are filled in with metrics * based on whether a parent IRE was found or not. * * TODO: rtm_index and rtm_use should probably be * filled in with something resonable here and not just * copied from the request. */ new_rtm->rtm_index = rtm->rtm_index; new_rtm->rtm_pid = rtm->rtm_pid; new_rtm->rtm_seq = rtm->rtm_seq; new_rtm->rtm_use = rtm->rtm_use; new_rtm->rtm_addrs = rtm_addrs; new_rtm->rtm_flags = rtm_flags; new_rtm->rtm_inits = rts_getmetrics(ire, ill, &new_rtm->rtm_rmx); if (ill != NULL) ill_refrele(ill); return (new_mp); } /* * Fill the given if_data_t with interface statistics. */ static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif) { if_data->ifi_type = ipif->ipif_ill->ill_type; /* ethernet, tokenring, etc */ if_data->ifi_addrlen = 0; /* media address length */ if_data->ifi_hdrlen = 0; /* media header length */ if_data->ifi_mtu = ipif->ipif_ill->ill_mtu; /* mtu */ /* metric (external only) */ if_data->ifi_metric = ipif->ipif_ill->ill_metric; if_data->ifi_baudrate = 0; /* linespeed */ if_data->ifi_ipackets = 0; /* packets received on if */ if_data->ifi_ierrors = 0; /* input errors on interface */ if_data->ifi_opackets = 0; /* packets sent on interface */ if_data->ifi_oerrors = 0; /* output errors on if */ if_data->ifi_collisions = 0; /* collisions on csma if */ if_data->ifi_ibytes = 0; /* total number received */ if_data->ifi_obytes = 0; /* total number sent */ if_data->ifi_imcasts = 0; /* multicast packets received */ if_data->ifi_omcasts = 0; /* multicast packets sent */ if_data->ifi_iqdrops = 0; /* dropped on input */ if_data->ifi_noproto = 0; /* destined for unsupported */ /* protocol. */ } /* * Set the metrics on a forwarding table route. 
*/ static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) { clock_t rtt; clock_t rtt_sd; ill_t *ill; ifrt_t *ifrt; mblk_t *mp; in6_addr_t gw_addr_v6; /* Need to add back some metrics to the IRE? */ /* * Bypass obtaining the lock and searching ill_saved_ire_mp in the * common case of no metrics. */ if (which == 0) return; ire->ire_metrics.iulp_set = B_TRUE; /* * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's * says: rmx_rtt and rmx_rttvar are stored as * microseconds. */ if (which & RTV_RTT) rtt = metrics->rmx_rtt / 1000; if (which & RTV_RTTVAR) rtt_sd = metrics->rmx_rttvar / 1000; /* * Update the metrics in the IRE itself. */ mutex_enter(&ire->ire_lock); if (which & RTV_MTU) ire->ire_metrics.iulp_mtu = metrics->rmx_mtu; if (which & RTV_RTT) ire->ire_metrics.iulp_rtt = rtt; if (which & RTV_SSTHRESH) ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh; if (which & RTV_RTTVAR) ire->ire_metrics.iulp_rtt_sd = rtt_sd; if (which & RTV_SPIPE) ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe; if (which & RTV_RPIPE) ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe; mutex_exit(&ire->ire_lock); /* * Search through the ifrt_t chain hanging off the ILL in order to * reflect the metric change there. */ ill = ire->ire_ill; if (ill == NULL) return; ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) || ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION))); if (ill->ill_isv6) { mutex_enter(&ire->ire_lock); gw_addr_v6 = ire->ire_gateway_addr_v6; mutex_exit(&ire->ire_lock); } mutex_enter(&ill->ill_saved_ire_lock); for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) { /* * On a given ill, the tuple of address, gateway, mask, * ire_type and zoneid unique for each saved IRE. 
*/ ifrt = (ifrt_t *)mp->b_rptr; if (ill->ill_isv6) { if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, &ire->ire_addr_v6) || !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, &gw_addr_v6) || !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask, &ire->ire_mask_v6)) continue; } else { if (ifrt->ifrt_addr != ire->ire_addr || ifrt->ifrt_gateway_addr != ire->ire_gateway_addr || ifrt->ifrt_mask != ire->ire_mask) continue; } if (ifrt->ifrt_zoneid != ire->ire_zoneid || ifrt->ifrt_type != ire->ire_type) continue; if (which & RTV_MTU) ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu; if (which & RTV_RTT) ifrt->ifrt_metrics.iulp_rtt = rtt; if (which & RTV_SSTHRESH) { ifrt->ifrt_metrics.iulp_ssthresh = metrics->rmx_ssthresh; } if (which & RTV_RTTVAR) ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar; if (which & RTV_SPIPE) ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe; if (which & RTV_RPIPE) ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe; break; } mutex_exit(&ill->ill_saved_ire_lock); /* * Update any IRE_IF_CLONE hanging created from this IRE_IF so they * get any new iulp_mtu. * We do that by deleting them; ire_create_if_clone will pick * up the new metrics. */ if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0) ire_dep_delete_if_clone(ire); } /* * Get the metrics from a forwarding table route. */ static int rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics) { int metrics_set = 0; bzero(metrics, sizeof (rt_metrics_t)); /* * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's * says: rmx_rtt and rmx_rttvar are stored as * microseconds. 
*/ metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000; metrics_set |= RTV_RTT; if (ire->ire_metrics.iulp_mtu != 0) { metrics->rmx_mtu = ire->ire_metrics.iulp_mtu; metrics_set |= RTV_MTU; } else if (ill != NULL) { metrics->rmx_mtu = ill->ill_mtu; metrics_set |= RTV_MTU; } metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh; metrics_set |= RTV_SSTHRESH; metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000; metrics_set |= RTV_RTTVAR; metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe; metrics_set |= RTV_SPIPE; metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe; metrics_set |= RTV_RPIPE; return (metrics_set); } /* * Given two sets of metrics (src and dst), use the dst values if they are * set. If a dst value is not set but the src value is set, then we use * the src value. * dst is updated with the new values. * This is used to merge information from a dce_t and ire_metrics, where the * dce values takes precedence. */ void rts_merge_metrics(iulp_t *dst, const iulp_t *src) { if (!src->iulp_set) return; if (dst->iulp_ssthresh == 0) dst->iulp_ssthresh = src->iulp_ssthresh; if (dst->iulp_rtt == 0) dst->iulp_rtt = src->iulp_rtt; if (dst->iulp_rtt_sd == 0) dst->iulp_rtt_sd = src->iulp_rtt_sd; if (dst->iulp_spipe == 0) dst->iulp_spipe = src->iulp_spipe; if (dst->iulp_rpipe == 0) dst->iulp_rpipe = src->iulp_rpipe; if (dst->iulp_rtomax == 0) dst->iulp_rtomax = src->iulp_rtomax; if (dst->iulp_sack == 0) dst->iulp_sack = src->iulp_sack; if (dst->iulp_tstamp_ok == 0) dst->iulp_tstamp_ok = src->iulp_tstamp_ok; if (dst->iulp_wscale_ok == 0) dst->iulp_wscale_ok = src->iulp_wscale_ok; if (dst->iulp_ecn_ok == 0) dst->iulp_ecn_ok = src->iulp_ecn_ok; if (dst->iulp_pmtud_ok == 0) dst->iulp_pmtud_ok = src->iulp_pmtud_ok; if (dst->iulp_mtu == 0) dst->iulp_mtu = src->iulp_mtu; } /* * Takes a pointer to a routing message and extracts necessary info by looking * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers * passed (all of which must be valid). 
*
 * The bitmask of sockaddrs actually found in the message is returned, or zero
 * is returned in the case of an error.
 *
 * Every output address is first reset to the unspecified address, *indexp to
 * zero, *afp to AF_UNSPEC, rtsecattr->rtsa_cnt to zero and *error to zero.
 * *error is only changed afterwards by the security attribute parsing, which
 * runs on labeled systems when data remains after the sockaddrs.
 */
static int
rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
    in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
    in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
    tsol_rtsecattr_t *rtsecattr, int *error)
{
	struct sockaddr *sa;
	int	i;
	int	addr_bits;
	int	length;
	int	found_addrs = 0;
	caddr_t	cp;
	size_t	size;
	struct sockaddr_dl *sdl;

	*dst_addrp = ipv6_all_zeros;
	*gw_addrp = ipv6_all_zeros;
	*net_maskp = ipv6_all_zeros;
	*authorp = ipv6_all_zeros;
	*if_addrp = ipv6_all_zeros;
	*in_src_addrp = ipv6_all_zeros;
	*indexp = 0;
	*afp = AF_UNSPEC;
	rtsecattr->rtsa_cnt = 0;
	*error = 0;

	/*
	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
	 * RTA_SRC, RTA_IFA and RTA_AUTHOR. The rest will be added as we need
	 * them; any other bit set in rtm_addrs makes the parse fail.
	 *
	 * The sockaddrs start immediately after the rt_msghdr_t.
	 *
	 * NOTE(review): the loop only checks that each sockaddr STARTS before
	 * rtm_msglen; it does not check that the whole sockaddr fits. Confirm
	 * that callers guarantee enough readable data behind a short message.
	 */
	cp = (caddr_t)&rtm[1];
	length = rtm->rtm_msglen;
	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
		/*
		 * The address family we are working with starts out as
		 * AF_UNSPEC, but is set to the one specified with the
		 * destination address.
		 *
		 * If the "working" address family that has been set to
		 * something other than AF_UNSPEC, then the address family of
		 * subsequent sockaddrs must either be AF_UNSPEC (for
		 * compatibility with older programs) or must be the same as our
		 * "working" one.
		 *
		 * This code assumes that RTA_DST (1) comes first in the loop.
		 */
		sa = (struct sockaddr *)cp;
		addr_bits = (rtm->rtm_addrs & (1 << i));
		if (addr_bits == 0)
			continue;
		switch (addr_bits) {
		case RTA_DST:
			size = rts_copyfromsockaddr(sa, dst_addrp);
			*afp = sa->sa_family;
			break;
		case RTA_GATEWAY:
			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
				return (0);
			size = rts_copyfromsockaddr(sa, gw_addrp);
			break;
		case RTA_NETMASK:
			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
				return (0);
			size = rts_copyfromsockaddr(sa, net_maskp);
			break;
		case RTA_IFP:
			/* Only the interface index is taken from RTA_IFP. */
			if (sa->sa_family != AF_LINK &&
			    sa->sa_family != AF_UNSPEC)
				return (0);
			sdl = (struct sockaddr_dl *)cp;
			*indexp = sdl->sdl_index;
			size = sizeof (struct sockaddr_dl);
			break;
		case RTA_SRC:
			/* Source address of the incoming packet */
			size = rts_copyfromsockaddr(sa, in_src_addrp);
			*afp = sa->sa_family;
			break;
		case RTA_IFA:
			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
				return (0);
			size = rts_copyfromsockaddr(sa, if_addrp);
			break;
		case RTA_AUTHOR:
			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
				return (0);
			size = rts_copyfromsockaddr(sa, authorp);
			break;
		default:
			return (0);
		}
		/* A zero size means the sockaddr could not be decoded. */
		if (size == 0)
			return (0);
		cp += size;
		found_addrs |= addr_bits;
	}

	/*
	 * Parse the routing message and look for any security-
	 * related attributes for the route. For each valid
	 * attribute, allocate/obtain the corresponding kernel
	 * route security attributes.
	 */
	if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) {
		*error = tsol_rtsa_init(rtm, rtsecattr, cp);
		ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
	}

	return (found_addrs);
}

/*
 * Fills the message with the given info.
 *
 * This is the IPv4 variant: every address selected by rtm_addrs is written as
 * a sin_t, in ascending bit order of the RTA_* flags, directly behind the
 * message header that corresponds to `type'. RTA_IFP is written by
 * ill_dls_info(). When `gc' is not NULL a single gateway security attribute
 * is appended; the caller must hold gc->gc_grp->gcgrp_rwlock (asserted).
 *
 * `mp' must be large enough for the header plus
 * rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0) bytes, since that
 * many bytes behind the header are zeroed and rtm_msglen is set to that total.
 * rtm_version and rtm_type are set here as well; all other header fields are
 * left to the caller.
 */
static void
rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
    ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
    ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, const tsol_gc_t *gc)
{
	rt_msghdr_t	*rtm;
	sin_t		*sin;
	size_t		data_size, header_size;
	uchar_t		*cp;
	int		i;

	ASSERT(mp != NULL);
	/*
	 * First find the type of the message
	 * and its length.
	 */
	header_size = rts_header_msg_size(type);
	/*
	 * Now find the size of the data
	 * that follows the message header.
	 */
	data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0);

	rtm = (rt_msghdr_t *)mp->b_rptr;
	mp->b_wptr = &mp->b_rptr[header_size];
	cp = mp->b_wptr;
	bzero(cp, data_size);
	for (i = 0; i < RTA_NUMBITS; i++) {
		sin = (sin_t *)cp;
		switch (rtm_addrs & (1 << i)) {
		case RTA_DST:
			sin->sin_addr.s_addr = dst;
			sin->sin_family = AF_INET;
			cp += sizeof (sin_t);
			break;
		case RTA_GATEWAY:
			sin->sin_addr.s_addr = gateway;
			sin->sin_family = AF_INET;
			cp += sizeof (sin_t);
			break;
		case RTA_NETMASK:
			sin->sin_addr.s_addr = mask;
			sin->sin_family = AF_INET;
			cp += sizeof (sin_t);
			break;
		case RTA_IFP:
			/* Advance by however many bytes ill_dls_info reports. */
			cp += ill_dls_info((struct sockaddr_dl *)cp, ill);
			break;
		case RTA_IFA:
			sin->sin_addr.s_addr = ifaddr;
			sin->sin_family = AF_INET;
			cp += sizeof (sin_t);
			break;
		case RTA_SRC:
			sin->sin_addr.s_addr = src_addr;
			sin->sin_family = AF_INET;
			cp += sizeof (sin_t);
			break;
		case RTA_AUTHOR:
			sin->sin_addr.s_addr = author;
			sin->sin_family = AF_INET;
			cp += sizeof (sin_t);
			break;
		case RTA_BRD:
			/*
			 * RTA_BRD is used typically to specify a point-to-point
			 * destination address.
			 */
			sin->sin_addr.s_addr = brd_addr;
			sin->sin_family = AF_INET;
			cp += sizeof (sin_t);
			break;
		}
	}

	if (gc != NULL) {
		rtm_ext_t *rtm_ext;
		struct rtsa_s *rp_dst;
		tsol_rtsecattr_t *rsap;

		ASSERT(gc->gc_grp != NULL);
		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));

		/* Extension header, then one security attribute. */
		rtm_ext = (rtm_ext_t *)cp;
		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1);

		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
		rsap->rtsa_cnt = 1;
		rp_dst = rsap->rtsa_attr;

		ASSERT(gc->gc_db != NULL);
		bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
		/*
		 * NOTE(review): cp is left at the START of the attribute just
		 * copied, so b_wptr below does not cover it even though
		 * rtm_msglen does. Confirm whether (rp_dst + 1) was intended.
		 */
		cp = (uchar_t *)rp_dst;
	}

	mp->b_wptr = cp;
	mp->b_cont = NULL;
	/*
	 * set the fields that are common to
	 * to different messages.
	 */
	rtm->rtm_msglen = (short)(header_size + data_size);
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = (uchar_t)type;
}

/*
 * Allocates and initializes a routing socket message.
 * Note that sacnt is either zero or one.
 *
 * The message is sized for the header of `type' plus the sockaddrs selected
 * by rtm_addrs in family `af' (plus the security attribute when sacnt is
 * non-zero) and is zero-filled. Returns NULL when allocb() fails.
 */
mblk_t *
rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
{
	size_t	length;
	mblk_t	*mp;

	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
	mp = allocb(length, BPRI_MED);
	if (mp == NULL)
		return (mp);
	bzero(mp->b_rptr, length);
	return (mp);
}

/*
 * Returns the size of the routing
 * socket message header size.
 *
 * The address messages use ifa_msghdr_t, RTM_IFINFO uses if_msghdr_t and
 * every other type uses rt_msghdr_t.
 */
size_t
rts_header_msg_size(int type)
{
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
	case RTM_CHGADDR:
	case RTM_FREEADDR:
		return (sizeof (ifa_msghdr_t));
	case RTM_IFINFO:
		return (sizeof (if_msghdr_t));
	default:
		return (sizeof (rt_msghdr_t));
	}
}

/*
 * Returns the size of the message needed with the given rtm_addrs and family.
 *
 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
 * of the same family (currently either AF_INET or AF_INET6).
 *
 * The result covers only the data behind the message header. When sacnt is
 * non-zero, room for an rtm_ext_t and `sacnt' security attributes is added.
 */
size_t
rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
{
	int	i;
	size_t	length = 0;

	for (i = 0; i < RTA_NUMBITS; i++) {
		switch (rtm_addrs & (1 << i)) {
		case RTA_IFP:
			/* The link-level sockaddr has a fixed size. */
			length += sizeof (struct sockaddr_dl);
			break;
		case RTA_DST:
		case RTA_GATEWAY:
		case RTA_NETMASK:
		case RTA_SRC:
		case RTA_IFA:
		case RTA_AUTHOR:
		case RTA_BRD:
			ASSERT(af == AF_INET || af == AF_INET6);
			switch (af) {
			case AF_INET:
				length += sizeof (sin_t);
				break;
			case AF_INET6:
				length += sizeof (sin6_t);
				break;
			}
			break;
		}
	}
	if (sacnt > 0)
		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);

	return (length);
}

/*
 * This routine is called to generate a message to the routing
 * socket indicating that a redirect has occurred, a routing lookup
 * has failed, or that a protocol has detected timeouts to a particular
 * destination. This routine is called for message types RTM_LOSING,
 * RTM_REDIRECT, and RTM_MISS.
*/ void ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs, ip_stack_t *ipst) { rt_msghdr_t *rtm; mblk_t *mp; if (rtm_addrs == 0) return; mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0); if (mp == NULL) return; rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0, author, 0, NULL, mp, NULL); rtm = (rt_msghdr_t *)mp->b_rptr; rtm->rtm_flags = flags; rtm->rtm_errno = error; rtm->rtm_flags |= RTF_DONE; rtm->rtm_addrs = rtm_addrs; rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst); } /* * This routine is called to generate a message to the routing * socket indicating that the status of a network interface has changed. * Message type generated RTM_IFINFO. */ void ip_rts_ifmsg(const ipif_t *ipif, uint_t flags) { ip_rts_xifmsg(ipif, 0, 0, flags); } void ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags) { if_msghdr_t *ifm; mblk_t *mp; sa_family_t af; ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* * This message should be generated only * when the physical device is changing * state. 
*/ if (ipif->ipif_id != 0) return; if (ipif->ipif_isv6) { af = AF_INET6; mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); if (mp == NULL) return; rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, ipif->ipif_ill, mp, NULL); } else { af = AF_INET; mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); if (mp == NULL) return; rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0, ipif->ipif_ill, mp, NULL); } ifm = (if_msghdr_t *)mp->b_rptr; ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags | ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear; rts_getifdata(&ifm->ifm_data, ipif); ifm->ifm_addrs = RTA_IFP; if (flags & RTSQ_DEFAULT) { flags = RTSQ_ALL; /* * If this message is for an underlying interface, prevent * "normal" (IPMP-unaware) routing sockets from seeing it. */ if (IS_UNDER_IPMP(ipif->ipif_ill)) flags &= ~RTSQ_NORMAL; } rts_queue_input(mp, NULL, af, flags, ipst); } /* * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message; * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR) * generate the ifa_msghdr_t message. */ static void rts_new_rtsmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) { int rtm_addrs; mblk_t *mp; ifa_msghdr_t *ifam; rt_msghdr_t *rtm; sa_family_t af; ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* * Do not report unspecified address if this is the RTM_CHGADDR or * RTM_FREEADDR message. 
*/ if (cmd == RTM_CHGADDR || cmd == RTM_FREEADDR) { if (!ipif->ipif_isv6) { if (ipif->ipif_lcl_addr == INADDR_ANY) return; } else if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) { return; } } if (ipif->ipif_isv6) af = AF_INET6; else af = AF_INET; if (cmd == RTM_ADD || cmd == RTM_DELETE) rtm_addrs = (RTA_DST | RTA_NETMASK); else rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP); mp = rts_alloc_msg(cmd, rtm_addrs, af, 0); if (mp == NULL) return; if (cmd != RTM_ADD && cmd != RTM_DELETE) { switch (af) { case AF_INET: rts_fill_msg(cmd, rtm_addrs, 0, ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr, ipif->ipif_pp_dst_addr, 0, ipif->ipif_lcl_addr, ipif->ipif_ill, mp, NULL); break; case AF_INET6: rts_fill_msg_v6(cmd, rtm_addrs, &ipv6_all_zeros, &ipif->ipif_v6net_mask, &ipv6_all_zeros, &ipif->ipif_v6lcl_addr, &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, &ipif->ipif_v6lcl_addr, ipif->ipif_ill, mp, NULL); break; } ifam = (ifa_msghdr_t *)mp->b_rptr; ifam->ifam_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; ifam->ifam_metric = ipif->ipif_ill->ill_metric; ifam->ifam_flags = ((cmd == RTM_NEWADDR) ? RTF_UP : 0); ifam->ifam_addrs = rtm_addrs; } else { switch (af) { case AF_INET: rts_fill_msg(cmd, rtm_addrs, ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0, 0, 0, 0, 0, NULL, mp, NULL); break; case AF_INET6: rts_fill_msg_v6(cmd, rtm_addrs, &ipif->ipif_v6lcl_addr, &ipif->ipif_v6net_mask, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, NULL, mp, NULL); break; } rtm = (rt_msghdr_t *)mp->b_rptr; rtm->rtm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0); rtm->rtm_errno = error; if (error == 0) rtm->rtm_flags |= RTF_DONE; rtm->rtm_addrs = rtm_addrs; } rts_queue_input(mp, NULL, af, flags, ipst); } /* * This is called to generate messages to the routing socket * indicating a network interface has had addresses associated with it. * The structure of the code is based on the 4.4BSD-Lite2 . 
*/
void
ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
{
	ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;

	if (flags & RTSQ_DEFAULT) {
		/*
		 * The default is to notify everybody, but messages for an
		 * underlying interface are hidden from "normal"
		 * (IPMP-unaware) routing sockets.
		 */
		if (IS_UNDER_IPMP(ipif->ipif_ill))
			flags = RTSQ_ALL & ~RTSQ_NORMAL;
		else
			flags = RTSQ_ALL;
	}

	/*
	 * An ADD is announced as RTM_NEWADDR followed by RTM_ADD and a
	 * DELETE as RTM_DELETE followed by RTM_DELADDR; in both of those
	 * cases conn_ixa caching is first told that source address
	 * selection changed. Any other request is simply passed on.
	 */
	if (cmd == RTM_ADD) {
		ip_update_source_selection(ipst);
		rts_new_rtsmsg(RTM_NEWADDR, error, ipif, flags);
		rts_new_rtsmsg(RTM_ADD, error, ipif, flags);
	} else if (cmd == RTM_DELETE) {
		ip_update_source_selection(ipst);
		rts_new_rtsmsg(RTM_DELETE, error, ipif, flags);
		rts_new_rtsmsg(RTM_DELADDR, error, ipif, flags);
	} else {
		rts_new_rtsmsg(cmd, error, ipif, flags);
	}
}

/*
 * Copy the address held in a sockaddr into an in6_addr_t, according to the
 * sockaddr's address family, and return the size of the sockaddr consumed.
 * Zero is returned when the sockaddr cannot be used.
 *
 * AF_UNSPEC is treated as AF_INET for compatibility with programs that leave
 * the family cleared in the sockaddr; callers that care about the family
 * must check it themselves. IPv4 addresses are stored in IPv4-mapped form.
 *
 * An AF_INET6 sockaddr holding an IPv4-mapped address is rejected (zero is
 * returned), although *addrp has already been overwritten at that point.
 */
size_t
rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
{
	if (sa->sa_family == AF_INET || sa->sa_family == AF_UNSPEC) {
		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
		return (sizeof (sin_t));
	}

	if (sa->sa_family != AF_INET6)
		return (0);

	*addrp = ((sin6_t *)sa)->sin6_addr;
	if (IN6_IS_ADDR_V4MAPPED(addrp))
		return (0);
	return (sizeof (sin6_t));
}