1 /*
2 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /*
6 * Copyright (c) 1988, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95
38 */
39
40 /*
 * This file contains routines that process routing socket requests.
42 */
43
44 #include <sys/types.h>
45 #include <sys/stream.h>
46 #include <sys/stropts.h>
47 #include <sys/ddi.h>
48 #include <sys/strsubr.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/policy.h>
52 #include <sys/zone.h>
53
54 #include <sys/systm.h>
55 #include <sys/param.h>
56 #include <sys/socket.h>
57 #include <sys/strsun.h>
58 #include <net/if.h>
59 #include <net/route.h>
60 #include <netinet/in.h>
61 #include <net/if_dl.h>
62 #include <netinet/ip6.h>
63
64 #include <inet/common.h>
65 #include <inet/ip.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_if.h>
68 #include <inet/ip_ire.h>
69 #include <inet/ip_ftable.h>
70 #include <inet/ip_rts.h>
71
72 #include <inet/ipclassifier.h>
73
74 #include <sys/tsol/tndb.h>
75 #include <sys/tsol/tnet.h>
76
/*
 * Size of a routing message of the given `type': the value reported by
 * rts_data_msg_size() for (rtm_addrs, af, sacnt) plus the value reported by
 * rts_header_msg_size() for `type'.
 */
#define RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
    (rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))

/*
 * Forward declarations of the helpers that are private to this file.
 */
static size_t rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
    ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
    ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
    const tsol_gc_t *);
static int rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
    in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
    in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
    sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
static int rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics);
static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire,
    const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af);
static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
/* IPv4 and IPv6 route lookup helpers used by ip_rts_request_common(). */
static ire_t *ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask,
    ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid,
    const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire,
    ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp);
static ire_t *ire_lookup_v6(const in6_addr_t *dst_addr_v6,
    const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
    const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
    ip_stack_t *ipst, ire_t **pifire,
    in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp);
103
104 /*
105 * Send `mp' to all eligible routing queues. A queue is ineligible if:
106 *
107 * 1. SO_USELOOPBACK is off and it is not the originating queue.
108 * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'.
109 * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'.
110 * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC.
111 */
112 void
rts_queue_input(mblk_t * mp,conn_t * o_connp,sa_family_t af,uint_t flags,ip_stack_t * ipst)113 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags,
114 ip_stack_t *ipst)
115 {
116 mblk_t *mp1;
117 conn_t *connp, *next_connp;
118
119 /*
120 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be
121 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point.
122 */
123 ASSERT(!(flags & RTSQ_DEFAULT));
124
125 mutex_enter(&ipst->ips_rts_clients->connf_lock);
126 connp = ipst->ips_rts_clients->connf_head;
127
128 for (; connp != NULL; connp = next_connp) {
129 next_connp = connp->conn_next;
130 /*
131 * If there was a family specified when this routing socket was
132 * created and it doesn't match the family of the message to
133 * copy, then continue.
134 */
135 if ((connp->conn_proto != AF_UNSPEC) &&
136 (connp->conn_proto != af))
137 continue;
138
139 /*
140 * Queue the message only if the conn_t and flags match.
141 */
142 if (connp->conn_rtaware & RTAW_UNDER_IPMP) {
143 if (!(flags & RTSQ_UNDER_IPMP))
144 continue;
145 } else {
146 if (!(flags & RTSQ_NORMAL))
147 continue;
148 }
149 /*
150 * For the originating queue, we only copy the message upstream
151 * if loopback is set. For others reading on the routing
152 * socket, we check if there is room upstream for a copy of the
153 * message.
154 */
155 if ((o_connp == connp) && connp->conn_useloopback == 0) {
156 connp = connp->conn_next;
157 continue;
158 }
159 CONN_INC_REF(connp);
160 mutex_exit(&ipst->ips_rts_clients->connf_lock);
161 /* Pass to rts_input */
162 if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld :
163 canputnext(connp->conn_rq)) {
164 mp1 = dupmsg(mp);
165 if (mp1 == NULL)
166 mp1 = copymsg(mp);
167 /* Note that we pass a NULL ira to rts_input */
168 if (mp1 != NULL)
169 (connp->conn_recv)(connp, mp1, NULL, NULL);
170 }
171
172 mutex_enter(&ipst->ips_rts_clients->connf_lock);
173 /* reload next_connp since conn_next may have changed */
174 next_connp = connp->conn_next;
175 CONN_DEC_REF(connp);
176 }
177 mutex_exit(&ipst->ips_rts_clients->connf_lock);
178 freemsg(mp);
179 }
180
181 /*
182 * Takes an ire and sends an ack to all the routing sockets. This
183 * routine is used
184 * - when a route is created/deleted through the ioctl interface.
185 * - when a stale redirect is deleted
186 */
187 void
ip_rts_rtmsg(int type,ire_t * ire,int error,ip_stack_t * ipst)188 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
189 {
190 mblk_t *mp;
191 rt_msghdr_t *rtm;
192 int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
193 sa_family_t af;
194 in6_addr_t gw_addr_v6;
195
196 if (ire == NULL)
197 return;
198 ASSERT(ire->ire_ipversion == IPV4_VERSION ||
199 ire->ire_ipversion == IPV6_VERSION);
200
201 ASSERT(!(ire->ire_type & IRE_IF_CLONE));
202
203 if (ire->ire_flags & RTF_SETSRC)
204 rtm_addrs |= RTA_SRC;
205
206 switch (ire->ire_ipversion) {
207 case IPV4_VERSION:
208 af = AF_INET;
209 mp = rts_alloc_msg(type, rtm_addrs, af, 0);
210 if (mp == NULL)
211 return;
212 rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
213 ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL,
214 mp, NULL);
215 break;
216 case IPV6_VERSION:
217 af = AF_INET6;
218 mp = rts_alloc_msg(type, rtm_addrs, af, 0);
219 if (mp == NULL)
220 return;
221 mutex_enter(&ire->ire_lock);
222 gw_addr_v6 = ire->ire_gateway_addr_v6;
223 mutex_exit(&ire->ire_lock);
224 rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
225 &ire->ire_mask_v6, &gw_addr_v6,
226 &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
227 &ipv6_all_zeros, NULL, mp, NULL);
228 break;
229 }
230 rtm = (rt_msghdr_t *)mp->b_rptr;
231 mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
232 rtm->rtm_addrs = rtm_addrs;
233 rtm->rtm_flags = ire->ire_flags;
234 if (error != 0)
235 rtm->rtm_errno = error;
236 else
237 rtm->rtm_flags |= RTF_DONE;
238 rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst);
239 }
240
241 /*
242 * This is a call from the RTS module
243 * indicating that this is a Routing Socket
244 * Stream. Insert this conn_t in routing
245 * socket client list.
246 */
247 void
ip_rts_register(conn_t * connp)248 ip_rts_register(conn_t *connp)
249 {
250 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
251
252 connp->conn_useloopback = 1;
253 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
254 }
255
/*
 * This is a call from the RTS module indicating that it is closing.
 * The conn_t is handed to ipcl_hash_remove(); presumably this takes it off
 * the list that ip_rts_register() inserted it on -- confirm against
 * ipcl_hash_remove().
 */
void
ip_rts_unregister(conn_t *connp)
{
	ipcl_hash_remove(connp);
}
264
265 /*
266 * Processes requests received on a routing socket. It extracts all the
267 * arguments and calls the appropriate function to process the request.
268 *
269 * RTA_SRC bit flag requests are sent by 'route -setsrc'.
270 *
271 * In general, this function does not consume the message supplied but rather
272 * sends the message upstream with an appropriate UNIX errno.
273 */
274 int
ip_rts_request_common(mblk_t * mp,conn_t * connp,cred_t * ioc_cr)275 ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
276 {
277 rt_msghdr_t *rtm = NULL;
278 in6_addr_t dst_addr_v6;
279 in6_addr_t src_addr_v6;
280 in6_addr_t gw_addr_v6;
281 in6_addr_t net_mask_v6;
282 in6_addr_t author_v6;
283 in6_addr_t if_addr_v6;
284 mblk_t *mp1;
285 ire_t *ire = NULL;
286 ire_t *ifire = NULL;
287 ipaddr_t v4setsrc;
288 in6_addr_t v6setsrc = ipv6_all_zeros;
289 tsol_ire_gw_secattr_t *gwattr = NULL;
290 int error = 0;
291 int match_flags = MATCH_IRE_DSTONLY;
292 int match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
293 int found_addrs;
294 sa_family_t af;
295 ipaddr_t dst_addr;
296 ipaddr_t gw_addr;
297 ipaddr_t src_addr;
298 ipaddr_t net_mask;
299 ushort_t index;
300 boolean_t gcgrp_xtraref = B_FALSE;
301 tsol_gcgrp_addr_t ga;
302 tsol_rtsecattr_t rtsecattr;
303 struct rtsa_s *rtsap = NULL;
304 tsol_gcgrp_t *gcgrp = NULL;
305 tsol_gc_t *gc = NULL;
306 ts_label_t *tsl = NULL;
307 zoneid_t zoneid;
308 ip_stack_t *ipst;
309 ill_t *ill = NULL;
310
311 zoneid = connp->conn_zoneid;
312 ipst = connp->conn_netstack->netstack_ip;
313
314 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
315 freemsg(mp);
316 error = EINVAL;
317 goto done;
318 }
319 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
320 freemsg(mp);
321 error = EINVAL;
322 goto done;
323 }
324
325 /*
326 * Check the routing message for basic consistency including the
327 * version number and that the number of octets written is the same
328 * as specified by the rtm_msglen field.
329 *
330 * At this point, an error can be delivered back via rtm_errno.
331 */
332 rtm = (rt_msghdr_t *)mp->b_rptr;
333 if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
334 error = EINVAL;
335 goto done;
336 }
337 if (rtm->rtm_version != RTM_VERSION) {
338 error = EPROTONOSUPPORT;
339 goto done;
340 }
341
342 /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
343 if (rtm->rtm_type != RTM_GET &&
344 rtm->rtm_type != RTM_RESOLVE &&
345 (ioc_cr == NULL ||
346 secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
347 error = EPERM;
348 goto done;
349 }
350
351 found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
352 &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
353 &error);
354
355 if (error != 0)
356 goto done;
357
358 if ((found_addrs & RTA_DST) == 0) {
359 error = EINVAL;
360 goto done;
361 }
362
363 /*
364 * Based on the address family of the destination address, determine
365 * the destination, gateway and netmask and return the appropriate error
366 * if an unknown address family was specified (following the errno
367 * values that 4.4BSD-Lite2 returns.)
368 */
369 switch (af) {
370 case AF_INET:
371 IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
372 IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
373 IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
374 if (((found_addrs & RTA_NETMASK) == 0) ||
375 (rtm->rtm_flags & RTF_HOST))
376 net_mask = IP_HOST_MASK;
377 else
378 IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
379 break;
380 case AF_INET6:
381 if (((found_addrs & RTA_NETMASK) == 0) ||
382 (rtm->rtm_flags & RTF_HOST))
383 net_mask_v6 = ipv6_all_ones;
384 break;
385 default:
386 /*
387 * These errno values are meant to be compatible with
388 * 4.4BSD-Lite2 for the given message types.
389 */
390 switch (rtm->rtm_type) {
391 case RTM_ADD:
392 case RTM_DELETE:
393 error = ESRCH;
394 goto done;
395 case RTM_GET:
396 case RTM_CHANGE:
397 error = EAFNOSUPPORT;
398 goto done;
399 default:
400 error = EOPNOTSUPP;
401 goto done;
402 }
403 }
404
405 /*
406 * At this point, the address family must be something known.
407 */
408 ASSERT(af == AF_INET || af == AF_INET6);
409
410 /* Handle RTA_IFP */
411 if (index != 0) {
412 ipif_t *ipif;
413 lookup:
414 ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst);
415 if (ill == NULL) {
416 error = EINVAL;
417 goto done;
418 }
419
420 /*
421 * Since all interfaces in an IPMP group must be equivalent,
422 * we prevent changes to a specific underlying interface's
423 * routing configuration. However, for backward compatibility,
424 * we intepret a request to add a route on an underlying
425 * interface as a request to add a route on its IPMP interface.
426 */
427 if (IS_UNDER_IPMP(ill)) {
428 switch (rtm->rtm_type) {
429 case RTM_CHANGE:
430 case RTM_DELETE:
431 error = EINVAL;
432 goto done;
433 case RTM_ADD:
434 index = ipmp_ill_get_ipmp_ifindex(ill);
435 ill_refrele(ill);
436 if (index == 0) {
437 ill = NULL; /* already refrele'd */
438 error = EINVAL;
439 goto done;
440 }
441 goto lookup;
442 }
443 }
444
445 match_flags |= MATCH_IRE_ILL;
446 /*
447 * This provides the same zoneid as in Solaris 10
448 * that -ifp picks the zoneid from the first ipif on the ill.
449 * But it might not be useful since the first ipif will always
450 * have the same zoneid as the ill.
451 */
452 ipif = ipif_get_next_ipif(NULL, ill);
453 if (ipif != NULL) {
454 zoneid = ipif->ipif_zoneid;
455 ipif_refrele(ipif);
456 }
457 }
458
459 /*
460 * If a netmask was supplied in the message, then subsequent route
461 * lookups will attempt to match on the netmask as well.
462 */
463 if ((found_addrs & RTA_NETMASK) != 0)
464 match_flags |= MATCH_IRE_MASK;
465
466 /*
467 * We only process any passed-in route security attributes for
468 * either RTM_ADD or RTM_CHANGE message; We overload them
469 * to do an RTM_GET as a different label; ignore otherwise.
470 */
471 if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
472 rtm->rtm_type == RTM_GET) {
473 ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
474 if (rtsecattr.rtsa_cnt > 0)
475 rtsap = &rtsecattr.rtsa_attr[0];
476 }
477
478 switch (rtm->rtm_type) {
479 case RTM_ADD:
480 /* if we are adding a route, gateway is a must */
481 if ((found_addrs & RTA_GATEWAY) == 0) {
482 error = EINVAL;
483 goto done;
484 }
485
486 /* Multirouting does not support net routes. */
487 if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
488 RTF_MULTIRT) {
489 error = EADDRNOTAVAIL;
490 goto done;
491 }
492
493 /*
494 * Multirouting and user-specified source addresses
495 * do not support interface based routing.
496 * Assigning a source address to an interface based
497 * route is achievable by plumbing a new ipif and
498 * setting up the interface route via this ipif,
499 * though.
500 */
501 if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
502 if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
503 error = EADDRNOTAVAIL;
504 goto done;
505 }
506 }
507
508 switch (af) {
509 case AF_INET:
510 if (src_addr != INADDR_ANY) {
511 uint_t type;
512
513 /*
514 * The RTF_SETSRC flag is present, check that
515 * the supplied src address is not the loopback
516 * address. This would produce martian packets.
517 */
518 if (src_addr == htonl(INADDR_LOOPBACK)) {
519 error = EINVAL;
520 goto done;
521 }
522 /*
523 * Also check that the supplied address is a
524 * valid, local one. Only allow IFF_UP ones
525 */
526 type = ip_type_v4(src_addr, ipst);
527 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
528 error = EADDRNOTAVAIL;
529 goto done;
530 }
531 } else {
532 /*
533 * The RTF_SETSRC modifier must be associated
534 * to a non-null source address.
535 */
536 if (rtm->rtm_flags & RTF_SETSRC) {
537 error = EINVAL;
538 goto done;
539 }
540 }
541
542 error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
543 rtm->rtm_flags, ill, &ire, B_FALSE,
544 rtsap, ipst, zoneid);
545 if (ill != NULL)
546 ASSERT(!MUTEX_HELD(&ill->ill_lock));
547 break;
548 case AF_INET6:
549 if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
550 uint_t type;
551
552 /*
553 * The RTF_SETSRC flag is present, check that
554 * the supplied src address is not the loopback
555 * address. This would produce martian packets.
556 */
557 if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
558 error = EINVAL;
559 goto done;
560 }
561 /*
562 * Also check that the supplied address is a
563 * valid, local one. Only allow UP ones.
564 */
565 type = ip_type_v6(&src_addr_v6, ipst);
566 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
567 error = EADDRNOTAVAIL;
568 goto done;
569 }
570
571 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
572 &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
573 ill, &ire, rtsap, ipst, zoneid);
574 break;
575 }
576 /*
577 * The RTF_SETSRC modifier must be associated
578 * to a non-null source address.
579 */
580 if (rtm->rtm_flags & RTF_SETSRC) {
581 error = EINVAL;
582 goto done;
583 }
584 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
585 &gw_addr_v6, NULL, rtm->rtm_flags,
586 ill, &ire, rtsap, ipst, zoneid);
587 if (ill != NULL)
588 ASSERT(!MUTEX_HELD(&ill->ill_lock));
589 break;
590 }
591 if (error != 0)
592 goto done;
593 ASSERT(ire != NULL);
594 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
595 break;
596 case RTM_DELETE:
597 /* if we are deleting a route, gateway is a must */
598 if ((found_addrs & RTA_GATEWAY) == 0) {
599 error = EINVAL;
600 goto done;
601 }
602 /*
603 * The RTF_SETSRC modifier does not make sense
604 * when deleting a route.
605 */
606 if (rtm->rtm_flags & RTF_SETSRC) {
607 error = EINVAL;
608 goto done;
609 }
610
611 switch (af) {
612 case AF_INET:
613 error = ip_rt_delete(dst_addr, net_mask, gw_addr,
614 found_addrs, rtm->rtm_flags, ill, B_FALSE,
615 ipst, zoneid);
616 break;
617 case AF_INET6:
618 error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
619 &gw_addr_v6, found_addrs, rtm->rtm_flags, ill,
620 ipst, zoneid);
621 break;
622 }
623 break;
624 case RTM_GET:
625 case RTM_CHANGE:
626 /*
627 * In the case of RTM_GET, the forwarding table should be
628 * searched recursively. Also, if a gateway was
629 * specified then the gateway address must also be matched.
630 *
631 * In the case of RTM_CHANGE, the gateway address (if supplied)
632 * is the new gateway address so matching on the gateway address
633 * is not done. This can lead to ambiguity when looking up the
634 * route to change as usually only the destination (and netmask,
635 * if supplied) is used for the lookup. However if a RTA_IFP
636 * sockaddr is also supplied, it can disambiguate which route to
637 * change provided the ambigous routes are tied to distinct
638 * ill's (or interface indices). If the routes are not tied to
639 * any particular interfaces (for example, with traditional
640 * gateway routes), then a RTA_IFP sockaddr will be of no use as
641 * it won't match any such routes.
642 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
643 * except when RTM_CHANGE is combined to RTF_SETSRC.
644 */
645 if (((found_addrs & RTA_SRC) != 0) &&
646 ((rtm->rtm_type == RTM_GET) ||
647 !(rtm->rtm_flags & RTF_SETSRC))) {
648 error = EOPNOTSUPP;
649 goto done;
650 }
651
652 if (rtm->rtm_type == RTM_GET) {
653 match_flags |= MATCH_IRE_SECATTR;
654 match_flags_local |= MATCH_IRE_SECATTR;
655 if ((found_addrs & RTA_GATEWAY) != 0)
656 match_flags |= MATCH_IRE_GW;
657 if (ioc_cr)
658 tsl = crgetlabel(ioc_cr);
659 if (rtsap != NULL) {
660 if (rtsa_validate(rtsap) != 0) {
661 error = EINVAL;
662 goto done;
663 }
664 if (tsl != NULL &&
665 crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
666 (tsl->tsl_doi != rtsap->rtsa_doi ||
667 !bldominates(&tsl->tsl_label,
668 &rtsap->rtsa_slrange.lower_bound))) {
669 error = EPERM;
670 goto done;
671 }
672 tsl = labelalloc(
673 &rtsap->rtsa_slrange.lower_bound,
674 rtsap->rtsa_doi, KM_NOSLEEP);
675 }
676 }
677 if (rtm->rtm_type == RTM_CHANGE) {
678 if ((found_addrs & RTA_GATEWAY) &&
679 (rtm->rtm_flags & RTF_SETSRC)) {
680 /*
681 * Do not want to change the gateway,
682 * but rather the source address.
683 */
684 match_flags |= MATCH_IRE_GW;
685 }
686 }
687
688 /*
689 * If the netmask is all ones (either as supplied or as derived
690 * above), then first check for an IRE_LOOPBACK or
691 * IRE_LOCAL entry.
692 *
693 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
694 * entry, then look for any other type of IRE.
695 */
696 switch (af) {
697 case AF_INET:
698 if (net_mask == IP_HOST_MASK) {
699 ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr,
700 IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
701 tsl, match_flags_local, 0, ipst, NULL);
702 }
703 if (ire == NULL) {
704 ire = ire_lookup_v4(dst_addr, net_mask,
705 gw_addr, ill, zoneid, tsl, match_flags,
706 ipst, &ifire, &v4setsrc, &gwattr);
707 IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc);
708 }
709 break;
710 case AF_INET6:
711 if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
712 ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL,
713 &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
714 zoneid, tsl, match_flags_local, 0, ipst,
715 NULL);
716 }
717 if (ire == NULL) {
718 ire = ire_lookup_v6(&dst_addr_v6,
719 &net_mask_v6, &gw_addr_v6, ill, zoneid,
720 tsl, match_flags, ipst, &ifire, &v6setsrc,
721 &gwattr);
722 }
723 break;
724 }
725 if (tsl != NULL && tsl != crgetlabel(ioc_cr))
726 label_rele(tsl);
727
728 if (ire == NULL) {
729 error = ESRCH;
730 goto done;
731 }
732 /*
733 * Want to return failure if we get an IRE_NOROUTE from
734 * ire_route_recursive
735 */
736 if (ire->ire_type & IRE_NOROUTE) {
737 ire_refrele(ire);
738 ire = NULL;
739 error = ESRCH;
740 goto done;
741 }
742
743 /* we know the IRE before we come here */
744 switch (rtm->rtm_type) {
745 case RTM_GET:
746 mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af);
747 if (mp1 == NULL) {
748 error = ENOBUFS;
749 goto done;
750 }
751 freemsg(mp);
752 mp = mp1;
753 rtm = (rt_msghdr_t *)mp->b_rptr;
754 break;
755 case RTM_CHANGE:
756 /*
757 * Do not allow to the multirouting state of a route
758 * to be changed. This aims to prevent undesirable
759 * stages where both multirt and non-multirt routes
760 * for the same destination are declared.
761 */
762 if ((ire->ire_flags & RTF_MULTIRT) !=
763 (rtm->rtm_flags & RTF_MULTIRT)) {
764 error = EINVAL;
765 goto done;
766 }
767 /*
768 * Note that we do not need to do
769 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
770 * in metrics or gateway will not affect existing
771 * routes since it does not create a more specific
772 * route.
773 */
774 switch (af) {
775 case AF_INET:
776 if ((found_addrs & RTA_GATEWAY) != 0 &&
777 (ire->ire_gateway_addr != gw_addr)) {
778 ire->ire_gateway_addr = gw_addr;
779 }
780
781 if (rtsap != NULL) {
782 ga.ga_af = AF_INET;
783 IN6_IPADDR_TO_V4MAPPED(
784 ire->ire_gateway_addr, &ga.ga_addr);
785
786 gcgrp = gcgrp_lookup(&ga, B_TRUE);
787 if (gcgrp == NULL) {
788 error = ENOMEM;
789 goto done;
790 }
791 }
792
793 if ((found_addrs & RTA_SRC) != 0 &&
794 (rtm->rtm_flags & RTF_SETSRC) != 0 &&
795 (ire->ire_setsrc_addr != src_addr)) {
796 if (src_addr != INADDR_ANY) {
797 uint_t type;
798
799 /*
800 * The RTF_SETSRC flag is
801 * present, check that the
802 * supplied src address is not
803 * the loopback address. This
804 * would produce martian
805 * packets.
806 */
807 if (src_addr ==
808 htonl(INADDR_LOOPBACK)) {
809 error = EINVAL;
810 goto done;
811 }
812 /*
813 * Also check that the
814 * supplied addr is a valid
815 * local address.
816 */
817 type = ip_type_v4(src_addr,
818 ipst);
819 if (!(type &
820 (IRE_LOCAL|IRE_LOOPBACK))) {
821 error = EADDRNOTAVAIL;
822 goto done;
823 }
824 ire->ire_flags |= RTF_SETSRC;
825 ire->ire_setsrc_addr =
826 src_addr;
827 } else {
828 ire->ire_flags &= ~RTF_SETSRC;
829 ire->ire_setsrc_addr =
830 INADDR_ANY;
831 }
832 /*
833 * Let conn_ixa caching know that
834 * source address selection changed
835 */
836 ip_update_source_selection(ipst);
837 }
838 ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE);
839 break;
840 case AF_INET6:
841 mutex_enter(&ire->ire_lock);
842 if ((found_addrs & RTA_GATEWAY) != 0 &&
843 !IN6_ARE_ADDR_EQUAL(
844 &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
845 ire->ire_gateway_addr_v6 = gw_addr_v6;
846 }
847 mutex_exit(&ire->ire_lock);
848
849 if (rtsap != NULL) {
850 ga.ga_af = AF_INET6;
851 mutex_enter(&ire->ire_lock);
852 ga.ga_addr = ire->ire_gateway_addr_v6;
853 mutex_exit(&ire->ire_lock);
854
855 gcgrp = gcgrp_lookup(&ga, B_TRUE);
856 if (gcgrp == NULL) {
857 error = ENOMEM;
858 goto done;
859 }
860 }
861
862 if ((found_addrs & RTA_SRC) != 0 &&
863 (rtm->rtm_flags & RTF_SETSRC) != 0 &&
864 !IN6_ARE_ADDR_EQUAL(
865 &ire->ire_setsrc_addr_v6, &src_addr_v6)) {
866 if (!IN6_IS_ADDR_UNSPECIFIED(
867 &src_addr_v6)) {
868 uint_t type;
869
870 /*
871 * The RTF_SETSRC flag is
872 * present, check that the
873 * supplied src address is not
874 * the loopback address. This
875 * would produce martian
876 * packets.
877 */
878 if (IN6_IS_ADDR_LOOPBACK(
879 &src_addr_v6)) {
880 error = EINVAL;
881 goto done;
882 }
883 /*
884 * Also check that the
885 * supplied addr is a valid
886 * local address.
887 */
888 type = ip_type_v6(&src_addr_v6,
889 ipst);
890 if (!(type &
891 (IRE_LOCAL|IRE_LOOPBACK))) {
892 error = EADDRNOTAVAIL;
893 goto done;
894 }
895 mutex_enter(&ire->ire_lock);
896 ire->ire_flags |= RTF_SETSRC;
897 ire->ire_setsrc_addr_v6 =
898 src_addr_v6;
899 mutex_exit(&ire->ire_lock);
900 } else {
901 mutex_enter(&ire->ire_lock);
902 ire->ire_flags &= ~RTF_SETSRC;
903 ire->ire_setsrc_addr_v6 =
904 ipv6_all_zeros;
905 mutex_exit(&ire->ire_lock);
906 }
907 /*
908 * Let conn_ixa caching know that
909 * source address selection changed
910 */
911 ip_update_source_selection(ipst);
912 }
913 ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE);
914 break;
915 }
916
917 if (rtsap != NULL) {
918 ASSERT(gcgrp != NULL);
919
920 /*
921 * Create and add the security attribute to
922 * prefix IRE; it will add a reference to the
923 * group upon allocating a new entry. If it
924 * finds an already-existing entry for the
925 * security attribute, it simply returns it
926 * and no new group reference is made.
927 */
928 gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
929 if (gc == NULL ||
930 (error = tsol_ire_init_gwattr(ire,
931 ire->ire_ipversion, gc)) != 0) {
932 if (gc != NULL) {
933 GC_REFRELE(gc);
934 } else {
935 /* gc_create failed */
936 error = ENOMEM;
937 }
938 goto done;
939 }
940 }
941 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
942 break;
943 }
944 break;
945 default:
946 error = EOPNOTSUPP;
947 break;
948 }
949 done:
950 if (ire != NULL)
951 ire_refrele(ire);
952 if (ifire != NULL)
953 ire_refrele(ifire);
954 if (ill != NULL)
955 ill_refrele(ill);
956
957 if (gcgrp_xtraref)
958 GCGRP_REFRELE(gcgrp);
959
960 if (rtm != NULL) {
961 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
962 if (error != 0) {
963 rtm->rtm_errno = error;
964 /* Send error ACK */
965 ip1dbg(("ip_rts_request: error %d\n", error));
966 } else {
967 rtm->rtm_flags |= RTF_DONE;
968 /* OK ACK already set up by caller except this */
969 ip2dbg(("ip_rts_request: OK ACK\n"));
970 }
971 rts_queue_input(mp, connp, af, RTSQ_ALL, ipst);
972 }
973 return (error);
974 }
975
976 /*
977 * Helper function that can do recursive lookups including when
978 * MATCH_IRE_GW and/or MATCH_IRE_MASK is set.
979 */
980 static ire_t *
ire_lookup_v4(ipaddr_t dst_addr,ipaddr_t net_mask,ipaddr_t gw_addr,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,int match_flags,ip_stack_t * ipst,ire_t ** pifire,ipaddr_t * v4setsrcp,tsol_ire_gw_secattr_t ** gwattrp)981 ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr,
982 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl,
983 int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp,
984 tsol_ire_gw_secattr_t **gwattrp)
985 {
986 ire_t *ire;
987 ire_t *ifire = NULL;
988 uint_t ire_type;
989
990 *pifire = NULL;
991 *v4setsrcp = INADDR_ANY;
992 *gwattrp = NULL;
993
994 /* Skip IRE_IF_CLONE */
995 match_flags |= MATCH_IRE_TYPE;
996 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
997
998 /*
999 * ire_route_recursive can't match gateway or mask thus if they are
1000 * set we have to do two steps of lookups
1001 */
1002 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
1003 ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr,
1004 ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL);
1005
1006 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
1007 return (ire);
1008
1009 if (ire->ire_type & IRE_ONLINK)
1010 return (ire);
1011
1012 if (ire->ire_flags & RTF_SETSRC) {
1013 ASSERT(ire->ire_setsrc_addr != INADDR_ANY);
1014 *v4setsrcp = ire->ire_setsrc_addr;
1015 v4setsrcp = NULL;
1016 }
1017
1018 /* The first ire_gw_secattr is passed back */
1019 if (ire->ire_gw_secattr != NULL) {
1020 *gwattrp = ire->ire_gw_secattr;
1021 gwattrp = NULL;
1022 }
1023
1024 /* Look for an interface ire recursively based on the gateway */
1025 dst_addr = ire->ire_gateway_addr;
1026 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
1027 /*
1028 * Don't allow anything unusual past the first iteration.
1029 * After the first lookup, we should no longer look for
1030 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT
1031 * routes.
1032 *
1033 * In addition, after we have found a direct IRE_OFFLINK,
1034 * we should only look for interface or clone routes.
1035 */
1036 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */
1037
1038 if ((ire->ire_type & IRE_OFFLINK) &&
1039 !(ire->ire_flags & RTF_INDIRECT)) {
1040 ire_type = IRE_IF_ALL;
1041 } else {
1042 /*
1043 * no more local, loopback, broadcast routes
1044 */
1045 if (!(match_flags & MATCH_IRE_TYPE))
1046 ire_type = (IRE_OFFLINK|IRE_ONLINK);
1047 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST);
1048 }
1049 match_flags |= MATCH_IRE_TYPE;
1050
1051 ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
1052 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp,
1053 gwattrp, NULL);
1054 } else {
1055 ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
1056 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp,
1057 gwattrp, NULL);
1058 }
1059 *pifire = ifire;
1060 return (ire);
1061 }
1062
1063 static ire_t *
ire_lookup_v6(const in6_addr_t * dst_addr_v6,const in6_addr_t * net_mask_v6,const in6_addr_t * gw_addr_v6,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,int match_flags,ip_stack_t * ipst,ire_t ** pifire,in6_addr_t * v6setsrcp,tsol_ire_gw_secattr_t ** gwattrp)1064 ire_lookup_v6(const in6_addr_t *dst_addr_v6,
1065 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
1066 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
1067 ip_stack_t *ipst, ire_t **pifire,
1068 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp)
1069 {
1070 ire_t *ire;
1071 ire_t *ifire = NULL;
1072 uint_t ire_type;
1073
1074 *pifire = NULL;
1075 *v6setsrcp = ipv6_all_zeros;
1076 *gwattrp = NULL;
1077
1078 /* Skip IRE_IF_CLONE */
1079 match_flags |= MATCH_IRE_TYPE;
1080 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
1081
1082 /*
1083 * ire_route_recursive can't match gateway or mask thus if they are
1084 * set we have to do two steps of lookups
1085 */
1086 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
1087 in6_addr_t dst;
1088
1089 ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6,
1090 gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0,
1091 ipst, NULL);
1092
1093 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
1094 return (ire);
1095
1096 if (ire->ire_type & IRE_ONLINK)
1097 return (ire);
1098
1099 if (ire->ire_flags & RTF_SETSRC) {
1100 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
1101 &ire->ire_setsrc_addr_v6));
1102 *v6setsrcp = ire->ire_setsrc_addr_v6;
1103 v6setsrcp = NULL;
1104 }
1105
1106 /* The first ire_gw_secattr is passed back */
1107 if (ire->ire_gw_secattr != NULL) {
1108 *gwattrp = ire->ire_gw_secattr;
1109 gwattrp = NULL;
1110 }
1111
1112 mutex_enter(&ire->ire_lock);
1113 dst = ire->ire_gateway_addr_v6;
1114 mutex_exit(&ire->ire_lock);
1115 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
1116 /*
1117 * Don't allow anything unusual past the first iteration.
1118 * After the first lookup, we should no longer look for
1119 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT
1120 * routes.
1121 *
1122 * In addition, after we have found a direct IRE_OFFLINK,
1123 * we should only look for interface or clone routes.
1124 */
1125 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */
1126
1127 if ((ire->ire_type & IRE_OFFLINK) &&
1128 !(ire->ire_flags & RTF_INDIRECT)) {
1129 ire_type = IRE_IF_ALL;
1130 } else {
1131 /*
1132 * no more local, loopback routes
1133 */
1134 if (!(match_flags & MATCH_IRE_TYPE))
1135 ire_type = (IRE_OFFLINK|IRE_ONLINK);
1136 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK);
1137 }
1138 match_flags |= MATCH_IRE_TYPE;
1139
1140 ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl,
1141 match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, gwattrp,
1142 NULL);
1143 } else {
1144 ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid,
1145 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp,
1146 gwattrp, NULL);
1147 }
1148 *pifire = ifire;
1149 return (ire);
1150 }
1151
1152
1153 /*
1154 * Handle IP_IOC_RTS_REQUEST ioctls
1155 */
1156 int
ip_rts_request(queue_t * q,mblk_t * mp,cred_t * ioc_cr)1157 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
1158 {
1159 conn_t *connp = Q_TO_CONN(q);
1160 IOCP iocp = (IOCP)mp->b_rptr;
1161 mblk_t *mp1, *ioc_mp = mp;
1162 int error = 0;
1163 ip_stack_t *ipst;
1164
1165 ipst = connp->conn_netstack->netstack_ip;
1166
1167 ASSERT(mp->b_cont != NULL);
1168 /* ioc_mp holds mp */
1169 mp = mp->b_cont;
1170
1171 /*
1172 * The Routing Socket data starts on
1173 * next block. If there is no next block
1174 * this is an indication from routing module
1175 * that it is a routing socket stream queue.
1176 * We need to support that for compatibility with SDP since
1177 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
1178 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this.
1179 */
1180 if (mp->b_cont == NULL) {
1181 /*
1182 * This is a message from SDP
1183 * indicating that this is a Routing Socket
1184 * Stream. Insert this conn_t in routing
1185 * socket client list.
1186 */
1187 connp->conn_useloopback = 1;
1188 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
1189 goto done;
1190 }
1191 mp1 = dupmsg(mp->b_cont);
1192 if (mp1 == NULL) {
1193 error = ENOBUFS;
1194 goto done;
1195 }
1196 mp = mp1;
1197
1198 error = ip_rts_request_common(mp, connp, ioc_cr);
1199 done:
1200 iocp->ioc_error = error;
1201 ioc_mp->b_datap->db_type = M_IOCACK;
1202 if (iocp->ioc_error != 0)
1203 iocp->ioc_count = 0;
1204 /* Note that we pass a NULL ira to rts_input */
1205 (connp->conn_recv)(connp, ioc_mp, NULL, NULL);
1206
1207 /* conn was refheld in ip_wput_ioctl. */
1208 CONN_DEC_IOCTLREF(connp);
1209 CONN_OPER_PENDING_DONE(connp);
1210
1211 return (error);
1212 }
1213
1214 /*
1215 * Build a reply to the RTM_GET request contained in the given message block
1216 * using the retrieved IRE of the destination address, the parent IRE (if it
1217 * exists) and the address family.
1218 *
1219 * Returns a pointer to a message block containing the reply if successful,
1220 * otherwise NULL is returned.
1221 */
1222 static mblk_t *
rts_rtmget(mblk_t * mp,ire_t * ire,ire_t * ifire,const in6_addr_t * setsrc,tsol_ire_gw_secattr_t * attrp,sa_family_t af)1223 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc,
1224 tsol_ire_gw_secattr_t *attrp, sa_family_t af)
1225 {
1226 rt_msghdr_t *rtm;
1227 rt_msghdr_t *new_rtm;
1228 mblk_t *new_mp;
1229 int rtm_addrs;
1230 int rtm_flags;
1231 tsol_gc_t *gc = NULL;
1232 tsol_gcgrp_t *gcgrp = NULL;
1233 ill_t *ill;
1234 ipif_t *ipif = NULL;
1235 ipaddr_t brdaddr; /* IFF_POINTOPOINT destination */
1236 ipaddr_t ifaddr;
1237 in6_addr_t brdaddr6; /* IFF_POINTOPOINT destination */
1238 in6_addr_t ifaddr6;
1239 ipaddr_t v4setsrc;
1240
1241 rtm = (rt_msghdr_t *)mp->b_rptr;
1242
1243 /*
1244 * Find the ill used to send packets. This will be NULL in case
1245 * of a reject or blackhole.
1246 */
1247 if (ifire != NULL)
1248 ill = ire_nexthop_ill(ifire);
1249 else
1250 ill = ire_nexthop_ill(ire);
1251
1252 if (attrp != NULL) {
1253 mutex_enter(&attrp->igsa_lock);
1254 if ((gc = attrp->igsa_gc) != NULL) {
1255 gcgrp = gc->gc_grp;
1256 ASSERT(gcgrp != NULL);
1257 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1258 }
1259 mutex_exit(&attrp->igsa_lock);
1260 }
1261
1262 /*
1263 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1264 *
1265 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1266 * RTA_IFP and RTA_IFA if either is defined, and also
1267 * returns RTA_BRD if the appropriate interface is
1268 * point-to-point.
1269 */
1270 rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1271 if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) {
1272 rtm_addrs |= (RTA_IFP | RTA_IFA);
1273 /*
1274 * We associate an IRE with an ILL, hence we don't exactly
1275 * know what might make sense for RTA_IFA and RTA_BRD. We
1276 * pick the first ipif on the ill.
1277 */
1278 ipif = ipif_get_next_ipif(NULL, ill);
1279 if (ipif != NULL) {
1280 if (ipif->ipif_isv6)
1281 ifaddr6 = ipif->ipif_v6lcl_addr;
1282 else
1283 ifaddr = ipif->ipif_lcl_addr;
1284 if (ipif->ipif_flags & IPIF_POINTOPOINT) {
1285 rtm_addrs |= RTA_BRD;
1286 if (ipif->ipif_isv6)
1287 brdaddr6 = ipif->ipif_v6pp_dst_addr;
1288 else
1289 brdaddr = ipif->ipif_pp_dst_addr;
1290 }
1291 ipif_refrele(ipif);
1292 }
1293 }
1294
1295 new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0);
1296 if (new_mp == NULL) {
1297 if (gcgrp != NULL)
1298 rw_exit(&gcgrp->gcgrp_rwlock);
1299 if (ill != NULL)
1300 ill_refrele(ill);
1301 return (NULL);
1302 }
1303
1304 /*
1305 * We set the destination address, gateway address,
1306 * netmask and flags in the RTM_GET response depending
1307 * on whether we found a parent IRE or not.
1308 * In particular, if we did find a parent IRE during the
1309 * recursive search, use that IRE's gateway address.
1310 * Otherwise, we use the IRE's source address for the
1311 * gateway address.
1312 */
1313 ASSERT(af == AF_INET || af == AF_INET6);
1314 switch (af) {
1315 case AF_INET:
1316 IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc);
1317 if (v4setsrc != INADDR_ANY)
1318 rtm_addrs |= RTA_SRC;
1319
1320 rtm_flags = ire->ire_flags;
1321 rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1322 ire->ire_mask, ire->ire_gateway_addr, v4setsrc,
1323 brdaddr, 0, ifaddr, ill, new_mp, gc);
1324 break;
1325 case AF_INET6:
1326 if (!IN6_IS_ADDR_UNSPECIFIED(setsrc))
1327 rtm_addrs |= RTA_SRC;
1328
1329 rtm_flags = ire->ire_flags;
1330 rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1331 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
1332 setsrc, &brdaddr6, &ipv6_all_zeros,
1333 &ifaddr6, ill, new_mp, gc);
1334 break;
1335 }
1336
1337 if (gcgrp != NULL)
1338 rw_exit(&gcgrp->gcgrp_rwlock);
1339
1340 new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1341
1342 /*
1343 * The rtm_msglen, rtm_version and rtm_type fields in
1344 * RTM_GET response are filled in by rts_fill_msg.
1345 *
1346 * rtm_addrs and rtm_flags are filled in based on what
1347 * was requested and the state of the IREs looked up
1348 * above.
1349 *
1350 * rtm_inits and rtm_rmx are filled in with metrics
1351 * based on whether a parent IRE was found or not.
1352 *
1353 * TODO: rtm_index and rtm_use should probably be
1354 * filled in with something resonable here and not just
1355 * copied from the request.
1356 */
1357 new_rtm->rtm_index = rtm->rtm_index;
1358 new_rtm->rtm_pid = rtm->rtm_pid;
1359 new_rtm->rtm_seq = rtm->rtm_seq;
1360 new_rtm->rtm_use = rtm->rtm_use;
1361 new_rtm->rtm_addrs = rtm_addrs;
1362 new_rtm->rtm_flags = rtm_flags;
1363 new_rtm->rtm_inits = rts_getmetrics(ire, ill, &new_rtm->rtm_rmx);
1364 if (ill != NULL)
1365 ill_refrele(ill);
1366 return (new_mp);
1367 }
1368
1369 /*
1370 * Fill the given if_data_t with interface statistics.
1371 */
1372 static void
rts_getifdata(if_data_t * if_data,const ipif_t * ipif)1373 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1374 {
1375 if_data->ifi_type = ipif->ipif_ill->ill_type;
1376 /* ethernet, tokenring, etc */
1377 if_data->ifi_addrlen = 0; /* media address length */
1378 if_data->ifi_hdrlen = 0; /* media header length */
1379 if_data->ifi_mtu = ipif->ipif_ill->ill_mtu; /* mtu */
1380 /* metric (external only) */
1381 if_data->ifi_metric = ipif->ipif_ill->ill_metric;
1382 if_data->ifi_baudrate = 0; /* linespeed */
1383
1384 if_data->ifi_ipackets = 0; /* packets received on if */
1385 if_data->ifi_ierrors = 0; /* input errors on interface */
1386 if_data->ifi_opackets = 0; /* packets sent on interface */
1387 if_data->ifi_oerrors = 0; /* output errors on if */
1388 if_data->ifi_collisions = 0; /* collisions on csma if */
1389 if_data->ifi_ibytes = 0; /* total number received */
1390 if_data->ifi_obytes = 0; /* total number sent */
1391 if_data->ifi_imcasts = 0; /* multicast packets received */
1392 if_data->ifi_omcasts = 0; /* multicast packets sent */
1393 if_data->ifi_iqdrops = 0; /* dropped on input */
1394 if_data->ifi_noproto = 0; /* destined for unsupported */
1395 /* protocol. */
1396 }
1397
1398 /*
1399 * Set the metrics on a forwarding table route.
1400 */
1401 static void
rts_setmetrics(ire_t * ire,uint_t which,rt_metrics_t * metrics)1402 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1403 {
1404 clock_t rtt;
1405 clock_t rtt_sd;
1406 ill_t *ill;
1407 ifrt_t *ifrt;
1408 mblk_t *mp;
1409 in6_addr_t gw_addr_v6;
1410
1411 /* Need to add back some metrics to the IRE? */
1412 /*
1413 * Bypass obtaining the lock and searching ill_saved_ire_mp in the
1414 * common case of no metrics.
1415 */
1416 if (which == 0)
1417 return;
1418 ire->ire_metrics.iulp_set = B_TRUE;
1419
1420 /*
1421 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1422 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1423 * microseconds.
1424 */
1425 if (which & RTV_RTT)
1426 rtt = metrics->rmx_rtt / 1000;
1427 if (which & RTV_RTTVAR)
1428 rtt_sd = metrics->rmx_rttvar / 1000;
1429
1430 /*
1431 * Update the metrics in the IRE itself.
1432 */
1433 mutex_enter(&ire->ire_lock);
1434 if (which & RTV_MTU)
1435 ire->ire_metrics.iulp_mtu = metrics->rmx_mtu;
1436 if (which & RTV_RTT)
1437 ire->ire_metrics.iulp_rtt = rtt;
1438 if (which & RTV_SSTHRESH)
1439 ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh;
1440 if (which & RTV_RTTVAR)
1441 ire->ire_metrics.iulp_rtt_sd = rtt_sd;
1442 if (which & RTV_SPIPE)
1443 ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe;
1444 if (which & RTV_RPIPE)
1445 ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe;
1446 mutex_exit(&ire->ire_lock);
1447
1448 /*
1449 * Search through the ifrt_t chain hanging off the ILL in order to
1450 * reflect the metric change there.
1451 */
1452 ill = ire->ire_ill;
1453 if (ill == NULL)
1454 return;
1455 ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1456 ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1457 if (ill->ill_isv6) {
1458 mutex_enter(&ire->ire_lock);
1459 gw_addr_v6 = ire->ire_gateway_addr_v6;
1460 mutex_exit(&ire->ire_lock);
1461 }
1462 mutex_enter(&ill->ill_saved_ire_lock);
1463 for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1464 /*
1465 * On a given ill, the tuple of address, gateway, mask,
1466 * ire_type and zoneid unique for each saved IRE.
1467 */
1468 ifrt = (ifrt_t *)mp->b_rptr;
1469 if (ill->ill_isv6) {
1470 if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1471 &ire->ire_addr_v6) ||
1472 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1473 &gw_addr_v6) ||
1474 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1475 &ire->ire_mask_v6))
1476 continue;
1477 } else {
1478 if (ifrt->ifrt_addr != ire->ire_addr ||
1479 ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1480 ifrt->ifrt_mask != ire->ire_mask)
1481 continue;
1482 }
1483 if (ifrt->ifrt_zoneid != ire->ire_zoneid ||
1484 ifrt->ifrt_type != ire->ire_type)
1485 continue;
1486
1487 if (which & RTV_MTU)
1488 ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu;
1489 if (which & RTV_RTT)
1490 ifrt->ifrt_metrics.iulp_rtt = rtt;
1491 if (which & RTV_SSTHRESH) {
1492 ifrt->ifrt_metrics.iulp_ssthresh =
1493 metrics->rmx_ssthresh;
1494 }
1495 if (which & RTV_RTTVAR)
1496 ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar;
1497 if (which & RTV_SPIPE)
1498 ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe;
1499 if (which & RTV_RPIPE)
1500 ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe;
1501 break;
1502 }
1503 mutex_exit(&ill->ill_saved_ire_lock);
1504
1505 /*
1506 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they
1507 * get any new iulp_mtu.
1508 * We do that by deleting them; ire_create_if_clone will pick
1509 * up the new metrics.
1510 */
1511 if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0)
1512 ire_dep_delete_if_clone(ire);
1513 }
1514
1515 /*
1516 * Get the metrics from a forwarding table route.
1517 */
1518 static int
rts_getmetrics(ire_t * ire,ill_t * ill,rt_metrics_t * metrics)1519 rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics)
1520 {
1521 int metrics_set = 0;
1522
1523 bzero(metrics, sizeof (rt_metrics_t));
1524
1525 /*
1526 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1527 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1528 * microseconds.
1529 */
1530 metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000;
1531 metrics_set |= RTV_RTT;
1532 if (ire->ire_metrics.iulp_mtu != 0) {
1533 metrics->rmx_mtu = ire->ire_metrics.iulp_mtu;
1534 metrics_set |= RTV_MTU;
1535 } else if (ill != NULL) {
1536 metrics->rmx_mtu = ill->ill_mtu;
1537 metrics_set |= RTV_MTU;
1538 }
1539 metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh;
1540 metrics_set |= RTV_SSTHRESH;
1541 metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000;
1542 metrics_set |= RTV_RTTVAR;
1543 metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe;
1544 metrics_set |= RTV_SPIPE;
1545 metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe;
1546 metrics_set |= RTV_RPIPE;
1547 return (metrics_set);
1548 }
1549
1550 /*
1551 * Given two sets of metrics (src and dst), use the dst values if they are
1552 * set. If a dst value is not set but the src value is set, then we use
1553 * the src value.
1554 * dst is updated with the new values.
1555 * This is used to merge information from a dce_t and ire_metrics, where the
1556 * dce values takes precedence.
1557 */
1558 void
rts_merge_metrics(iulp_t * dst,const iulp_t * src)1559 rts_merge_metrics(iulp_t *dst, const iulp_t *src)
1560 {
1561 if (!src->iulp_set)
1562 return;
1563
1564 if (dst->iulp_ssthresh == 0)
1565 dst->iulp_ssthresh = src->iulp_ssthresh;
1566 if (dst->iulp_rtt == 0)
1567 dst->iulp_rtt = src->iulp_rtt;
1568 if (dst->iulp_rtt_sd == 0)
1569 dst->iulp_rtt_sd = src->iulp_rtt_sd;
1570 if (dst->iulp_spipe == 0)
1571 dst->iulp_spipe = src->iulp_spipe;
1572 if (dst->iulp_rpipe == 0)
1573 dst->iulp_rpipe = src->iulp_rpipe;
1574 if (dst->iulp_rtomax == 0)
1575 dst->iulp_rtomax = src->iulp_rtomax;
1576 if (dst->iulp_sack == 0)
1577 dst->iulp_sack = src->iulp_sack;
1578 if (dst->iulp_tstamp_ok == 0)
1579 dst->iulp_tstamp_ok = src->iulp_tstamp_ok;
1580 if (dst->iulp_wscale_ok == 0)
1581 dst->iulp_wscale_ok = src->iulp_wscale_ok;
1582 if (dst->iulp_ecn_ok == 0)
1583 dst->iulp_ecn_ok = src->iulp_ecn_ok;
1584 if (dst->iulp_pmtud_ok == 0)
1585 dst->iulp_pmtud_ok = src->iulp_pmtud_ok;
1586 if (dst->iulp_mtu == 0)
1587 dst->iulp_mtu = src->iulp_mtu;
1588 }
1589
1590
1591 /*
1592 * Takes a pointer to a routing message and extracts necessary info by looking
1593 * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1594 * passed (all of which must be valid).
1595 *
1596 * The bitmask of sockaddrs actually found in the message is returned, or zero
1597 * is returned in the case of an error.
1598 */
1599 static int
rts_getaddrs(rt_msghdr_t * rtm,in6_addr_t * dst_addrp,in6_addr_t * gw_addrp,in6_addr_t * net_maskp,in6_addr_t * authorp,in6_addr_t * if_addrp,in6_addr_t * in_src_addrp,ushort_t * indexp,sa_family_t * afp,tsol_rtsecattr_t * rtsecattr,int * error)1600 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1601 in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1602 in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
1603 tsol_rtsecattr_t *rtsecattr, int *error)
1604 {
1605 struct sockaddr *sa;
1606 int i;
1607 int addr_bits;
1608 int length;
1609 int found_addrs = 0;
1610 caddr_t cp;
1611 size_t size;
1612 struct sockaddr_dl *sdl;
1613
1614 *dst_addrp = ipv6_all_zeros;
1615 *gw_addrp = ipv6_all_zeros;
1616 *net_maskp = ipv6_all_zeros;
1617 *authorp = ipv6_all_zeros;
1618 *if_addrp = ipv6_all_zeros;
1619 *in_src_addrp = ipv6_all_zeros;
1620 *indexp = 0;
1621 *afp = AF_UNSPEC;
1622 rtsecattr->rtsa_cnt = 0;
1623 *error = 0;
1624
1625 /*
1626 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1627 * RTA_IFA and RTA_AUTHOR. The rest will be added as we need them.
1628 */
1629 cp = (caddr_t)&rtm[1];
1630 length = rtm->rtm_msglen;
1631 for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1632 /*
1633 * The address family we are working with starts out as
1634 * AF_UNSPEC, but is set to the one specified with the
1635 * destination address.
1636 *
1637 * If the "working" address family that has been set to
1638 * something other than AF_UNSPEC, then the address family of
1639 * subsequent sockaddrs must either be AF_UNSPEC (for
1640 * compatibility with older programs) or must be the same as our
1641 * "working" one.
1642 *
1643 * This code assumes that RTA_DST (1) comes first in the loop.
1644 */
1645 sa = (struct sockaddr *)cp;
1646 addr_bits = (rtm->rtm_addrs & (1 << i));
1647 if (addr_bits == 0)
1648 continue;
1649 switch (addr_bits) {
1650 case RTA_DST:
1651 size = rts_copyfromsockaddr(sa, dst_addrp);
1652 *afp = sa->sa_family;
1653 break;
1654 case RTA_GATEWAY:
1655 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1656 return (0);
1657 size = rts_copyfromsockaddr(sa, gw_addrp);
1658 break;
1659 case RTA_NETMASK:
1660 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1661 return (0);
1662 size = rts_copyfromsockaddr(sa, net_maskp);
1663 break;
1664 case RTA_IFP:
1665 if (sa->sa_family != AF_LINK &&
1666 sa->sa_family != AF_UNSPEC)
1667 return (0);
1668 sdl = (struct sockaddr_dl *)cp;
1669 *indexp = sdl->sdl_index;
1670 size = sizeof (struct sockaddr_dl);
1671 break;
1672 case RTA_SRC:
1673 /* Source address of the incoming packet */
1674 size = rts_copyfromsockaddr(sa, in_src_addrp);
1675 *afp = sa->sa_family;
1676 break;
1677 case RTA_IFA:
1678 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1679 return (0);
1680 size = rts_copyfromsockaddr(sa, if_addrp);
1681 break;
1682 case RTA_AUTHOR:
1683 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1684 return (0);
1685 size = rts_copyfromsockaddr(sa, authorp);
1686 break;
1687 default:
1688 return (0);
1689 }
1690 if (size == 0)
1691 return (0);
1692 cp += size;
1693 found_addrs |= addr_bits;
1694 }
1695
1696 /*
1697 * Parse the routing message and look for any security-
1698 * related attributes for the route. For each valid
1699 * attribute, allocate/obtain the corresponding kernel
1700 * route security attributes.
1701 */
1702 if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) {
1703 *error = tsol_rtsa_init(rtm, rtsecattr, cp);
1704 ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1705 }
1706
1707 return (found_addrs);
1708 }
1709
1710 /*
1711 * Fills the message with the given info.
1712 */
1713 static void
rts_fill_msg(int type,int rtm_addrs,ipaddr_t dst,ipaddr_t mask,ipaddr_t gateway,ipaddr_t src_addr,ipaddr_t brd_addr,ipaddr_t author,ipaddr_t ifaddr,const ill_t * ill,mblk_t * mp,const tsol_gc_t * gc)1714 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1715 ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1716 ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
1717 const tsol_gc_t *gc)
1718 {
1719 rt_msghdr_t *rtm;
1720 sin_t *sin;
1721 size_t data_size, header_size;
1722 uchar_t *cp;
1723 int i;
1724
1725 ASSERT(mp != NULL);
1726 /*
1727 * First find the type of the message
1728 * and its length.
1729 */
1730 header_size = rts_header_msg_size(type);
1731 /*
1732 * Now find the size of the data
1733 * that follows the message header.
1734 */
1735 data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0);
1736
1737 rtm = (rt_msghdr_t *)mp->b_rptr;
1738 mp->b_wptr = &mp->b_rptr[header_size];
1739 cp = mp->b_wptr;
1740 bzero(cp, data_size);
1741 for (i = 0; i < RTA_NUMBITS; i++) {
1742 sin = (sin_t *)cp;
1743 switch (rtm_addrs & (1 << i)) {
1744 case RTA_DST:
1745 sin->sin_addr.s_addr = dst;
1746 sin->sin_family = AF_INET;
1747 cp += sizeof (sin_t);
1748 break;
1749 case RTA_GATEWAY:
1750 sin->sin_addr.s_addr = gateway;
1751 sin->sin_family = AF_INET;
1752 cp += sizeof (sin_t);
1753 break;
1754 case RTA_NETMASK:
1755 sin->sin_addr.s_addr = mask;
1756 sin->sin_family = AF_INET;
1757 cp += sizeof (sin_t);
1758 break;
1759 case RTA_IFP:
1760 cp += ill_dls_info((struct sockaddr_dl *)cp, ill);
1761 break;
1762 case RTA_IFA:
1763 sin->sin_addr.s_addr = ifaddr;
1764 sin->sin_family = AF_INET;
1765 cp += sizeof (sin_t);
1766 break;
1767 case RTA_SRC:
1768 sin->sin_addr.s_addr = src_addr;
1769 sin->sin_family = AF_INET;
1770 cp += sizeof (sin_t);
1771 break;
1772 case RTA_AUTHOR:
1773 sin->sin_addr.s_addr = author;
1774 sin->sin_family = AF_INET;
1775 cp += sizeof (sin_t);
1776 break;
1777 case RTA_BRD:
1778 /*
1779 * RTA_BRD is used typically to specify a point-to-point
1780 * destination address.
1781 */
1782 sin->sin_addr.s_addr = brd_addr;
1783 sin->sin_family = AF_INET;
1784 cp += sizeof (sin_t);
1785 break;
1786 }
1787 }
1788
1789 if (gc != NULL) {
1790 rtm_ext_t *rtm_ext;
1791 struct rtsa_s *rp_dst;
1792 tsol_rtsecattr_t *rsap;
1793
1794 ASSERT(gc->gc_grp != NULL);
1795 ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1796
1797 rtm_ext = (rtm_ext_t *)cp;
1798 rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1799 rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1);
1800
1801 rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1802 rsap->rtsa_cnt = 1;
1803 rp_dst = rsap->rtsa_attr;
1804
1805 ASSERT(gc->gc_db != NULL);
1806 bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1807 cp = (uchar_t *)rp_dst;
1808 }
1809
1810 mp->b_wptr = cp;
1811 mp->b_cont = NULL;
1812 /*
1813 * set the fields that are common to
1814 * to different messages.
1815 */
1816 rtm->rtm_msglen = (short)(header_size + data_size);
1817 rtm->rtm_version = RTM_VERSION;
1818 rtm->rtm_type = (uchar_t)type;
1819 }
1820
1821 /*
1822 * Allocates and initializes a routing socket message.
1823 * Note that sacnt is either zero or one.
1824 */
1825 mblk_t *
rts_alloc_msg(int type,int rtm_addrs,sa_family_t af,uint_t sacnt)1826 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1827 {
1828 size_t length;
1829 mblk_t *mp;
1830
1831 length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1832 mp = allocb(length, BPRI_MED);
1833 if (mp == NULL)
1834 return (mp);
1835 bzero(mp->b_rptr, length);
1836 return (mp);
1837 }
1838
1839 /*
1840 * Returns the size of the routing
1841 * socket message header size.
1842 */
1843 size_t
rts_header_msg_size(int type)1844 rts_header_msg_size(int type)
1845 {
1846 switch (type) {
1847 case RTM_DELADDR:
1848 case RTM_NEWADDR:
1849 case RTM_CHGADDR:
1850 case RTM_FREEADDR:
1851 return (sizeof (ifa_msghdr_t));
1852 case RTM_IFINFO:
1853 return (sizeof (if_msghdr_t));
1854 default:
1855 return (sizeof (rt_msghdr_t));
1856 }
1857 }
1858
1859 /*
1860 * Returns the size of the message needed with the given rtm_addrs and family.
1861 *
1862 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1863 * of the same family (currently either AF_INET or AF_INET6).
1864 */
1865 size_t
rts_data_msg_size(int rtm_addrs,sa_family_t af,uint_t sacnt)1866 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1867 {
1868 int i;
1869 size_t length = 0;
1870
1871 for (i = 0; i < RTA_NUMBITS; i++) {
1872 switch (rtm_addrs & (1 << i)) {
1873 case RTA_IFP:
1874 length += sizeof (struct sockaddr_dl);
1875 break;
1876 case RTA_DST:
1877 case RTA_GATEWAY:
1878 case RTA_NETMASK:
1879 case RTA_SRC:
1880 case RTA_IFA:
1881 case RTA_AUTHOR:
1882 case RTA_BRD:
1883 ASSERT(af == AF_INET || af == AF_INET6);
1884 switch (af) {
1885 case AF_INET:
1886 length += sizeof (sin_t);
1887 break;
1888 case AF_INET6:
1889 length += sizeof (sin6_t);
1890 break;
1891 }
1892 break;
1893 }
1894 }
1895 if (sacnt > 0)
1896 length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1897
1898 return (length);
1899 }
1900
1901 /*
1902 * This routine is called to generate a message to the routing
1903 * socket indicating that a redirect has occured, a routing lookup
1904 * has failed, or that a protocol has detected timeouts to a particular
1905 * destination. This routine is called for message types RTM_LOSING,
1906 * RTM_REDIRECT, and RTM_MISS.
1907 */
1908 void
ip_rts_change(int type,ipaddr_t dst_addr,ipaddr_t gw_addr,ipaddr_t net_mask,ipaddr_t source,ipaddr_t author,int flags,int error,int rtm_addrs,ip_stack_t * ipst)1909 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1910 ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1911 ip_stack_t *ipst)
1912 {
1913 rt_msghdr_t *rtm;
1914 mblk_t *mp;
1915
1916 if (rtm_addrs == 0)
1917 return;
1918 mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1919 if (mp == NULL)
1920 return;
1921 rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1922 author, 0, NULL, mp, NULL);
1923 rtm = (rt_msghdr_t *)mp->b_rptr;
1924 rtm->rtm_flags = flags;
1925 rtm->rtm_errno = error;
1926 rtm->rtm_flags |= RTF_DONE;
1927 rtm->rtm_addrs = rtm_addrs;
1928 rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst);
1929 }
1930
1931 /*
1932 * This routine is called to generate a message to the routing
1933 * socket indicating that the status of a network interface has changed.
1934 * Message type generated RTM_IFINFO.
1935 */
1936 void
ip_rts_ifmsg(const ipif_t * ipif,uint_t flags)1937 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags)
1938 {
1939 ip_rts_xifmsg(ipif, 0, 0, flags);
1940 }
1941
1942 void
ip_rts_xifmsg(const ipif_t * ipif,uint64_t set,uint64_t clear,uint_t flags)1943 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags)
1944 {
1945 if_msghdr_t *ifm;
1946 mblk_t *mp;
1947 sa_family_t af;
1948 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
1949
1950 /*
1951 * This message should be generated only
1952 * when the physical device is changing
1953 * state.
1954 */
1955 if (ipif->ipif_id != 0)
1956 return;
1957 if (ipif->ipif_isv6) {
1958 af = AF_INET6;
1959 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1960 if (mp == NULL)
1961 return;
1962 rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1963 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1964 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1965 ipif->ipif_ill, mp, NULL);
1966 } else {
1967 af = AF_INET;
1968 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1969 if (mp == NULL)
1970 return;
1971 rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0,
1972 ipif->ipif_ill, mp, NULL);
1973 }
1974 ifm = (if_msghdr_t *)mp->b_rptr;
1975 ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1976 ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1977 ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear;
1978 rts_getifdata(&ifm->ifm_data, ipif);
1979 ifm->ifm_addrs = RTA_IFP;
1980
1981 if (flags & RTSQ_DEFAULT) {
1982 flags = RTSQ_ALL;
1983 /*
1984 * If this message is for an underlying interface, prevent
1985 * "normal" (IPMP-unaware) routing sockets from seeing it.
1986 */
1987 if (IS_UNDER_IPMP(ipif->ipif_ill))
1988 flags &= ~RTSQ_NORMAL;
1989 }
1990
1991 rts_queue_input(mp, NULL, af, flags, ipst);
1992 }
1993
1994 /*
1995 * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message;
1996 * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR)
1997 * generate the ifa_msghdr_t message.
1998 */
1999 static void
rts_new_rtsmsg(int cmd,int error,const ipif_t * ipif,uint_t flags)2000 rts_new_rtsmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
2001 {
2002 int rtm_addrs;
2003 mblk_t *mp;
2004 ifa_msghdr_t *ifam;
2005 rt_msghdr_t *rtm;
2006 sa_family_t af;
2007 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
2008
2009 /*
2010 * Do not report unspecified address if this is the RTM_CHGADDR or
2011 * RTM_FREEADDR message.
2012 */
2013 if (cmd == RTM_CHGADDR || cmd == RTM_FREEADDR) {
2014 if (!ipif->ipif_isv6) {
2015 if (ipif->ipif_lcl_addr == INADDR_ANY)
2016 return;
2017 } else if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) {
2018 return;
2019 }
2020 }
2021
2022 if (ipif->ipif_isv6)
2023 af = AF_INET6;
2024 else
2025 af = AF_INET;
2026
2027 if (cmd == RTM_ADD || cmd == RTM_DELETE)
2028 rtm_addrs = (RTA_DST | RTA_NETMASK);
2029 else
2030 rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
2031
2032 mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
2033 if (mp == NULL)
2034 return;
2035
2036 if (cmd != RTM_ADD && cmd != RTM_DELETE) {
2037 switch (af) {
2038 case AF_INET:
2039 rts_fill_msg(cmd, rtm_addrs, 0,
2040 ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
2041 ipif->ipif_pp_dst_addr, 0,
2042 ipif->ipif_lcl_addr, ipif->ipif_ill,
2043 mp, NULL);
2044 break;
2045 case AF_INET6:
2046 rts_fill_msg_v6(cmd, rtm_addrs,
2047 &ipv6_all_zeros, &ipif->ipif_v6net_mask,
2048 &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
2049 &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
2050 &ipif->ipif_v6lcl_addr, ipif->ipif_ill,
2051 mp, NULL);
2052 break;
2053 }
2054 ifam = (ifa_msghdr_t *)mp->b_rptr;
2055 ifam->ifam_index =
2056 ipif->ipif_ill->ill_phyint->phyint_ifindex;
2057 ifam->ifam_metric = ipif->ipif_ill->ill_metric;
2058 ifam->ifam_flags = ((cmd == RTM_NEWADDR) ? RTF_UP : 0);
2059 ifam->ifam_addrs = rtm_addrs;
2060 } else {
2061 switch (af) {
2062 case AF_INET:
2063 rts_fill_msg(cmd, rtm_addrs,
2064 ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
2065 0, 0, 0, 0, NULL, mp, NULL);
2066 break;
2067 case AF_INET6:
2068 rts_fill_msg_v6(cmd, rtm_addrs,
2069 &ipif->ipif_v6lcl_addr,
2070 &ipif->ipif_v6net_mask, &ipv6_all_zeros,
2071 &ipv6_all_zeros, &ipv6_all_zeros,
2072 &ipv6_all_zeros, &ipv6_all_zeros,
2073 NULL, mp, NULL);
2074 break;
2075 }
2076 rtm = (rt_msghdr_t *)mp->b_rptr;
2077 rtm->rtm_index =
2078 ipif->ipif_ill->ill_phyint->phyint_ifindex;
2079 rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
2080 rtm->rtm_errno = error;
2081 if (error == 0)
2082 rtm->rtm_flags |= RTF_DONE;
2083 rtm->rtm_addrs = rtm_addrs;
2084 }
2085 rts_queue_input(mp, NULL, af, flags, ipst);
2086 }
2087
2088 /*
2089 * This is called to generate messages to the routing socket
2090 * indicating a network interface has had addresses associated with it.
2091 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
2092 */
2093 void
ip_rts_newaddrmsg(int cmd,int error,const ipif_t * ipif,uint_t flags)2094 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
2095 {
2096 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
2097
2098 if (flags & RTSQ_DEFAULT) {
2099 flags = RTSQ_ALL;
2100 /*
2101 * If this message is for an underlying interface, prevent
2102 * "normal" (IPMP-unaware) routing sockets from seeing it.
2103 */
2104 if (IS_UNDER_IPMP(ipif->ipif_ill))
2105 flags &= ~RTSQ_NORMAL;
2106 }
2107
2108 /*
2109 * Let conn_ixa caching know that source address selection
2110 * changed
2111 */
2112 if (cmd == RTM_ADD || cmd == RTM_DELETE)
2113 ip_update_source_selection(ipst);
2114
2115 /*
2116 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
2117 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
2118 * otherwise simply send the request.
2119 */
2120 switch (cmd) {
2121 case RTM_ADD:
2122 rts_new_rtsmsg(RTM_NEWADDR, error, ipif, flags);
2123 rts_new_rtsmsg(RTM_ADD, error, ipif, flags);
2124 break;
2125 case RTM_DELETE:
2126 rts_new_rtsmsg(RTM_DELETE, error, ipif, flags);
2127 rts_new_rtsmsg(RTM_DELADDR, error, ipif, flags);
2128 break;
2129 default:
2130 rts_new_rtsmsg(cmd, error, ipif, flags);
2131 break;
2132 }
2133 }
2134
2135 /*
2136 * Based on the address family specified in a sockaddr, copy the address field
2137 * into an in6_addr_t.
2138 *
2139 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
2140 * compatibility with programs that leave the family cleared in the sockaddr.
2141 * Callers of rts_copyfromsockaddr should check the family themselves if they
2142 * wish to verify its value.
2143 *
2144 * In the case of AF_INET6, a check is made to ensure that address is not an
2145 * IPv4-mapped address.
2146 */
2147 size_t
rts_copyfromsockaddr(struct sockaddr * sa,in6_addr_t * addrp)2148 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
2149 {
2150 switch (sa->sa_family) {
2151 case AF_INET:
2152 case AF_UNSPEC:
2153 IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
2154 return (sizeof (sin_t));
2155 case AF_INET6:
2156 *addrp = ((sin6_t *)sa)->sin6_addr;
2157 if (IN6_IS_ADDR_V4MAPPED(addrp))
2158 return (0);
2159 return (sizeof (sin6_t));
2160 default:
2161 return (0);
2162 }
2163 }
2164