1 /*
2 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /*
6 * Copyright (c) 1988, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95
38 */
39
40 /*
41 * This file contains routines that process routing socket requests.
42 */
43
44 #include <sys/types.h>
45 #include <sys/stream.h>
46 #include <sys/stropts.h>
47 #include <sys/ddi.h>
48 #include <sys/strsubr.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/policy.h>
52 #include <sys/zone.h>
53
54 #include <sys/systm.h>
55 #include <sys/param.h>
56 #include <sys/socket.h>
57 #include <sys/strsun.h>
58 #include <net/if.h>
59 #include <net/route.h>
60 #include <netinet/in.h>
61 #include <net/if_dl.h>
62 #include <netinet/ip6.h>
63
64 #include <inet/common.h>
65 #include <inet/ip.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_if.h>
68 #include <inet/ip_ire.h>
69 #include <inet/ip_ftable.h>
70 #include <inet/ip_rts.h>
71
72 #include <inet/ipclassifier.h>
73
74 #include <sys/tsol/tndb.h>
75 #include <sys/tsol/tnet.h>
76
/*
 * Total size of a routing message of the given `type': the size of its
 * header (rts_header_msg_size()) plus the size of the address data that
 * follows it (rts_data_msg_size()) for the `rtm_addrs' bits, address
 * family `af' and `sacnt' security attributes.
 */
#define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt)	\
	(rts_header_msg_size(type) + rts_data_msg_size(rtm_addrs, af, sacnt))
79
80 static size_t rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
81 static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
82 ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
83 ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
84 const tsol_gc_t *);
85 static int rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
86 in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
87 in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
88 sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
89 static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
90 static int rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics);
91 static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire,
92 const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af);
93 static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
94 static ire_t *ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask,
95 ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid,
96 const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire,
97 ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp);
98 static ire_t *ire_lookup_v6(const in6_addr_t *dst_addr_v6,
99 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
100 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
101 ip_stack_t *ipst, ire_t **pifire,
102 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp);
103
104 /*
105 * Send `mp' to all eligible routing queues. A queue is ineligible if:
106 *
107 * 1. SO_USELOOPBACK is off and it is not the originating queue.
108 * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'.
109 * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'.
110 * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC.
111 */
112 void
rts_queue_input(mblk_t * mp,conn_t * o_connp,sa_family_t af,uint_t flags,ip_stack_t * ipst)113 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags,
114 ip_stack_t *ipst)
115 {
116 mblk_t *mp1;
117 conn_t *connp, *next_connp;
118
119 /*
120 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be
121 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point.
122 */
123 ASSERT(!(flags & RTSQ_DEFAULT));
124
125 mutex_enter(&ipst->ips_rts_clients->connf_lock);
126 connp = ipst->ips_rts_clients->connf_head;
127
128 for (; connp != NULL; connp = next_connp) {
129 next_connp = connp->conn_next;
130 /*
131 * If there was a family specified when this routing socket was
132 * created and it doesn't match the family of the message to
133 * copy, then continue.
134 */
135 if ((connp->conn_proto != AF_UNSPEC) &&
136 (connp->conn_proto != af))
137 continue;
138
139 /*
140 * Queue the message only if the conn_t and flags match.
141 */
142 if (connp->conn_rtaware & RTAW_UNDER_IPMP) {
143 if (!(flags & RTSQ_UNDER_IPMP))
144 continue;
145 } else {
146 if (!(flags & RTSQ_NORMAL))
147 continue;
148 }
149 /*
150 * For the originating queue, we only copy the message upstream
151 * if loopback is set. For others reading on the routing
152 * socket, we check if there is room upstream for a copy of the
153 * message.
154 */
155 if ((o_connp == connp) && connp->conn_useloopback == 0) {
156 connp = connp->conn_next;
157 continue;
158 }
159 CONN_INC_REF(connp);
160 mutex_exit(&ipst->ips_rts_clients->connf_lock);
161 /* Pass to rts_input */
162 if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld :
163 canputnext(connp->conn_rq)) {
164 mp1 = dupmsg(mp);
165 if (mp1 == NULL)
166 mp1 = copymsg(mp);
167 /* Note that we pass a NULL ira to rts_input */
168 if (mp1 != NULL)
169 (connp->conn_recv)(connp, mp1, NULL, NULL);
170 }
171
172 mutex_enter(&ipst->ips_rts_clients->connf_lock);
173 /* reload next_connp since conn_next may have changed */
174 next_connp = connp->conn_next;
175 CONN_DEC_REF(connp);
176 }
177 mutex_exit(&ipst->ips_rts_clients->connf_lock);
178 freemsg(mp);
179 }
180
181 /*
182 * Takes an ire and sends an ack to all the routing sockets. This
183 * routine is used
184 * - when a route is created/deleted through the ioctl interface.
185 * - when a stale redirect is deleted
186 */
187 void
ip_rts_rtmsg(int type,ire_t * ire,int error,ip_stack_t * ipst)188 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
189 {
190 mblk_t *mp;
191 rt_msghdr_t *rtm;
192 int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
193 sa_family_t af = { 0 };
194 in6_addr_t gw_addr_v6;
195
196 if (ire == NULL)
197 return;
198 ASSERT(ire->ire_ipversion == IPV4_VERSION ||
199 ire->ire_ipversion == IPV6_VERSION);
200
201 ASSERT(!(ire->ire_type & IRE_IF_CLONE));
202 mp = NULL;
203
204 if (ire->ire_flags & RTF_SETSRC)
205 rtm_addrs |= RTA_SRC;
206
207 switch (ire->ire_ipversion) {
208 case IPV4_VERSION:
209 af = AF_INET;
210 mp = rts_alloc_msg(type, rtm_addrs, af, 0);
211 if (mp == NULL)
212 return;
213 rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
214 ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL,
215 mp, NULL);
216 break;
217 case IPV6_VERSION:
218 af = AF_INET6;
219 mp = rts_alloc_msg(type, rtm_addrs, af, 0);
220 if (mp == NULL)
221 return;
222 mutex_enter(&ire->ire_lock);
223 gw_addr_v6 = ire->ire_gateway_addr_v6;
224 mutex_exit(&ire->ire_lock);
225 rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
226 &ire->ire_mask_v6, &gw_addr_v6,
227 &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
228 &ipv6_all_zeros, NULL, mp, NULL);
229 break;
230 }
231 rtm = (rt_msghdr_t *)mp->b_rptr;
232 mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
233 rtm->rtm_addrs = rtm_addrs;
234 rtm->rtm_flags = ire->ire_flags;
235 if (error != 0)
236 rtm->rtm_errno = error;
237 else
238 rtm->rtm_flags |= RTF_DONE;
239 rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst);
240 }
241
242 /*
243 * This is a call from the RTS module
244 * indicating that this is a Routing Socket
245 * Stream. Insert this conn_t in routing
246 * socket client list.
247 */
248 void
ip_rts_register(conn_t * connp)249 ip_rts_register(conn_t *connp)
250 {
251 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
252
253 connp->conn_useloopback = 1;
254 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
255 }
256
257 /*
258 * This is a call from the RTS module indicating that it is closing.
259 */
260 void
ip_rts_unregister(conn_t * connp)261 ip_rts_unregister(conn_t *connp)
262 {
263 ipcl_hash_remove(connp);
264 }
265
266 /*
267 * Processes requests received on a routing socket. It extracts all the
268 * arguments and calls the appropriate function to process the request.
269 *
270 * RTA_SRC bit flag requests are sent by 'route -setsrc'.
271 *
272 * In general, this function does not consume the message supplied but rather
273 * sends the message upstream with an appropriate UNIX errno.
274 */
275 int
ip_rts_request_common(mblk_t * mp,conn_t * connp,cred_t * ioc_cr)276 ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
277 {
278 rt_msghdr_t *rtm = NULL;
279 in6_addr_t dst_addr_v6;
280 in6_addr_t src_addr_v6;
281 in6_addr_t gw_addr_v6;
282 in6_addr_t net_mask_v6;
283 in6_addr_t author_v6;
284 in6_addr_t if_addr_v6;
285 mblk_t *mp1;
286 ire_t *ire = NULL;
287 ire_t *ifire = NULL;
288 ipaddr_t v4setsrc;
289 in6_addr_t v6setsrc = ipv6_all_zeros;
290 tsol_ire_gw_secattr_t *gwattr = NULL;
291 int error = 0;
292 int match_flags = MATCH_IRE_DSTONLY;
293 int match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
294 int found_addrs;
295 sa_family_t af;
296 ipaddr_t dst_addr;
297 ipaddr_t gw_addr;
298 ipaddr_t src_addr;
299 ipaddr_t net_mask;
300 ushort_t index;
301 boolean_t gcgrp_xtraref = B_FALSE;
302 tsol_gcgrp_addr_t ga;
303 tsol_rtsecattr_t rtsecattr;
304 struct rtsa_s *rtsap = NULL;
305 tsol_gcgrp_t *gcgrp = NULL;
306 tsol_gc_t *gc = NULL;
307 ts_label_t *tsl = NULL;
308 zoneid_t zoneid;
309 ip_stack_t *ipst;
310 ill_t *ill = NULL;
311
312 zoneid = connp->conn_zoneid;
313 ipst = connp->conn_netstack->netstack_ip;
314 net_mask = 0;
315 src_addr = 0;
316 dst_addr = 0;
317 gw_addr = 0;
318
319 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
320 freemsg(mp);
321 error = EINVAL;
322 goto done;
323 }
324 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
325 freemsg(mp);
326 error = EINVAL;
327 goto done;
328 }
329
330 /*
331 * Check the routing message for basic consistency including the
332 * version number and that the number of octets written is the same
333 * as specified by the rtm_msglen field.
334 *
335 * At this point, an error can be delivered back via rtm_errno.
336 */
337 rtm = (rt_msghdr_t *)mp->b_rptr;
338 if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
339 error = EINVAL;
340 goto done;
341 }
342 if (rtm->rtm_version != RTM_VERSION) {
343 error = EPROTONOSUPPORT;
344 goto done;
345 }
346
347 /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
348 if (rtm->rtm_type != RTM_GET &&
349 rtm->rtm_type != RTM_RESOLVE &&
350 (ioc_cr == NULL ||
351 secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
352 error = EPERM;
353 goto done;
354 }
355
356 found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
357 &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
358 &error);
359
360 if (error != 0)
361 goto done;
362
363 if ((found_addrs & RTA_DST) == 0) {
364 error = EINVAL;
365 goto done;
366 }
367
368 /*
369 * Based on the address family of the destination address, determine
370 * the destination, gateway and netmask and return the appropriate error
371 * if an unknown address family was specified (following the errno
372 * values that 4.4BSD-Lite2 returns.)
373 */
374 switch (af) {
375 case AF_INET:
376 IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
377 IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
378 IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
379 if (((found_addrs & RTA_NETMASK) == 0) ||
380 (rtm->rtm_flags & RTF_HOST))
381 net_mask = IP_HOST_MASK;
382 else
383 IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
384 break;
385 case AF_INET6:
386 if (((found_addrs & RTA_NETMASK) == 0) ||
387 (rtm->rtm_flags & RTF_HOST))
388 net_mask_v6 = ipv6_all_ones;
389 break;
390 default:
391 /*
392 * These errno values are meant to be compatible with
393 * 4.4BSD-Lite2 for the given message types.
394 */
395 switch (rtm->rtm_type) {
396 case RTM_ADD:
397 case RTM_DELETE:
398 error = ESRCH;
399 goto done;
400 case RTM_GET:
401 case RTM_CHANGE:
402 error = EAFNOSUPPORT;
403 goto done;
404 default:
405 error = EOPNOTSUPP;
406 goto done;
407 }
408 }
409
410 /*
411 * At this point, the address family must be something known.
412 */
413 ASSERT(af == AF_INET || af == AF_INET6);
414
415 /* Handle RTA_IFP */
416 if (index != 0) {
417 ipif_t *ipif;
418 lookup:
419 ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst);
420 if (ill == NULL) {
421 error = EINVAL;
422 goto done;
423 }
424
425 /*
426 * Since all interfaces in an IPMP group must be equivalent,
427 * we prevent changes to a specific underlying interface's
428 * routing configuration. However, for backward compatibility,
429 * we intepret a request to add a route on an underlying
430 * interface as a request to add a route on its IPMP interface.
431 */
432 if (IS_UNDER_IPMP(ill)) {
433 switch (rtm->rtm_type) {
434 case RTM_CHANGE:
435 case RTM_DELETE:
436 error = EINVAL;
437 goto done;
438 case RTM_ADD:
439 index = ipmp_ill_get_ipmp_ifindex(ill);
440 ill_refrele(ill);
441 if (index == 0) {
442 ill = NULL; /* already refrele'd */
443 error = EINVAL;
444 goto done;
445 }
446 goto lookup;
447 }
448 }
449
450 match_flags |= MATCH_IRE_ILL;
451 /*
452 * This provides the same zoneid as in Solaris 10
453 * that -ifp picks the zoneid from the first ipif on the ill.
454 * But it might not be useful since the first ipif will always
455 * have the same zoneid as the ill.
456 */
457 ipif = ipif_get_next_ipif(NULL, ill);
458 if (ipif != NULL) {
459 zoneid = ipif->ipif_zoneid;
460 ipif_refrele(ipif);
461 }
462 }
463
464 /*
465 * If a netmask was supplied in the message, then subsequent route
466 * lookups will attempt to match on the netmask as well.
467 */
468 if ((found_addrs & RTA_NETMASK) != 0)
469 match_flags |= MATCH_IRE_MASK;
470
471 /*
472 * We only process any passed-in route security attributes for
473 * either RTM_ADD or RTM_CHANGE message; We overload them
474 * to do an RTM_GET as a different label; ignore otherwise.
475 */
476 if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
477 rtm->rtm_type == RTM_GET) {
478 ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
479 if (rtsecattr.rtsa_cnt > 0)
480 rtsap = &rtsecattr.rtsa_attr[0];
481 }
482
483 switch (rtm->rtm_type) {
484 case RTM_ADD:
485 /* if we are adding a route, gateway is a must */
486 if ((found_addrs & RTA_GATEWAY) == 0) {
487 error = EINVAL;
488 goto done;
489 }
490
491 /* Multirouting does not support net routes. */
492 if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
493 RTF_MULTIRT) {
494 error = EADDRNOTAVAIL;
495 goto done;
496 }
497
498 /*
499 * Multirouting and user-specified source addresses
500 * do not support interface based routing.
501 * Assigning a source address to an interface based
502 * route is achievable by plumbing a new ipif and
503 * setting up the interface route via this ipif,
504 * though.
505 */
506 if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
507 if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
508 error = EADDRNOTAVAIL;
509 goto done;
510 }
511 }
512
513 switch (af) {
514 case AF_INET:
515 if (src_addr != INADDR_ANY) {
516 uint_t type;
517
518 /*
519 * The RTF_SETSRC flag is present, check that
520 * the supplied src address is not the loopback
521 * address. This would produce martian packets.
522 */
523 if (src_addr == htonl(INADDR_LOOPBACK)) {
524 error = EINVAL;
525 goto done;
526 }
527 /*
528 * Also check that the supplied address is a
529 * valid, local one. Only allow IFF_UP ones
530 */
531 type = ip_type_v4(src_addr, ipst);
532 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
533 error = EADDRNOTAVAIL;
534 goto done;
535 }
536 } else {
537 /*
538 * The RTF_SETSRC modifier must be associated
539 * to a non-null source address.
540 */
541 if (rtm->rtm_flags & RTF_SETSRC) {
542 error = EINVAL;
543 goto done;
544 }
545 }
546
547 error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
548 rtm->rtm_flags, ill, &ire, B_FALSE,
549 rtsap, ipst, zoneid);
550 if (ill != NULL)
551 ASSERT(!MUTEX_HELD(&ill->ill_lock));
552 break;
553 case AF_INET6:
554 if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
555 uint_t type;
556
557 /*
558 * The RTF_SETSRC flag is present, check that
559 * the supplied src address is not the loopback
560 * address. This would produce martian packets.
561 */
562 if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
563 error = EINVAL;
564 goto done;
565 }
566 /*
567 * Also check that the supplied address is a
568 * valid, local one. Only allow UP ones.
569 */
570 type = ip_type_v6(&src_addr_v6, ipst);
571 if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
572 error = EADDRNOTAVAIL;
573 goto done;
574 }
575
576 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
577 &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
578 ill, &ire, rtsap, ipst, zoneid);
579 break;
580 }
581 /*
582 * The RTF_SETSRC modifier must be associated
583 * to a non-null source address.
584 */
585 if (rtm->rtm_flags & RTF_SETSRC) {
586 error = EINVAL;
587 goto done;
588 }
589 error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
590 &gw_addr_v6, NULL, rtm->rtm_flags,
591 ill, &ire, rtsap, ipst, zoneid);
592 if (ill != NULL)
593 ASSERT(!MUTEX_HELD(&ill->ill_lock));
594 break;
595 }
596 if (error != 0)
597 goto done;
598 ASSERT(ire != NULL);
599 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
600 break;
601 case RTM_DELETE:
602 /* if we are deleting a route, gateway is a must */
603 if ((found_addrs & RTA_GATEWAY) == 0) {
604 error = EINVAL;
605 goto done;
606 }
607 /*
608 * The RTF_SETSRC modifier does not make sense
609 * when deleting a route.
610 */
611 if (rtm->rtm_flags & RTF_SETSRC) {
612 error = EINVAL;
613 goto done;
614 }
615
616 switch (af) {
617 case AF_INET:
618 error = ip_rt_delete(dst_addr, net_mask, gw_addr,
619 found_addrs, rtm->rtm_flags, ill, B_FALSE,
620 ipst, zoneid);
621 break;
622 case AF_INET6:
623 error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
624 &gw_addr_v6, found_addrs, rtm->rtm_flags, ill,
625 ipst, zoneid);
626 break;
627 }
628 break;
629 case RTM_GET:
630 case RTM_CHANGE:
631 /*
632 * In the case of RTM_GET, the forwarding table should be
633 * searched recursively. Also, if a gateway was
634 * specified then the gateway address must also be matched.
635 *
636 * In the case of RTM_CHANGE, the gateway address (if supplied)
637 * is the new gateway address so matching on the gateway address
638 * is not done. This can lead to ambiguity when looking up the
639 * route to change as usually only the destination (and netmask,
640 * if supplied) is used for the lookup. However if a RTA_IFP
641 * sockaddr is also supplied, it can disambiguate which route to
642 * change provided the ambigous routes are tied to distinct
643 * ill's (or interface indices). If the routes are not tied to
644 * any particular interfaces (for example, with traditional
645 * gateway routes), then a RTA_IFP sockaddr will be of no use as
646 * it won't match any such routes.
647 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
648 * except when RTM_CHANGE is combined to RTF_SETSRC.
649 */
650 if (((found_addrs & RTA_SRC) != 0) &&
651 ((rtm->rtm_type == RTM_GET) ||
652 !(rtm->rtm_flags & RTF_SETSRC))) {
653 error = EOPNOTSUPP;
654 goto done;
655 }
656
657 if (rtm->rtm_type == RTM_GET) {
658 match_flags |= MATCH_IRE_SECATTR;
659 match_flags_local |= MATCH_IRE_SECATTR;
660 if ((found_addrs & RTA_GATEWAY) != 0)
661 match_flags |= MATCH_IRE_GW;
662 if (ioc_cr)
663 tsl = crgetlabel(ioc_cr);
664 if (rtsap != NULL) {
665 if (rtsa_validate(rtsap) != 0) {
666 error = EINVAL;
667 goto done;
668 }
669 if (tsl != NULL &&
670 crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
671 (tsl->tsl_doi != rtsap->rtsa_doi ||
672 !bldominates(&tsl->tsl_label,
673 &rtsap->rtsa_slrange.lower_bound))) {
674 error = EPERM;
675 goto done;
676 }
677 tsl = labelalloc(
678 &rtsap->rtsa_slrange.lower_bound,
679 rtsap->rtsa_doi, KM_NOSLEEP);
680 }
681 }
682 if (rtm->rtm_type == RTM_CHANGE) {
683 if ((found_addrs & RTA_GATEWAY) &&
684 (rtm->rtm_flags & RTF_SETSRC)) {
685 /*
686 * Do not want to change the gateway,
687 * but rather the source address.
688 */
689 match_flags |= MATCH_IRE_GW;
690 }
691 }
692
693 /*
694 * If the netmask is all ones (either as supplied or as derived
695 * above), then first check for an IRE_LOOPBACK or
696 * IRE_LOCAL entry.
697 *
698 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
699 * entry, then look for any other type of IRE.
700 */
701 switch (af) {
702 case AF_INET:
703 if (net_mask == IP_HOST_MASK) {
704 ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr,
705 IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
706 tsl, match_flags_local, 0, ipst, NULL);
707 }
708 if (ire == NULL) {
709 ire = ire_lookup_v4(dst_addr, net_mask,
710 gw_addr, ill, zoneid, tsl, match_flags,
711 ipst, &ifire, &v4setsrc, &gwattr);
712 IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc);
713 }
714 break;
715 case AF_INET6:
716 if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
717 ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL,
718 &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
719 zoneid, tsl, match_flags_local, 0, ipst,
720 NULL);
721 }
722 if (ire == NULL) {
723 ire = ire_lookup_v6(&dst_addr_v6,
724 &net_mask_v6, &gw_addr_v6, ill, zoneid,
725 tsl, match_flags, ipst, &ifire, &v6setsrc,
726 &gwattr);
727 }
728 break;
729 }
730 if (tsl != NULL && tsl != crgetlabel(ioc_cr))
731 label_rele(tsl);
732
733 if (ire == NULL) {
734 error = ESRCH;
735 goto done;
736 }
737 /*
738 * Want to return failure if we get an IRE_NOROUTE from
739 * ire_route_recursive
740 */
741 if (ire->ire_type & IRE_NOROUTE) {
742 ire_refrele(ire);
743 ire = NULL;
744 error = ESRCH;
745 goto done;
746 }
747
748 /* we know the IRE before we come here */
749 switch (rtm->rtm_type) {
750 case RTM_GET:
751 mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af);
752 if (mp1 == NULL) {
753 error = ENOBUFS;
754 goto done;
755 }
756 freemsg(mp);
757 mp = mp1;
758 rtm = (rt_msghdr_t *)mp->b_rptr;
759 break;
760 case RTM_CHANGE:
761 /*
762 * Do not allow to the multirouting state of a route
763 * to be changed. This aims to prevent undesirable
764 * stages where both multirt and non-multirt routes
765 * for the same destination are declared.
766 */
767 if ((ire->ire_flags & RTF_MULTIRT) !=
768 (rtm->rtm_flags & RTF_MULTIRT)) {
769 error = EINVAL;
770 goto done;
771 }
772 /*
773 * Note that we do not need to do
774 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
775 * in metrics or gateway will not affect existing
776 * routes since it does not create a more specific
777 * route.
778 */
779 switch (af) {
780 case AF_INET:
781 if ((found_addrs & RTA_GATEWAY) != 0 &&
782 (ire->ire_gateway_addr != gw_addr)) {
783 ire->ire_gateway_addr = gw_addr;
784 }
785
786 if (rtsap != NULL) {
787 ga.ga_af = AF_INET;
788 IN6_IPADDR_TO_V4MAPPED(
789 ire->ire_gateway_addr, &ga.ga_addr);
790
791 gcgrp = gcgrp_lookup(&ga, B_TRUE);
792 if (gcgrp == NULL) {
793 error = ENOMEM;
794 goto done;
795 }
796 }
797
798 if ((found_addrs & RTA_SRC) != 0 &&
799 (rtm->rtm_flags & RTF_SETSRC) != 0 &&
800 (ire->ire_setsrc_addr != src_addr)) {
801 if (src_addr != INADDR_ANY) {
802 uint_t type;
803
804 /*
805 * The RTF_SETSRC flag is
806 * present, check that the
807 * supplied src address is not
808 * the loopback address. This
809 * would produce martian
810 * packets.
811 */
812 if (src_addr ==
813 htonl(INADDR_LOOPBACK)) {
814 error = EINVAL;
815 goto done;
816 }
817 /*
818 * Also check that the
819 * supplied addr is a valid
820 * local address.
821 */
822 type = ip_type_v4(src_addr,
823 ipst);
824 if (!(type &
825 (IRE_LOCAL|IRE_LOOPBACK))) {
826 error = EADDRNOTAVAIL;
827 goto done;
828 }
829 ire->ire_flags |= RTF_SETSRC;
830 ire->ire_setsrc_addr =
831 src_addr;
832 } else {
833 ire->ire_flags &= ~RTF_SETSRC;
834 ire->ire_setsrc_addr =
835 INADDR_ANY;
836 }
837 /*
838 * Let conn_ixa caching know that
839 * source address selection changed
840 */
841 ip_update_source_selection(ipst);
842 }
843 ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE);
844 break;
845 case AF_INET6:
846 mutex_enter(&ire->ire_lock);
847 if ((found_addrs & RTA_GATEWAY) != 0 &&
848 !IN6_ARE_ADDR_EQUAL(
849 &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
850 ire->ire_gateway_addr_v6 = gw_addr_v6;
851 }
852 mutex_exit(&ire->ire_lock);
853
854 if (rtsap != NULL) {
855 ga.ga_af = AF_INET6;
856 mutex_enter(&ire->ire_lock);
857 ga.ga_addr = ire->ire_gateway_addr_v6;
858 mutex_exit(&ire->ire_lock);
859
860 gcgrp = gcgrp_lookup(&ga, B_TRUE);
861 if (gcgrp == NULL) {
862 error = ENOMEM;
863 goto done;
864 }
865 }
866
867 if ((found_addrs & RTA_SRC) != 0 &&
868 (rtm->rtm_flags & RTF_SETSRC) != 0 &&
869 !IN6_ARE_ADDR_EQUAL(
870 &ire->ire_setsrc_addr_v6, &src_addr_v6)) {
871 if (!IN6_IS_ADDR_UNSPECIFIED(
872 &src_addr_v6)) {
873 uint_t type;
874
875 /*
876 * The RTF_SETSRC flag is
877 * present, check that the
878 * supplied src address is not
879 * the loopback address. This
880 * would produce martian
881 * packets.
882 */
883 if (IN6_IS_ADDR_LOOPBACK(
884 &src_addr_v6)) {
885 error = EINVAL;
886 goto done;
887 }
888 /*
889 * Also check that the
890 * supplied addr is a valid
891 * local address.
892 */
893 type = ip_type_v6(&src_addr_v6,
894 ipst);
895 if (!(type &
896 (IRE_LOCAL|IRE_LOOPBACK))) {
897 error = EADDRNOTAVAIL;
898 goto done;
899 }
900 mutex_enter(&ire->ire_lock);
901 ire->ire_flags |= RTF_SETSRC;
902 ire->ire_setsrc_addr_v6 =
903 src_addr_v6;
904 mutex_exit(&ire->ire_lock);
905 } else {
906 mutex_enter(&ire->ire_lock);
907 ire->ire_flags &= ~RTF_SETSRC;
908 ire->ire_setsrc_addr_v6 =
909 ipv6_all_zeros;
910 mutex_exit(&ire->ire_lock);
911 }
912 /*
913 * Let conn_ixa caching know that
914 * source address selection changed
915 */
916 ip_update_source_selection(ipst);
917 }
918 ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE);
919 break;
920 }
921
922 if (rtsap != NULL) {
923 ASSERT(gcgrp != NULL);
924
925 /*
926 * Create and add the security attribute to
927 * prefix IRE; it will add a reference to the
928 * group upon allocating a new entry. If it
929 * finds an already-existing entry for the
930 * security attribute, it simply returns it
931 * and no new group reference is made.
932 */
933 gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
934 if (gc == NULL ||
935 (error = tsol_ire_init_gwattr(ire,
936 ire->ire_ipversion, gc)) != 0) {
937 if (gc != NULL) {
938 GC_REFRELE(gc);
939 } else {
940 /* gc_create failed */
941 error = ENOMEM;
942 }
943 goto done;
944 }
945 }
946 rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
947 break;
948 }
949 break;
950 default:
951 error = EOPNOTSUPP;
952 break;
953 }
954 done:
955 if (ire != NULL)
956 ire_refrele(ire);
957 if (ifire != NULL)
958 ire_refrele(ifire);
959 if (ill != NULL)
960 ill_refrele(ill);
961
962 if (gcgrp_xtraref)
963 GCGRP_REFRELE(gcgrp);
964
965 if (rtm != NULL) {
966 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
967 if (error != 0) {
968 rtm->rtm_errno = error;
969 /* Send error ACK */
970 ip1dbg(("ip_rts_request: error %d\n", error));
971 } else {
972 rtm->rtm_flags |= RTF_DONE;
973 /* OK ACK already set up by caller except this */
974 ip2dbg(("ip_rts_request: OK ACK\n"));
975 }
976 rts_queue_input(mp, connp, af, RTSQ_ALL, ipst);
977 }
978 return (error);
979 }
980
981 /*
982 * Helper function that can do recursive lookups including when
983 * MATCH_IRE_GW and/or MATCH_IRE_MASK is set.
984 */
985 static ire_t *
ire_lookup_v4(ipaddr_t dst_addr,ipaddr_t net_mask,ipaddr_t gw_addr,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,int match_flags,ip_stack_t * ipst,ire_t ** pifire,ipaddr_t * v4setsrcp,tsol_ire_gw_secattr_t ** gwattrp)986 ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr,
987 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl,
988 int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp,
989 tsol_ire_gw_secattr_t **gwattrp)
990 {
991 ire_t *ire;
992 ire_t *ifire = NULL;
993 uint_t ire_type;
994
995 *pifire = NULL;
996 *v4setsrcp = INADDR_ANY;
997 *gwattrp = NULL;
998
999 /* Skip IRE_IF_CLONE */
1000 match_flags |= MATCH_IRE_TYPE;
1001 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
1002
1003 /*
1004 * ire_route_recursive can't match gateway or mask thus if they are
1005 * set we have to do two steps of lookups
1006 */
1007 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
1008 ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr,
1009 ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL);
1010
1011 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
1012 return (ire);
1013
1014 if (ire->ire_type & IRE_ONLINK)
1015 return (ire);
1016
1017 if (ire->ire_flags & RTF_SETSRC) {
1018 ASSERT(ire->ire_setsrc_addr != INADDR_ANY);
1019 *v4setsrcp = ire->ire_setsrc_addr;
1020 v4setsrcp = NULL;
1021 }
1022
1023 /* The first ire_gw_secattr is passed back */
1024 if (ire->ire_gw_secattr != NULL) {
1025 *gwattrp = ire->ire_gw_secattr;
1026 gwattrp = NULL;
1027 }
1028
1029 /* Look for an interface ire recursively based on the gateway */
1030 dst_addr = ire->ire_gateway_addr;
1031 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
1032 /*
1033 * Don't allow anything unusual past the first iteration.
1034 * After the first lookup, we should no longer look for
1035 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT
1036 * routes.
1037 *
1038 * In addition, after we have found a direct IRE_OFFLINK,
1039 * we should only look for interface or clone routes.
1040 */
1041 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */
1042
1043 if ((ire->ire_type & IRE_OFFLINK) &&
1044 !(ire->ire_flags & RTF_INDIRECT)) {
1045 ire_type = IRE_IF_ALL;
1046 } else {
1047 /*
1048 * no more local, loopback, broadcast routes
1049 */
1050 if (!(match_flags & MATCH_IRE_TYPE))
1051 ire_type = (IRE_OFFLINK|IRE_ONLINK);
1052 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST);
1053 }
1054 match_flags |= MATCH_IRE_TYPE;
1055
1056 ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
1057 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp,
1058 gwattrp, NULL);
1059 } else {
1060 ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
1061 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v4setsrcp,
1062 gwattrp, NULL);
1063 }
1064 *pifire = ifire;
1065 return (ire);
1066 }
1067
1068 static ire_t *
ire_lookup_v6(const in6_addr_t * dst_addr_v6,const in6_addr_t * net_mask_v6,const in6_addr_t * gw_addr_v6,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,int match_flags,ip_stack_t * ipst,ire_t ** pifire,in6_addr_t * v6setsrcp,tsol_ire_gw_secattr_t ** gwattrp)1069 ire_lookup_v6(const in6_addr_t *dst_addr_v6,
1070 const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
1071 const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
1072 ip_stack_t *ipst, ire_t **pifire,
1073 in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp)
1074 {
1075 ire_t *ire;
1076 ire_t *ifire = NULL;
1077 uint_t ire_type;
1078
1079 *pifire = NULL;
1080 *v6setsrcp = ipv6_all_zeros;
1081 *gwattrp = NULL;
1082
1083 /* Skip IRE_IF_CLONE */
1084 match_flags |= MATCH_IRE_TYPE;
1085 ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
1086
1087 /*
1088 * ire_route_recursive can't match gateway or mask thus if they are
1089 * set we have to do two steps of lookups
1090 */
1091 if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
1092 in6_addr_t dst;
1093
1094 ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6,
1095 gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0,
1096 ipst, NULL);
1097
1098 if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
1099 return (ire);
1100
1101 if (ire->ire_type & IRE_ONLINK)
1102 return (ire);
1103
1104 if (ire->ire_flags & RTF_SETSRC) {
1105 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
1106 &ire->ire_setsrc_addr_v6));
1107 *v6setsrcp = ire->ire_setsrc_addr_v6;
1108 v6setsrcp = NULL;
1109 }
1110
1111 /* The first ire_gw_secattr is passed back */
1112 if (ire->ire_gw_secattr != NULL) {
1113 *gwattrp = ire->ire_gw_secattr;
1114 gwattrp = NULL;
1115 }
1116
1117 mutex_enter(&ire->ire_lock);
1118 dst = ire->ire_gateway_addr_v6;
1119 mutex_exit(&ire->ire_lock);
1120 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
1121 /*
1122 * Don't allow anything unusual past the first iteration.
1123 * After the first lookup, we should no longer look for
1124 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT
1125 * routes.
1126 *
1127 * In addition, after we have found a direct IRE_OFFLINK,
1128 * we should only look for interface or clone routes.
1129 */
1130 match_flags |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */
1131
1132 if ((ire->ire_type & IRE_OFFLINK) &&
1133 !(ire->ire_flags & RTF_INDIRECT)) {
1134 ire_type = IRE_IF_ALL;
1135 } else {
1136 /*
1137 * no more local, loopback routes
1138 */
1139 if (!(match_flags & MATCH_IRE_TYPE))
1140 ire_type = (IRE_OFFLINK|IRE_ONLINK);
1141 ire_type &= ~(IRE_LOCAL|IRE_LOOPBACK);
1142 }
1143 match_flags |= MATCH_IRE_TYPE;
1144
1145 ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl,
1146 match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp, gwattrp,
1147 NULL);
1148 } else {
1149 ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid,
1150 tsl, match_flags, IRR_INCOMPLETE, 0, ipst, v6setsrcp,
1151 gwattrp, NULL);
1152 }
1153 *pifire = ifire;
1154 return (ire);
1155 }
1156
1157
1158 /*
1159 * Handle IP_IOC_RTS_REQUEST ioctls
1160 */
1161 int
ip_rts_request(queue_t * q,mblk_t * mp,cred_t * ioc_cr)1162 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
1163 {
1164 conn_t *connp = Q_TO_CONN(q);
1165 IOCP iocp = (IOCP)mp->b_rptr;
1166 mblk_t *mp1, *ioc_mp = mp;
1167 int error = 0;
1168 ip_stack_t *ipst;
1169
1170 ipst = connp->conn_netstack->netstack_ip;
1171
1172 ASSERT(mp->b_cont != NULL);
1173 /* ioc_mp holds mp */
1174 mp = mp->b_cont;
1175
1176 /*
1177 * The Routing Socket data starts on
1178 * next block. If there is no next block
1179 * this is an indication from routing module
1180 * that it is a routing socket stream queue.
1181 * We need to support that for compatibility with SDP since
1182 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
1183 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this.
1184 */
1185 if (mp->b_cont == NULL) {
1186 /*
1187 * This is a message from SDP
1188 * indicating that this is a Routing Socket
1189 * Stream. Insert this conn_t in routing
1190 * socket client list.
1191 */
1192 connp->conn_useloopback = 1;
1193 ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
1194 goto done;
1195 }
1196 mp1 = dupmsg(mp->b_cont);
1197 if (mp1 == NULL) {
1198 error = ENOBUFS;
1199 goto done;
1200 }
1201 mp = mp1;
1202
1203 error = ip_rts_request_common(mp, connp, ioc_cr);
1204 done:
1205 iocp->ioc_error = error;
1206 ioc_mp->b_datap->db_type = M_IOCACK;
1207 if (iocp->ioc_error != 0)
1208 iocp->ioc_count = 0;
1209 /* Note that we pass a NULL ira to rts_input */
1210 (connp->conn_recv)(connp, ioc_mp, NULL, NULL);
1211
1212 /* conn was refheld in ip_wput_ioctl. */
1213 CONN_DEC_IOCTLREF(connp);
1214 CONN_OPER_PENDING_DONE(connp);
1215
1216 return (error);
1217 }
1218
1219 /*
1220 * Build a reply to the RTM_GET request contained in the given message block
1221 * using the retrieved IRE of the destination address, the parent IRE (if it
1222 * exists) and the address family.
1223 *
1224 * Returns a pointer to a message block containing the reply if successful,
1225 * otherwise NULL is returned.
1226 */
1227 static mblk_t *
rts_rtmget(mblk_t * mp,ire_t * ire,ire_t * ifire,const in6_addr_t * setsrc,tsol_ire_gw_secattr_t * attrp,sa_family_t af)1228 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc,
1229 tsol_ire_gw_secattr_t *attrp, sa_family_t af)
1230 {
1231 rt_msghdr_t *rtm;
1232 rt_msghdr_t *new_rtm;
1233 mblk_t *new_mp;
1234 int rtm_addrs;
1235 int rtm_flags;
1236 tsol_gc_t *gc = NULL;
1237 tsol_gcgrp_t *gcgrp = NULL;
1238 ill_t *ill;
1239 ipif_t *ipif = NULL;
1240 ipaddr_t brdaddr; /* IFF_POINTOPOINT destination */
1241 ipaddr_t ifaddr;
1242 in6_addr_t brdaddr6; /* IFF_POINTOPOINT destination */
1243 in6_addr_t ifaddr6;
1244 ipaddr_t v4setsrc;
1245
1246 rtm = (rt_msghdr_t *)mp->b_rptr;
1247 ifaddr = 0;
1248 brdaddr = 0;
1249 rtm_flags = 0;
1250
1251 /*
1252 * Find the ill used to send packets. This will be NULL in case
1253 * of a reject or blackhole.
1254 */
1255 if (ifire != NULL)
1256 ill = ire_nexthop_ill(ifire);
1257 else
1258 ill = ire_nexthop_ill(ire);
1259
1260 if (attrp != NULL) {
1261 mutex_enter(&attrp->igsa_lock);
1262 if ((gc = attrp->igsa_gc) != NULL) {
1263 gcgrp = gc->gc_grp;
1264 ASSERT(gcgrp != NULL);
1265 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1266 }
1267 mutex_exit(&attrp->igsa_lock);
1268 }
1269
1270 /*
1271 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1272 *
1273 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1274 * RTA_IFP and RTA_IFA if either is defined, and also
1275 * returns RTA_BRD if the appropriate interface is
1276 * point-to-point.
1277 */
1278 rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1279 if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) {
1280 rtm_addrs |= (RTA_IFP | RTA_IFA);
1281 /*
1282 * We associate an IRE with an ILL, hence we don't exactly
1283 * know what might make sense for RTA_IFA and RTA_BRD. We
1284 * pick the first ipif on the ill.
1285 */
1286 ipif = ipif_get_next_ipif(NULL, ill);
1287 if (ipif != NULL) {
1288 if (ipif->ipif_isv6)
1289 ifaddr6 = ipif->ipif_v6lcl_addr;
1290 else
1291 ifaddr = ipif->ipif_lcl_addr;
1292 if (ipif->ipif_flags & IPIF_POINTOPOINT) {
1293 rtm_addrs |= RTA_BRD;
1294 if (ipif->ipif_isv6)
1295 brdaddr6 = ipif->ipif_v6pp_dst_addr;
1296 else
1297 brdaddr = ipif->ipif_pp_dst_addr;
1298 }
1299 ipif_refrele(ipif);
1300 }
1301 }
1302
1303 new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0);
1304 if (new_mp == NULL) {
1305 if (gcgrp != NULL)
1306 rw_exit(&gcgrp->gcgrp_rwlock);
1307 if (ill != NULL)
1308 ill_refrele(ill);
1309 return (NULL);
1310 }
1311
1312 /*
1313 * We set the destination address, gateway address,
1314 * netmask and flags in the RTM_GET response depending
1315 * on whether we found a parent IRE or not.
1316 * In particular, if we did find a parent IRE during the
1317 * recursive search, use that IRE's gateway address.
1318 * Otherwise, we use the IRE's source address for the
1319 * gateway address.
1320 */
1321 ASSERT(af == AF_INET || af == AF_INET6);
1322 switch (af) {
1323 case AF_INET:
1324 IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc);
1325 if (v4setsrc != INADDR_ANY)
1326 rtm_addrs |= RTA_SRC;
1327
1328 rtm_flags = ire->ire_flags;
1329 rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1330 ire->ire_mask, ire->ire_gateway_addr, v4setsrc,
1331 brdaddr, 0, ifaddr, ill, new_mp, gc);
1332 break;
1333 case AF_INET6:
1334 if (!IN6_IS_ADDR_UNSPECIFIED(setsrc))
1335 rtm_addrs |= RTA_SRC;
1336
1337 rtm_flags = ire->ire_flags;
1338 rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1339 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
1340 setsrc, &brdaddr6, &ipv6_all_zeros,
1341 &ifaddr6, ill, new_mp, gc);
1342 break;
1343 }
1344
1345 if (gcgrp != NULL)
1346 rw_exit(&gcgrp->gcgrp_rwlock);
1347
1348 new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1349
1350 /*
1351 * The rtm_msglen, rtm_version and rtm_type fields in
1352 * RTM_GET response are filled in by rts_fill_msg.
1353 *
1354 * rtm_addrs and rtm_flags are filled in based on what
1355 * was requested and the state of the IREs looked up
1356 * above.
1357 *
1358 * rtm_inits and rtm_rmx are filled in with metrics
1359 * based on whether a parent IRE was found or not.
1360 *
1361 * TODO: rtm_index and rtm_use should probably be
1362 * filled in with something resonable here and not just
1363 * copied from the request.
1364 */
1365 new_rtm->rtm_index = rtm->rtm_index;
1366 new_rtm->rtm_pid = rtm->rtm_pid;
1367 new_rtm->rtm_seq = rtm->rtm_seq;
1368 new_rtm->rtm_use = rtm->rtm_use;
1369 new_rtm->rtm_addrs = rtm_addrs;
1370 new_rtm->rtm_flags = rtm_flags;
1371 new_rtm->rtm_inits = rts_getmetrics(ire, ill, &new_rtm->rtm_rmx);
1372 if (ill != NULL)
1373 ill_refrele(ill);
1374 return (new_mp);
1375 }
1376
1377 /*
1378 * Fill the given if_data_t with interface statistics.
1379 */
1380 static void
rts_getifdata(if_data_t * if_data,const ipif_t * ipif)1381 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1382 {
1383 if_data->ifi_type = ipif->ipif_ill->ill_type;
1384 /* ethernet, tokenring, etc */
1385 if_data->ifi_addrlen = 0; /* media address length */
1386 if_data->ifi_hdrlen = 0; /* media header length */
1387 if_data->ifi_mtu = ipif->ipif_ill->ill_mtu; /* mtu */
1388 /* metric (external only) */
1389 if_data->ifi_metric = ipif->ipif_ill->ill_metric;
1390 if_data->ifi_baudrate = 0; /* linespeed */
1391
1392 if_data->ifi_ipackets = 0; /* packets received on if */
1393 if_data->ifi_ierrors = 0; /* input errors on interface */
1394 if_data->ifi_opackets = 0; /* packets sent on interface */
1395 if_data->ifi_oerrors = 0; /* output errors on if */
1396 if_data->ifi_collisions = 0; /* collisions on csma if */
1397 if_data->ifi_ibytes = 0; /* total number received */
1398 if_data->ifi_obytes = 0; /* total number sent */
1399 if_data->ifi_imcasts = 0; /* multicast packets received */
1400 if_data->ifi_omcasts = 0; /* multicast packets sent */
1401 if_data->ifi_iqdrops = 0; /* dropped on input */
1402 if_data->ifi_noproto = 0; /* destined for unsupported */
1403 /* protocol. */
1404 }
1405
1406 /*
1407 * Set the metrics on a forwarding table route.
1408 */
1409 static void
rts_setmetrics(ire_t * ire,uint_t which,rt_metrics_t * metrics)1410 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1411 {
1412 clock_t rtt;
1413 clock_t rtt_sd;
1414 ill_t *ill;
1415 ifrt_t *ifrt;
1416 mblk_t *mp;
1417 in6_addr_t gw_addr_v6 = { 0 };
1418
1419 /* Need to add back some metrics to the IRE? */
1420 /*
1421 * Bypass obtaining the lock and searching ill_saved_ire_mp in the
1422 * common case of no metrics.
1423 */
1424 if (which == 0)
1425 return;
1426 ire->ire_metrics.iulp_set = B_TRUE;
1427
1428 /*
1429 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1430 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1431 * microseconds.
1432 */
1433 rtt = 0;
1434 if (which & RTV_RTT)
1435 rtt = metrics->rmx_rtt / 1000;
1436 if (which & RTV_RTTVAR)
1437 rtt_sd = metrics->rmx_rttvar / 1000;
1438
1439 /*
1440 * Update the metrics in the IRE itself.
1441 */
1442 mutex_enter(&ire->ire_lock);
1443 if (which & RTV_MTU)
1444 ire->ire_metrics.iulp_mtu = metrics->rmx_mtu;
1445 if (which & RTV_RTT)
1446 ire->ire_metrics.iulp_rtt = rtt;
1447 if (which & RTV_SSTHRESH)
1448 ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh;
1449 if (which & RTV_RTTVAR)
1450 ire->ire_metrics.iulp_rtt_sd = rtt_sd;
1451 if (which & RTV_SPIPE)
1452 ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe;
1453 if (which & RTV_RPIPE)
1454 ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe;
1455 mutex_exit(&ire->ire_lock);
1456
1457 /*
1458 * Search through the ifrt_t chain hanging off the ILL in order to
1459 * reflect the metric change there.
1460 */
1461 ill = ire->ire_ill;
1462 if (ill == NULL)
1463 return;
1464 ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1465 ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1466 if (ill->ill_isv6) {
1467 mutex_enter(&ire->ire_lock);
1468 gw_addr_v6 = ire->ire_gateway_addr_v6;
1469 mutex_exit(&ire->ire_lock);
1470 }
1471 mutex_enter(&ill->ill_saved_ire_lock);
1472 for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1473 /*
1474 * On a given ill, the tuple of address, gateway, mask,
1475 * ire_type and zoneid unique for each saved IRE.
1476 */
1477 ifrt = (ifrt_t *)mp->b_rptr;
1478 if (ill->ill_isv6) {
1479 if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1480 &ire->ire_addr_v6) ||
1481 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1482 &gw_addr_v6) ||
1483 !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1484 &ire->ire_mask_v6))
1485 continue;
1486 } else {
1487 if (ifrt->ifrt_addr != ire->ire_addr ||
1488 ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1489 ifrt->ifrt_mask != ire->ire_mask)
1490 continue;
1491 }
1492 if (ifrt->ifrt_zoneid != ire->ire_zoneid ||
1493 ifrt->ifrt_type != ire->ire_type)
1494 continue;
1495
1496 if (which & RTV_MTU)
1497 ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu;
1498 if (which & RTV_RTT)
1499 ifrt->ifrt_metrics.iulp_rtt = rtt;
1500 if (which & RTV_SSTHRESH) {
1501 ifrt->ifrt_metrics.iulp_ssthresh =
1502 metrics->rmx_ssthresh;
1503 }
1504 if (which & RTV_RTTVAR)
1505 ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar;
1506 if (which & RTV_SPIPE)
1507 ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe;
1508 if (which & RTV_RPIPE)
1509 ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe;
1510 break;
1511 }
1512 mutex_exit(&ill->ill_saved_ire_lock);
1513
1514 /*
1515 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they
1516 * get any new iulp_mtu.
1517 * We do that by deleting them; ire_create_if_clone will pick
1518 * up the new metrics.
1519 */
1520 if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0)
1521 ire_dep_delete_if_clone(ire);
1522 }
1523
1524 /*
1525 * Get the metrics from a forwarding table route.
1526 */
1527 static int
rts_getmetrics(ire_t * ire,ill_t * ill,rt_metrics_t * metrics)1528 rts_getmetrics(ire_t *ire, ill_t *ill, rt_metrics_t *metrics)
1529 {
1530 int metrics_set = 0;
1531
1532 bzero(metrics, sizeof (rt_metrics_t));
1533
1534 /*
1535 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1536 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1537 * microseconds.
1538 */
1539 metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000;
1540 metrics_set |= RTV_RTT;
1541 if (ire->ire_metrics.iulp_mtu != 0) {
1542 metrics->rmx_mtu = ire->ire_metrics.iulp_mtu;
1543 metrics_set |= RTV_MTU;
1544 } else if (ill != NULL) {
1545 metrics->rmx_mtu = ill->ill_mtu;
1546 metrics_set |= RTV_MTU;
1547 }
1548 metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh;
1549 metrics_set |= RTV_SSTHRESH;
1550 metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000;
1551 metrics_set |= RTV_RTTVAR;
1552 metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe;
1553 metrics_set |= RTV_SPIPE;
1554 metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe;
1555 metrics_set |= RTV_RPIPE;
1556 return (metrics_set);
1557 }
1558
1559 /*
1560 * Given two sets of metrics (src and dst), use the dst values if they are
1561 * set. If a dst value is not set but the src value is set, then we use
1562 * the src value.
1563 * dst is updated with the new values.
1564 * This is used to merge information from a dce_t and ire_metrics, where the
1565 * dce values takes precedence.
1566 */
1567 void
rts_merge_metrics(iulp_t * dst,const iulp_t * src)1568 rts_merge_metrics(iulp_t *dst, const iulp_t *src)
1569 {
1570 if (!src->iulp_set)
1571 return;
1572
1573 if (dst->iulp_ssthresh == 0)
1574 dst->iulp_ssthresh = src->iulp_ssthresh;
1575 if (dst->iulp_rtt == 0)
1576 dst->iulp_rtt = src->iulp_rtt;
1577 if (dst->iulp_rtt_sd == 0)
1578 dst->iulp_rtt_sd = src->iulp_rtt_sd;
1579 if (dst->iulp_spipe == 0)
1580 dst->iulp_spipe = src->iulp_spipe;
1581 if (dst->iulp_rpipe == 0)
1582 dst->iulp_rpipe = src->iulp_rpipe;
1583 if (dst->iulp_rtomax == 0)
1584 dst->iulp_rtomax = src->iulp_rtomax;
1585 if (dst->iulp_sack == 0)
1586 dst->iulp_sack = src->iulp_sack;
1587 if (dst->iulp_tstamp_ok == 0)
1588 dst->iulp_tstamp_ok = src->iulp_tstamp_ok;
1589 if (dst->iulp_wscale_ok == 0)
1590 dst->iulp_wscale_ok = src->iulp_wscale_ok;
1591 if (dst->iulp_ecn_ok == 0)
1592 dst->iulp_ecn_ok = src->iulp_ecn_ok;
1593 if (dst->iulp_pmtud_ok == 0)
1594 dst->iulp_pmtud_ok = src->iulp_pmtud_ok;
1595 if (dst->iulp_mtu == 0)
1596 dst->iulp_mtu = src->iulp_mtu;
1597 }
1598
1599
1600 /*
1601 * Takes a pointer to a routing message and extracts necessary info by looking
1602 * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1603 * passed (all of which must be valid).
1604 *
1605 * The bitmask of sockaddrs actually found in the message is returned, or zero
1606 * is returned in the case of an error.
1607 */
1608 static int
rts_getaddrs(rt_msghdr_t * rtm,in6_addr_t * dst_addrp,in6_addr_t * gw_addrp,in6_addr_t * net_maskp,in6_addr_t * authorp,in6_addr_t * if_addrp,in6_addr_t * in_src_addrp,ushort_t * indexp,sa_family_t * afp,tsol_rtsecattr_t * rtsecattr,int * error)1609 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1610 in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1611 in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
1612 tsol_rtsecattr_t *rtsecattr, int *error)
1613 {
1614 struct sockaddr *sa;
1615 int i;
1616 int addr_bits;
1617 int length;
1618 int found_addrs = 0;
1619 caddr_t cp;
1620 size_t size;
1621 struct sockaddr_dl *sdl;
1622
1623 *dst_addrp = ipv6_all_zeros;
1624 *gw_addrp = ipv6_all_zeros;
1625 *net_maskp = ipv6_all_zeros;
1626 *authorp = ipv6_all_zeros;
1627 *if_addrp = ipv6_all_zeros;
1628 *in_src_addrp = ipv6_all_zeros;
1629 *indexp = 0;
1630 *afp = AF_UNSPEC;
1631 rtsecattr->rtsa_cnt = 0;
1632 *error = 0;
1633
1634 /*
1635 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1636 * RTA_IFA and RTA_AUTHOR. The rest will be added as we need them.
1637 */
1638 cp = (caddr_t)&rtm[1];
1639 length = rtm->rtm_msglen;
1640 for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1641 /*
1642 * The address family we are working with starts out as
1643 * AF_UNSPEC, but is set to the one specified with the
1644 * destination address.
1645 *
1646 * If the "working" address family that has been set to
1647 * something other than AF_UNSPEC, then the address family of
1648 * subsequent sockaddrs must either be AF_UNSPEC (for
1649 * compatibility with older programs) or must be the same as our
1650 * "working" one.
1651 *
1652 * This code assumes that RTA_DST (1) comes first in the loop.
1653 */
1654 sa = (struct sockaddr *)cp;
1655 addr_bits = (rtm->rtm_addrs & (1 << i));
1656 if (addr_bits == 0)
1657 continue;
1658 switch (addr_bits) {
1659 case RTA_DST:
1660 size = rts_copyfromsockaddr(sa, dst_addrp);
1661 *afp = sa->sa_family;
1662 break;
1663 case RTA_GATEWAY:
1664 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1665 return (0);
1666 size = rts_copyfromsockaddr(sa, gw_addrp);
1667 break;
1668 case RTA_NETMASK:
1669 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1670 return (0);
1671 size = rts_copyfromsockaddr(sa, net_maskp);
1672 break;
1673 case RTA_IFP:
1674 if (sa->sa_family != AF_LINK &&
1675 sa->sa_family != AF_UNSPEC)
1676 return (0);
1677 sdl = (struct sockaddr_dl *)cp;
1678 *indexp = sdl->sdl_index;
1679 size = sizeof (struct sockaddr_dl);
1680 break;
1681 case RTA_SRC:
1682 /* Source address of the incoming packet */
1683 size = rts_copyfromsockaddr(sa, in_src_addrp);
1684 *afp = sa->sa_family;
1685 break;
1686 case RTA_IFA:
1687 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1688 return (0);
1689 size = rts_copyfromsockaddr(sa, if_addrp);
1690 break;
1691 case RTA_AUTHOR:
1692 if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1693 return (0);
1694 size = rts_copyfromsockaddr(sa, authorp);
1695 break;
1696 default:
1697 return (0);
1698 }
1699 if (size == 0)
1700 return (0);
1701 cp += size;
1702 found_addrs |= addr_bits;
1703 }
1704
1705 /*
1706 * Parse the routing message and look for any security-
1707 * related attributes for the route. For each valid
1708 * attribute, allocate/obtain the corresponding kernel
1709 * route security attributes.
1710 */
1711 if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) {
1712 *error = tsol_rtsa_init(rtm, rtsecattr, cp);
1713 ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1714 }
1715
1716 return (found_addrs);
1717 }
1718
1719 /*
1720 * Fills the message with the given info.
1721 */
1722 static void
rts_fill_msg(int type,int rtm_addrs,ipaddr_t dst,ipaddr_t mask,ipaddr_t gateway,ipaddr_t src_addr,ipaddr_t brd_addr,ipaddr_t author,ipaddr_t ifaddr,const ill_t * ill,mblk_t * mp,const tsol_gc_t * gc)1723 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1724 ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1725 ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
1726 const tsol_gc_t *gc)
1727 {
1728 rt_msghdr_t *rtm;
1729 sin_t *sin;
1730 size_t data_size, header_size;
1731 uchar_t *cp;
1732 int i;
1733
1734 ASSERT(mp != NULL);
1735 /*
1736 * First find the type of the message
1737 * and its length.
1738 */
1739 header_size = rts_header_msg_size(type);
1740 /*
1741 * Now find the size of the data
1742 * that follows the message header.
1743 */
1744 data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0);
1745
1746 rtm = (rt_msghdr_t *)mp->b_rptr;
1747 mp->b_wptr = &mp->b_rptr[header_size];
1748 cp = mp->b_wptr;
1749 bzero(cp, data_size);
1750 for (i = 0; i < RTA_NUMBITS; i++) {
1751 sin = (sin_t *)cp;
1752 switch (rtm_addrs & (1 << i)) {
1753 case RTA_DST:
1754 sin->sin_addr.s_addr = dst;
1755 sin->sin_family = AF_INET;
1756 cp += sizeof (sin_t);
1757 break;
1758 case RTA_GATEWAY:
1759 sin->sin_addr.s_addr = gateway;
1760 sin->sin_family = AF_INET;
1761 cp += sizeof (sin_t);
1762 break;
1763 case RTA_NETMASK:
1764 sin->sin_addr.s_addr = mask;
1765 sin->sin_family = AF_INET;
1766 cp += sizeof (sin_t);
1767 break;
1768 case RTA_IFP:
1769 cp += ill_dls_info((struct sockaddr_dl *)cp, ill);
1770 break;
1771 case RTA_IFA:
1772 sin->sin_addr.s_addr = ifaddr;
1773 sin->sin_family = AF_INET;
1774 cp += sizeof (sin_t);
1775 break;
1776 case RTA_SRC:
1777 sin->sin_addr.s_addr = src_addr;
1778 sin->sin_family = AF_INET;
1779 cp += sizeof (sin_t);
1780 break;
1781 case RTA_AUTHOR:
1782 sin->sin_addr.s_addr = author;
1783 sin->sin_family = AF_INET;
1784 cp += sizeof (sin_t);
1785 break;
1786 case RTA_BRD:
1787 /*
1788 * RTA_BRD is used typically to specify a point-to-point
1789 * destination address.
1790 */
1791 sin->sin_addr.s_addr = brd_addr;
1792 sin->sin_family = AF_INET;
1793 cp += sizeof (sin_t);
1794 break;
1795 }
1796 }
1797
1798 if (gc != NULL) {
1799 rtm_ext_t *rtm_ext;
1800 struct rtsa_s *rp_dst;
1801 tsol_rtsecattr_t *rsap;
1802
1803 ASSERT(gc->gc_grp != NULL);
1804 ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1805
1806 rtm_ext = (rtm_ext_t *)cp;
1807 rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1808 rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1);
1809
1810 rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1811 rsap->rtsa_cnt = 1;
1812 rp_dst = rsap->rtsa_attr;
1813
1814 ASSERT(gc->gc_db != NULL);
1815 bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1816 cp = (uchar_t *)rp_dst;
1817 }
1818
1819 mp->b_wptr = cp;
1820 mp->b_cont = NULL;
1821 /*
1822 * set the fields that are common to
1823 * to different messages.
1824 */
1825 rtm->rtm_msglen = (short)(header_size + data_size);
1826 rtm->rtm_version = RTM_VERSION;
1827 rtm->rtm_type = (uchar_t)type;
1828 }
1829
1830 /*
1831 * Allocates and initializes a routing socket message.
1832 * Note that sacnt is either zero or one.
1833 */
1834 mblk_t *
rts_alloc_msg(int type,int rtm_addrs,sa_family_t af,uint_t sacnt)1835 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1836 {
1837 size_t length;
1838 mblk_t *mp;
1839
1840 length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1841 mp = allocb(length, BPRI_MED);
1842 if (mp == NULL)
1843 return (mp);
1844 bzero(mp->b_rptr, length);
1845 return (mp);
1846 }
1847
1848 /*
1849 * Returns the size of the routing
1850 * socket message header size.
1851 */
1852 size_t
rts_header_msg_size(int type)1853 rts_header_msg_size(int type)
1854 {
1855 switch (type) {
1856 case RTM_DELADDR:
1857 case RTM_NEWADDR:
1858 case RTM_CHGADDR:
1859 case RTM_FREEADDR:
1860 return (sizeof (ifa_msghdr_t));
1861 case RTM_IFINFO:
1862 return (sizeof (if_msghdr_t));
1863 default:
1864 return (sizeof (rt_msghdr_t));
1865 }
1866 }
1867
1868 /*
1869 * Returns the size of the message needed with the given rtm_addrs and family.
1870 *
1871 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1872 * of the same family (currently either AF_INET or AF_INET6).
1873 */
1874 size_t
rts_data_msg_size(int rtm_addrs,sa_family_t af,uint_t sacnt)1875 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1876 {
1877 int i;
1878 size_t length = 0;
1879
1880 for (i = 0; i < RTA_NUMBITS; i++) {
1881 switch (rtm_addrs & (1 << i)) {
1882 case RTA_IFP:
1883 length += sizeof (struct sockaddr_dl);
1884 break;
1885 case RTA_DST:
1886 case RTA_GATEWAY:
1887 case RTA_NETMASK:
1888 case RTA_SRC:
1889 case RTA_IFA:
1890 case RTA_AUTHOR:
1891 case RTA_BRD:
1892 ASSERT(af == AF_INET || af == AF_INET6);
1893 switch (af) {
1894 case AF_INET:
1895 length += sizeof (sin_t);
1896 break;
1897 case AF_INET6:
1898 length += sizeof (sin6_t);
1899 break;
1900 }
1901 break;
1902 }
1903 }
1904 if (sacnt > 0)
1905 length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1906
1907 return (length);
1908 }
1909
1910 /*
1911 * This routine is called to generate a message to the routing
1912 * socket indicating that a redirect has occured, a routing lookup
1913 * has failed, or that a protocol has detected timeouts to a particular
1914 * destination. This routine is called for message types RTM_LOSING,
1915 * RTM_REDIRECT, and RTM_MISS.
1916 */
1917 void
ip_rts_change(int type,ipaddr_t dst_addr,ipaddr_t gw_addr,ipaddr_t net_mask,ipaddr_t source,ipaddr_t author,int flags,int error,int rtm_addrs,ip_stack_t * ipst)1918 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1919 ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1920 ip_stack_t *ipst)
1921 {
1922 rt_msghdr_t *rtm;
1923 mblk_t *mp;
1924
1925 if (rtm_addrs == 0)
1926 return;
1927 mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1928 if (mp == NULL)
1929 return;
1930 rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1931 author, 0, NULL, mp, NULL);
1932 rtm = (rt_msghdr_t *)mp->b_rptr;
1933 rtm->rtm_flags = flags;
1934 rtm->rtm_errno = error;
1935 rtm->rtm_flags |= RTF_DONE;
1936 rtm->rtm_addrs = rtm_addrs;
1937 rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst);
1938 }
1939
1940 /*
1941 * This routine is called to generate a message to the routing
1942 * socket indicating that the status of a network interface has changed.
1943 * Message type generated RTM_IFINFO.
1944 */
1945 void
ip_rts_ifmsg(const ipif_t * ipif,uint_t flags)1946 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags)
1947 {
1948 ip_rts_xifmsg(ipif, 0, 0, flags);
1949 }
1950
1951 void
ip_rts_xifmsg(const ipif_t * ipif,uint64_t set,uint64_t clear,uint_t flags)1952 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags)
1953 {
1954 if_msghdr_t *ifm;
1955 mblk_t *mp;
1956 sa_family_t af;
1957 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
1958
1959 /*
1960 * This message should be generated only
1961 * when the physical device is changing
1962 * state.
1963 */
1964 if (ipif->ipif_id != 0)
1965 return;
1966 if (ipif->ipif_isv6) {
1967 af = AF_INET6;
1968 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1969 if (mp == NULL)
1970 return;
1971 rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1972 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1973 &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1974 ipif->ipif_ill, mp, NULL);
1975 } else {
1976 af = AF_INET;
1977 mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1978 if (mp == NULL)
1979 return;
1980 rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0,
1981 ipif->ipif_ill, mp, NULL);
1982 }
1983 ifm = (if_msghdr_t *)mp->b_rptr;
1984 ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1985 ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1986 ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear;
1987 rts_getifdata(&ifm->ifm_data, ipif);
1988 ifm->ifm_addrs = RTA_IFP;
1989
1990 if (flags & RTSQ_DEFAULT) {
1991 flags = RTSQ_ALL;
1992 /*
1993 * If this message is for an underlying interface, prevent
1994 * "normal" (IPMP-unaware) routing sockets from seeing it.
1995 */
1996 if (IS_UNDER_IPMP(ipif->ipif_ill))
1997 flags &= ~RTSQ_NORMAL;
1998 }
1999
2000 rts_queue_input(mp, NULL, af, flags, ipst);
2001 }
2002
2003 /*
2004 * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message;
2005 * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR)
2006 * generate the ifa_msghdr_t message.
2007 */
2008 static void
rts_new_rtsmsg(int cmd,int error,const ipif_t * ipif,uint_t flags)2009 rts_new_rtsmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
2010 {
2011 int rtm_addrs;
2012 mblk_t *mp;
2013 ifa_msghdr_t *ifam;
2014 rt_msghdr_t *rtm;
2015 sa_family_t af;
2016 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
2017
2018 /*
2019 * Do not report unspecified address if this is the RTM_CHGADDR or
2020 * RTM_FREEADDR message.
2021 */
2022 if (cmd == RTM_CHGADDR || cmd == RTM_FREEADDR) {
2023 if (!ipif->ipif_isv6) {
2024 if (ipif->ipif_lcl_addr == INADDR_ANY)
2025 return;
2026 } else if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) {
2027 return;
2028 }
2029 }
2030
2031 if (ipif->ipif_isv6)
2032 af = AF_INET6;
2033 else
2034 af = AF_INET;
2035
2036 if (cmd == RTM_ADD || cmd == RTM_DELETE)
2037 rtm_addrs = (RTA_DST | RTA_NETMASK);
2038 else
2039 rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
2040
2041 mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
2042 if (mp == NULL)
2043 return;
2044
2045 if (cmd != RTM_ADD && cmd != RTM_DELETE) {
2046 switch (af) {
2047 case AF_INET:
2048 rts_fill_msg(cmd, rtm_addrs, 0,
2049 ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
2050 ipif->ipif_pp_dst_addr, 0,
2051 ipif->ipif_lcl_addr, ipif->ipif_ill,
2052 mp, NULL);
2053 break;
2054 case AF_INET6:
2055 rts_fill_msg_v6(cmd, rtm_addrs,
2056 &ipv6_all_zeros, &ipif->ipif_v6net_mask,
2057 &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
2058 &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
2059 &ipif->ipif_v6lcl_addr, ipif->ipif_ill,
2060 mp, NULL);
2061 break;
2062 }
2063 ifam = (ifa_msghdr_t *)mp->b_rptr;
2064 ifam->ifam_index =
2065 ipif->ipif_ill->ill_phyint->phyint_ifindex;
2066 ifam->ifam_metric = ipif->ipif_ill->ill_metric;
2067 ifam->ifam_flags = ((cmd == RTM_NEWADDR) ? RTF_UP : 0);
2068 ifam->ifam_addrs = rtm_addrs;
2069 } else {
2070 switch (af) {
2071 case AF_INET:
2072 rts_fill_msg(cmd, rtm_addrs,
2073 ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
2074 0, 0, 0, 0, NULL, mp, NULL);
2075 break;
2076 case AF_INET6:
2077 rts_fill_msg_v6(cmd, rtm_addrs,
2078 &ipif->ipif_v6lcl_addr,
2079 &ipif->ipif_v6net_mask, &ipv6_all_zeros,
2080 &ipv6_all_zeros, &ipv6_all_zeros,
2081 &ipv6_all_zeros, &ipv6_all_zeros,
2082 NULL, mp, NULL);
2083 break;
2084 }
2085 rtm = (rt_msghdr_t *)mp->b_rptr;
2086 rtm->rtm_index =
2087 ipif->ipif_ill->ill_phyint->phyint_ifindex;
2088 rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
2089 rtm->rtm_errno = error;
2090 if (error == 0)
2091 rtm->rtm_flags |= RTF_DONE;
2092 rtm->rtm_addrs = rtm_addrs;
2093 }
2094 rts_queue_input(mp, NULL, af, flags, ipst);
2095 }
2096
2097 /*
2098 * This is called to generate messages to the routing socket
2099 * indicating a network interface has had addresses associated with it.
2100 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
2101 */
2102 void
ip_rts_newaddrmsg(int cmd,int error,const ipif_t * ipif,uint_t flags)2103 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
2104 {
2105 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
2106
2107 if (flags & RTSQ_DEFAULT) {
2108 flags = RTSQ_ALL;
2109 /*
2110 * If this message is for an underlying interface, prevent
2111 * "normal" (IPMP-unaware) routing sockets from seeing it.
2112 */
2113 if (IS_UNDER_IPMP(ipif->ipif_ill))
2114 flags &= ~RTSQ_NORMAL;
2115 }
2116
2117 /*
2118 * Let conn_ixa caching know that source address selection
2119 * changed
2120 */
2121 if (cmd == RTM_ADD || cmd == RTM_DELETE)
2122 ip_update_source_selection(ipst);
2123
2124 /*
2125 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
2126 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
2127 * otherwise simply send the request.
2128 */
2129 switch (cmd) {
2130 case RTM_ADD:
2131 rts_new_rtsmsg(RTM_NEWADDR, error, ipif, flags);
2132 rts_new_rtsmsg(RTM_ADD, error, ipif, flags);
2133 break;
2134 case RTM_DELETE:
2135 rts_new_rtsmsg(RTM_DELETE, error, ipif, flags);
2136 rts_new_rtsmsg(RTM_DELADDR, error, ipif, flags);
2137 break;
2138 default:
2139 rts_new_rtsmsg(cmd, error, ipif, flags);
2140 break;
2141 }
2142 }
2143
2144 /*
2145 * Based on the address family specified in a sockaddr, copy the address field
2146 * into an in6_addr_t.
2147 *
2148 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
2149 * compatibility with programs that leave the family cleared in the sockaddr.
2150 * Callers of rts_copyfromsockaddr should check the family themselves if they
2151 * wish to verify its value.
2152 *
2153 * In the case of AF_INET6, a check is made to ensure that address is not an
2154 * IPv4-mapped address.
2155 */
2156 size_t
rts_copyfromsockaddr(struct sockaddr * sa,in6_addr_t * addrp)2157 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
2158 {
2159 switch (sa->sa_family) {
2160 case AF_INET:
2161 case AF_UNSPEC:
2162 IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
2163 return (sizeof (sin_t));
2164 case AF_INET6:
2165 *addrp = ((sin6_t *)sa)->sin6_addr;
2166 if (IN6_IS_ADDR_V4MAPPED(addrp))
2167 return (0);
2168 return (sizeof (sin6_t));
2169 default:
2170 return (0);
2171 }
2172 }
2173