1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. 23 * All rights reserved. Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Procedures for the kernel part of DVMRP, 31 * a Distance-Vector Multicast Routing Protocol. 32 * (See RFC-1075) 33 * Written by David Waitzman, BBN Labs, August 1988. 34 * Modified by Steve Deering, Stanford, February 1989. 35 * Modified by Mark J. 
 * Steiglitz, Stanford, May, 1991
 * Modified by Van Jacobson, LBL, January 1993
 * Modified by Ajit Thyagarajan, PARC, August 1993
 * Modified by Bill Fenner, PARC, April 1995
 *
 * MROUTING 3.5
 */

/*
 * TODO
 * - function pointer field in vif, void *vif_sendit()
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <net/if.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <net/if_dl.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/mib2.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
#include <inet/snmpcom.h>

#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <netinet/udp.h>
#include <netinet/ip_mroute.h>
#include <inet/ip_multi.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ipclassifier.h>

#include <netinet/pim.h>


/*
 * MT Design:
 *
 * There are three main data structures viftable, mfctable and tbftable that
 * need to be protected against MT races.
 *
 * viftable is a fixed length array of vif structs. There is no lock to protect
 * the whole array; instead each struct is protected by its own individual
 * lock.
 * The value of v_marks in conjunction with the value of v_refcnt determines
 * the current state of a vif structure. One special state that needs mention
 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
 * that the vif is being initialized.
 * Each structure is freed when the refcnt goes down to zero. If a delete comes
 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED,
 * which prevents the struct from further use. When the refcnt goes to zero
 * the struct is freed and is marked VIF_MARK_NOTINUSE.
 * vif struct stores a pointer to the ipif in v_ipif; to prevent ipif/ill
 * from going away a refhold is put on the ipif before using it. See
 * lock_good_vif() and unlock_good_vif().
 *
 * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
 * of the vif struct.
 *
 * tbftable is also a fixed length array of tbf structs and is only accessed
 * via v_tbf. It is protected by its own lock tbf_lock.
 *
 * Lock Ordering is
 * v_lock --> tbf_lock
 * v_lock --> ill_lock
 *
 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
 * it also maintains a state. These fields are protected by a lock
 * (mfcb_lock). mfc structs only maintain a state and have no refcnt.
 * mfc_mutex is used to protect the struct elements.
 *
 * mfc structs are dynamically allocated and are singly linked
 * at the head of the chain. When an mfc structure is to be deleted
 * it is marked condemned and so is the state in the bucket struct.
 * When the last walker of the hash bucket exits all the mfc structs
 * marked condemned are freed.
 *
 * Locking Hierarchy:
 * The bucket lock should be acquired before the mfc struct lock.
 * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
 * operations on the bucket struct.
 *
 * last_encap_lock and numvifs_mutex should be acquired after
 * acquiring vif or mfc locks. These locks protect some global variables.
 *
 * The statistics are not currently protected by a lock,
 * causing the stats to be approximate, not exact.
 */

/*
 * Globals
 * All but ip_g_mrouter and ip_mrtproto could be static,
 * except for netstat or debugging purposes.
 */
queue_t		*ip_g_mrouter = NULL;	/* upcall queue; non-NULL iff mrouted is running */
static kmutex_t	ip_g_mrouter_mutex;	/* protects ip_g_mrouter transitions */

int		ip_mrtproto = IGMP_DVMRP;	/* for netstat only */
struct mrtstat	mrtstat;			/* Stats for netstat */

#define	NO_VIF	MAXVIFS		/* from mrouted, no route for src */

/*
 * Timeouts:
 *	Upcall timeouts - BSD uses boolean_t mfc->expire and
 *	nexpire[MFCTBLSIZE], the number of times expire has been called.
 *	SunOS 5.x uses mfc->timeout for each mfc.
 *	Some Unixes are limited in the number of simultaneous timeouts
 *	that can be run, SunOS 5.x does not have this restriction.
 */

/*
 * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and
 * UPCALL_EXPIRE is the number of timeouts before a particular upcall
 * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE
 */
#define	EXPIRE_TIMEOUT	(hz/4)	/* 4x / second */
#define	UPCALL_EXPIRE	6	/* number of timeouts */

/*
 * Hash function for a source, group entry
 */
#define	MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
	((g) >> 20) ^ ((g) >> 10) ^ (g))

/*
 * mfctable:
 * Includes all mfcs, including waiting upcalls.
 * Multiple mfcs per bucket.
 */
static struct mfcb	mfctable[MFCTBLSIZ];	/* kernel routing table	*/

/*
 * Define the token bucket filter structures.
 * tbftable -> each vif has one of these for storing info.
 */
struct tbf		tbftable[MAXVIFS];
#define	TBF_REPROCESS	(hz / 100)	/* 100x /second */

/* Identify PIM packet that came on a Register interface */
#define	PIM_REGISTER_MARKER	0xffffffff

/* Function declarations */
static int	add_mfc(struct mfcctl *);
static int	add_vif(struct vifctl *, queue_t *, mblk_t *);
static int	del_mfc(struct mfcctl *);
static int	del_vif(vifi_t *, queue_t *, mblk_t *);
static void	del_vifp(struct vif *);
static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static void	expire_upcalls(void *);
static void	fill_route(struct mfc *, struct mfcctl *);
static int	get_assert(uchar_t *);
static int	get_lsg_cnt(struct sioc_lsg_req *);
static int	get_sg_cnt(struct sioc_sg_req *);
static int	get_version(uchar_t *);
static int	get_vif_cnt(struct sioc_vif_req *);
static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
		    ipaddr_t, struct mfc *);
static int	ip_mrouter_init(queue_t *, uchar_t *, int);
static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	register_mforward(queue_t *, mblk_t *);
static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	set_assert(int *);

/*
 * Token Bucket Filter functions
 */
static int	priority(struct vif *, ipha_t *);
static void	tbf_control(struct vif *, mblk_t *, ipha_t *);
static int	tbf_dq_sel(struct vif *, ipha_t *);
static void	tbf_process_q(struct vif *);
static void	tbf_queue(struct vif *, mblk_t *);
static void	tbf_reprocess_q(void *);
static void	tbf_send_packet(struct vif *, mblk_t *);
static void	tbf_update_tokens(struct vif *);
static void	release_mfc(struct mfcb *);

static boolean_t	is_mrouter_off(void);
/*
 * Encapsulation packets
 */

#define	ENCAP_TTL	64

/* prototype IP hdr for encapsulated packets */
static ipha_t multicast_encap_iphdr = {
	IP_SIMPLE_HDR_VERSION,
	0,				/* tos */
	sizeof (ipha_t),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, IPPROTO_ENCAP,
	0,				/* checksum */
};

/*
 * Private variables.
 */
/* Forwarding setting saved across MRT_INIT/MRT_DONE; -1 means "not saved". */
static int	saved_ip_g_forward = -1;

/*
 * numvifs is only a hint about the max interface being used.
 */
static vifi_t	numvifs = 0;
static kmutex_t	numvifs_mutex;

static struct vif	viftable[MAXVIFS+1];	/* Index needs to accommodate */
						/* the value of NO_VIF, which */
						/* is MAXVIFS. */

/*
 * One-back cache used to locate a tunnel's vif,
 * given a datagram's src ip address.
 */
static ipaddr_t		last_encap_src;
static struct vif	*last_encap_vif;
static kmutex_t		last_encap_lock;	/* Protects the above */

/*
 * Whether or not special PIM assert processing is enabled.
 */
/*
 * reg_vif_num is protected by numvifs_mutex
 */
static vifi_t reg_vif_num = ALL_VIFS;	/* Index to Register vif */
static int pim_assert;

/*
 * Rate limit for assert notification messages, in nsec.
 */
#define	ASSERT_MSG_TIME	3000000000

/* Take v_lock and bump the vif refcnt. */
#define	VIF_REFHOLD(vifp) { \
	mutex_enter(&(vifp)->v_lock); \
	(vifp)->v_refcnt++; \
	mutex_exit(&(vifp)->v_lock); \
}

/*
 * Drop a vif reference with v_lock already held by the caller.
 * Frees the vif (del_vifp) when this was the last reference on a
 * condemned vif; otherwise just releases v_lock.
 */
#define	VIF_REFRELE_LOCKED(vifp) { \
	(vifp)->v_refcnt--; \
	if ((vifp)->v_refcnt == 0 && \
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \
		del_vifp(vifp); \
	} else { \
		mutex_exit(&(vifp)->v_lock); \
	} \
}

/* Same as VIF_REFRELE_LOCKED but acquires v_lock itself. */
#define	VIF_REFRELE(vifp) { \
	mutex_enter(&(vifp)->v_lock); \
	(vifp)->v_refcnt--; \
	if ((vifp)->v_refcnt == 0 && \
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \
		del_vifp(vifp); \
	} else { \
		mutex_exit(&(vifp)->v_lock); \
	} \
}

/* Bucket walker refcnt; keeps condemned mfc structs alive until release. */
#define	MFCB_REFHOLD(mfcb) { \
	mutex_enter(&(mfcb)->mfcb_lock); \
	(mfcb)->mfcb_refcnt++; \
	ASSERT((mfcb)->mfcb_refcnt != 0); \
	mutex_exit(&(mfcb)->mfcb_lock); \
}

/* Last walker out of a condemned bucket frees the condemned mfcs. */
#define	MFCB_REFRELE(mfcb) { \
	mutex_enter(&(mfcb)->mfcb_lock); \
	ASSERT((mfcb)->mfcb_refcnt != 0); \
	if (--(mfcb)->mfcb_refcnt == 0 && \
		((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) { \
		release_mfc(mfcb); \
	} \
	mutex_exit(&(mfcb)->mfcb_lock); \
}

/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 * Skip entries with pending upcalls (mfc_rte != NULL) and condemned entries.
 * Type of service parameter to be added in the future!
 */
#define	MFCFIND(mfcbp, o, g, rt) { \
	struct mfc	*_mb_rt = NULL; \
	rt = NULL; \
	_mb_rt = mfcbp->mfcb_mfc; \
	while (_mb_rt) { \
		if ((_mb_rt->mfc_origin.s_addr == o) && \
		    (_mb_rt->mfc_mcastgrp.s_addr == g) && \
		    (_mb_rt->mfc_rte == NULL) && \
		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \
			rt = _mb_rt; \
			break; \
		} \
	_mb_rt = _mb_rt->mfc_next; \
	} \
}

/*
 * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
 * are inefficient.
We use gethrestime() which returns a timespec_t with 353 * sec and nsec, the resolution is machine dependent. 354 * The following 2 macros have been changed to use nsec instead of usec. 355 */ 356 /* 357 * Macros to compute elapsed time efficiently. 358 * Borrowed from Van Jacobson's scheduling code. 359 * Delta should be a hrtime_t. 360 */ 361 #define TV_DELTA(a, b, delta) { \ 362 int xxs; \ 363 \ 364 delta = (a).tv_nsec - (b).tv_nsec; \ 365 if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \ 366 switch (xxs) { \ 367 case 2: \ 368 delta += 1000000000; \ 369 /*FALLTHROUGH*/ \ 370 case 1: \ 371 delta += 1000000000; \ 372 break; \ 373 default: \ 374 delta += (1000000000 * xxs); \ 375 } \ 376 } \ 377 } 378 379 #define TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \ 380 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 381 382 /* 383 * Handle MRT setsockopt commands to modify the multicast routing tables. 384 */ 385 int 386 ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, 387 int datalen, mblk_t *first_mp) 388 { 389 mutex_enter(&ip_g_mrouter_mutex); 390 if (cmd != MRT_INIT && q != ip_g_mrouter) { 391 mutex_exit(&ip_g_mrouter_mutex); 392 return (EACCES); 393 } 394 mutex_exit(&ip_g_mrouter_mutex); 395 396 if (checkonly) { 397 /* 398 * do not do operation, just pretend to - new T_CHECK 399 * Note: Even routines further on can probably fail but 400 * this T_CHECK stuff is only to please XTI so it not 401 * necessary to be perfect. 402 */ 403 switch (cmd) { 404 case MRT_INIT: 405 case MRT_DONE: 406 case MRT_ADD_VIF: 407 case MRT_DEL_VIF: 408 case MRT_ADD_MFC: 409 case MRT_DEL_MFC: 410 case MRT_ASSERT: 411 return (0); 412 default: 413 return (EOPNOTSUPP); 414 } 415 } 416 417 /* 418 * make sure no command is issued after multicast routing has been 419 * turned off. 
420 */ 421 if (cmd != MRT_INIT && cmd != MRT_DONE) { 422 if (is_mrouter_off()) 423 return (EINVAL); 424 } 425 426 switch (cmd) { 427 case MRT_INIT: return (ip_mrouter_init(q, data, datalen)); 428 case MRT_DONE: return (ip_mrouter_done(first_mp)); 429 case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, q, first_mp)); 430 case MRT_DEL_VIF: return (del_vif((vifi_t *)data, q, first_mp)); 431 case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data)); 432 case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data)); 433 case MRT_ASSERT: return (set_assert((int *)data)); 434 default: return (EOPNOTSUPP); 435 } 436 } 437 438 /* 439 * Handle MRT getsockopt commands 440 */ 441 int 442 ip_mrouter_get(int cmd, queue_t *q, uchar_t *data) 443 { 444 if (q != ip_g_mrouter) 445 return (EACCES); 446 447 switch (cmd) { 448 case MRT_VERSION: return (get_version((uchar_t *)data)); 449 case MRT_ASSERT: return (get_assert((uchar_t *)data)); 450 default: return (EOPNOTSUPP); 451 } 452 } 453 454 /* 455 * Handle ioctl commands to obtain information from the cache. 456 * Called with shared access to IP. These are read_only ioctls. 457 */ 458 /* ARGSUSED */ 459 int 460 mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, 461 ip_ioctl_cmd_t *ipip, void *if_req) 462 { 463 mblk_t *mp1; 464 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 465 466 /* Existence verified in ip_wput_nondata */ 467 mp1 = mp->b_cont->b_cont; 468 469 switch (iocp->ioc_cmd) { 470 case (SIOCGETVIFCNT): 471 return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr)); 472 case (SIOCGETSGCNT): 473 return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr)); 474 case (SIOCGETLSGCNT): 475 return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr)); 476 default: 477 return (EINVAL); 478 } 479 } 480 481 /* 482 * Returns the packet, byte, rpf-failure count for the source, group provided. 
483 */ 484 static int 485 get_sg_cnt(struct sioc_sg_req *req) 486 { 487 struct mfc *rt; 488 struct mfcb *mfcbp; 489 490 mfcbp = &mfctable[MFCHASH(req->src.s_addr, req->grp.s_addr)]; 491 MFCB_REFHOLD(mfcbp); 492 MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt); 493 494 if (rt != NULL) { 495 mutex_enter(&rt->mfc_mutex); 496 req->pktcnt = rt->mfc_pkt_cnt; 497 req->bytecnt = rt->mfc_byte_cnt; 498 req->wrong_if = rt->mfc_wrong_if; 499 mutex_exit(&rt->mfc_mutex); 500 } else 501 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU; 502 503 MFCB_REFRELE(mfcbp); 504 return (0); 505 } 506 507 /* 508 * Returns the packet, byte, rpf-failure count for the source, group provided. 509 * Uses larger counters and IPv6 addresses. 510 */ 511 /* ARGSUSED XXX until implemented */ 512 static int 513 get_lsg_cnt(struct sioc_lsg_req *req) 514 { 515 /* XXX TODO SIOCGETLSGCNT */ 516 return (ENXIO); 517 } 518 519 /* 520 * Returns the input and output packet and byte counts on the vif provided. 521 */ 522 static int 523 get_vif_cnt(struct sioc_vif_req *req) 524 { 525 vifi_t vifi = req->vifi; 526 527 if (vifi >= numvifs) 528 return (EINVAL); 529 530 /* 531 * No locks here, an approximation is fine. 532 */ 533 req->icount = viftable[vifi].v_pkt_in; 534 req->ocount = viftable[vifi].v_pkt_out; 535 req->ibytes = viftable[vifi].v_bytes_in; 536 req->obytes = viftable[vifi].v_bytes_out; 537 538 return (0); 539 } 540 541 static int 542 get_version(uchar_t *data) 543 { 544 int *v = (int *)data; 545 546 *v = 0x0305; /* XXX !!!! */ 547 548 return (0); 549 } 550 551 /* 552 * Set PIM assert processing global. 553 */ 554 static int 555 set_assert(int *i) 556 { 557 if ((*i != 1) && (*i != 0)) 558 return (EINVAL); 559 560 pim_assert = *i; 561 562 return (0); 563 } 564 565 /* 566 * Get PIM assert processing global. 567 */ 568 static int 569 get_assert(uchar_t *data) 570 { 571 int *i = (int *)data; 572 573 *i = pim_assert; 574 575 return (0); 576 } 577 578 /* 579 * Enable multicast routing. 
580 */ 581 static int 582 ip_mrouter_init(queue_t *q, uchar_t *data, int datalen) 583 { 584 conn_t *connp = Q_TO_CONN(q); 585 int *v; 586 587 if (data == NULL || (datalen != sizeof (int))) 588 return (ENOPROTOOPT); 589 590 v = (int *)data; 591 if (*v != 1) 592 return (ENOPROTOOPT); 593 594 mutex_enter(&ip_g_mrouter_mutex); 595 if (ip_g_mrouter != NULL) { 596 mutex_exit(&ip_g_mrouter_mutex); 597 return (EADDRINUSE); 598 } 599 600 ip_g_mrouter = q; 601 connp->conn_multi_router = 1; 602 603 mutex_init(&last_encap_lock, NULL, MUTEX_DEFAULT, NULL); 604 605 mrtstat.mrts_vifctlSize = sizeof (struct vifctl); 606 mrtstat.mrts_mfcctlSize = sizeof (struct mfcctl); 607 608 pim_assert = 0; 609 610 /* In order for tunnels to work we have to turn ip_g_forward on */ 611 if (!WE_ARE_FORWARDING) { 612 if (ip_mrtdebug > 1) { 613 (void) mi_strlog(q, 1, SL_TRACE, 614 "ip_mrouter_init: turning on forwarding"); 615 } 616 saved_ip_g_forward = ip_g_forward; 617 ip_g_forward = IP_FORWARD_ALWAYS; 618 } 619 620 mutex_exit(&ip_g_mrouter_mutex); 621 return (0); 622 } 623 624 /* 625 * Disable multicast routing. 626 * Didn't use global timeout_val (BSD version), instead check the mfctable. 627 */ 628 int 629 ip_mrouter_done(mblk_t *mp) 630 { 631 conn_t *connp; 632 vifi_t vifi; 633 struct mfc *mfc_rt; 634 int i; 635 636 mutex_enter(&ip_g_mrouter_mutex); 637 if (ip_g_mrouter == NULL) { 638 mutex_exit(&ip_g_mrouter_mutex); 639 return (EINVAL); 640 } 641 642 connp = Q_TO_CONN(ip_g_mrouter); 643 644 if (saved_ip_g_forward != -1) { 645 if (ip_mrtdebug > 1) { 646 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 647 "ip_mrouter_done: turning off forwarding"); 648 } 649 ip_g_forward = saved_ip_g_forward; 650 saved_ip_g_forward = -1; 651 } 652 653 /* 654 * Always clear cache when vifs change. 655 * No need to get last_encap_lock since we are running as a writer. 
656 */ 657 mutex_enter(&last_encap_lock); 658 last_encap_src = 0; 659 last_encap_vif = NULL; 660 mutex_exit(&last_encap_lock); 661 connp->conn_multi_router = 0; 662 663 mutex_exit(&ip_g_mrouter_mutex); 664 665 /* 666 * For each phyint in use, 667 * disable promiscuous reception of all IP multicasts. 668 */ 669 for (vifi = 0; vifi < MAXVIFS; vifi++) { 670 struct vif *vifp = viftable + vifi; 671 672 mutex_enter(&vifp->v_lock); 673 /* 674 * if the vif is active mark it condemned. 675 */ 676 if (vifp->v_marks & VIF_MARK_GOOD) { 677 ASSERT(vifp->v_ipif != NULL); 678 ipif_refhold(vifp->v_ipif); 679 /* Phyint only */ 680 if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 681 ipif_t *ipif = vifp->v_ipif; 682 ipsq_t *ipsq; 683 boolean_t suc; 684 ill_t *ill; 685 686 ill = ipif->ipif_ill; 687 suc = B_FALSE; 688 if (mp == NULL) { 689 /* 690 * being called from ip_close, 691 * lets do it synchronously. 692 * Clear VIF_MARK_GOOD and 693 * set VIF_MARK_CONDEMNED. 694 */ 695 vifp->v_marks &= ~VIF_MARK_GOOD; 696 vifp->v_marks |= VIF_MARK_CONDEMNED; 697 mutex_exit(&(vifp)->v_lock); 698 suc = ipsq_enter(ill, B_FALSE); 699 ipsq = ill->ill_phyint->phyint_ipsq; 700 } else { 701 ipsq = ipsq_try_enter(ipif, NULL, 702 ip_g_mrouter, mp, 703 ip_restart_optmgmt, NEW_OP, B_TRUE); 704 if (ipsq == NULL) { 705 mutex_exit(&(vifp)->v_lock); 706 return (EINPROGRESS); 707 } 708 /* 709 * Clear VIF_MARK_GOOD and 710 * set VIF_MARK_CONDEMNED. 711 */ 712 vifp->v_marks &= ~VIF_MARK_GOOD; 713 vifp->v_marks |= VIF_MARK_CONDEMNED; 714 mutex_exit(&(vifp)->v_lock); 715 suc = B_TRUE; 716 } 717 718 if (suc) { 719 (void) ip_delmulti(INADDR_ANY, ipif, 720 B_TRUE, B_TRUE); 721 ipsq_exit(ipsq, B_TRUE, B_TRUE); 722 } 723 mutex_enter(&vifp->v_lock); 724 } 725 /* 726 * decreases the refcnt added in add_vif. 727 * and release v_lock. 
728 */ 729 VIF_REFRELE_LOCKED(vifp); 730 } else { 731 mutex_exit(&vifp->v_lock); 732 continue; 733 } 734 } 735 736 mutex_enter(&numvifs_mutex); 737 numvifs = 0; 738 pim_assert = 0; 739 reg_vif_num = ALL_VIFS; 740 mutex_exit(&numvifs_mutex); 741 742 /* 743 * Free upcall msgs. 744 * Go through mfctable and stop any outstanding upcall 745 * timeouts remaining on mfcs. 746 */ 747 for (i = 0; i < MFCTBLSIZ; i++) { 748 mutex_enter(&mfctable[i].mfcb_lock); 749 mfctable[i].mfcb_refcnt++; 750 mfctable[i].mfcb_marks |= MFCB_MARK_CONDEMNED; 751 mutex_exit(&mfctable[i].mfcb_lock); 752 mfc_rt = mfctable[i].mfcb_mfc; 753 while (mfc_rt) { 754 /* Free upcalls */ 755 mutex_enter(&mfc_rt->mfc_mutex); 756 if (mfc_rt->mfc_rte != NULL) { 757 if (mfc_rt->mfc_timeout_id != 0) { 758 /* 759 * OK to drop the lock as we have 760 * a refcnt on the bucket. timeout 761 * can fire but it will see that 762 * mfc_timeout_id == 0 and not do 763 * anything. see expire_upcalls(). 764 */ 765 mfc_rt->mfc_timeout_id = 0; 766 mutex_exit(&mfc_rt->mfc_mutex); 767 (void) untimeout( 768 mfc_rt->mfc_timeout_id); 769 mfc_rt->mfc_timeout_id = 0; 770 mutex_enter(&mfc_rt->mfc_mutex); 771 772 /* 773 * all queued upcall packets 774 * and mblk will be freed in 775 * release_mfc(). 
776 */ 777 } 778 } 779 780 mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 781 782 mutex_exit(&mfc_rt->mfc_mutex); 783 mfc_rt = mfc_rt->mfc_next; 784 } 785 MFCB_REFRELE(&mfctable[i]); 786 } 787 788 mutex_enter(&ip_g_mrouter_mutex); 789 ip_g_mrouter = NULL; 790 mutex_exit(&ip_g_mrouter_mutex); 791 return (0); 792 } 793 794 static boolean_t 795 is_mrouter_off(void) 796 { 797 conn_t *connp; 798 799 mutex_enter(&ip_g_mrouter_mutex); 800 if (ip_g_mrouter == NULL) { 801 mutex_exit(&ip_g_mrouter_mutex); 802 return (B_TRUE); 803 } 804 805 connp = Q_TO_CONN(ip_g_mrouter); 806 if (connp->conn_multi_router == 0) { 807 mutex_exit(&ip_g_mrouter_mutex); 808 return (B_TRUE); 809 } 810 mutex_exit(&ip_g_mrouter_mutex); 811 return (B_FALSE); 812 } 813 814 static void 815 unlock_good_vif(struct vif *vifp) 816 { 817 ASSERT(vifp->v_ipif != NULL); 818 ipif_refrele(vifp->v_ipif); 819 VIF_REFRELE(vifp); 820 } 821 822 static boolean_t 823 lock_good_vif(struct vif *vifp) 824 { 825 mutex_enter(&vifp->v_lock); 826 if (!(vifp->v_marks & VIF_MARK_GOOD)) { 827 mutex_exit(&vifp->v_lock); 828 return (B_FALSE); 829 } 830 831 ASSERT(vifp->v_ipif != NULL); 832 mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock); 833 if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) { 834 mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 835 mutex_exit(&vifp->v_lock); 836 return (B_FALSE); 837 } 838 ipif_refhold_locked(vifp->v_ipif); 839 mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 840 vifp->v_refcnt++; 841 mutex_exit(&vifp->v_lock); 842 return (B_TRUE); 843 } 844 845 /* 846 * Add a vif to the vif table. 
847 */ 848 static int 849 add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) 850 { 851 struct vif *vifp = viftable + vifcp->vifc_vifi; 852 ipif_t *ipif; 853 int error; 854 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 855 conn_t *connp = Q_TO_CONN(q); 856 ipsq_t *ipsq; 857 858 ASSERT(connp != NULL); 859 860 if (vifcp->vifc_vifi >= MAXVIFS) 861 return (EINVAL); 862 863 if (is_mrouter_off()) 864 return (EINVAL); 865 866 mutex_enter(&vifp->v_lock); 867 /* 868 * Viftable entry should be 0. 869 * if v_marks == 0 but v_refcnt != 0 means struct is being 870 * initialized. 871 * 872 * Also note that it is very unlikely that we will get a MRT_ADD_VIF 873 * request while the delete is in progress, mrouted only sends add 874 * requests when a new interface is added and the new interface cannot 875 * have the same vifi as an existing interface. We make sure that 876 * ill_delete will block till the vif is deleted by adding a refcnt 877 * to ipif in del_vif(). 878 */ 879 if (vifp->v_lcl_addr.s_addr != 0 || 880 vifp->v_marks != 0 || 881 vifp->v_refcnt != 0) { 882 mutex_exit(&vifp->v_lock); 883 return (EADDRINUSE); 884 } 885 886 /* Incoming vif should not be 0 */ 887 if (vifcp->vifc_lcl_addr.s_addr == 0) { 888 mutex_exit(&vifp->v_lock); 889 return (EINVAL); 890 } 891 892 vifp->v_refcnt++; 893 mutex_exit(&vifp->v_lock); 894 /* Find the interface with the local address */ 895 ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL, 896 connp->conn_zoneid, CONNP_TO_WQ(connp), first_mp, 897 ip_restart_optmgmt, &error); 898 if (ipif == NULL) { 899 VIF_REFRELE(vifp); 900 if (error == EINPROGRESS) 901 return (error); 902 return (EADDRNOTAVAIL); 903 } 904 905 /* 906 * We have to be exclusive as we have to call ip_addmulti() 907 * This is the best position to try to be exclusive in case 908 * we have to wait. 
909 */ 910 ipsq = ipsq_try_enter(ipif, NULL, CONNP_TO_WQ(connp), first_mp, 911 ip_restart_optmgmt, NEW_OP, B_TRUE); 912 if ((ipsq) == NULL) { 913 VIF_REFRELE(vifp); 914 ipif_refrele(ipif); 915 return (EINPROGRESS); 916 } 917 918 if (ip_mrtdebug > 1) { 919 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 920 "add_vif: src 0x%x enter", 921 vifcp->vifc_lcl_addr.s_addr); 922 } 923 924 mutex_enter(&vifp->v_lock); 925 /* 926 * Always clear cache when vifs change. 927 * Needed to ensure that src isn't left over from before vif was added. 928 * No need to get last_encap_lock, since we are running as a writer. 929 */ 930 931 mutex_enter(&last_encap_lock); 932 last_encap_src = 0; 933 last_encap_vif = NULL; 934 mutex_exit(&last_encap_lock); 935 936 if (vifcp->vifc_flags & VIFF_TUNNEL) { 937 if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) { 938 cmn_err(CE_WARN, 939 "add_vif: source route tunnels not supported\n"); 940 VIF_REFRELE_LOCKED(vifp); 941 ipif_refrele(ipif); 942 ipsq_exit(ipsq, B_TRUE, B_TRUE); 943 return (EOPNOTSUPP); 944 } 945 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 946 947 } else { 948 /* Phyint or Register vif */ 949 if (vifcp->vifc_flags & VIFF_REGISTER) { 950 /* 951 * Note: Since all IPPROTO_IP level options (including 952 * MRT_ADD_VIF) are done exclusively via 953 * ip_optmgmt_writer(), a lock is not necessary to 954 * protect reg_vif_num. 
955 */ 956 mutex_enter(&numvifs_mutex); 957 if (reg_vif_num == ALL_VIFS) { 958 reg_vif_num = vifcp->vifc_vifi; 959 mutex_exit(&numvifs_mutex); 960 } else { 961 mutex_exit(&numvifs_mutex); 962 VIF_REFRELE_LOCKED(vifp); 963 ipif_refrele(ipif); 964 ipsq_exit(ipsq, B_TRUE, B_TRUE); 965 return (EADDRINUSE); 966 } 967 } 968 969 /* Make sure the interface supports multicast */ 970 if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) { 971 VIF_REFRELE_LOCKED(vifp); 972 ipif_refrele(ipif); 973 if (vifcp->vifc_flags & VIFF_REGISTER) { 974 mutex_enter(&numvifs_mutex); 975 reg_vif_num = ALL_VIFS; 976 mutex_exit(&numvifs_mutex); 977 } 978 ipsq_exit(ipsq, B_TRUE, B_TRUE); 979 return (EOPNOTSUPP); 980 } 981 /* Enable promiscuous reception of all IP mcasts from the if */ 982 mutex_exit(&vifp->v_lock); 983 error = ip_addmulti(INADDR_ANY, ipif, ILGSTAT_NONE, 984 MODE_IS_EXCLUDE, NULL); 985 mutex_enter(&vifp->v_lock); 986 /* 987 * since we released the lock lets make sure that 988 * ip_mrouter_done() has not been called. 989 */ 990 if (error != 0 || is_mrouter_off()) { 991 if (error == 0) 992 (void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, 993 B_TRUE); 994 if (vifcp->vifc_flags & VIFF_REGISTER) { 995 mutex_enter(&numvifs_mutex); 996 reg_vif_num = ALL_VIFS; 997 mutex_exit(&numvifs_mutex); 998 } 999 VIF_REFRELE_LOCKED(vifp); 1000 ipif_refrele(ipif); 1001 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1002 return (error?error:EINVAL); 1003 } 1004 } 1005 /* Define parameters for the tbf structure */ 1006 vifp->v_tbf = v_tbf; 1007 gethrestime(&vifp->v_tbf->tbf_last_pkt_t); 1008 vifp->v_tbf->tbf_n_tok = 0; 1009 vifp->v_tbf->tbf_q_len = 0; 1010 vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 1011 vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 1012 1013 vifp->v_flags = vifcp->vifc_flags; 1014 vifp->v_threshold = vifcp->vifc_threshold; 1015 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 1016 vifp->v_ipif = ipif; 1017 ipif_refrele(ipif); 1018 /* Scaling up here, allows division by 1024 in critical code. 
*/ 1019 vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000); 1020 vifp->v_timeout_id = 0; 1021 /* initialize per vif pkt counters */ 1022 vifp->v_pkt_in = 0; 1023 vifp->v_pkt_out = 0; 1024 vifp->v_bytes_in = 0; 1025 vifp->v_bytes_out = 0; 1026 mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL); 1027 1028 /* Adjust numvifs up, if the vifi is higher than numvifs */ 1029 mutex_enter(&numvifs_mutex); 1030 if (numvifs <= vifcp->vifc_vifi) 1031 numvifs = vifcp->vifc_vifi + 1; 1032 mutex_exit(&numvifs_mutex); 1033 1034 if (ip_mrtdebug > 1) { 1035 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 1036 "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", 1037 vifcp->vifc_vifi, 1038 ntohl(vifcp->vifc_lcl_addr.s_addr), 1039 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 1040 ntohl(vifcp->vifc_rmt_addr.s_addr), 1041 vifcp->vifc_threshold, vifcp->vifc_rate_limit); 1042 } 1043 1044 vifp->v_marks = VIF_MARK_GOOD; 1045 mutex_exit(&vifp->v_lock); 1046 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1047 return (0); 1048 } 1049 1050 1051 /* Delete a vif from the vif table. */ 1052 static void 1053 del_vifp(struct vif *vifp) 1054 { 1055 struct tbf *t = vifp->v_tbf; 1056 mblk_t *mp0; 1057 vifi_t vifi; 1058 1059 1060 ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); 1061 ASSERT(t != NULL); 1062 1063 /* 1064 * release the ref we put in vif_del. 1065 */ 1066 ASSERT(vifp->v_ipif != NULL); 1067 ipif_refrele(vifp->v_ipif); 1068 1069 if (ip_mrtdebug > 1) { 1070 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 1071 "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); 1072 } 1073 1074 if (vifp->v_timeout_id != 0) { 1075 (void) untimeout(vifp->v_timeout_id); 1076 vifp->v_timeout_id = 0; 1077 } 1078 1079 /* 1080 * Free packets queued at the interface. 1081 * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc. 
1082 */ 1083 mutex_enter(&t->tbf_lock); 1084 while (t->tbf_q != NULL) { 1085 mp0 = t->tbf_q; 1086 t->tbf_q = t->tbf_q->b_next; 1087 mp0->b_prev = mp0->b_next = NULL; 1088 freemsg(mp0); 1089 } 1090 mutex_exit(&t->tbf_lock); 1091 1092 /* 1093 * Always clear cache when vifs change. 1094 * No need to get last_encap_lock since we are running as a writer. 1095 */ 1096 mutex_enter(&last_encap_lock); 1097 if (vifp == last_encap_vif) { 1098 last_encap_vif = NULL; 1099 last_encap_src = 0; 1100 } 1101 mutex_exit(&last_encap_lock); 1102 1103 mutex_destroy(&t->tbf_lock); 1104 1105 bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf))); 1106 1107 /* Adjust numvifs down */ 1108 mutex_enter(&numvifs_mutex); 1109 for (vifi = numvifs; vifi != 0; vifi--) /* vifi is unsigned */ 1110 if (viftable[vifi - 1].v_lcl_addr.s_addr != 0) 1111 break; 1112 numvifs = vifi; 1113 mutex_exit(&numvifs_mutex); 1114 1115 bzero(vifp, sizeof (*vifp)); 1116 } 1117 1118 static int 1119 del_vif(vifi_t *vifip, queue_t *q, mblk_t *first_mp) 1120 { 1121 struct vif *vifp = viftable + *vifip; 1122 conn_t *connp; 1123 ipsq_t *ipsq; 1124 1125 if (*vifip >= numvifs) 1126 return (EINVAL); 1127 1128 1129 mutex_enter(&vifp->v_lock); 1130 /* 1131 * Not initialized 1132 * Here we are not looking at the vif that is being initialized 1133 * i.e vifp->v_marks == 0 and refcnt > 0. 1134 */ 1135 if (vifp->v_lcl_addr.s_addr == 0 || 1136 !(vifp->v_marks & VIF_MARK_GOOD)) { 1137 mutex_exit(&vifp->v_lock); 1138 return (EADDRNOTAVAIL); 1139 } 1140 1141 /* 1142 * This is an optimization, if first_mp == NULL 1143 * than we are being called from reset_mrt_vif_ipif() 1144 * so we already have exclusive access to the ipsq. 1145 * the ASSERT below is a check for this condition. 
1146 */ 1147 if (first_mp != NULL && 1148 !(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 1149 connp = Q_TO_CONN(q); 1150 ASSERT(connp != NULL); 1151 /* 1152 * We have to be exclusive as we have to call ip_delmulti() 1153 * This is the best position to try to be exclusive in case 1154 * we have to wait. 1155 */ 1156 ipsq = ipsq_try_enter(vifp->v_ipif, NULL, CONNP_TO_WQ(connp), 1157 first_mp, ip_restart_optmgmt, NEW_OP, B_TRUE); 1158 if ((ipsq) == NULL) { 1159 mutex_exit(&vifp->v_lock); 1160 return (EINPROGRESS); 1161 } 1162 /* recheck after being exclusive */ 1163 if (vifp->v_lcl_addr.s_addr == 0 || 1164 !vifp->v_marks & VIF_MARK_GOOD) { 1165 /* 1166 * someone beat us. 1167 */ 1168 mutex_exit(&vifp->v_lock); 1169 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1170 return (EADDRNOTAVAIL); 1171 } 1172 } 1173 1174 1175 ASSERT(IAM_WRITER_IPIF(vifp->v_ipif)); 1176 1177 1178 /* 1179 * add a refhold so that ipif does not go away while 1180 * there are still users, this will be released in del_vifp 1181 * when we free the vif. 1182 */ 1183 ipif_refhold(vifp->v_ipif); 1184 1185 /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */ 1186 vifp->v_marks &= ~VIF_MARK_GOOD; 1187 vifp->v_marks |= VIF_MARK_CONDEMNED; 1188 1189 /* Phyint only */ 1190 if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 1191 ipif_t *ipif = vifp->v_ipif; 1192 ASSERT(ipif != NULL); 1193 /* 1194 * should be OK to drop the lock as we 1195 * have marked this as CONDEMNED. 1196 */ 1197 mutex_exit(&(vifp)->v_lock); 1198 (void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, B_TRUE); 1199 if (first_mp != NULL) 1200 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1201 mutex_enter(&(vifp)->v_lock); 1202 } 1203 1204 /* 1205 * decreases the refcnt added in add_vif. 1206 */ 1207 VIF_REFRELE_LOCKED(vifp); 1208 return (0); 1209 } 1210 1211 /* 1212 * Add an mfc entry. 
 * Called via ip_mroute_set() (SIOC MRT_ADD_MFC) with mrouted providing the
 * (origin, group) -> parent-vif + ttl-vector mapping.  Three cases:
 * existing entry -> update in place; pending-upcall entry -> fill it and
 * drain the queued packets through ip_mdq(); neither -> allocate a fresh
 * entry.  Returns 0, EINVAL or ENOBUFS.
 */
static int
add_mfc(struct mfcctl *mfccp)
{
	struct mfc *rt;
	struct rtdetq *rte;
	ushort_t nstl;
	int i;
	struct mfcb *mfcbp;

	/*
	 * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
	 * did not have a real route for pkt.
	 * We want this pkt without rt installed in the mfctable to prevent
	 * multiiple tries, so go ahead and put it in mfctable, it will
	 * be discarded later in ip_mdq() because the child is NULL.
	 */

	/* Error checking, out of bounds?  (NO_VIF == MAXVIFS is legal) */
	if (mfccp->mfcc_parent > MAXVIFS) {
		ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
		    (int)mfccp->mfcc_parent));
		return (EINVAL);
	}

	if ((mfccp->mfcc_parent != NO_VIF) &&
	    (viftable[mfccp->mfcc_parent].v_ipif == NULL)) {
		ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
		    (int)mfccp->mfcc_parent));
		return (EINVAL);
	}

	if (is_mrouter_off()) {
		return (EINVAL);
	}

	/* Hold the hash bucket so entries cannot be freed under us. */
	mfcbp = &mfctable[MFCHASH(mfccp->mfcc_origin.s_addr,
	    mfccp->mfcc_mcastgrp.s_addr)];
	MFCB_REFHOLD(mfcbp);
	MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
	    mfccp->mfcc_mcastgrp.s_addr, rt);

	/* If an entry already exists, just update the fields */
	if (rt) {
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "add_mfc: update o %x grp %x parent %x",
			    ntohl(mfccp->mfcc_origin.s_addr),
			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
			    mfccp->mfcc_parent);
		}
		mutex_enter(&rt->mfc_mutex);
		rt->mfc_parent = mfccp->mfcc_parent;

		mutex_enter(&numvifs_mutex);
		for (i = 0; i < (int)numvifs; i++)
			rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
		mutex_exit(&numvifs_mutex);
		mutex_exit(&rt->mfc_mutex);

		MFCB_REFRELE(mfcbp);
		return (0);
	}

	/*
	 * Find the entry for which the upcall was made and update.
	 * nstl counts matches; more than one pending-upcall entry for the
	 * same (origin, group) indicates kernel state corruption.
	 */
	for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
		mutex_enter(&rt->mfc_mutex);
		if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
		    (rt->mfc_rte != NULL) &&
		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
			if (nstl++ != 0)
				cmn_err(CE_WARN,
				    "add_mfc: %s o %x g %x p %x",
				    "multiple kernel entries",
				    ntohl(mfccp->mfcc_origin.s_addr),
				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
				    mfccp->mfcc_parent);

			if (ip_mrtdebug > 1) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "add_mfc: o %x g %x p %x",
				    ntohl(mfccp->mfcc_origin.s_addr),
				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
				    mfccp->mfcc_parent);
			}
			fill_route(rt, mfccp);

			/*
			 * Prevent cleanup of cache entry.
			 * Timer starts in ip_mforward.
			 */
			if (rt->mfc_timeout_id != 0) {
				timeout_id_t id;
				id = rt->mfc_timeout_id;
				/*
				 * setting id to zero will avoid this
				 * entry from being cleaned up in
				 * expire_up_calls().
				 */
				rt->mfc_timeout_id = 0;
				/*
				 * dropping the lock is fine as we
				 * have a refhold on the bucket.
				 * so mfc cannot be freed.
				 * The timeout can fire but it will see
				 * that mfc_timeout_id == 0 and not cleanup.
				 */
				mutex_exit(&rt->mfc_mutex);
				(void) untimeout(id);
				mutex_enter(&rt->mfc_mutex);
			}

			/*
			 * Send all pkts that are queued waiting for the upcall.
			 * ip_mdq param tun set to 0 -
			 * the return value of ip_mdq() isn't used here,
			 * so value we send doesn't matter.
			 */
			while (rt->mfc_rte != NULL) {
				rte = rt->mfc_rte;
				rt->mfc_rte = rte->rte_next;
				mutex_exit(&rt->mfc_mutex);
				(void) ip_mdq(rte->mp, (ipha_t *)
				    rte->mp->b_rptr, rte->ill, 0, rt);
				freemsg(rte->mp);
				mi_free((char *)rte);
				mutex_enter(&rt->mfc_mutex);
			}
		}
		mutex_exit(&rt->mfc_mutex);
	}

	/*
	 * It is possible that an entry is being inserted without an upcall
	 */
	if (nstl == 0) {
		mutex_enter(&(mfcbp->mfcb_lock));
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "add_mfc: no upcall o %x g %x p %x",
			    ntohl(mfccp->mfcc_origin.s_addr),
			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
			    mfccp->mfcc_parent);
		}
		if (is_mrouter_off()) {
			mutex_exit(&mfcbp->mfcb_lock);
			MFCB_REFRELE(mfcbp);
			return (EINVAL);
		}

		/* Re-scan for a live entry to update (no upcall pending). */
		for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {

			mutex_enter(&rt->mfc_mutex);
			if ((rt->mfc_origin.s_addr ==
			    mfccp->mfcc_origin.s_addr) &&
			    (rt->mfc_mcastgrp.s_addr ==
			    mfccp->mfcc_mcastgrp.s_addr) &&
			    (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
				fill_route(rt, mfccp);
				mutex_exit(&rt->mfc_mutex);
				break;
			}
			mutex_exit(&rt->mfc_mutex);
		}

		/* No upcall, so make a new entry into mfctable */
		if (rt == NULL) {
			rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
			if (rt == NULL) {
				ip1dbg(("add_mfc: out of memory\n"));
				mutex_exit(&mfcbp->mfcb_lock);
				MFCB_REFRELE(mfcbp);
				return (ENOBUFS);
			}

			/* Insert new entry at head of hash chain */
			mutex_enter(&rt->mfc_mutex);
			fill_route(rt, mfccp);

			/* Link into table */
			rt->mfc_next = mfcbp->mfcb_mfc;
			mfcbp->mfcb_mfc = rt;
			mutex_exit(&rt->mfc_mutex);
		}
		mutex_exit(&mfcbp->mfcb_lock);
	}

	MFCB_REFRELE(mfcbp);
	return (0);
}

/*
 * Fills in mfc structure from mrouted mfcctl.
 * Caller holds rt->mfc_mutex.  Resets the per-(src, grp) counters, so an
 * update through add_mfc() restarts statistics from zero.
 */
static void
fill_route(struct mfc *rt, struct mfcctl *mfccp)
{
	int i;

	rt->mfc_origin = mfccp->mfcc_origin;
	rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
	rt->mfc_parent = mfccp->mfcc_parent;
	mutex_enter(&numvifs_mutex);
	for (i = 0; i < (int)numvifs; i++) {
		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
	}
	mutex_exit(&numvifs_mutex);
	/* Initialize pkt counters per src-grp */
	rt->mfc_pkt_cnt = 0;
	rt->mfc_byte_cnt = 0;
	rt->mfc_wrong_if = 0;
	rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0;

}

/*
 * Free the rtdetq (pending-upcall) list hanging off an mfc entry,
 * including the queued packet mblks.
 */
static void
free_queue(struct mfc *mfcp)
{
	struct rtdetq *rte0;

	/*
	 * Drop all queued upcall packets.
	 * Free the mbuf with the pkt.
	 */
	while ((rte0 = mfcp->mfc_rte) != NULL) {
		mfcp->mfc_rte = rte0->rte_next;
		freemsg(rte0->mp);
		mi_free((char *)rte0);
	}
}
/*
 * go thorugh the hash bucket and free all the entries marked condemned.
 * Called when the last reference on the bucket is released; no other
 * thread can be traversing the chain at that point.
 */
void
release_mfc(struct mfcb *mfcbp)
{
	struct mfc *current_mfcp;
	struct mfc *prev_mfcp;

	prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;

	while (current_mfcp != NULL) {
		if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) {
			/* Head of chain: advance the bucket pointer. */
			if (current_mfcp == mfcbp->mfcb_mfc) {
				mfcbp->mfcb_mfc = current_mfcp->mfc_next;
				free_queue(current_mfcp);
				mi_free(current_mfcp);
				prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
				continue;
			}
			/* Interior node: unlink and resume from prev. */
			ASSERT(prev_mfcp != NULL);
			prev_mfcp->mfc_next = current_mfcp->mfc_next;
			free_queue(current_mfcp);
			mi_free(current_mfcp);
			current_mfcp = NULL;
		} else {
			prev_mfcp = current_mfcp;
		}

		current_mfcp = prev_mfcp->mfc_next;

	}
	mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
	ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
}

/*
 * Delete an mfc entry.
1485 */ 1486 static int 1487 del_mfc(struct mfcctl *mfccp) 1488 { 1489 struct in_addr origin; 1490 struct in_addr mcastgrp; 1491 struct mfc *rt; 1492 uint_t hash; 1493 1494 origin = mfccp->mfcc_origin; 1495 mcastgrp = mfccp->mfcc_mcastgrp; 1496 hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1497 1498 if (ip_mrtdebug > 1) { 1499 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 1500 "del_mfc: o %x g %x", 1501 ntohl(origin.s_addr), 1502 ntohl(mcastgrp.s_addr)); 1503 } 1504 1505 MFCB_REFHOLD(&mfctable[hash]); 1506 1507 /* Find mfc in mfctable, finds only entries without upcalls */ 1508 for (rt = mfctable[hash].mfcb_mfc; rt; rt = rt->mfc_next) { 1509 mutex_enter(&rt->mfc_mutex); 1510 if (origin.s_addr == rt->mfc_origin.s_addr && 1511 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1512 rt->mfc_rte == NULL && 1513 !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) 1514 break; 1515 mutex_exit(&rt->mfc_mutex); 1516 } 1517 1518 /* 1519 * Return if there was an upcall (mfc_rte != NULL, 1520 * or rt not in mfctable. 1521 */ 1522 if (rt == NULL) { 1523 MFCB_REFRELE(&mfctable[hash]); 1524 return (EADDRNOTAVAIL); 1525 } 1526 1527 1528 /* 1529 * no need to hold lock as we have a reference. 1530 */ 1531 mfctable[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 1532 /* error checking */ 1533 if (rt->mfc_timeout_id != 0) { 1534 ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null")); 1535 /* 1536 * Its ok to drop the lock, the struct cannot be freed 1537 * since we have a ref on the hash bucket. 
1538 */ 1539 rt->mfc_timeout_id = 0; 1540 mutex_exit(&rt->mfc_mutex); 1541 (void) untimeout(rt->mfc_timeout_id); 1542 mutex_enter(&rt->mfc_mutex); 1543 } 1544 1545 ASSERT(rt->mfc_rte == NULL); 1546 1547 1548 /* 1549 * Delete the entry from the cache 1550 */ 1551 rt->mfc_marks |= MFCB_MARK_CONDEMNED; 1552 mutex_exit(&rt->mfc_mutex); 1553 1554 MFCB_REFRELE(&mfctable[hash]); 1555 1556 return (0); 1557 } 1558 1559 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1560 1561 /* 1562 * IP multicast forwarding function. This function assumes that the packet 1563 * pointed to by ipha has arrived on (or is about to be sent to) the interface 1564 * pointed to by "ill", and the packet is to be relayed to other networks 1565 * that have members of the packet's destination IP multicast group. 1566 * 1567 * The packet is returned unscathed to the caller, unless it is 1568 * erroneous, in which case a -1 value tells the caller (IP) 1569 * to discard it. 1570 * 1571 * Unlike BSD, SunOS 5.x needs to return to IP info about 1572 * whether pkt came in thru a tunnel, so it can be discarded, unless 1573 * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try 1574 * to be delivered. 
 * Return values are 0 - pkt is okay and phyint
 *	-1 - pkt is malformed and to be tossed
 *	 1 - pkt came in on tunnel
 */
int
ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
{
	struct mfc	*rt;
	ipaddr_t	src, dst, tunnel_src = 0;
	static int	srctun = 0;
	vifi_t		vifi;
	boolean_t	pim_reg_packet = B_FALSE;
	struct mfcb	*mfcbp;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
		    ill->ill_name);
	}

	dst = ipha->ipha_dst;
	/*
	 * b_prev is overloaded by the multicast input paths: pim_input()
	 * stores PIM_REGISTER_MARKER, ip_mroute_decap() the tunnel source.
	 */
	if ((uint32_t)(uintptr_t)mp->b_prev == PIM_REGISTER_MARKER)
		pim_reg_packet = B_TRUE;
	else
		tunnel_src = (ipaddr_t)(uintptr_t)mp->b_prev;

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: not forwarded ttl %d,"
			    " dst 0x%x ill %s",
			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
		}
		mp->b_prev = NULL;
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	}

	if ((tunnel_src != 0) || pim_reg_packet) {
		/*
		 * Packet arrived over an encapsulated tunnel or via a PIM
		 * register message. Both ip_mroute_decap() and pim_input()
		 * encode information in mp->b_prev.
		 */
		mp->b_prev = NULL;
		if (ip_mrtdebug > 1) {
			if (tunnel_src != 0) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: ill %s arrived via ENCAP TUN",
				    ill->ill_name);
			} else if (pim_reg_packet) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: ill %s arrived via"
				    " REGISTER VIF",
				    ill->ill_name);
			}
		}
	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
		/* Packet arrived via a physical interface. */
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: ill %s arrived via PHYINT",
			    ill->ill_name);
		}

	} else {
		/*
		 * Packet arrived through a SRCRT tunnel.
		 * Source-route tunnels are no longer supported.
		 * Error message printed every 1000 times.
		 */
		if ((srctun++ % 1000) == 0) {
			cmn_err(CE_WARN,
			    "ip_mforward: received source-routed pkt from %x",
			    ntohl(ipha->ipha_src));
		}
		return (-1);
	}

	mrtstat.mrts_fwd_in++;
	src = ipha->ipha_src;

	/* Find route in cache, return NULL if not there or upcalls q'ed. */

	/*
	 * Lock the mfctable against changes made by ip_mforward.
	 * Note that only add_mfc and del_mfc can remove entries and
	 * they run with exclusive access to IP. So we do not need to
	 * guard against the rt being deleted, so release lock after reading.
	 */

	if (is_mrouter_off())
		return (-1);

	mfcbp = &mfctable[MFCHASH(src, dst)];
	MFCB_REFHOLD(mfcbp);
	MFCFIND(mfcbp, src, dst, rt);

	/* Entry exists, so forward if necessary */
	if (rt != NULL) {
		int ret = 0;
		mrtstat.mrts_mfc_hits++;
		if (pim_reg_packet) {
			ASSERT(reg_vif_num != ALL_VIFS);
			ret = ip_mdq(mp, ipha,
			    viftable[reg_vif_num].v_ipif->ipif_ill, 0, rt);
		} else {
			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
		}

		MFCB_REFRELE(mfcbp);
		return (ret);

	/*
	 * Don't forward if we don't have a cache entry. Mrouted will
	 * always provide a cache entry in response to an upcall.
	 */
	} else {
		/*
		 * If we don't have a route for packet's origin, make a copy
		 * of the packet and send message to routing daemon.
		 */
		struct mfc	*mfc_rt = NULL;
		mblk_t		*mp0 = NULL;
		mblk_t		*mp_copy = NULL;
		struct rtdetq	*rte = NULL;
		struct rtdetq	*rte_m, *rte1, *prev_rte;
		uint_t		hash;
		int		npkts;
		boolean_t	new_mfc = B_FALSE;
		mrtstat.mrts_mfc_misses++;
		/* BSD uses mrts_no_route++ */
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: no rte ill %s src %x g %x misses %d",
			    ill->ill_name, ntohl(src), ntohl(dst),
			    (int)mrtstat.mrts_mfc_misses);
		}
		/*
		 * The order of the following code differs from the BSD code.
		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
		 * code works, so SunOS 5.x wasn't changed to conform to the
		 * BSD version.
		 */

		/* Lock mfctable.  (hash indexes the same bucket as mfcbp.) */
		hash = MFCHASH(src, dst);
		mutex_enter(&(mfctable[hash].mfcb_lock));

		/*
		 * If we are turning off mrouted return an error
		 */
		if (is_mrouter_off()) {
			mutex_exit(&mfcbp->mfcb_lock);
			MFCB_REFRELE(mfcbp);
			return (-1);
		}

		/* Is there an upcall waiting for this packet? */
		for (mfc_rt = mfctable[hash].mfcb_mfc; mfc_rt;
		    mfc_rt = mfc_rt->mfc_next) {
			mutex_enter(&mfc_rt->mfc_mutex);
			if (ip_mrtdebug > 1) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: MFCTAB hash %d o 0x%x"
				    " g 0x%x\n",
				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			/* There is an upcall */
			if ((src == mfc_rt->mfc_origin.s_addr) &&
			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
			    (mfc_rt->mfc_rte != NULL) &&
			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
				/* found: mfc_rt->mfc_mutex stays held */
				break;
			}
			mutex_exit(&mfc_rt->mfc_mutex);
		}
		/* No upcall, so make a new entry into mfctable */
		if (mfc_rt == NULL) {
			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
			if (mfc_rt == NULL) {
				mrtstat.mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory "
				    "for mfc, mfc_rt\n"));
				goto error_return;
			} else
				new_mfc = B_TRUE;
			/* Get resources */
			/* TODO could copy header and dup rest */
			mp_copy = copymsg(mp);
			if (mp_copy == NULL) {
				mrtstat.mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory for "
				    "mblk, mp_copy\n"));
				goto error_return;
			}
			mutex_enter(&mfc_rt->mfc_mutex);
		}
		/* Get resources for rte, whether first rte or not first. */
		/* Add this packet into rtdetq */
		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
		if (rte == NULL) {
			mrtstat.mrts_fwd_drop++;
			mutex_exit(&mfc_rt->mfc_mutex);
			ip1dbg(("ip_mforward: out of memory for"
			    " rtdetq, rte\n"));
			goto error_return;
		}

		mp0 = copymsg(mp);
		if (mp0 == NULL) {
			mrtstat.mrts_fwd_drop++;
			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}
		rte->mp = mp0;
		if (pim_reg_packet) {
			ASSERT(reg_vif_num != ALL_VIFS);
			rte->ill = viftable[reg_vif_num].v_ipif->ipif_ill;
		} else {
			rte->ill = ill;
		}
		rte->rte_next = NULL;

		/*
		 * Determine if upcall q (rtdetq) has overflowed.
		 * mfc_rt->mfc_rte is null by mi_zalloc
		 * if it is the first message.
		 */
		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
		    rte_m = rte_m->rte_next)
			npkts++;
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: upcalls %d\n", npkts);
		}
		if (npkts > MAX_UPQ) {
			mrtstat.mrts_upq_ovflw++;
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}

		if (npkts == 0) {	/* first upcall */
			int i = 0;
			/*
			 * Now finish installing the new mfc! Now that we have
			 * resources! Insert new entry at head of hash chain.
			 * Use src and dst which are ipaddr_t's.
			 */
			mfc_rt->mfc_origin.s_addr = src;
			mfc_rt->mfc_mcastgrp.s_addr = dst;

			mutex_enter(&numvifs_mutex);
			for (i = 0; i < (int)numvifs; i++)
				mfc_rt->mfc_ttls[i] = 0;
			mutex_exit(&numvifs_mutex);
			mfc_rt->mfc_parent = ALL_VIFS;

			/* Link into table */
			if (ip_mrtdebug > 1) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
				    "g 0x%x\n", hash,
				    ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			mfc_rt->mfc_next = mfctable[hash].mfcb_mfc;
			mfctable[hash].mfcb_mfc = mfc_rt;
			mfc_rt->mfc_rte = NULL;
		}

		/* Link in the upcall */
		/* First upcall */
		if (mfc_rt->mfc_rte == NULL)
			mfc_rt->mfc_rte = rte;
		else {
			/* not the first upcall */
			prev_rte = mfc_rt->mfc_rte;
			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
			    prev_rte = rte1, rte1 = rte1->rte_next)
				;
			prev_rte->rte_next = rte;
		}

		/*
		 * No upcalls waiting, this is first one, so send a message to
		 * routing daemon to install a route into kernel table.
		 */
		if (npkts == 0) {
			struct igmpmsg	*im;
			/* ipha_protocol is 0, for upcall */
			ASSERT(mp_copy != NULL);
			im = (struct igmpmsg *)mp_copy->b_rptr;
			im->im_msgtype = IGMPMSG_NOCACHE;
			im->im_mbz = 0;
			mutex_enter(&numvifs_mutex);
			if (pim_reg_packet) {
				im->im_vif = (uchar_t)reg_vif_num;
				mutex_exit(&numvifs_mutex);
			} else {
				/*
				 * XXX do we need to hold locks here ?
				 */
				for (vifi = 0; vifi < numvifs; vifi++) {
					if (viftable[vifi].v_ipif == NULL)
						continue;
					if (viftable[vifi].v_ipif->ipif_ill ==
					    ill) {
						im->im_vif = (uchar_t)vifi;
						break;
					}
				}
				mutex_exit(&numvifs_mutex);
				ASSERT(vifi < numvifs);
			}

			mrtstat.mrts_upcalls++;
			/* Timer to discard upcalls if mrouted is too slow */
			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(mfctable[hash].mfcb_lock));
			putnext(RD(ip_g_mrouter), mp_copy);

		} else {
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(mfctable[hash].mfcb_lock));
			freemsg(mp_copy);
		}

		MFCB_REFRELE(mfcbp);
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	error_return:
		mutex_exit(&(mfctable[hash].mfcb_lock));
		MFCB_REFRELE(mfcbp);
		if (mfc_rt != NULL && (new_mfc == B_TRUE))
			mi_free((char *)mfc_rt);
		if (rte != NULL)
			mi_free((char *)rte);
		if (mp_copy != NULL)
			freemsg(mp_copy);
		if (mp0 != NULL)
			freemsg(mp0);
		return (-1);
	}
}

/*
 * Clean up the mfctable cache entry if upcall is not serviced.
 * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
 * Runs as a timeout(9F) callback; add_mfc/del_mfc cancel it by zeroing
 * mfc_timeout_id before calling untimeout().
 */
static void
expire_upcalls(void *arg)
{
	struct mfc *mfc_rt = arg;
	uint_t hash;
	struct mfc *prev_mfc, *mfc0;

	hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "expire_upcalls: hash %d s %x g %x",
		    hash, ntohl(mfc_rt->mfc_origin.s_addr),
		    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
	}
	MFCB_REFHOLD(&mfctable[hash]);
	mutex_enter(&mfc_rt->mfc_mutex);
	/*
	 * if timeout has been set to zero, than the
	 * entry has been filled, no need to delete it.
	 */
	if (mfc_rt->mfc_timeout_id == 0)
		goto done;
	mrtstat.mrts_cache_cleanups++;
	mfc_rt->mfc_timeout_id = 0;

	/* Determine entry to be cleaned up in cache table. */
	for (prev_mfc = mfc0 = mfctable[hash].mfcb_mfc; mfc0;
	    prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
		if (mfc0 == mfc_rt)
			break;

	/* del_mfc takes care of gone mfcs */
	ASSERT(prev_mfc != NULL);
	ASSERT(mfc0 != NULL);

	/*
	 * Delete the entry from the cache
	 */
	mfctable[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
	mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;

	/*
	 * release_mfc will drop all queued upcall packets.
	 * and will free the mbuf with the pkt, if, timing info.
	 */
done:
	mutex_exit(&mfc_rt->mfc_mutex);
	MFCB_REFRELE(&mfctable[hash]);
}

/*
 * Packet forwarding routine once entry in the cache is made.
 * Returns -1 (drop), 0 (ok, phyint) or 1 (arrived on tunnel), mirroring
 * ip_mforward()'s contract.
 */
static int
ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
    struct mfc *rt)
{
	vifi_t vifi;
	struct vif *vifp;
	ipaddr_t dst = ipha->ipha_dst;
	size_t plen = msgdsize(mp);
	vifi_t num_of_vifs;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
		    ill->ill_name);
	}

	/* Macro to send packet on vif */
#define	MC_SEND(ipha, mp, vifp, dst) { \
	if ((vifp)->v_flags & VIFF_TUNNEL) \
		encap_send((ipha), (mp), (vifp), (dst)); \
	else if ((vifp)->v_flags & VIFF_REGISTER) \
		register_send((ipha), (mp), (vifp), (dst)); \
	else \
		phyint_send((ipha), (mp), (vifp), (dst)); \
}

	vifi = rt->mfc_parent;

	/*
	 * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
	 * Mrouted had no route.
	 * We wanted the route installed in the mfctable to prevent multiple
	 * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
	 * NULL so we don't want to check the ill. Still needed as of Mrouted
	 * 3.6.
	 */
	if (vifi == NO_VIF) {
		ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
		    ill->ill_name));
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
		}
		return (-1);	/* drop pkt */
	}

	if (!lock_good_vif(&viftable[vifi]))
		return (-1);
	/*
	 * The MFC entries are not cleaned up when an ipif goes
	 * away thus this code has to guard against an MFC referencing
	 * an ipif that has been closed. Note: reset_mrt_vif_ipif
	 * sets the v_ipif to NULL when the ipif disappears.
	 */
	ASSERT(viftable[vifi].v_ipif != NULL);

	if (vifi >= numvifs) {
		cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
		    "%d ill %s viftable ill %s\n",
		    (int)vifi, (int)numvifs, ill->ill_name,
		    viftable[vifi].v_ipif->ipif_ill->ill_name);
		unlock_good_vif(&viftable[vifi]);
		return (-1);
	}
	/*
	 * Don't forward if it didn't arrive from the parent vif for its
	 * origin. But do match on the groups as we nominate only one
	 * ill in the group for receiving allmulti packets.
	 */
	if ((viftable[vifi].v_ipif->ipif_ill != ill &&
	    (ill->ill_group == NULL ||
	    viftable[vifi].v_ipif->ipif_ill->ill_group != ill->ill_group)) ||
	    (viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
		/* Came in the wrong interface */
		ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
		    "numvifs %d ill %s viftable ill %s\n",
		    (int)vifi, (int)numvifs, ill->ill_name,
		    viftable[vifi].v_ipif->ipif_ill->ill_name));
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mdq: arrived wrong if, vifi %d ill "
			    "%s viftable ill %s\n",
			    (int)vifi, ill->ill_name,
			    viftable[vifi].v_ipif->ipif_ill->ill_name);
		}
		mrtstat.mrts_wrong_if++;
		rt->mfc_wrong_if++;

		/*
		 * If we are doing PIM assert processing and we are forwarding
		 * packets on this interface, and it is a broadcast medium
		 * interface (and not a tunnel), send a message to the routing.
		 *
		 * We use the first ipif on the list, since it's all we have.
		 * Chances are the ipif_flags are the same for ipifs on the ill.
		 */
		if (pim_assert && rt->mfc_ttls[vifi] > 0 &&
		    (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
		    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
			mblk_t *mp_copy;
			struct igmpmsg *im;

			/* TODO could copy header and dup rest */
			mp_copy = copymsg(mp);
			if (mp_copy == NULL) {
				mrtstat.mrts_fwd_drop++;
				ip1dbg(("ip_mdq: out of memory "
				    "for mblk, mp_copy\n"));
				unlock_good_vif(&viftable[vifi]);
				return (-1);
			}

			im = (struct igmpmsg *)mp_copy->b_rptr;
			im->im_msgtype = IGMPMSG_WRONGVIF;
			im->im_mbz = 0;
			im->im_vif = (ushort_t)vifi;
			putnext(RD(ip_g_mrouter), mp_copy);
		}
		unlock_good_vif(&viftable[vifi]);
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	}
	/*
	 * If I sourced this packet, it counts as output, else it was input.
	 */
	if (ipha->ipha_src == viftable[vifi].v_lcl_addr.s_addr) {
		viftable[vifi].v_pkt_out++;
		viftable[vifi].v_bytes_out += plen;
	} else {
		viftable[vifi].v_pkt_in++;
		viftable[vifi].v_bytes_in += plen;
	}
	mutex_enter(&rt->mfc_mutex);
	rt->mfc_pkt_cnt++;
	rt->mfc_byte_cnt += plen;
	mutex_exit(&rt->mfc_mutex);
	unlock_good_vif(&viftable[vifi]);
	/*
	 * For each vif, decide if a copy of the packet should be forwarded.
	 * Forward if:
	 *   - the vif threshold ttl is non-zero AND
	 *   - the pkt ttl exceeds the vif's threshold
	 * A non-zero mfc_ttl indicates that the vif is part of
	 * the output set for the mfc entry.
	 */
	mutex_enter(&numvifs_mutex);
	num_of_vifs = numvifs;
	mutex_exit(&numvifs_mutex);
	for (vifp = viftable, vifi = 0; vifi < num_of_vifs; vifp++, vifi++) {
		if (!lock_good_vif(vifp))
			continue;
		if ((rt->mfc_ttls[vifi] > 0) &&
		    (ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
			/*
			 * lock_good_vif should not have succedded if
			 * v_ipif is null.
			 */
			ASSERT(vifp->v_ipif != NULL);
			vifp->v_pkt_out++;
			vifp->v_bytes_out += plen;
			MC_SEND(ipha, mp, vifp, dst);
			mrtstat.mrts_fwd_out++;
		}
		unlock_good_vif(vifp);
	}
	if (tunnel_src != 0)
		return (1);
	else
		return (0);
}

/*
 * Send the packet on physical interface.
 * Caller assumes can continue to use mp on return.
 * The packet is copied; the copy is either sent immediately or handed to
 * the token-bucket rate limiter (tbf_control) when the vif is rate-limited.
 */
/* ARGSUSED */
static void
phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	mblk_t	*mp_copy;

	/* Make a new reference to the packet */
	mp_copy = copymsg(mp);	/* TODO could copy header and dup rest */
	if (mp_copy == NULL) {
		mrtstat.mrts_fwd_drop++;
		ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
		return;
	}
	if (vifp->v_rate_limit <= 0)
		tbf_send_packet(vifp, mp_copy);
	else  {
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "phyint_send: tbf_contr rate %d "
			    "vifp 0x%p mp 0x%p dst 0x%x",
			    vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
		}
		tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr);
	}
}

/*
 * Send the whole packet for REGISTER encapsulation to PIM daemon
 * Caller assumes it can continue to use mp on return.
 * A struct igmpmsg header (really a relabelled IP header with
 * ipha_protocol == 0) is prepended and the message is passed upstream to
 * the routing daemon via ip_g_mrouter.
 */
/* ARGSUSED */
static void
register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	struct igmpmsg	*im;
	mblk_t		*mp_copy;
	ipha_t		*ipha_copy;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "register_send: src %x, dst %x\n",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
	}

	/*
	 * Copy the old packet & pullup its IP header into the new mblk_t so we
	 * can modify it.  Try to fill the new mblk_t since if we don't the
	 * ethernet driver will.
	 */
	mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
	if (mp_copy == NULL) {
		++mrtstat.mrts_pim_nomemory;
		if (ip_mrtdebug > 3) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "register_send: allocb failure.");
		}
		return;
	}

	/*
	 * Bump write pointer to account for igmpmsg being added.
	 */
	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg);

	/*
	 * Chain packet to new mblk_t.
	 */
	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
		++mrtstat.mrts_pim_nomemory;
		if (ip_mrtdebug > 3) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "register_send: copymsg failure.");
		}
		freeb(mp_copy);
		return;
	}

	/*
	 * icmp_rput() asserts that IP version field is set to an
	 * appropriate version. Hence, the struct igmpmsg that this really
	 * becomes, needs to have the correct IP version field.
	 */
	ipha_copy = (ipha_t *)mp_copy->b_rptr;
	*ipha_copy = multicast_encap_iphdr;

	/*
	 * The kernel uses the struct igmpmsg header to encode the messages to
	 * the multicast routing daemon. Fill in the fields in the header
	 * starting with the message type which is IGMPMSG_WHOLEPKT
	 */
	im = (struct igmpmsg *)mp_copy->b_rptr;
	im->im_msgtype = IGMPMSG_WHOLEPKT;
	im->im_src.s_addr = ipha->ipha_src;
	im->im_dst.s_addr = ipha->ipha_dst;

	/*
	 * Must Be Zero. This is because the struct igmpmsg is really an IP
	 * header with renamed fields and the multicast routing daemon uses
	 * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
	 */
	im->im_mbz = 0;

	++mrtstat.mrts_upcalls;
	if (!canputnext(RD(ip_g_mrouter))) {
		++mrtstat.mrts_pim_regsend_drops;
		if (ip_mrtdebug > 3) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "register_send: register upcall failure.");
		}
		freemsg(mp_copy);
	} else {
		putnext(RD(ip_g_mrouter), mp_copy);
	}
}

/*
 * pim_validate_cksum handles verification of the checksum in the
 * pim header. For PIM Register packets, the checksum is calculated
 * across the PIM header only. For all other packets, the checksum
 * is for the PIM header and remainder of the packet.
 *
 * returns: B_TRUE, if checksum is okay.
 *          B_FALSE, if checksum is not valid.
2294 */ 2295 static boolean_t 2296 pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp) 2297 { 2298 mblk_t *mp_dup; 2299 2300 if ((mp_dup = dupmsg(mp)) == NULL) 2301 return (B_FALSE); 2302 2303 mp_dup->b_rptr += IPH_HDR_LENGTH(ip); 2304 if (pimp->pim_type == PIM_REGISTER) 2305 mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN; 2306 if (IP_CSUM(mp_dup, 0, 0)) { 2307 freemsg(mp_dup); 2308 return (B_FALSE); 2309 } 2310 freemsg(mp_dup); 2311 return (B_TRUE); 2312 } 2313 2314 /* 2315 * int 2316 * pim_input(queue_t *, mblk_t *) - Process PIM protocol packets. 2317 * IP Protocol 103. Register messages are decapsulated and sent 2318 * onto multicast forwarding. 2319 */ 2320 int 2321 pim_input(queue_t *q, mblk_t *mp) 2322 { 2323 ipha_t *eip, *ip; 2324 int iplen, pimlen, iphlen; 2325 struct pim *pimp; /* pointer to a pim struct */ 2326 uint32_t *reghdr; 2327 2328 /* 2329 * Pullup the msg for PIM protocol processing. 2330 */ 2331 if (pullupmsg(mp, -1) == 0) { 2332 ++mrtstat.mrts_pim_nomemory; 2333 freemsg(mp); 2334 return (-1); 2335 } 2336 2337 ip = (ipha_t *)mp->b_rptr; 2338 iplen = ip->ipha_length; 2339 iphlen = IPH_HDR_LENGTH(ip); 2340 pimlen = ntohs(iplen) - iphlen; 2341 2342 /* 2343 * Validate lengths 2344 */ 2345 if (pimlen < PIM_MINLEN) { 2346 ++mrtstat.mrts_pim_malformed; 2347 if (ip_mrtdebug > 1) { 2348 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2349 "pim_input: length not at least minlen"); 2350 } 2351 freemsg(mp); 2352 return (-1); 2353 } 2354 2355 /* 2356 * Point to the PIM header. 2357 */ 2358 pimp = (struct pim *)((caddr_t)ip + iphlen); 2359 2360 /* 2361 * Check the version number. 
2362 */ 2363 if (pimp->pim_vers != PIM_VERSION) { 2364 ++mrtstat.mrts_pim_badversion; 2365 if (ip_mrtdebug > 1) { 2366 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2367 "pim_input: unknown version of PIM"); 2368 } 2369 freemsg(mp); 2370 return (-1); 2371 } 2372 2373 /* 2374 * Validate the checksum 2375 */ 2376 if (!pim_validate_cksum(mp, ip, pimp)) { 2377 ++mrtstat.mrts_pim_rcv_badcsum; 2378 if (ip_mrtdebug > 1) { 2379 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2380 "pim_input: invalid checksum"); 2381 } 2382 freemsg(mp); 2383 return (-1); 2384 } 2385 2386 if (pimp->pim_type != PIM_REGISTER) 2387 return (0); 2388 2389 reghdr = (uint32_t *)(pimp + 1); 2390 eip = (ipha_t *)(reghdr + 1); 2391 2392 /* 2393 * check if the inner packet is destined to mcast group 2394 */ 2395 if (!CLASSD(eip->ipha_dst)) { 2396 ++mrtstat.mrts_pim_badregisters; 2397 if (ip_mrtdebug > 1) { 2398 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2399 "pim_input: Inner pkt not mcast .. !"); 2400 } 2401 freemsg(mp); 2402 return (-1); 2403 } 2404 if (ip_mrtdebug > 1) { 2405 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2406 "register from %x, to %x, len %d", 2407 ntohl(eip->ipha_src), 2408 ntohl(eip->ipha_dst), 2409 ntohs(eip->ipha_length)); 2410 } 2411 /* 2412 * If the null register bit is not set, decapsulate 2413 * the packet before forwarding it. 2414 */ 2415 if (!(ntohl(*reghdr) & PIM_NULL_REGISTER)) { 2416 mblk_t *mp_copy; 2417 2418 /* Copy the message */ 2419 if ((mp_copy = copymsg(mp)) == NULL) { 2420 ++mrtstat.mrts_pim_nomemory; 2421 freemsg(mp); 2422 return (-1); 2423 } 2424 2425 /* 2426 * Decapsulate the packet and give it to 2427 * register_mforward. 2428 */ 2429 mp_copy->b_rptr += iphlen + sizeof (pim_t) + 2430 sizeof (*reghdr); 2431 if (register_mforward(q, mp_copy) != 0) { 2432 freemsg(mp); 2433 return (-1); 2434 } 2435 } 2436 2437 /* 2438 * Pass all valid PIM packets up to any process(es) listening on a raw 2439 * PIM socket. 
For Solaris it is done right after pim_input() is 2440 * called. 2441 */ 2442 return (0); 2443 } 2444 2445 /* 2446 * PIM sparse mode hook. Called by pim_input after decapsulating 2447 * the packet. Loop back the packet, as if we have received it. 2448 * In pim_input() we have to check if the destination is a multicast address. 2449 */ 2450 /* ARGSUSED */ 2451 static int 2452 register_mforward(queue_t *q, mblk_t *mp) 2453 { 2454 ASSERT(reg_vif_num <= numvifs); 2455 2456 if (ip_mrtdebug > 3) { 2457 ipha_t *ipha; 2458 2459 ipha = (ipha_t *)mp->b_rptr; 2460 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2461 "register_mforward: src %x, dst %x\n", 2462 ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 2463 } 2464 /* 2465 * Need to pass in to ip_mforward() the information that the 2466 * packet has arrived on the register_vif. We use the solution that 2467 * ip_mroute_decap() employs: use mp->b_prev to pass some information 2468 * to ip_mforward(). Nonzero value means the packet has arrived on a 2469 * tunnel (ip_mroute_decap() puts the address of the other side of the 2470 * tunnel there.) This is safe since ip_rput() either frees the packet 2471 * or passes it to ip_mforward(). We use 2472 * PIM_REGISTER_MARKER = 0xffffffff to indicate the has arrived on the 2473 * register vif. If in the future we have more than one register vifs, 2474 * then this will need re-examination. 2475 */ 2476 mp->b_prev = (mblk_t *)PIM_REGISTER_MARKER; 2477 ++mrtstat.mrts_pim_regforwards; 2478 ip_rput(q, mp); 2479 return (0); 2480 } 2481 2482 /* 2483 * Send an encapsulated packet. 2484 * Caller assumes can continue to use mp when routine returns. 
 */
/* ARGSUSED */
static void
encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	mblk_t *mp_copy;
	ipha_t *ipha_copy;
	size_t len;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "encap_send: vif %ld enter", (ptrdiff_t)(vifp - viftable));
	}
	len = ntohs(ipha->ipha_length);

	/*
	 * Copy the old packet & pullup it's IP header into the
	 * new mbuf so we can modify it.  Try to fill the new
	 * mbuf since if we don't the ethernet driver will.
	 * (32 bytes of headroom are reserved for lower-layer headers.)
	 */
	mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED);
	if (mp_copy == NULL)
		return;
	mp_copy->b_rptr += 32;
	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr);
	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
		freeb(mp_copy);
		return;
	}

	/*
	 * Fill in the encapsulating IP header: start from the template
	 * and set length and the tunnel endpoints.
	 * Remote tunnel dst in rmt_addr, from add_vif().
	 */
	ipha_copy = (ipha_t *)mp_copy->b_rptr;
	*ipha_copy = multicast_encap_iphdr;
	ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET);
	ipha_copy->ipha_length = htons(len + sizeof (ipha_t));
	ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr;
	ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr;
	ASSERT(ipha_copy->ipha_ident == 0);

	/*
	 * Turn the encapsulated IP header back into a valid one:
	 * decrement the TTL and recompute its header checksum.
	 */
	ipha = (ipha_t *)mp_copy->b_cont->b_rptr;
	ipha->ipha_ttl--;
	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "encap_send: group 0x%x", ntohl(ipha->ipha_dst));
	}
	/* As in phyint_send: shape through the token bucket if rate-limited. */
	if (vifp->v_rate_limit <= 0)
		tbf_send_packet(vifp, mp_copy);
	else
		/* ipha is from the original header */
		tbf_control(vifp, mp_copy, ipha);
}

/*
 * De-encapsulate a packet and feed it back through IP input.
2546 * This routine is called whenever IP gets a packet with prototype 2547 * IPPROTO_ENCAP and a local destination address. 2548 */ 2549 void 2550 ip_mroute_decap(queue_t *q, mblk_t *mp) 2551 { 2552 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2553 ipha_t *ipha_encap; 2554 int hlen = IPH_HDR_LENGTH(ipha); 2555 ipaddr_t src; 2556 struct vif *vifp; 2557 2558 /* 2559 * Dump the packet if it's not to a multicast destination or if 2560 * we don't have an encapsulating tunnel with the source. 2561 * Note: This code assumes that the remote site IP address 2562 * uniquely identifies the tunnel (i.e., that this site has 2563 * at most one tunnel with the remote site). 2564 */ 2565 ipha_encap = (ipha_t *)((char *)ipha + hlen); 2566 if (!CLASSD(ipha_encap->ipha_dst)) { 2567 mrtstat.mrts_bad_tunnel++; 2568 ip1dbg(("ip_mroute_decap: bad tunnel\n")); 2569 freemsg(mp); 2570 return; 2571 } 2572 src = (ipaddr_t)ipha->ipha_src; 2573 mutex_enter(&last_encap_lock); 2574 if (src != last_encap_src) { 2575 struct vif *vife; 2576 2577 vifp = viftable; 2578 vife = vifp + numvifs; 2579 last_encap_src = src; 2580 last_encap_vif = 0; 2581 for (; vifp < vife; ++vifp) { 2582 if (!lock_good_vif(vifp)) 2583 continue; 2584 if (vifp->v_rmt_addr.s_addr == src) { 2585 if (vifp->v_flags & VIFF_TUNNEL) 2586 last_encap_vif = vifp; 2587 if (ip_mrtdebug > 1) { 2588 (void) mi_strlog(ip_g_mrouter, 2589 1, SL_TRACE, 2590 "ip_mroute_decap: good tun " 2591 "vif %ld with %x", 2592 (ptrdiff_t)(vifp - viftable), 2593 ntohl(src)); 2594 } 2595 unlock_good_vif(vifp); 2596 break; 2597 } 2598 unlock_good_vif(vifp); 2599 } 2600 } 2601 if ((vifp = last_encap_vif) == 0) { 2602 mutex_exit(&last_encap_lock); 2603 mrtstat.mrts_bad_tunnel++; 2604 freemsg(mp); 2605 ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n", 2606 (ptrdiff_t)(vifp - viftable), ntohl(src))); 2607 return; 2608 } 2609 mutex_exit(&last_encap_lock); 2610 2611 /* 2612 * Need to pass in the tunnel source to ip_mforward (so that it can 2613 * verify that the 
packet arrived over the correct vif.) We use b_prev 2614 * to pass this information. This is safe since the ip_rput either 2615 * frees the packet or passes it to ip_mforward. 2616 */ 2617 mp->b_prev = (mblk_t *)(uintptr_t)src; 2618 mp->b_rptr += hlen; 2619 /* Feed back into ip_rput as an M_DATA. */ 2620 ip_rput(q, mp); 2621 } 2622 2623 /* 2624 * Remove all records with v_ipif == ipif. Called when an interface goes away 2625 * (stream closed). Called as writer. 2626 */ 2627 void 2628 reset_mrt_vif_ipif(ipif_t *ipif) 2629 { 2630 vifi_t vifi, tmp_vifi; 2631 vifi_t num_of_vifs; 2632 2633 /* Can't check vifi >= 0 since vifi_t is unsigned! */ 2634 2635 mutex_enter(&numvifs_mutex); 2636 num_of_vifs = numvifs; 2637 mutex_exit(&numvifs_mutex); 2638 2639 for (vifi = num_of_vifs; vifi != 0; vifi--) { 2640 tmp_vifi = vifi - 1; 2641 if (viftable[tmp_vifi].v_ipif == ipif) { 2642 (void) del_vif(&tmp_vifi, NULL, NULL); 2643 } 2644 } 2645 } 2646 2647 /* Remove pending upcall msgs when ill goes away. Called by ill_delete. */ 2648 void 2649 reset_mrt_ill(ill_t *ill) 2650 { 2651 struct mfc *rt; 2652 struct rtdetq *rte; 2653 int i; 2654 2655 for (i = 0; i < MFCTBLSIZ; i++) { 2656 MFCB_REFHOLD(&mfctable[i]); 2657 if ((rt = mfctable[i].mfcb_mfc) != NULL) { 2658 if (ip_mrtdebug > 1) { 2659 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2660 "reset_mrt_ill: mfctable [%d]", i); 2661 } 2662 while (rt != NULL) { 2663 mutex_enter(&rt->mfc_mutex); 2664 while ((rte = rt->mfc_rte) != NULL) { 2665 if (rte->ill == ill) { 2666 if (ip_mrtdebug > 1) { 2667 (void) mi_strlog( 2668 ip_g_mrouter, 2669 1, SL_TRACE, 2670 "reset_mrt_ill: " 2671 "ill 0x%p", ill); 2672 } 2673 rt->mfc_rte = rte->rte_next; 2674 freemsg(rte->mp); 2675 mi_free((char *)rte); 2676 } 2677 } 2678 mutex_exit(&rt->mfc_mutex); 2679 rt = rt->mfc_next; 2680 } 2681 } 2682 MFCB_REFRELE(&mfctable[i]); 2683 } 2684 } 2685 2686 /* 2687 * Token bucket filter module. 2688 * The ipha is for mcastgrp destination for phyint and encap. 
 */
static void
tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
{
	size_t p_len = msgdsize(mp);
	struct tbf *t = vifp->v_tbf;
	timeout_id_t id = 0;

	/* Drop if packet is too large: it can never fit in the bucket. */
	if (p_len > MAX_BKT_SIZE) {
		mrtstat.mrts_pkt2large++;
		freemsg(mp);
		return;
	}
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
		    (ptrdiff_t)(vifp - viftable), t->tbf_q_len,
		    ntohl(ipha->ipha_dst));
	}

	mutex_enter(&t->tbf_lock);

	/* Refill tokens for the time elapsed since the last packet. */
	tbf_update_tokens(vifp);

	/*
	 * If there are enough tokens,
	 * and the queue is empty, send this packet out.
	 */
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d",
		    (ptrdiff_t)(vifp - viftable), t->tbf_n_tok, p_len,
		    t->tbf_q_len);
	}
	/* No packets are queued */
	if (t->tbf_q_len == 0) {
		/* queue empty, send packet if enough tokens */
		if (p_len <= t->tbf_n_tok) {
			t->tbf_n_tok -= p_len;
			/* Drop the lock before the actual send. */
			mutex_exit(&t->tbf_lock);
			tbf_send_packet(vifp, mp);
			return;
		} else {
			/* Queue packet and timeout till later */
			tbf_queue(vifp, mp);
			ASSERT(vifp->v_timeout_id == 0);
			vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
			    TBF_REPROCESS);
		}
	} else if (t->tbf_q_len < t->tbf_max_q_len) {
		/* Finite queue length, so queue pkts and process queue */
		tbf_queue(vifp, mp);
		tbf_process_q(vifp);
	} else {
		/*
		 * Queue is full: selective-drop needs UDP port numbers, so
		 * check that we have UDP header with IP header.
		 */
		size_t hdr_length = IPH_HDR_LENGTH(ipha) +
		    sizeof (struct udphdr);

		if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
			if (!pullupmsg(mp, hdr_length)) {
				freemsg(mp);
				ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
				    "vif %ld src 0x%x dst 0x%x\n",
				    (ptrdiff_t)(vifp - viftable),
				    ntohl(ipha->ipha_src),
				    ntohl(ipha->ipha_dst)));
				mutex_exit(&vifp->v_tbf->tbf_lock);
				return;
			} else
				/* Have to reassign ipha after pullupmsg */
				ipha = (ipha_t *)mp->b_rptr;
		}
		/*
		 * Queue length too much,
		 * try to selectively dq, or queue and process
		 */
		if (!tbf_dq_sel(vifp, ipha)) {
			mrtstat.mrts_q_overflow++;
			freemsg(mp);
		} else {
			tbf_queue(vifp, mp);
			tbf_process_q(vifp);
		}
	}
	/*
	 * If the queue drained completely, cancel the reprocess timer —
	 * untimeout() must be called after releasing tbf_lock, since the
	 * timeout handler takes the same lock.
	 */
	if (t->tbf_q_len == 0) {
		id = vifp->v_timeout_id;
		vifp->v_timeout_id = 0;
	}
	mutex_exit(&vifp->v_tbf->tbf_lock);
	if (id != 0)
		(void) untimeout(id);
}

/*
 * Adds a packet to the tbf queue at the interface.
 * The ipha is for mcastgrp destination for phyint and encap.
 */
static void
tbf_queue(struct vif *vifp, mblk_t *mp)
{
	struct tbf *t = vifp->v_tbf;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_queue: vif %ld", (ptrdiff_t)(vifp - viftable));
	}
	ASSERT(MUTEX_HELD(&t->tbf_lock));

	if (t->tbf_t == NULL) {
		/* Queue was empty */
		t->tbf_q = mp;
	} else {
		/* Insert at tail */
		t->tbf_t->b_next = mp;
	}
	/* set new tail pointer */
	t->tbf_t = mp;

	/* The queue is singly linked through b_next; clear both links. */
	mp->b_next = mp->b_prev = NULL;

	t->tbf_q_len++;
}

/*
 * Process the queue at the vif interface.
 * Drops the tbf_lock when sending packets.
 *
 * NOTE : The caller should quntimeout if the queue length is 0.
 */
static void
tbf_process_q(struct vif *vifp)
{
	mblk_t *mp;
	struct tbf *t = vifp->v_tbf;
	size_t len;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_process_q 1: vif %ld qlen = %d",
		    (ptrdiff_t)(vifp - viftable), t->tbf_q_len);
	}

	/*
	 * Loop through the queue at the interface and send
	 * as many packets as possible.
	 */
	ASSERT(MUTEX_HELD(&t->tbf_lock));

	while (t->tbf_q_len > 0) {
		mp = t->tbf_q;
		len = (size_t)msgdsize(mp);	/* length of ip pkt */

		/* Determine if the packet can be sent */
		if (len <= t->tbf_n_tok) {
			/*
			 * If so, reduce no. of tokens, dequeue the packet,
			 * send the packet.
			 */
			t->tbf_n_tok -= len;

			t->tbf_q = mp->b_next;
			if (--t->tbf_q_len == 0) {
				t->tbf_t = NULL;
			}
			mp->b_next = NULL;
			/* Exit mutex before sending packet, then re-enter */
			mutex_exit(&t->tbf_lock);
			tbf_send_packet(vifp, mp);
			mutex_enter(&t->tbf_lock);
		} else
			/* Head of queue doesn't fit: stop (FIFO order). */
			break;
	}
}

/* Called at tbf timeout to update tokens, process q and reset timer.  */
static void
tbf_reprocess_q(void *arg)
{
	struct vif *vifp = arg;

	mutex_enter(&vifp->v_tbf->tbf_lock);
	/* This invocation consumed the pending timeout. */
	vifp->v_timeout_id = 0;
	tbf_update_tokens(vifp);

	tbf_process_q(vifp);

	/* Re-arm only if packets are still waiting for tokens. */
	if (vifp->v_tbf->tbf_q_len > 0) {
		vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
		    TBF_REPROCESS);
	}
	mutex_exit(&vifp->v_tbf->tbf_lock);

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_reprcess_q: vif %ld timeout id = %p",
		    (ptrdiff_t)(vifp - viftable), vifp->v_timeout_id);
	}
}

/*
 * Function that will selectively discard a member of the tbf queue,
 * based on the precedence value and the priority.
 *
 * NOTE : The caller should quntimeout if the queue length is 0.
2894 */ 2895 static int 2896 tbf_dq_sel(struct vif *vifp, ipha_t *ipha) 2897 { 2898 uint_t p; 2899 struct tbf *t = vifp->v_tbf; 2900 mblk_t **np; 2901 mblk_t *last, *mp; 2902 2903 if (ip_mrtdebug > 1) { 2904 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2905 "dq_sel: vif %ld dst 0x%x", 2906 (ptrdiff_t)(vifp - viftable), ntohl(ipha->ipha_dst)); 2907 } 2908 2909 ASSERT(MUTEX_HELD(&t->tbf_lock)); 2910 p = priority(vifp, ipha); 2911 2912 np = &t->tbf_q; 2913 last = NULL; 2914 while ((mp = *np) != NULL) { 2915 if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) { 2916 *np = mp->b_next; 2917 /* If removing the last packet, fix the tail pointer */ 2918 if (mp == t->tbf_t) 2919 t->tbf_t = last; 2920 mp->b_prev = mp->b_next = NULL; 2921 freemsg(mp); 2922 /* 2923 * It's impossible for the queue to be empty, but 2924 * we check anyway. 2925 */ 2926 if (--t->tbf_q_len == 0) { 2927 t->tbf_t = NULL; 2928 } 2929 mrtstat.mrts_drop_sel++; 2930 return (1); 2931 } 2932 np = &mp->b_next; 2933 last = mp; 2934 } 2935 return (0); 2936 } 2937 2938 /* Sends packet, 2 cases - encap tunnel, phyint. */ 2939 static void 2940 tbf_send_packet(struct vif *vifp, mblk_t *mp) 2941 { 2942 ipif_t *ipif; 2943 2944 /* If encap tunnel options */ 2945 if (vifp->v_flags & VIFF_TUNNEL) { 2946 if (ip_mrtdebug > 1) { 2947 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2948 "tbf_send_pkt: ENCAP tunnel vif %ld", 2949 (ptrdiff_t)(vifp - viftable)); 2950 } 2951 2952 /* 2953 * Feed into ip_wput which will set the ident field and 2954 * checksum the encapsulating header. 2955 * BSD gets the cached route vifp->v_route from ip_output() 2956 * to speed up route table lookups. Not necessary in SunOS 5.x. 2957 */ 2958 put(vifp->v_ipif->ipif_wq, mp); 2959 return; 2960 2961 /* phyint */ 2962 } else { 2963 /* Need to loop back to members on the outgoing interface. 
*/ 2964 ipha_t *ipha; 2965 ipaddr_t dst; 2966 ipha = (ipha_t *)mp->b_rptr; 2967 dst = ipha->ipha_dst; 2968 ipif = vifp->v_ipif; 2969 2970 mutex_enter(&ipif->ipif_ill->ill_lock); 2971 if (ilm_lookup_ipif(ipif, dst) != NULL) { 2972 /* 2973 * The packet is not yet reassembled, thus we need to 2974 * pass it to ip_rput_local for checksum verification 2975 * and reassembly (and fanout the user stream). 2976 */ 2977 mblk_t *mp_loop; 2978 ire_t *ire; 2979 2980 mutex_exit(&ipif->ipif_ill->ill_lock); 2981 if (ip_mrtdebug > 1) { 2982 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2983 "tbf_send_pkt: loopback vif %ld", 2984 (ptrdiff_t)(vifp - viftable)); 2985 } 2986 mp_loop = copymsg(mp); 2987 ire = ire_ctable_lookup(~0, 0, IRE_BROADCAST, NULL, 2988 ALL_ZONES, NULL, MATCH_IRE_TYPE); 2989 2990 if (mp_loop != NULL && ire != NULL) { 2991 IP_RPUT_LOCAL(ipif->ipif_rq, mp_loop, 2992 ((ipha_t *)mp_loop->b_rptr), 2993 ire, (ill_t *)ipif->ipif_rq->q_ptr); 2994 } else { 2995 /* Either copymsg failed or no ire */ 2996 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2997 "tbf_send_pkt: mp_loop 0x%p, ire 0x%p " 2998 "vif %ld\n", mp_loop, ire, 2999 (ptrdiff_t)(vifp - viftable)); 3000 } 3001 if (ire != NULL) 3002 ire_refrele(ire); 3003 } else { 3004 mutex_exit(&ipif->ipif_ill->ill_lock); 3005 } 3006 if (ip_mrtdebug > 1) { 3007 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 3008 "tbf_send_pkt: phyint forward vif %ld dst = 0x%x", 3009 (ptrdiff_t)(vifp - viftable), ntohl(dst)); 3010 } 3011 ip_rput_forward_multicast(dst, mp, ipif); 3012 } 3013 } 3014 3015 /* 3016 * Determine the current time and then the elapsed time (between the last time 3017 * and time now). Update the no. of tokens in the bucket. 
 */
static void
tbf_update_tokens(struct vif *vifp)
{
	timespec_t	tp;
	hrtime_t	tm;
	struct tbf	*t = vifp->v_tbf;

	ASSERT(MUTEX_HELD(&t->tbf_lock));

	/* Time in secs and nsecs, rate limit in kbits/sec */
	gethrestime(&tp);

	/* tm = nanoseconds elapsed since the last packet was processed. */
	/*LINTED*/
	TV_DELTA(tp, t->tbf_last_pkt_t, tm);

	/*
	 * This formula is actually
	 * "time in seconds" * "bytes/second".  Scaled for nsec.
	 * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8)
	 *
	 * The (1000/1024) was introduced in add_vif to optimize
	 * this divide into a shift.
	 */
	t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8;
	t->tbf_last_pkt_t = tp;

	/* The bucket never holds more than MAX_BKT_SIZE bytes of credit. */
	if (t->tbf_n_tok > MAX_BKT_SIZE)
		t->tbf_n_tok = MAX_BKT_SIZE;
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_update_tok: tm %lld tok %d vif %ld",
		    tm, t->tbf_n_tok, (ptrdiff_t)(vifp - viftable));
	}
}

/*
 * Priority currently is based on port nos.
 * Different forwarding mechanisms have different ways
 * of obtaining the port no. Hence, the vif must be
 * given along with the packet itself.
3059 * 3060 */ 3061 static int 3062 priority(struct vif *vifp, ipha_t *ipha) 3063 { 3064 int prio; 3065 3066 /* Temporary hack; may add general packet classifier some day */ 3067 3068 ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock)); 3069 3070 /* 3071 * The UDP port space is divided up into four priority ranges: 3072 * [0, 16384) : unclassified - lowest priority 3073 * [16384, 32768) : audio - highest priority 3074 * [32768, 49152) : whiteboard - medium priority 3075 * [49152, 65536) : video - low priority 3076 */ 3077 3078 if (ipha->ipha_protocol == IPPROTO_UDP) { 3079 struct udphdr *udp = 3080 (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha)); 3081 switch (ntohs(udp->uh_dport) & 0xc000) { 3082 case 0x4000: 3083 prio = 70; 3084 break; 3085 case 0x8000: 3086 prio = 60; 3087 break; 3088 case 0xc000: 3089 prio = 55; 3090 break; 3091 default: 3092 prio = 50; 3093 break; 3094 } 3095 if (ip_mrtdebug > 1) { 3096 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 3097 "priority: port %x prio %d\n", 3098 ntohs(udp->uh_dport), prio); 3099 } 3100 } else 3101 prio = 50; /* default priority */ 3102 return (prio); 3103 } 3104 3105 /* 3106 * End of token bucket filter modifications 3107 */ 3108 3109 3110 3111 /* 3112 * Produces data for netstat -M. 3113 */ 3114 int 3115 ip_mroute_stats(mblk_t *mp) 3116 { 3117 mrtstat.mrts_vifctlSize = sizeof (struct vifctl); 3118 mrtstat.mrts_mfcctlSize = sizeof (struct mfcctl); 3119 if (!snmp_append_data(mp, (char *)&mrtstat, sizeof (mrtstat))) { 3120 ip0dbg(("ip_mroute_stats: failed %ld bytes\n", 3121 (size_t)sizeof (mrtstat))); 3122 return (0); 3123 } 3124 return (1); 3125 } 3126 3127 /* 3128 * Sends info for SNMP's MIB. 3129 */ 3130 int 3131 ip_mroute_vif(mblk_t *mp) 3132 { 3133 struct vifctl vi; 3134 vifi_t vifi; 3135 3136 mutex_enter(&numvifs_mutex); 3137 for (vifi = 0; vifi < numvifs; vifi++) { 3138 if (viftable[vifi].v_lcl_addr.s_addr == 0) 3139 continue; 3140 /* 3141 * No locks here, an approximation is fine. 
3142 */ 3143 vi.vifc_vifi = vifi; 3144 vi.vifc_flags = viftable[vifi].v_flags; 3145 vi.vifc_threshold = viftable[vifi].v_threshold; 3146 vi.vifc_rate_limit = viftable[vifi].v_rate_limit; 3147 vi.vifc_lcl_addr = viftable[vifi].v_lcl_addr; 3148 vi.vifc_rmt_addr = viftable[vifi].v_rmt_addr; 3149 vi.vifc_pkt_in = viftable[vifi].v_pkt_in; 3150 vi.vifc_pkt_out = viftable[vifi].v_pkt_out; 3151 3152 if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) { 3153 ip0dbg(("ip_mroute_vif: failed %ld bytes\n", 3154 (size_t)sizeof (vi))); 3155 return (0); 3156 } 3157 } 3158 mutex_exit(&numvifs_mutex); 3159 return (1); 3160 } 3161 3162 /* 3163 * Called by ip_snmp_get to send up multicast routing table. 3164 */ 3165 int 3166 ip_mroute_mrt(mblk_t *mp) 3167 { 3168 int i, j; 3169 struct mfc *rt; 3170 struct mfcctl mfcc; 3171 3172 /* 3173 * Make sure multicast has not been turned off. 3174 */ 3175 if (is_mrouter_off()) 3176 return (1); 3177 3178 /* Loop over all hash buckets and their chains */ 3179 for (i = 0; i < MFCTBLSIZ; i++) { 3180 MFCB_REFHOLD(&mfctable[i]); 3181 for (rt = mfctable[i].mfcb_mfc; rt; rt = rt->mfc_next) { 3182 mutex_enter(&rt->mfc_mutex); 3183 if (rt->mfc_rte != NULL || 3184 (rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 3185 mutex_exit(&rt->mfc_mutex); 3186 continue; 3187 } 3188 mfcc.mfcc_origin = rt->mfc_origin; 3189 mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp; 3190 mfcc.mfcc_parent = rt->mfc_parent; 3191 mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt; 3192 mutex_enter(&numvifs_mutex); 3193 for (j = 0; j < (int)numvifs; j++) 3194 mfcc.mfcc_ttls[j] = rt->mfc_ttls[j]; 3195 for (j = (int)numvifs; j < MAXVIFS; j++) 3196 mfcc.mfcc_ttls[j] = 0; 3197 mutex_exit(&numvifs_mutex); 3198 3199 mutex_exit(&rt->mfc_mutex); 3200 if (!snmp_append_data(mp, (char *)&mfcc, 3201 sizeof (mfcc))) { 3202 MFCB_REFRELE(&mfctable[i]); 3203 ip0dbg(("ip_mroute_mrt: failed %ld bytes\n", 3204 (size_t)sizeof (mfcc))); 3205 return (0); 3206 } 3207 } 3208 MFCB_REFRELE(&mfctable[i]); 3209 } 3210 return (1); 3211 } 3212