1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. 24 * All rights reserved. Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 /* 31 * Procedures for the kernel part of DVMRP, 32 * a Distance-Vector Multicast Routing Protocol. 33 * (See RFC-1075) 34 * Written by David Waitzman, BBN Labs, August 1988. 35 * Modified by Steve Deering, Stanford, February 1989. 36 * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 37 * Modified by Van Jacobson, LBL, January 1993 38 * Modified by Ajit Thyagarajan, PARC, August 1993 39 * Modified by Bill Fenner, PARC, April 1995 40 * 41 * MROUTING 3.5 42 */ 43 44 /* 45 * TODO 46 * - function pointer field in vif, void *vif_sendit() 47 */ 48 49 #include <sys/types.h> 50 #include <sys/stream.h> 51 #include <sys/dlpi.h> 52 #include <sys/stropts.h> 53 #include <sys/strlog.h> 54 #include <sys/systm.h> 55 #include <sys/ddi.h> 56 #include <sys/cmn_err.h> 57 #include <sys/zone.h> 58 59 #include <sys/param.h> 60 #include <sys/socket.h> 61 #define _SUN_TPI_VERSION 2 62 #include <sys/tihdr.h> 63 #include <sys/vtrace.h> 64 #include <sys/debug.h> 65 #include <net/if.h> 66 #include <net/if_arp.h> 67 #include <sys/sockio.h> 68 #include <net/route.h> 69 #include <netinet/in.h> 70 #include <net/if_dl.h> 71 72 #include <inet/common.h> 73 #include <inet/mi.h> 74 #include <inet/nd.h> 75 #include <inet/arp.h> 76 #include <inet/mib2.h> 77 #include <netinet/ip6.h> 78 #include <inet/ip.h> 79 #include <inet/snmpcom.h> 80 81 #include <netinet/igmp.h> 82 #include <netinet/igmp_var.h> 83 #include <netinet/udp.h> 84 #include <netinet/ip_mroute.h> 85 #include <inet/ip_multi.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_if.h> 88 #include <inet/ipclassifier.h> 89 90 #include <netinet/pim.h> 91 92 93 /* 94 * MT Design: 95 * 96 * There are three main data structures viftable, mfctable and tbftable that 97 * need to be protected against MT races. 98 * 99 * vitable is a fixed length array of vif structs. There is no lock to protect 100 * the whole array, instead each struct is protected by its own indiviual lock. 101 * The value of v_marks in conjuction with the value of v_refcnt determines the 102 * current state of a vif structure. One special state that needs mention 103 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates 104 * that vif is being initalized. 
105 * Each structure is freed when the refcnt goes down to zero. If a delete comes 106 * in when the the recfnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED 107 * which prevents the struct from further use. When the refcnt goes to zero 108 * the struct is freed and is marked VIF_MARK_NOTINUSE. 109 * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill 110 * from going away a refhold is put on the ipif before using it. see 111 * lock_good_vif() and unlock_good_vif(). 112 * 113 * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts 114 * of the vif struct. 115 * 116 * tbftable is also a fixed length array of tbf structs and is only accessed 117 * via v_tbf. It is protected by its own lock tbf_lock. 118 * 119 * Lock Ordering is 120 * v_lock --> tbf_lock 121 * v_lock --> ill_locK 122 * 123 * mfctable is a fixed size hash table of mfc buckets strcuts (struct mfcb). 124 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker, 125 * it also maintains a state. These fields are protected by a lock (mfcb_lock). 126 * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to 127 * protect the struct elements. 128 * 129 * mfc structs are dynamically allocated and are singly linked 130 * at the head of the chain. When an mfc structure is to be deleted 131 * it is marked condemned and so is the state in the bucket struct. 132 * When the last walker of the hash bucket exits all the mfc structs 133 * marked condemed are freed. 134 * 135 * Locking Hierarchy: 136 * The bucket lock should be acquired before the mfc struct lock. 137 * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking 138 * operations on the bucket struct. 139 * 140 * last_encap_lock and numvifs_mutex should be acquired after 141 * acquring vif or mfc locks. These locks protect some global variables. 142 * 143 * The statistics are not currently protected by a lock 144 * causing the stats be be approximate, not exact. 
145 */ 146 147 /* 148 * Globals 149 * All but ip_g_mrouter and ip_mrtproto could be static, 150 * except for netstat or debugging purposes. 151 */ 152 queue_t *ip_g_mrouter = NULL; 153 static kmutex_t ip_g_mrouter_mutex; 154 155 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 156 struct mrtstat mrtstat; /* Stats for netstat */ 157 158 #define NO_VIF MAXVIFS /* from mrouted, no route for src */ 159 160 /* 161 * Timeouts: 162 * Upcall timeouts - BSD uses boolean_t mfc->expire and 163 * nexpire[MFCTBLSIZE], the number of times expire has been called. 164 * SunOS 5.x uses mfc->timeout for each mfc. 165 * Some Unixes are limited in the number of simultaneous timeouts 166 * that can be run, SunOS 5.x does not have this restriction. 167 */ 168 169 /* 170 * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and 171 * UPCALL_EXPIRE is the nmber of timeouts before a particular upcall 172 * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE 173 */ 174 #define EXPIRE_TIMEOUT (hz/4) /* 4x / second */ 175 #define UPCALL_EXPIRE 6 /* number of timeouts */ 176 177 /* 178 * Hash function for a source, group entry 179 */ 180 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 181 ((g) >> 20) ^ ((g) >> 10) ^ (g)) 182 183 /* 184 * mfctable: 185 * Includes all mfcs, including waiting upcalls. 186 * Multiple mfcs per bucket. 187 */ 188 static struct mfcb mfctable[MFCTBLSIZ]; /* kernel routing table */ 189 190 /* 191 * Define the token bucket filter structures. 192 * tbftable -> each vif has one of these for storing info. 
193 */ 194 struct tbf tbftable[MAXVIFS]; 195 #define TBF_REPROCESS (hz / 100) /* 100x /second */ 196 197 /* Identify PIM packet that came on a Register interface */ 198 #define PIM_REGISTER_MARKER 0xffffffff 199 200 /* Function declarations */ 201 static int add_mfc(struct mfcctl *); 202 static int add_vif(struct vifctl *, queue_t *, mblk_t *); 203 static int del_mfc(struct mfcctl *); 204 static int del_vif(vifi_t *, queue_t *, mblk_t *); 205 static void del_vifp(struct vif *); 206 static void encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); 207 static void expire_upcalls(void *); 208 static void fill_route(struct mfc *, struct mfcctl *); 209 static int get_assert(uchar_t *); 210 static int get_lsg_cnt(struct sioc_lsg_req *); 211 static int get_sg_cnt(struct sioc_sg_req *); 212 static int get_version(uchar_t *); 213 static int get_vif_cnt(struct sioc_vif_req *); 214 static int ip_mdq(mblk_t *, ipha_t *, ill_t *, 215 ipaddr_t, struct mfc *); 216 static int ip_mrouter_init(queue_t *, uchar_t *, int); 217 static void phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); 218 static int register_mforward(queue_t *, mblk_t *); 219 static void register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); 220 static int set_assert(int *); 221 222 /* 223 * Token Bucket Filter functions 224 */ 225 static int priority(struct vif *, ipha_t *); 226 static void tbf_control(struct vif *, mblk_t *, ipha_t *); 227 static int tbf_dq_sel(struct vif *, ipha_t *); 228 static void tbf_process_q(struct vif *); 229 static void tbf_queue(struct vif *, mblk_t *); 230 static void tbf_reprocess_q(void *); 231 static void tbf_send_packet(struct vif *, mblk_t *); 232 static void tbf_update_tokens(struct vif *); 233 static void release_mfc(struct mfcb *); 234 235 static boolean_t is_mrouter_off(void); 236 /* 237 * Encapsulation packets 238 */ 239 240 #define ENCAP_TTL 64 241 242 /* prototype IP hdr for encapsulated packets */ 243 static ipha_t multicast_encap_iphdr = { 244 
IP_SIMPLE_HDR_VERSION, 245 0, /* tos */ 246 sizeof (ipha_t), /* total length */ 247 0, /* id */ 248 0, /* frag offset */ 249 ENCAP_TTL, IPPROTO_ENCAP, 250 0, /* checksum */ 251 }; 252 253 /* 254 * Private variables. 255 */ 256 static int saved_ip_g_forward = -1; 257 258 /* 259 * numvifs is only a hint about the max interface being used. 260 */ 261 static vifi_t numvifs = 0; 262 static kmutex_t numvifs_mutex; 263 264 static struct vif viftable[MAXVIFS+1]; /* Index needs to accomodate */ 265 /* the value of NO_VIF, which */ 266 /* is MAXVIFS. */ 267 268 /* 269 * One-back cache used to locate a tunnel's vif, 270 * given a datagram's src ip address. 271 */ 272 static ipaddr_t last_encap_src; 273 static struct vif *last_encap_vif; 274 static kmutex_t last_encap_lock; /* Protects the above */ 275 276 /* 277 * Whether or not special PIM assert processing is enabled. 278 */ 279 /* 280 * reg_vif_num is protected by numvifs_mutex 281 */ 282 static vifi_t reg_vif_num = ALL_VIFS; /* Index to Register vif */ 283 static int pim_assert; 284 285 /* 286 * Rate limit for assert notification messages, in nsec. 
 */
#define	ASSERT_MSG_TIME	3000000000	/* 3 seconds */


/*
 * Take a reference on a vif under v_lock.
 */
#define	VIF_REFHOLD(vifp) {			\
	mutex_enter(&(vifp)->v_lock);		\
	(vifp)->v_refcnt++;			\
	mutex_exit(&(vifp)->v_lock);		\
}

/*
 * Drop a reference on a vif whose v_lock is already held by the caller.
 * When the last reference is dropped and the vif has been marked
 * VIF_MARK_CONDEMNED, the vif is freed via del_vifp(); otherwise v_lock
 * is simply released.  In either case the caller must not touch the vif
 * (or v_lock) afterwards.
 */
#define	VIF_REFRELE_LOCKED(vifp) {				\
	(vifp)->v_refcnt--;					\
	if ((vifp)->v_refcnt == 0 &&				\
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
			del_vifp(vifp);				\
	} else {						\
		mutex_exit(&(vifp)->v_lock);			\
	}							\
}

/*
 * Same as VIF_REFRELE_LOCKED() except that v_lock is acquired here.
 */
#define	VIF_REFRELE(vifp) {					\
	mutex_enter(&(vifp)->v_lock);				\
	(vifp)->v_refcnt--;					\
	if ((vifp)->v_refcnt == 0 &&				\
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
			del_vifp(vifp);				\
	} else {						\
		mutex_exit(&(vifp)->v_lock);			\
	}							\
}

/*
 * Take a walker reference on an mfc hash bucket, under mfcb_lock.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	(mfcb)->mfcb_refcnt++;				\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}

/*
 * Drop a walker reference on an mfc hash bucket.  The last walker out of
 * a bucket marked MFCB_MARK_CONDEMNED frees the bucket's condemned mfc
 * entries via release_mfc().
 */
#define	MFCB_REFRELE(mfcb) {					\
	mutex_enter(&(mfcb)->mfcb_lock);			\
	ASSERT((mfcb)->mfcb_refcnt != 0);			\
	if (--(mfcb)->mfcb_refcnt == 0 &&			\
		((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) {	\
			release_mfc(mfcb);			\
	}							\
	mutex_exit(&(mfcb)->mfcb_lock);				\
}

/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 * Skips entries with pending upcalls (mfc_rte != NULL) and entries marked
 * MFCB_MARK_CONDEMNED.  Sets rt to the matching mfc, or to NULL when no
 * match is found.  Caller is expected to hold a walker reference on the
 * bucket (MFCB_REFHOLD) across the search and any use of rt.
 * Type of service parameter to be added in the future!
 */
#define	MFCFIND(mfcbp, o, g, rt) { \
	struct mfc	*_mb_rt = NULL; \
	rt = NULL; \
	_mb_rt = mfcbp->mfcb_mfc; \
	while (_mb_rt) { \
		if ((_mb_rt->mfc_origin.s_addr == o) && \
		    (_mb_rt->mfc_mcastgrp.s_addr == g) && \
		    (_mb_rt->mfc_rte == NULL) && \
		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \
			rt = _mb_rt; \
			break; \
		} \
		_mb_rt = _mb_rt->mfc_next; \
	} \
}

/*
 * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
 * are inefficient.
We use gethrestime() which returns a timespec_t with
 * sec and nsec, the resolution is machine dependent.
 * The following 2 macros have been changed to use nsec instead of usec.
 */

/*
 * Macros to compute elapsed time efficiently.
 * Borrowed from Van Jacobson's scheduling code.
 * Delta should be a hrtime_t.
 */
#define	TV_DELTA(a, b, delta) { \
	int xxs; \
 \
	delta = (a).tv_nsec - (b).tv_nsec; \
	if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \
		switch (xxs) { \
		case 2: \
		    delta += 1000000000; \
		    /*FALLTHROUGH*/ \
		case 1: \
		    delta += 1000000000; \
		    break; \
		default: \
		    delta += (1000000000 * xxs); \
		} \
	} \
}

/* True when timespec a is strictly earlier than timespec b. */
#define	TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \
	(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)

/*
 * Handle MRT setsockopt commands to modify the multicast routing tables.
 *
 * Only the registered mrouted queue (ip_g_mrouter) may issue commands
 * other than MRT_INIT; anyone else gets EACCES.  When checkonly is set
 * (XTI T_CHECK), the command is validated but not executed.  Returns 0
 * or an errno value from the dispatched handler.
 */
int
ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data,
    int datalen, mblk_t *first_mp)
{
	mutex_enter(&ip_g_mrouter_mutex);
	if (cmd != MRT_INIT && q != ip_g_mrouter) {
		mutex_exit(&ip_g_mrouter_mutex);
		return (EACCES);
	}
	mutex_exit(&ip_g_mrouter_mutex);

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * Note: Even routines further on can probably fail but
		 * this T_CHECK stuff is only to please XTI so it not
		 * necessary to be perfect.
		 */
		switch (cmd) {
		case MRT_INIT:
		case MRT_DONE:
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
		case MRT_ASSERT:
			return (0);
		default:
			return (EOPNOTSUPP);
		}
	}

	/*
	 * make sure no command is issued after multicast routing has been
	 * turned off.
	 */
	if (cmd != MRT_INIT && cmd != MRT_DONE) {
		if (is_mrouter_off())
			return (EINVAL);
	}

	switch (cmd) {
	case MRT_INIT:	return (ip_mrouter_init(q, data, datalen));
	case MRT_DONE:	return (ip_mrouter_done(first_mp));
	case MRT_ADD_VIF:  return (add_vif((struct vifctl *)data, q, first_mp));
	case MRT_DEL_VIF:  return (del_vif((vifi_t *)data, q, first_mp));
	case MRT_ADD_MFC:  return (add_mfc((struct mfcctl *)data));
	case MRT_DEL_MFC:  return (del_mfc((struct mfcctl *)data));
	case MRT_ASSERT:   return (set_assert((int *)data));
	default:	   return (EOPNOTSUPP);
	}
}

/*
 * Handle MRT getsockopt commands.  Only the registered mrouted queue may
 * query; returns EACCES otherwise.
 */
int
ip_mrouter_get(int cmd, queue_t *q, uchar_t *data)
{
	if (q != ip_g_mrouter)
		return (EACCES);

	switch (cmd) {
	case MRT_VERSION:  return (get_version((uchar_t *)data));
	case MRT_ASSERT:   return (get_assert((uchar_t *)data));
	default:	   return (EOPNOTSUPP);
	}
}

/*
 * Handle ioctl commands to obtain information from the cache.
 * Called with shared access to IP. These are read_only ioctls.
 */
/* ARGSUSED */
int
mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
    ip_ioctl_cmd_t *ipip, void *if_req)
{
	mblk_t	*mp1;
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;

	/* Existence verified in ip_wput_nondata */
	mp1 = mp->b_cont->b_cont;

	switch (iocp->ioc_cmd) {
	case (SIOCGETVIFCNT):
		return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr));
	case (SIOCGETSGCNT):
		return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr));
	case (SIOCGETLSGCNT):
		return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr));
	default:
		return (EINVAL);
	}
}

/*
 * Returns the packet, byte, rpf-failure count for the source, group provided.
490 */ 491 static int 492 get_sg_cnt(struct sioc_sg_req *req) 493 { 494 struct mfc *rt; 495 struct mfcb *mfcbp; 496 497 mfcbp = &mfctable[MFCHASH(req->src.s_addr, req->grp.s_addr)]; 498 MFCB_REFHOLD(mfcbp); 499 MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt); 500 501 if (rt != NULL) { 502 mutex_enter(&rt->mfc_mutex); 503 req->pktcnt = rt->mfc_pkt_cnt; 504 req->bytecnt = rt->mfc_byte_cnt; 505 req->wrong_if = rt->mfc_wrong_if; 506 mutex_exit(&rt->mfc_mutex); 507 } else 508 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU; 509 510 MFCB_REFRELE(mfcbp); 511 return (0); 512 } 513 514 /* 515 * Returns the packet, byte, rpf-failure count for the source, group provided. 516 * Uses larger counters and IPv6 addresses. 517 */ 518 /* ARGSUSED XXX until implemented */ 519 static int 520 get_lsg_cnt(struct sioc_lsg_req *req) 521 { 522 /* XXX TODO SIOCGETLSGCNT */ 523 return (ENXIO); 524 } 525 526 /* 527 * Returns the input and output packet and byte counts on the vif provided. 528 */ 529 static int 530 get_vif_cnt(struct sioc_vif_req *req) 531 { 532 vifi_t vifi = req->vifi; 533 534 if (vifi >= numvifs) 535 return (EINVAL); 536 537 /* 538 * No locks here, an approximation is fine. 539 */ 540 req->icount = viftable[vifi].v_pkt_in; 541 req->ocount = viftable[vifi].v_pkt_out; 542 req->ibytes = viftable[vifi].v_bytes_in; 543 req->obytes = viftable[vifi].v_bytes_out; 544 545 return (0); 546 } 547 548 static int 549 get_version(uchar_t *data) 550 { 551 int *v = (int *)data; 552 553 *v = 0x0305; /* XXX !!!! */ 554 555 return (0); 556 } 557 558 /* 559 * Set PIM assert processing global. 560 */ 561 static int 562 set_assert(int *i) 563 { 564 if ((*i != 1) && (*i != 0)) 565 return (EINVAL); 566 567 pim_assert = *i; 568 569 return (0); 570 } 571 572 /* 573 * Get PIM assert processing global. 574 */ 575 static int 576 get_assert(uchar_t *data) 577 { 578 int *i = (int *)data; 579 580 *i = pim_assert; 581 582 return (0); 583 } 584 585 /* 586 * Enable multicast routing. 
587 */ 588 static int 589 ip_mrouter_init(queue_t *q, uchar_t *data, int datalen) 590 { 591 conn_t *connp = Q_TO_CONN(q); 592 int *v; 593 594 if (data == NULL || (datalen != sizeof (int))) 595 return (ENOPROTOOPT); 596 597 v = (int *)data; 598 if (*v != 1) 599 return (ENOPROTOOPT); 600 601 mutex_enter(&ip_g_mrouter_mutex); 602 if (ip_g_mrouter != NULL) { 603 mutex_exit(&ip_g_mrouter_mutex); 604 return (EADDRINUSE); 605 } 606 607 ip_g_mrouter = q; 608 connp->conn_multi_router = 1; 609 610 mutex_init(&last_encap_lock, NULL, MUTEX_DEFAULT, NULL); 611 612 mrtstat.mrts_vifctlSize = sizeof (struct vifctl); 613 mrtstat.mrts_mfcctlSize = sizeof (struct mfcctl); 614 615 pim_assert = 0; 616 617 /* In order for tunnels to work we have to turn ip_g_forward on */ 618 if (!WE_ARE_FORWARDING) { 619 if (ip_mrtdebug > 1) { 620 (void) mi_strlog(q, 1, SL_TRACE, 621 "ip_mrouter_init: turning on forwarding"); 622 } 623 saved_ip_g_forward = ip_g_forward; 624 ip_g_forward = IP_FORWARD_ALWAYS; 625 } 626 627 mutex_exit(&ip_g_mrouter_mutex); 628 return (0); 629 } 630 631 /* 632 * Disable multicast routing. 633 * Didn't use global timeout_val (BSD version), instead check the mfctable. 634 */ 635 int 636 ip_mrouter_done(mblk_t *mp) 637 { 638 conn_t *connp; 639 vifi_t vifi; 640 struct mfc *mfc_rt; 641 int i; 642 643 mutex_enter(&ip_g_mrouter_mutex); 644 if (ip_g_mrouter == NULL) { 645 mutex_exit(&ip_g_mrouter_mutex); 646 return (EINVAL); 647 } 648 649 connp = Q_TO_CONN(ip_g_mrouter); 650 651 if (saved_ip_g_forward != -1) { 652 if (ip_mrtdebug > 1) { 653 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 654 "ip_mrouter_done: turning off forwarding"); 655 } 656 ip_g_forward = saved_ip_g_forward; 657 saved_ip_g_forward = -1; 658 } 659 660 /* 661 * Always clear cache when vifs change. 662 * No need to get last_encap_lock since we are running as a writer. 
663 */ 664 mutex_enter(&last_encap_lock); 665 last_encap_src = 0; 666 last_encap_vif = NULL; 667 mutex_exit(&last_encap_lock); 668 connp->conn_multi_router = 0; 669 670 mutex_exit(&ip_g_mrouter_mutex); 671 672 /* 673 * For each phyint in use, 674 * disable promiscuous reception of all IP multicasts. 675 */ 676 for (vifi = 0; vifi < MAXVIFS; vifi++) { 677 struct vif *vifp = viftable + vifi; 678 679 mutex_enter(&vifp->v_lock); 680 /* 681 * if the vif is active mark it condemned. 682 */ 683 if (vifp->v_marks & VIF_MARK_GOOD) { 684 ASSERT(vifp->v_ipif != NULL); 685 ipif_refhold(vifp->v_ipif); 686 /* Phyint only */ 687 if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 688 ipif_t *ipif = vifp->v_ipif; 689 ipsq_t *ipsq; 690 boolean_t suc; 691 ill_t *ill; 692 693 ill = ipif->ipif_ill; 694 suc = B_FALSE; 695 if (mp == NULL) { 696 /* 697 * being called from ip_close, 698 * lets do it synchronously. 699 * Clear VIF_MARK_GOOD and 700 * set VIF_MARK_CONDEMNED. 701 */ 702 vifp->v_marks &= ~VIF_MARK_GOOD; 703 vifp->v_marks |= VIF_MARK_CONDEMNED; 704 mutex_exit(&(vifp)->v_lock); 705 suc = ipsq_enter(ill, B_FALSE); 706 ipsq = ill->ill_phyint->phyint_ipsq; 707 } else { 708 ipsq = ipsq_try_enter(ipif, NULL, 709 ip_g_mrouter, mp, 710 ip_restart_optmgmt, NEW_OP, B_TRUE); 711 if (ipsq == NULL) { 712 mutex_exit(&(vifp)->v_lock); 713 return (EINPROGRESS); 714 } 715 /* 716 * Clear VIF_MARK_GOOD and 717 * set VIF_MARK_CONDEMNED. 718 */ 719 vifp->v_marks &= ~VIF_MARK_GOOD; 720 vifp->v_marks |= VIF_MARK_CONDEMNED; 721 mutex_exit(&(vifp)->v_lock); 722 suc = B_TRUE; 723 } 724 725 if (suc) { 726 (void) ip_delmulti(INADDR_ANY, ipif, 727 B_TRUE, B_TRUE); 728 ipsq_exit(ipsq, B_TRUE, B_TRUE); 729 } 730 mutex_enter(&vifp->v_lock); 731 } 732 /* 733 * decreases the refcnt added in add_vif. 734 * and release v_lock. 
735 */ 736 VIF_REFRELE_LOCKED(vifp); 737 } else { 738 mutex_exit(&vifp->v_lock); 739 continue; 740 } 741 } 742 743 mutex_enter(&numvifs_mutex); 744 numvifs = 0; 745 pim_assert = 0; 746 reg_vif_num = ALL_VIFS; 747 mutex_exit(&numvifs_mutex); 748 749 /* 750 * Free upcall msgs. 751 * Go through mfctable and stop any outstanding upcall 752 * timeouts remaining on mfcs. 753 */ 754 for (i = 0; i < MFCTBLSIZ; i++) { 755 mutex_enter(&mfctable[i].mfcb_lock); 756 mfctable[i].mfcb_refcnt++; 757 mfctable[i].mfcb_marks |= MFCB_MARK_CONDEMNED; 758 mutex_exit(&mfctable[i].mfcb_lock); 759 mfc_rt = mfctable[i].mfcb_mfc; 760 while (mfc_rt) { 761 /* Free upcalls */ 762 mutex_enter(&mfc_rt->mfc_mutex); 763 if (mfc_rt->mfc_rte != NULL) { 764 if (mfc_rt->mfc_timeout_id != 0) { 765 /* 766 * OK to drop the lock as we have 767 * a refcnt on the bucket. timeout 768 * can fire but it will see that 769 * mfc_timeout_id == 0 and not do 770 * anything. see expire_upcalls(). 771 */ 772 mfc_rt->mfc_timeout_id = 0; 773 mutex_exit(&mfc_rt->mfc_mutex); 774 (void) untimeout( 775 mfc_rt->mfc_timeout_id); 776 mfc_rt->mfc_timeout_id = 0; 777 mutex_enter(&mfc_rt->mfc_mutex); 778 779 /* 780 * all queued upcall packets 781 * and mblk will be freed in 782 * release_mfc(). 
783 */ 784 } 785 } 786 787 mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 788 789 mutex_exit(&mfc_rt->mfc_mutex); 790 mfc_rt = mfc_rt->mfc_next; 791 } 792 MFCB_REFRELE(&mfctable[i]); 793 } 794 795 mutex_enter(&ip_g_mrouter_mutex); 796 ip_g_mrouter = NULL; 797 mutex_exit(&ip_g_mrouter_mutex); 798 return (0); 799 } 800 801 static boolean_t 802 is_mrouter_off(void) 803 { 804 conn_t *connp; 805 806 mutex_enter(&ip_g_mrouter_mutex); 807 if (ip_g_mrouter == NULL) { 808 mutex_exit(&ip_g_mrouter_mutex); 809 return (B_TRUE); 810 } 811 812 connp = Q_TO_CONN(ip_g_mrouter); 813 if (connp->conn_multi_router == 0) { 814 mutex_exit(&ip_g_mrouter_mutex); 815 return (B_TRUE); 816 } 817 mutex_exit(&ip_g_mrouter_mutex); 818 return (B_FALSE); 819 } 820 821 static void 822 unlock_good_vif(struct vif *vifp) 823 { 824 ASSERT(vifp->v_ipif != NULL); 825 ipif_refrele(vifp->v_ipif); 826 VIF_REFRELE(vifp); 827 } 828 829 static boolean_t 830 lock_good_vif(struct vif *vifp) 831 { 832 mutex_enter(&vifp->v_lock); 833 if (!(vifp->v_marks & VIF_MARK_GOOD)) { 834 mutex_exit(&vifp->v_lock); 835 return (B_FALSE); 836 } 837 838 ASSERT(vifp->v_ipif != NULL); 839 mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock); 840 if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) { 841 mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 842 mutex_exit(&vifp->v_lock); 843 return (B_FALSE); 844 } 845 ipif_refhold_locked(vifp->v_ipif); 846 mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 847 vifp->v_refcnt++; 848 mutex_exit(&vifp->v_lock); 849 return (B_TRUE); 850 } 851 852 /* 853 * Add a vif to the vif table. 
854 */ 855 static int 856 add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) 857 { 858 struct vif *vifp = viftable + vifcp->vifc_vifi; 859 ipif_t *ipif; 860 int error; 861 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 862 conn_t *connp = Q_TO_CONN(q); 863 ipsq_t *ipsq; 864 865 ASSERT(connp != NULL); 866 867 if (vifcp->vifc_vifi >= MAXVIFS) 868 return (EINVAL); 869 870 if (is_mrouter_off()) 871 return (EINVAL); 872 873 mutex_enter(&vifp->v_lock); 874 /* 875 * Viftable entry should be 0. 876 * if v_marks == 0 but v_refcnt != 0 means struct is being 877 * initialized. 878 * 879 * Also note that it is very unlikely that we will get a MRT_ADD_VIF 880 * request while the delete is in progress, mrouted only sends add 881 * requests when a new interface is added and the new interface cannot 882 * have the same vifi as an existing interface. We make sure that 883 * ill_delete will block till the vif is deleted by adding a refcnt 884 * to ipif in del_vif(). 885 */ 886 if (vifp->v_lcl_addr.s_addr != 0 || 887 vifp->v_marks != 0 || 888 vifp->v_refcnt != 0) { 889 mutex_exit(&vifp->v_lock); 890 return (EADDRINUSE); 891 } 892 893 /* Incoming vif should not be 0 */ 894 if (vifcp->vifc_lcl_addr.s_addr == 0) { 895 mutex_exit(&vifp->v_lock); 896 return (EINVAL); 897 } 898 899 vifp->v_refcnt++; 900 mutex_exit(&vifp->v_lock); 901 /* Find the interface with the local address */ 902 ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL, 903 connp->conn_zoneid, CONNP_TO_WQ(connp), first_mp, 904 ip_restart_optmgmt, &error); 905 if (ipif == NULL) { 906 VIF_REFRELE(vifp); 907 if (error == EINPROGRESS) 908 return (error); 909 return (EADDRNOTAVAIL); 910 } 911 912 /* 913 * We have to be exclusive as we have to call ip_addmulti() 914 * This is the best position to try to be exclusive in case 915 * we have to wait. 
916 */ 917 ipsq = ipsq_try_enter(ipif, NULL, CONNP_TO_WQ(connp), first_mp, 918 ip_restart_optmgmt, NEW_OP, B_TRUE); 919 if ((ipsq) == NULL) { 920 VIF_REFRELE(vifp); 921 ipif_refrele(ipif); 922 return (EINPROGRESS); 923 } 924 925 if (ip_mrtdebug > 1) { 926 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 927 "add_vif: src 0x%x enter", 928 vifcp->vifc_lcl_addr.s_addr); 929 } 930 931 mutex_enter(&vifp->v_lock); 932 /* 933 * Always clear cache when vifs change. 934 * Needed to ensure that src isn't left over from before vif was added. 935 * No need to get last_encap_lock, since we are running as a writer. 936 */ 937 938 mutex_enter(&last_encap_lock); 939 last_encap_src = 0; 940 last_encap_vif = NULL; 941 mutex_exit(&last_encap_lock); 942 943 if (vifcp->vifc_flags & VIFF_TUNNEL) { 944 if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) { 945 cmn_err(CE_WARN, 946 "add_vif: source route tunnels not supported\n"); 947 VIF_REFRELE_LOCKED(vifp); 948 ipif_refrele(ipif); 949 ipsq_exit(ipsq, B_TRUE, B_TRUE); 950 return (EOPNOTSUPP); 951 } 952 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 953 954 } else { 955 /* Phyint or Register vif */ 956 if (vifcp->vifc_flags & VIFF_REGISTER) { 957 /* 958 * Note: Since all IPPROTO_IP level options (including 959 * MRT_ADD_VIF) are done exclusively via 960 * ip_optmgmt_writer(), a lock is not necessary to 961 * protect reg_vif_num. 
962 */ 963 mutex_enter(&numvifs_mutex); 964 if (reg_vif_num == ALL_VIFS) { 965 reg_vif_num = vifcp->vifc_vifi; 966 mutex_exit(&numvifs_mutex); 967 } else { 968 mutex_exit(&numvifs_mutex); 969 VIF_REFRELE_LOCKED(vifp); 970 ipif_refrele(ipif); 971 ipsq_exit(ipsq, B_TRUE, B_TRUE); 972 return (EADDRINUSE); 973 } 974 } 975 976 /* Make sure the interface supports multicast */ 977 if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) { 978 VIF_REFRELE_LOCKED(vifp); 979 ipif_refrele(ipif); 980 if (vifcp->vifc_flags & VIFF_REGISTER) { 981 mutex_enter(&numvifs_mutex); 982 reg_vif_num = ALL_VIFS; 983 mutex_exit(&numvifs_mutex); 984 } 985 ipsq_exit(ipsq, B_TRUE, B_TRUE); 986 return (EOPNOTSUPP); 987 } 988 /* Enable promiscuous reception of all IP mcasts from the if */ 989 mutex_exit(&vifp->v_lock); 990 error = ip_addmulti(INADDR_ANY, ipif, ILGSTAT_NONE, 991 MODE_IS_EXCLUDE, NULL); 992 mutex_enter(&vifp->v_lock); 993 /* 994 * since we released the lock lets make sure that 995 * ip_mrouter_done() has not been called. 996 */ 997 if (error != 0 || is_mrouter_off()) { 998 if (error == 0) 999 (void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, 1000 B_TRUE); 1001 if (vifcp->vifc_flags & VIFF_REGISTER) { 1002 mutex_enter(&numvifs_mutex); 1003 reg_vif_num = ALL_VIFS; 1004 mutex_exit(&numvifs_mutex); 1005 } 1006 VIF_REFRELE_LOCKED(vifp); 1007 ipif_refrele(ipif); 1008 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1009 return (error?error:EINVAL); 1010 } 1011 } 1012 /* Define parameters for the tbf structure */ 1013 vifp->v_tbf = v_tbf; 1014 gethrestime(&vifp->v_tbf->tbf_last_pkt_t); 1015 vifp->v_tbf->tbf_n_tok = 0; 1016 vifp->v_tbf->tbf_q_len = 0; 1017 vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 1018 vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 1019 1020 vifp->v_flags = vifcp->vifc_flags; 1021 vifp->v_threshold = vifcp->vifc_threshold; 1022 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 1023 vifp->v_ipif = ipif; 1024 ipif_refrele(ipif); 1025 /* Scaling up here, allows division by 1024 in critical code. 
*/ 1026 vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000); 1027 vifp->v_timeout_id = 0; 1028 /* initialize per vif pkt counters */ 1029 vifp->v_pkt_in = 0; 1030 vifp->v_pkt_out = 0; 1031 vifp->v_bytes_in = 0; 1032 vifp->v_bytes_out = 0; 1033 mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL); 1034 1035 /* Adjust numvifs up, if the vifi is higher than numvifs */ 1036 mutex_enter(&numvifs_mutex); 1037 if (numvifs <= vifcp->vifc_vifi) 1038 numvifs = vifcp->vifc_vifi + 1; 1039 mutex_exit(&numvifs_mutex); 1040 1041 if (ip_mrtdebug > 1) { 1042 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 1043 "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", 1044 vifcp->vifc_vifi, 1045 ntohl(vifcp->vifc_lcl_addr.s_addr), 1046 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 1047 ntohl(vifcp->vifc_rmt_addr.s_addr), 1048 vifcp->vifc_threshold, vifcp->vifc_rate_limit); 1049 } 1050 1051 vifp->v_marks = VIF_MARK_GOOD; 1052 mutex_exit(&vifp->v_lock); 1053 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1054 return (0); 1055 } 1056 1057 1058 /* Delete a vif from the vif table. */ 1059 static void 1060 del_vifp(struct vif *vifp) 1061 { 1062 struct tbf *t = vifp->v_tbf; 1063 mblk_t *mp0; 1064 vifi_t vifi; 1065 1066 1067 ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); 1068 ASSERT(t != NULL); 1069 1070 /* 1071 * release the ref we put in vif_del. 1072 */ 1073 ASSERT(vifp->v_ipif != NULL); 1074 ipif_refrele(vifp->v_ipif); 1075 1076 if (ip_mrtdebug > 1) { 1077 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 1078 "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); 1079 } 1080 1081 if (vifp->v_timeout_id != 0) { 1082 (void) untimeout(vifp->v_timeout_id); 1083 vifp->v_timeout_id = 0; 1084 } 1085 1086 /* 1087 * Free packets queued at the interface. 1088 * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc. 
1089 */ 1090 mutex_enter(&t->tbf_lock); 1091 while (t->tbf_q != NULL) { 1092 mp0 = t->tbf_q; 1093 t->tbf_q = t->tbf_q->b_next; 1094 mp0->b_prev = mp0->b_next = NULL; 1095 freemsg(mp0); 1096 } 1097 mutex_exit(&t->tbf_lock); 1098 1099 /* 1100 * Always clear cache when vifs change. 1101 * No need to get last_encap_lock since we are running as a writer. 1102 */ 1103 mutex_enter(&last_encap_lock); 1104 if (vifp == last_encap_vif) { 1105 last_encap_vif = NULL; 1106 last_encap_src = 0; 1107 } 1108 mutex_exit(&last_encap_lock); 1109 1110 mutex_destroy(&t->tbf_lock); 1111 1112 bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf))); 1113 1114 /* Adjust numvifs down */ 1115 mutex_enter(&numvifs_mutex); 1116 for (vifi = numvifs; vifi != 0; vifi--) /* vifi is unsigned */ 1117 if (viftable[vifi - 1].v_lcl_addr.s_addr != 0) 1118 break; 1119 numvifs = vifi; 1120 mutex_exit(&numvifs_mutex); 1121 1122 bzero(vifp, sizeof (*vifp)); 1123 } 1124 1125 static int 1126 del_vif(vifi_t *vifip, queue_t *q, mblk_t *first_mp) 1127 { 1128 struct vif *vifp = viftable + *vifip; 1129 conn_t *connp; 1130 ipsq_t *ipsq; 1131 1132 if (*vifip >= numvifs) 1133 return (EINVAL); 1134 1135 1136 mutex_enter(&vifp->v_lock); 1137 /* 1138 * Not initialized 1139 * Here we are not looking at the vif that is being initialized 1140 * i.e vifp->v_marks == 0 and refcnt > 0. 1141 */ 1142 if (vifp->v_lcl_addr.s_addr == 0 || 1143 !(vifp->v_marks & VIF_MARK_GOOD)) { 1144 mutex_exit(&vifp->v_lock); 1145 return (EADDRNOTAVAIL); 1146 } 1147 1148 /* 1149 * This is an optimization, if first_mp == NULL 1150 * than we are being called from reset_mrt_vif_ipif() 1151 * so we already have exclusive access to the ipsq. 1152 * the ASSERT below is a check for this condition. 
1153 */ 1154 if (first_mp != NULL && 1155 !(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 1156 connp = Q_TO_CONN(q); 1157 ASSERT(connp != NULL); 1158 /* 1159 * We have to be exclusive as we have to call ip_delmulti() 1160 * This is the best position to try to be exclusive in case 1161 * we have to wait. 1162 */ 1163 ipsq = ipsq_try_enter(vifp->v_ipif, NULL, CONNP_TO_WQ(connp), 1164 first_mp, ip_restart_optmgmt, NEW_OP, B_TRUE); 1165 if ((ipsq) == NULL) { 1166 mutex_exit(&vifp->v_lock); 1167 return (EINPROGRESS); 1168 } 1169 /* recheck after being exclusive */ 1170 if (vifp->v_lcl_addr.s_addr == 0 || 1171 !vifp->v_marks & VIF_MARK_GOOD) { 1172 /* 1173 * someone beat us. 1174 */ 1175 mutex_exit(&vifp->v_lock); 1176 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1177 return (EADDRNOTAVAIL); 1178 } 1179 } 1180 1181 1182 ASSERT(IAM_WRITER_IPIF(vifp->v_ipif)); 1183 1184 1185 /* 1186 * add a refhold so that ipif does not go away while 1187 * there are still users, this will be released in del_vifp 1188 * when we free the vif. 1189 */ 1190 ipif_refhold(vifp->v_ipif); 1191 1192 /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */ 1193 vifp->v_marks &= ~VIF_MARK_GOOD; 1194 vifp->v_marks |= VIF_MARK_CONDEMNED; 1195 1196 /* Phyint only */ 1197 if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 1198 ipif_t *ipif = vifp->v_ipif; 1199 ASSERT(ipif != NULL); 1200 /* 1201 * should be OK to drop the lock as we 1202 * have marked this as CONDEMNED. 1203 */ 1204 mutex_exit(&(vifp)->v_lock); 1205 (void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, B_TRUE); 1206 if (first_mp != NULL) 1207 ipsq_exit(ipsq, B_TRUE, B_TRUE); 1208 mutex_enter(&(vifp)->v_lock); 1209 } 1210 1211 /* 1212 * decreases the refcnt added in add_vif. 1213 */ 1214 VIF_REFRELE_LOCKED(vifp); 1215 return (0); 1216 } 1217 1218 /* 1219 * Add an mfc entry. 
 */
/*
 * Install or update a multicast forwarding cache entry in response to
 * mrouted's MRT_ADD_MFC.  Three cases:
 *   1. entry already exists (MFCFIND hit): update parent and ttl vector;
 *   2. an upcall entry (mfc_rte != NULL) is pending: fill in the route,
 *      cancel the expire timer and flush the queued packets via ip_mdq();
 *   3. neither: allocate a fresh entry and link it at the bucket head.
 * Returns 0, EINVAL (bad parent / mrouter shutting down) or ENOBUFS.
 */
static int
add_mfc(struct mfcctl *mfccp)
{
	struct mfc *rt;
	struct rtdetq *rte;
	ushort_t nstl;
	int i;
	struct mfcb *mfcbp;

	/*
	 * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
	 * did not have a real route for pkt.
	 * We want this pkt without rt installed in the mfctable to prevent
	 * multiiple tries, so go ahead and put it in mfctable, it will
	 * be discarded later in ip_mdq() because the child is NULL.
	 */

	/* Error checking, out of bounds? */
	if (mfccp->mfcc_parent > MAXVIFS) {
		ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
		    (int)mfccp->mfcc_parent));
		return (EINVAL);
	}

	if ((mfccp->mfcc_parent != NO_VIF) &&
	    (viftable[mfccp->mfcc_parent].v_ipif == NULL)) {
		ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
		    (int)mfccp->mfcc_parent));
		return (EINVAL);
	}

	if (is_mrouter_off()) {
		return (EINVAL);
	}

	/* Refhold the hash bucket so entries on the chain cannot be freed. */
	mfcbp = &mfctable[MFCHASH(mfccp->mfcc_origin.s_addr,
	    mfccp->mfcc_mcastgrp.s_addr)];
	MFCB_REFHOLD(mfcbp);
	MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
	    mfccp->mfcc_mcastgrp.s_addr, rt);

	/* If an entry already exists, just update the fields */
	if (rt) {
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "add_mfc: update o %x grp %x parent %x",
			    ntohl(mfccp->mfcc_origin.s_addr),
			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
			    mfccp->mfcc_parent);
		}
		mutex_enter(&rt->mfc_mutex);
		rt->mfc_parent = mfccp->mfcc_parent;

		mutex_enter(&numvifs_mutex);
		for (i = 0; i < (int)numvifs; i++)
			rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
		mutex_exit(&numvifs_mutex);
		mutex_exit(&rt->mfc_mutex);

		MFCB_REFRELE(mfcbp);
		return (0);
	}

	/*
	 * Find the entry for which the upcall was made and update.
	 * nstl counts matches; more than one indicates duplicate kernel
	 * entries (warned below but still processed).
	 */
	for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
		mutex_enter(&rt->mfc_mutex);
		if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
		    (rt->mfc_rte != NULL) &&
		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
			if (nstl++ != 0)
				cmn_err(CE_WARN,
				    "add_mfc: %s o %x g %x p %x",
				    "multiple kernel entries",
				    ntohl(mfccp->mfcc_origin.s_addr),
				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
				    mfccp->mfcc_parent);

			if (ip_mrtdebug > 1) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "add_mfc: o %x g %x p %x",
				    ntohl(mfccp->mfcc_origin.s_addr),
				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
				    mfccp->mfcc_parent);
			}
			fill_route(rt, mfccp);

			/*
			 * Prevent cleanup of cache entry.
			 * Timer starts in ip_mforward.
			 */
			if (rt->mfc_timeout_id != 0) {
				timeout_id_t id;
				id = rt->mfc_timeout_id;
				/*
				 * setting id to zero will avoid this
				 * entry from being cleaned up in
				 * expire_up_calls().
				 */
				rt->mfc_timeout_id = 0;
				/*
				 * dropping the lock is fine as we
				 * have a refhold on the bucket.
				 * so mfc cannot be freed.
				 * The timeout can fire but it will see
				 * that mfc_timeout_id == 0 and not cleanup.
				 */
				mutex_exit(&rt->mfc_mutex);
				(void) untimeout(id);
				mutex_enter(&rt->mfc_mutex);
			}

			/*
			 * Send all pkts that are queued waiting for the upcall.
			 * ip_mdq param tun set to 0 -
			 * the return value of ip_mdq() isn't used here,
			 * so value we send doesn't matter.
			 */
			while (rt->mfc_rte != NULL) {
				rte = rt->mfc_rte;
				rt->mfc_rte = rte->rte_next;
				mutex_exit(&rt->mfc_mutex);
				(void) ip_mdq(rte->mp, (ipha_t *)
				    rte->mp->b_rptr, rte->ill, 0, rt);
				freemsg(rte->mp);
				mi_free((char *)rte);
				mutex_enter(&rt->mfc_mutex);
			}
		}
		mutex_exit(&rt->mfc_mutex);
	}


	/*
	 * It is possible that an entry is being inserted without an upcall
	 */
	if (nstl == 0) {
		mutex_enter(&(mfcbp->mfcb_lock));
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "add_mfc: no upcall o %x g %x p %x",
			    ntohl(mfccp->mfcc_origin.s_addr),
			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
			    mfccp->mfcc_parent);
		}
		/* Recheck under the bucket lock in case of shutdown race. */
		if (is_mrouter_off()) {
			mutex_exit(&mfcbp->mfcb_lock);
			MFCB_REFRELE(mfcbp);
			return (EINVAL);
		}

		for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {

			mutex_enter(&rt->mfc_mutex);
			if ((rt->mfc_origin.s_addr ==
			    mfccp->mfcc_origin.s_addr) &&
			    (rt->mfc_mcastgrp.s_addr ==
			    mfccp->mfcc_mcastgrp.s_addr) &&
			    (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
				fill_route(rt, mfccp);
				mutex_exit(&rt->mfc_mutex);
				break;
			}
			mutex_exit(&rt->mfc_mutex);
		}

		/* No upcall, so make a new entry into mfctable */
		if (rt == NULL) {
			rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
			if (rt == NULL) {
				ip1dbg(("add_mfc: out of memory\n"));
				mutex_exit(&mfcbp->mfcb_lock);
				MFCB_REFRELE(mfcbp);
				return (ENOBUFS);
			}

			/* Insert new entry at head of hash chain */
			mutex_enter(&rt->mfc_mutex);
			fill_route(rt, mfccp);

			/* Link into table */
			rt->mfc_next = mfcbp->mfcb_mfc;
			mfcbp->mfcb_mfc = rt;
			mutex_exit(&rt->mfc_mutex);
		}
		mutex_exit(&mfcbp->mfcb_lock);
	}

	MFCB_REFRELE(mfcbp);
	return (0);
}

/*
 * Fills in mfc structure from mrouted mfcctl.
 */
/*
 * Copy origin/group/parent and the per-vif ttl vector from the mrouted
 * mfcctl into the kernel mfc entry and reset its packet counters.
 * Caller holds rt->mfc_mutex.
 */
static void
fill_route(struct mfc *rt, struct mfcctl *mfccp)
{
	int i;

	rt->mfc_origin = mfccp->mfcc_origin;
	rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
	rt->mfc_parent = mfccp->mfcc_parent;
	mutex_enter(&numvifs_mutex);
	for (i = 0; i < (int)numvifs; i++) {
		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
	}
	mutex_exit(&numvifs_mutex);
	/* Initialize pkt counters per src-grp */
	rt->mfc_pkt_cnt = 0;
	rt->mfc_byte_cnt = 0;
	rt->mfc_wrong_if = 0;
	rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0;

}

/*
 * Drop and free every packet still queued on the entry's upcall list
 * (rtdetq chain).  Caller must have ensured the entry cannot be reached
 * concurrently (condemned entry being released).
 */
static void
free_queue(struct mfc *mfcp)
{
	struct rtdetq *rte0;

	/*
	 * Drop all queued upcall packets.
	 * Free the mbuf with the pkt.
	 */
	while ((rte0 = mfcp->mfc_rte) != NULL) {
		mfcp->mfc_rte = rte0->rte_next;
		freemsg(rte0->mp);
		mi_free((char *)rte0);
	}
}
/*
 * go thorugh the hash bucket and free all the entries marked condemned.
 *
 * NOTE(review): called once the bucket refcnt drops (via MFCB_REFRELE,
 * presumably) — confirm against the macro definition, which is outside
 * this view.  A condemned head is unlinked and the scan restarts from
 * the new head; a condemned interior node is unlinked through prev_mfcp
 * and the scan resumes at prev_mfcp->mfc_next.
 */
void
release_mfc(struct mfcb *mfcbp)
{
	struct mfc *current_mfcp;
	struct mfc *prev_mfcp;

	prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;

	while (current_mfcp != NULL) {
		if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) {
			if (current_mfcp == mfcbp->mfcb_mfc) {
				/* Condemned entry at the head of the chain. */
				mfcbp->mfcb_mfc = current_mfcp->mfc_next;
				free_queue(current_mfcp);
				mi_free(current_mfcp);
				prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
				continue;
			}
			ASSERT(prev_mfcp != NULL);
			/* Unlink a condemned interior entry. */
			prev_mfcp->mfc_next = current_mfcp->mfc_next;
			free_queue(current_mfcp);
			mi_free(current_mfcp);
			current_mfcp = NULL;
		} else {
			prev_mfcp = current_mfcp;
		}

		current_mfcp = prev_mfcp->mfc_next;

	}
	mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
	ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
}

/*
 * Delete an mfc entry.
1492 */ 1493 static int 1494 del_mfc(struct mfcctl *mfccp) 1495 { 1496 struct in_addr origin; 1497 struct in_addr mcastgrp; 1498 struct mfc *rt; 1499 uint_t hash; 1500 1501 origin = mfccp->mfcc_origin; 1502 mcastgrp = mfccp->mfcc_mcastgrp; 1503 hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1504 1505 if (ip_mrtdebug > 1) { 1506 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 1507 "del_mfc: o %x g %x", 1508 ntohl(origin.s_addr), 1509 ntohl(mcastgrp.s_addr)); 1510 } 1511 1512 MFCB_REFHOLD(&mfctable[hash]); 1513 1514 /* Find mfc in mfctable, finds only entries without upcalls */ 1515 for (rt = mfctable[hash].mfcb_mfc; rt; rt = rt->mfc_next) { 1516 mutex_enter(&rt->mfc_mutex); 1517 if (origin.s_addr == rt->mfc_origin.s_addr && 1518 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1519 rt->mfc_rte == NULL && 1520 !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) 1521 break; 1522 mutex_exit(&rt->mfc_mutex); 1523 } 1524 1525 /* 1526 * Return if there was an upcall (mfc_rte != NULL, 1527 * or rt not in mfctable. 1528 */ 1529 if (rt == NULL) { 1530 MFCB_REFRELE(&mfctable[hash]); 1531 return (EADDRNOTAVAIL); 1532 } 1533 1534 1535 /* 1536 * no need to hold lock as we have a reference. 1537 */ 1538 mfctable[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 1539 /* error checking */ 1540 if (rt->mfc_timeout_id != 0) { 1541 ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null")); 1542 /* 1543 * Its ok to drop the lock, the struct cannot be freed 1544 * since we have a ref on the hash bucket. 
1545 */ 1546 rt->mfc_timeout_id = 0; 1547 mutex_exit(&rt->mfc_mutex); 1548 (void) untimeout(rt->mfc_timeout_id); 1549 mutex_enter(&rt->mfc_mutex); 1550 } 1551 1552 ASSERT(rt->mfc_rte == NULL); 1553 1554 1555 /* 1556 * Delete the entry from the cache 1557 */ 1558 rt->mfc_marks |= MFCB_MARK_CONDEMNED; 1559 mutex_exit(&rt->mfc_mutex); 1560 1561 MFCB_REFRELE(&mfctable[hash]); 1562 1563 return (0); 1564 } 1565 1566 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1567 1568 /* 1569 * IP multicast forwarding function. This function assumes that the packet 1570 * pointed to by ipha has arrived on (or is about to be sent to) the interface 1571 * pointed to by "ill", and the packet is to be relayed to other networks 1572 * that have members of the packet's destination IP multicast group. 1573 * 1574 * The packet is returned unscathed to the caller, unless it is 1575 * erroneous, in which case a -1 value tells the caller (IP) 1576 * to discard it. 1577 * 1578 * Unlike BSD, SunOS 5.x needs to return to IP info about 1579 * whether pkt came in thru a tunnel, so it can be discarded, unless 1580 * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try 1581 * to be delivered. 
 * Return values are 0 - pkt is okay and phyint
 *		    -1 - pkt is malformed and to be tossed
 *		     1 - pkt came in on tunnel
 */
int
ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
{
	struct mfc	*rt;
	ipaddr_t	src, dst, tunnel_src = 0;
	static int	srctun = 0;
	vifi_t		vifi;
	boolean_t	pim_reg_packet = B_FALSE;
	struct mfcb	*mfcbp;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
		    ill->ill_name);
	}

	dst = ipha->ipha_dst;
	/* b_prev carries arrival info set by ip_mroute_decap()/pim_input() */
	if ((uint32_t)(uintptr_t)mp->b_prev == PIM_REGISTER_MARKER)
		pim_reg_packet = B_TRUE;
	else
		tunnel_src = (ipaddr_t)(uintptr_t)mp->b_prev;

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: not forwarded ttl %d,"
			    " dst 0x%x ill %s",
			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
		}
		mp->b_prev = NULL;
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	}

	if ((tunnel_src != 0) || pim_reg_packet) {
		/*
		 * Packet arrived over an encapsulated tunnel or via a PIM
		 * register message. Both ip_mroute_decap() and pim_input()
		 * encode information in mp->b_prev.
		 */
		mp->b_prev = NULL;
		if (ip_mrtdebug > 1) {
			if (tunnel_src != 0) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: ill %s arrived via ENCAP TUN",
				    ill->ill_name);
			} else if (pim_reg_packet) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: ill %s arrived via"
				    " REGISTER VIF",
				    ill->ill_name);
			}
		}
	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
		/* Packet arrived via a physical interface. */
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: ill %s arrived via PHYINT",
			    ill->ill_name);
		}

	} else {
		/*
		 * Packet arrived through a SRCRT tunnel.
		 * Source-route tunnels are no longer supported.
		 * Error message printed every 1000 times.
		 */
		if ((srctun++ % 1000) == 0) {
			cmn_err(CE_WARN,
			    "ip_mforward: received source-routed pkt from %x",
			    ntohl(ipha->ipha_src));
		}
		return (-1);
	}

	mrtstat.mrts_fwd_in++;
	src = ipha->ipha_src;

	/* Find route in cache, return NULL if not there or upcalls q'ed. */

	/*
	 * Lock the mfctable against changes made by ip_mforward.
	 * Note that only add_mfc and del_mfc can remove entries and
	 * they run with exclusive access to IP. So we do not need to
	 * guard against the rt being deleted, so release lock after reading.
	 */

	if (is_mrouter_off())
		return (-1);

	mfcbp = &mfctable[MFCHASH(src, dst)];
	MFCB_REFHOLD(mfcbp);
	MFCFIND(mfcbp, src, dst, rt);

	/* Entry exists, so forward if necessary */
	if (rt != NULL) {
		int ret = 0;
		mrtstat.mrts_mfc_hits++;
		if (pim_reg_packet) {
			ASSERT(reg_vif_num != ALL_VIFS);
			ret = ip_mdq(mp, ipha,
			    viftable[reg_vif_num].v_ipif->ipif_ill, 0, rt);
		} else {
			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
		}

		MFCB_REFRELE(mfcbp);
		return (ret);

		/*
		 * Don't forward if we don't have a cache entry. Mrouted will
		 * always provide a cache entry in response to an upcall.
		 */
	} else {
		/*
		 * If we don't have a route for packet's origin, make a copy
		 * of the packet and send message to routing daemon.
		 */
		struct mfc	*mfc_rt = NULL;
		mblk_t		*mp0 = NULL;
		mblk_t		*mp_copy = NULL;
		struct rtdetq	*rte = NULL;
		struct rtdetq	*rte_m, *rte1, *prev_rte;
		uint_t		hash;
		int		npkts;
		boolean_t	new_mfc = B_FALSE;
		mrtstat.mrts_mfc_misses++;
		/* BSD uses mrts_no_route++ */
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: no rte ill %s src %x g %x misses %d",
			    ill->ill_name, ntohl(src), ntohl(dst),
			    (int)mrtstat.mrts_mfc_misses);
		}
		/*
		 * The order of the following code differs from the BSD code.
		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
		 * code works, so SunOS 5.x wasn't changed to conform to the
		 * BSD version.
		 */

		/* Lock mfctable. */
		hash = MFCHASH(src, dst);
		mutex_enter(&(mfctable[hash].mfcb_lock));

		/*
		 * If we are turning off mrouted return an error
		 */
		if (is_mrouter_off()) {
			mutex_exit(&mfcbp->mfcb_lock);
			MFCB_REFRELE(mfcbp);
			return (-1);
		}

		/* Is there an upcall waiting for this packet? */
		for (mfc_rt = mfctable[hash].mfcb_mfc; mfc_rt;
		    mfc_rt = mfc_rt->mfc_next) {
			mutex_enter(&mfc_rt->mfc_mutex);
			if (ip_mrtdebug > 1) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: MFCTAB hash %d o 0x%x"
				    " g 0x%x\n",
				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			/* There is an upcall */
			if ((src == mfc_rt->mfc_origin.s_addr) &&
			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
			    (mfc_rt->mfc_rte != NULL) &&
			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
				/* NOTE: we break holding mfc_rt->mfc_mutex. */
				break;
			}
			mutex_exit(&mfc_rt->mfc_mutex);
		}
		/* No upcall, so make a new entry into mfctable */
		if (mfc_rt == NULL) {
			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
			if (mfc_rt == NULL) {
				mrtstat.mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory "
				    "for mfc, mfc_rt\n"));
				goto error_return;
			} else
				new_mfc = B_TRUE;
			/* Get resources */
			/* TODO could copy header and dup rest */
			mp_copy = copymsg(mp);
			if (mp_copy == NULL) {
				mrtstat.mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory for "
				    "mblk, mp_copy\n"));
				goto error_return;
			}
			mutex_enter(&mfc_rt->mfc_mutex);
		}
		/* Get resources for rte, whether first rte or not first. */
		/* Add this packet into rtdetq */
		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
		if (rte == NULL) {
			mrtstat.mrts_fwd_drop++;
			mutex_exit(&mfc_rt->mfc_mutex);
			ip1dbg(("ip_mforward: out of memory for"
			    " rtdetq, rte\n"));
			goto error_return;
		}

		mp0 = copymsg(mp);
		if (mp0 == NULL) {
			mrtstat.mrts_fwd_drop++;
			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}
		rte->mp = mp0;
		if (pim_reg_packet) {
			ASSERT(reg_vif_num != ALL_VIFS);
			rte->ill = viftable[reg_vif_num].v_ipif->ipif_ill;
		} else {
			rte->ill = ill;
		}
		rte->rte_next = NULL;

		/*
		 * Determine if upcall q (rtdetq) has overflowed.
		 * mfc_rt->mfc_rte is null by mi_zalloc
		 * if it is the first message.
		 */
		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
		    rte_m = rte_m->rte_next)
			npkts++;
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mforward: upcalls %d\n", npkts);
		}
		if (npkts > MAX_UPQ) {
			mrtstat.mrts_upq_ovflw++;
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}

		if (npkts == 0) {	/* first upcall */
			int i = 0;
			/*
			 * Now finish installing the new mfc! Now that we have
			 * resources! Insert new entry at head of hash chain.
			 * Use src and dst which are ipaddr_t's.
			 */
			mfc_rt->mfc_origin.s_addr = src;
			mfc_rt->mfc_mcastgrp.s_addr = dst;

			mutex_enter(&numvifs_mutex);
			for (i = 0; i < (int)numvifs; i++)
				mfc_rt->mfc_ttls[i] = 0;
			mutex_exit(&numvifs_mutex);
			mfc_rt->mfc_parent = ALL_VIFS;

			/* Link into table */
			if (ip_mrtdebug > 1) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
				    "g 0x%x\n", hash,
				    ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			mfc_rt->mfc_next = mfctable[hash].mfcb_mfc;
			mfctable[hash].mfcb_mfc = mfc_rt;
			mfc_rt->mfc_rte = NULL;
		}

		/* Link in the upcall */
		/* First upcall */
		if (mfc_rt->mfc_rte == NULL)
			mfc_rt->mfc_rte = rte;
		else {
			/* not the first upcall */
			prev_rte = mfc_rt->mfc_rte;
			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
			    prev_rte = rte1, rte1 = rte1->rte_next);
			prev_rte->rte_next = rte;
		}

		/*
		 * No upcalls waiting, this is first one, so send a message to
		 * routing daemon to install a route into kernel table.
		 */
		if (npkts == 0) {
			struct igmpmsg	*im;
			/* ipha_protocol is 0, for upcall */
			ASSERT(mp_copy != NULL);
			im = (struct igmpmsg *)mp_copy->b_rptr;
			im->im_msgtype = IGMPMSG_NOCACHE;
			im->im_mbz = 0;
			mutex_enter(&numvifs_mutex);
			if (pim_reg_packet) {
				im->im_vif = (uchar_t)reg_vif_num;
				mutex_exit(&numvifs_mutex);
			} else {
				/*
				 * XXX do we need to hold locks here ?
				 */
				for (vifi = 0; vifi < numvifs; vifi++) {
					if (viftable[vifi].v_ipif == NULL)
						continue;
					if (viftable[vifi].v_ipif->ipif_ill ==
					    ill) {
						im->im_vif = (uchar_t)vifi;
						break;
					}
				}
				mutex_exit(&numvifs_mutex);
				ASSERT(vifi < numvifs);
			}

			mrtstat.mrts_upcalls++;
			/* Timer to discard upcalls if mrouted is too slow */
			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(mfctable[hash].mfcb_lock));
			putnext(RD(ip_g_mrouter), mp_copy);

		} else {
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(mfctable[hash].mfcb_lock));
			freemsg(mp_copy);
		}

		MFCB_REFRELE(mfcbp);
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	error_return:
		mutex_exit(&(mfctable[hash].mfcb_lock));
		MFCB_REFRELE(mfcbp);
		if (mfc_rt != NULL && (new_mfc == B_TRUE))
			mi_free((char *)mfc_rt);
		if (rte != NULL)
			mi_free((char *)rte);
		if (mp_copy != NULL)
			freemsg(mp_copy);
		if (mp0 != NULL)
			freemsg(mp0);
		return (-1);
	}
}

/*
 * Clean up the mfctable cache entry if upcall is not serviced.
 * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
 */
static void
expire_upcalls(void *arg)
{
	struct mfc *mfc_rt = arg;
	uint_t hash;
	struct mfc *prev_mfc, *mfc0;

	hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "expire_upcalls: hash %d s %x g %x",
		    hash, ntohl(mfc_rt->mfc_origin.s_addr),
		    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
	}
	MFCB_REFHOLD(&mfctable[hash]);
	mutex_enter(&mfc_rt->mfc_mutex);
	/*
	 * if timeout has been set to zero, than the
	 * entry has been filled, no need to delete it.
	 */
	if (mfc_rt->mfc_timeout_id == 0)
		goto done;
	mrtstat.mrts_cache_cleanups++;
	mfc_rt->mfc_timeout_id = 0;

	/* Determine entry to be cleaned up in cache table. */
	for (prev_mfc = mfc0 = mfctable[hash].mfcb_mfc; mfc0;
	    prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
		if (mfc0 == mfc_rt)
			break;

	/* del_mfc takes care of gone mfcs */
	ASSERT(prev_mfc != NULL);
	ASSERT(mfc0 != NULL);

	/*
	 * Delete the entry from the cache
	 */
	mfctable[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
	mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;

	/*
	 * release_mfc will drop all queued upcall packets.
	 * and will free the mbuf with the pkt, if, timing info.
	 */
done:
	mutex_exit(&mfc_rt->mfc_mutex);
	MFCB_REFRELE(&mfctable[hash]);
}

/*
 * Packet forwarding routine once entry in the cache is made.
 */
static int
ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
    struct mfc *rt)
{
	vifi_t vifi;
	struct vif *vifp;
	ipaddr_t dst = ipha->ipha_dst;
	size_t plen = msgdsize(mp);
	vifi_t num_of_vifs;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
		    ill->ill_name);
	}

	/* Macro to send packet on vif */
#define	MC_SEND(ipha, mp, vifp, dst) { \
	if ((vifp)->v_flags & VIFF_TUNNEL) \
		encap_send((ipha), (mp), (vifp), (dst)); \
	else if ((vifp)->v_flags & VIFF_REGISTER) \
		register_send((ipha), (mp), (vifp), (dst)); \
	else \
		phyint_send((ipha), (mp), (vifp), (dst)); \
}

	vifi = rt->mfc_parent;

	/*
	 * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
	 * Mrouted had no route.
	 * We wanted the route installed in the mfctable to prevent multiple
	 * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
	 * NULL so we don't want to check the ill. Still needed as of Mrouted
	 * 3.6.
	 */
	if (vifi == NO_VIF) {
		ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
		    ill->ill_name));
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
		}
		return (-1);	/* drop pkt */
	}

	if (!lock_good_vif(&viftable[vifi]))
		return (-1);
	/*
	 * The MFC entries are not cleaned up when an ipif goes
	 * away thus this code has to guard against an MFC referencing
	 * an ipif that has been closed. Note: reset_mrt_vif_ipif
	 * sets the v_ipif to NULL when the ipif disappears.
	 */
	ASSERT(viftable[vifi].v_ipif != NULL);

	/*
	 * NOTE(review): this numvifs bound check happens after viftable[vifi]
	 * has already been locked above; that indexing is only safe because
	 * vifi != NO_VIF implies vifi < MAXVIFS here — confirm against the
	 * viftable declaration, which is outside this view.
	 */
	if (vifi >= numvifs) {
		cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
		    "%d ill %s viftable ill %s\n",
		    (int)vifi, (int)numvifs, ill->ill_name,
		    viftable[vifi].v_ipif->ipif_ill->ill_name);
		unlock_good_vif(&viftable[vifi]);
		return (-1);
	}
	/*
	 * Don't forward if it didn't arrive from the parent vif for its
	 * origin. But do match on the groups as we nominate only one
	 * ill in the group for receiving allmulti packets.
	 */
	if ((viftable[vifi].v_ipif->ipif_ill != ill &&
	    (ill->ill_group == NULL ||
	    viftable[vifi].v_ipif->ipif_ill->ill_group != ill->ill_group)) ||
	    (viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
		/* Came in the wrong interface */
		ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
		    "numvifs %d ill %s viftable ill %s\n",
		    (int)vifi, (int)numvifs, ill->ill_name,
		    viftable[vifi].v_ipif->ipif_ill->ill_name));
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "ip_mdq: arrived wrong if, vifi %d ill "
			    "%s viftable ill %s\n",
			    (int)vifi, ill->ill_name,
			    viftable[vifi].v_ipif->ipif_ill->ill_name);
		}
		mrtstat.mrts_wrong_if++;
		rt->mfc_wrong_if++;

		/*
		 * If we are doing PIM assert processing and we are forwarding
		 * packets on this interface, and it is a broadcast medium
		 * interface (and not a tunnel), send a message to the routing.
		 *
		 * We use the first ipif on the list, since it's all we have.
		 * Chances are the ipif_flags are the same for ipifs on the ill.
		 */
		if (pim_assert && rt->mfc_ttls[vifi] > 0 &&
		    (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
		    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
			mblk_t *mp_copy;
			struct igmpmsg *im;

			/* TODO could copy header and dup rest */
			mp_copy = copymsg(mp);
			if (mp_copy == NULL) {
				mrtstat.mrts_fwd_drop++;
				ip1dbg(("ip_mdq: out of memory "
				    "for mblk, mp_copy\n"));
				unlock_good_vif(&viftable[vifi]);
				return (-1);
			}

			im = (struct igmpmsg *)mp_copy->b_rptr;
			im->im_msgtype = IGMPMSG_WRONGVIF;
			im->im_mbz = 0;
			im->im_vif = (ushort_t)vifi;
			putnext(RD(ip_g_mrouter), mp_copy);
		}
		unlock_good_vif(&viftable[vifi]);
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	}
	/*
	 * If I sourced this packet, it counts as output, else it was input.
	 */
	if (ipha->ipha_src == viftable[vifi].v_lcl_addr.s_addr) {
		viftable[vifi].v_pkt_out++;
		viftable[vifi].v_bytes_out += plen;
	} else {
		viftable[vifi].v_pkt_in++;
		viftable[vifi].v_bytes_in += plen;
	}
	mutex_enter(&rt->mfc_mutex);
	rt->mfc_pkt_cnt++;
	rt->mfc_byte_cnt += plen;
	mutex_exit(&rt->mfc_mutex);
	unlock_good_vif(&viftable[vifi]);
	/*
	 * For each vif, decide if a copy of the packet should be forwarded.
	 * Forward if:
	 *	- the vif threshold ttl is non-zero AND
	 *	- the pkt ttl exceeds the vif's threshold
	 * A non-zero mfc_ttl indicates that the vif is part of
	 * the output set for the mfc entry.
	 */
	mutex_enter(&numvifs_mutex);
	num_of_vifs = numvifs;
	mutex_exit(&numvifs_mutex);
	for (vifp = viftable, vifi = 0; vifi < num_of_vifs; vifp++, vifi++) {
		if (!lock_good_vif(vifp))
			continue;
		if ((rt->mfc_ttls[vifi] > 0) &&
		    (ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
			/*
			 * lock_good_vif should not have succedded if
			 * v_ipif is null.
			 */
			ASSERT(vifp->v_ipif != NULL);
			vifp->v_pkt_out++;
			vifp->v_bytes_out += plen;
			MC_SEND(ipha, mp, vifp, dst);
			mrtstat.mrts_fwd_out++;
		}
		unlock_good_vif(vifp);
	}
	if (tunnel_src != 0)
		return (1);
	else
		return (0);
}

/*
 * Send the packet on physical interface.
 * Caller assumes can continue to use mp on return.
 */
/* ARGSUSED */
static void
phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	mblk_t *mp_copy;

	/* Make a new reference to the packet */
	mp_copy = copymsg(mp);	/* TODO could copy header and dup rest */
	if (mp_copy == NULL) {
		mrtstat.mrts_fwd_drop++;
		ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
		return;
	}
	/* Rate limit of zero or less means no token-bucket shaping. */
	if (vifp->v_rate_limit <= 0)
		tbf_send_packet(vifp, mp_copy);
	else {
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "phyint_send: tbf_contr rate %d "
			    "vifp 0x%p mp 0x%p dst 0x%x",
			    vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
		}
		tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr);
	}
}

/*
 * Send the whole packet for REGISTER encapsulation to PIM daemon
 * Caller assumes it can continue to use mp on return.
 */
/* ARGSUSED */
static void
register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	struct igmpmsg	*im;
	mblk_t		*mp_copy;
	ipha_t		*ipha_copy;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "register_send: src %x, dst %x\n",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
	}

	/*
	 * Copy the old packet & pullup its IP header into the new mblk_t so we
	 * can modify it. Try to fill the new mblk_t since if we don't the
	 * ethernet driver will.
	 */
	mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
	if (mp_copy == NULL) {
		++mrtstat.mrts_pim_nomemory;
		if (ip_mrtdebug > 3) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "register_send: allocb failure.");
		}
		return;
	}

	/*
	 * Bump write pointer to account for igmpmsg being added.
	 */
	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg);

	/*
	 * Chain packet to new mblk_t.
	 */
	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
		++mrtstat.mrts_pim_nomemory;
		if (ip_mrtdebug > 3) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "register_send: copymsg failure.");
		}
		freeb(mp_copy);
		return;
	}

	/*
	 * icmp_rput() asserts that IP version field is set to an
	 * appropriate version. Hence, the struct igmpmsg that this really
	 * becomes, needs to have the correct IP version field.
	 */
	ipha_copy = (ipha_t *)mp_copy->b_rptr;
	*ipha_copy = multicast_encap_iphdr;

	/*
	 * The kernel uses the struct igmpmsg header to encode the messages to
	 * the multicast routing daemon. Fill in the fields in the header
	 * starting with the message type which is IGMPMSG_WHOLEPKT
	 */
	im = (struct igmpmsg *)mp_copy->b_rptr;
	im->im_msgtype = IGMPMSG_WHOLEPKT;
	im->im_src.s_addr = ipha->ipha_src;
	im->im_dst.s_addr = ipha->ipha_dst;

	/*
	 * Must Be Zero. This is because the struct igmpmsg is really an IP
	 * header with renamed fields and the multicast routing daemon uses
	 * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
	 */
	im->im_mbz = 0;

	++mrtstat.mrts_upcalls;
	/* Drop (don't block) if the daemon's stream is flow-controlled. */
	if (!canputnext(RD(ip_g_mrouter))) {
		++mrtstat.mrts_pim_regsend_drops;
		if (ip_mrtdebug > 3) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "register_send: register upcall failure.");
		}
		freemsg(mp_copy);
	} else {
		putnext(RD(ip_g_mrouter), mp_copy);
	}
}

/*
 * pim_validate_cksum handles verification of the checksum in the
 * pim header. For PIM Register packets, the checksum is calculated
 * across the PIM header only. For all other packets, the checksum
 * is for the PIM header and remainder of the packet.
 *
 * returns: B_TRUE, if checksum is okay.
 *          B_FALSE, if checksum is not valid.
 */
static boolean_t
pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp)
{
	mblk_t *mp_dup;

	/* Work on a duplicate so the caller's rptr/wptr are untouched. */
	if ((mp_dup = dupmsg(mp)) == NULL)
		return (B_FALSE);

	mp_dup->b_rptr += IPH_HDR_LENGTH(ip);
	/*
	 * For PIM Register messages only the PIM header itself is
	 * covered by the checksum, so clip the duplicate to PIM_MINLEN.
	 */
	if (pimp->pim_type == PIM_REGISTER)
		mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN;
	if (IP_CSUM(mp_dup, 0, 0)) {
		freemsg(mp_dup);
		return (B_FALSE);
	}
	freemsg(mp_dup);
	return (B_TRUE);
}

/*
 * int
 * pim_input(queue_t *, mblk_t *) - Process PIM protocol packets.
 *	IP Protocol 103. Register messages are decapsulated and sent
 *	onto multicast forwarding.
 *
 * Returns 0 on success (caller retains ownership of mp and passes it up
 * to any raw PIM listener); -1 on error (mp has been freed here).
 */
int
pim_input(queue_t *q, mblk_t *mp)
{
	ipha_t		*eip, *ip;
	int		iplen, pimlen, iphlen;
	struct pim	*pimp;	/* pointer to a pim struct */
	uint32_t	*reghdr;

	/*
	 * Pullup the msg for PIM protocol processing.
	 */
	if (pullupmsg(mp, -1) == 0) {
		++mrtstat.mrts_pim_nomemory;
		freemsg(mp);
		return (-1);
	}

	ip = (ipha_t *)mp->b_rptr;
	iplen = ip->ipha_length;
	iphlen = IPH_HDR_LENGTH(ip);
	pimlen = ntohs(iplen) - iphlen;

	/*
	 * Validate lengths
	 */
	if (pimlen < PIM_MINLEN) {
		++mrtstat.mrts_pim_malformed;
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "pim_input: length not at least minlen");
		}
		freemsg(mp);
		return (-1);
	}

	/*
	 * Point to the PIM header.
	 */
	pimp = (struct pim *)((caddr_t)ip + iphlen);

	/*
	 * Check the version number.
	 */
	if (pimp->pim_vers != PIM_VERSION) {
		++mrtstat.mrts_pim_badversion;
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "pim_input: unknown version of PIM");
		}
		freemsg(mp);
		return (-1);
	}

	/*
	 * Validate the checksum
	 */
	if (!pim_validate_cksum(mp, ip, pimp)) {
		++mrtstat.mrts_pim_rcv_badcsum;
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "pim_input: invalid checksum");
		}
		freemsg(mp);
		return (-1);
	}

	/* Non-Register messages are simply passed up (mp not consumed). */
	if (pimp->pim_type != PIM_REGISTER)
		return (0);

	/* Register header word follows the PIM header; inner IP follows it. */
	reghdr = (uint32_t *)(pimp + 1);
	eip = (ipha_t *)(reghdr + 1);

	/*
	 * check if the inner packet is destined to mcast group
	 */
	if (!CLASSD(eip->ipha_dst)) {
		++mrtstat.mrts_pim_badregisters;
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "pim_input: Inner pkt not mcast .. !");
		}
		freemsg(mp);
		return (-1);
	}
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "register from %x, to %x, len %d",
		    ntohl(eip->ipha_src),
		    ntohl(eip->ipha_dst),
		    ntohs(eip->ipha_length));
	}
	/*
	 * If the null register bit is not set, decapsulate
	 * the packet before forwarding it.
	 */
	if (!(ntohl(*reghdr) & PIM_NULL_REGISTER)) {
		mblk_t *mp_copy;

		/* Copy the message */
		if ((mp_copy = copymsg(mp)) == NULL) {
			++mrtstat.mrts_pim_nomemory;
			freemsg(mp);
			return (-1);
		}

		/*
		 * Decapsulate the packet and give it to
		 * register_mforward.
		 */
		mp_copy->b_rptr += iphlen + sizeof (pim_t) +
		    sizeof (*reghdr);
		if (register_mforward(q, mp_copy) != 0) {
			freemsg(mp);
			return (-1);
		}
	}

	/*
	 * Pass all valid PIM packets up to any process(es) listening on a raw
	 * PIM socket. For Solaris it is done right after pim_input() is
	 * called.
	 */
	return (0);
}

/*
 * PIM sparse mode hook.  Called by pim_input after decapsulating
 * the packet. Loop back the packet, as if we have received it.
 * In pim_input() we have to check if the destination is a multicast address.
 */
/* ARGSUSED */
static int
register_mforward(queue_t *q, mblk_t *mp)
{
	ASSERT(reg_vif_num <= numvifs);

	if (ip_mrtdebug > 3) {
		ipha_t *ipha;

		ipha = (ipha_t *)mp->b_rptr;
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "register_mforward: src %x, dst %x\n",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
	}
	/*
	 * Need to pass in to ip_mforward() the information that the
	 * packet has arrived on the register_vif. We use the solution that
	 * ip_mroute_decap() employs: use mp->b_prev to pass some information
	 * to ip_mforward(). Nonzero value means the packet has arrived on a
	 * tunnel (ip_mroute_decap() puts the address of the other side of the
	 * tunnel there.) This is safe since ip_rput() either frees the packet
	 * or passes it to ip_mforward(). We use
	 * PIM_REGISTER_MARKER = 0xffffffff to indicate the has arrived on the
	 * register vif. If in the future we have more than one register vifs,
	 * then this will need re-examination.
	 */
	mp->b_prev = (mblk_t *)PIM_REGISTER_MARKER;
	++mrtstat.mrts_pim_regforwards;
	ip_rput(q, mp);
	return (0);
}

/*
 * Send an encapsulated packet.
 * Caller assumes can continue to use mp when routine returns.
2492 */ 2493 /* ARGSUSED */ 2494 static void 2495 encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 2496 { 2497 mblk_t *mp_copy; 2498 ipha_t *ipha_copy; 2499 size_t len; 2500 2501 if (ip_mrtdebug > 1) { 2502 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2503 "encap_send: vif %ld enter", (ptrdiff_t)(vifp - viftable)); 2504 } 2505 len = ntohs(ipha->ipha_length); 2506 2507 /* 2508 * Copy the old packet & pullup it's IP header into the 2509 * new mbuf so we can modify it. Try to fill the new 2510 * mbuf since if we don't the ethernet driver will. 2511 */ 2512 mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED); 2513 if (mp_copy == NULL) 2514 return; 2515 mp_copy->b_rptr += 32; 2516 mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr); 2517 if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 2518 freeb(mp_copy); 2519 return; 2520 } 2521 2522 /* 2523 * Fill in the encapsulating IP header. 2524 * Remote tunnel dst in rmt_addr, from add_vif(). 2525 */ 2526 ipha_copy = (ipha_t *)mp_copy->b_rptr; 2527 *ipha_copy = multicast_encap_iphdr; 2528 ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET); 2529 ipha_copy->ipha_length = htons(len + sizeof (ipha_t)); 2530 ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr; 2531 ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr; 2532 ASSERT(ipha_copy->ipha_ident == 0); 2533 2534 /* Turn the encapsulated IP header back into a valid one. */ 2535 ipha = (ipha_t *)mp_copy->b_cont->b_rptr; 2536 ipha->ipha_ttl--; 2537 ipha->ipha_hdr_checksum = 0; 2538 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 2539 2540 if (ip_mrtdebug > 1) { 2541 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2542 "encap_send: group 0x%x", ntohl(ipha->ipha_dst)); 2543 } 2544 if (vifp->v_rate_limit <= 0) 2545 tbf_send_packet(vifp, mp_copy); 2546 else 2547 /* ipha is from the original header */ 2548 tbf_control(vifp, mp_copy, ipha); 2549 } 2550 2551 /* 2552 * De-encapsulate a packet and feed it back through IP input. 
2553 * This routine is called whenever IP gets a packet with prototype 2554 * IPPROTO_ENCAP and a local destination address. 2555 */ 2556 void 2557 ip_mroute_decap(queue_t *q, mblk_t *mp) 2558 { 2559 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2560 ipha_t *ipha_encap; 2561 int hlen = IPH_HDR_LENGTH(ipha); 2562 ipaddr_t src; 2563 struct vif *vifp; 2564 2565 /* 2566 * Dump the packet if it's not to a multicast destination or if 2567 * we don't have an encapsulating tunnel with the source. 2568 * Note: This code assumes that the remote site IP address 2569 * uniquely identifies the tunnel (i.e., that this site has 2570 * at most one tunnel with the remote site). 2571 */ 2572 ipha_encap = (ipha_t *)((char *)ipha + hlen); 2573 if (!CLASSD(ipha_encap->ipha_dst)) { 2574 mrtstat.mrts_bad_tunnel++; 2575 ip1dbg(("ip_mroute_decap: bad tunnel\n")); 2576 freemsg(mp); 2577 return; 2578 } 2579 src = (ipaddr_t)ipha->ipha_src; 2580 mutex_enter(&last_encap_lock); 2581 if (src != last_encap_src) { 2582 struct vif *vife; 2583 2584 vifp = viftable; 2585 vife = vifp + numvifs; 2586 last_encap_src = src; 2587 last_encap_vif = 0; 2588 for (; vifp < vife; ++vifp) { 2589 if (!lock_good_vif(vifp)) 2590 continue; 2591 if (vifp->v_rmt_addr.s_addr == src) { 2592 if (vifp->v_flags & VIFF_TUNNEL) 2593 last_encap_vif = vifp; 2594 if (ip_mrtdebug > 1) { 2595 (void) mi_strlog(ip_g_mrouter, 2596 1, SL_TRACE, 2597 "ip_mroute_decap: good tun " 2598 "vif %ld with %x", 2599 (ptrdiff_t)(vifp - viftable), 2600 ntohl(src)); 2601 } 2602 unlock_good_vif(vifp); 2603 break; 2604 } 2605 unlock_good_vif(vifp); 2606 } 2607 } 2608 if ((vifp = last_encap_vif) == 0) { 2609 mutex_exit(&last_encap_lock); 2610 mrtstat.mrts_bad_tunnel++; 2611 freemsg(mp); 2612 ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n", 2613 (ptrdiff_t)(vifp - viftable), ntohl(src))); 2614 return; 2615 } 2616 mutex_exit(&last_encap_lock); 2617 2618 /* 2619 * Need to pass in the tunnel source to ip_mforward (so that it can 2620 * verify that the 
packet arrived over the correct vif.) We use b_prev 2621 * to pass this information. This is safe since the ip_rput either 2622 * frees the packet or passes it to ip_mforward. 2623 */ 2624 mp->b_prev = (mblk_t *)(uintptr_t)src; 2625 mp->b_rptr += hlen; 2626 /* Feed back into ip_rput as an M_DATA. */ 2627 ip_rput(q, mp); 2628 } 2629 2630 /* 2631 * Remove all records with v_ipif == ipif. Called when an interface goes away 2632 * (stream closed). Called as writer. 2633 */ 2634 void 2635 reset_mrt_vif_ipif(ipif_t *ipif) 2636 { 2637 vifi_t vifi, tmp_vifi; 2638 vifi_t num_of_vifs; 2639 2640 /* Can't check vifi >= 0 since vifi_t is unsigned! */ 2641 2642 mutex_enter(&numvifs_mutex); 2643 num_of_vifs = numvifs; 2644 mutex_exit(&numvifs_mutex); 2645 2646 for (vifi = num_of_vifs; vifi != 0; vifi--) { 2647 tmp_vifi = vifi - 1; 2648 if (viftable[tmp_vifi].v_ipif == ipif) { 2649 (void) del_vif(&tmp_vifi, NULL, NULL); 2650 } 2651 } 2652 } 2653 2654 /* Remove pending upcall msgs when ill goes away. Called by ill_delete. */ 2655 void 2656 reset_mrt_ill(ill_t *ill) 2657 { 2658 struct mfc *rt; 2659 struct rtdetq *rte; 2660 int i; 2661 2662 for (i = 0; i < MFCTBLSIZ; i++) { 2663 MFCB_REFHOLD(&mfctable[i]); 2664 if ((rt = mfctable[i].mfcb_mfc) != NULL) { 2665 if (ip_mrtdebug > 1) { 2666 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 2667 "reset_mrt_ill: mfctable [%d]", i); 2668 } 2669 while (rt != NULL) { 2670 mutex_enter(&rt->mfc_mutex); 2671 while ((rte = rt->mfc_rte) != NULL) { 2672 if (rte->ill == ill) { 2673 if (ip_mrtdebug > 1) { 2674 (void) mi_strlog( 2675 ip_g_mrouter, 2676 1, SL_TRACE, 2677 "reset_mrt_ill: " 2678 "ill 0x%p", ill); 2679 } 2680 rt->mfc_rte = rte->rte_next; 2681 freemsg(rte->mp); 2682 mi_free((char *)rte); 2683 } 2684 } 2685 mutex_exit(&rt->mfc_mutex); 2686 rt = rt->mfc_next; 2687 } 2688 } 2689 MFCB_REFRELE(&mfctable[i]); 2690 } 2691 } 2692 2693 /* 2694 * Token bucket filter module. 2695 * The ipha is for mcastgrp destination for phyint and encap. 
 */
static void
tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
{
	size_t	p_len = msgdsize(mp);
	struct tbf	*t = vifp->v_tbf;
	timeout_id_t	id = 0;

	/* Drop if packet is too large */
	if (p_len > MAX_BKT_SIZE) {
		mrtstat.mrts_pkt2large++;
		freemsg(mp);
		return;
	}
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
		    (ptrdiff_t)(vifp - viftable), t->tbf_q_len,
		    ntohl(ipha->ipha_dst));
	}

	mutex_enter(&t->tbf_lock);

	/* Refill the bucket for the time elapsed since the last packet. */
	tbf_update_tokens(vifp);

	/*
	 * If there are enough tokens,
	 * and the queue is empty, send this packet out.
	 */
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d",
		    (ptrdiff_t)(vifp - viftable), t->tbf_n_tok, p_len,
		    t->tbf_q_len);
	}
	/* No packets are queued */
	if (t->tbf_q_len == 0) {
		/* queue empty, send packet if enough tokens */
		if (p_len <= t->tbf_n_tok) {
			t->tbf_n_tok -= p_len;
			/* Drop the lock before the (possibly long) send. */
			mutex_exit(&t->tbf_lock);
			tbf_send_packet(vifp, mp);
			return;
		} else {
			/* Queue packet and timeout till later */
			tbf_queue(vifp, mp);
			ASSERT(vifp->v_timeout_id == 0);
			vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
			    TBF_REPROCESS);
		}
	} else if (t->tbf_q_len < t->tbf_max_q_len) {
		/* Finite queue length, so queue pkts and process queue */
		tbf_queue(vifp, mp);
		tbf_process_q(vifp);
	} else {
		/* Check that we have UDP header with IP header */
		size_t hdr_length = IPH_HDR_LENGTH(ipha) +
		    sizeof (struct udphdr);

		if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
			if (!pullupmsg(mp, hdr_length)) {
				freemsg(mp);
				ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
				    "vif %ld src 0x%x dst 0x%x\n",
				    (ptrdiff_t)(vifp - viftable),
				    ntohl(ipha->ipha_src),
				    ntohl(ipha->ipha_dst)));
				mutex_exit(&vifp->v_tbf->tbf_lock);
				return;
			} else
				/* Have to reassign ipha after pullupmsg */
				ipha = (ipha_t *)mp->b_rptr;
		}
		/*
		 * Queue length too much,
		 * try to selectively dq, or queue and process
		 */
		if (!tbf_dq_sel(vifp, ipha)) {
			/* Nothing lower priority to evict: drop this pkt. */
			mrtstat.mrts_q_overflow++;
			freemsg(mp);
		} else {
			tbf_queue(vifp, mp);
			tbf_process_q(vifp);
		}
	}
	/*
	 * Queue fully drained: cancel any pending reprocess timer.
	 * The untimeout() must happen after dropping tbf_lock since the
	 * timeout handler takes the same lock.
	 */
	if (t->tbf_q_len == 0) {
		id = vifp->v_timeout_id;
		vifp->v_timeout_id = 0;
	}
	mutex_exit(&vifp->v_tbf->tbf_lock);
	if (id != 0)
		(void) untimeout(id);
}

/*
 * Adds a packet to the tbf queue at the interface.
 * The ipha is for mcastgrp destination for phyint and encap.
 */
static void
tbf_queue(struct vif *vifp, mblk_t *mp)
{
	struct tbf	*t = vifp->v_tbf;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_queue: vif %ld", (ptrdiff_t)(vifp - viftable));
	}
	ASSERT(MUTEX_HELD(&t->tbf_lock));

	if (t->tbf_t == NULL) {
		/* Queue was empty */
		t->tbf_q = mp;
	} else {
		/* Insert at tail */
		t->tbf_t->b_next = mp;
	}
	/* set new tail pointer */
	t->tbf_t = mp;

	mp->b_next = mp->b_prev = NULL;

	t->tbf_q_len++;
}

/*
 * Process the queue at the vif interface.
 * Drops the tbf_lock when sending packets.
 *
 * NOTE : The caller should quntimeout if the queue length is 0.
 */
static void
tbf_process_q(struct vif *vifp)
{
	mblk_t	*mp;
	struct tbf	*t = vifp->v_tbf;
	size_t	len;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_process_q 1: vif %ld qlen = %d",
		    (ptrdiff_t)(vifp - viftable), t->tbf_q_len);
	}

	/*
	 * Loop through the queue at the interface and send
	 * as many packets as possible.
	 */
	ASSERT(MUTEX_HELD(&t->tbf_lock));

	while (t->tbf_q_len > 0) {
		mp = t->tbf_q;
		len = (size_t)msgdsize(mp);	/* length of ip pkt */

		/* Determine if the packet can be sent */
		if (len <= t->tbf_n_tok) {
			/*
			 * If so, reduce no. of tokens, dequeue the packet,
			 * send the packet.
			 */
			t->tbf_n_tok -= len;

			t->tbf_q = mp->b_next;
			if (--t->tbf_q_len == 0) {
				t->tbf_t = NULL;
			}
			mp->b_next = NULL;
			/* Exit mutex before sending packet, then re-enter */
			mutex_exit(&t->tbf_lock);
			tbf_send_packet(vifp, mp);
			mutex_enter(&t->tbf_lock);
		} else
			/* Head of queue won't fit; stop (FIFO order). */
			break;
	}
}

/* Called at tbf timeout to update tokens, process q and reset timer.  */
static void
tbf_reprocess_q(void *arg)
{
	struct vif *vifp = arg;

	mutex_enter(&vifp->v_tbf->tbf_lock);
	vifp->v_timeout_id = 0;
	tbf_update_tokens(vifp);

	tbf_process_q(vifp);

	/* Still backlogged: re-arm the timer for another pass. */
	if (vifp->v_tbf->tbf_q_len > 0) {
		vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
		    TBF_REPROCESS);
	}
	mutex_exit(&vifp->v_tbf->tbf_lock);

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_reprcess_q: vif %ld timeout id = %p",
		    (ptrdiff_t)(vifp - viftable), vifp->v_timeout_id);
	}
}

/*
 * Function that will selectively discard a member of the tbf queue,
 * based on the precedence value and the priority.
 *
 * NOTE : The caller should quntimeout if the queue length is 0.
 */
static int
tbf_dq_sel(struct vif *vifp, ipha_t *ipha)
{
	uint_t		p;
	struct tbf	*t = vifp->v_tbf;
	mblk_t		**np;
	mblk_t		*last, *mp;

	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "dq_sel: vif %ld dst 0x%x",
		    (ptrdiff_t)(vifp - viftable), ntohl(ipha->ipha_dst));
	}

	ASSERT(MUTEX_HELD(&t->tbf_lock));
	p = priority(vifp, ipha);

	/*
	 * Walk the queue via a pointer-to-link so unlinking works for the
	 * head as well as interior entries; drop the first packet whose
	 * priority is strictly lower than the new packet's.
	 */
	np = &t->tbf_q;
	last = NULL;
	while ((mp = *np) != NULL) {
		if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) {
			*np = mp->b_next;
			/* If removing the last packet, fix the tail pointer */
			if (mp == t->tbf_t)
				t->tbf_t = last;
			mp->b_prev = mp->b_next = NULL;
			freemsg(mp);
			/*
			 * It's impossible for the queue to be empty, but
			 * we check anyway.
			 */
			if (--t->tbf_q_len == 0) {
				t->tbf_t = NULL;
			}
			mrtstat.mrts_drop_sel++;
			return (1);
		}
		np = &mp->b_next;
		last = mp;
	}
	/* No lower-priority entry found; caller will drop the new packet. */
	return (0);
}

/* Sends packet, 2 cases - encap tunnel, phyint. */
static void
tbf_send_packet(struct vif *vifp, mblk_t *mp)
{
	ipif_t	*ipif;

	/* If encap tunnel options */
	if (vifp->v_flags & VIFF_TUNNEL) {
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "tbf_send_pkt: ENCAP tunnel vif %ld",
			    (ptrdiff_t)(vifp - viftable));
		}

		/*
		 * Feed into ip_wput which will set the ident field and
		 * checksum the encapsulating header.
		 * BSD gets the cached route vifp->v_route from ip_output()
		 * to speed up route table lookups. Not necessary in SunOS 5.x.
		 */
		put(vifp->v_ipif->ipif_wq, mp);
		return;

		/* phyint */
	} else {
		/* Need to loop back to members on the outgoing interface. */
		ipha_t	*ipha;
		ipaddr_t	dst;
		ipha = (ipha_t *)mp->b_rptr;
		dst = ipha->ipha_dst;
		ipif = vifp->v_ipif;

		mutex_enter(&ipif->ipif_ill->ill_lock);
		/* Loop back a copy only if we have local members. */
		if (ilm_lookup_ipif(ipif, dst) != NULL) {
			/*
			 * The packet is not yet reassembled, thus we need to
			 * pass it to ip_rput_local for checksum verification
			 * and reassembly (and fanout the user stream).
			 */
			mblk_t	*mp_loop;
			ire_t	*ire;

			mutex_exit(&ipif->ipif_ill->ill_lock);
			if (ip_mrtdebug > 1) {
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "tbf_send_pkt: loopback vif %ld",
				    (ptrdiff_t)(vifp - viftable));
			}
			mp_loop = copymsg(mp);
			ire = ire_ctable_lookup(~0, 0, IRE_BROADCAST, NULL,
			    ALL_ZONES, MATCH_IRE_TYPE);

			if (mp_loop != NULL && ire != NULL) {
				IP_RPUT_LOCAL(ipif->ipif_rq, mp_loop,
				    ((ipha_t *)mp_loop->b_rptr),
				    ire, (ill_t *)ipif->ipif_rq->q_ptr);
			} else {
				/* Either copymsg failed or no ire */
				(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
				    "tbf_send_pkt: mp_loop 0x%p, ire 0x%p "
				    "vif %ld\n", mp_loop, ire,
				    (ptrdiff_t)(vifp - viftable));
			}
			if (ire != NULL)
				ire_refrele(ire);
		} else {
			mutex_exit(&ipif->ipif_ill->ill_lock);
		}
		if (ip_mrtdebug > 1) {
			(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
			    "tbf_send_pkt: phyint forward vif %ld dst = 0x%x",
			    (ptrdiff_t)(vifp - viftable), ntohl(dst));
		}
		/* Forward the original mp out the physical interface. */
		ip_rput_forward_multicast(dst, mp, ipif);
	}
}

/*
 * Determine the current time and then the elapsed time (between the last time
 * and time now).  Update the no. of tokens in the bucket.
 */
static void
tbf_update_tokens(struct vif *vifp)
{
	timespec_t	tp;
	hrtime_t	tm;
	struct tbf	*t = vifp->v_tbf;

	ASSERT(MUTEX_HELD(&t->tbf_lock));

	/* Time in secs and nsecs, rate limit in kbits/sec */
	gethrestime(&tp);

	/*LINTED*/
	TV_DELTA(tp, t->tbf_last_pkt_t, tm);

	/*
	 * This formula is actually
	 * "time in seconds" * "bytes/second".  Scaled for nsec.
	 * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8)
	 *
	 * The (1000/1024) was introduced in add_vif to optimize
	 * this divide into a shift.
	 */
	t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8;
	t->tbf_last_pkt_t = tp;

	/* Cap the bucket so an idle vif can't build an unbounded burst. */
	if (t->tbf_n_tok > MAX_BKT_SIZE)
		t->tbf_n_tok = MAX_BKT_SIZE;
	if (ip_mrtdebug > 1) {
		(void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
		    "tbf_update_tok: tm %lld tok %d vif %ld",
		    tm, t->tbf_n_tok, (ptrdiff_t)(vifp - viftable));
	}
}

/*
 * Priority currently is based on port nos.
 * Different forwarding mechanisms have different ways
 * of obtaining the port no. Hence, the vif must be
 * given along with the packet itself.
3066 * 3067 */ 3068 static int 3069 priority(struct vif *vifp, ipha_t *ipha) 3070 { 3071 int prio; 3072 3073 /* Temporary hack; may add general packet classifier some day */ 3074 3075 ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock)); 3076 3077 /* 3078 * The UDP port space is divided up into four priority ranges: 3079 * [0, 16384) : unclassified - lowest priority 3080 * [16384, 32768) : audio - highest priority 3081 * [32768, 49152) : whiteboard - medium priority 3082 * [49152, 65536) : video - low priority 3083 */ 3084 3085 if (ipha->ipha_protocol == IPPROTO_UDP) { 3086 struct udphdr *udp = 3087 (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha)); 3088 switch (ntohs(udp->uh_dport) & 0xc000) { 3089 case 0x4000: 3090 prio = 70; 3091 break; 3092 case 0x8000: 3093 prio = 60; 3094 break; 3095 case 0xc000: 3096 prio = 55; 3097 break; 3098 default: 3099 prio = 50; 3100 break; 3101 } 3102 if (ip_mrtdebug > 1) { 3103 (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, 3104 "priority: port %x prio %d\n", 3105 ntohs(udp->uh_dport), prio); 3106 } 3107 } else 3108 prio = 50; /* default priority */ 3109 return (prio); 3110 } 3111 3112 /* 3113 * End of token bucket filter modifications 3114 */ 3115 3116 3117 3118 /* 3119 * Produces data for netstat -M. 3120 */ 3121 int 3122 ip_mroute_stats(mblk_t *mp) 3123 { 3124 mrtstat.mrts_vifctlSize = sizeof (struct vifctl); 3125 mrtstat.mrts_mfcctlSize = sizeof (struct mfcctl); 3126 if (!snmp_append_data(mp, (char *)&mrtstat, sizeof (mrtstat))) { 3127 ip0dbg(("ip_mroute_stats: failed %ld bytes\n", 3128 (size_t)sizeof (mrtstat))); 3129 return (0); 3130 } 3131 return (1); 3132 } 3133 3134 /* 3135 * Sends info for SNMP's MIB. 3136 */ 3137 int 3138 ip_mroute_vif(mblk_t *mp) 3139 { 3140 struct vifctl vi; 3141 vifi_t vifi; 3142 3143 mutex_enter(&numvifs_mutex); 3144 for (vifi = 0; vifi < numvifs; vifi++) { 3145 if (viftable[vifi].v_lcl_addr.s_addr == 0) 3146 continue; 3147 /* 3148 * No locks here, an approximation is fine. 
3149 */ 3150 vi.vifc_vifi = vifi; 3151 vi.vifc_flags = viftable[vifi].v_flags; 3152 vi.vifc_threshold = viftable[vifi].v_threshold; 3153 vi.vifc_rate_limit = viftable[vifi].v_rate_limit; 3154 vi.vifc_lcl_addr = viftable[vifi].v_lcl_addr; 3155 vi.vifc_rmt_addr = viftable[vifi].v_rmt_addr; 3156 vi.vifc_pkt_in = viftable[vifi].v_pkt_in; 3157 vi.vifc_pkt_out = viftable[vifi].v_pkt_out; 3158 3159 if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) { 3160 ip0dbg(("ip_mroute_vif: failed %ld bytes\n", 3161 (size_t)sizeof (vi))); 3162 return (0); 3163 } 3164 } 3165 mutex_exit(&numvifs_mutex); 3166 return (1); 3167 } 3168 3169 /* 3170 * Called by ip_snmp_get to send up multicast routing table. 3171 */ 3172 int 3173 ip_mroute_mrt(mblk_t *mp) 3174 { 3175 int i, j; 3176 struct mfc *rt; 3177 struct mfcctl mfcc; 3178 3179 /* 3180 * Make sure multicast has not been turned off. 3181 */ 3182 if (is_mrouter_off()) 3183 return (1); 3184 3185 /* Loop over all hash buckets and their chains */ 3186 for (i = 0; i < MFCTBLSIZ; i++) { 3187 MFCB_REFHOLD(&mfctable[i]); 3188 for (rt = mfctable[i].mfcb_mfc; rt; rt = rt->mfc_next) { 3189 mutex_enter(&rt->mfc_mutex); 3190 if (rt->mfc_rte != NULL || 3191 (rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 3192 mutex_exit(&rt->mfc_mutex); 3193 continue; 3194 } 3195 mfcc.mfcc_origin = rt->mfc_origin; 3196 mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp; 3197 mfcc.mfcc_parent = rt->mfc_parent; 3198 mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt; 3199 mutex_enter(&numvifs_mutex); 3200 for (j = 0; j < (int)numvifs; j++) 3201 mfcc.mfcc_ttls[j] = rt->mfc_ttls[j]; 3202 for (j = (int)numvifs; j < MAXVIFS; j++) 3203 mfcc.mfcc_ttls[j] = 0; 3204 mutex_exit(&numvifs_mutex); 3205 3206 mutex_exit(&rt->mfc_mutex); 3207 if (!snmp_append_data(mp, (char *)&mfcc, 3208 sizeof (mfcc))) { 3209 MFCB_REFRELE(&mfctable[i]); 3210 ip0dbg(("ip_mroute_mrt: failed %ld bytes\n", 3211 (size_t)sizeof (mfcc))); 3212 return (0); 3213 } 3214 } 3215 MFCB_REFRELE(&mfctable[i]); 3216 } 3217 return (1); 3218 } 3219