17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 545916cd2Sjpk * Common Development and Distribution License (the "License"). 645916cd2Sjpk * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*1eee170aSErik Nordmark * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 237c478bd9Sstevel@tonic-gate */ 247c478bd9Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * Procedures for the kernel part of DVMRP, 287c478bd9Sstevel@tonic-gate * a Distance-Vector Multicast Routing Protocol. 297c478bd9Sstevel@tonic-gate * (See RFC-1075) 307c478bd9Sstevel@tonic-gate * Written by David Waitzman, BBN Labs, August 1988. 317c478bd9Sstevel@tonic-gate * Modified by Steve Deering, Stanford, February 1989. 
327c478bd9Sstevel@tonic-gate * Modified by Mark J. Steiglitz, Stanford, May, 1991 337c478bd9Sstevel@tonic-gate * Modified by Van Jacobson, LBL, January 1993 347c478bd9Sstevel@tonic-gate * Modified by Ajit Thyagarajan, PARC, August 1993 357c478bd9Sstevel@tonic-gate * Modified by Bill Fenner, PARC, April 1995 367c478bd9Sstevel@tonic-gate * 377c478bd9Sstevel@tonic-gate * MROUTING 3.5 387c478bd9Sstevel@tonic-gate */ 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate /* 417c478bd9Sstevel@tonic-gate * TODO 427c478bd9Sstevel@tonic-gate * - function pointer field in vif, void *vif_sendit() 437c478bd9Sstevel@tonic-gate */ 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate #include <sys/types.h> 467c478bd9Sstevel@tonic-gate #include <sys/stream.h> 477c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 487c478bd9Sstevel@tonic-gate #include <sys/strlog.h> 497c478bd9Sstevel@tonic-gate #include <sys/systm.h> 507c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 517c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 527c478bd9Sstevel@tonic-gate #include <sys/zone.h> 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate #include <sys/param.h> 557c478bd9Sstevel@tonic-gate #include <sys/socket.h> 567c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 577c478bd9Sstevel@tonic-gate #include <sys/debug.h> 587c478bd9Sstevel@tonic-gate #include <net/if.h> 597c478bd9Sstevel@tonic-gate #include <sys/sockio.h> 607c478bd9Sstevel@tonic-gate #include <netinet/in.h> 617c478bd9Sstevel@tonic-gate #include <net/if_dl.h> 627c478bd9Sstevel@tonic-gate 63bd670b35SErik Nordmark #include <inet/ipsec_impl.h> 647c478bd9Sstevel@tonic-gate #include <inet/common.h> 657c478bd9Sstevel@tonic-gate #include <inet/mi.h> 667c478bd9Sstevel@tonic-gate #include <inet/nd.h> 676e91bba0SGirish Moodalbail #include <inet/tunables.h> 687c478bd9Sstevel@tonic-gate #include <inet/mib2.h> 697c478bd9Sstevel@tonic-gate #include <netinet/ip6.h> 707c478bd9Sstevel@tonic-gate #include <inet/ip.h> 717c478bd9Sstevel@tonic-gate 
#include <inet/snmpcom.h> 727c478bd9Sstevel@tonic-gate 737c478bd9Sstevel@tonic-gate #include <netinet/igmp.h> 747c478bd9Sstevel@tonic-gate #include <netinet/igmp_var.h> 757c478bd9Sstevel@tonic-gate #include <netinet/udp.h> 767c478bd9Sstevel@tonic-gate #include <netinet/ip_mroute.h> 777c478bd9Sstevel@tonic-gate #include <inet/ip_multi.h> 787c478bd9Sstevel@tonic-gate #include <inet/ip_ire.h> 79bd670b35SErik Nordmark #include <inet/ip_ndp.h> 807c478bd9Sstevel@tonic-gate #include <inet/ip_if.h> 817c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h> 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate #include <netinet/pim.h> 847c478bd9Sstevel@tonic-gate 857c478bd9Sstevel@tonic-gate 867c478bd9Sstevel@tonic-gate /* 877c478bd9Sstevel@tonic-gate * MT Design: 887c478bd9Sstevel@tonic-gate * 897c478bd9Sstevel@tonic-gate * There are three main data structures viftable, mfctable and tbftable that 907c478bd9Sstevel@tonic-gate * need to be protected against MT races. 917c478bd9Sstevel@tonic-gate * 927c478bd9Sstevel@tonic-gate * viftable is a fixed length array of vif structs. There is no lock to protect 937c478bd9Sstevel@tonic-gate * the whole array, instead each struct is protected by its own individual lock. 947c478bd9Sstevel@tonic-gate * The value of v_marks in conjunction with the value of v_refcnt determines the 957c478bd9Sstevel@tonic-gate * current state of a vif structure. One special state that needs mention 967c478bd9Sstevel@tonic-gate * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates 977c478bd9Sstevel@tonic-gate * that vif is being initialized. 987c478bd9Sstevel@tonic-gate * Each structure is freed when the refcnt goes down to zero. If a delete comes 99bd670b35SErik Nordmark * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED 1007c478bd9Sstevel@tonic-gate * which prevents the struct from further use. When the refcnt goes to zero 1017c478bd9Sstevel@tonic-gate * the struct is freed and is marked VIF_MARK_NOTINUSE. 
1027c478bd9Sstevel@tonic-gate * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill 1037c478bd9Sstevel@tonic-gate * from going away a refhold is put on the ipif before using it. see 1047c478bd9Sstevel@tonic-gate * lock_good_vif() and unlock_good_vif(). 1057c478bd9Sstevel@tonic-gate * 1067c478bd9Sstevel@tonic-gate * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts 1077c478bd9Sstevel@tonic-gate * of the vif struct. 1087c478bd9Sstevel@tonic-gate * 1097c478bd9Sstevel@tonic-gate * tbftable is also a fixed length array of tbf structs and is only accessed 1107c478bd9Sstevel@tonic-gate * via v_tbf. It is protected by its own lock tbf_lock. 1117c478bd9Sstevel@tonic-gate * 1127c478bd9Sstevel@tonic-gate * Lock Ordering is 1137c478bd9Sstevel@tonic-gate * v_lock --> tbf_lock 1147c478bd9Sstevel@tonic-gate * v_lock --> ill_lock 1157c478bd9Sstevel@tonic-gate * 1167c478bd9Sstevel@tonic-gate * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb). 1177c478bd9Sstevel@tonic-gate * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker, 1187c478bd9Sstevel@tonic-gate * it also maintains a state. These fields are protected by a lock (mfcb_lock). 1197c478bd9Sstevel@tonic-gate * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to 1207c478bd9Sstevel@tonic-gate * protect the struct elements. 1217c478bd9Sstevel@tonic-gate * 1227c478bd9Sstevel@tonic-gate * mfc structs are dynamically allocated and are singly linked 1237c478bd9Sstevel@tonic-gate * at the head of the chain. When an mfc structure is to be deleted 1247c478bd9Sstevel@tonic-gate * it is marked condemned and so is the state in the bucket struct. 1257c478bd9Sstevel@tonic-gate * When the last walker of the hash bucket exits all the mfc structs 1267c478bd9Sstevel@tonic-gate * marked condemned are freed. 
1277c478bd9Sstevel@tonic-gate * 1287c478bd9Sstevel@tonic-gate * Locking Hierarchy: 1297c478bd9Sstevel@tonic-gate * The bucket lock should be acquired before the mfc struct lock. 1307c478bd9Sstevel@tonic-gate * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking 1317c478bd9Sstevel@tonic-gate * operations on the bucket struct. 1327c478bd9Sstevel@tonic-gate * 1337c478bd9Sstevel@tonic-gate * last_encap_lock and numvifs_mutex should be acquired after 1347c478bd9Sstevel@tonic-gate * acquiring vif or mfc locks. These locks protect some global variables. 1357c478bd9Sstevel@tonic-gate * 1367c478bd9Sstevel@tonic-gate * The statistics are not currently protected by a lock 1377c478bd9Sstevel@tonic-gate * causing the stats to be approximate, not exact. 1387c478bd9Sstevel@tonic-gate */ 1397c478bd9Sstevel@tonic-gate 1407c478bd9Sstevel@tonic-gate #define NO_VIF MAXVIFS /* from mrouted, no route for src */ 1417c478bd9Sstevel@tonic-gate 1427c478bd9Sstevel@tonic-gate /* 1437c478bd9Sstevel@tonic-gate * Timeouts: 1447c478bd9Sstevel@tonic-gate * Upcall timeouts - BSD uses boolean_t mfc->expire and 1457c478bd9Sstevel@tonic-gate * nexpire[MFCTBLSIZE], the number of times expire has been called. 1467c478bd9Sstevel@tonic-gate * SunOS 5.x uses mfc->timeout for each mfc. 1477c478bd9Sstevel@tonic-gate * Some Unixes are limited in the number of simultaneous timeouts 1487c478bd9Sstevel@tonic-gate * that can be run, SunOS 5.x does not have this restriction. 1497c478bd9Sstevel@tonic-gate */ 1507c478bd9Sstevel@tonic-gate 1517c478bd9Sstevel@tonic-gate /* 1527c478bd9Sstevel@tonic-gate * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and 1537c478bd9Sstevel@tonic-gate * UPCALL_EXPIRE is the number of timeouts before a particular upcall 1547c478bd9Sstevel@tonic-gate * expires. 
/*
 * Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE
 * (six quarter-second periods with the values below).
 */
#define	EXPIRE_TIMEOUT	(hz/4)	/* a quarter of a second in ticks: 4x / second */
#define	UPCALL_EXPIRE	6	/* number of timeouts */

/*
 * Hash function for a source, group entry.
 * XORs each address with itself shifted right by 10 and by 20 bits and
 * reduces the combined value with MFCHASHMOD (defined outside this file
 * section).  Both arguments are evaluated three times.
 */
#define	MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
	((g) >> 20) ^ ((g) >> 10) ^ (g))

#define	TBF_REPROCESS	(hz / 100)	/* 1/100 second in ticks: 100x /second */

/* Identify PIM packet that came on a Register interface */
#define	PIM_REGISTER_MARKER	0xffffffff

/* Function declarations */
static int	add_mfc(struct mfcctl *, ip_stack_t *);
static int	add_vif(struct vifctl *, conn_t *, ip_stack_t *);
static int	del_mfc(struct mfcctl *, ip_stack_t *);
static int	del_vif(vifi_t *, ip_stack_t *);
static void	del_vifp(struct vif *);
static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static void	expire_upcalls(void *);
static void	fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
static void	free_queue(struct mfc *);
static int	get_assert(uchar_t *, ip_stack_t *);
static int	get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
static int	get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
static int	get_version(uchar_t *);
static int	get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
		    ipaddr_t, struct mfc *);
static int	ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	register_mforward(mblk_t *, ip_recv_attr_t *);
static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	set_assert(int *, ip_stack_t *);

/*
 * Token Bucket Filter functions
 */
static int	priority(struct vif *, ipha_t *);
static void	tbf_control(struct vif *, mblk_t *, ipha_t *);
static int	tbf_dq_sel(struct vif *, ipha_t *);
static void	tbf_process_q(struct vif *);
static void	tbf_queue(struct vif *, mblk_t *);
static void	tbf_reprocess_q(void *);
static void	tbf_send_packet(struct vif *, mblk_t *);
static void	tbf_update_tokens(struct vif *);
static void	release_mfc(struct mfcb *);

static boolean_t is_mrouter_off(ip_stack_t *);
/*
 * Encapsulation packets
 */

#define	ENCAP_TTL	64

/*
 * prototype IP hdr for encapsulated packets
 *
 * The initializer is positional, so it depends on the member order of
 * ipha_t; members after the checksum are not listed and are therefore
 * zero-initialized.
 * NOTE(review): presumably the addresses and length are filled in per
 * packet by encap_send() - confirm against that function.
 */
static ipha_t multicast_encap_iphdr = {
	IP_SIMPLE_HDR_VERSION,
	0,				/* tos */
	sizeof (ipha_t),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, IPPROTO_ENCAP,
	0,				/* checksum */
};

/*
 * Rate limit for assert notification messages, in nsec
 * (3000000000 nsec is 3 seconds).
 */
#define	ASSERT_MSG_TIME		3000000000


/*
 * VIF_REFHOLD: take v_lock, bump v_refcnt, drop v_lock.
 */
#define	VIF_REFHOLD(vifp) {			\
	mutex_enter(&(vifp)->v_lock);		\
	(vifp)->v_refcnt++;			\
	mutex_exit(&(vifp)->v_lock);		\
}

/*
 * VIF_REFRELE_LOCKED: drop one reference without acquiring v_lock first.
 * If the count reaches zero and the vif is marked VIF_MARK_CONDEMNED,
 * del_vifp() is called; otherwise v_lock is released here.
 * NOTE(review): the macro never enters v_lock, so the caller is presumably
 * expected to hold it, and del_vifp() presumably releases it on the
 * condemned path - confirm against del_vifp().
 */
#define	VIF_REFRELE_LOCKED(vifp) {				\
	(vifp)->v_refcnt--;					\
	if ((vifp)->v_refcnt == 0 &&				\
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
			del_vifp(vifp);				\
	} else {						\
		mutex_exit(&(vifp)->v_lock);			\
	}							\
}

/*
 * VIF_REFRELE: same as VIF_REFRELE_LOCKED, but acquires v_lock itself
 * before decrementing v_refcnt.
 */
#define	VIF_REFRELE(vifp) {					\
	mutex_enter(&(vifp)->v_lock);				\
	(vifp)->v_refcnt--;					\
	if ((vifp)->v_refcnt == 0 &&				\
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
			del_vifp(vifp);				\
	} else {						\
		mutex_exit(&(vifp)->v_lock);			\
	}							\
}

/*
 * MFCB_REFHOLD: register a walker on a hash bucket.
 * Takes mfcb_lock, increments mfcb_refcnt (asserting that the counter did
 * not wrap to zero) and drops the lock again.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	(mfcb)->mfcb_refcnt++;				\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}

/*
 * MFCB_REFRELE: unregister a walker from a hash bucket.
 * Under mfcb_lock, decrements mfcb_refcnt; when the last walker leaves a
 * bucket marked MFCB_MARK_CONDEMNED, release_mfc() is called with the
 * bucket lock still held.
 */
#define	MFCB_REFRELE(mfcb) {					\
	mutex_enter(&(mfcb)->mfcb_lock);			\
	ASSERT((mfcb)->mfcb_refcnt != 0);			\
	if (--(mfcb)->mfcb_refcnt == 0 &&			\
	    ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) {	\
		release_mfc(mfcb);				\
	}							\
	mutex_exit(&(mfcb)->mfcb_lock);				\
}

/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 * Skip entries with pending upcalls.
 * Type of service parameter to be added in the future!
 *
 *	mfcbp	pointer to the hash bucket (struct mfcb) to search
 *	o, g	origin and group addresses to match
 *	rt	lvalue that receives the matching struct mfc, or NULL
 *
 * The chain is walked from mfcb_mfc via mfc_next; an entry matches when
 * both addresses are equal, mfc_rte is NULL and the entry is not marked
 * MFCB_MARK_CONDEMNED.  The macro takes no locks itself.
 *
 * Every parameter is parenthesized in the expansion so that arguments such
 * as "&bucket" or "addr | mask" keep their meaning.  o and g are evaluated
 * once per chain entry, so pass expressions without side effects.
 */
#define	MFCFIND(mfcbp, o, g, rt) {					\
	struct mfc *_mb_rt;						\
	(rt) = NULL;							\
	_mb_rt = (mfcbp)->mfcb_mfc;					\
	while (_mb_rt != NULL) {					\
		if ((_mb_rt->mfc_origin.s_addr == (o)) &&		\
		    (_mb_rt->mfc_mcastgrp.s_addr == (g)) &&		\
		    (_mb_rt->mfc_rte == NULL) &&			\
		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) {	\
			(rt) = _mb_rt;					\
			break;						\
		}							\
		_mb_rt = _mb_rt->mfc_next;				\
	}								\
}

/*
 * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
 * are inefficient. We use gethrestime() which returns a timespec_t with
 * sec and nsec, the resolution is machine dependent.
 * The following 2 macros have been changed to use nsec instead of usec.
 */
/*
 * Macros to compute elapsed time efficiently.
 * Borrowed from Van Jacobson's scheduling code.
 * Delta should be a hrtime_t.
 */
/*
 * TV_DELTA(a, b, delta):
 *	Store in delta the elapsed time a - b in nanoseconds, where a and b
 *	are structures with tv_sec and tv_nsec members.  delta should be a
 *	64-bit signed lvalue (hrtime_t).
 *
 *	Differences of one or two whole seconds are handled by plain
 *	additions; any other difference is scaled with a multiplication.
 *	That multiplication is done in 64-bit arithmetic: as a plain int
 *	product it overflowed (undefined behaviour) once the two timestamps
 *	were three or more seconds apart.
 */
#define	TV_DELTA(a, b, delta) {						\
	int xxs;							\
									\
	(delta) = (a).tv_nsec - (b).tv_nsec;				\
	if ((xxs = (a).tv_sec - (b).tv_sec) != 0) {			\
		switch (xxs) {						\
		case 2:							\
			(delta) += 1000000000;				\
			/*FALLTHROUGH*/					\
		case 1:							\
			(delta) += 1000000000;				\
			break;						\
		default:						\
			(delta) += (1000000000LL * xxs);		\
		}							\
	}								\
}

/*
 * TV_LT(a, b): true when timestamp a is strictly earlier than b.
 */
#define	TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \
	(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)

/*
 * Handle MRT setsockopt commands to modify the multicast routing tables.
 */
3307c478bd9Sstevel@tonic-gate */ 3317c478bd9Sstevel@tonic-gate int 332bd670b35SErik Nordmark ip_mrouter_set(int cmd, conn_t *connp, int checkonly, uchar_t *data, 333bd670b35SErik Nordmark int datalen) 3347c478bd9Sstevel@tonic-gate { 335fc80c0dfSnordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 336f4b3ec61Sdh155122 337f4b3ec61Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 338fc80c0dfSnordmark if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) { 339f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 3407c478bd9Sstevel@tonic-gate return (EACCES); 3417c478bd9Sstevel@tonic-gate } 342f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 3437c478bd9Sstevel@tonic-gate 3447c478bd9Sstevel@tonic-gate if (checkonly) { 3457c478bd9Sstevel@tonic-gate /* 3467c478bd9Sstevel@tonic-gate * do not do operation, just pretend to - new T_CHECK 3477c478bd9Sstevel@tonic-gate * Note: Even routines further on can probably fail but 3487c478bd9Sstevel@tonic-gate * this T_CHECK stuff is only to please XTI so it not 3497c478bd9Sstevel@tonic-gate * necessary to be perfect. 3507c478bd9Sstevel@tonic-gate */ 3517c478bd9Sstevel@tonic-gate switch (cmd) { 3527c478bd9Sstevel@tonic-gate case MRT_INIT: 3537c478bd9Sstevel@tonic-gate case MRT_DONE: 3547c478bd9Sstevel@tonic-gate case MRT_ADD_VIF: 3557c478bd9Sstevel@tonic-gate case MRT_DEL_VIF: 3567c478bd9Sstevel@tonic-gate case MRT_ADD_MFC: 3577c478bd9Sstevel@tonic-gate case MRT_DEL_MFC: 3587c478bd9Sstevel@tonic-gate case MRT_ASSERT: 3597c478bd9Sstevel@tonic-gate return (0); 3607c478bd9Sstevel@tonic-gate default: 3617c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 3627c478bd9Sstevel@tonic-gate } 3637c478bd9Sstevel@tonic-gate } 3647c478bd9Sstevel@tonic-gate 3657c478bd9Sstevel@tonic-gate /* 3667c478bd9Sstevel@tonic-gate * make sure no command is issued after multicast routing has been 3677c478bd9Sstevel@tonic-gate * turned off. 
3687c478bd9Sstevel@tonic-gate */ 3697c478bd9Sstevel@tonic-gate if (cmd != MRT_INIT && cmd != MRT_DONE) { 370f4b3ec61Sdh155122 if (is_mrouter_off(ipst)) 3717c478bd9Sstevel@tonic-gate return (EINVAL); 3727c478bd9Sstevel@tonic-gate } 3737c478bd9Sstevel@tonic-gate 3747c478bd9Sstevel@tonic-gate switch (cmd) { 375fc80c0dfSnordmark case MRT_INIT: return (ip_mrouter_init(connp, data, datalen, ipst)); 376bd670b35SErik Nordmark case MRT_DONE: return (ip_mrouter_done(ipst)); 377bd670b35SErik Nordmark case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, connp, ipst)); 378bd670b35SErik Nordmark case MRT_DEL_VIF: return (del_vif((vifi_t *)data, ipst)); 379f4b3ec61Sdh155122 case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data, ipst)); 380f4b3ec61Sdh155122 case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data, ipst)); 381f4b3ec61Sdh155122 case MRT_ASSERT: return (set_assert((int *)data, ipst)); 3827c478bd9Sstevel@tonic-gate default: return (EOPNOTSUPP); 3837c478bd9Sstevel@tonic-gate } 3847c478bd9Sstevel@tonic-gate } 3857c478bd9Sstevel@tonic-gate 3867c478bd9Sstevel@tonic-gate /* 3877c478bd9Sstevel@tonic-gate * Handle MRT getsockopt commands 3887c478bd9Sstevel@tonic-gate */ 3897c478bd9Sstevel@tonic-gate int 390bd670b35SErik Nordmark ip_mrouter_get(int cmd, conn_t *connp, uchar_t *data) 3917c478bd9Sstevel@tonic-gate { 392fc80c0dfSnordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 393f4b3ec61Sdh155122 394fc80c0dfSnordmark if (connp != ipst->ips_ip_g_mrouter) 3957c478bd9Sstevel@tonic-gate return (EACCES); 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate switch (cmd) { 3987c478bd9Sstevel@tonic-gate case MRT_VERSION: return (get_version((uchar_t *)data)); 399f4b3ec61Sdh155122 case MRT_ASSERT: return (get_assert((uchar_t *)data, ipst)); 4007c478bd9Sstevel@tonic-gate default: return (EOPNOTSUPP); 4017c478bd9Sstevel@tonic-gate } 4027c478bd9Sstevel@tonic-gate } 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate /* 4057c478bd9Sstevel@tonic-gate * 
Handle ioctl commands to obtain information from the cache. 4067c478bd9Sstevel@tonic-gate * Called with shared access to IP. These are read_only ioctls. 4077c478bd9Sstevel@tonic-gate */ 4087c478bd9Sstevel@tonic-gate /* ARGSUSED */ 4097c478bd9Sstevel@tonic-gate int 4107c478bd9Sstevel@tonic-gate mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, 4117c478bd9Sstevel@tonic-gate ip_ioctl_cmd_t *ipip, void *if_req) 4127c478bd9Sstevel@tonic-gate { 4137c478bd9Sstevel@tonic-gate mblk_t *mp1; 4147c478bd9Sstevel@tonic-gate struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 415fc80c0dfSnordmark conn_t *connp = Q_TO_CONN(q); 416fc80c0dfSnordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4177c478bd9Sstevel@tonic-gate 4187c478bd9Sstevel@tonic-gate /* Existence verified in ip_wput_nondata */ 4197c478bd9Sstevel@tonic-gate mp1 = mp->b_cont->b_cont; 4207c478bd9Sstevel@tonic-gate 4217c478bd9Sstevel@tonic-gate switch (iocp->ioc_cmd) { 4227c478bd9Sstevel@tonic-gate case (SIOCGETVIFCNT): 423f4b3ec61Sdh155122 return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst)); 4247c478bd9Sstevel@tonic-gate case (SIOCGETSGCNT): 425f4b3ec61Sdh155122 return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst)); 4267c478bd9Sstevel@tonic-gate case (SIOCGETLSGCNT): 427f4b3ec61Sdh155122 return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst)); 4287c478bd9Sstevel@tonic-gate default: 4297c478bd9Sstevel@tonic-gate return (EINVAL); 4307c478bd9Sstevel@tonic-gate } 4317c478bd9Sstevel@tonic-gate } 4327c478bd9Sstevel@tonic-gate 4337c478bd9Sstevel@tonic-gate /* 4347c478bd9Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided. 
4357c478bd9Sstevel@tonic-gate */ 4367c478bd9Sstevel@tonic-gate static int 437f4b3ec61Sdh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst) 4387c478bd9Sstevel@tonic-gate { 4397c478bd9Sstevel@tonic-gate struct mfc *rt; 4407c478bd9Sstevel@tonic-gate struct mfcb *mfcbp; 4417c478bd9Sstevel@tonic-gate 442f4b3ec61Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)]; 4437c478bd9Sstevel@tonic-gate MFCB_REFHOLD(mfcbp); 4447c478bd9Sstevel@tonic-gate MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt); 4457c478bd9Sstevel@tonic-gate 4467c478bd9Sstevel@tonic-gate if (rt != NULL) { 4477c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 4487c478bd9Sstevel@tonic-gate req->pktcnt = rt->mfc_pkt_cnt; 4497c478bd9Sstevel@tonic-gate req->bytecnt = rt->mfc_byte_cnt; 4507c478bd9Sstevel@tonic-gate req->wrong_if = rt->mfc_wrong_if; 4517c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 4527c478bd9Sstevel@tonic-gate } else 4537c478bd9Sstevel@tonic-gate req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU; 4547c478bd9Sstevel@tonic-gate 4557c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 4567c478bd9Sstevel@tonic-gate return (0); 4577c478bd9Sstevel@tonic-gate } 4587c478bd9Sstevel@tonic-gate 4597c478bd9Sstevel@tonic-gate /* 4607c478bd9Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided. 4617c478bd9Sstevel@tonic-gate * Uses larger counters and IPv6 addresses. 4627c478bd9Sstevel@tonic-gate */ 4637c478bd9Sstevel@tonic-gate /* ARGSUSED XXX until implemented */ 4647c478bd9Sstevel@tonic-gate static int 465f4b3ec61Sdh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst) 4667c478bd9Sstevel@tonic-gate { 4677c478bd9Sstevel@tonic-gate /* XXX TODO SIOCGETLSGCNT */ 4687c478bd9Sstevel@tonic-gate return (ENXIO); 4697c478bd9Sstevel@tonic-gate } 4707c478bd9Sstevel@tonic-gate 4717c478bd9Sstevel@tonic-gate /* 4727c478bd9Sstevel@tonic-gate * Returns the input and output packet and byte counts on the vif provided. 
4737c478bd9Sstevel@tonic-gate */ 4747c478bd9Sstevel@tonic-gate static int 475f4b3ec61Sdh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst) 4767c478bd9Sstevel@tonic-gate { 4777c478bd9Sstevel@tonic-gate vifi_t vifi = req->vifi; 4787c478bd9Sstevel@tonic-gate 479f4b3ec61Sdh155122 if (vifi >= ipst->ips_numvifs) 4807c478bd9Sstevel@tonic-gate return (EINVAL); 4817c478bd9Sstevel@tonic-gate 4827c478bd9Sstevel@tonic-gate /* 4837c478bd9Sstevel@tonic-gate * No locks here, an approximation is fine. 4847c478bd9Sstevel@tonic-gate */ 485f4b3ec61Sdh155122 req->icount = ipst->ips_vifs[vifi].v_pkt_in; 486f4b3ec61Sdh155122 req->ocount = ipst->ips_vifs[vifi].v_pkt_out; 487f4b3ec61Sdh155122 req->ibytes = ipst->ips_vifs[vifi].v_bytes_in; 488f4b3ec61Sdh155122 req->obytes = ipst->ips_vifs[vifi].v_bytes_out; 4897c478bd9Sstevel@tonic-gate 4907c478bd9Sstevel@tonic-gate return (0); 4917c478bd9Sstevel@tonic-gate } 4927c478bd9Sstevel@tonic-gate 4937c478bd9Sstevel@tonic-gate static int 4947c478bd9Sstevel@tonic-gate get_version(uchar_t *data) 4957c478bd9Sstevel@tonic-gate { 4967c478bd9Sstevel@tonic-gate int *v = (int *)data; 4977c478bd9Sstevel@tonic-gate 4987c478bd9Sstevel@tonic-gate *v = 0x0305; /* XXX !!!! */ 4997c478bd9Sstevel@tonic-gate 5007c478bd9Sstevel@tonic-gate return (0); 5017c478bd9Sstevel@tonic-gate } 5027c478bd9Sstevel@tonic-gate 5037c478bd9Sstevel@tonic-gate /* 5047c478bd9Sstevel@tonic-gate * Set PIM assert processing global. 
5057c478bd9Sstevel@tonic-gate */ 5067c478bd9Sstevel@tonic-gate static int 507f4b3ec61Sdh155122 set_assert(int *i, ip_stack_t *ipst) 5087c478bd9Sstevel@tonic-gate { 5097c478bd9Sstevel@tonic-gate if ((*i != 1) && (*i != 0)) 5107c478bd9Sstevel@tonic-gate return (EINVAL); 5117c478bd9Sstevel@tonic-gate 512f4b3ec61Sdh155122 ipst->ips_pim_assert = *i; 5137c478bd9Sstevel@tonic-gate 5147c478bd9Sstevel@tonic-gate return (0); 5157c478bd9Sstevel@tonic-gate } 5167c478bd9Sstevel@tonic-gate 5177c478bd9Sstevel@tonic-gate /* 5187c478bd9Sstevel@tonic-gate * Get PIM assert processing global. 5197c478bd9Sstevel@tonic-gate */ 5207c478bd9Sstevel@tonic-gate static int 521f4b3ec61Sdh155122 get_assert(uchar_t *data, ip_stack_t *ipst) 5227c478bd9Sstevel@tonic-gate { 5237c478bd9Sstevel@tonic-gate int *i = (int *)data; 5247c478bd9Sstevel@tonic-gate 525f4b3ec61Sdh155122 *i = ipst->ips_pim_assert; 5267c478bd9Sstevel@tonic-gate 5277c478bd9Sstevel@tonic-gate return (0); 5287c478bd9Sstevel@tonic-gate } 5297c478bd9Sstevel@tonic-gate 5307c478bd9Sstevel@tonic-gate /* 5317c478bd9Sstevel@tonic-gate * Enable multicast routing. 
/*
 * ip_mrouter_init:
 *	Enable multicast routing (MRT_INIT) with connp as the routing socket.
 *
 *	data must point at an int whose value is 1 and datalen must equal
 *	sizeof (int), otherwise ENOPROTOOPT is returned.  Under
 *	ips_ip_g_mrouter_mutex: returns EADDRINUSE when a multicast router is
 *	already registered and EINVAL when connp is not a raw IP conn;
 *	otherwise records connp in ips_ip_g_mrouter, sets conn_multi_router
 *	and, if the stack is not already forwarding, saves the current
 *	ips_ip_forwarding value and forces IP_FORWARD_ALWAYS.  Returns 0 on
 *	success.
 */
static int
ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst)
{
	int	*v;

	if (data == NULL || (datalen != sizeof (int)))
		return (ENOPROTOOPT);

	v = (int *)data;
	if (*v != 1)
		return (ENOPROTOOPT);

	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
	if (ipst->ips_ip_g_mrouter != NULL) {
		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
		return (EADDRINUSE);
	}

	/*
	 * MRT_INIT should only be allowed for RAW sockets, but we double
	 * check.
	 */
	if (!IPCL_IS_RAWIP(connp)) {
		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
		return (EINVAL);
	}

	ipst->ips_ip_g_mrouter = connp;
	connp->conn_multi_router = 1;
	/* In order for tunnels to work we have to turn ip_g_forward on */
	if (!WE_ARE_FORWARDING(ipst)) {
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(connp->conn_rq, 1, SL_TRACE,
			    "ip_mrouter_init: turning on forwarding");
		}
		/* Remember the old setting so ip_mrouter_done() can restore it. */
		ipst->ips_saved_ip_forwarding = ipst->ips_ip_forwarding;
		ipst->ips_ip_forwarding = IP_FORWARD_ALWAYS;
	}

	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
	return (0);
}

/*
 * ip_mrouter_stack_init:
 *	Per-stack setup of the multicast routing state: initializes
 *	ips_ip_g_mrouter_mutex and ips_last_encap_lock, allocates zeroed
 *	vif, statistics, mfc bucket and tbf tables (KM_SLEEP), and records
 *	the sizes of struct vifctl and struct mfcctl in the statistics.
 */
void
ip_mrouter_stack_init(ip_stack_t *ipst)
{
	mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * NOTE(review): one more vif slot than MAXVIFS is allocated, while
	 * the tbf table below has exactly MAXVIFS entries - confirm what the
	 * extra slot is used for.
	 */
	ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1),
	    KM_SLEEP);
	ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP);
	/*
	 * mfctable:
	 * Includes all mfcs, including waiting upcalls.
	 * Multiple mfcs per bucket.
	 */
	ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ,
	    KM_SLEEP);
	/*
	 * Define the token bucket filter structures.
	 * tbftable -> each vif has one of these for storing info.
	 */
	ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP);

	mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL);

	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
}

/*
 * Disable multicast routing.
 * Didn't use global timeout_val (BSD version), instead check the mfctable.
 */
int
ip_mrouter_done(ip_stack_t *ipst)
{
	conn_t		*mrouter;
	vifi_t		vifi;
	struct mfc	*mfc_rt;
	int		i;

	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
	if (ipst->ips_ip_g_mrouter == NULL) {
		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
		return (EINVAL);
	}

	mrouter = ipst->ips_ip_g_mrouter;

	if (ipst->ips_saved_ip_forwarding != -1) {
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mrouter_done: turning off forwarding");
		}
		ipst->ips_ip_forwarding = ipst->ips_saved_ip_forwarding;
		ipst->ips_saved_ip_forwarding = -1;
	}

6327c478bd9Sstevel@tonic-gate /* 6337c478bd9Sstevel@tonic-gate * Always clear cache when vifs change. 634f4b3ec61Sdh155122 * No need to get ipst->ips_last_encap_lock since we are running as 635f4b3ec61Sdh155122 * a writer. 6367c478bd9Sstevel@tonic-gate */ 637f4b3ec61Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 638f4b3ec61Sdh155122 ipst->ips_last_encap_src = 0; 639f4b3ec61Sdh155122 ipst->ips_last_encap_vif = NULL; 640f4b3ec61Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 641fc80c0dfSnordmark mrouter->conn_multi_router = 0; 6427c478bd9Sstevel@tonic-gate 643f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 6447c478bd9Sstevel@tonic-gate 6457c478bd9Sstevel@tonic-gate /* 6467c478bd9Sstevel@tonic-gate * For each phyint in use, 6477c478bd9Sstevel@tonic-gate * disable promiscuous reception of all IP multicasts. 6487c478bd9Sstevel@tonic-gate */ 6497c478bd9Sstevel@tonic-gate for (vifi = 0; vifi < MAXVIFS; vifi++) { 650f4b3ec61Sdh155122 struct vif *vifp = ipst->ips_vifs + vifi; 6517c478bd9Sstevel@tonic-gate 6527c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 6537c478bd9Sstevel@tonic-gate /* 6547c478bd9Sstevel@tonic-gate * if the vif is active mark it condemned. 
6557c478bd9Sstevel@tonic-gate */ 6567c478bd9Sstevel@tonic-gate if (vifp->v_marks & VIF_MARK_GOOD) { 6577c478bd9Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 6587c478bd9Sstevel@tonic-gate ipif_refhold(vifp->v_ipif); 6597c478bd9Sstevel@tonic-gate /* Phyint only */ 6607c478bd9Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 6617c478bd9Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif; 662bd670b35SErik Nordmark ilm_t *ilm = vifp->v_ilm; 6637c478bd9Sstevel@tonic-gate 664bd670b35SErik Nordmark vifp->v_ilm = NULL; 6657c478bd9Sstevel@tonic-gate vifp->v_marks &= ~VIF_MARK_GOOD; 6667c478bd9Sstevel@tonic-gate vifp->v_marks |= VIF_MARK_CONDEMNED; 6677c478bd9Sstevel@tonic-gate 668bd670b35SErik Nordmark mutex_exit(&(vifp)->v_lock); 669bd670b35SErik Nordmark if (ilm != NULL) { 670bd670b35SErik Nordmark ill_t *ill = ipif->ipif_ill; 671bd670b35SErik Nordmark 672bd670b35SErik Nordmark (void) ip_delmulti(ilm); 673bd670b35SErik Nordmark ASSERT(ill->ill_mrouter_cnt > 0); 674bd670b35SErik Nordmark atomic_dec_32(&ill->ill_mrouter_cnt); 6757c478bd9Sstevel@tonic-gate } 6767c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 6777c478bd9Sstevel@tonic-gate } 6787ba7860fSErik Nordmark ipif_refrele(vifp->v_ipif); 6797c478bd9Sstevel@tonic-gate /* 6807c478bd9Sstevel@tonic-gate * decreases the refcnt added in add_vif. 6817c478bd9Sstevel@tonic-gate * and release v_lock. 
6827c478bd9Sstevel@tonic-gate */ 6837c478bd9Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 6847c478bd9Sstevel@tonic-gate } else { 6857c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 6867c478bd9Sstevel@tonic-gate continue; 6877c478bd9Sstevel@tonic-gate } 6887c478bd9Sstevel@tonic-gate } 6897c478bd9Sstevel@tonic-gate 690f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 691f4b3ec61Sdh155122 ipst->ips_numvifs = 0; 692f4b3ec61Sdh155122 ipst->ips_pim_assert = 0; 693f4b3ec61Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 694f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 6957c478bd9Sstevel@tonic-gate 6967c478bd9Sstevel@tonic-gate /* 6977c478bd9Sstevel@tonic-gate * Free upcall msgs. 6987c478bd9Sstevel@tonic-gate * Go through mfctable and stop any outstanding upcall 6997c478bd9Sstevel@tonic-gate * timeouts remaining on mfcs. 7007c478bd9Sstevel@tonic-gate */ 7017c478bd9Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 702f4b3ec61Sdh155122 mutex_enter(&ipst->ips_mfcs[i].mfcb_lock); 703f4b3ec61Sdh155122 ipst->ips_mfcs[i].mfcb_refcnt++; 704f4b3ec61Sdh155122 ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED; 705f4b3ec61Sdh155122 mutex_exit(&ipst->ips_mfcs[i].mfcb_lock); 706f4b3ec61Sdh155122 mfc_rt = ipst->ips_mfcs[i].mfcb_mfc; 7077c478bd9Sstevel@tonic-gate while (mfc_rt) { 7087c478bd9Sstevel@tonic-gate /* Free upcalls */ 7097c478bd9Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 7107c478bd9Sstevel@tonic-gate if (mfc_rt->mfc_rte != NULL) { 7117c478bd9Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id != 0) { 7127c478bd9Sstevel@tonic-gate /* 7137c478bd9Sstevel@tonic-gate * OK to drop the lock as we have 7147c478bd9Sstevel@tonic-gate * a refcnt on the bucket. timeout 7157c478bd9Sstevel@tonic-gate * can fire but it will see that 7167c478bd9Sstevel@tonic-gate * mfc_timeout_id == 0 and not do 7177c478bd9Sstevel@tonic-gate * anything. see expire_upcalls(). 
7187c478bd9Sstevel@tonic-gate */ 7197c478bd9Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 7207c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 7217c478bd9Sstevel@tonic-gate (void) untimeout( 7227c478bd9Sstevel@tonic-gate mfc_rt->mfc_timeout_id); 7237c478bd9Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 7247c478bd9Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 7257c478bd9Sstevel@tonic-gate 7267c478bd9Sstevel@tonic-gate /* 7277c478bd9Sstevel@tonic-gate * all queued upcall packets 7287c478bd9Sstevel@tonic-gate * and mblk will be freed in 7297c478bd9Sstevel@tonic-gate * release_mfc(). 7307c478bd9Sstevel@tonic-gate */ 7317c478bd9Sstevel@tonic-gate } 7327c478bd9Sstevel@tonic-gate } 7337c478bd9Sstevel@tonic-gate 7347c478bd9Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 7357c478bd9Sstevel@tonic-gate 7367c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 7377c478bd9Sstevel@tonic-gate mfc_rt = mfc_rt->mfc_next; 7387c478bd9Sstevel@tonic-gate } 739f4b3ec61Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 7407c478bd9Sstevel@tonic-gate } 7417c478bd9Sstevel@tonic-gate 742f4b3ec61Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 743f4b3ec61Sdh155122 ipst->ips_ip_g_mrouter = NULL; 744f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7457c478bd9Sstevel@tonic-gate return (0); 7467c478bd9Sstevel@tonic-gate } 7477c478bd9Sstevel@tonic-gate 748f4b3ec61Sdh155122 void 749f4b3ec61Sdh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst) 750f4b3ec61Sdh155122 { 751f4b3ec61Sdh155122 struct mfcb *mfcbp; 752f4b3ec61Sdh155122 struct mfc *rt; 753f4b3ec61Sdh155122 int i; 754f4b3ec61Sdh155122 755f4b3ec61Sdh155122 for (i = 0; i < MFCTBLSIZ; i++) { 756f4b3ec61Sdh155122 mfcbp = &ipst->ips_mfcs[i]; 757f4b3ec61Sdh155122 758f4b3ec61Sdh155122 while ((rt = mfcbp->mfcb_mfc) != NULL) { 759f4b3ec61Sdh155122 (void) printf("ip_mrouter_stack_destroy: free for %d\n", 760f4b3ec61Sdh155122 i); 761f4b3ec61Sdh155122 762f4b3ec61Sdh155122 mfcbp->mfcb_mfc = rt->mfc_next; 
763f4b3ec61Sdh155122 free_queue(rt); 764f4b3ec61Sdh155122 mi_free(rt); 765f4b3ec61Sdh155122 } 766f4b3ec61Sdh155122 } 767f4b3ec61Sdh155122 kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1)); 768f4b3ec61Sdh155122 ipst->ips_vifs = NULL; 769f4b3ec61Sdh155122 kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat)); 770f4b3ec61Sdh155122 ipst->ips_mrtstat = NULL; 771f4b3ec61Sdh155122 kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ); 772f4b3ec61Sdh155122 ipst->ips_mfcs = NULL; 773f4b3ec61Sdh155122 kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS); 774f4b3ec61Sdh155122 ipst->ips_tbfs = NULL; 775f4b3ec61Sdh155122 776f4b3ec61Sdh155122 mutex_destroy(&ipst->ips_last_encap_lock); 777f4b3ec61Sdh155122 mutex_destroy(&ipst->ips_ip_g_mrouter_mutex); 778f4b3ec61Sdh155122 } 779f4b3ec61Sdh155122 7807c478bd9Sstevel@tonic-gate static boolean_t 781f4b3ec61Sdh155122 is_mrouter_off(ip_stack_t *ipst) 7827c478bd9Sstevel@tonic-gate { 783fc80c0dfSnordmark conn_t *mrouter; 7847c478bd9Sstevel@tonic-gate 785f4b3ec61Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 786f4b3ec61Sdh155122 if (ipst->ips_ip_g_mrouter == NULL) { 787f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7887c478bd9Sstevel@tonic-gate return (B_TRUE); 7897c478bd9Sstevel@tonic-gate } 7907c478bd9Sstevel@tonic-gate 791fc80c0dfSnordmark mrouter = ipst->ips_ip_g_mrouter; 792fc80c0dfSnordmark if (mrouter->conn_multi_router == 0) { 793f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7947c478bd9Sstevel@tonic-gate return (B_TRUE); 7957c478bd9Sstevel@tonic-gate } 796f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7977c478bd9Sstevel@tonic-gate return (B_FALSE); 7987c478bd9Sstevel@tonic-gate } 7997c478bd9Sstevel@tonic-gate 8007c478bd9Sstevel@tonic-gate static void 8017c478bd9Sstevel@tonic-gate unlock_good_vif(struct vif *vifp) 8027c478bd9Sstevel@tonic-gate { 8037c478bd9Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 8047c478bd9Sstevel@tonic-gate ipif_refrele(vifp->v_ipif); 
8057c478bd9Sstevel@tonic-gate VIF_REFRELE(vifp); 8067c478bd9Sstevel@tonic-gate } 8077c478bd9Sstevel@tonic-gate 8087c478bd9Sstevel@tonic-gate static boolean_t 8097c478bd9Sstevel@tonic-gate lock_good_vif(struct vif *vifp) 8107c478bd9Sstevel@tonic-gate { 8117c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8127c478bd9Sstevel@tonic-gate if (!(vifp->v_marks & VIF_MARK_GOOD)) { 8137c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8147c478bd9Sstevel@tonic-gate return (B_FALSE); 8157c478bd9Sstevel@tonic-gate } 8167c478bd9Sstevel@tonic-gate 8177c478bd9Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 8187c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock); 8197c478bd9Sstevel@tonic-gate if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) { 8207c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 8217c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8227c478bd9Sstevel@tonic-gate return (B_FALSE); 8237c478bd9Sstevel@tonic-gate } 8247c478bd9Sstevel@tonic-gate ipif_refhold_locked(vifp->v_ipif); 8257c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 8267c478bd9Sstevel@tonic-gate vifp->v_refcnt++; 8277c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8287c478bd9Sstevel@tonic-gate return (B_TRUE); 8297c478bd9Sstevel@tonic-gate } 8307c478bd9Sstevel@tonic-gate 8317c478bd9Sstevel@tonic-gate /* 8327c478bd9Sstevel@tonic-gate * Add a vif to the vif table. 
8337c478bd9Sstevel@tonic-gate */ 8347c478bd9Sstevel@tonic-gate static int 835bd670b35SErik Nordmark add_vif(struct vifctl *vifcp, conn_t *connp, ip_stack_t *ipst) 8367c478bd9Sstevel@tonic-gate { 837f4b3ec61Sdh155122 struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi; 8387c478bd9Sstevel@tonic-gate ipif_t *ipif; 839bd670b35SErik Nordmark int error = 0; 840f4b3ec61Sdh155122 struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi; 841fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 842bd670b35SErik Nordmark ilm_t *ilm; 843bd670b35SErik Nordmark ill_t *ill; 8447c478bd9Sstevel@tonic-gate 8457c478bd9Sstevel@tonic-gate ASSERT(connp != NULL); 8467c478bd9Sstevel@tonic-gate 8477c478bd9Sstevel@tonic-gate if (vifcp->vifc_vifi >= MAXVIFS) 8487c478bd9Sstevel@tonic-gate return (EINVAL); 8497c478bd9Sstevel@tonic-gate 850f4b3ec61Sdh155122 if (is_mrouter_off(ipst)) 8517c478bd9Sstevel@tonic-gate return (EINVAL); 8527c478bd9Sstevel@tonic-gate 8537c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8547c478bd9Sstevel@tonic-gate /* 8557c478bd9Sstevel@tonic-gate * Viftable entry should be 0. 8567c478bd9Sstevel@tonic-gate * if v_marks == 0 but v_refcnt != 0 means struct is being 8577c478bd9Sstevel@tonic-gate * initialized. 8587c478bd9Sstevel@tonic-gate * 8597c478bd9Sstevel@tonic-gate * Also note that it is very unlikely that we will get a MRT_ADD_VIF 8607c478bd9Sstevel@tonic-gate * request while the delete is in progress, mrouted only sends add 8617c478bd9Sstevel@tonic-gate * requests when a new interface is added and the new interface cannot 8627c478bd9Sstevel@tonic-gate * have the same vifi as an existing interface. We make sure that 8637c478bd9Sstevel@tonic-gate * ill_delete will block till the vif is deleted by adding a refcnt 8647c478bd9Sstevel@tonic-gate * to ipif in del_vif(). 
8657c478bd9Sstevel@tonic-gate */ 8667c478bd9Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr != 0 || 8677c478bd9Sstevel@tonic-gate vifp->v_marks != 0 || 8687c478bd9Sstevel@tonic-gate vifp->v_refcnt != 0) { 8697c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8707c478bd9Sstevel@tonic-gate return (EADDRINUSE); 8717c478bd9Sstevel@tonic-gate } 8727c478bd9Sstevel@tonic-gate 8737c478bd9Sstevel@tonic-gate /* Incoming vif should not be 0 */ 8747c478bd9Sstevel@tonic-gate if (vifcp->vifc_lcl_addr.s_addr == 0) { 8757c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8767c478bd9Sstevel@tonic-gate return (EINVAL); 8777c478bd9Sstevel@tonic-gate } 8787c478bd9Sstevel@tonic-gate 8797c478bd9Sstevel@tonic-gate vifp->v_refcnt++; 8807c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8817c478bd9Sstevel@tonic-gate /* Find the interface with the local address */ 8827c478bd9Sstevel@tonic-gate ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL, 883bd670b35SErik Nordmark IPCL_ZONEID(connp), ipst); 8847c478bd9Sstevel@tonic-gate if (ipif == NULL) { 8857c478bd9Sstevel@tonic-gate VIF_REFRELE(vifp); 8867c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL); 8877c478bd9Sstevel@tonic-gate } 8887c478bd9Sstevel@tonic-gate 889f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 890fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 8917c478bd9Sstevel@tonic-gate "add_vif: src 0x%x enter", 8927c478bd9Sstevel@tonic-gate vifcp->vifc_lcl_addr.s_addr); 8937c478bd9Sstevel@tonic-gate } 8947c478bd9Sstevel@tonic-gate 8957c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8967c478bd9Sstevel@tonic-gate /* 8977c478bd9Sstevel@tonic-gate * Always clear cache when vifs change. 8987c478bd9Sstevel@tonic-gate * Needed to ensure that src isn't left over from before vif was added. 8997c478bd9Sstevel@tonic-gate * No need to get last_encap_lock, since we are running as a writer. 
9007c478bd9Sstevel@tonic-gate */ 9017c478bd9Sstevel@tonic-gate 902f4b3ec61Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 903f4b3ec61Sdh155122 ipst->ips_last_encap_src = 0; 904f4b3ec61Sdh155122 ipst->ips_last_encap_vif = NULL; 905f4b3ec61Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 9067c478bd9Sstevel@tonic-gate 9077c478bd9Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_TUNNEL) { 9087c478bd9Sstevel@tonic-gate if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) { 9097c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 9107c478bd9Sstevel@tonic-gate "add_vif: source route tunnels not supported\n"); 9117c478bd9Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9127c478bd9Sstevel@tonic-gate ipif_refrele(ipif); 9137c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 9147c478bd9Sstevel@tonic-gate } 9157c478bd9Sstevel@tonic-gate vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 9167c478bd9Sstevel@tonic-gate 9177c478bd9Sstevel@tonic-gate } else { 9187c478bd9Sstevel@tonic-gate /* Phyint or Register vif */ 9197c478bd9Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 9207c478bd9Sstevel@tonic-gate /* 9217c478bd9Sstevel@tonic-gate * Note: Since all IPPROTO_IP level options (including 9227c478bd9Sstevel@tonic-gate * MRT_ADD_VIF) are done exclusively via 9237c478bd9Sstevel@tonic-gate * ip_optmgmt_writer(), a lock is not necessary to 9247c478bd9Sstevel@tonic-gate * protect reg_vif_num. 
9257c478bd9Sstevel@tonic-gate */ 926f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 927f4b3ec61Sdh155122 if (ipst->ips_reg_vif_num == ALL_VIFS) { 928f4b3ec61Sdh155122 ipst->ips_reg_vif_num = vifcp->vifc_vifi; 929f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9307c478bd9Sstevel@tonic-gate } else { 931f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9327c478bd9Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9337c478bd9Sstevel@tonic-gate ipif_refrele(ipif); 9347c478bd9Sstevel@tonic-gate return (EADDRINUSE); 9357c478bd9Sstevel@tonic-gate } 9367c478bd9Sstevel@tonic-gate } 9377c478bd9Sstevel@tonic-gate 9387c478bd9Sstevel@tonic-gate /* Make sure the interface supports multicast */ 9397c478bd9Sstevel@tonic-gate if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) { 9407c478bd9Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9417c478bd9Sstevel@tonic-gate ipif_refrele(ipif); 9427c478bd9Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 943f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 944f4b3ec61Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 945f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9467c478bd9Sstevel@tonic-gate } 9477c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 9487c478bd9Sstevel@tonic-gate } 9497c478bd9Sstevel@tonic-gate /* Enable promiscuous reception of all IP mcasts from the if */ 9507c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 951bd670b35SErik Nordmark 952bd670b35SErik Nordmark ill = ipif->ipif_ill; 953bd670b35SErik Nordmark if (IS_UNDER_IPMP(ill)) 954bd670b35SErik Nordmark ill = ipmp_ill_hold_ipmp_ill(ill); 955bd670b35SErik Nordmark 956bd670b35SErik Nordmark if (ill == NULL) { 957bd670b35SErik Nordmark ilm = NULL; 958bd670b35SErik Nordmark } else { 959bd670b35SErik Nordmark ilm = ip_addmulti(&ipv6_all_zeros, ill, 960bd670b35SErik Nordmark ipif->ipif_zoneid, &error); 961bd670b35SErik Nordmark if (ilm != NULL) 962bd670b35SErik Nordmark atomic_inc_32(&ill->ill_mrouter_cnt); 963bd670b35SErik Nordmark if 
(IS_UNDER_IPMP(ipif->ipif_ill)) { 964bd670b35SErik Nordmark ill_refrele(ill); 965bd670b35SErik Nordmark ill = ipif->ipif_ill; 966bd670b35SErik Nordmark } 967bd670b35SErik Nordmark } 968bd670b35SErik Nordmark 9697c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 9707c478bd9Sstevel@tonic-gate /* 9717c478bd9Sstevel@tonic-gate * since we released the lock lets make sure that 9727c478bd9Sstevel@tonic-gate * ip_mrouter_done() has not been called. 9737c478bd9Sstevel@tonic-gate */ 974bd670b35SErik Nordmark if (ilm == NULL || is_mrouter_off(ipst)) { 975bd670b35SErik Nordmark if (ilm != NULL) { 976bd670b35SErik Nordmark (void) ip_delmulti(ilm); 977bd670b35SErik Nordmark ASSERT(ill->ill_mrouter_cnt > 0); 978bd670b35SErik Nordmark atomic_dec_32(&ill->ill_mrouter_cnt); 979bd670b35SErik Nordmark } 9807c478bd9Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 981f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 982f4b3ec61Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 983f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9847c478bd9Sstevel@tonic-gate } 9857c478bd9Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9867c478bd9Sstevel@tonic-gate ipif_refrele(ipif); 9877c478bd9Sstevel@tonic-gate return (error?error:EINVAL); 9887c478bd9Sstevel@tonic-gate } 989bd670b35SErik Nordmark vifp->v_ilm = ilm; 9907c478bd9Sstevel@tonic-gate } 9917c478bd9Sstevel@tonic-gate /* Define parameters for the tbf structure */ 9927c478bd9Sstevel@tonic-gate vifp->v_tbf = v_tbf; 9937c478bd9Sstevel@tonic-gate gethrestime(&vifp->v_tbf->tbf_last_pkt_t); 9947c478bd9Sstevel@tonic-gate vifp->v_tbf->tbf_n_tok = 0; 9957c478bd9Sstevel@tonic-gate vifp->v_tbf->tbf_q_len = 0; 9967c478bd9Sstevel@tonic-gate vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 9977c478bd9Sstevel@tonic-gate vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 9987c478bd9Sstevel@tonic-gate 9997c478bd9Sstevel@tonic-gate vifp->v_flags = vifcp->vifc_flags; 10007c478bd9Sstevel@tonic-gate vifp->v_threshold = vifcp->vifc_threshold; 
10017c478bd9Sstevel@tonic-gate vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 10027c478bd9Sstevel@tonic-gate vifp->v_ipif = ipif; 10037c478bd9Sstevel@tonic-gate ipif_refrele(ipif); 10047c478bd9Sstevel@tonic-gate /* Scaling up here, allows division by 1024 in critical code. */ 10057c478bd9Sstevel@tonic-gate vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000); 10067c478bd9Sstevel@tonic-gate vifp->v_timeout_id = 0; 10077c478bd9Sstevel@tonic-gate /* initialize per vif pkt counters */ 10087c478bd9Sstevel@tonic-gate vifp->v_pkt_in = 0; 10097c478bd9Sstevel@tonic-gate vifp->v_pkt_out = 0; 10107c478bd9Sstevel@tonic-gate vifp->v_bytes_in = 0; 10117c478bd9Sstevel@tonic-gate vifp->v_bytes_out = 0; 10127c478bd9Sstevel@tonic-gate mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL); 10137c478bd9Sstevel@tonic-gate 10147c478bd9Sstevel@tonic-gate /* Adjust numvifs up, if the vifi is higher than numvifs */ 1015f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1016f4b3ec61Sdh155122 if (ipst->ips_numvifs <= vifcp->vifc_vifi) 1017f4b3ec61Sdh155122 ipst->ips_numvifs = vifcp->vifc_vifi + 1; 1018f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 10197c478bd9Sstevel@tonic-gate 1020f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1021fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 10227c478bd9Sstevel@tonic-gate "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", 10237c478bd9Sstevel@tonic-gate vifcp->vifc_vifi, 10247c478bd9Sstevel@tonic-gate ntohl(vifcp->vifc_lcl_addr.s_addr), 10257c478bd9Sstevel@tonic-gate (vifcp->vifc_flags & VIFF_TUNNEL) ? 
"rmtaddr" : "mask", 10267c478bd9Sstevel@tonic-gate ntohl(vifcp->vifc_rmt_addr.s_addr), 10277c478bd9Sstevel@tonic-gate vifcp->vifc_threshold, vifcp->vifc_rate_limit); 10287c478bd9Sstevel@tonic-gate } 10297c478bd9Sstevel@tonic-gate 10307c478bd9Sstevel@tonic-gate vifp->v_marks = VIF_MARK_GOOD; 10317c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 10327c478bd9Sstevel@tonic-gate return (0); 10337c478bd9Sstevel@tonic-gate } 10347c478bd9Sstevel@tonic-gate 10357c478bd9Sstevel@tonic-gate 10367c478bd9Sstevel@tonic-gate /* Delete a vif from the vif table. */ 10377c478bd9Sstevel@tonic-gate static void 10387c478bd9Sstevel@tonic-gate del_vifp(struct vif *vifp) 10397c478bd9Sstevel@tonic-gate { 10407c478bd9Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 10417c478bd9Sstevel@tonic-gate mblk_t *mp0; 10427c478bd9Sstevel@tonic-gate vifi_t vifi; 1043f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 1044fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 10457c478bd9Sstevel@tonic-gate 10467c478bd9Sstevel@tonic-gate ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); 10477c478bd9Sstevel@tonic-gate ASSERT(t != NULL); 10487c478bd9Sstevel@tonic-gate 1049f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1050fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 10517c478bd9Sstevel@tonic-gate "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); 10527c478bd9Sstevel@tonic-gate } 10537c478bd9Sstevel@tonic-gate 10547c478bd9Sstevel@tonic-gate if (vifp->v_timeout_id != 0) { 10557c478bd9Sstevel@tonic-gate (void) untimeout(vifp->v_timeout_id); 10567c478bd9Sstevel@tonic-gate vifp->v_timeout_id = 0; 10577c478bd9Sstevel@tonic-gate } 10587c478bd9Sstevel@tonic-gate 10597c478bd9Sstevel@tonic-gate /* 10607c478bd9Sstevel@tonic-gate * Free packets queued at the interface. 10617c478bd9Sstevel@tonic-gate * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc. 
10627c478bd9Sstevel@tonic-gate */ 10637c478bd9Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 10647c478bd9Sstevel@tonic-gate while (t->tbf_q != NULL) { 10657c478bd9Sstevel@tonic-gate mp0 = t->tbf_q; 10667c478bd9Sstevel@tonic-gate t->tbf_q = t->tbf_q->b_next; 10677c478bd9Sstevel@tonic-gate mp0->b_prev = mp0->b_next = NULL; 10687c478bd9Sstevel@tonic-gate freemsg(mp0); 10697c478bd9Sstevel@tonic-gate } 10707c478bd9Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 10717c478bd9Sstevel@tonic-gate 10727c478bd9Sstevel@tonic-gate /* 10737c478bd9Sstevel@tonic-gate * Always clear cache when vifs change. 10747c478bd9Sstevel@tonic-gate * No need to get last_encap_lock since we are running as a writer. 10757c478bd9Sstevel@tonic-gate */ 1076f4b3ec61Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 1077f4b3ec61Sdh155122 if (vifp == ipst->ips_last_encap_vif) { 1078f4b3ec61Sdh155122 ipst->ips_last_encap_vif = NULL; 1079f4b3ec61Sdh155122 ipst->ips_last_encap_src = 0; 10807c478bd9Sstevel@tonic-gate } 1081f4b3ec61Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 10827c478bd9Sstevel@tonic-gate 10837c478bd9Sstevel@tonic-gate mutex_destroy(&t->tbf_lock); 10847c478bd9Sstevel@tonic-gate 10857c478bd9Sstevel@tonic-gate bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf))); 10867c478bd9Sstevel@tonic-gate 10877c478bd9Sstevel@tonic-gate /* Adjust numvifs down */ 1088f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1089f4b3ec61Sdh155122 for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */ 1090f4b3ec61Sdh155122 if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0) 10917c478bd9Sstevel@tonic-gate break; 1092f4b3ec61Sdh155122 ipst->ips_numvifs = vifi; 1093f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 10947c478bd9Sstevel@tonic-gate 10957c478bd9Sstevel@tonic-gate bzero(vifp, sizeof (*vifp)); 10967c478bd9Sstevel@tonic-gate } 10977c478bd9Sstevel@tonic-gate 10987c478bd9Sstevel@tonic-gate static int 1099bd670b35SErik Nordmark del_vif(vifi_t *vifip, ip_stack_t *ipst) 
11007c478bd9Sstevel@tonic-gate { 1101f4b3ec61Sdh155122 struct vif *vifp = ipst->ips_vifs + *vifip; 11027c478bd9Sstevel@tonic-gate 1103f4b3ec61Sdh155122 if (*vifip >= ipst->ips_numvifs) 11047c478bd9Sstevel@tonic-gate return (EINVAL); 11057c478bd9Sstevel@tonic-gate 11067c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 11077c478bd9Sstevel@tonic-gate /* 11087c478bd9Sstevel@tonic-gate * Not initialized 11097c478bd9Sstevel@tonic-gate * Here we are not looking at the vif that is being initialized 11107c478bd9Sstevel@tonic-gate * i.e vifp->v_marks == 0 and refcnt > 0. 11117c478bd9Sstevel@tonic-gate */ 11127c478bd9Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr == 0 || 11137c478bd9Sstevel@tonic-gate !(vifp->v_marks & VIF_MARK_GOOD)) { 11147c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 11157c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL); 11167c478bd9Sstevel@tonic-gate } 11177c478bd9Sstevel@tonic-gate 11187c478bd9Sstevel@tonic-gate /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */ 11197c478bd9Sstevel@tonic-gate vifp->v_marks &= ~VIF_MARK_GOOD; 11207c478bd9Sstevel@tonic-gate vifp->v_marks |= VIF_MARK_CONDEMNED; 11217c478bd9Sstevel@tonic-gate 11227c478bd9Sstevel@tonic-gate /* Phyint only */ 11237c478bd9Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 11247c478bd9Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif; 1125bd670b35SErik Nordmark ilm_t *ilm = vifp->v_ilm; 1126bd670b35SErik Nordmark 1127bd670b35SErik Nordmark vifp->v_ilm = NULL; 1128bd670b35SErik Nordmark 11297c478bd9Sstevel@tonic-gate ASSERT(ipif != NULL); 11307c478bd9Sstevel@tonic-gate /* 11317c478bd9Sstevel@tonic-gate * should be OK to drop the lock as we 11327c478bd9Sstevel@tonic-gate * have marked this as CONDEMNED. 
11337c478bd9Sstevel@tonic-gate */ 11347c478bd9Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); 1135bd670b35SErik Nordmark if (ilm != NULL) { 1136bd670b35SErik Nordmark (void) ip_delmulti(ilm); 1137bd670b35SErik Nordmark ASSERT(ipif->ipif_ill->ill_mrouter_cnt > 0); 1138bd670b35SErik Nordmark atomic_dec_32(&ipif->ipif_ill->ill_mrouter_cnt); 1139bd670b35SErik Nordmark } 11407c478bd9Sstevel@tonic-gate mutex_enter(&(vifp)->v_lock); 11417c478bd9Sstevel@tonic-gate } 11427c478bd9Sstevel@tonic-gate 1143bd670b35SErik Nordmark if (vifp->v_flags & VIFF_REGISTER) { 1144bd670b35SErik Nordmark mutex_enter(&ipst->ips_numvifs_mutex); 1145bd670b35SErik Nordmark ipst->ips_reg_vif_num = ALL_VIFS; 1146bd670b35SErik Nordmark mutex_exit(&ipst->ips_numvifs_mutex); 1147bd670b35SErik Nordmark } 1148bd670b35SErik Nordmark 11497c478bd9Sstevel@tonic-gate /* 11507c478bd9Sstevel@tonic-gate * decreases the refcnt added in add_vif. 11517c478bd9Sstevel@tonic-gate */ 11527c478bd9Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 11537c478bd9Sstevel@tonic-gate return (0); 11547c478bd9Sstevel@tonic-gate } 11557c478bd9Sstevel@tonic-gate 11567c478bd9Sstevel@tonic-gate /* 11577c478bd9Sstevel@tonic-gate * Add an mfc entry. 11587c478bd9Sstevel@tonic-gate */ 11597c478bd9Sstevel@tonic-gate static int 1160f4b3ec61Sdh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 11617c478bd9Sstevel@tonic-gate { 11627c478bd9Sstevel@tonic-gate struct mfc *rt; 11637c478bd9Sstevel@tonic-gate struct rtdetq *rte; 11647c478bd9Sstevel@tonic-gate ushort_t nstl; 11657c478bd9Sstevel@tonic-gate int i; 11667c478bd9Sstevel@tonic-gate struct mfcb *mfcbp; 1167fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 11687c478bd9Sstevel@tonic-gate 11697c478bd9Sstevel@tonic-gate /* 11707c478bd9Sstevel@tonic-gate * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted 11717c478bd9Sstevel@tonic-gate * did not have a real route for pkt. 
11727c478bd9Sstevel@tonic-gate * We want this pkt without rt installed in the mfctable to prevent 11737c478bd9Sstevel@tonic-gate * multiiple tries, so go ahead and put it in mfctable, it will 11747c478bd9Sstevel@tonic-gate * be discarded later in ip_mdq() because the child is NULL. 11757c478bd9Sstevel@tonic-gate */ 11767c478bd9Sstevel@tonic-gate 11777c478bd9Sstevel@tonic-gate /* Error checking, out of bounds? */ 11787c478bd9Sstevel@tonic-gate if (mfccp->mfcc_parent > MAXVIFS) { 11797c478bd9Sstevel@tonic-gate ip0dbg(("ADD_MFC: mfcc_parent out of range %d", 11807c478bd9Sstevel@tonic-gate (int)mfccp->mfcc_parent)); 11817c478bd9Sstevel@tonic-gate return (EINVAL); 11827c478bd9Sstevel@tonic-gate } 11837c478bd9Sstevel@tonic-gate 11847c478bd9Sstevel@tonic-gate if ((mfccp->mfcc_parent != NO_VIF) && 1185f4b3ec61Sdh155122 (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) { 11867c478bd9Sstevel@tonic-gate ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n", 11877c478bd9Sstevel@tonic-gate (int)mfccp->mfcc_parent)); 11887c478bd9Sstevel@tonic-gate return (EINVAL); 11897c478bd9Sstevel@tonic-gate } 11907c478bd9Sstevel@tonic-gate 1191f4b3ec61Sdh155122 if (is_mrouter_off(ipst)) { 11927c478bd9Sstevel@tonic-gate return (EINVAL); 11937c478bd9Sstevel@tonic-gate } 11947c478bd9Sstevel@tonic-gate 1195f4b3ec61Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr, 11967c478bd9Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr)]; 11977c478bd9Sstevel@tonic-gate MFCB_REFHOLD(mfcbp); 11987c478bd9Sstevel@tonic-gate MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr, 11997c478bd9Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr, rt); 12007c478bd9Sstevel@tonic-gate 12017c478bd9Sstevel@tonic-gate /* If an entry already exists, just update the fields */ 12027c478bd9Sstevel@tonic-gate if (rt) { 1203f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1204fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 12057c478bd9Sstevel@tonic-gate "add_mfc: update o %x grp %x parent %x", 
12067c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 12077c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 12087c478bd9Sstevel@tonic-gate mfccp->mfcc_parent); 12097c478bd9Sstevel@tonic-gate } 12107c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12117c478bd9Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent; 12127c478bd9Sstevel@tonic-gate 1213f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1214f4b3ec61Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) 12157c478bd9Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 1216f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 12177c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12187c478bd9Sstevel@tonic-gate 12197c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 12207c478bd9Sstevel@tonic-gate return (0); 12217c478bd9Sstevel@tonic-gate } 12227c478bd9Sstevel@tonic-gate 12237c478bd9Sstevel@tonic-gate /* 12247c478bd9Sstevel@tonic-gate * Find the entry for which the upcall was made and update. 
12257c478bd9Sstevel@tonic-gate */ 12267c478bd9Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) { 12277c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12287c478bd9Sstevel@tonic-gate if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 12297c478bd9Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 12307c478bd9Sstevel@tonic-gate (rt->mfc_rte != NULL) && 12317c478bd9Sstevel@tonic-gate !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 12327c478bd9Sstevel@tonic-gate if (nstl++ != 0) 12337c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 12347c478bd9Sstevel@tonic-gate "add_mfc: %s o %x g %x p %x", 12357c478bd9Sstevel@tonic-gate "multiple kernel entries", 12367c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 12377c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 12387c478bd9Sstevel@tonic-gate mfccp->mfcc_parent); 12397c478bd9Sstevel@tonic-gate 1240f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1241fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, 1242f4b3ec61Sdh155122 SL_TRACE, 12437c478bd9Sstevel@tonic-gate "add_mfc: o %x g %x p %x", 12447c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 12457c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 12467c478bd9Sstevel@tonic-gate mfccp->mfcc_parent); 12477c478bd9Sstevel@tonic-gate } 1248f4b3ec61Sdh155122 fill_route(rt, mfccp, ipst); 12497c478bd9Sstevel@tonic-gate 12507c478bd9Sstevel@tonic-gate /* 12517c478bd9Sstevel@tonic-gate * Prevent cleanup of cache entry. 12527c478bd9Sstevel@tonic-gate * Timer starts in ip_mforward. 
12537c478bd9Sstevel@tonic-gate */ 12547c478bd9Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) { 12557c478bd9Sstevel@tonic-gate timeout_id_t id; 12567c478bd9Sstevel@tonic-gate id = rt->mfc_timeout_id; 12577c478bd9Sstevel@tonic-gate /* 12587c478bd9Sstevel@tonic-gate * setting id to zero will avoid this 12597c478bd9Sstevel@tonic-gate * entry from being cleaned up in 12607c478bd9Sstevel@tonic-gate * expire_up_calls(). 12617c478bd9Sstevel@tonic-gate */ 12627c478bd9Sstevel@tonic-gate rt->mfc_timeout_id = 0; 12637c478bd9Sstevel@tonic-gate /* 12647c478bd9Sstevel@tonic-gate * dropping the lock is fine as we 12657c478bd9Sstevel@tonic-gate * have a refhold on the bucket. 12667c478bd9Sstevel@tonic-gate * so mfc cannot be freed. 12677c478bd9Sstevel@tonic-gate * The timeout can fire but it will see 12687c478bd9Sstevel@tonic-gate * that mfc_timeout_id == 0 and not cleanup. 12697c478bd9Sstevel@tonic-gate */ 12707c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12717c478bd9Sstevel@tonic-gate (void) untimeout(id); 12727c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12737c478bd9Sstevel@tonic-gate } 12747c478bd9Sstevel@tonic-gate 12757c478bd9Sstevel@tonic-gate /* 12767c478bd9Sstevel@tonic-gate * Send all pkts that are queued waiting for the upcall. 12777c478bd9Sstevel@tonic-gate * ip_mdq param tun set to 0 - 12787c478bd9Sstevel@tonic-gate * the return value of ip_mdq() isn't used here, 12797c478bd9Sstevel@tonic-gate * so value we send doesn't matter. 
12807c478bd9Sstevel@tonic-gate */ 12817c478bd9Sstevel@tonic-gate while (rt->mfc_rte != NULL) { 12827c478bd9Sstevel@tonic-gate rte = rt->mfc_rte; 12837c478bd9Sstevel@tonic-gate rt->mfc_rte = rte->rte_next; 12847c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12857c478bd9Sstevel@tonic-gate (void) ip_mdq(rte->mp, (ipha_t *) 12867c478bd9Sstevel@tonic-gate rte->mp->b_rptr, rte->ill, 0, rt); 12877c478bd9Sstevel@tonic-gate freemsg(rte->mp); 12887c478bd9Sstevel@tonic-gate mi_free((char *)rte); 12897c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12907c478bd9Sstevel@tonic-gate } 12917c478bd9Sstevel@tonic-gate } 12927c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12937c478bd9Sstevel@tonic-gate } 12947c478bd9Sstevel@tonic-gate 12957c478bd9Sstevel@tonic-gate 12967c478bd9Sstevel@tonic-gate /* 12977c478bd9Sstevel@tonic-gate * It is possible that an entry is being inserted without an upcall 12987c478bd9Sstevel@tonic-gate */ 12997c478bd9Sstevel@tonic-gate if (nstl == 0) { 13007c478bd9Sstevel@tonic-gate mutex_enter(&(mfcbp->mfcb_lock)); 1301f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1302fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 13037c478bd9Sstevel@tonic-gate "add_mfc: no upcall o %x g %x p %x", 13047c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 13057c478bd9Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 13067c478bd9Sstevel@tonic-gate mfccp->mfcc_parent); 13077c478bd9Sstevel@tonic-gate } 1308f4b3ec61Sdh155122 if (is_mrouter_off(ipst)) { 13097c478bd9Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 13107c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 13117c478bd9Sstevel@tonic-gate return (EINVAL); 13127c478bd9Sstevel@tonic-gate } 13137c478bd9Sstevel@tonic-gate 13147c478bd9Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) { 13157c478bd9Sstevel@tonic-gate 13167c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 13177c478bd9Sstevel@tonic-gate if ((rt->mfc_origin.s_addr == 
13187c478bd9Sstevel@tonic-gate mfccp->mfcc_origin.s_addr) && 13197c478bd9Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr == 13207c478bd9Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr) && 13217c478bd9Sstevel@tonic-gate (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) { 1322f4b3ec61Sdh155122 fill_route(rt, mfccp, ipst); 13237c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13247c478bd9Sstevel@tonic-gate break; 13257c478bd9Sstevel@tonic-gate } 13267c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13277c478bd9Sstevel@tonic-gate } 13287c478bd9Sstevel@tonic-gate 13297c478bd9Sstevel@tonic-gate /* No upcall, so make a new entry into mfctable */ 13307c478bd9Sstevel@tonic-gate if (rt == NULL) { 13317c478bd9Sstevel@tonic-gate rt = (struct mfc *)mi_zalloc(sizeof (struct mfc)); 13327c478bd9Sstevel@tonic-gate if (rt == NULL) { 13337c478bd9Sstevel@tonic-gate ip1dbg(("add_mfc: out of memory\n")); 13347c478bd9Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 13357c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 13367c478bd9Sstevel@tonic-gate return (ENOBUFS); 13377c478bd9Sstevel@tonic-gate } 13387c478bd9Sstevel@tonic-gate 13397c478bd9Sstevel@tonic-gate /* Insert new entry at head of hash chain */ 13407c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 1341f4b3ec61Sdh155122 fill_route(rt, mfccp, ipst); 13427c478bd9Sstevel@tonic-gate 13437c478bd9Sstevel@tonic-gate /* Link into table */ 13447c478bd9Sstevel@tonic-gate rt->mfc_next = mfcbp->mfcb_mfc; 13457c478bd9Sstevel@tonic-gate mfcbp->mfcb_mfc = rt; 13467c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13477c478bd9Sstevel@tonic-gate } 13487c478bd9Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 13497c478bd9Sstevel@tonic-gate } 13507c478bd9Sstevel@tonic-gate 13517c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 13527c478bd9Sstevel@tonic-gate return (0); 13537c478bd9Sstevel@tonic-gate } 13547c478bd9Sstevel@tonic-gate 13557c478bd9Sstevel@tonic-gate /* 13567c478bd9Sstevel@tonic-gate * Fills in mfc structure from mrouted mfcctl. 
13577c478bd9Sstevel@tonic-gate */ 13587c478bd9Sstevel@tonic-gate static void 1359f4b3ec61Sdh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst) 13607c478bd9Sstevel@tonic-gate { 13617c478bd9Sstevel@tonic-gate int i; 13627c478bd9Sstevel@tonic-gate 13637c478bd9Sstevel@tonic-gate rt->mfc_origin = mfccp->mfcc_origin; 13647c478bd9Sstevel@tonic-gate rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 13657c478bd9Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent; 1366f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1367f4b3ec61Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) { 13687c478bd9Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 13697c478bd9Sstevel@tonic-gate } 1370f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 13717c478bd9Sstevel@tonic-gate /* Initialize pkt counters per src-grp */ 13727c478bd9Sstevel@tonic-gate rt->mfc_pkt_cnt = 0; 13737c478bd9Sstevel@tonic-gate rt->mfc_byte_cnt = 0; 13747c478bd9Sstevel@tonic-gate rt->mfc_wrong_if = 0; 13757c478bd9Sstevel@tonic-gate rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0; 13767c478bd9Sstevel@tonic-gate 13777c478bd9Sstevel@tonic-gate } 13787c478bd9Sstevel@tonic-gate 13797c478bd9Sstevel@tonic-gate static void 13807c478bd9Sstevel@tonic-gate free_queue(struct mfc *mfcp) 13817c478bd9Sstevel@tonic-gate { 13827c478bd9Sstevel@tonic-gate struct rtdetq *rte0; 13837c478bd9Sstevel@tonic-gate 13847c478bd9Sstevel@tonic-gate /* 13857c478bd9Sstevel@tonic-gate * Drop all queued upcall packets. 13867c478bd9Sstevel@tonic-gate * Free the mbuf with the pkt. 
13877c478bd9Sstevel@tonic-gate */ 13887c478bd9Sstevel@tonic-gate while ((rte0 = mfcp->mfc_rte) != NULL) { 13897c478bd9Sstevel@tonic-gate mfcp->mfc_rte = rte0->rte_next; 13907c478bd9Sstevel@tonic-gate freemsg(rte0->mp); 13917c478bd9Sstevel@tonic-gate mi_free((char *)rte0); 13927c478bd9Sstevel@tonic-gate } 13937c478bd9Sstevel@tonic-gate } 13947c478bd9Sstevel@tonic-gate /* 13957c478bd9Sstevel@tonic-gate * go thorugh the hash bucket and free all the entries marked condemned. 13967c478bd9Sstevel@tonic-gate */ 13977c478bd9Sstevel@tonic-gate void 13987c478bd9Sstevel@tonic-gate release_mfc(struct mfcb *mfcbp) 13997c478bd9Sstevel@tonic-gate { 14007c478bd9Sstevel@tonic-gate struct mfc *current_mfcp; 14017c478bd9Sstevel@tonic-gate struct mfc *prev_mfcp; 14027c478bd9Sstevel@tonic-gate 14037c478bd9Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 14047c478bd9Sstevel@tonic-gate 14057c478bd9Sstevel@tonic-gate while (current_mfcp != NULL) { 14067c478bd9Sstevel@tonic-gate if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) { 14077c478bd9Sstevel@tonic-gate if (current_mfcp == mfcbp->mfcb_mfc) { 14087c478bd9Sstevel@tonic-gate mfcbp->mfcb_mfc = current_mfcp->mfc_next; 14097c478bd9Sstevel@tonic-gate free_queue(current_mfcp); 14107c478bd9Sstevel@tonic-gate mi_free(current_mfcp); 14117c478bd9Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 14127c478bd9Sstevel@tonic-gate continue; 14137c478bd9Sstevel@tonic-gate } 14147c478bd9Sstevel@tonic-gate ASSERT(prev_mfcp != NULL); 14157c478bd9Sstevel@tonic-gate prev_mfcp->mfc_next = current_mfcp->mfc_next; 14167c478bd9Sstevel@tonic-gate free_queue(current_mfcp); 14177c478bd9Sstevel@tonic-gate mi_free(current_mfcp); 14187c478bd9Sstevel@tonic-gate current_mfcp = NULL; 14197c478bd9Sstevel@tonic-gate } else { 14207c478bd9Sstevel@tonic-gate prev_mfcp = current_mfcp; 14217c478bd9Sstevel@tonic-gate } 14227c478bd9Sstevel@tonic-gate 14237c478bd9Sstevel@tonic-gate current_mfcp = prev_mfcp->mfc_next; 14247c478bd9Sstevel@tonic-gate 
14257c478bd9Sstevel@tonic-gate } 14267c478bd9Sstevel@tonic-gate mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED; 14277c478bd9Sstevel@tonic-gate ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0); 14287c478bd9Sstevel@tonic-gate } 14297c478bd9Sstevel@tonic-gate 14307c478bd9Sstevel@tonic-gate /* 14317c478bd9Sstevel@tonic-gate * Delete an mfc entry. 14327c478bd9Sstevel@tonic-gate */ 14337c478bd9Sstevel@tonic-gate static int 1434f4b3ec61Sdh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 14357c478bd9Sstevel@tonic-gate { 14367c478bd9Sstevel@tonic-gate struct in_addr origin; 14377c478bd9Sstevel@tonic-gate struct in_addr mcastgrp; 14387c478bd9Sstevel@tonic-gate struct mfc *rt; 14397c478bd9Sstevel@tonic-gate uint_t hash; 1440fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 14417c478bd9Sstevel@tonic-gate 14427c478bd9Sstevel@tonic-gate origin = mfccp->mfcc_origin; 14437c478bd9Sstevel@tonic-gate mcastgrp = mfccp->mfcc_mcastgrp; 14447c478bd9Sstevel@tonic-gate hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 14457c478bd9Sstevel@tonic-gate 1446f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1447fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 14487c478bd9Sstevel@tonic-gate "del_mfc: o %x g %x", 14497c478bd9Sstevel@tonic-gate ntohl(origin.s_addr), 14507c478bd9Sstevel@tonic-gate ntohl(mcastgrp.s_addr)); 14517c478bd9Sstevel@tonic-gate } 14527c478bd9Sstevel@tonic-gate 1453f4b3ec61Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 14547c478bd9Sstevel@tonic-gate 14557c478bd9Sstevel@tonic-gate /* Find mfc in mfctable, finds only entries without upcalls */ 1456f4b3ec61Sdh155122 for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) { 14577c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 14587c478bd9Sstevel@tonic-gate if (origin.s_addr == rt->mfc_origin.s_addr && 14597c478bd9Sstevel@tonic-gate mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 14607c478bd9Sstevel@tonic-gate rt->mfc_rte == NULL && 14617c478bd9Sstevel@tonic-gate !(rt->mfc_marks & 
MFCB_MARK_CONDEMNED)) 14627c478bd9Sstevel@tonic-gate break; 14637c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 14647c478bd9Sstevel@tonic-gate } 14657c478bd9Sstevel@tonic-gate 14667c478bd9Sstevel@tonic-gate /* 14677c478bd9Sstevel@tonic-gate * Return if there was an upcall (mfc_rte != NULL, 14687c478bd9Sstevel@tonic-gate * or rt not in mfctable. 14697c478bd9Sstevel@tonic-gate */ 14707c478bd9Sstevel@tonic-gate if (rt == NULL) { 1471f4b3ec61Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 14727c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL); 14737c478bd9Sstevel@tonic-gate } 14747c478bd9Sstevel@tonic-gate 14757c478bd9Sstevel@tonic-gate 14767c478bd9Sstevel@tonic-gate /* 14777c478bd9Sstevel@tonic-gate * no need to hold lock as we have a reference. 14787c478bd9Sstevel@tonic-gate */ 1479f4b3ec61Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 14807c478bd9Sstevel@tonic-gate /* error checking */ 14817c478bd9Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) { 14827c478bd9Sstevel@tonic-gate ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null")); 14837c478bd9Sstevel@tonic-gate /* 14847c478bd9Sstevel@tonic-gate * Its ok to drop the lock, the struct cannot be freed 14857c478bd9Sstevel@tonic-gate * since we have a ref on the hash bucket. 
14867c478bd9Sstevel@tonic-gate */ 14877c478bd9Sstevel@tonic-gate rt->mfc_timeout_id = 0; 14887c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 14897c478bd9Sstevel@tonic-gate (void) untimeout(rt->mfc_timeout_id); 14907c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 14917c478bd9Sstevel@tonic-gate } 14927c478bd9Sstevel@tonic-gate 14937c478bd9Sstevel@tonic-gate ASSERT(rt->mfc_rte == NULL); 14947c478bd9Sstevel@tonic-gate 14957c478bd9Sstevel@tonic-gate 14967c478bd9Sstevel@tonic-gate /* 14977c478bd9Sstevel@tonic-gate * Delete the entry from the cache 14987c478bd9Sstevel@tonic-gate */ 14997c478bd9Sstevel@tonic-gate rt->mfc_marks |= MFCB_MARK_CONDEMNED; 15007c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 15017c478bd9Sstevel@tonic-gate 1502f4b3ec61Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 15037c478bd9Sstevel@tonic-gate 15047c478bd9Sstevel@tonic-gate return (0); 15057c478bd9Sstevel@tonic-gate } 15067c478bd9Sstevel@tonic-gate 15077c478bd9Sstevel@tonic-gate #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 15087c478bd9Sstevel@tonic-gate 15097c478bd9Sstevel@tonic-gate /* 15107c478bd9Sstevel@tonic-gate * IP multicast forwarding function. This function assumes that the packet 15117c478bd9Sstevel@tonic-gate * pointed to by ipha has arrived on (or is about to be sent to) the interface 15127c478bd9Sstevel@tonic-gate * pointed to by "ill", and the packet is to be relayed to other networks 15137c478bd9Sstevel@tonic-gate * that have members of the packet's destination IP multicast group. 15147c478bd9Sstevel@tonic-gate * 15157c478bd9Sstevel@tonic-gate * The packet is returned unscathed to the caller, unless it is 15167c478bd9Sstevel@tonic-gate * erroneous, in which case a -1 value tells the caller (IP) 15177c478bd9Sstevel@tonic-gate * to discard it. 
15187c478bd9Sstevel@tonic-gate * 15197c478bd9Sstevel@tonic-gate * Unlike BSD, SunOS 5.x needs to return to IP info about 15207c478bd9Sstevel@tonic-gate * whether pkt came in thru a tunnel, so it can be discarded, unless 15217c478bd9Sstevel@tonic-gate * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try 15227c478bd9Sstevel@tonic-gate * to be delivered. 15237c478bd9Sstevel@tonic-gate * Return values are 0 - pkt is okay and phyint 15247c478bd9Sstevel@tonic-gate * -1 - pkt is malformed and to be tossed 15257c478bd9Sstevel@tonic-gate * 1 - pkt came in on tunnel 15267c478bd9Sstevel@tonic-gate */ 15277c478bd9Sstevel@tonic-gate int 1528bd670b35SErik Nordmark ip_mforward(mblk_t *mp, ip_recv_attr_t *ira) 15297c478bd9Sstevel@tonic-gate { 1530bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr; 1531bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 15327c478bd9Sstevel@tonic-gate struct mfc *rt; 15337c478bd9Sstevel@tonic-gate ipaddr_t src, dst, tunnel_src = 0; 15347c478bd9Sstevel@tonic-gate static int srctun = 0; 15357c478bd9Sstevel@tonic-gate vifi_t vifi; 15367c478bd9Sstevel@tonic-gate boolean_t pim_reg_packet = B_FALSE; 15377c478bd9Sstevel@tonic-gate struct mfcb *mfcbp; 1538f4b3ec61Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 1539fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 1540bd670b35SErik Nordmark ill_t *rill = ira->ira_rill; 1541bd670b35SErik Nordmark 1542bd670b35SErik Nordmark ASSERT(ira->ira_pktlen == msgdsize(mp)); 15437c478bd9Sstevel@tonic-gate 1544f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1545fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 15467c478bd9Sstevel@tonic-gate "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s", 15477c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), 15487c478bd9Sstevel@tonic-gate ill->ill_name); 15497c478bd9Sstevel@tonic-gate } 15507c478bd9Sstevel@tonic-gate 15517c478bd9Sstevel@tonic-gate dst = ipha->ipha_dst; 1552bd670b35SErik Nordmark if (ira->ira_flags & 
IRAF_PIM_REGISTER) 15537c478bd9Sstevel@tonic-gate pim_reg_packet = B_TRUE; 1554bd670b35SErik Nordmark else if (ira->ira_flags & IRAF_MROUTE_TUNNEL_SET) 1555bd670b35SErik Nordmark tunnel_src = ira->ira_mroute_tunnel; 15567c478bd9Sstevel@tonic-gate 15577c478bd9Sstevel@tonic-gate /* 15587c478bd9Sstevel@tonic-gate * Don't forward a packet with time-to-live of zero or one, 15597c478bd9Sstevel@tonic-gate * or a packet destined to a local-only group. 15607c478bd9Sstevel@tonic-gate */ 15617c478bd9Sstevel@tonic-gate if (CLASSD(dst) && (ipha->ipha_ttl <= 1 || 15627c478bd9Sstevel@tonic-gate (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) { 1563f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1564fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 15657c478bd9Sstevel@tonic-gate "ip_mforward: not forwarded ttl %d," 15667c478bd9Sstevel@tonic-gate " dst 0x%x ill %s", 15677c478bd9Sstevel@tonic-gate ipha->ipha_ttl, ntohl(dst), ill->ill_name); 15687c478bd9Sstevel@tonic-gate } 15697c478bd9Sstevel@tonic-gate if (tunnel_src != 0) 15707c478bd9Sstevel@tonic-gate return (1); 15717c478bd9Sstevel@tonic-gate else 15727c478bd9Sstevel@tonic-gate return (0); 15737c478bd9Sstevel@tonic-gate } 15747c478bd9Sstevel@tonic-gate 15757c478bd9Sstevel@tonic-gate if ((tunnel_src != 0) || pim_reg_packet) { 15767c478bd9Sstevel@tonic-gate /* 15777c478bd9Sstevel@tonic-gate * Packet arrived over an encapsulated tunnel or via a PIM 1578bd670b35SErik Nordmark * register message. 
15797c478bd9Sstevel@tonic-gate */ 1580f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 15817c478bd9Sstevel@tonic-gate if (tunnel_src != 0) { 1582fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, 1583f4b3ec61Sdh155122 SL_TRACE, 15847c478bd9Sstevel@tonic-gate "ip_mforward: ill %s arrived via ENCAP TUN", 15857c478bd9Sstevel@tonic-gate ill->ill_name); 15867c478bd9Sstevel@tonic-gate } else if (pim_reg_packet) { 1587fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, 1588f4b3ec61Sdh155122 SL_TRACE, 15897c478bd9Sstevel@tonic-gate "ip_mforward: ill %s arrived via" 15907c478bd9Sstevel@tonic-gate " REGISTER VIF", 15917c478bd9Sstevel@tonic-gate ill->ill_name); 15927c478bd9Sstevel@tonic-gate } 15937c478bd9Sstevel@tonic-gate } 15947c478bd9Sstevel@tonic-gate } else if ((ipha->ipha_version_and_hdr_length & 0xf) < 15957c478bd9Sstevel@tonic-gate (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 || 15967c478bd9Sstevel@tonic-gate ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) { 15977c478bd9Sstevel@tonic-gate /* Packet arrived via a physical interface. */ 1598f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1599fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 16007c478bd9Sstevel@tonic-gate "ip_mforward: ill %s arrived via PHYINT", 16017c478bd9Sstevel@tonic-gate ill->ill_name); 16027c478bd9Sstevel@tonic-gate } 16037c478bd9Sstevel@tonic-gate 16047c478bd9Sstevel@tonic-gate } else { 16057c478bd9Sstevel@tonic-gate /* 16067c478bd9Sstevel@tonic-gate * Packet arrived through a SRCRT tunnel. 16077c478bd9Sstevel@tonic-gate * Source-route tunnels are no longer supported. 16087c478bd9Sstevel@tonic-gate * Error message printed every 1000 times. 
16097c478bd9Sstevel@tonic-gate */ 16107c478bd9Sstevel@tonic-gate if ((srctun++ % 1000) == 0) { 16117c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 16127c478bd9Sstevel@tonic-gate "ip_mforward: received source-routed pkt from %x", 16137c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_src)); 16147c478bd9Sstevel@tonic-gate } 16157c478bd9Sstevel@tonic-gate return (-1); 16167c478bd9Sstevel@tonic-gate } 16177c478bd9Sstevel@tonic-gate 1618f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_in++; 16197c478bd9Sstevel@tonic-gate src = ipha->ipha_src; 16207c478bd9Sstevel@tonic-gate 16217c478bd9Sstevel@tonic-gate /* Find route in cache, return NULL if not there or upcalls q'ed. */ 16227c478bd9Sstevel@tonic-gate 16237c478bd9Sstevel@tonic-gate /* 16247c478bd9Sstevel@tonic-gate * Lock the mfctable against changes made by ip_mforward. 16257c478bd9Sstevel@tonic-gate * Note that only add_mfc and del_mfc can remove entries and 16267c478bd9Sstevel@tonic-gate * they run with exclusive access to IP. So we do not need to 16277c478bd9Sstevel@tonic-gate * guard against the rt being deleted, so release lock after reading. 16287c478bd9Sstevel@tonic-gate */ 16297c478bd9Sstevel@tonic-gate 1630f4b3ec61Sdh155122 if (is_mrouter_off(ipst)) 16317c478bd9Sstevel@tonic-gate return (-1); 16327c478bd9Sstevel@tonic-gate 1633f4b3ec61Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)]; 16347c478bd9Sstevel@tonic-gate MFCB_REFHOLD(mfcbp); 16357c478bd9Sstevel@tonic-gate MFCFIND(mfcbp, src, dst, rt); 16367c478bd9Sstevel@tonic-gate 16377c478bd9Sstevel@tonic-gate /* Entry exists, so forward if necessary */ 16387c478bd9Sstevel@tonic-gate if (rt != NULL) { 16397c478bd9Sstevel@tonic-gate int ret = 0; 1640f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_mfc_hits++; 16417c478bd9Sstevel@tonic-gate if (pim_reg_packet) { 1642f4b3ec61Sdh155122 ASSERT(ipst->ips_reg_vif_num != ALL_VIFS); 16437c478bd9Sstevel@tonic-gate ret = ip_mdq(mp, ipha, 1644f4b3ec61Sdh155122 ipst->ips_vifs[ipst->ips_reg_vif_num]. 
1645f4b3ec61Sdh155122 v_ipif->ipif_ill, 1646f4b3ec61Sdh155122 0, rt); 16477c478bd9Sstevel@tonic-gate } else { 16487c478bd9Sstevel@tonic-gate ret = ip_mdq(mp, ipha, ill, tunnel_src, rt); 16497c478bd9Sstevel@tonic-gate } 16507c478bd9Sstevel@tonic-gate 16517c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 16527c478bd9Sstevel@tonic-gate return (ret); 16537c478bd9Sstevel@tonic-gate 16547c478bd9Sstevel@tonic-gate /* 16557c478bd9Sstevel@tonic-gate * Don't forward if we don't have a cache entry. Mrouted will 16567c478bd9Sstevel@tonic-gate * always provide a cache entry in response to an upcall. 16577c478bd9Sstevel@tonic-gate */ 16587c478bd9Sstevel@tonic-gate } else { 16597c478bd9Sstevel@tonic-gate /* 16607c478bd9Sstevel@tonic-gate * If we don't have a route for packet's origin, make a copy 16617c478bd9Sstevel@tonic-gate * of the packet and send message to routing daemon. 16627c478bd9Sstevel@tonic-gate */ 16637c478bd9Sstevel@tonic-gate struct mfc *mfc_rt = NULL; 16647c478bd9Sstevel@tonic-gate mblk_t *mp0 = NULL; 16657c478bd9Sstevel@tonic-gate mblk_t *mp_copy = NULL; 16667c478bd9Sstevel@tonic-gate struct rtdetq *rte = NULL; 16677c478bd9Sstevel@tonic-gate struct rtdetq *rte_m, *rte1, *prev_rte; 16687c478bd9Sstevel@tonic-gate uint_t hash; 16697c478bd9Sstevel@tonic-gate int npkts; 16707c478bd9Sstevel@tonic-gate boolean_t new_mfc = B_FALSE; 1671f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_mfc_misses++; 16727c478bd9Sstevel@tonic-gate /* BSD uses mrts_no_route++ */ 1673f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1674fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 16757c478bd9Sstevel@tonic-gate "ip_mforward: no rte ill %s src %x g %x misses %d", 16767c478bd9Sstevel@tonic-gate ill->ill_name, ntohl(src), ntohl(dst), 1677f4b3ec61Sdh155122 (int)ipst->ips_mrtstat->mrts_mfc_misses); 16787c478bd9Sstevel@tonic-gate } 16797c478bd9Sstevel@tonic-gate /* 16807c478bd9Sstevel@tonic-gate * The order of the following code differs from the BSD code. 
16817c478bd9Sstevel@tonic-gate * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x 16827c478bd9Sstevel@tonic-gate * code works, so SunOS 5.x wasn't changed to conform to the 16837c478bd9Sstevel@tonic-gate * BSD version. 16847c478bd9Sstevel@tonic-gate */ 16857c478bd9Sstevel@tonic-gate 16867c478bd9Sstevel@tonic-gate /* Lock mfctable. */ 16877c478bd9Sstevel@tonic-gate hash = MFCHASH(src, dst); 1688f4b3ec61Sdh155122 mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock)); 16897c478bd9Sstevel@tonic-gate 16907c478bd9Sstevel@tonic-gate /* 16917c478bd9Sstevel@tonic-gate * If we are turning off mrouted return an error 16927c478bd9Sstevel@tonic-gate */ 1693f4b3ec61Sdh155122 if (is_mrouter_off(ipst)) { 16947c478bd9Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 16957c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 16967c478bd9Sstevel@tonic-gate return (-1); 16977c478bd9Sstevel@tonic-gate } 16987c478bd9Sstevel@tonic-gate 16997c478bd9Sstevel@tonic-gate /* Is there an upcall waiting for this packet? */ 1700f4b3ec61Sdh155122 for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt; 17017c478bd9Sstevel@tonic-gate mfc_rt = mfc_rt->mfc_next) { 17027c478bd9Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 1703f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1704fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, 1705f4b3ec61Sdh155122 SL_TRACE, 17067c478bd9Sstevel@tonic-gate "ip_mforward: MFCTAB hash %d o 0x%x" 17077c478bd9Sstevel@tonic-gate " g 0x%x\n", 17087c478bd9Sstevel@tonic-gate hash, ntohl(mfc_rt->mfc_origin.s_addr), 17097c478bd9Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 17107c478bd9Sstevel@tonic-gate } 17117c478bd9Sstevel@tonic-gate /* There is an upcall */ 17127c478bd9Sstevel@tonic-gate if ((src == mfc_rt->mfc_origin.s_addr) && 17137c478bd9Sstevel@tonic-gate (dst == mfc_rt->mfc_mcastgrp.s_addr) && 17147c478bd9Sstevel@tonic-gate (mfc_rt->mfc_rte != NULL) && 17157c478bd9Sstevel@tonic-gate !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 17167c478bd9Sstevel@tonic-gate 
break; 17177c478bd9Sstevel@tonic-gate } 17187c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 17197c478bd9Sstevel@tonic-gate } 17207c478bd9Sstevel@tonic-gate /* No upcall, so make a new entry into mfctable */ 17217c478bd9Sstevel@tonic-gate if (mfc_rt == NULL) { 17227c478bd9Sstevel@tonic-gate mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc)); 17237c478bd9Sstevel@tonic-gate if (mfc_rt == NULL) { 1724f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 17257c478bd9Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory " 17267c478bd9Sstevel@tonic-gate "for mfc, mfc_rt\n")); 17277c478bd9Sstevel@tonic-gate goto error_return; 17287c478bd9Sstevel@tonic-gate } else 17297c478bd9Sstevel@tonic-gate new_mfc = B_TRUE; 17307c478bd9Sstevel@tonic-gate /* Get resources */ 17317c478bd9Sstevel@tonic-gate /* TODO could copy header and dup rest */ 17327c478bd9Sstevel@tonic-gate mp_copy = copymsg(mp); 17337c478bd9Sstevel@tonic-gate if (mp_copy == NULL) { 1734f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 17357c478bd9Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory for " 17367c478bd9Sstevel@tonic-gate "mblk, mp_copy\n")); 17377c478bd9Sstevel@tonic-gate goto error_return; 17387c478bd9Sstevel@tonic-gate } 17397c478bd9Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 17407c478bd9Sstevel@tonic-gate } 17417c478bd9Sstevel@tonic-gate /* Get resources for rte, whether first rte or not first. 
*/ 17427c478bd9Sstevel@tonic-gate /* Add this packet into rtdetq */ 17437c478bd9Sstevel@tonic-gate rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq)); 17447c478bd9Sstevel@tonic-gate if (rte == NULL) { 1745f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 17467c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 17477c478bd9Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory for" 17487c478bd9Sstevel@tonic-gate " rtdetq, rte\n")); 17497c478bd9Sstevel@tonic-gate goto error_return; 17507c478bd9Sstevel@tonic-gate } 17517c478bd9Sstevel@tonic-gate 17527c478bd9Sstevel@tonic-gate mp0 = copymsg(mp); 17537c478bd9Sstevel@tonic-gate if (mp0 == NULL) { 1754f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 17557c478bd9Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory for mblk, mp0\n")); 17567c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 17577c478bd9Sstevel@tonic-gate goto error_return; 17587c478bd9Sstevel@tonic-gate } 17597c478bd9Sstevel@tonic-gate rte->mp = mp0; 17607c478bd9Sstevel@tonic-gate if (pim_reg_packet) { 1761f4b3ec61Sdh155122 ASSERT(ipst->ips_reg_vif_num != ALL_VIFS); 1762f4b3ec61Sdh155122 rte->ill = 1763f4b3ec61Sdh155122 ipst->ips_vifs[ipst->ips_reg_vif_num]. 1764f4b3ec61Sdh155122 v_ipif->ipif_ill; 17657c478bd9Sstevel@tonic-gate } else { 17667c478bd9Sstevel@tonic-gate rte->ill = ill; 17677c478bd9Sstevel@tonic-gate } 17687c478bd9Sstevel@tonic-gate rte->rte_next = NULL; 17697c478bd9Sstevel@tonic-gate 17707c478bd9Sstevel@tonic-gate /* 17717c478bd9Sstevel@tonic-gate * Determine if upcall q (rtdetq) has overflowed. 17727c478bd9Sstevel@tonic-gate * mfc_rt->mfc_rte is null by mi_zalloc 17737c478bd9Sstevel@tonic-gate * if it is the first message. 
17747c478bd9Sstevel@tonic-gate */ 17757c478bd9Sstevel@tonic-gate for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m; 17767c478bd9Sstevel@tonic-gate rte_m = rte_m->rte_next) 17777c478bd9Sstevel@tonic-gate npkts++; 1778f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1779fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 17807c478bd9Sstevel@tonic-gate "ip_mforward: upcalls %d\n", npkts); 17817c478bd9Sstevel@tonic-gate } 17827c478bd9Sstevel@tonic-gate if (npkts > MAX_UPQ) { 1783f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_upq_ovflw++; 17847c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 17857c478bd9Sstevel@tonic-gate goto error_return; 17867c478bd9Sstevel@tonic-gate } 17877c478bd9Sstevel@tonic-gate 17887c478bd9Sstevel@tonic-gate if (npkts == 0) { /* first upcall */ 17897c478bd9Sstevel@tonic-gate int i = 0; 17907c478bd9Sstevel@tonic-gate /* 17917c478bd9Sstevel@tonic-gate * Now finish installing the new mfc! Now that we have 17927c478bd9Sstevel@tonic-gate * resources! Insert new entry at head of hash chain. 17937c478bd9Sstevel@tonic-gate * Use src and dst which are ipaddr_t's. 
17947c478bd9Sstevel@tonic-gate */ 17957c478bd9Sstevel@tonic-gate mfc_rt->mfc_origin.s_addr = src; 17967c478bd9Sstevel@tonic-gate mfc_rt->mfc_mcastgrp.s_addr = dst; 17977c478bd9Sstevel@tonic-gate 1798f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1799f4b3ec61Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) 18007c478bd9Sstevel@tonic-gate mfc_rt->mfc_ttls[i] = 0; 1801f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 18027c478bd9Sstevel@tonic-gate mfc_rt->mfc_parent = ALL_VIFS; 18037c478bd9Sstevel@tonic-gate 18047c478bd9Sstevel@tonic-gate /* Link into table */ 1805f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1806fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, 1807f4b3ec61Sdh155122 SL_TRACE, 18087c478bd9Sstevel@tonic-gate "ip_mforward: NEW MFCTAB hash %d o 0x%x " 18097c478bd9Sstevel@tonic-gate "g 0x%x\n", hash, 18107c478bd9Sstevel@tonic-gate ntohl(mfc_rt->mfc_origin.s_addr), 18117c478bd9Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 18127c478bd9Sstevel@tonic-gate } 1813f4b3ec61Sdh155122 mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc; 1814f4b3ec61Sdh155122 ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt; 18157c478bd9Sstevel@tonic-gate mfc_rt->mfc_rte = NULL; 18167c478bd9Sstevel@tonic-gate } 18177c478bd9Sstevel@tonic-gate 18187c478bd9Sstevel@tonic-gate /* Link in the upcall */ 18197c478bd9Sstevel@tonic-gate /* First upcall */ 18207c478bd9Sstevel@tonic-gate if (mfc_rt->mfc_rte == NULL) 18217c478bd9Sstevel@tonic-gate mfc_rt->mfc_rte = rte; 18227c478bd9Sstevel@tonic-gate else { 18237c478bd9Sstevel@tonic-gate /* not the first upcall */ 18247c478bd9Sstevel@tonic-gate prev_rte = mfc_rt->mfc_rte; 18257c478bd9Sstevel@tonic-gate for (rte1 = mfc_rt->mfc_rte->rte_next; rte1; 1826fc80c0dfSnordmark prev_rte = rte1, rte1 = rte1->rte_next) 1827fc80c0dfSnordmark ; 18287c478bd9Sstevel@tonic-gate prev_rte->rte_next = rte; 18297c478bd9Sstevel@tonic-gate } 18307c478bd9Sstevel@tonic-gate 18317c478bd9Sstevel@tonic-gate /* 18327c478bd9Sstevel@tonic-gate * 
No upcalls waiting, this is first one, so send a message to 18337c478bd9Sstevel@tonic-gate * routing daemon to install a route into kernel table. 18347c478bd9Sstevel@tonic-gate */ 18357c478bd9Sstevel@tonic-gate if (npkts == 0) { 18367c478bd9Sstevel@tonic-gate struct igmpmsg *im; 18377c478bd9Sstevel@tonic-gate /* ipha_protocol is 0, for upcall */ 18387c478bd9Sstevel@tonic-gate ASSERT(mp_copy != NULL); 18397c478bd9Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr; 18407c478bd9Sstevel@tonic-gate im->im_msgtype = IGMPMSG_NOCACHE; 18417c478bd9Sstevel@tonic-gate im->im_mbz = 0; 1842f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 18437c478bd9Sstevel@tonic-gate if (pim_reg_packet) { 1844f4b3ec61Sdh155122 im->im_vif = (uchar_t)ipst->ips_reg_vif_num; 1845f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 18467c478bd9Sstevel@tonic-gate } else { 18477c478bd9Sstevel@tonic-gate /* 18487c478bd9Sstevel@tonic-gate * XXX do we need to hold locks here ? 18497c478bd9Sstevel@tonic-gate */ 1850f4b3ec61Sdh155122 for (vifi = 0; 1851f4b3ec61Sdh155122 vifi < ipst->ips_numvifs; 1852f4b3ec61Sdh155122 vifi++) { 1853f4b3ec61Sdh155122 if (ipst->ips_vifs[vifi].v_ipif == NULL) 18547c478bd9Sstevel@tonic-gate continue; 1855f4b3ec61Sdh155122 if (ipst->ips_vifs[vifi]. 
1856f4b3ec61Sdh155122 v_ipif->ipif_ill == ill) { 18577c478bd9Sstevel@tonic-gate im->im_vif = (uchar_t)vifi; 18587c478bd9Sstevel@tonic-gate break; 18597c478bd9Sstevel@tonic-gate } 18607c478bd9Sstevel@tonic-gate } 1861f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1862f4b3ec61Sdh155122 ASSERT(vifi < ipst->ips_numvifs); 18637c478bd9Sstevel@tonic-gate } 18647c478bd9Sstevel@tonic-gate 1865f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_upcalls++; 18667c478bd9Sstevel@tonic-gate /* Timer to discard upcalls if mrouted is too slow */ 18677c478bd9Sstevel@tonic-gate mfc_rt->mfc_timeout_id = timeout(expire_upcalls, 18687c478bd9Sstevel@tonic-gate mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE); 18697c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 1870f4b3ec61Sdh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); 1871fc80c0dfSnordmark /* Pass to RAWIP */ 1872bd670b35SErik Nordmark ira->ira_ill = ira->ira_rill = NULL; 1873bd670b35SErik Nordmark (mrouter->conn_recv)(mrouter, mp_copy, NULL, ira); 1874bd670b35SErik Nordmark ira->ira_ill = ill; 1875bd670b35SErik Nordmark ira->ira_rill = rill; 18767c478bd9Sstevel@tonic-gate } else { 18777c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 1878f4b3ec61Sdh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); 1879bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1880bd670b35SErik Nordmark ip_drop_input("ip_mforward - upcall already waiting", 1881bd670b35SErik Nordmark mp_copy, ill); 18827c478bd9Sstevel@tonic-gate freemsg(mp_copy); 18837c478bd9Sstevel@tonic-gate } 18847c478bd9Sstevel@tonic-gate 18857c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 18867c478bd9Sstevel@tonic-gate if (tunnel_src != 0) 18877c478bd9Sstevel@tonic-gate return (1); 18887c478bd9Sstevel@tonic-gate else 18897c478bd9Sstevel@tonic-gate return (0); 18907c478bd9Sstevel@tonic-gate error_return: 1891f4b3ec61Sdh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); 18927c478bd9Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 
18937c478bd9Sstevel@tonic-gate if (mfc_rt != NULL && (new_mfc == B_TRUE)) 18947c478bd9Sstevel@tonic-gate mi_free((char *)mfc_rt); 18957c478bd9Sstevel@tonic-gate if (rte != NULL) 18967c478bd9Sstevel@tonic-gate mi_free((char *)rte); 1897bd670b35SErik Nordmark if (mp_copy != NULL) { 1898bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1899bd670b35SErik Nordmark ip_drop_input("ip_mforward error", mp_copy, ill); 19007c478bd9Sstevel@tonic-gate freemsg(mp_copy); 1901bd670b35SErik Nordmark } 19027c478bd9Sstevel@tonic-gate if (mp0 != NULL) 19037c478bd9Sstevel@tonic-gate freemsg(mp0); 19047c478bd9Sstevel@tonic-gate return (-1); 19057c478bd9Sstevel@tonic-gate } 19067c478bd9Sstevel@tonic-gate } 19077c478bd9Sstevel@tonic-gate 19087c478bd9Sstevel@tonic-gate /* 19097c478bd9Sstevel@tonic-gate * Clean up the mfctable cache entry if upcall is not serviced. 19107c478bd9Sstevel@tonic-gate * SunOS 5.x has timeout per mfc, unlike BSD which has one timer. 19117c478bd9Sstevel@tonic-gate */ 19127c478bd9Sstevel@tonic-gate static void 19137c478bd9Sstevel@tonic-gate expire_upcalls(void *arg) 19147c478bd9Sstevel@tonic-gate { 19157c478bd9Sstevel@tonic-gate struct mfc *mfc_rt = arg; 19167c478bd9Sstevel@tonic-gate uint_t hash; 19177c478bd9Sstevel@tonic-gate struct mfc *prev_mfc, *mfc0; 1918f4b3ec61Sdh155122 ip_stack_t *ipst; 1919fc80c0dfSnordmark conn_t *mrouter; 1920f4b3ec61Sdh155122 1921f4b3ec61Sdh155122 if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) { 1922f4b3ec61Sdh155122 cmn_err(CE_WARN, "expire_upcalls: no ILL\n"); 1923f4b3ec61Sdh155122 return; 1924f4b3ec61Sdh155122 } 1925f4b3ec61Sdh155122 ipst = mfc_rt->mfc_rte->ill->ill_ipst; 1926fc80c0dfSnordmark mrouter = ipst->ips_ip_g_mrouter; 19277c478bd9Sstevel@tonic-gate 19287c478bd9Sstevel@tonic-gate hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr); 1929f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1930fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 
19317c478bd9Sstevel@tonic-gate "expire_upcalls: hash %d s %x g %x", 19327c478bd9Sstevel@tonic-gate hash, ntohl(mfc_rt->mfc_origin.s_addr), 19337c478bd9Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 19347c478bd9Sstevel@tonic-gate } 1935f4b3ec61Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 19367c478bd9Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 19377c478bd9Sstevel@tonic-gate /* 19387c478bd9Sstevel@tonic-gate * if timeout has been set to zero, than the 19397c478bd9Sstevel@tonic-gate * entry has been filled, no need to delete it. 19407c478bd9Sstevel@tonic-gate */ 19417c478bd9Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id == 0) 19427c478bd9Sstevel@tonic-gate goto done; 1943f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_cache_cleanups++; 19447c478bd9Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 19457c478bd9Sstevel@tonic-gate 19467c478bd9Sstevel@tonic-gate /* Determine entry to be cleaned up in cache table. */ 1947f4b3ec61Sdh155122 for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0; 19487c478bd9Sstevel@tonic-gate prev_mfc = mfc0, mfc0 = mfc0->mfc_next) 19497c478bd9Sstevel@tonic-gate if (mfc0 == mfc_rt) 19507c478bd9Sstevel@tonic-gate break; 19517c478bd9Sstevel@tonic-gate 19527c478bd9Sstevel@tonic-gate /* del_mfc takes care of gone mfcs */ 19537c478bd9Sstevel@tonic-gate ASSERT(prev_mfc != NULL); 19547c478bd9Sstevel@tonic-gate ASSERT(mfc0 != NULL); 19557c478bd9Sstevel@tonic-gate 19567c478bd9Sstevel@tonic-gate /* 19577c478bd9Sstevel@tonic-gate * Delete the entry from the cache 19587c478bd9Sstevel@tonic-gate */ 1959f4b3ec61Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 19607c478bd9Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 19617c478bd9Sstevel@tonic-gate 19627c478bd9Sstevel@tonic-gate /* 19637c478bd9Sstevel@tonic-gate * release_mfc will drop all queued upcall packets. 19647c478bd9Sstevel@tonic-gate * and will free the mbuf with the pkt, if, timing info. 
19657c478bd9Sstevel@tonic-gate */ 19667c478bd9Sstevel@tonic-gate done: 19677c478bd9Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 1968f4b3ec61Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 19697c478bd9Sstevel@tonic-gate } 19707c478bd9Sstevel@tonic-gate 19717c478bd9Sstevel@tonic-gate /* 19727c478bd9Sstevel@tonic-gate * Packet forwarding routine once entry in the cache is made. 19737c478bd9Sstevel@tonic-gate */ 19747c478bd9Sstevel@tonic-gate static int 19757c478bd9Sstevel@tonic-gate ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, 19767c478bd9Sstevel@tonic-gate struct mfc *rt) 19777c478bd9Sstevel@tonic-gate { 19787c478bd9Sstevel@tonic-gate vifi_t vifi; 19797c478bd9Sstevel@tonic-gate struct vif *vifp; 19807c478bd9Sstevel@tonic-gate ipaddr_t dst = ipha->ipha_dst; 19817c478bd9Sstevel@tonic-gate size_t plen = msgdsize(mp); 19827c478bd9Sstevel@tonic-gate vifi_t num_of_vifs; 1983f4b3ec61Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 1984fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 1985bd670b35SErik Nordmark ip_recv_attr_t iras; 19867c478bd9Sstevel@tonic-gate 1987f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 1988fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 19897c478bd9Sstevel@tonic-gate "ip_mdq: SEND src %x, ipha_dst %x, ill %s", 19907c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), 19917c478bd9Sstevel@tonic-gate ill->ill_name); 19927c478bd9Sstevel@tonic-gate } 19937c478bd9Sstevel@tonic-gate 19947c478bd9Sstevel@tonic-gate /* Macro to send packet on vif */ 19957c478bd9Sstevel@tonic-gate #define MC_SEND(ipha, mp, vifp, dst) { \ 19967c478bd9Sstevel@tonic-gate if ((vifp)->v_flags & VIFF_TUNNEL) \ 19977c478bd9Sstevel@tonic-gate encap_send((ipha), (mp), (vifp), (dst)); \ 19987c478bd9Sstevel@tonic-gate else if ((vifp)->v_flags & VIFF_REGISTER) \ 19997c478bd9Sstevel@tonic-gate register_send((ipha), (mp), (vifp), (dst)); \ 20007c478bd9Sstevel@tonic-gate else \ 20017c478bd9Sstevel@tonic-gate 
phyint_send((ipha), (mp), (vifp), (dst)); \ 20027c478bd9Sstevel@tonic-gate } 20037c478bd9Sstevel@tonic-gate 20047c478bd9Sstevel@tonic-gate vifi = rt->mfc_parent; 20057c478bd9Sstevel@tonic-gate 20067c478bd9Sstevel@tonic-gate /* 20077c478bd9Sstevel@tonic-gate * The value of vifi is MAXVIFS if the pkt had no parent, i.e., 20087c478bd9Sstevel@tonic-gate * Mrouted had no route. 20097c478bd9Sstevel@tonic-gate * We wanted the route installed in the mfctable to prevent multiple 20107c478bd9Sstevel@tonic-gate * tries, so it passed add_mfc(), but is discarded here. The v_ipif is 20117c478bd9Sstevel@tonic-gate * NULL so we don't want to check the ill. Still needed as of Mrouted 20127c478bd9Sstevel@tonic-gate * 3.6. 20137c478bd9Sstevel@tonic-gate */ 20147c478bd9Sstevel@tonic-gate if (vifi == NO_VIF) { 20157c478bd9Sstevel@tonic-gate ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n", 20167c478bd9Sstevel@tonic-gate ill->ill_name)); 2017f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2018fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 20197c478bd9Sstevel@tonic-gate "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name); 20207c478bd9Sstevel@tonic-gate } 20217c478bd9Sstevel@tonic-gate return (-1); /* drop pkt */ 20227c478bd9Sstevel@tonic-gate } 20237c478bd9Sstevel@tonic-gate 2024f4b3ec61Sdh155122 if (!lock_good_vif(&ipst->ips_vifs[vifi])) 20257c478bd9Sstevel@tonic-gate return (-1); 20267c478bd9Sstevel@tonic-gate /* 20277c478bd9Sstevel@tonic-gate * The MFC entries are not cleaned up when an ipif goes 20287c478bd9Sstevel@tonic-gate * away thus this code has to guard against an MFC referencing 20297c478bd9Sstevel@tonic-gate * an ipif that has been closed. Note: reset_mrt_vif_ipif 20307c478bd9Sstevel@tonic-gate * sets the v_ipif to NULL when the ipif disappears. 
20317c478bd9Sstevel@tonic-gate */ 2032f4b3ec61Sdh155122 ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL); 20337c478bd9Sstevel@tonic-gate 2034f4b3ec61Sdh155122 if (vifi >= ipst->ips_numvifs) { 20357c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs " 20367c478bd9Sstevel@tonic-gate "%d ill %s viftable ill %s\n", 2037f4b3ec61Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 2038f4b3ec61Sdh155122 ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); 2039f4b3ec61Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 20407c478bd9Sstevel@tonic-gate return (-1); 20417c478bd9Sstevel@tonic-gate } 20427c478bd9Sstevel@tonic-gate /* 20437c478bd9Sstevel@tonic-gate * Don't forward if it didn't arrive from the parent vif for its 2044e11c3f44Smeem * origin. 20457c478bd9Sstevel@tonic-gate */ 2046bd670b35SErik Nordmark if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill) || 2047f4b3ec61Sdh155122 (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) { 20487c478bd9Sstevel@tonic-gate /* Came in the wrong interface */ 20497c478bd9Sstevel@tonic-gate ip1dbg(("ip_mdq: arrived wrong if, vifi %d " 20507c478bd9Sstevel@tonic-gate "numvifs %d ill %s viftable ill %s\n", 2051f4b3ec61Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 2052bd670b35SErik Nordmark ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name)); 2053f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2054fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 20557c478bd9Sstevel@tonic-gate "ip_mdq: arrived wrong if, vifi %d ill " 20567c478bd9Sstevel@tonic-gate "%s viftable ill %s\n", 2057bd670b35SErik Nordmark (int)vifi, ill->ill_name, 2058bd670b35SErik Nordmark ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); 20597c478bd9Sstevel@tonic-gate } 2060f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_wrong_if++; 20617c478bd9Sstevel@tonic-gate rt->mfc_wrong_if++; 20627c478bd9Sstevel@tonic-gate 20637c478bd9Sstevel@tonic-gate /* 20647c478bd9Sstevel@tonic-gate * If we are doing PIM assert processing and we are 
forwarding 20657c478bd9Sstevel@tonic-gate * packets on this interface, and it is a broadcast medium 20667c478bd9Sstevel@tonic-gate * interface (and not a tunnel), send a message to the routing. 20677c478bd9Sstevel@tonic-gate * 20687c478bd9Sstevel@tonic-gate * We use the first ipif on the list, since it's all we have. 20697c478bd9Sstevel@tonic-gate * Chances are the ipif_flags are the same for ipifs on the ill. 20707c478bd9Sstevel@tonic-gate */ 2071f4b3ec61Sdh155122 if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 && 20727c478bd9Sstevel@tonic-gate (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) && 2073f4b3ec61Sdh155122 !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) { 20747c478bd9Sstevel@tonic-gate mblk_t *mp_copy; 20757c478bd9Sstevel@tonic-gate struct igmpmsg *im; 20767c478bd9Sstevel@tonic-gate 20777c478bd9Sstevel@tonic-gate /* TODO could copy header and dup rest */ 20787c478bd9Sstevel@tonic-gate mp_copy = copymsg(mp); 20797c478bd9Sstevel@tonic-gate if (mp_copy == NULL) { 2080f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 20817c478bd9Sstevel@tonic-gate ip1dbg(("ip_mdq: out of memory " 20827c478bd9Sstevel@tonic-gate "for mblk, mp_copy\n")); 2083f4b3ec61Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 20847c478bd9Sstevel@tonic-gate return (-1); 20857c478bd9Sstevel@tonic-gate } 20867c478bd9Sstevel@tonic-gate 20877c478bd9Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr; 20887c478bd9Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WRONGVIF; 20897c478bd9Sstevel@tonic-gate im->im_mbz = 0; 20907c478bd9Sstevel@tonic-gate im->im_vif = (ushort_t)vifi; 2091fc80c0dfSnordmark /* Pass to RAWIP */ 2092bd670b35SErik Nordmark 2093bd670b35SErik Nordmark bzero(&iras, sizeof (iras)); 2094bd670b35SErik Nordmark iras.ira_flags = IRAF_IS_IPV4; 2095bd670b35SErik Nordmark iras.ira_ip_hdr_length = 2096bd670b35SErik Nordmark IPH_HDR_LENGTH(mp_copy->b_rptr); 2097bd670b35SErik Nordmark iras.ira_pktlen = msgdsize(mp_copy); 2098bd670b35SErik Nordmark (mrouter->conn_recv)(mrouter, 
mp_copy, NULL, &iras); 2099bd670b35SErik Nordmark ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 21007c478bd9Sstevel@tonic-gate } 2101f4b3ec61Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21027c478bd9Sstevel@tonic-gate if (tunnel_src != 0) 21037c478bd9Sstevel@tonic-gate return (1); 21047c478bd9Sstevel@tonic-gate else 21057c478bd9Sstevel@tonic-gate return (0); 21067c478bd9Sstevel@tonic-gate } 21077c478bd9Sstevel@tonic-gate /* 21087c478bd9Sstevel@tonic-gate * If I sourced this packet, it counts as output, else it was input. 21097c478bd9Sstevel@tonic-gate */ 2110f4b3ec61Sdh155122 if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) { 2111f4b3ec61Sdh155122 ipst->ips_vifs[vifi].v_pkt_out++; 2112f4b3ec61Sdh155122 ipst->ips_vifs[vifi].v_bytes_out += plen; 21137c478bd9Sstevel@tonic-gate } else { 2114f4b3ec61Sdh155122 ipst->ips_vifs[vifi].v_pkt_in++; 2115f4b3ec61Sdh155122 ipst->ips_vifs[vifi].v_bytes_in += plen; 21167c478bd9Sstevel@tonic-gate } 21177c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 21187c478bd9Sstevel@tonic-gate rt->mfc_pkt_cnt++; 21197c478bd9Sstevel@tonic-gate rt->mfc_byte_cnt += plen; 21207c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 2121f4b3ec61Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21227c478bd9Sstevel@tonic-gate /* 21237c478bd9Sstevel@tonic-gate * For each vif, decide if a copy of the packet should be forwarded. 21247c478bd9Sstevel@tonic-gate * Forward if: 21257c478bd9Sstevel@tonic-gate * - the vif threshold ttl is non-zero AND 21267c478bd9Sstevel@tonic-gate * - the pkt ttl exceeds the vif's threshold 21277c478bd9Sstevel@tonic-gate * A non-zero mfc_ttl indicates that the vif is part of 21287c478bd9Sstevel@tonic-gate * the output set for the mfc entry. 
21297c478bd9Sstevel@tonic-gate */ 2130f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 2131f4b3ec61Sdh155122 num_of_vifs = ipst->ips_numvifs; 2132f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 2133f4b3ec61Sdh155122 for (vifp = ipst->ips_vifs, vifi = 0; 2134f4b3ec61Sdh155122 vifi < num_of_vifs; 2135f4b3ec61Sdh155122 vifp++, vifi++) { 21367c478bd9Sstevel@tonic-gate if (!lock_good_vif(vifp)) 21377c478bd9Sstevel@tonic-gate continue; 21387c478bd9Sstevel@tonic-gate if ((rt->mfc_ttls[vifi] > 0) && 21397c478bd9Sstevel@tonic-gate (ipha->ipha_ttl > rt->mfc_ttls[vifi])) { 21407c478bd9Sstevel@tonic-gate /* 21417c478bd9Sstevel@tonic-gate * lock_good_vif should not have succedded if 21427c478bd9Sstevel@tonic-gate * v_ipif is null. 21437c478bd9Sstevel@tonic-gate */ 21447c478bd9Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 21457c478bd9Sstevel@tonic-gate vifp->v_pkt_out++; 21467c478bd9Sstevel@tonic-gate vifp->v_bytes_out += plen; 21477c478bd9Sstevel@tonic-gate MC_SEND(ipha, mp, vifp, dst); 2148f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_out++; 21497c478bd9Sstevel@tonic-gate } 21507c478bd9Sstevel@tonic-gate unlock_good_vif(vifp); 21517c478bd9Sstevel@tonic-gate } 21527c478bd9Sstevel@tonic-gate if (tunnel_src != 0) 21537c478bd9Sstevel@tonic-gate return (1); 21547c478bd9Sstevel@tonic-gate else 21557c478bd9Sstevel@tonic-gate return (0); 21567c478bd9Sstevel@tonic-gate } 21577c478bd9Sstevel@tonic-gate 21587c478bd9Sstevel@tonic-gate /* 21597c478bd9Sstevel@tonic-gate * Send the packet on physical interface. 21607c478bd9Sstevel@tonic-gate * Caller assumes can continue to use mp on return. 
21617c478bd9Sstevel@tonic-gate */ 21627c478bd9Sstevel@tonic-gate /* ARGSUSED */ 21637c478bd9Sstevel@tonic-gate static void 21647c478bd9Sstevel@tonic-gate phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 21657c478bd9Sstevel@tonic-gate { 21667c478bd9Sstevel@tonic-gate mblk_t *mp_copy; 2167f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2168fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 21697c478bd9Sstevel@tonic-gate 21707c478bd9Sstevel@tonic-gate /* Make a new reference to the packet */ 21717c478bd9Sstevel@tonic-gate mp_copy = copymsg(mp); /* TODO could copy header and dup rest */ 21727c478bd9Sstevel@tonic-gate if (mp_copy == NULL) { 2173f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 21747c478bd9Sstevel@tonic-gate ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n")); 21757c478bd9Sstevel@tonic-gate return; 21767c478bd9Sstevel@tonic-gate } 21777c478bd9Sstevel@tonic-gate if (vifp->v_rate_limit <= 0) 21787c478bd9Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy); 21797c478bd9Sstevel@tonic-gate else { 2180f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2181fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 21827c478bd9Sstevel@tonic-gate "phyint_send: tbf_contr rate %d " 21837c478bd9Sstevel@tonic-gate "vifp 0x%p mp 0x%p dst 0x%x", 21847c478bd9Sstevel@tonic-gate vifp->v_rate_limit, (void *)vifp, (void *)mp, dst); 21857c478bd9Sstevel@tonic-gate } 21867c478bd9Sstevel@tonic-gate tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr); 21877c478bd9Sstevel@tonic-gate } 21887c478bd9Sstevel@tonic-gate } 21897c478bd9Sstevel@tonic-gate 21907c478bd9Sstevel@tonic-gate /* 21917c478bd9Sstevel@tonic-gate * Send the whole packet for REGISTER encapsulation to PIM daemon 21927c478bd9Sstevel@tonic-gate * Caller assumes it can continue to use mp on return. 
21937c478bd9Sstevel@tonic-gate */ 21947c478bd9Sstevel@tonic-gate /* ARGSUSED */ 21957c478bd9Sstevel@tonic-gate static void 21967c478bd9Sstevel@tonic-gate register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 21977c478bd9Sstevel@tonic-gate { 21987c478bd9Sstevel@tonic-gate struct igmpmsg *im; 21997c478bd9Sstevel@tonic-gate mblk_t *mp_copy; 22007c478bd9Sstevel@tonic-gate ipha_t *ipha_copy; 2201bd670b35SErik Nordmark ill_t *ill = vifp->v_ipif->ipif_ill; 2202bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 2203fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2204bd670b35SErik Nordmark ip_recv_attr_t iras; 22057c478bd9Sstevel@tonic-gate 2206f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2207fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22087c478bd9Sstevel@tonic-gate "register_send: src %x, dst %x\n", 22097c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 22107c478bd9Sstevel@tonic-gate } 22117c478bd9Sstevel@tonic-gate 22127c478bd9Sstevel@tonic-gate /* 22137c478bd9Sstevel@tonic-gate * Copy the old packet & pullup its IP header into the new mblk_t so we 22147c478bd9Sstevel@tonic-gate * can modify it. Try to fill the new mblk_t since if we don't the 22157c478bd9Sstevel@tonic-gate * ethernet driver will. 22167c478bd9Sstevel@tonic-gate */ 22177c478bd9Sstevel@tonic-gate mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED); 22187c478bd9Sstevel@tonic-gate if (mp_copy == NULL) { 2219f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2220f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 2221fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22227c478bd9Sstevel@tonic-gate "register_send: allocb failure."); 22237c478bd9Sstevel@tonic-gate } 22247c478bd9Sstevel@tonic-gate return; 22257c478bd9Sstevel@tonic-gate } 22267c478bd9Sstevel@tonic-gate 22277c478bd9Sstevel@tonic-gate /* 22287c478bd9Sstevel@tonic-gate * Bump write pointer to account for igmpmsg being added. 
22297c478bd9Sstevel@tonic-gate */ 22307c478bd9Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg); 22317c478bd9Sstevel@tonic-gate 22327c478bd9Sstevel@tonic-gate /* 22337c478bd9Sstevel@tonic-gate * Chain packet to new mblk_t. 22347c478bd9Sstevel@tonic-gate */ 22357c478bd9Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 2236f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2237f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 2238fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22397c478bd9Sstevel@tonic-gate "register_send: copymsg failure."); 22407c478bd9Sstevel@tonic-gate } 22417c478bd9Sstevel@tonic-gate freeb(mp_copy); 22427c478bd9Sstevel@tonic-gate return; 22437c478bd9Sstevel@tonic-gate } 22447c478bd9Sstevel@tonic-gate 22457c478bd9Sstevel@tonic-gate /* 2246fc80c0dfSnordmark * icmp_input() asserts that IP version field is set to an 22477c478bd9Sstevel@tonic-gate * appropriate version. Hence, the struct igmpmsg that this really 22487c478bd9Sstevel@tonic-gate * becomes, needs to have the correct IP version field. 22497c478bd9Sstevel@tonic-gate */ 22507c478bd9Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr; 22517c478bd9Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr; 22527c478bd9Sstevel@tonic-gate 22537c478bd9Sstevel@tonic-gate /* 22547c478bd9Sstevel@tonic-gate * The kernel uses the struct igmpmsg header to encode the messages to 22557c478bd9Sstevel@tonic-gate * the multicast routing daemon. 
Fill in the fields in the header 22567c478bd9Sstevel@tonic-gate * starting with the message type which is IGMPMSG_WHOLEPKT 22577c478bd9Sstevel@tonic-gate */ 22587c478bd9Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr; 22597c478bd9Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WHOLEPKT; 22607c478bd9Sstevel@tonic-gate im->im_src.s_addr = ipha->ipha_src; 22617c478bd9Sstevel@tonic-gate im->im_dst.s_addr = ipha->ipha_dst; 22627c478bd9Sstevel@tonic-gate 22637c478bd9Sstevel@tonic-gate /* 22647c478bd9Sstevel@tonic-gate * Must Be Zero. This is because the struct igmpmsg is really an IP 22657c478bd9Sstevel@tonic-gate * header with renamed fields and the multicast routing daemon uses 22667c478bd9Sstevel@tonic-gate * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages. 22677c478bd9Sstevel@tonic-gate */ 22687c478bd9Sstevel@tonic-gate im->im_mbz = 0; 22697c478bd9Sstevel@tonic-gate 2270f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_upcalls; 2271bd670b35SErik Nordmark if (IPCL_IS_NONSTR(mrouter) ? 
mrouter->conn_flow_cntrld : 2272bd670b35SErik Nordmark !canputnext(mrouter->conn_rq)) { 2273f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regsend_drops; 2274f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 2275fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22767c478bd9Sstevel@tonic-gate "register_send: register upcall failure."); 22777c478bd9Sstevel@tonic-gate } 2278bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2279bd670b35SErik Nordmark ip_drop_input("mrts_pim_regsend_drops", mp_copy, ill); 22807c478bd9Sstevel@tonic-gate freemsg(mp_copy); 22817c478bd9Sstevel@tonic-gate } else { 2282fc80c0dfSnordmark /* Pass to RAWIP */ 2283bd670b35SErik Nordmark bzero(&iras, sizeof (iras)); 2284bd670b35SErik Nordmark iras.ira_flags = IRAF_IS_IPV4; 2285bd670b35SErik Nordmark iras.ira_ip_hdr_length = sizeof (ipha_t); 2286bd670b35SErik Nordmark iras.ira_pktlen = msgdsize(mp_copy); 2287bd670b35SErik Nordmark (mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras); 2288bd670b35SErik Nordmark ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 22897c478bd9Sstevel@tonic-gate } 22907c478bd9Sstevel@tonic-gate } 22917c478bd9Sstevel@tonic-gate 22927c478bd9Sstevel@tonic-gate /* 22937c478bd9Sstevel@tonic-gate * pim_validate_cksum handles verification of the checksum in the 22947c478bd9Sstevel@tonic-gate * pim header. For PIM Register packets, the checksum is calculated 22957c478bd9Sstevel@tonic-gate * across the PIM header only. For all other packets, the checksum 22967c478bd9Sstevel@tonic-gate * is for the PIM header and remainder of the packet. 22977c478bd9Sstevel@tonic-gate * 22987c478bd9Sstevel@tonic-gate * returns: B_TRUE, if checksum is okay. 22997c478bd9Sstevel@tonic-gate * B_FALSE, if checksum is not valid. 
23007c478bd9Sstevel@tonic-gate */ 23017c478bd9Sstevel@tonic-gate static boolean_t 23027c478bd9Sstevel@tonic-gate pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp) 23037c478bd9Sstevel@tonic-gate { 23047c478bd9Sstevel@tonic-gate mblk_t *mp_dup; 23057c478bd9Sstevel@tonic-gate 23067c478bd9Sstevel@tonic-gate if ((mp_dup = dupmsg(mp)) == NULL) 23077c478bd9Sstevel@tonic-gate return (B_FALSE); 23087c478bd9Sstevel@tonic-gate 23097c478bd9Sstevel@tonic-gate mp_dup->b_rptr += IPH_HDR_LENGTH(ip); 23107c478bd9Sstevel@tonic-gate if (pimp->pim_type == PIM_REGISTER) 23117c478bd9Sstevel@tonic-gate mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN; 23127c478bd9Sstevel@tonic-gate if (IP_CSUM(mp_dup, 0, 0)) { 23137c478bd9Sstevel@tonic-gate freemsg(mp_dup); 23147c478bd9Sstevel@tonic-gate return (B_FALSE); 23157c478bd9Sstevel@tonic-gate } 23167c478bd9Sstevel@tonic-gate freemsg(mp_dup); 23177c478bd9Sstevel@tonic-gate return (B_TRUE); 23187c478bd9Sstevel@tonic-gate } 23197c478bd9Sstevel@tonic-gate 23207c478bd9Sstevel@tonic-gate /* 2321bd670b35SErik Nordmark * Process PIM protocol packets i.e. IP Protocol 103. 2322bd670b35SErik Nordmark * Register messages are decapsulated and sent onto multicast forwarding. 2323bd670b35SErik Nordmark * 2324bd670b35SErik Nordmark * Return NULL for a bad packet that is discarded here. 2325bd670b35SErik Nordmark * Return mp if the message is OK and should be handed to "raw" receivers. 2326bd670b35SErik Nordmark * Callers of pim_input() may need to reinitialize variables that were copied 2327bd670b35SErik Nordmark * from the mblk as this calls pullupmsg(). 
23287c478bd9Sstevel@tonic-gate */ 2329bd670b35SErik Nordmark mblk_t * 2330bd670b35SErik Nordmark pim_input(mblk_t *mp, ip_recv_attr_t *ira) 23317c478bd9Sstevel@tonic-gate { 23327c478bd9Sstevel@tonic-gate ipha_t *eip, *ip; 23337c478bd9Sstevel@tonic-gate int iplen, pimlen, iphlen; 23347c478bd9Sstevel@tonic-gate struct pim *pimp; /* pointer to a pim struct */ 23357c478bd9Sstevel@tonic-gate uint32_t *reghdr; 2336bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 2337f4b3ec61Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 2338fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 23397c478bd9Sstevel@tonic-gate 23407c478bd9Sstevel@tonic-gate /* 23417c478bd9Sstevel@tonic-gate * Pullup the msg for PIM protocol processing. 23427c478bd9Sstevel@tonic-gate */ 23437c478bd9Sstevel@tonic-gate if (pullupmsg(mp, -1) == 0) { 2344f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2345bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2346bd670b35SErik Nordmark ip_drop_input("mrts_pim_nomemory", mp, ill); 23477c478bd9Sstevel@tonic-gate freemsg(mp); 2348bd670b35SErik Nordmark return (NULL); 23497c478bd9Sstevel@tonic-gate } 23507c478bd9Sstevel@tonic-gate 23517c478bd9Sstevel@tonic-gate ip = (ipha_t *)mp->b_rptr; 23527c478bd9Sstevel@tonic-gate iplen = ip->ipha_length; 23537c478bd9Sstevel@tonic-gate iphlen = IPH_HDR_LENGTH(ip); 23547c478bd9Sstevel@tonic-gate pimlen = ntohs(iplen) - iphlen; 23557c478bd9Sstevel@tonic-gate 23567c478bd9Sstevel@tonic-gate /* 23577c478bd9Sstevel@tonic-gate * Validate lengths 23587c478bd9Sstevel@tonic-gate */ 23597c478bd9Sstevel@tonic-gate if (pimlen < PIM_MINLEN) { 2360f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_malformed; 2361f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2362fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 23637c478bd9Sstevel@tonic-gate "pim_input: length not at least minlen"); 23647c478bd9Sstevel@tonic-gate } 2365bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 
2366bd670b35SErik Nordmark ip_drop_input("mrts_pim_malformed", mp, ill); 23677c478bd9Sstevel@tonic-gate freemsg(mp); 2368bd670b35SErik Nordmark return (NULL); 23697c478bd9Sstevel@tonic-gate } 23707c478bd9Sstevel@tonic-gate 23717c478bd9Sstevel@tonic-gate /* 23727c478bd9Sstevel@tonic-gate * Point to the PIM header. 23737c478bd9Sstevel@tonic-gate */ 23747c478bd9Sstevel@tonic-gate pimp = (struct pim *)((caddr_t)ip + iphlen); 23757c478bd9Sstevel@tonic-gate 23767c478bd9Sstevel@tonic-gate /* 23777c478bd9Sstevel@tonic-gate * Check the version number. 23787c478bd9Sstevel@tonic-gate */ 23797c478bd9Sstevel@tonic-gate if (pimp->pim_vers != PIM_VERSION) { 2380f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badversion; 2381f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2382fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 23837c478bd9Sstevel@tonic-gate "pim_input: unknown version of PIM"); 23847c478bd9Sstevel@tonic-gate } 2385bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2386bd670b35SErik Nordmark ip_drop_input("mrts_pim_badversion", mp, ill); 23877c478bd9Sstevel@tonic-gate freemsg(mp); 2388bd670b35SErik Nordmark return (NULL); 23897c478bd9Sstevel@tonic-gate } 23907c478bd9Sstevel@tonic-gate 23917c478bd9Sstevel@tonic-gate /* 23927c478bd9Sstevel@tonic-gate * Validate the checksum 23937c478bd9Sstevel@tonic-gate */ 23947c478bd9Sstevel@tonic-gate if (!pim_validate_cksum(mp, ip, pimp)) { 2395f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_rcv_badcsum; 2396f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2397fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 23987c478bd9Sstevel@tonic-gate "pim_input: invalid checksum"); 23997c478bd9Sstevel@tonic-gate } 2400bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2401bd670b35SErik Nordmark ip_drop_input("pim_rcv_badcsum", mp, ill); 24027c478bd9Sstevel@tonic-gate freemsg(mp); 2403bd670b35SErik Nordmark return (NULL); 24047c478bd9Sstevel@tonic-gate } 
24057c478bd9Sstevel@tonic-gate 24067c478bd9Sstevel@tonic-gate if (pimp->pim_type != PIM_REGISTER) 2407bd670b35SErik Nordmark return (mp); 24087c478bd9Sstevel@tonic-gate 24097c478bd9Sstevel@tonic-gate reghdr = (uint32_t *)(pimp + 1); 24107c478bd9Sstevel@tonic-gate eip = (ipha_t *)(reghdr + 1); 24117c478bd9Sstevel@tonic-gate 24127c478bd9Sstevel@tonic-gate /* 24137c478bd9Sstevel@tonic-gate * check if the inner packet is destined to mcast group 24147c478bd9Sstevel@tonic-gate */ 24157c478bd9Sstevel@tonic-gate if (!CLASSD(eip->ipha_dst)) { 2416f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badregisters; 2417f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2418fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24197c478bd9Sstevel@tonic-gate "pim_input: Inner pkt not mcast .. !"); 24207c478bd9Sstevel@tonic-gate } 2421bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2422bd670b35SErik Nordmark ip_drop_input("mrts_pim_badregisters", mp, ill); 24237c478bd9Sstevel@tonic-gate freemsg(mp); 2424bd670b35SErik Nordmark return (NULL); 24257c478bd9Sstevel@tonic-gate } 2426f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2427fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24287c478bd9Sstevel@tonic-gate "register from %x, to %x, len %d", 24297c478bd9Sstevel@tonic-gate ntohl(eip->ipha_src), 24307c478bd9Sstevel@tonic-gate ntohl(eip->ipha_dst), 24317c478bd9Sstevel@tonic-gate ntohs(eip->ipha_length)); 24327c478bd9Sstevel@tonic-gate } 24337c478bd9Sstevel@tonic-gate /* 24347c478bd9Sstevel@tonic-gate * If the null register bit is not set, decapsulate 24357c478bd9Sstevel@tonic-gate * the packet before forwarding it. 
2436bd670b35SErik Nordmark * Avoid this in no register vif 24377c478bd9Sstevel@tonic-gate */ 2438bd670b35SErik Nordmark if (!(ntohl(*reghdr) & PIM_NULL_REGISTER) && 2439bd670b35SErik Nordmark ipst->ips_reg_vif_num != ALL_VIFS) { 24407c478bd9Sstevel@tonic-gate mblk_t *mp_copy; 2441bd670b35SErik Nordmark uint_t saved_pktlen; 24427c478bd9Sstevel@tonic-gate 24437c478bd9Sstevel@tonic-gate /* Copy the message */ 24447c478bd9Sstevel@tonic-gate if ((mp_copy = copymsg(mp)) == NULL) { 2445f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2446bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2447bd670b35SErik Nordmark ip_drop_input("mrts_pim_nomemory", mp, ill); 24487c478bd9Sstevel@tonic-gate freemsg(mp); 2449bd670b35SErik Nordmark return (NULL); 24507c478bd9Sstevel@tonic-gate } 24517c478bd9Sstevel@tonic-gate 24527c478bd9Sstevel@tonic-gate /* 24537c478bd9Sstevel@tonic-gate * Decapsulate the packet and give it to 24547c478bd9Sstevel@tonic-gate * register_mforward. 24557c478bd9Sstevel@tonic-gate */ 2456bd670b35SErik Nordmark mp_copy->b_rptr += iphlen + sizeof (pim_t) + sizeof (*reghdr); 2457bd670b35SErik Nordmark saved_pktlen = ira->ira_pktlen; 2458bd670b35SErik Nordmark ira->ira_pktlen -= iphlen + sizeof (pim_t) + sizeof (*reghdr); 2459bd670b35SErik Nordmark if (register_mforward(mp_copy, ira) != 0) { 2460bd670b35SErik Nordmark /* register_mforward already called ip_drop_input */ 24617c478bd9Sstevel@tonic-gate freemsg(mp); 2462bd670b35SErik Nordmark ira->ira_pktlen = saved_pktlen; 2463bd670b35SErik Nordmark return (NULL); 24647c478bd9Sstevel@tonic-gate } 2465bd670b35SErik Nordmark ira->ira_pktlen = saved_pktlen; 24667c478bd9Sstevel@tonic-gate } 24677c478bd9Sstevel@tonic-gate 24687c478bd9Sstevel@tonic-gate /* 24697c478bd9Sstevel@tonic-gate * Pass all valid PIM packets up to any process(es) listening on a raw 24707c478bd9Sstevel@tonic-gate * PIM socket. For Solaris it is done right after pim_input() is 24717c478bd9Sstevel@tonic-gate * called. 
24727c478bd9Sstevel@tonic-gate */ 2473bd670b35SErik Nordmark return (mp); 24747c478bd9Sstevel@tonic-gate } 24757c478bd9Sstevel@tonic-gate 24767c478bd9Sstevel@tonic-gate /* 24777c478bd9Sstevel@tonic-gate * PIM sparse mode hook. Called by pim_input after decapsulating 24787c478bd9Sstevel@tonic-gate * the packet. Loop back the packet, as if we have received it. 24797c478bd9Sstevel@tonic-gate * In pim_input() we have to check if the destination is a multicast address. 24807c478bd9Sstevel@tonic-gate */ 24817c478bd9Sstevel@tonic-gate static int 2482bd670b35SErik Nordmark register_mforward(mblk_t *mp, ip_recv_attr_t *ira) 24837c478bd9Sstevel@tonic-gate { 2484bd670b35SErik Nordmark ire_t *ire; 2485bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr; 2486bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 2487f4b3ec61Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 2488fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 24897c478bd9Sstevel@tonic-gate 2490f4b3ec61Sdh155122 ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs); 2491f4b3ec61Sdh155122 2492f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 2493fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24947c478bd9Sstevel@tonic-gate "register_mforward: src %x, dst %x\n", 24957c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 24967c478bd9Sstevel@tonic-gate } 24977c478bd9Sstevel@tonic-gate /* 24987c478bd9Sstevel@tonic-gate * Need to pass in to ip_mforward() the information that the 2499bd670b35SErik Nordmark * packet has arrived on the register_vif. We mark it with 2500bd670b35SErik Nordmark * the IRAF_PIM_REGISTER attribute. 2501bd670b35SErik Nordmark * pim_input verified that the (inner) destination is multicast, 2502bd670b35SErik Nordmark * hence we skip the generic code in ip_input. 
25037c478bd9Sstevel@tonic-gate */ 2504bd670b35SErik Nordmark ira->ira_flags |= IRAF_PIM_REGISTER; 2505f4b3ec61Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regforwards; 2506bd670b35SErik Nordmark 2507bd670b35SErik Nordmark if (!CLASSD(ipha->ipha_dst)) { 2508bd670b35SErik Nordmark ire = ire_route_recursive_v4(ipha->ipha_dst, 0, NULL, ALL_ZONES, 25099e3469d3SErik Nordmark ira->ira_tsl, MATCH_IRE_SECATTR, IRR_ALLOCATE, 0, ipst, 25109e3469d3SErik Nordmark NULL, NULL, NULL); 2511bd670b35SErik Nordmark } else { 2512bd670b35SErik Nordmark ire = ire_multicast(ill); 2513bd670b35SErik Nordmark } 2514bd670b35SErik Nordmark ASSERT(ire != NULL); 2515bd670b35SErik Nordmark /* Normally this will return the IRE_MULTICAST */ 2516bd670b35SErik Nordmark if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2517bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2518bd670b35SErik Nordmark ip_drop_input("mrts_pim RTF_REJECT", mp, ill); 2519bd670b35SErik Nordmark freemsg(mp); 2520bd670b35SErik Nordmark ire_refrele(ire); 2521bd670b35SErik Nordmark return (-1); 2522bd670b35SErik Nordmark } 2523bd670b35SErik Nordmark ASSERT(ire->ire_type & IRE_MULTICAST); 2524bd670b35SErik Nordmark (*ire->ire_recvfn)(ire, mp, ipha, ira); 2525bd670b35SErik Nordmark ire_refrele(ire); 2526bd670b35SErik Nordmark 25277c478bd9Sstevel@tonic-gate return (0); 25287c478bd9Sstevel@tonic-gate } 25297c478bd9Sstevel@tonic-gate 25307c478bd9Sstevel@tonic-gate /* 25317c478bd9Sstevel@tonic-gate * Send an encapsulated packet. 25327c478bd9Sstevel@tonic-gate * Caller assumes can continue to use mp when routine returns. 
25337c478bd9Sstevel@tonic-gate */ 25347c478bd9Sstevel@tonic-gate /* ARGSUSED */ 25357c478bd9Sstevel@tonic-gate static void 25367c478bd9Sstevel@tonic-gate encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 25377c478bd9Sstevel@tonic-gate { 25387c478bd9Sstevel@tonic-gate mblk_t *mp_copy; 25397c478bd9Sstevel@tonic-gate ipha_t *ipha_copy; 25407c478bd9Sstevel@tonic-gate size_t len; 2541f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2542fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 25437c478bd9Sstevel@tonic-gate 2544f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2545fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2546f4b3ec61Sdh155122 "encap_send: vif %ld enter", 2547f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 25487c478bd9Sstevel@tonic-gate } 25497c478bd9Sstevel@tonic-gate len = ntohs(ipha->ipha_length); 25507c478bd9Sstevel@tonic-gate 25517c478bd9Sstevel@tonic-gate /* 25527c478bd9Sstevel@tonic-gate * Copy the old packet & pullup it's IP header into the 25537c478bd9Sstevel@tonic-gate * new mbuf so we can modify it. Try to fill the new 25547c478bd9Sstevel@tonic-gate * mbuf since if we don't the ethernet driver will. 25557c478bd9Sstevel@tonic-gate */ 25567c478bd9Sstevel@tonic-gate mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED); 25577c478bd9Sstevel@tonic-gate if (mp_copy == NULL) 25587c478bd9Sstevel@tonic-gate return; 25597c478bd9Sstevel@tonic-gate mp_copy->b_rptr += 32; 25607c478bd9Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr); 25617c478bd9Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 25627c478bd9Sstevel@tonic-gate freeb(mp_copy); 25637c478bd9Sstevel@tonic-gate return; 25647c478bd9Sstevel@tonic-gate } 25657c478bd9Sstevel@tonic-gate 25667c478bd9Sstevel@tonic-gate /* 25677c478bd9Sstevel@tonic-gate * Fill in the encapsulating IP header. 25687c478bd9Sstevel@tonic-gate * Remote tunnel dst in rmt_addr, from add_vif(). 
25697c478bd9Sstevel@tonic-gate */ 25707c478bd9Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr; 25717c478bd9Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr; 25727c478bd9Sstevel@tonic-gate ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET); 25737c478bd9Sstevel@tonic-gate ipha_copy->ipha_length = htons(len + sizeof (ipha_t)); 25747c478bd9Sstevel@tonic-gate ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr; 25757c478bd9Sstevel@tonic-gate ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr; 25767c478bd9Sstevel@tonic-gate ASSERT(ipha_copy->ipha_ident == 0); 25777c478bd9Sstevel@tonic-gate 25787c478bd9Sstevel@tonic-gate /* Turn the encapsulated IP header back into a valid one. */ 25797c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp_copy->b_cont->b_rptr; 25807c478bd9Sstevel@tonic-gate ipha->ipha_ttl--; 25817c478bd9Sstevel@tonic-gate ipha->ipha_hdr_checksum = 0; 25827c478bd9Sstevel@tonic-gate ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 25837c478bd9Sstevel@tonic-gate 2584bd670b35SErik Nordmark ipha_copy->ipha_ttl = ipha->ipha_ttl; 2585bd670b35SErik Nordmark 2586f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2587fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 25887c478bd9Sstevel@tonic-gate "encap_send: group 0x%x", ntohl(ipha->ipha_dst)); 25897c478bd9Sstevel@tonic-gate } 25907c478bd9Sstevel@tonic-gate if (vifp->v_rate_limit <= 0) 25917c478bd9Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy); 25927c478bd9Sstevel@tonic-gate else 25937c478bd9Sstevel@tonic-gate /* ipha is from the original header */ 25947c478bd9Sstevel@tonic-gate tbf_control(vifp, mp_copy, ipha); 25957c478bd9Sstevel@tonic-gate } 25967c478bd9Sstevel@tonic-gate 25977c478bd9Sstevel@tonic-gate /* 2598bd670b35SErik Nordmark * De-encapsulate a packet and feed it back through IP input if it 2599bd670b35SErik Nordmark * matches one of our multicast tunnels. 
2600bd670b35SErik Nordmark * 26017c478bd9Sstevel@tonic-gate * This routine is called whenever IP gets a packet with prototype 2602bd670b35SErik Nordmark * IPPROTO_ENCAP and a local destination address and the packet didn't 2603bd670b35SErik Nordmark * match one of our configured IP-in-IP tunnels. 26047c478bd9Sstevel@tonic-gate */ 26057c478bd9Sstevel@tonic-gate void 2606bd670b35SErik Nordmark ip_mroute_decap(mblk_t *mp, ip_recv_attr_t *ira) 26077c478bd9Sstevel@tonic-gate { 26087c478bd9Sstevel@tonic-gate ipha_t *ipha = (ipha_t *)mp->b_rptr; 26097c478bd9Sstevel@tonic-gate ipha_t *ipha_encap; 26107c478bd9Sstevel@tonic-gate int hlen = IPH_HDR_LENGTH(ipha); 2611bd670b35SErik Nordmark int hlen_encap; 26127c478bd9Sstevel@tonic-gate ipaddr_t src; 26137c478bd9Sstevel@tonic-gate struct vif *vifp; 2614bd670b35SErik Nordmark ire_t *ire; 2615bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 2616f4b3ec61Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 2617fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 26187c478bd9Sstevel@tonic-gate 2619bd670b35SErik Nordmark /* Make sure we have all of the inner header */ 2620bd670b35SErik Nordmark ipha_encap = (ipha_t *)((char *)ipha + hlen); 2621bd670b35SErik Nordmark if (mp->b_wptr - mp->b_rptr < hlen + IP_SIMPLE_HDR_LENGTH) { 2622bd670b35SErik Nordmark ipha = ip_pullup(mp, hlen + IP_SIMPLE_HDR_LENGTH, ira); 2623bd670b35SErik Nordmark if (ipha == NULL) { 2624bd670b35SErik Nordmark ipst->ips_mrtstat->mrts_bad_tunnel++; 2625bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2626bd670b35SErik Nordmark ip_drop_input("ip_mroute_decap: too short", mp, ill); 2627bd670b35SErik Nordmark freemsg(mp); 2628bd670b35SErik Nordmark return; 2629bd670b35SErik Nordmark } 2630bd670b35SErik Nordmark ipha_encap = (ipha_t *)((char *)ipha + hlen); 2631bd670b35SErik Nordmark } 2632bd670b35SErik Nordmark hlen_encap = IPH_HDR_LENGTH(ipha_encap); 2633bd670b35SErik Nordmark if (mp->b_wptr - mp->b_rptr < hlen + hlen_encap) { 2634bd670b35SErik 
Nordmark ipha = ip_pullup(mp, hlen + hlen_encap, ira); 2635bd670b35SErik Nordmark if (ipha == NULL) { 2636bd670b35SErik Nordmark ipst->ips_mrtstat->mrts_bad_tunnel++; 2637bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2638bd670b35SErik Nordmark ip_drop_input("ip_mroute_decap: too short", mp, ill); 2639bd670b35SErik Nordmark freemsg(mp); 2640bd670b35SErik Nordmark return; 2641bd670b35SErik Nordmark } 2642bd670b35SErik Nordmark ipha_encap = (ipha_t *)((char *)ipha + hlen); 2643bd670b35SErik Nordmark } 2644bd670b35SErik Nordmark 26457c478bd9Sstevel@tonic-gate /* 26467c478bd9Sstevel@tonic-gate * Dump the packet if it's not to a multicast destination or if 26477c478bd9Sstevel@tonic-gate * we don't have an encapsulating tunnel with the source. 26487c478bd9Sstevel@tonic-gate * Note: This code assumes that the remote site IP address 26497c478bd9Sstevel@tonic-gate * uniquely identifies the tunnel (i.e., that this site has 26507c478bd9Sstevel@tonic-gate * at most one tunnel with the remote site). 
26517c478bd9Sstevel@tonic-gate */ 26527c478bd9Sstevel@tonic-gate if (!CLASSD(ipha_encap->ipha_dst)) { 2653f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 26547c478bd9Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: bad tunnel\n")); 2655bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2656bd670b35SErik Nordmark ip_drop_input("mrts_bad_tunnel", mp, ill); 26577c478bd9Sstevel@tonic-gate freemsg(mp); 26587c478bd9Sstevel@tonic-gate return; 26597c478bd9Sstevel@tonic-gate } 26607c478bd9Sstevel@tonic-gate src = (ipaddr_t)ipha->ipha_src; 2661f4b3ec61Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 2662f4b3ec61Sdh155122 if (src != ipst->ips_last_encap_src) { 26637c478bd9Sstevel@tonic-gate struct vif *vife; 26647c478bd9Sstevel@tonic-gate 2665f4b3ec61Sdh155122 vifp = ipst->ips_vifs; 2666f4b3ec61Sdh155122 vife = vifp + ipst->ips_numvifs; 2667f4b3ec61Sdh155122 ipst->ips_last_encap_src = src; 2668f4b3ec61Sdh155122 ipst->ips_last_encap_vif = 0; 26697c478bd9Sstevel@tonic-gate for (; vifp < vife; ++vifp) { 26707c478bd9Sstevel@tonic-gate if (!lock_good_vif(vifp)) 26717c478bd9Sstevel@tonic-gate continue; 26727c478bd9Sstevel@tonic-gate if (vifp->v_rmt_addr.s_addr == src) { 26737c478bd9Sstevel@tonic-gate if (vifp->v_flags & VIFF_TUNNEL) 2674f4b3ec61Sdh155122 ipst->ips_last_encap_vif = vifp; 2675f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2676fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 26777c478bd9Sstevel@tonic-gate 1, SL_TRACE, 26787c478bd9Sstevel@tonic-gate "ip_mroute_decap: good tun " 26797c478bd9Sstevel@tonic-gate "vif %ld with %x", 2680f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 26817c478bd9Sstevel@tonic-gate ntohl(src)); 26827c478bd9Sstevel@tonic-gate } 26837c478bd9Sstevel@tonic-gate unlock_good_vif(vifp); 26847c478bd9Sstevel@tonic-gate break; 26857c478bd9Sstevel@tonic-gate } 26867c478bd9Sstevel@tonic-gate unlock_good_vif(vifp); 26877c478bd9Sstevel@tonic-gate } 26887c478bd9Sstevel@tonic-gate } 2689f4b3ec61Sdh155122 if ((vifp = 
ipst->ips_last_encap_vif) == 0) { 2690f4b3ec61Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 2691f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 2692bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2693bd670b35SErik Nordmark ip_drop_input("mrts_bad_tunnel", mp, ill); 26947c478bd9Sstevel@tonic-gate freemsg(mp); 26957c478bd9Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n", 2696f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src))); 26977c478bd9Sstevel@tonic-gate return; 26987c478bd9Sstevel@tonic-gate } 2699f4b3ec61Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 27007c478bd9Sstevel@tonic-gate 27017c478bd9Sstevel@tonic-gate /* 27027c478bd9Sstevel@tonic-gate * Need to pass in the tunnel source to ip_mforward (so that it can 2703bd670b35SErik Nordmark * verify that the packet arrived over the correct vif.) 27047c478bd9Sstevel@tonic-gate */ 2705bd670b35SErik Nordmark ira->ira_flags |= IRAF_MROUTE_TUNNEL_SET; 2706bd670b35SErik Nordmark ira->ira_mroute_tunnel = src; 27077c478bd9Sstevel@tonic-gate mp->b_rptr += hlen; 2708bd670b35SErik Nordmark ira->ira_pktlen -= hlen; 2709bd670b35SErik Nordmark ira->ira_ip_hdr_length = hlen_encap; 2710bd670b35SErik Nordmark 2711bd670b35SErik Nordmark /* 2712bd670b35SErik Nordmark * We don't redo any of the filtering in ill_input_full_v4 and we 2713bd670b35SErik Nordmark * have checked that all of ipha_encap and any IP options are 2714bd670b35SErik Nordmark * pulled up. Hence we call ire_recv_multicast_v4 directly. 2715bd670b35SErik Nordmark * However, we have to check for RSVP as in ip_input_full_v4 2716bd670b35SErik Nordmark * and if so we pass it to ire_recv_broadcast_v4 for local delivery 2717bd670b35SErik Nordmark * to the rsvpd. 
2718bd670b35SErik Nordmark */ 2719bd670b35SErik Nordmark if (ipha_encap->ipha_protocol == IPPROTO_RSVP && 2720bd670b35SErik Nordmark ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) { 2721bd670b35SErik Nordmark ire = ire_route_recursive_v4(INADDR_BROADCAST, 0, ill, 2722bd670b35SErik Nordmark ALL_ZONES, ira->ira_tsl, MATCH_IRE_ILL|MATCH_IRE_SECATTR, 27239e3469d3SErik Nordmark IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL); 2724bd670b35SErik Nordmark } else { 2725bd670b35SErik Nordmark ire = ire_multicast(ill); 2726bd670b35SErik Nordmark } 2727bd670b35SErik Nordmark ASSERT(ire != NULL); 2728bd670b35SErik Nordmark /* Normally this will return the IRE_MULTICAST or IRE_BROADCAST */ 2729bd670b35SErik Nordmark if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2730bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2731bd670b35SErik Nordmark ip_drop_input("ip_mroute_decap: RTF_REJECT", mp, ill); 2732bd670b35SErik Nordmark freemsg(mp); 2733bd670b35SErik Nordmark ire_refrele(ire); 2734bd670b35SErik Nordmark return; 2735bd670b35SErik Nordmark } 2736bd670b35SErik Nordmark ire->ire_ib_pkt_count++; 2737bd670b35SErik Nordmark ASSERT(ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST)); 2738bd670b35SErik Nordmark (*ire->ire_recvfn)(ire, mp, ipha_encap, ira); 2739bd670b35SErik Nordmark ire_refrele(ire); 27407c478bd9Sstevel@tonic-gate } 27417c478bd9Sstevel@tonic-gate 27427c478bd9Sstevel@tonic-gate /* 27437c478bd9Sstevel@tonic-gate * Remove all records with v_ipif == ipif. Called when an interface goes away 27447c478bd9Sstevel@tonic-gate * (stream closed). Called as writer. 
27457c478bd9Sstevel@tonic-gate */ 27467c478bd9Sstevel@tonic-gate void 27477c478bd9Sstevel@tonic-gate reset_mrt_vif_ipif(ipif_t *ipif) 27487c478bd9Sstevel@tonic-gate { 27497c478bd9Sstevel@tonic-gate vifi_t vifi, tmp_vifi; 27507c478bd9Sstevel@tonic-gate vifi_t num_of_vifs; 2751f4b3ec61Sdh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 27527c478bd9Sstevel@tonic-gate 27537c478bd9Sstevel@tonic-gate /* Can't check vifi >= 0 since vifi_t is unsigned! */ 27547c478bd9Sstevel@tonic-gate 2755f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 2756f4b3ec61Sdh155122 num_of_vifs = ipst->ips_numvifs; 2757f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 27587c478bd9Sstevel@tonic-gate 27597c478bd9Sstevel@tonic-gate for (vifi = num_of_vifs; vifi != 0; vifi--) { 27607c478bd9Sstevel@tonic-gate tmp_vifi = vifi - 1; 2761f4b3ec61Sdh155122 if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) { 2762bd670b35SErik Nordmark (void) del_vif(&tmp_vifi, ipst); 27637c478bd9Sstevel@tonic-gate } 27647c478bd9Sstevel@tonic-gate } 27657c478bd9Sstevel@tonic-gate } 27667c478bd9Sstevel@tonic-gate 27677c478bd9Sstevel@tonic-gate /* Remove pending upcall msgs when ill goes away. Called by ill_delete. 
*/ 27687c478bd9Sstevel@tonic-gate void 27697c478bd9Sstevel@tonic-gate reset_mrt_ill(ill_t *ill) 27707c478bd9Sstevel@tonic-gate { 27717c478bd9Sstevel@tonic-gate struct mfc *rt; 27727c478bd9Sstevel@tonic-gate struct rtdetq *rte; 27737c478bd9Sstevel@tonic-gate int i; 2774f4b3ec61Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 2775fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2776bd670b35SErik Nordmark timeout_id_t id; 27777c478bd9Sstevel@tonic-gate 27787c478bd9Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 2779f4b3ec61Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]); 2780f4b3ec61Sdh155122 if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) { 2781f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2782fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, 2783f4b3ec61Sdh155122 SL_TRACE, 27847c478bd9Sstevel@tonic-gate "reset_mrt_ill: mfctable [%d]", i); 27857c478bd9Sstevel@tonic-gate } 27867c478bd9Sstevel@tonic-gate while (rt != NULL) { 27877c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 27887c478bd9Sstevel@tonic-gate while ((rte = rt->mfc_rte) != NULL) { 2789bd670b35SErik Nordmark if (rte->ill == ill && 2790bd670b35SErik Nordmark (id = rt->mfc_timeout_id) != 0) { 2791bd670b35SErik Nordmark /* 2792bd670b35SErik Nordmark * Its ok to drop the lock, the 2793bd670b35SErik Nordmark * struct cannot be freed since 2794bd670b35SErik Nordmark * we have a ref on the hash 2795bd670b35SErik Nordmark * bucket. 
2796bd670b35SErik Nordmark */ 2797bd670b35SErik Nordmark mutex_exit(&rt->mfc_mutex); 2798bd670b35SErik Nordmark (void) untimeout(id); 2799bd670b35SErik Nordmark mutex_enter(&rt->mfc_mutex); 2800bd670b35SErik Nordmark } 28017c478bd9Sstevel@tonic-gate if (rte->ill == ill) { 2802f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 28037c478bd9Sstevel@tonic-gate (void) mi_strlog( 2804fc80c0dfSnordmark mrouter->conn_rq, 28057c478bd9Sstevel@tonic-gate 1, SL_TRACE, 28067c478bd9Sstevel@tonic-gate "reset_mrt_ill: " 2807903a11ebSrh87107 "ill 0x%p", (void *)ill); 28087c478bd9Sstevel@tonic-gate } 28097c478bd9Sstevel@tonic-gate rt->mfc_rte = rte->rte_next; 28107c478bd9Sstevel@tonic-gate freemsg(rte->mp); 28117c478bd9Sstevel@tonic-gate mi_free((char *)rte); 28127c478bd9Sstevel@tonic-gate } 28137c478bd9Sstevel@tonic-gate } 28147c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 28157c478bd9Sstevel@tonic-gate rt = rt->mfc_next; 28167c478bd9Sstevel@tonic-gate } 28177c478bd9Sstevel@tonic-gate } 2818f4b3ec61Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 28197c478bd9Sstevel@tonic-gate } 28207c478bd9Sstevel@tonic-gate } 28217c478bd9Sstevel@tonic-gate 28227c478bd9Sstevel@tonic-gate /* 28237c478bd9Sstevel@tonic-gate * Token bucket filter module. 28247c478bd9Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap. 
28257c478bd9Sstevel@tonic-gate */ 28267c478bd9Sstevel@tonic-gate static void 28277c478bd9Sstevel@tonic-gate tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) 28287c478bd9Sstevel@tonic-gate { 28297c478bd9Sstevel@tonic-gate size_t p_len = msgdsize(mp); 28307c478bd9Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 28317c478bd9Sstevel@tonic-gate timeout_id_t id = 0; 2832bd670b35SErik Nordmark ill_t *ill = vifp->v_ipif->ipif_ill; 2833bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 2834fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 28357c478bd9Sstevel@tonic-gate 28367c478bd9Sstevel@tonic-gate /* Drop if packet is too large */ 28377c478bd9Sstevel@tonic-gate if (p_len > MAX_BKT_SIZE) { 2838f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_pkt2large++; 2839bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2840bd670b35SErik Nordmark ip_drop_output("tbf_control - too large", mp, ill); 28417c478bd9Sstevel@tonic-gate freemsg(mp); 28427c478bd9Sstevel@tonic-gate return; 28437c478bd9Sstevel@tonic-gate } 2844f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2845fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 28467c478bd9Sstevel@tonic-gate "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x", 2847f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len, 28487c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_dst)); 28497c478bd9Sstevel@tonic-gate } 28507c478bd9Sstevel@tonic-gate 28517c478bd9Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 28527c478bd9Sstevel@tonic-gate 28537c478bd9Sstevel@tonic-gate tbf_update_tokens(vifp); 28547c478bd9Sstevel@tonic-gate 28557c478bd9Sstevel@tonic-gate /* 28567c478bd9Sstevel@tonic-gate * If there are enough tokens, 28577c478bd9Sstevel@tonic-gate * and the queue is empty, send this packet out. 
28587c478bd9Sstevel@tonic-gate */ 2859f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2860fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 28617c478bd9Sstevel@tonic-gate "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d", 2862f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len, 28637c478bd9Sstevel@tonic-gate t->tbf_q_len); 28647c478bd9Sstevel@tonic-gate } 28657c478bd9Sstevel@tonic-gate /* No packets are queued */ 28667c478bd9Sstevel@tonic-gate if (t->tbf_q_len == 0) { 28677c478bd9Sstevel@tonic-gate /* queue empty, send packet if enough tokens */ 28687c478bd9Sstevel@tonic-gate if (p_len <= t->tbf_n_tok) { 28697c478bd9Sstevel@tonic-gate t->tbf_n_tok -= p_len; 28707c478bd9Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 28717c478bd9Sstevel@tonic-gate tbf_send_packet(vifp, mp); 28727c478bd9Sstevel@tonic-gate return; 28737c478bd9Sstevel@tonic-gate } else { 28747c478bd9Sstevel@tonic-gate /* Queue packet and timeout till later */ 28757c478bd9Sstevel@tonic-gate tbf_queue(vifp, mp); 28767c478bd9Sstevel@tonic-gate ASSERT(vifp->v_timeout_id == 0); 28777c478bd9Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp, 28787c478bd9Sstevel@tonic-gate TBF_REPROCESS); 28797c478bd9Sstevel@tonic-gate } 28807c478bd9Sstevel@tonic-gate } else if (t->tbf_q_len < t->tbf_max_q_len) { 28817c478bd9Sstevel@tonic-gate /* Finite queue length, so queue pkts and process queue */ 28827c478bd9Sstevel@tonic-gate tbf_queue(vifp, mp); 28837c478bd9Sstevel@tonic-gate tbf_process_q(vifp); 28847c478bd9Sstevel@tonic-gate } else { 28857c478bd9Sstevel@tonic-gate /* Check that we have UDP header with IP header */ 28867c478bd9Sstevel@tonic-gate size_t hdr_length = IPH_HDR_LENGTH(ipha) + 28877c478bd9Sstevel@tonic-gate sizeof (struct udphdr); 28887c478bd9Sstevel@tonic-gate 28897c478bd9Sstevel@tonic-gate if ((mp->b_wptr - mp->b_rptr) < hdr_length) { 28907c478bd9Sstevel@tonic-gate if (!pullupmsg(mp, hdr_length)) { 2891bd670b35SErik Nordmark 
BUMP_MIB(ill->ill_ip_mib, 2892bd670b35SErik Nordmark ipIfStatsOutDiscards); 2893bd670b35SErik Nordmark ip_drop_output("tbf_control - pullup", mp, ill); 28947c478bd9Sstevel@tonic-gate freemsg(mp); 28957c478bd9Sstevel@tonic-gate ip1dbg(("tbf_ctl: couldn't pullup udp hdr, " 28967c478bd9Sstevel@tonic-gate "vif %ld src 0x%x dst 0x%x\n", 2897f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 28987c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_src), 28997c478bd9Sstevel@tonic-gate ntohl(ipha->ipha_dst))); 29007c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 29017c478bd9Sstevel@tonic-gate return; 29027c478bd9Sstevel@tonic-gate } else 29037c478bd9Sstevel@tonic-gate /* Have to reassign ipha after pullupmsg */ 29047c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 29057c478bd9Sstevel@tonic-gate } 29067c478bd9Sstevel@tonic-gate /* 29077c478bd9Sstevel@tonic-gate * Queue length too much, 29087c478bd9Sstevel@tonic-gate * try to selectively dq, or queue and process 29097c478bd9Sstevel@tonic-gate */ 29107c478bd9Sstevel@tonic-gate if (!tbf_dq_sel(vifp, ipha)) { 2911f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_q_overflow++; 2912bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2913bd670b35SErik Nordmark ip_drop_output("mrts_q_overflow", mp, ill); 29147c478bd9Sstevel@tonic-gate freemsg(mp); 29157c478bd9Sstevel@tonic-gate } else { 29167c478bd9Sstevel@tonic-gate tbf_queue(vifp, mp); 29177c478bd9Sstevel@tonic-gate tbf_process_q(vifp); 29187c478bd9Sstevel@tonic-gate } 29197c478bd9Sstevel@tonic-gate } 29207c478bd9Sstevel@tonic-gate if (t->tbf_q_len == 0) { 29217c478bd9Sstevel@tonic-gate id = vifp->v_timeout_id; 29227c478bd9Sstevel@tonic-gate vifp->v_timeout_id = 0; 29237c478bd9Sstevel@tonic-gate } 29247c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 29257c478bd9Sstevel@tonic-gate if (id != 0) 29267c478bd9Sstevel@tonic-gate (void) untimeout(id); 29277c478bd9Sstevel@tonic-gate } 29287c478bd9Sstevel@tonic-gate 29297c478bd9Sstevel@tonic-gate /* 
29307c478bd9Sstevel@tonic-gate * Adds a packet to the tbf queue at the interface. 29317c478bd9Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap. 29327c478bd9Sstevel@tonic-gate */ 29337c478bd9Sstevel@tonic-gate static void 29347c478bd9Sstevel@tonic-gate tbf_queue(struct vif *vifp, mblk_t *mp) 29357c478bd9Sstevel@tonic-gate { 29367c478bd9Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 2937f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2938fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 29397c478bd9Sstevel@tonic-gate 2940f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2941fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2942f4b3ec61Sdh155122 "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); 29437c478bd9Sstevel@tonic-gate } 29447c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 29457c478bd9Sstevel@tonic-gate 29467c478bd9Sstevel@tonic-gate if (t->tbf_t == NULL) { 29477c478bd9Sstevel@tonic-gate /* Queue was empty */ 29487c478bd9Sstevel@tonic-gate t->tbf_q = mp; 29497c478bd9Sstevel@tonic-gate } else { 29507c478bd9Sstevel@tonic-gate /* Insert at tail */ 29517c478bd9Sstevel@tonic-gate t->tbf_t->b_next = mp; 29527c478bd9Sstevel@tonic-gate } 29537c478bd9Sstevel@tonic-gate /* set new tail pointer */ 29547c478bd9Sstevel@tonic-gate t->tbf_t = mp; 29557c478bd9Sstevel@tonic-gate 29567c478bd9Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL; 29577c478bd9Sstevel@tonic-gate 29587c478bd9Sstevel@tonic-gate t->tbf_q_len++; 29597c478bd9Sstevel@tonic-gate } 29607c478bd9Sstevel@tonic-gate 29617c478bd9Sstevel@tonic-gate /* 29627c478bd9Sstevel@tonic-gate * Process the queue at the vif interface. 29637c478bd9Sstevel@tonic-gate * Drops the tbf_lock when sending packets. 29647c478bd9Sstevel@tonic-gate * 29657c478bd9Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0. 
29667c478bd9Sstevel@tonic-gate */ 29677c478bd9Sstevel@tonic-gate static void 29687c478bd9Sstevel@tonic-gate tbf_process_q(struct vif *vifp) 29697c478bd9Sstevel@tonic-gate { 29707c478bd9Sstevel@tonic-gate mblk_t *mp; 29717c478bd9Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 29727c478bd9Sstevel@tonic-gate size_t len; 2973f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2974fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 29757c478bd9Sstevel@tonic-gate 2976f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 2977fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 29787c478bd9Sstevel@tonic-gate "tbf_process_q 1: vif %ld qlen = %d", 2979f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len); 29807c478bd9Sstevel@tonic-gate } 29817c478bd9Sstevel@tonic-gate 29827c478bd9Sstevel@tonic-gate /* 29837c478bd9Sstevel@tonic-gate * Loop through the queue at the interface and send 29847c478bd9Sstevel@tonic-gate * as many packets as possible. 29857c478bd9Sstevel@tonic-gate */ 29867c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 29877c478bd9Sstevel@tonic-gate 29887c478bd9Sstevel@tonic-gate while (t->tbf_q_len > 0) { 29897c478bd9Sstevel@tonic-gate mp = t->tbf_q; 29907c478bd9Sstevel@tonic-gate len = (size_t)msgdsize(mp); /* length of ip pkt */ 29917c478bd9Sstevel@tonic-gate 29927c478bd9Sstevel@tonic-gate /* Determine if the packet can be sent */ 29937c478bd9Sstevel@tonic-gate if (len <= t->tbf_n_tok) { 29947c478bd9Sstevel@tonic-gate /* 29957c478bd9Sstevel@tonic-gate * If so, reduce no. of tokens, dequeue the packet, 29967c478bd9Sstevel@tonic-gate * send the packet. 
29977c478bd9Sstevel@tonic-gate */ 29987c478bd9Sstevel@tonic-gate t->tbf_n_tok -= len; 29997c478bd9Sstevel@tonic-gate 30007c478bd9Sstevel@tonic-gate t->tbf_q = mp->b_next; 30017c478bd9Sstevel@tonic-gate if (--t->tbf_q_len == 0) { 30027c478bd9Sstevel@tonic-gate t->tbf_t = NULL; 30037c478bd9Sstevel@tonic-gate } 30047c478bd9Sstevel@tonic-gate mp->b_next = NULL; 30057c478bd9Sstevel@tonic-gate /* Exit mutex before sending packet, then re-enter */ 30067c478bd9Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 30077c478bd9Sstevel@tonic-gate tbf_send_packet(vifp, mp); 30087c478bd9Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 30097c478bd9Sstevel@tonic-gate } else 30107c478bd9Sstevel@tonic-gate break; 30117c478bd9Sstevel@tonic-gate } 30127c478bd9Sstevel@tonic-gate } 30137c478bd9Sstevel@tonic-gate 30147c478bd9Sstevel@tonic-gate /* Called at tbf timeout to update tokens, process q and reset timer. */ 30157c478bd9Sstevel@tonic-gate static void 30167c478bd9Sstevel@tonic-gate tbf_reprocess_q(void *arg) 30177c478bd9Sstevel@tonic-gate { 30187c478bd9Sstevel@tonic-gate struct vif *vifp = arg; 3019f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 3020fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 30217c478bd9Sstevel@tonic-gate 30227c478bd9Sstevel@tonic-gate mutex_enter(&vifp->v_tbf->tbf_lock); 30237c478bd9Sstevel@tonic-gate vifp->v_timeout_id = 0; 30247c478bd9Sstevel@tonic-gate tbf_update_tokens(vifp); 30257c478bd9Sstevel@tonic-gate 30267c478bd9Sstevel@tonic-gate tbf_process_q(vifp); 30277c478bd9Sstevel@tonic-gate 30287c478bd9Sstevel@tonic-gate if (vifp->v_tbf->tbf_q_len > 0) { 30297c478bd9Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp, 30307c478bd9Sstevel@tonic-gate TBF_REPROCESS); 30317c478bd9Sstevel@tonic-gate } 30327c478bd9Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 30337c478bd9Sstevel@tonic-gate 3034f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 3035fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 
30367c478bd9Sstevel@tonic-gate "tbf_reprcess_q: vif %ld timeout id = %p", 3037f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id); 30387c478bd9Sstevel@tonic-gate } 30397c478bd9Sstevel@tonic-gate } 30407c478bd9Sstevel@tonic-gate 30417c478bd9Sstevel@tonic-gate /* 30427c478bd9Sstevel@tonic-gate * Function that will selectively discard a member of the tbf queue, 30437c478bd9Sstevel@tonic-gate * based on the precedence value and the priority. 30447c478bd9Sstevel@tonic-gate * 30457c478bd9Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0. 30467c478bd9Sstevel@tonic-gate */ 30477c478bd9Sstevel@tonic-gate static int 30487c478bd9Sstevel@tonic-gate tbf_dq_sel(struct vif *vifp, ipha_t *ipha) 30497c478bd9Sstevel@tonic-gate { 30507c478bd9Sstevel@tonic-gate uint_t p; 30517c478bd9Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 30527c478bd9Sstevel@tonic-gate mblk_t **np; 30537c478bd9Sstevel@tonic-gate mblk_t *last, *mp; 3054bd670b35SErik Nordmark ill_t *ill = vifp->v_ipif->ipif_ill; 3055bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 3056fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 30577c478bd9Sstevel@tonic-gate 3058f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 3059fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 30607c478bd9Sstevel@tonic-gate "dq_sel: vif %ld dst 0x%x", 3061f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst)); 30627c478bd9Sstevel@tonic-gate } 30637c478bd9Sstevel@tonic-gate 30647c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 30657c478bd9Sstevel@tonic-gate p = priority(vifp, ipha); 30667c478bd9Sstevel@tonic-gate 30677c478bd9Sstevel@tonic-gate np = &t->tbf_q; 30687c478bd9Sstevel@tonic-gate last = NULL; 30697c478bd9Sstevel@tonic-gate while ((mp = *np) != NULL) { 30707c478bd9Sstevel@tonic-gate if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) { 30717c478bd9Sstevel@tonic-gate *np = mp->b_next; 30727c478bd9Sstevel@tonic-gate /* If removing 
the last packet, fix the tail pointer */ 30737c478bd9Sstevel@tonic-gate if (mp == t->tbf_t) 30747c478bd9Sstevel@tonic-gate t->tbf_t = last; 30757c478bd9Sstevel@tonic-gate mp->b_prev = mp->b_next = NULL; 3076bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3077bd670b35SErik Nordmark ip_drop_output("tbf_dq_send", mp, ill); 30787c478bd9Sstevel@tonic-gate freemsg(mp); 30797c478bd9Sstevel@tonic-gate /* 30807c478bd9Sstevel@tonic-gate * It's impossible for the queue to be empty, but 30817c478bd9Sstevel@tonic-gate * we check anyway. 30827c478bd9Sstevel@tonic-gate */ 30837c478bd9Sstevel@tonic-gate if (--t->tbf_q_len == 0) { 30847c478bd9Sstevel@tonic-gate t->tbf_t = NULL; 30857c478bd9Sstevel@tonic-gate } 3086f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_drop_sel++; 30877c478bd9Sstevel@tonic-gate return (1); 30887c478bd9Sstevel@tonic-gate } 30897c478bd9Sstevel@tonic-gate np = &mp->b_next; 30907c478bd9Sstevel@tonic-gate last = mp; 30917c478bd9Sstevel@tonic-gate } 30927c478bd9Sstevel@tonic-gate return (0); 30937c478bd9Sstevel@tonic-gate } 30947c478bd9Sstevel@tonic-gate 30957c478bd9Sstevel@tonic-gate /* Sends packet, 2 cases - encap tunnel, phyint. 
*/ 30967c478bd9Sstevel@tonic-gate static void 30977c478bd9Sstevel@tonic-gate tbf_send_packet(struct vif *vifp, mblk_t *mp) 30987c478bd9Sstevel@tonic-gate { 3099bd670b35SErik Nordmark ipif_t *ipif = vifp->v_ipif; 3100bd670b35SErik Nordmark ill_t *ill = ipif->ipif_ill; 3101bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 3102fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 3103bd670b35SErik Nordmark ipha_t *ipha; 31047c478bd9Sstevel@tonic-gate 3105bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 31067c478bd9Sstevel@tonic-gate /* If encap tunnel options */ 31077c478bd9Sstevel@tonic-gate if (vifp->v_flags & VIFF_TUNNEL) { 3108bd670b35SErik Nordmark ip_xmit_attr_t ixas; 3109bd670b35SErik Nordmark 3110f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 3111fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 3112bd670b35SErik Nordmark "tbf_send_packet: ENCAP tunnel vif %ld", 3113f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 31147c478bd9Sstevel@tonic-gate } 3115bd670b35SErik Nordmark bzero(&ixas, sizeof (ixas)); 311644b099c4SSowmini Varadhan ixas.ixa_flags = 311744b099c4SSowmini Varadhan IXAF_IS_IPV4 | IXAF_NO_TTL_CHANGE | IXAF_VERIFY_SOURCE; 3118bd670b35SErik Nordmark ixas.ixa_ipst = ipst; 3119bd670b35SErik Nordmark ixas.ixa_ifindex = 0; 3120bd670b35SErik Nordmark ixas.ixa_cred = kcred; 3121bd670b35SErik Nordmark ixas.ixa_cpid = NOPID; 3122bd670b35SErik Nordmark ixas.ixa_tsl = NULL; 3123bd670b35SErik Nordmark ixas.ixa_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */ 3124bd670b35SErik Nordmark ixas.ixa_pktlen = ntohs(ipha->ipha_length); 3125bd670b35SErik Nordmark ixas.ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha); 31267c478bd9Sstevel@tonic-gate 31277c478bd9Sstevel@tonic-gate /* 3128bd670b35SErik Nordmark * Feed into ip_output_simple which will set the ident field 3129bd670b35SErik Nordmark * and checksum the encapsulating header. 
31307c478bd9Sstevel@tonic-gate * BSD gets the cached route vifp->v_route from ip_output() 31317c478bd9Sstevel@tonic-gate * to speed up route table lookups. Not necessary in SunOS 5.x. 3132bd670b35SErik Nordmark * One could make multicast forwarding faster by putting an 3133bd670b35SErik Nordmark * ip_xmit_attr_t in each vif thereby caching the ire/nce. 31347c478bd9Sstevel@tonic-gate */ 3135bd670b35SErik Nordmark (void) ip_output_simple(mp, &ixas); 3136bd670b35SErik Nordmark ixa_cleanup(&ixas); 31377c478bd9Sstevel@tonic-gate return; 31387c478bd9Sstevel@tonic-gate 31397c478bd9Sstevel@tonic-gate /* phyint */ 31407c478bd9Sstevel@tonic-gate } else { 31417c478bd9Sstevel@tonic-gate /* Need to loop back to members on the outgoing interface. */ 31427c478bd9Sstevel@tonic-gate ipaddr_t dst; 3143bd670b35SErik Nordmark ip_recv_attr_t iras; 3144bd670b35SErik Nordmark nce_t *nce; 3145bd670b35SErik Nordmark 3146bd670b35SErik Nordmark bzero(&iras, sizeof (iras)); 3147bd670b35SErik Nordmark iras.ira_flags = IRAF_IS_IPV4; 3148bd670b35SErik Nordmark iras.ira_ill = iras.ira_rill = ill; 3149bd670b35SErik Nordmark iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 3150bd670b35SErik Nordmark iras.ira_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */ 3151bd670b35SErik Nordmark iras.ira_pktlen = ntohs(ipha->ipha_length); 3152bd670b35SErik Nordmark iras.ira_ip_hdr_length = IPH_HDR_LENGTH(ipha); 3153bd670b35SErik Nordmark 31547c478bd9Sstevel@tonic-gate dst = ipha->ipha_dst; 3155bd670b35SErik Nordmark if (ill_hasmembers_v4(ill, dst)) { 3156bd670b35SErik Nordmark iras.ira_flags |= IRAF_LOOPBACK_COPY; 31577c478bd9Sstevel@tonic-gate } 3158f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 3159fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 31607c478bd9Sstevel@tonic-gate "tbf_send_pkt: phyint forward vif %ld dst = 0x%x", 3161f4b3ec61Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst)); 31627c478bd9Sstevel@tonic-gate } 3163bd670b35SErik Nordmark /* 3164bd670b35SErik 
Nordmark * Find an NCE which matches the nexthop. 3165bd670b35SErik Nordmark * For a pt-pt interface we use the other end of the pt-pt 3166bd670b35SErik Nordmark * link. 3167bd670b35SErik Nordmark */ 3168bd670b35SErik Nordmark if (ipif->ipif_flags & IPIF_POINTOPOINT) { 3169bd670b35SErik Nordmark dst = ipif->ipif_pp_dst_addr; 3170bd670b35SErik Nordmark nce = arp_nce_init(ill, dst, ill->ill_net_type); 3171bd670b35SErik Nordmark } else { 3172bd670b35SErik Nordmark nce = arp_nce_init(ill, dst, IRE_MULTICAST); 3173bd670b35SErik Nordmark } 3174bd670b35SErik Nordmark if (nce == NULL) { 3175bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 3176bd670b35SErik Nordmark ip_drop_output("tbf_send_packet - no nce", mp, ill); 3177bd670b35SErik Nordmark freemsg(mp); 3178bd670b35SErik Nordmark return; 3179bd670b35SErik Nordmark } 3180bd670b35SErik Nordmark 3181bd670b35SErik Nordmark /* 3182bd670b35SErik Nordmark * We don't remeber the incoming ill. Thus we 3183bd670b35SErik Nordmark * pretend the packet arrived on the outbound ill. This means 3184bd670b35SErik Nordmark * statistics for input errors will be increased on the wrong 3185bd670b35SErik Nordmark * ill but that isn't a big deal. 3186bd670b35SErik Nordmark */ 3187*1eee170aSErik Nordmark ip_forward_xmit_v4(nce, ill, mp, ipha, &iras, ill->ill_mc_mtu, 3188*1eee170aSErik Nordmark 0); 3189bd670b35SErik Nordmark ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 3190bd670b35SErik Nordmark 3191bd670b35SErik Nordmark nce_refrele(nce); 31927c478bd9Sstevel@tonic-gate } 31937c478bd9Sstevel@tonic-gate } 31947c478bd9Sstevel@tonic-gate 31957c478bd9Sstevel@tonic-gate /* 31967c478bd9Sstevel@tonic-gate * Determine the current time and then the elapsed time (between the last time 31977c478bd9Sstevel@tonic-gate * and time now). Update the no. of tokens in the bucket. 
31987c478bd9Sstevel@tonic-gate */ 31997c478bd9Sstevel@tonic-gate static void 32007c478bd9Sstevel@tonic-gate tbf_update_tokens(struct vif *vifp) 32017c478bd9Sstevel@tonic-gate { 32027c478bd9Sstevel@tonic-gate timespec_t tp; 32037c478bd9Sstevel@tonic-gate hrtime_t tm; 32047c478bd9Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 3205f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 3206fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 32077c478bd9Sstevel@tonic-gate 32087c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 32097c478bd9Sstevel@tonic-gate 32107c478bd9Sstevel@tonic-gate /* Time in secs and nsecs, rate limit in kbits/sec */ 32117c478bd9Sstevel@tonic-gate gethrestime(&tp); 32127c478bd9Sstevel@tonic-gate 32137c478bd9Sstevel@tonic-gate /*LINTED*/ 32147c478bd9Sstevel@tonic-gate TV_DELTA(tp, t->tbf_last_pkt_t, tm); 32157c478bd9Sstevel@tonic-gate 32167c478bd9Sstevel@tonic-gate /* 32177c478bd9Sstevel@tonic-gate * This formula is actually 32187c478bd9Sstevel@tonic-gate * "time in seconds" * "bytes/second". Scaled for nsec. 32197c478bd9Sstevel@tonic-gate * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8) 32207c478bd9Sstevel@tonic-gate * 32217c478bd9Sstevel@tonic-gate * The (1000/1024) was introduced in add_vif to optimize 32227c478bd9Sstevel@tonic-gate * this divide into a shift. 
32237c478bd9Sstevel@tonic-gate */ 32247c478bd9Sstevel@tonic-gate t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8; 32257c478bd9Sstevel@tonic-gate t->tbf_last_pkt_t = tp; 32267c478bd9Sstevel@tonic-gate 32277c478bd9Sstevel@tonic-gate if (t->tbf_n_tok > MAX_BKT_SIZE) 32287c478bd9Sstevel@tonic-gate t->tbf_n_tok = MAX_BKT_SIZE; 3229f4b3ec61Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 3230fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 32317c478bd9Sstevel@tonic-gate "tbf_update_tok: tm %lld tok %d vif %ld", 3232f4b3ec61Sdh155122 tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs)); 32337c478bd9Sstevel@tonic-gate } 32347c478bd9Sstevel@tonic-gate } 32357c478bd9Sstevel@tonic-gate 32367c478bd9Sstevel@tonic-gate /* 32377c478bd9Sstevel@tonic-gate * Priority currently is based on port nos. 32387c478bd9Sstevel@tonic-gate * Different forwarding mechanisms have different ways 32397c478bd9Sstevel@tonic-gate * of obtaining the port no. Hence, the vif must be 32407c478bd9Sstevel@tonic-gate * given along with the packet itself. 
32417c478bd9Sstevel@tonic-gate * 32427c478bd9Sstevel@tonic-gate */ 32437c478bd9Sstevel@tonic-gate static int 32447c478bd9Sstevel@tonic-gate priority(struct vif *vifp, ipha_t *ipha) 32457c478bd9Sstevel@tonic-gate { 32467c478bd9Sstevel@tonic-gate int prio; 3247f4b3ec61Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 3248fc80c0dfSnordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 32497c478bd9Sstevel@tonic-gate 32507c478bd9Sstevel@tonic-gate /* Temporary hack; may add general packet classifier some day */ 32517c478bd9Sstevel@tonic-gate 32527c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock)); 32537c478bd9Sstevel@tonic-gate 32547c478bd9Sstevel@tonic-gate /* 32557c478bd9Sstevel@tonic-gate * The UDP port space is divided up into four priority ranges: 32567c478bd9Sstevel@tonic-gate * [0, 16384) : unclassified - lowest priority 32577c478bd9Sstevel@tonic-gate * [16384, 32768) : audio - highest priority 32587c478bd9Sstevel@tonic-gate * [32768, 49152) : whiteboard - medium priority 32597c478bd9Sstevel@tonic-gate * [49152, 65536) : video - low priority 32607c478bd9Sstevel@tonic-gate */ 32617c478bd9Sstevel@tonic-gate 32627c478bd9Sstevel@tonic-gate if (ipha->ipha_protocol == IPPROTO_UDP) { 32637c478bd9Sstevel@tonic-gate struct udphdr *udp = 32647c478bd9Sstevel@tonic-gate (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha)); 32657c478bd9Sstevel@tonic-gate switch (ntohs(udp->uh_dport) & 0xc000) { 32667c478bd9Sstevel@tonic-gate case 0x4000: 32677c478bd9Sstevel@tonic-gate prio = 70; 32687c478bd9Sstevel@tonic-gate break; 32697c478bd9Sstevel@tonic-gate case 0x8000: 32707c478bd9Sstevel@tonic-gate prio = 60; 32717c478bd9Sstevel@tonic-gate break; 32727c478bd9Sstevel@tonic-gate case 0xc000: 32737c478bd9Sstevel@tonic-gate prio = 55; 32747c478bd9Sstevel@tonic-gate break; 32757c478bd9Sstevel@tonic-gate default: 32767c478bd9Sstevel@tonic-gate prio = 50; 32777c478bd9Sstevel@tonic-gate break; 32787c478bd9Sstevel@tonic-gate } 3279f4b3ec61Sdh155122 if 
(ipst->ips_ip_mrtdebug > 1) { 3280fc80c0dfSnordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 32817c478bd9Sstevel@tonic-gate "priority: port %x prio %d\n", 32827c478bd9Sstevel@tonic-gate ntohs(udp->uh_dport), prio); 32837c478bd9Sstevel@tonic-gate } 32847c478bd9Sstevel@tonic-gate } else 32857c478bd9Sstevel@tonic-gate prio = 50; /* default priority */ 32867c478bd9Sstevel@tonic-gate return (prio); 32877c478bd9Sstevel@tonic-gate } 32887c478bd9Sstevel@tonic-gate 32897c478bd9Sstevel@tonic-gate /* 32907c478bd9Sstevel@tonic-gate * End of token bucket filter modifications 32917c478bd9Sstevel@tonic-gate */ 32927c478bd9Sstevel@tonic-gate 32937c478bd9Sstevel@tonic-gate 32947c478bd9Sstevel@tonic-gate 32957c478bd9Sstevel@tonic-gate /* 32967c478bd9Sstevel@tonic-gate * Produces data for netstat -M. 32977c478bd9Sstevel@tonic-gate */ 32987c478bd9Sstevel@tonic-gate int 3299f4b3ec61Sdh155122 ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst) 33007c478bd9Sstevel@tonic-gate { 3301f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); 3302f4b3ec61Sdh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); 3303f4b3ec61Sdh155122 if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat, 3304f4b3ec61Sdh155122 sizeof (struct mrtstat))) { 33057c478bd9Sstevel@tonic-gate ip0dbg(("ip_mroute_stats: failed %ld bytes\n", 3306f4b3ec61Sdh155122 (size_t)sizeof (struct mrtstat))); 33077c478bd9Sstevel@tonic-gate return (0); 33087c478bd9Sstevel@tonic-gate } 33097c478bd9Sstevel@tonic-gate return (1); 33107c478bd9Sstevel@tonic-gate } 33117c478bd9Sstevel@tonic-gate 33127c478bd9Sstevel@tonic-gate /* 33137c478bd9Sstevel@tonic-gate * Sends info for SNMP's MIB. 
33147c478bd9Sstevel@tonic-gate */ 33157c478bd9Sstevel@tonic-gate int 3316f4b3ec61Sdh155122 ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst) 33177c478bd9Sstevel@tonic-gate { 33187c478bd9Sstevel@tonic-gate struct vifctl vi; 33197c478bd9Sstevel@tonic-gate vifi_t vifi; 33207c478bd9Sstevel@tonic-gate 3321f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 3322f4b3ec61Sdh155122 for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) { 3323f4b3ec61Sdh155122 if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0) 33247c478bd9Sstevel@tonic-gate continue; 33257c478bd9Sstevel@tonic-gate /* 33267c478bd9Sstevel@tonic-gate * No locks here, an approximation is fine. 33277c478bd9Sstevel@tonic-gate */ 33287c478bd9Sstevel@tonic-gate vi.vifc_vifi = vifi; 3329f4b3ec61Sdh155122 vi.vifc_flags = ipst->ips_vifs[vifi].v_flags; 3330f4b3ec61Sdh155122 vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold; 3331f4b3ec61Sdh155122 vi.vifc_rate_limit = ipst->ips_vifs[vifi].v_rate_limit; 3332f4b3ec61Sdh155122 vi.vifc_lcl_addr = ipst->ips_vifs[vifi].v_lcl_addr; 3333f4b3ec61Sdh155122 vi.vifc_rmt_addr = ipst->ips_vifs[vifi].v_rmt_addr; 3334f4b3ec61Sdh155122 vi.vifc_pkt_in = ipst->ips_vifs[vifi].v_pkt_in; 3335f4b3ec61Sdh155122 vi.vifc_pkt_out = ipst->ips_vifs[vifi].v_pkt_out; 33367c478bd9Sstevel@tonic-gate 33377c478bd9Sstevel@tonic-gate if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) { 33387c478bd9Sstevel@tonic-gate ip0dbg(("ip_mroute_vif: failed %ld bytes\n", 33397c478bd9Sstevel@tonic-gate (size_t)sizeof (vi))); 3340ee07f6e7SSowmini Varadhan mutex_exit(&ipst->ips_numvifs_mutex); 33417c478bd9Sstevel@tonic-gate return (0); 33427c478bd9Sstevel@tonic-gate } 33437c478bd9Sstevel@tonic-gate } 3344f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 33457c478bd9Sstevel@tonic-gate return (1); 33467c478bd9Sstevel@tonic-gate } 33477c478bd9Sstevel@tonic-gate 33487c478bd9Sstevel@tonic-gate /* 33497c478bd9Sstevel@tonic-gate * Called by ip_snmp_get to send up multicast routing table. 
33507c478bd9Sstevel@tonic-gate */ 33517c478bd9Sstevel@tonic-gate int 3352f4b3ec61Sdh155122 ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst) 33537c478bd9Sstevel@tonic-gate { 33547c478bd9Sstevel@tonic-gate int i, j; 33557c478bd9Sstevel@tonic-gate struct mfc *rt; 33567c478bd9Sstevel@tonic-gate struct mfcctl mfcc; 33577c478bd9Sstevel@tonic-gate 33587c478bd9Sstevel@tonic-gate /* 33597c478bd9Sstevel@tonic-gate * Make sure multicast has not been turned off. 33607c478bd9Sstevel@tonic-gate */ 3361f4b3ec61Sdh155122 if (is_mrouter_off(ipst)) 33627c478bd9Sstevel@tonic-gate return (1); 33637c478bd9Sstevel@tonic-gate 33647c478bd9Sstevel@tonic-gate /* Loop over all hash buckets and their chains */ 33657c478bd9Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 3366f4b3ec61Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]); 3367f4b3ec61Sdh155122 for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) { 33687c478bd9Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 33697c478bd9Sstevel@tonic-gate if (rt->mfc_rte != NULL || 33707c478bd9Sstevel@tonic-gate (rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 33717c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 33727c478bd9Sstevel@tonic-gate continue; 33737c478bd9Sstevel@tonic-gate } 33747c478bd9Sstevel@tonic-gate mfcc.mfcc_origin = rt->mfc_origin; 33757c478bd9Sstevel@tonic-gate mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp; 33767c478bd9Sstevel@tonic-gate mfcc.mfcc_parent = rt->mfc_parent; 33777c478bd9Sstevel@tonic-gate mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt; 3378f4b3ec61Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 3379f4b3ec61Sdh155122 for (j = 0; j < (int)ipst->ips_numvifs; j++) 33807c478bd9Sstevel@tonic-gate mfcc.mfcc_ttls[j] = rt->mfc_ttls[j]; 3381f4b3ec61Sdh155122 for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++) 33827c478bd9Sstevel@tonic-gate mfcc.mfcc_ttls[j] = 0; 3383f4b3ec61Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 33847c478bd9Sstevel@tonic-gate 33857c478bd9Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 33867c478bd9Sstevel@tonic-gate if 
(!snmp_append_data(mp, (char *)&mfcc, 33877c478bd9Sstevel@tonic-gate sizeof (mfcc))) { 3388f4b3ec61Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 33897c478bd9Sstevel@tonic-gate ip0dbg(("ip_mroute_mrt: failed %ld bytes\n", 33907c478bd9Sstevel@tonic-gate (size_t)sizeof (mfcc))); 33917c478bd9Sstevel@tonic-gate return (0); 33927c478bd9Sstevel@tonic-gate } 33937c478bd9Sstevel@tonic-gate } 3394f4b3ec61Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 33957c478bd9Sstevel@tonic-gate } 33967c478bd9Sstevel@tonic-gate return (1); 33977c478bd9Sstevel@tonic-gate } 3398