in_pcb.c (a0e610c43975ca0ec0bfc7d1df88d8b7a3cb871c) | in_pcb.c (f3e7afe2d7b262ab55ab818445d4dfdb6e0c70a9) |
---|---|
1/*- 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. 4 * Copyright (c) 2007-2009 Robert N. M. Watson 5 * Copyright (c) 2010-2011 Juniper Networks, Inc. 6 * All rights reserved. 7 * 8 * Portions of this software were developed by Robert N. M. Watson under --- 28 unchanged lines hidden (view full) --- 37 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD$"); 40 41#include "opt_ddb.h" 42#include "opt_ipsec.h" 43#include "opt_inet.h" 44#include "opt_inet6.h" | 1/*- 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. 4 * Copyright (c) 2007-2009 Robert N. M. Watson 5 * Copyright (c) 2010-2011 Juniper Networks, Inc. 6 * All rights reserved. 7 * 8 * Portions of this software were developed by Robert N. M. Watson under --- 28 unchanged lines hidden (view full) --- 37 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD$"); 40 41#include "opt_ddb.h" 42#include "opt_ipsec.h" 43#include "opt_inet.h" 44#include "opt_inet6.h" |
45#include "opt_ratelimit.h" |
|
45#include "opt_pcbgroup.h" 46#include "opt_rss.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/lock.h> 51#include <sys/malloc.h> 52#include <sys/mbuf.h> 53#include <sys/callout.h> 54#include <sys/eventhandler.h> 55#include <sys/domain.h> 56#include <sys/protosw.h> 57#include <sys/rmlock.h> 58#include <sys/socket.h> 59#include <sys/socketvar.h> | 46#include "opt_pcbgroup.h" 47#include "opt_rss.h" 48 49#include <sys/param.h> 50#include <sys/systm.h> 51#include <sys/lock.h> 52#include <sys/malloc.h> 53#include <sys/mbuf.h> 54#include <sys/callout.h> 55#include <sys/eventhandler.h> 56#include <sys/domain.h> 57#include <sys/protosw.h> 58#include <sys/rmlock.h> 59#include <sys/socket.h> 60#include <sys/socketvar.h> |
61#include <sys/sockio.h> |
|
60#include <sys/priv.h> 61#include <sys/proc.h> 62#include <sys/refcount.h> 63#include <sys/jail.h> 64#include <sys/kernel.h> 65#include <sys/sysctl.h> 66 67#ifdef DDB --- 1067 unchanged lines hidden (view full) --- 1135 * socket, in which case in_pcbfree() is deferred. 1136 */ 1137void 1138in_pcbdetach(struct inpcb *inp) 1139{ 1140 1141 KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); 1142 | 62#include <sys/priv.h> 63#include <sys/proc.h> 64#include <sys/refcount.h> 65#include <sys/jail.h> 66#include <sys/kernel.h> 67#include <sys/sysctl.h> 68 69#ifdef DDB --- 1067 unchanged lines hidden (view full) --- 1137 * socket, in which case in_pcbfree() is deferred. 1138 */ 1139void 1140in_pcbdetach(struct inpcb *inp) 1141{ 1142 1143 KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); 1144 |
1145#ifdef RATELIMIT 1146 if (inp->inp_snd_tag != NULL) 1147 in_pcbdetach_txrtlmt(inp); 1148#endif |
|
1143 inp->inp_socket->so_pcb = NULL; 1144 inp->inp_socket = NULL; 1145} 1146 1147/* 1148 * in_pcbref() bumps the reference count on an inpcb in order to maintain 1149 * stability of an inpcb pointer despite the inpcb lock being released. This 1150 * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, --- 1521 unchanged lines hidden (view full) --- 2672 db_printf("usage: show inpcb <addr>\n"); 2673 return; 2674 } 2675 inp = (struct inpcb *)addr; 2676 2677 db_print_inpcb(inp, "inpcb", 0); 2678} 2679#endif /* DDB */ | 1149 inp->inp_socket->so_pcb = NULL; 1150 inp->inp_socket = NULL; 1151} 1152 1153/* 1154 * in_pcbref() bumps the reference count on an inpcb in order to maintain 1155 * stability of an inpcb pointer despite the inpcb lock being released. This 1156 * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, --- 1521 unchanged lines hidden (view full) --- 2678 db_printf("usage: show inpcb <addr>\n"); 2679 return; 2680 } 2681 inp = (struct inpcb *)addr; 2682 2683 db_print_inpcb(inp, "inpcb", 0); 2684} 2685#endif /* DDB */ |
#ifdef RATELIMIT
/*
 * Modify the TX rate limit of the send tag currently stored in
 * "inp->inp_snd_tag", if any.
 *
 * Returns EINVAL when the inpcb has no send tag or the send tag has no
 * network interface, EOPNOTSUPP when the interface provides no
 * if_snd_tag_modify() method, and otherwise whatever the interface's
 * if_snd_tag_modify() method returns.
 */
int
in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate)
{
	union if_snd_tag_modify_params params = {
		.rate_limit.max_rate = max_pacing_rate,
	};
	struct m_snd_tag *mst;
	struct ifnet *ifp;
	int error;

	mst = inp->inp_snd_tag;
	if (mst == NULL)
		return (EINVAL);

	ifp = mst->ifp;
	if (ifp == NULL)
		return (EINVAL);

	if (ifp->if_snd_tag_modify == NULL) {
		error = EOPNOTSUPP;
	} else {
		error = ifp->if_snd_tag_modify(mst, &params);
	}
	return (error);
}

/*
 * Query the TX rate limit of the send tag currently stored in
 * "inp->inp_snd_tag", if any.
 *
 * On success the queried rate is stored through "p_max_pacing_rate",
 * unless that pointer is NULL.  Returns EINVAL when the inpcb has no
 * send tag or the send tag has no network interface, EOPNOTSUPP when
 * the interface provides no if_snd_tag_query() method, and otherwise
 * whatever the interface's if_snd_tag_query() method returns.
 */
int
in_pcbquery_txrtlmt(struct inpcb *inp, uint32_t *p_max_pacing_rate)
{
	union if_snd_tag_query_params params = { };
	struct m_snd_tag *mst;
	struct ifnet *ifp;
	int error;

	mst = inp->inp_snd_tag;
	if (mst == NULL)
		return (EINVAL);

	ifp = mst->ifp;
	if (ifp == NULL)
		return (EINVAL);

	if (ifp->if_snd_tag_query == NULL) {
		error = EOPNOTSUPP;
	} else {
		error = ifp->if_snd_tag_query(mst, &params);
		if (error == 0 && p_max_pacing_rate != NULL)
			*p_max_pacing_rate = params.rate_limit.max_rate;
	}
	return (error);
}

/*
 * Allocate a new TX rate limit send tag from the network interface
 * given by the "ifp" argument and save it in "inp->inp_snd_tag".
 *
 * The inpcb must be write locked.  Returns EINVAL when the inpcb
 * already has a send tag, EOPNOTSUPP when the interface provides no
 * if_snd_tag_alloc() method, and otherwise whatever the interface's
 * if_snd_tag_alloc() method returns.
 */
int
in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
    uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate)
{
	union if_snd_tag_alloc_params params = {
		.rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT,
		.rate_limit.hdr.flowid = flowid,
		.rate_limit.hdr.flowtype = flowtype,
		.rate_limit.max_rate = max_pacing_rate,
	};
	int error;

	INP_WLOCK_ASSERT(inp);

	if (inp->inp_snd_tag != NULL)
		return (EINVAL);

	if (ifp->if_snd_tag_alloc == NULL) {
		error = EOPNOTSUPP;
	} else {
		error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);

		/*
		 * At success increment the refcount on
		 * the send tag's network interface:
		 */
		if (error == 0)
			if_ref(inp->inp_snd_tag->ifp);
	}
	return (error);
}

/*
 * Free the existing TX rate limit send tag stored in
 * "inp->inp_snd_tag", if any, and drop the network interface reference
 * taken when the tag was attached.
 *
 * The inpcb must be write locked.  "inp->inp_snd_tag" is always
 * cleared, also when there is nothing to free.
 */
void
in_pcbdetach_txrtlmt(struct inpcb *inp)
{
	struct m_snd_tag *mst;
	struct ifnet *ifp;

	INP_WLOCK_ASSERT(inp);

	mst = inp->inp_snd_tag;
	inp->inp_snd_tag = NULL;

	if (mst == NULL)
		return;

	/*
	 * NOTE(review): when the tag has no interface pointer the tag
	 * is dropped from the inpcb without being passed to
	 * if_snd_tag_free(); confirm this cannot leak the tag.
	 */
	ifp = mst->ifp;
	if (ifp == NULL)
		return;

	/*
	 * If the device was detached while we still had reference(s)
	 * on the ifp, we assume if_snd_tag_free() was replaced with
	 * stubs.
	 */
	ifp->if_snd_tag_free(mst);

	/* release reference count on network interface */
	if_rele(ifp);
}

/*
 * This function should be called when the INP_RATE_LIMIT_CHANGED flag
 * is set in the fast path and will attach/detach/modify the TX rate
 * limit send tag based on the socket's so_max_pacing_rate value.
 *
 * Nothing is done when "inp" is NULL, when the inpcb has no socket, or
 * when the inpcb is not write locked and the lock upgrade fails.  The
 * INP_RATE_LIMIT_CHANGED flag is cleared only when the operation
 * succeeded or returned EOPNOTSUPP, so other failures are retried on a
 * later call.
 */
void
in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
{
	struct socket *socket;
	uint32_t max_pacing_rate;
	bool did_upgrade;
	int error;

	if (inp == NULL)
		return;

	socket = inp->inp_socket;
	if (socket == NULL)
		return;

	if (!INP_WLOCKED(inp)) {
		/*
		 * NOTE: If the write locking fails, we need to bail
		 * out and use the non-ratelimited ring for the
		 * transmit until there is a new chance to get the
		 * write lock.
		 */
		if (!INP_TRY_UPGRADE(inp))
			return;
		did_upgrade = 1;
	} else {
		did_upgrade = 0;
	}

	/*
	 * NOTE: The so_max_pacing_rate value is read unlocked,
	 * because atomic updates are not required since the variable
	 * is checked at every mbuf we send. It is assumed that the
	 * variable read itself will be atomic.
	 */
	max_pacing_rate = socket->so_max_pacing_rate;

	/*
	 * NOTE: When attaching to a network interface a reference is
	 * made to ensure the network interface doesn't go away until
	 * all ratelimit connections are gone. The network interface
	 * pointers compared below represent valid network interfaces,
	 * except when comparing towards NULL.
	 */
	if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
		/* No rate requested and no tag attached: nothing to do. */
		error = 0;
	} else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
		/* TX rate limiting is not enabled on this interface. */
		if (inp->inp_snd_tag != NULL)
			in_pcbdetach_txrtlmt(inp);
		error = 0;
	} else if (inp->inp_snd_tag == NULL) {
		/*
		 * In order to utilize packet pacing with RSS, we need
		 * to wait until there is a valid RSS hash before we
		 * can proceed:
		 */
		if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
			error = EAGAIN;
		} else {
			error = in_pcbattach_txrtlmt(inp, ifp,
			    M_HASHTYPE_GET(mb), mb->m_pkthdr.flowid,
			    max_pacing_rate);
		}
	} else {
		error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
	}
	if (error == 0 || error == EOPNOTSUPP)
		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
	if (did_upgrade)
		INP_DOWNGRADE(inp);
}

/*
 * Track route changes for TX rate limiting.
 *
 * Detaches the current send tag, if any, and sets the
 * INP_RATE_LIMIT_CHANGED flag.  Nothing is done when "inp" is NULL,
 * when the inpcb has no socket or no send tag, or when the inpcb is
 * not write locked and the lock upgrade fails.
 */
void
in_pcboutput_eagain(struct inpcb *inp)
{
	struct socket *socket;
	bool did_upgrade;

	if (inp == NULL)
		return;

	socket = inp->inp_socket;
	if (socket == NULL)
		return;

	if (inp->inp_snd_tag == NULL)
		return;

	if (!INP_WLOCKED(inp)) {
		/*
		 * NOTE: If the write locking fails, we need to bail
		 * out and use the non-ratelimited ring for the
		 * transmit until there is a new chance to get the
		 * write lock.
		 */
		if (!INP_TRY_UPGRADE(inp))
			return;
		did_upgrade = 1;
	} else {
		did_upgrade = 0;
	}

	/* detach rate limiting */
	in_pcbdetach_txrtlmt(inp);

	/* make sure new mbuf send tag allocation is made */
	inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;

	if (did_upgrade)
		INP_DOWNGRADE(inp);
}
#endif /* RATELIMIT */
|