1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * Copyright (c) 2007-2014 Nicira, Inc. 4 */ 5 6 #ifndef DATAPATH_H 7 #define DATAPATH_H 1 8 9 #include <asm/page.h> 10 #include <linux/kernel.h> 11 #include <linux/mutex.h> 12 #include <linux/netdevice.h> 13 #include <linux/skbuff.h> 14 #include <linux/u64_stats_sync.h> 15 #include <net/ip_tunnels.h> 16 17 #include "conntrack.h" 18 #include "flow.h" 19 #include "flow_table.h" 20 #include "meter.h" 21 #include "vport-internal_dev.h" 22 23 #define DP_MAX_PORTS USHRT_MAX 24 #define DP_VPORT_HASH_BUCKETS 1024 25 #define DP_MASKS_REBALANCE_INTERVAL 4000 26 27 /** 28 * struct dp_stats_percpu - per-cpu packet processing statistics for a given 29 * datapath. 30 * @n_hit: Number of received packets for which a matching flow was found in 31 * the flow table. 32 * @n_missed: Number of received packets that had no matching flow in the flow 33 * table. The sum of @n_hit and @n_missed is the number of packets that have 34 * been received by the datapath. 35 * @n_lost: Number of received packets that had no matching flow in the flow 36 * table that could not be sent to userspace (normally due to an overflow in 37 * one of the datapath's queues). 38 * @n_mask_hit: Number of masks looked up for flow match. 39 * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked 40 * up per packet. 41 * @n_cache_hit: The number of received packets that had their mask found using 42 * the mask cache. 43 * @syncp: Synchronization point for 64bit counters. 44 */ 45 struct dp_stats_percpu { 46 u64 n_hit; 47 u64 n_missed; 48 u64 n_lost; 49 u64 n_mask_hit; 50 u64 n_cache_hit; 51 struct u64_stats_sync syncp; 52 }; 53 54 /** 55 * struct dp_nlsk_pids - array of netlink portids of for a datapath. 56 * This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU 57 * is enabled and must be protected by rcu. 58 * @rcu: RCU callback head for deferred destruction. 59 * @n_pids: Size of @pids array. 60 * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets 61 * that miss the flow table. 62 */ 63 struct dp_nlsk_pids { 64 struct rcu_head rcu; 65 u32 n_pids; 66 u32 pids[]; 67 }; 68 69 /** 70 * struct datapath - datapath for flow-based packet switching 71 * @rcu: RCU callback head for deferred destruction. 72 * @list_node: Element in global 'dps' list. 73 * @table: flow table. 74 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by 75 * ovs_mutex and RCU. 76 * @stats_percpu: Per-CPU datapath statistics. 77 * @net: Reference to net namespace. 78 * @user_features: Bitmap of enabled %OVS_DP_F_* features. 79 * @max_headroom: The maximum headroom of all vports in this datapath; it will 80 * be used by all the internal vports in this dp. 81 * @meter_tbl: Meter table. 82 * @upcall_portids: RCU protected 'struct dp_nlsk_pids'. 83 * 84 * Context: See the comment on locking at the top of datapath.c for additional 85 * locking information. 86 */ 87 struct datapath { 88 struct rcu_head rcu; 89 struct list_head list_node; 90 91 /* Flow table. */ 92 struct flow_table table; 93 94 /* Switch ports. */ 95 struct hlist_head *ports; 96 97 /* Stats. */ 98 struct dp_stats_percpu __percpu *stats_percpu; 99 100 /* Network namespace ref. */ 101 possible_net_t net; 102 103 u32 user_features; 104 105 u32 max_headroom; 106 107 /* Switch meters. */ 108 struct dp_meter_table meter_tbl; 109 110 struct dp_nlsk_pids __rcu *upcall_portids; 111 }; 112 113 /** 114 * struct ovs_skb_cb - OVS data in skb CB 115 * @input_vport: The original vport packet came in on. This value is cached 116 * when a packet is received by OVS. 117 * @mru: The maximum received fragement size; 0 if the packet is not 118 * fragmented. 119 * @acts_origlen: The netlink size of the flow actions applied to this skb. 120 * @cutlen: The number of bytes from the packet end to be removed. 121 * @probability: The sampling probability that was applied to this skb; 0 means 122 * no sampling has occurred; U32_MAX means 100% probability. 123 */ 124 struct ovs_skb_cb { 125 struct vport *input_vport; 126 u16 mru; 127 u16 acts_origlen; 128 u32 cutlen; 129 u32 probability; 130 }; 131 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) 132 133 /** 134 * struct dp_upcall_info - metadata to include with a packet sent to userspace 135 * @cmd: One of %OVS_PACKET_CMD_*. 136 * @userdata: If nonnull, its variable-length value is passed to userspace as 137 * %OVS_PACKET_ATTR_USERDATA. 138 * @actions: If nonnull, its variable-length value is passed to userspace as 139 * %OVS_PACKET_ATTR_ACTIONS. 140 * @actions_len: The length of the @actions. 141 * @portid: Netlink portid to which packet should be sent. If @portid is 0 142 * then no packet is sent and the packet is accounted in the datapath's @n_lost 143 * counter. 144 * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. 145 * @mru: If not zero, Maximum received IP fragment size. 146 */ 147 struct dp_upcall_info { 148 struct ip_tunnel_info *egress_tun_info; 149 const struct nlattr *userdata; 150 const struct nlattr *actions; 151 int actions_len; 152 u32 portid; 153 u8 cmd; 154 u16 mru; 155 }; 156 157 /** 158 * struct ovs_net - Per net-namespace data for ovs. 159 * @dps: List of datapaths to enable dumping them all out. 160 * Protected by genl_mutex. 161 * @dp_notify_work: A work notifier to handle port unregistering. 162 * @masks_rebalance: A work to periodically optimize flow table caches. 163 * @ct_limit_info: A hash table of conntrack zone connection limits. 164 * @xt_label: Whether connlables are configured for the network or not. 165 */ 166 struct ovs_net { 167 struct list_head dps; 168 struct work_struct dp_notify_work; 169 struct delayed_work masks_rebalance; 170 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 171 struct ovs_ct_limit_info *ct_limit_info; 172 #endif 173 bool xt_label; 174 }; 175 176 /** 177 * enum ovs_pkt_hash_types - hash info to include with a packet 178 * to send to userspace. 179 * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack. 180 * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash 181 * over transport ports. 182 */ 183 enum ovs_pkt_hash_types { 184 OVS_PACKET_HASH_SW_BIT = (1ULL << 32), 185 OVS_PACKET_HASH_L4_BIT = (1ULL << 33), 186 }; 187 188 extern unsigned int ovs_net_id; 189 void ovs_lock(void); 190 void ovs_unlock(void); 191 192 #ifdef CONFIG_LOCKDEP 193 int lockdep_ovsl_is_held(void); 194 #else 195 #define lockdep_ovsl_is_held() 1 196 #endif 197 198 #define ASSERT_OVSL() WARN_ON(!lockdep_ovsl_is_held()) 199 #define ovsl_dereference(p) \ 200 rcu_dereference_protected(p, lockdep_ovsl_is_held()) 201 #define rcu_dereference_ovsl(p) \ 202 rcu_dereference_check(p, lockdep_ovsl_is_held()) 203 204 static inline struct net *ovs_dp_get_net(const struct datapath *dp) 205 { 206 return read_pnet(&dp->net); 207 } 208 209 static inline void ovs_dp_set_net(struct datapath *dp, struct net *net) 210 { 211 write_pnet(&dp->net, net); 212 } 213 214 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); 215 216 static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) 217 { 218 WARN_ON_ONCE(!rcu_read_lock_held()); 219 return ovs_lookup_vport(dp, port_no); 220 } 221 222 static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no) 223 { 224 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); 225 return ovs_lookup_vport(dp, port_no); 226 } 227 228 static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no) 229 { 230 ASSERT_OVSL(); 231 return ovs_lookup_vport(dp, port_no); 232 } 233 234 /* Must be called with rcu_read_lock. */ 235 static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) 236 { 237 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); 238 239 if (dev) { 240 struct vport *vport = ovs_internal_dev_get_vport(dev); 241 242 if (vport) 243 return vport->dp; 244 } 245 246 return NULL; 247 } 248 249 /* The caller must hold either ovs_mutex or rcu_read_lock to keep the 250 * returned dp pointer valid. 251 */ 252 static inline struct datapath *get_dp(struct net *net, int dp_ifindex) 253 { 254 struct datapath *dp; 255 256 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); 257 rcu_read_lock(); 258 dp = get_dp_rcu(net, dp_ifindex); 259 rcu_read_unlock(); 260 261 return dp; 262 } 263 264 extern struct notifier_block ovs_dp_device_notifier; 265 extern struct genl_family dp_vport_genl_family; 266 267 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); 268 void ovs_dp_detach_port(struct vport *); 269 int ovs_dp_upcall(struct datapath *, struct sk_buff *, 270 const struct sw_flow_key *, const struct dp_upcall_info *, 271 uint32_t cutlen); 272 273 u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id); 274 275 const char *ovs_dp_name(const struct datapath *dp); 276 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, 277 u32 portid, u32 seq, u8 cmd); 278 279 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, 280 const struct sw_flow_actions *, struct sw_flow_key *); 281 282 void ovs_dp_notify_wq(struct work_struct *work); 283 284 int action_fifos_init(void); 285 void action_fifos_exit(void); 286 287 /* 'KEY' must not have any bits set outside of the 'MASK' */ 288 #define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) 289 #define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK)) 290 291 #define OVS_NLERR(logging_allowed, fmt, ...) \ 292 do { \ 293 if (logging_allowed && net_ratelimit()) \ 294 pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \ 295 } while (0) 296 #endif /* datapath.h */ 297