xref: /linux/net/openvswitch/flow.c (revision ad06a566e118e57b852cab5933dbbbaebb141de3)
1c9422999SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2ccb1352eSJesse Gross /*
3971427f3SAndy Zhou  * Copyright (c) 2007-2014 Nicira, Inc.
4ccb1352eSJesse Gross  */
5ccb1352eSJesse Gross 
6ccb1352eSJesse Gross #include <linux/uaccess.h>
7ccb1352eSJesse Gross #include <linux/netdevice.h>
8ccb1352eSJesse Gross #include <linux/etherdevice.h>
9ccb1352eSJesse Gross #include <linux/if_ether.h>
10ccb1352eSJesse Gross #include <linux/if_vlan.h>
11ccb1352eSJesse Gross #include <net/llc_pdu.h>
12ccb1352eSJesse Gross #include <linux/kernel.h>
13ccb1352eSJesse Gross #include <linux/jhash.h>
14ccb1352eSJesse Gross #include <linux/jiffies.h>
15ccb1352eSJesse Gross #include <linux/llc.h>
16ccb1352eSJesse Gross #include <linux/module.h>
17ccb1352eSJesse Gross #include <linux/in.h>
18ccb1352eSJesse Gross #include <linux/rcupdate.h>
19db74a333SThadeu Lima de Souza Cascardo #include <linux/cpumask.h>
20ccb1352eSJesse Gross #include <linux/if_arp.h>
21ccb1352eSJesse Gross #include <linux/ip.h>
22ccb1352eSJesse Gross #include <linux/ipv6.h>
2325cd9ba0SSimon Horman #include <linux/mpls.h>
24a175a723SJoe Stringer #include <linux/sctp.h>
25e298e505SPravin B Shelar #include <linux/smp.h>
26ccb1352eSJesse Gross #include <linux/tcp.h>
27ccb1352eSJesse Gross #include <linux/udp.h>
28ccb1352eSJesse Gross #include <linux/icmp.h>
29ccb1352eSJesse Gross #include <linux/icmpv6.h>
30ccb1352eSJesse Gross #include <linux/rculist.h>
31ccb1352eSJesse Gross #include <net/ip.h>
327d5437c7SPravin B Shelar #include <net/ip_tunnels.h>
33ccb1352eSJesse Gross #include <net/ipv6.h>
3425cd9ba0SSimon Horman #include <net/mpls.h>
35ccb1352eSJesse Gross #include <net/ndisc.h>
36b2d0f5d5SYi Yang #include <net/nsh.h>
37ccb1352eSJesse Gross 
38a581b96dSPravin B Shelar #include "conntrack.h"
3983c8df26SPravin B Shelar #include "datapath.h"
4083c8df26SPravin B Shelar #include "flow.h"
4183c8df26SPravin B Shelar #include "flow_netlink.h"
42a581b96dSPravin B Shelar #include "vport.h"
4383c8df26SPravin B Shelar 
44e6445719SPravin B Shelar u64 ovs_flow_used_time(unsigned long flow_jiffies)
4503f0d916SAndy Zhou {
46311af51dSArnd Bergmann 	struct timespec64 cur_ts;
47e6445719SPravin B Shelar 	u64 cur_ms, idle_ms;
4803f0d916SAndy Zhou 
49311af51dSArnd Bergmann 	ktime_get_ts64(&cur_ts);
50e6445719SPravin B Shelar 	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
51311af51dSArnd Bergmann 	cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
52e6445719SPravin B Shelar 		 cur_ts.tv_nsec / NSEC_PER_MSEC;
5303f0d916SAndy Zhou 
54e6445719SPravin B Shelar 	return cur_ms - idle_ms;
5503f0d916SAndy Zhou }
5603f0d916SAndy Zhou 
/* First 16 bits of tcp_flag_word(tp), masked with 0x0FFF in network order. */
#define TCP_FLAGS_BE16(tp) (htons(0x0FFF) & *(__be16 *)&tcp_flag_word(tp))
5803f0d916SAndy Zhou 
59ad552007SBen Pfaff void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
6012eb18f7SThomas Graf 			   const struct sk_buff *skb)
615828cd9aSAndy Zhou {
62aef833c5SPablo Neira Ayuso 	struct sw_flow_stats *stats;
63c4b2bf6bSTonghao Zhang 	unsigned int cpu = smp_processor_id();
64df8a39deSJiri Pirko 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
65e6445719SPravin B Shelar 
66db74a333SThadeu Lima de Souza Cascardo 	stats = rcu_dereference(flow->stats[cpu]);
67e298e505SPravin B Shelar 
68db74a333SThadeu Lima de Souza Cascardo 	/* Check if already have CPU-specific stats. */
6963e7959cSJarno Rajahalme 	if (likely(stats)) {
70e298e505SPravin B Shelar 		spin_lock(&stats->lock);
7163e7959cSJarno Rajahalme 		/* Mark if we write on the pre-allocated stats. */
72db74a333SThadeu Lima de Souza Cascardo 		if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
73db74a333SThadeu Lima de Souza Cascardo 			flow->stats_last_writer = cpu;
7463e7959cSJarno Rajahalme 	} else {
7563e7959cSJarno Rajahalme 		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
7663e7959cSJarno Rajahalme 		spin_lock(&stats->lock);
7763e7959cSJarno Rajahalme 
78db74a333SThadeu Lima de Souza Cascardo 		/* If the current CPU is the only writer on the
7963e7959cSJarno Rajahalme 		 * pre-allocated stats keep using them.
8063e7959cSJarno Rajahalme 		 */
81db74a333SThadeu Lima de Souza Cascardo 		if (unlikely(flow->stats_last_writer != cpu)) {
8263e7959cSJarno Rajahalme 			/* A previous locker may have already allocated the
83db74a333SThadeu Lima de Souza Cascardo 			 * stats, so we need to check again.  If CPU-specific
8463e7959cSJarno Rajahalme 			 * stats were already allocated, we update the pre-
8563e7959cSJarno Rajahalme 			 * allocated stats as we have already locked them.
8663e7959cSJarno Rajahalme 			 */
87db74a333SThadeu Lima de Souza Cascardo 			if (likely(flow->stats_last_writer != -1) &&
88db74a333SThadeu Lima de Souza Cascardo 			    likely(!rcu_access_pointer(flow->stats[cpu]))) {
89db74a333SThadeu Lima de Souza Cascardo 				/* Try to allocate CPU-specific stats. */
90aef833c5SPablo Neira Ayuso 				struct sw_flow_stats *new_stats;
9163e7959cSJarno Rajahalme 
9263e7959cSJarno Rajahalme 				new_stats =
9363e7959cSJarno Rajahalme 					kmem_cache_alloc_node(flow_stats_cache,
944167e9b2SDavid Rientjes 							      GFP_NOWAIT |
954167e9b2SDavid Rientjes 							      __GFP_THISNODE |
964167e9b2SDavid Rientjes 							      __GFP_NOWARN |
9763e7959cSJarno Rajahalme 							      __GFP_NOMEMALLOC,
98c57c054eSTonghao Zhang 							      numa_node_id());
9963e7959cSJarno Rajahalme 				if (likely(new_stats)) {
10063e7959cSJarno Rajahalme 					new_stats->used = jiffies;
10163e7959cSJarno Rajahalme 					new_stats->packet_count = 1;
10224cc59d1SBen Pfaff 					new_stats->byte_count = len;
10363e7959cSJarno Rajahalme 					new_stats->tcp_flags = tcp_flags;
10463e7959cSJarno Rajahalme 					spin_lock_init(&new_stats->lock);
10563e7959cSJarno Rajahalme 
106db74a333SThadeu Lima de Souza Cascardo 					rcu_assign_pointer(flow->stats[cpu],
10763e7959cSJarno Rajahalme 							   new_stats);
108c4b2bf6bSTonghao Zhang 					cpumask_set_cpu(cpu, &flow->cpu_used_mask);
10963e7959cSJarno Rajahalme 					goto unlock;
11063e7959cSJarno Rajahalme 				}
11163e7959cSJarno Rajahalme 			}
112db74a333SThadeu Lima de Souza Cascardo 			flow->stats_last_writer = cpu;
11363e7959cSJarno Rajahalme 		}
11463e7959cSJarno Rajahalme 	}
11563e7959cSJarno Rajahalme 
116e298e505SPravin B Shelar 	stats->used = jiffies;
117e298e505SPravin B Shelar 	stats->packet_count++;
11824cc59d1SBen Pfaff 	stats->byte_count += len;
119e298e505SPravin B Shelar 	stats->tcp_flags |= tcp_flags;
12063e7959cSJarno Rajahalme unlock:
121e298e505SPravin B Shelar 	spin_unlock(&stats->lock);
122e298e505SPravin B Shelar }
123e298e505SPravin B Shelar 
12486ec8dbaSJarno Rajahalme /* Must be called with rcu_read_lock or ovs_mutex. */
12586ec8dbaSJarno Rajahalme void ovs_flow_stats_get(const struct sw_flow *flow,
12686ec8dbaSJarno Rajahalme 			struct ovs_flow_stats *ovs_stats,
127e298e505SPravin B Shelar 			unsigned long *used, __be16 *tcp_flags)
128e298e505SPravin B Shelar {
129db74a333SThadeu Lima de Souza Cascardo 	int cpu;
130e298e505SPravin B Shelar 
131e298e505SPravin B Shelar 	*used = 0;
132e298e505SPravin B Shelar 	*tcp_flags = 0;
133e298e505SPravin B Shelar 	memset(ovs_stats, 0, sizeof(*ovs_stats));
134e298e505SPravin B Shelar 
135db74a333SThadeu Lima de Souza Cascardo 	/* We open code this to make sure cpu 0 is always considered */
136c4b2bf6bSTonghao Zhang 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
137aef833c5SPablo Neira Ayuso 		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
13823dabf88SJarno Rajahalme 
13963e7959cSJarno Rajahalme 		if (stats) {
14063e7959cSJarno Rajahalme 			/* Local CPU may write on non-local stats, so we must
14163e7959cSJarno Rajahalme 			 * block bottom-halves here.
14263e7959cSJarno Rajahalme 			 */
14363e7959cSJarno Rajahalme 			spin_lock_bh(&stats->lock);
14463e7959cSJarno Rajahalme 			if (!*used || time_after(stats->used, *used))
14563e7959cSJarno Rajahalme 				*used = stats->used;
14663e7959cSJarno Rajahalme 			*tcp_flags |= stats->tcp_flags;
14763e7959cSJarno Rajahalme 			ovs_stats->n_packets += stats->packet_count;
14863e7959cSJarno Rajahalme 			ovs_stats->n_bytes += stats->byte_count;
14963e7959cSJarno Rajahalme 			spin_unlock_bh(&stats->lock);
1504f647e0aSFlavio Leitner 		}
151e298e505SPravin B Shelar 	}
152e298e505SPravin B Shelar }
153e298e505SPravin B Shelar 
15486ec8dbaSJarno Rajahalme /* Called with ovs_mutex. */
155e298e505SPravin B Shelar void ovs_flow_stats_clear(struct sw_flow *flow)
156e298e505SPravin B Shelar {
157db74a333SThadeu Lima de Souza Cascardo 	int cpu;
158e298e505SPravin B Shelar 
159db74a333SThadeu Lima de Souza Cascardo 	/* We open code this to make sure cpu 0 is always considered */
160c4b2bf6bSTonghao Zhang 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
161aef833c5SPablo Neira Ayuso 		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
16223dabf88SJarno Rajahalme 
16363e7959cSJarno Rajahalme 		if (stats) {
16463e7959cSJarno Rajahalme 			spin_lock_bh(&stats->lock);
16563e7959cSJarno Rajahalme 			stats->used = 0;
16663e7959cSJarno Rajahalme 			stats->packet_count = 0;
16763e7959cSJarno Rajahalme 			stats->byte_count = 0;
16863e7959cSJarno Rajahalme 			stats->tcp_flags = 0;
16963e7959cSJarno Rajahalme 			spin_unlock_bh(&stats->lock);
17063e7959cSJarno Rajahalme 		}
17163e7959cSJarno Rajahalme 	}
172e298e505SPravin B Shelar }
17303f0d916SAndy Zhou 
174ccb1352eSJesse Gross static int check_header(struct sk_buff *skb, int len)
175ccb1352eSJesse Gross {
176ccb1352eSJesse Gross 	if (unlikely(skb->len < len))
177ccb1352eSJesse Gross 		return -EINVAL;
178ccb1352eSJesse Gross 	if (unlikely(!pskb_may_pull(skb, len)))
179ccb1352eSJesse Gross 		return -ENOMEM;
180ccb1352eSJesse Gross 	return 0;
181ccb1352eSJesse Gross }
182ccb1352eSJesse Gross 
183ccb1352eSJesse Gross static bool arphdr_ok(struct sk_buff *skb)
184ccb1352eSJesse Gross {
185ccb1352eSJesse Gross 	return pskb_may_pull(skb, skb_network_offset(skb) +
186ccb1352eSJesse Gross 				  sizeof(struct arp_eth_header));
187ccb1352eSJesse Gross }
188ccb1352eSJesse Gross 
189ccb1352eSJesse Gross static int check_iphdr(struct sk_buff *skb)
190ccb1352eSJesse Gross {
191ccb1352eSJesse Gross 	unsigned int nh_ofs = skb_network_offset(skb);
192ccb1352eSJesse Gross 	unsigned int ip_len;
193ccb1352eSJesse Gross 	int err;
194ccb1352eSJesse Gross 
195ccb1352eSJesse Gross 	err = check_header(skb, nh_ofs + sizeof(struct iphdr));
196ccb1352eSJesse Gross 	if (unlikely(err))
197ccb1352eSJesse Gross 		return err;
198ccb1352eSJesse Gross 
199ccb1352eSJesse Gross 	ip_len = ip_hdrlen(skb);
200ccb1352eSJesse Gross 	if (unlikely(ip_len < sizeof(struct iphdr) ||
201ccb1352eSJesse Gross 		     skb->len < nh_ofs + ip_len))
202ccb1352eSJesse Gross 		return -EINVAL;
203ccb1352eSJesse Gross 
204ccb1352eSJesse Gross 	skb_set_transport_header(skb, nh_ofs + ip_len);
205ccb1352eSJesse Gross 	return 0;
206ccb1352eSJesse Gross }
207ccb1352eSJesse Gross 
208ccb1352eSJesse Gross static bool tcphdr_ok(struct sk_buff *skb)
209ccb1352eSJesse Gross {
210ccb1352eSJesse Gross 	int th_ofs = skb_transport_offset(skb);
211ccb1352eSJesse Gross 	int tcp_len;
212ccb1352eSJesse Gross 
213ccb1352eSJesse Gross 	if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
214ccb1352eSJesse Gross 		return false;
215ccb1352eSJesse Gross 
216ccb1352eSJesse Gross 	tcp_len = tcp_hdrlen(skb);
217ccb1352eSJesse Gross 	if (unlikely(tcp_len < sizeof(struct tcphdr) ||
218ccb1352eSJesse Gross 		     skb->len < th_ofs + tcp_len))
219ccb1352eSJesse Gross 		return false;
220ccb1352eSJesse Gross 
221ccb1352eSJesse Gross 	return true;
222ccb1352eSJesse Gross }
223ccb1352eSJesse Gross 
224ccb1352eSJesse Gross static bool udphdr_ok(struct sk_buff *skb)
225ccb1352eSJesse Gross {
226ccb1352eSJesse Gross 	return pskb_may_pull(skb, skb_transport_offset(skb) +
227ccb1352eSJesse Gross 				  sizeof(struct udphdr));
228ccb1352eSJesse Gross }
229ccb1352eSJesse Gross 
230a175a723SJoe Stringer static bool sctphdr_ok(struct sk_buff *skb)
231a175a723SJoe Stringer {
232a175a723SJoe Stringer 	return pskb_may_pull(skb, skb_transport_offset(skb) +
233a175a723SJoe Stringer 				  sizeof(struct sctphdr));
234a175a723SJoe Stringer }
235a175a723SJoe Stringer 
236ccb1352eSJesse Gross static bool icmphdr_ok(struct sk_buff *skb)
237ccb1352eSJesse Gross {
238ccb1352eSJesse Gross 	return pskb_may_pull(skb, skb_transport_offset(skb) +
239ccb1352eSJesse Gross 				  sizeof(struct icmphdr));
240ccb1352eSJesse Gross }
241ccb1352eSJesse Gross 
24203f0d916SAndy Zhou static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
243ccb1352eSJesse Gross {
244fa642f08SYi-Hung Wei 	unsigned short frag_off;
245fa642f08SYi-Hung Wei 	unsigned int payload_ofs = 0;
246ccb1352eSJesse Gross 	unsigned int nh_ofs = skb_network_offset(skb);
247ccb1352eSJesse Gross 	unsigned int nh_len;
248ccb1352eSJesse Gross 	struct ipv6hdr *nh;
249fa642f08SYi-Hung Wei 	int err, nexthdr, flags = 0;
250ccb1352eSJesse Gross 
251ccb1352eSJesse Gross 	err = check_header(skb, nh_ofs + sizeof(*nh));
252ccb1352eSJesse Gross 	if (unlikely(err))
253ccb1352eSJesse Gross 		return err;
254ccb1352eSJesse Gross 
255ccb1352eSJesse Gross 	nh = ipv6_hdr(skb);
256ccb1352eSJesse Gross 
257ccb1352eSJesse Gross 	key->ip.proto = NEXTHDR_NONE;
258ccb1352eSJesse Gross 	key->ip.tos = ipv6_get_dsfield(nh);
259ccb1352eSJesse Gross 	key->ip.ttl = nh->hop_limit;
260ccb1352eSJesse Gross 	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
261ccb1352eSJesse Gross 	key->ipv6.addr.src = nh->saddr;
262ccb1352eSJesse Gross 	key->ipv6.addr.dst = nh->daddr;
263ccb1352eSJesse Gross 
264fa642f08SYi-Hung Wei 	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
265fa642f08SYi-Hung Wei 	if (flags & IP6_FH_F_FRAG) {
26641e4e2cdSYi-Hung Wei 		if (frag_off) {
267ccb1352eSJesse Gross 			key->ip.frag = OVS_FRAG_TYPE_LATER;
26841e4e2cdSYi-Hung Wei 			key->ip.proto = nexthdr;
26941e4e2cdSYi-Hung Wei 			return 0;
27041e4e2cdSYi-Hung Wei 		}
271ccb1352eSJesse Gross 		key->ip.frag = OVS_FRAG_TYPE_FIRST;
27225ef1328SPravin B Shelar 	} else {
27325ef1328SPravin B Shelar 		key->ip.frag = OVS_FRAG_TYPE_NONE;
274ccb1352eSJesse Gross 	}
275ccb1352eSJesse Gross 
276fa642f08SYi-Hung Wei 	/* Delayed handling of error in ipv6_find_hdr() as it
277fa642f08SYi-Hung Wei 	 * always sets flags and frag_off to a valid value which may be
278c30da497SSimon Horman 	 * used to set key->ip.frag above.
279c30da497SSimon Horman 	 */
280fa642f08SYi-Hung Wei 	if (unlikely(nexthdr < 0))
281c30da497SSimon Horman 		return -EPROTO;
282c30da497SSimon Horman 
283ccb1352eSJesse Gross 	nh_len = payload_ofs - nh_ofs;
284ccb1352eSJesse Gross 	skb_set_transport_header(skb, nh_ofs + nh_len);
285ccb1352eSJesse Gross 	key->ip.proto = nexthdr;
286ccb1352eSJesse Gross 	return nh_len;
287ccb1352eSJesse Gross }
288ccb1352eSJesse Gross 
289ccb1352eSJesse Gross static bool icmp6hdr_ok(struct sk_buff *skb)
290ccb1352eSJesse Gross {
291ccb1352eSJesse Gross 	return pskb_may_pull(skb, skb_transport_offset(skb) +
292ccb1352eSJesse Gross 				  sizeof(struct icmp6hdr));
293ccb1352eSJesse Gross }
294ccb1352eSJesse Gross 
295018c1ddaSEric Garver /**
296018c1ddaSEric Garver  * Parse vlan tag from vlan header.
297018c1ddaSEric Garver  * Returns ERROR on memory error.
298018c1ddaSEric Garver  * Returns 0 if it encounters a non-vlan or incomplete packet.
299018c1ddaSEric Garver  * Returns 1 after successfully parsing vlan tag.
300018c1ddaSEric Garver  */
301df30f740Spravin shelar static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
302df30f740Spravin shelar 			  bool untag_vlan)
303ccb1352eSJesse Gross {
304018c1ddaSEric Garver 	struct vlan_head *vh = (struct vlan_head *)skb->data;
305ccb1352eSJesse Gross 
306018c1ddaSEric Garver 	if (likely(!eth_type_vlan(vh->tpid)))
307ccb1352eSJesse Gross 		return 0;
308ccb1352eSJesse Gross 
309018c1ddaSEric Garver 	if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
310018c1ddaSEric Garver 		return 0;
311018c1ddaSEric Garver 
312018c1ddaSEric Garver 	if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
313ccb1352eSJesse Gross 				 sizeof(__be16))))
314ccb1352eSJesse Gross 		return -ENOMEM;
315ccb1352eSJesse Gross 
316018c1ddaSEric Garver 	vh = (struct vlan_head *)skb->data;
3179df46aefSMichał Mirosław 	key_vh->tci = vh->tci | htons(VLAN_CFI_MASK);
318018c1ddaSEric Garver 	key_vh->tpid = vh->tpid;
319018c1ddaSEric Garver 
320df30f740Spravin shelar 	if (unlikely(untag_vlan)) {
321df30f740Spravin shelar 		int offset = skb->data - skb_mac_header(skb);
322df30f740Spravin shelar 		u16 tci;
323df30f740Spravin shelar 		int err;
324df30f740Spravin shelar 
325df30f740Spravin shelar 		__skb_push(skb, offset);
326df30f740Spravin shelar 		err = __skb_vlan_pop(skb, &tci);
327df30f740Spravin shelar 		__skb_pull(skb, offset);
328df30f740Spravin shelar 		if (err)
329df30f740Spravin shelar 			return err;
330df30f740Spravin shelar 		__vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
331df30f740Spravin shelar 	} else {
332018c1ddaSEric Garver 		__skb_pull(skb, sizeof(struct vlan_head));
333df30f740Spravin shelar 	}
334018c1ddaSEric Garver 	return 1;
335018c1ddaSEric Garver }
336018c1ddaSEric Garver 
3375108bbadSJiri Benc static void clear_vlan(struct sw_flow_key *key)
338018c1ddaSEric Garver {
339018c1ddaSEric Garver 	key->eth.vlan.tci = 0;
340018c1ddaSEric Garver 	key->eth.vlan.tpid = 0;
341018c1ddaSEric Garver 	key->eth.cvlan.tci = 0;
342018c1ddaSEric Garver 	key->eth.cvlan.tpid = 0;
3435108bbadSJiri Benc }
3445108bbadSJiri Benc 
3455108bbadSJiri Benc static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
3465108bbadSJiri Benc {
3475108bbadSJiri Benc 	int res;
348018c1ddaSEric Garver 
34920ecf1e4SJiri Benc 	if (skb_vlan_tag_present(skb)) {
3509df46aefSMichał Mirosław 		key->eth.vlan.tci = htons(skb->vlan_tci) | htons(VLAN_CFI_MASK);
351018c1ddaSEric Garver 		key->eth.vlan.tpid = skb->vlan_proto;
352018c1ddaSEric Garver 	} else {
353018c1ddaSEric Garver 		/* Parse outer vlan tag in the non-accelerated case. */
354df30f740Spravin shelar 		res = parse_vlan_tag(skb, &key->eth.vlan, true);
355018c1ddaSEric Garver 		if (res <= 0)
356018c1ddaSEric Garver 			return res;
357018c1ddaSEric Garver 	}
358018c1ddaSEric Garver 
359018c1ddaSEric Garver 	/* Parse inner vlan tag. */
360df30f740Spravin shelar 	res = parse_vlan_tag(skb, &key->eth.cvlan, false);
361018c1ddaSEric Garver 	if (res <= 0)
362018c1ddaSEric Garver 		return res;
363ccb1352eSJesse Gross 
364ccb1352eSJesse Gross 	return 0;
365ccb1352eSJesse Gross }
366ccb1352eSJesse Gross 
367ccb1352eSJesse Gross static __be16 parse_ethertype(struct sk_buff *skb)
368ccb1352eSJesse Gross {
369ccb1352eSJesse Gross 	struct llc_snap_hdr {
370ccb1352eSJesse Gross 		u8  dsap;  /* Always 0xAA */
371ccb1352eSJesse Gross 		u8  ssap;  /* Always 0xAA */
372ccb1352eSJesse Gross 		u8  ctrl;
373ccb1352eSJesse Gross 		u8  oui[3];
374ccb1352eSJesse Gross 		__be16 ethertype;
375ccb1352eSJesse Gross 	};
376ccb1352eSJesse Gross 	struct llc_snap_hdr *llc;
377ccb1352eSJesse Gross 	__be16 proto;
378ccb1352eSJesse Gross 
379ccb1352eSJesse Gross 	proto = *(__be16 *) skb->data;
380ccb1352eSJesse Gross 	__skb_pull(skb, sizeof(__be16));
381ccb1352eSJesse Gross 
3826713fc9bSAlexander Duyck 	if (eth_proto_is_802_3(proto))
383ccb1352eSJesse Gross 		return proto;
384ccb1352eSJesse Gross 
385ccb1352eSJesse Gross 	if (skb->len < sizeof(struct llc_snap_hdr))
386ccb1352eSJesse Gross 		return htons(ETH_P_802_2);
387ccb1352eSJesse Gross 
388ccb1352eSJesse Gross 	if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
389ccb1352eSJesse Gross 		return htons(0);
390ccb1352eSJesse Gross 
391ccb1352eSJesse Gross 	llc = (struct llc_snap_hdr *) skb->data;
392ccb1352eSJesse Gross 	if (llc->dsap != LLC_SAP_SNAP ||
393ccb1352eSJesse Gross 	    llc->ssap != LLC_SAP_SNAP ||
394ccb1352eSJesse Gross 	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
395ccb1352eSJesse Gross 		return htons(ETH_P_802_2);
396ccb1352eSJesse Gross 
397ccb1352eSJesse Gross 	__skb_pull(skb, sizeof(struct llc_snap_hdr));
39817b682a0SRich Lane 
3996713fc9bSAlexander Duyck 	if (eth_proto_is_802_3(llc->ethertype))
400ccb1352eSJesse Gross 		return llc->ethertype;
40117b682a0SRich Lane 
40217b682a0SRich Lane 	return htons(ETH_P_802_2);
403ccb1352eSJesse Gross }
404ccb1352eSJesse Gross 
405ccb1352eSJesse Gross static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
40603f0d916SAndy Zhou 			int nh_len)
407ccb1352eSJesse Gross {
408ccb1352eSJesse Gross 	struct icmp6hdr *icmp = icmp6_hdr(skb);
409ccb1352eSJesse Gross 
410ccb1352eSJesse Gross 	/* The ICMPv6 type and code fields use the 16-bit transport port
411ccb1352eSJesse Gross 	 * fields, so we need to store them in 16-bit network byte order.
412ccb1352eSJesse Gross 	 */
4131139e241SJarno Rajahalme 	key->tp.src = htons(icmp->icmp6_type);
4141139e241SJarno Rajahalme 	key->tp.dst = htons(icmp->icmp6_code);
41525ef1328SPravin B Shelar 	memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
416ccb1352eSJesse Gross 
417ccb1352eSJesse Gross 	if (icmp->icmp6_code == 0 &&
418ccb1352eSJesse Gross 	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
419ccb1352eSJesse Gross 	     icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
420ccb1352eSJesse Gross 		int icmp_len = skb->len - skb_transport_offset(skb);
421ccb1352eSJesse Gross 		struct nd_msg *nd;
422ccb1352eSJesse Gross 		int offset;
423ccb1352eSJesse Gross 
424ccb1352eSJesse Gross 		/* In order to process neighbor discovery options, we need the
425ccb1352eSJesse Gross 		 * entire packet.
426ccb1352eSJesse Gross 		 */
427ccb1352eSJesse Gross 		if (unlikely(icmp_len < sizeof(*nd)))
42803f0d916SAndy Zhou 			return 0;
42903f0d916SAndy Zhou 
43003f0d916SAndy Zhou 		if (unlikely(skb_linearize(skb)))
43103f0d916SAndy Zhou 			return -ENOMEM;
432ccb1352eSJesse Gross 
433ccb1352eSJesse Gross 		nd = (struct nd_msg *)skb_transport_header(skb);
434ccb1352eSJesse Gross 		key->ipv6.nd.target = nd->target;
435ccb1352eSJesse Gross 
436ccb1352eSJesse Gross 		icmp_len -= sizeof(*nd);
437ccb1352eSJesse Gross 		offset = 0;
438ccb1352eSJesse Gross 		while (icmp_len >= 8) {
439ccb1352eSJesse Gross 			struct nd_opt_hdr *nd_opt =
440ccb1352eSJesse Gross 				 (struct nd_opt_hdr *)(nd->opt + offset);
441ccb1352eSJesse Gross 			int opt_len = nd_opt->nd_opt_len * 8;
442ccb1352eSJesse Gross 
443ccb1352eSJesse Gross 			if (unlikely(!opt_len || opt_len > icmp_len))
44403f0d916SAndy Zhou 				return 0;
445ccb1352eSJesse Gross 
446ccb1352eSJesse Gross 			/* Store the link layer address if the appropriate
447ccb1352eSJesse Gross 			 * option is provided.  It is considered an error if
448ccb1352eSJesse Gross 			 * the same link layer option is specified twice.
449ccb1352eSJesse Gross 			 */
450ccb1352eSJesse Gross 			if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
451ccb1352eSJesse Gross 			    && opt_len == 8) {
452ccb1352eSJesse Gross 				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
453ccb1352eSJesse Gross 					goto invalid;
4548c63ff09SJoe Perches 				ether_addr_copy(key->ipv6.nd.sll,
4558c63ff09SJoe Perches 						&nd->opt[offset+sizeof(*nd_opt)]);
456ccb1352eSJesse Gross 			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
457ccb1352eSJesse Gross 				   && opt_len == 8) {
458ccb1352eSJesse Gross 				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
459ccb1352eSJesse Gross 					goto invalid;
4608c63ff09SJoe Perches 				ether_addr_copy(key->ipv6.nd.tll,
4618c63ff09SJoe Perches 						&nd->opt[offset+sizeof(*nd_opt)]);
462ccb1352eSJesse Gross 			}
463ccb1352eSJesse Gross 
464ccb1352eSJesse Gross 			icmp_len -= opt_len;
465ccb1352eSJesse Gross 			offset += opt_len;
466ccb1352eSJesse Gross 		}
467ccb1352eSJesse Gross 	}
468ccb1352eSJesse Gross 
46903f0d916SAndy Zhou 	return 0;
470ccb1352eSJesse Gross 
471ccb1352eSJesse Gross invalid:
472ccb1352eSJesse Gross 	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
473ccb1352eSJesse Gross 	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
474ccb1352eSJesse Gross 	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
475ccb1352eSJesse Gross 
47603f0d916SAndy Zhou 	return 0;
477ccb1352eSJesse Gross }
478ccb1352eSJesse Gross 
479b2d0f5d5SYi Yang static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
480b2d0f5d5SYi Yang {
481b2d0f5d5SYi Yang 	struct nshhdr *nh;
482b2d0f5d5SYi Yang 	unsigned int nh_ofs = skb_network_offset(skb);
483b2d0f5d5SYi Yang 	u8 version, length;
484b2d0f5d5SYi Yang 	int err;
485b2d0f5d5SYi Yang 
486b2d0f5d5SYi Yang 	err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
487b2d0f5d5SYi Yang 	if (unlikely(err))
488b2d0f5d5SYi Yang 		return err;
489b2d0f5d5SYi Yang 
490b2d0f5d5SYi Yang 	nh = nsh_hdr(skb);
491b2d0f5d5SYi Yang 	version = nsh_get_ver(nh);
492b2d0f5d5SYi Yang 	length = nsh_hdr_len(nh);
493b2d0f5d5SYi Yang 
494b2d0f5d5SYi Yang 	if (version != 0)
495b2d0f5d5SYi Yang 		return -EINVAL;
496b2d0f5d5SYi Yang 
497b2d0f5d5SYi Yang 	err = check_header(skb, nh_ofs + length);
498b2d0f5d5SYi Yang 	if (unlikely(err))
499b2d0f5d5SYi Yang 		return err;
500b2d0f5d5SYi Yang 
501b2d0f5d5SYi Yang 	nh = nsh_hdr(skb);
502b2d0f5d5SYi Yang 	key->nsh.base.flags = nsh_get_flags(nh);
503b2d0f5d5SYi Yang 	key->nsh.base.ttl = nsh_get_ttl(nh);
504b2d0f5d5SYi Yang 	key->nsh.base.mdtype = nh->mdtype;
505b2d0f5d5SYi Yang 	key->nsh.base.np = nh->np;
506b2d0f5d5SYi Yang 	key->nsh.base.path_hdr = nh->path_hdr;
507b2d0f5d5SYi Yang 	switch (key->nsh.base.mdtype) {
508b2d0f5d5SYi Yang 	case NSH_M_TYPE1:
509b2d0f5d5SYi Yang 		if (length != NSH_M_TYPE1_LEN)
510b2d0f5d5SYi Yang 			return -EINVAL;
511b2d0f5d5SYi Yang 		memcpy(key->nsh.context, nh->md1.context,
512b2d0f5d5SYi Yang 		       sizeof(nh->md1));
513b2d0f5d5SYi Yang 		break;
514b2d0f5d5SYi Yang 	case NSH_M_TYPE2:
515b2d0f5d5SYi Yang 		memset(key->nsh.context, 0,
516b2d0f5d5SYi Yang 		       sizeof(nh->md1));
517b2d0f5d5SYi Yang 		break;
518b2d0f5d5SYi Yang 	default:
519b2d0f5d5SYi Yang 		return -EINVAL;
520b2d0f5d5SYi Yang 	}
521b2d0f5d5SYi Yang 
522b2d0f5d5SYi Yang 	return 0;
523b2d0f5d5SYi Yang }
524b2d0f5d5SYi Yang 
525ccb1352eSJesse Gross /**
526*ad06a566SGreg Rose  * key_extract_l3l4 - extracts L3/L4 header information.
527ccb1352eSJesse Gross  * @skb: sk_buff that contains the frame, with skb->data pointing to the
528*ad06a566SGreg Rose  *       L3 header
529ccb1352eSJesse Gross  * @key: output flow key
530ccb1352eSJesse Gross  *
531ccb1352eSJesse Gross  */
532*ad06a566SGreg Rose static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
533ccb1352eSJesse Gross {
53403f0d916SAndy Zhou 	int error;
535ccb1352eSJesse Gross 
536ccb1352eSJesse Gross 	/* Network layer. */
537ccb1352eSJesse Gross 	if (key->eth.type == htons(ETH_P_IP)) {
538ccb1352eSJesse Gross 		struct iphdr *nh;
539ccb1352eSJesse Gross 		__be16 offset;
540ccb1352eSJesse Gross 
541ccb1352eSJesse Gross 		error = check_iphdr(skb);
542ccb1352eSJesse Gross 		if (unlikely(error)) {
54307148121SJesse Gross 			memset(&key->ip, 0, sizeof(key->ip));
54407148121SJesse Gross 			memset(&key->ipv4, 0, sizeof(key->ipv4));
545ccb1352eSJesse Gross 			if (error == -EINVAL) {
546ccb1352eSJesse Gross 				skb->transport_header = skb->network_header;
547ccb1352eSJesse Gross 				error = 0;
548ccb1352eSJesse Gross 			}
54903f0d916SAndy Zhou 			return error;
550ccb1352eSJesse Gross 		}
551ccb1352eSJesse Gross 
552ccb1352eSJesse Gross 		nh = ip_hdr(skb);
553ccb1352eSJesse Gross 		key->ipv4.addr.src = nh->saddr;
554ccb1352eSJesse Gross 		key->ipv4.addr.dst = nh->daddr;
555ccb1352eSJesse Gross 
556ccb1352eSJesse Gross 		key->ip.proto = nh->protocol;
557ccb1352eSJesse Gross 		key->ip.tos = nh->tos;
558ccb1352eSJesse Gross 		key->ip.ttl = nh->ttl;
559ccb1352eSJesse Gross 
560ccb1352eSJesse Gross 		offset = nh->frag_off & htons(IP_OFFSET);
561ccb1352eSJesse Gross 		if (offset) {
562ccb1352eSJesse Gross 			key->ip.frag = OVS_FRAG_TYPE_LATER;
56303f0d916SAndy Zhou 			return 0;
564ccb1352eSJesse Gross 		}
5650c19f846SWillem de Bruijn 		if (nh->frag_off & htons(IP_MF) ||
5660c19f846SWillem de Bruijn 			skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
567ccb1352eSJesse Gross 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
56807148121SJesse Gross 		else
56907148121SJesse Gross 			key->ip.frag = OVS_FRAG_TYPE_NONE;
570ccb1352eSJesse Gross 
571ccb1352eSJesse Gross 		/* Transport layer. */
572ccb1352eSJesse Gross 		if (key->ip.proto == IPPROTO_TCP) {
573ccb1352eSJesse Gross 			if (tcphdr_ok(skb)) {
574ccb1352eSJesse Gross 				struct tcphdr *tcp = tcp_hdr(skb);
5751139e241SJarno Rajahalme 				key->tp.src = tcp->source;
5761139e241SJarno Rajahalme 				key->tp.dst = tcp->dest;
5771139e241SJarno Rajahalme 				key->tp.flags = TCP_FLAGS_BE16(tcp);
57807148121SJesse Gross 			} else {
57907148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
580ccb1352eSJesse Gross 			}
58107148121SJesse Gross 
582ccb1352eSJesse Gross 		} else if (key->ip.proto == IPPROTO_UDP) {
583ccb1352eSJesse Gross 			if (udphdr_ok(skb)) {
584ccb1352eSJesse Gross 				struct udphdr *udp = udp_hdr(skb);
5851139e241SJarno Rajahalme 				key->tp.src = udp->source;
5861139e241SJarno Rajahalme 				key->tp.dst = udp->dest;
58707148121SJesse Gross 			} else {
58807148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
589ccb1352eSJesse Gross 			}
590a175a723SJoe Stringer 		} else if (key->ip.proto == IPPROTO_SCTP) {
591a175a723SJoe Stringer 			if (sctphdr_ok(skb)) {
592a175a723SJoe Stringer 				struct sctphdr *sctp = sctp_hdr(skb);
5931139e241SJarno Rajahalme 				key->tp.src = sctp->source;
5941139e241SJarno Rajahalme 				key->tp.dst = sctp->dest;
59507148121SJesse Gross 			} else {
59607148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
597a175a723SJoe Stringer 			}
598ccb1352eSJesse Gross 		} else if (key->ip.proto == IPPROTO_ICMP) {
599ccb1352eSJesse Gross 			if (icmphdr_ok(skb)) {
600ccb1352eSJesse Gross 				struct icmphdr *icmp = icmp_hdr(skb);
601ccb1352eSJesse Gross 				/* The ICMP type and code fields use the 16-bit
602ccb1352eSJesse Gross 				 * transport port fields, so we need to store
603ccb1352eSJesse Gross 				 * them in 16-bit network byte order. */
6041139e241SJarno Rajahalme 				key->tp.src = htons(icmp->type);
6051139e241SJarno Rajahalme 				key->tp.dst = htons(icmp->code);
60607148121SJesse Gross 			} else {
60707148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
608ccb1352eSJesse Gross 			}
609ccb1352eSJesse Gross 		}
610ccb1352eSJesse Gross 
61107148121SJesse Gross 	} else if (key->eth.type == htons(ETH_P_ARP) ||
61207148121SJesse Gross 		   key->eth.type == htons(ETH_P_RARP)) {
613ccb1352eSJesse Gross 		struct arp_eth_header *arp;
614389f4894SLi RongQing 		bool arp_available = arphdr_ok(skb);
615ccb1352eSJesse Gross 
616ccb1352eSJesse Gross 		arp = (struct arp_eth_header *)skb_network_header(skb);
617ccb1352eSJesse Gross 
618389f4894SLi RongQing 		if (arp_available &&
61907148121SJesse Gross 		    arp->ar_hrd == htons(ARPHRD_ETHER) &&
62007148121SJesse Gross 		    arp->ar_pro == htons(ETH_P_IP) &&
62107148121SJesse Gross 		    arp->ar_hln == ETH_ALEN &&
62207148121SJesse Gross 		    arp->ar_pln == 4) {
623ccb1352eSJesse Gross 
624ccb1352eSJesse Gross 			/* We only match on the lower 8 bits of the opcode. */
625ccb1352eSJesse Gross 			if (ntohs(arp->ar_op) <= 0xff)
626ccb1352eSJesse Gross 				key->ip.proto = ntohs(arp->ar_op);
62707148121SJesse Gross 			else
62807148121SJesse Gross 				key->ip.proto = 0;
62907148121SJesse Gross 
630ccb1352eSJesse Gross 			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
631ccb1352eSJesse Gross 			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
6328c63ff09SJoe Perches 			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
6338c63ff09SJoe Perches 			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
63407148121SJesse Gross 		} else {
63507148121SJesse Gross 			memset(&key->ip, 0, sizeof(key->ip));
63607148121SJesse Gross 			memset(&key->ipv4, 0, sizeof(key->ipv4));
637ccb1352eSJesse Gross 		}
63825cd9ba0SSimon Horman 	} else if (eth_p_mpls(key->eth.type)) {
63925cd9ba0SSimon Horman 		size_t stack_len = MPLS_HLEN;
64025cd9ba0SSimon Horman 
641f7d49bceSJiri Benc 		skb_set_inner_network_header(skb, skb->mac_len);
64225cd9ba0SSimon Horman 		while (1) {
64325cd9ba0SSimon Horman 			__be32 lse;
64425cd9ba0SSimon Horman 
64525cd9ba0SSimon Horman 			error = check_header(skb, skb->mac_len + stack_len);
64625cd9ba0SSimon Horman 			if (unlikely(error))
64725cd9ba0SSimon Horman 				return 0;
64825cd9ba0SSimon Horman 
649f7d49bceSJiri Benc 			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
65025cd9ba0SSimon Horman 
65125cd9ba0SSimon Horman 			if (stack_len == MPLS_HLEN)
65225cd9ba0SSimon Horman 				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
65325cd9ba0SSimon Horman 
654f7d49bceSJiri Benc 			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
65525cd9ba0SSimon Horman 			if (lse & htonl(MPLS_LS_S_MASK))
65625cd9ba0SSimon Horman 				break;
65725cd9ba0SSimon Horman 
65825cd9ba0SSimon Horman 			stack_len += MPLS_HLEN;
65925cd9ba0SSimon Horman 		}
660ccb1352eSJesse Gross 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
661ccb1352eSJesse Gross 		int nh_len;             /* IPv6 Header + Extensions */
662ccb1352eSJesse Gross 
66303f0d916SAndy Zhou 		nh_len = parse_ipv6hdr(skb, key);
664ccb1352eSJesse Gross 		if (unlikely(nh_len < 0)) {
665c30da497SSimon Horman 			switch (nh_len) {
666c30da497SSimon Horman 			case -EINVAL:
66707148121SJesse Gross 				memset(&key->ip, 0, sizeof(key->ip));
66807148121SJesse Gross 				memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
669c30da497SSimon Horman 				/* fall-through */
670c30da497SSimon Horman 			case -EPROTO:
671ccb1352eSJesse Gross 				skb->transport_header = skb->network_header;
67203f0d916SAndy Zhou 				error = 0;
673c30da497SSimon Horman 				break;
674c30da497SSimon Horman 			default:
675ccb1352eSJesse Gross 				error = nh_len;
67603f0d916SAndy Zhou 			}
67703f0d916SAndy Zhou 			return error;
678ccb1352eSJesse Gross 		}
679ccb1352eSJesse Gross 
680ccb1352eSJesse Gross 		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
68103f0d916SAndy Zhou 			return 0;
6820c19f846SWillem de Bruijn 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
6830c19f846SWillem de Bruijn 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
6840c19f846SWillem de Bruijn 
685ccb1352eSJesse Gross 		/* Transport layer. */
686ccb1352eSJesse Gross 		if (key->ip.proto == NEXTHDR_TCP) {
687ccb1352eSJesse Gross 			if (tcphdr_ok(skb)) {
688ccb1352eSJesse Gross 				struct tcphdr *tcp = tcp_hdr(skb);
6891139e241SJarno Rajahalme 				key->tp.src = tcp->source;
6901139e241SJarno Rajahalme 				key->tp.dst = tcp->dest;
6911139e241SJarno Rajahalme 				key->tp.flags = TCP_FLAGS_BE16(tcp);
69207148121SJesse Gross 			} else {
69307148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
694ccb1352eSJesse Gross 			}
695ccb1352eSJesse Gross 		} else if (key->ip.proto == NEXTHDR_UDP) {
696ccb1352eSJesse Gross 			if (udphdr_ok(skb)) {
697ccb1352eSJesse Gross 				struct udphdr *udp = udp_hdr(skb);
6981139e241SJarno Rajahalme 				key->tp.src = udp->source;
6991139e241SJarno Rajahalme 				key->tp.dst = udp->dest;
70007148121SJesse Gross 			} else {
70107148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
702ccb1352eSJesse Gross 			}
703a175a723SJoe Stringer 		} else if (key->ip.proto == NEXTHDR_SCTP) {
704a175a723SJoe Stringer 			if (sctphdr_ok(skb)) {
705a175a723SJoe Stringer 				struct sctphdr *sctp = sctp_hdr(skb);
7061139e241SJarno Rajahalme 				key->tp.src = sctp->source;
7071139e241SJarno Rajahalme 				key->tp.dst = sctp->dest;
70807148121SJesse Gross 			} else {
70907148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
710a175a723SJoe Stringer 			}
711ccb1352eSJesse Gross 		} else if (key->ip.proto == NEXTHDR_ICMP) {
712ccb1352eSJesse Gross 			if (icmp6hdr_ok(skb)) {
71303f0d916SAndy Zhou 				error = parse_icmpv6(skb, key, nh_len);
71403f0d916SAndy Zhou 				if (error)
71503f0d916SAndy Zhou 					return error;
71607148121SJesse Gross 			} else {
71707148121SJesse Gross 				memset(&key->tp, 0, sizeof(key->tp));
718ccb1352eSJesse Gross 			}
719ccb1352eSJesse Gross 		}
720b2d0f5d5SYi Yang 	} else if (key->eth.type == htons(ETH_P_NSH)) {
721b2d0f5d5SYi Yang 		error = parse_nsh(skb, key);
722b2d0f5d5SYi Yang 		if (error)
723b2d0f5d5SYi Yang 			return error;
724ccb1352eSJesse Gross 	}
72503f0d916SAndy Zhou 	return 0;
726ccb1352eSJesse Gross }
72783c8df26SPravin B Shelar 
728*ad06a566SGreg Rose /**
729*ad06a566SGreg Rose  * key_extract - extracts a flow key from an Ethernet frame.
730*ad06a566SGreg Rose  * @skb: sk_buff that contains the frame, with skb->data pointing to the
731*ad06a566SGreg Rose  * Ethernet header
732*ad06a566SGreg Rose  * @key: output flow key
733*ad06a566SGreg Rose  *
734*ad06a566SGreg Rose  * The caller must ensure that skb->len >= ETH_HLEN.
735*ad06a566SGreg Rose  *
736*ad06a566SGreg Rose  * Returns 0 if successful, otherwise a negative errno value.
737*ad06a566SGreg Rose  *
738*ad06a566SGreg Rose  * Initializes @skb header fields as follows:
739*ad06a566SGreg Rose  *
740*ad06a566SGreg Rose  *    - skb->mac_header: the L2 header.
741*ad06a566SGreg Rose  *
742*ad06a566SGreg Rose  *    - skb->network_header: just past the L2 header, or just past the
743*ad06a566SGreg Rose  *      VLAN header, to the first byte of the L2 payload.
744*ad06a566SGreg Rose  *
745*ad06a566SGreg Rose  *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
746*ad06a566SGreg Rose  *      on output, then just past the IP header, if one is present and
747*ad06a566SGreg Rose  *      of a correct length, otherwise the same as skb->network_header.
748*ad06a566SGreg Rose  *      For other key->eth.type values it is left untouched.
749*ad06a566SGreg Rose  *
750*ad06a566SGreg Rose  *    - skb->protocol: the type of the data starting at skb->network_header.
751*ad06a566SGreg Rose  *      Equals to key->eth.type.
752*ad06a566SGreg Rose  */
753*ad06a566SGreg Rose static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
754*ad06a566SGreg Rose {
755*ad06a566SGreg Rose 	struct ethhdr *eth;
756*ad06a566SGreg Rose 
757*ad06a566SGreg Rose 	/* Flags are always used as part of stats */
758*ad06a566SGreg Rose 	key->tp.flags = 0;
759*ad06a566SGreg Rose 
760*ad06a566SGreg Rose 	skb_reset_mac_header(skb);
761*ad06a566SGreg Rose 
762*ad06a566SGreg Rose 	/* Link layer. */
763*ad06a566SGreg Rose 	clear_vlan(key);
764*ad06a566SGreg Rose 	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
765*ad06a566SGreg Rose 		if (unlikely(eth_type_vlan(skb->protocol)))
766*ad06a566SGreg Rose 			return -EINVAL;
767*ad06a566SGreg Rose 
768*ad06a566SGreg Rose 		skb_reset_network_header(skb);
769*ad06a566SGreg Rose 		key->eth.type = skb->protocol;
770*ad06a566SGreg Rose 	} else {
771*ad06a566SGreg Rose 		eth = eth_hdr(skb);
772*ad06a566SGreg Rose 		ether_addr_copy(key->eth.src, eth->h_source);
773*ad06a566SGreg Rose 		ether_addr_copy(key->eth.dst, eth->h_dest);
774*ad06a566SGreg Rose 
775*ad06a566SGreg Rose 		__skb_pull(skb, 2 * ETH_ALEN);
776*ad06a566SGreg Rose 		/* We are going to push all headers that we pull, so no need to
777*ad06a566SGreg Rose 		 * update skb->csum here.
778*ad06a566SGreg Rose 		 */
779*ad06a566SGreg Rose 
780*ad06a566SGreg Rose 		if (unlikely(parse_vlan(skb, key)))
781*ad06a566SGreg Rose 			return -ENOMEM;
782*ad06a566SGreg Rose 
783*ad06a566SGreg Rose 		key->eth.type = parse_ethertype(skb);
784*ad06a566SGreg Rose 		if (unlikely(key->eth.type == htons(0)))
785*ad06a566SGreg Rose 			return -ENOMEM;
786*ad06a566SGreg Rose 
787*ad06a566SGreg Rose 		/* Multiple tagged packets need to retain TPID to satisfy
788*ad06a566SGreg Rose 		 * skb_vlan_pop(), which will later shift the ethertype into
789*ad06a566SGreg Rose 		 * skb->protocol.
790*ad06a566SGreg Rose 		 */
791*ad06a566SGreg Rose 		if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
792*ad06a566SGreg Rose 			skb->protocol = key->eth.cvlan.tpid;
793*ad06a566SGreg Rose 		else
794*ad06a566SGreg Rose 			skb->protocol = key->eth.type;
795*ad06a566SGreg Rose 
796*ad06a566SGreg Rose 		skb_reset_network_header(skb);
797*ad06a566SGreg Rose 		__skb_push(skb, skb->data - skb_mac_header(skb));
798*ad06a566SGreg Rose 	}
799*ad06a566SGreg Rose 
800*ad06a566SGreg Rose 	skb_reset_mac_len(skb);
801*ad06a566SGreg Rose 
802*ad06a566SGreg Rose 	/* Fill out L3/L4 key info, if any */
803*ad06a566SGreg Rose 	return key_extract_l3l4(skb, key);
804*ad06a566SGreg Rose }
805*ad06a566SGreg Rose 
/* Re-run only the L3/L4 part of key extraction on @skb into @key.
 *
 * Conntrack fragment handling expects L3 headers, so it gets this
 * exported entry point to the file-local key_extract_l3l4().
 *
 * Returns whatever key_extract_l3l4() returns.
 */
int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err = key_extract_l3l4(skb, key);

	return err;
}
813*ad06a566SGreg Rose 
814971427f3SAndy Zhou int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
815971427f3SAndy Zhou {
8166f56f618SYi-Hung Wei 	int res;
8176f56f618SYi-Hung Wei 
8186f56f618SYi-Hung Wei 	res = key_extract(skb, key);
8196f56f618SYi-Hung Wei 	if (!res)
8206f56f618SYi-Hung Wei 		key->mac_proto &= ~SW_FLOW_KEY_INVALID;
8216f56f618SYi-Hung Wei 
8226f56f618SYi-Hung Wei 	return res;
823971427f3SAndy Zhou }
824971427f3SAndy Zhou 
8255108bbadSJiri Benc static int key_extract_mac_proto(struct sk_buff *skb)
8265108bbadSJiri Benc {
8275108bbadSJiri Benc 	switch (skb->dev->type) {
8285108bbadSJiri Benc 	case ARPHRD_ETHER:
8295108bbadSJiri Benc 		return MAC_PROTO_ETHERNET;
8305108bbadSJiri Benc 	case ARPHRD_NONE:
8315108bbadSJiri Benc 		if (skb->protocol == htons(ETH_P_TEB))
8325108bbadSJiri Benc 			return MAC_PROTO_ETHERNET;
8335108bbadSJiri Benc 		return MAC_PROTO_NONE;
8345108bbadSJiri Benc 	}
8355108bbadSJiri Benc 	WARN_ON_ONCE(1);
8365108bbadSJiri Benc 	return -EINVAL;
8375108bbadSJiri Benc }
8385108bbadSJiri Benc 
8391d8fff90SThomas Graf int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
8408c8b1b83SPravin B Shelar 			 struct sk_buff *skb, struct sw_flow_key *key)
84183c8df26SPravin B Shelar {
8429dd7f890SJarno Rajahalme 	int res, err;
8435108bbadSJiri Benc 
84483c8df26SPravin B Shelar 	/* Extract metadata from packet. */
845f5796684SJesse Gross 	if (tun_info) {
84600a93babSJiri Benc 		key->tun_proto = ip_tunnel_info_af(tun_info);
8471d8fff90SThomas Graf 		memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
848f5796684SJesse Gross 
8494c222798SPravin B Shelar 		if (tun_info->options_len) {
850f5796684SJesse Gross 			BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
851f5796684SJesse Gross 						   8)) - 1
852f5796684SJesse Gross 					> sizeof(key->tun_opts));
8534c222798SPravin B Shelar 
8544c222798SPravin B Shelar 			ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
8554c222798SPravin B Shelar 						tun_info);
856f5796684SJesse Gross 			key->tun_opts_len = tun_info->options_len;
857f5796684SJesse Gross 		} else {
858f5796684SJesse Gross 			key->tun_opts_len = 0;
859f5796684SJesse Gross 		}
860f5796684SJesse Gross 	} else  {
86100a93babSJiri Benc 		key->tun_proto = 0;
862f5796684SJesse Gross 		key->tun_opts_len = 0;
86307148121SJesse Gross 		memset(&key->tun_key, 0, sizeof(key->tun_key));
864f5796684SJesse Gross 	}
86583c8df26SPravin B Shelar 
86683c8df26SPravin B Shelar 	key->phy.priority = skb->priority;
86783c8df26SPravin B Shelar 	key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
86883c8df26SPravin B Shelar 	key->phy.skb_mark = skb->mark;
86907148121SJesse Gross 	key->ovs_flow_hash = 0;
8705108bbadSJiri Benc 	res = key_extract_mac_proto(skb);
8715108bbadSJiri Benc 	if (res < 0)
8725108bbadSJiri Benc 		return res;
8735108bbadSJiri Benc 	key->mac_proto = res;
87407148121SJesse Gross 	key->recirc_id = 0;
87507148121SJesse Gross 
8769dd7f890SJarno Rajahalme 	err = key_extract(skb, key);
8779dd7f890SJarno Rajahalme 	if (!err)
8789dd7f890SJarno Rajahalme 		ovs_ct_fill_key(skb, key);   /* Must be after key_extract(). */
8799dd7f890SJarno Rajahalme 	return err;
88083c8df26SPravin B Shelar }
88183c8df26SPravin B Shelar 
882c2ac6673SJoe Stringer int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
88383c8df26SPravin B Shelar 				   struct sk_buff *skb,
88405da5898SJarno Rajahalme 				   struct sw_flow_key *key, bool log)
88583c8df26SPravin B Shelar {
8869dd7f890SJarno Rajahalme 	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
8879dd7f890SJarno Rajahalme 	u64 attrs = 0;
88883c8df26SPravin B Shelar 	int err;
88983c8df26SPravin B Shelar 
8909dd7f890SJarno Rajahalme 	err = parse_flow_nlattrs(attr, a, &attrs, log);
8919dd7f890SJarno Rajahalme 	if (err)
8929dd7f890SJarno Rajahalme 		return -EINVAL;
8939dd7f890SJarno Rajahalme 
89483c8df26SPravin B Shelar 	/* Extract metadata from netlink attributes. */
8959dd7f890SJarno Rajahalme 	err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
89683c8df26SPravin B Shelar 	if (err)
89783c8df26SPravin B Shelar 		return err;
89883c8df26SPravin B Shelar 
8995108bbadSJiri Benc 	/* key_extract assumes that skb->protocol is set-up for
9005108bbadSJiri Benc 	 * layer 3 packets which is the case for other callers,
901df30f740Spravin shelar 	 * in particular packets received from the network stack.
9025108bbadSJiri Benc 	 * Here the correct value can be set from the metadata
9035108bbadSJiri Benc 	 * extracted above.
904df30f740Spravin shelar 	 * For L2 packet key eth type would be zero. skb protocol
905df30f740Spravin shelar 	 * would be set to correct value later during key-extact.
9065108bbadSJiri Benc 	 */
907df30f740Spravin shelar 
9085108bbadSJiri Benc 	skb->protocol = key->eth.type;
9099dd7f890SJarno Rajahalme 	err = key_extract(skb, key);
9109dd7f890SJarno Rajahalme 	if (err)
9119dd7f890SJarno Rajahalme 		return err;
9129dd7f890SJarno Rajahalme 
9139dd7f890SJarno Rajahalme 	/* Check that we have conntrack original direction tuple metadata only
9149dd7f890SJarno Rajahalme 	 * for packets for which it makes sense.  Otherwise the key may be
9159dd7f890SJarno Rajahalme 	 * corrupted due to overlapping key fields.
9169dd7f890SJarno Rajahalme 	 */
9179dd7f890SJarno Rajahalme 	if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
9189dd7f890SJarno Rajahalme 	    key->eth.type != htons(ETH_P_IP))
9199dd7f890SJarno Rajahalme 		return -EINVAL;
9209dd7f890SJarno Rajahalme 	if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
9219dd7f890SJarno Rajahalme 	    (key->eth.type != htons(ETH_P_IPV6) ||
9229dd7f890SJarno Rajahalme 	     sw_flow_key_is_nd(key)))
9239dd7f890SJarno Rajahalme 		return -EINVAL;
9249dd7f890SJarno Rajahalme 
9259dd7f890SJarno Rajahalme 	return 0;
92683c8df26SPravin B Shelar }
927