xref: /linux/net/openvswitch/flow_netlink.c (revision b2d0f5d5dc53532e6f07bc546a476a55ebdfe0f3)
1 /*
2  * Copyright (c) 2007-2017 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18 
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 
21 #include "flow.h"
22 #include "datapath.h"
23 #include <linux/uaccess.h>
24 #include <linux/netdevice.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <net/llc_pdu.h>
29 #include <linux/kernel.h>
30 #include <linux/jhash.h>
31 #include <linux/jiffies.h>
32 #include <linux/llc.h>
33 #include <linux/module.h>
34 #include <linux/in.h>
35 #include <linux/rcupdate.h>
36 #include <linux/if_arp.h>
37 #include <linux/ip.h>
38 #include <linux/ipv6.h>
39 #include <linux/sctp.h>
40 #include <linux/tcp.h>
41 #include <linux/udp.h>
42 #include <linux/icmp.h>
43 #include <linux/icmpv6.h>
44 #include <linux/rculist.h>
45 #include <net/geneve.h>
46 #include <net/ip.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/mpls.h>
50 #include <net/vxlan.h>
51 #include <net/tun_proto.h>
52 #include <net/erspan.h>
53 
54 #include "flow_netlink.h"
55 
/* One entry of a Netlink attribute length table; the tables in this file are
 * arrays of these, indexed by attribute type.
 *
 * @len:  expected payload length in bytes, or one of the negative markers
 *        OVS_ATTR_NESTED / OVS_ATTR_VARIABLE.
 * @next: for a nested attribute, the table describing the attributes inside
 *        the nest; NULL when no such table is given.
 */
struct ovs_len_tbl {
	int len;
	const struct ovs_len_tbl *next;
};
60 
/* Special values for ovs_len_tbl.len.  They are negative, so they cannot be
 * confused with a real payload length: NESTED marks an attribute that holds
 * further attributes, VARIABLE marks one whose length is not fixed.
 */
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
63 
64 static bool actions_may_change_flow(const struct nlattr *actions)
65 {
66 	struct nlattr *nla;
67 	int rem;
68 
69 	nla_for_each_nested(nla, actions, rem) {
70 		u16 action = nla_type(nla);
71 
72 		switch (action) {
73 		case OVS_ACTION_ATTR_OUTPUT:
74 		case OVS_ACTION_ATTR_RECIRC:
75 		case OVS_ACTION_ATTR_TRUNC:
76 		case OVS_ACTION_ATTR_USERSPACE:
77 			break;
78 
79 		case OVS_ACTION_ATTR_CT:
80 		case OVS_ACTION_ATTR_CT_CLEAR:
81 		case OVS_ACTION_ATTR_HASH:
82 		case OVS_ACTION_ATTR_POP_ETH:
83 		case OVS_ACTION_ATTR_POP_MPLS:
84 		case OVS_ACTION_ATTR_POP_NSH:
85 		case OVS_ACTION_ATTR_POP_VLAN:
86 		case OVS_ACTION_ATTR_PUSH_ETH:
87 		case OVS_ACTION_ATTR_PUSH_MPLS:
88 		case OVS_ACTION_ATTR_PUSH_NSH:
89 		case OVS_ACTION_ATTR_PUSH_VLAN:
90 		case OVS_ACTION_ATTR_SAMPLE:
91 		case OVS_ACTION_ATTR_SET:
92 		case OVS_ACTION_ATTR_SET_MASKED:
93 		default:
94 			return true;
95 		}
96 	}
97 	return false;
98 }
99 
100 static void update_range(struct sw_flow_match *match,
101 			 size_t offset, size_t size, bool is_mask)
102 {
103 	struct sw_flow_key_range *range;
104 	size_t start = rounddown(offset, sizeof(long));
105 	size_t end = roundup(offset + size, sizeof(long));
106 
107 	if (!is_mask)
108 		range = &match->range;
109 	else
110 		range = &match->mask->range;
111 
112 	if (range->start == range->end) {
113 		range->start = start;
114 		range->end = end;
115 		return;
116 	}
117 
118 	if (range->start > start)
119 		range->start = start;
120 
121 	if (range->end < end)
122 		range->end = end;
123 }
124 
/* Assign @value to member @field of the flow key, or of the mask's key when
 * @is_mask is true.  The range tracked for the key/mask is first widened via
 * update_range() so that it covers the member.  @field is a member path
 * inside struct sw_flow_key.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			(match)->mask->key.field = value;		    \
		else							    \
			(match)->key->field = value;		            \
	} while (0)
134 
/* Copy @len bytes from @value_p to byte @offset inside the flow key, or
 * inside the mask's key when @is_mask is true, after widening the tracked
 * range to cover [@offset, @offset + @len).  No bounds check is done here;
 * callers must make sure the span lies inside struct sw_flow_key.
 */
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)	    \
	do {								    \
		update_range(match, offset, len, is_mask);		    \
		if (is_mask)						    \
			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
			       len);					   \
		else							    \
			memcpy((u8 *)(match)->key + offset, value_p, len);  \
	} while (0)
144 
/* Field-based form of SW_FLOW_KEY_MEMCPY_OFFSET(): the destination offset is
 * offsetof(struct sw_flow_key, @field).
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)		      \
	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
				  value_p, len, is_mask)
148 
/* Fill every byte of member @field with @value, in the flow key or in the
 * mask's key when @is_mask is true, after widening the tracked range to
 * cover the member.
 */
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)		    \
	do {								    \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			memset((u8 *)&(match)->mask->key.field, value,      \
			       sizeof((match)->mask->key.field));	    \
		else							    \
			memset((u8 *)&(match)->key->field, value,           \
			       sizeof((match)->key->field));                \
	} while (0)
160 
161 static bool match_validate(const struct sw_flow_match *match,
162 			   u64 key_attrs, u64 mask_attrs, bool log)
163 {
164 	u64 key_expected = 0;
165 	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
166 
167 	/* The following mask attributes allowed only if they
168 	 * pass the validation tests. */
169 	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
170 			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
171 			| (1 << OVS_KEY_ATTR_IPV6)
172 			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
173 			| (1 << OVS_KEY_ATTR_TCP)
174 			| (1 << OVS_KEY_ATTR_TCP_FLAGS)
175 			| (1 << OVS_KEY_ATTR_UDP)
176 			| (1 << OVS_KEY_ATTR_SCTP)
177 			| (1 << OVS_KEY_ATTR_ICMP)
178 			| (1 << OVS_KEY_ATTR_ICMPV6)
179 			| (1 << OVS_KEY_ATTR_ARP)
180 			| (1 << OVS_KEY_ATTR_ND)
181 			| (1 << OVS_KEY_ATTR_MPLS)
182 			| (1 << OVS_KEY_ATTR_NSH));
183 
184 	/* Always allowed mask fields. */
185 	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
186 		       | (1 << OVS_KEY_ATTR_IN_PORT)
187 		       | (1 << OVS_KEY_ATTR_ETHERTYPE));
188 
189 	/* Check key attributes. */
190 	if (match->key->eth.type == htons(ETH_P_ARP)
191 			|| match->key->eth.type == htons(ETH_P_RARP)) {
192 		key_expected |= 1 << OVS_KEY_ATTR_ARP;
193 		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
194 			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
195 	}
196 
197 	if (eth_p_mpls(match->key->eth.type)) {
198 		key_expected |= 1 << OVS_KEY_ATTR_MPLS;
199 		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
200 			mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
201 	}
202 
203 	if (match->key->eth.type == htons(ETH_P_IP)) {
204 		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
205 		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
206 			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
207 			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
208 		}
209 
210 		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
211 			if (match->key->ip.proto == IPPROTO_UDP) {
212 				key_expected |= 1 << OVS_KEY_ATTR_UDP;
213 				if (match->mask && (match->mask->key.ip.proto == 0xff))
214 					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
215 			}
216 
217 			if (match->key->ip.proto == IPPROTO_SCTP) {
218 				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
219 				if (match->mask && (match->mask->key.ip.proto == 0xff))
220 					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
221 			}
222 
223 			if (match->key->ip.proto == IPPROTO_TCP) {
224 				key_expected |= 1 << OVS_KEY_ATTR_TCP;
225 				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
226 				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
227 					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
228 					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
229 				}
230 			}
231 
232 			if (match->key->ip.proto == IPPROTO_ICMP) {
233 				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
234 				if (match->mask && (match->mask->key.ip.proto == 0xff))
235 					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
236 			}
237 		}
238 	}
239 
240 	if (match->key->eth.type == htons(ETH_P_IPV6)) {
241 		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
242 		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
243 			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
244 			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
245 		}
246 
247 		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
248 			if (match->key->ip.proto == IPPROTO_UDP) {
249 				key_expected |= 1 << OVS_KEY_ATTR_UDP;
250 				if (match->mask && (match->mask->key.ip.proto == 0xff))
251 					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
252 			}
253 
254 			if (match->key->ip.proto == IPPROTO_SCTP) {
255 				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
256 				if (match->mask && (match->mask->key.ip.proto == 0xff))
257 					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
258 			}
259 
260 			if (match->key->ip.proto == IPPROTO_TCP) {
261 				key_expected |= 1 << OVS_KEY_ATTR_TCP;
262 				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
263 				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
264 					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
265 					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
266 				}
267 			}
268 
269 			if (match->key->ip.proto == IPPROTO_ICMPV6) {
270 				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
271 				if (match->mask && (match->mask->key.ip.proto == 0xff))
272 					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
273 
274 				if (match->key->tp.src ==
275 						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
276 				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
277 					key_expected |= 1 << OVS_KEY_ATTR_ND;
278 					/* Original direction conntrack tuple
279 					 * uses the same space as the ND fields
280 					 * in the key, so both are not allowed
281 					 * at the same time.
282 					 */
283 					mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
284 					if (match->mask && (match->mask->key.tp.src == htons(0xff)))
285 						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
286 				}
287 			}
288 		}
289 	}
290 
291 	if (match->key->eth.type == htons(ETH_P_NSH)) {
292 		key_expected |= 1 << OVS_KEY_ATTR_NSH;
293 		if (match->mask &&
294 		    match->mask->key.eth.type == htons(0xffff)) {
295 			mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
296 		}
297 	}
298 
299 	if ((key_attrs & key_expected) != key_expected) {
300 		/* Key attributes check failed. */
301 		OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
302 			  (unsigned long long)key_attrs,
303 			  (unsigned long long)key_expected);
304 		return false;
305 	}
306 
307 	if ((mask_attrs & mask_allowed) != mask_attrs) {
308 		/* Mask attributes check failed. */
309 		OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
310 			  (unsigned long long)mask_attrs,
311 			  (unsigned long long)mask_allowed);
312 		return false;
313 	}
314 
315 	return true;
316 }
317 
318 size_t ovs_tun_key_attr_size(void)
319 {
320 	/* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
321 	 * updating this function.
322 	 */
323 	return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
324 		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
325 		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
326 		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
327 		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
328 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
329 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
330 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
331 		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
332 		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
333 		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
334 		 */
335 		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
336 		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_DST */
337 		+ nla_total_size(4);   /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
338 }
339 
340 size_t ovs_nsh_key_attr_size(void)
341 {
342 	/* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
343 	 * updating this function.
344 	 */
345 	return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
346 		/* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
347 		 * mutually exclusive, so the bigger one can cover
348 		 * the small one.
349 		 */
350 		+ nla_total_size(NSH_CTX_HDRS_MAX_LEN);
351 }
352 
353 size_t ovs_key_attr_size(void)
354 {
355 	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
356 	 * updating this function.
357 	 */
358 	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
359 
360 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
361 		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
362 		  + ovs_tun_key_attr_size()
363 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
364 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
365 		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
366 		+ nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
367 		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
368 		+ nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
369 		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
370 		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
371 		+ nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
372 		+ nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
373 		  + ovs_nsh_key_attr_size()
374 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
375 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
376 		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
377 		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
378 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
379 		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
380 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
381 		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
382 }
383 
/* Expected payload length of each OVS_VXLAN_EXT_* attribute, indexed by
 * attribute type.  Types without an entry are zero-initialised (len 0).
 */
static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
	[OVS_VXLAN_EXT_GBP]	    = { .len = sizeof(u32) },
};
387 
/* Expected payload length of each OVS_TUNNEL_KEY_ATTR_* attribute, indexed by
 * attribute type.  Flag-style attributes carry no payload (len 0); types
 * without an entry are zero-initialised and therefore also have len 0.
 */
static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
	[OVS_TUNNEL_KEY_ATTR_ID]	    = { .len = sizeof(u64) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_DST]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_TOS]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_TTL]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_CSUM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_TP_SRC]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
	/* Length is validated by the Geneve option parser, not by the table. */
	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
						.next = ovs_vxlan_ext_key_lens },
	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
	[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = sizeof(u32) },
};
406 
/* Expected payload length of each OVS_NSH_KEY_ATTR_* attribute, indexed by
 * attribute type.  MD2 has no fixed length.
 */
static const struct ovs_len_tbl
ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
	[OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
	[OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
	[OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
};
413 
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.
 *
 * Indexed by attribute type.  Nested attributes use OVS_ATTR_NESTED and,
 * where a table exists for their contents, point to it through .next
 * (OVS_KEY_ATTR_ENCAP has none).  Types without an entry are
 * zero-initialised, i.e. len 0 and no nested table.
 */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
	[OVS_KEY_ATTR_PRIORITY]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_IN_PORT]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_SKB_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_ETHERNET]	 = { .len = sizeof(struct ovs_key_ethernet) },
	[OVS_KEY_ATTR_VLAN]	 = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_IPV4]	 = { .len = sizeof(struct ovs_key_ipv4) },
	[OVS_KEY_ATTR_IPV6]	 = { .len = sizeof(struct ovs_key_ipv6) },
	[OVS_KEY_ATTR_TCP]	 = { .len = sizeof(struct ovs_key_tcp) },
	[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_UDP]	 = { .len = sizeof(struct ovs_key_udp) },
	[OVS_KEY_ATTR_SCTP]	 = { .len = sizeof(struct ovs_key_sctp) },
	[OVS_KEY_ATTR_ICMP]	 = { .len = sizeof(struct ovs_key_icmp) },
	[OVS_KEY_ATTR_ICMPV6]	 = { .len = sizeof(struct ovs_key_icmpv6) },
	[OVS_KEY_ATTR_ARP]	 = { .len = sizeof(struct ovs_key_arp) },
	[OVS_KEY_ATTR_ND]	 = { .len = sizeof(struct ovs_key_nd) },
	[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
				     .next = ovs_tunnel_key_lens, },
	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
		.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
		.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
	[OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
				     .next = ovs_nsh_key_attr_lens, },
};
449 
/* Decide whether an attribute of @attr_len bytes satisfies @expected_len.
 *
 * The OVS_ATTR_NESTED and OVS_ATTR_VARIABLE markers accept any length here;
 * any other expectation must match exactly.  The markers are negative and
 * are compared after conversion to unsigned int.
 */
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
{
	if (expected_len == OVS_ATTR_NESTED)
		return true;

	if (expected_len == OVS_ATTR_VARIABLE)
		return true;

	return attr_len == expected_len;
}
456 
/* Test whether the @size bytes starting at @fp are all zero.
 *
 * Returns false for a NULL @fp and true for @size == 0.  The index is a
 * size_t so that it has the same type as the bound and cannot overflow
 * before reaching it.
 */
static bool is_all_zero(const u8 *fp, size_t size)
{
	size_t i;

	if (!fp)
		return false;

	for (i = 0; i < size; i++)
		if (fp[i])
			return false;

	return true;
}
470 
471 static int __parse_flow_nlattrs(const struct nlattr *attr,
472 				const struct nlattr *a[],
473 				u64 *attrsp, bool log, bool nz)
474 {
475 	const struct nlattr *nla;
476 	u64 attrs;
477 	int rem;
478 
479 	attrs = *attrsp;
480 	nla_for_each_nested(nla, attr, rem) {
481 		u16 type = nla_type(nla);
482 		int expected_len;
483 
484 		if (type > OVS_KEY_ATTR_MAX) {
485 			OVS_NLERR(log, "Key type %d is out of range max %d",
486 				  type, OVS_KEY_ATTR_MAX);
487 			return -EINVAL;
488 		}
489 
490 		if (attrs & (1 << type)) {
491 			OVS_NLERR(log, "Duplicate key (type %d).", type);
492 			return -EINVAL;
493 		}
494 
495 		expected_len = ovs_key_lens[type].len;
496 		if (!check_attr_len(nla_len(nla), expected_len)) {
497 			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
498 				  type, nla_len(nla), expected_len);
499 			return -EINVAL;
500 		}
501 
502 		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
503 			attrs |= 1 << type;
504 			a[type] = nla;
505 		}
506 	}
507 	if (rem) {
508 		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
509 		return -EINVAL;
510 	}
511 
512 	*attrsp = attrs;
513 	return 0;
514 }
515 
516 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
517 				   const struct nlattr *a[], u64 *attrsp,
518 				   bool log)
519 {
520 	return __parse_flow_nlattrs(attr, a, attrsp, log, true);
521 }
522 
523 int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
524 		       u64 *attrsp, bool log)
525 {
526 	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
527 }
528 
529 static int genev_tun_opt_from_nlattr(const struct nlattr *a,
530 				     struct sw_flow_match *match, bool is_mask,
531 				     bool log)
532 {
533 	unsigned long opt_key_offset;
534 
535 	if (nla_len(a) > sizeof(match->key->tun_opts)) {
536 		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
537 			  nla_len(a), sizeof(match->key->tun_opts));
538 		return -EINVAL;
539 	}
540 
541 	if (nla_len(a) % 4 != 0) {
542 		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
543 			  nla_len(a));
544 		return -EINVAL;
545 	}
546 
547 	/* We need to record the length of the options passed
548 	 * down, otherwise packets with the same format but
549 	 * additional options will be silently matched.
550 	 */
551 	if (!is_mask) {
552 		SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
553 				false);
554 	} else {
555 		/* This is somewhat unusual because it looks at
556 		 * both the key and mask while parsing the
557 		 * attributes (and by extension assumes the key
558 		 * is parsed first). Normally, we would verify
559 		 * that each is the correct length and that the
560 		 * attributes line up in the validate function.
561 		 * However, that is difficult because this is
562 		 * variable length and we won't have the
563 		 * information later.
564 		 */
565 		if (match->key->tun_opts_len != nla_len(a)) {
566 			OVS_NLERR(log, "Geneve option len %d != mask len %d",
567 				  match->key->tun_opts_len, nla_len(a));
568 			return -EINVAL;
569 		}
570 
571 		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
572 	}
573 
574 	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
575 	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
576 				  nla_len(a), is_mask);
577 	return 0;
578 }
579 
580 static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
581 				     struct sw_flow_match *match, bool is_mask,
582 				     bool log)
583 {
584 	struct nlattr *a;
585 	int rem;
586 	unsigned long opt_key_offset;
587 	struct vxlan_metadata opts;
588 
589 	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
590 
591 	memset(&opts, 0, sizeof(opts));
592 	nla_for_each_nested(a, attr, rem) {
593 		int type = nla_type(a);
594 
595 		if (type > OVS_VXLAN_EXT_MAX) {
596 			OVS_NLERR(log, "VXLAN extension %d out of range max %d",
597 				  type, OVS_VXLAN_EXT_MAX);
598 			return -EINVAL;
599 		}
600 
601 		if (!check_attr_len(nla_len(a),
602 				    ovs_vxlan_ext_key_lens[type].len)) {
603 			OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
604 				  type, nla_len(a),
605 				  ovs_vxlan_ext_key_lens[type].len);
606 			return -EINVAL;
607 		}
608 
609 		switch (type) {
610 		case OVS_VXLAN_EXT_GBP:
611 			opts.gbp = nla_get_u32(a);
612 			break;
613 		default:
614 			OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
615 				  type);
616 			return -EINVAL;
617 		}
618 	}
619 	if (rem) {
620 		OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
621 			  rem);
622 		return -EINVAL;
623 	}
624 
625 	if (!is_mask)
626 		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
627 	else
628 		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
629 
630 	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
631 	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
632 				  is_mask);
633 	return 0;
634 }
635 
636 static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
637 				      struct sw_flow_match *match, bool is_mask,
638 				      bool log)
639 {
640 	unsigned long opt_key_offset;
641 	struct erspan_metadata opts;
642 
643 	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
644 
645 	memset(&opts, 0, sizeof(opts));
646 	opts.index = nla_get_be32(attr);
647 
648 	/* Index has only 20-bit */
649 	if (ntohl(opts.index) & ~INDEX_MASK) {
650 		OVS_NLERR(log, "ERSPAN index number %x too large.",
651 			  ntohl(opts.index));
652 		return -EINVAL;
653 	}
654 
655 	SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
656 	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
657 	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
658 				  is_mask);
659 
660 	return 0;
661 }
662 
/* Parse the tunnel attributes nested in @attr into the tunnel part of the
 * flow key, or of the mask when @is_mask is true.
 *
 * @attr:    nested attribute holding OVS_TUNNEL_KEY_ATTR_* attributes.
 * @match:   match being built.
 * @is_mask: true when parsing a mask rather than a key.
 * @log:     forwarded to OVS_NLERR() to control error logging.
 *
 * At most one options block (Geneve, VXLAN or ERSPAN) may be present.  For a
 * key, a non-zero destination address and a TTL are mandatory.
 *
 * Returns a negative errno on failure.  On success returns the attribute
 * type of the options block that was parsed, or 0 when there was none.
 */
static int ip_tun_from_nlattr(const struct nlattr *attr,
			      struct sw_flow_match *match, bool is_mask,
			      bool log)
{
	bool ttl = false, ipv4 = false, ipv6 = false;
	__be16 tun_flags = 0;
	int opts_type = 0;
	struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int err;

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_tunnel_key_lens[type].len)) {
			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		/* Payload-less attributes only contribute a flag bit. */
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_DST:
			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_OAM:
			tun_flags |= TUNNEL_OAM;
			break;
		/* The three options blocks below exclude each other;
		 * opts_type remembers which one has been seen.
		 */
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_GENEVE_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_VXLAN_OPT;
			opts_type = type;
			break;
		/* Accepted and ignored. */
		case OVS_TUNNEL_KEY_ATTR_PAD:
			break;
		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_ERSPAN_OPT;
			opts_type = type;
			break;
		default:
			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
				  type);
			return -EINVAL;
		}
	}

	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
	/* A mask matches tun_proto exactly; a key records the address family,
	 * which is AF_INET unless an IPv6 address attribute was seen.
	 */
	if (is_mask)
		SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
	else
		SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
				false);

	if (rem > 0) {
		OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (ipv4 && ipv6) {
		OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
		return -EINVAL;
	}

	/* The remaining requirements apply to keys only, not to masks. */
	if (!is_mask) {
		if (!ipv4 && !ipv6) {
			OVS_NLERR(log, "IP tunnel dst address not specified");
			return -EINVAL;
		}
		if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
			OVS_NLERR(log, "IPv4 tunnel dst address is zero");
			return -EINVAL;
		}
		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
			OVS_NLERR(log, "IPv6 tunnel dst address is zero");
			return -EINVAL;
		}

		if (!ttl) {
			OVS_NLERR(log, "IP tunnel TTL not specified.");
			return -EINVAL;
		}
	}

	return opts_type;
}
830 
831 static int vxlan_opt_to_nlattr(struct sk_buff *skb,
832 			       const void *tun_opts, int swkey_tun_opts_len)
833 {
834 	const struct vxlan_metadata *opts = tun_opts;
835 	struct nlattr *nla;
836 
837 	nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
838 	if (!nla)
839 		return -EMSGSIZE;
840 
841 	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
842 		return -EMSGSIZE;
843 
844 	nla_nest_end(skb, nla);
845 	return 0;
846 }
847 
848 static int __ip_tun_to_nlattr(struct sk_buff *skb,
849 			      const struct ip_tunnel_key *output,
850 			      const void *tun_opts, int swkey_tun_opts_len,
851 			      unsigned short tun_proto)
852 {
853 	if (output->tun_flags & TUNNEL_KEY &&
854 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
855 			 OVS_TUNNEL_KEY_ATTR_PAD))
856 		return -EMSGSIZE;
857 	switch (tun_proto) {
858 	case AF_INET:
859 		if (output->u.ipv4.src &&
860 		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
861 				    output->u.ipv4.src))
862 			return -EMSGSIZE;
863 		if (output->u.ipv4.dst &&
864 		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
865 				    output->u.ipv4.dst))
866 			return -EMSGSIZE;
867 		break;
868 	case AF_INET6:
869 		if (!ipv6_addr_any(&output->u.ipv6.src) &&
870 		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
871 				     &output->u.ipv6.src))
872 			return -EMSGSIZE;
873 		if (!ipv6_addr_any(&output->u.ipv6.dst) &&
874 		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
875 				     &output->u.ipv6.dst))
876 			return -EMSGSIZE;
877 		break;
878 	}
879 	if (output->tos &&
880 	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
881 		return -EMSGSIZE;
882 	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
883 		return -EMSGSIZE;
884 	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
885 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
886 		return -EMSGSIZE;
887 	if ((output->tun_flags & TUNNEL_CSUM) &&
888 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
889 		return -EMSGSIZE;
890 	if (output->tp_src &&
891 	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
892 		return -EMSGSIZE;
893 	if (output->tp_dst &&
894 	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
895 		return -EMSGSIZE;
896 	if ((output->tun_flags & TUNNEL_OAM) &&
897 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
898 		return -EMSGSIZE;
899 	if (swkey_tun_opts_len) {
900 		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
901 		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
902 			    swkey_tun_opts_len, tun_opts))
903 			return -EMSGSIZE;
904 		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
905 			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
906 			return -EMSGSIZE;
907 		else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
908 			 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
909 				      ((struct erspan_metadata *)tun_opts)->index))
910 			return -EMSGSIZE;
911 	}
912 
913 	return 0;
914 }
915 
916 static int ip_tun_to_nlattr(struct sk_buff *skb,
917 			    const struct ip_tunnel_key *output,
918 			    const void *tun_opts, int swkey_tun_opts_len,
919 			    unsigned short tun_proto)
920 {
921 	struct nlattr *nla;
922 	int err;
923 
924 	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
925 	if (!nla)
926 		return -EMSGSIZE;
927 
928 	err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
929 				 tun_proto);
930 	if (err)
931 		return err;
932 
933 	nla_nest_end(skb, nla);
934 	return 0;
935 }
936 
937 int ovs_nla_put_tunnel_info(struct sk_buff *skb,
938 			    struct ip_tunnel_info *tun_info)
939 {
940 	return __ip_tun_to_nlattr(skb, &tun_info->key,
941 				  ip_tunnel_info_opts(tun_info),
942 				  tun_info->options_len,
943 				  ip_tunnel_info_af(tun_info));
944 }
945 
946 static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
947 				    const struct nlattr *a[],
948 				    bool is_mask, bool inner)
949 {
950 	__be16 tci = 0;
951 	__be16 tpid = 0;
952 
953 	if (a[OVS_KEY_ATTR_VLAN])
954 		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
955 
956 	if (a[OVS_KEY_ATTR_ETHERTYPE])
957 		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
958 
959 	if (likely(!inner)) {
960 		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
961 		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
962 	} else {
963 		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
964 		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
965 	}
966 	return 0;
967 }
968 
969 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
970 				      u64 key_attrs, bool inner,
971 				      const struct nlattr **a, bool log)
972 {
973 	__be16 tci = 0;
974 
975 	if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
976 	      (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
977 	       eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
978 		/* Not a VLAN. */
979 		return 0;
980 	}
981 
982 	if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
983 	      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
984 		OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
985 		return -EINVAL;
986 	}
987 
988 	if (a[OVS_KEY_ATTR_VLAN])
989 		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
990 
991 	if (!(tci & htons(VLAN_TAG_PRESENT))) {
992 		if (tci) {
993 			OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
994 				  (inner) ? "C-VLAN" : "VLAN");
995 			return -EINVAL;
996 		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
997 			/* Corner case for truncated VLAN header. */
998 			OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
999 				  (inner) ? "C-VLAN" : "VLAN");
1000 			return -EINVAL;
1001 		}
1002 	}
1003 
1004 	return 1;
1005 }
1006 
1007 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
1008 					   u64 key_attrs, bool inner,
1009 					   const struct nlattr **a, bool log)
1010 {
1011 	__be16 tci = 0;
1012 	__be16 tpid = 0;
1013 	bool encap_valid = !!(match->key->eth.vlan.tci &
1014 			      htons(VLAN_TAG_PRESENT));
1015 	bool i_encap_valid = !!(match->key->eth.cvlan.tci &
1016 				htons(VLAN_TAG_PRESENT));
1017 
1018 	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
1019 		/* Not a VLAN. */
1020 		return 0;
1021 	}
1022 
1023 	if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
1024 		OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
1025 			  (inner) ? "C-VLAN" : "VLAN");
1026 		return -EINVAL;
1027 	}
1028 
1029 	if (a[OVS_KEY_ATTR_VLAN])
1030 		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1031 
1032 	if (a[OVS_KEY_ATTR_ETHERTYPE])
1033 		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1034 
1035 	if (tpid != htons(0xffff)) {
1036 		OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
1037 			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
1038 		return -EINVAL;
1039 	}
1040 	if (!(tci & htons(VLAN_TAG_PRESENT))) {
1041 		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
1042 			  (inner) ? "C-VLAN" : "VLAN");
1043 		return -EINVAL;
1044 	}
1045 
1046 	return 1;
1047 }
1048 
1049 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
1050 				     u64 *key_attrs, bool inner,
1051 				     const struct nlattr **a, bool is_mask,
1052 				     bool log)
1053 {
1054 	int err;
1055 	const struct nlattr *encap;
1056 
1057 	if (!is_mask)
1058 		err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
1059 						 a, log);
1060 	else
1061 		err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
1062 						      a, log);
1063 	if (err <= 0)
1064 		return err;
1065 
1066 	err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
1067 	if (err)
1068 		return err;
1069 
1070 	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1071 	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1072 	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1073 
1074 	encap = a[OVS_KEY_ATTR_ENCAP];
1075 
1076 	if (!is_mask)
1077 		err = parse_flow_nlattrs(encap, a, key_attrs, log);
1078 	else
1079 		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
1080 
1081 	return err;
1082 }
1083 
1084 static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
1085 				   u64 *key_attrs, const struct nlattr **a,
1086 				   bool is_mask, bool log)
1087 {
1088 	int err;
1089 	bool encap_valid = false;
1090 
1091 	err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
1092 					is_mask, log);
1093 	if (err)
1094 		return err;
1095 
1096 	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
1097 	if (encap_valid) {
1098 		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
1099 						is_mask, log);
1100 		if (err)
1101 			return err;
1102 	}
1103 
1104 	return 0;
1105 }
1106 
1107 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
1108 				       u64 *attrs, const struct nlattr **a,
1109 				       bool is_mask, bool log)
1110 {
1111 	__be16 eth_type;
1112 
1113 	eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1114 	if (is_mask) {
1115 		/* Always exact match EtherType. */
1116 		eth_type = htons(0xffff);
1117 	} else if (!eth_proto_is_802_3(eth_type)) {
1118 		OVS_NLERR(log, "EtherType %x is less than min %x",
1119 				ntohs(eth_type), ETH_P_802_3_MIN);
1120 		return -EINVAL;
1121 	}
1122 
1123 	SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1124 	*attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1125 	return 0;
1126 }
1127 
/* Copy the metadata attributes present in @a into @match (key or mask,
 * depending on @is_mask).
 *
 * @net:     handed to ovs_ct_verify() for the conntrack attributes.
 * @match:   destination; written through the SW_FLOW_KEY_* macros.
 * @attrs:   bitmap of attributes present in @a; the bit of every attribute
 *           consumed here is cleared, the rest is left for the caller.
 * @a:       parsed attribute array indexed by OVS_KEY_ATTR_*.
 * @is_mask: true when filling the mask rather than the key.
 * @log:     whether errors may be logged.
 *
 * Returns 0 on success, -EINVAL for an out-of-range in_port, a tunnel
 * attribute rejected by ip_tun_from_nlattr() or unsupported ct_state bits,
 * or the result of parse_eth_type_from_nlattrs() for layer 3 keys.
 */
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
				 u64 *attrs, const struct nlattr **a,
				 bool is_mask, bool log)
{
	u8 mac_proto = MAC_PROTO_ETHERNET;

	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);

		SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
		u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);

		SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
		SW_FLOW_KEY_PUT(match, phy.priority,
			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

		if (is_mask) {
			in_port = 0xffffffff; /* Always exact match in_port. */
		} else if (in_port >= DP_MAX_PORTS) {
			OVS_NLERR(log, "Port %d exceeds max allowable %d",
				  in_port, DP_MAX_PORTS);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
	} else if (!is_mask) {
		/* Key without an in_port attribute: store DP_MAX_PORTS. */
		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
		/* Any negative result is reported to the caller as -EINVAL. */
		if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
				       is_mask, log) < 0)
			return -EINVAL;
		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
	}

	/* The four conntrack attributes below are only consumed when
	 * ovs_ct_verify() accepts them; otherwise their bit stays set in
	 * *attrs.
	 */
	if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
		u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);

		if (ct_state & ~CT_SUPPORTED_MASK) {
			OVS_NLERR(log, "ct_state flags %08x unsupported",
				  ct_state);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
		u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);

		SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
		u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);

		SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
		const struct ovs_key_ct_labels *cl;

		cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
		SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
				   sizeof(*cl), is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
	}
	/* The original-direction tuples share ct.orig_tp and ct_orig_proto;
	 * only the address fields differ between IPv4 and IPv6.
	 */
	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
		const struct ovs_key_ct_tuple_ipv4 *ct;

		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);

		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
	}
	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
		const struct ovs_key_ct_tuple_ipv6 *ct;

		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);

		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
				   sizeof(match->key->ipv6.ct_orig.src),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
				   sizeof(match->key->ipv6.ct_orig.dst),
				   is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
	}

	/* For layer 3 packets the Ethernet type is provided
	 * and treated as metadata but no MAC addresses are provided.
	 */
	if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
	    (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
		mac_proto = MAC_PROTO_NONE;

	/* Always exact match mac_proto */
	SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);

	/* Layer 3 only: the EtherType is consumed here as metadata. */
	if (mac_proto == MAC_PROTO_NONE)
		return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
						   log);

	return 0;
}
1265 
1266 int nsh_hdr_from_nlattr(const struct nlattr *attr,
1267 			struct nshhdr *nh, size_t size)
1268 {
1269 	struct nlattr *a;
1270 	int rem;
1271 	u8 flags = 0;
1272 	u8 ttl = 0;
1273 	int mdlen = 0;
1274 
1275 	/* validate_nsh has check this, so we needn't do duplicate check here
1276 	 */
1277 	if (size < NSH_BASE_HDR_LEN)
1278 		return -ENOBUFS;
1279 
1280 	nla_for_each_nested(a, attr, rem) {
1281 		int type = nla_type(a);
1282 
1283 		switch (type) {
1284 		case OVS_NSH_KEY_ATTR_BASE: {
1285 			const struct ovs_nsh_key_base *base = nla_data(a);
1286 
1287 			flags = base->flags;
1288 			ttl = base->ttl;
1289 			nh->np = base->np;
1290 			nh->mdtype = base->mdtype;
1291 			nh->path_hdr = base->path_hdr;
1292 			break;
1293 		}
1294 		case OVS_NSH_KEY_ATTR_MD1:
1295 			mdlen = nla_len(a);
1296 			if (mdlen > size - NSH_BASE_HDR_LEN)
1297 				return -ENOBUFS;
1298 			memcpy(&nh->md1, nla_data(a), mdlen);
1299 			break;
1300 
1301 		case OVS_NSH_KEY_ATTR_MD2:
1302 			mdlen = nla_len(a);
1303 			if (mdlen > size - NSH_BASE_HDR_LEN)
1304 				return -ENOBUFS;
1305 			memcpy(&nh->md2, nla_data(a), mdlen);
1306 			break;
1307 
1308 		default:
1309 			return -EINVAL;
1310 		}
1311 	}
1312 
1313 	/* nsh header length  = NSH_BASE_HDR_LEN + mdlen */
1314 	nh->ver_flags_ttl_len = 0;
1315 	nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1316 
1317 	return 0;
1318 }
1319 
1320 int nsh_key_from_nlattr(const struct nlattr *attr,
1321 			struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1322 {
1323 	struct nlattr *a;
1324 	int rem;
1325 
1326 	/* validate_nsh has check this, so we needn't do duplicate check here
1327 	 */
1328 	nla_for_each_nested(a, attr, rem) {
1329 		int type = nla_type(a);
1330 
1331 		switch (type) {
1332 		case OVS_NSH_KEY_ATTR_BASE: {
1333 			const struct ovs_nsh_key_base *base = nla_data(a);
1334 			const struct ovs_nsh_key_base *base_mask = base + 1;
1335 
1336 			nsh->base = *base;
1337 			nsh_mask->base = *base_mask;
1338 			break;
1339 		}
1340 		case OVS_NSH_KEY_ATTR_MD1: {
1341 			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1342 			const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1343 
1344 			memcpy(nsh->context, md1->context, sizeof(*md1));
1345 			memcpy(nsh_mask->context, md1_mask->context,
1346 			       sizeof(*md1_mask));
1347 			break;
1348 		}
1349 		case OVS_NSH_KEY_ATTR_MD2:
1350 			/* Not supported yet */
1351 			return -ENOTSUPP;
1352 		default:
1353 			return -EINVAL;
1354 		}
1355 	}
1356 
1357 	return 0;
1358 }
1359 
1360 static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1361 				   struct sw_flow_match *match, bool is_mask,
1362 				   bool is_push_nsh, bool log)
1363 {
1364 	struct nlattr *a;
1365 	int rem;
1366 	bool has_base = false;
1367 	bool has_md1 = false;
1368 	bool has_md2 = false;
1369 	u8 mdtype = 0;
1370 	int mdlen = 0;
1371 
1372 	if (WARN_ON(is_push_nsh && is_mask))
1373 		return -EINVAL;
1374 
1375 	nla_for_each_nested(a, attr, rem) {
1376 		int type = nla_type(a);
1377 		int i;
1378 
1379 		if (type > OVS_NSH_KEY_ATTR_MAX) {
1380 			OVS_NLERR(log, "nsh attr %d is out of range max %d",
1381 				  type, OVS_NSH_KEY_ATTR_MAX);
1382 			return -EINVAL;
1383 		}
1384 
1385 		if (!check_attr_len(nla_len(a),
1386 				    ovs_nsh_key_attr_lens[type].len)) {
1387 			OVS_NLERR(
1388 			    log,
1389 			    "nsh attr %d has unexpected len %d expected %d",
1390 			    type,
1391 			    nla_len(a),
1392 			    ovs_nsh_key_attr_lens[type].len
1393 			);
1394 			return -EINVAL;
1395 		}
1396 
1397 		switch (type) {
1398 		case OVS_NSH_KEY_ATTR_BASE: {
1399 			const struct ovs_nsh_key_base *base = nla_data(a);
1400 
1401 			has_base = true;
1402 			mdtype = base->mdtype;
1403 			SW_FLOW_KEY_PUT(match, nsh.base.flags,
1404 					base->flags, is_mask);
1405 			SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1406 					base->ttl, is_mask);
1407 			SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1408 					base->mdtype, is_mask);
1409 			SW_FLOW_KEY_PUT(match, nsh.base.np,
1410 					base->np, is_mask);
1411 			SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1412 					base->path_hdr, is_mask);
1413 			break;
1414 		}
1415 		case OVS_NSH_KEY_ATTR_MD1: {
1416 			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1417 
1418 			has_md1 = true;
1419 			for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1420 				SW_FLOW_KEY_PUT(match, nsh.context[i],
1421 						md1->context[i], is_mask);
1422 			break;
1423 		}
1424 		case OVS_NSH_KEY_ATTR_MD2:
1425 			if (!is_push_nsh) /* Not supported MD type 2 yet */
1426 				return -ENOTSUPP;
1427 
1428 			has_md2 = true;
1429 			mdlen = nla_len(a);
1430 			if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1431 				OVS_NLERR(
1432 				    log,
1433 				    "Invalid MD length %d for MD type %d",
1434 				    mdlen,
1435 				    mdtype
1436 				);
1437 				return -EINVAL;
1438 			}
1439 			break;
1440 		default:
1441 			OVS_NLERR(log, "Unknown nsh attribute %d",
1442 				  type);
1443 			return -EINVAL;
1444 		}
1445 	}
1446 
1447 	if (rem > 0) {
1448 		OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1449 		return -EINVAL;
1450 	}
1451 
1452 	if (has_md1 && has_md2) {
1453 		OVS_NLERR(
1454 		    1,
1455 		    "invalid nsh attribute: md1 and md2 are exclusive."
1456 		);
1457 		return -EINVAL;
1458 	}
1459 
1460 	if (!is_mask) {
1461 		if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1462 		    (has_md2 && mdtype != NSH_M_TYPE2)) {
1463 			OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1464 				  mdtype);
1465 			return -EINVAL;
1466 		}
1467 
1468 		if (is_push_nsh &&
1469 		    (!has_base || (!has_md1 && !has_md2))) {
1470 			OVS_NLERR(
1471 			    1,
1472 			    "push_nsh: missing base or metadata attributes"
1473 			);
1474 			return -EINVAL;
1475 		}
1476 	}
1477 
1478 	return 0;
1479 }
1480 
/* Fill @match (key or mask, depending on @is_mask) from the parsed
 * attribute array @a.
 *
 * @net:     passed through to metadata_from_nlattrs().
 * @match:   destination; written through the SW_FLOW_KEY_* macros.
 * @attrs:   bitmap of attributes present in @a.  It is a local copy: bits
 *           are cleared as attributes are consumed and anything left at
 *           the end is reported as an unknown attribute.
 * @a:       parsed attribute array indexed by OVS_KEY_ATTR_*.
 * @is_mask: true when filling the mask rather than the key.
 * @log:     whether errors may be logged.
 *
 * Metadata is handled first, then Ethernet/EtherType, then the L3 and L4
 * attributes.  Range checks on values (fragment type, IPv6 flow label, ARP
 * opcode) are applied to keys only, not to masks.
 *
 * Returns 0 on success, -EINVAL on any validation failure, or the error
 * returned by metadata_from_nlattrs() / parse_eth_type_from_nlattrs().
 */
static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
				u64 attrs, const struct nlattr **a,
				bool is_mask, bool log)
{
	int err;

	err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
	if (err)
		return err;

	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);

		if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
			/* VLAN attribute is always parsed before getting here since it
			 * may occur multiple times.
			 */
			OVS_NLERR(log, "VLAN attribute unexpected.");
			return -EINVAL;
		}

		if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
			err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
							  log);
			if (err)
				return err;
		} else if (!is_mask) {
			/* Key with Ethernet but no EtherType: use ETH_P_802_2. */
			SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
		}
	} else if (!match->key->eth.type) {
		/* No Ethernet attribute and the key's eth.type is still zero. */
		OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
		return -EINVAL;
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
				  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
				  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}

		/* Only the low 20 bits of the flow label may be set in a key. */
		if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
				  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				ipv6_key->ipv6_src,
				sizeof(match->key->ipv6.addr.src),
				is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				ipv6_key->ipv6_dst,
				sizeof(match->key->ipv6.addr.dst),
				is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		/* Key opcodes must fit in the low byte. */
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
				  arp_key->arp_op);
			return -EINVAL;
		}

		/* ARP reuses the ipv4.addr and ip.proto fields of the flow key:
		 * sender/target IP go to addr.src/dst, the opcode to ip.proto.
		 */
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
			arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
		/* Any negative result is reported to the caller as -EINVAL. */
		if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
					    is_mask, false, log) < 0)
			return -EINVAL;
		attrs &= ~(1 << OVS_KEY_ATTR_NSH);
	}

	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
		const struct ovs_key_mpls *mpls_key;

		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
		SW_FLOW_KEY_PUT(match, mpls.top_lse,
				mpls_key->mpls_lse, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
	 }

	/* TCP, UDP, SCTP, ICMP and ICMPv6 all store into the same tp.src and
	 * tp.dst fields of the flow key.
	 */
	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
		SW_FLOW_KEY_PUT(match, tp.flags,
				nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
				is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		/* ICMP type and code are stored in tp.src and tp.dst. */
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		/* ICMPv6 type and code are stored in tp.src and tp.dst. */
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
			nd_key->nd_target,
			sizeof(match->key->ipv6.nd.target),
			is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
			nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	/* Every recognised attribute has cleared its bit by now. */
	if (attrs != 0) {
		OVS_NLERR(log, "Unknown key attributes %llx",
			  (unsigned long long)attrs);
		return -EINVAL;
	}

	return 0;
}
1704 
1705 static void nlattr_set(struct nlattr *attr, u8 val,
1706 		       const struct ovs_len_tbl *tbl)
1707 {
1708 	struct nlattr *nla;
1709 	int rem;
1710 
1711 	/* The nlattr stream should already have been validated */
1712 	nla_for_each_nested(nla, attr, rem) {
1713 		if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
1714 			if (tbl[nla_type(nla)].next)
1715 				tbl = tbl[nla_type(nla)].next;
1716 			nlattr_set(nla, val, tbl);
1717 		} else {
1718 			memset(nla_data(nla), val, nla_len(nla));
1719 		}
1720 
1721 		if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1722 			*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1723 	}
1724 }
1725 
1726 static void mask_set_nlattr(struct nlattr *attr, u8 val)
1727 {
1728 	nlattr_set(attr, val, ovs_key_lens);
1729 }
1730 
1731 /**
1732  * ovs_nla_get_match - parses Netlink attributes into a flow key and
1733  * mask. In case the 'mask' is NULL, the flow is treated as exact match
1734  * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1735  * does not include any don't care bit.
1736  * @net: Used to determine per-namespace field support.
1737  * @match: receives the extracted flow match information.
1738  * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1739  * sequence. The fields should of the packet that triggered the creation
1740  * of this flow.
1741  * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1742  * attribute specifies the mask field of the wildcarded flow.
1743  * @log: Boolean to allow kernel error logging.  Normally true, but when
1744  * probing for feature compatibility this should be passed in as false to
1745  * suppress unnecessary error logging.
1746  */
1747 int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1748 		      const struct nlattr *nla_key,
1749 		      const struct nlattr *nla_mask,
1750 		      bool log)
1751 {
1752 	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1753 	struct nlattr *newmask = NULL;
1754 	u64 key_attrs = 0;
1755 	u64 mask_attrs = 0;
1756 	int err;
1757 
1758 	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1759 	if (err)
1760 		return err;
1761 
1762 	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1763 	if (err)
1764 		return err;
1765 
1766 	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1767 	if (err)
1768 		return err;
1769 
1770 	if (match->mask) {
1771 		if (!nla_mask) {
1772 			/* Create an exact match mask. We need to set to 0xff
1773 			 * all the 'match->mask' fields that have been touched
1774 			 * in 'match->key'. We cannot simply memset
1775 			 * 'match->mask', because padding bytes and fields not
1776 			 * specified in 'match->key' should be left to 0.
1777 			 * Instead, we use a stream of netlink attributes,
1778 			 * copied from 'key' and set to 0xff.
1779 			 * ovs_key_from_nlattrs() will take care of filling
1780 			 * 'match->mask' appropriately.
1781 			 */
1782 			newmask = kmemdup(nla_key,
1783 					  nla_total_size(nla_len(nla_key)),
1784 					  GFP_KERNEL);
1785 			if (!newmask)
1786 				return -ENOMEM;
1787 
1788 			mask_set_nlattr(newmask, 0xff);
1789 
1790 			/* The userspace does not send tunnel attributes that
1791 			 * are 0, but we should not wildcard them nonetheless.
1792 			 */
1793 			if (match->key->tun_proto)
1794 				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1795 							 0xff, true);
1796 
1797 			nla_mask = newmask;
1798 		}
1799 
1800 		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1801 		if (err)
1802 			goto free_newmask;
1803 
1804 		/* Always match on tci. */
1805 		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1806 		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1807 
1808 		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1809 		if (err)
1810 			goto free_newmask;
1811 
1812 		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1813 					   log);
1814 		if (err)
1815 			goto free_newmask;
1816 	}
1817 
1818 	if (!match_validate(match, key_attrs, mask_attrs, log))
1819 		err = -EINVAL;
1820 
1821 free_newmask:
1822 	kfree(newmask);
1823 	return err;
1824 }
1825 
1826 static size_t get_ufid_len(const struct nlattr *attr, bool log)
1827 {
1828 	size_t len;
1829 
1830 	if (!attr)
1831 		return 0;
1832 
1833 	len = nla_len(attr);
1834 	if (len < 1 || len > MAX_UFID_LENGTH) {
1835 		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
1836 			  nla_len(attr), MAX_UFID_LENGTH);
1837 		return 0;
1838 	}
1839 
1840 	return len;
1841 }
1842 
1843 /* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
1844  * or false otherwise.
1845  */
1846 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1847 		      bool log)
1848 {
1849 	sfid->ufid_len = get_ufid_len(attr, log);
1850 	if (sfid->ufid_len)
1851 		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1852 
1853 	return sfid->ufid_len;
1854 }
1855 
1856 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1857 			   const struct sw_flow_key *key, bool log)
1858 {
1859 	struct sw_flow_key *new_key;
1860 
1861 	if (ovs_nla_get_ufid(sfid, ufid, log))
1862 		return 0;
1863 
1864 	/* If UFID was not provided, use unmasked key. */
1865 	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1866 	if (!new_key)
1867 		return -ENOMEM;
1868 	memcpy(new_key, key, sizeof(*key));
1869 	sfid->unmasked_key = new_key;
1870 
1871 	return 0;
1872 }
1873 
1874 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1875 {
1876 	return attr ? nla_get_u32(attr) : 0;
1877 }
1878 
1879 /**
1880  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1881  * @net: Network namespace.
1882  * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1883  * metadata.
1884  * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1885  * attributes.
1886  * @attrs: Bit mask for the netlink attributes included in @a.
1887  * @log: Boolean to allow kernel error logging.  Normally true, but when
1888  * probing for feature compatibility this should be passed in as false to
1889  * suppress unnecessary error logging.
1890  *
1891  * This parses a series of Netlink attributes that form a flow key, which must
1892  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1893  * get the metadata, that is, the parts of the flow key that cannot be
1894  * extracted from the packet itself.
1895  *
1896  * This must be called before the packet key fields are filled in 'key'.
1897  */
1898 
1899 int ovs_nla_get_flow_metadata(struct net *net,
1900 			      const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1901 			      u64 attrs, struct sw_flow_key *key, bool log)
1902 {
1903 	struct sw_flow_match match;
1904 
1905 	memset(&match, 0, sizeof(match));
1906 	match.key = key;
1907 
1908 	key->ct_state = 0;
1909 	key->ct_zone = 0;
1910 	key->ct_orig_proto = 0;
1911 	memset(&key->ct, 0, sizeof(key->ct));
1912 	memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1913 	memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1914 
1915 	key->phy.in_port = DP_MAX_PORTS;
1916 
1917 	return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1918 }
1919 
1920 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1921 			    bool is_mask)
1922 {
1923 	__be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1924 
1925 	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1926 	    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1927 		return -EMSGSIZE;
1928 	return 0;
1929 }
1930 
1931 static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1932 			     struct sk_buff *skb)
1933 {
1934 	struct nlattr *start;
1935 
1936 	start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1937 	if (!start)
1938 		return -EMSGSIZE;
1939 
1940 	if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1941 		goto nla_put_failure;
1942 
1943 	if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1944 		if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1945 			    sizeof(nsh->context), nsh->context))
1946 			goto nla_put_failure;
1947 	}
1948 
1949 	/* Don't support MD type 2 yet */
1950 
1951 	nla_nest_end(skb, start);
1952 
1953 	return 0;
1954 
1955 nla_put_failure:
1956 	return -EMSGSIZE;
1957 }
1958 
1959 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1960 			     const struct sw_flow_key *output, bool is_mask,
1961 			     struct sk_buff *skb)
1962 {
1963 	struct ovs_key_ethernet *eth_key;
1964 	struct nlattr *nla;
1965 	struct nlattr *encap = NULL;
1966 	struct nlattr *in_encap = NULL;
1967 
1968 	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1969 		goto nla_put_failure;
1970 
1971 	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1972 		goto nla_put_failure;
1973 
1974 	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1975 		goto nla_put_failure;
1976 
1977 	if ((swkey->tun_proto || is_mask)) {
1978 		const void *opts = NULL;
1979 
1980 		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1981 			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1982 
1983 		if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1984 				     swkey->tun_opts_len, swkey->tun_proto))
1985 			goto nla_put_failure;
1986 	}
1987 
1988 	if (swkey->phy.in_port == DP_MAX_PORTS) {
1989 		if (is_mask && (output->phy.in_port == 0xffff))
1990 			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1991 				goto nla_put_failure;
1992 	} else {
1993 		u16 upper_u16;
1994 		upper_u16 = !is_mask ? 0 : 0xffff;
1995 
1996 		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1997 				(upper_u16 << 16) | output->phy.in_port))
1998 			goto nla_put_failure;
1999 	}
2000 
2001 	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
2002 		goto nla_put_failure;
2003 
2004 	if (ovs_ct_put_key(swkey, output, skb))
2005 		goto nla_put_failure;
2006 
2007 	if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
2008 		nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
2009 		if (!nla)
2010 			goto nla_put_failure;
2011 
2012 		eth_key = nla_data(nla);
2013 		ether_addr_copy(eth_key->eth_src, output->eth.src);
2014 		ether_addr_copy(eth_key->eth_dst, output->eth.dst);
2015 
2016 		if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
2017 			if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
2018 				goto nla_put_failure;
2019 			encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2020 			if (!swkey->eth.vlan.tci)
2021 				goto unencap;
2022 
2023 			if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
2024 				if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
2025 					goto nla_put_failure;
2026 				in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2027 				if (!swkey->eth.cvlan.tci)
2028 					goto unencap;
2029 			}
2030 		}
2031 
2032 		if (swkey->eth.type == htons(ETH_P_802_2)) {
2033 			/*
2034 			* Ethertype 802.2 is represented in the netlink with omitted
2035 			* OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
2036 			* 0xffff in the mask attribute.  Ethertype can also
2037 			* be wildcarded.
2038 			*/
2039 			if (is_mask && output->eth.type)
2040 				if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
2041 							output->eth.type))
2042 					goto nla_put_failure;
2043 			goto unencap;
2044 		}
2045 	}
2046 
2047 	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
2048 		goto nla_put_failure;
2049 
2050 	if (eth_type_vlan(swkey->eth.type)) {
2051 		/* There are 3 VLAN tags, we don't know anything about the rest
2052 		 * of the packet, so truncate here.
2053 		 */
2054 		WARN_ON_ONCE(!(encap && in_encap));
2055 		goto unencap;
2056 	}
2057 
2058 	if (swkey->eth.type == htons(ETH_P_IP)) {
2059 		struct ovs_key_ipv4 *ipv4_key;
2060 
2061 		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
2062 		if (!nla)
2063 			goto nla_put_failure;
2064 		ipv4_key = nla_data(nla);
2065 		ipv4_key->ipv4_src = output->ipv4.addr.src;
2066 		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
2067 		ipv4_key->ipv4_proto = output->ip.proto;
2068 		ipv4_key->ipv4_tos = output->ip.tos;
2069 		ipv4_key->ipv4_ttl = output->ip.ttl;
2070 		ipv4_key->ipv4_frag = output->ip.frag;
2071 	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
2072 		struct ovs_key_ipv6 *ipv6_key;
2073 
2074 		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
2075 		if (!nla)
2076 			goto nla_put_failure;
2077 		ipv6_key = nla_data(nla);
2078 		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
2079 				sizeof(ipv6_key->ipv6_src));
2080 		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
2081 				sizeof(ipv6_key->ipv6_dst));
2082 		ipv6_key->ipv6_label = output->ipv6.label;
2083 		ipv6_key->ipv6_proto = output->ip.proto;
2084 		ipv6_key->ipv6_tclass = output->ip.tos;
2085 		ipv6_key->ipv6_hlimit = output->ip.ttl;
2086 		ipv6_key->ipv6_frag = output->ip.frag;
2087 	} else if (swkey->eth.type == htons(ETH_P_NSH)) {
2088 		if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2089 			goto nla_put_failure;
2090 	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
2091 		   swkey->eth.type == htons(ETH_P_RARP)) {
2092 		struct ovs_key_arp *arp_key;
2093 
2094 		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
2095 		if (!nla)
2096 			goto nla_put_failure;
2097 		arp_key = nla_data(nla);
2098 		memset(arp_key, 0, sizeof(struct ovs_key_arp));
2099 		arp_key->arp_sip = output->ipv4.addr.src;
2100 		arp_key->arp_tip = output->ipv4.addr.dst;
2101 		arp_key->arp_op = htons(output->ip.proto);
2102 		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
2103 		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
2104 	} else if (eth_p_mpls(swkey->eth.type)) {
2105 		struct ovs_key_mpls *mpls_key;
2106 
2107 		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
2108 		if (!nla)
2109 			goto nla_put_failure;
2110 		mpls_key = nla_data(nla);
2111 		mpls_key->mpls_lse = output->mpls.top_lse;
2112 	}
2113 
2114 	if ((swkey->eth.type == htons(ETH_P_IP) ||
2115 	     swkey->eth.type == htons(ETH_P_IPV6)) &&
2116 	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
2117 
2118 		if (swkey->ip.proto == IPPROTO_TCP) {
2119 			struct ovs_key_tcp *tcp_key;
2120 
2121 			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
2122 			if (!nla)
2123 				goto nla_put_failure;
2124 			tcp_key = nla_data(nla);
2125 			tcp_key->tcp_src = output->tp.src;
2126 			tcp_key->tcp_dst = output->tp.dst;
2127 			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
2128 					 output->tp.flags))
2129 				goto nla_put_failure;
2130 		} else if (swkey->ip.proto == IPPROTO_UDP) {
2131 			struct ovs_key_udp *udp_key;
2132 
2133 			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
2134 			if (!nla)
2135 				goto nla_put_failure;
2136 			udp_key = nla_data(nla);
2137 			udp_key->udp_src = output->tp.src;
2138 			udp_key->udp_dst = output->tp.dst;
2139 		} else if (swkey->ip.proto == IPPROTO_SCTP) {
2140 			struct ovs_key_sctp *sctp_key;
2141 
2142 			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
2143 			if (!nla)
2144 				goto nla_put_failure;
2145 			sctp_key = nla_data(nla);
2146 			sctp_key->sctp_src = output->tp.src;
2147 			sctp_key->sctp_dst = output->tp.dst;
2148 		} else if (swkey->eth.type == htons(ETH_P_IP) &&
2149 			   swkey->ip.proto == IPPROTO_ICMP) {
2150 			struct ovs_key_icmp *icmp_key;
2151 
2152 			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
2153 			if (!nla)
2154 				goto nla_put_failure;
2155 			icmp_key = nla_data(nla);
2156 			icmp_key->icmp_type = ntohs(output->tp.src);
2157 			icmp_key->icmp_code = ntohs(output->tp.dst);
2158 		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
2159 			   swkey->ip.proto == IPPROTO_ICMPV6) {
2160 			struct ovs_key_icmpv6 *icmpv6_key;
2161 
2162 			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
2163 						sizeof(*icmpv6_key));
2164 			if (!nla)
2165 				goto nla_put_failure;
2166 			icmpv6_key = nla_data(nla);
2167 			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
2168 			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
2169 
2170 			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
2171 			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
2172 				struct ovs_key_nd *nd_key;
2173 
2174 				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
2175 				if (!nla)
2176 					goto nla_put_failure;
2177 				nd_key = nla_data(nla);
2178 				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
2179 							sizeof(nd_key->nd_target));
2180 				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
2181 				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
2182 			}
2183 		}
2184 	}
2185 
2186 unencap:
2187 	if (in_encap)
2188 		nla_nest_end(skb, in_encap);
2189 	if (encap)
2190 		nla_nest_end(skb, encap);
2191 
2192 	return 0;
2193 
2194 nla_put_failure:
2195 	return -EMSGSIZE;
2196 }
2197 
2198 int ovs_nla_put_key(const struct sw_flow_key *swkey,
2199 		    const struct sw_flow_key *output, int attr, bool is_mask,
2200 		    struct sk_buff *skb)
2201 {
2202 	int err;
2203 	struct nlattr *nla;
2204 
2205 	nla = nla_nest_start(skb, attr);
2206 	if (!nla)
2207 		return -EMSGSIZE;
2208 	err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2209 	if (err)
2210 		return err;
2211 	nla_nest_end(skb, nla);
2212 
2213 	return 0;
2214 }
2215 
2216 /* Called with ovs_mutex or RCU read lock. */
2217 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2218 {
2219 	if (ovs_identifier_is_ufid(&flow->id))
2220 		return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2221 			       flow->id.ufid);
2222 
2223 	return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2224 			       OVS_FLOW_ATTR_KEY, false, skb);
2225 }
2226 
2227 /* Called with ovs_mutex or RCU read lock. */
2228 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2229 {
2230 	return ovs_nla_put_key(&flow->key, &flow->key,
2231 				OVS_FLOW_ATTR_KEY, false, skb);
2232 }
2233 
2234 /* Called with ovs_mutex or RCU read lock. */
2235 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2236 {
2237 	return ovs_nla_put_key(&flow->key, &flow->mask->key,
2238 				OVS_FLOW_ATTR_MASK, true, skb);
2239 }
2240 
2241 #define MAX_ACTIONS_BUFSIZE	(32 * 1024)
2242 
2243 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
2244 {
2245 	struct sw_flow_actions *sfa;
2246 
2247 	if (size > MAX_ACTIONS_BUFSIZE) {
2248 		OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
2249 		return ERR_PTR(-EINVAL);
2250 	}
2251 
2252 	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2253 	if (!sfa)
2254 		return ERR_PTR(-ENOMEM);
2255 
2256 	sfa->actions_len = 0;
2257 	return sfa;
2258 }
2259 
2260 static void ovs_nla_free_set_action(const struct nlattr *a)
2261 {
2262 	const struct nlattr *ovs_key = nla_data(a);
2263 	struct ovs_tunnel_info *ovs_tun;
2264 
2265 	switch (nla_type(ovs_key)) {
2266 	case OVS_KEY_ATTR_TUNNEL_INFO:
2267 		ovs_tun = nla_data(ovs_key);
2268 		dst_release((struct dst_entry *)ovs_tun->tun_dst);
2269 		break;
2270 	}
2271 }
2272 
2273 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2274 {
2275 	const struct nlattr *a;
2276 	int rem;
2277 
2278 	if (!sf_acts)
2279 		return;
2280 
2281 	nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
2282 		switch (nla_type(a)) {
2283 		case OVS_ACTION_ATTR_SET:
2284 			ovs_nla_free_set_action(a);
2285 			break;
2286 		case OVS_ACTION_ATTR_CT:
2287 			ovs_ct_free_action(a);
2288 			break;
2289 		}
2290 	}
2291 
2292 	kfree(sf_acts);
2293 }
2294 
2295 static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2296 {
2297 	ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2298 }
2299 
2300 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
2301  * The caller must hold rcu_read_lock for this to be sensible. */
2302 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
2303 {
2304 	call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
2305 }
2306 
2307 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2308 				       int attr_len, bool log)
2309 {
2310 
2311 	struct sw_flow_actions *acts;
2312 	int new_acts_size;
2313 	int req_size = NLA_ALIGN(attr_len);
2314 	int next_offset = offsetof(struct sw_flow_actions, actions) +
2315 					(*sfa)->actions_len;
2316 
2317 	if (req_size <= (ksize(*sfa) - next_offset))
2318 		goto out;
2319 
2320 	new_acts_size = ksize(*sfa) * 2;
2321 
2322 	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2323 		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
2324 			return ERR_PTR(-EMSGSIZE);
2325 		new_acts_size = MAX_ACTIONS_BUFSIZE;
2326 	}
2327 
2328 	acts = nla_alloc_flow_actions(new_acts_size, log);
2329 	if (IS_ERR(acts))
2330 		return (void *)acts;
2331 
2332 	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
2333 	acts->actions_len = (*sfa)->actions_len;
2334 	acts->orig_len = (*sfa)->orig_len;
2335 	kfree(*sfa);
2336 	*sfa = acts;
2337 
2338 out:
2339 	(*sfa)->actions_len += req_size;
2340 	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
2341 }
2342 
2343 static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2344 				   int attrtype, void *data, int len, bool log)
2345 {
2346 	struct nlattr *a;
2347 
2348 	a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2349 	if (IS_ERR(a))
2350 		return a;
2351 
2352 	a->nla_type = attrtype;
2353 	a->nla_len = nla_attr_size(len);
2354 
2355 	if (data)
2356 		memcpy(nla_data(a), data, len);
2357 	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2358 
2359 	return a;
2360 }
2361 
2362 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
2363 		       int len, bool log)
2364 {
2365 	struct nlattr *a;
2366 
2367 	a = __add_action(sfa, attrtype, data, len, log);
2368 
2369 	return PTR_ERR_OR_ZERO(a);
2370 }
2371 
2372 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2373 					  int attrtype, bool log)
2374 {
2375 	int used = (*sfa)->actions_len;
2376 	int err;
2377 
2378 	err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2379 	if (err)
2380 		return err;
2381 
2382 	return used;
2383 }
2384 
2385 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2386 					 int st_offset)
2387 {
2388 	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2389 							       st_offset);
2390 
2391 	a->nla_len = sfa->actions_len - st_offset;
2392 }
2393 
2394 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2395 				  const struct sw_flow_key *key,
2396 				  struct sw_flow_actions **sfa,
2397 				  __be16 eth_type, __be16 vlan_tci, bool log);
2398 
2399 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
2400 				    const struct sw_flow_key *key,
2401 				    struct sw_flow_actions **sfa,
2402 				    __be16 eth_type, __be16 vlan_tci,
2403 				    bool log, bool last)
2404 {
2405 	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
2406 	const struct nlattr *probability, *actions;
2407 	const struct nlattr *a;
2408 	int rem, start, err;
2409 	struct sample_arg arg;
2410 
2411 	memset(attrs, 0, sizeof(attrs));
2412 	nla_for_each_nested(a, attr, rem) {
2413 		int type = nla_type(a);
2414 		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
2415 			return -EINVAL;
2416 		attrs[type] = a;
2417 	}
2418 	if (rem)
2419 		return -EINVAL;
2420 
2421 	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
2422 	if (!probability || nla_len(probability) != sizeof(u32))
2423 		return -EINVAL;
2424 
2425 	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
2426 	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
2427 		return -EINVAL;
2428 
2429 	/* validation done, copy sample action. */
2430 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
2431 	if (start < 0)
2432 		return start;
2433 
2434 	/* When both skb and flow may be changed, put the sample
2435 	 * into a deferred fifo. On the other hand, if only skb
2436 	 * may be modified, the actions can be executed in place.
2437 	 *
2438 	 * Do this analysis at the flow installation time.
2439 	 * Set 'clone_action->exec' to true if the actions can be
2440 	 * executed without being deferred.
2441 	 *
2442 	 * If the sample is the last action, it can always be excuted
2443 	 * rather than deferred.
2444 	 */
2445 	arg.exec = last || !actions_may_change_flow(actions);
2446 	arg.probability = nla_get_u32(probability);
2447 
2448 	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
2449 				 log);
2450 	if (err)
2451 		return err;
2452 
2453 	err = __ovs_nla_copy_actions(net, actions, key, sfa,
2454 				     eth_type, vlan_tci, log);
2455 
2456 	if (err)
2457 		return err;
2458 
2459 	add_nested_action_end(*sfa, start);
2460 
2461 	return 0;
2462 }
2463 
2464 void ovs_match_init(struct sw_flow_match *match,
2465 		    struct sw_flow_key *key,
2466 		    bool reset_key,
2467 		    struct sw_flow_mask *mask)
2468 {
2469 	memset(match, 0, sizeof(*match));
2470 	match->key = key;
2471 	match->mask = mask;
2472 
2473 	if (reset_key)
2474 		memset(key, 0, sizeof(*key));
2475 
2476 	if (mask) {
2477 		memset(&mask->key, 0, sizeof(mask->key));
2478 		mask->range.start = mask->range.end = 0;
2479 	}
2480 }
2481 
2482 static int validate_geneve_opts(struct sw_flow_key *key)
2483 {
2484 	struct geneve_opt *option;
2485 	int opts_len = key->tun_opts_len;
2486 	bool crit_opt = false;
2487 
2488 	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2489 	while (opts_len > 0) {
2490 		int len;
2491 
2492 		if (opts_len < sizeof(*option))
2493 			return -EINVAL;
2494 
2495 		len = sizeof(*option) + option->length * 4;
2496 		if (len > opts_len)
2497 			return -EINVAL;
2498 
2499 		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2500 
2501 		option = (struct geneve_opt *)((u8 *)option + len);
2502 		opts_len -= len;
2503 	};
2504 
2505 	key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2506 
2507 	return 0;
2508 }
2509 
2510 static int validate_and_copy_set_tun(const struct nlattr *attr,
2511 				     struct sw_flow_actions **sfa, bool log)
2512 {
2513 	struct sw_flow_match match;
2514 	struct sw_flow_key key;
2515 	struct metadata_dst *tun_dst;
2516 	struct ip_tunnel_info *tun_info;
2517 	struct ovs_tunnel_info *ovs_tun;
2518 	struct nlattr *a;
2519 	int err = 0, start, opts_type;
2520 
2521 	ovs_match_init(&match, &key, true, NULL);
2522 	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2523 	if (opts_type < 0)
2524 		return opts_type;
2525 
2526 	if (key.tun_opts_len) {
2527 		switch (opts_type) {
2528 		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2529 			err = validate_geneve_opts(&key);
2530 			if (err < 0)
2531 				return err;
2532 			break;
2533 		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2534 			break;
2535 		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2536 			break;
2537 		}
2538 	};
2539 
2540 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2541 	if (start < 0)
2542 		return start;
2543 
2544 	tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
2545 				     GFP_KERNEL);
2546 
2547 	if (!tun_dst)
2548 		return -ENOMEM;
2549 
2550 	err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2551 	if (err) {
2552 		dst_release((struct dst_entry *)tun_dst);
2553 		return err;
2554 	}
2555 
2556 	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2557 			 sizeof(*ovs_tun), log);
2558 	if (IS_ERR(a)) {
2559 		dst_release((struct dst_entry *)tun_dst);
2560 		return PTR_ERR(a);
2561 	}
2562 
2563 	ovs_tun = nla_data(a);
2564 	ovs_tun->tun_dst = tun_dst;
2565 
2566 	tun_info = &tun_dst->u.tun_info;
2567 	tun_info->mode = IP_TUNNEL_INFO_TX;
2568 	if (key.tun_proto == AF_INET6)
2569 		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2570 	tun_info->key = key.tun_key;
2571 
2572 	/* We need to store the options in the action itself since
2573 	 * everything else will go away after flow setup. We can append
2574 	 * it to tun_info and then point there.
2575 	 */
2576 	ip_tunnel_info_opts_set(tun_info,
2577 				TUN_METADATA_OPTS(&key, key.tun_opts_len),
2578 				key.tun_opts_len);
2579 	add_nested_action_end(*sfa, start);
2580 
2581 	return err;
2582 }
2583 
2584 static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2585 			 bool is_push_nsh, bool log)
2586 {
2587 	struct sw_flow_match match;
2588 	struct sw_flow_key key;
2589 	int ret = 0;
2590 
2591 	ovs_match_init(&match, &key, true, NULL);
2592 	ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2593 				      is_push_nsh, log);
2594 	return !ret;
2595 }
2596 
/* Check that no data bit lies outside its mask.
 * The @len mask bytes follow the @len data bytes immediately, before any
 * netlink padding.  Returns false as soon as a non-masked bit is found set.
 */
static bool validate_masked(u8 *data, int len)
{
	const u8 *mask = data + len;
	int i;

	for (i = 0; i < len; i++) {
		if (data[i] & ~mask[i])
			return false;
	}

	return true;
}
2610 
2611 static int validate_set(const struct nlattr *a,
2612 			const struct sw_flow_key *flow_key,
2613 			struct sw_flow_actions **sfa, bool *skip_copy,
2614 			u8 mac_proto, __be16 eth_type, bool masked, bool log)
2615 {
2616 	const struct nlattr *ovs_key = nla_data(a);
2617 	int key_type = nla_type(ovs_key);
2618 	size_t key_len;
2619 
2620 	/* There can be only one key in a action */
2621 	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2622 		return -EINVAL;
2623 
2624 	key_len = nla_len(ovs_key);
2625 	if (masked)
2626 		key_len /= 2;
2627 
2628 	if (key_type > OVS_KEY_ATTR_MAX ||
2629 	    !check_attr_len(key_len, ovs_key_lens[key_type].len))
2630 		return -EINVAL;
2631 
2632 	if (masked && !validate_masked(nla_data(ovs_key), key_len))
2633 		return -EINVAL;
2634 
2635 	switch (key_type) {
2636 	const struct ovs_key_ipv4 *ipv4_key;
2637 	const struct ovs_key_ipv6 *ipv6_key;
2638 	int err;
2639 
2640 	case OVS_KEY_ATTR_PRIORITY:
2641 	case OVS_KEY_ATTR_SKB_MARK:
2642 	case OVS_KEY_ATTR_CT_MARK:
2643 	case OVS_KEY_ATTR_CT_LABELS:
2644 		break;
2645 
2646 	case OVS_KEY_ATTR_ETHERNET:
2647 		if (mac_proto != MAC_PROTO_ETHERNET)
2648 			return -EINVAL;
2649 		break;
2650 
2651 	case OVS_KEY_ATTR_TUNNEL:
2652 		if (masked)
2653 			return -EINVAL; /* Masked tunnel set not supported. */
2654 
2655 		*skip_copy = true;
2656 		err = validate_and_copy_set_tun(a, sfa, log);
2657 		if (err)
2658 			return err;
2659 		break;
2660 
2661 	case OVS_KEY_ATTR_IPV4:
2662 		if (eth_type != htons(ETH_P_IP))
2663 			return -EINVAL;
2664 
2665 		ipv4_key = nla_data(ovs_key);
2666 
2667 		if (masked) {
2668 			const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2669 
2670 			/* Non-writeable fields. */
2671 			if (mask->ipv4_proto || mask->ipv4_frag)
2672 				return -EINVAL;
2673 		} else {
2674 			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2675 				return -EINVAL;
2676 
2677 			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2678 				return -EINVAL;
2679 		}
2680 		break;
2681 
2682 	case OVS_KEY_ATTR_IPV6:
2683 		if (eth_type != htons(ETH_P_IPV6))
2684 			return -EINVAL;
2685 
2686 		ipv6_key = nla_data(ovs_key);
2687 
2688 		if (masked) {
2689 			const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2690 
2691 			/* Non-writeable fields. */
2692 			if (mask->ipv6_proto || mask->ipv6_frag)
2693 				return -EINVAL;
2694 
2695 			/* Invalid bits in the flow label mask? */
2696 			if (ntohl(mask->ipv6_label) & 0xFFF00000)
2697 				return -EINVAL;
2698 		} else {
2699 			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2700 				return -EINVAL;
2701 
2702 			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2703 				return -EINVAL;
2704 		}
2705 		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2706 			return -EINVAL;
2707 
2708 		break;
2709 
2710 	case OVS_KEY_ATTR_TCP:
2711 		if ((eth_type != htons(ETH_P_IP) &&
2712 		     eth_type != htons(ETH_P_IPV6)) ||
2713 		    flow_key->ip.proto != IPPROTO_TCP)
2714 			return -EINVAL;
2715 
2716 		break;
2717 
2718 	case OVS_KEY_ATTR_UDP:
2719 		if ((eth_type != htons(ETH_P_IP) &&
2720 		     eth_type != htons(ETH_P_IPV6)) ||
2721 		    flow_key->ip.proto != IPPROTO_UDP)
2722 			return -EINVAL;
2723 
2724 		break;
2725 
2726 	case OVS_KEY_ATTR_MPLS:
2727 		if (!eth_p_mpls(eth_type))
2728 			return -EINVAL;
2729 		break;
2730 
2731 	case OVS_KEY_ATTR_SCTP:
2732 		if ((eth_type != htons(ETH_P_IP) &&
2733 		     eth_type != htons(ETH_P_IPV6)) ||
2734 		    flow_key->ip.proto != IPPROTO_SCTP)
2735 			return -EINVAL;
2736 
2737 		break;
2738 
2739 	case OVS_KEY_ATTR_NSH:
2740 		if (eth_type != htons(ETH_P_NSH))
2741 			return -EINVAL;
2742 		if (!validate_nsh(nla_data(a), masked, false, log))
2743 			return -EINVAL;
2744 		break;
2745 
2746 	default:
2747 		return -EINVAL;
2748 	}
2749 
2750 	/* Convert non-masked non-tunnel set actions to masked set actions. */
2751 	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
2752 		int start, len = key_len * 2;
2753 		struct nlattr *at;
2754 
2755 		*skip_copy = true;
2756 
2757 		start = add_nested_action_start(sfa,
2758 						OVS_ACTION_ATTR_SET_TO_MASKED,
2759 						log);
2760 		if (start < 0)
2761 			return start;
2762 
2763 		at = __add_action(sfa, key_type, NULL, len, log);
2764 		if (IS_ERR(at))
2765 			return PTR_ERR(at);
2766 
2767 		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
2768 		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
2769 		/* Clear non-writeable bits from otherwise writeable fields. */
2770 		if (key_type == OVS_KEY_ATTR_IPV6) {
2771 			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
2772 
2773 			mask->ipv6_label &= htonl(0x000FFFFF);
2774 		}
2775 		add_nested_action_end(*sfa, start);
2776 	}
2777 
2778 	return 0;
2779 }
2780 
2781 static int validate_userspace(const struct nlattr *attr)
2782 {
2783 	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
2784 		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
2785 		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
2786 		[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
2787 	};
2788 	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
2789 	int error;
2790 
2791 	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
2792 				 userspace_policy, NULL);
2793 	if (error)
2794 		return error;
2795 
2796 	if (!a[OVS_USERSPACE_ATTR_PID] ||
2797 	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
2798 		return -EINVAL;
2799 
2800 	return 0;
2801 }
2802 
2803 static int copy_action(const struct nlattr *from,
2804 		       struct sw_flow_actions **sfa, bool log)
2805 {
2806 	int totlen = NLA_ALIGN(from->nla_len);
2807 	struct nlattr *to;
2808 
2809 	to = reserve_sfa_size(sfa, from->nla_len, log);
2810 	if (IS_ERR(to))
2811 		return PTR_ERR(to);
2812 
2813 	memcpy(to, from, totlen);
2814 	return 0;
2815 }
2816 
2817 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2818 				  const struct sw_flow_key *key,
2819 				  struct sw_flow_actions **sfa,
2820 				  __be16 eth_type, __be16 vlan_tci, bool log)
2821 {
2822 	u8 mac_proto = ovs_key_mac_proto(key);
2823 	const struct nlattr *a;
2824 	int rem, err;
2825 
2826 	nla_for_each_nested(a, attr, rem) {
2827 		/* Expected argument lengths, (u32)-1 for variable length. */
2828 		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2829 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2830 			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2831 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2832 			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2833 			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2834 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2835 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
2836 			[OVS_ACTION_ATTR_SET] = (u32)-1,
2837 			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2838 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2839 			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2840 			[OVS_ACTION_ATTR_CT] = (u32)-1,
2841 			[OVS_ACTION_ATTR_CT_CLEAR] = 0,
2842 			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2843 			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2844 			[OVS_ACTION_ATTR_POP_ETH] = 0,
2845 			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2846 			[OVS_ACTION_ATTR_POP_NSH] = 0,
2847 		};
2848 		const struct ovs_action_push_vlan *vlan;
2849 		int type = nla_type(a);
2850 		bool skip_copy;
2851 
2852 		if (type > OVS_ACTION_ATTR_MAX ||
2853 		    (action_lens[type] != nla_len(a) &&
2854 		     action_lens[type] != (u32)-1))
2855 			return -EINVAL;
2856 
2857 		skip_copy = false;
2858 		switch (type) {
2859 		case OVS_ACTION_ATTR_UNSPEC:
2860 			return -EINVAL;
2861 
2862 		case OVS_ACTION_ATTR_USERSPACE:
2863 			err = validate_userspace(a);
2864 			if (err)
2865 				return err;
2866 			break;
2867 
2868 		case OVS_ACTION_ATTR_OUTPUT:
2869 			if (nla_get_u32(a) >= DP_MAX_PORTS)
2870 				return -EINVAL;
2871 			break;
2872 
2873 		case OVS_ACTION_ATTR_TRUNC: {
2874 			const struct ovs_action_trunc *trunc = nla_data(a);
2875 
2876 			if (trunc->max_len < ETH_HLEN)
2877 				return -EINVAL;
2878 			break;
2879 		}
2880 
2881 		case OVS_ACTION_ATTR_HASH: {
2882 			const struct ovs_action_hash *act_hash = nla_data(a);
2883 
2884 			switch (act_hash->hash_alg) {
2885 			case OVS_HASH_ALG_L4:
2886 				break;
2887 			default:
2888 				return  -EINVAL;
2889 			}
2890 
2891 			break;
2892 		}
2893 
2894 		case OVS_ACTION_ATTR_POP_VLAN:
2895 			if (mac_proto != MAC_PROTO_ETHERNET)
2896 				return -EINVAL;
2897 			vlan_tci = htons(0);
2898 			break;
2899 
2900 		case OVS_ACTION_ATTR_PUSH_VLAN:
2901 			if (mac_proto != MAC_PROTO_ETHERNET)
2902 				return -EINVAL;
2903 			vlan = nla_data(a);
2904 			if (!eth_type_vlan(vlan->vlan_tpid))
2905 				return -EINVAL;
2906 			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
2907 				return -EINVAL;
2908 			vlan_tci = vlan->vlan_tci;
2909 			break;
2910 
2911 		case OVS_ACTION_ATTR_RECIRC:
2912 			break;
2913 
2914 		case OVS_ACTION_ATTR_PUSH_MPLS: {
2915 			const struct ovs_action_push_mpls *mpls = nla_data(a);
2916 
2917 			if (!eth_p_mpls(mpls->mpls_ethertype))
2918 				return -EINVAL;
2919 			/* Prohibit push MPLS other than to a white list
2920 			 * for packets that have a known tag order.
2921 			 */
2922 			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2923 			    (eth_type != htons(ETH_P_IP) &&
2924 			     eth_type != htons(ETH_P_IPV6) &&
2925 			     eth_type != htons(ETH_P_ARP) &&
2926 			     eth_type != htons(ETH_P_RARP) &&
2927 			     !eth_p_mpls(eth_type)))
2928 				return -EINVAL;
2929 			eth_type = mpls->mpls_ethertype;
2930 			break;
2931 		}
2932 
2933 		case OVS_ACTION_ATTR_POP_MPLS:
2934 			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2935 			    !eth_p_mpls(eth_type))
2936 				return -EINVAL;
2937 
2938 			/* Disallow subsequent L2.5+ set and mpls_pop actions
2939 			 * as there is no check here to ensure that the new
2940 			 * eth_type is valid and thus set actions could
2941 			 * write off the end of the packet or otherwise
2942 			 * corrupt it.
2943 			 *
2944 			 * Support for these actions is planned using packet
2945 			 * recirculation.
2946 			 */
2947 			eth_type = htons(0);
2948 			break;
2949 
2950 		case OVS_ACTION_ATTR_SET:
2951 			err = validate_set(a, key, sfa,
2952 					   &skip_copy, mac_proto, eth_type,
2953 					   false, log);
2954 			if (err)
2955 				return err;
2956 			break;
2957 
2958 		case OVS_ACTION_ATTR_SET_MASKED:
2959 			err = validate_set(a, key, sfa,
2960 					   &skip_copy, mac_proto, eth_type,
2961 					   true, log);
2962 			if (err)
2963 				return err;
2964 			break;
2965 
2966 		case OVS_ACTION_ATTR_SAMPLE: {
2967 			bool last = nla_is_last(a, rem);
2968 
2969 			err = validate_and_copy_sample(net, a, key, sfa,
2970 						       eth_type, vlan_tci,
2971 						       log, last);
2972 			if (err)
2973 				return err;
2974 			skip_copy = true;
2975 			break;
2976 		}
2977 
2978 		case OVS_ACTION_ATTR_CT:
2979 			err = ovs_ct_copy_action(net, a, key, sfa, log);
2980 			if (err)
2981 				return err;
2982 			skip_copy = true;
2983 			break;
2984 
2985 		case OVS_ACTION_ATTR_CT_CLEAR:
2986 			break;
2987 
2988 		case OVS_ACTION_ATTR_PUSH_ETH:
2989 			/* Disallow pushing an Ethernet header if one
2990 			 * is already present */
2991 			if (mac_proto != MAC_PROTO_NONE)
2992 				return -EINVAL;
2993 			mac_proto = MAC_PROTO_NONE;
2994 			break;
2995 
2996 		case OVS_ACTION_ATTR_POP_ETH:
2997 			if (mac_proto != MAC_PROTO_ETHERNET)
2998 				return -EINVAL;
2999 			if (vlan_tci & htons(VLAN_TAG_PRESENT))
3000 				return -EINVAL;
3001 			mac_proto = MAC_PROTO_ETHERNET;
3002 			break;
3003 
3004 		case OVS_ACTION_ATTR_PUSH_NSH:
3005 			if (mac_proto != MAC_PROTO_ETHERNET) {
3006 				u8 next_proto;
3007 
3008 				next_proto = tun_p_from_eth_p(eth_type);
3009 				if (!next_proto)
3010 					return -EINVAL;
3011 			}
3012 			mac_proto = MAC_PROTO_NONE;
3013 			if (!validate_nsh(nla_data(a), false, true, true))
3014 				return -EINVAL;
3015 			break;
3016 
3017 		case OVS_ACTION_ATTR_POP_NSH: {
3018 			__be16 inner_proto;
3019 
3020 			if (eth_type != htons(ETH_P_NSH))
3021 				return -EINVAL;
3022 			inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3023 			if (!inner_proto)
3024 				return -EINVAL;
3025 			if (key->nsh.base.np == TUN_P_ETHERNET)
3026 				mac_proto = MAC_PROTO_ETHERNET;
3027 			else
3028 				mac_proto = MAC_PROTO_NONE;
3029 			break;
3030 		}
3031 
3032 		default:
3033 			OVS_NLERR(log, "Unknown Action type %d", type);
3034 			return -EINVAL;
3035 		}
3036 		if (!skip_copy) {
3037 			err = copy_action(a, sfa, log);
3038 			if (err)
3039 				return err;
3040 		}
3041 	}
3042 
3043 	if (rem > 0)
3044 		return -EINVAL;
3045 
3046 	return 0;
3047 }
3048 
3049 /* 'key' must be the masked key. */
3050 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3051 			 const struct sw_flow_key *key,
3052 			 struct sw_flow_actions **sfa, bool log)
3053 {
3054 	int err;
3055 
3056 	*sfa = nla_alloc_flow_actions(nla_len(attr), log);
3057 	if (IS_ERR(*sfa))
3058 		return PTR_ERR(*sfa);
3059 
3060 	(*sfa)->orig_len = nla_len(attr);
3061 	err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3062 				     key->eth.vlan.tci, log);
3063 	if (err)
3064 		ovs_nla_free_flow_actions(*sfa);
3065 
3066 	return err;
3067 }
3068 
3069 static int sample_action_to_attr(const struct nlattr *attr,
3070 				 struct sk_buff *skb)
3071 {
3072 	struct nlattr *start, *ac_start = NULL, *sample_arg;
3073 	int err = 0, rem = nla_len(attr);
3074 	const struct sample_arg *arg;
3075 	struct nlattr *actions;
3076 
3077 	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
3078 	if (!start)
3079 		return -EMSGSIZE;
3080 
3081 	sample_arg = nla_data(attr);
3082 	arg = nla_data(sample_arg);
3083 	actions = nla_next(sample_arg, &rem);
3084 
3085 	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
3086 		err = -EMSGSIZE;
3087 		goto out;
3088 	}
3089 
3090 	ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
3091 	if (!ac_start) {
3092 		err = -EMSGSIZE;
3093 		goto out;
3094 	}
3095 
3096 	err = ovs_nla_put_actions(actions, rem, skb);
3097 
3098 out:
3099 	if (err) {
3100 		nla_nest_cancel(skb, ac_start);
3101 		nla_nest_cancel(skb, start);
3102 	} else {
3103 		nla_nest_end(skb, ac_start);
3104 		nla_nest_end(skb, start);
3105 	}
3106 
3107 	return err;
3108 }
3109 
3110 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
3111 {
3112 	const struct nlattr *ovs_key = nla_data(a);
3113 	int key_type = nla_type(ovs_key);
3114 	struct nlattr *start;
3115 	int err;
3116 
3117 	switch (key_type) {
3118 	case OVS_KEY_ATTR_TUNNEL_INFO: {
3119 		struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
3120 		struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
3121 
3122 		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3123 		if (!start)
3124 			return -EMSGSIZE;
3125 
3126 		err =  ip_tun_to_nlattr(skb, &tun_info->key,
3127 					ip_tunnel_info_opts(tun_info),
3128 					tun_info->options_len,
3129 					ip_tunnel_info_af(tun_info));
3130 		if (err)
3131 			return err;
3132 		nla_nest_end(skb, start);
3133 		break;
3134 	}
3135 	default:
3136 		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
3137 			return -EMSGSIZE;
3138 		break;
3139 	}
3140 
3141 	return 0;
3142 }
3143 
3144 static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3145 						struct sk_buff *skb)
3146 {
3147 	const struct nlattr *ovs_key = nla_data(a);
3148 	struct nlattr *nla;
3149 	size_t key_len = nla_len(ovs_key) / 2;
3150 
3151 	/* Revert the conversion we did from a non-masked set action to
3152 	 * masked set action.
3153 	 */
3154 	nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3155 	if (!nla)
3156 		return -EMSGSIZE;
3157 
3158 	if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3159 		return -EMSGSIZE;
3160 
3161 	nla_nest_end(skb, nla);
3162 	return 0;
3163 }
3164 
3165 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
3166 {
3167 	const struct nlattr *a;
3168 	int rem, err;
3169 
3170 	nla_for_each_attr(a, attr, len, rem) {
3171 		int type = nla_type(a);
3172 
3173 		switch (type) {
3174 		case OVS_ACTION_ATTR_SET:
3175 			err = set_action_to_attr(a, skb);
3176 			if (err)
3177 				return err;
3178 			break;
3179 
3180 		case OVS_ACTION_ATTR_SET_TO_MASKED:
3181 			err = masked_set_action_to_set_action_attr(a, skb);
3182 			if (err)
3183 				return err;
3184 			break;
3185 
3186 		case OVS_ACTION_ATTR_SAMPLE:
3187 			err = sample_action_to_attr(a, skb);
3188 			if (err)
3189 				return err;
3190 			break;
3191 
3192 		case OVS_ACTION_ATTR_CT:
3193 			err = ovs_ct_action_to_attr(nla_data(a), skb);
3194 			if (err)
3195 				return err;
3196 			break;
3197 
3198 		default:
3199 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
3200 				return -EMSGSIZE;
3201 			break;
3202 		}
3203 	}
3204 
3205 	return 0;
3206 }
3207