xref: /linux/drivers/infiniband/hw/mlx5/fs.c (revision e814f3fd16acfb7f9966773953de8f740a1e3202)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
4  */
5 
6 #include <rdma/ib_user_verbs.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/uverbs_types.h>
9 #include <rdma/uverbs_ioctl.h>
10 #include <rdma/uverbs_std_types.h>
11 #include <rdma/mlx5_user_ioctl_cmds.h>
12 #include <rdma/mlx5_user_ioctl_verbs.h>
13 #include <rdma/ib_hdrs.h>
14 #include <rdma/ib_umem.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/fs_helpers.h>
18 #include <linux/mlx5/eswitch.h>
19 #include <net/inet_ecn.h>
20 #include "mlx5_ib.h"
21 #include "counters.h"
22 #include "devx.h"
23 #include "fs.h"
24 
25 #define UVERBS_MODULE_NAME mlx5_ib
26 #include <rdma/uverbs_named_ioctl.h>
27 
/*
 * Bit positions inside the match_criteria_enable bitmask, one per header
 * group of the match parameters (see get_match_criteria_enable()).
 */
enum {
	MATCH_CRITERIA_ENABLE_OUTER_BIT = 0,
	MATCH_CRITERIA_ENABLE_MISC_BIT = 1,
	MATCH_CRITERIA_ENABLE_INNER_BIT = 2,
	MATCH_CRITERIA_ENABLE_MISC2_BIT = 3,
};
34 
/*
 * Evaluates to true when the @headers section of the fte_match_param buffer
 * pointed to by @match_criteria consists solely of zero bytes.
 *
 * The expansion is fully parenthesized so it can be combined with other
 * operators, and it does not end in a line continuation, so the line that
 * follows the macro is never pulled into its definition.
 */
#define HEADER_IS_ZERO(match_criteria, headers)				       \
	(!memchr_inv(MLX5_ADDR_OF(fte_match_param, (match_criteria), headers), \
		     0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
38 
39 static u8 get_match_criteria_enable(u32 *match_criteria)
40 {
41 	u8 match_criteria_enable;
42 
43 	match_criteria_enable =
44 		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
45 		MATCH_CRITERIA_ENABLE_OUTER_BIT;
46 	match_criteria_enable |=
47 		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
48 		MATCH_CRITERIA_ENABLE_MISC_BIT;
49 	match_criteria_enable |=
50 		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
51 		MATCH_CRITERIA_ENABLE_INNER_BIT;
52 	match_criteria_enable |=
53 		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
54 		MATCH_CRITERIA_ENABLE_MISC2_BIT;
55 
56 	return match_criteria_enable;
57 }
58 
59 static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
60 {
61 	u8 entry_mask;
62 	u8 entry_val;
63 	int err = 0;
64 
65 	if (!mask)
66 		goto out;
67 
68 	entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
69 			      ip_protocol);
70 	entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
71 			     ip_protocol);
72 	if (!entry_mask) {
73 		MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
74 		MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
75 		goto out;
76 	}
77 	/* Don't override existing ip protocol */
78 	if (mask != entry_mask || val != entry_val)
79 		err = -EINVAL;
80 out:
81 	return err;
82 }
83 
84 static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
85 			   bool inner)
86 {
87 	if (inner) {
88 		MLX5_SET(fte_match_set_misc,
89 			 misc_c, inner_ipv6_flow_label, mask);
90 		MLX5_SET(fte_match_set_misc,
91 			 misc_v, inner_ipv6_flow_label, val);
92 	} else {
93 		MLX5_SET(fte_match_set_misc,
94 			 misc_c, outer_ipv6_flow_label, mask);
95 		MLX5_SET(fte_match_set_misc,
96 			 misc_v, outer_ipv6_flow_label, val);
97 	}
98 }
99 
100 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
101 {
102 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
103 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
104 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
105 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
106 }
107 
108 static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
109 {
110 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
111 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
112 		return -EOPNOTSUPP;
113 
114 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
115 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
116 		return -EOPNOTSUPP;
117 
118 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
119 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
120 		return -EOPNOTSUPP;
121 
122 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
123 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
124 		return -EOPNOTSUPP;
125 
126 	return 0;
127 }
128 
/*
 * Name of the last structure member this driver handles for each flow spec
 * kind. Passed as the @field argument of FIELDS_NOT_SUPPORTED() to detect
 * callers that filled in anything located after that member.
 */
#define LAST_ETH_FIELD		vlan_tag
#define LAST_IPV4_FIELD		tos
#define LAST_IPV6_FIELD		traffic_class
#define LAST_TCP_UDP_FIELD	src_port
#define LAST_TUNNEL_FIELD	tunnel_id
#define LAST_FLOW_TAG_FIELD	tag_id
#define LAST_DROP_FIELD		size
#define LAST_COUNTERS_FIELD	counters
137 
/*
 * @field is the last supported member of @filter.
 *
 * Evaluates to a non-NULL pointer when any byte of @filter located after
 * @field is non-zero, i.e. the caller populated something beyond what this
 * driver handles; evaluates to NULL when all trailing bytes are zero.
 *
 * @filter is parenthesized wherever it is used as an expression and the
 * expansion as a whole is wrapped, so the macro stays correct for any lvalue
 * argument and inside larger expressions.
 */
#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
	(memchr_inv((void *)&(filter).field + sizeof((filter).field), 0,      \
		    sizeof(filter) - offsetofend(typeof(filter), field)))
142 
143 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
144 			   bool is_egress,
145 			   struct mlx5_flow_act *action)
146 {
147 
148 	switch (maction->ib_action.type) {
149 	case IB_FLOW_ACTION_UNSPECIFIED:
150 		if (maction->flow_action_raw.sub_type ==
151 		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
152 			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
153 				return -EINVAL;
154 			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
155 			action->modify_hdr =
156 				maction->flow_action_raw.modify_hdr;
157 			return 0;
158 		}
159 		if (maction->flow_action_raw.sub_type ==
160 		    MLX5_IB_FLOW_ACTION_DECAP) {
161 			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
162 				return -EINVAL;
163 			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
164 			return 0;
165 		}
166 		if (maction->flow_action_raw.sub_type ==
167 		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
168 			if (action->action &
169 			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
170 				return -EINVAL;
171 			action->action |=
172 				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
173 			action->pkt_reformat =
174 				maction->flow_action_raw.pkt_reformat;
175 			return 0;
176 		}
177 		fallthrough;
178 	default:
179 		return -EOPNOTSUPP;
180 	}
181 }
182 
183 static int parse_flow_attr(struct mlx5_core_dev *mdev,
184 			   struct mlx5_flow_spec *spec,
185 			   const union ib_flow_spec *ib_spec,
186 			   const struct ib_flow_attr *flow_attr,
187 			   struct mlx5_flow_act *action, u32 prev_type)
188 {
189 	struct mlx5_flow_context *flow_context = &spec->flow_context;
190 	u32 *match_c = spec->match_criteria;
191 	u32 *match_v = spec->match_value;
192 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
193 					   misc_parameters);
194 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
195 					   misc_parameters);
196 	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
197 					    misc_parameters_2);
198 	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
199 					    misc_parameters_2);
200 	void *headers_c;
201 	void *headers_v;
202 	int match_ipv;
203 	int ret;
204 
205 	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
206 		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
207 					 inner_headers);
208 		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
209 					 inner_headers);
210 		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
211 					ft_field_support.inner_ip_version);
212 	} else {
213 		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
214 					 outer_headers);
215 		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
216 					 outer_headers);
217 		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
218 					ft_field_support.outer_ip_version);
219 	}
220 
221 	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
222 	case IB_FLOW_SPEC_ETH:
223 		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
224 			return -EOPNOTSUPP;
225 
226 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
227 					     dmac_47_16),
228 				ib_spec->eth.mask.dst_mac);
229 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
230 					     dmac_47_16),
231 				ib_spec->eth.val.dst_mac);
232 
233 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
234 					     smac_47_16),
235 				ib_spec->eth.mask.src_mac);
236 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
237 					     smac_47_16),
238 				ib_spec->eth.val.src_mac);
239 
240 		if (ib_spec->eth.mask.vlan_tag) {
241 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
242 				 cvlan_tag, 1);
243 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
244 				 cvlan_tag, 1);
245 
246 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
247 				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
248 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
249 				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
250 
251 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
252 				 first_cfi,
253 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
254 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
255 				 first_cfi,
256 				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
257 
258 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
259 				 first_prio,
260 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
261 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
262 				 first_prio,
263 				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
264 		}
265 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
266 			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
267 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
268 			 ethertype, ntohs(ib_spec->eth.val.ether_type));
269 		break;
270 	case IB_FLOW_SPEC_IPV4:
271 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
272 			return -EOPNOTSUPP;
273 
274 		if (match_ipv) {
275 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
276 				 ip_version, 0xf);
277 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
278 				 ip_version, MLX5_FS_IPV4_VERSION);
279 		} else {
280 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
281 				 ethertype, 0xffff);
282 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
283 				 ethertype, ETH_P_IP);
284 		}
285 
286 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
287 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
288 		       &ib_spec->ipv4.mask.src_ip,
289 		       sizeof(ib_spec->ipv4.mask.src_ip));
290 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
291 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
292 		       &ib_spec->ipv4.val.src_ip,
293 		       sizeof(ib_spec->ipv4.val.src_ip));
294 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
295 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
296 		       &ib_spec->ipv4.mask.dst_ip,
297 		       sizeof(ib_spec->ipv4.mask.dst_ip));
298 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
299 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
300 		       &ib_spec->ipv4.val.dst_ip,
301 		       sizeof(ib_spec->ipv4.val.dst_ip));
302 
303 		set_tos(headers_c, headers_v,
304 			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
305 
306 		if (set_proto(headers_c, headers_v,
307 			      ib_spec->ipv4.mask.proto,
308 			      ib_spec->ipv4.val.proto))
309 			return -EINVAL;
310 		break;
311 	case IB_FLOW_SPEC_IPV6:
312 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
313 			return -EOPNOTSUPP;
314 
315 		if (match_ipv) {
316 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
317 				 ip_version, 0xf);
318 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
319 				 ip_version, MLX5_FS_IPV6_VERSION);
320 		} else {
321 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
322 				 ethertype, 0xffff);
323 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
324 				 ethertype, ETH_P_IPV6);
325 		}
326 
327 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
328 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
329 		       &ib_spec->ipv6.mask.src_ip,
330 		       sizeof(ib_spec->ipv6.mask.src_ip));
331 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
332 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
333 		       &ib_spec->ipv6.val.src_ip,
334 		       sizeof(ib_spec->ipv6.val.src_ip));
335 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
336 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
337 		       &ib_spec->ipv6.mask.dst_ip,
338 		       sizeof(ib_spec->ipv6.mask.dst_ip));
339 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
340 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
341 		       &ib_spec->ipv6.val.dst_ip,
342 		       sizeof(ib_spec->ipv6.val.dst_ip));
343 
344 		set_tos(headers_c, headers_v,
345 			ib_spec->ipv6.mask.traffic_class,
346 			ib_spec->ipv6.val.traffic_class);
347 
348 		if (set_proto(headers_c, headers_v,
349 			      ib_spec->ipv6.mask.next_hdr,
350 			      ib_spec->ipv6.val.next_hdr))
351 			return -EINVAL;
352 
353 		set_flow_label(misc_params_c, misc_params_v,
354 			       ntohl(ib_spec->ipv6.mask.flow_label),
355 			       ntohl(ib_spec->ipv6.val.flow_label),
356 			       ib_spec->type & IB_FLOW_SPEC_INNER);
357 		break;
358 	case IB_FLOW_SPEC_ESP:
359 		return -EOPNOTSUPP;
360 	case IB_FLOW_SPEC_TCP:
361 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
362 					 LAST_TCP_UDP_FIELD))
363 			return -EOPNOTSUPP;
364 
365 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
366 			return -EINVAL;
367 
368 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
369 			 ntohs(ib_spec->tcp_udp.mask.src_port));
370 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
371 			 ntohs(ib_spec->tcp_udp.val.src_port));
372 
373 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
374 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
375 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
376 			 ntohs(ib_spec->tcp_udp.val.dst_port));
377 		break;
378 	case IB_FLOW_SPEC_UDP:
379 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
380 					 LAST_TCP_UDP_FIELD))
381 			return -EOPNOTSUPP;
382 
383 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
384 			return -EINVAL;
385 
386 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
387 			 ntohs(ib_spec->tcp_udp.mask.src_port));
388 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
389 			 ntohs(ib_spec->tcp_udp.val.src_port));
390 
391 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
392 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
393 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
394 			 ntohs(ib_spec->tcp_udp.val.dst_port));
395 		break;
396 	case IB_FLOW_SPEC_GRE:
397 		if (ib_spec->gre.mask.c_ks_res0_ver)
398 			return -EOPNOTSUPP;
399 
400 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
401 			return -EINVAL;
402 
403 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
404 			 0xff);
405 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
406 			 IPPROTO_GRE);
407 
408 		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
409 			 ntohs(ib_spec->gre.mask.protocol));
410 		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
411 			 ntohs(ib_spec->gre.val.protocol));
412 
413 		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
414 				    gre_key.nvgre.hi),
415 		       &ib_spec->gre.mask.key,
416 		       sizeof(ib_spec->gre.mask.key));
417 		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
418 				    gre_key.nvgre.hi),
419 		       &ib_spec->gre.val.key,
420 		       sizeof(ib_spec->gre.val.key));
421 		break;
422 	case IB_FLOW_SPEC_MPLS:
423 		switch (prev_type) {
424 		case IB_FLOW_SPEC_UDP:
425 			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
426 						   ft_field_support.outer_first_mpls_over_udp),
427 						   &ib_spec->mpls.mask.tag))
428 				return -EOPNOTSUPP;
429 
430 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
431 					    outer_first_mpls_over_udp),
432 			       &ib_spec->mpls.val.tag,
433 			       sizeof(ib_spec->mpls.val.tag));
434 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
435 					    outer_first_mpls_over_udp),
436 			       &ib_spec->mpls.mask.tag,
437 			       sizeof(ib_spec->mpls.mask.tag));
438 			break;
439 		case IB_FLOW_SPEC_GRE:
440 			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
441 						   ft_field_support.outer_first_mpls_over_gre),
442 						   &ib_spec->mpls.mask.tag))
443 				return -EOPNOTSUPP;
444 
445 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
446 					    outer_first_mpls_over_gre),
447 			       &ib_spec->mpls.val.tag,
448 			       sizeof(ib_spec->mpls.val.tag));
449 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
450 					    outer_first_mpls_over_gre),
451 			       &ib_spec->mpls.mask.tag,
452 			       sizeof(ib_spec->mpls.mask.tag));
453 			break;
454 		default:
455 			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
456 				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
457 							   ft_field_support.inner_first_mpls),
458 							   &ib_spec->mpls.mask.tag))
459 					return -EOPNOTSUPP;
460 
461 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
462 						    inner_first_mpls),
463 				       &ib_spec->mpls.val.tag,
464 				       sizeof(ib_spec->mpls.val.tag));
465 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
466 						    inner_first_mpls),
467 				       &ib_spec->mpls.mask.tag,
468 				       sizeof(ib_spec->mpls.mask.tag));
469 			} else {
470 				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
471 							   ft_field_support.outer_first_mpls),
472 							   &ib_spec->mpls.mask.tag))
473 					return -EOPNOTSUPP;
474 
475 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
476 						    outer_first_mpls),
477 				       &ib_spec->mpls.val.tag,
478 				       sizeof(ib_spec->mpls.val.tag));
479 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
480 						    outer_first_mpls),
481 				       &ib_spec->mpls.mask.tag,
482 				       sizeof(ib_spec->mpls.mask.tag));
483 			}
484 		}
485 		break;
486 	case IB_FLOW_SPEC_VXLAN_TUNNEL:
487 		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
488 					 LAST_TUNNEL_FIELD))
489 			return -EOPNOTSUPP;
490 
491 		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
492 			 ntohl(ib_spec->tunnel.mask.tunnel_id));
493 		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
494 			 ntohl(ib_spec->tunnel.val.tunnel_id));
495 		break;
496 	case IB_FLOW_SPEC_ACTION_TAG:
497 		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
498 					 LAST_FLOW_TAG_FIELD))
499 			return -EOPNOTSUPP;
500 		if (ib_spec->flow_tag.tag_id >= BIT(24))
501 			return -EINVAL;
502 
503 		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
504 		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
505 		break;
506 	case IB_FLOW_SPEC_ACTION_DROP:
507 		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
508 					 LAST_DROP_FIELD))
509 			return -EOPNOTSUPP;
510 		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
511 		break;
512 	case IB_FLOW_SPEC_ACTION_HANDLE:
513 		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
514 			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
515 		if (ret)
516 			return ret;
517 		break;
518 	case IB_FLOW_SPEC_ACTION_COUNT:
519 		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
520 					 LAST_COUNTERS_FIELD))
521 			return -EOPNOTSUPP;
522 
523 		/* for now support only one counters spec per flow */
524 		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
525 			return -EINVAL;
526 
527 		action->counters = ib_spec->flow_count.counters;
528 		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
529 		break;
530 	default:
531 		return -EINVAL;
532 	}
533 
534 	return 0;
535 }
536 
537 /* If a flow could catch both multicast and unicast packets,
538  * it won't fall into the multicast flow steering table and this rule
539  * could steal other multicast packets.
540  */
541 static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
542 {
543 	union ib_flow_spec *flow_spec;
544 
545 	if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
546 	    ib_attr->num_of_specs < 1)
547 		return false;
548 
549 	flow_spec = (union ib_flow_spec *)(ib_attr + 1);
550 	if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
551 		struct ib_flow_spec_ipv4 *ipv4_spec;
552 
553 		ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
554 		if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
555 			return true;
556 
557 		return false;
558 	}
559 
560 	if (flow_spec->type == IB_FLOW_SPEC_ETH) {
561 		struct ib_flow_spec_eth *eth_spec;
562 
563 		eth_spec = (struct ib_flow_spec_eth *)flow_spec;
564 		return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
565 		       is_multicast_ether_addr(eth_spec->val.dst_mac);
566 	}
567 
568 	return false;
569 }
570 
571 static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
572 			       const struct ib_flow_attr *flow_attr,
573 			       bool check_inner)
574 {
575 	union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
576 	int match_ipv = check_inner ?
577 			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
578 					ft_field_support.inner_ip_version) :
579 			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
580 					ft_field_support.outer_ip_version);
581 	int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
582 	bool ipv4_spec_valid, ipv6_spec_valid;
583 	unsigned int ip_spec_type = 0;
584 	bool has_ethertype = false;
585 	unsigned int spec_index;
586 	bool mask_valid = true;
587 	u16 eth_type = 0;
588 	bool type_valid;
589 
590 	/* Validate that ethertype is correct */
591 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
592 		if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
593 		    ib_spec->eth.mask.ether_type) {
594 			mask_valid = (ib_spec->eth.mask.ether_type ==
595 				      htons(0xffff));
596 			has_ethertype = true;
597 			eth_type = ntohs(ib_spec->eth.val.ether_type);
598 		} else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
599 			   (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
600 			ip_spec_type = ib_spec->type;
601 		}
602 		ib_spec = (void *)ib_spec + ib_spec->size;
603 	}
604 
605 	type_valid = (!has_ethertype) || (!ip_spec_type);
606 	if (!type_valid && mask_valid) {
607 		ipv4_spec_valid = (eth_type == ETH_P_IP) &&
608 			(ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
609 		ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
610 			(ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
611 
612 		type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
613 			     (((eth_type == ETH_P_MPLS_UC) ||
614 			       (eth_type == ETH_P_MPLS_MC)) && match_ipv);
615 	}
616 
617 	return type_valid;
618 }
619 
620 static bool is_valid_attr(struct mlx5_core_dev *mdev,
621 			  const struct ib_flow_attr *flow_attr)
622 {
623 	return is_valid_ethertype(mdev, flow_attr, false) &&
624 	       is_valid_ethertype(mdev, flow_attr, true);
625 }
626 
627 static void put_flow_table(struct mlx5_ib_dev *dev,
628 			   struct mlx5_ib_flow_prio *prio, bool ft_added)
629 {
630 	prio->refcount -= !!ft_added;
631 	if (!prio->refcount) {
632 		mlx5_destroy_flow_table(prio->flow_table);
633 		prio->flow_table = NULL;
634 	}
635 }
636 
637 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
638 {
639 	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
640 							  struct mlx5_ib_flow_handler,
641 							  ibflow);
642 	struct mlx5_ib_flow_handler *iter, *tmp;
643 	struct mlx5_ib_dev *dev = handler->dev;
644 
645 	mutex_lock(&dev->flow_db->lock);
646 
647 	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
648 		mlx5_del_flow_rules(iter->rule);
649 		put_flow_table(dev, iter->prio, true);
650 		list_del(&iter->list);
651 		kfree(iter);
652 	}
653 
654 	mlx5_del_flow_rules(handler->rule);
655 	put_flow_table(dev, handler->prio, true);
656 	mlx5_ib_counters_clear_description(handler->ibcounters);
657 	mutex_unlock(&dev->flow_db->lock);
658 	if (handler->flow_matcher)
659 		atomic_dec(&handler->flow_matcher->usecnt);
660 	kfree(handler);
661 
662 	return 0;
663 }
664 
/*
 * Map an IB flow priority to a core priority index: every IB priority owns
 * two consecutive core priorities, the even one for "don't trap" flows and
 * the odd one for all others.
 */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	unsigned int core_prio = 2 * priority;

	return dont_trap ? core_prio : core_prio + 1;
}
672 
/* Direction of a flow table; also used to index the sniffer priorities. */
enum flow_table_type {
	MLX5_IB_FT_RX = 0,
	MLX5_IB_FT_TX = 1,
};
677 
/*
 * Sizing requested for IB_FLOW_ATTR_NORMAL flow tables: number of auto
 * groups and number of entries (the latter is further capped by the device
 * limit when the table is created).
 */
#define MLX5_FS_MAX_TYPES	6
#define MLX5_FS_MAX_ENTRIES	BIT(16)
680 
681 static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
682 {
683 	struct mlx5_ib_dev *dev = to_mdev(device);
684 
685 	return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
686 }
687 
688 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
689 					   struct mlx5_flow_namespace *ns,
690 					   struct mlx5_ib_flow_prio *prio,
691 					   int priority,
692 					   int num_entries, int num_groups,
693 					   u32 flags)
694 {
695 	struct mlx5_flow_table_attr ft_attr = {};
696 	struct mlx5_flow_table *ft;
697 
698 	ft_attr.prio = priority;
699 	ft_attr.max_fte = num_entries;
700 	ft_attr.flags = flags;
701 	ft_attr.autogroup.max_num_groups = num_groups;
702 	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
703 	if (IS_ERR(ft))
704 		return ERR_CAST(ft);
705 
706 	prio->flow_table = ft;
707 	prio->refcount = 0;
708 	return prio;
709 }
710 
711 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
712 						struct ib_flow_attr *flow_attr,
713 						enum flow_table_type ft_type)
714 {
715 	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
716 	struct mlx5_flow_namespace *ns = NULL;
717 	enum mlx5_flow_namespace_type fn_type;
718 	struct mlx5_ib_flow_prio *prio;
719 	struct mlx5_flow_table *ft;
720 	int max_table_size;
721 	int num_entries;
722 	int num_groups;
723 	bool esw_encap;
724 	u32 flags = 0;
725 	int priority;
726 
727 	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
728 						       log_max_ft_size));
729 	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
730 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
731 	switch (flow_attr->type) {
732 	case IB_FLOW_ATTR_NORMAL:
733 		if (flow_is_multicast_only(flow_attr) && !dont_trap)
734 			priority = MLX5_IB_FLOW_MCAST_PRIO;
735 		else
736 			priority = ib_prio_to_core_prio(flow_attr->priority,
737 							dont_trap);
738 		if (ft_type == MLX5_IB_FT_RX) {
739 			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
740 			prio = &dev->flow_db->prios[priority];
741 			if (!dev->is_rep && !esw_encap &&
742 			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
743 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
744 			if (!dev->is_rep && !esw_encap &&
745 			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
746 						      reformat_l3_tunnel_to_l2))
747 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
748 		} else {
749 			max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
750 				dev->mdev, log_max_ft_size));
751 			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
752 			prio = &dev->flow_db->egress_prios[priority];
753 			if (!dev->is_rep && !esw_encap &&
754 			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
755 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
756 		}
757 		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
758 		num_entries = MLX5_FS_MAX_ENTRIES;
759 		num_groups = MLX5_FS_MAX_TYPES;
760 		break;
761 	case IB_FLOW_ATTR_ALL_DEFAULT:
762 	case IB_FLOW_ATTR_MC_DEFAULT:
763 		ns = mlx5_get_flow_namespace(dev->mdev,
764 					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
765 		build_leftovers_ft_param(&priority, &num_entries, &num_groups);
766 		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
767 		break;
768 	case IB_FLOW_ATTR_SNIFFER:
769 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
770 					allow_sniffer_and_nic_rx_shared_tir))
771 			return ERR_PTR(-EOPNOTSUPP);
772 
773 		ns = mlx5_get_flow_namespace(
774 			dev->mdev, ft_type == MLX5_IB_FT_RX ?
775 					   MLX5_FLOW_NAMESPACE_SNIFFER_RX :
776 					   MLX5_FLOW_NAMESPACE_SNIFFER_TX);
777 
778 		prio = &dev->flow_db->sniffer[ft_type];
779 		priority = 0;
780 		num_entries = 1;
781 		num_groups = 1;
782 		break;
783 	default:
784 		break;
785 	}
786 
787 	if (!ns)
788 		return ERR_PTR(-EOPNOTSUPP);
789 
790 	max_table_size = min_t(int, num_entries, max_table_size);
791 
792 	ft = prio->flow_table;
793 	if (!ft)
794 		return _get_prio(dev, ns, prio, priority, max_table_size,
795 				 num_groups, flags);
796 
797 	return prio;
798 }
799 
/* Priorities used for the RDMA RX optional-counter flow tables. */
enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO = 0,
	RDMA_RX_CNP_OPCOUNTER_PRIO = 1,
};
804 
/* Priority used for the RDMA TX optional-counter flow table. */
enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO = 0,
};
808 
809 static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
810 			      struct mlx5_flow_spec *spec)
811 {
812 	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
813 					ft_field_support.source_vhca_port) ||
814 	    !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
815 					ft_field_support.source_vhca_port))
816 		return -EOPNOTSUPP;
817 
818 	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
819 			 misc_parameters.source_vhca_port);
820 	MLX5_SET(fte_match_param, &spec->match_value,
821 		 misc_parameters.source_vhca_port, port_num);
822 
823 	return 0;
824 }
825 
826 static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
827 			   struct mlx5_flow_spec *spec, int ipv)
828 {
829 	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
830 					ft_field_support.outer_ip_version))
831 		return -EOPNOTSUPP;
832 
833 	if (mlx5_core_mp_enabled(dev->mdev) &&
834 	    set_vhca_port_spec(dev, port_num, spec))
835 		return -EOPNOTSUPP;
836 
837 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
838 			 outer_headers.ip_ecn);
839 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
840 		 INET_ECN_CE);
841 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
842 			 outer_headers.ip_version);
843 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
844 		 ipv);
845 
846 	spec->match_criteria_enable =
847 		get_match_criteria_enable(spec->match_criteria);
848 
849 	return 0;
850 }
851 
852 static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
853 			struct mlx5_flow_spec *spec)
854 {
855 	if (mlx5_core_mp_enabled(dev->mdev) &&
856 	    set_vhca_port_spec(dev, port_num, spec))
857 		return -EOPNOTSUPP;
858 
859 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
860 			 misc_parameters.bth_opcode);
861 	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
862 		 IB_BTH_OPCODE_CNP);
863 
864 	spec->match_criteria_enable =
865 		get_match_criteria_enable(spec->match_criteria);
866 
867 	return 0;
868 }
869 
870 int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
871 			 struct mlx5_ib_op_fc *opfc,
872 			 enum mlx5_ib_optional_counter_type type)
873 {
874 	enum mlx5_flow_namespace_type fn_type;
875 	int priority, i, err, spec_num;
876 	struct mlx5_flow_act flow_act = {};
877 	struct mlx5_flow_destination dst;
878 	struct mlx5_flow_namespace *ns;
879 	struct mlx5_ib_flow_prio *prio;
880 	struct mlx5_flow_spec *spec;
881 
882 	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
883 	if (!spec)
884 		return -ENOMEM;
885 
886 	switch (type) {
887 	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
888 		if (set_ecn_ce_spec(dev, port_num, &spec[0],
889 				    MLX5_FS_IPV4_VERSION) ||
890 		    set_ecn_ce_spec(dev, port_num, &spec[1],
891 				    MLX5_FS_IPV6_VERSION)) {
892 			err = -EOPNOTSUPP;
893 			goto free;
894 		}
895 		spec_num = 2;
896 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
897 		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
898 		break;
899 
900 	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
901 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
902 					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
903 		    set_cnp_spec(dev, port_num, &spec[0])) {
904 			err = -EOPNOTSUPP;
905 			goto free;
906 		}
907 		spec_num = 1;
908 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
909 		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
910 		break;
911 
912 	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
913 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
914 					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
915 		    set_cnp_spec(dev, port_num, &spec[0])) {
916 			err = -EOPNOTSUPP;
917 			goto free;
918 		}
919 		spec_num = 1;
920 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
921 		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
922 		break;
923 
924 	default:
925 		err = -EOPNOTSUPP;
926 		goto free;
927 	}
928 
929 	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
930 	if (!ns) {
931 		err = -EOPNOTSUPP;
932 		goto free;
933 	}
934 
935 	prio = &dev->flow_db->opfcs[type];
936 	if (!prio->flow_table) {
937 		prio = _get_prio(dev, ns, prio, priority,
938 				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
939 		if (IS_ERR(prio)) {
940 			err = PTR_ERR(prio);
941 			goto free;
942 		}
943 	}
944 
945 	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
946 	dst.counter = opfc->fc;
947 
948 	flow_act.action =
949 		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
950 
951 	for (i = 0; i < spec_num; i++) {
952 		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
953 						    &flow_act, &dst, 1);
954 		if (IS_ERR(opfc->rule[i])) {
955 			err = PTR_ERR(opfc->rule[i]);
956 			goto del_rules;
957 		}
958 	}
959 	prio->refcount += spec_num;
960 	kfree(spec);
961 
962 	return 0;
963 
964 del_rules:
965 	for (i -= 1; i >= 0; i--)
966 		mlx5_del_flow_rules(opfc->rule[i]);
967 	put_flow_table(dev, prio, false);
968 free:
969 	kfree(spec);
970 	return err;
971 }
972 
973 void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
974 			     struct mlx5_ib_op_fc *opfc,
975 			     enum mlx5_ib_optional_counter_type type)
976 {
977 	int i;
978 
979 	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
980 		mlx5_del_flow_rules(opfc->rule[i]);
981 		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
982 	}
983 }
984 
985 static void set_underlay_qp(struct mlx5_ib_dev *dev,
986 			    struct mlx5_flow_spec *spec,
987 			    u32 underlay_qpn)
988 {
989 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
990 					   spec->match_criteria,
991 					   misc_parameters);
992 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
993 					   misc_parameters);
994 
995 	if (underlay_qpn &&
996 	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
997 				      ft_field_support.bth_dst_qp)) {
998 		MLX5_SET(fte_match_set_misc,
999 			 misc_params_v, bth_dst_qp, underlay_qpn);
1000 		MLX5_SET(fte_match_set_misc,
1001 			 misc_params_c, bth_dst_qp, 0xffffff);
1002 	}
1003 }
1004 
1005 static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
1006 					 struct mlx5_flow_spec *spec,
1007 					 struct mlx5_eswitch_rep *rep)
1008 {
1009 	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
1010 	void *misc;
1011 
1012 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1013 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1014 				    misc_parameters_2);
1015 
1016 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1017 			 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
1018 								   rep->vport));
1019 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1020 				    misc_parameters_2);
1021 
1022 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1023 			 mlx5_eswitch_get_vport_metadata_mask());
1024 	} else {
1025 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1026 				    misc_parameters);
1027 
1028 		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
1029 
1030 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1031 				    misc_parameters);
1032 
1033 		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1034 	}
1035 }
1036 
1037 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
1038 						      struct mlx5_ib_flow_prio *ft_prio,
1039 						      const struct ib_flow_attr *flow_attr,
1040 						      struct mlx5_flow_destination *dst,
1041 						      u32 underlay_qpn,
1042 						      struct mlx5_ib_create_flow *ucmd)
1043 {
1044 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
1045 	struct mlx5_ib_flow_handler *handler;
1046 	struct mlx5_flow_act flow_act = {};
1047 	struct mlx5_flow_spec *spec;
1048 	struct mlx5_flow_destination dest_arr[2] = {};
1049 	struct mlx5_flow_destination *rule_dst = dest_arr;
1050 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
1051 	unsigned int spec_index;
1052 	u32 prev_type = 0;
1053 	int err = 0;
1054 	int dest_num = 0;
1055 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1056 
1057 	if (!is_valid_attr(dev->mdev, flow_attr))
1058 		return ERR_PTR(-EINVAL);
1059 
1060 	if (dev->is_rep && is_egress)
1061 		return ERR_PTR(-EINVAL);
1062 
1063 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1064 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1065 	if (!handler || !spec) {
1066 		err = -ENOMEM;
1067 		goto free;
1068 	}
1069 
1070 	INIT_LIST_HEAD(&handler->list);
1071 
1072 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1073 		err = parse_flow_attr(dev->mdev, spec,
1074 				      ib_flow, flow_attr, &flow_act,
1075 				      prev_type);
1076 		if (err < 0)
1077 			goto free;
1078 
1079 		prev_type = ((union ib_flow_spec *)ib_flow)->type;
1080 		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1081 	}
1082 
1083 	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
1084 		memcpy(&dest_arr[0], dst, sizeof(*dst));
1085 		dest_num++;
1086 	}
1087 
1088 	if (!flow_is_multicast_only(flow_attr))
1089 		set_underlay_qp(dev, spec, underlay_qpn);
1090 
1091 	if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
1092 		struct mlx5_eswitch_rep *rep;
1093 
1094 		rep = dev->port[flow_attr->port - 1].rep;
1095 		if (!rep) {
1096 			err = -EINVAL;
1097 			goto free;
1098 		}
1099 
1100 		mlx5_ib_set_rule_source_port(dev, spec, rep);
1101 	}
1102 
1103 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
1104 
1105 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1106 		struct mlx5_ib_mcounters *mcounters;
1107 
1108 		err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
1109 		if (err)
1110 			goto free;
1111 
1112 		mcounters = to_mcounters(flow_act.counters);
1113 		handler->ibcounters = flow_act.counters;
1114 		dest_arr[dest_num].type =
1115 			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1116 		dest_arr[dest_num].counter =
1117 			mcounters->hw_cntrs_hndl;
1118 		dest_num++;
1119 	}
1120 
1121 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1122 		if (!dest_num)
1123 			rule_dst = NULL;
1124 	} else {
1125 		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1126 			flow_act.action |=
1127 				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1128 		if (is_egress)
1129 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1130 		else if (dest_num)
1131 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1132 	}
1133 
1134 	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
1135 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1136 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1137 		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
1138 			     spec->flow_context.flow_tag, flow_attr->type);
1139 		err = -EINVAL;
1140 		goto free;
1141 	}
1142 	handler->rule = mlx5_add_flow_rules(ft, spec,
1143 					    &flow_act,
1144 					    rule_dst, dest_num);
1145 
1146 	if (IS_ERR(handler->rule)) {
1147 		err = PTR_ERR(handler->rule);
1148 		goto free;
1149 	}
1150 
1151 	ft_prio->refcount++;
1152 	handler->prio = ft_prio;
1153 	handler->dev = dev;
1154 
1155 	ft_prio->flow_table = ft;
1156 free:
1157 	if (err && handler) {
1158 		mlx5_ib_counters_clear_description(handler->ibcounters);
1159 		kfree(handler);
1160 	}
1161 	kvfree(spec);
1162 	return err ? ERR_PTR(err) : handler;
1163 }
1164 
1165 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1166 						     struct mlx5_ib_flow_prio *ft_prio,
1167 						     const struct ib_flow_attr *flow_attr,
1168 						     struct mlx5_flow_destination *dst)
1169 {
1170 	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
1171 }
1172 
/* Indices into the leftovers_specs[] template table in create_leftovers_rule(). */
enum {
	LEFTOVERS_MC,	/* template whose dst_mac value has the masked bit set */
	LEFTOVERS_UC,	/* template whose dst_mac value has the masked bit clear */
};
1177 
1178 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1179 							  struct mlx5_ib_flow_prio *ft_prio,
1180 							  struct ib_flow_attr *flow_attr,
1181 							  struct mlx5_flow_destination *dst)
1182 {
1183 	struct mlx5_ib_flow_handler *handler_ucast = NULL;
1184 	struct mlx5_ib_flow_handler *handler = NULL;
1185 
1186 	static struct {
1187 		struct ib_flow_attr	flow_attr;
1188 		struct ib_flow_spec_eth eth_flow;
1189 	} leftovers_specs[] = {
1190 		[LEFTOVERS_MC] = {
1191 			.flow_attr = {
1192 				.num_of_specs = 1,
1193 				.size = sizeof(leftovers_specs[0])
1194 			},
1195 			.eth_flow = {
1196 				.type = IB_FLOW_SPEC_ETH,
1197 				.size = sizeof(struct ib_flow_spec_eth),
1198 				.mask = {.dst_mac = {0x1} },
1199 				.val =  {.dst_mac = {0x1} }
1200 			}
1201 		},
1202 		[LEFTOVERS_UC] = {
1203 			.flow_attr = {
1204 				.num_of_specs = 1,
1205 				.size = sizeof(leftovers_specs[0])
1206 			},
1207 			.eth_flow = {
1208 				.type = IB_FLOW_SPEC_ETH,
1209 				.size = sizeof(struct ib_flow_spec_eth),
1210 				.mask = {.dst_mac = {0x1} },
1211 				.val = {.dst_mac = {} }
1212 			}
1213 		}
1214 	};
1215 
1216 	handler = create_flow_rule(dev, ft_prio,
1217 				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
1218 				   dst);
1219 	if (!IS_ERR(handler) &&
1220 	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1221 		handler_ucast = create_flow_rule(dev, ft_prio,
1222 						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
1223 						 dst);
1224 		if (IS_ERR(handler_ucast)) {
1225 			mlx5_del_flow_rules(handler->rule);
1226 			ft_prio->refcount--;
1227 			kfree(handler);
1228 			handler = handler_ucast;
1229 		} else {
1230 			list_add(&handler_ucast->list, &handler->list);
1231 		}
1232 	}
1233 
1234 	return handler;
1235 }
1236 
1237 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
1238 							struct mlx5_ib_flow_prio *ft_rx,
1239 							struct mlx5_ib_flow_prio *ft_tx,
1240 							struct mlx5_flow_destination *dst)
1241 {
1242 	struct mlx5_ib_flow_handler *handler_rx;
1243 	struct mlx5_ib_flow_handler *handler_tx;
1244 	int err;
1245 	static const struct ib_flow_attr flow_attr  = {
1246 		.num_of_specs = 0,
1247 		.type = IB_FLOW_ATTR_SNIFFER,
1248 		.size = sizeof(flow_attr)
1249 	};
1250 
1251 	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
1252 	if (IS_ERR(handler_rx)) {
1253 		err = PTR_ERR(handler_rx);
1254 		goto err;
1255 	}
1256 
1257 	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
1258 	if (IS_ERR(handler_tx)) {
1259 		err = PTR_ERR(handler_tx);
1260 		goto err_tx;
1261 	}
1262 
1263 	list_add(&handler_tx->list, &handler_rx->list);
1264 
1265 	return handler_rx;
1266 
1267 err_tx:
1268 	mlx5_del_flow_rules(handler_rx->rule);
1269 	ft_rx->refcount--;
1270 	kfree(handler_rx);
1271 err:
1272 	return ERR_PTR(err);
1273 }
1274 
1275 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1276 					   struct ib_flow_attr *flow_attr,
1277 					   struct ib_udata *udata)
1278 {
1279 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
1280 	struct mlx5_ib_qp *mqp = to_mqp(qp);
1281 	struct mlx5_ib_flow_handler *handler = NULL;
1282 	struct mlx5_flow_destination *dst = NULL;
1283 	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
1284 	struct mlx5_ib_flow_prio *ft_prio;
1285 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1286 	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
1287 	size_t min_ucmd_sz, required_ucmd_sz;
1288 	int err;
1289 	int underlay_qpn;
1290 
1291 	if (udata && udata->inlen) {
1292 		min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
1293 		if (udata->inlen < min_ucmd_sz)
1294 			return ERR_PTR(-EOPNOTSUPP);
1295 
1296 		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
1297 		if (err)
1298 			return ERR_PTR(err);
1299 
1300 		/* currently supports only one counters data */
1301 		if (ucmd_hdr.ncounters_data > 1)
1302 			return ERR_PTR(-EINVAL);
1303 
1304 		required_ucmd_sz = min_ucmd_sz +
1305 			sizeof(struct mlx5_ib_flow_counters_data) *
1306 			ucmd_hdr.ncounters_data;
1307 		if (udata->inlen > required_ucmd_sz &&
1308 		    !ib_is_udata_cleared(udata, required_ucmd_sz,
1309 					 udata->inlen - required_ucmd_sz))
1310 			return ERR_PTR(-EOPNOTSUPP);
1311 
1312 		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
1313 		if (!ucmd)
1314 			return ERR_PTR(-ENOMEM);
1315 
1316 		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
1317 		if (err)
1318 			goto free_ucmd;
1319 	}
1320 
1321 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
1322 		err = -ENOMEM;
1323 		goto free_ucmd;
1324 	}
1325 
1326 	if (flow_attr->flags &
1327 	    ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
1328 		err = -EINVAL;
1329 		goto free_ucmd;
1330 	}
1331 
1332 	if (is_egress &&
1333 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1334 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1335 		err = -EINVAL;
1336 		goto free_ucmd;
1337 	}
1338 
1339 	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1340 	if (!dst) {
1341 		err = -ENOMEM;
1342 		goto free_ucmd;
1343 	}
1344 
1345 	mutex_lock(&dev->flow_db->lock);
1346 
1347 	ft_prio = get_flow_table(dev, flow_attr,
1348 				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
1349 	if (IS_ERR(ft_prio)) {
1350 		err = PTR_ERR(ft_prio);
1351 		goto unlock;
1352 	}
1353 	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
1354 		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
1355 		if (IS_ERR(ft_prio_tx)) {
1356 			err = PTR_ERR(ft_prio_tx);
1357 			ft_prio_tx = NULL;
1358 			goto destroy_ft;
1359 		}
1360 	}
1361 
1362 	if (is_egress) {
1363 		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1364 	} else {
1365 		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1366 		if (mqp->is_rss)
1367 			dst->tir_num = mqp->rss_qp.tirn;
1368 		else
1369 			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
1370 	}
1371 
1372 	switch (flow_attr->type) {
1373 	case IB_FLOW_ATTR_NORMAL:
1374 		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
1375 				       mqp->underlay_qpn :
1376 				       0;
1377 		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
1378 					    underlay_qpn, ucmd);
1379 		break;
1380 	case IB_FLOW_ATTR_ALL_DEFAULT:
1381 	case IB_FLOW_ATTR_MC_DEFAULT:
1382 		handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
1383 		break;
1384 	case IB_FLOW_ATTR_SNIFFER:
1385 		handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
1386 		break;
1387 	default:
1388 		err = -EINVAL;
1389 		goto destroy_ft;
1390 	}
1391 
1392 	if (IS_ERR(handler)) {
1393 		err = PTR_ERR(handler);
1394 		handler = NULL;
1395 		goto destroy_ft;
1396 	}
1397 
1398 	mutex_unlock(&dev->flow_db->lock);
1399 	kfree(dst);
1400 	kfree(ucmd);
1401 
1402 	return &handler->ibflow;
1403 
1404 destroy_ft:
1405 	put_flow_table(dev, ft_prio, false);
1406 	if (ft_prio_tx)
1407 		put_flow_table(dev, ft_prio_tx, false);
1408 unlock:
1409 	mutex_unlock(&dev->flow_db->lock);
1410 	kfree(dst);
1411 free_ucmd:
1412 	kfree(ucmd);
1413 	return ERR_PTR(err);
1414 }
1415 
/*
 * _get_flow_table - look up (and create on first use) a raw-flow priority
 * @dev:           IB device
 * @user_priority: priority requested through the matcher
 * @ns_type:       flow namespace the rule goes into
 * @mcast:         true selects MLX5_IB_FLOW_MCAST_PRIO instead of the
 *                 priority derived from @user_priority
 *
 * Picks the per-namespace slot in dev->flow_db.  If that slot already has a
 * flow table it is returned as is; otherwise _get_prio() is asked to create
 * one, sized and flagged according to the device capabilities read below.
 *
 * Return: the priority slot, ERR_PTR(-EOPNOTSUPP) when the namespace is not
 * available, ERR_PTR(-EINVAL) for an unhandled @ns_type, or whatever
 * _get_prio() returns.
 */
static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
		enum mlx5_flow_namespace_type ns_type,
		bool mcast)
{
	struct mlx5_flow_namespace *ns = NULL;
	struct mlx5_ib_flow_prio *prio = NULL;
	int max_table_size = 0;
	bool esw_encap;
	u32 flags = 0;
	int priority;

	if (mcast)
		priority = MLX5_IB_FLOW_MCAST_PRIO;
	else
		priority = ib_prio_to_core_prio(user_priority, false);

	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
	/*
	 * First pass over ns_type: table size limit and tunnel flags.
	 * NIC RX/TX enable the tunnel flags only while the eswitch has no
	 * encap mode; FDB enables them only while it has one.  The FDB and
	 * RDMA namespaces also replace the priority computed above with the
	 * raw user_priority.
	 */
	switch (ns_type) {
	case MLX5_FLOW_NAMESPACE_BYPASS:
		max_table_size = BIT(
			MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
					      reformat_l3_tunnel_to_l2) &&
		    !esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		break;
	case MLX5_FLOW_NAMESPACE_EGRESS:
		max_table_size = BIT(
			MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
		    !esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		break;
	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
		max_table_size = BIT(
			MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
					       reformat_l3_tunnel_to_l2) &&
		    esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		priority = user_priority;
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_RX:
		max_table_size = BIT(
			MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
		priority = user_priority;
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TX:
		max_table_size = BIT(
			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
		priority = user_priority;
		break;
	default:
		break;
	}

	/* never ask for more entries than the driver-wide cap */
	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);

	/* the namespace lookup runs before unknown types are rejected below */
	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
	if (!ns)
		return ERR_PTR(-EOPNOTSUPP);

	/* second pass over ns_type: pick the bookkeeping slot */
	switch (ns_type) {
	case MLX5_FLOW_NAMESPACE_BYPASS:
		prio = &dev->flow_db->prios[priority];
		break;
	case MLX5_FLOW_NAMESPACE_EGRESS:
		prio = &dev->flow_db->egress_prios[priority];
		break;
	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
		prio = &dev->flow_db->fdb[priority];
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_RX:
		prio = &dev->flow_db->rdma_rx[priority];
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TX:
		prio = &dev->flow_db->rdma_tx[priority];
		break;
	default: return ERR_PTR(-EINVAL);
	}

	/*
	 * NOTE(review): prio is the address of an array element at this
	 * point, so this check looks unreachable - confirm before relying
	 * on it.
	 */
	if (!prio)
		return ERR_PTR(-EINVAL);

	/* table already exists for this slot: reuse it */
	if (prio->flow_table)
		return prio;

	return _get_prio(dev, ns, prio, priority, max_table_size,
			 MLX5_FS_MAX_TYPES, flags);
}
1512 
1513 static struct mlx5_ib_flow_handler *
1514 _create_raw_flow_rule(struct mlx5_ib_dev *dev,
1515 		      struct mlx5_ib_flow_prio *ft_prio,
1516 		      struct mlx5_flow_destination *dst,
1517 		      struct mlx5_ib_flow_matcher  *fs_matcher,
1518 		      struct mlx5_flow_context *flow_context,
1519 		      struct mlx5_flow_act *flow_act,
1520 		      void *cmd_in, int inlen,
1521 		      int dst_num)
1522 {
1523 	struct mlx5_ib_flow_handler *handler;
1524 	struct mlx5_flow_spec *spec;
1525 	struct mlx5_flow_table *ft = ft_prio->flow_table;
1526 	int err = 0;
1527 
1528 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1529 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1530 	if (!handler || !spec) {
1531 		err = -ENOMEM;
1532 		goto free;
1533 	}
1534 
1535 	INIT_LIST_HEAD(&handler->list);
1536 
1537 	memcpy(spec->match_value, cmd_in, inlen);
1538 	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
1539 	       fs_matcher->mask_len);
1540 	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
1541 	spec->flow_context = *flow_context;
1542 
1543 	handler->rule = mlx5_add_flow_rules(ft, spec,
1544 					    flow_act, dst, dst_num);
1545 
1546 	if (IS_ERR(handler->rule)) {
1547 		err = PTR_ERR(handler->rule);
1548 		goto free;
1549 	}
1550 
1551 	ft_prio->refcount++;
1552 	handler->prio = ft_prio;
1553 	handler->dev = dev;
1554 	ft_prio->flow_table = ft;
1555 
1556 free:
1557 	if (err)
1558 		kfree(handler);
1559 	kvfree(spec);
1560 	return err ? ERR_PTR(err) : handler;
1561 }
1562 
1563 static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
1564 				void *match_v)
1565 {
1566 	void *match_c;
1567 	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
1568 	void *dmac, *dmac_mask;
1569 	void *ipv4, *ipv4_mask;
1570 
1571 	if (!(fs_matcher->match_criteria_enable &
1572 	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
1573 		return false;
1574 
1575 	match_c = fs_matcher->matcher_mask.match_params;
1576 	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
1577 					   outer_headers);
1578 	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
1579 					   outer_headers);
1580 
1581 	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1582 			    dmac_47_16);
1583 	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1584 				 dmac_47_16);
1585 
1586 	if (is_multicast_ether_addr(dmac) &&
1587 	    is_multicast_ether_addr(dmac_mask))
1588 		return true;
1589 
1590 	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1591 			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1592 
1593 	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1594 				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1595 
1596 	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
1597 	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
1598 		return true;
1599 
1600 	return false;
1601 }
1602 
1603 static struct mlx5_ib_flow_handler *raw_fs_rule_add(
1604 	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1605 	struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
1606 	struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type)
1607 {
1608 	struct mlx5_flow_destination *dst;
1609 	struct mlx5_ib_flow_prio *ft_prio;
1610 	struct mlx5_ib_flow_handler *handler;
1611 	int dst_num = 0;
1612 	bool mcast;
1613 	int err;
1614 
1615 	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
1616 		return ERR_PTR(-EOPNOTSUPP);
1617 
1618 	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
1619 		return ERR_PTR(-ENOMEM);
1620 
1621 	dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
1622 	if (!dst)
1623 		return ERR_PTR(-ENOMEM);
1624 
1625 	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
1626 	mutex_lock(&dev->flow_db->lock);
1627 
1628 	ft_prio = _get_flow_table(dev, fs_matcher->priority,
1629 				  fs_matcher->ns_type, mcast);
1630 	if (IS_ERR(ft_prio)) {
1631 		err = PTR_ERR(ft_prio);
1632 		goto unlock;
1633 	}
1634 
1635 	switch (dest_type) {
1636 	case MLX5_FLOW_DESTINATION_TYPE_TIR:
1637 		dst[dst_num].type = dest_type;
1638 		dst[dst_num++].tir_num = dest_id;
1639 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1640 		break;
1641 	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
1642 		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
1643 		dst[dst_num++].ft_num = dest_id;
1644 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1645 		break;
1646 	case MLX5_FLOW_DESTINATION_TYPE_PORT:
1647 		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1648 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1649 		break;
1650 	default:
1651 		break;
1652 	}
1653 
1654 	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1655 		if (WARN_ON(!counter)) {
1656 			err = -EINVAL;
1657 			goto unlock;
1658 		}
1659 		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1660 		dst[dst_num].counter = counter;
1661 		dst_num++;
1662 	}
1663 
1664 	handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
1665 					fs_matcher, flow_context, flow_act,
1666 					cmd_in, inlen, dst_num);
1667 
1668 	if (IS_ERR(handler)) {
1669 		err = PTR_ERR(handler);
1670 		goto destroy_ft;
1671 	}
1672 
1673 	mutex_unlock(&dev->flow_db->lock);
1674 	atomic_inc(&fs_matcher->usecnt);
1675 	handler->flow_matcher = fs_matcher;
1676 
1677 	kfree(dst);
1678 
1679 	return handler;
1680 
1681 destroy_ft:
1682 	put_flow_table(dev, ft_prio, false);
1683 unlock:
1684 	mutex_unlock(&dev->flow_db->lock);
1685 	kfree(dst);
1686 
1687 	return ERR_PTR(err);
1688 }
1689 
1690 static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1691 {
1692 	switch (maction->flow_action_raw.sub_type) {
1693 	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
1694 		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
1695 					   maction->flow_action_raw.modify_hdr);
1696 		break;
1697 	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
1698 		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
1699 					     maction->flow_action_raw.pkt_reformat);
1700 		break;
1701 	case MLX5_IB_FLOW_ACTION_DECAP:
1702 		break;
1703 	default:
1704 		break;
1705 	}
1706 }
1707 
1708 static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
1709 {
1710 	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1711 
1712 	switch (action->type) {
1713 	case IB_FLOW_ACTION_UNSPECIFIED:
1714 		destroy_flow_action_raw(maction);
1715 		break;
1716 	default:
1717 		WARN_ON(true);
1718 		break;
1719 	}
1720 
1721 	kfree(maction);
1722 	return 0;
1723 }
1724 
1725 static int
1726 mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
1727 			     enum mlx5_flow_namespace_type *namespace)
1728 {
1729 	switch (table_type) {
1730 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
1731 		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
1732 		break;
1733 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
1734 		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
1735 		break;
1736 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
1737 		*namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
1738 		break;
1739 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
1740 		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
1741 		break;
1742 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
1743 		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
1744 		break;
1745 	default:
1746 		return -EINVAL;
1747 	}
1748 
1749 	return 0;
1750 }
1751 
/*
 * Attribute layout for each flow type, indexed by MLX5_IB_FLOW_TYPE_*.
 * Only the NORMAL type carries a payload (a u16 priority); the remaining
 * types are declared with UVERBS_ATTR_NO_DATA().
 */
static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
	[MLX5_IB_FLOW_TYPE_NORMAL] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		.u.ptr = {
			.len = sizeof(u16), /* data is priority */
			.min_len = sizeof(u16),
		}
	},
	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
};
1773 
1774 static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
1775 {
1776 	struct devx_obj *devx_obj = obj;
1777 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1778 
1779 	switch (opcode) {
1780 	case MLX5_CMD_OP_DESTROY_TIR:
1781 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1782 		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
1783 				    obj_id);
1784 		return true;
1785 
1786 	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
1787 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1788 		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
1789 				    table_id);
1790 		return true;
1791 	default:
1792 		return false;
1793 	}
1794 }
1795 
/*
 * get_dests - resolve and validate the destination of a CREATE_FLOW call
 * @attrs:      uverbs attribute bundle of the call
 * @fs_matcher: matcher of the flow; its ns_type drives the validation
 * @dest_id:    receives the TIR / flow table id when one is resolved
 * @dest_type:  receives the MLX5_FLOW_DESTINATION_TYPE_* when one is
 *              resolved; it is also read at the end, so the caller must
 *              pass it in initialised
 * @qp:         receives the destination QP, or NULL when none was given
 * @flags:      receives the MLX5_IB_ATTR_CREATE_FLOW_FLAGS_* of the call
 *
 * Return: 0 on success, a negative errno for an invalid combination of
 * destination attributes, flags and namespace.
 */
static int get_dests(struct uverbs_attr_bundle *attrs,
		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
		     int *dest_type, struct ib_qp **qp, u32 *flags)
{
	bool dest_devx, dest_qp;
	void *devx_obj;
	int err;

	dest_devx = uverbs_attr_is_valid(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
	dest_qp = uverbs_attr_is_valid(attrs,
				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);

	*flags = 0;
	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
					 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
	if (err)
		return err;

	/* Both flags are not allowed */
	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
		return -EINVAL;

	/* NIC RX: at most one of DEVX object, QP, or a flag may be given */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
		if (dest_devx && (dest_qp || *flags))
			return -EINVAL;
		else if (dest_qp && *flags)
			return -EINVAL;
	}

	/* Allow only DEVX object, drop as dest for FDB */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
	    !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
		return -EINVAL;

	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
		return -EINVAL;

	*qp = NULL;
	if (dest_devx) {
		devx_obj =
			uverbs_attr_get_obj(attrs,
					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);

		/* Verify that the given DEVX object is a flow
		 * steering destination.
		 */
		if (!is_flow_dest(devx_obj, dest_id, dest_type))
			return -EINVAL;
		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			return -EINVAL;
	} else if (dest_qp) {
		struct mlx5_ib_qp *mqp;

		*qp = uverbs_attr_get_obj(attrs,
					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
		if (IS_ERR(*qp))
			return PTR_ERR(*qp);

		/* only raw packet QPs can be a steering destination */
		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
			return -EINVAL;

		/* the actual destination is the TIR behind the QP */
		mqp = to_mqp(*qp);
		if (mqp->is_rss)
			*dest_id = mqp->rss_qp.tirn;
		else
			*dest_id = mqp->raw_packet_qp.rq.tirn;
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
		/* transmit namespaces without explicit dest go to the port */
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
	}

	/* a TIR cannot be the destination of a transmit-side rule */
	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
		return -EINVAL;

	return 0;
}
1884 
1885 static bool
1886 is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size)
1887 {
1888 	struct devx_obj *devx_obj = obj;
1889 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1890 
1891 	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
1892 
1893 		if (offset && offset >= devx_obj->flow_counter_bulk_size)
1894 			return false;
1895 
1896 		*fc_bulk_size = devx_obj->flow_counter_bulk_size;
1897 		*counter_id = MLX5_GET(dealloc_flow_counter_in,
1898 				       devx_obj->dinbox,
1899 				       flow_counter_id);
1900 		*counter_id += offset;
1901 		return true;
1902 	}
1903 
1904 	return false;
1905 }
1906 
1907 #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
1908 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
1909 	struct uverbs_attr_bundle *attrs)
1910 {
1911 	struct mlx5_flow_context flow_context = {.flow_tag =
1912 		MLX5_FS_DEFAULT_FLOW_TAG};
1913 	int dest_id, dest_type = -1, inlen, len, ret, i;
1914 	struct mlx5_ib_flow_handler *flow_handler;
1915 	struct mlx5_ib_flow_matcher *fs_matcher;
1916 	struct ib_uobject **arr_flow_actions;
1917 	struct ib_uflow_resources *uflow_res;
1918 	struct mlx5_flow_act flow_act = {};
1919 	struct mlx5_fc *counter = NULL;
1920 	struct ib_qp *qp = NULL;
1921 	void *devx_obj, *cmd_in;
1922 	struct ib_uobject *uobj;
1923 	struct mlx5_ib_dev *dev;
1924 	u32 flags;
1925 
1926 	if (!capable(CAP_NET_RAW))
1927 		return -EPERM;
1928 
1929 	fs_matcher = uverbs_attr_get_obj(attrs,
1930 					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
1931 	uobj =  uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
1932 	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1933 
1934 	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
1935 		return -EINVAL;
1936 
1937 	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
1938 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
1939 
1940 	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1941 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
1942 
1943 	len = uverbs_attr_get_uobjs_arr(attrs,
1944 		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
1945 	if (len) {
1946 		u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0;
1947 		devx_obj = arr_flow_actions[0]->object;
1948 
1949 		if (uverbs_attr_is_valid(attrs,
1950 					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
1951 
1952 			int num_offsets = uverbs_attr_ptr_get_array_size(
1953 				attrs,
1954 				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
1955 				sizeof(u32));
1956 
1957 			if (num_offsets != 1)
1958 				return -EINVAL;
1959 
1960 			offset_attr = uverbs_attr_get_alloced_ptr(
1961 				attrs,
1962 				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
1963 			offset = *offset_attr;
1964 		}
1965 
1966 		if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size))
1967 			return -EINVAL;
1968 		counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size);
1969 		if (IS_ERR(counter))
1970 			return PTR_ERR(counter);
1971 
1972 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1973 	}
1974 
1975 	cmd_in = uverbs_attr_get_alloced_ptr(
1976 		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1977 	inlen = uverbs_attr_get_len(attrs,
1978 				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1979 
1980 	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
1981 	if (!uflow_res) {
1982 		ret = -ENOMEM;
1983 		goto destroy_counter;
1984 	}
1985 
1986 	len = uverbs_attr_get_uobjs_arr(attrs,
1987 		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
1988 	for (i = 0; i < len; i++) {
1989 		struct mlx5_ib_flow_action *maction =
1990 			to_mflow_act(arr_flow_actions[i]->object);
1991 
1992 		ret = parse_flow_flow_action(maction, false, &flow_act);
1993 		if (ret)
1994 			goto err_out;
1995 		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
1996 				   arr_flow_actions[i]->object);
1997 	}
1998 
1999 	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
2000 			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
2001 	if (!ret) {
2002 		if (flow_context.flow_tag >= BIT(24)) {
2003 			ret = -EINVAL;
2004 			goto err_out;
2005 		}
2006 		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
2007 	}
2008 
2009 	flow_handler =
2010 		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
2011 				counter, cmd_in, inlen, dest_id, dest_type);
2012 	if (IS_ERR(flow_handler)) {
2013 		ret = PTR_ERR(flow_handler);
2014 		goto err_out;
2015 	}
2016 
2017 	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
2018 
2019 	return 0;
2020 err_out:
2021 	ib_uverbs_flow_resources_free(uflow_res);
2022 destroy_counter:
2023 	if (counter)
2024 		mlx5_fc_local_destroy(counter);
2025 	return ret;
2026 }
2027 
2028 static int flow_matcher_cleanup(struct ib_uobject *uobject,
2029 				enum rdma_remove_reason why,
2030 				struct uverbs_attr_bundle *attrs)
2031 {
2032 	struct mlx5_ib_flow_matcher *obj = uobject->object;
2033 
2034 	if (atomic_read(&obj->usecnt))
2035 		return -EBUSY;
2036 
2037 	kfree(obj);
2038 	return 0;
2039 }
2040 
2041 static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
2042 				     struct mlx5_ib_flow_prio *ft_prio,
2043 				     enum mlx5_flow_namespace_type ns_type)
2044 {
2045 	struct mlx5_flow_table_attr ft_attr = {};
2046 	struct mlx5_flow_namespace *ns;
2047 	struct mlx5_flow_table *ft;
2048 
2049 	if (ft_prio->anchor.ft)
2050 		return 0;
2051 
2052 	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
2053 	if (!ns)
2054 		return -EOPNOTSUPP;
2055 
2056 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
2057 	ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
2058 	ft_attr.prio = 0;
2059 	ft_attr.max_fte = 2;
2060 	ft_attr.level = 1;
2061 
2062 	ft = mlx5_create_flow_table(ns, &ft_attr);
2063 	if (IS_ERR(ft))
2064 		return PTR_ERR(ft);
2065 
2066 	ft_prio->anchor.ft = ft;
2067 
2068 	return 0;
2069 }
2070 
2071 static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
2072 {
2073 	if (ft_prio->anchor.ft) {
2074 		mlx5_destroy_flow_table(ft_prio->anchor.ft);
2075 		ft_prio->anchor.ft = NULL;
2076 	}
2077 }
2078 
2079 static int
2080 steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2081 {
2082 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2083 	struct mlx5_flow_group *fg;
2084 	void *flow_group_in;
2085 	int err = 0;
2086 
2087 	if (ft_prio->anchor.fg_drop)
2088 		return 0;
2089 
2090 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2091 	if (!flow_group_in)
2092 		return -ENOMEM;
2093 
2094 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
2095 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
2096 
2097 	fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2098 	if (IS_ERR(fg)) {
2099 		err = PTR_ERR(fg);
2100 		goto out;
2101 	}
2102 
2103 	ft_prio->anchor.fg_drop = fg;
2104 
2105 out:
2106 	kvfree(flow_group_in);
2107 
2108 	return err;
2109 }
2110 
2111 static void
2112 steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2113 {
2114 	if (ft_prio->anchor.fg_drop) {
2115 		mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
2116 		ft_prio->anchor.fg_drop = NULL;
2117 	}
2118 }
2119 
2120 static int
2121 steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2122 {
2123 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2124 	struct mlx5_flow_group *fg;
2125 	void *flow_group_in;
2126 	int err = 0;
2127 
2128 	if (ft_prio->anchor.fg_goto_table)
2129 		return 0;
2130 
2131 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2132 	if (!flow_group_in)
2133 		return -ENOMEM;
2134 
2135 	fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2136 	if (IS_ERR(fg)) {
2137 		err = PTR_ERR(fg);
2138 		goto out;
2139 	}
2140 	ft_prio->anchor.fg_goto_table = fg;
2141 
2142 out:
2143 	kvfree(flow_group_in);
2144 
2145 	return err;
2146 }
2147 
2148 static void
2149 steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2150 {
2151 	if (ft_prio->anchor.fg_goto_table) {
2152 		mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
2153 		ft_prio->anchor.fg_goto_table = NULL;
2154 	}
2155 }
2156 
2157 static int
2158 steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2159 {
2160 	struct mlx5_flow_act flow_act = {};
2161 	struct mlx5_flow_handle *handle;
2162 
2163 	if (ft_prio->anchor.rule_drop)
2164 		return 0;
2165 
2166 	flow_act.fg = ft_prio->anchor.fg_drop;
2167 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2168 
2169 	handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2170 				     NULL, 0);
2171 	if (IS_ERR(handle))
2172 		return PTR_ERR(handle);
2173 
2174 	ft_prio->anchor.rule_drop = handle;
2175 
2176 	return 0;
2177 }
2178 
2179 static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2180 {
2181 	if (ft_prio->anchor.rule_drop) {
2182 		mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
2183 		ft_prio->anchor.rule_drop = NULL;
2184 	}
2185 }
2186 
2187 static int
2188 steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2189 {
2190 	struct mlx5_flow_destination dest = {};
2191 	struct mlx5_flow_act flow_act = {};
2192 	struct mlx5_flow_handle *handle;
2193 
2194 	if (ft_prio->anchor.rule_goto_table)
2195 		return 0;
2196 
2197 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2198 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
2199 	flow_act.fg = ft_prio->anchor.fg_goto_table;
2200 
2201 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2202 	dest.ft = ft_prio->flow_table;
2203 
2204 	handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2205 				     &dest, 1);
2206 	if (IS_ERR(handle))
2207 		return PTR_ERR(handle);
2208 
2209 	ft_prio->anchor.rule_goto_table = handle;
2210 
2211 	return 0;
2212 }
2213 
2214 static void
2215 steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2216 {
2217 	if (ft_prio->anchor.rule_goto_table) {
2218 		mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
2219 		ft_prio->anchor.rule_goto_table = NULL;
2220 	}
2221 }
2222 
2223 static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
2224 				      struct mlx5_ib_flow_prio *ft_prio,
2225 				      enum mlx5_flow_namespace_type ns_type)
2226 {
2227 	int err;
2228 
2229 	err = steering_anchor_create_ft(dev, ft_prio, ns_type);
2230 	if (err)
2231 		return err;
2232 
2233 	err = steering_anchor_create_fg_drop(ft_prio);
2234 	if (err)
2235 		goto destroy_ft;
2236 
2237 	err = steering_anchor_create_fg_goto_table(ft_prio);
2238 	if (err)
2239 		goto destroy_fg_drop;
2240 
2241 	err = steering_anchor_create_rule_drop(ft_prio);
2242 	if (err)
2243 		goto destroy_fg_goto_table;
2244 
2245 	err = steering_anchor_create_rule_goto_table(ft_prio);
2246 	if (err)
2247 		goto destroy_rule_drop;
2248 
2249 	return 0;
2250 
2251 destroy_rule_drop:
2252 	steering_anchor_destroy_rule_drop(ft_prio);
2253 destroy_fg_goto_table:
2254 	steering_anchor_destroy_fg_goto_table(ft_prio);
2255 destroy_fg_drop:
2256 	steering_anchor_destroy_fg_drop(ft_prio);
2257 destroy_ft:
2258 	steering_anchor_destroy_ft(ft_prio);
2259 
2260 	return err;
2261 }
2262 
/*
 * Tear down all anchor resources of @ft_prio in the reverse of their creation
 * order: rules first, then flow groups, then the flow table. Each helper
 * skips a resource that was never created.
 */
static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
{
	/* Rules */
	steering_anchor_destroy_rule_goto_table(ft_prio);
	steering_anchor_destroy_rule_drop(ft_prio);

	/* Flow groups */
	steering_anchor_destroy_fg_goto_table(ft_prio);
	steering_anchor_destroy_fg_drop(ft_prio);

	/* Flow table */
	steering_anchor_destroy_ft(ft_prio);
}
2271 
2272 static int steering_anchor_cleanup(struct ib_uobject *uobject,
2273 				   enum rdma_remove_reason why,
2274 				   struct uverbs_attr_bundle *attrs)
2275 {
2276 	struct mlx5_ib_steering_anchor *obj = uobject->object;
2277 
2278 	if (atomic_read(&obj->usecnt))
2279 		return -EBUSY;
2280 
2281 	mutex_lock(&obj->dev->flow_db->lock);
2282 	if (!--obj->ft_prio->anchor.rule_goto_table_ref)
2283 		steering_anchor_destroy_rule_goto_table(obj->ft_prio);
2284 
2285 	put_flow_table(obj->dev, obj->ft_prio, true);
2286 	mutex_unlock(&obj->dev->flow_db->lock);
2287 
2288 	kfree(obj);
2289 	return 0;
2290 }
2291 
2292 static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
2293 			      int count)
2294 {
2295 	while (count--)
2296 		mlx5_steering_anchor_destroy_res(&prio[count]);
2297 }
2298 
2299 void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
2300 {
2301 	fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
2302 	fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
2303 	fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
2304 	fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
2305 	fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
2306 	fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
2307 	fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
2308 }
2309 
2310 static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
2311 			      struct mlx5_ib_flow_matcher *obj)
2312 {
2313 	enum mlx5_ib_uapi_flow_table_type ft_type =
2314 		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
2315 	u32 flags;
2316 	int err;
2317 
2318 	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
2319 	 * users should switch to it. We leave this to not break userspace
2320 	 */
2321 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
2322 	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
2323 		return -EINVAL;
2324 
2325 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
2326 		err = uverbs_get_const(&ft_type, attrs,
2327 				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
2328 		if (err)
2329 			return err;
2330 
2331 		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
2332 		if (err)
2333 			return err;
2334 
2335 		return 0;
2336 	}
2337 
2338 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
2339 		err = uverbs_get_flags32(&flags, attrs,
2340 					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2341 					 IB_FLOW_ATTR_FLAGS_EGRESS);
2342 		if (err)
2343 			return err;
2344 
2345 		if (flags)
2346 			return mlx5_ib_ft_type_to_namespace(
2347 				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
2348 				&obj->ns_type);
2349 	}
2350 
2351 	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
2352 
2353 	return 0;
2354 }
2355 
2356 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
2357 	struct uverbs_attr_bundle *attrs)
2358 {
2359 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2360 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
2361 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2362 	struct mlx5_ib_flow_matcher *obj;
2363 	int err;
2364 
2365 	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
2366 	if (!obj)
2367 		return -ENOMEM;
2368 
2369 	obj->mask_len = uverbs_attr_get_len(
2370 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2371 	err = uverbs_copy_from(&obj->matcher_mask,
2372 			       attrs,
2373 			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2374 	if (err)
2375 		goto end;
2376 
2377 	obj->flow_type = uverbs_attr_get_enum_id(
2378 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2379 
2380 	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
2381 		err = uverbs_copy_from(&obj->priority,
2382 				       attrs,
2383 				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2384 		if (err)
2385 			goto end;
2386 	}
2387 
2388 	err = uverbs_copy_from(&obj->match_criteria_enable,
2389 			       attrs,
2390 			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
2391 	if (err)
2392 		goto end;
2393 
2394 	err = mlx5_ib_matcher_ns(attrs, obj);
2395 	if (err)
2396 		goto end;
2397 
2398 	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2399 	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
2400 		err = -EINVAL;
2401 		goto end;
2402 	}
2403 
2404 	uobj->object = obj;
2405 	obj->mdev = dev->mdev;
2406 	atomic_set(&obj->usecnt, 0);
2407 	return 0;
2408 
2409 end:
2410 	kfree(obj);
2411 	return err;
2412 }
2413 
2414 static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
2415 	struct uverbs_attr_bundle *attrs)
2416 {
2417 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2418 		attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
2419 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2420 	enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
2421 	enum mlx5_flow_namespace_type ns_type;
2422 	struct mlx5_ib_steering_anchor *obj;
2423 	struct mlx5_ib_flow_prio *ft_prio;
2424 	u16 priority;
2425 	u32 ft_id;
2426 	int err;
2427 
2428 	if (!capable(CAP_NET_RAW))
2429 		return -EPERM;
2430 
2431 	err = uverbs_get_const(&ib_uapi_ft_type, attrs,
2432 			       MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
2433 	if (err)
2434 		return err;
2435 
2436 	err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
2437 	if (err)
2438 		return err;
2439 
2440 	err = uverbs_copy_from(&priority, attrs,
2441 			       MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
2442 	if (err)
2443 		return err;
2444 
2445 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
2446 	if (!obj)
2447 		return -ENOMEM;
2448 
2449 	mutex_lock(&dev->flow_db->lock);
2450 
2451 	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
2452 	if (IS_ERR(ft_prio)) {
2453 		err = PTR_ERR(ft_prio);
2454 		goto free_obj;
2455 	}
2456 
2457 	ft_prio->refcount++;
2458 
2459 	if (!ft_prio->anchor.rule_goto_table_ref) {
2460 		err = steering_anchor_create_res(dev, ft_prio, ns_type);
2461 		if (err)
2462 			goto put_flow_table;
2463 	}
2464 
2465 	ft_prio->anchor.rule_goto_table_ref++;
2466 
2467 	ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
2468 
2469 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2470 			     &ft_id, sizeof(ft_id));
2471 	if (err)
2472 		goto destroy_res;
2473 
2474 	mutex_unlock(&dev->flow_db->lock);
2475 
2476 	uobj->object = obj;
2477 	obj->dev = dev;
2478 	obj->ft_prio = ft_prio;
2479 	atomic_set(&obj->usecnt, 0);
2480 
2481 	return 0;
2482 
2483 destroy_res:
2484 	--ft_prio->anchor.rule_goto_table_ref;
2485 	mlx5_steering_anchor_destroy_res(ft_prio);
2486 put_flow_table:
2487 	put_flow_table(dev, ft_prio, true);
2488 free_obj:
2489 	mutex_unlock(&dev->flow_db->lock);
2490 	kfree(obj);
2491 
2492 	return err;
2493 }
2494 
2495 static struct ib_flow_action *
2496 mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
2497 			     enum mlx5_ib_uapi_flow_table_type ft_type,
2498 			     u8 num_actions, void *in)
2499 {
2500 	enum mlx5_flow_namespace_type namespace;
2501 	struct mlx5_ib_flow_action *maction;
2502 	int ret;
2503 
2504 	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2505 	if (ret)
2506 		return ERR_PTR(-EINVAL);
2507 
2508 	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2509 	if (!maction)
2510 		return ERR_PTR(-ENOMEM);
2511 
2512 	maction->flow_action_raw.modify_hdr =
2513 		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
2514 
2515 	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
2516 		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
2517 		kfree(maction);
2518 		return ERR_PTR(ret);
2519 	}
2520 	maction->flow_action_raw.sub_type =
2521 		MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
2522 	maction->flow_action_raw.dev = dev;
2523 
2524 	return &maction->ib_action;
2525 }
2526 
2527 static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
2528 {
2529 	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
2530 					 max_modify_header_actions) ||
2531 	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
2532 					 max_modify_header_actions) ||
2533 	       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
2534 					 max_modify_header_actions);
2535 }
2536 
2537 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
2538 	struct uverbs_attr_bundle *attrs)
2539 {
2540 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2541 		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
2542 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2543 	enum mlx5_ib_uapi_flow_table_type ft_type;
2544 	struct ib_flow_action *action;
2545 	int num_actions;
2546 	void *in;
2547 	int ret;
2548 
2549 	if (!mlx5_ib_modify_header_supported(mdev))
2550 		return -EOPNOTSUPP;
2551 
2552 	in = uverbs_attr_get_alloced_ptr(attrs,
2553 		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
2554 
2555 	num_actions = uverbs_attr_ptr_get_array_size(
2556 		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2557 		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
2558 	if (num_actions < 0)
2559 		return num_actions;
2560 
2561 	ret = uverbs_get_const(&ft_type, attrs,
2562 			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
2563 	if (ret)
2564 		return ret;
2565 	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
2566 	if (IS_ERR(action))
2567 		return PTR_ERR(action);
2568 
2569 	uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
2570 				       IB_FLOW_ACTION_UNSPECIFIED);
2571 
2572 	return 0;
2573 }
2574 
2575 static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
2576 						      u8 packet_reformat_type,
2577 						      u8 ft_type)
2578 {
2579 	switch (packet_reformat_type) {
2580 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2581 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2582 			return MLX5_CAP_FLOWTABLE(ibdev->mdev,
2583 						  encap_general_header);
2584 		break;
2585 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2586 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2587 			return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
2588 				reformat_l2_to_l3_tunnel);
2589 		break;
2590 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2591 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2592 			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
2593 				reformat_l3_tunnel_to_l2);
2594 		break;
2595 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
2596 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2597 			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
2598 		break;
2599 	default:
2600 		break;
2601 	}
2602 
2603 	return false;
2604 }
2605 
2606 static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
2607 {
2608 	switch (dv_prt) {
2609 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2610 		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
2611 		break;
2612 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2613 		*prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
2614 		break;
2615 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2616 		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
2617 		break;
2618 	default:
2619 		return -EINVAL;
2620 	}
2621 
2622 	return 0;
2623 }
2624 
2625 static int mlx5_ib_flow_action_create_packet_reformat_ctx(
2626 	struct mlx5_ib_dev *dev,
2627 	struct mlx5_ib_flow_action *maction,
2628 	u8 ft_type, u8 dv_prt,
2629 	void *in, size_t len)
2630 {
2631 	struct mlx5_pkt_reformat_params reformat_params;
2632 	enum mlx5_flow_namespace_type namespace;
2633 	u8 prm_prt;
2634 	int ret;
2635 
2636 	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2637 	if (ret)
2638 		return ret;
2639 
2640 	ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
2641 	if (ret)
2642 		return ret;
2643 
2644 	memset(&reformat_params, 0, sizeof(reformat_params));
2645 	reformat_params.type = prm_prt;
2646 	reformat_params.size = len;
2647 	reformat_params.data = in;
2648 	maction->flow_action_raw.pkt_reformat =
2649 		mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
2650 					   namespace);
2651 	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
2652 		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
2653 		return ret;
2654 	}
2655 
2656 	maction->flow_action_raw.sub_type =
2657 		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
2658 	maction->flow_action_raw.dev = dev;
2659 
2660 	return 0;
2661 }
2662 
2663 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
2664 	struct uverbs_attr_bundle *attrs)
2665 {
2666 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
2667 		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
2668 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2669 	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
2670 	enum mlx5_ib_uapi_flow_table_type ft_type;
2671 	struct mlx5_ib_flow_action *maction;
2672 	int ret;
2673 
2674 	ret = uverbs_get_const(&ft_type, attrs,
2675 			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
2676 	if (ret)
2677 		return ret;
2678 
2679 	ret = uverbs_get_const(&dv_prt, attrs,
2680 			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
2681 	if (ret)
2682 		return ret;
2683 
2684 	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
2685 		return -EOPNOTSUPP;
2686 
2687 	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2688 	if (!maction)
2689 		return -ENOMEM;
2690 
2691 	if (dv_prt ==
2692 	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
2693 		maction->flow_action_raw.sub_type =
2694 			MLX5_IB_FLOW_ACTION_DECAP;
2695 		maction->flow_action_raw.dev = mdev;
2696 	} else {
2697 		void *in;
2698 		int len;
2699 
2700 		in = uverbs_attr_get_alloced_ptr(attrs,
2701 			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2702 		if (IS_ERR(in)) {
2703 			ret = PTR_ERR(in);
2704 			goto free_maction;
2705 		}
2706 
2707 		len = uverbs_attr_get_len(attrs,
2708 			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2709 
2710 		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
2711 			maction, ft_type, dv_prt, in, len);
2712 		if (ret)
2713 			goto free_maction;
2714 	}
2715 
2716 	uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
2717 				       IB_FLOW_ACTION_UNSPECIFIED);
2718 	return 0;
2719 
2720 free_maction:
2721 	kfree(maction);
2722 	return ret;
2723 }
2724 
2725 DECLARE_UVERBS_NAMED_METHOD(
2726 	MLX5_IB_METHOD_CREATE_FLOW,
2727 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2728 			UVERBS_OBJECT_FLOW,
2729 			UVERBS_ACCESS_NEW,
2730 			UA_MANDATORY),
2731 	UVERBS_ATTR_PTR_IN(
2732 		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
2733 		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2734 		UA_MANDATORY,
2735 		UA_ALLOC_AND_COPY),
2736 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
2737 			MLX5_IB_OBJECT_FLOW_MATCHER,
2738 			UVERBS_ACCESS_READ,
2739 			UA_MANDATORY),
2740 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
2741 			UVERBS_OBJECT_QP,
2742 			UVERBS_ACCESS_READ),
2743 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
2744 			MLX5_IB_OBJECT_DEVX_OBJ,
2745 			UVERBS_ACCESS_READ),
2746 	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
2747 			     UVERBS_OBJECT_FLOW_ACTION,
2748 			     UVERBS_ACCESS_READ, 1,
2749 			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
2750 			     UA_OPTIONAL),
2751 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
2752 			   UVERBS_ATTR_TYPE(u32),
2753 			   UA_OPTIONAL),
2754 	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
2755 			     MLX5_IB_OBJECT_DEVX_OBJ,
2756 			     UVERBS_ACCESS_READ, 1, 1,
2757 			     UA_OPTIONAL),
2758 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
2759 			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
2760 			   UA_OPTIONAL,
2761 			   UA_ALLOC_AND_COPY),
2762 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
2763 			     enum mlx5_ib_create_flow_flags,
2764 			     UA_OPTIONAL));
2765 
2766 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2767 	MLX5_IB_METHOD_DESTROY_FLOW,
2768 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2769 			UVERBS_OBJECT_FLOW,
2770 			UVERBS_ACCESS_DESTROY,
2771 			UA_MANDATORY));
2772 
2773 ADD_UVERBS_METHODS(mlx5_ib_fs,
2774 		   UVERBS_OBJECT_FLOW,
2775 		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
2776 		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
2777 
2778 DECLARE_UVERBS_NAMED_METHOD(
2779 	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
2780 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
2781 			UVERBS_OBJECT_FLOW_ACTION,
2782 			UVERBS_ACCESS_NEW,
2783 			UA_MANDATORY),
2784 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2785 			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
2786 				   set_add_copy_action_in_auto)),
2787 			   UA_MANDATORY,
2788 			   UA_ALLOC_AND_COPY),
2789 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
2790 			     enum mlx5_ib_uapi_flow_table_type,
2791 			     UA_MANDATORY));
2792 
2793 DECLARE_UVERBS_NAMED_METHOD(
2794 	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
2795 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
2796 			UVERBS_OBJECT_FLOW_ACTION,
2797 			UVERBS_ACCESS_NEW,
2798 			UA_MANDATORY),
2799 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
2800 			   UVERBS_ATTR_MIN_SIZE(1),
2801 			   UA_ALLOC_AND_COPY,
2802 			   UA_OPTIONAL),
2803 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
2804 			     enum mlx5_ib_uapi_flow_action_packet_reformat_type,
2805 			     UA_MANDATORY),
2806 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
2807 			     enum mlx5_ib_uapi_flow_table_type,
2808 			     UA_MANDATORY));
2809 
2810 ADD_UVERBS_METHODS(
2811 	mlx5_ib_flow_actions,
2812 	UVERBS_OBJECT_FLOW_ACTION,
2813 	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
2814 	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
2815 
2816 DECLARE_UVERBS_NAMED_METHOD(
2817 	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
2818 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
2819 			MLX5_IB_OBJECT_FLOW_MATCHER,
2820 			UVERBS_ACCESS_NEW,
2821 			UA_MANDATORY),
2822 	UVERBS_ATTR_PTR_IN(
2823 		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
2824 		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2825 		UA_MANDATORY),
2826 	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
2827 			    mlx5_ib_flow_type,
2828 			    UA_MANDATORY),
2829 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
2830 			   UVERBS_ATTR_TYPE(u8),
2831 			   UA_MANDATORY),
2832 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2833 			     enum ib_flow_flags,
2834 			     UA_OPTIONAL),
2835 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
2836 			     enum mlx5_ib_uapi_flow_table_type,
2837 			     UA_OPTIONAL));
2838 
2839 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2840 	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
2841 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
2842 			MLX5_IB_OBJECT_FLOW_MATCHER,
2843 			UVERBS_ACCESS_DESTROY,
2844 			UA_MANDATORY));
2845 
2846 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
2847 			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
2848 			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
2849 			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
2850 
2851 DECLARE_UVERBS_NAMED_METHOD(
2852 	MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
2853 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
2854 			MLX5_IB_OBJECT_STEERING_ANCHOR,
2855 			UVERBS_ACCESS_NEW,
2856 			UA_MANDATORY),
2857 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
2858 			     enum mlx5_ib_uapi_flow_table_type,
2859 			     UA_MANDATORY),
2860 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
2861 			   UVERBS_ATTR_TYPE(u16),
2862 			   UA_MANDATORY),
2863 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2864 			   UVERBS_ATTR_TYPE(u32),
2865 			   UA_MANDATORY));
2866 
2867 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2868 	MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
2869 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
2870 			MLX5_IB_OBJECT_STEERING_ANCHOR,
2871 			UVERBS_ACCESS_DESTROY,
2872 			UA_MANDATORY));
2873 
2874 DECLARE_UVERBS_NAMED_OBJECT(
2875 	MLX5_IB_OBJECT_STEERING_ANCHOR,
2876 	UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
2877 	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
2878 	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
2879 
2880 const struct uapi_definition mlx5_ib_flow_defs[] = {
2881 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2882 		MLX5_IB_OBJECT_FLOW_MATCHER),
2883 	UAPI_DEF_CHAIN_OBJ_TREE(
2884 		UVERBS_OBJECT_FLOW,
2885 		&mlx5_ib_fs),
2886 	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
2887 				&mlx5_ib_flow_actions),
2888 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2889 		MLX5_IB_OBJECT_STEERING_ANCHOR,
2890 		UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
2891 	{},
2892 };
2893 
2894 static const struct ib_device_ops flow_ops = {
2895 	.create_flow = mlx5_ib_create_flow,
2896 	.destroy_flow = mlx5_ib_destroy_flow,
2897 	.destroy_flow_action = mlx5_ib_destroy_flow_action,
2898 };
2899 
2900 int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2901 {
2902 	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
2903 
2904 	if (!dev->flow_db)
2905 		return -ENOMEM;
2906 
2907 	mutex_init(&dev->flow_db->lock);
2908 
2909 	ib_set_device_ops(&dev->ib_dev, &flow_ops);
2910 	return 0;
2911 }
2912