1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3 * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
4 */
5
6 #include <rdma/ib_user_verbs.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/uverbs_types.h>
9 #include <rdma/uverbs_ioctl.h>
10 #include <rdma/uverbs_std_types.h>
11 #include <rdma/mlx5_user_ioctl_cmds.h>
12 #include <rdma/mlx5_user_ioctl_verbs.h>
13 #include <rdma/ib_hdrs.h>
14 #include <rdma/ib_umem.h>
15 #include <rdma/ib_ucaps.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/fs_helpers.h>
19 #include <linux/mlx5/eswitch.h>
20 #include <net/inet_ecn.h>
21 #include "mlx5_ib.h"
22 #include "counters.h"
23 #include "devx.h"
24 #include "fs.h"
25
26 #define UVERBS_MODULE_NAME mlx5_ib
27 #include <rdma/uverbs_named_ioctl.h>
28
29 enum {
30 MATCH_CRITERIA_ENABLE_OUTER_BIT,
31 MATCH_CRITERIA_ENABLE_MISC_BIT,
32 MATCH_CRITERIA_ENABLE_INNER_BIT,
33 MATCH_CRITERIA_ENABLE_MISC2_BIT
34 };
35
36
37 struct mlx5_per_qp_opfc {
38 struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
39 };
40
41 #define HEADER_IS_ZERO(match_criteria, headers) \
42 !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
43 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
44
get_match_criteria_enable(u32 * match_criteria)45 static u8 get_match_criteria_enable(u32 *match_criteria)
46 {
47 u8 match_criteria_enable;
48
49 match_criteria_enable =
50 (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
51 MATCH_CRITERIA_ENABLE_OUTER_BIT;
52 match_criteria_enable |=
53 (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
54 MATCH_CRITERIA_ENABLE_MISC_BIT;
55 match_criteria_enable |=
56 (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
57 MATCH_CRITERIA_ENABLE_INNER_BIT;
58 match_criteria_enable |=
59 (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
60 MATCH_CRITERIA_ENABLE_MISC2_BIT;
61
62 return match_criteria_enable;
63 }
64
set_proto(void * outer_c,void * outer_v,u8 mask,u8 val)65 static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
66 {
67 u8 entry_mask;
68 u8 entry_val;
69 int err = 0;
70
71 if (!mask)
72 goto out;
73
74 entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
75 ip_protocol);
76 entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
77 ip_protocol);
78 if (!entry_mask) {
79 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
80 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
81 goto out;
82 }
83 /* Don't override existing ip protocol */
84 if (mask != entry_mask || val != entry_val)
85 err = -EINVAL;
86 out:
87 return err;
88 }
89
set_flow_label(void * misc_c,void * misc_v,u32 mask,u32 val,bool inner)90 static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
91 bool inner)
92 {
93 if (inner) {
94 MLX5_SET(fte_match_set_misc,
95 misc_c, inner_ipv6_flow_label, mask);
96 MLX5_SET(fte_match_set_misc,
97 misc_v, inner_ipv6_flow_label, val);
98 } else {
99 MLX5_SET(fte_match_set_misc,
100 misc_c, outer_ipv6_flow_label, mask);
101 MLX5_SET(fte_match_set_misc,
102 misc_v, outer_ipv6_flow_label, val);
103 }
104 }
105
set_tos(void * outer_c,void * outer_v,u8 mask,u8 val)106 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
107 {
108 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
109 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
110 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
111 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
112 }
113
check_mpls_supp_fields(u32 field_support,const __be32 * set_mask)114 static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
115 {
116 if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
117 !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
118 return -EOPNOTSUPP;
119
120 if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
121 !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
122 return -EOPNOTSUPP;
123
124 if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
125 !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
126 return -EOPNOTSUPP;
127
128 if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
129 !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
130 return -EOPNOTSUPP;
131
132 return 0;
133 }
134
135 #define LAST_ETH_FIELD vlan_tag
136 #define LAST_IPV4_FIELD tos
137 #define LAST_IPV6_FIELD traffic_class
138 #define LAST_TCP_UDP_FIELD src_port
139 #define LAST_TUNNEL_FIELD tunnel_id
140 #define LAST_FLOW_TAG_FIELD tag_id
141 #define LAST_DROP_FIELD size
142 #define LAST_COUNTERS_FIELD counters
143
144 /* Field is the last supported field */
145 #define FIELDS_NOT_SUPPORTED(filter, field) \
146 memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \
147 sizeof(filter) - offsetofend(typeof(filter), field))
148
parse_flow_flow_action(struct mlx5_ib_flow_action * maction,bool is_egress,struct mlx5_flow_act * action)149 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
150 bool is_egress,
151 struct mlx5_flow_act *action)
152 {
153
154 switch (maction->ib_action.type) {
155 case IB_FLOW_ACTION_UNSPECIFIED:
156 if (maction->flow_action_raw.sub_type ==
157 MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
158 if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
159 return -EINVAL;
160 action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
161 action->modify_hdr =
162 maction->flow_action_raw.modify_hdr;
163 return 0;
164 }
165 if (maction->flow_action_raw.sub_type ==
166 MLX5_IB_FLOW_ACTION_DECAP) {
167 if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
168 return -EINVAL;
169 action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
170 return 0;
171 }
172 if (maction->flow_action_raw.sub_type ==
173 MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
174 if (action->action &
175 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
176 return -EINVAL;
177 action->action |=
178 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
179 action->pkt_reformat =
180 maction->flow_action_raw.pkt_reformat;
181 return 0;
182 }
183 fallthrough;
184 default:
185 return -EOPNOTSUPP;
186 }
187 }
188
parse_flow_attr(struct mlx5_core_dev * mdev,struct mlx5_flow_spec * spec,const union ib_flow_spec * ib_spec,const struct ib_flow_attr * flow_attr,struct mlx5_flow_act * action,u32 prev_type)189 static int parse_flow_attr(struct mlx5_core_dev *mdev,
190 struct mlx5_flow_spec *spec,
191 const union ib_flow_spec *ib_spec,
192 const struct ib_flow_attr *flow_attr,
193 struct mlx5_flow_act *action, u32 prev_type)
194 {
195 struct mlx5_flow_context *flow_context = &spec->flow_context;
196 u32 *match_c = spec->match_criteria;
197 u32 *match_v = spec->match_value;
198 void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
199 misc_parameters);
200 void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
201 misc_parameters);
202 void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
203 misc_parameters_2);
204 void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
205 misc_parameters_2);
206 void *headers_c;
207 void *headers_v;
208 int match_ipv;
209 int ret;
210
211 if (ib_spec->type & IB_FLOW_SPEC_INNER) {
212 headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
213 inner_headers);
214 headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
215 inner_headers);
216 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
217 ft_field_support.inner_ip_version);
218 } else {
219 headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
220 outer_headers);
221 headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
222 outer_headers);
223 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
224 ft_field_support.outer_ip_version);
225 }
226
227 switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
228 case IB_FLOW_SPEC_ETH:
229 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
230 return -EOPNOTSUPP;
231
232 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
233 dmac_47_16),
234 ib_spec->eth.mask.dst_mac);
235 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
236 dmac_47_16),
237 ib_spec->eth.val.dst_mac);
238
239 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
240 smac_47_16),
241 ib_spec->eth.mask.src_mac);
242 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
243 smac_47_16),
244 ib_spec->eth.val.src_mac);
245
246 if (ib_spec->eth.mask.vlan_tag) {
247 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
248 cvlan_tag, 1);
249 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
250 cvlan_tag, 1);
251
252 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
253 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
254 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
255 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
256
257 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
258 first_cfi,
259 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
260 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
261 first_cfi,
262 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
263
264 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
265 first_prio,
266 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
267 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
268 first_prio,
269 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
270 }
271 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
272 ethertype, ntohs(ib_spec->eth.mask.ether_type));
273 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
274 ethertype, ntohs(ib_spec->eth.val.ether_type));
275 break;
276 case IB_FLOW_SPEC_IPV4:
277 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
278 return -EOPNOTSUPP;
279
280 if (match_ipv) {
281 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
282 ip_version, 0xf);
283 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
284 ip_version, MLX5_FS_IPV4_VERSION);
285 } else {
286 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
287 ethertype, 0xffff);
288 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
289 ethertype, ETH_P_IP);
290 }
291
292 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
293 src_ipv4_src_ipv6.ipv4_layout.ipv4),
294 &ib_spec->ipv4.mask.src_ip,
295 sizeof(ib_spec->ipv4.mask.src_ip));
296 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
297 src_ipv4_src_ipv6.ipv4_layout.ipv4),
298 &ib_spec->ipv4.val.src_ip,
299 sizeof(ib_spec->ipv4.val.src_ip));
300 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
301 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
302 &ib_spec->ipv4.mask.dst_ip,
303 sizeof(ib_spec->ipv4.mask.dst_ip));
304 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
305 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
306 &ib_spec->ipv4.val.dst_ip,
307 sizeof(ib_spec->ipv4.val.dst_ip));
308
309 set_tos(headers_c, headers_v,
310 ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
311
312 if (set_proto(headers_c, headers_v,
313 ib_spec->ipv4.mask.proto,
314 ib_spec->ipv4.val.proto))
315 return -EINVAL;
316 break;
317 case IB_FLOW_SPEC_IPV6:
318 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
319 return -EOPNOTSUPP;
320
321 if (match_ipv) {
322 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
323 ip_version, 0xf);
324 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
325 ip_version, MLX5_FS_IPV6_VERSION);
326 } else {
327 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
328 ethertype, 0xffff);
329 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
330 ethertype, ETH_P_IPV6);
331 }
332
333 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
334 src_ipv4_src_ipv6.ipv6_layout.ipv6),
335 &ib_spec->ipv6.mask.src_ip,
336 sizeof(ib_spec->ipv6.mask.src_ip));
337 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
338 src_ipv4_src_ipv6.ipv6_layout.ipv6),
339 &ib_spec->ipv6.val.src_ip,
340 sizeof(ib_spec->ipv6.val.src_ip));
341 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
342 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
343 &ib_spec->ipv6.mask.dst_ip,
344 sizeof(ib_spec->ipv6.mask.dst_ip));
345 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
346 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
347 &ib_spec->ipv6.val.dst_ip,
348 sizeof(ib_spec->ipv6.val.dst_ip));
349
350 set_tos(headers_c, headers_v,
351 ib_spec->ipv6.mask.traffic_class,
352 ib_spec->ipv6.val.traffic_class);
353
354 if (set_proto(headers_c, headers_v,
355 ib_spec->ipv6.mask.next_hdr,
356 ib_spec->ipv6.val.next_hdr))
357 return -EINVAL;
358
359 set_flow_label(misc_params_c, misc_params_v,
360 ntohl(ib_spec->ipv6.mask.flow_label),
361 ntohl(ib_spec->ipv6.val.flow_label),
362 ib_spec->type & IB_FLOW_SPEC_INNER);
363 break;
364 case IB_FLOW_SPEC_ESP:
365 return -EOPNOTSUPP;
366 case IB_FLOW_SPEC_TCP:
367 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
368 LAST_TCP_UDP_FIELD))
369 return -EOPNOTSUPP;
370
371 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
372 return -EINVAL;
373
374 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
375 ntohs(ib_spec->tcp_udp.mask.src_port));
376 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
377 ntohs(ib_spec->tcp_udp.val.src_port));
378
379 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
380 ntohs(ib_spec->tcp_udp.mask.dst_port));
381 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
382 ntohs(ib_spec->tcp_udp.val.dst_port));
383 break;
384 case IB_FLOW_SPEC_UDP:
385 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
386 LAST_TCP_UDP_FIELD))
387 return -EOPNOTSUPP;
388
389 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
390 return -EINVAL;
391
392 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
393 ntohs(ib_spec->tcp_udp.mask.src_port));
394 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
395 ntohs(ib_spec->tcp_udp.val.src_port));
396
397 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
398 ntohs(ib_spec->tcp_udp.mask.dst_port));
399 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
400 ntohs(ib_spec->tcp_udp.val.dst_port));
401 break;
402 case IB_FLOW_SPEC_GRE:
403 if (ib_spec->gre.mask.c_ks_res0_ver)
404 return -EOPNOTSUPP;
405
406 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
407 return -EINVAL;
408
409 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
410 0xff);
411 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
412 IPPROTO_GRE);
413
414 MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
415 ntohs(ib_spec->gre.mask.protocol));
416 MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
417 ntohs(ib_spec->gre.val.protocol));
418
419 memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
420 gre_key.nvgre.hi),
421 &ib_spec->gre.mask.key,
422 sizeof(ib_spec->gre.mask.key));
423 memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
424 gre_key.nvgre.hi),
425 &ib_spec->gre.val.key,
426 sizeof(ib_spec->gre.val.key));
427 break;
428 case IB_FLOW_SPEC_MPLS:
429 switch (prev_type) {
430 case IB_FLOW_SPEC_UDP:
431 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
432 ft_field_support.outer_first_mpls_over_udp),
433 &ib_spec->mpls.mask.tag))
434 return -EOPNOTSUPP;
435
436 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
437 outer_first_mpls_over_udp),
438 &ib_spec->mpls.val.tag,
439 sizeof(ib_spec->mpls.val.tag));
440 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
441 outer_first_mpls_over_udp),
442 &ib_spec->mpls.mask.tag,
443 sizeof(ib_spec->mpls.mask.tag));
444 break;
445 case IB_FLOW_SPEC_GRE:
446 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
447 ft_field_support.outer_first_mpls_over_gre),
448 &ib_spec->mpls.mask.tag))
449 return -EOPNOTSUPP;
450
451 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
452 outer_first_mpls_over_gre),
453 &ib_spec->mpls.val.tag,
454 sizeof(ib_spec->mpls.val.tag));
455 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
456 outer_first_mpls_over_gre),
457 &ib_spec->mpls.mask.tag,
458 sizeof(ib_spec->mpls.mask.tag));
459 break;
460 default:
461 if (ib_spec->type & IB_FLOW_SPEC_INNER) {
462 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
463 ft_field_support.inner_first_mpls),
464 &ib_spec->mpls.mask.tag))
465 return -EOPNOTSUPP;
466
467 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
468 inner_first_mpls),
469 &ib_spec->mpls.val.tag,
470 sizeof(ib_spec->mpls.val.tag));
471 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
472 inner_first_mpls),
473 &ib_spec->mpls.mask.tag,
474 sizeof(ib_spec->mpls.mask.tag));
475 } else {
476 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
477 ft_field_support.outer_first_mpls),
478 &ib_spec->mpls.mask.tag))
479 return -EOPNOTSUPP;
480
481 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
482 outer_first_mpls),
483 &ib_spec->mpls.val.tag,
484 sizeof(ib_spec->mpls.val.tag));
485 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
486 outer_first_mpls),
487 &ib_spec->mpls.mask.tag,
488 sizeof(ib_spec->mpls.mask.tag));
489 }
490 }
491 break;
492 case IB_FLOW_SPEC_VXLAN_TUNNEL:
493 if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
494 LAST_TUNNEL_FIELD))
495 return -EOPNOTSUPP;
496
497 MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
498 ntohl(ib_spec->tunnel.mask.tunnel_id));
499 MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
500 ntohl(ib_spec->tunnel.val.tunnel_id));
501 break;
502 case IB_FLOW_SPEC_ACTION_TAG:
503 if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
504 LAST_FLOW_TAG_FIELD))
505 return -EOPNOTSUPP;
506 if (ib_spec->flow_tag.tag_id >= BIT(24))
507 return -EINVAL;
508
509 flow_context->flow_tag = ib_spec->flow_tag.tag_id;
510 flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
511 break;
512 case IB_FLOW_SPEC_ACTION_DROP:
513 if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
514 LAST_DROP_FIELD))
515 return -EOPNOTSUPP;
516 action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
517 break;
518 case IB_FLOW_SPEC_ACTION_HANDLE:
519 ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
520 flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
521 if (ret)
522 return ret;
523 break;
524 case IB_FLOW_SPEC_ACTION_COUNT:
525 if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
526 LAST_COUNTERS_FIELD))
527 return -EOPNOTSUPP;
528
529 /* for now support only one counters spec per flow */
530 if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
531 return -EINVAL;
532
533 action->counters = ib_spec->flow_count.counters;
534 action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
535 break;
536 default:
537 return -EINVAL;
538 }
539
540 return 0;
541 }
542
543 /* If a flow could catch both multicast and unicast packets,
544 * it won't fall into the multicast flow steering table and this rule
545 * could steal other multicast packets.
546 */
flow_is_multicast_only(const struct ib_flow_attr * ib_attr)547 static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
548 {
549 union ib_flow_spec *flow_spec;
550
551 if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
552 ib_attr->num_of_specs < 1)
553 return false;
554
555 flow_spec = (union ib_flow_spec *)(ib_attr + 1);
556 if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
557 struct ib_flow_spec_ipv4 *ipv4_spec;
558
559 ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
560 if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
561 return true;
562
563 return false;
564 }
565
566 if (flow_spec->type == IB_FLOW_SPEC_ETH) {
567 struct ib_flow_spec_eth *eth_spec;
568
569 eth_spec = (struct ib_flow_spec_eth *)flow_spec;
570 return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
571 is_multicast_ether_addr(eth_spec->val.dst_mac);
572 }
573
574 return false;
575 }
576
is_valid_ethertype(struct mlx5_core_dev * mdev,const struct ib_flow_attr * flow_attr,bool check_inner)577 static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
578 const struct ib_flow_attr *flow_attr,
579 bool check_inner)
580 {
581 union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
582 int match_ipv = check_inner ?
583 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
584 ft_field_support.inner_ip_version) :
585 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
586 ft_field_support.outer_ip_version);
587 int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
588 bool ipv4_spec_valid, ipv6_spec_valid;
589 unsigned int ip_spec_type = 0;
590 bool has_ethertype = false;
591 unsigned int spec_index;
592 bool mask_valid = true;
593 u16 eth_type = 0;
594 bool type_valid;
595
596 /* Validate that ethertype is correct */
597 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
598 if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
599 ib_spec->eth.mask.ether_type) {
600 mask_valid = (ib_spec->eth.mask.ether_type ==
601 htons(0xffff));
602 has_ethertype = true;
603 eth_type = ntohs(ib_spec->eth.val.ether_type);
604 } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
605 (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
606 ip_spec_type = ib_spec->type;
607 }
608 ib_spec = (void *)ib_spec + ib_spec->size;
609 }
610
611 type_valid = (!has_ethertype) || (!ip_spec_type);
612 if (!type_valid && mask_valid) {
613 ipv4_spec_valid = (eth_type == ETH_P_IP) &&
614 (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
615 ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
616 (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
617
618 type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
619 (((eth_type == ETH_P_MPLS_UC) ||
620 (eth_type == ETH_P_MPLS_MC)) && match_ipv);
621 }
622
623 return type_valid;
624 }
625
is_valid_attr(struct mlx5_core_dev * mdev,const struct ib_flow_attr * flow_attr)626 static bool is_valid_attr(struct mlx5_core_dev *mdev,
627 const struct ib_flow_attr *flow_attr)
628 {
629 return is_valid_ethertype(mdev, flow_attr, false) &&
630 is_valid_ethertype(mdev, flow_attr, true);
631 }
632
put_flow_table(struct mlx5_ib_dev * dev,struct mlx5_ib_flow_prio * prio,bool ft_added)633 static void put_flow_table(struct mlx5_ib_dev *dev,
634 struct mlx5_ib_flow_prio *prio, bool ft_added)
635 {
636 prio->refcount -= !!ft_added;
637 if (!prio->refcount) {
638 mlx5_destroy_flow_table(prio->flow_table);
639 prio->flow_table = NULL;
640 }
641 }
642
mlx5_ib_destroy_flow(struct ib_flow * flow_id)643 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
644 {
645 struct mlx5_ib_flow_handler *handler = container_of(flow_id,
646 struct mlx5_ib_flow_handler,
647 ibflow);
648 struct mlx5_ib_flow_handler *iter, *tmp;
649 struct mlx5_ib_dev *dev = handler->dev;
650
651 mutex_lock(&dev->flow_db->lock);
652
653 list_for_each_entry_safe(iter, tmp, &handler->list, list) {
654 mlx5_del_flow_rules(iter->rule);
655 put_flow_table(dev, iter->prio, true);
656 list_del(&iter->list);
657 kfree(iter);
658 }
659
660 mlx5_del_flow_rules(handler->rule);
661 put_flow_table(dev, handler->prio, true);
662 mlx5_ib_counters_clear_description(handler->ibcounters);
663 mutex_unlock(&dev->flow_db->lock);
664 if (handler->flow_matcher)
665 atomic_dec(&handler->flow_matcher->usecnt);
666 kfree(handler);
667
668 return 0;
669 }
670
ib_prio_to_core_prio(unsigned int priority,bool dont_trap)671 static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
672 {
673 priority *= 2;
674 if (!dont_trap)
675 priority++;
676 return priority;
677 }
678
679 enum flow_table_type {
680 MLX5_IB_FT_RX,
681 MLX5_IB_FT_TX
682 };
683
684 #define MLX5_FS_MAX_TYPES 6
685 #define MLX5_FS_MAX_ENTRIES BIT(16)
686
mlx5_ib_shared_ft_allowed(struct ib_device * device)687 static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
688 {
689 struct mlx5_ib_dev *dev = to_mdev(device);
690
691 return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
692 }
693
_get_prio(struct mlx5_ib_dev * dev,struct mlx5_flow_namespace * ns,struct mlx5_ib_flow_prio * prio,int priority,int num_entries,int num_groups,u32 flags,u16 vport)694 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
695 struct mlx5_flow_namespace *ns,
696 struct mlx5_ib_flow_prio *prio,
697 int priority,
698 int num_entries, int num_groups,
699 u32 flags, u16 vport)
700 {
701 struct mlx5_flow_table_attr ft_attr = {};
702 struct mlx5_flow_table *ft;
703
704 ft_attr.prio = priority;
705 ft_attr.max_fte = num_entries;
706 ft_attr.flags = flags;
707 ft_attr.vport = vport;
708 ft_attr.autogroup.max_num_groups = num_groups;
709 ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
710 if (IS_ERR(ft))
711 return ERR_CAST(ft);
712
713 prio->flow_table = ft;
714 prio->refcount = 0;
715 return prio;
716 }
717
get_flow_table(struct mlx5_ib_dev * dev,struct ib_flow_attr * flow_attr,enum flow_table_type ft_type)718 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
719 struct ib_flow_attr *flow_attr,
720 enum flow_table_type ft_type)
721 {
722 bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
723 struct mlx5_flow_namespace *ns = NULL;
724 enum mlx5_flow_namespace_type fn_type;
725 struct mlx5_ib_flow_prio *prio;
726 struct mlx5_flow_table *ft;
727 int max_table_size;
728 int num_entries;
729 int num_groups;
730 bool esw_encap;
731 u32 flags = 0;
732 int priority;
733
734 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
735 log_max_ft_size));
736 esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
737 DEVLINK_ESWITCH_ENCAP_MODE_NONE;
738 switch (flow_attr->type) {
739 case IB_FLOW_ATTR_NORMAL:
740 if (flow_is_multicast_only(flow_attr) && !dont_trap)
741 priority = MLX5_IB_FLOW_MCAST_PRIO;
742 else
743 priority = ib_prio_to_core_prio(flow_attr->priority,
744 dont_trap);
745 if (ft_type == MLX5_IB_FT_RX) {
746 fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
747 prio = &dev->flow_db->prios[priority];
748 if (!dev->is_rep && !esw_encap &&
749 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
750 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
751 if (!dev->is_rep && !esw_encap &&
752 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
753 reformat_l3_tunnel_to_l2))
754 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
755 } else {
756 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
757 dev->mdev, log_max_ft_size));
758 fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
759 prio = &dev->flow_db->egress_prios[priority];
760 if (!dev->is_rep && !esw_encap &&
761 MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
762 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
763 }
764 ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
765 num_entries = MLX5_FS_MAX_ENTRIES;
766 num_groups = MLX5_FS_MAX_TYPES;
767 break;
768 case IB_FLOW_ATTR_ALL_DEFAULT:
769 case IB_FLOW_ATTR_MC_DEFAULT:
770 ns = mlx5_get_flow_namespace(dev->mdev,
771 MLX5_FLOW_NAMESPACE_LEFTOVERS);
772 build_leftovers_ft_param(&priority, &num_entries, &num_groups);
773 prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
774 break;
775 case IB_FLOW_ATTR_SNIFFER:
776 if (!MLX5_CAP_FLOWTABLE(dev->mdev,
777 allow_sniffer_and_nic_rx_shared_tir))
778 return ERR_PTR(-EOPNOTSUPP);
779
780 ns = mlx5_get_flow_namespace(
781 dev->mdev, ft_type == MLX5_IB_FT_RX ?
782 MLX5_FLOW_NAMESPACE_SNIFFER_RX :
783 MLX5_FLOW_NAMESPACE_SNIFFER_TX);
784
785 prio = &dev->flow_db->sniffer[ft_type];
786 priority = 0;
787 num_entries = 1;
788 num_groups = 1;
789 break;
790 default:
791 break;
792 }
793
794 if (!ns)
795 return ERR_PTR(-EOPNOTSUPP);
796
797 max_table_size = min_t(int, num_entries, max_table_size);
798
799 ft = prio->flow_table;
800 if (!ft)
801 return _get_prio(dev, ns, prio, priority, max_table_size,
802 num_groups, flags, 0);
803
804 return prio;
805 }
806
807 enum {
808 RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO,
809 RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO,
810 RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
811 RDMA_RX_ECN_OPCOUNTER_PRIO,
812 RDMA_RX_CNP_OPCOUNTER_PRIO,
813 RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
814 };
815
816 enum {
817 RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO,
818 RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
819 RDMA_TX_CNP_OPCOUNTER_PRIO,
820 RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
821 };
822
set_vhca_port_spec(struct mlx5_ib_dev * dev,u32 port_num,struct mlx5_flow_spec * spec)823 static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
824 struct mlx5_flow_spec *spec)
825 {
826 if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
827 ft_field_support.source_vhca_port) ||
828 !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
829 ft_field_support.source_vhca_port))
830 return -EOPNOTSUPP;
831
832 MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
833 misc_parameters.source_vhca_port);
834 MLX5_SET(fte_match_param, &spec->match_value,
835 misc_parameters.source_vhca_port, port_num);
836
837 return 0;
838 }
839
set_ecn_ce_spec(struct mlx5_ib_dev * dev,u32 port_num,struct mlx5_flow_spec * spec,int ipv)840 static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
841 struct mlx5_flow_spec *spec, int ipv)
842 {
843 if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
844 ft_field_support.outer_ip_version))
845 return -EOPNOTSUPP;
846
847 if (mlx5_core_mp_enabled(dev->mdev) &&
848 set_vhca_port_spec(dev, port_num, spec))
849 return -EOPNOTSUPP;
850
851 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
852 outer_headers.ip_ecn);
853 MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
854 INET_ECN_CE);
855 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
856 outer_headers.ip_version);
857 MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
858 ipv);
859
860 spec->match_criteria_enable =
861 get_match_criteria_enable(spec->match_criteria);
862
863 return 0;
864 }
865
set_cnp_spec(struct mlx5_ib_dev * dev,u32 port_num,struct mlx5_flow_spec * spec)866 static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
867 struct mlx5_flow_spec *spec)
868 {
869 if (mlx5_core_mp_enabled(dev->mdev) &&
870 set_vhca_port_spec(dev, port_num, spec))
871 return -EOPNOTSUPP;
872
873 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
874 misc_parameters.bth_opcode);
875 MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
876 IB_BTH_OPCODE_CNP);
877
878 spec->match_criteria_enable =
879 get_match_criteria_enable(spec->match_criteria);
880
881 return 0;
882 }
883
884 /* Returns the prio we should use for the given optional counter type,
885 * whereas for bytes type we use the packet type, since they share the same
886 * resources.
887 */
get_opfc_prio(struct mlx5_ib_dev * dev,u32 type)888 static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
889 u32 type)
890 {
891 u32 prio_type;
892
893 switch (type) {
894 case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
895 prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
896 break;
897 case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
898 prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
899 break;
900 case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
901 prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
902 break;
903 case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
904 prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
905 break;
906 default:
907 prio_type = type;
908 }
909
910 return &dev->flow_db->opfcs[prio_type];
911 }
912
put_per_qp_prio(struct mlx5_ib_dev * dev,enum mlx5_ib_optional_counter_type type)913 static void put_per_qp_prio(struct mlx5_ib_dev *dev,
914 enum mlx5_ib_optional_counter_type type)
915 {
916 enum mlx5_ib_optional_counter_type per_qp_type;
917 struct mlx5_ib_flow_prio *prio;
918
919 switch (type) {
920 case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
921 per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
922 break;
923 case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
924 per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
925 break;
926 case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
927 per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
928 break;
929 case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
930 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
931 break;
932 case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
933 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
934 break;
935 case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
936 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
937 break;
938 case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
939 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
940 break;
941 default:
942 return;
943 }
944
945 prio = get_opfc_prio(dev, per_qp_type);
946 put_flow_table(dev, prio, true);
947 }
948
get_per_qp_prio(struct mlx5_ib_dev * dev,enum mlx5_ib_optional_counter_type type)949 static int get_per_qp_prio(struct mlx5_ib_dev *dev,
950 enum mlx5_ib_optional_counter_type type)
951 {
952 enum mlx5_ib_optional_counter_type per_qp_type;
953 enum mlx5_flow_namespace_type fn_type;
954 struct mlx5_flow_namespace *ns;
955 struct mlx5_ib_flow_prio *prio;
956 int priority;
957
958 switch (type) {
959 case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
960 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
961 priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO;
962 per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
963 break;
964 case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
965 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
966 priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO;
967 per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
968 break;
969 case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
970 fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
971 priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO;
972 per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
973 break;
974 case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
975 fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
976 priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
977 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
978 break;
979 case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
980 fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
981 priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
982 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
983 break;
984 case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
985 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
986 priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
987 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
988 break;
989 case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
990 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
991 priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
992 per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
993 break;
994 default:
995 return -EINVAL;
996 }
997
998 ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
999 if (!ns)
1000 return -EOPNOTSUPP;
1001
1002 prio = get_opfc_prio(dev, per_qp_type);
1003 if (prio->flow_table)
1004 return 0;
1005
1006 prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0);
1007 if (IS_ERR(prio))
1008 return PTR_ERR(prio);
1009
1010 prio->refcount = 1;
1011
1012 return 0;
1013 }
1014
1015 static struct mlx5_per_qp_opfc *
get_per_qp_opfc(struct mlx5_rdma_counter * mcounter,u32 qp_num,bool * new)1016 get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new)
1017 {
1018 struct mlx5_per_qp_opfc *per_qp_opfc;
1019
1020 *new = false;
1021
1022 per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num);
1023 if (per_qp_opfc)
1024 return per_qp_opfc;
1025 per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);
1026
1027 if (!per_qp_opfc)
1028 return NULL;
1029
1030 *new = true;
1031 return per_qp_opfc;
1032 }
1033
add_op_fc_rules(struct mlx5_ib_dev * dev,struct mlx5_rdma_counter * mcounter,struct mlx5_per_qp_opfc * per_qp_opfc,struct mlx5_ib_flow_prio * prio,enum mlx5_ib_optional_counter_type type,u32 qp_num,u32 port_num)1034 static int add_op_fc_rules(struct mlx5_ib_dev *dev,
1035 struct mlx5_rdma_counter *mcounter,
1036 struct mlx5_per_qp_opfc *per_qp_opfc,
1037 struct mlx5_ib_flow_prio *prio,
1038 enum mlx5_ib_optional_counter_type type,
1039 u32 qp_num, u32 port_num)
1040 {
1041 struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc;
1042 struct mlx5_flow_act flow_act = {};
1043 struct mlx5_flow_destination dst;
1044 struct mlx5_flow_spec *spec;
1045 int i, err, spec_num;
1046 bool is_tx;
1047
1048 if (opfc->fc)
1049 return -EEXIST;
1050
1051 if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type,
1052 &in_use_opfc)) {
1053 opfc->fc = in_use_opfc->fc;
1054 opfc->rule[0] = in_use_opfc->rule[0];
1055 return 0;
1056 }
1057
1058 opfc->fc = mcounter->fc[type];
1059
1060 spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
1061 if (!spec) {
1062 err = -ENOMEM;
1063 goto null_fc;
1064 }
1065
1066 switch (type) {
1067 case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP:
1068 if (set_ecn_ce_spec(dev, port_num, &spec[0],
1069 MLX5_FS_IPV4_VERSION) ||
1070 set_ecn_ce_spec(dev, port_num, &spec[1],
1071 MLX5_FS_IPV6_VERSION)) {
1072 err = -EOPNOTSUPP;
1073 goto free_spec;
1074 }
1075 spec_num = 2;
1076 is_tx = false;
1077
1078 MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria,
1079 misc_parameters.bth_dst_qp);
1080 MLX5_SET(fte_match_param, spec[1].match_value,
1081 misc_parameters.bth_dst_qp, qp_num);
1082 spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
1083 break;
1084 case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP:
1085 if (!MLX5_CAP_FLOWTABLE(
1086 dev->mdev,
1087 ft_field_support_2_nic_receive_rdma.bth_opcode) ||
1088 set_cnp_spec(dev, port_num, &spec[0])) {
1089 err = -EOPNOTSUPP;
1090 goto free_spec;
1091 }
1092 spec_num = 1;
1093 is_tx = false;
1094 break;
1095 case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP:
1096 if (!MLX5_CAP_FLOWTABLE(
1097 dev->mdev,
1098 ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
1099 set_cnp_spec(dev, port_num, &spec[0])) {
1100 err = -EOPNOTSUPP;
1101 goto free_spec;
1102 }
1103 spec_num = 1;
1104 is_tx = true;
1105 break;
1106 case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
1107 case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
1108 spec_num = 1;
1109 is_tx = true;
1110 break;
1111 case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
1112 case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
1113 spec_num = 1;
1114 is_tx = false;
1115 break;
1116 default:
1117 err = -EINVAL;
1118 goto free_spec;
1119 }
1120
1121 if (is_tx) {
1122 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
1123 misc_parameters.source_sqn);
1124 MLX5_SET(fte_match_param, spec->match_value,
1125 misc_parameters.source_sqn, qp_num);
1126 } else {
1127 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
1128 misc_parameters.bth_dst_qp);
1129 MLX5_SET(fte_match_param, spec->match_value,
1130 misc_parameters.bth_dst_qp, qp_num);
1131 }
1132
1133 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
1134
1135 dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1136 dst.counter = opfc->fc;
1137
1138 flow_act.action =
1139 MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1140
1141 for (i = 0; i < spec_num; i++) {
1142 opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
1143 &flow_act, &dst, 1);
1144 if (IS_ERR(opfc->rule[i])) {
1145 err = PTR_ERR(opfc->rule[i]);
1146 goto del_rules;
1147 }
1148 }
1149 prio->refcount += spec_num;
1150
1151 err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc,
1152 GFP_KERNEL));
1153 if (err)
1154 goto del_rules;
1155
1156 kfree(spec);
1157
1158 return 0;
1159
1160 del_rules:
1161 while (i--)
1162 mlx5_del_flow_rules(opfc->rule[i]);
1163 put_flow_table(dev, prio, false);
1164 free_spec:
1165 kfree(spec);
1166 null_fc:
1167 opfc->fc = NULL;
1168 return err;
1169 }
1170
is_fc_shared_and_in_use(struct mlx5_rdma_counter * mcounter,u32 type,struct mlx5_fc ** fc)1171 static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
1172 u32 type, struct mlx5_fc **fc)
1173 {
1174 u32 shared_fc_type;
1175
1176 switch (type) {
1177 case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
1178 shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
1179 break;
1180 case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
1181 shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
1182 break;
1183 case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
1184 shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
1185 break;
1186 case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
1187 shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
1188 break;
1189 default:
1190 return false;
1191 }
1192
1193 *fc = mcounter->fc[shared_fc_type];
1194 if (!(*fc))
1195 return false;
1196
1197 return true;
1198 }
1199
mlx5r_fs_destroy_fcs(struct mlx5_ib_dev * dev,struct rdma_counter * counter)1200 void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
1201 struct rdma_counter *counter)
1202 {
1203 struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
1204 struct mlx5_fc *in_use_fc;
1205 int i;
1206
1207 for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
1208 i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
1209 if (!mcounter->fc[i])
1210 continue;
1211
1212 if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) {
1213 mcounter->fc[i] = NULL;
1214 continue;
1215 }
1216
1217 mlx5_fc_destroy(dev->mdev, mcounter->fc[i]);
1218 mcounter->fc[i] = NULL;
1219 }
1220 }
1221
mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev * dev,u32 port_num,struct mlx5_ib_op_fc * opfc,enum mlx5_ib_optional_counter_type type)1222 int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
1223 struct mlx5_ib_op_fc *opfc,
1224 enum mlx5_ib_optional_counter_type type)
1225 {
1226 enum mlx5_flow_namespace_type fn_type;
1227 int priority, i, err, spec_num;
1228 struct mlx5_flow_act flow_act = {};
1229 struct mlx5_flow_destination dst;
1230 struct mlx5_flow_namespace *ns;
1231 struct mlx5_ib_flow_prio *prio;
1232 struct mlx5_flow_spec *spec;
1233
1234 spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
1235 if (!spec)
1236 return -ENOMEM;
1237
1238 switch (type) {
1239 case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
1240 if (set_ecn_ce_spec(dev, port_num, &spec[0],
1241 MLX5_FS_IPV4_VERSION) ||
1242 set_ecn_ce_spec(dev, port_num, &spec[1],
1243 MLX5_FS_IPV6_VERSION)) {
1244 err = -EOPNOTSUPP;
1245 goto free;
1246 }
1247 spec_num = 2;
1248 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
1249 priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
1250 break;
1251
1252 case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
1253 if (!MLX5_CAP_FLOWTABLE(dev->mdev,
1254 ft_field_support_2_nic_receive_rdma.bth_opcode) ||
1255 set_cnp_spec(dev, port_num, &spec[0])) {
1256 err = -EOPNOTSUPP;
1257 goto free;
1258 }
1259 spec_num = 1;
1260 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
1261 priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
1262 break;
1263
1264 case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
1265 if (!MLX5_CAP_FLOWTABLE(dev->mdev,
1266 ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
1267 set_cnp_spec(dev, port_num, &spec[0])) {
1268 err = -EOPNOTSUPP;
1269 goto free;
1270 }
1271 spec_num = 1;
1272 fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
1273 priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
1274 break;
1275
1276 case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
1277 case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
1278 spec_num = 1;
1279 fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
1280 priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
1281 break;
1282
1283 case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
1284 case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
1285 spec_num = 1;
1286 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
1287 priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
1288 break;
1289
1290 default:
1291 err = -EOPNOTSUPP;
1292 goto free;
1293 }
1294
1295 ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
1296 if (!ns) {
1297 err = -EOPNOTSUPP;
1298 goto free;
1299 }
1300
1301 prio = get_opfc_prio(dev, type);
1302 if (!prio->flow_table) {
1303 err = get_per_qp_prio(dev, type);
1304 if (err)
1305 goto free;
1306
1307 prio = _get_prio(dev, ns, prio, priority,
1308 dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
1309 if (IS_ERR(prio)) {
1310 err = PTR_ERR(prio);
1311 goto put_prio;
1312 }
1313 }
1314
1315 dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1316 dst.counter = opfc->fc;
1317
1318 flow_act.action =
1319 MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1320
1321 for (i = 0; i < spec_num; i++) {
1322 opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
1323 &flow_act, &dst, 1);
1324 if (IS_ERR(opfc->rule[i])) {
1325 err = PTR_ERR(opfc->rule[i]);
1326 goto del_rules;
1327 }
1328 }
1329 prio->refcount += spec_num;
1330 kfree(spec);
1331
1332 return 0;
1333
1334 del_rules:
1335 for (i -= 1; i >= 0; i--)
1336 mlx5_del_flow_rules(opfc->rule[i]);
1337 put_flow_table(dev, prio, false);
1338 put_prio:
1339 put_per_qp_prio(dev, type);
1340 free:
1341 kfree(spec);
1342 return err;
1343 }
1344
mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev * dev,struct mlx5_ib_op_fc * opfc,enum mlx5_ib_optional_counter_type type)1345 void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
1346 struct mlx5_ib_op_fc *opfc,
1347 enum mlx5_ib_optional_counter_type type)
1348 {
1349 struct mlx5_ib_flow_prio *prio;
1350 int i;
1351
1352 prio = get_opfc_prio(dev, type);
1353
1354 for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
1355 mlx5_del_flow_rules(opfc->rule[i]);
1356 put_flow_table(dev, prio, true);
1357 }
1358
1359 put_per_qp_prio(dev, type);
1360 }
1361
mlx5r_fs_unbind_op_fc(struct ib_qp * qp,struct rdma_counter * counter)1362 void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter)
1363 {
1364 struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
1365 struct mlx5_ib_dev *dev = to_mdev(counter->device);
1366 struct mlx5_per_qp_opfc *per_qp_opfc;
1367 struct mlx5_ib_op_fc *in_use_opfc;
1368 struct mlx5_ib_flow_prio *prio;
1369 int i, j;
1370
1371 per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num);
1372 if (!per_qp_opfc)
1373 return;
1374
1375 for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
1376 i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
1377 if (!per_qp_opfc->opfcs[i].fc)
1378 continue;
1379
1380 if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i,
1381 &in_use_opfc)) {
1382 per_qp_opfc->opfcs[i].fc = NULL;
1383 continue;
1384 }
1385
1386 for (j = 0; j < MAX_OPFC_RULES; j++) {
1387 if (!per_qp_opfc->opfcs[i].rule[j])
1388 continue;
1389 mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]);
1390 prio = get_opfc_prio(dev, i);
1391 put_flow_table(dev, prio, true);
1392 }
1393 per_qp_opfc->opfcs[i].fc = NULL;
1394 }
1395
1396 kfree(per_qp_opfc);
1397 xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num);
1398 }
1399
mlx5r_fs_bind_op_fc(struct ib_qp * qp,struct rdma_counter * counter,u32 port)1400 int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
1401 u32 port)
1402 {
1403 struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
1404 struct mlx5_ib_dev *dev = to_mdev(qp->device);
1405 struct mlx5_per_qp_opfc *per_qp_opfc;
1406 struct mlx5_ib_flow_prio *prio;
1407 struct mlx5_ib_counters *cnts;
1408 struct mlx5_ib_op_fc *opfc;
1409 struct mlx5_fc *in_use_fc;
1410 int i, err, per_qp_type;
1411 bool new;
1412
1413 if (!counter->mode.bind_opcnt)
1414 return 0;
1415
1416 cnts = &dev->port[port - 1].cnts;
1417
1418 for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
1419 opfc = &cnts->opfcs[i];
1420 if (!opfc->fc)
1421 continue;
1422
1423 per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
1424 prio = get_opfc_prio(dev, per_qp_type);
1425 WARN_ON(!prio->flow_table);
1426
1427 if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc))
1428 mcounter->fc[per_qp_type] = in_use_fc;
1429
1430 if (!mcounter->fc[per_qp_type]) {
1431 mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev,
1432 false);
1433 if (IS_ERR(mcounter->fc[per_qp_type]))
1434 return PTR_ERR(mcounter->fc[per_qp_type]);
1435 }
1436
1437 per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new);
1438 if (!per_qp_opfc) {
1439 err = -ENOMEM;
1440 goto free_fc;
1441 }
1442 err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio,
1443 per_qp_type, qp->qp_num, port);
1444 if (err)
1445 goto del_rules;
1446 }
1447
1448 return 0;
1449
1450 del_rules:
1451 mlx5r_fs_unbind_op_fc(qp, counter);
1452 if (new)
1453 kfree(per_qp_opfc);
1454 free_fc:
1455 if (xa_empty(&mcounter->qpn_opfc_xa))
1456 mlx5r_fs_destroy_fcs(dev, counter);
1457 return err;
1458 }
1459
set_underlay_qp(struct mlx5_ib_dev * dev,struct mlx5_flow_spec * spec,u32 underlay_qpn)1460 static void set_underlay_qp(struct mlx5_ib_dev *dev,
1461 struct mlx5_flow_spec *spec,
1462 u32 underlay_qpn)
1463 {
1464 void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
1465 spec->match_criteria,
1466 misc_parameters);
1467 void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1468 misc_parameters);
1469
1470 if (underlay_qpn &&
1471 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1472 ft_field_support.bth_dst_qp)) {
1473 MLX5_SET(fte_match_set_misc,
1474 misc_params_v, bth_dst_qp, underlay_qpn);
1475 MLX5_SET(fte_match_set_misc,
1476 misc_params_c, bth_dst_qp, 0xffffff);
1477 }
1478 }
1479
mlx5_ib_set_rule_source_port(struct mlx5_ib_dev * dev,struct mlx5_flow_spec * spec,struct mlx5_eswitch_rep * rep)1480 static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
1481 struct mlx5_flow_spec *spec,
1482 struct mlx5_eswitch_rep *rep)
1483 {
1484 struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
1485 void *misc;
1486
1487 if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1488 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1489 misc_parameters_2);
1490
1491 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1492 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
1493 rep->vport));
1494 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1495 misc_parameters_2);
1496
1497 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1498 mlx5_eswitch_get_vport_metadata_mask());
1499 } else {
1500 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1501 misc_parameters);
1502
1503 MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
1504
1505 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1506 misc_parameters);
1507
1508 MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1509 }
1510 }
1511
_create_flow_rule(struct mlx5_ib_dev * dev,struct mlx5_ib_flow_prio * ft_prio,const struct ib_flow_attr * flow_attr,struct mlx5_flow_destination * dst,u32 underlay_qpn,struct mlx5_ib_create_flow * ucmd)1512 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
1513 struct mlx5_ib_flow_prio *ft_prio,
1514 const struct ib_flow_attr *flow_attr,
1515 struct mlx5_flow_destination *dst,
1516 u32 underlay_qpn,
1517 struct mlx5_ib_create_flow *ucmd)
1518 {
1519 struct mlx5_flow_table *ft = ft_prio->flow_table;
1520 struct mlx5_ib_flow_handler *handler;
1521 struct mlx5_flow_act flow_act = {};
1522 struct mlx5_flow_spec *spec;
1523 struct mlx5_flow_destination dest_arr[2] = {};
1524 struct mlx5_flow_destination *rule_dst = dest_arr;
1525 const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
1526 unsigned int spec_index;
1527 u32 prev_type = 0;
1528 int err = 0;
1529 int dest_num = 0;
1530 bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1531
1532 if (!is_valid_attr(dev->mdev, flow_attr))
1533 return ERR_PTR(-EINVAL);
1534
1535 if (dev->is_rep && is_egress)
1536 return ERR_PTR(-EINVAL);
1537
1538 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1539 handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1540 if (!handler || !spec) {
1541 err = -ENOMEM;
1542 goto free;
1543 }
1544
1545 INIT_LIST_HEAD(&handler->list);
1546
1547 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1548 err = parse_flow_attr(dev->mdev, spec,
1549 ib_flow, flow_attr, &flow_act,
1550 prev_type);
1551 if (err < 0)
1552 goto free;
1553
1554 prev_type = ((union ib_flow_spec *)ib_flow)->type;
1555 ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1556 }
1557
1558 if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
1559 memcpy(&dest_arr[0], dst, sizeof(*dst));
1560 dest_num++;
1561 }
1562
1563 if (!flow_is_multicast_only(flow_attr))
1564 set_underlay_qp(dev, spec, underlay_qpn);
1565
1566 if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
1567 struct mlx5_eswitch_rep *rep;
1568
1569 rep = dev->port[flow_attr->port - 1].rep;
1570 if (!rep) {
1571 err = -EINVAL;
1572 goto free;
1573 }
1574
1575 mlx5_ib_set_rule_source_port(dev, spec, rep);
1576 }
1577
1578 spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
1579
1580 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1581 struct mlx5_ib_mcounters *mcounters;
1582
1583 err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
1584 if (err)
1585 goto free;
1586
1587 mcounters = to_mcounters(flow_act.counters);
1588 handler->ibcounters = flow_act.counters;
1589 dest_arr[dest_num].type =
1590 MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1591 dest_arr[dest_num].counter =
1592 mcounters->hw_cntrs_hndl;
1593 dest_num++;
1594 }
1595
1596 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1597 if (!dest_num)
1598 rule_dst = NULL;
1599 } else {
1600 if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1601 flow_act.action |=
1602 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1603 if (is_egress)
1604 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1605 else if (dest_num)
1606 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1607 }
1608
1609 if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
1610 (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1611 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1612 mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
1613 spec->flow_context.flow_tag, flow_attr->type);
1614 err = -EINVAL;
1615 goto free;
1616 }
1617 handler->rule = mlx5_add_flow_rules(ft, spec,
1618 &flow_act,
1619 rule_dst, dest_num);
1620
1621 if (IS_ERR(handler->rule)) {
1622 err = PTR_ERR(handler->rule);
1623 goto free;
1624 }
1625
1626 ft_prio->refcount++;
1627 handler->prio = ft_prio;
1628 handler->dev = dev;
1629
1630 ft_prio->flow_table = ft;
1631 free:
1632 if (err && handler) {
1633 mlx5_ib_counters_clear_description(handler->ibcounters);
1634 kfree(handler);
1635 }
1636 kvfree(spec);
1637 return err ? ERR_PTR(err) : handler;
1638 }
1639
create_flow_rule(struct mlx5_ib_dev * dev,struct mlx5_ib_flow_prio * ft_prio,const struct ib_flow_attr * flow_attr,struct mlx5_flow_destination * dst)1640 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1641 struct mlx5_ib_flow_prio *ft_prio,
1642 const struct ib_flow_attr *flow_attr,
1643 struct mlx5_flow_destination *dst)
1644 {
1645 return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
1646 }
1647
1648 enum {
1649 LEFTOVERS_MC,
1650 LEFTOVERS_UC,
1651 };
1652
create_leftovers_rule(struct mlx5_ib_dev * dev,struct mlx5_ib_flow_prio * ft_prio,struct ib_flow_attr * flow_attr,struct mlx5_flow_destination * dst)1653 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1654 struct mlx5_ib_flow_prio *ft_prio,
1655 struct ib_flow_attr *flow_attr,
1656 struct mlx5_flow_destination *dst)
1657 {
1658 struct mlx5_ib_flow_handler *handler_ucast = NULL;
1659 struct mlx5_ib_flow_handler *handler = NULL;
1660
1661 static struct {
1662 struct ib_flow_attr flow_attr;
1663 struct ib_flow_spec_eth eth_flow;
1664 } leftovers_specs[] = {
1665 [LEFTOVERS_MC] = {
1666 .flow_attr = {
1667 .num_of_specs = 1,
1668 .size = sizeof(leftovers_specs[0])
1669 },
1670 .eth_flow = {
1671 .type = IB_FLOW_SPEC_ETH,
1672 .size = sizeof(struct ib_flow_spec_eth),
1673 .mask = {.dst_mac = {0x1} },
1674 .val = {.dst_mac = {0x1} }
1675 }
1676 },
1677 [LEFTOVERS_UC] = {
1678 .flow_attr = {
1679 .num_of_specs = 1,
1680 .size = sizeof(leftovers_specs[0])
1681 },
1682 .eth_flow = {
1683 .type = IB_FLOW_SPEC_ETH,
1684 .size = sizeof(struct ib_flow_spec_eth),
1685 .mask = {.dst_mac = {0x1} },
1686 .val = {.dst_mac = {} }
1687 }
1688 }
1689 };
1690
1691 handler = create_flow_rule(dev, ft_prio,
1692 &leftovers_specs[LEFTOVERS_MC].flow_attr,
1693 dst);
1694 if (!IS_ERR(handler) &&
1695 flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1696 handler_ucast = create_flow_rule(dev, ft_prio,
1697 &leftovers_specs[LEFTOVERS_UC].flow_attr,
1698 dst);
1699 if (IS_ERR(handler_ucast)) {
1700 mlx5_del_flow_rules(handler->rule);
1701 ft_prio->refcount--;
1702 kfree(handler);
1703 handler = handler_ucast;
1704 } else {
1705 list_add(&handler_ucast->list, &handler->list);
1706 }
1707 }
1708
1709 return handler;
1710 }
1711
create_sniffer_rule(struct mlx5_ib_dev * dev,struct mlx5_ib_flow_prio * ft_rx,struct mlx5_ib_flow_prio * ft_tx,struct mlx5_flow_destination * dst)1712 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
1713 struct mlx5_ib_flow_prio *ft_rx,
1714 struct mlx5_ib_flow_prio *ft_tx,
1715 struct mlx5_flow_destination *dst)
1716 {
1717 struct mlx5_ib_flow_handler *handler_rx;
1718 struct mlx5_ib_flow_handler *handler_tx;
1719 int err;
1720 static const struct ib_flow_attr flow_attr = {
1721 .num_of_specs = 0,
1722 .type = IB_FLOW_ATTR_SNIFFER,
1723 .size = sizeof(flow_attr)
1724 };
1725
1726 handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
1727 if (IS_ERR(handler_rx)) {
1728 err = PTR_ERR(handler_rx);
1729 goto err;
1730 }
1731
1732 handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
1733 if (IS_ERR(handler_tx)) {
1734 err = PTR_ERR(handler_tx);
1735 goto err_tx;
1736 }
1737
1738 list_add(&handler_tx->list, &handler_rx->list);
1739
1740 return handler_rx;
1741
1742 err_tx:
1743 mlx5_del_flow_rules(handler_rx->rule);
1744 ft_rx->refcount--;
1745 kfree(handler_rx);
1746 err:
1747 return ERR_PTR(err);
1748 }
1749
mlx5_ib_create_flow(struct ib_qp * qp,struct ib_flow_attr * flow_attr,struct ib_udata * udata)1750 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1751 struct ib_flow_attr *flow_attr,
1752 struct ib_udata *udata)
1753 {
1754 struct mlx5_ib_dev *dev = to_mdev(qp->device);
1755 struct mlx5_ib_qp *mqp = to_mqp(qp);
1756 struct mlx5_ib_flow_handler *handler = NULL;
1757 struct mlx5_flow_destination *dst = NULL;
1758 struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
1759 struct mlx5_ib_flow_prio *ft_prio;
1760 bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1761 struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
1762 size_t min_ucmd_sz, required_ucmd_sz;
1763 int err;
1764 int underlay_qpn;
1765
1766 if (udata && udata->inlen) {
1767 min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
1768 if (udata->inlen < min_ucmd_sz)
1769 return ERR_PTR(-EOPNOTSUPP);
1770
1771 err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
1772 if (err)
1773 return ERR_PTR(err);
1774
1775 /* currently only a single counters data block is supported */
1776 if (ucmd_hdr.ncounters_data > 1)
1777 return ERR_PTR(-EINVAL);
1778
1779 required_ucmd_sz = min_ucmd_sz +
1780 sizeof(struct mlx5_ib_flow_counters_data) *
1781 ucmd_hdr.ncounters_data;
1782 if (udata->inlen > required_ucmd_sz &&
1783 !ib_is_udata_cleared(udata, required_ucmd_sz,
1784 udata->inlen - required_ucmd_sz))
1785 return ERR_PTR(-EOPNOTSUPP);
1786
1787 ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
1788 if (!ucmd)
1789 return ERR_PTR(-ENOMEM);
1790
1791 err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
1792 if (err)
1793 goto free_ucmd;
1794 }
1795
1796 if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
1797 err = -ENOMEM;
1798 goto free_ucmd;
1799 }
1800
1801 if (flow_attr->flags &
1802 ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
1803 err = -EINVAL;
1804 goto free_ucmd;
1805 }
1806
1807 if (is_egress &&
1808 (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1809 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1810 err = -EINVAL;
1811 goto free_ucmd;
1812 }
1813
1814 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1815 if (!dst) {
1816 err = -ENOMEM;
1817 goto free_ucmd;
1818 }
1819
1820 mutex_lock(&dev->flow_db->lock);
1821
1822 ft_prio = get_flow_table(dev, flow_attr,
1823 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
1824 if (IS_ERR(ft_prio)) {
1825 err = PTR_ERR(ft_prio);
1826 goto unlock;
1827 }
1828 if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
1829 ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
1830 if (IS_ERR(ft_prio_tx)) {
1831 err = PTR_ERR(ft_prio_tx);
1832 ft_prio_tx = NULL;
1833 goto destroy_ft;
1834 }
1835 }
1836
1837 if (is_egress) {
1838 dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1839 } else {
1840 dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1841 if (mqp->is_rss)
1842 dst->tir_num = mqp->rss_qp.tirn;
1843 else
1844 dst->tir_num = mqp->raw_packet_qp.rq.tirn;
1845 }
1846
1847 switch (flow_attr->type) {
1848 case IB_FLOW_ATTR_NORMAL:
1849 underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
1850 mqp->underlay_qpn :
1851 0;
1852 handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
1853 underlay_qpn, ucmd);
1854 break;
1855 case IB_FLOW_ATTR_ALL_DEFAULT:
1856 case IB_FLOW_ATTR_MC_DEFAULT:
1857 handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
1858 break;
1859 case IB_FLOW_ATTR_SNIFFER:
1860 handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
1861 break;
1862 default:
1863 err = -EINVAL;
1864 goto destroy_ft;
1865 }
1866
1867 if (IS_ERR(handler)) {
1868 err = PTR_ERR(handler);
1869 handler = NULL;
1870 goto destroy_ft;
1871 }
1872
1873 mutex_unlock(&dev->flow_db->lock);
1874 kfree(dst);
1875 kfree(ucmd);
1876
1877 return &handler->ibflow;
1878
1879 destroy_ft:
1880 put_flow_table(dev, ft_prio, false);
1881 if (ft_prio_tx)
1882 put_flow_table(dev, ft_prio_tx, false);
1883 unlock:
1884 mutex_unlock(&dev->flow_db->lock);
1885 kfree(dst);
1886 free_ucmd:
1887 kfree(ucmd);
1888 return ERR_PTR(err);
1889 }
1890
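/*
 * Descriptive note (added annotation): in switchdev mode, resolve the
 * eswitch vport behind the given IB port so an RDMA_TRANSPORT table can be
 * created on behalf of that vport (MLX5_FLOW_TABLE_OTHER_VPORT).  When not
 * in switchdev mode the function returns 0 and leaves the outputs untouched.
 */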
1891 static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
1892 enum mlx5_flow_namespace_type type,
1893 u32 *flags, u16 *vport_idx,
1894 u16 *vport,
1895 struct mlx5_core_dev **ft_mdev,
1896 u32 ib_port)
1897 {
1898 struct mlx5_core_dev *esw_mdev;
1899
1900 if (!is_mdev_switchdev_mode(dev->mdev))
1901 return 0;
1902
1903 if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
1904 return -EOPNOTSUPP;
1905
1906 if (!dev->port[ib_port - 1].rep)
1907 return -EINVAL;
1908
1909 esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
1910 if (esw_mdev != dev->mdev)
1911 return -EOPNOTSUPP;
1912
1913 *flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
1914 *ft_mdev = esw_mdev;
1915 *vport = dev->port[ib_port - 1].rep->vport;
1916 *vport_idx = dev->port[ib_port - 1].rep->vport_index;
1917
1918 return 0;
1919 }
1920
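/*
 * Descriptive note (added annotation): map a user priority and namespace
 * type to the matching flow table priority slot in flow_db, creating the
 * underlying flow table on first use.  The table size is capped by the
 * per-namespace log_max_ft_size capability and MLX5_FS_MAX_ENTRIES, and
 * tunnel decap/reformat flags are set according to the eswitch encap mode.
 */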
1921 static struct mlx5_ib_flow_prio *
1922 _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
1923 enum mlx5_flow_namespace_type ns_type,
1924 bool mcast, u32 ib_port)
1925 {
1926 struct mlx5_core_dev *ft_mdev = dev->mdev;
1927 struct mlx5_flow_namespace *ns = NULL;
1928 struct mlx5_ib_flow_prio *prio = NULL;
1929 int max_table_size = 0;
1930 u16 vport_idx = 0;
1931 bool esw_encap;
1932 u32 flags = 0;
1933 u16 vport = 0;
1934 int priority;
1935 int ret;
1936
1937 if (mcast)
1938 priority = MLX5_IB_FLOW_MCAST_PRIO;
1939 else
1940 priority = ib_prio_to_core_prio(user_priority, false);
1941
1942 esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
1943 DEVLINK_ESWITCH_ENCAP_MODE_NONE;
1944 switch (ns_type) {
1945 case MLX5_FLOW_NAMESPACE_BYPASS:
1946 max_table_size = BIT(
1947 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
1948 if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
1949 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1950 if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1951 reformat_l3_tunnel_to_l2) &&
1952 !esw_encap)
1953 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1954 break;
1955 case MLX5_FLOW_NAMESPACE_EGRESS:
1956 max_table_size = BIT(
1957 MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
1958 if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
1959 !esw_encap)
1960 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1961 break;
1962 case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1963 max_table_size = BIT(
1964 MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
1965 if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
1966 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1967 if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
1968 reformat_l3_tunnel_to_l2) &&
1969 esw_encap)
1970 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1971 priority = user_priority;
1972 break;
1973 case MLX5_FLOW_NAMESPACE_RDMA_RX:
1974 max_table_size = BIT(
1975 MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
1976 priority = user_priority;
1977 break;
1978 case MLX5_FLOW_NAMESPACE_RDMA_TX:
1979 max_table_size = BIT(
1980 MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
1981 priority = user_priority;
1982 break;
1983 case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
1984 case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
1985 if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
1986 return ERR_PTR(-EINVAL);
1987 ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
1988 &vport_idx, &vport,
1989 &ft_mdev, ib_port);
1990 if (ret)
1991 return ERR_PTR(ret);
1992
1993 if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
1994 max_table_size =
1995 BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
1996 ft_mdev, log_max_ft_size));
1997 else
1998 max_table_size =
1999 BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
2000 ft_mdev, log_max_ft_size));
2001 priority = user_priority;
2002 break;
2003 default:
2004 break;
2005 }
2006
2007 max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
2008
2009 if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
2010 ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
2011 ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
2012 else
2013 ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
2014
2015 if (!ns)
2016 return ERR_PTR(-EOPNOTSUPP);
2017
2018 switch (ns_type) {
2019 case MLX5_FLOW_NAMESPACE_BYPASS:
2020 prio = &dev->flow_db->prios[priority];
2021 break;
2022 case MLX5_FLOW_NAMESPACE_EGRESS:
2023 prio = &dev->flow_db->egress_prios[priority];
2024 break;
2025 case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
2026 prio = &dev->flow_db->fdb[priority];
2027 break;
2028 case MLX5_FLOW_NAMESPACE_RDMA_RX:
2029 prio = &dev->flow_db->rdma_rx[priority];
2030 break;
2031 case MLX5_FLOW_NAMESPACE_RDMA_TX:
2032 prio = &dev->flow_db->rdma_tx[priority];
2033 break;
2034 case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
2035 prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
2036 break;
2037 case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
2038 prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
2039 break;
2040 default: return ERR_PTR(-EINVAL);
2041 }
2042
2043 if (!prio)
2044 return ERR_PTR(-EINVAL);
2045
2046 if (prio->flow_table)
2047 return prio;
2048
2049 return _get_prio(dev, ns, prio, priority, max_table_size,
2050 MLX5_FS_MAX_TYPES, flags, vport);
2051 }
2052
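/*
 * Descriptive note (added annotation): build a flow rule directly from a
 * raw (PRM-format) match value supplied by userspace.  The match criteria
 * and criteria_enable come from the matcher object, the match value from
 * cmd_in, and the rule is added to the priority's flow table with the
 * given destinations.
 */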
2053 static struct mlx5_ib_flow_handler *
2054 _create_raw_flow_rule(struct mlx5_ib_dev *dev,
2055 struct mlx5_ib_flow_prio *ft_prio,
2056 struct mlx5_flow_destination *dst,
2057 struct mlx5_ib_flow_matcher *fs_matcher,
2058 struct mlx5_flow_context *flow_context,
2059 struct mlx5_flow_act *flow_act,
2060 void *cmd_in, int inlen,
2061 int dst_num)
2062 {
2063 struct mlx5_ib_flow_handler *handler;
2064 struct mlx5_flow_spec *spec;
2065 struct mlx5_flow_table *ft = ft_prio->flow_table;
2066 int err = 0;
2067
2068 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
2069 handler = kzalloc(sizeof(*handler), GFP_KERNEL);
2070 if (!handler || !spec) {
2071 err = -ENOMEM;
2072 goto free;
2073 }
2074
2075 INIT_LIST_HEAD(&handler->list);
2076
2077 memcpy(spec->match_value, cmd_in, inlen);
2078 memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
2079 fs_matcher->mask_len);
2080 spec->match_criteria_enable = fs_matcher->match_criteria_enable;
2081 spec->flow_context = *flow_context;
2082
2083 handler->rule = mlx5_add_flow_rules(ft, spec,
2084 flow_act, dst, dst_num);
2085
2086 if (IS_ERR(handler->rule)) {
2087 err = PTR_ERR(handler->rule);
2088 goto free;
2089 }
2090
2091 ft_prio->refcount++;
2092 handler->prio = ft_prio;
2093 handler->dev = dev;
2094 ft_prio->flow_table = ft;
2095
2096 free:
2097 if (err)
2098 kfree(handler);
2099 kvfree(spec);
2100 return err ? ERR_PTR(err) : handler;
2101 }
2102
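/*
 * Descriptive note (added annotation): inspect the raw match value and mask
 * to decide whether the rule targets multicast traffic (multicast DMAC or
 * multicast destination IPv4); multicast rules are placed in the dedicated
 * multicast priority.
 */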
2103 static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
2104 void *match_v)
2105 {
2106 void *match_c;
2107 void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
2108 void *dmac, *dmac_mask;
2109 void *ipv4, *ipv4_mask;
2110
2111 if (!(fs_matcher->match_criteria_enable &
2112 (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
2113 return false;
2114
2115 match_c = fs_matcher->matcher_mask.match_params;
2116 match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
2117 outer_headers);
2118 match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
2119 outer_headers);
2120
2121 dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
2122 dmac_47_16);
2123 dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
2124 dmac_47_16);
2125
2126 if (is_multicast_ether_addr(dmac) &&
2127 is_multicast_ether_addr(dmac_mask))
2128 return true;
2129
2130 ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
2131 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
2132
2133 ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
2134 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
2135
2136 if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
2137 ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
2138 return true;
2139
2140 return false;
2141 }
2142
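/*
 * Descriptive note (added annotation): core of the DEVX/raw flow creation
 * path.  Locate (or create) the flow table for the matcher's namespace and
 * priority, translate the destination (TIR, flow table, wire port and/or
 * flow counter) into mlx5_flow_destination entries, and install the rule.
 */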
2143 static struct mlx5_ib_flow_handler *raw_fs_rule_add(
2144 struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
2145 struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
2146 struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type)
2147 {
2148 struct mlx5_flow_destination *dst;
2149 struct mlx5_ib_flow_prio *ft_prio;
2150 struct mlx5_ib_flow_handler *handler;
2151 int dst_num = 0;
2152 bool mcast;
2153 int err;
2154
2155 if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
2156 return ERR_PTR(-EOPNOTSUPP);
2157
2158 if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
2159 return ERR_PTR(-ENOMEM);
2160
2161 dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
2162 if (!dst)
2163 return ERR_PTR(-ENOMEM);
2164
2165 mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
2166 mutex_lock(&dev->flow_db->lock);
2167
2168 ft_prio = _get_flow_table(dev, fs_matcher->priority,
2169 fs_matcher->ns_type, mcast,
2170 fs_matcher->ib_port);
2171 if (IS_ERR(ft_prio)) {
2172 err = PTR_ERR(ft_prio);
2173 goto unlock;
2174 }
2175
2176 switch (dest_type) {
2177 case MLX5_FLOW_DESTINATION_TYPE_TIR:
2178 dst[dst_num].type = dest_type;
2179 dst[dst_num++].tir_num = dest_id;
2180 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2181 break;
2182 case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
2183 dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
2184 dst[dst_num++].ft_num = dest_id;
2185 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2186 break;
2187 case MLX5_FLOW_DESTINATION_TYPE_PORT:
2188 dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
2189 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
2190 break;
2191 default:
2192 break;
2193 }
2194
2195 if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
2196 if (WARN_ON(!counter)) {
2197 err = -EINVAL;
2198 goto unlock;
2199 }
2200 dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
2201 dst[dst_num].counter = counter;
2202 dst_num++;
2203 }
2204
2205 handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
2206 fs_matcher, flow_context, flow_act,
2207 cmd_in, inlen, dst_num);
2208
2209 if (IS_ERR(handler)) {
2210 err = PTR_ERR(handler);
2211 goto destroy_ft;
2212 }
2213
2214 mutex_unlock(&dev->flow_db->lock);
2215 atomic_inc(&fs_matcher->usecnt);
2216 handler->flow_matcher = fs_matcher;
2217
2218 kfree(dst);
2219
2220 return handler;
2221
2222 destroy_ft:
2223 put_flow_table(dev, ft_prio, false);
2224 unlock:
2225 mutex_unlock(&dev->flow_db->lock);
2226 kfree(dst);
2227
2228 return ERR_PTR(err);
2229 }
2230
2231 static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
2232 {
2233 switch (maction->flow_action_raw.sub_type) {
2234 case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
2235 mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
2236 maction->flow_action_raw.modify_hdr);
2237 break;
2238 case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
2239 mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
2240 maction->flow_action_raw.pkt_reformat);
2241 break;
2242 case MLX5_IB_FLOW_ACTION_DECAP:
2243 break;
2244 default:
2245 break;
2246 }
2247 }
2248
2249 static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
2250 {
2251 struct mlx5_ib_flow_action *maction = to_mflow_act(action);
2252
2253 switch (action->type) {
2254 case IB_FLOW_ACTION_UNSPECIFIED:
2255 destroy_flow_action_raw(maction);
2256 break;
2257 default:
2258 WARN_ON(true);
2259 break;
2260 }
2261
2262 kfree(maction);
2263 return 0;
2264 }
2265
2266 static int
2267 mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
2268 enum mlx5_flow_namespace_type *namespace)
2269 {
2270 switch (table_type) {
2271 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
2272 *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
2273 break;
2274 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
2275 *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
2276 break;
2277 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
2278 *namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
2279 break;
2280 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
2281 *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
2282 break;
2283 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
2284 *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
2285 break;
2286 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
2287 *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
2288 break;
2289 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
2290 *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
2291 break;
2292 default:
2293 return -EINVAL;
2294 }
2295
2296 return 0;
2297 }
2298
2299 static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
2300 [MLX5_IB_FLOW_TYPE_NORMAL] = {
2301 .type = UVERBS_ATTR_TYPE_PTR_IN,
2302 .u.ptr = {
2303 .len = sizeof(u16), /* data is priority */
2304 .min_len = sizeof(u16),
2305 }
2306 },
2307 [MLX5_IB_FLOW_TYPE_SNIFFER] = {
2308 .type = UVERBS_ATTR_TYPE_PTR_IN,
2309 UVERBS_ATTR_NO_DATA(),
2310 },
2311 [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
2312 .type = UVERBS_ATTR_TYPE_PTR_IN,
2313 UVERBS_ATTR_NO_DATA(),
2314 },
2315 [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
2316 .type = UVERBS_ATTR_TYPE_PTR_IN,
2317 UVERBS_ATTR_NO_DATA(),
2318 },
2319 };
2320
2321 static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
2322 {
2323 struct devx_obj *devx_obj = obj;
2324 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
2325
2326 switch (opcode) {
2327 case MLX5_CMD_OP_DESTROY_TIR:
2328 *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
2329 *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
2330 obj_id);
2331 return true;
2332
2333 case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
2334 *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2335 *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
2336 table_id);
2337 return true;
2338 default:
2339 return false;
2340 }
2341 }
2342
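/*
 * Descriptive note (added annotation): decode the destination attributes of
 * MLX5_IB_METHOD_CREATE_FLOW and enforce the per-namespace combinations -
 * DEVX objects and QPs are mutually exclusive, FDB accepts only a DEVX flow
 * table or drop, RDMA RX requires exactly one of a DEVX object or QP, and
 * TX-side namespaces default to the wire port unless drop is requested.
 */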
2343 static int get_dests(struct uverbs_attr_bundle *attrs,
2344 struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
2345 int *dest_type, struct ib_qp **qp, u32 *flags)
2346 {
2347 bool dest_devx, dest_qp;
2348 void *devx_obj;
2349 int err;
2350
2351 dest_devx = uverbs_attr_is_valid(attrs,
2352 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
2353 dest_qp = uverbs_attr_is_valid(attrs,
2354 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
2355
2356 *flags = 0;
2357 err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
2358 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
2359 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
2360 if (err)
2361 return err;
2362
2363 /* The two flags are mutually exclusive */
2364 if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
2365 *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
2366 return -EINVAL;
2367
2368 if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
2369 if (dest_devx && (dest_qp || *flags))
2370 return -EINVAL;
2371 else if (dest_qp && *flags)
2372 return -EINVAL;
2373 }
2374
2375 /* Allow only a DEVX object or drop as destination for FDB */
2376 if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2377 !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
2378 return -EINVAL;
2379
2380 /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
2381 if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
2382 fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
2383 ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
2384 return -EINVAL;
2385
2386 *qp = NULL;
2387 if (dest_devx) {
2388 devx_obj =
2389 uverbs_attr_get_obj(attrs,
2390 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
2391
2392 /* Verify that the given DEVX object is a flow
2393 * steering destination.
2394 */
2395 if (!is_flow_dest(devx_obj, dest_id, dest_type))
2396 return -EINVAL;
2397 /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
2398 if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
2399 fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
2400 fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
2401 *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
2402 return -EINVAL;
2403 } else if (dest_qp) {
2404 struct mlx5_ib_qp *mqp;
2405
2406 *qp = uverbs_attr_get_obj(attrs,
2407 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
2408 if (IS_ERR(*qp))
2409 return PTR_ERR(*qp);
2410
2411 if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
2412 return -EINVAL;
2413
2414 mqp = to_mqp(*qp);
2415 if (mqp->is_rss)
2416 *dest_id = mqp->rss_qp.tirn;
2417 else
2418 *dest_id = mqp->raw_packet_qp.rq.tirn;
2419 *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
2420 } else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
2421 fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
2422 fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
2423 !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
2424 *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
2425 }
2426
2427 if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
2428 (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
2429 fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
2430 fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
2431 return -EINVAL;
2432
2433 return 0;
2434 }
2435
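/*
 * Descriptive note (added annotation): check that the DEVX object is a flow
 * counter and translate it to a counter id.  The object's destroy command
 * box identifies the counter bulk, and the optional offset selects a single
 * counter within that bulk.
 */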
2436 static bool
2437 is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size)
2438 {
2439 struct devx_obj *devx_obj = obj;
2440 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
2441
2442 if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
2443
2444 if (offset && offset >= devx_obj->flow_counter_bulk_size)
2445 return false;
2446
2447 *fc_bulk_size = devx_obj->flow_counter_bulk_size;
2448 *counter_id = MLX5_GET(dealloc_flow_counter_in,
2449 devx_obj->dinbox,
2450 flow_counter_id);
2451 *counter_id += offset;
2452 return true;
2453 }
2454
2455 return false;
2456 }
2457
2458 #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
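/*
 * Descriptive note (added annotation): handler for
 * MLX5_IB_METHOD_CREATE_FLOW.  Requires CAP_NET_RAW, resolves destinations
 * and an optional DEVX counter, parses attached flow actions and the flow
 * tag, and installs the rule via raw_fs_rule_add() before binding it to the
 * uobject.
 */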
2459 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
2460 struct uverbs_attr_bundle *attrs)
2461 {
2462 struct mlx5_flow_context flow_context = {.flow_tag =
2463 MLX5_FS_DEFAULT_FLOW_TAG};
2464 int dest_id, dest_type = -1, inlen, len, ret, i;
2465 struct mlx5_ib_flow_handler *flow_handler;
2466 struct mlx5_ib_flow_matcher *fs_matcher;
2467 struct ib_uobject **arr_flow_actions;
2468 struct ib_uflow_resources *uflow_res;
2469 struct mlx5_flow_act flow_act = {};
2470 struct mlx5_fc *counter = NULL;
2471 struct ib_qp *qp = NULL;
2472 void *devx_obj, *cmd_in;
2473 struct ib_uobject *uobj;
2474 struct mlx5_ib_dev *dev;
2475 u32 flags;
2476
2477 if (!capable(CAP_NET_RAW))
2478 return -EPERM;
2479
2480 fs_matcher = uverbs_attr_get_obj(attrs,
2481 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
2482 uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
2483 dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2484
2485 if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
2486 return -EINVAL;
2487
2488 if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
2489 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
2490
2491 if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
2492 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
2493
2494 len = uverbs_attr_get_uobjs_arr(attrs,
2495 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
2496 if (len) {
2497 u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0;
2498 devx_obj = arr_flow_actions[0]->object;
2499
2500 if (uverbs_attr_is_valid(attrs,
2501 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
2502
2503 int num_offsets = uverbs_attr_ptr_get_array_size(
2504 attrs,
2505 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
2506 sizeof(u32));
2507
2508 if (num_offsets != 1)
2509 return -EINVAL;
2510
2511 offset_attr = uverbs_attr_get_alloced_ptr(
2512 attrs,
2513 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
2514 offset = *offset_attr;
2515 }
2516
2517 if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size))
2518 return -EINVAL;
2519 counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size);
2520 if (IS_ERR(counter))
2521 return PTR_ERR(counter);
2522
2523 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2524 }
2525
2526 cmd_in = uverbs_attr_get_alloced_ptr(
2527 attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
2528 inlen = uverbs_attr_get_len(attrs,
2529 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
2530
2531 uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
2532 if (!uflow_res) {
2533 ret = -ENOMEM;
2534 goto destroy_counter;
2535 }
2536
2537 len = uverbs_attr_get_uobjs_arr(attrs,
2538 MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
2539 for (i = 0; i < len; i++) {
2540 struct mlx5_ib_flow_action *maction =
2541 to_mflow_act(arr_flow_actions[i]->object);
2542
2543 ret = parse_flow_flow_action(maction, false, &flow_act);
2544 if (ret)
2545 goto err_out;
2546 flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
2547 arr_flow_actions[i]->object);
2548 }
2549
2550 ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
2551 MLX5_IB_ATTR_CREATE_FLOW_TAG);
2552 if (!ret) {
2553 if (flow_context.flow_tag >= BIT(24)) {
2554 ret = -EINVAL;
2555 goto err_out;
2556 }
2557 flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
2558 }
2559
2560 flow_handler =
2561 raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
2562 counter, cmd_in, inlen, dest_id, dest_type);
2563 if (IS_ERR(flow_handler)) {
2564 ret = PTR_ERR(flow_handler);
2565 goto err_out;
2566 }
2567
2568 ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
2569
2570 return 0;
2571 err_out:
2572 ib_uverbs_flow_resources_free(uflow_res);
2573 destroy_counter:
2574 if (counter)
2575 mlx5_fc_local_destroy(counter);
2576 return ret;
2577 }
2578
2579 static int flow_matcher_cleanup(struct ib_uobject *uobject,
2580 enum rdma_remove_reason why,
2581 struct uverbs_attr_bundle *attrs)
2582 {
2583 struct mlx5_ib_flow_matcher *obj = uobject->object;
2584
2585 if (atomic_read(&obj->usecnt))
2586 return -EBUSY;
2587
2588 kfree(obj);
2589 return 0;
2590 }
2591
2592 static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
2593 struct mlx5_ib_flow_prio *ft_prio,
2594 enum mlx5_flow_namespace_type ns_type)
2595 {
2596 struct mlx5_flow_table_attr ft_attr = {};
2597 struct mlx5_flow_namespace *ns;
2598 struct mlx5_flow_table *ft;
2599
2600 if (ft_prio->anchor.ft)
2601 return 0;
2602
2603 ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
2604 if (!ns)
2605 return -EOPNOTSUPP;
2606
2607 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
2608 ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
2609 ft_attr.prio = 0;
2610 ft_attr.max_fte = 2;
2611 ft_attr.level = 1;
2612
2613 ft = mlx5_create_flow_table(ns, &ft_attr);
2614 if (IS_ERR(ft))
2615 return PTR_ERR(ft);
2616
2617 ft_prio->anchor.ft = ft;
2618
2619 return 0;
2620 }
2621
2622 static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
2623 {
2624 if (ft_prio->anchor.ft) {
2625 mlx5_destroy_flow_table(ft_prio->anchor.ft);
2626 ft_prio->anchor.ft = NULL;
2627 }
2628 }
2629
2630 static int
2631 steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2632 {
2633 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2634 struct mlx5_flow_group *fg;
2635 void *flow_group_in;
2636 int err = 0;
2637
2638 if (ft_prio->anchor.fg_drop)
2639 return 0;
2640
2641 flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2642 if (!flow_group_in)
2643 return -ENOMEM;
2644
2645 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
2646 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
2647
2648 fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2649 if (IS_ERR(fg)) {
2650 err = PTR_ERR(fg);
2651 goto out;
2652 }
2653
2654 ft_prio->anchor.fg_drop = fg;
2655
2656 out:
2657 kvfree(flow_group_in);
2658
2659 return err;
2660 }
2661
2662 static void
2663 steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2664 {
2665 if (ft_prio->anchor.fg_drop) {
2666 mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
2667 ft_prio->anchor.fg_drop = NULL;
2668 }
2669 }
2670
2671 static int
2672 steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2673 {
2674 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2675 struct mlx5_flow_group *fg;
2676 void *flow_group_in;
2677 int err = 0;
2678
2679 if (ft_prio->anchor.fg_goto_table)
2680 return 0;
2681
2682 flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2683 if (!flow_group_in)
2684 return -ENOMEM;
2685
2686 fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2687 if (IS_ERR(fg)) {
2688 err = PTR_ERR(fg);
2689 goto out;
2690 }
2691 ft_prio->anchor.fg_goto_table = fg;
2692
2693 out:
2694 kvfree(flow_group_in);
2695
2696 return err;
2697 }
2698
2699 static void
2700 steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2701 {
2702 if (ft_prio->anchor.fg_goto_table) {
2703 mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
2704 ft_prio->anchor.fg_goto_table = NULL;
2705 }
2706 }
2707
2708 static int
2709 steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2710 {
2711 struct mlx5_flow_act flow_act = {};
2712 struct mlx5_flow_handle *handle;
2713
2714 if (ft_prio->anchor.rule_drop)
2715 return 0;
2716
2717 flow_act.fg = ft_prio->anchor.fg_drop;
2718 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2719
2720 handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2721 NULL, 0);
2722 if (IS_ERR(handle))
2723 return PTR_ERR(handle);
2724
2725 ft_prio->anchor.rule_drop = handle;
2726
2727 return 0;
2728 }
2729
2730 static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2731 {
2732 if (ft_prio->anchor.rule_drop) {
2733 mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
2734 ft_prio->anchor.rule_drop = NULL;
2735 }
2736 }
2737
2738 static int
2739 steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2740 {
2741 struct mlx5_flow_destination dest = {};
2742 struct mlx5_flow_act flow_act = {};
2743 struct mlx5_flow_handle *handle;
2744
2745 if (ft_prio->anchor.rule_goto_table)
2746 return 0;
2747
2748 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2749 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
2750 flow_act.fg = ft_prio->anchor.fg_goto_table;
2751
2752 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2753 dest.ft = ft_prio->flow_table;
2754
2755 handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2756 &dest, 1);
2757 if (IS_ERR(handle))
2758 return PTR_ERR(handle);
2759
2760 ft_prio->anchor.rule_goto_table = handle;
2761
2762 return 0;
2763 }
2764
2765 static void
2766 steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2767 {
2768 if (ft_prio->anchor.rule_goto_table) {
2769 mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
2770 ft_prio->anchor.rule_goto_table = NULL;
2771 }
2772 }
2773
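/*
 * Descriptive note (added annotation): a steering anchor is a small
 * unmanaged flow table with two fixed entries - a drop rule in the group at
 * flow index 1 and a goto rule that jumps into the priority's regular flow
 * table.  Create all of its resources in order, unwinding on failure.
 */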
2774 static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
2775 struct mlx5_ib_flow_prio *ft_prio,
2776 enum mlx5_flow_namespace_type ns_type)
2777 {
2778 int err;
2779
2780 err = steering_anchor_create_ft(dev, ft_prio, ns_type);
2781 if (err)
2782 return err;
2783
2784 err = steering_anchor_create_fg_drop(ft_prio);
2785 if (err)
2786 goto destroy_ft;
2787
2788 err = steering_anchor_create_fg_goto_table(ft_prio);
2789 if (err)
2790 goto destroy_fg_drop;
2791
2792 err = steering_anchor_create_rule_drop(ft_prio);
2793 if (err)
2794 goto destroy_fg_goto_table;
2795
2796 err = steering_anchor_create_rule_goto_table(ft_prio);
2797 if (err)
2798 goto destroy_rule_drop;
2799
2800 return 0;
2801
2802 destroy_rule_drop:
2803 steering_anchor_destroy_rule_drop(ft_prio);
2804 destroy_fg_goto_table:
2805 steering_anchor_destroy_fg_goto_table(ft_prio);
2806 destroy_fg_drop:
2807 steering_anchor_destroy_fg_drop(ft_prio);
2808 destroy_ft:
2809 steering_anchor_destroy_ft(ft_prio);
2810
2811 return err;
2812 }
2813
2814 static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
2815 {
2816 steering_anchor_destroy_rule_goto_table(ft_prio);
2817 steering_anchor_destroy_rule_drop(ft_prio);
2818 steering_anchor_destroy_fg_goto_table(ft_prio);
2819 steering_anchor_destroy_fg_drop(ft_prio);
2820 steering_anchor_destroy_ft(ft_prio);
2821 }
2822
2823 static int steering_anchor_cleanup(struct ib_uobject *uobject,
2824 enum rdma_remove_reason why,
2825 struct uverbs_attr_bundle *attrs)
2826 {
2827 struct mlx5_ib_steering_anchor *obj = uobject->object;
2828
2829 if (atomic_read(&obj->usecnt))
2830 return -EBUSY;
2831
2832 mutex_lock(&obj->dev->flow_db->lock);
2833 if (!--obj->ft_prio->anchor.rule_goto_table_ref)
2834 steering_anchor_destroy_rule_goto_table(obj->ft_prio);
2835
2836 put_flow_table(obj->dev, obj->ft_prio, true);
2837 mutex_unlock(&obj->dev->flow_db->lock);
2838
2839 kfree(obj);
2840 return 0;
2841 }
2842
2843 static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
2844 int count)
2845 {
2846 while (count--)
2847 mlx5_steering_anchor_destroy_res(&prio[count]);
2848 }
2849
2850 void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
2851 {
2852 fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
2853 fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
2854 fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
2855 fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
2856 fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
2857 fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
2858 fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
2859 }
2860
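/*
 * Descriptive note (added annotation): determine the matcher's namespace.
 * The explicit FT_TYPE attribute takes precedence, the legacy FLOW_FLAGS
 * attribute (egress bit) maps to the NIC TX namespace, and the default is
 * the NIC RX bypass namespace.  Specifying both attributes is rejected.
 */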
2861 static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
2862 struct mlx5_ib_flow_matcher *obj)
2863 {
2864 enum mlx5_ib_uapi_flow_table_type ft_type =
2865 MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
2866 u32 flags;
2867 int err;
2868
2869 /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE; the legacy
2870 * flow-flags attribute is kept only to avoid breaking existing userspace.
2871 */
2872 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
2873 uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
2874 return -EINVAL;
2875
2876 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
2877 err = uverbs_get_const(&ft_type, attrs,
2878 MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
2879 if (err)
2880 return err;
2881
2882 err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
2883 if (err)
2884 return err;
2885
2886 return 0;
2887 }
2888
2889 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
2890 err = uverbs_get_flags32(&flags, attrs,
2891 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2892 IB_FLOW_ATTR_FLAGS_EGRESS);
2893 if (err)
2894 return err;
2895
2896 if (flags)
2897 return mlx5_ib_ft_type_to_namespace(
2898 MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
2899 &obj->ns_type);
2900 }
2901
2902 obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
2903
2904 return 0;
2905 }
2906
2907 static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
2908 {
2909 if (is_mdev_switchdev_mode(dev->mdev))
2910 return UCAP_ENABLED(enabled_caps,
2911 RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
2912
2913 return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
2914 }
2915
2916 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
2917 struct uverbs_attr_bundle *attrs)
2918 {
2919 struct ib_uobject *uobj = uverbs_attr_get_uobject(
2920 attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
2921 struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2922 struct mlx5_ib_flow_matcher *obj;
2923 int err;
2924
2925 obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
2926 if (!obj)
2927 return -ENOMEM;
2928
2929 obj->mask_len = uverbs_attr_get_len(
2930 attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2931 err = uverbs_copy_from(&obj->matcher_mask,
2932 attrs,
2933 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2934 if (err)
2935 goto end;
2936
2937 obj->flow_type = uverbs_attr_get_enum_id(
2938 attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2939
2940 if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
2941 err = uverbs_copy_from(&obj->priority,
2942 attrs,
2943 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2944 if (err)
2945 goto end;
2946 }
2947
2948 err = uverbs_copy_from(&obj->match_criteria_enable,
2949 attrs,
2950 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
2951 if (err)
2952 goto end;
2953
2954 err = mlx5_ib_matcher_ns(attrs, obj);
2955 if (err)
2956 goto end;
2957
2958 if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2959 mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
2960 err = -EINVAL;
2961 goto end;
2962 }
2963
2964 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
2965 err = uverbs_copy_from(&obj->ib_port, attrs,
2966 MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
2967 if (err)
2968 goto end;
2969 if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
2970 err = -EINVAL;
2971 goto end;
2972 }
2973 if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
2974 obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
2975 err = -EINVAL;
2976 goto end;
2977 }
2978 if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
2979 err = -EOPNOTSUPP;
2980 goto end;
2981 }
2982 }
2983
2984 uobj->object = obj;
2985 obj->mdev = dev->mdev;
2986 atomic_set(&obj->usecnt, 0);
2987 return 0;
2988
2989 end:
2990 kfree(obj);
2991 return err;
2992 }
2993
2994 static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
2995 struct uverbs_attr_bundle *attrs)
2996 {
2997 struct ib_uobject *uobj = uverbs_attr_get_uobject(
2998 attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
2999 struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
3000 enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
3001 enum mlx5_flow_namespace_type ns_type;
3002 struct mlx5_ib_steering_anchor *obj;
3003 struct mlx5_ib_flow_prio *ft_prio;
3004 u16 priority;
3005 u32 ft_id;
3006 int err;
3007
3008 if (!capable(CAP_NET_RAW))
3009 return -EPERM;
3010
3011 err = uverbs_get_const(&ib_uapi_ft_type, attrs,
3012 MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
3013 if (err)
3014 return err;
3015
3016 err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
3017 if (err)
3018 return err;
3019
3020 err = uverbs_copy_from(&priority, attrs,
3021 MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
3022 if (err)
3023 return err;
3024
3025 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3026 if (!obj)
3027 return -ENOMEM;
3028
3029 mutex_lock(&dev->flow_db->lock);
3030
3031 ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
3032 if (IS_ERR(ft_prio)) {
3033 err = PTR_ERR(ft_prio);
3034 goto free_obj;
3035 }
3036
3037 ft_prio->refcount++;
3038
3039 if (!ft_prio->anchor.rule_goto_table_ref) {
3040 err = steering_anchor_create_res(dev, ft_prio, ns_type);
3041 if (err)
3042 goto put_flow_table;
3043 }
3044
3045 ft_prio->anchor.rule_goto_table_ref++;
3046
3047 ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
3048
3049 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
3050 &ft_id, sizeof(ft_id));
3051 if (err)
3052 goto destroy_res;
3053
3054 mutex_unlock(&dev->flow_db->lock);
3055
3056 uobj->object = obj;
3057 obj->dev = dev;
3058 obj->ft_prio = ft_prio;
3059 atomic_set(&obj->usecnt, 0);
3060
3061 return 0;
3062
3063 destroy_res:
3064 --ft_prio->anchor.rule_goto_table_ref;
3065 mlx5_steering_anchor_destroy_res(ft_prio);
3066 put_flow_table:
3067 put_flow_table(dev, ft_prio, true);
3068 free_obj:
3069 mutex_unlock(&dev->flow_db->lock);
3070 kfree(obj);
3071
3072 return err;
3073 }
3074
3075 static struct ib_flow_action *
3076 mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
3077 enum mlx5_ib_uapi_flow_table_type ft_type,
3078 u8 num_actions, void *in)
3079 {
3080 enum mlx5_flow_namespace_type namespace;
3081 struct mlx5_ib_flow_action *maction;
3082 int ret;
3083
3084 ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
3085 if (ret)
3086 return ERR_PTR(-EINVAL);
3087
3088 maction = kzalloc(sizeof(*maction), GFP_KERNEL);
3089 if (!maction)
3090 return ERR_PTR(-ENOMEM);
3091
3092 maction->flow_action_raw.modify_hdr =
3093 mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
3094
3095 if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
3096 ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
3097 kfree(maction);
3098 return ERR_PTR(ret);
3099 }
3100 maction->flow_action_raw.sub_type =
3101 MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
3102 maction->flow_action_raw.dev = dev;
3103
3104 return &maction->ib_action;
3105 }
3106
3107 static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
3108 {
3109 return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3110 max_modify_header_actions) ||
3111 MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
3112 max_modify_header_actions) ||
3113 MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
3114 max_modify_header_actions);
3115 }
3116
3117 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
3118 struct uverbs_attr_bundle *attrs)
3119 {
3120 struct ib_uobject *uobj = uverbs_attr_get_uobject(
3121 attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
3122 struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
3123 enum mlx5_ib_uapi_flow_table_type ft_type;
3124 struct ib_flow_action *action;
3125 int num_actions;
3126 void *in;
3127 int ret;
3128
3129 if (!mlx5_ib_modify_header_supported(mdev))
3130 return -EOPNOTSUPP;
3131
3132 in = uverbs_attr_get_alloced_ptr(attrs,
3133 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
3134
3135 num_actions = uverbs_attr_ptr_get_array_size(
3136 attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
3137 MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
3138 if (num_actions < 0)
3139 return num_actions;
3140
3141 ret = uverbs_get_const(&ft_type, attrs,
3142 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
3143 if (ret)
3144 return ret;
3145 action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
3146 if (IS_ERR(action))
3147 return PTR_ERR(action);
3148
3149 uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
3150 IB_FLOW_ACTION_UNSPECIFIED);
3151
3152 return 0;
3153 }
3154
3155 static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
3156 u8 packet_reformat_type,
3157 u8 ft_type)
3158 {
3159 switch (packet_reformat_type) {
3160 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
3161 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
3162 return MLX5_CAP_FLOWTABLE(ibdev->mdev,
3163 encap_general_header);
3164 break;
3165 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
3166 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
3167 return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
3168 reformat_l2_to_l3_tunnel);
3169 break;
3170 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
3171 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
3172 return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
3173 reformat_l3_tunnel_to_l2);
3174 break;
3175 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
3176 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
3177 return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
3178 break;
3179 default:
3180 break;
3181 }
3182
3183 return false;
3184 }
3185
3186 static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
3187 {
3188 switch (dv_prt) {
3189 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
3190 *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
3191 break;
3192 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
3193 *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
3194 break;
3195 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
3196 *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
3197 break;
3198 default:
3199 return -EINVAL;
3200 }
3201
3202 return 0;
3203 }
3204
3205 static int mlx5_ib_flow_action_create_packet_reformat_ctx(
3206 struct mlx5_ib_dev *dev,
3207 struct mlx5_ib_flow_action *maction,
3208 u8 ft_type, u8 dv_prt,
3209 void *in, size_t len)
3210 {
3211 struct mlx5_pkt_reformat_params reformat_params;
3212 enum mlx5_flow_namespace_type namespace;
3213 u8 prm_prt;
3214 int ret;
3215
3216 ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
3217 if (ret)
3218 return ret;
3219
3220 ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
3221 if (ret)
3222 return ret;
3223
3224 memset(&reformat_params, 0, sizeof(reformat_params));
3225 reformat_params.type = prm_prt;
3226 reformat_params.size = len;
3227 reformat_params.data = in;
3228 maction->flow_action_raw.pkt_reformat =
3229 mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
3230 namespace);
3231 if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
3232 ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
3233 return ret;
3234 }
3235
3236 maction->flow_action_raw.sub_type =
3237 MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
3238 maction->flow_action_raw.dev = dev;
3239
3240 return 0;
3241 }
3242
3243 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
3244 struct uverbs_attr_bundle *attrs)
3245 {
3246 struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
3247 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
3248 struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
3249 enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
3250 enum mlx5_ib_uapi_flow_table_type ft_type;
3251 struct mlx5_ib_flow_action *maction;
3252 int ret;
3253
3254 ret = uverbs_get_const(&ft_type, attrs,
3255 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
3256 if (ret)
3257 return ret;
3258
3259 ret = uverbs_get_const(&dv_prt, attrs,
3260 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
3261 if (ret)
3262 return ret;
3263
3264 if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
3265 return -EOPNOTSUPP;
3266
3267 maction = kzalloc(sizeof(*maction), GFP_KERNEL);
3268 if (!maction)
3269 return -ENOMEM;
3270
3271 if (dv_prt ==
3272 MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
3273 maction->flow_action_raw.sub_type =
3274 MLX5_IB_FLOW_ACTION_DECAP;
3275 maction->flow_action_raw.dev = mdev;
3276 } else {
3277 void *in;
3278 int len;
3279
3280 in = uverbs_attr_get_alloced_ptr(attrs,
3281 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
3282 if (IS_ERR(in)) {
3283 ret = PTR_ERR(in);
3284 goto free_maction;
3285 }
3286
3287 len = uverbs_attr_get_len(attrs,
3288 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
3289
3290 ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
3291 maction, ft_type, dv_prt, in, len);
3292 if (ret)
3293 goto free_maction;
3294 }
3295
3296 uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
3297 IB_FLOW_ACTION_UNSPECIFIED);
3298 return 0;
3299
3300 free_maction:
3301 kfree(maction);
3302 return ret;
3303 }
3304
3305 DECLARE_UVERBS_NAMED_METHOD(
3306 MLX5_IB_METHOD_CREATE_FLOW,
3307 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
3308 UVERBS_OBJECT_FLOW,
3309 UVERBS_ACCESS_NEW,
3310 UA_MANDATORY),
3311 UVERBS_ATTR_PTR_IN(
3312 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
3313 UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
3314 UA_MANDATORY,
3315 UA_ALLOC_AND_COPY),
3316 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
3317 MLX5_IB_OBJECT_FLOW_MATCHER,
3318 UVERBS_ACCESS_READ,
3319 UA_MANDATORY),
3320 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
3321 UVERBS_OBJECT_QP,
3322 UVERBS_ACCESS_READ),
3323 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
3324 MLX5_IB_OBJECT_DEVX_OBJ,
3325 UVERBS_ACCESS_READ),
3326 UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
3327 UVERBS_OBJECT_FLOW_ACTION,
3328 UVERBS_ACCESS_READ, 1,
3329 MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
3330 UA_OPTIONAL),
3331 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
3332 UVERBS_ATTR_TYPE(u32),
3333 UA_OPTIONAL),
3334 UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
3335 MLX5_IB_OBJECT_DEVX_OBJ,
3336 UVERBS_ACCESS_READ, 1, 1,
3337 UA_OPTIONAL),
3338 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
3339 UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
3340 UA_OPTIONAL,
3341 UA_ALLOC_AND_COPY),
3342 UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
3343 enum mlx5_ib_create_flow_flags,
3344 UA_OPTIONAL));
3345
3346 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
3347 MLX5_IB_METHOD_DESTROY_FLOW,
3348 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
3349 UVERBS_OBJECT_FLOW,
3350 UVERBS_ACCESS_DESTROY,
3351 UA_MANDATORY));
3352
3353 ADD_UVERBS_METHODS(mlx5_ib_fs,
3354 UVERBS_OBJECT_FLOW,
3355 &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
3356 &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
3357
3358 DECLARE_UVERBS_NAMED_METHOD(
3359 MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
3360 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
3361 UVERBS_OBJECT_FLOW_ACTION,
3362 UVERBS_ACCESS_NEW,
3363 UA_MANDATORY),
3364 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
3365 UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
3366 set_add_copy_action_in_auto)),
3367 UA_MANDATORY,
3368 UA_ALLOC_AND_COPY),
3369 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
3370 enum mlx5_ib_uapi_flow_table_type,
3371 UA_MANDATORY));
3372
3373 DECLARE_UVERBS_NAMED_METHOD(
3374 MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
3375 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
3376 UVERBS_OBJECT_FLOW_ACTION,
3377 UVERBS_ACCESS_NEW,
3378 UA_MANDATORY),
3379 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
3380 UVERBS_ATTR_MIN_SIZE(1),
3381 UA_ALLOC_AND_COPY,
3382 UA_OPTIONAL),
3383 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
3384 enum mlx5_ib_uapi_flow_action_packet_reformat_type,
3385 UA_MANDATORY),
3386 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
3387 enum mlx5_ib_uapi_flow_table_type,
3388 UA_MANDATORY));
3389
3390 ADD_UVERBS_METHODS(
3391 mlx5_ib_flow_actions,
3392 UVERBS_OBJECT_FLOW_ACTION,
3393 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
3394 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
3395
3396 DECLARE_UVERBS_NAMED_METHOD(
3397 MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
3398 UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
3399 MLX5_IB_OBJECT_FLOW_MATCHER,
3400 UVERBS_ACCESS_NEW,
3401 UA_MANDATORY),
3402 UVERBS_ATTR_PTR_IN(
3403 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
3404 UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
3405 UA_MANDATORY),
3406 UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
3407 mlx5_ib_flow_type,
3408 UA_MANDATORY),
3409 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
3410 UVERBS_ATTR_TYPE(u8),
3411 UA_MANDATORY),
3412 UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
3413 enum ib_flow_flags,
3414 UA_OPTIONAL),
3415 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
3416 enum mlx5_ib_uapi_flow_table_type,
3417 UA_OPTIONAL),
3418 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
3419 UVERBS_ATTR_TYPE(u32),
3420 UA_OPTIONAL));
3421
3422 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
3423 MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
3424 UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
3425 MLX5_IB_OBJECT_FLOW_MATCHER,
3426 UVERBS_ACCESS_DESTROY,
3427 UA_MANDATORY));
3428
3429 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
3430 UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
3431 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
3432 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
3433
3434 DECLARE_UVERBS_NAMED_METHOD(
3435 MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
3436 UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
3437 MLX5_IB_OBJECT_STEERING_ANCHOR,
3438 UVERBS_ACCESS_NEW,
3439 UA_MANDATORY),
3440 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
3441 enum mlx5_ib_uapi_flow_table_type,
3442 UA_MANDATORY),
3443 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
3444 UVERBS_ATTR_TYPE(u16),
3445 UA_MANDATORY),
3446 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
3447 UVERBS_ATTR_TYPE(u32),
3448 UA_MANDATORY));
3449
3450 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
3451 MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
3452 UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
3453 MLX5_IB_OBJECT_STEERING_ANCHOR,
3454 UVERBS_ACCESS_DESTROY,
3455 UA_MANDATORY));
3456
3457 DECLARE_UVERBS_NAMED_OBJECT(
3458 MLX5_IB_OBJECT_STEERING_ANCHOR,
3459 UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
3460 &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
3461 &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
3462
3463 const struct uapi_definition mlx5_ib_flow_defs[] = {
3464 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
3465 MLX5_IB_OBJECT_FLOW_MATCHER),
3466 UAPI_DEF_CHAIN_OBJ_TREE(
3467 UVERBS_OBJECT_FLOW,
3468 &mlx5_ib_fs),
3469 UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
3470 &mlx5_ib_flow_actions),
3471 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
3472 MLX5_IB_OBJECT_STEERING_ANCHOR,
3473 UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
3474 {},
3475 };
3476
3477 static const struct ib_device_ops flow_ops = {
3478 .create_flow = mlx5_ib_create_flow,
3479 .destroy_flow = mlx5_ib_destroy_flow,
3480 .destroy_flow_action = mlx5_ib_destroy_flow_action,
3481 };
3482
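/*
 * Descriptive note (added annotation): allocate the per-device flow
 * steering database, including the per-port RDMA_TRANSPORT RX/TX priority
 * arrays, and register the flow-related ib_device_ops.
 */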
3483 int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
3484 {
3485 dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
3486
3487 if (!dev->flow_db)
3488 return -ENOMEM;
3489
3490 dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
3491 sizeof(struct mlx5_ib_flow_prio),
3492 GFP_KERNEL);
3493 if (!dev->flow_db->rdma_transport_rx)
3494 goto free_flow_db;
3495
3496 dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
3497 sizeof(struct mlx5_ib_flow_prio),
3498 GFP_KERNEL);
3499 if (!dev->flow_db->rdma_transport_tx)
3500 goto free_rdma_transport_rx;
3501
3502 mutex_init(&dev->flow_db->lock);
3503
3504 ib_set_device_ops(&dev->ib_dev, &flow_ops);
3505 return 0;
3506
3507 free_rdma_transport_rx:
3508 kfree(dev->flow_db->rdma_transport_rx);
3509 free_flow_db:
3510 kfree(dev->flow_db);
3511 return -ENOMEM;
3512 }
3513