xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c (revision 1cac38910ecb881b09f61f57545a771bbe57ba68)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <linux/mlx5/fs.h>
38 #include <linux/mlx5/device.h>
39 #include <linux/rhashtable.h>
40 #include <linux/refcount.h>
41 #include <linux/completion.h>
42 #include <net/arp.h>
43 #include <net/ipv6_stubs.h>
44 #include <net/bareudp.h>
45 #include <net/bonding.h>
46 #include <net/dst_metadata.h>
47 #include "devlink.h"
48 #include "en.h"
49 #include "en/tc/post_act.h"
50 #include "en/tc/act_stats.h"
51 #include "en_rep.h"
52 #include "en/rep/tc.h"
53 #include "en/rep/neigh.h"
54 #include "en_tc.h"
55 #include "eswitch.h"
56 #include "fs_core.h"
57 #include "en/port.h"
58 #include "en/tc_tun.h"
59 #include "en/mapping.h"
60 #include "en/tc_ct.h"
61 #include "en/mod_hdr.h"
62 #include "en/tc_tun_encap.h"
63 #include "en/tc/sample.h"
64 #include "en/tc/act/act.h"
65 #include "en/tc/post_meter.h"
66 #include "lib/devcom.h"
67 #include "lib/geneve.h"
68 #include "lib/fs_chains.h"
69 #include "lib/mlx5.h"
70 #include "diag/en_tc_tracepoint.h"
71 #include <asm/div64.h>
72 #include "lag/lag.h"
73 #include "lag/mp.h"
74 
75 #define MLX5E_TC_TABLE_NUM_GROUPS 4
76 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
77 
78 struct mlx5e_tc_table {
79 	/* Protects the dynamic assignment of the t parameter
80 	 * which is the nic tc root table.
81 	 */
82 	struct mutex			t_lock;
83 	struct mlx5e_priv		*priv;
84 	struct mlx5_flow_table		*t;
85 	struct mlx5_flow_table		*miss_t;
86 	struct mlx5_fs_chains           *chains;
87 	struct mlx5e_post_act		*post_act;
88 
89 	struct rhashtable               ht;
90 
91 	struct mod_hdr_tbl mod_hdr;
92 	struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
93 	DECLARE_HASHTABLE(hairpin_tbl, 8);
94 
95 	struct notifier_block     netdevice_nb;
96 	struct netdev_net_notifier	netdevice_nn;
97 
98 	struct mlx5_tc_ct_priv         *ct;
99 	struct mapping_ctx             *mapping;
100 	struct dentry                  *dfs_root;
101 
102 	/* tc action stats */
103 	struct mlx5e_tc_act_stats_handle *action_stats_handle;
104 };
105 
106 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
107 	[MAPPED_OBJ_TO_REG] = {
108 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
109 		.moffset = 0,
110 		.mlen = 16,
111 	},
112 	[VPORT_TO_REG] = {
113 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
114 		.moffset = 16,
115 		.mlen = 16,
116 	},
117 	[TUNNEL_TO_REG] = {
118 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
119 		.moffset = 8,
120 		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
121 		.soffset = MLX5_BYTE_OFF(fte_match_param,
122 					 misc_parameters_2.metadata_reg_c_1),
123 	},
124 	[ZONE_TO_REG] = zone_to_reg_ct,
125 	[ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
126 	[CTSTATE_TO_REG] = ctstate_to_reg_ct,
127 	[MARK_TO_REG] = mark_to_reg_ct,
128 	[LABELS_TO_REG] = labels_to_reg_ct,
129 	[FTEID_TO_REG] = fteid_to_reg_ct,
130 	/* For NIC rules we store the restore metadata directly
131 	 * into reg_b that is passed to SW since we don't
132 	 * jump between steering domains.
133 	 */
134 	[NIC_MAPPED_OBJ_TO_REG] = {
135 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
136 		.moffset = 0,
137 		.mlen = 16,
138 	},
139 	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
140 	[PACKET_COLOR_TO_REG] = packet_color_to_reg,
141 };
142 
143 struct mlx5e_tc_jump_state {
144 	u32 jump_count;
145 	bool jump_target;
146 	struct mlx5_flow_attr *jumping_attr;
147 
148 	enum flow_action_id last_id;
149 	u32 last_index;
150 };
151 
152 struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
153 {
154 	struct mlx5e_tc_table *tc;
155 
156 	tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
157 	return tc ? tc : ERR_PTR(-ENOMEM);
158 }
159 
160 void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
161 {
162 	kvfree(tc);
163 }
164 
165 struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
166 {
167 	return tc->chains;
168 }
169 
170 /* To avoid a false lock dependency warning, set the tc_ht lock class
171  * to be different from the lock class of the internal ht: when deleting
172  * the last flow from a group and then deleting the group, we get into
173  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash;
174  * that takes its own ht->mutex, which is different from the ht->mutex here.
175  */
176 static struct lock_class_key tc_ht_lock_key;
177 static struct lock_class_key tc_ht_wq_key;
178 
179 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
180 static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
181 static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
182 					struct mlx5_flow_attr *attr);
183 
184 void
185 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
186 			    enum mlx5e_tc_attr_to_reg type,
187 			    u32 val,
188 			    u32 mask)
189 {
190 	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
191 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
192 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
193 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
194 	u32 max_mask = GENMASK(match_len - 1, 0);
195 	__be32 curr_mask_be, curr_val_be;
196 	u32 curr_mask, curr_val;
197 
198 	fmask = headers_c + soffset;
199 	fval = headers_v + soffset;
200 
201 	memcpy(&curr_mask_be, fmask, 4);
202 	memcpy(&curr_val_be, fval, 4);
203 
204 	curr_mask = be32_to_cpu(curr_mask_be);
205 	curr_val = be32_to_cpu(curr_val_be);
206 
207 	//move to correct offset
208 	WARN_ON(mask > max_mask);
209 	mask <<= moffset;
210 	val <<= moffset;
211 	max_mask <<= moffset;
212 
213 	//zero val and mask
214 	curr_mask &= ~max_mask;
215 	curr_val &= ~max_mask;
216 
217 	//add current to mask
218 	curr_mask |= mask;
219 	curr_val |= val;
220 
221 	//back to be32 and write
222 	curr_mask_be = cpu_to_be32(curr_mask);
223 	curr_val_be = cpu_to_be32(curr_val);
224 
225 	memcpy(fmask, &curr_mask_be, 4);
226 	memcpy(fval, &curr_val_be, 4);
227 
228 	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
229 }
230 
231 void
232 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
233 				enum mlx5e_tc_attr_to_reg type,
234 				u32 *val,
235 				u32 *mask)
236 {
237 	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
238 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
239 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
240 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
241 	u32 max_mask = GENMASK(match_len - 1, 0);
242 	__be32 curr_mask_be, curr_val_be;
243 	u32 curr_mask, curr_val;
244 
245 	fmask = headers_c + soffset;
246 	fval = headers_v + soffset;
247 
248 	memcpy(&curr_mask_be, fmask, 4);
249 	memcpy(&curr_val_be, fval, 4);
250 
251 	curr_mask = be32_to_cpu(curr_mask_be);
252 	curr_val = be32_to_cpu(curr_val_be);
253 
254 	*mask = (curr_mask >> moffset) & max_mask;
255 	*val = (curr_val >> moffset) & max_mask;
256 }
257 
258 int
259 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
260 				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
261 				     enum mlx5_flow_namespace_type ns,
262 				     enum mlx5e_tc_attr_to_reg type,
263 				     u32 data)
264 {
265 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
266 	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
267 	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
268 	char *modact;
269 	int err;
270 
271 	modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
272 	if (IS_ERR(modact))
273 		return PTR_ERR(modact);
274 
275 	/* Firmware has 5bit length field and 0 means 32bits */
276 	if (mlen == 32)
277 		mlen = 0;
278 
279 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
280 	MLX5_SET(set_action_in, modact, field, mfield);
281 	MLX5_SET(set_action_in, modact, offset, moffset);
282 	MLX5_SET(set_action_in, modact, length, mlen);
283 	MLX5_SET(set_action_in, modact, data, data);
284 	err = mod_hdr_acts->num_actions;
285 	mod_hdr_acts->num_actions++;
286 
287 	return err;
288 }
289 
290 static struct mlx5e_tc_act_stats_handle  *
291 get_act_stats_handle(struct mlx5e_priv *priv)
292 {
293 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
294 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
295 	struct mlx5_rep_uplink_priv *uplink_priv;
296 	struct mlx5e_rep_priv *uplink_rpriv;
297 
298 	if (is_mdev_switchdev_mode(priv->mdev)) {
299 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
300 		uplink_priv = &uplink_rpriv->uplink_priv;
301 
302 		return uplink_priv->action_stats_handle;
303 	}
304 
305 	return tc->action_stats_handle;
306 }
307 
308 struct mlx5e_tc_int_port_priv *
309 mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
310 {
311 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
312 	struct mlx5_rep_uplink_priv *uplink_priv;
313 	struct mlx5e_rep_priv *uplink_rpriv;
314 
315 	if (is_mdev_switchdev_mode(priv->mdev)) {
316 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
317 		uplink_priv = &uplink_rpriv->uplink_priv;
318 
319 		return uplink_priv->int_port_priv;
320 	}
321 
322 	return NULL;
323 }
324 
325 struct mlx5e_flow_meters *
326 mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
327 {
328 	struct mlx5_eswitch *esw = dev->priv.eswitch;
329 	struct mlx5_rep_uplink_priv *uplink_priv;
330 	struct mlx5e_rep_priv *uplink_rpriv;
331 	struct mlx5e_priv *priv;
332 
333 	if (is_mdev_switchdev_mode(dev)) {
334 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
335 		uplink_priv = &uplink_rpriv->uplink_priv;
336 		priv = netdev_priv(uplink_rpriv->netdev);
337 		if (!uplink_priv->flow_meters)
338 			uplink_priv->flow_meters =
339 				mlx5e_flow_meters_init(priv,
340 						       MLX5_FLOW_NAMESPACE_FDB,
341 						       uplink_priv->post_act);
342 		if (!IS_ERR(uplink_priv->flow_meters))
343 			return uplink_priv->flow_meters;
344 	}
345 
346 	return NULL;
347 }
348 
349 static struct mlx5_tc_ct_priv *
350 get_ct_priv(struct mlx5e_priv *priv)
351 {
352 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
353 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
354 	struct mlx5_rep_uplink_priv *uplink_priv;
355 	struct mlx5e_rep_priv *uplink_rpriv;
356 
357 	if (is_mdev_switchdev_mode(priv->mdev)) {
358 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
359 		uplink_priv = &uplink_rpriv->uplink_priv;
360 
361 		return uplink_priv->ct_priv;
362 	}
363 
364 	return tc->ct;
365 }
366 
367 static struct mlx5e_tc_psample *
368 get_sample_priv(struct mlx5e_priv *priv)
369 {
370 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
371 	struct mlx5_rep_uplink_priv *uplink_priv;
372 	struct mlx5e_rep_priv *uplink_rpriv;
373 
374 	if (is_mdev_switchdev_mode(priv->mdev)) {
375 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
376 		uplink_priv = &uplink_rpriv->uplink_priv;
377 
378 		return uplink_priv->tc_psample;
379 	}
380 
381 	return NULL;
382 }
383 
384 static struct mlx5e_post_act *
385 get_post_action(struct mlx5e_priv *priv)
386 {
387 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
388 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
389 	struct mlx5_rep_uplink_priv *uplink_priv;
390 	struct mlx5e_rep_priv *uplink_rpriv;
391 
392 	if (is_mdev_switchdev_mode(priv->mdev)) {
393 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
394 		uplink_priv = &uplink_rpriv->uplink_priv;
395 
396 		return uplink_priv->post_act;
397 	}
398 
399 	return tc->post_act;
400 }
401 
402 struct mlx5_flow_handle *
403 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
404 		    struct mlx5_flow_spec *spec,
405 		    struct mlx5_flow_attr *attr)
406 {
407 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
408 
409 	if (is_mdev_switchdev_mode(priv->mdev))
410 		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
411 
412 	return	mlx5e_add_offloaded_nic_rule(priv, spec, attr);
413 }
414 
415 void
416 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
417 		    struct mlx5_flow_handle *rule,
418 		    struct mlx5_flow_attr *attr)
419 {
420 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
421 
422 	if (is_mdev_switchdev_mode(priv->mdev)) {
423 		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
424 		return;
425 	}
426 
427 	mlx5e_del_offloaded_nic_rule(priv, rule, attr);
428 }
429 
430 static bool
431 is_flow_meter_action(struct mlx5_flow_attr *attr)
432 {
433 	return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
434 		 (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
435 		attr->flags & MLX5_ATTR_FLAG_MTU);
436 }
437 
438 static int
439 mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
440 			struct mlx5_flow_attr *attr)
441 {
442 	struct mlx5e_post_act *post_act = get_post_action(priv);
443 	struct mlx5e_post_meter_priv *post_meter;
444 	enum mlx5_flow_namespace_type ns_type;
445 	struct mlx5e_flow_meter_handle *meter;
446 	enum mlx5e_post_meter_type type;
447 
448 	if (IS_ERR(post_act))
449 		return PTR_ERR(post_act);
450 
451 	meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
452 	if (IS_ERR(meter)) {
453 		mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
454 		return PTR_ERR(meter);
455 	}
456 
457 	ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
458 	type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
459 	post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
460 					   type,
461 					   meter->act_counter, meter->drop_counter,
462 					   attr->branch_true, attr->branch_false);
463 	if (IS_ERR(post_meter)) {
464 		mlx5_core_err(priv->mdev, "Failed to init post meter\n");
465 		goto err_meter_init;
466 	}
467 
468 	attr->meter_attr.meter = meter;
469 	attr->meter_attr.post_meter = post_meter;
470 	attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
471 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
472 
473 	return 0;
474 
475 err_meter_init:
476 	mlx5e_tc_meter_put(meter);
477 	return PTR_ERR(post_meter);
478 }
479 
480 static void
481 mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
482 {
483 	mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
484 	mlx5e_tc_meter_put(attr->meter_attr.meter);
485 }
486 
487 struct mlx5_flow_handle *
488 mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
489 		      struct mlx5_flow_spec *spec,
490 		      struct mlx5_flow_attr *attr)
491 {
492 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
493 	int err;
494 
495 	if (!is_mdev_switchdev_mode(priv->mdev))
496 		return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
497 
498 	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
499 		return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
500 
501 	if (is_flow_meter_action(attr)) {
502 		err = mlx5e_tc_add_flow_meter(priv, attr);
503 		if (err)
504 			return ERR_PTR(err);
505 	}
506 
507 	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
508 }
509 
510 void
511 mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
512 			struct mlx5_flow_handle *rule,
513 			struct mlx5_flow_attr *attr)
514 {
515 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
516 
517 	if (!is_mdev_switchdev_mode(priv->mdev)) {
518 		mlx5e_del_offloaded_nic_rule(priv, rule, attr);
519 		return;
520 	}
521 
522 	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
523 		mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
524 		return;
525 	}
526 
527 	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
528 
529 	if (attr->meter_attr.meter)
530 		mlx5e_tc_del_flow_meter(esw, attr);
531 }
532 
533 int
534 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
535 			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
536 			  enum mlx5_flow_namespace_type ns,
537 			  enum mlx5e_tc_attr_to_reg type,
538 			  u32 data)
539 {
540 	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
541 
542 	return ret < 0 ? ret : 0;
543 }
544 
545 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
546 					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
547 					  enum mlx5e_tc_attr_to_reg type,
548 					  int act_id, u32 data)
549 {
550 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
551 	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
552 	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
553 	char *modact;
554 
555 	modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
556 
557 	/* Firmware has 5bit length field and 0 means 32bits */
558 	if (mlen == 32)
559 		mlen = 0;
560 
561 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
562 	MLX5_SET(set_action_in, modact, field, mfield);
563 	MLX5_SET(set_action_in, modact, offset, moffset);
564 	MLX5_SET(set_action_in, modact, length, mlen);
565 	MLX5_SET(set_action_in, modact, data, data);
566 }
567 
568 struct mlx5e_hairpin {
569 	struct mlx5_hairpin *pair;
570 
571 	struct mlx5_core_dev *func_mdev;
572 	struct mlx5e_priv *func_priv;
573 	u32 tdn;
574 	struct mlx5e_tir direct_tir;
575 
576 	int num_channels;
577 	u8 log_num_packets;
578 	struct mlx5e_rqt indir_rqt;
579 	struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
580 	struct mlx5_ttc_table *ttc;
581 };
582 
583 struct mlx5e_hairpin_entry {
584 	/* a node of a hash table which keeps all the  hairpin entries */
585 	struct hlist_node hairpin_hlist;
586 
587 	/* protects flows list */
588 	spinlock_t flows_lock;
589 	/* flows sharing the same hairpin */
590 	struct list_head flows;
591 	/* hpe's that were not fully initialized when dead peer update event
592 	 * function traversed them.
593 	 */
594 	struct list_head dead_peer_wait_list;
595 
596 	u16 peer_vhca_id;
597 	u8 prio;
598 	struct mlx5e_hairpin *hp;
599 	refcount_t refcnt;
600 	struct completion res_ready;
601 };
602 
603 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
604 			      struct mlx5e_tc_flow *flow);
605 
606 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
607 {
608 	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
609 		return ERR_PTR(-EINVAL);
610 	return flow;
611 }
612 
613 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
614 {
615 	if (refcount_dec_and_test(&flow->refcnt)) {
616 		mlx5e_tc_del_flow(priv, flow);
617 		kfree_rcu(flow, rcu_head);
618 	}
619 }
620 
621 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
622 {
623 	return flow_flag_test(flow, ESWITCH);
624 }
625 
626 bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
627 {
628 	return flow_flag_test(flow, FT);
629 }
630 
631 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
632 {
633 	return flow_flag_test(flow, OFFLOADED);
634 }
635 
636 int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
637 {
638 	return mlx5e_is_eswitch_flow(flow) ?
639 		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
640 }
641 
642 static struct mlx5_core_dev *
643 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
644 {
645 	return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
646 }
647 
648 static struct mod_hdr_tbl *
649 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
650 {
651 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
652 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
653 
654 	return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
655 		&esw->offloads.mod_hdr :
656 		&tc->mod_hdr;
657 }
658 
659 int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv,
660 			    struct mlx5e_tc_flow *flow,
661 			    struct mlx5_flow_attr *attr)
662 {
663 	struct mlx5e_mod_hdr_handle *mh;
664 
665 	mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
666 				  mlx5e_get_flow_namespace(flow),
667 				  &attr->parse_attr->mod_hdr_acts);
668 	if (IS_ERR(mh))
669 		return PTR_ERR(mh);
670 
671 	WARN_ON(attr->modify_hdr);
672 	attr->modify_hdr = mlx5e_mod_hdr_get(mh);
673 	attr->mh = mh;
674 
675 	return 0;
676 }
677 
678 void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv,
679 			     struct mlx5e_tc_flow *flow,
680 			     struct mlx5_flow_attr *attr)
681 {
682 	/* flow wasn't fully initialized */
683 	if (!attr->mh)
684 		return;
685 
686 	mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
687 			     attr->mh);
688 	attr->mh = NULL;
689 }
690 
691 static
692 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
693 {
694 	struct mlx5_core_dev *mdev;
695 	struct net_device *netdev;
696 	struct mlx5e_priv *priv;
697 
698 	netdev = dev_get_by_index(net, ifindex);
699 	if (!netdev)
700 		return ERR_PTR(-ENODEV);
701 
702 	priv = netdev_priv(netdev);
703 	mdev = priv->mdev;
704 	dev_put(netdev);
705 
706 	/* Mirred tc action holds a refcount on the ifindex net_device (see
707 	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
708 	 * after dev_put(netdev), while we're in the context of adding a tc flow.
709 	 *
710 	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
711 	 * stored in a hairpin object, which exists until all flows, that refer to it, get
712 	 * removed.
713 	 *
714 	 * On the other hand, after a hairpin object has been created, the peer net_device may
715 	 * be removed/unbound while there are still some hairpin flows that are using it. This
716 	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
717 	 * NETDEV_UNREGISTER event of the peer net_device.
718 	 */
719 	return mdev;
720 }
721 
722 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
723 {
724 	struct mlx5e_tir_builder *builder;
725 	int err;
726 
727 	builder = mlx5e_tir_builder_alloc(false);
728 	if (!builder)
729 		return -ENOMEM;
730 
731 	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
732 	if (err)
733 		goto out;
734 
735 	mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
736 	err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
737 	if (err)
738 		goto create_tir_err;
739 
740 out:
741 	mlx5e_tir_builder_free(builder);
742 	return err;
743 
744 create_tir_err:
745 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
746 
747 	goto out;
748 }
749 
750 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
751 {
752 	mlx5e_tir_destroy(&hp->direct_tir);
753 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
754 }
755 
756 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
757 {
758 	struct mlx5e_priv *priv = hp->func_priv;
759 	struct mlx5_core_dev *mdev = priv->mdev;
760 	struct mlx5e_rss_params_indir indir;
761 	u32 rqt_size;
762 	int err;
763 
764 	rqt_size = mlx5e_rqt_size(mdev, hp->num_channels);
765 	err = mlx5e_rss_params_indir_init(&indir, rqt_size, rqt_size);
766 	if (err)
767 		return err;
768 
769 	mlx5e_rss_params_indir_init_uniform(&indir, hp->num_channels);
770 	err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, NULL, hp->num_channels,
771 				   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
772 				   &indir);
773 
774 	mlx5e_rss_params_indir_cleanup(&indir);
775 	return err;
776 }
777 
778 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
779 {
780 	struct mlx5e_priv *priv = hp->func_priv;
781 	struct mlx5e_rss_params_hash rss_hash;
782 	enum mlx5_traffic_types tt, max_tt;
783 	struct mlx5e_tir_builder *builder;
784 	int err = 0;
785 
786 	builder = mlx5e_tir_builder_alloc(false);
787 	if (!builder)
788 		return -ENOMEM;
789 
790 	rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
791 
792 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
793 		struct mlx5e_rss_params_traffic_type rss_tt;
794 
795 		rss_tt = mlx5e_rss_get_default_tt_config(tt);
796 
797 		mlx5e_tir_builder_build_rqt(builder, hp->tdn,
798 					    mlx5e_rqt_get_rqtn(&hp->indir_rqt),
799 					    false);
800 		mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
801 
802 		err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
803 		if (err) {
804 			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
805 			goto err_destroy_tirs;
806 		}
807 
808 		mlx5e_tir_builder_clear(builder);
809 	}
810 
811 out:
812 	mlx5e_tir_builder_free(builder);
813 	return err;
814 
815 err_destroy_tirs:
816 	max_tt = tt;
817 	for (tt = 0; tt < max_tt; tt++)
818 		mlx5e_tir_destroy(&hp->indir_tir[tt]);
819 
820 	goto out;
821 }
822 
823 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
824 {
825 	int tt;
826 
827 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
828 		mlx5e_tir_destroy(&hp->indir_tir[tt]);
829 }
830 
831 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
832 					 struct ttc_params *ttc_params)
833 {
834 	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
835 	int tt;
836 
837 	memset(ttc_params, 0, sizeof(*ttc_params));
838 
839 	ttc_params->ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
840 	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
841 		if (mlx5_ttc_is_decrypted_esp_tt(tt))
842 			continue;
843 
844 		ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
845 		ttc_params->dests[tt].tir_num =
846 			tt == MLX5_TT_ANY ?
847 				mlx5e_tir_get_tirn(&hp->direct_tir) :
848 				mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
849 	}
850 
851 	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
852 	ft_attr->prio = MLX5E_TC_PRIO;
853 }
854 
855 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
856 {
857 	struct mlx5e_priv *priv = hp->func_priv;
858 	struct ttc_params ttc_params;
859 	struct mlx5_ttc_table *ttc;
860 	int err;
861 
862 	err = mlx5e_hairpin_create_indirect_rqt(hp);
863 	if (err)
864 		return err;
865 
866 	err = mlx5e_hairpin_create_indirect_tirs(hp);
867 	if (err)
868 		goto err_create_indirect_tirs;
869 
870 	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
871 	hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
872 	if (IS_ERR(hp->ttc)) {
873 		err = PTR_ERR(hp->ttc);
874 		goto err_create_ttc_table;
875 	}
876 
877 	ttc = mlx5e_fs_get_ttc(priv->fs, false);
878 	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
879 		   hp->num_channels,
880 		   mlx5_get_ttc_flow_table(ttc)->id);
881 
882 	return 0;
883 
884 err_create_ttc_table:
885 	mlx5e_hairpin_destroy_indirect_tirs(hp);
886 err_create_indirect_tirs:
887 	mlx5e_rqt_destroy(&hp->indir_rqt);
888 
889 	return err;
890 }
891 
892 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
893 {
894 	mlx5_destroy_ttc_table(hp->ttc);
895 	mlx5e_hairpin_destroy_indirect_tirs(hp);
896 	mlx5e_rqt_destroy(&hp->indir_rqt);
897 }
898 
899 static struct mlx5e_hairpin *
900 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
901 		     int peer_ifindex)
902 {
903 	struct mlx5_core_dev *func_mdev, *peer_mdev;
904 	struct mlx5e_hairpin *hp;
905 	struct mlx5_hairpin *pair;
906 	int err;
907 
908 	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
909 	if (!hp)
910 		return ERR_PTR(-ENOMEM);
911 
912 	func_mdev = priv->mdev;
913 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
914 	if (IS_ERR(peer_mdev)) {
915 		err = PTR_ERR(peer_mdev);
916 		goto create_pair_err;
917 	}
918 
919 	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
920 	if (IS_ERR(pair)) {
921 		err = PTR_ERR(pair);
922 		goto create_pair_err;
923 	}
924 	hp->pair = pair;
925 	hp->func_mdev = func_mdev;
926 	hp->func_priv = priv;
927 	hp->num_channels = params->num_channels;
928 	hp->log_num_packets = params->log_num_packets;
929 
930 	err = mlx5e_hairpin_create_transport(hp);
931 	if (err)
932 		goto create_transport_err;
933 
934 	if (hp->num_channels > 1) {
935 		err = mlx5e_hairpin_rss_init(hp);
936 		if (err)
937 			goto rss_init_err;
938 	}
939 
940 	return hp;
941 
942 rss_init_err:
943 	mlx5e_hairpin_destroy_transport(hp);
944 create_transport_err:
945 	mlx5_core_hairpin_destroy(hp->pair);
946 create_pair_err:
947 	kfree(hp);
948 	return ERR_PTR(err);
949 }
950 
951 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
952 {
953 	if (hp->num_channels > 1)
954 		mlx5e_hairpin_rss_cleanup(hp);
955 	mlx5e_hairpin_destroy_transport(hp);
956 	mlx5_core_hairpin_destroy(hp->pair);
957 	kvfree(hp);
958 }
959 
960 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
961 {
962 	return (peer_vhca_id << 16 | prio);
963 }
964 
965 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
966 						     u16 peer_vhca_id, u8 prio)
967 {
968 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
969 	struct mlx5e_hairpin_entry *hpe;
970 	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
971 
972 	hash_for_each_possible(tc->hairpin_tbl, hpe,
973 			       hairpin_hlist, hash_key) {
974 		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
975 			refcount_inc(&hpe->refcnt);
976 			return hpe;
977 		}
978 	}
979 
980 	return NULL;
981 }
982 
983 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
984 			      struct mlx5e_hairpin_entry *hpe)
985 {
986 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
987 	/* no more hairpin flows for us, release the hairpin pair */
988 	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
989 		return;
990 	hash_del(&hpe->hairpin_hlist);
991 	mutex_unlock(&tc->hairpin_tbl_lock);
992 
993 	if (!IS_ERR_OR_NULL(hpe->hp)) {
994 		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
995 			   dev_name(hpe->hp->pair->peer_mdev->device));
996 
997 		mlx5e_hairpin_destroy(hpe->hp);
998 	}
999 
1000 	WARN_ON(!list_empty(&hpe->flows));
1001 	kfree(hpe);
1002 }
1003 
1004 #define UNKNOWN_MATCH_PRIO 8
1005 
1006 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
1007 				  struct mlx5_flow_spec *spec, u8 *match_prio,
1008 				  struct netlink_ext_ack *extack)
1009 {
1010 	void *headers_c, *headers_v;
1011 	u8 prio_val, prio_mask = 0;
1012 	bool vlan_present;
1013 
1014 #ifdef CONFIG_MLX5_CORE_EN_DCB
1015 	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
1016 		NL_SET_ERR_MSG_MOD(extack,
1017 				   "only PCP trust state supported for hairpin");
1018 		return -EOPNOTSUPP;
1019 	}
1020 #endif
1021 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1022 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1023 
1024 	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
1025 	if (vlan_present) {
1026 		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
1027 		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
1028 	}
1029 
1030 	if (!vlan_present || !prio_mask) {
1031 		prio_val = UNKNOWN_MATCH_PRIO;
1032 	} else if (prio_mask != 0x7) {
1033 		NL_SET_ERR_MSG_MOD(extack,
1034 				   "masked priority match not supported for hairpin");
1035 		return -EOPNOTSUPP;
1036 	}
1037 
1038 	*match_prio = prio_val;
1039 	return 0;
1040 }
1041 
1042 static int debugfs_hairpin_num_active_get(void *data, u64 *val)
1043 {
1044 	struct mlx5e_tc_table *tc = data;
1045 	struct mlx5e_hairpin_entry *hpe;
1046 	u32 cnt = 0;
1047 	u32 bkt;
1048 
1049 	mutex_lock(&tc->hairpin_tbl_lock);
1050 	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1051 		cnt++;
1052 	mutex_unlock(&tc->hairpin_tbl_lock);
1053 
1054 	*val = cnt;
1055 
1056 	return 0;
1057 }
1058 DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active,
1059 			 debugfs_hairpin_num_active_get, NULL, "%llu\n");
1060 
1061 static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
1062 
1063 {
1064 	struct mlx5e_tc_table *tc = file->private;
1065 	struct mlx5e_hairpin_entry *hpe;
1066 	u32 bkt;
1067 
1068 	mutex_lock(&tc->hairpin_tbl_lock);
1069 	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1070 		seq_printf(file,
1071 			   "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
1072 			   hpe->peer_vhca_id, hpe->prio,
1073 			   refcount_read(&hpe->refcnt), hpe->hp->num_channels,
1074 			   BIT(hpe->hp->log_num_packets));
1075 	mutex_unlock(&tc->hairpin_tbl_lock);
1076 
1077 	return 0;
1078 }
1079 DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump);
1080 
1081 static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
1082 				  struct dentry *dfs_root)
1083 {
1084 	if (IS_ERR_OR_NULL(dfs_root))
1085 		return;
1086 
1087 	tc->dfs_root = debugfs_create_dir("tc", dfs_root);
1088 
1089 	debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc,
1090 			    &fops_hairpin_num_active);
1091 	debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc,
1092 			    &debugfs_hairpin_table_dump_fops);
1093 }
1094 
1095 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
1096 				  struct mlx5e_tc_flow *flow,
1097 				  struct mlx5e_tc_flow_parse_attr *parse_attr,
1098 				  struct netlink_ext_ack *extack)
1099 {
1100 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1101 	struct devlink *devlink = priv_to_devlink(priv->mdev);
1102 	int peer_ifindex = parse_attr->mirred_ifindex[0];
1103 	union devlink_param_value val = {};
1104 	struct mlx5_hairpin_params params;
1105 	struct mlx5_core_dev *peer_mdev;
1106 	struct mlx5e_hairpin_entry *hpe;
1107 	struct mlx5e_hairpin *hp;
1108 	u8 match_prio;
1109 	u16 peer_id;
1110 	int err;
1111 
1112 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
1113 	if (IS_ERR(peer_mdev)) {
1114 		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
1115 		return PTR_ERR(peer_mdev);
1116 	}
1117 
1118 	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
1119 		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
1120 		return -EOPNOTSUPP;
1121 	}
1122 
1123 	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
1124 	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
1125 				     extack);
1126 	if (err)
1127 		return err;
1128 
1129 	mutex_lock(&tc->hairpin_tbl_lock);
1130 	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
1131 	if (hpe) {
1132 		mutex_unlock(&tc->hairpin_tbl_lock);
1133 		wait_for_completion(&hpe->res_ready);
1134 
1135 		if (IS_ERR(hpe->hp)) {
1136 			err = -EREMOTEIO;
1137 			goto out_err;
1138 		}
1139 		goto attach_flow;
1140 	}
1141 
1142 	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
1143 	if (!hpe) {
1144 		mutex_unlock(&tc->hairpin_tbl_lock);
1145 		return -ENOMEM;
1146 	}
1147 
1148 	spin_lock_init(&hpe->flows_lock);
1149 	INIT_LIST_HEAD(&hpe->flows);
1150 	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
1151 	hpe->peer_vhca_id = peer_id;
1152 	hpe->prio = match_prio;
1153 	refcount_set(&hpe->refcnt, 1);
1154 	init_completion(&hpe->res_ready);
1155 
1156 	hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
1157 		 hash_hairpin_info(peer_id, match_prio));
1158 	mutex_unlock(&tc->hairpin_tbl_lock);
1159 
1160 	err = devl_param_driverinit_value_get(
1161 		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
1162 	if (err) {
1163 		err = -ENOMEM;
1164 		goto out_err;
1165 	}
1166 
1167 	params.log_num_packets = ilog2(val.vu32);
1168 	params.log_data_size =
1169 		clamp_t(u32,
1170 			params.log_num_packets +
1171 				MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev),
1172 			MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
1173 			MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
1174 
1175 	params.q_counter = priv->q_counter[0];
1176 	err = devl_param_driverinit_value_get(
1177 		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
1178 	if (err) {
1179 		err = -ENOMEM;
1180 		goto out_err;
1181 	}
1182 
1183 	params.num_channels = val.vu32;
1184 
1185 	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
1186 	hpe->hp = hp;
1187 	complete_all(&hpe->res_ready);
1188 	if (IS_ERR(hp)) {
1189 		err = PTR_ERR(hp);
1190 		goto out_err;
1191 	}
1192 
1193 	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
1194 		   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
1195 		   dev_name(hp->pair->peer_mdev->device),
1196 		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
1197 
1198 attach_flow:
1199 	if (hpe->hp->num_channels > 1) {
1200 		flow_flag_set(flow, HAIRPIN_RSS);
1201 		flow->attr->nic_attr->hairpin_ft =
1202 			mlx5_get_ttc_flow_table(hpe->hp->ttc);
1203 	} else {
1204 		flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
1205 	}
1206 
1207 	flow->hpe = hpe;
1208 	spin_lock(&hpe->flows_lock);
1209 	list_add(&flow->hairpin, &hpe->flows);
1210 	spin_unlock(&hpe->flows_lock);
1211 
1212 	return 0;
1213 
1214 out_err:
1215 	mlx5e_hairpin_put(priv, hpe);
1216 	return err;
1217 }
1218 
1219 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
1220 				   struct mlx5e_tc_flow *flow)
1221 {
1222 	/* flow wasn't fully initialized */
1223 	if (!flow->hpe)
1224 		return;
1225 
1226 	spin_lock(&flow->hpe->flows_lock);
1227 	list_del(&flow->hairpin);
1228 	spin_unlock(&flow->hpe->flows_lock);
1229 
1230 	mlx5e_hairpin_put(priv, flow->hpe);
1231 	flow->hpe = NULL;
1232 }
1233 
1234 struct mlx5_flow_handle *
1235 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
1236 			     struct mlx5_flow_spec *spec,
1237 			     struct mlx5_flow_attr *attr)
1238 {
1239 	struct mlx5_flow_context *flow_context = &spec->flow_context;
1240 	struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
1241 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1242 	struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
1243 	struct mlx5_flow_destination dest[2] = {};
1244 	struct mlx5_fs_chains *nic_chains;
1245 	struct mlx5_flow_act flow_act = {
1246 		.action = attr->action,
1247 		.flags    = FLOW_ACT_NO_APPEND,
1248 	};
1249 	struct mlx5_flow_handle *rule;
1250 	struct mlx5_flow_table *ft;
1251 	int dest_ix = 0;
1252 
1253 	nic_chains = mlx5e_nic_chains(tc);
1254 	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1255 	flow_context->flow_tag = nic_attr->flow_tag;
1256 
1257 	if (attr->dest_ft) {
1258 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1259 		dest[dest_ix].ft = attr->dest_ft;
1260 		dest_ix++;
1261 	} else if (nic_attr->hairpin_ft) {
1262 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1263 		dest[dest_ix].ft = nic_attr->hairpin_ft;
1264 		dest_ix++;
1265 	} else if (nic_attr->hairpin_tirn) {
1266 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1267 		dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
1268 		dest_ix++;
1269 	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1270 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1271 		if (attr->dest_chain) {
1272 			dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1273 								 attr->dest_chain, 1,
1274 								 MLX5E_TC_FT_LEVEL);
1275 			if (IS_ERR(dest[dest_ix].ft))
1276 				return ERR_CAST(dest[dest_ix].ft);
1277 		} else {
1278 			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
1279 		}
1280 		dest_ix++;
1281 	}
1282 
1283 	if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1284 	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1285 		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1286 
1287 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1288 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1289 		dest[dest_ix].counter = attr->counter;
1290 		dest_ix++;
1291 	}
1292 
1293 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1294 		flow_act.modify_hdr = attr->modify_hdr;
1295 
1296 	mutex_lock(&tc->t_lock);
1297 	if (IS_ERR_OR_NULL(tc->t)) {
1298 		/* Create the root table here if it doesn't exist yet */
1299 		tc->t =
1300 			mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1301 
1302 		if (IS_ERR(tc->t)) {
1303 			mutex_unlock(&tc->t_lock);
1304 			netdev_err(priv->netdev,
1305 				   "Failed to create tc offload table\n");
1306 			rule = ERR_CAST(tc->t);
1307 			goto err_ft_get;
1308 		}
1309 	}
1310 	mutex_unlock(&tc->t_lock);
1311 
1312 	if (attr->chain || attr->prio)
1313 		ft = mlx5_chains_get_table(nic_chains,
1314 					   attr->chain, attr->prio,
1315 					   MLX5E_TC_FT_LEVEL);
1316 	else
1317 		ft = attr->ft;
1318 
1319 	if (IS_ERR(ft)) {
1320 		rule = ERR_CAST(ft);
1321 		goto err_ft_get;
1322 	}
1323 
1324 	if (attr->outer_match_level != MLX5_MATCH_NONE)
1325 		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1326 
1327 	rule = mlx5_add_flow_rules(ft, spec,
1328 				   &flow_act, dest, dest_ix);
1329 	if (IS_ERR(rule))
1330 		goto err_rule;
1331 
1332 	return rule;
1333 
1334 err_rule:
1335 	if (attr->chain || attr->prio)
1336 		mlx5_chains_put_table(nic_chains,
1337 				      attr->chain, attr->prio,
1338 				      MLX5E_TC_FT_LEVEL);
1339 err_ft_get:
1340 	if (attr->dest_chain)
1341 		mlx5_chains_put_table(nic_chains,
1342 				      attr->dest_chain, 1,
1343 				      MLX5E_TC_FT_LEVEL);
1344 
1345 	return ERR_CAST(rule);
1346 }
1347 
1348 static int
1349 alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
1350 			struct mlx5_flow_attr *attr)
1351 
1352 {
1353 	struct mlx5_fc *counter;
1354 
1355 	counter = mlx5_fc_create(counter_dev, true);
1356 	if (IS_ERR(counter))
1357 		return PTR_ERR(counter);
1358 
1359 	attr->counter = counter;
1360 	return 0;
1361 }
1362 
1363 static int
1364 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1365 		      struct mlx5e_tc_flow *flow,
1366 		      struct netlink_ext_ack *extack)
1367 {
1368 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1369 	struct mlx5_flow_attr *attr = flow->attr;
1370 	struct mlx5_core_dev *dev = priv->mdev;
1371 	int err;
1372 
1373 	parse_attr = attr->parse_attr;
1374 
1375 	if (flow_flag_test(flow, HAIRPIN)) {
1376 		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1377 		if (err)
1378 			return err;
1379 	}
1380 
1381 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1382 		err = alloc_flow_attr_counter(dev, attr);
1383 		if (err)
1384 			return err;
1385 	}
1386 
1387 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1388 		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1389 		if (err)
1390 			return err;
1391 	}
1392 
1393 	flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
1394 	return PTR_ERR_OR_ZERO(flow->rule[0]);
1395 }
1396 
1397 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1398 				  struct mlx5_flow_handle *rule,
1399 				  struct mlx5_flow_attr *attr)
1400 {
1401 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1402 	struct mlx5_fs_chains *nic_chains;
1403 
1404 	nic_chains = mlx5e_nic_chains(tc);
1405 	mlx5_del_flow_rules(rule);
1406 
1407 	if (attr->chain || attr->prio)
1408 		mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1409 				      MLX5E_TC_FT_LEVEL);
1410 
1411 	if (attr->dest_chain)
1412 		mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1413 				      MLX5E_TC_FT_LEVEL);
1414 }
1415 
1416 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1417 				  struct mlx5e_tc_flow *flow)
1418 {
1419 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1420 	struct mlx5_flow_attr *attr = flow->attr;
1421 
1422 	flow_flag_clear(flow, OFFLOADED);
1423 
1424 	if (!IS_ERR_OR_NULL(flow->rule[0]))
1425 		mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1426 
1427 	/* Remove root table if no rules are left to avoid
1428 	 * extra steering hops.
1429 	 */
1430 	mutex_lock(&tc->t_lock);
1431 	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1432 	    !IS_ERR_OR_NULL(tc->t)) {
1433 		mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
1434 		tc->t = NULL;
1435 	}
1436 	mutex_unlock(&tc->t_lock);
1437 
1438 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1439 		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
1440 		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1441 	}
1442 
1443 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1444 		mlx5_fc_destroy(priv->mdev, attr->counter);
1445 
1446 	if (flow_flag_test(flow, HAIRPIN))
1447 		mlx5e_hairpin_flow_del(priv, flow);
1448 
1449 	free_flow_post_acts(flow);
1450 	mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
1451 
1452 	kvfree(attr->parse_attr);
1453 	kfree(flow->attr);
1454 }
1455 
1456 struct mlx5_flow_handle *
1457 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1458 			   struct mlx5e_tc_flow *flow,
1459 			   struct mlx5_flow_spec *spec,
1460 			   struct mlx5_flow_attr *attr)
1461 {
1462 	struct mlx5_flow_handle *rule;
1463 
1464 	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1465 		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1466 
1467 	rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
1468 
1469 	if (IS_ERR(rule))
1470 		return rule;
1471 
1472 	if (attr->esw_attr->split_count) {
1473 		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1474 		if (IS_ERR(flow->rule[1]))
1475 			goto err_rule1;
1476 	}
1477 
1478 	return rule;
1479 
1480 err_rule1:
1481 	mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
1482 	return flow->rule[1];
1483 }
1484 
1485 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1486 				  struct mlx5e_tc_flow *flow,
1487 				  struct mlx5_flow_attr *attr)
1488 {
1489 	flow_flag_clear(flow, OFFLOADED);
1490 
1491 	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1492 		return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1493 
1494 	if (attr->esw_attr->split_count)
1495 		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1496 
1497 	mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
1498 }
1499 
1500 struct mlx5_flow_handle *
1501 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1502 			      struct mlx5e_tc_flow *flow,
1503 			      struct mlx5_flow_spec *spec)
1504 {
1505 	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
1506 	struct mlx5e_mod_hdr_handle *mh = NULL;
1507 	struct mlx5_flow_attr *slow_attr;
1508 	struct mlx5_flow_handle *rule;
1509 	bool fwd_and_modify_cap;
1510 	u32 chain_mapping = 0;
1511 	int err;
1512 
1513 	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1514 	if (!slow_attr)
1515 		return ERR_PTR(-ENOMEM);
1516 
1517 	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1518 	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1519 	slow_attr->esw_attr->split_count = 0;
1520 	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1521 
1522 	fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
1523 	if (!fwd_and_modify_cap)
1524 		goto skip_restore;
1525 
1526 	err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
1527 	if (err)
1528 		goto err_get_chain;
1529 
1530 	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
1531 					MAPPED_OBJ_TO_REG, chain_mapping);
1532 	if (err)
1533 		goto err_reg_set;
1534 
1535 	mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
1536 				  MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
1537 	if (IS_ERR(mh)) {
1538 		err = PTR_ERR(mh);
1539 		goto err_attach;
1540 	}
1541 
1542 	slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1543 	slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);
1544 
1545 skip_restore:
1546 	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1547 	if (IS_ERR(rule)) {
1548 		err = PTR_ERR(rule);
1549 		goto err_offload;
1550 	}
1551 
1552 	flow->attr->slow_mh = mh;
1553 	flow->chain_mapping = chain_mapping;
1554 	flow_flag_set(flow, SLOW);
1555 
1556 	mlx5e_mod_hdr_dealloc(&mod_acts);
1557 	kfree(slow_attr);
1558 
1559 	return rule;
1560 
1561 err_offload:
1562 	if (fwd_and_modify_cap)
1563 		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
1564 err_attach:
1565 err_reg_set:
1566 	if (fwd_and_modify_cap)
1567 		mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
1568 err_get_chain:
1569 	mlx5e_mod_hdr_dealloc(&mod_acts);
1570 	kfree(slow_attr);
1571 	return ERR_PTR(err);
1572 }
1573 
1574 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1575 				       struct mlx5e_tc_flow *flow)
1576 {
1577 	struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh;
1578 	struct mlx5_flow_attr *slow_attr;
1579 
1580 	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1581 	if (!slow_attr) {
1582 		mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1583 		return;
1584 	}
1585 
1586 	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1587 	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1588 	slow_attr->esw_attr->split_count = 0;
1589 	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1590 	if (slow_mh) {
1591 		slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1592 		slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh);
1593 	}
1594 	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1595 	if (slow_mh) {
1596 		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh);
1597 		mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
1598 		flow->chain_mapping = 0;
1599 		flow->attr->slow_mh = NULL;
1600 	}
1601 	flow_flag_clear(flow, SLOW);
1602 	kfree(slow_attr);
1603 }
1604 
1605 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1606  * function.
1607  */
1608 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1609 			     struct list_head *unready_flows)
1610 {
1611 	flow_flag_set(flow, NOT_READY);
1612 	list_add_tail(&flow->unready, unready_flows);
1613 }
1614 
1615 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1616  * function.
1617  */
1618 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1619 {
1620 	list_del(&flow->unready);
1621 	flow_flag_clear(flow, NOT_READY);
1622 }
1623 
1624 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1625 {
1626 	struct mlx5_rep_uplink_priv *uplink_priv;
1627 	struct mlx5e_rep_priv *rpriv;
1628 	struct mlx5_eswitch *esw;
1629 
1630 	esw = flow->priv->mdev->priv.eswitch;
1631 	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1632 	uplink_priv = &rpriv->uplink_priv;
1633 
1634 	mutex_lock(&uplink_priv->unready_flows_lock);
1635 	unready_flow_add(flow, &uplink_priv->unready_flows);
1636 	mutex_unlock(&uplink_priv->unready_flows_lock);
1637 }
1638 
1639 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1640 {
1641 	struct mlx5_rep_uplink_priv *uplink_priv;
1642 	struct mlx5e_rep_priv *rpriv;
1643 	struct mlx5_eswitch *esw;
1644 
1645 	esw = flow->priv->mdev->priv.eswitch;
1646 	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1647 	uplink_priv = &rpriv->uplink_priv;
1648 
1649 	mutex_lock(&uplink_priv->unready_flows_lock);
1650 	if (flow_flag_test(flow, NOT_READY))
1651 		unready_flow_del(flow);
1652 	mutex_unlock(&uplink_priv->unready_flows_lock);
1653 }
1654 
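/* A tunnel route is treated as a VF tunnel when the tunnel (out) device
 * belongs to the PF while the route device is a VF or SF on the same HW.
 */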
1655 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1656 {
1657 	struct mlx5_core_dev *out_mdev, *route_mdev;
1658 	struct mlx5e_priv *out_priv, *route_priv;
1659 
1660 	out_priv = netdev_priv(out_dev);
1661 	out_mdev = out_priv->mdev;
1662 	route_priv = netdev_priv(route_dev);
1663 	route_mdev = route_priv->mdev;
1664 
1665 	if (out_mdev->coredev_type != MLX5_COREDEV_PF)
1666 		return false;
1667 
1668 	if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
1669 	    route_mdev->coredev_type != MLX5_COREDEV_SF)
1670 		return false;
1671 
1672 	return mlx5e_same_hw_devs(out_priv, route_priv);
1673 }
1674 
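/* Resolve the eswitch vport behind route_dev: look up its vhca_id on the
 * local eswitch first and, if LAG is active, fall back to the devcom peer
 * eswitches.
 */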
1675 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1676 {
1677 	struct mlx5e_priv *out_priv, *route_priv;
1678 	struct mlx5_core_dev *route_mdev;
1679 	struct mlx5_devcom_comp_dev *pos;
1680 	struct mlx5_eswitch *esw;
1681 	u16 vhca_id;
1682 	int err;
1683 
1684 	out_priv = netdev_priv(out_dev);
1685 	esw = out_priv->mdev->priv.eswitch;
1686 	route_priv = netdev_priv(route_dev);
1687 	route_mdev = route_priv->mdev;
1688 
1689 	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1690 	err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1691 	if (!err)
1692 		return err;
1693 
1694 	if (!mlx5_lag_is_active(out_priv->mdev))
1695 		return err;
1696 
1697 	rcu_read_lock();
1698 	err = -ENODEV;
1699 	mlx5_devcom_for_each_peer_entry_rcu(esw->devcom, esw, pos) {
1700 		err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1701 		if (!err)
1702 			break;
1703 	}
1704 	rcu_read_unlock();
1705 
1706 	return err;
1707 }
1708 
1709 static int
1710 verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
1711 {
1712 	if (!(actions &
1713 	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1714 		NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
1715 		return -EOPNOTSUPP;
1716 	}
1717 
1718 	if (!(~actions &
1719 	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1720 		NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
1721 		return -EOPNOTSUPP;
1722 	}
1723 
1724 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1725 	    actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1726 		NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
1727 		return -EOPNOTSUPP;
1728 	}
1729 
1730 	return 0;
1731 }
1732 
1733 static bool
1734 has_encap_dests(struct mlx5_flow_attr *attr)
1735 {
1736 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
1737 	int out_index;
1738 
1739 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1740 		if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
1741 			return true;
1742 
1743 	return false;
1744 }
1745 
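/* Return the number of destinations to keep in this attr when the remaining
 * destinations must be moved to an extra split handled via a post action,
 * or 0 when no extra split is needed.
 */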
1746 static int
1747 extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
1748 {
1749 	bool int_dest = false, ext_dest = false;
1750 	struct mlx5_esw_flow_attr *esw_attr;
1751 	int i;
1752 
1753 	if (flow->attr != attr ||
1754 	    !list_is_first(&attr->list, &flow->attrs))
1755 		return 0;
1756 
1757 	esw_attr = attr->esw_attr;
1758 	if (!esw_attr->split_count ||
1759 	    esw_attr->split_count == esw_attr->out_count - 1)
1760 		return 0;
1761 
1762 	if (esw_attr->dest_int_port &&
1763 	    (esw_attr->dests[esw_attr->split_count].flags &
1764 	     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1765 		return esw_attr->split_count + 1;
1766 
1767 	for (i = esw_attr->split_count; i < esw_attr->out_count; i++) {
1768 		/* external dest with encap is considered internal by firmware */
1769 		if (esw_attr->dests[i].vport == MLX5_VPORT_UPLINK &&
1770 		    !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1771 			ext_dest = true;
1772 		else
1773 			int_dest = true;
1774 
1775 		if (ext_dest && int_dest)
1776 			return esw_attr->split_count;
1777 	}
1778 
1779 	return 0;
1780 }
1781 
1782 static int
1783 extra_split_attr_dests(struct mlx5e_tc_flow *flow,
1784 		       struct mlx5_flow_attr *attr, int split_count)
1785 {
1786 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
1787 	struct mlx5e_tc_flow_parse_attr *parse_attr, *parse_attr2;
1788 	struct mlx5_esw_flow_attr *esw_attr, *esw_attr2;
1789 	struct mlx5e_post_act_handle *handle;
1790 	struct mlx5_flow_attr *attr2;
1791 	int i, j, err;
1792 
1793 	if (IS_ERR(post_act))
1794 		return PTR_ERR(post_act);
1795 
1796 	attr2 = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
1797 	parse_attr2 = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
1798 	if (!attr2 || !parse_attr2) {
1799 		err = -ENOMEM;
1800 		goto err_free;
1801 	}
1802 	attr2->parse_attr = parse_attr2;
1803 
1804 	handle = mlx5e_tc_post_act_add(post_act, attr2);
1805 	if (IS_ERR(handle)) {
1806 		err = PTR_ERR(handle);
1807 		goto err_free;
1808 	}
1809 
1810 	esw_attr = attr->esw_attr;
1811 	esw_attr2 = attr2->esw_attr;
1812 	esw_attr2->in_rep = esw_attr->in_rep;
1813 
1814 	parse_attr = attr->parse_attr;
1815 	parse_attr2->filter_dev = parse_attr->filter_dev;
1816 
1817 	for (i = split_count, j = 0; i < esw_attr->out_count; i++, j++)
1818 		esw_attr2->dests[j] = esw_attr->dests[i];
1819 
1820 	esw_attr2->out_count = j;
1821 	attr2->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1822 
1823 	err = mlx5e_tc_post_act_offload(post_act, handle);
1824 	if (err)
1825 		goto err_post_act_offload;
1826 
1827 	err = mlx5e_tc_post_act_set_handle(flow->priv->mdev, handle,
1828 					   &parse_attr->mod_hdr_acts);
1829 	if (err)
1830 		goto err_post_act_set_handle;
1831 
1832 	esw_attr->out_count = split_count;
1833 	attr->extra_split_ft = mlx5e_tc_post_act_get_ft(post_act);
1834 	flow->extra_split_attr = attr2;
1835 
1836 	attr2->post_act_handle = handle;
1837 
1838 	return 0;
1839 
1840 err_post_act_set_handle:
1841 	mlx5e_tc_post_act_unoffload(post_act, handle);
1842 err_post_act_offload:
1843 	mlx5e_tc_post_act_del(post_act, handle);
1844 err_free:
1845 	kvfree(parse_attr2);
1846 	kfree(attr2);
1847 	return err;
1848 }
1849 
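/* Finalize the flow attr before offload: validate the actions, set up encap
 * destinations, add an extra destination split if needed, attach modify-header
 * contexts (including branch attrs) and allocate a flow counter.
 */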
1850 static int
1851 post_process_attr(struct mlx5e_tc_flow *flow,
1852 		  struct mlx5_flow_attr *attr,
1853 		  struct netlink_ext_ack *extack)
1854 {
1855 	int extra_split;
1856 	bool vf_tun;
1857 	int err = 0;
1858 
1859 	err = verify_attr_actions(attr->action, extack);
1860 	if (err)
1861 		goto err_out;
1862 
1863 	if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) {
1864 		err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
1865 		if (err)
1866 			goto err_out;
1867 	}
1868 
1869 	extra_split = extra_split_attr_dests_needed(flow, attr);
1870 	if (extra_split > 0) {
1871 		err = extra_split_attr_dests(flow, attr, extra_split);
1872 		if (err)
1873 			goto err_out;
1874 	}
1875 
1876 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1877 		err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
1878 		if (err)
1879 			goto err_out;
1880 	}
1881 
1882 	if (attr->branch_true &&
1883 	    attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1884 		err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true);
1885 		if (err)
1886 			goto err_out;
1887 	}
1888 
1889 	if (attr->branch_false &&
1890 	    attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1891 		err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false);
1892 		if (err)
1893 			goto err_out;
1894 	}
1895 
1896 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1897 		err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
1898 		if (err)
1899 			goto err_out;
1900 	}
1901 
1902 err_out:
1903 	return err;
1904 }
1905 
1906 static int
1907 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1908 		      struct mlx5e_tc_flow *flow,
1909 		      struct netlink_ext_ack *extack)
1910 {
1911 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1912 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1913 	struct mlx5_flow_attr *attr = flow->attr;
1914 	struct mlx5_esw_flow_attr *esw_attr;
1915 	u32 max_prio, max_chain;
1916 	int err = 0;
1917 
1918 	parse_attr = attr->parse_attr;
1919 	esw_attr = attr->esw_attr;
1920 
1921 	/* We check chain range only for tc flows.
1922 	 * For ft flows, we checked attr->chain was originally 0 and set it to
1923 	 * FDB_FT_CHAIN which is outside tc range.
1924 	 * See mlx5e_rep_setup_ft_cb().
1925 	 */
1926 	max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1927 	if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1928 		NL_SET_ERR_MSG_MOD(extack,
1929 				   "Requested chain is out of supported range");
1930 		err = -EOPNOTSUPP;
1931 		goto err_out;
1932 	}
1933 
1934 	max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1935 	if (attr->prio > max_prio) {
1936 		NL_SET_ERR_MSG_MOD(extack,
1937 				   "Requested priority is out of supported range");
1938 		err = -EOPNOTSUPP;
1939 		goto err_out;
1940 	}
1941 
1942 	if (flow_flag_test(flow, TUN_RX)) {
1943 		err = mlx5e_attach_decap_route(priv, flow);
1944 		if (err)
1945 			goto err_out;
1946 
1947 		if (!attr->chain && esw_attr->int_port &&
1948 		    attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1949 			/* If the decap route device is an internal port, change the
1950 			 * source vport value in reg_c0 back to uplink just in
1951 			 * case the rule performs goto chain > 0. If we have a miss
1952 			 * on chain > 0 we want the metadata regs to hold the
1953 			 * chain id so SW will resume handling of this packet
1954 			 * from the proper chain.
1955 			 */
1956 			u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
1957 									esw_attr->in_rep->vport);
1958 
1959 			err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
1960 							MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
1961 							metadata);
1962 			if (err)
1963 				goto err_out;
1964 
1965 			attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1966 		}
1967 	}
1968 
1969 	if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1970 		err = mlx5e_attach_decap(priv, flow, extack);
1971 		if (err)
1972 			goto err_out;
1973 	}
1974 
1975 	if (netif_is_ovs_master(parse_attr->filter_dev)) {
1976 		struct mlx5e_tc_int_port *int_port;
1977 
1978 		if (attr->chain) {
1979 			NL_SET_ERR_MSG_MOD(extack,
1980 					   "Internal port rule is only supported on chain 0");
1981 			err = -EOPNOTSUPP;
1982 			goto err_out;
1983 		}
1984 
1985 		if (attr->dest_chain) {
1986 			NL_SET_ERR_MSG_MOD(extack,
1987 					   "Internal port rule offload doesn't support goto action");
1988 			err = -EOPNOTSUPP;
1989 			goto err_out;
1990 		}
1991 
1992 		int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
1993 						 parse_attr->filter_dev->ifindex,
1994 						 flow_flag_test(flow, EGRESS) ?
1995 						 MLX5E_TC_INT_PORT_EGRESS :
1996 						 MLX5E_TC_INT_PORT_INGRESS);
1997 		if (IS_ERR(int_port)) {
1998 			err = PTR_ERR(int_port);
1999 			goto err_out;
2000 		}
2001 
2002 		esw_attr->int_port = int_port;
2003 	}
2004 
2005 	err = post_process_attr(flow, attr, extack);
2006 	if (err)
2007 		goto err_out;
2008 
2009 	err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow);
2010 	if (err)
2011 		goto err_out;
2012 
2013 	/* we get here if one of the following takes place:
2014 	 * (1) there's no error
2015 	 * (2) there's an encap action and we don't have a valid neigh
2016 	 */
2017 	if (flow_flag_test(flow, SLOW))
2018 		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
2019 	else
2020 		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
2021 
2022 	if (IS_ERR(flow->rule[0])) {
2023 		err = PTR_ERR(flow->rule[0]);
2024 		goto err_out;
2025 	}
2026 	flow_flag_set(flow, OFFLOADED);
2027 
2028 	return 0;
2029 
2030 err_out:
2031 	flow_flag_set(flow, FAILED);
2032 	return err;
2033 }
2034 
2035 static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec)
2036 {
2037 	void *headers_v = MLX5_ADDR_OF(fte_match_param,
2038 				       spec->match_value,
2039 				       misc_parameters_3);
2040 	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
2041 					     headers_v,
2042 					     geneve_tlv_option_0_data);
2043 
2044 	return !!geneve_tlv_opt_0_data;
2045 }
2046 
2047 static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
2048 {
2049 	if (!attr)
2050 		return;
2051 
2052 	mlx5_free_flow_attr_actions(flow, attr);
2053 	kvfree(attr->parse_attr);
2054 	kfree(attr);
2055 }
2056 
2057 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
2058 				  struct mlx5e_tc_flow *flow)
2059 {
2060 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2061 	struct mlx5_flow_attr *attr = flow->attr;
2062 
2063 	mlx5e_put_flow_tunnel_id(flow);
2064 
2065 	remove_unready_flow(flow);
2066 
2067 	if (mlx5e_is_offloaded_flow(flow)) {
2068 		if (flow_flag_test(flow, SLOW))
2069 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
2070 		else
2071 			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
2072 	}
2073 	complete_all(&flow->del_hw_done);
2074 
2075 	if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec))
2076 		mlx5_geneve_tlv_option_del(priv->mdev->geneve);
2077 
2078 	if (flow->decap_route)
2079 		mlx5e_detach_decap_route(priv, flow);
2080 
2081 	mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
2082 
2083 	if (flow_flag_test(flow, L3_TO_L2_DECAP))
2084 		mlx5e_detach_decap(priv, flow);
2085 
2086 	mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);
2087 
2088 	free_flow_post_acts(flow);
2089 	if (flow->extra_split_attr) {
2090 		mlx5_free_flow_attr_actions(flow, flow->extra_split_attr);
2091 		kvfree(flow->extra_split_attr->parse_attr);
2092 		kfree(flow->extra_split_attr);
2093 	}
2094 	mlx5_free_flow_attr_actions(flow, attr);
2095 
2096 	kvfree(attr->esw_attr->rx_tun_attr);
2097 	kvfree(attr->parse_attr);
2098 	kfree(flow->attr);
2099 }
2100 
2101 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
2102 {
2103 	struct mlx5_flow_attr *attr;
2104 
2105 	attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
2106 	return attr->counter;
2107 }
2108 
2109 /* Iterate over tmp_list of flows attached to flow_list head. */
2110 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
2111 {
2112 	struct mlx5e_tc_flow *flow, *tmp;
2113 
2114 	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
2115 		mlx5e_flow_put(priv, flow);
2116 }
2117 
2118 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
2119 				       int peer_index)
2120 {
2121 	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
2122 	struct mlx5e_tc_flow *peer_flow;
2123 	struct mlx5e_tc_flow *tmp;
2124 
2125 	if (!flow_flag_test(flow, ESWITCH) ||
2126 	    !flow_flag_test(flow, DUP))
2127 		return;
2128 
2129 	mutex_lock(&esw->offloads.peer_mutex);
2130 	list_del(&flow->peer[peer_index]);
2131 	mutex_unlock(&esw->offloads.peer_mutex);
2132 
2133 	list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
2134 		if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
2135 			continue;
2136 
2137 		list_del(&peer_flow->peer_flows);
2138 		if (refcount_dec_and_test(&peer_flow->refcnt)) {
2139 			mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow);
2140 			kfree(peer_flow);
2141 		}
2142 	}
2143 
2144 	if (list_empty(&flow->peer_flows))
2145 		flow_flag_clear(flow, DUP);
2146 }
2147 
2148 static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow)
2149 {
2150 	struct mlx5_devcom_comp_dev *devcom;
2151 	struct mlx5_devcom_comp_dev *pos;
2152 	struct mlx5_eswitch *peer_esw;
2153 	int i;
2154 
2155 	devcom = flow->priv->mdev->priv.eswitch->devcom;
2156 	mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
2157 		i = mlx5_get_dev_index(peer_esw->dev);
2158 		mlx5e_tc_del_fdb_peer_flow(flow, i);
2159 	}
2160 }
2161 
2162 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
2163 			      struct mlx5e_tc_flow *flow)
2164 {
2165 	if (mlx5e_is_eswitch_flow(flow)) {
2166 		struct mlx5_devcom_comp_dev *devcom = flow->priv->mdev->priv.eswitch->devcom;
2167 
2168 		if (!mlx5_devcom_for_each_peer_begin(devcom)) {
2169 			mlx5e_tc_del_fdb_flow(priv, flow);
2170 			return;
2171 		}
2172 
2173 		mlx5e_tc_del_fdb_peers_flow(flow);
2174 		mlx5_devcom_for_each_peer_end(devcom);
2175 		mlx5e_tc_del_fdb_flow(priv, flow);
2176 	} else {
2177 		mlx5e_tc_del_nic_flow(priv, flow);
2178 	}
2179 }
2180 
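/* Tunnel mapping is needed for chain 0 rules that goto another chain or
 * sample the packet; rules on chain > 0 never set the mapping here.
 */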
2181 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
2182 {
2183 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2184 	struct flow_action *flow_action = &rule->action;
2185 	const struct flow_action_entry *act;
2186 	int i;
2187 
2188 	if (chain)
2189 		return false;
2190 
2191 	flow_action_for_each(i, act, flow_action) {
2192 		switch (act->id) {
2193 		case FLOW_ACTION_GOTO:
2194 			return true;
2195 		case FLOW_ACTION_SAMPLE:
2196 			return true;
2197 		default:
2198 			continue;
2199 		}
2200 	}
2201 
2202 	return false;
2203 }
2204 
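/* Geneve TLV options in the mask must either be completely unmasked
 * (don't care) or matched in full (class, type and data fully masked).
 */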
2205 static int
2206 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
2207 				    struct flow_dissector_key_enc_opts *opts,
2208 				    struct netlink_ext_ack *extack,
2209 				    bool *dont_care)
2210 {
2211 	struct geneve_opt *opt;
2212 	int off = 0;
2213 
2214 	*dont_care = true;
2215 
2216 	while (opts->len > off) {
2217 		opt = (struct geneve_opt *)&opts->data[off];
2218 
2219 		if (!(*dont_care) || opt->opt_class || opt->type ||
2220 		    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
2221 			*dont_care = false;
2222 
2223 			if (opt->opt_class != htons(U16_MAX) ||
2224 			    opt->type != U8_MAX) {
2225 				NL_SET_ERR_MSG_MOD(extack,
2226 						   "Partial match of tunnel options in chain > 0 isn't supported");
2227 				netdev_warn(priv->netdev,
2228 					    "Partial match of tunnel options in chain > 0 isn't supported");
2229 				return -EOPNOTSUPP;
2230 			}
2231 		}
2232 
2233 		off += sizeof(struct geneve_opt) + opt->length * 4;
2234 	}
2235 
2236 	return 0;
2237 }
2238 
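
/* Copy the dissector key identified by diss_key out of the rule's match key
 * into dst.
 */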
2239 #define COPY_DISSECTOR(rule, diss_key, dst)\
2240 ({ \
2241 	struct flow_rule *__rule = (rule);\
2242 	typeof(dst) __dst = dst;\
2243 \
2244 	memcpy(__dst,\
2245 	       skb_flow_dissector_target(__rule->match.dissector,\
2246 					 diss_key,\
2247 					 __rule->match.key),\
2248 	       sizeof(*__dst));\
2249 })
2250 
2251 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
2252 				    struct mlx5e_tc_flow *flow,
2253 				    struct flow_cls_offload *f,
2254 				    struct net_device *filter_dev)
2255 {
2256 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2257 	struct netlink_ext_ack *extack = f->common.extack;
2258 	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
2259 	struct flow_match_enc_opts enc_opts_match;
2260 	struct tunnel_match_enc_opts tun_enc_opts;
2261 	struct mlx5_rep_uplink_priv *uplink_priv;
2262 	struct mlx5_flow_attr *attr = flow->attr;
2263 	struct mlx5e_rep_priv *uplink_rpriv;
2264 	struct tunnel_match_key tunnel_key;
2265 	bool enc_opts_is_dont_care = true;
2266 	u32 tun_id, enc_opts_id = 0;
2267 	struct mlx5_eswitch *esw;
2268 	u32 value, mask;
2269 	int err;
2270 
2271 	esw = priv->mdev->priv.eswitch;
2272 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2273 	uplink_priv = &uplink_rpriv->uplink_priv;
2274 
2275 	memset(&tunnel_key, 0, sizeof(tunnel_key));
2276 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2277 		       &tunnel_key.enc_control);
2278 	if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2279 		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2280 			       &tunnel_key.enc_ipv4);
2281 	else
2282 		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2283 			       &tunnel_key.enc_ipv6);
2284 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2285 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2286 		       &tunnel_key.enc_tp);
2287 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2288 		       &tunnel_key.enc_key_id);
2289 	tunnel_key.filter_ifindex = filter_dev->ifindex;
2290 
2291 	err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2292 	if (err)
2293 		return err;
2294 
2295 	flow_rule_match_enc_opts(rule, &enc_opts_match);
2296 	err = enc_opts_is_dont_care_or_full_match(priv,
2297 						  enc_opts_match.mask,
2298 						  extack,
2299 						  &enc_opts_is_dont_care);
2300 	if (err)
2301 		goto err_enc_opts;
2302 
2303 	if (!enc_opts_is_dont_care) {
2304 		memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2305 		memcpy(&tun_enc_opts.key, enc_opts_match.key,
2306 		       sizeof(*enc_opts_match.key));
2307 		memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2308 		       sizeof(*enc_opts_match.mask));
2309 
2310 		err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2311 				  &tun_enc_opts, &enc_opts_id);
2312 		if (err)
2313 			goto err_enc_opts;
2314 	}
2315 
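	/* Pack both mapping ids into one register value: the tunnel mapping id
	 * in the high bits and the enc opts mapping id in the low ENC_OPTS_BITS.
	 */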
2316 	value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2317 	mask = enc_opts_id ? TUNNEL_ID_MASK :
2318 			     (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2319 
2320 	if (attr->chain) {
2321 		mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2322 					    TUNNEL_TO_REG, value, mask);
2323 	} else {
2324 		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2325 		err = mlx5e_tc_match_to_reg_set(priv->mdev,
2326 						mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2327 						TUNNEL_TO_REG, value);
2328 		if (err)
2329 			goto err_set;
2330 
2331 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2332 	}
2333 
2334 	flow->attr->tunnel_id = value;
2335 	return 0;
2336 
2337 err_set:
2338 	if (enc_opts_id)
2339 		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2340 			       enc_opts_id);
2341 err_enc_opts:
2342 	mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2343 	return err;
2344 }
2345 
2346 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2347 {
2348 	u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
2349 	u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
2350 	struct mlx5_rep_uplink_priv *uplink_priv;
2351 	struct mlx5e_rep_priv *uplink_rpriv;
2352 	struct mlx5_eswitch *esw;
2353 
2354 	esw = flow->priv->mdev->priv.eswitch;
2355 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2356 	uplink_priv = &uplink_rpriv->uplink_priv;
2357 
2358 	if (tun_id)
2359 		mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2360 	if (enc_opts_id)
2361 		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2362 			       enc_opts_id);
2363 }
2364 
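/* Prefer matching on ip_version over ethertype when the device supports it
 * and the rule fully matches an IPv4/IPv6 ethertype; otherwise fall back to
 * an ethertype match.
 */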
2365 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2366 			    struct flow_match_basic *match, bool outer,
2367 			    void *headers_c, void *headers_v)
2368 {
2369 	bool ip_version_cap;
2370 
2371 	ip_version_cap = outer ?
2372 		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2373 					  ft_field_support.outer_ip_version) :
2374 		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2375 					  ft_field_support.inner_ip_version);
2376 
2377 	if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2378 	    (match->key->n_proto == htons(ETH_P_IP) ||
2379 	     match->key->n_proto == htons(ETH_P_IPV6))) {
2380 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2381 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2382 			 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2383 	} else {
2384 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2385 			 ntohs(match->mask->n_proto));
2386 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2387 			 ntohs(match->key->n_proto));
2388 	}
2389 }
2390 
2391 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2392 {
2393 	void *headers_v;
2394 	u16 ethertype;
2395 	u8 ip_version;
2396 
2397 	if (outer)
2398 		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2399 	else
2400 		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2401 
2402 	ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2403 	/* If ip_version is not set, derive it from the ethertype */
2404 	if (!ip_version) {
2405 		ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2406 		if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2407 			ip_version = 4;
2408 		else if (ethertype == ETH_P_IPV6)
2409 			ip_version = 6;
2410 	}
2411 	return ip_version;
2412 }
2413 
2414 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
2415  * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
2416  *      +---------+----------------------------------------+
2417  *      |Arriving |         Arriving Outer Header          |
2418  *      |   Inner +---------+---------+---------+----------+
2419  *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
2420  *      +---------+---------+---------+---------+----------+
2421  *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
2422  *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
2423  *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
2424  *      |    CE   |   CE    |  CE     | CE      |   CE     |
2425  *      +---------+---------+---------+---------+----------+
2426  *
2427  * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
2428  * the inner ip_ecn value before hardware decap action.
2429  *
2430  * Cells marked with (*) are changed from the original inner packet ip_ecn value during decap,
2431  * so matching those values on inner ip_ecn before decap will fail.
2432  *
2433  * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
2434  * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
2435  * and so we can drop the inner ip_ecn=CE match.
2436  */
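/* Example: a rule matching enc_tos ecn == CE together with tos ecn == CE is
 * offloaded with the inner ecn match dropped, since decap rewrites inner ecn
 * to CE whenever the outer ecn is CE.
 */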
2437 
2438 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2439 				      struct flow_cls_offload *f,
2440 				      bool *match_inner_ecn)
2441 {
2442 	u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2443 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2444 	struct netlink_ext_ack *extack = f->common.extack;
2445 	struct flow_match_ip match;
2446 
2447 	*match_inner_ecn = true;
2448 
2449 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2450 		flow_rule_match_enc_ip(rule, &match);
2451 		outer_ecn_key = match.key->tos & INET_ECN_MASK;
2452 		outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2453 	}
2454 
2455 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2456 		flow_rule_match_ip(rule, &match);
2457 		inner_ecn_key = match.key->tos & INET_ECN_MASK;
2458 		inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2459 	}
2460 
2461 	if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2462 		NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2463 		netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2464 		return -EOPNOTSUPP;
2465 	}
2466 
2467 	if (!outer_ecn_mask) {
2468 		if (!inner_ecn_mask)
2469 			return 0;
2470 
2471 		NL_SET_ERR_MSG_MOD(extack,
2472 				   "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2473 		netdev_warn(priv->netdev,
2474 			    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2475 		return -EOPNOTSUPP;
2476 	}
2477 
2478 	if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2479 		NL_SET_ERR_MSG_MOD(extack,
2480 				   "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2481 		netdev_warn(priv->netdev,
2482 			    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2483 		return -EOPNOTSUPP;
2484 	}
2485 
2486 	if (!inner_ecn_mask)
2487 		return 0;
2488 
2489 	/* Both inner and outer have full mask on ecn */
2490 
2491 	if (outer_ecn_key == INET_ECN_ECT_1) {
2492 		/* inner ecn might change by DECAP action */
2493 
2494 		NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2495 		netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2496 		return -EOPNOTSUPP;
2497 	}
2498 
2499 	if (outer_ecn_key != INET_ECN_CE)
2500 		return 0;
2501 
2502 	if (inner_ecn_key != INET_ECN_CE) {
2503 		/* Can't happen in software, as packet ecn will be changed to CE after decap */
2504 		NL_SET_ERR_MSG_MOD(extack,
2505 				   "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2506 		netdev_warn(priv->netdev,
2507 			    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2508 		return -EOPNOTSUPP;
2509 	}
2510 
2511 	/* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in any case,
2512 	 * drop match on inner ecn
2513 	 */
2514 	*match_inner_ecn = false;
2515 
2516 	return 0;
2517 }
2518 
2519 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2520 			     struct mlx5e_tc_flow *flow,
2521 			     struct mlx5_flow_spec *spec,
2522 			     struct flow_cls_offload *f,
2523 			     struct net_device *filter_dev,
2524 			     u8 *match_level,
2525 			     bool *match_inner)
2526 {
2527 	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2528 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2529 	struct netlink_ext_ack *extack = f->common.extack;
2530 	bool needs_mapping, sets_mapping;
2531 	int err;
2532 
2533 	if (!mlx5e_is_eswitch_flow(flow)) {
2534 		NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2535 		return -EOPNOTSUPP;
2536 	}
2537 
2538 	needs_mapping = !!flow->attr->chain;
2539 	sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2540 	*match_inner = !needs_mapping;
2541 
2542 	if ((needs_mapping || sets_mapping) &&
2543 	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2544 		NL_SET_ERR_MSG_MOD(extack,
2545 				   "Chains on tunnel devices isn't supported without register loopback support");
2546 		netdev_warn(priv->netdev,
2547 			    "Chains on tunnel devices isn't supported without register loopback support");
2548 		return -EOPNOTSUPP;
2549 	}
2550 
2551 	if (!flow->attr->chain) {
2552 		err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2553 					 match_level);
2554 		if (err) {
2555 			NL_SET_ERR_MSG_MOD(extack,
2556 					   "Failed to parse tunnel attributes");
2557 			netdev_warn(priv->netdev,
2558 				    "Failed to parse tunnel attributes");
2559 			return err;
2560 		}
2561 
2562 		/* With mpls over udp we decapsulate using packet reformat
2563 		 * object
2564 		 */
2565 		if (!netif_is_bareudp(filter_dev))
2566 			flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2567 		err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2568 		if (err)
2569 			return err;
2570 	} else if (tunnel) {
2571 		struct mlx5_flow_spec *tmp_spec;
2572 
2573 		tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2574 		if (!tmp_spec) {
2575 			NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
2576 			netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
2577 			return -ENOMEM;
2578 		}
2579 		memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2580 
2581 		err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2582 		if (err) {
2583 			NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2584 			netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2585 		} else {
2586 			err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2587 		}
2588 		if (mlx5_flow_has_geneve_opt(tmp_spec))
2589 			mlx5_geneve_tlv_option_del(priv->mdev->geneve);
2590 		kvfree(tmp_spec);
2591 		if (err)
2592 			return err;
2593 	}
2594 
2595 	if (!needs_mapping && !sets_mapping)
2596 		return 0;
2597 
2598 	return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2599 }
2600 
2601 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2602 {
2603 	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2604 			    inner_headers);
2605 }
2606 
2607 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2608 {
2609 	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2610 			    inner_headers);
2611 }
2612 
2613 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2614 {
2615 	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2616 			    outer_headers);
2617 }
2618 
2619 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2620 {
2621 	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2622 			    outer_headers);
2623 }
2624 
2625 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2626 {
2627 	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2628 		get_match_inner_headers_value(spec) :
2629 		get_match_outer_headers_value(spec);
2630 }
2631 
2632 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2633 {
2634 	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2635 		get_match_inner_headers_criteria(spec) :
2636 		get_match_outer_headers_criteria(spec);
2637 }
2638 
2639 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2640 				   struct flow_cls_offload *f)
2641 {
2642 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2643 	struct netlink_ext_ack *extack = f->common.extack;
2644 	struct net_device *ingress_dev;
2645 	struct flow_match_meta match;
2646 
2647 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2648 		return 0;
2649 
2650 	flow_rule_match_meta(rule, &match);
2651 
2652 	if (match.mask->l2_miss) {
2653 		NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
2654 		return -EOPNOTSUPP;
2655 	}
2656 
2657 	if (!match.mask->ingress_ifindex)
2658 		return 0;
2659 
2660 	if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2661 		NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2662 		return -EOPNOTSUPP;
2663 	}
2664 
2665 	ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2666 					 match.key->ingress_ifindex);
2667 	if (!ingress_dev) {
2668 		NL_SET_ERR_MSG_MOD(extack,
2669 				   "Can't find the ingress port to match on");
2670 		return -ENOENT;
2671 	}
2672 
2673 	if (ingress_dev != filter_dev) {
2674 		NL_SET_ERR_MSG_MOD(extack,
2675 				   "Can't match on the ingress filter port");
2676 		return -EOPNOTSUPP;
2677 	}
2678 
2679 	return 0;
2680 }
2681 
2682 static bool skip_key_basic(struct net_device *filter_dev,
2683 			   struct flow_cls_offload *f)
2684 {
2685 	/* When doing mpls over udp decap, the user needs to provide
2686 	 * MPLS_UC as the protocol in order to be able to match on mpls
2687 	 * label fields.  However, the actual ethertype is IP so we want to
2688 	 * avoid matching on this, otherwise we'll fail the match.
2689 	 */
2690 	if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2691 		return true;
2692 
2693 	return false;
2694 }
2695 
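/* Translate the flower match into the mlx5 flow spec match criteria/values.
 * inner_match_level/outer_match_level report the deepest header layer matched
 * (L2/L3/L4) for the inner and outer headers respectively.
 */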
2696 static int __parse_cls_flower(struct mlx5e_priv *priv,
2697 			      struct mlx5e_tc_flow *flow,
2698 			      struct mlx5_flow_spec *spec,
2699 			      struct flow_cls_offload *f,
2700 			      struct net_device *filter_dev,
2701 			      u8 *inner_match_level, u8 *outer_match_level)
2702 {
2703 	struct netlink_ext_ack *extack = f->common.extack;
2704 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2705 				       outer_headers);
2706 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2707 				       outer_headers);
2708 	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2709 				    misc_parameters);
2710 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2711 				    misc_parameters);
2712 	void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2713 				    misc_parameters_3);
2714 	void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2715 				    misc_parameters_3);
2716 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2717 	struct flow_dissector *dissector = rule->match.dissector;
2718 	enum fs_flow_table_type fs_type;
2719 	bool match_inner_ecn = true;
2720 	u16 addr_type = 0;
2721 	u8 ip_proto = 0;
2722 	u8 *match_level;
2723 	int err;
2724 
2725 	fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2726 	match_level = outer_match_level;
2727 
2728 	if (dissector->used_keys &
2729 	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_META) |
2730 	      BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
2731 	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
2732 	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2733 	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
2734 	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
2735 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2736 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2737 	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
2738 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2739 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2740 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2741 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS)	|
2742 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2743 	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
2744 	      BIT_ULL(FLOW_DISSECTOR_KEY_IP)  |
2745 	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
2746 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
2747 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2748 	      BIT_ULL(FLOW_DISSECTOR_KEY_ICMP) |
2749 	      BIT_ULL(FLOW_DISSECTOR_KEY_MPLS))) {
2750 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2751 		netdev_dbg(priv->netdev, "Unsupported key used: 0x%llx\n",
2752 			   dissector->used_keys);
2753 		return -EOPNOTSUPP;
2754 	}
2755 
2756 	if (mlx5e_get_tc_tun(filter_dev)) {
2757 		bool match_inner = false;
2758 
2759 		err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2760 					outer_match_level, &match_inner);
2761 		if (err)
2762 			return err;
2763 
2764 		if (match_inner) {
2765 			/* header pointers should point to the inner headers
2766 			 * if the packet was decapsulated already.
2767 			 * outer headers are set by parse_tunnel_attr.
2768 			 */
2769 			match_level = inner_match_level;
2770 			headers_c = get_match_inner_headers_criteria(spec);
2771 			headers_v = get_match_inner_headers_value(spec);
2772 		}
2773 
2774 		err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2775 		if (err)
2776 			return err;
2777 	}
2778 
2779 	err = mlx5e_flower_parse_meta(filter_dev, f);
2780 	if (err)
2781 		return err;
2782 
2783 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2784 	    !skip_key_basic(filter_dev, f)) {
2785 		struct flow_match_basic match;
2786 
2787 		flow_rule_match_basic(rule, &match);
2788 		mlx5e_tc_set_ethertype(priv->mdev, &match,
2789 				       match_level == outer_match_level,
2790 				       headers_c, headers_v);
2791 
2792 		if (match.mask->n_proto)
2793 			*match_level = MLX5_MATCH_L2;
2794 	}
2795 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2796 	    is_vlan_dev(filter_dev)) {
2797 		struct flow_dissector_key_vlan filter_dev_mask;
2798 		struct flow_dissector_key_vlan filter_dev_key;
2799 		struct flow_match_vlan match;
2800 
2801 		if (is_vlan_dev(filter_dev)) {
2802 			match.key = &filter_dev_key;
2803 			match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2804 			match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2805 			match.key->vlan_priority = 0;
2806 			match.mask = &filter_dev_mask;
2807 			memset(match.mask, 0xff, sizeof(*match.mask));
2808 			match.mask->vlan_priority = 0;
2809 		} else {
2810 			flow_rule_match_vlan(rule, &match);
2811 		}
2812 		if (match.mask->vlan_id ||
2813 		    match.mask->vlan_priority ||
2814 		    match.mask->vlan_tpid) {
2815 			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2816 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2817 					 svlan_tag, 1);
2818 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2819 					 svlan_tag, 1);
2820 			} else {
2821 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2822 					 cvlan_tag, 1);
2823 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2824 					 cvlan_tag, 1);
2825 			}
2826 
2827 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2828 				 match.mask->vlan_id);
2829 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2830 				 match.key->vlan_id);
2831 
2832 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2833 				 match.mask->vlan_priority);
2834 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2835 				 match.key->vlan_priority);
2836 
2837 			*match_level = MLX5_MATCH_L2;
2838 
2839 			if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2840 			    match.mask->vlan_eth_type &&
2841 			    MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2842 						    ft_field_support.outer_second_vid,
2843 						    fs_type)) {
2844 				MLX5_SET(fte_match_set_misc, misc_c,
2845 					 outer_second_cvlan_tag, 1);
2846 				spec->match_criteria_enable |=
2847 					MLX5_MATCH_MISC_PARAMETERS;
2848 			}
2849 		}
2850 	} else if (*match_level != MLX5_MATCH_NONE) {
2851 		/* cvlan_tag enabled in match criteria and
2852 		 * disabled in match value means both S & C tags
2853 		 * don't exist (untagged for both)
2854 		 */
2855 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2856 		*match_level = MLX5_MATCH_L2;
2857 	}
2858 
2859 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2860 		struct flow_match_vlan match;
2861 
2862 		flow_rule_match_cvlan(rule, &match);
2863 		if (match.mask->vlan_id ||
2864 		    match.mask->vlan_priority ||
2865 		    match.mask->vlan_tpid) {
2866 			if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2867 						     fs_type)) {
2868 				NL_SET_ERR_MSG_MOD(extack,
2869 						   "Matching on CVLAN is not supported");
2870 				return -EOPNOTSUPP;
2871 			}
2872 
2873 			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2874 				MLX5_SET(fte_match_set_misc, misc_c,
2875 					 outer_second_svlan_tag, 1);
2876 				MLX5_SET(fte_match_set_misc, misc_v,
2877 					 outer_second_svlan_tag, 1);
2878 			} else {
2879 				MLX5_SET(fte_match_set_misc, misc_c,
2880 					 outer_second_cvlan_tag, 1);
2881 				MLX5_SET(fte_match_set_misc, misc_v,
2882 					 outer_second_cvlan_tag, 1);
2883 			}
2884 
2885 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2886 				 match.mask->vlan_id);
2887 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2888 				 match.key->vlan_id);
2889 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2890 				 match.mask->vlan_priority);
2891 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2892 				 match.key->vlan_priority);
2893 
2894 			*match_level = MLX5_MATCH_L2;
2895 			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2896 		}
2897 	}
2898 
2899 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2900 		struct flow_match_eth_addrs match;
2901 
2902 		flow_rule_match_eth_addrs(rule, &match);
2903 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2904 					     dmac_47_16),
2905 				match.mask->dst);
2906 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2907 					     dmac_47_16),
2908 				match.key->dst);
2909 
2910 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2911 					     smac_47_16),
2912 				match.mask->src);
2913 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2914 					     smac_47_16),
2915 				match.key->src);
2916 
2917 		if (!is_zero_ether_addr(match.mask->src) ||
2918 		    !is_zero_ether_addr(match.mask->dst))
2919 			*match_level = MLX5_MATCH_L2;
2920 	}
2921 
2922 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2923 		struct flow_match_control match;
2924 
2925 		flow_rule_match_control(rule, &match);
2926 		addr_type = match.key->addr_type;
2927 
2928 		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2929 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2930 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2931 				 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2932 
2933 			/* the HW doesn't need L3 inline to match on frag=no */
2934 			if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2935 				*match_level = MLX5_MATCH_L2;
2936 	/* ***  L2 attributes parsing up to here *** */
2937 			else
2938 				*match_level = MLX5_MATCH_L3;
2939 		}
2940 
2941 		if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT,
2942 						     match.mask->flags, extack))
2943 			return -EOPNOTSUPP;
2944 	}
2945 
2946 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2947 		struct flow_match_basic match;
2948 
2949 		flow_rule_match_basic(rule, &match);
2950 		ip_proto = match.key->ip_proto;
2951 
2952 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2953 			 match.mask->ip_proto);
2954 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2955 			 match.key->ip_proto);
2956 
2957 		if (match.mask->ip_proto)
2958 			*match_level = MLX5_MATCH_L3;
2959 	}
2960 
2961 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2962 		struct flow_match_ipv4_addrs match;
2963 
2964 		flow_rule_match_ipv4_addrs(rule, &match);
2965 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2966 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2967 		       &match.mask->src, sizeof(match.mask->src));
2968 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2969 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2970 		       &match.key->src, sizeof(match.key->src));
2971 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2972 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2973 		       &match.mask->dst, sizeof(match.mask->dst));
2974 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2975 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2976 		       &match.key->dst, sizeof(match.key->dst));
2977 
2978 		if (match.mask->src || match.mask->dst)
2979 			*match_level = MLX5_MATCH_L3;
2980 	}
2981 
2982 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2983 		struct flow_match_ipv6_addrs match;
2984 
2985 		flow_rule_match_ipv6_addrs(rule, &match);
2986 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2987 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2988 		       &match.mask->src, sizeof(match.mask->src));
2989 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2990 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2991 		       &match.key->src, sizeof(match.key->src));
2992 
2993 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2994 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2995 		       &match.mask->dst, sizeof(match.mask->dst));
2996 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2997 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2998 		       &match.key->dst, sizeof(match.key->dst));
2999 
3000 		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
3001 		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
3002 			*match_level = MLX5_MATCH_L3;
3003 	}
3004 
3005 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
3006 		struct flow_match_ip match;
3007 
3008 		flow_rule_match_ip(rule, &match);
3009 		if (match_inner_ecn) {
3010 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
3011 				 match.mask->tos & 0x3);
3012 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
3013 				 match.key->tos & 0x3);
3014 		}
3015 
3016 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
3017 			 match.mask->tos >> 2);
3018 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
3019 			 match.key->tos  >> 2);
3020 
3021 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
3022 			 match.mask->ttl);
3023 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
3024 			 match.key->ttl);
3025 
3026 		if (match.mask->ttl &&
3027 		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
3028 						ft_field_support.outer_ipv4_ttl)) {
3029 			NL_SET_ERR_MSG_MOD(extack,
3030 					   "Matching on TTL is not supported");
3031 			return -EOPNOTSUPP;
3032 		}
3033 
3034 		if (match.mask->tos || match.mask->ttl)
3035 			*match_level = MLX5_MATCH_L3;
3036 	}
3037 
3038 	/* ***  L3 attributes parsing up to here *** */
3039 
3040 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
3041 		struct flow_match_ports match;
3042 
3043 		flow_rule_match_ports(rule, &match);
3044 		switch (ip_proto) {
3045 		case IPPROTO_TCP:
3046 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3047 				 tcp_sport, ntohs(match.mask->src));
3048 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3049 				 tcp_sport, ntohs(match.key->src));
3050 
3051 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3052 				 tcp_dport, ntohs(match.mask->dst));
3053 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3054 				 tcp_dport, ntohs(match.key->dst));
3055 			break;
3056 
3057 		case IPPROTO_UDP:
3058 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3059 				 udp_sport, ntohs(match.mask->src));
3060 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3061 				 udp_sport, ntohs(match.key->src));
3062 
3063 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3064 				 udp_dport, ntohs(match.mask->dst));
3065 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3066 				 udp_dport, ntohs(match.key->dst));
3067 			break;
3068 		default:
3069 			NL_SET_ERR_MSG_MOD(extack,
3070 					   "Only UDP and TCP transports are supported for L4 matching");
3071 			netdev_err(priv->netdev,
3072 				   "Only UDP and TCP transports are supported\n");
3073 			return -EINVAL;
3074 		}
3075 
3076 		if (match.mask->src || match.mask->dst)
3077 			*match_level = MLX5_MATCH_L4;
3078 	}
3079 
3080 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
3081 		struct flow_match_tcp match;
3082 
3083 		flow_rule_match_tcp(rule, &match);
3084 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
3085 			 ntohs(match.mask->flags));
3086 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
3087 			 ntohs(match.key->flags));
3088 
3089 		if (match.mask->flags)
3090 			*match_level = MLX5_MATCH_L4;
3091 	}
3092 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
3093 		struct flow_match_icmp match;
3094 
3095 		flow_rule_match_icmp(rule, &match);
3096 		switch (ip_proto) {
3097 		case IPPROTO_ICMP:
3098 			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
3099 			      MLX5_FLEX_PROTO_ICMP)) {
3100 				NL_SET_ERR_MSG_MOD(extack,
3101 						   "Match on Flex protocols for ICMP is not supported");
3102 				return -EOPNOTSUPP;
3103 			}
3104 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
3105 				 match.mask->type);
3106 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
3107 				 match.key->type);
3108 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
3109 				 match.mask->code);
3110 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
3111 				 match.key->code);
3112 			break;
3113 		case IPPROTO_ICMPV6:
3114 			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
3115 			      MLX5_FLEX_PROTO_ICMPV6)) {
3116 				NL_SET_ERR_MSG_MOD(extack,
3117 						   "Match on Flex protocols for ICMPV6 is not supported");
3118 				return -EOPNOTSUPP;
3119 			}
3120 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
3121 				 match.mask->type);
3122 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
3123 				 match.key->type);
3124 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
3125 				 match.mask->code);
3126 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
3127 				 match.key->code);
3128 			break;
3129 		default:
3130 			NL_SET_ERR_MSG_MOD(extack,
3131 					   "Code and type matching only with ICMP and ICMPv6");
3132 			netdev_err(priv->netdev,
3133 				   "Code and type matching only with ICMP and ICMPv6\n");
3134 			return -EINVAL;
3135 		}
3136 		if (match.mask->code || match.mask->type) {
3137 			*match_level = MLX5_MATCH_L4;
3138 			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
3139 		}
3140 	}
3141 	/* Currently supported only for MPLS over UDP */
3142 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
3143 	    !netif_is_bareudp(filter_dev)) {
3144 		NL_SET_ERR_MSG_MOD(extack,
3145 				   "Matching on MPLS is supported only for MPLS over UDP");
3146 		netdev_err(priv->netdev,
3147 			   "Matching on MPLS is supported only for MPLS over UDP\n");
3148 		return -EOPNOTSUPP;
3149 	}
3150 
3151 	return 0;
3152 }
3153 
3154 static int parse_cls_flower(struct mlx5e_priv *priv,
3155 			    struct mlx5e_tc_flow *flow,
3156 			    struct mlx5_flow_spec *spec,
3157 			    struct flow_cls_offload *f,
3158 			    struct net_device *filter_dev)
3159 {
3160 	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
3161 	struct netlink_ext_ack *extack = f->common.extack;
3162 	struct mlx5_core_dev *dev = priv->mdev;
3163 	struct mlx5_eswitch *esw = dev->priv.eswitch;
3164 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
3165 	struct mlx5_eswitch_rep *rep;
3166 	bool is_eswitch_flow;
3167 	int err;
3168 
3169 	inner_match_level = MLX5_MATCH_NONE;
3170 	outer_match_level = MLX5_MATCH_NONE;
3171 
3172 	err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
3173 				 &inner_match_level, &outer_match_level);
3174 	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
3175 				 outer_match_level : inner_match_level;
3176 
3177 	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
3178 	if (!err && is_eswitch_flow) {
3179 		rep = rpriv->rep;
3180 		if (rep->vport != MLX5_VPORT_UPLINK &&
3181 		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
3182 		    esw->offloads.inline_mode < non_tunnel_match_level)) {
3183 			NL_SET_ERR_MSG_MOD(extack,
3184 					   "Flow is not offloaded due to min inline setting");
3185 			netdev_warn(priv->netdev,
3186 				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
3187 				    non_tunnel_match_level, esw->offloads.inline_mode);
3188 			return -EOPNOTSUPP;
3189 		}
3190 	}
3191 
3192 	flow->attr->inner_match_level = inner_match_level;
3193 	flow->attr->outer_match_level = outer_match_level;
3194 
3195 
3196 	return err;
3197 }
3198 
3199 struct mlx5_fields {
3200 	u8  field;
3201 	u8  field_bsize;
3202 	u32 field_mask;
3203 	u32 offset;
3204 	u32 match_offset;
3205 };
3206 
3207 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
3208 		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
3209 		 offsetof(struct pedit_headers, field) + (off), \
3210 		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
3211 
3212 /* True when the masked values are equal and every bit being rewritten is
3213  * also covered by the match mask, i.e. there is no rewrite without a match.
3214  */
3215 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
3216 	type matchmaskx = *(type *)(matchmaskp); \
3217 	type matchvalx = *(type *)(matchvalp); \
3218 	type maskx = *(type *)(maskp); \
3219 	type valx = *(type *)(valp); \
3220 	\
3221 	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
3222 								 matchmaskx)); \
3223 })
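/* Illustration only (values assumed, not taken from this file): for an
 * 8-bit field, a rewrite of val 0x05 / mask 0x0f against a match of
 * val 0x05 / mask 0xff gives (0x05 & 0x0f) == (0x05 & 0xff) and
 * !(0x0f & (0x0f ^ 0xff)), i.e. true, so the rewrite may be skipped since
 * matching packets already carry the value being written. If the match mask
 * were 0x0e instead, bit 0 of the rewrite would be unmatched and the macro
 * would return false.
 */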
3224 
3225 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
3226 			 void *matchmaskp, u8 bsize)
3227 {
3228 	bool same = false;
3229 
3230 	switch (bsize) {
3231 	case 8:
3232 		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
3233 		break;
3234 	case 16:
3235 		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
3236 		break;
3237 	case 32:
3238 		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
3239 		break;
3240 	}
3241 
3242 	return same;
3243 }
3244 
3245 static struct mlx5_fields fields[] = {
3246 	OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
3247 	OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
3248 	OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
3249 	OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
3250 	OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
3251 	OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
3252 
3253 	OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
3254 	OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
3255 	OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
3256 	OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
3257 
3258 	OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
3259 		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
3260 	OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
3261 		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
3262 	OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
3263 		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
3264 	OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
3265 		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
3266 	OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
3267 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
3268 	OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
3269 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
3270 	OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
3271 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
3272 	OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
3273 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
3274 	OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
3275 	OFFLOAD(IP_DSCP, 16,  0x0fc0, ip6, 0, ip_dscp),
3276 
3277 	OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
3278 	OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
3279 	/* in the linux tcphdr, tcp_flags is 8 bits long */
3280 	OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
3281 
3282 	OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
3283 	OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
3284 };
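/* Illustration only (expansion sketch, not taken from this file): an entry
 * such as OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport) expands
 * per the OFFLOAD() macro above to roughly
 *   { MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 16, U16_MAX,
 *     offsetof(struct pedit_headers, tcp.dest) + 0,
 *     MLX5_BYTE_OFF(fte_match_set_lyr_2_4, tcp_dport) },
 * tying the HW modify-header field id to the pedit header offset and to the
 * corresponding match field.
 */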
3285 
3286 static u32 mask_field_get(void *mask, struct mlx5_fields *f)
3287 {
3288 	switch (f->field_bsize) {
3289 	case 32:
3290 		return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
3291 	case 16:
3292 		return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
3293 	default:
3294 		return *(u8 *)mask & (u8)f->field_mask;
3295 	}
3296 }
3297 
3298 static void mask_field_clear(void *mask, struct mlx5_fields *f)
3299 {
3300 	switch (f->field_bsize) {
3301 	case 32:
3302 		*(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
3303 		break;
3304 	case 16:
3305 		*(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
3306 		break;
3307 	default:
3308 		*(u8 *)mask &= ~(u8)f->field_mask;
3309 		break;
3310 	}
3311 }
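/* Illustration only (example assumed, not taken from this file): the pedit
 * masks/values are stored in pedit_headers as network-order header bytes,
 * so mask_field_get() folds them to host order (bounded by field_mask)
 * before offload_pedit_fields() runs find_first_bit()/find_last_bit() on
 * them; e.g. a full rewrite of the 16-bit TCP dport stores bytes ff ff,
 * which read back as 0xffff (first bit 0, last bit 15). mask_field_clear()
 * then zeroes the same bits to mark the field as consumed.
 */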
3312 
3313 static int offload_pedit_fields(struct mlx5e_priv *priv,
3314 				int namespace,
3315 				struct mlx5e_tc_flow_parse_attr *parse_attr,
3316 				u32 *action_flags,
3317 				struct netlink_ext_ack *extack)
3318 {
3319 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3320 	struct pedit_headers_action *hdrs = parse_attr->hdrs;
3321 	void *headers_c, *headers_v, *action, *vals_p;
3322 	struct mlx5e_tc_mod_hdr_acts *mod_acts;
3323 	void *s_masks_p, *a_masks_p;
3324 	int i, first, last, next_z;
3325 	struct mlx5_fields *f;
3326 	unsigned long mask;
3327 	u32 s_mask, a_mask;
3328 	u8 cmd;
3329 
3330 	mod_acts = &parse_attr->mod_hdr_acts;
3331 	headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3332 	headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3333 
3334 	set_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].masks;
3335 	add_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].masks;
3336 	set_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].vals;
3337 	add_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].vals;
3338 
3339 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
3340 		bool skip;
3341 
3342 		f = &fields[i];
3343 		s_masks_p = (void *)set_masks + f->offset;
3344 		a_masks_p = (void *)add_masks + f->offset;
3345 
3346 		s_mask = mask_field_get(s_masks_p, f);
3347 		a_mask = mask_field_get(a_masks_p, f);
3348 
3349 		if (!s_mask && !a_mask) /* nothing to offload here */
3350 			continue;
3351 
3352 		if (s_mask && a_mask) {
3353 			NL_SET_ERR_MSG_MOD(extack,
3354 					   "can't set and add to the same HW field");
3355 			netdev_warn(priv->netdev,
3356 				    "mlx5: can't set and add to the same HW field (%x)\n",
3357 				    f->field);
3358 			return -EOPNOTSUPP;
3359 		}
3360 
3361 		skip = false;
3362 		if (s_mask) {
3363 			void *match_mask = headers_c + f->match_offset;
3364 			void *match_val = headers_v + f->match_offset;
3365 
3366 			cmd  = MLX5_ACTION_TYPE_SET;
3367 			mask = s_mask;
3368 			vals_p = (void *)set_vals + f->offset;
3369 			/* don't rewrite if we have a match on the same value */
3370 			if (cmp_val_mask(vals_p, s_masks_p, match_val,
3371 					 match_mask, f->field_bsize))
3372 				skip = true;
3373 			/* clear to denote we consumed this field */
3374 			mask_field_clear(s_masks_p, f);
3375 		} else {
3376 			cmd  = MLX5_ACTION_TYPE_ADD;
3377 			mask = a_mask;
3378 			vals_p = (void *)add_vals + f->offset;
3379 			/* add 0 is no change */
3380 			if (!mask_field_get(vals_p, f))
3381 				skip = true;
3382 			/* clear to denote we consumed this field */
3383 			mask_field_clear(a_masks_p, f);
3384 		}
3385 		if (skip)
3386 			continue;
3387 
3388 		first = find_first_bit(&mask, f->field_bsize);
3389 		next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3390 		last  = find_last_bit(&mask, f->field_bsize);
3391 		if (first < next_z && next_z < last) {
3392 			NL_SET_ERR_MSG_MOD(extack,
3393 					   "rewrite of non-contiguous sub-fields isn't supported");
3394 			netdev_warn(priv->netdev,
3395 				    "mlx5: rewrite of non-contiguous sub-fields (mask %lx) isn't offloaded\n",
3396 				    mask);
3397 			return -EOPNOTSUPP;
3398 		}
3399 
3400 		action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3401 		if (IS_ERR(action)) {
3402 			NL_SET_ERR_MSG_MOD(extack,
3403 					   "too many pedit actions, can't offload");
3404 			mlx5_core_warn(priv->mdev,
3405 				       "mlx5: parsed %d pedit actions, can't do more\n",
3406 				       mod_acts->num_actions);
3407 			return PTR_ERR(action);
3408 		}
3409 
3410 		MLX5_SET(set_action_in, action, action_type, cmd);
3411 		MLX5_SET(set_action_in, action, field, f->field);
3412 
3413 		if (cmd == MLX5_ACTION_TYPE_SET) {
3414 			unsigned long field_mask = f->field_mask;
3415 			int start;
3416 
3417 			/* if the field is bit sized it may not start at the first bit */
3418 			start = find_first_bit(&field_mask, f->field_bsize);
3419 
3420 			MLX5_SET(set_action_in, action, offset, first - start);
3421 			/* length is num of bits to be written, zero means length of 32 */
3422 			MLX5_SET(set_action_in, action, length, (last - first + 1));
3423 		}
3424 
3425 		if (f->field_bsize == 32)
3426 			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3427 		else if (f->field_bsize == 16)
3428 			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3429 		else if (f->field_bsize == 8)
3430 			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3431 
3432 		++mod_acts->num_actions;
3433 	}
3434 
3435 	return 0;
3436 }
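/* Illustration only (command and values assumed, not taken from this file):
 * a pedit such as "pedit ex munge ip ttl set 64" reaches the loop above as
 * a SET on the IP_TTL entry (8-bit field, mask 0xff): first = 0, next_z = 8,
 * last = 7, so the emitted set_action_in carries
 * field = MLX5_ACTION_IN_FIELD_OUT_IP_TTL, offset = 0, length = 8, data = 64.
 */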
3437 
3438 static const struct pedit_headers zero_masks = {};
3439 
3440 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3441 				       struct mlx5e_tc_flow_parse_attr *parse_attr,
3442 				       struct netlink_ext_ack *extack)
3443 {
3444 	struct pedit_headers *cmd_masks;
3445 	u8 cmd;
3446 
3447 	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3448 		cmd_masks = &parse_attr->hdrs[cmd].masks;
3449 		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3450 			NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3451 			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3452 			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3453 				       16, 1, cmd_masks, sizeof(zero_masks), true);
3454 			return -EOPNOTSUPP;
3455 		}
3456 	}
3457 
3458 	return 0;
3459 }
3460 
3461 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3462 				 struct mlx5e_tc_flow_parse_attr *parse_attr,
3463 				 u32 *action_flags,
3464 				 struct netlink_ext_ack *extack)
3465 {
3466 	int err;
3467 
3468 	err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3469 	if (err)
3470 		goto out_dealloc_parsed_actions;
3471 
3472 	err = verify_offload_pedit_fields(priv, parse_attr, extack);
3473 	if (err)
3474 		goto out_dealloc_parsed_actions;
3475 
3476 	return 0;
3477 
3478 out_dealloc_parsed_actions:
3479 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3480 	return err;
3481 }
3482 
3483 struct ip_ttl_word {
3484 	__u8	ttl;
3485 	__u8	protocol;
3486 	__sum16	check;
3487 };
3488 
3489 struct ipv6_hoplimit_word {
3490 	__be16	payload_len;
3491 	__u8	nexthdr;
3492 	__u8	hop_limit;
3493 };
3494 
3495 static bool
3496 is_flow_action_modify_ip_header(struct flow_action *flow_action)
3497 {
3498 	const struct flow_action_entry *act;
3499 	u32 mask, offset;
3500 	u8 htype;
3501 	int i;
3502 
3503 	/* For the IPv4 & IPv6 headers, check the 4-byte word that holds
3504 	 * ttl/hop_limit to determine whether fields other than
3505 	 * ttl & hop_limit are being modified.
3506 	 */
3507 	flow_action_for_each(i, act, flow_action) {
3508 		if (act->id != FLOW_ACTION_MANGLE &&
3509 		    act->id != FLOW_ACTION_ADD)
3510 			continue;
3511 
3512 		htype = act->mangle.htype;
3513 		offset = act->mangle.offset;
3514 		mask = ~act->mangle.mask;
3515 
3516 		if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3517 			struct ip_ttl_word *ttl_word =
3518 				(struct ip_ttl_word *)&mask;
3519 
3520 			if (offset != offsetof(struct iphdr, ttl) ||
3521 			    ttl_word->protocol ||
3522 			    ttl_word->check)
3523 				return true;
3524 		} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3525 			struct ipv6_hoplimit_word *hoplimit_word =
3526 				(struct ipv6_hoplimit_word *)&mask;
3527 
3528 			if (offset != offsetof(struct ipv6hdr, payload_len) ||
3529 			    hoplimit_word->payload_len ||
3530 			    hoplimit_word->nexthdr)
3531 				return true;
3532 		}
3533 	}
3534 
3535 	return false;
3536 }
3537 
3538 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3539 					  struct mlx5_flow_spec *spec,
3540 					  struct flow_action *flow_action,
3541 					  u32 actions,
3542 					  struct netlink_ext_ack *extack)
3543 {
3544 	bool modify_ip_header;
3545 	void *headers_c;
3546 	void *headers_v;
3547 	u16 ethertype;
3548 	u8 ip_proto;
3549 
3550 	headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3551 	headers_v = mlx5e_get_match_headers_value(actions, spec);
3552 	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3553 
3554 	/* for non-IP we only re-write MACs, so we're okay */
3555 	if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3556 	    ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3557 		goto out_ok;
3558 
3559 	modify_ip_header = is_flow_action_modify_ip_header(flow_action);
3560 	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3561 	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3562 	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3563 		NL_SET_ERR_MSG_MOD(extack,
3564 				   "can't offload re-write of non TCP/UDP/ICMP");
3565 		netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3566 			    ip_proto);
3567 		return false;
3568 	}
3569 
3570 out_ok:
3571 	return true;
3572 }
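/* Illustration only (example assumed, not taken from this file): a filter
 * that matches only "eth_type ip" and rewrites the IPv4 destination address
 * is refused here unless it also matches ip_proto tcp/udp/icmp, since
 * headers_v would report ip_protocol 0 for it; rewrites limited to MAC
 * fields on non-IP traffic remain acceptable per the early goto above.
 */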
3573 
3574 static bool
3575 actions_match_supported_fdb(struct mlx5e_priv *priv,
3576 			    struct mlx5e_tc_flow *flow,
3577 			    struct netlink_ext_ack *extack)
3578 {
3579 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3580 
3581 	if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3582 		NL_SET_ERR_MSG_MOD(extack,
3583 				   "current firmware doesn't support split rule for port mirroring");
3584 		netdev_warn_once(priv->netdev,
3585 				 "current firmware doesn't support split rule for port mirroring\n");
3586 		return false;
3587 	}
3588 
3589 	return true;
3590 }
3591 
3592 static bool
3593 actions_match_supported(struct mlx5e_priv *priv,
3594 			struct flow_action *flow_action,
3595 			u32 actions,
3596 			struct mlx5e_tc_flow_parse_attr *parse_attr,
3597 			struct mlx5e_tc_flow *flow,
3598 			struct netlink_ext_ack *extack)
3599 {
3600 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3601 	    !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
3602 					   extack))
3603 		return false;
3604 
3605 	if (mlx5e_is_eswitch_flow(flow) &&
3606 	    !actions_match_supported_fdb(priv, flow, extack))
3607 		return false;
3608 
3609 	return true;
3610 }
3611 
3612 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3613 {
3614 	return priv->mdev == peer_priv->mdev;
3615 }
3616 
3617 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3618 {
3619 	struct mlx5_core_dev *fmdev, *pmdev;
3620 
3621 	fmdev = priv->mdev;
3622 	pmdev = peer_priv->mdev;
3623 
3624 	return mlx5_same_hw_devs(fmdev, pmdev);
3625 }
3626 
3627 static int
3628 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3629 				struct mlx5e_tc_flow *flow,
3630 				struct mlx5_flow_attr *attr,
3631 				struct netlink_ext_ack *extack)
3632 {
3633 	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3634 	struct pedit_headers_action *hdrs = parse_attr->hdrs;
3635 	enum mlx5_flow_namespace_type ns_type;
3636 	int err;
3637 
3638 	if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3639 	    !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3640 		return 0;
3641 
3642 	ns_type = mlx5e_get_flow_namespace(flow);
3643 
3644 	err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3645 	if (err)
3646 		return err;
3647 
3648 	if (parse_attr->mod_hdr_acts.num_actions > 0)
3649 		return 0;
3650 
3651 	/* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3652 	attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3653 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3654 
3655 	if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3656 		return 0;
3657 
3658 	if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3659 	      (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3660 		attr->esw_attr->split_count = 0;
3661 
3662 	return 0;
3663 }
3664 
3665 static struct mlx5_flow_attr*
3666 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3667 				   enum mlx5_flow_namespace_type ns_type)
3668 {
3669 	struct mlx5e_tc_flow_parse_attr *parse_attr;
3670 	u32 attr_sz = ns_to_attr_sz(ns_type);
3671 	struct mlx5_flow_attr *attr2;
3672 
3673 	attr2 = mlx5_alloc_flow_attr(ns_type);
3674 	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3675 	if (!attr2 || !parse_attr) {
3676 		kvfree(parse_attr);
3677 		kfree(attr2);
3678 		return NULL;
3679 	}
3680 
3681 	memcpy(attr2, attr, attr_sz);
3682 	INIT_LIST_HEAD(&attr2->list);
3683 	parse_attr->filter_dev = attr->parse_attr->filter_dev;
3684 	attr2->action = 0;
3685 	attr2->counter = NULL;
3686 	attr2->tc_act_cookies_count = 0;
3687 	attr2->flags = 0;
3688 	attr2->parse_attr = parse_attr;
3689 	attr2->dest_chain = 0;
3690 	attr2->dest_ft = NULL;
3691 	attr2->act_id_restore_rule = NULL;
3692 	memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr));
3693 
3694 	if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
3695 		attr2->esw_attr->out_count = 0;
3696 		attr2->esw_attr->split_count = 0;
3697 	}
3698 
3699 	attr2->branch_true = NULL;
3700 	attr2->branch_false = NULL;
3701 	attr2->jumping_attr = NULL;
3702 	return attr2;
3703 }
3704 
3705 struct mlx5_flow_attr *
3706 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3707 {
3708 	struct mlx5_esw_flow_attr *esw_attr;
3709 	struct mlx5_flow_attr *attr;
3710 	int i;
3711 
3712 	list_for_each_entry(attr, &flow->attrs, list) {
3713 		esw_attr = attr->esw_attr;
3714 		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3715 			if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3716 				return attr;
3717 		}
3718 	}
3719 
3720 	return NULL;
3721 }
3722 
3723 void
3724 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3725 {
3726 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3727 	struct mlx5_flow_attr *attr;
3728 
3729 	list_for_each_entry(attr, &flow->attrs, list) {
3730 		if (list_is_last(&attr->list, &flow->attrs))
3731 			break;
3732 
3733 		mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3734 	}
3735 }
3736 
3737 static void
3738 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3739 {
3740 	struct mlx5_flow_attr *attr, *tmp;
3741 
3742 	list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3743 		if (list_is_last(&attr->list, &flow->attrs))
3744 			break;
3745 
3746 		mlx5_free_flow_attr_actions(flow, attr);
3747 
3748 		list_del(&attr->list);
3749 		kvfree(attr->parse_attr);
3750 		kfree(attr);
3751 	}
3752 }
3753 
3754 int
3755 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3756 {
3757 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3758 	struct mlx5_flow_attr *attr;
3759 	int err = 0;
3760 
3761 	list_for_each_entry(attr, &flow->attrs, list) {
3762 		if (list_is_last(&attr->list, &flow->attrs))
3763 			break;
3764 
3765 		err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3766 		if (err)
3767 			break;
3768 	}
3769 
3770 	return err;
3771 }
3772 
3773 /* TC filter rule HW translation:
3774  *
3775  * +---------------------+
3776  * + ft prio (tc chain)  +
3777  * + original match      +
3778  * +---------------------+
3779  *           |
3780  *           | if multi table action
3781  *           |
3782  *           v
3783  * +---------------------+
3784  * + post act ft         |<----.
3785  * + match fte id        |     | split on multi table action
3786  * + do actions          |-----'
3787  * +---------------------+
3788  *           |
3789  *           |
3790  *           v
3791  * Do rest of the actions after last multi table action.
3792  */
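/* Illustration only (action list assumed, not taken from this file): a
 * flower rule with "action ct action mirred egress redirect" is parsed into
 * two attributes: CT is a multi-table action and is not last, so
 * parse_tc_actions() splits the attr after it; the first attr keeps the
 * original match in the chain table and forwards to the post action table,
 * where the second attr matches the fte id and performs the redirect.
 */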
3793 static int
3794 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3795 {
3796 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3797 	struct mlx5_flow_attr *attr, *next_attr = NULL;
3798 	struct mlx5e_post_act_handle *handle;
3799 	int err;
3800 
3801 	/* The attrs list is walked in reverse order of the actions,
3802 	 * i.e. the first entry is the last attribute.
3803 	 */
3804 	list_for_each_entry(attr, &flow->attrs, list) {
3805 		if (!next_attr) {
3806 			/* Set counter action on last post act rule. */
3807 			attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3808 		}
3809 
3810 		if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) {
3811 			err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3812 			if (err)
3813 				goto out_free;
3814 		}
3815 
3816 		/* Don't add post_act rule for first attr (last in the list).
3817 		 * It's being handled by the caller.
3818 		 */
3819 		if (list_is_last(&attr->list, &flow->attrs))
3820 			break;
3821 
3822 		err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3823 		if (err)
3824 			goto out_free;
3825 
3826 		err = post_process_attr(flow, attr, extack);
3827 		if (err)
3828 			goto out_free;
3829 
3830 		handle = mlx5e_tc_post_act_add(post_act, attr);
3831 		if (IS_ERR(handle)) {
3832 			err = PTR_ERR(handle);
3833 			goto out_free;
3834 		}
3835 
3836 		attr->post_act_handle = handle;
3837 
3838 		if (attr->jumping_attr) {
3839 			err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr);
3840 			if (err)
3841 				goto out_free;
3842 		}
3843 
3844 		next_attr = attr;
3845 	}
3846 
3847 	if (flow_flag_test(flow, SLOW))
3848 		goto out;
3849 
3850 	err = mlx5e_tc_offload_flow_post_acts(flow);
3851 	if (err)
3852 		goto out_free;
3853 
3854 out:
3855 	return 0;
3856 
3857 out_free:
3858 	free_flow_post_acts(flow);
3859 	return err;
3860 }
3861 
3862 static int
3863 set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr)
3864 {
3865 	struct mlx5e_post_act *post_act = get_post_action(priv);
3866 
3867 	if (IS_ERR(post_act))
3868 		return PTR_ERR(post_act);
3869 
3870 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3871 	attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
3872 
3873 	return 0;
3874 }
3875 
3876 static int
3877 alloc_branch_attr(struct mlx5e_tc_flow *flow,
3878 		  struct mlx5e_tc_act_branch_ctrl *cond,
3879 		  struct mlx5_flow_attr **cond_attr,
3880 		  u32 *jump_count,
3881 		  struct netlink_ext_ack *extack)
3882 {
3883 	struct mlx5_flow_attr *attr;
3884 	int err = 0;
3885 
3886 	*cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr,
3887 							mlx5e_get_flow_namespace(flow));
3888 	if (!(*cond_attr))
3889 		return -ENOMEM;
3890 
3891 	attr = *cond_attr;
3892 
3893 	switch (cond->act_id) {
3894 	case FLOW_ACTION_DROP:
3895 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3896 		break;
3897 	case FLOW_ACTION_ACCEPT:
3898 	case FLOW_ACTION_PIPE:
3899 		err = set_branch_dest_ft(flow->priv, attr);
3900 		if (err)
3901 			goto out_err;
3902 		break;
3903 	case FLOW_ACTION_JUMP:
3904 		if (*jump_count) {
3905 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps");
3906 			err = -EOPNOTSUPP;
3907 			goto out_err;
3908 		}
3909 		*jump_count = cond->extval;
3910 		err = set_branch_dest_ft(flow->priv, attr);
3911 		if (err)
3912 			goto out_err;
3913 		break;
3914 	default:
3915 		err = -EOPNOTSUPP;
3916 		goto out_err;
3917 	}
3918 
3919 	return err;
3920 out_err:
3921 	kfree(*cond_attr);
3922 	*cond_attr = NULL;
3923 	return err;
3924 }
3925 
3926 static void
3927 dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
3928 	       struct mlx5_flow_attr *attr, struct mlx5e_priv *priv,
3929 	       struct mlx5e_tc_jump_state *jump_state)
3930 {
3931 	if (!jump_state->jump_count)
3932 		return;
3933 
3934 	/* A single tc action can instantiate multiple offload actions (e.g. pedit),
3935 	 * so decrement the jump count only once per tc action.
3936 	 */
3937 	if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index)
3938 		return;
3939 
3940 	jump_state->last_id = act->id;
3941 	jump_state->last_index = act->hw_index;
3942 
3943 	/* nothing to do for intermediate actions */
3944 	if (--jump_state->jump_count > 1)
3945 		return;
3946 
3947 	if (jump_state->jump_count == 1) { /* last action in the jump action list */
3948 
3949 		/* create a new attribute after this action */
3950 		jump_state->jump_target = true;
3951 
3952 		if (tc_act->is_terminating_action) { /* the branch ends here */
3953 			attr->flags |= MLX5_ATTR_FLAG_TERMINATING;
3954 			attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3955 		} else { /* the branch continues executing the rest of the actions */
3956 			struct mlx5e_post_act *post_act;
3957 
3958 			attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3959 			post_act = get_post_action(priv);
3960 			attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
3961 		}
3962 	} else if (jump_state->jump_count == 0) { /* first attr after the jump action list */
3963 		/* This is the post action for the jumping attribute (either red or green)
3964 		 * Use the stored jumping_attr to set the post act id on the jumping attribute
3965 		 */
3966 		attr->jumping_attr = jump_state->jumping_attr;
3967 	}
3968 }
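/* Illustration only (count assumed, not taken from this file): with a stored
 * jump_count of 2, the first following tc action decrements it to 1 and is
 * treated as the last action inside the branch: jump_target is set so a new
 * attr is split off after it, and the current attr either terminates or
 * forwards to the post action table; the next tc action decrements the count
 * to 0 and its attr becomes the post-act target recorded via jumping_attr.
 * A tc action that expands into several HW actions (e.g. pedit) is counted
 * only once thanks to the last_id/last_index check.
 */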
3969 
3970 static int
3971 parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
3972 		  struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr,
3973 		  struct mlx5e_tc_jump_state *jump_state,
3974 		  struct netlink_ext_ack *extack)
3975 {
3976 	struct mlx5e_tc_act_branch_ctrl cond_true, cond_false;
3977 	u32 jump_count = jump_state->jump_count;
3978 	int err;
3979 
3980 	if (!tc_act->get_branch_ctrl)
3981 		return 0;
3982 
3983 	tc_act->get_branch_ctrl(act, &cond_true, &cond_false);
3984 
3985 	err = alloc_branch_attr(flow, &cond_true,
3986 				&attr->branch_true, &jump_count, extack);
3987 	if (err)
3988 		goto out_err;
3989 
3990 	if (jump_count)
3991 		jump_state->jumping_attr = attr->branch_true;
3992 
3993 	err = alloc_branch_attr(flow, &cond_false,
3994 				&attr->branch_false, &jump_count, extack);
3995 	if (err)
3996 		goto err_branch_false;
3997 
3998 	if (jump_count && !jump_state->jumping_attr)
3999 		jump_state->jumping_attr = attr->branch_false;
4000 
4001 	jump_state->jump_count = jump_count;
4002 
4003 	/* branching action requires its own counter */
4004 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4005 	flow_flag_set(flow, USE_ACT_STATS);
4006 
4007 	return 0;
4008 
4009 err_branch_false:
4010 	free_branch_attr(flow, attr->branch_true);
4011 out_err:
4012 	return err;
4013 }
4014 
4015 static int
4016 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
4017 		 struct flow_action *flow_action)
4018 {
4019 	struct netlink_ext_ack *extack = parse_state->extack;
4020 	struct mlx5e_tc_flow *flow = parse_state->flow;
4021 	struct mlx5e_tc_jump_state jump_state = {};
4022 	struct mlx5_flow_attr *attr = flow->attr;
4023 	enum mlx5_flow_namespace_type ns_type;
4024 	struct mlx5e_priv *priv = flow->priv;
4025 	struct mlx5_flow_attr *prev_attr;
4026 	struct flow_action_entry *act;
4027 	struct mlx5e_tc_act *tc_act;
4028 	int err, i, i_split = 0;
4029 	bool is_missable;
4030 
4031 	ns_type = mlx5e_get_flow_namespace(flow);
4032 	list_add(&attr->list, &flow->attrs);
4033 
4034 	flow_action_for_each(i, act, flow_action) {
4035 		jump_state.jump_target = false;
4036 		is_missable = false;
4037 		prev_attr = attr;
4038 
4039 		tc_act = mlx5e_tc_act_get(act->id, ns_type);
4040 		if (!tc_act) {
4041 			NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
4042 			err = -EOPNOTSUPP;
4043 			goto out_free_post_acts;
4044 		}
4045 
4046 		if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) {
4047 			err = -EOPNOTSUPP;
4048 			goto out_free_post_acts;
4049 		}
4050 
4051 		err = tc_act->parse_action(parse_state, act, priv, attr);
4052 		if (err)
4053 			goto out_free_post_acts;
4054 
4055 		dec_jump_count(act, tc_act, attr, priv, &jump_state);
4056 
4057 		err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
4058 		if (err)
4059 			goto out_free_post_acts;
4060 
4061 		parse_state->actions |= attr->action;
4062 
4063 		/* Split attr for multi table act if not the last act. */
4064 		if (jump_state.jump_target ||
4065 		    (tc_act->is_multi_table_act &&
4066 		    tc_act->is_multi_table_act(priv, act, attr) &&
4067 		    i < flow_action->num_entries - 1)) {
4068 			is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;
4069 
4070 			err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr,
4071 						      ns_type);
4072 			if (err)
4073 				goto out_free_post_acts;
4074 
4075 			attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
4076 			if (!attr) {
4077 				err = -ENOMEM;
4078 				goto out_free_post_acts;
4079 			}
4080 
4081 			i_split = i + 1;
4082 			parse_state->if_count = 0;
4083 			list_add(&attr->list, &flow->attrs);
4084 		}
4085 
4086 		if (is_missable) {
4087 			/* Add counter to prev, and assign act to new (next) attr */
4088 			prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4089 			flow_flag_set(flow, USE_ACT_STATS);
4090 
4091 			attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
4092 		} else if (!tc_act->stats_action) {
4093 			prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie;
4094 		}
4095 	}
4096 
4097 	err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type);
4098 	if (err)
4099 		goto out_free_post_acts;
4100 
4101 	err = alloc_flow_post_acts(flow, extack);
4102 	if (err)
4103 		goto out_free_post_acts;
4104 
4105 	return 0;
4106 
4107 out_free_post_acts:
4108 	free_flow_post_acts(flow);
4109 
4110 	return err;
4111 }
4112 
4113 static int
4114 flow_action_supported(struct flow_action *flow_action,
4115 		      struct netlink_ext_ack *extack)
4116 {
4117 	if (!flow_action_has_entries(flow_action)) {
4118 		NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
4119 		return -EINVAL;
4120 	}
4121 
4122 	if (!flow_action_hw_stats_check(flow_action, extack,
4123 					FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
4124 		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4125 		return -EOPNOTSUPP;
4126 	}
4127 
4128 	return 0;
4129 }
4130 
4131 static int
4132 parse_tc_nic_actions(struct mlx5e_priv *priv,
4133 		     struct flow_action *flow_action,
4134 		     struct mlx5e_tc_flow *flow,
4135 		     struct netlink_ext_ack *extack)
4136 {
4137 	struct mlx5e_tc_act_parse_state *parse_state;
4138 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4139 	struct mlx5_flow_attr *attr = flow->attr;
4140 	int err;
4141 
4142 	err = flow_action_supported(flow_action, extack);
4143 	if (err)
4144 		return err;
4145 
4146 	attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
4147 	parse_attr = attr->parse_attr;
4148 	parse_state = &parse_attr->parse_state;
4149 	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4150 	parse_state->ct_priv = get_ct_priv(priv);
4151 
4152 	err = parse_tc_actions(parse_state, flow_action);
4153 	if (err)
4154 		return err;
4155 
4156 	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4157 	if (err)
4158 		return err;
4159 
4160 	err = verify_attr_actions(attr->action, extack);
4161 	if (err)
4162 		return err;
4163 
4164 	if (!actions_match_supported(priv, flow_action, parse_state->actions,
4165 				     parse_attr, flow, extack))
4166 		return -EOPNOTSUPP;
4167 
4168 	return 0;
4169 }
4170 
4171 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
4172 				  struct net_device *peer_netdev)
4173 {
4174 	struct mlx5e_priv *peer_priv;
4175 
4176 	peer_priv = netdev_priv(peer_netdev);
4177 
4178 	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
4179 		mlx5e_eswitch_vf_rep(priv->netdev) &&
4180 		mlx5e_eswitch_vf_rep(peer_netdev) &&
4181 		mlx5e_same_hw_devs(priv, peer_priv));
4182 }
4183 
4184 static bool same_hw_reps(struct mlx5e_priv *priv,
4185 			 struct net_device *peer_netdev)
4186 {
4187 	struct mlx5e_priv *peer_priv;
4188 
4189 	peer_priv = netdev_priv(peer_netdev);
4190 
4191 	return mlx5e_eswitch_rep(priv->netdev) &&
4192 	       mlx5e_eswitch_rep(peer_netdev) &&
4193 	       mlx5e_same_hw_devs(priv, peer_priv);
4194 }
4195 
4196 static bool is_lag_dev(struct mlx5e_priv *priv,
4197 		       struct net_device *peer_netdev)
4198 {
4199 	return ((mlx5_lag_is_sriov(priv->mdev) ||
4200 		 mlx5_lag_is_multipath(priv->mdev)) &&
4201 		 same_hw_reps(priv, peer_netdev));
4202 }
4203 
4204 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
4205 {
4206 	return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev);
4207 }
4208 
4209 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4210 				    struct net_device *out_dev)
4211 {
4212 	if (is_merged_eswitch_vfs(priv, out_dev))
4213 		return true;
4214 
4215 	if (is_multiport_eligible(priv, out_dev))
4216 		return true;
4217 
4218 	if (is_lag_dev(priv, out_dev))
4219 		return true;
4220 
4221 	return mlx5e_eswitch_rep(out_dev) &&
4222 	       same_port_devs(priv, netdev_priv(out_dev));
4223 }
4224 
4225 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
4226 				      struct mlx5_flow_attr *attr,
4227 				      int ifindex,
4228 				      enum mlx5e_tc_int_port_type type,
4229 				      u32 *action,
4230 				      int out_index)
4231 {
4232 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4233 	struct mlx5e_tc_int_port_priv *int_port_priv;
4234 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4235 	struct mlx5e_tc_int_port *dest_int_port;
4236 	int err;
4237 
4238 	parse_attr = attr->parse_attr;
4239 	int_port_priv = mlx5e_get_int_port_priv(priv);
4240 
4241 	dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
4242 	if (IS_ERR(dest_int_port))
4243 		return PTR_ERR(dest_int_port);
4244 
4245 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
4246 					MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
4247 					mlx5e_tc_int_port_get_metadata(dest_int_port));
4248 	if (err) {
4249 		mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
4250 		return err;
4251 	}
4252 
4253 	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4254 
4255 	esw_attr->dest_int_port = dest_int_port;
4256 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
4257 	esw_attr->split_count = out_index;
4258 
4259 	/* Forward to root fdb for matching against the new source vport */
4260 	attr->dest_chain = 0;
4261 
4262 	return 0;
4263 }
4264 
4265 static int
4266 parse_tc_fdb_actions(struct mlx5e_priv *priv,
4267 		     struct flow_action *flow_action,
4268 		     struct mlx5e_tc_flow *flow,
4269 		     struct netlink_ext_ack *extack)
4270 {
4271 	struct mlx5e_tc_act_parse_state *parse_state;
4272 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4273 	struct mlx5_flow_attr *attr = flow->attr;
4274 	struct mlx5_esw_flow_attr *esw_attr;
4275 	struct net_device *filter_dev;
4276 	int err;
4277 
4278 	err = flow_action_supported(flow_action, extack);
4279 	if (err)
4280 		return err;
4281 
4282 	esw_attr = attr->esw_attr;
4283 	parse_attr = attr->parse_attr;
4284 	filter_dev = parse_attr->filter_dev;
4285 	parse_state = &parse_attr->parse_state;
4286 	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4287 	parse_state->ct_priv = get_ct_priv(priv);
4288 
4289 	err = parse_tc_actions(parse_state, flow_action);
4290 	if (err)
4291 		return err;
4292 
4293 	/* Forwarding to/from an internal port can only have one destination */
4294 	if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
4295 	    esw_attr->out_count > 1) {
4296 		NL_SET_ERR_MSG_MOD(extack,
4297 				   "Rules with internal port can have only one destination");
4298 		return -EOPNOTSUPP;
4299 	}
4300 
4301 	/* Forward from tunnel/internal port to internal port is not supported */
4302 	if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
4303 	    esw_attr->dest_int_port) {
4304 		NL_SET_ERR_MSG_MOD(extack,
4305 				   "Forwarding from tunnel/internal port to internal port is not supported");
4306 		return -EOPNOTSUPP;
4307 	}
4308 
4309 	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4310 	if (err)
4311 		return err;
4312 
4313 	if (!actions_match_supported(priv, flow_action, parse_state->actions,
4314 				     parse_attr, flow, extack))
4315 		return -EOPNOTSUPP;
4316 
4317 	return 0;
4318 }
4319 
4320 static void get_flags(int flags, unsigned long *flow_flags)
4321 {
4322 	unsigned long __flow_flags = 0;
4323 
4324 	if (flags & MLX5_TC_FLAG(INGRESS))
4325 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4326 	if (flags & MLX5_TC_FLAG(EGRESS))
4327 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4328 
4329 	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4330 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4331 	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4332 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4333 	if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4334 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4335 
4336 	*flow_flags = __flow_flags;
4337 }
4338 
4339 static const struct rhashtable_params tc_ht_params = {
4340 	.head_offset = offsetof(struct mlx5e_tc_flow, node),
4341 	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4342 	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4343 	.automatic_shrinking = true,
4344 };
4345 
4346 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4347 				    unsigned long flags)
4348 {
4349 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
4350 	struct mlx5e_rep_priv *rpriv;
4351 
4352 	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4353 		rpriv = priv->ppriv;
4354 		return &rpriv->tc_ht;
4355 	} else /* NIC offload */
4356 		return &tc->ht;
4357 }
4358 
4359 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4360 {
4361 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4362 	struct mlx5_flow_attr *attr = flow->attr;
4363 	bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4364 		flow_flag_test(flow, INGRESS);
4365 	bool act_is_encap = !!(attr->action &
4366 			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4367 	bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.eswitch->devcom);
4368 
4369 	if (!esw_paired)
4370 		return false;
4371 
4372 	if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4373 	     mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4374 	    (is_rep_ingress || act_is_encap))
4375 		return true;
4376 
4377 	if (mlx5_lag_is_mpesw(esw_attr->in_mdev))
4378 		return true;
4379 
4380 	return false;
4381 }
4382 
4383 struct mlx5_flow_attr *
4384 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4385 {
4386 	u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4387 				sizeof(struct mlx5_esw_flow_attr) :
4388 				sizeof(struct mlx5_nic_flow_attr);
4389 	struct mlx5_flow_attr *attr;
4390 
4391 	attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4392 	if (!attr)
4393 		return attr;
4394 
4395 	INIT_LIST_HEAD(&attr->list);
4396 	return attr;
4397 }
4398 
4399 static void
4400 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
4401 {
4402 	struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
4403 	struct mlx5_esw_flow_attr *esw_attr;
4404 
4405 	if (!attr)
4406 		return;
4407 
4408 	if (attr->post_act_handle)
4409 		mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
4410 
4411 	mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr);
4412 
4413 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
4414 		mlx5_fc_destroy(counter_dev, attr->counter);
4415 
4416 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
4417 		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
4418 		mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
4419 	}
4420 
4421 	if (mlx5e_is_eswitch_flow(flow)) {
4422 		esw_attr = attr->esw_attr;
4423 
4424 		if (esw_attr->int_port)
4425 			mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
4426 					      esw_attr->int_port);
4427 
4428 		if (esw_attr->dest_int_port)
4429 			mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
4430 					      esw_attr->dest_int_port);
4431 	}
4432 
4433 	mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
4434 
4435 	free_branch_attr(flow, attr->branch_true);
4436 	free_branch_attr(flow, attr->branch_false);
4437 }
4438 
4439 static int
4440 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4441 		 struct flow_cls_offload *f, unsigned long flow_flags,
4442 		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
4443 		 struct mlx5e_tc_flow **__flow)
4444 {
4445 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4446 	struct mlx5_flow_attr *attr;
4447 	struct mlx5e_tc_flow *flow;
4448 	int err = -ENOMEM;
4449 	int out_index;
4450 
4451 	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4452 	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4453 	if (!parse_attr || !flow)
4454 		goto err_free;
4455 
4456 	flow->flags = flow_flags;
4457 	flow->cookie = f->cookie;
4458 	flow->priv = priv;
4459 
4460 	attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4461 	if (!attr)
4462 		goto err_free;
4463 
4464 	flow->attr = attr;
4465 
4466 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4467 		INIT_LIST_HEAD(&flow->encaps[out_index].list);
4468 	INIT_LIST_HEAD(&flow->hairpin);
4469 	INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4470 	INIT_LIST_HEAD(&flow->attrs);
4471 	INIT_LIST_HEAD(&flow->peer_flows);
4472 	refcount_set(&flow->refcnt, 1);
4473 	init_completion(&flow->init_done);
4474 	init_completion(&flow->del_hw_done);
4475 
4476 	*__flow = flow;
4477 	*__parse_attr = parse_attr;
4478 
4479 	return 0;
4480 
4481 err_free:
4482 	kfree(flow);
4483 	kvfree(parse_attr);
4484 	return err;
4485 }
4486 
4487 static void
4488 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4489 		     struct mlx5e_tc_flow_parse_attr *parse_attr,
4490 		     struct flow_cls_offload *f)
4491 {
4492 	attr->parse_attr = parse_attr;
4493 	attr->chain = f->common.chain_index;
4494 	attr->prio = f->common.prio;
4495 }
4496 
4497 static void
4498 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4499 			 struct mlx5e_priv *priv,
4500 			 struct mlx5e_tc_flow_parse_attr *parse_attr,
4501 			 struct flow_cls_offload *f,
4502 			 struct mlx5_eswitch_rep *in_rep,
4503 			 struct mlx5_core_dev *in_mdev)
4504 {
4505 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4506 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4507 
4508 	mlx5e_flow_attr_init(attr, parse_attr, f);
4509 
4510 	esw_attr->in_rep = in_rep;
4511 	esw_attr->in_mdev = in_mdev;
4512 
4513 	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4514 	    MLX5_COUNTER_SOURCE_ESWITCH)
4515 		esw_attr->counter_dev = in_mdev;
4516 	else
4517 		esw_attr->counter_dev = priv->mdev;
4518 }
4519 
4520 static struct mlx5e_tc_flow *
4521 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4522 		     struct flow_cls_offload *f,
4523 		     unsigned long flow_flags,
4524 		     struct net_device *filter_dev,
4525 		     struct mlx5_eswitch_rep *in_rep,
4526 		     struct mlx5_core_dev *in_mdev)
4527 {
4528 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4529 	struct netlink_ext_ack *extack = f->common.extack;
4530 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4531 	struct mlx5e_tc_flow *flow;
4532 	int attr_size, err;
4533 
4534 	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4535 	attr_size  = sizeof(struct mlx5_esw_flow_attr);
4536 	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4537 			       &parse_attr, &flow);
4538 	if (err)
4539 		goto out;
4540 
4541 	parse_attr->filter_dev = filter_dev;
4542 	mlx5e_flow_esw_attr_init(flow->attr,
4543 				 priv, parse_attr,
4544 				 f, in_rep, in_mdev);
4545 
4546 	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4547 			       f, filter_dev);
4548 	if (err)
4549 		goto err_free;
4550 
4551 	/* actions validation depends on parsing the ct matches first */
4552 	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4553 				   &flow->attr->ct_attr, extack);
4554 	if (err)
4555 		goto err_free;
4556 
4557 	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4558 	if (err)
4559 		goto err_free;
4560 
4561 	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4562 	complete_all(&flow->init_done);
4563 	if (err) {
4564 		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4565 			goto err_free;
4566 
4567 		add_unready_flow(flow);
4568 	}
4569 
4570 	return flow;
4571 
4572 err_free:
4573 	mlx5e_flow_put(priv, flow);
4574 out:
4575 	return ERR_PTR(err);
4576 }
4577 
4578 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4579 				      struct mlx5e_tc_flow *flow,
4580 				      unsigned long flow_flags,
4581 				      struct mlx5_eswitch *peer_esw)
4582 {
4583 	struct mlx5e_priv *priv = flow->priv, *peer_priv;
4584 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4585 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4586 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4587 	int i = mlx5_get_dev_index(peer_esw->dev);
4588 	struct mlx5e_rep_priv *peer_urpriv;
4589 	struct mlx5e_tc_flow *peer_flow;
4590 	struct mlx5_core_dev *in_mdev;
4591 	int err = 0;
4592 
4593 	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4594 	peer_priv = netdev_priv(peer_urpriv->netdev);
4595 
4596 	/* in_mdev is assigned to the device the packet originated from.
4597 	 * So packets redirected to the uplink use the same mdev as the
4598 	 * original flow, and packets redirected from the uplink use the
4599 	 * peer mdev.
4600 	 * Multiport eswitch is a special case in which we need to
4601 	 * keep the original mdev.
4602 	 */
4603 	if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev))
4604 		in_mdev = peer_priv->mdev;
4605 	else
4606 		in_mdev = priv->mdev;
4607 
4608 	parse_attr = flow->attr->parse_attr;
4609 	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4610 					 parse_attr->filter_dev,
4611 					 attr->in_rep, in_mdev);
4612 	if (IS_ERR(peer_flow)) {
4613 		err = PTR_ERR(peer_flow);
4614 		goto out;
4615 	}
4616 
4617 	list_add_tail(&peer_flow->peer_flows, &flow->peer_flows);
4618 	flow_flag_set(flow, DUP);
4619 	mutex_lock(&esw->offloads.peer_mutex);
4620 	list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]);
4621 	mutex_unlock(&esw->offloads.peer_mutex);
4622 
4623 out:
4624 	return err;
4625 }
4626 
4627 static int
4628 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4629 		   struct flow_cls_offload *f,
4630 		   unsigned long flow_flags,
4631 		   struct net_device *filter_dev,
4632 		   struct mlx5e_tc_flow **__flow)
4633 {
4634 	struct mlx5_devcom_comp_dev *devcom = priv->mdev->priv.eswitch->devcom, *pos;
4635 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4636 	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4637 	struct mlx5_core_dev *in_mdev = priv->mdev;
4638 	struct mlx5_eswitch *peer_esw;
4639 	struct mlx5e_tc_flow *flow;
4640 	int err;
4641 
4642 	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4643 				    in_mdev);
4644 	if (IS_ERR(flow))
4645 		return PTR_ERR(flow);
4646 
4647 	if (!is_peer_flow_needed(flow)) {
4648 		*__flow = flow;
4649 		return 0;
4650 	}
4651 
4652 	if (!mlx5_devcom_for_each_peer_begin(devcom)) {
4653 		err = -ENODEV;
4654 		goto clean_flow;
4655 	}
4656 
4657 	mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
4658 		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw);
4659 		if (err)
4660 			goto peer_clean;
4661 	}
4662 
4663 	mlx5_devcom_for_each_peer_end(devcom);
4664 
4665 	*__flow = flow;
4666 	return 0;
4667 
4668 peer_clean:
4669 	mlx5e_tc_del_fdb_peers_flow(flow);
4670 	mlx5_devcom_for_each_peer_end(devcom);
4671 clean_flow:
4672 	mlx5e_tc_del_fdb_flow(priv, flow);
4673 	return err;
4674 }
4675 
4676 static int
4677 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4678 		   struct flow_cls_offload *f,
4679 		   unsigned long flow_flags,
4680 		   struct net_device *filter_dev,
4681 		   struct mlx5e_tc_flow **__flow)
4682 {
4683 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4684 	struct netlink_ext_ack *extack = f->common.extack;
4685 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4686 	struct mlx5e_tc_flow *flow;
4687 	int attr_size, err;
4688 
4689 	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4690 		if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4691 			return -EOPNOTSUPP;
4692 	} else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4693 		return -EOPNOTSUPP;
4694 	}
4695 
4696 	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4697 	attr_size  = sizeof(struct mlx5_nic_flow_attr);
4698 	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4699 			       &parse_attr, &flow);
4700 	if (err)
4701 		goto out;
4702 
4703 	parse_attr->filter_dev = filter_dev;
4704 	mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4705 
4706 	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4707 			       f, filter_dev);
4708 	if (err)
4709 		goto err_free;
4710 
4711 	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4712 				   &flow->attr->ct_attr, extack);
4713 	if (err)
4714 		goto err_free;
4715 
4716 	err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4717 	if (err)
4718 		goto err_free;
4719 
4720 	err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4721 	if (err)
4722 		goto err_free;
4723 
4724 	flow_flag_set(flow, OFFLOADED);
4725 	*__flow = flow;
4726 
4727 	return 0;
4728 
4729 err_free:
4730 	flow_flag_set(flow, FAILED);
4731 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4732 	mlx5e_flow_put(priv, flow);
4733 out:
4734 	return err;
4735 }
4736 
4737 static int
4738 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4739 		  struct flow_cls_offload *f,
4740 		  unsigned long flags,
4741 		  struct net_device *filter_dev,
4742 		  struct mlx5e_tc_flow **flow)
4743 {
4744 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4745 	unsigned long flow_flags;
4746 	int err;
4747 
4748 	get_flags(flags, &flow_flags);
4749 
4750 	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4751 		return -EOPNOTSUPP;
4752 
4753 	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4754 		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4755 					 filter_dev, flow);
4756 	else
4757 		err = mlx5e_add_nic_flow(priv, f, flow_flags,
4758 					 filter_dev, flow);
4759 
4760 	return err;
4761 }
4762 
4763 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4764 					   struct mlx5e_rep_priv *rpriv)
4765 {
4766 	/* An offloaded flow rule is allowed to be duplicated on a non-uplink
4767 	 * representor sharing a tc block with other slaves of a lag device.
4768 	 * rpriv can be NULL if this function is called from NIC mode.
4769 	 */
4770 	return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4771 }
4772 
4773 /* As IPsec and TC order is not aligned between software and hardware-offload,
4774  * either IPsec offload or TC offload, not both, is allowed for a specific interface.
4775  */
4776 static bool is_tc_ipsec_order_check_needed(struct net_device *filter, struct mlx5e_priv *priv)
4777 {
4778 	if (!IS_ENABLED(CONFIG_MLX5_EN_IPSEC))
4779 		return false;
4780 
4781 	if (filter != priv->netdev)
4782 		return false;
4783 
4784 	if (mlx5e_eswitch_vf_rep(priv->netdev))
4785 		return false;
4786 
4787 	return true;
4788 }
4789 
4790 static int mlx5e_tc_block_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv)
4791 {
4792 	struct mlx5_core_dev *mdev = priv->mdev;
4793 
4794 	if (!is_tc_ipsec_order_check_needed(filter, priv))
4795 		return 0;
4796 
4797 	if (mdev->num_block_tc)
4798 		return -EBUSY;
4799 
4800 	mdev->num_block_ipsec++;
4801 
4802 	return 0;
4803 }
4804 
4805 static void mlx5e_tc_unblock_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv)
4806 {
4807 	if (!is_tc_ipsec_order_check_needed(filter, priv))
4808 		return;
4809 
4810 	priv->mdev->num_block_ipsec--;
4811 }
4812 
4813 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4814 			   struct flow_cls_offload *f, unsigned long flags)
4815 {
4816 	struct netlink_ext_ack *extack = f->common.extack;
4817 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4818 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4819 	struct mlx5e_tc_flow *flow;
4820 	int err = 0;
4821 
4822 	if (!mlx5_esw_hold(priv->mdev))
4823 		return -EBUSY;
4824 
4825 	err = mlx5e_tc_block_ipsec_offload(dev, priv);
4826 	if (err)
4827 		goto esw_release;
4828 
4829 	mlx5_esw_get(priv->mdev);
4830 
4831 	rcu_read_lock();
4832 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4833 	if (flow) {
4834 		/* The same flow rule was already offloaded by another device sharing
4835 		 * the tc block on a non-uplink representor; just return 0.
4836 		 */
4837 		if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4838 			goto rcu_unlock;
4839 
4840 		NL_SET_ERR_MSG_MOD(extack,
4841 				   "flow cookie already exists, ignoring");
4842 		netdev_warn_once(priv->netdev,
4843 				 "flow cookie %lx already exists, ignoring\n",
4844 				 f->cookie);
4845 		err = -EEXIST;
4846 		goto rcu_unlock;
4847 	}
4848 rcu_unlock:
4849 	rcu_read_unlock();
4850 	if (flow)
4851 		goto out;
4852 
4853 	trace_mlx5e_configure_flower(f);
4854 	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4855 	if (err)
4856 		goto out;
4857 
4858 	/* The flow rule is offloaded to a non-uplink representor sharing the
4859 	 * tc block; record the flow's owner dev.
4860 	 */
4861 	if (is_flow_rule_duplicate_allowed(dev, rpriv))
4862 		flow->orig_dev = dev;
4863 
4864 	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4865 	if (err)
4866 		goto err_free;
4867 
4868 	mlx5_esw_release(priv->mdev);
4869 	return 0;
4870 
4871 err_free:
4872 	mlx5e_flow_put(priv, flow);
4873 out:
4874 	mlx5e_tc_unblock_ipsec_offload(dev, priv);
4875 	mlx5_esw_put(priv->mdev);
4876 esw_release:
4877 	mlx5_esw_release(priv->mdev);
4878 	return err;
4879 }
4880 
4881 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4882 {
4883 	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4884 	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4885 
4886 	return flow_flag_test(flow, INGRESS) == dir_ingress &&
4887 		flow_flag_test(flow, EGRESS) == dir_egress;
4888 }
4889 
4890 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4891 			struct flow_cls_offload *f, unsigned long flags)
4892 {
4893 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4894 	struct mlx5e_tc_flow *flow;
4895 	int err;
4896 
4897 	rcu_read_lock();
4898 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4899 	if (!flow || !same_flow_direction(flow, flags)) {
4900 		err = -EINVAL;
4901 		goto errout;
4902 	}
4903 
4904 	/* Only delete the flow if it doesn't have the MLX5E_TC_FLOW_FLAG_DELETED
4905 	 * flag already set.
4906 	 */
4907 	if (flow_flag_test_and_set(flow, DELETED)) {
4908 		err = -EINVAL;
4909 		goto errout;
4910 	}
4911 	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4912 	rcu_read_unlock();
4913 
4914 	trace_mlx5e_delete_flower(f);
4915 	mlx5e_flow_put(priv, flow);
4916 
4917 	mlx5e_tc_unblock_ipsec_offload(dev, priv);
4918 	mlx5_esw_put(priv->mdev);
4919 	return 0;
4920 
4921 errout:
4922 	rcu_read_unlock();
4923 	return err;
4924 }
4925 
4926 int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv,
4927 			       struct flow_offload_action *fl_act)
4928 {
4929 	return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act);
4930 }
4931 
4932 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4933 		       struct flow_cls_offload *f, unsigned long flags)
4934 {
4935 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4936 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4937 	struct mlx5e_tc_flow *flow;
4938 	struct mlx5_fc *counter;
4939 	u64 lastuse = 0;
4940 	u64 packets = 0;
4941 	u64 bytes = 0;
4942 	int err = 0;
4943 
4944 	rcu_read_lock();
4945 	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4946 						tc_ht_params));
4947 	rcu_read_unlock();
4948 	if (IS_ERR(flow))
4949 		return PTR_ERR(flow);
4950 
4951 	if (!same_flow_direction(flow, flags)) {
4952 		err = -EINVAL;
4953 		goto errout;
4954 	}
4955 
4956 	if (mlx5e_is_offloaded_flow(flow)) {
4957 		if (flow_flag_test(flow, USE_ACT_STATS)) {
4958 			f->use_act_stats = true;
4959 		} else {
4960 			counter = mlx5e_tc_get_counter(flow);
4961 			if (!counter)
4962 				goto errout;
4963 
4964 			mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4965 		}
4966 	}
4967 
4968 	/* Under multipath it's possible for one rule to be currently
4969 	 * un-offloaded while the other rule is offloaded.
4970 	 */
4971 	if (esw && !mlx5_devcom_for_each_peer_begin(esw->devcom))
4972 		goto out;
4973 
4974 	if (flow_flag_test(flow, DUP)) {
4975 		struct mlx5e_tc_flow *peer_flow;
4976 
4977 		list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) {
4978 			u64 packets2;
4979 			u64 lastuse2;
4980 			u64 bytes2;
4981 
4982 			if (!flow_flag_test(peer_flow, OFFLOADED))
4983 				continue;
4984 			if (flow_flag_test(flow, USE_ACT_STATS)) {
4985 				f->use_act_stats = true;
4986 				break;
4987 			}
4988 
4989 			counter = mlx5e_tc_get_counter(peer_flow);
4990 			if (!counter)
4991 				goto no_peer_counter;
4992 			mlx5_fc_query_cached(counter, &bytes2, &packets2,
4993 					     &lastuse2);
4994 
4995 			bytes += bytes2;
4996 			packets += packets2;
4997 			lastuse = max_t(u64, lastuse, lastuse2);
4998 		}
4999 	}
5000 
5001 no_peer_counter:
5002 	if (esw)
5003 		mlx5_devcom_for_each_peer_end(esw->devcom);
5004 out:
5005 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
5006 			  FLOW_ACTION_HW_STATS_DELAYED);
5007 	trace_mlx5e_stats_flower(f);
5008 errout:
5009 	mlx5e_flow_put(priv, flow);
5010 	return err;
5011 }
5012 
5013 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
5014 			       struct netlink_ext_ack *extack)
5015 {
5016 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
5017 	struct mlx5_eswitch *esw;
5018 	u32 rate_mbps = 0;
5019 	u16 vport_num;
5020 	int err;
5021 
5022 	vport_num = rpriv->rep->vport;
5023 	if (vport_num >= MLX5_VPORT_ECPF) {
5024 		NL_SET_ERR_MSG_MOD(extack,
5025 				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
5026 		return -EOPNOTSUPP;
5027 	}
5028 
5029 	esw = priv->mdev->priv.eswitch;
5030 	/* rate is given in bytes/sec.
5031 	 * First convert to bits/sec and then round to the nearest Mbit/sec,
5032 	 * where Mbit means a million bits.
5033 	 * Moreover, if rate is non-zero we choose to configure a minimum of
5034 	 * 1 Mbit/sec.
5035 	 */
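	/* Worked example (illustrative): rate = 1000000 bytes/sec is
	 * 8000000 bits/sec, and (8000000 + 500000) / 1000000 truncates to
	 * 8 Mbit/sec. Any non-zero rate below 62500 bytes/sec would compute
	 * to 0 and is therefore clamped up to 1 Mbit/sec below.
	 */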
5036 	if (rate) {
5037 		rate = (rate * BITS_PER_BYTE) + 500000;
5038 		do_div(rate, 1000000);
5039 		rate_mbps = max_t(u32, rate, 1);
5040 	}
5041 
5042 	err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
5043 	if (err)
5044 		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
5045 
5046 	return err;
5047 }
5048 
5049 static int
5050 tc_matchall_police_validate(const struct flow_action *action,
5051 			    const struct flow_action_entry *act,
5052 			    struct netlink_ext_ack *extack)
5053 {
5054 	if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
5055 		NL_SET_ERR_MSG_MOD(extack,
5056 				   "Offload not supported when conform action is not continue");
5057 		return -EOPNOTSUPP;
5058 	}
5059 
5060 	if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
5061 		NL_SET_ERR_MSG_MOD(extack,
5062 				   "Offload not supported when exceed action is not drop");
5063 		return -EOPNOTSUPP;
5064 	}
5065 
5066 	if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
5067 	    !flow_action_is_last_entry(action, act)) {
5068 		NL_SET_ERR_MSG_MOD(extack,
5069 				   "Offload not supported when conform action is ok, but action is not last");
5070 		return -EOPNOTSUPP;
5071 	}
5072 
5073 	if (act->police.peakrate_bytes_ps ||
5074 	    act->police.avrate || act->police.overhead) {
5075 		NL_SET_ERR_MSG_MOD(extack,
5076 				   "Offload not supported when peakrate/avrate/overhead is configured");
5077 		return -EOPNOTSUPP;
5078 	}
5079 
5080 	return 0;
5081 }
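
/* Illustrative only (not taken from the driver sources): a matchall police
 * rule that passes the checks above would look roughly like
 *   tc filter add dev <uplink_rep> ingress prio 1 matchall \
 *           action police rate 100mbit burst 16k conform-exceed drop/continue
 * i.e. exceed action drop, conform action continue, and no
 * peakrate/avrate/overhead configured.
 */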
5082 
5083 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
5084 					struct flow_action *flow_action,
5085 					struct netlink_ext_ack *extack)
5086 {
5087 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
5088 	const struct flow_action_entry *act;
5089 	int err;
5090 	int i;
5091 
5092 	if (!flow_action_has_entries(flow_action)) {
5093 		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
5094 		return -EINVAL;
5095 	}
5096 
5097 	if (!flow_offload_has_one_action(flow_action)) {
5098 		NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
5099 		return -EOPNOTSUPP;
5100 	}
5101 
5102 	if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
5103 		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
5104 		return -EOPNOTSUPP;
5105 	}
5106 
5107 	flow_action_for_each(i, act, flow_action) {
5108 		switch (act->id) {
5109 		case FLOW_ACTION_POLICE:
5110 			err = tc_matchall_police_validate(flow_action, act, extack);
5111 			if (err)
5112 				return err;
5113 
5114 			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
5115 			if (err)
5116 				return err;
5117 
5118 			mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats,
5119 						   &priv->stats.rep_stats);
5120 			break;
5121 		default:
5122 			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5123 			return -EOPNOTSUPP;
5124 		}
5125 	}
5126 
5127 	return 0;
5128 }
5129 
5130 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5131 				struct tc_cls_matchall_offload *ma)
5132 {
5133 	struct netlink_ext_ack *extack = ma->common.extack;
5134 
5135 	if (ma->common.prio != 1) {
5136 		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
5137 		return -EINVAL;
5138 	}
5139 
5140 	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5141 }
5142 
5143 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5144 			     struct tc_cls_matchall_offload *ma)
5145 {
5146 	struct netlink_ext_ack *extack = ma->common.extack;
5147 
5148 	return apply_police_params(priv, 0, extack);
5149 }
5150 
5151 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
5152 					      struct mlx5e_priv *peer_priv)
5153 {
5154 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5155 	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
5156 	struct mlx5e_hairpin_entry *hpe, *tmp;
5157 	LIST_HEAD(init_wait_list);
5158 	u16 peer_vhca_id;
5159 	int bkt;
5160 
5161 	if (!mlx5e_same_hw_devs(priv, peer_priv))
5162 		return;
5163 
5164 	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
5165 
5166 	mutex_lock(&tc->hairpin_tbl_lock);
5167 	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
5168 		if (refcount_inc_not_zero(&hpe->refcnt))
5169 			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5170 	mutex_unlock(&tc->hairpin_tbl_lock);
5171 
5172 	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5173 		wait_for_completion(&hpe->res_ready);
5174 		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5175 			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
5176 
5177 		mlx5e_hairpin_put(priv, hpe);
5178 	}
5179 }
5180 
5181 static int mlx5e_tc_netdev_event(struct notifier_block *this,
5182 				 unsigned long event, void *ptr)
5183 {
5184 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
5185 	struct mlx5e_priv *peer_priv;
5186 	struct mlx5e_tc_table *tc;
5187 	struct mlx5e_priv *priv;
5188 
5189 	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
5190 	    event != NETDEV_UNREGISTER ||
5191 	    ndev->reg_state == NETREG_REGISTERED)
5192 		return NOTIFY_DONE;
5193 
5194 	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5195 	priv = tc->priv;
5196 	peer_priv = netdev_priv(ndev);
5197 	if (priv == peer_priv ||
5198 	    !(priv->netdev->features & NETIF_F_HW_TC))
5199 		return NOTIFY_DONE;
5200 
5201 	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5202 
5203 	return NOTIFY_DONE;
5204 }
5205 
5206 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
5207 {
5208 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5209 	struct mlx5_flow_table **ft = &tc->miss_t;
5210 	struct mlx5_flow_table_attr ft_attr = {};
5211 	struct mlx5_flow_namespace *ns;
5212 	int err = 0;
5213 
5214 	ft_attr.max_fte = 1;
5215 	ft_attr.autogroup.max_num_groups = 1;
5216 	ft_attr.level = MLX5E_TC_MISS_LEVEL;
5217 	ft_attr.prio = 0;
5218 	ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
5219 
5220 	*ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
5221 	if (IS_ERR(*ft)) {
5222 		err = PTR_ERR(*ft);
5223 		netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
5224 	}
5225 
5226 	return err;
5227 }
5228 
5229 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
5230 {
5231 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5232 
5233 	mlx5_destroy_flow_table(tc->miss_t);
5234 }
5235 
5236 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5237 {
5238 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5239 	u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES];
5240 	struct mlx5_core_dev *dev = priv->mdev;
5241 	struct mapping_ctx *chains_mapping;
5242 	struct mlx5_chains_attr attr = {};
5243 	u8 id_len;
5244 	int err;
5245 
5246 	mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5247 	mutex_init(&tc->t_lock);
5248 	mutex_init(&tc->hairpin_tbl_lock);
5249 	hash_init(tc->hairpin_tbl);
5250 	tc->priv = priv;
5251 
5252 	err = rhashtable_init(&tc->ht, &tc_ht_params);
5253 	if (err)
5254 		return err;
5255 
5256 	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
5257 	lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5258 
5259 	mlx5_query_nic_sw_system_image_guid(dev, mapping_id, &id_len);
5260 
5261 	chains_mapping = mapping_create_for_id(mapping_id, id_len,
5262 					       MAPPING_TYPE_CHAIN,
5263 					       sizeof(struct mlx5_mapped_obj),
5264 					       MLX5E_TC_TABLE_CHAIN_TAG_MASK,
5265 					       true);
5266 
5267 	if (IS_ERR(chains_mapping)) {
5268 		err = PTR_ERR(chains_mapping);
5269 		goto err_mapping;
5270 	}
5271 	tc->mapping = chains_mapping;
5272 
5273 	err = mlx5e_tc_nic_create_miss_table(priv);
5274 	if (err)
5275 		goto err_chains;
5276 
5277 	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
5278 		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5279 			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5280 	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5281 	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5282 	attr.default_ft = tc->miss_t;
5283 	attr.mapping = chains_mapping;
5284 	attr.fs_base_prio = MLX5E_TC_PRIO;
5285 
5286 	tc->chains = mlx5_chains_create(dev, &attr);
5287 	if (IS_ERR(tc->chains)) {
5288 		err = PTR_ERR(tc->chains);
5289 		goto err_miss;
5290 	}
5291 
5292 	mlx5_chains_print_info(tc->chains);
5293 
5294 	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
5295 	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
5296 				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
5297 
5298 	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5299 	err = register_netdevice_notifier_dev_net(priv->netdev,
5300 						  &tc->netdevice_nb,
5301 						  &tc->netdevice_nn);
5302 	if (err) {
5303 		tc->netdevice_nb.notifier_call = NULL;
5304 		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5305 		goto err_reg;
5306 	}
5307 
5308 	mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs));
5309 
5310 	tc->action_stats_handle = mlx5e_tc_act_stats_create();
5311 	if (IS_ERR(tc->action_stats_handle)) {
5312 		err = PTR_ERR(tc->action_stats_handle);
5313 		goto err_act_stats;
5314 	}
5315 
5316 	return 0;
5317 
5318 err_act_stats:
5319 	unregister_netdevice_notifier_dev_net(priv->netdev,
5320 					      &tc->netdevice_nb,
5321 					      &tc->netdevice_nn);
5322 err_reg:
5323 	mlx5_tc_ct_clean(tc->ct);
5324 	mlx5e_tc_post_act_destroy(tc->post_act);
5325 	mlx5_chains_destroy(tc->chains);
5326 err_miss:
5327 	mlx5e_tc_nic_destroy_miss_table(priv);
5328 err_chains:
5329 	mapping_destroy(chains_mapping);
5330 err_mapping:
5331 	rhashtable_destroy(&tc->ht);
5332 	return err;
5333 }
5334 
5335 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5336 {
5337 	struct mlx5e_tc_flow *flow = ptr;
5338 	struct mlx5e_priv *priv = flow->priv;
5339 
5340 	mlx5e_tc_del_flow(priv, flow);
5341 	kfree(flow);
5342 }
5343 
5344 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5345 {
5346 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5347 
5348 	debugfs_remove_recursive(tc->dfs_root);
5349 
5350 	if (tc->netdevice_nb.notifier_call)
5351 		unregister_netdevice_notifier_dev_net(priv->netdev,
5352 						      &tc->netdevice_nb,
5353 						      &tc->netdevice_nn);
5354 
5355 	mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5356 	mutex_destroy(&tc->hairpin_tbl_lock);
5357 
5358 	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5359 
5360 	if (!IS_ERR_OR_NULL(tc->t)) {
5361 		mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5362 		tc->t = NULL;
5363 	}
5364 	mutex_destroy(&tc->t_lock);
5365 
5366 	mlx5_tc_ct_clean(tc->ct);
5367 	mlx5e_tc_post_act_destroy(tc->post_act);
5368 	mapping_destroy(tc->mapping);
5369 	mlx5_chains_destroy(tc->chains);
5370 	mlx5e_tc_nic_destroy_miss_table(priv);
5371 	mlx5e_tc_act_stats_free(tc->action_stats_handle);
5372 }
5373 
5374 int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
5375 {
5376 	int err;
5377 
5378 	err = rhashtable_init(tc_ht, &tc_ht_params);
5379 	if (err)
5380 		return err;
5381 
5382 	lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
5383 	lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5384 
5385 	return 0;
5386 }
5387 
5388 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
5389 {
5390 	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5391 }
5392 
5393 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
5394 {
5395 	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5396 	u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES];
5397 	struct mlx5_devcom_match_attr attr = {};
5398 	struct netdev_phys_item_id ppid;
5399 	struct mlx5e_rep_priv *rpriv;
5400 	struct mapping_ctx *mapping;
5401 	struct mlx5_eswitch *esw;
5402 	struct mlx5e_priv *priv;
5403 	int err = 0;
5404 	u8 id_len;
5405 
5406 	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5407 	priv = netdev_priv(rpriv->netdev);
5408 	esw = priv->mdev->priv.eswitch;
5409 
5410 	uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
5411 						       MLX5_FLOW_NAMESPACE_FDB);
5412 	uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5413 					       esw_chains(esw),
5414 					       &esw->offloads.mod_hdr,
5415 					       MLX5_FLOW_NAMESPACE_FDB,
5416 					       uplink_priv->post_act);
5417 
5418 	uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
5419 
5420 	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
5421 
5422 	mlx5_query_nic_sw_system_image_guid(esw->dev, mapping_id, &id_len);
5423 
5424 	mapping = mapping_create_for_id(mapping_id, id_len, MAPPING_TYPE_TUNNEL,
5425 					sizeof(struct tunnel_match_key),
5426 					TUNNEL_INFO_BITS_MASK, true);
5427 
5428 	if (IS_ERR(mapping)) {
5429 		err = PTR_ERR(mapping);
5430 		goto err_tun_mapping;
5431 	}
5432 	uplink_priv->tunnel_mapping = mapping;
5433 
5434 	/* The last two values are reserved for the stack devices' slow path table
5435 	 * mark and the bridge ingress push mark.
5436 	 */
5437 	mapping = mapping_create_for_id(mapping_id, id_len,
5438 					MAPPING_TYPE_TUNNEL_ENC_OPTS,
5439 					sz_enc_opts, ENC_OPTS_BITS_MASK - 2,
5440 					true);
5441 	if (IS_ERR(mapping)) {
5442 		err = PTR_ERR(mapping);
5443 		goto err_enc_opts_mapping;
5444 	}
5445 	uplink_priv->tunnel_enc_opts_mapping = mapping;
5446 
5447 	uplink_priv->encap = mlx5e_tc_tun_init(priv);
5448 	if (IS_ERR(uplink_priv->encap)) {
5449 		err = PTR_ERR(uplink_priv->encap);
5450 		goto err_register_fib_notifier;
5451 	}
5452 
5453 	uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create();
5454 	if (IS_ERR(uplink_priv->action_stats_handle)) {
5455 		err = PTR_ERR(uplink_priv->action_stats_handle);
5456 		goto err_action_counter;
5457 	}
5458 
5459 	err = netif_get_port_parent_id(priv->netdev, &ppid, false);
5460 	if (!err) {
5461 		memcpy(&attr.key.buf, &ppid.id, ppid.id_len);
5462 		attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS;
5463 		attr.net = mlx5_core_net(esw->dev);
5464 		mlx5_esw_offloads_devcom_init(esw, &attr);
5465 	}
5466 
5467 	return 0;
5468 
5469 err_action_counter:
5470 	mlx5e_tc_tun_cleanup(uplink_priv->encap);
5471 err_register_fib_notifier:
5472 	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5473 err_enc_opts_mapping:
5474 	mapping_destroy(uplink_priv->tunnel_mapping);
5475 err_tun_mapping:
5476 	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5477 	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5478 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
5479 	netdev_warn(priv->netdev,
5480 		    "Failed to initialize tc (eswitch), err: %d", err);
5481 	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5482 	return err;
5483 }
5484 
5485 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
5486 {
5487 	struct mlx5e_rep_priv *rpriv;
5488 	struct mlx5_eswitch *esw;
5489 	struct mlx5e_priv *priv;
5490 
5491 	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5492 	priv = netdev_priv(rpriv->netdev);
5493 	esw = priv->mdev->priv.eswitch;
5494 
5495 	mlx5_esw_offloads_devcom_cleanup(esw);
5496 
5497 	mlx5e_tc_tun_cleanup(uplink_priv->encap);
5498 
5499 	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5500 	mapping_destroy(uplink_priv->tunnel_mapping);
5501 
5502 	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5503 	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5504 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
5505 	mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
5506 	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5507 	mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle);
5508 }
5509 
5510 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5511 {
5512 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5513 
5514 	return atomic_read(&tc_ht->nelems);
5515 }
5516 
5517 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5518 {
5519 	struct mlx5_devcom_comp_dev *devcom;
5520 	struct mlx5_devcom_comp_dev *pos;
5521 	struct mlx5e_tc_flow *flow, *tmp;
5522 	struct mlx5_eswitch *peer_esw;
5523 	int i;
5524 
5525 	devcom = esw->devcom;
5526 
5527 	mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
5528 		i = mlx5_get_dev_index(peer_esw->dev);
5529 		list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i])
5530 			mlx5e_tc_del_fdb_peers_flow(flow);
5531 	}
5532 }
5533 
5534 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5535 {
5536 	struct mlx5_rep_uplink_priv *rpriv =
5537 		container_of(work, struct mlx5_rep_uplink_priv,
5538 			     reoffload_flows_work);
5539 	struct mlx5e_tc_flow *flow, *tmp;
5540 
5541 	mutex_lock(&rpriv->unready_flows_lock);
5542 	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5543 		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5544 			unready_flow_del(flow);
5545 	}
5546 	mutex_unlock(&rpriv->unready_flows_lock);
5547 }
5548 
5549 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5550 				     struct flow_cls_offload *cls_flower,
5551 				     unsigned long flags)
5552 {
5553 	switch (cls_flower->command) {
5554 	case FLOW_CLS_REPLACE:
5555 		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5556 					      flags);
5557 	case FLOW_CLS_DESTROY:
5558 		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5559 					   flags);
5560 	case FLOW_CLS_STATS:
5561 		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5562 					  flags);
5563 	default:
5564 		return -EOPNOTSUPP;
5565 	}
5566 }
5567 
5568 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5569 			    void *cb_priv)
5570 {
5571 	unsigned long flags = MLX5_TC_FLAG(INGRESS);
5572 	struct mlx5e_priv *priv = cb_priv;
5573 
5574 	if (!priv->netdev || !netif_device_present(priv->netdev))
5575 		return -EOPNOTSUPP;
5576 
5577 	if (mlx5e_is_uplink_rep(priv))
5578 		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5579 	else
5580 		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5581 
5582 	switch (type) {
5583 	case TC_SETUP_CLSFLOWER:
5584 		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5585 	default:
5586 		return -EOPNOTSUPP;
5587 	}
5588 }
5589 
5590 static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
5591 				    struct mlx5e_tc_update_priv *tc_priv,
5592 				    u32 tunnel_id)
5593 {
5594 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5595 	struct tunnel_match_enc_opts enc_opts = {};
5596 	struct mlx5_rep_uplink_priv *uplink_priv;
5597 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
5598 	struct mlx5e_rep_priv *uplink_rpriv;
5599 	struct metadata_dst *tun_dst;
5600 	struct tunnel_match_key key;
5601 	u32 tun_id, enc_opts_id;
5602 	struct net_device *dev;
5603 	int err;
5604 
5605 	__set_bit(IP_TUNNEL_KEY_BIT, flags);
5606 
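	/* tunnel_id packs two mapping ids: the tunnel match key id in the bits
	 * above ENC_OPTS_BITS and the encap options id in the low ENC_OPTS_BITS
	 * bits; a zero tunnel key id means there is no tunnel to restore.
	 */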
5607 	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
5608 	tun_id = tunnel_id >> ENC_OPTS_BITS;
5609 
5610 	if (!tun_id)
5611 		return true;
5612 
5613 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5614 	uplink_priv = &uplink_rpriv->uplink_priv;
5615 
5616 	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
5617 	if (err) {
5618 		netdev_dbg(priv->netdev,
5619 			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
5620 			   tun_id, err);
5621 		return false;
5622 	}
5623 
5624 	if (enc_opts_id) {
5625 		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
5626 				   enc_opts_id, &enc_opts);
5627 		if (err) {
5628 			netdev_dbg(priv->netdev,
5629 				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
5630 				   enc_opts_id, err);
5631 			return false;
5632 		}
5633 	}
5634 
5635 	switch (key.enc_control.addr_type) {
5636 	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
5637 		tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
5638 					   key.enc_ip.tos, key.enc_ip.ttl,
5639 					   key.enc_tp.dst, flags,
5640 					   key32_to_tunnel_id(key.enc_key_id.keyid),
5641 					   enc_opts.key.len);
5642 		break;
5643 	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
5644 		tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
5645 					     key.enc_ip.tos, key.enc_ip.ttl,
5646 					     key.enc_tp.dst, 0, flags,
5647 					     key32_to_tunnel_id(key.enc_key_id.keyid),
5648 					     enc_opts.key.len);
5649 		break;
5650 	default:
5651 		netdev_dbg(priv->netdev,
5652 			   "Couldn't restore tunnel, unsupported addr_type: %d\n",
5653 			   key.enc_control.addr_type);
5654 		return false;
5655 	}
5656 
5657 	if (!tun_dst) {
5658 		netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
5659 		return false;
5660 	}
5661 
5662 	tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
5663 
5664 	if (enc_opts.key.len) {
5665 		ip_tunnel_flags_zero(flags);
5666 		if (enc_opts.key.dst_opt_type)
5667 			__set_bit(enc_opts.key.dst_opt_type, flags);
5668 
5669 		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
5670 					enc_opts.key.data,
5671 					enc_opts.key.len,
5672 					flags);
5673 	}
5674 
5675 	skb_dst_set(skb, (struct dst_entry *)tun_dst);
5676 	dev = dev_get_by_index(&init_net, key.filter_ifindex);
5677 	if (!dev) {
5678 		netdev_dbg(priv->netdev,
5679 			   "Couldn't find tunnel device with ifindex: %d\n",
5680 			   key.filter_ifindex);
5681 		return false;
5682 	}
5683 
5684 	/* Set fwd_dev so we do dev_put() after datapath */
5685 	tc_priv->fwd_dev = dev;
5686 
5687 	skb->dev = dev;
5688 
5689 	return true;
5690 }
5691 
5692 static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv,
5693 					 struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id,
5694 					 u32 tunnel_id,  struct mlx5e_tc_update_priv *tc_priv)
5695 {
5696 	struct mlx5e_priv *priv = netdev_priv(skb->dev);
5697 	struct tc_skb_ext *tc_skb_ext;
5698 	u64 act_miss_cookie;
5699 	u32 chain;
5700 
5701 	chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0;
5702 	act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ?
5703 			  mapped_obj->act_miss_cookie : 0;
5704 	if (chain || act_miss_cookie) {
5705 		if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
5706 			return false;
5707 
5708 		tc_skb_ext = tc_skb_ext_alloc(skb);
5709 		if (!tc_skb_ext) {
5710 			WARN_ON(1);
5711 			return false;
5712 		}
5713 
5714 		if (act_miss_cookie) {
5715 			tc_skb_ext->act_miss_cookie = act_miss_cookie;
5716 			tc_skb_ext->act_miss = 1;
5717 		} else {
5718 			tc_skb_ext->chain = chain;
5719 		}
5720 	}
5721 
5722 	if (tc_priv)
5723 		return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5724 
5725 	return true;
5726 }
5727 
5728 static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
5729 					struct mlx5_mapped_obj *mapped_obj,
5730 					struct mlx5e_tc_update_priv *tc_priv)
5731 {
5732 	if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
5733 		netdev_dbg(priv->netdev,
5734 			   "Failed to restore tunnel info for sampled packet\n");
5735 		return;
5736 	}
5737 	mlx5e_tc_sample_skb(skb, mapped_obj);
5738 }
5739 
5740 static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
5741 					  struct mlx5_mapped_obj *mapped_obj,
5742 					  struct mlx5e_tc_update_priv *tc_priv,
5743 					  u32 tunnel_id)
5744 {
5745 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5746 	struct mlx5_rep_uplink_priv *uplink_priv;
5747 	struct mlx5e_rep_priv *uplink_rpriv;
5748 	bool forward_tx = false;
5749 
5750 	/* Tunnel restore takes precedence over int port restore */
5751 	if (tunnel_id)
5752 		return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5753 
5754 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5755 	uplink_priv = &uplink_rpriv->uplink_priv;
5756 
5757 	if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
5758 				      mapped_obj->int_port_metadata, &forward_tx)) {
5759 		/* Set fwd_dev for future dev_put */
5760 		tc_priv->fwd_dev = skb->dev;
5761 		tc_priv->forward_tx = forward_tx;
5762 
5763 		return true;
5764 	}
5765 
5766 	return false;
5767 }
5768 
5769 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
5770 			 struct mapping_ctx *mapping_ctx, u32 mapped_obj_id,
5771 			 struct mlx5_tc_ct_priv *ct_priv,
5772 			 u32 zone_restore_id, u32 tunnel_id,
5773 			 struct mlx5e_tc_update_priv *tc_priv)
5774 {
5775 	struct mlx5e_priv *priv = netdev_priv(skb->dev);
5776 	struct mlx5_mapped_obj mapped_obj;
5777 	int err;
5778 
5779 	err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj);
5780 	if (err) {
5781 		netdev_dbg(skb->dev,
5782 			   "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n",
5783 			   mapped_obj_id, err);
5784 		return false;
5785 	}
5786 
5787 	switch (mapped_obj.type) {
5788 	case MLX5_MAPPED_OBJ_CHAIN:
5789 	case MLX5_MAPPED_OBJ_ACT_MISS:
5790 		return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id,
5791 						    tunnel_id, tc_priv);
5792 	case MLX5_MAPPED_OBJ_SAMPLE:
5793 		mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
5794 		tc_priv->skb_done = true;
5795 		return true;
5796 	case MLX5_MAPPED_OBJ_INT_PORT_METADATA:
5797 		return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id);
5798 	default:
5799 		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5800 		return false;
5801 	}
5802 
5803 	return false;
5804 }
5805 
5806 bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
5807 {
5808 	struct mlx5e_priv *priv = netdev_priv(skb->dev);
5809 	u32 mapped_obj_id, reg_b, zone_restore_id;
5810 	struct mlx5_tc_ct_priv *ct_priv;
5811 	struct mapping_ctx *mapping_ctx;
5812 	struct mlx5e_tc_table *tc;
5813 
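	/* The CQE flow-table metadata (reg_b) carries the chain mapping id in
	 * the bits covered by MLX5E_TC_TABLE_CHAIN_TAG_MASK and the CT zone
	 * restore id in the NIC_ZONE_RESTORE_TO_REG field above them.
	 */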
5814 	reg_b = be32_to_cpu(cqe->ft_metadata);
5815 	tc = mlx5e_fs_get_tc(priv->fs);
5816 	mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5817 	zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5818 			  ESW_ZONE_ID_MASK;
5819 	ct_priv = tc->ct;
5820 	mapping_ctx = tc->mapping;
5821 
5822 	return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id,
5823 				   0, NULL);
5824 }
5825 
5826 static struct mapping_ctx *
5827 mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
5828 {
5829 	struct mlx5e_tc_table *tc;
5830 	struct mlx5_eswitch *esw;
5831 	struct mapping_ctx *ctx;
5832 
5833 	if (is_mdev_switchdev_mode(priv->mdev)) {
5834 		esw = priv->mdev->priv.eswitch;
5835 		ctx = esw->offloads.reg_c0_obj_pool;
5836 	} else {
5837 		tc = mlx5e_fs_get_tc(priv->fs);
5838 		ctx = tc->mapping;
5839 	}
5840 
5841 	return ctx;
5842 }
5843 
5844 int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5845 				     u64 act_miss_cookie, u32 *act_miss_mapping)
5846 {
5847 	struct mlx5_mapped_obj mapped_obj = {};
5848 	struct mlx5_eswitch *esw;
5849 	struct mapping_ctx *ctx;
5850 	int err;
5851 
5852 	ctx = mlx5e_get_priv_obj_mapping(priv);
5853 	mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
5854 	mapped_obj.act_miss_cookie = act_miss_cookie;
5855 	err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
5856 	if (err)
5857 		return err;
5858 
5859 	if (!is_mdev_switchdev_mode(priv->mdev))
5860 		return 0;
5861 
5862 	esw = priv->mdev->priv.eswitch;
5863 	attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
5864 	if (IS_ERR(attr->act_id_restore_rule)) {
5865 		err = PTR_ERR(attr->act_id_restore_rule);
5866 		goto err_rule;
5867 	}
5868 
5869 	return 0;
5870 
5871 err_rule:
5872 	mapping_remove(ctx, *act_miss_mapping);
5873 	return err;
5874 }
5875 
5876 void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5877 				      u32 act_miss_mapping)
5878 {
5879 	struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);
5880 
5881 	if (is_mdev_switchdev_mode(priv->mdev))
5882 		mlx5_del_flow_rules(attr->act_id_restore_rule);
5883 	mapping_remove(ctx, act_miss_mapping);
5884 }
5885