xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c (revision 7bcfb19465fca99efd09ecb5d3ef8f91179d7ff1)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/etherdevice.h>
34 #include <linux/idr.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/mlx5_ifc.h>
37 #include <linux/mlx5/vport.h>
38 #include <linux/mlx5/fs.h>
39 #include <linux/lockdep.h>
40 #include "mlx5_core.h"
41 #include "eswitch.h"
42 #include "esw/indir_table.h"
43 #include "esw/acl/ofld.h"
44 #include "rdma.h"
45 #include "en.h"
46 #include "fs_core.h"
47 #include "lib/mlx5.h"
48 #include "lib/devcom.h"
49 #include "lib/sd.h"
50 #include "lib/eq.h"
51 #include "lib/fs_chains.h"
52 #include "en_tc.h"
53 #include "en/mapping.h"
54 #include "devlink.h"
55 #include "lag/lag.h"
56 #include "en/tc/post_meter.h"
57 #include "fw_reset.h"
58 
/* Two match-all miss flows exist: one for unicast destination MACs and
 * one for multicast destination MACs.
 */
#define MLX5_ESW_MISS_FLOWS		(2)
#define UPLINK_REP_INDEX		0

/* Limits applied to the mirror vport table namespace
 * (mlx5_esw_vport_tbl_mirror_ns).
 */
#define MLX5_ESW_VPORT_TBL_SIZE		128
#define MLX5_ESW_VPORT_TBL_NUM_GROUPS	4

#define MLX5_ESW_FT_OFFLOADS_DROP_RULE	(1)

#define MLX5_ESW_MAX_CTRL_EQS		4
#define MLX5_ESW_DEFAULT_SF_COMP_EQS	8
72 
73 static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
74 	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
75 	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
76 	.flags = 0,
77 };
78 
79 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
80 						     u16 vport_num)
81 {
82 	return xa_load(&esw->offloads.vport_reps, vport_num);
83 }
84 
85 static void
86 mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
87 				  struct mlx5_flow_spec *spec,
88 				  struct mlx5_esw_flow_attr *attr)
89 {
90 	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) || !attr || !attr->in_rep)
91 		return;
92 
93 	if (attr->int_port) {
94 		spec->flow_context.flow_source = mlx5e_tc_int_port_get_flow_source(attr->int_port);
95 
96 		return;
97 	}
98 
99 	spec->flow_context.flow_source = (attr->in_rep->vport == MLX5_VPORT_UPLINK) ?
100 					 MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK :
101 					 MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
102 }
103 
104 /* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits
105  * are not needed as well in the following process. So clear them all for simplicity.
106  */
107 void
108 mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec)
109 {
110 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
111 		void *misc2;
112 
113 		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
114 		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
115 
116 		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
117 		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
118 
119 		if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2)))
120 			spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2;
121 	}
122 }
123 
124 static void
125 mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
126 				  struct mlx5_flow_spec *spec,
127 				  struct mlx5_flow_attr *attr,
128 				  struct mlx5_eswitch *src_esw,
129 				  u16 vport)
130 {
131 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
132 	u32 metadata;
133 	void *misc2;
134 	void *misc;
135 
136 	/* Use metadata matching because vport is not represented by single
137 	 * VHCA in dual-port RoCE mode, and matching on source vport may fail.
138 	 */
139 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
140 		if (mlx5_esw_indir_table_decap_vport(attr))
141 			vport = mlx5_esw_indir_table_decap_vport(attr);
142 
143 		if (!attr->chain && esw_attr && esw_attr->int_port)
144 			metadata =
145 				mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
146 		else
147 			metadata =
148 				mlx5_eswitch_get_vport_metadata_for_match(src_esw, vport);
149 
150 		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
151 		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, metadata);
152 
153 		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
154 		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
155 			 mlx5_eswitch_get_vport_metadata_mask());
156 
157 		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
158 	} else {
159 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
160 		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
161 
162 		if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
163 			MLX5_SET(fte_match_set_misc, misc,
164 				 source_eswitch_owner_vhca_id,
165 				 MLX5_CAP_GEN(src_esw->dev, vhca_id));
166 
167 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
168 		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
169 		if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
170 			MLX5_SET_TO_ONES(fte_match_set_misc, misc,
171 					 source_eswitch_owner_vhca_id);
172 
173 		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
174 	}
175 }
176 
177 static int
178 esw_setup_decap_indir(struct mlx5_eswitch *esw,
179 		      struct mlx5_flow_attr *attr)
180 {
181 	struct mlx5_flow_table *ft;
182 
183 	if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
184 		return -EOPNOTSUPP;
185 
186 	ft = mlx5_esw_indir_table_get(esw, attr,
187 				      mlx5_esw_indir_table_decap_vport(attr), true);
188 	return PTR_ERR_OR_ZERO(ft);
189 }
190 
191 static void
192 esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
193 			struct mlx5_flow_attr *attr)
194 {
195 	if (mlx5_esw_indir_table_decap_vport(attr))
196 		mlx5_esw_indir_table_put(esw,
197 					 mlx5_esw_indir_table_decap_vport(attr),
198 					 true);
199 }
200 
201 static int
202 esw_setup_mtu_dest(struct mlx5_flow_destination *dest,
203 		   struct mlx5e_meter_attr *meter,
204 		   int i)
205 {
206 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_RANGE;
207 	dest[i].range.field = MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN;
208 	dest[i].range.min = 0;
209 	dest[i].range.max = meter->params.mtu;
210 	dest[i].range.hit_ft = mlx5e_post_meter_get_mtu_true_ft(meter->post_meter);
211 	dest[i].range.miss_ft = mlx5e_post_meter_get_mtu_false_ft(meter->post_meter);
212 
213 	return 0;
214 }
215 
216 static int
217 esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
218 		       struct mlx5_flow_act *flow_act,
219 		       u32 sampler_id,
220 		       int i)
221 {
222 	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
223 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
224 	dest[i].sampler_id = sampler_id;
225 
226 	return 0;
227 }
228 
229 static int
230 esw_setup_ft_dest(struct mlx5_flow_destination *dest,
231 		  struct mlx5_flow_act *flow_act,
232 		  struct mlx5_eswitch *esw,
233 		  struct mlx5_flow_attr *attr,
234 		  int i)
235 {
236 	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
237 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
238 	dest[i].ft = attr->dest_ft;
239 
240 	if (mlx5_esw_indir_table_decap_vport(attr))
241 		return esw_setup_decap_indir(esw, attr);
242 	return 0;
243 }
244 
245 static void
246 esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
247 		      struct mlx5_fs_chains *chains, int i)
248 {
249 	if (mlx5_chains_ignore_flow_level_supported(chains))
250 		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
251 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
252 	dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
253 }
254 
255 static void
256 esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
257 			 struct mlx5_eswitch *esw, int i)
258 {
259 	if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level))
260 		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
261 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
262 	dest[i].ft = mlx5_eswitch_get_slow_fdb(esw);
263 }
264 
265 static int
266 esw_setup_chain_dest(struct mlx5_flow_destination *dest,
267 		     struct mlx5_flow_act *flow_act,
268 		     struct mlx5_fs_chains *chains,
269 		     u32 chain, u32 prio, u32 level,
270 		     int i)
271 {
272 	struct mlx5_flow_table *ft;
273 
274 	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
275 	ft = mlx5_chains_get_table(chains, chain, prio, level);
276 	if (IS_ERR(ft))
277 		return PTR_ERR(ft);
278 
279 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
280 	dest[i].ft = ft;
281 	return  0;
282 }
283 
284 static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
285 				     int from, int to)
286 {
287 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
288 	struct mlx5_fs_chains *chains = esw_chains(esw);
289 	int i;
290 
291 	for (i = from; i < to; i++)
292 		if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
293 			mlx5_chains_put_table(chains, 0, 1, 0);
294 		else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport,
295 						     esw_attr->dests[i].mdev))
296 			mlx5_esw_indir_table_put(esw, esw_attr->dests[i].vport, false);
297 }
298 
299 static bool
300 esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr)
301 {
302 	int i;
303 
304 	for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
305 		if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
306 			return true;
307 	return false;
308 }
309 
310 static int
311 esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
312 				 struct mlx5_flow_act *flow_act,
313 				 struct mlx5_eswitch *esw,
314 				 struct mlx5_fs_chains *chains,
315 				 struct mlx5_flow_attr *attr,
316 				 int *i)
317 {
318 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
319 	int err;
320 
321 	if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
322 		return -EOPNOTSUPP;
323 
324 	/* flow steering cannot handle more than one dest with the same ft
325 	 * in a single flow
326 	 */
327 	if (esw_attr->out_count - esw_attr->split_count > 1)
328 		return -EOPNOTSUPP;
329 
330 	err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
331 	if (err)
332 		return err;
333 
334 	if (esw_attr->dests[esw_attr->split_count].pkt_reformat) {
335 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
336 		flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat;
337 	}
338 	(*i)++;
339 
340 	return 0;
341 }
342 
343 static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw,
344 					       struct mlx5_flow_attr *attr)
345 {
346 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
347 
348 	esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
349 }
350 
351 static bool
352 esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
353 {
354 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
355 	bool result = false;
356 	int i;
357 
358 	/* Indirect table is supported only for flows with in_port uplink
359 	 * and the destination is vport on the same eswitch as the uplink,
360 	 * return false in case at least one of destinations doesn't meet
361 	 * this criteria.
362 	 */
363 	for (i = esw_attr->split_count; i < esw_attr->out_count; i++) {
364 		if (esw_attr->dests[i].vport_valid &&
365 		    mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport,
366 						esw_attr->dests[i].mdev)) {
367 			result = true;
368 		} else {
369 			result = false;
370 			break;
371 		}
372 	}
373 	return result;
374 }
375 
376 static int
377 esw_setup_indir_table(struct mlx5_flow_destination *dest,
378 		      struct mlx5_flow_act *flow_act,
379 		      struct mlx5_eswitch *esw,
380 		      struct mlx5_flow_attr *attr,
381 		      int *i)
382 {
383 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
384 	int j, err;
385 
386 	if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
387 		return -EOPNOTSUPP;
388 
389 	for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
390 		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
391 		dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
392 
393 		dest[*i].ft = mlx5_esw_indir_table_get(esw, attr,
394 						       esw_attr->dests[j].vport, false);
395 		if (IS_ERR(dest[*i].ft)) {
396 			err = PTR_ERR(dest[*i].ft);
397 			goto err_indir_tbl_get;
398 		}
399 	}
400 
401 	if (mlx5_esw_indir_table_decap_vport(attr)) {
402 		err = esw_setup_decap_indir(esw, attr);
403 		if (err)
404 			goto err_indir_tbl_get;
405 	}
406 
407 	return 0;
408 
409 err_indir_tbl_get:
410 	esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
411 	return err;
412 }
413 
414 static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
415 {
416 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
417 
418 	esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
419 	esw_cleanup_decap_indir(esw, attr);
420 }
421 
422 static void
423 esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level)
424 {
425 	mlx5_chains_put_table(chains, chain, prio, level);
426 }
427 
428 static bool esw_same_vhca_id(struct mlx5_core_dev *mdev1, struct mlx5_core_dev *mdev2)
429 {
430 	return MLX5_CAP_GEN(mdev1, vhca_id) == MLX5_CAP_GEN(mdev2, vhca_id);
431 }
432 
433 static bool esw_setup_uplink_fwd_ipsec_needed(struct mlx5_eswitch *esw,
434 					      struct mlx5_esw_flow_attr *esw_attr,
435 					      int attr_idx)
436 {
437 	if (esw->offloads.ft_ipsec_tx_pol &&
438 	    esw_attr->dests[attr_idx].vport_valid &&
439 	    esw_attr->dests[attr_idx].vport == MLX5_VPORT_UPLINK &&
440 	    /* To be aligned with software, encryption is needed only for tunnel device */
441 	    (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) &&
442 	    esw_attr->dests[attr_idx].vport != esw_attr->in_rep->vport &&
443 	    esw_same_vhca_id(esw_attr->dests[attr_idx].mdev, esw->dev))
444 		return true;
445 
446 	return false;
447 }
448 
449 static bool esw_flow_dests_fwd_ipsec_check(struct mlx5_eswitch *esw,
450 					   struct mlx5_esw_flow_attr *esw_attr)
451 {
452 	int i;
453 
454 	if (!esw->offloads.ft_ipsec_tx_pol)
455 		return true;
456 
457 	for (i = 0; i < esw_attr->split_count; i++)
458 		if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i))
459 			return false;
460 
461 	for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
462 		if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i) &&
463 		    (esw_attr->out_count - esw_attr->split_count > 1))
464 			return false;
465 
466 	return true;
467 }
468 
469 static void
470 esw_setup_dest_fwd_vport(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
471 			 struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
472 			 int attr_idx, int dest_idx, bool pkt_reformat)
473 {
474 	dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
475 	dest[dest_idx].vport.num = esw_attr->dests[attr_idx].vport;
476 	if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
477 		dest[dest_idx].vport.vhca_id =
478 			MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
479 		dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
480 		if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK &&
481 		    mlx5_lag_is_mpesw(esw->dev))
482 			dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
483 	}
484 	if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) {
485 		if (pkt_reformat) {
486 			flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
487 			flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
488 		}
489 		dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
490 		dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
491 	}
492 }
493 
494 static void
495 esw_setup_dest_fwd_ipsec(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
496 			 struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
497 			 int attr_idx, int dest_idx, bool pkt_reformat)
498 {
499 	dest[dest_idx].ft = esw->offloads.ft_ipsec_tx_pol;
500 	dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
501 	if (pkt_reformat &&
502 	    esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) {
503 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
504 		flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
505 	}
506 }
507 
508 static void
509 esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
510 		     struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
511 		     int attr_idx, int dest_idx, bool pkt_reformat)
512 {
513 	if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx))
514 		esw_setup_dest_fwd_ipsec(dest, flow_act, esw, esw_attr,
515 					 attr_idx, dest_idx, pkt_reformat);
516 	else
517 		esw_setup_dest_fwd_vport(dest, flow_act, esw, esw_attr,
518 					 attr_idx, dest_idx, pkt_reformat);
519 }
520 
521 static int
522 esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
523 		      struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
524 		      int i)
525 {
526 	int j;
527 
528 	for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++)
529 		esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true);
530 	return i;
531 }
532 
533 static bool
534 esw_src_port_rewrite_supported(struct mlx5_eswitch *esw)
535 {
536 	return MLX5_CAP_GEN(esw->dev, reg_c_preserve) &&
537 	       mlx5_eswitch_vport_match_metadata_enabled(esw) &&
538 	       MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level);
539 }
540 
541 static bool
542 esw_dests_to_int_external(struct mlx5_flow_destination *dests, int max_dest)
543 {
544 	bool internal_dest = false, external_dest = false;
545 	int i;
546 
547 	for (i = 0; i < max_dest; i++) {
548 		if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT &&
549 		    dests[i].type != MLX5_FLOW_DESTINATION_TYPE_UPLINK)
550 			continue;
551 
552 		/* Uplink dest is external, but considered as internal
553 		 * if there is reformat because firmware uses LB+hairpin to support it.
554 		 */
555 		if (dests[i].vport.num == MLX5_VPORT_UPLINK &&
556 		    !(dests[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID))
557 			external_dest = true;
558 		else
559 			internal_dest = true;
560 
561 		if (internal_dest && external_dest)
562 			return true;
563 	}
564 
565 	return false;
566 }
567 
568 static int
569 esw_setup_dests(struct mlx5_flow_destination *dest,
570 		struct mlx5_flow_act *flow_act,
571 		struct mlx5_eswitch *esw,
572 		struct mlx5_flow_attr *attr,
573 		struct mlx5_flow_spec *spec,
574 		int *i)
575 {
576 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
577 	struct mlx5_fs_chains *chains = esw_chains(esw);
578 	int err = 0;
579 
580 	if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
581 	    esw_src_port_rewrite_supported(esw))
582 		attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE;
583 
584 	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) {
585 		esw_setup_slow_path_dest(dest, flow_act, esw, *i);
586 		(*i)++;
587 		goto out;
588 	}
589 
590 	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
591 		esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i);
592 		(*i)++;
593 	} else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) {
594 		esw_setup_accept_dest(dest, flow_act, chains, *i);
595 		(*i)++;
596 	} else if (attr->flags & MLX5_ATTR_FLAG_MTU) {
597 		err = esw_setup_mtu_dest(dest, &attr->meter_attr, *i);
598 		(*i)++;
599 	} else if (esw_is_indir_table(esw, attr)) {
600 		err = esw_setup_indir_table(dest, flow_act, esw, attr, i);
601 	} else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
602 		err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
603 	} else {
604 		*i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i);
605 
606 		if (attr->dest_ft) {
607 			err = esw_setup_ft_dest(dest, flow_act, esw, attr, *i);
608 			(*i)++;
609 		} else if (attr->dest_chain) {
610 			err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
611 						   1, 0, *i);
612 			(*i)++;
613 		}
614 	}
615 
616 	if (attr->extra_split_ft) {
617 		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
618 		dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
619 		dest[*i].ft = attr->extra_split_ft;
620 		(*i)++;
621 	}
622 
623 out:
624 	return err;
625 }
626 
627 static void
628 esw_cleanup_dests(struct mlx5_eswitch *esw,
629 		  struct mlx5_flow_attr *attr)
630 {
631 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
632 	struct mlx5_fs_chains *chains = esw_chains(esw);
633 
634 	if (attr->dest_ft) {
635 		esw_cleanup_decap_indir(esw, attr);
636 	} else if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
637 		if (attr->dest_chain)
638 			esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
639 		else if (esw_is_indir_table(esw, attr))
640 			esw_cleanup_indir_table(esw, attr);
641 		else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
642 			esw_cleanup_chain_src_port_rewrite(esw, attr);
643 	}
644 }
645 
646 static void
647 esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act)
648 {
649 	struct mlx5e_flow_meter_handle *meter;
650 
651 	meter = attr->meter_attr.meter;
652 	flow_act->exe_aso.type = attr->exe_aso_type;
653 	flow_act->exe_aso.object_id = meter->obj_id;
654 	flow_act->exe_aso.base_id = mlx5e_flow_meter_get_base_id(meter);
655 	flow_act->exe_aso.flow_meter.meter_idx = meter->idx;
656 	flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN;
657 	/* use metadata reg 5 for packet color */
658 	flow_act->exe_aso.return_reg_id = 5;
659 }
660 
661 struct mlx5_flow_handle *
662 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
663 				struct mlx5_flow_spec *spec,
664 				struct mlx5_flow_attr *attr)
665 {
666 	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
667 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
668 	struct mlx5_fs_chains *chains = esw_chains(esw);
669 	bool split = !!(esw_attr->split_count);
670 	struct mlx5_vport_tbl_attr fwd_attr;
671 	struct mlx5_flow_destination *dest;
672 	struct mlx5_flow_handle *rule;
673 	struct mlx5_flow_table *fdb;
674 	int i = 0;
675 
676 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
677 		return ERR_PTR(-EOPNOTSUPP);
678 
679 	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
680 		return ERR_PTR(-EOPNOTSUPP);
681 
682 	if (!esw_flow_dests_fwd_ipsec_check(esw, esw_attr))
683 		return ERR_PTR(-EOPNOTSUPP);
684 
685 	dest = kzalloc_objs(*dest, MLX5_MAX_FLOW_FWD_VPORTS + 1);
686 	if (!dest)
687 		return ERR_PTR(-ENOMEM);
688 
689 	flow_act.action = attr->action;
690 
691 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
692 		flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]);
693 		flow_act.vlan[0].vid = esw_attr->vlan_vid[0];
694 		flow_act.vlan[0].prio = esw_attr->vlan_prio[0];
695 		if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
696 			flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]);
697 			flow_act.vlan[1].vid = esw_attr->vlan_vid[1];
698 			flow_act.vlan[1].prio = esw_attr->vlan_prio[1];
699 		}
700 	}
701 
702 	mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
703 
704 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
705 		int err;
706 
707 		err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i);
708 		if (err) {
709 			rule = ERR_PTR(err);
710 			goto err_create_goto_table;
711 		}
712 
713 		/* Header rewrite with combined wire+loopback in FDB is not allowed */
714 		if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) &&
715 		    esw_dests_to_int_external(dest, i)) {
716 			esw_warn(esw->dev,
717 				 "FDB: Header rewrite with forwarding to both internal and external dests is not allowed\n");
718 			rule = ERR_PTR(-EINVAL);
719 			goto err_esw_get;
720 		}
721 	}
722 
723 	if (esw_attr->decap_pkt_reformat)
724 		flow_act.pkt_reformat = esw_attr->decap_pkt_reformat;
725 
726 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
727 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
728 		dest[i].counter = attr->counter;
729 		i++;
730 	}
731 
732 	if (attr->outer_match_level != MLX5_MATCH_NONE)
733 		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
734 	if (attr->inner_match_level != MLX5_MATCH_NONE)
735 		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
736 
737 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
738 		flow_act.modify_hdr = attr->modify_hdr;
739 
740 	if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
741 	    attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)
742 		esw_setup_meter(attr, &flow_act);
743 
744 	if (split) {
745 		fwd_attr.chain = attr->chain;
746 		fwd_attr.prio = attr->prio;
747 		fwd_attr.vport = esw_attr->in_rep->vport;
748 		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
749 
750 		fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
751 	} else {
752 		if (attr->chain || attr->prio)
753 			fdb = mlx5_chains_get_table(chains, attr->chain,
754 						    attr->prio, 0);
755 		else
756 			fdb = attr->ft;
757 
758 		if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT))
759 			mlx5_eswitch_set_rule_source_port(esw, spec, attr,
760 							  esw_attr->in_mdev->priv.eswitch,
761 							  esw_attr->in_rep->vport);
762 	}
763 	if (IS_ERR(fdb)) {
764 		rule = ERR_CAST(fdb);
765 		goto err_esw_get;
766 	}
767 
768 	if (!i) {
769 		kfree(dest);
770 		dest = NULL;
771 	}
772 
773 	if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
774 		rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
775 						     &flow_act, dest, i);
776 	else
777 		rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
778 	if (IS_ERR(rule))
779 		goto err_add_rule;
780 	else
781 		atomic64_inc(&esw->offloads.num_flows);
782 
783 	kfree(dest);
784 	return rule;
785 
786 err_add_rule:
787 	if (split)
788 		mlx5_esw_vporttbl_put(esw, &fwd_attr);
789 	else if (attr->chain || attr->prio)
790 		mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
791 err_esw_get:
792 	esw_cleanup_dests(esw, attr);
793 err_create_goto_table:
794 	kfree(dest);
795 	return rule;
796 }
797 
798 struct mlx5_flow_handle *
799 mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
800 			  struct mlx5_flow_spec *spec,
801 			  struct mlx5_flow_attr *attr)
802 {
803 	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
804 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
805 	struct mlx5_fs_chains *chains = esw_chains(esw);
806 	struct mlx5_vport_tbl_attr fwd_attr;
807 	struct mlx5_flow_destination *dest;
808 	struct mlx5_flow_table *fast_fdb;
809 	struct mlx5_flow_table *fwd_fdb;
810 	struct mlx5_flow_handle *rule;
811 	int i, err = 0;
812 
813 	dest = kzalloc_objs(*dest, MLX5_MAX_FLOW_FWD_VPORTS + 1);
814 	if (!dest)
815 		return ERR_PTR(-ENOMEM);
816 
817 	fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
818 	if (IS_ERR(fast_fdb)) {
819 		rule = ERR_CAST(fast_fdb);
820 		goto err_get_fast;
821 	}
822 
823 	fwd_attr.chain = attr->chain;
824 	fwd_attr.prio = attr->prio;
825 	fwd_attr.vport = esw_attr->in_rep->vport;
826 	fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
827 	fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
828 	if (IS_ERR(fwd_fdb)) {
829 		rule = ERR_CAST(fwd_fdb);
830 		goto err_get_fwd;
831 	}
832 
833 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
834 	for (i = 0; i < esw_attr->split_count; i++) {
835 		if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
836 			/* Source port rewrite (forward to ovs internal port or statck device) isn't
837 			 * supported in the rule of split action.
838 			 */
839 			err = -EOPNOTSUPP;
840 		else
841 			esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
842 
843 		if (err) {
844 			rule = ERR_PTR(err);
845 			goto err_chain_src_rewrite;
846 		}
847 	}
848 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
849 	dest[i].ft = fwd_fdb;
850 	i++;
851 
852 	mlx5_eswitch_set_rule_source_port(esw, spec, attr,
853 					  esw_attr->in_mdev->priv.eswitch,
854 					  esw_attr->in_rep->vport);
855 
856 	if (attr->outer_match_level != MLX5_MATCH_NONE)
857 		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
858 
859 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
860 	rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
861 
862 	if (IS_ERR(rule)) {
863 		i = esw_attr->split_count;
864 		goto err_chain_src_rewrite;
865 	}
866 
867 	atomic64_inc(&esw->offloads.num_flows);
868 
869 	kfree(dest);
870 	return rule;
871 err_chain_src_rewrite:
872 	mlx5_esw_vporttbl_put(esw, &fwd_attr);
873 err_get_fwd:
874 	mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
875 err_get_fast:
876 	kfree(dest);
877 	return rule;
878 }
879 
880 static void
881 __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
882 			struct mlx5_flow_handle *rule,
883 			struct mlx5_flow_attr *attr,
884 			bool fwd_rule)
885 {
886 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
887 	struct mlx5_fs_chains *chains = esw_chains(esw);
888 	bool split = (esw_attr->split_count > 0);
889 	struct mlx5_vport_tbl_attr fwd_attr;
890 	int i;
891 
892 	mlx5_del_flow_rules(rule);
893 
894 	if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
895 		/* unref the term table */
896 		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
897 			if (esw_attr->dests[i].termtbl)
898 				mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl);
899 		}
900 	}
901 
902 	atomic64_dec(&esw->offloads.num_flows);
903 
904 	if (fwd_rule || split) {
905 		fwd_attr.chain = attr->chain;
906 		fwd_attr.prio = attr->prio;
907 		fwd_attr.vport = esw_attr->in_rep->vport;
908 		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
909 	}
910 
911 	if (fwd_rule)  {
912 		mlx5_esw_vporttbl_put(esw, &fwd_attr);
913 		mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
914 	} else {
915 		if (split)
916 			mlx5_esw_vporttbl_put(esw, &fwd_attr);
917 		else if (attr->chain || attr->prio)
918 			mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
919 		esw_cleanup_dests(esw, attr);
920 	}
921 }
922 
923 void
924 mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
925 				struct mlx5_flow_handle *rule,
926 				struct mlx5_flow_attr *attr)
927 {
928 	__mlx5_eswitch_del_rule(esw, rule, attr, false);
929 }
930 
931 void
932 mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
933 			  struct mlx5_flow_handle *rule,
934 			  struct mlx5_flow_attr *attr)
935 {
936 	__mlx5_eswitch_del_rule(esw, rule, attr, true);
937 }
938 
939 struct mlx5_flow_handle *
940 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
941 				    struct mlx5_eswitch *from_esw,
942 				    struct mlx5_eswitch_rep *rep,
943 				    u32 sqn)
944 {
945 	struct mlx5_flow_act flow_act = {0};
946 	struct mlx5_flow_destination dest = {};
947 	struct mlx5_flow_handle *flow_rule;
948 	struct mlx5_flow_spec *spec;
949 	void *misc;
950 	u16 vport;
951 
952 	spec = kvzalloc_obj(*spec);
953 	if (!spec) {
954 		flow_rule = ERR_PTR(-ENOMEM);
955 		goto out;
956 	}
957 
958 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
959 	MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
960 
961 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
962 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
963 
964 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
965 
966 	/* source vport is the esw manager */
967 	vport = from_esw->manager_vport;
968 
969 	if (mlx5_eswitch_vport_match_metadata_enabled(on_esw)) {
970 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
971 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
972 			 mlx5_eswitch_get_vport_metadata_for_match(from_esw, vport));
973 
974 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
975 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
976 			 mlx5_eswitch_get_vport_metadata_mask());
977 
978 		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
979 	} else {
980 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
981 		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
982 
983 		if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
984 			MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
985 				 MLX5_CAP_GEN(from_esw->dev, vhca_id));
986 
987 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
988 		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
989 
990 		if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
991 			MLX5_SET_TO_ONES(fte_match_set_misc, misc,
992 					 source_eswitch_owner_vhca_id);
993 
994 		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
995 	}
996 
997 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
998 	dest.vport.num = rep->vport;
999 	dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
1000 	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
1001 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1002 
1003 	if (rep->vport == MLX5_VPORT_UPLINK &&
1004 	    on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) {
1005 		dest.ft = on_esw->offloads.ft_ipsec_tx_pol;
1006 		flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL;
1007 		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1008 	} else {
1009 		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
1010 		dest.vport.num = rep->vport;
1011 		dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
1012 		dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
1013 	}
1014 
1015 	if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) &&
1016 	    rep->vport == MLX5_VPORT_UPLINK)
1017 		spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
1018 
1019 	flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(on_esw),
1020 					spec, &flow_act, &dest, 1);
1021 	if (IS_ERR(flow_rule))
1022 		esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %pe\n",
1023 			 flow_rule);
1024 out:
1025 	kvfree(spec);
1026 	return flow_rule;
1027 }
1028 EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
1029 
/* Delete a send-to-vport rule; @rule is passed on unchecked. */
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
{
	struct mlx5_flow_handle *handle = rule;

	mlx5_del_flow_rules(handle);
}
1034 
/* Delete a send-to-vport meta rule; a NULL @rule is silently ignored. */
void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule)
{
	if (!rule)
		return;

	mlx5_del_flow_rules(rule);
}
1040 
1041 struct mlx5_flow_handle *
1042 mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num)
1043 {
1044 	struct mlx5_flow_destination dest = {};
1045 	struct mlx5_flow_act flow_act = {0};
1046 	struct mlx5_flow_handle *flow_rule;
1047 	struct mlx5_flow_spec *spec;
1048 
1049 	spec = kvzalloc_obj(*spec);
1050 	if (!spec)
1051 		return ERR_PTR(-ENOMEM);
1052 
1053 	MLX5_SET(fte_match_param, spec->match_criteria,
1054 		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
1055 	MLX5_SET(fte_match_param, spec->match_criteria,
1056 		 misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
1057 	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1,
1058 		 ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK);
1059 
1060 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
1061 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
1062 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1063 
1064 	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
1065 		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
1066 	dest.vport.num = vport_num;
1067 
1068 	flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
1069 					spec, &flow_act, &dest, 1);
1070 	if (IS_ERR(flow_rule))
1071 		esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %pe\n",
1072 			 vport_num, flow_rule);
1073 
1074 	kvfree(spec);
1075 	return flow_rule;
1076 }
1077 
1078 static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
1079 {
1080 	return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
1081 	       MLX5_FDB_TO_VPORT_REG_C_1;
1082 }
1083 
1084 static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
1085 {
1086 	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
1087 	u32 min[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
1088 	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
1089 	u8 curr, wanted;
1090 	int err;
1091 
1092 	if (!mlx5_eswitch_reg_c1_loopback_supported(esw) &&
1093 	    !mlx5_eswitch_vport_match_metadata_enabled(esw))
1094 		return 0;
1095 
1096 	MLX5_SET(query_esw_vport_context_in, in, opcode,
1097 		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
1098 	err = mlx5_cmd_exec_inout(esw->dev, query_esw_vport_context, in, out);
1099 	if (err)
1100 		return err;
1101 
1102 	curr = MLX5_GET(query_esw_vport_context_out, out,
1103 			esw_vport_context.fdb_to_vport_reg_c_id);
1104 	wanted = MLX5_FDB_TO_VPORT_REG_C_0;
1105 	if (mlx5_eswitch_reg_c1_loopback_supported(esw))
1106 		wanted |= MLX5_FDB_TO_VPORT_REG_C_1;
1107 
1108 	if (enable)
1109 		curr |= wanted;
1110 	else
1111 		curr &= ~wanted;
1112 
1113 	MLX5_SET(modify_esw_vport_context_in, min,
1114 		 esw_vport_context.fdb_to_vport_reg_c_id, curr);
1115 	MLX5_SET(modify_esw_vport_context_in, min,
1116 		 field_select.fdb_to_vport_reg_c_id, 1);
1117 
1118 	err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, min);
1119 	if (!err) {
1120 		if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1))
1121 			esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
1122 		else
1123 			esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
1124 	}
1125 
1126 	return err;
1127 }
1128 
1129 static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
1130 				  struct mlx5_core_dev *peer_dev,
1131 				  struct mlx5_flow_spec *spec,
1132 				  struct mlx5_flow_destination *dest)
1133 {
1134 	void *misc;
1135 
1136 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1137 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1138 				    misc_parameters_2);
1139 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1140 			 mlx5_eswitch_get_vport_metadata_mask());
1141 
1142 		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
1143 	} else {
1144 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1145 				    misc_parameters);
1146 
1147 		MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
1148 			 MLX5_CAP_GEN(peer_dev, vhca_id));
1149 
1150 		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
1151 
1152 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1153 				    misc_parameters);
1154 		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1155 		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
1156 				 source_eswitch_owner_vhca_id);
1157 	}
1158 
1159 	dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
1160 	dest->vport.num = peer_dev->priv.eswitch->manager_vport;
1161 	dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
1162 	dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
1163 }
1164 
1165 static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
1166 					       struct mlx5_eswitch *peer_esw,
1167 					       struct mlx5_flow_spec *spec,
1168 					       u16 vport)
1169 {
1170 	void *misc;
1171 
1172 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1173 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1174 				    misc_parameters_2);
1175 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1176 			 mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
1177 								   vport));
1178 	} else {
1179 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1180 				    misc_parameters);
1181 		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
1182 	}
1183 }
1184 
1185 static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
1186 				       struct mlx5_core_dev *peer_dev)
1187 {
1188 	struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch;
1189 	struct mlx5_flow_destination dest = {};
1190 	struct mlx5_flow_act flow_act = {0};
1191 	struct mlx5_flow_handle **flows;
1192 	struct mlx5_flow_handle *flow;
1193 	struct mlx5_vport *peer_vport;
1194 	struct mlx5_flow_spec *spec;
1195 	int err;
1196 	unsigned long i;
1197 	void *misc;
1198 
1199 	if (!MLX5_VPORT_MANAGER(peer_dev) &&
1200 	    !mlx5_core_is_ecpf_esw_manager(peer_dev))
1201 		return 0;
1202 
1203 	spec = kvzalloc_obj(*spec);
1204 	if (!spec)
1205 		return -ENOMEM;
1206 
1207 	peer_miss_rules_setup(esw, peer_dev, spec, &dest);
1208 
1209 	flows = kvzalloc_objs(*flows, peer_esw->total_vports);
1210 	if (!flows) {
1211 		err = -ENOMEM;
1212 		goto alloc_flows_err;
1213 	}
1214 
1215 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1216 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1217 			    misc_parameters);
1218 
1219 	if (mlx5_core_is_ecpf_esw_manager(peer_dev) &&
1220 	    mlx5_esw_host_functions_enabled(peer_dev)) {
1221 		peer_vport = mlx5_eswitch_get_vport(peer_esw,
1222 						    MLX5_VPORT_HOST_PF);
1223 		esw_set_peer_miss_rule_source_port(esw, peer_esw, spec,
1224 						   MLX5_VPORT_HOST_PF);
1225 
1226 		flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
1227 					   spec, &flow_act, &dest, 1);
1228 		if (IS_ERR(flow)) {
1229 			err = PTR_ERR(flow);
1230 			goto add_pf_flow_err;
1231 		}
1232 		flows[peer_vport->index] = flow;
1233 	}
1234 
1235 	mlx5_esw_for_each_spf_vport(peer_esw, i, peer_vport) {
1236 		esw_set_peer_miss_rule_source_port(esw, peer_esw, spec,
1237 						   peer_vport->vport);
1238 
1239 		flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
1240 					   spec, &flow_act, &dest, 1);
1241 		if (IS_ERR(flow)) {
1242 			err = PTR_ERR(flow);
1243 			goto add_ecpf_flow_err;
1244 		}
1245 		flows[peer_vport->index] = flow;
1246 	}
1247 
1248 	if (mlx5_ecpf_vport_exists(peer_dev)) {
1249 		peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
1250 		MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
1251 		flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
1252 					   spec, &flow_act, &dest, 1);
1253 		if (IS_ERR(flow)) {
1254 			err = PTR_ERR(flow);
1255 			goto add_ecpf_flow_err;
1256 		}
1257 		flows[peer_vport->index] = flow;
1258 	}
1259 
1260 	mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
1261 				   mlx5_core_max_vfs(peer_dev)) {
1262 		esw_set_peer_miss_rule_source_port(esw, peer_esw, spec,
1263 						   peer_vport->vport);
1264 		flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
1265 					   spec, &flow_act, &dest, 1);
1266 		if (IS_ERR(flow)) {
1267 			err = PTR_ERR(flow);
1268 			goto add_vf_flow_err;
1269 		}
1270 		flows[peer_vport->index] = flow;
1271 	}
1272 
1273 	if (mlx5_core_ec_sriov_enabled(peer_dev)) {
1274 		mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
1275 					      mlx5_core_max_ec_vfs(peer_dev)) {
1276 			esw_set_peer_miss_rule_source_port(esw, peer_esw,
1277 							   spec,
1278 							   peer_vport->vport);
1279 			flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
1280 						   spec, &flow_act, &dest, 1);
1281 			if (IS_ERR(flow)) {
1282 				err = PTR_ERR(flow);
1283 				goto add_ec_vf_flow_err;
1284 			}
1285 			flows[peer_vport->index] = flow;
1286 		}
1287 	}
1288 
1289 	err = xa_insert(&esw->fdb_table.offloads.peer_miss_rules,
1290 			MLX5_CAP_GEN(peer_dev, vhca_id), flows, GFP_KERNEL);
1291 	if (err)
1292 		goto add_ec_vf_flow_err;
1293 
1294 	kvfree(spec);
1295 	return 0;
1296 
1297 add_ec_vf_flow_err:
1298 	mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
1299 				      mlx5_core_max_ec_vfs(peer_dev)) {
1300 		if (!flows[peer_vport->index])
1301 			continue;
1302 		mlx5_del_flow_rules(flows[peer_vport->index]);
1303 	}
1304 add_vf_flow_err:
1305 	mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
1306 				   mlx5_core_max_vfs(peer_dev)) {
1307 		if (!flows[peer_vport->index])
1308 			continue;
1309 		mlx5_del_flow_rules(flows[peer_vport->index]);
1310 	}
1311 	if (mlx5_ecpf_vport_exists(peer_dev)) {
1312 		peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
1313 		mlx5_del_flow_rules(flows[peer_vport->index]);
1314 	}
1315 add_ecpf_flow_err:
1316 	mlx5_esw_for_each_spf_vport(peer_esw, i, peer_vport) {
1317 		if (!flows[peer_vport->index])
1318 			continue;
1319 		mlx5_del_flow_rules(flows[peer_vport->index]);
1320 	}
1321 	if (mlx5_core_is_ecpf_esw_manager(peer_dev) &&
1322 	    mlx5_esw_host_functions_enabled(peer_dev)) {
1323 		peer_vport = mlx5_eswitch_get_vport(peer_esw,
1324 						    MLX5_VPORT_HOST_PF);
1325 		mlx5_del_flow_rules(flows[peer_vport->index]);
1326 	}
1327 add_pf_flow_err:
1328 	esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
1329 	kvfree(flows);
1330 alloc_flows_err:
1331 	kvfree(spec);
1332 	return err;
1333 }
1334 
1335 static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
1336 					struct mlx5_core_dev *peer_dev)
1337 {
1338 	struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch;
1339 	u16 peer_vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
1340 	struct mlx5_flow_handle **flows;
1341 	struct mlx5_vport *peer_vport;
1342 	unsigned long i;
1343 
1344 	flows = xa_erase(&esw->fdb_table.offloads.peer_miss_rules,
1345 			 peer_vhca_id);
1346 	if (!flows)
1347 		return;
1348 
1349 	if (mlx5_core_ec_sriov_enabled(peer_dev)) {
1350 		mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
1351 					      mlx5_core_max_ec_vfs(peer_dev))
1352 			mlx5_del_flow_rules(flows[peer_vport->index]);
1353 	}
1354 
1355 	mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
1356 				   mlx5_core_max_vfs(peer_dev))
1357 		mlx5_del_flow_rules(flows[peer_vport->index]);
1358 
1359 	if (mlx5_ecpf_vport_exists(peer_dev)) {
1360 		peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
1361 		mlx5_del_flow_rules(flows[peer_vport->index]);
1362 	}
1363 
1364 	mlx5_esw_for_each_spf_vport(peer_esw, i, peer_vport)
1365 		mlx5_del_flow_rules(flows[peer_vport->index]);
1366 
1367 	if (mlx5_core_is_ecpf_esw_manager(peer_dev) &&
1368 	    mlx5_esw_host_functions_enabled(peer_dev)) {
1369 		peer_vport = mlx5_eswitch_get_vport(peer_esw,
1370 						    MLX5_VPORT_HOST_PF);
1371 		mlx5_del_flow_rules(flows[peer_vport->index]);
1372 	}
1373 
1374 	kvfree(flows);
1375 }
1376 
1377 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
1378 {
1379 	struct mlx5_flow_act flow_act = {0};
1380 	struct mlx5_flow_destination dest = {};
1381 	struct mlx5_flow_handle *flow_rule = NULL;
1382 	struct mlx5_flow_spec *spec;
1383 	void *headers_c;
1384 	void *headers_v;
1385 	int err = 0;
1386 	u8 *dmac_c;
1387 	u8 *dmac_v;
1388 
1389 	spec = kvzalloc_obj(*spec);
1390 	if (!spec) {
1391 		err = -ENOMEM;
1392 		goto out;
1393 	}
1394 
1395 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1396 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1397 				 outer_headers);
1398 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
1399 			      outer_headers.dmac_47_16);
1400 	dmac_c[0] = 0x01;
1401 
1402 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
1403 	dest.vport.num = esw->manager_vport;
1404 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1405 
1406 	flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
1407 					spec, &flow_act, &dest, 1);
1408 	if (IS_ERR(flow_rule)) {
1409 		err = PTR_ERR(flow_rule);
1410 		esw_warn(esw->dev,  "FDB: Failed to add unicast miss flow rule err %d\n", err);
1411 		goto out;
1412 	}
1413 
1414 	esw->fdb_table.offloads.miss_rule_uni = flow_rule;
1415 
1416 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1417 				 outer_headers);
1418 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
1419 			      outer_headers.dmac_47_16);
1420 	dmac_v[0] = 0x01;
1421 	flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
1422 					spec, &flow_act, &dest, 1);
1423 	if (IS_ERR(flow_rule)) {
1424 		err = PTR_ERR(flow_rule);
1425 		esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
1426 		mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
1427 		goto out;
1428 	}
1429 
1430 	esw->fdb_table.offloads.miss_rule_multi = flow_rule;
1431 
1432 out:
1433 	kvfree(spec);
1434 	return err;
1435 }
1436 
1437 struct mlx5_flow_handle *
1438 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
1439 {
1440 	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
1441 	struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore;
1442 	struct mlx5_flow_context *flow_context;
1443 	struct mlx5_flow_handle *flow_rule;
1444 	struct mlx5_flow_destination dest;
1445 	struct mlx5_flow_spec *spec;
1446 	void *misc;
1447 
1448 	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
1449 		return ERR_PTR(-EOPNOTSUPP);
1450 
1451 	spec = kvzalloc_obj(*spec);
1452 	if (!spec)
1453 		return ERR_PTR(-ENOMEM);
1454 
1455 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1456 			    misc_parameters_2);
1457 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1458 		 ESW_REG_C0_USER_DATA_METADATA_MASK);
1459 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1460 			    misc_parameters_2);
1461 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag);
1462 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
1463 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1464 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1465 	flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id;
1466 
1467 	flow_context = &spec->flow_context;
1468 	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1469 	flow_context->flow_tag = tag;
1470 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1471 	dest.ft = esw->offloads.ft_offloads;
1472 
1473 	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1474 	kvfree(spec);
1475 
1476 	if (IS_ERR(flow_rule))
1477 		esw_warn(esw->dev,
1478 			 "Failed to create restore rule for tag: %d, err(%d)\n",
1479 			 tag, (int)PTR_ERR(flow_rule));
1480 
1481 	return flow_rule;
1482 }
1483 
1484 struct mlx5_flow_group *
1485 mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw,
1486 			     struct mlx5_flow_table *ft)
1487 {
1488 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1489 	struct mlx5_flow_group *fg;
1490 	void *match_criteria;
1491 	void *flow_group_in;
1492 
1493 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
1494 		return ERR_PTR(-EOPNOTSUPP);
1495 
1496 	if (IS_ERR(ft))
1497 		return ERR_CAST(ft);
1498 
1499 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1500 	if (!flow_group_in)
1501 		return ERR_PTR(-ENOMEM);
1502 
1503 	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1504 				      match_criteria);
1505 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1506 		 MLX5_MATCH_MISC_PARAMETERS_2);
1507 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1508 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
1509 		 ft->max_fte - 1);
1510 
1511 	MLX5_SET(fte_match_param, match_criteria,
1512 		 misc_parameters_2.metadata_reg_c_0,
1513 		 mlx5_eswitch_get_vport_metadata_mask());
1514 
1515 	fg = mlx5_create_flow_group(ft, flow_group_in);
1516 	kvfree(flow_group_in);
1517 	if (IS_ERR(fg))
1518 		esw_warn(esw->dev, "Can't create LAG demux flow group\n");
1519 
1520 	return fg;
1521 }
1522 
1523 struct mlx5_flow_handle *
1524 mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num,
1525 			       struct mlx5_flow_table *lag_ft)
1526 {
1527 	struct mlx5_flow_spec *spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1528 	struct mlx5_flow_destination dest = {};
1529 	struct mlx5_flow_act flow_act = {};
1530 	struct mlx5_flow_handle *ret;
1531 	void *misc;
1532 
1533 	if (!spec)
1534 		return ERR_PTR(-ENOMEM);
1535 
1536 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1537 		kvfree(spec);
1538 		return ERR_PTR(-EOPNOTSUPP);
1539 	}
1540 
1541 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1542 			    misc_parameters_2);
1543 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1544 		 mlx5_eswitch_get_vport_metadata_mask());
1545 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
1546 
1547 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1548 			    misc_parameters_2);
1549 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1550 		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
1551 
1552 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1553 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VHCA_RX;
1554 	dest.vhca.id = MLX5_CAP_GEN(esw->dev, vhca_id);
1555 
1556 	ret = mlx5_add_flow_rules(lag_ft, spec, &flow_act, &dest, 1);
1557 	kvfree(spec);
1558 	return ret;
1559 }
1560 
/*
 * Sizing inputs for the send-to-vport flow group, whose entry count is
 * MLX5_MAX_PORTS * (total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ).
 */
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
1563 
1564 void
1565 mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
1566 				    u32 *flow_group_in,
1567 				    int match_params)
1568 {
1569 	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
1570 					    flow_group_in,
1571 					    match_criteria);
1572 
1573 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1574 		MLX5_SET(create_flow_group_in, flow_group_in,
1575 			 match_criteria_enable,
1576 			 MLX5_MATCH_MISC_PARAMETERS_2 | match_params);
1577 
1578 		MLX5_SET(fte_match_param, match_criteria,
1579 			 misc_parameters_2.metadata_reg_c_0,
1580 			 mlx5_eswitch_get_vport_metadata_mask());
1581 	} else {
1582 		MLX5_SET(create_flow_group_in, flow_group_in,
1583 			 match_criteria_enable,
1584 			 MLX5_MATCH_MISC_PARAMETERS | match_params);
1585 
1586 		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1587 				 misc_parameters.source_port);
1588 	}
1589 }
1590 
1591 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
1592 static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
1593 {
1594 	struct mlx5_vport_tbl_attr attr;
1595 	struct mlx5_vport *vport;
1596 	unsigned long i;
1597 
1598 	attr.chain = 0;
1599 	attr.prio = 1;
1600 	mlx5_esw_for_each_vport(esw, i, vport) {
1601 		attr.vport = vport->vport;
1602 		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
1603 		mlx5_esw_vporttbl_put(esw, &attr);
1604 	}
1605 }
1606 
1607 static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
1608 {
1609 	struct mlx5_vport_tbl_attr attr;
1610 	struct mlx5_flow_table *fdb;
1611 	struct mlx5_vport *vport;
1612 	unsigned long i;
1613 
1614 	attr.chain = 0;
1615 	attr.prio = 1;
1616 	mlx5_esw_for_each_vport(esw, i, vport) {
1617 		attr.vport = vport->vport;
1618 		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
1619 		fdb = mlx5_esw_vporttbl_get(esw, &attr);
1620 		if (IS_ERR(fdb))
1621 			goto out;
1622 	}
1623 	return 0;
1624 
1625 out:
1626 	esw_vport_tbl_put(esw);
1627 	return PTR_ERR(fdb);
1628 }
1629 
1630 #define fdb_modify_header_fwd_to_table_supported(esw) \
1631 	(MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
1632 static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags)
1633 {
1634 	struct mlx5_core_dev *dev = esw->dev;
1635 
1636 	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level))
1637 		*flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
1638 
1639 	if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) &&
1640 	    esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
1641 		*flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
1642 		esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
1643 	} else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
1644 		*flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
1645 		esw_warn(dev, "Tc chains and priorities offload aren't supported\n");
1646 	} else if (!fdb_modify_header_fwd_to_table_supported(esw)) {
1647 		/* Disabled when ttl workaround is needed, e.g
1648 		 * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig
1649 		 */
1650 		esw_warn(dev,
1651 			 "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n");
1652 		*flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
1653 	} else {
1654 		*flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED;
1655 		esw_info(dev, "Supported tc chains and prios offload\n");
1656 	}
1657 
1658 	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
1659 		*flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED;
1660 }
1661 
1662 static int
1663 esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
1664 {
1665 	struct mlx5_core_dev *dev = esw->dev;
1666 	struct mlx5_flow_table *nf_ft, *ft;
1667 	struct mlx5_chains_attr attr = {};
1668 	struct mlx5_fs_chains *chains;
1669 	int err;
1670 
1671 	esw_init_chains_offload_flags(esw, &attr.flags);
1672 	attr.ns = MLX5_FLOW_NAMESPACE_FDB;
1673 	attr.max_grp_num = esw->params.large_group_num;
1674 	attr.default_ft = miss_fdb;
1675 	attr.mapping = esw->offloads.reg_c0_obj_pool;
1676 	attr.fs_base_prio = FDB_BYPASS_PATH;
1677 
1678 	chains = mlx5_chains_create(dev, &attr);
1679 	if (IS_ERR(chains)) {
1680 		err = PTR_ERR(chains);
1681 		esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
1682 		return err;
1683 	}
1684 	mlx5_chains_print_info(chains);
1685 
1686 	esw->fdb_table.offloads.esw_chains_priv = chains;
1687 
1688 	/* Create tc_end_ft which is the always created ft chain */
1689 	nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains),
1690 				      1, 0);
1691 	if (IS_ERR(nf_ft)) {
1692 		err = PTR_ERR(nf_ft);
1693 		goto nf_ft_err;
1694 	}
1695 
1696 	/* Always open the root for fast path */
1697 	ft = mlx5_chains_get_table(chains, 0, 1, 0);
1698 	if (IS_ERR(ft)) {
1699 		err = PTR_ERR(ft);
1700 		goto level_0_err;
1701 	}
1702 
1703 	/* Open level 1 for split fdb rules now if prios isn't supported  */
1704 	if (!mlx5_chains_prios_supported(chains)) {
1705 		err = esw_vport_tbl_get(esw);
1706 		if (err)
1707 			goto level_1_err;
1708 	}
1709 
1710 	mlx5_chains_set_end_ft(chains, nf_ft);
1711 
1712 	return 0;
1713 
1714 level_1_err:
1715 	mlx5_chains_put_table(chains, 0, 1, 0);
1716 level_0_err:
1717 	mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
1718 nf_ft_err:
1719 	mlx5_chains_destroy(chains);
1720 	esw->fdb_table.offloads.esw_chains_priv = NULL;
1721 
1722 	return err;
1723 }
1724 
1725 static void
1726 esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
1727 {
1728 	if (!mlx5_chains_prios_supported(chains))
1729 		esw_vport_tbl_put(esw);
1730 	mlx5_chains_put_table(chains, 0, 1, 0);
1731 	mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
1732 	mlx5_chains_destroy(chains);
1733 }
1734 
1735 #else /* CONFIG_MLX5_CLS_ACT */
1736 
/* Stub used when CONFIG_MLX5_CLS_ACT is off: nothing to create. */
static int
esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
{
	return 0;
}
1740 
/* Stub used when CONFIG_MLX5_CLS_ACT is off: nothing to destroy. */
static void
esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
{
}
1744 
1745 #endif
1746 
1747 static int
1748 esw_create_send_to_vport_group(struct mlx5_eswitch *esw,
1749 			       struct mlx5_flow_table *fdb,
1750 			       u32 *flow_group_in,
1751 			       int *ix)
1752 {
1753 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1754 	struct mlx5_flow_group *g;
1755 	void *match_criteria;
1756 	int count, err = 0;
1757 
1758 	memset(flow_group_in, 0, inlen);
1759 
1760 	mlx5_esw_set_flow_group_source_port(esw, flow_group_in, MLX5_MATCH_MISC_PARAMETERS);
1761 
1762 	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
1763 	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
1764 
1765 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
1766 	    MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
1767 		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1768 				 misc_parameters.source_eswitch_owner_vhca_id);
1769 		MLX5_SET(create_flow_group_in, flow_group_in,
1770 			 source_eswitch_owner_vhca_id_valid, 1);
1771 	}
1772 
1773 	/* See comment at table_size calculation */
1774 	count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
1775 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1776 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1);
1777 	*ix += count;
1778 
1779 	g = mlx5_create_flow_group(fdb, flow_group_in);
1780 	if (IS_ERR(g)) {
1781 		err = PTR_ERR(g);
1782 		esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err);
1783 		goto out;
1784 	}
1785 	esw->fdb_table.offloads.send_to_vport_grp = g;
1786 
1787 out:
1788 	return err;
1789 }
1790 
1791 static int
1792 esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw,
1793 				    struct mlx5_flow_table *fdb,
1794 				    u32 *flow_group_in,
1795 				    int *ix)
1796 {
1797 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1798 	struct mlx5_flow_group *g;
1799 	void *match_criteria;
1800 	int err = 0;
1801 
1802 	if (!esw_src_port_rewrite_supported(esw))
1803 		return 0;
1804 
1805 	memset(flow_group_in, 0, inlen);
1806 
1807 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1808 		 MLX5_MATCH_MISC_PARAMETERS_2);
1809 
1810 	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
1811 
1812 	MLX5_SET(fte_match_param, match_criteria,
1813 		 misc_parameters_2.metadata_reg_c_0,
1814 		 mlx5_eswitch_get_vport_metadata_mask());
1815 	MLX5_SET(fte_match_param, match_criteria,
1816 		 misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
1817 
1818 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
1819 	MLX5_SET(create_flow_group_in, flow_group_in,
1820 		 end_flow_index, *ix + esw->total_vports - 1);
1821 	*ix += esw->total_vports;
1822 
1823 	g = mlx5_create_flow_group(fdb, flow_group_in);
1824 	if (IS_ERR(g)) {
1825 		err = PTR_ERR(g);
1826 		esw_warn(esw->dev,
1827 			 "Failed to create send-to-vport meta flow group err(%d)\n", err);
1828 		goto send_vport_meta_err;
1829 	}
1830 	esw->fdb_table.offloads.send_to_vport_meta_grp = g;
1831 
1832 	return 0;
1833 
1834 send_vport_meta_err:
1835 	return err;
1836 }
1837 
1838 static int
1839 esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
1840 			       struct mlx5_flow_table *fdb,
1841 			       u32 *flow_group_in,
1842 			       int *ix)
1843 {
1844 	int max_peer_ports = (esw->total_vports - 1) * (MLX5_MAX_PORTS - 1);
1845 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1846 	struct mlx5_flow_group *g;
1847 	void *match_criteria;
1848 	int err = 0;
1849 
1850 	if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
1851 		return 0;
1852 
1853 	memset(flow_group_in, 0, inlen);
1854 
1855 	mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0);
1856 
1857 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1858 		match_criteria = MLX5_ADDR_OF(create_flow_group_in,
1859 					      flow_group_in,
1860 					      match_criteria);
1861 
1862 		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1863 				 misc_parameters.source_eswitch_owner_vhca_id);
1864 
1865 		MLX5_SET(create_flow_group_in, flow_group_in,
1866 			 source_eswitch_owner_vhca_id_valid, 1);
1867 	}
1868 
1869 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
1870 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
1871 		 *ix + max_peer_ports);
1872 	*ix += max_peer_ports + 1;
1873 
1874 	g = mlx5_create_flow_group(fdb, flow_group_in);
1875 	if (IS_ERR(g)) {
1876 		err = PTR_ERR(g);
1877 		esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err);
1878 		goto out;
1879 	}
1880 	esw->fdb_table.offloads.peer_miss_grp = g;
1881 
1882 out:
1883 	return err;
1884 }
1885 
1886 static int
1887 esw_create_miss_group(struct mlx5_eswitch *esw,
1888 		      struct mlx5_flow_table *fdb,
1889 		      u32 *flow_group_in,
1890 		      int *ix)
1891 {
1892 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1893 	struct mlx5_flow_group *g;
1894 	void *match_criteria;
1895 	int err = 0;
1896 	u8 *dmac;
1897 
1898 	memset(flow_group_in, 0, inlen);
1899 
1900 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1901 		 MLX5_MATCH_OUTER_HEADERS);
1902 	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1903 				      match_criteria);
1904 	dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
1905 			    outer_headers.dmac_47_16);
1906 	dmac[0] = 0x01;
1907 
1908 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
1909 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
1910 		 *ix + MLX5_ESW_MISS_FLOWS);
1911 
1912 	g = mlx5_create_flow_group(fdb, flow_group_in);
1913 	if (IS_ERR(g)) {
1914 		err = PTR_ERR(g);
1915 		esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err);
1916 		goto miss_err;
1917 	}
1918 	esw->fdb_table.offloads.miss_grp = g;
1919 
1920 	err = esw_add_fdb_miss_rule(esw);
1921 	if (err)
1922 		goto miss_rule_err;
1923 
1924 	return 0;
1925 
1926 miss_rule_err:
1927 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
1928 miss_err:
1929 	return err;
1930 }
1931 
1932 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
1933 {
1934 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1935 	struct mlx5_flow_table_attr ft_attr = {};
1936 	struct mlx5_core_dev *dev = esw->dev;
1937 	struct mlx5_flow_namespace *root_ns;
1938 	struct mlx5_flow_table *fdb = NULL;
1939 	int table_size, ix = 0, err = 0;
1940 	u32 flags = 0, *flow_group_in;
1941 
1942 	esw_debug(esw->dev, "Create offloads FDB Tables\n");
1943 
1944 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1945 	if (!flow_group_in)
1946 		return -ENOMEM;
1947 
1948 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
1949 	if (!root_ns) {
1950 		esw_warn(dev, "Failed to get FDB flow namespace\n");
1951 		err = -EOPNOTSUPP;
1952 		goto ns_err;
1953 	}
1954 	esw->fdb_table.offloads.ns = root_ns;
1955 	err = mlx5_flow_namespace_set_mode(root_ns,
1956 					   esw->dev->priv.steering->mode);
1957 	if (err) {
1958 		esw_warn(dev, "Failed to set FDB namespace steering mode\n");
1959 		goto ns_err;
1960 	}
1961 
1962 	/* To be strictly correct:
1963 	 *	MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
1964 	 * should be:
1965 	 *	esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
1966 	 *	peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
1967 	 * but as the peer device might not be in switchdev mode it's not
1968 	 * possible. We use the fact that by default FW sets max vfs and max sfs
1969 	 * to the same value on both devices. If it needs to be changed in the future note
1970 	 * the peer miss group should also be created based on the number of
1971 	 * total vports of the peer (currently is also uses esw->total_vports).
1972 	 */
1973 	table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
1974 		     esw->total_vports * MLX5_MAX_PORTS + MLX5_ESW_MISS_FLOWS;
1975 
1976 	/* create the slow path fdb with encap set, so further table instances
1977 	 * can be created at run time while VFs are probed if the FW allows that.
1978 	 */
1979 	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
1980 		flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
1981 			  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
1982 
1983 	ft_attr.flags = flags;
1984 	ft_attr.max_fte = table_size;
1985 	ft_attr.prio = FDB_SLOW_PATH;
1986 
1987 	fdb = mlx5_create_flow_table(root_ns, &ft_attr);
1988 	if (IS_ERR(fdb)) {
1989 		err = PTR_ERR(fdb);
1990 		esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
1991 		goto slow_fdb_err;
1992 	}
1993 	esw->fdb_table.offloads.slow_fdb = fdb;
1994 
1995 	/* Create empty TC-miss managed table. This allows plugging in following
1996 	 * priorities without directly exposing their level 0 table to
1997 	 * eswitch_offloads and passing it as miss_fdb to following call to
1998 	 * esw_chains_create().
1999 	 */
2000 	memset(&ft_attr, 0, sizeof(ft_attr));
2001 	ft_attr.prio = FDB_TC_MISS;
2002 	esw->fdb_table.offloads.tc_miss_table = mlx5_create_flow_table(root_ns, &ft_attr);
2003 	if (IS_ERR(esw->fdb_table.offloads.tc_miss_table)) {
2004 		err = PTR_ERR(esw->fdb_table.offloads.tc_miss_table);
2005 		esw_warn(dev, "Failed to create TC miss FDB Table err %d\n", err);
2006 		goto tc_miss_table_err;
2007 	}
2008 
2009 	err = esw_chains_create(esw, esw->fdb_table.offloads.tc_miss_table);
2010 	if (err) {
2011 		esw_warn(dev, "Failed to open fdb chains err(%d)\n", err);
2012 		goto fdb_chains_err;
2013 	}
2014 
2015 	err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix);
2016 	if (err)
2017 		goto send_vport_err;
2018 
2019 	err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix);
2020 	if (err)
2021 		goto send_vport_meta_err;
2022 
2023 	err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix);
2024 	if (err)
2025 		goto peer_miss_err;
2026 
2027 	err = esw_create_miss_group(esw, fdb, flow_group_in, &ix);
2028 	if (err)
2029 		goto miss_err;
2030 
2031 	kvfree(flow_group_in);
2032 	return 0;
2033 
2034 miss_err:
2035 	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
2036 		mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
2037 peer_miss_err:
2038 	if (esw->fdb_table.offloads.send_to_vport_meta_grp)
2039 		mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
2040 send_vport_meta_err:
2041 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
2042 send_vport_err:
2043 	esw_chains_destroy(esw, esw_chains(esw));
2044 fdb_chains_err:
2045 	mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
2046 tc_miss_table_err:
2047 	mlx5_destroy_flow_table(mlx5_eswitch_get_slow_fdb(esw));
2048 slow_fdb_err:
2049 	/* Holds true only as long as DMFS is the default */
2050 	mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS);
2051 ns_err:
2052 	kvfree(flow_group_in);
2053 	return err;
2054 }
2055 
2056 static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
2057 {
2058 	if (!mlx5_eswitch_get_slow_fdb(esw))
2059 		return;
2060 
2061 	esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
2062 	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
2063 	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
2064 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
2065 	if (esw->fdb_table.offloads.send_to_vport_meta_grp)
2066 		mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
2067 	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
2068 		mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
2069 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
2070 
2071 	esw_chains_destroy(esw, esw_chains(esw));
2072 
2073 	mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
2074 	mlx5_destroy_flow_table(mlx5_eswitch_get_slow_fdb(esw));
2075 	/* Holds true only as long as DMFS is the default */
2076 	mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
2077 				     MLX5_FLOW_STEERING_MODE_DMFS);
2078 }
2079 
2080 static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw)
2081 {
2082 	int nvports;
2083 
2084 	nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
2085 	if (mlx5e_tc_int_port_supported(esw))
2086 		nvports += MLX5E_TC_MAX_INT_PORT_NUM;
2087 
2088 	return nvports;
2089 }
2090 
2091 static int esw_create_offloads_table(struct mlx5_eswitch *esw)
2092 {
2093 	struct mlx5_flow_table_attr ft_attr = {};
2094 	struct mlx5_core_dev *dev = esw->dev;
2095 	struct mlx5_flow_table *ft_offloads;
2096 	struct mlx5_flow_namespace *ns;
2097 	int err = 0;
2098 
2099 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
2100 	if (!ns) {
2101 		esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
2102 		return -EOPNOTSUPP;
2103 	}
2104 
2105 	ft_attr.max_fte = esw_get_nr_ft_offloads_steering_src_ports(esw) +
2106 			  MLX5_ESW_FT_OFFLOADS_DROP_RULE;
2107 	ft_attr.prio = 1;
2108 
2109 	ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
2110 	if (IS_ERR(ft_offloads)) {
2111 		err = PTR_ERR(ft_offloads);
2112 		esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
2113 		return err;
2114 	}
2115 
2116 	esw->offloads.ft_offloads = ft_offloads;
2117 	return 0;
2118 }
2119 
2120 static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
2121 {
2122 	struct mlx5_esw_offload *offloads = &esw->offloads;
2123 
2124 	mlx5_destroy_flow_table(offloads->ft_offloads);
2125 }
2126 
2127 static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
2128 {
2129 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2130 	struct mlx5_flow_group *g;
2131 	u32 *flow_group_in;
2132 	int nvports;
2133 	int err = 0;
2134 
2135 	nvports = esw_get_nr_ft_offloads_steering_src_ports(esw);
2136 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2137 	if (!flow_group_in)
2138 		return -ENOMEM;
2139 
2140 	mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0);
2141 
2142 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
2143 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
2144 
2145 	g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
2146 
2147 	if (IS_ERR(g)) {
2148 		err = PTR_ERR(g);
2149 		esw_warn(esw->dev, "Failed to create vport rx group err %d\n",
2150 			 err);
2151 		goto out;
2152 	}
2153 
2154 	esw->offloads.vport_rx_group = g;
2155 out:
2156 	kvfree(flow_group_in);
2157 	return err;
2158 }
2159 
2160 static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
2161 {
2162 	mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
2163 }
2164 
/* Flow index of the RX drop rule inside the offloads table.
 *
 * The ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
 * for the drop rule, which sits at the end of the table. Its index is
 * therefore the number of steering source port entries (vports and
 * int_ports) that precede it.
 */
static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw)
{
	int drop_index = esw_get_nr_ft_offloads_steering_src_ports(esw);

	return drop_index;
}
2173 
2174 static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw)
2175 {
2176 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2177 	struct mlx5_flow_group *g;
2178 	u32 *flow_group_in;
2179 	int flow_index;
2180 	int err = 0;
2181 
2182 	flow_index = esw_create_vport_rx_drop_rule_index(esw);
2183 
2184 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2185 	if (!flow_group_in)
2186 		return -ENOMEM;
2187 
2188 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
2189 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
2190 
2191 	g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
2192 
2193 	if (IS_ERR(g)) {
2194 		err = PTR_ERR(g);
2195 		esw_warn(esw->dev,
2196 			 "Failed to create vport rx drop group err %d\n", err);
2197 		goto out;
2198 	}
2199 
2200 	esw->offloads.vport_rx_drop_group = g;
2201 out:
2202 	kvfree(flow_group_in);
2203 	return err;
2204 }
2205 
2206 static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw)
2207 {
2208 	if (esw->offloads.vport_rx_drop_group)
2209 		mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group);
2210 }
2211 
2212 void
2213 mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw,
2214 			      u16 vport,
2215 			      struct mlx5_flow_spec *spec)
2216 {
2217 	void *misc;
2218 
2219 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
2220 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
2221 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
2222 			 mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
2223 
2224 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
2225 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
2226 			 mlx5_eswitch_get_vport_metadata_mask());
2227 
2228 		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
2229 	} else {
2230 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
2231 		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
2232 
2233 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
2234 		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
2235 
2236 		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
2237 	}
2238 }
2239 
2240 struct mlx5_flow_handle *
2241 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
2242 				  struct mlx5_flow_destination *dest)
2243 {
2244 	struct mlx5_flow_act flow_act = {0};
2245 	struct mlx5_flow_handle *flow_rule;
2246 	struct mlx5_flow_spec *spec;
2247 
2248 	spec = kvzalloc_obj(*spec);
2249 	if (!spec) {
2250 		flow_rule = ERR_PTR(-ENOMEM);
2251 		goto out;
2252 	}
2253 
2254 	mlx5_esw_set_spec_source_port(esw, vport, spec);
2255 
2256 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2257 	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
2258 					&flow_act, dest, 1);
2259 	if (IS_ERR(flow_rule)) {
2260 		esw_warn(esw->dev,
2261 			 "fs offloads: Failed to add vport rx rule err %pe\n",
2262 			 flow_rule);
2263 		goto out;
2264 	}
2265 
2266 out:
2267 	kvfree(spec);
2268 	return flow_rule;
2269 }
2270 
2271 static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw)
2272 {
2273 	struct mlx5_flow_act flow_act = {};
2274 	struct mlx5_flow_handle *flow_rule;
2275 
2276 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2277 	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL,
2278 					&flow_act, NULL, 0);
2279 	if (IS_ERR(flow_rule)) {
2280 		esw_warn(esw->dev,
2281 			 "fs offloads: Failed to add vport rx drop rule err %pe\n",
2282 			 flow_rule);
2283 		return PTR_ERR(flow_rule);
2284 	}
2285 
2286 	esw->offloads.vport_rx_drop_rule = flow_rule;
2287 
2288 	return 0;
2289 }
2290 
2291 static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw)
2292 {
2293 	if (esw->offloads.vport_rx_drop_rule)
2294 		mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule);
2295 }
2296 
2297 static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
2298 {
2299 	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
2300 	struct mlx5_core_dev *dev = esw->dev;
2301 	struct mlx5_vport *vport;
2302 	unsigned long i;
2303 
2304 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
2305 		return -EOPNOTSUPP;
2306 
2307 	if (!mlx5_esw_is_fdb_created(esw))
2308 		return -EOPNOTSUPP;
2309 
2310 	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
2311 	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2312 		mlx5_mode = MLX5_INLINE_MODE_NONE;
2313 		goto out;
2314 	case MLX5_CAP_INLINE_MODE_L2:
2315 		mlx5_mode = MLX5_INLINE_MODE_L2;
2316 		goto out;
2317 	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2318 		goto query_vports;
2319 	}
2320 
2321 query_vports:
2322 	mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
2323 	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
2324 		mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode);
2325 		if (prev_mlx5_mode != mlx5_mode)
2326 			return -EINVAL;
2327 		prev_mlx5_mode = mlx5_mode;
2328 	}
2329 
2330 out:
2331 	*mode = mlx5_mode;
2332 	return 0;
2333 }
2334 
2335 static void esw_destroy_restore_table(struct mlx5_eswitch *esw)
2336 {
2337 	struct mlx5_esw_offload *offloads = &esw->offloads;
2338 
2339 	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
2340 		return;
2341 
2342 	mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id);
2343 	mlx5_destroy_flow_group(offloads->restore_group);
2344 	mlx5_destroy_flow_table(offloads->ft_offloads_restore);
2345 }
2346 
2347 static int esw_create_restore_table(struct mlx5_eswitch *esw)
2348 {
2349 	u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
2350 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2351 	struct mlx5_flow_table_attr ft_attr = {};
2352 	struct mlx5_core_dev *dev = esw->dev;
2353 	struct mlx5_flow_namespace *ns;
2354 	struct mlx5_modify_hdr *mod_hdr;
2355 	void *match_criteria, *misc;
2356 	struct mlx5_flow_table *ft;
2357 	struct mlx5_flow_group *g;
2358 	u32 *flow_group_in;
2359 	int err = 0;
2360 
2361 	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
2362 		return 0;
2363 
2364 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
2365 	if (!ns) {
2366 		esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
2367 		return -EOPNOTSUPP;
2368 	}
2369 
2370 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2371 	if (!flow_group_in) {
2372 		err = -ENOMEM;
2373 		goto out_free;
2374 	}
2375 
2376 	ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS;
2377 	ft = mlx5_create_flow_table(ns, &ft_attr);
2378 	if (IS_ERR(ft)) {
2379 		err = PTR_ERR(ft);
2380 		esw_warn(esw->dev, "Failed to create restore table, err %d\n",
2381 			 err);
2382 		goto out_free;
2383 	}
2384 
2385 	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
2386 				      match_criteria);
2387 	misc = MLX5_ADDR_OF(fte_match_param, match_criteria,
2388 			    misc_parameters_2);
2389 
2390 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
2391 		 ESW_REG_C0_USER_DATA_METADATA_MASK);
2392 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
2393 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
2394 		 ft_attr.max_fte - 1);
2395 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
2396 		 MLX5_MATCH_MISC_PARAMETERS_2);
2397 	g = mlx5_create_flow_group(ft, flow_group_in);
2398 	if (IS_ERR(g)) {
2399 		err = PTR_ERR(g);
2400 		esw_warn(dev, "Failed to create restore flow group, err: %d\n",
2401 			 err);
2402 		goto err_group;
2403 	}
2404 
2405 	MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY);
2406 	MLX5_SET(copy_action_in, modact, src_field,
2407 		 MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
2408 	MLX5_SET(copy_action_in, modact, dst_field,
2409 		 MLX5_ACTION_IN_FIELD_METADATA_REG_B);
2410 	mod_hdr = mlx5_modify_header_alloc(esw->dev,
2411 					   MLX5_FLOW_NAMESPACE_KERNEL, 1,
2412 					   modact);
2413 	if (IS_ERR(mod_hdr)) {
2414 		err = PTR_ERR(mod_hdr);
2415 		esw_warn(dev, "Failed to create restore mod header, err: %d\n",
2416 			 err);
2417 		goto err_mod_hdr;
2418 	}
2419 
2420 	esw->offloads.ft_offloads_restore = ft;
2421 	esw->offloads.restore_group = g;
2422 	esw->offloads.restore_copy_hdr_id = mod_hdr;
2423 
2424 	kvfree(flow_group_in);
2425 
2426 	return 0;
2427 
2428 err_mod_hdr:
2429 	mlx5_destroy_flow_group(g);
2430 err_group:
2431 	mlx5_destroy_flow_table(ft);
2432 out_free:
2433 	kvfree(flow_group_in);
2434 
2435 	return err;
2436 }
2437 
2438 static void mlx5_esw_assert_reps_locked(struct mlx5_eswitch *esw)
2439 {
2440 	lockdep_assert_held(&esw->offloads.reps_lock);
2441 }
2442 
2443 void mlx5_esw_reps_block(struct mlx5_eswitch *esw)
2444 {
2445 	mutex_lock(&esw->offloads.reps_lock);
2446 }
2447 
2448 static void mlx5_esw_reps_block_nested(struct mlx5_eswitch *esw)
2449 {
2450 	mutex_lock_nested(&esw->offloads.reps_lock, SINGLE_DEPTH_NESTING);
2451 }
2452 
2453 void mlx5_esw_reps_unblock(struct mlx5_eswitch *esw)
2454 {
2455 	mutex_unlock(&esw->offloads.reps_lock);
2456 }
2457 
2458 static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode)
2459 {
2460 	mlx5_esw_reps_unblock(esw);
2461 	mlx5_devcom_comp_lock(esw->dev->priv.hca_devcom_comp);
2462 	if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV ||
2463 	    mlx5_core_mp_enabled(esw->dev)) {
2464 		esw->mode = mode;
2465 		goto out;
2466 	}
2467 
2468 	esw->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
2469 	mlx5_rescan_drivers_locked(esw->dev);
2470 	esw->mode = mode;
2471 	esw->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
2472 out:
2473 	mlx5_rescan_drivers_locked(esw->dev);
2474 	mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp);
2475 	mlx5_esw_reps_block(esw);
2476 }
2477 
2478 static void mlx5_esw_fdb_drop_destroy(struct mlx5_eswitch *esw)
2479 {
2480 	if (!esw->fdb_table.offloads.drop_root)
2481 		return;
2482 
2483 	esw_debug(esw->dev, "Destroying FDB drop root table %#x fc %#x\n",
2484 		  esw->fdb_table.offloads.drop_root->id,
2485 		  esw->fdb_table.offloads.drop_root_fc->id);
2486 	mlx5_del_flow_rules(esw->fdb_table.offloads.drop_root_rule);
2487 	/* Don't free flow counter here, can be reused on a later activation */
2488 	mlx5_destroy_flow_table(esw->fdb_table.offloads.drop_root);
2489 	esw->fdb_table.offloads.drop_root_rule = NULL;
2490 	esw->fdb_table.offloads.drop_root = NULL;
2491 }
2492 
2493 static int mlx5_esw_fdb_drop_create(struct mlx5_eswitch *esw)
2494 {
2495 	struct mlx5_flow_destination drop_fc_dst = {};
2496 	struct mlx5_flow_table_attr ft_attr = {};
2497 	struct mlx5_flow_destination *dst = NULL;
2498 	struct mlx5_core_dev *dev = esw->dev;
2499 	struct mlx5_flow_namespace *root_ns;
2500 	struct mlx5_flow_act flow_act = {};
2501 	struct mlx5_flow_handle *flow_rule;
2502 	struct mlx5_flow_table *table;
2503 	int err = 0, dst_num = 0;
2504 
2505 	if (esw->fdb_table.offloads.drop_root)
2506 		return 0;
2507 
2508 	root_ns = esw->fdb_table.offloads.ns;
2509 
2510 	ft_attr.prio = FDB_DROP_ROOT;
2511 	ft_attr.max_fte = 1;
2512 	ft_attr.autogroup.max_num_groups = 1;
2513 	table = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
2514 	if (IS_ERR(table)) {
2515 		esw_warn(dev, "Failed to create fdb drop root table, err %pe\n",
2516 			 table);
2517 		return PTR_ERR(table);
2518 	}
2519 
2520 	/* Drop FC reusable, create once on first deactivation of FDB */
2521 	if (!esw->fdb_table.offloads.drop_root_fc) {
2522 		struct mlx5_fc *counter = mlx5_fc_create(dev, 0);
2523 
2524 		err = PTR_ERR_OR_ZERO(counter);
2525 		if (err)
2526 			esw_warn(esw->dev, "create fdb drop fc err %d\n", err);
2527 		else
2528 			esw->fdb_table.offloads.drop_root_fc = counter;
2529 	}
2530 
2531 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2532 
2533 	if (esw->fdb_table.offloads.drop_root_fc) {
2534 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2535 		drop_fc_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
2536 		drop_fc_dst.counter = esw->fdb_table.offloads.drop_root_fc;
2537 		dst = &drop_fc_dst;
2538 		dst_num++;
2539 	}
2540 
2541 	flow_rule = mlx5_add_flow_rules(table, NULL, &flow_act, dst, dst_num);
2542 	err = PTR_ERR_OR_ZERO(flow_rule);
2543 	if (err) {
2544 		esw_warn(esw->dev,
2545 			 "fs offloads: Failed to add vport rx drop rule err %d\n",
2546 			 err);
2547 		goto err_flow_rule;
2548 	}
2549 
2550 	esw->fdb_table.offloads.drop_root = table;
2551 	esw->fdb_table.offloads.drop_root_rule = flow_rule;
2552 	esw_debug(esw->dev, "Created FDB drop root table %#x fc %#x\n",
2553 		  table->id, dst ? dst->counter->id : 0);
2554 	return 0;
2555 
2556 err_flow_rule:
2557 	/* no need to free drop fc, esw_offloads_steering_cleanup will do it */
2558 	mlx5_destroy_flow_table(table);
2559 	return err;
2560 }
2561 
2562 static void mlx5_esw_fdb_active(struct mlx5_eswitch *esw)
2563 {
2564 	struct mlx5_vport *vport;
2565 	unsigned long i;
2566 
2567 	mlx5_esw_fdb_drop_destroy(esw);
2568 	mlx5_mpfs_enable(esw->dev);
2569 
2570 	mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) {
2571 		if (!vport->adjacent)
2572 			continue;
2573 		esw_debug(esw->dev, "Connecting vport %d to eswitch\n",
2574 			  vport->vport);
2575 		mlx5_esw_adj_vport_modify(esw->dev, vport->vport, true);
2576 	}
2577 
2578 	esw->offloads_inactive = false;
2579 	esw_warn(esw->dev, "MPFS/FDB active\n");
2580 }
2581 
2582 static void mlx5_esw_fdb_inactive(struct mlx5_eswitch *esw)
2583 {
2584 	struct mlx5_vport *vport;
2585 	unsigned long i;
2586 
2587 	mlx5_mpfs_disable(esw->dev);
2588 	mlx5_esw_fdb_drop_create(esw);
2589 
2590 	mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) {
2591 		if (!vport->adjacent)
2592 			continue;
2593 		esw_debug(esw->dev, "Disconnecting vport %u from eswitch\n",
2594 			  vport->vport);
2595 
2596 		mlx5_esw_adj_vport_modify(esw->dev, vport->vport, false);
2597 	}
2598 
2599 	esw->offloads_inactive = true;
2600 	esw_warn(esw->dev, "MPFS/FDB inactive\n");
2601 }
2602 
2603 static int esw_offloads_start(struct mlx5_eswitch *esw,
2604 			      struct netlink_ext_ack *extack)
2605 {
2606 	int err;
2607 
2608 	esw_mode_change(esw, MLX5_ESWITCH_OFFLOADS);
2609 	err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs);
2610 	if (err) {
2611 		NL_SET_ERR_MSG_MOD(extack,
2612 				   "Failed setting eswitch to offloads");
2613 		esw_mode_change(esw, MLX5_ESWITCH_LEGACY);
2614 		return err;
2615 	}
2616 	if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
2617 		if (mlx5_eswitch_inline_mode_get(esw,
2618 						 &esw->offloads.inline_mode)) {
2619 			esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
2620 			NL_SET_ERR_MSG_MOD(extack,
2621 					   "Inline mode is different between vports");
2622 		}
2623 	}
2624 	return 0;
2625 }
2626 
2627 void mlx5_esw_offloads_rep_remove(struct mlx5_eswitch *esw,
2628 				  const struct mlx5_vport *vport)
2629 {
2630 	struct mlx5_eswitch_rep *rep = xa_load(&esw->offloads.vport_reps,
2631 					       vport->vport);
2632 
2633 	if (!rep)
2634 		return;
2635 	xa_erase(&esw->offloads.vport_reps, vport->vport);
2636 	kfree(rep);
2637 }
2638 
2639 int mlx5_esw_offloads_rep_add(struct mlx5_eswitch *esw,
2640 			      const struct mlx5_vport *vport)
2641 {
2642 	struct mlx5_eswitch_rep *rep;
2643 	int rep_type;
2644 	int err;
2645 
2646 	rep = kzalloc_obj(*rep);
2647 	if (!rep)
2648 		return -ENOMEM;
2649 
2650 	rep->vport = vport->vport;
2651 	rep->vport_index = vport->index;
2652 	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
2653 		if (!esw->offloads.rep_ops[rep_type]) {
2654 			atomic_set(&rep->rep_data[rep_type].state,
2655 				   REP_UNREGISTERED);
2656 			continue;
2657 		}
2658 		/* Dynamic/delegated vports add their representors after
2659 		 * mlx5_eswitch_register_vport_reps, so mark them as registered
2660 		 * for them to be loaded later with the others.
2661 		 */
2662 		rep->esw = esw;
2663 		atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED);
2664 	}
2665 	err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
2666 	if (err)
2667 		goto insert_err;
2668 
2669 	return 0;
2670 
2671 insert_err:
2672 	kfree(rep);
2673 	return err;
2674 }
2675 
2676 static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw,
2677 					  struct mlx5_eswitch_rep *rep)
2678 {
2679 	xa_erase(&esw->offloads.vport_reps, rep->vport);
2680 	kfree(rep);
2681 }
2682 
2683 static void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
2684 {
2685 	struct mlx5_eswitch_rep *rep;
2686 	unsigned long i;
2687 
2688 	mlx5_esw_for_each_rep(esw, i, rep)
2689 		mlx5_esw_offloads_rep_cleanup(esw, rep);
2690 	xa_destroy(&esw->offloads.vport_reps);
2691 	mutex_destroy(&esw->offloads.reps_lock);
2692 }
2693 
2694 static int esw_offloads_init_reps(struct mlx5_eswitch *esw)
2695 {
2696 	struct mlx5_vport *vport;
2697 	unsigned long i;
2698 	int err;
2699 
2700 	xa_init(&esw->offloads.vport_reps);
2701 	mutex_init(&esw->offloads.reps_lock);
2702 
2703 	mlx5_esw_for_each_vport(esw, i, vport) {
2704 		err = mlx5_esw_offloads_rep_add(esw, vport);
2705 		if (err)
2706 			goto err;
2707 	}
2708 	return 0;
2709 
2710 err:
2711 	esw_offloads_cleanup_reps(esw);
2712 	return err;
2713 }
2714 
2715 static int esw_port_metadata_set(struct devlink *devlink, u32 id,
2716 				 struct devlink_param_gset_ctx *ctx,
2717 				 struct netlink_ext_ack *extack)
2718 {
2719 	struct mlx5_core_dev *dev = devlink_priv(devlink);
2720 	struct mlx5_eswitch *esw = dev->priv.eswitch;
2721 	int err = 0;
2722 
2723 	down_write(&esw->mode_lock);
2724 	if (mlx5_esw_is_fdb_created(esw)) {
2725 		err = -EBUSY;
2726 		goto done;
2727 	}
2728 	if (!mlx5_esw_vport_match_metadata_supported(esw)) {
2729 		err = -EOPNOTSUPP;
2730 		goto done;
2731 	}
2732 	if (ctx->val.vbool)
2733 		esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
2734 	else
2735 		esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
2736 done:
2737 	up_write(&esw->mode_lock);
2738 	return err;
2739 }
2740 
2741 static int esw_port_metadata_get(struct devlink *devlink, u32 id,
2742 				 struct devlink_param_gset_ctx *ctx,
2743 				 struct netlink_ext_ack *extack)
2744 {
2745 	struct mlx5_core_dev *dev = devlink_priv(devlink);
2746 
2747 	ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch);
2748 	return 0;
2749 }
2750 
2751 static int esw_port_metadata_validate(struct devlink *devlink, u32 id,
2752 				      union devlink_param_value *val,
2753 				      struct netlink_ext_ack *extack)
2754 {
2755 	struct mlx5_core_dev *dev = devlink_priv(devlink);
2756 	u8 esw_mode;
2757 
2758 	esw_mode = mlx5_eswitch_mode(dev);
2759 	if (esw_mode == MLX5_ESWITCH_OFFLOADS) {
2760 		NL_SET_ERR_MSG_MOD(extack,
2761 				   "E-Switch must either disabled or non switchdev mode");
2762 		return -EBUSY;
2763 	}
2764 	return 0;
2765 }
2766 
/* Driver-specific devlink parameters of the e-switch. They are registered
 * in esw_offloads_init() and unregistered in esw_offloads_cleanup().
 *
 * "esw_port_metadata" is a boolean, runtime-cmode parameter served by the
 * esw_port_metadata_get/set/validate handlers.
 */
static const struct devlink_param esw_devlink_params[] = {
	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
			     "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
			     esw_port_metadata_get,
			     esw_port_metadata_set,
			     esw_port_metadata_validate),
};
2775 
2776 int esw_offloads_init(struct mlx5_eswitch *esw)
2777 {
2778 	int err;
2779 
2780 	err = esw_offloads_init_reps(esw);
2781 	if (err)
2782 		return err;
2783 
2784 	if (MLX5_ESWITCH_MANAGER(esw->dev) &&
2785 	    mlx5_esw_vport_match_metadata_supported(esw))
2786 		esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
2787 
2788 	err = devl_params_register(priv_to_devlink(esw->dev),
2789 				   esw_devlink_params,
2790 				   ARRAY_SIZE(esw_devlink_params));
2791 	if (err)
2792 		goto err_params;
2793 
2794 	return 0;
2795 
2796 err_params:
2797 	esw_offloads_cleanup_reps(esw);
2798 	return err;
2799 }
2800 
2801 void esw_offloads_cleanup(struct mlx5_eswitch *esw)
2802 {
2803 	devl_params_unregister(priv_to_devlink(esw->dev),
2804 			       esw_devlink_params,
2805 			       ARRAY_SIZE(esw_devlink_params));
2806 	esw_offloads_cleanup_reps(esw);
2807 }
2808 
2809 static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
2810 				   struct mlx5_eswitch_rep *rep,
2811 				   u8 rep_type, bool *newly_loaded)
2812 {
2813 	int err;
2814 
2815 	mlx5_esw_assert_reps_locked(esw);
2816 
2817 	if (newly_loaded)
2818 		*newly_loaded = false;
2819 
2820 	if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
2821 			   REP_REGISTERED, REP_LOADED) != REP_REGISTERED)
2822 		return 0;
2823 
2824 	err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
2825 	if (err) {
2826 		atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED);
2827 		return err;
2828 	}
2829 
2830 	if (newly_loaded)
2831 		*newly_loaded = true;
2832 
2833 	return 0;
2834 }
2835 
2836 static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
2837 				      struct mlx5_eswitch_rep *rep, u8 rep_type)
2838 {
2839 	mlx5_esw_assert_reps_locked(esw);
2840 
2841 	if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
2842 			   REP_LOADED, REP_REGISTERED) == REP_LOADED) {
2843 		if (rep_type == REP_ETH)
2844 			__esw_offloads_unload_rep(esw, rep, REP_IB);
2845 		esw->offloads.rep_ops[rep_type]->unload(rep);
2846 	}
2847 }
2848 
2849 static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
2850 {
2851 	struct mlx5_eswitch_rep *rep;
2852 	unsigned long i;
2853 
2854 	mlx5_esw_for_each_rep(esw, i, rep)
2855 		__esw_offloads_unload_rep(esw, rep, rep_type);
2856 }
2857 
2858 static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
2859 {
2860 	struct mlx5_eswitch_rep *rep;
2861 	unsigned long loaded = 0;
2862 	bool newly_loaded;
2863 	int rep_type;
2864 	int err;
2865 
2866 	if (vport_num != MLX5_VPORT_UPLINK &&
2867 	    mlx5_get_sd(esw->dev) && !mlx5_lag_is_active(esw->dev))
2868 		return 0;
2869 
2870 	rep = mlx5_eswitch_get_rep(esw, vport_num);
2871 	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
2872 		err = __esw_offloads_load_rep(esw, rep, rep_type,
2873 					      &newly_loaded);
2874 		if (err)
2875 			goto err_reps;
2876 		if (newly_loaded)
2877 			loaded |= BIT(rep_type);
2878 	}
2879 
2880 	return 0;
2881 
2882 err_reps:
2883 	while (--rep_type >= 0)
2884 		if (test_bit(rep_type, &loaded))
2885 			__esw_offloads_unload_rep(esw, rep, rep_type);
2886 	return err;
2887 }
2888 
2889 static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num)
2890 {
2891 	struct mlx5_eswitch_rep *rep;
2892 	int rep_type;
2893 
2894 	rep = mlx5_eswitch_get_rep(esw, vport_num);
2895 	for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--)
2896 		__esw_offloads_unload_rep(esw, rep, rep_type);
2897 }
2898 
2899 int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
2900 {
2901 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
2902 		return 0;
2903 
2904 	return mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport);
2905 }
2906 
2907 void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
2908 {
2909 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
2910 		return;
2911 
2912 	mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport);
2913 }
2914 
2915 int mlx5_esw_offloads_init_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
2916 				  struct mlx5_devlink_port *dl_port,
2917 				  u32 controller, u32 sfnum)
2918 {
2919 	return mlx5_esw_offloads_sf_devlink_port_init(esw, vport, dl_port, controller, sfnum);
2920 }
2921 
/* Clean up the devlink port of an SF vport. */
void mlx5_esw_offloads_cleanup_sf_rep(struct mlx5_eswitch *esw,
				      struct mlx5_vport *vport)
{
	mlx5_esw_offloads_sf_devlink_port_cleanup(esw, vport);
}
2926 
2927 int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
2928 {
2929 	int err;
2930 
2931 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
2932 		return 0;
2933 
2934 	err = mlx5_esw_offloads_devlink_port_register(esw, vport);
2935 	if (err)
2936 		return err;
2937 
2938 	err = mlx5_esw_offloads_rep_load(esw, vport->vport);
2939 	if (err)
2940 		goto load_err;
2941 	return err;
2942 
2943 load_err:
2944 	mlx5_esw_offloads_devlink_port_unregister(vport);
2945 	return err;
2946 }
2947 
2948 void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
2949 {
2950 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
2951 		return;
2952 
2953 	mlx5_esw_offloads_rep_unload(esw, vport->vport);
2954 
2955 	mlx5_esw_offloads_devlink_port_unregister(vport);
2956 }
2957 
2958 static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
2959 				  struct mlx5_core_dev *slave)
2960 {
2961 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
2962 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
2963 	struct mlx5_flow_root_namespace *root;
2964 	struct mlx5_flow_namespace *ns;
2965 	int err;
2966 
2967 	MLX5_SET(set_flow_table_root_in, in, opcode,
2968 		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
2969 	MLX5_SET(set_flow_table_root_in, in, table_type,
2970 		 FS_FT_FDB);
2971 
2972 	if (master) {
2973 		ns = mlx5_get_flow_namespace(master,
2974 					     MLX5_FLOW_NAMESPACE_FDB);
2975 		root = find_root(&ns->node);
2976 		mutex_lock(&root->chain_lock);
2977 		MLX5_SET(set_flow_table_root_in, in,
2978 			 table_eswitch_owner_vhca_id_valid, 1);
2979 		MLX5_SET(set_flow_table_root_in, in,
2980 			 table_eswitch_owner_vhca_id,
2981 			 MLX5_CAP_GEN(master, vhca_id));
2982 		MLX5_SET(set_flow_table_root_in, in, table_id,
2983 			 root->root_ft->id);
2984 	} else {
2985 		ns = mlx5_get_flow_namespace(slave,
2986 					     MLX5_FLOW_NAMESPACE_FDB);
2987 		root = find_root(&ns->node);
2988 		mutex_lock(&root->chain_lock);
2989 		MLX5_SET(set_flow_table_root_in, in, table_id,
2990 			 root->root_ft->id);
2991 	}
2992 
2993 	err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
2994 	mutex_unlock(&root->chain_lock);
2995 
2996 	return err;
2997 }
2998 
2999 static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
3000 					struct mlx5_core_dev *slave,
3001 					struct mlx5_vport *vport,
3002 					struct mlx5_flow_table *acl)
3003 {
3004 	u16 slave_index = MLX5_CAP_GEN(slave, vhca_id);
3005 	struct mlx5_flow_handle *flow_rule = NULL;
3006 	struct mlx5_flow_destination dest = {};
3007 	struct mlx5_flow_act flow_act = {};
3008 	struct mlx5_flow_spec *spec;
3009 	int err = 0;
3010 	void *misc;
3011 
3012 	spec = kvzalloc_obj(*spec);
3013 	if (!spec)
3014 		return -ENOMEM;
3015 
3016 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
3017 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
3018 			    misc_parameters);
3019 	MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
3020 	MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, slave_index);
3021 
3022 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
3023 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
3024 	MLX5_SET_TO_ONES(fte_match_set_misc, misc,
3025 			 source_eswitch_owner_vhca_id);
3026 
3027 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3028 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
3029 	dest.vport.num = slave->priv.eswitch->manager_vport;
3030 	dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
3031 	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
3032 
3033 	flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
3034 					&dest, 1);
3035 	if (IS_ERR(flow_rule)) {
3036 		err = PTR_ERR(flow_rule);
3037 	} else {
3038 		err = xa_insert(&vport->egress.offloads.bounce_rules,
3039 				slave_index, flow_rule, GFP_KERNEL);
3040 		if (err)
3041 			mlx5_del_flow_rules(flow_rule);
3042 	}
3043 
3044 	kvfree(spec);
3045 	return err;
3046 }
3047 
3048 static int esw_slave_egress_create_resources(struct mlx5_eswitch *esw,
3049 					     struct mlx5_vport *vport)
3050 {
3051 	struct mlx5_flow_table_attr ft_attr = {
3052 		.max_fte = 1, .prio = 0, .level = 0,
3053 	};
3054 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
3055 	struct mlx5_flow_namespace *ns;
3056 	struct mlx5_flow_table *acl;
3057 	struct mlx5_flow_group *g;
3058 	u32 *flow_group_in;
3059 	int err = 0;
3060 
3061 	if (vport->egress.acl)
3062 		return 0;
3063 
3064 	xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC);
3065 	ns = mlx5_get_flow_vport_namespace(esw->dev,
3066 					   MLX5_FLOW_NAMESPACE_ESW_EGRESS,
3067 					   vport->index);
3068 	if (!ns)
3069 		return -EINVAL;
3070 
3071 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
3072 	if (!flow_group_in)
3073 		return -ENOMEM;
3074 
3075 	if (vport->vport || mlx5_core_is_ecpf(esw->dev))
3076 		ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT;
3077 
3078 	acl = mlx5_create_vport_flow_table(ns, &ft_attr, vport->vport);
3079 	if (IS_ERR(acl)) {
3080 		err = PTR_ERR(acl);
3081 		goto out;
3082 	}
3083 
3084 	g = mlx5_create_flow_group(acl, flow_group_in);
3085 	if (IS_ERR(g)) {
3086 		err = PTR_ERR(g);
3087 		goto err_table;
3088 	}
3089 
3090 	vport->egress.acl = acl;
3091 	vport->egress.offloads.bounce_grp = g;
3092 	vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB;
3093 	err = 0;
3094 
3095 err_table:
3096 	if (err && !IS_ERR_OR_NULL(acl)) {
3097 		mlx5_destroy_flow_table(acl);
3098 		vport->egress.acl = NULL;
3099 	}
3100 out:
3101 	kvfree(flow_group_in);
3102 	return err;
3103 }
3104 
3105 static void esw_slave_egress_destroy_resources(struct mlx5_vport *vport)
3106 {
3107 	if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
3108 		mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
3109 		vport->egress.offloads.bounce_grp = NULL;
3110 	}
3111 	if (!IS_ERR_OR_NULL(vport->egress.acl)) {
3112 		esw_acl_egress_ofld_cleanup(vport);
3113 		xa_destroy(&vport->egress.offloads.bounce_rules);
3114 	}
3115 }
3116 
3117 static int esw_set_slave_egress_rule(struct mlx5_core_dev *master,
3118 				     struct mlx5_core_dev *slave)
3119 {
3120 	struct mlx5_eswitch *slave_esw = slave->priv.eswitch;
3121 	u16 master_vhca = MLX5_CAP_GEN(master, vhca_id);
3122 	struct mlx5_flow_destination dest = {};
3123 	struct mlx5_flow_handle *bounce_rule;
3124 	struct mlx5_flow_act flow_act = {};
3125 	struct mlx5_vport *slave_vport;
3126 	int err;
3127 
3128 	slave_vport = mlx5_eswitch_get_vport(slave_esw,
3129 					     slave_esw->manager_vport);
3130 	if (IS_ERR(slave_vport))
3131 		return PTR_ERR(slave_vport);
3132 
3133 	err = esw_slave_egress_create_resources(slave_esw, slave_vport);
3134 	if (err)
3135 		return err;
3136 
3137 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3138 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
3139 	dest.vport.num = master->priv.eswitch->manager_vport;
3140 	dest.vport.vhca_id = master_vhca;
3141 	dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
3142 
3143 	bounce_rule = mlx5_add_flow_rules(slave_vport->egress.acl, NULL,
3144 					  &flow_act, &dest, 1);
3145 	if (IS_ERR(bounce_rule)) {
3146 		err = PTR_ERR(bounce_rule);
3147 		goto err_rule;
3148 	}
3149 	err = xa_insert(&slave_vport->egress.offloads.bounce_rules,
3150 			master_vhca, bounce_rule, GFP_KERNEL);
3151 	if (err)
3152 		goto err_insert;
3153 
3154 	return 0;
3155 err_insert:
3156 	mlx5_del_flow_rules(bounce_rule);
3157 err_rule:
3158 	esw_slave_egress_destroy_resources(slave_vport);
3159 	return err;
3160 }
3161 
3162 static void esw_unset_slave_egress_rule(struct mlx5_core_dev *master,
3163 					struct mlx5_core_dev *slave)
3164 {
3165 	struct mlx5_eswitch *slave_esw = slave->priv.eswitch;
3166 	u16 master_vhca = MLX5_CAP_GEN(master, vhca_id);
3167 	struct mlx5_vport *slave_vport;
3168 
3169 	slave_vport = mlx5_eswitch_get_vport(slave_esw,
3170 					     slave_esw->manager_vport);
3171 	if (IS_ERR(slave_vport))
3172 		return;
3173 
3174 	esw_acl_egress_ofld_bounce_rule_destroy(slave_vport, master_vhca);
3175 	esw_slave_egress_destroy_resources(slave_vport);
3176 }
3177 
3178 static int esw_master_egress_create_resources(struct mlx5_eswitch *esw,
3179 					      struct mlx5_flow_namespace *egress_ns,
3180 					      struct mlx5_vport *vport, size_t count)
3181 {
3182 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
3183 	struct mlx5_flow_table_attr ft_attr = {
3184 		.max_fte = count, .prio = 0, .level = 0,
3185 	};
3186 	struct mlx5_flow_table *acl;
3187 	struct mlx5_flow_group *g;
3188 	void *match_criteria;
3189 	u32 *flow_group_in;
3190 	int err;
3191 
3192 	if (vport->egress.acl)
3193 		return 0;
3194 
3195 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
3196 	if (!flow_group_in)
3197 		return -ENOMEM;
3198 
3199 	if (vport->vport || mlx5_core_is_ecpf(esw->dev))
3200 		ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT;
3201 
3202 	acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
3203 	if (IS_ERR(acl)) {
3204 		err = PTR_ERR(acl);
3205 		goto out;
3206 	}
3207 
3208 	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
3209 				      match_criteria);
3210 	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
3211 			 misc_parameters.source_port);
3212 	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
3213 			 misc_parameters.source_eswitch_owner_vhca_id);
3214 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
3215 		 MLX5_MATCH_MISC_PARAMETERS);
3216 
3217 	MLX5_SET(create_flow_group_in, flow_group_in,
3218 		 source_eswitch_owner_vhca_id_valid, 1);
3219 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
3220 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, count);
3221 
3222 	g = mlx5_create_flow_group(acl, flow_group_in);
3223 	if (IS_ERR(g)) {
3224 		err = PTR_ERR(g);
3225 		goto err_group;
3226 	}
3227 
3228 	vport->egress.acl = acl;
3229 	vport->egress.offloads.bounce_grp = g;
3230 	vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB;
3231 	xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC);
3232 
3233 	kvfree(flow_group_in);
3234 
3235 	return 0;
3236 
3237 err_group:
3238 	mlx5_destroy_flow_table(acl);
3239 out:
3240 	kvfree(flow_group_in);
3241 	return err;
3242 }
3243 
3244 static void esw_master_egress_destroy_resources(struct mlx5_vport *vport)
3245 {
3246 	if (!xa_empty(&vport->egress.offloads.bounce_rules))
3247 		return;
3248 	mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
3249 	vport->egress.offloads.bounce_grp = NULL;
3250 	mlx5_destroy_flow_table(vport->egress.acl);
3251 	vport->egress.acl = NULL;
3252 }
3253 
3254 static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
3255 				      struct mlx5_core_dev *slave, size_t count)
3256 {
3257 	struct mlx5_eswitch *esw = master->priv.eswitch;
3258 	u16 slave_index = MLX5_CAP_GEN(slave, vhca_id);
3259 	struct mlx5_flow_namespace *egress_ns;
3260 	struct mlx5_vport *vport;
3261 	int err;
3262 
3263 	vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
3264 	if (IS_ERR(vport))
3265 		return PTR_ERR(vport);
3266 
3267 	egress_ns = mlx5_get_flow_vport_namespace(master,
3268 						  MLX5_FLOW_NAMESPACE_ESW_EGRESS,
3269 						  vport->index);
3270 	if (!egress_ns)
3271 		return -EINVAL;
3272 
3273 	if (vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB)
3274 		return 0;
3275 
3276 	err = esw_master_egress_create_resources(esw, egress_ns, vport, count);
3277 	if (err)
3278 		return err;
3279 
3280 	if (xa_load(&vport->egress.offloads.bounce_rules, slave_index))
3281 		return -EINVAL;
3282 
3283 	err = __esw_set_master_egress_rule(master, slave, vport, vport->egress.acl);
3284 	if (err)
3285 		goto err_rule;
3286 
3287 	return 0;
3288 
3289 err_rule:
3290 	esw_master_egress_destroy_resources(vport);
3291 	return err;
3292 }
3293 
3294 static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev,
3295 					 struct mlx5_core_dev *slave_dev)
3296 {
3297 	struct mlx5_vport *vport;
3298 
3299 	vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
3300 				       dev->priv.eswitch->manager_vport);
3301 
3302 	if (!vport->egress.acl)
3303 		return;
3304 
3305 	esw_acl_egress_ofld_bounce_rule_destroy(vport, MLX5_CAP_GEN(slave_dev, vhca_id));
3306 
3307 	if (xa_empty(&vport->egress.offloads.bounce_rules)) {
3308 		esw_acl_egress_ofld_cleanup(vport);
3309 		xa_destroy(&vport->egress.offloads.bounce_rules);
3310 	}
3311 }
3312 
3313 int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
3314 					     struct mlx5_eswitch *slave_esw, int max_slaves)
3315 {
3316 	int err;
3317 
3318 	err = esw_set_slave_root_fdb(master_esw->dev,
3319 				     slave_esw->dev);
3320 	if (err)
3321 		return err;
3322 
3323 	if (!mlx5_sd_is_primary(slave_esw->dev))
3324 		return 0;
3325 
3326 	err = esw_set_master_egress_rule(master_esw->dev,
3327 					 slave_esw->dev, max_slaves);
3328 	if (err)
3329 		goto err_acl;
3330 
3331 	return err;
3332 
3333 err_acl:
3334 	esw_set_slave_root_fdb(NULL, slave_esw->dev);
3335 	return err;
3336 }
3337 
3338 void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
3339 					      struct mlx5_eswitch *slave_esw)
3340 {
3341 	esw_set_slave_root_fdb(NULL, slave_esw->dev);
3342 	esw_unset_master_egress_rule(master_esw->dev, slave_esw->dev);
3343 }
3344 
3345 int mlx5_eswitch_offloads_vport_lag_add_one(struct mlx5_eswitch *master_esw,
3346 					    struct mlx5_eswitch *slave_esw)
3347 {
3348 	return esw_set_slave_egress_rule(master_esw->dev, slave_esw->dev);
3349 }
3350 
3351 void mlx5_eswitch_offloads_vport_lag_del_one(struct mlx5_eswitch *master_esw,
3352 					     struct mlx5_eswitch *slave_esw)
3353 {
3354 	esw_unset_slave_egress_rule(master_esw->dev, slave_esw->dev);
3355 }
3356 
/* Event codes sent through the ESW offloads devcom component and handled by
 * mlx5_esw_offloads_devcom_event().
 */
#define ESW_OFFLOADS_DEVCOM_PAIR	(0)
#define ESW_OFFLOADS_DEVCOM_UNPAIR	(1)
3359 
3360 static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw,
3361 					       struct mlx5_eswitch *peer_esw)
3362 {
3363 	const struct mlx5_eswitch_rep_ops *ops;
3364 	struct mlx5_eswitch_rep *rep;
3365 	unsigned long i;
3366 	u8 rep_type;
3367 
3368 	mlx5_esw_for_each_rep(esw, i, rep) {
3369 		rep_type = NUM_REP_TYPES;
3370 		while (rep_type--) {
3371 			ops = esw->offloads.rep_ops[rep_type];
3372 			if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
3373 			    ops->event)
3374 				ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, peer_esw);
3375 		}
3376 	}
3377 }
3378 
3379 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw,
3380 				     struct mlx5_eswitch *peer_esw)
3381 {
3382 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
3383 	mlx5e_tc_clean_fdb_peer_flows(esw);
3384 #endif
3385 	mlx5_esw_offloads_rep_event_unpair(esw, peer_esw);
3386 	esw_del_fdb_peer_miss_rules(esw, peer_esw->dev);
3387 }
3388 
3389 static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
3390 				  struct mlx5_eswitch *peer_esw)
3391 {
3392 	const struct mlx5_eswitch_rep_ops *ops;
3393 	struct mlx5_eswitch_rep *rep;
3394 	unsigned long i;
3395 	u8 rep_type;
3396 	int err;
3397 
3398 	err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
3399 	if (err)
3400 		return err;
3401 
3402 	mlx5_esw_for_each_rep(esw, i, rep) {
3403 		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
3404 			ops = esw->offloads.rep_ops[rep_type];
3405 			if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
3406 			    ops->event) {
3407 				err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
3408 				if (err)
3409 					goto err_out;
3410 			}
3411 		}
3412 	}
3413 
3414 	return 0;
3415 
3416 err_out:
3417 	mlx5_esw_offloads_unpair(esw, peer_esw);
3418 	return err;
3419 }
3420 
3421 static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
3422 					 struct mlx5_eswitch *peer_esw,
3423 					 bool pair)
3424 {
3425 	u16 peer_vhca_id = MLX5_CAP_GEN(peer_esw->dev, vhca_id);
3426 	u16 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
3427 	struct mlx5_flow_root_namespace *peer_ns;
3428 	struct mlx5_flow_root_namespace *ns;
3429 	int err;
3430 
3431 	peer_ns = peer_esw->dev->priv.steering->fdb_root_ns;
3432 	ns = esw->dev->priv.steering->fdb_root_ns;
3433 
3434 	if (pair) {
3435 		err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_vhca_id);
3436 		if (err)
3437 			return err;
3438 
3439 		err = mlx5_flow_namespace_set_peer(peer_ns, ns, vhca_id);
3440 		if (err) {
3441 			mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id);
3442 			return err;
3443 		}
3444 	} else {
3445 		mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id);
3446 		mlx5_flow_namespace_set_peer(peer_ns, NULL, vhca_id);
3447 	}
3448 
3449 	return 0;
3450 }
3451 
3452 bool mlx5_eswitch_is_peer(struct mlx5_eswitch *esw,
3453 			  struct mlx5_eswitch *peer_esw)
3454 {
3455 	u16 peer_esw_i;
3456 
3457 	if (!mlx5_esw_allowed(esw) || !mlx5_esw_allowed(peer_esw))
3458 		return false;
3459 
3460 	peer_esw_i = MLX5_CAP_GEN(peer_esw->dev, vhca_id);
3461 	return !!xa_load(&esw->paired, peer_esw_i);
3462 }
3463 
3464 static int mlx5_esw_offloads_devcom_event(int event,
3465 					  void *my_data,
3466 					  void *event_data)
3467 {
3468 	struct mlx5_eswitch *esw = my_data;
3469 	struct mlx5_eswitch *peer_esw = event_data;
3470 	u16 esw_i, peer_esw_i;
3471 	bool esw_paired;
3472 	int err;
3473 
3474 	peer_esw_i = MLX5_CAP_GEN(peer_esw->dev, vhca_id);
3475 	esw_i = MLX5_CAP_GEN(esw->dev, vhca_id);
3476 	esw_paired = !!xa_load(&esw->paired, peer_esw_i);
3477 
3478 	switch (event) {
3479 	case ESW_OFFLOADS_DEVCOM_PAIR:
3480 		if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
3481 		    mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
3482 			break;
3483 
3484 		if (esw_paired)
3485 			break;
3486 
3487 		err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
3488 		if (err)
3489 			goto err_out;
3490 
3491 		err = mlx5_esw_offloads_pair(esw, peer_esw);
3492 		if (err)
3493 			goto err_peer;
3494 
3495 		err = mlx5_esw_offloads_pair(peer_esw, esw);
3496 		if (err)
3497 			goto err_pair;
3498 
3499 		err = xa_insert(&esw->paired, peer_esw_i, peer_esw, GFP_KERNEL);
3500 		if (err)
3501 			goto err_xa;
3502 
3503 		err = xa_insert(&peer_esw->paired, esw_i, esw, GFP_KERNEL);
3504 		if (err)
3505 			goto err_peer_xa;
3506 
3507 		esw->num_peers++;
3508 		peer_esw->num_peers++;
3509 		mlx5_devcom_comp_set_ready(esw->devcom, true);
3510 		break;
3511 
3512 	case ESW_OFFLOADS_DEVCOM_UNPAIR:
3513 		if (!esw_paired)
3514 			break;
3515 
3516 		peer_esw->num_peers--;
3517 		esw->num_peers--;
3518 		if (!esw->num_peers && !peer_esw->num_peers)
3519 			mlx5_devcom_comp_set_ready(esw->devcom, false);
3520 		xa_erase(&peer_esw->paired, esw_i);
3521 		xa_erase(&esw->paired, peer_esw_i);
3522 		mlx5_esw_offloads_unpair(peer_esw, esw);
3523 		mlx5_esw_offloads_unpair(esw, peer_esw);
3524 		mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
3525 		break;
3526 	}
3527 
3528 	return 0;
3529 
3530 err_peer_xa:
3531 	xa_erase(&esw->paired, peer_esw_i);
3532 err_xa:
3533 	mlx5_esw_offloads_unpair(peer_esw, esw);
3534 err_pair:
3535 	mlx5_esw_offloads_unpair(esw, peer_esw);
3536 err_peer:
3537 	mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
3538 err_out:
3539 	mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
3540 		      event, err);
3541 	return err;
3542 }
3543 
3544 void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw,
3545 				   const struct mlx5_devcom_match_attr *attr)
3546 {
3547 	int i;
3548 
3549 	for (i = 0; i < MLX5_MAX_PORTS; i++)
3550 		INIT_LIST_HEAD(&esw->offloads.peer_flows[i]);
3551 	mutex_init(&esw->offloads.peer_mutex);
3552 
3553 	if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
3554 		return;
3555 
3556 	if ((MLX5_VPORT_MANAGER(esw->dev) || mlx5_core_is_ecpf_esw_manager(esw->dev)) &&
3557 	    (!mlx5_lag_is_supported(esw->dev) && !mlx5_get_sd(esw->dev)))
3558 		return;
3559 
3560 	xa_init(&esw->paired);
3561 	xa_init(&esw->fdb_table.offloads.peer_miss_rules);
3562 	esw->num_peers = 0;
3563 	esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc,
3564 						     MLX5_DEVCOM_ESW_OFFLOADS,
3565 						     attr,
3566 						     mlx5_esw_offloads_devcom_event,
3567 						     esw);
3568 	if (!esw->devcom)
3569 		return;
3570 
3571 	mlx5_devcom_send_event(esw->devcom,
3572 			       ESW_OFFLOADS_DEVCOM_PAIR,
3573 			       ESW_OFFLOADS_DEVCOM_UNPAIR,
3574 			       esw);
3575 }
3576 
3577 void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
3578 {
3579 	if (!esw->devcom)
3580 		return;
3581 
3582 	mlx5_devcom_send_event(esw->devcom,
3583 			       ESW_OFFLOADS_DEVCOM_UNPAIR,
3584 			       ESW_OFFLOADS_DEVCOM_UNPAIR,
3585 			       esw);
3586 
3587 	mlx5_devcom_unregister_component(esw->devcom);
3588 	xa_destroy(&esw->paired);
3589 	xa_destroy(&esw->fdb_table.offloads.peer_miss_rules);
3590 	esw->devcom = NULL;
3591 }
3592 
3593 bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw)
3594 {
3595 	return mlx5_devcom_comp_is_ready(esw->devcom);
3596 }
3597 
3598 bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
3599 {
3600 	if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
3601 		return false;
3602 
3603 	if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
3604 	      MLX5_FDB_TO_VPORT_REG_C_0))
3605 		return false;
3606 
3607 	return true;
3608 }
3609 
3610 #define MLX5_ESW_METADATA_RSVD_UPLINK 1
3611 
3612 /* Share the same metadata for uplink's. This is fine because:
3613  * (a) In shared FDB mode (LAG) both uplink's are treated the
3614  *     same and tagged with the same metadata.
3615  * (b) In non shared FDB mode, packets from physical port0
3616  *     cannot hit eswitch of PF1 and vice versa.
3617  */
3618 static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw)
3619 {
3620 	return MLX5_ESW_METADATA_RSVD_UPLINK;
3621 }
3622 
3623 u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
3624 {
3625 	u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
3626 	/* Reserve 0xf for internal port offload */
3627 	u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2;
3628 	int pf_num;
3629 	int id;
3630 
3631 	/* Only 4 bits of pf_num */
3632 	pf_num = mlx5_sd_pf_num_get(esw->dev);
3633 	if (pf_num < 0 || pf_num > max_pf_num)
3634 		return 0;
3635 
3636 	/* Metadata is 4 bits of PFNUM and 12 bits of unique id */
3637 	/* Use only non-zero vport_id (2-4095) for all PF's */
3638 	id = ida_alloc_range(&esw->offloads.vport_metadata_ida,
3639 			     MLX5_ESW_METADATA_RSVD_UPLINK + 1,
3640 			     vport_end_ida, GFP_KERNEL);
3641 	if (id < 0)
3642 		return 0;
3643 	id = (pf_num << ESW_VPORT_BITS) | id;
3644 	return id;
3645 }
3646 
3647 void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
3648 {
3649 	u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1;
3650 
3651 	/* Metadata contains only 12 bits of actual ida id */
3652 	ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask);
3653 }
3654 
3655 static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
3656 					     struct mlx5_vport *vport)
3657 {
3658 	if (vport->vport == MLX5_VPORT_UPLINK)
3659 		vport->default_metadata = mlx5_esw_match_metadata_reserved(esw);
3660 	else
3661 		vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
3662 
3663 	vport->metadata = vport->default_metadata;
3664 	return vport->metadata ? 0 : -ENOSPC;
3665 }
3666 
3667 static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
3668 						struct mlx5_vport *vport)
3669 {
3670 	if (!vport->default_metadata)
3671 		return;
3672 
3673 	if (vport->vport == MLX5_VPORT_UPLINK)
3674 		return;
3675 
3676 	WARN_ON(vport->metadata != vport->default_metadata);
3677 	mlx5_esw_match_metadata_free(esw, vport->default_metadata);
3678 	vport->default_metadata = 0;
3679 }
3680 
3681 static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
3682 {
3683 	struct mlx5_vport *vport;
3684 	unsigned long i;
3685 
3686 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
3687 		return;
3688 
3689 	mlx5_esw_for_each_vport(esw, i, vport)
3690 		esw_offloads_vport_metadata_cleanup(esw, vport);
3691 }
3692 
3693 static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
3694 {
3695 	struct mlx5_vport *vport;
3696 	unsigned long i;
3697 	int err;
3698 
3699 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
3700 		return 0;
3701 
3702 	mlx5_esw_for_each_vport(esw, i, vport) {
3703 		err = esw_offloads_vport_metadata_setup(esw, vport);
3704 		if (err)
3705 			goto metadata_err;
3706 	}
3707 
3708 	return 0;
3709 
3710 metadata_err:
3711 	esw_offloads_metadata_uninit(esw);
3712 	return err;
3713 }
3714 
3715 /* Deferred metadata init for SD devices: allocate vport metadata and
3716  * refresh the ingress ACL for every vport whose ACL was created with
3717  * metadata=0 in esw_create_offloads_acl_tables() / esw_vport_setup().
3718  *
3719  * No Rep is loaded at this point ==> no Rep net-dev exists, so no need
3720  * to take rtnl lock.
3721  *
3722  * Safe to call multiple times - subsequent calls are no-ops.
3723  */
3724 int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw)
3725 {
3726 	struct mlx5_vport *manager, *vport;
3727 	unsigned long i;
3728 	int err;
3729 
3730 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
3731 		return 0;
3732 
3733 	manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
3734 	if (IS_ERR(manager))
3735 		return PTR_ERR(manager);
3736 
3737 	/* Sanity check: skip if metadata was already initialized */
3738 	if (manager->default_metadata)
3739 		return 0;
3740 
3741 	err = esw_offloads_metadata_init(esw);
3742 	if (err)
3743 		return err;
3744 
3745 	mutex_lock(&esw->state_lock);
3746 	/* Manager vport doesn't have a rep/netdev loaded but its ingress ACL
3747 	 * was programmed with metadata=0 - refresh it explicitly.
3748 	 */
3749 	err = mlx5_esw_acl_ingress_vport_metadata_update(esw,
3750 							 esw->manager_vport,
3751 							 0);
3752 	if (err)
3753 		goto err_acl;
3754 
3755 	/* UPLINK is never marked enabled but its ACL is programmed in
3756 	 * esw_create_offloads_acl_tables(); refresh it explicitly.
3757 	 */
3758 	err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK,
3759 							 0);
3760 	if (err)
3761 		goto err_acl;
3762 
3763 	mlx5_esw_for_each_vport(esw, i, vport) {
3764 		if (!vport || !vport->enabled)
3765 			continue;
3766 		err = mlx5_esw_acl_ingress_vport_metadata_update(esw,
3767 								 vport->vport,
3768 								 0);
3769 		if (err)
3770 			goto err_acl;
3771 	}
3772 
3773 	mutex_unlock(&esw->state_lock);
3774 	return 0;
3775 
3776 err_acl:
3777 	esw_offloads_metadata_uninit(esw);
3778 	mutex_unlock(&esw->state_lock);
3779 	return err;
3780 }
3781 
/* Set up the ingress and then the egress offload ACL of @vport. If the
 * egress setup fails the ingress ACL is cleaned up again.
 *
 * Returns 0 on success or the error of the failing setup step.
 */
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
				     struct mlx5_vport *vport)
{
	int ret;

	ret = esw_acl_ingress_ofld_setup(esw, vport);
	if (ret)
		return ret;

	ret = esw_acl_egress_ofld_setup(esw, vport);
	if (ret)
		esw_acl_ingress_ofld_cleanup(esw, vport);

	return ret;
}
3802 
/* Clean up the offload ACLs of @vport: egress first, then ingress (the
 * reverse of the setup order).
 */
void
esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
				      struct mlx5_vport *vport)
{
	/* egress */
	esw_acl_egress_ofld_cleanup(vport);

	/* ingress */
	esw_acl_ingress_ofld_cleanup(esw, vport);
}
3810 
3811 static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
3812 {
3813 	struct mlx5_vport *uplink, *manager;
3814 	int ret;
3815 
3816 	uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
3817 	if (IS_ERR(uplink))
3818 		return PTR_ERR(uplink);
3819 
3820 	ret = esw_vport_create_offloads_acl_tables(esw, uplink);
3821 	if (ret)
3822 		return ret;
3823 
3824 	manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
3825 	if (IS_ERR(manager)) {
3826 		ret = PTR_ERR(manager);
3827 		goto err_manager;
3828 	}
3829 
3830 	ret = esw_vport_create_offloads_acl_tables(esw, manager);
3831 	if (ret)
3832 		goto err_manager;
3833 
3834 	return 0;
3835 
3836 err_manager:
3837 	esw_vport_destroy_offloads_acl_tables(esw, uplink);
3838 	return ret;
3839 }
3840 
3841 static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
3842 {
3843 	struct mlx5_vport *vport;
3844 
3845 	vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
3846 	if (!IS_ERR(vport))
3847 		esw_vport_destroy_offloads_acl_tables(esw, vport);
3848 
3849 	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
3850 	if (!IS_ERR(vport))
3851 		esw_vport_destroy_offloads_acl_tables(esw, vport);
3852 }
3853 
3854 void mlx5_eswitch_unload_reps(struct mlx5_eswitch *esw)
3855 {
3856 	struct mlx5_eswitch_rep *rep;
3857 	unsigned long i;
3858 
3859 	if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
3860 		return;
3861 
3862 	mlx5_esw_for_each_rep(esw, i, rep) {
3863 		if (rep->vport == MLX5_VPORT_UPLINK)
3864 			continue;
3865 		mlx5_esw_offloads_rep_unload(esw, rep->vport);
3866 	}
3867 }
3868 
3869 int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw)
3870 {
3871 	struct mlx5_eswitch_rep *rep;
3872 	unsigned long i;
3873 	int ret;
3874 
3875 	if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
3876 		return 0;
3877 
3878 	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
3879 	if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
3880 		return 0;
3881 
3882 	/* SD secondary devices share the primary's uplink and do not
3883 	 * have their own uplink representor. Only load VF/SF vports.
3884 	 */
3885 	if (mlx5_sd_is_primary(esw->dev)) {
3886 		ret = __esw_offloads_load_rep(esw, rep, REP_IB, NULL);
3887 		if (ret)
3888 			return ret;
3889 	}
3890 
3891 	mlx5_esw_for_each_rep(esw, i, rep) {
3892 		if (!mlx5_sd_is_primary(esw->dev) &&
3893 		    rep->vport == MLX5_VPORT_UPLINK)
3894 			continue;
3895 		if (rep->vport != MLX5_VPORT_UPLINK &&
3896 		    mlx5_get_sd(esw->dev) && !mlx5_lag_is_active(esw->dev))
3897 			continue;
3898 
3899 		if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
3900 			__esw_offloads_load_rep(esw, rep, REP_IB, NULL);
3901 	}
3902 
3903 	return 0;
3904 }
3905 
3906 static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
3907 {
3908 	struct mlx5_esw_indir_table *indir;
3909 	int err;
3910 
3911 	memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
3912 	mutex_init(&esw->fdb_table.offloads.vports.lock);
3913 	hash_init(esw->fdb_table.offloads.vports.table);
3914 	atomic64_set(&esw->user_count, 0);
3915 
3916 	indir = mlx5_esw_indir_table_init();
3917 	if (IS_ERR(indir)) {
3918 		err = PTR_ERR(indir);
3919 		goto create_indir_err;
3920 	}
3921 	esw->fdb_table.offloads.indir = indir;
3922 
3923 	err = esw_create_offloads_acl_tables(esw);
3924 	if (err)
3925 		goto create_acl_err;
3926 
3927 	err = esw_create_offloads_table(esw);
3928 	if (err)
3929 		goto create_offloads_err;
3930 
3931 	err = esw_create_restore_table(esw);
3932 	if (err)
3933 		goto create_restore_err;
3934 
3935 	err = esw_create_offloads_fdb_tables(esw);
3936 	if (err)
3937 		goto create_fdb_err;
3938 
3939 	err = esw_create_vport_rx_group(esw);
3940 	if (err)
3941 		goto create_fg_err;
3942 
3943 	err = esw_create_vport_rx_drop_group(esw);
3944 	if (err)
3945 		goto create_rx_drop_fg_err;
3946 
3947 	err = esw_create_vport_rx_drop_rule(esw);
3948 	if (err)
3949 		goto create_rx_drop_rule_err;
3950 
3951 	return 0;
3952 
3953 create_rx_drop_rule_err:
3954 	esw_destroy_vport_rx_drop_group(esw);
3955 create_rx_drop_fg_err:
3956 	esw_destroy_vport_rx_group(esw);
3957 create_fg_err:
3958 	esw_destroy_offloads_fdb_tables(esw);
3959 create_fdb_err:
3960 	esw_destroy_restore_table(esw);
3961 create_restore_err:
3962 	esw_destroy_offloads_table(esw);
3963 create_offloads_err:
3964 	esw_destroy_offloads_acl_tables(esw);
3965 create_acl_err:
3966 	mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
3967 create_indir_err:
3968 	mutex_destroy(&esw->fdb_table.offloads.vports.lock);
3969 	return err;
3970 }
3971 
/* Tear down the offloads steering objects of @esw.
 *
 * The FDB drop state and the drop_root_fc counter are released first, then
 * the objects built by esw_offloads_steering_init() are destroyed in the
 * reverse of their creation order, ending with the vports lock.
 */
static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
{
	mlx5_esw_fdb_drop_destroy(esw);
	/* The counter is optional; clear the pointer once it is released. */
	if (esw->fdb_table.offloads.drop_root_fc)
		mlx5_fc_destroy(esw->dev, esw->fdb_table.offloads.drop_root_fc);
	esw->fdb_table.offloads.drop_root_fc = NULL;
	esw_destroy_vport_rx_drop_rule(esw);
	esw_destroy_vport_rx_drop_group(esw);
	esw_destroy_vport_rx_group(esw);
	esw_destroy_offloads_fdb_tables(esw);
	esw_destroy_restore_table(esw);
	esw_destroy_offloads_table(esw);
	esw_destroy_offloads_acl_tables(esw);
	mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
	mutex_destroy(&esw->fdb_table.offloads.vports.lock);
}
3988 
3989 static void esw_vfs_changed_event_handler(struct mlx5_eswitch *esw)
3990 {
3991 	struct mlx5_esw_pf_info host_pf_info;
3992 	u16 new_num_vfs;
3993 	const u32 *out;
3994 
3995 	out = mlx5_esw_query_functions(esw->dev);
3996 	if (IS_ERR(out))
3997 		return;
3998 
3999 	host_pf_info = mlx5_esw_get_host_pf_info(esw->dev, out);
4000 	new_num_vfs = host_pf_info.num_of_vfs;
4001 
4002 	if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_info.pf_disabled)
4003 		goto free;
4004 
4005 	mlx5_esw_reps_block(esw);
4006 	/* Number of VFs can only change from "0 to x" or "x to 0". */
4007 	if (esw->esw_funcs.num_vfs > 0) {
4008 		mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
4009 	} else {
4010 		int err;
4011 
4012 		err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs,
4013 						  MLX5_VPORT_UC_ADDR_CHANGE);
4014 		if (err)
4015 			goto unblock;
4016 	}
4017 	esw->esw_funcs.num_vfs = new_num_vfs;
4018 unblock:
4019 	mlx5_esw_reps_unblock(esw);
4020 free:
4021 	kvfree(out);
4022 }
4023 
/* Workqueue entry point for work queued by mlx5_esw_add_work().
 *
 * Runs host_work->func(esw) under the devlink instance lock, but only if
 * the generation recorded when the work was queued still matches the
 * eswitch's current generation. The work item is always freed here.
 */
static void esw_wq_handler(struct work_struct *work)
{
	struct mlx5_host_work *host_work;
	struct mlx5_eswitch *esw;
	struct devlink *devlink;
	int work_gen;

	host_work = container_of(work, struct mlx5_host_work, work);
	esw = host_work->esw;
	work_gen = host_work->work_gen;
	devlink = priv_to_devlink(esw->dev);

	/* Do not block on devlink lock until stale work is filtered out.
	 * Teardown can invalidate the generation and then wait for this
	 * workqueue while holding devlink lock.
	 */
	for (;;) {
		if (work_gen != atomic_read(&esw->generation))
			goto free;

		if (devl_trylock(devlink))
			break;

		/* Lock is contended: sleep until the generation changes or
		 * the 60 ms timeout expires, then re-check and retry.
		 */
		wait_event_timeout(esw->work_queue_wait,
				   work_gen != atomic_read(&esw->generation),
				   msecs_to_jiffies(60));
	}

	/* Stale work from one or more mode changes ago. Bail out. */
	if (work_gen != atomic_read(&esw->generation))
		goto unlock;

	host_work->func(esw);

unlock:
	devl_unlock(devlink);
free:
	kfree(host_work);
}
4063 
4064 static int mlx5_esw_add_work(struct mlx5_eswitch *esw,
4065 			     void (*func)(struct mlx5_eswitch *esw),
4066 			     gfp_t gfp)
4067 {
4068 	struct mlx5_host_work *host_work;
4069 
4070 	host_work = kzalloc_obj(*host_work, gfp);
4071 	if (!host_work)
4072 		return -ENOMEM;
4073 
4074 	host_work->esw = esw;
4075 	host_work->work_gen = atomic_read(&esw->generation);
4076 
4077 	host_work->func = func;
4078 	INIT_WORK(&host_work->work, esw_wq_handler);
4079 	queue_work(esw->work_queue, &host_work->work);
4080 
4081 	return 0;
4082 }
4083 
4084 int mlx5_esw_funcs_changed_handler(struct notifier_block *nb,
4085 				   unsigned long type, void *data)
4086 {
4087 	struct mlx5_esw_functions *esw_funcs;
4088 	struct mlx5_eswitch *esw;
4089 	int ret;
4090 
4091 	esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb);
4092 	esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs);
4093 
4094 	ret = mlx5_esw_add_work(esw, esw_vfs_changed_event_handler,
4095 				GFP_ATOMIC);
4096 	if (ret)
4097 		return NOTIFY_DONE;
4098 
4099 	return NOTIFY_OK;
4100 }
4101 
4102 bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller)
4103 {
4104 	const struct mlx5_esw_functions *esw_funcs;
4105 	int i;
4106 
4107 	/* Local controller is always valid */
4108 	if (controller == 0)
4109 		return true;
4110 
4111 	if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
4112 		return false;
4113 
4114 	/* External host number starts with zero in device */
4115 	if (controller == mlx5_esw_get_hpf_host_number(esw->dev) + 1)
4116 		return true;
4117 
4118 	esw_funcs = &esw->esw_funcs;
4119 	for (i = 0; i < esw_funcs->num_spfs; i++) {
4120 		if (controller == esw_funcs->spfs[i].host_number + 1)
4121 			return true;
4122 	}
4123 	return false;
4124 }
4125 
/* Bring the eswitch up in offloads mode.
 *
 * In order: set up adjacent vhcas, enable RoCE, initialise metadata (non-SD
 * devices only), enable passing vport metadata, create the reg_c0 object
 * mapping pool, initialise steering, set the FDB active or inactive,
 * force VF (and EC VF) link state down, load the uplink representor,
 * enable the PF/VF vports and finally initialise devcom.
 *
 * On failure the steps already completed are undone in reverse order.
 * Returns 0 on success or the error of the failing step.
 */
int esw_offloads_enable(struct mlx5_eswitch *esw)
{
	u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES];
	struct mlx5_devcom_match_attr attr = {};
	struct mapping_ctx *reg_c0_obj_pool;
	struct mlx5_vport *vport;
	unsigned long i;
	u8 id_len;
	int err;

	mutex_init(&esw->offloads.termtbl_mutex);
	mlx5_esw_adjacent_vhcas_setup(esw);

	err = mlx5_rdma_enable_roce(esw->dev);
	if (err)
		goto err_roce;

	/* SD devices defer metadata init until SD is ready and
	 * mlx5_sd_pf_num_get() can return the correct pf_num.
	 */
	if (!mlx5_get_sd(esw->dev)) {
		err = esw_offloads_metadata_init(esw);
		if (err)
			goto err_metadata;
	}

	err = esw_set_passing_vport_metadata(esw, true);
	if (err)
		goto err_vport_metadata;

	/* The system image GUID keys both the mapping pool and devcom. */
	mlx5_query_nic_sw_system_image_guid(esw->dev, mapping_id, &id_len);

	reg_c0_obj_pool = mapping_create_for_id(mapping_id, id_len,
						MAPPING_TYPE_CHAIN,
						sizeof(struct mlx5_mapped_obj),
						ESW_REG_C0_USER_DATA_METADATA_MASK,
						true);

	if (IS_ERR(reg_c0_obj_pool)) {
		err = PTR_ERR(reg_c0_obj_pool);
		goto err_pool;
	}
	esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool;

	err = esw_offloads_steering_init(esw);
	if (err)
		goto err_steering_init;

	if (esw->offloads_inactive)
		mlx5_esw_fdb_inactive(esw);
	else
		mlx5_esw_fdb_active(esw);

	/* Representor will control the vport link state */
	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN;
	if (mlx5_core_ec_sriov_enabled(esw->dev))
		mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs)
			vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN;

	/* Uplink vport rep must load first. */
	err = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
	if (err)
		goto err_uplink;

	err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
	if (err)
		goto err_vports;

	memcpy(attr.key.buf, mapping_id, id_len);
	attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS;
	attr.net = mlx5_core_net(esw->dev);
	mlx5_esw_offloads_devcom_init(esw, &attr);
	return 0;

err_vports:
	/* rollback to legacy, indicates don't unregister the uplink netdev */
	esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY;
	mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK);
err_uplink:
	esw_offloads_steering_cleanup(esw);
err_steering_init:
	mapping_destroy(reg_c0_obj_pool);
err_pool:
	esw_set_passing_vport_metadata(esw, false);
err_vport_metadata:
	/* NOTE(review): reached for SD devices too, where metadata init was
	 * skipped above - presumably uninit tolerates that; confirm.
	 */
	esw_offloads_metadata_uninit(esw);
err_metadata:
	mlx5_rdma_disable_roce(esw->dev);
err_roce:
	mlx5_esw_adjacent_vhcas_cleanup(esw);
	mutex_destroy(&esw->offloads.termtbl_mutex);
	return err;
}
4220 
4221 static int esw_offloads_stop(struct mlx5_eswitch *esw,
4222 			     struct netlink_ext_ack *extack)
4223 {
4224 	int err;
4225 
4226 	esw_mode_change(esw, MLX5_ESWITCH_LEGACY);
4227 
4228 	/* If changing from switchdev to legacy mode without sriov enabled,
4229 	 * no need to create legacy fdb.
4230 	 */
4231 	if (!mlx5_core_is_pf(esw->dev) || !mlx5_sriov_is_enabled(esw->dev))
4232 		return 0;
4233 
4234 	err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS);
4235 	if (err)
4236 		NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
4237 
4238 	return err;
4239 }
4240 
/* Tear down offloads mode: devcom, PF/VF vports, the uplink representor,
 * passing vport metadata, steering, the reg_c0 mapping pool, metadata,
 * RoCE and adjacent vhcas, in that order. If the FDB was inactive it is
 * set active again before the termination table mutex is destroyed.
 */
void esw_offloads_disable(struct mlx5_eswitch *esw)
{
	mlx5_esw_offloads_devcom_cleanup(esw);
	mlx5_eswitch_disable_pf_vf_vports(esw);
	mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK);
	esw_set_passing_vport_metadata(esw, false);
	esw_offloads_steering_cleanup(esw);
	mapping_destroy(esw->offloads.reg_c0_obj_pool);
	esw_offloads_metadata_uninit(esw);
	mlx5_rdma_disable_roce(esw->dev);
	mlx5_esw_adjacent_vhcas_cleanup(esw);
	/* must be done after vhcas cleanup to avoid adjacent vports connect */
	if (esw->offloads_inactive)
		mlx5_esw_fdb_active(esw); /* legacy mode always active */
	mutex_destroy(&esw->offloads.termtbl_mutex);
}
4257 
4258 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
4259 {
4260 	switch (mode) {
4261 	case DEVLINK_ESWITCH_MODE_LEGACY:
4262 		*mlx5_mode = MLX5_ESWITCH_LEGACY;
4263 		break;
4264 	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
4265 	case DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE:
4266 		*mlx5_mode = MLX5_ESWITCH_OFFLOADS;
4267 		break;
4268 	default:
4269 		return -EINVAL;
4270 	}
4271 
4272 	return 0;
4273 }
4274 
4275 static int esw_mode_to_devlink(struct mlx5_eswitch *esw, u16 *mode)
4276 {
4277 	switch (esw->mode) {
4278 	case MLX5_ESWITCH_LEGACY:
4279 		*mode = DEVLINK_ESWITCH_MODE_LEGACY;
4280 		break;
4281 	case MLX5_ESWITCH_OFFLOADS:
4282 		if (esw->offloads_inactive)
4283 			*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE;
4284 		else
4285 			*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
4286 		break;
4287 	default:
4288 		return -EINVAL;
4289 	}
4290 
4291 	return 0;
4292 }
4293 
4294 static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
4295 {
4296 	switch (mode) {
4297 	case DEVLINK_ESWITCH_INLINE_MODE_NONE:
4298 		*mlx5_mode = MLX5_INLINE_MODE_NONE;
4299 		break;
4300 	case DEVLINK_ESWITCH_INLINE_MODE_LINK:
4301 		*mlx5_mode = MLX5_INLINE_MODE_L2;
4302 		break;
4303 	case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
4304 		*mlx5_mode = MLX5_INLINE_MODE_IP;
4305 		break;
4306 	case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
4307 		*mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
4308 		break;
4309 	default:
4310 		return -EINVAL;
4311 	}
4312 
4313 	return 0;
4314 }
4315 
4316 static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
4317 {
4318 	switch (mlx5_mode) {
4319 	case MLX5_INLINE_MODE_NONE:
4320 		*mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
4321 		break;
4322 	case MLX5_INLINE_MODE_L2:
4323 		*mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
4324 		break;
4325 	case MLX5_INLINE_MODE_IP:
4326 		*mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
4327 		break;
4328 	case MLX5_INLINE_MODE_TCP_UDP:
4329 		*mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
4330 		break;
4331 	default:
4332 		return -EINVAL;
4333 	}
4334 
4335 	return 0;
4336 }
4337 
4338 int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev)
4339 {
4340 	struct mlx5_eswitch *esw = dev->priv.eswitch;
4341 	int err;
4342 
4343 	if (!mlx5_esw_allowed(esw))
4344 		return 0;
4345 
4346 	/* Take TC into account */
4347 	err = mlx5_esw_try_lock(esw);
4348 	if (err < 0)
4349 		return err;
4350 
4351 	esw->offloads.num_block_mode++;
4352 	mlx5_esw_unlock(esw);
4353 	return 0;
4354 }
4355 
4356 void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev)
4357 {
4358 	struct mlx5_eswitch *esw = dev->priv.eswitch;
4359 
4360 	if (!mlx5_esw_allowed(esw))
4361 		return;
4362 
4363 	down_write(&esw->mode_lock);
4364 	esw->offloads.num_block_mode--;
4365 	up_write(&esw->mode_lock);
4366 }
4367 
4368 /* Returns false only when uplink netdev exists and its netns is different from
4369  * devlink's netns. True for all others so entering switchdev mode is allowed.
4370  */
4371 static bool mlx5_devlink_netdev_netns_immutable_set(struct devlink *devlink,
4372 						    bool immutable)
4373 {
4374 	struct mlx5_core_dev *mdev = devlink_priv(devlink);
4375 	struct net_device *netdev;
4376 	bool ret;
4377 
4378 	netdev = mlx5_uplink_netdev_get(mdev);
4379 	if (!netdev)
4380 		return true;
4381 
4382 	rtnl_lock();
4383 	netdev->netns_immutable = immutable;
4384 	ret = net_eq(dev_net(netdev), devlink_net(devlink));
4385 	rtnl_unlock();
4386 
4387 	mlx5_uplink_netdev_put(mdev, netdev);
4388 	return ret;
4389 }
4390 
4391 /* Returns true when only changing between active and inactive switchdev mode */
4392 static bool mlx5_devlink_switchdev_active_mode_change(struct mlx5_eswitch *esw,
4393 						      u16 devlink_mode)
4394 {
4395 	/* current mode is not switchdev */
4396 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
4397 		return false;
4398 
4399 	/* new mode is not switchdev */
4400 	if (devlink_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV &&
4401 	    devlink_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE)
4402 		return false;
4403 
4404 	/* already inactive: no change in current state */
4405 	if (devlink_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE &&
4406 	    esw->offloads_inactive)
4407 		return false;
4408 
4409 	/* already active: no change in current state */
4410 	if (devlink_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV &&
4411 	    !esw->offloads_inactive)
4412 		return false;
4413 
4414 	down_write(&esw->mode_lock);
4415 	esw->offloads_inactive = !esw->offloads_inactive;
4416 	esw->eswitch_operation_in_progress = true;
4417 	up_write(&esw->mode_lock);
4418 
4419 	if (esw->offloads_inactive)
4420 		mlx5_esw_fdb_inactive(esw);
4421 	else
4422 		mlx5_esw_fdb_active(esw);
4423 
4424 	down_write(&esw->mode_lock);
4425 	esw->eswitch_operation_in_progress = false;
4426 	up_write(&esw->mode_lock);
4427 	return true;
4428 }
4429 
4430 #define MLX5_ESW_HOLD_TIMEOUT_MS 7000
4431 #define MLX5_ESW_HOLD_RETRY_DELAY_MS 500
4432 
4433 void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev)
4434 {
4435 	unsigned long timeout;
4436 	bool hold_esw = true;
4437 
4438 	/* Wait for any concurrent eswitch mode transition to complete. */
4439 	if (!mlx5_esw_hold(dev)) {
4440 		timeout = jiffies + msecs_to_jiffies(MLX5_ESW_HOLD_TIMEOUT_MS);
4441 		while (!mlx5_esw_hold(dev)) {
4442 			if (!time_before(jiffies, timeout)) {
4443 				hold_esw = false;
4444 				break;
4445 			}
4446 			msleep(MLX5_ESW_HOLD_RETRY_DELAY_MS);
4447 		}
4448 	}
4449 	if (hold_esw) {
4450 		if (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS)
4451 			mlx5_core_reps_aux_devs_remove(dev);
4452 		mlx5_esw_release(dev);
4453 	}
4454 }
4455 
/* devlink callback: change the eswitch mode to @mode.
 *
 * Rejects the request with -EBUSY during a firmware reset and with -EINVAL
 * for an unknown @mode. A pure active/inactive switchdev toggle is handled
 * by mlx5_devlink_switchdev_active_mode_change() and returns 0 early.
 * Otherwise LAG changes are disabled, the eswitch lock is tried, and - if
 * the mode really changes and is not blocked - the eswitch is disabled and
 * restarted in the requested mode with representors blocked.
 *
 * Returns 0 on success (including "already in that mode") or a negative
 * error, with a message in @extack for most failures.
 */
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
				  struct netlink_ext_ack *extack)
{
	u16 cur_mlx5_mode, mlx5_mode = 0;
	struct mlx5_eswitch *esw;
	int err = 0;

	esw = mlx5_devlink_eswitch_get(devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	if (mlx5_fw_reset_in_progress(esw->dev)) {
		NL_SET_ERR_MSG_MOD(extack, "Can't change eswitch mode during firmware reset");
		return -EBUSY;
	}

	if (esw_mode_from_devlink(mode, &mlx5_mode))
		return -EINVAL;

	/* Avoid try_lock, active/inactive mode change is not restricted */
	if (mlx5_devlink_switchdev_active_mode_change(esw, mode))
		return 0;

	mlx5_lag_disable_change(esw->dev);
	err = mlx5_esw_try_lock(esw);
	if (err < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
		goto enable_lag;
	}
	/* A non-negative return from try_lock is the current mode. */
	cur_mlx5_mode = err;
	err = 0;

	if (cur_mlx5_mode == mlx5_mode)
		goto unlock;

	if (esw->offloads.num_block_mode) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Can't change eswitch mode when IPsec SA and/or policies are configured");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	/* Keep mode_lock and reps_lock unnested. The operation flag excludes
	 * mode users while mode_lock is dropped before taking reps_lock.
	 */
	esw->eswitch_operation_in_progress = true;
	up_write(&esw->mode_lock);

	mlx5_esw_reps_block(esw);

	if (mlx5_mode == MLX5_ESWITCH_OFFLOADS &&
	    !mlx5_devlink_netdev_netns_immutable_set(devlink, true)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's.");
		err = -EINVAL;
		goto skip;
	}

	if (mlx5_mode == MLX5_ESWITCH_LEGACY)
		esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY;
	if (mlx5_mode == MLX5_ESWITCH_OFFLOADS)
		esw->dev->priv.flags &= ~MLX5_PRIV_FLAGS_SWITCH_LEGACY;
	mlx5_eswitch_disable_locked(esw);
	if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) {
		/* NOTE(review): this check runs after the eswitch has
		 * already been disabled above - confirm that is intended.
		 */
		if (mlx5_devlink_trap_get_num_active(esw->dev)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Can't change mode while devlink traps are active");
			err = -EOPNOTSUPP;
			goto skip;
		}
		esw->offloads_inactive =
			(mode == DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE);
		err = esw_offloads_start(esw, extack);
	} else if (mlx5_mode == MLX5_ESWITCH_LEGACY) {
		err = esw_offloads_stop(esw, extack);
	} else {
		err = -EINVAL;
	}

skip:
	/* A failed move to switchdev clears the immutable flag again. */
	if (mlx5_mode == MLX5_ESWITCH_OFFLOADS && err)
		mlx5_devlink_netdev_netns_immutable_set(devlink, false);
	/* Reconfiguration is done; drop reps_lock before taking mode_lock again
	 * to clear the operation flag.
	 */
	mlx5_esw_reps_unblock(esw);
	down_write(&esw->mode_lock);
	esw->eswitch_operation_in_progress = false;
unlock:
	mlx5_esw_unlock(esw);
enable_lag:
	mlx5_lag_enable_change(esw->dev);
	/* Shared FDB activation is creating LAG which is changing reps. */
	if (!err)
		mlx5_sd_eswitch_mode_set(esw->dev, mlx5_mode);
	return err;
}
4553 
4554 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
4555 {
4556 	struct mlx5_eswitch *esw;
4557 
4558 	esw = mlx5_devlink_eswitch_get(devlink);
4559 	if (IS_ERR(esw))
4560 		return PTR_ERR(esw);
4561 
4562 	return esw_mode_to_devlink(esw, mode);
4563 }
4564 
4565 static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
4566 				      struct netlink_ext_ack *extack)
4567 {
4568 	struct mlx5_core_dev *dev = esw->dev;
4569 	struct mlx5_vport *vport;
4570 	u16 err_vport_num = 0;
4571 	unsigned long i;
4572 	int err = 0;
4573 
4574 	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
4575 		err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
4576 		if (err) {
4577 			err_vport_num = vport->vport;
4578 			NL_SET_ERR_MSG_MOD(extack,
4579 					   "Failed to set min inline on vport");
4580 			goto revert_inline_mode;
4581 		}
4582 	}
4583 	if (mlx5_core_ec_sriov_enabled(esw->dev)) {
4584 		mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) {
4585 			err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
4586 			if (err) {
4587 				err_vport_num = vport->vport;
4588 				NL_SET_ERR_MSG_MOD(extack,
4589 						   "Failed to set min inline on vport");
4590 				goto revert_ec_vf_inline_mode;
4591 			}
4592 		}
4593 	}
4594 	return 0;
4595 
4596 revert_ec_vf_inline_mode:
4597 	mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) {
4598 		if (vport->vport == err_vport_num)
4599 			break;
4600 		mlx5_modify_nic_vport_min_inline(dev,
4601 						 vport->vport,
4602 						 esw->offloads.inline_mode);
4603 	}
4604 revert_inline_mode:
4605 	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
4606 		if (vport->vport == err_vport_num)
4607 			break;
4608 		mlx5_modify_nic_vport_min_inline(dev,
4609 						 vport->vport,
4610 						 esw->offloads.inline_mode);
4611 	}
4612 	return err;
4613 }
4614 
4615 int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
4616 					 struct netlink_ext_ack *extack)
4617 {
4618 	struct mlx5_core_dev *dev = devlink_priv(devlink);
4619 	struct mlx5_eswitch *esw;
4620 	u8 mlx5_mode;
4621 	int err;
4622 
4623 	esw = mlx5_devlink_eswitch_get(devlink);
4624 	if (IS_ERR(esw))
4625 		return PTR_ERR(esw);
4626 
4627 	down_write(&esw->mode_lock);
4628 
4629 	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
4630 	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
4631 		if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
4632 			err = 0;
4633 			goto out;
4634 		}
4635 
4636 		fallthrough;
4637 	case MLX5_CAP_INLINE_MODE_L2:
4638 		NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
4639 		err = -EOPNOTSUPP;
4640 		goto out;
4641 	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
4642 		break;
4643 	}
4644 
4645 	if (atomic64_read(&esw->offloads.num_flows) > 0) {
4646 		NL_SET_ERR_MSG_MOD(extack,
4647 				   "Can't set inline mode when flows are configured");
4648 		err = -EOPNOTSUPP;
4649 		goto out;
4650 	}
4651 
4652 	err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
4653 	if (err)
4654 		goto out;
4655 
4656 	esw->eswitch_operation_in_progress = true;
4657 	up_write(&esw->mode_lock);
4658 
4659 	err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
4660 	if (!err)
4661 		esw->offloads.inline_mode = mlx5_mode;
4662 
4663 	down_write(&esw->mode_lock);
4664 	esw->eswitch_operation_in_progress = false;
4665 	up_write(&esw->mode_lock);
4666 	return 0;
4667 
4668 out:
4669 	up_write(&esw->mode_lock);
4670 	return err;
4671 }
4672 
4673 int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
4674 {
4675 	struct mlx5_eswitch *esw;
4676 
4677 	esw = mlx5_devlink_eswitch_get(devlink);
4678 	if (IS_ERR(esw))
4679 		return PTR_ERR(esw);
4680 
4681 	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
4682 }
4683 
4684 bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev, bool from_fdb)
4685 {
4686 	struct mlx5_eswitch *esw = dev->priv.eswitch;
4687 	enum devlink_eswitch_encap_mode encap;
4688 	bool allow_tunnel = false;
4689 
4690 	if (!mlx5_esw_allowed(esw))
4691 		return true;
4692 
4693 	down_write(&esw->mode_lock);
4694 	encap = esw->offloads.encap;
4695 	if (esw->mode == MLX5_ESWITCH_LEGACY ||
4696 	    (encap == DEVLINK_ESWITCH_ENCAP_MODE_NONE && !from_fdb)) {
4697 		allow_tunnel = true;
4698 		esw->offloads.num_block_encap++;
4699 	}
4700 	up_write(&esw->mode_lock);
4701 
4702 	return allow_tunnel;
4703 }
4704 
4705 void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
4706 {
4707 	struct mlx5_eswitch *esw = dev->priv.eswitch;
4708 
4709 	if (!mlx5_esw_allowed(esw))
4710 		return;
4711 
4712 	down_write(&esw->mode_lock);
4713 	esw->offloads.num_block_encap--;
4714 	up_write(&esw->mode_lock);
4715 }
4716 
4717 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
4718 					enum devlink_eswitch_encap_mode encap,
4719 					struct netlink_ext_ack *extack)
4720 {
4721 	struct mlx5_core_dev *dev = devlink_priv(devlink);
4722 	struct mlx5_eswitch *esw;
4723 	int err = 0;
4724 
4725 	esw = mlx5_devlink_eswitch_get(devlink);
4726 	if (IS_ERR(esw))
4727 		return PTR_ERR(esw);
4728 
4729 	down_write(&esw->mode_lock);
4730 
4731 	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
4732 	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
4733 	     !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) {
4734 		err = -EOPNOTSUPP;
4735 		goto unlock;
4736 	}
4737 
4738 	if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) {
4739 		err = -EOPNOTSUPP;
4740 		goto unlock;
4741 	}
4742 
4743 	if (esw->mode == MLX5_ESWITCH_LEGACY) {
4744 		esw->offloads.encap = encap;
4745 		goto unlock;
4746 	}
4747 
4748 	if (esw->offloads.encap == encap)
4749 		goto unlock;
4750 
4751 	if (atomic64_read(&esw->offloads.num_flows) > 0) {
4752 		NL_SET_ERR_MSG_MOD(extack,
4753 				   "Can't set encapsulation when flows are configured");
4754 		err = -EOPNOTSUPP;
4755 		goto unlock;
4756 	}
4757 
4758 	if (esw->offloads.num_block_encap) {
4759 		NL_SET_ERR_MSG_MOD(extack,
4760 				   "Can't set encapsulation when IPsec SA and/or policies are configured");
4761 		err = -EOPNOTSUPP;
4762 		goto unlock;
4763 	}
4764 
4765 	esw->eswitch_operation_in_progress = true;
4766 	up_write(&esw->mode_lock);
4767 
4768 	esw_destroy_offloads_fdb_tables(esw);
4769 
4770 	esw->offloads.encap = encap;
4771 
4772 	err = esw_create_offloads_fdb_tables(esw);
4773 
4774 	if (err) {
4775 		NL_SET_ERR_MSG_MOD(extack,
4776 				   "Failed re-creating fast FDB table");
4777 		esw->offloads.encap = !encap;
4778 		(void)esw_create_offloads_fdb_tables(esw);
4779 	}
4780 
4781 	down_write(&esw->mode_lock);
4782 	esw->eswitch_operation_in_progress = false;
4783 
4784 unlock:
4785 	up_write(&esw->mode_lock);
4786 	return err;
4787 }
4788 
4789 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
4790 					enum devlink_eswitch_encap_mode *encap)
4791 {
4792 	struct mlx5_eswitch *esw;
4793 
4794 	esw = mlx5_devlink_eswitch_get(devlink);
4795 	if (IS_ERR(esw))
4796 		return PTR_ERR(esw);
4797 
4798 	*encap = esw->offloads.encap;
4799 	return 0;
4800 }
4801 
4802 static bool
4803 mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num)
4804 {
4805 	/* Currently, only ECPF based device has representor for host PF. */
4806 	if (vport_num == MLX5_VPORT_HOST_PF &&
4807 	    (!mlx5_core_is_ecpf_esw_manager(esw->dev) ||
4808 	     !mlx5_esw_host_functions_enabled(esw->dev)))
4809 		return false;
4810 
4811 	if (vport_num == MLX5_VPORT_ECPF &&
4812 	    !mlx5_ecpf_vport_exists(esw->dev))
4813 		return false;
4814 
4815 	return true;
4816 }
4817 
4818 static void
4819 mlx5_eswitch_register_vport_reps_blocked(struct mlx5_eswitch *esw,
4820 					 const struct mlx5_eswitch_rep_ops *ops,
4821 					 u8 rep_type)
4822 {
4823 	struct mlx5_eswitch_rep_data *rep_data;
4824 	struct mlx5_eswitch_rep *rep;
4825 	unsigned long i;
4826 
4827 	esw->offloads.rep_ops[rep_type] = ops;
4828 	mlx5_esw_for_each_rep(esw, i, rep) {
4829 		if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) {
4830 			rep->esw = esw;
4831 			rep_data = &rep->rep_data[rep_type];
4832 			atomic_set(&rep_data->state, REP_REGISTERED);
4833 		}
4834 	}
4835 }
4836 
4837 static void mlx5_eswitch_reload_reps_blocked(struct mlx5_eswitch *esw)
4838 {
4839 	struct mlx5_eswitch_rep *uplink;
4840 	struct mlx5_vport *vport;
4841 	bool newly_loaded;
4842 	unsigned long i;
4843 
4844 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
4845 		return;
4846 
4847 	uplink = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
4848 	if (__esw_offloads_load_rep(esw, uplink, REP_ETH, &newly_loaded))
4849 		return;
4850 	if (mlx5_sd_is_primary(esw->dev) &&
4851 	    __esw_offloads_load_rep(esw, uplink, REP_IB, NULL)) {
4852 		if (newly_loaded)
4853 			__esw_offloads_unload_rep(esw, uplink, REP_ETH);
4854 		return;
4855 	}
4856 
4857 	if (mlx5_get_sd(esw->dev) && !mlx5_lag_is_active(esw->dev))
4858 		return;
4859 
4860 	mlx5_esw_for_each_vport(esw, i, vport) {
4861 		if (!vport)
4862 			continue;
4863 		if (!vport->enabled)
4864 			continue;
4865 		if (vport->vport == MLX5_VPORT_UPLINK)
4866 			continue;
4867 		if (!mlx5_eswitch_vport_has_rep(esw, vport->vport))
4868 			continue;
4869 
4870 		mlx5_esw_offloads_rep_load(esw, vport->vport);
4871 	}
4872 }
4873 
/* Reload the representors with representor changes blocked for the
 * duration of the reload.
 */
static void mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
{
	mlx5_esw_reps_block(esw);
	mlx5_eswitch_reload_reps_blocked(esw);
	mlx5_esw_reps_unblock(esw);
}
4880 
4881 static void
4882 mlx5_eswitch_register_vport_reps_locked(struct mlx5_eswitch *esw,
4883 					const struct mlx5_eswitch_rep_ops *ops,
4884 					u8 rep_type, bool nested)
4885 {
4886 	if (nested)
4887 		mlx5_esw_reps_block_nested(esw);
4888 	else
4889 		mlx5_esw_reps_block(esw);
4890 	mlx5_eswitch_register_vport_reps_blocked(esw, ops, rep_type);
4891 	mlx5_esw_reps_unblock(esw);
4892 
4893 	mlx5_esw_add_work(esw, mlx5_eswitch_reload_reps, GFP_KERNEL);
4894 }
4895 
/* Register representor ops @ops for @rep_type on @esw, using the
 * non-nested representor block.
 */
void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
				      const struct mlx5_eswitch_rep_ops *ops,
				      u8 rep_type)
{
	mlx5_eswitch_register_vport_reps_locked(esw, ops, rep_type, false);
}
4902 EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
4903 
/* Register representor ops @ops for @rep_type on @esw, using the nested
 * representor block.
 */
void
mlx5_eswitch_register_vport_reps_nested(struct mlx5_eswitch *esw,
					const struct mlx5_eswitch_rep_ops *ops,
					u8 rep_type)
{
	mlx5_eswitch_register_vport_reps_locked(esw, ops, rep_type, true);
}
4911 EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps_nested);
4912 
4913 static void
4914 mlx5_eswitch_unregister_vport_reps_blocked(struct mlx5_eswitch *esw,
4915 					   u8 rep_type)
4916 {
4917 	struct mlx5_eswitch_rep *rep;
4918 	unsigned long i;
4919 
4920 	__unload_reps_all_vport(esw, rep_type);
4921 
4922 	mlx5_esw_for_each_rep(esw, i, rep)
4923 		atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
4924 
4925 	esw->offloads.rep_ops[rep_type] = NULL;
4926 }
4927 
4928 static void
4929 mlx5_eswitch_unregister_vport_reps_locked(struct mlx5_eswitch *esw,
4930 					  u8 rep_type, bool nested)
4931 {
4932 	if (nested)
4933 		mlx5_esw_reps_block_nested(esw);
4934 	else
4935 		mlx5_esw_reps_block(esw);
4936 	mlx5_eswitch_unregister_vport_reps_blocked(esw, rep_type);
4937 	mlx5_esw_reps_unblock(esw);
4938 }
4939 
/*
 * Exported entry point for unregistering the representor ops of @rep_type.
 * Forwards to mlx5_eswitch_unregister_vport_reps_locked() with
 * nested == false.
 */
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
	mlx5_eswitch_unregister_vport_reps_locked(esw, rep_type, false);
}
EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
4945 
/*
 * Same as mlx5_eswitch_unregister_vport_reps(), but passes nested == true so
 * the reps are blocked through mlx5_esw_reps_block_nested().
 */
void mlx5_eswitch_unregister_vport_reps_nested(struct mlx5_eswitch *esw,
					       u8 rep_type)
{
	mlx5_eswitch_unregister_vport_reps_locked(esw, rep_type, true);
}
EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps_nested);
4952 
4953 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
4954 {
4955 	struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev);
4956 	struct mlx5_eswitch_rep *rep;
4957 
4958 	if (primary)
4959 		esw = primary->priv.eswitch;
4960 	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
4961 	return rep->rep_data[rep_type].priv;
4962 }
4963 
4964 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
4965 				 u16 vport,
4966 				 u8 rep_type)
4967 {
4968 	struct mlx5_eswitch_rep *rep;
4969 
4970 	rep = mlx5_eswitch_get_rep(esw, vport);
4971 
4972 	if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
4973 	    esw->offloads.rep_ops[rep_type]->get_proto_dev)
4974 		return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep);
4975 	return NULL;
4976 }
4977 EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
4978 
4979 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
4980 {
4981 	struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev);
4982 
4983 	if (primary)
4984 		esw = primary->priv.eswitch;
4985 
4986 	return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
4987 }
4988 EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
4989 
/*
 * Exported accessor for the representor of @vport; a plain pass-through to
 * mlx5_eswitch_get_rep().
 */
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
						u16 vport)
{
	return mlx5_eswitch_get_rep(esw, vport);
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
4996 
4997 bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
4998 {
4999 	return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED);
5000 }
5001 EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled);
5002 
5003 bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
5004 {
5005 	return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
5006 }
5007 EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
5008 
5009 u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
5010 					      u16 vport_num)
5011 {
5012 	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
5013 
5014 	if (WARN_ON_ONCE(IS_ERR(vport)))
5015 		return 0;
5016 
5017 	return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
5018 }
5019 EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
5020 
5021 int mlx5_esw_vport_vhca_id_map(struct mlx5_eswitch *esw,
5022 			       struct mlx5_vport *vport)
5023 {
5024 	u16 *old_entry, *vhca_map_entry, vhca_id;
5025 
5026 	if (WARN_ONCE(MLX5_VPORT_INVAL_VHCA_ID(vport),
5027 		      "vport %d vhca_id is not set", vport->vport)) {
5028 		int err;
5029 
5030 		err = mlx5_vport_get_vhca_id(vport->dev, vport->vport,
5031 					     &vhca_id);
5032 		if (err)
5033 			return err;
5034 		vport->vhca_id = vhca_id;
5035 	}
5036 
5037 	vhca_id = vport->vhca_id;
5038 	vhca_map_entry = kmalloc_obj(*vhca_map_entry);
5039 	if (!vhca_map_entry)
5040 		return -ENOMEM;
5041 
5042 	*vhca_map_entry = vport->vport;
5043 	old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL);
5044 	if (xa_is_err(old_entry)) {
5045 		kfree(vhca_map_entry);
5046 		return xa_err(old_entry);
5047 	}
5048 	kfree(old_entry);
5049 	return 0;
5050 }
5051 
5052 void mlx5_esw_vport_vhca_id_unmap(struct mlx5_eswitch *esw,
5053 				  struct mlx5_vport *vport)
5054 {
5055 	u16 *vhca_map_entry;
5056 
5057 	vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vport->vhca_id);
5058 	kfree(vhca_map_entry);
5059 }
5060 
5061 int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num)
5062 {
5063 	u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id);
5064 
5065 	if (!res)
5066 		return -ENOENT;
5067 
5068 	*vport_num = *res;
5069 	return 0;
5070 }
5071 
5072 u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
5073 					    u16 vport_num)
5074 {
5075 	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
5076 
5077 	if (WARN_ON_ONCE(IS_ERR(vport)))
5078 		return 0;
5079 
5080 	return vport->metadata;
5081 }
5082 EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set);
5083 
5084 int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port,
5085 				     u8 *hw_addr, int *hw_addr_len,
5086 				     struct netlink_ext_ack *extack)
5087 {
5088 	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5089 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5090 
5091 	mutex_lock(&esw->state_lock);
5092 
5093 	mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, true,
5094 					 vport->info.mac);
5095 	ether_addr_copy(hw_addr, vport->info.mac);
5096 	*hw_addr_len = ETH_ALEN;
5097 	mutex_unlock(&esw->state_lock);
5098 	return 0;
5099 }
5100 
5101 int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port,
5102 				     const u8 *hw_addr, int hw_addr_len,
5103 				     struct netlink_ext_ack *extack)
5104 {
5105 	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5106 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5107 
5108 	return mlx5_eswitch_set_vport_mac(esw, vport->vport, hw_addr);
5109 }
5110 
5111 int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled,
5112 					struct netlink_ext_ack *extack)
5113 {
5114 	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5115 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5116 
5117 	if (!MLX5_CAP_GEN(esw->dev, migration)) {
5118 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
5119 		return -EOPNOTSUPP;
5120 	}
5121 
5122 	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) {
5123 		NL_SET_ERR_MSG_MOD(extack,
5124 				   "Device doesn't support vport group management");
5125 		return -EOPNOTSUPP;
5126 	}
5127 
5128 	mutex_lock(&esw->state_lock);
5129 	*is_enabled = vport->info.mig_enabled;
5130 	mutex_unlock(&esw->state_lock);
5131 	return 0;
5132 }
5133 
5134 int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
5135 					struct netlink_ext_ack *extack)
5136 {
5137 	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5138 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5139 	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
5140 	void *query_ctx;
5141 	void *hca_caps;
5142 	int err;
5143 
5144 	if (!MLX5_CAP_GEN(esw->dev, migration)) {
5145 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
5146 		return -EOPNOTSUPP;
5147 	}
5148 
5149 	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
5150 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management");
5151 		return -EOPNOTSUPP;
5152 	}
5153 
5154 	mutex_lock(&esw->state_lock);
5155 
5156 	if (vport->info.mig_enabled == enable) {
5157 		err = 0;
5158 		goto out;
5159 	}
5160 
5161 	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
5162 	if (!query_ctx) {
5163 		err = -ENOMEM;
5164 		goto out;
5165 	}
5166 
5167 	err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
5168 					    MLX5_CAP_GENERAL_2);
5169 	if (err) {
5170 		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
5171 		goto out_free;
5172 	}
5173 
5174 	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
5175 	MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable);
5176 
5177 	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
5178 					    MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
5179 	if (err) {
5180 		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA migratable cap");
5181 		goto out_free;
5182 	}
5183 
5184 	vport->info.mig_enabled = enable;
5185 
5186 out_free:
5187 	kfree(query_ctx);
5188 out:
5189 	mutex_unlock(&esw->state_lock);
5190 	return err;
5191 }
5192 
5193 int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
5194 				  struct netlink_ext_ack *extack)
5195 {
5196 	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5197 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5198 
5199 	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) {
5200 		NL_SET_ERR_MSG_MOD(extack,
5201 				   "Device doesn't support vport group management");
5202 		return -EOPNOTSUPP;
5203 	}
5204 
5205 	mutex_lock(&esw->state_lock);
5206 	*is_enabled = vport->info.roce_enabled;
5207 	mutex_unlock(&esw->state_lock);
5208 	return 0;
5209 }
5210 
5211 int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
5212 				  struct netlink_ext_ack *extack)
5213 {
5214 	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5215 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5216 	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
5217 	u16 vport_num = vport->vport;
5218 	void *query_ctx;
5219 	void *hca_caps;
5220 	int err;
5221 
5222 	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
5223 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management");
5224 		return -EOPNOTSUPP;
5225 	}
5226 
5227 	mutex_lock(&esw->state_lock);
5228 
5229 	if (vport->info.roce_enabled == enable) {
5230 		err = 0;
5231 		goto out;
5232 	}
5233 
5234 	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
5235 	if (!query_ctx) {
5236 		err = -ENOMEM;
5237 		goto out;
5238 	}
5239 
5240 	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
5241 					    MLX5_CAP_GENERAL);
5242 	if (err) {
5243 		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
5244 		goto out_free;
5245 	}
5246 
5247 	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
5248 	MLX5_SET(cmd_hca_cap, hca_caps, roce, enable);
5249 
5250 	err = mlx5_vport_set_other_func_general_cap(esw->dev, hca_caps,
5251 						    vport_num);
5252 	if (err) {
5253 		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA roce cap");
5254 		goto out_free;
5255 	}
5256 
5257 	vport->info.roce_enabled = enable;
5258 
5259 out_free:
5260 	kfree(query_ctx);
5261 out:
5262 	mutex_unlock(&esw->state_lock);
5263 	return err;
5264 }
5265 
5266 int mlx5_devlink_pf_port_fn_state_get(struct devlink_port *port,
5267 				      enum devlink_port_fn_state *state,
5268 				      enum devlink_port_fn_opstate *opstate,
5269 				      struct netlink_ext_ack *extack)
5270 {
5271 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5272 	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
5273 	const u32 *query_out;
5274 	bool pf_disabled;
5275 
5276 	if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
5277 		NL_SET_ERR_MSG_MOD(extack, "State get is not supported for VF");
5278 		return -EOPNOTSUPP;
5279 	}
5280 
5281 	*state = vport->pf_activated ?
5282 		 DEVLINK_PORT_FN_STATE_ACTIVE : DEVLINK_PORT_FN_STATE_INACTIVE;
5283 
5284 	query_out = mlx5_esw_query_functions(vport->dev);
5285 	if (IS_ERR(query_out))
5286 		return PTR_ERR(query_out);
5287 
5288 	if (vport->vport == MLX5_VPORT_HOST_PF) {
5289 		struct mlx5_esw_pf_info host_pf_info;
5290 
5291 		host_pf_info = mlx5_esw_get_host_pf_info(vport->dev,
5292 							 query_out);
5293 		pf_disabled = host_pf_info.pf_disabled;
5294 	} else {
5295 		pf_disabled = mlx5_esw_get_spf_disabled(vport->dev, query_out,
5296 							vport->vhca_id);
5297 	}
5298 
5299 	*opstate = pf_disabled ? DEVLINK_PORT_FN_OPSTATE_DETACHED :
5300 				 DEVLINK_PORT_FN_OPSTATE_ATTACHED;
5301 
5302 	kvfree(query_out);
5303 	return 0;
5304 }
5305 
5306 int mlx5_devlink_pf_port_fn_state_set(struct devlink_port *port,
5307 				      enum devlink_port_fn_state state,
5308 				      struct netlink_ext_ack *extack)
5309 {
5310 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5311 	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
5312 	struct mlx5_core_dev *dev;
5313 
5314 	if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
5315 		NL_SET_ERR_MSG_MOD(extack, "State set is not supported for VF");
5316 		return -EOPNOTSUPP;
5317 	}
5318 
5319 	dev = vport->dev;
5320 
5321 	switch (state) {
5322 	case DEVLINK_PORT_FN_STATE_ACTIVE:
5323 		return mlx5_esw_pf_enable_hca(dev, vport->vport);
5324 	case DEVLINK_PORT_FN_STATE_INACTIVE:
5325 		return mlx5_esw_pf_disable_hca(dev, vport->vport);
5326 	default:
5327 		return -EOPNOTSUPP;
5328 	}
5329 }
5330 
5331 int
5332 mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule,
5333 				struct mlx5_esw_flow_attr *esw_attr, int attr_idx)
5334 {
5335 	struct mlx5_flow_destination new_dest = {};
5336 	struct mlx5_flow_destination old_dest = {};
5337 
5338 	if (!esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx))
5339 		return 0;
5340 
5341 	esw_setup_dest_fwd_ipsec(&old_dest, NULL, esw, esw_attr, attr_idx, 0, false);
5342 	esw_setup_dest_fwd_vport(&new_dest, NULL, esw, esw_attr, attr_idx, 0, false);
5343 
5344 	return mlx5_modify_rule_destination(rule, &new_dest, &old_dest);
5345 }
5346 
5347 #ifdef CONFIG_XFRM_OFFLOAD
5348 int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_enabled,
5349 					  struct netlink_ext_ack *extack)
5350 {
5351 	struct mlx5_eswitch *esw;
5352 	struct mlx5_vport *vport;
5353 	int err = 0;
5354 
5355 	esw = mlx5_devlink_eswitch_get(port->devlink);
5356 	if (IS_ERR(esw))
5357 		return PTR_ERR(esw);
5358 
5359 	if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) {
5360 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPSec crypto");
5361 		return -EOPNOTSUPP;
5362 	}
5363 
5364 	vport = mlx5_devlink_port_vport_get(port);
5365 
5366 	mutex_lock(&esw->state_lock);
5367 	if (!vport->enabled) {
5368 		err = -EOPNOTSUPP;
5369 		goto unlock;
5370 	}
5371 
5372 	*is_enabled = vport->info.ipsec_crypto_enabled;
5373 unlock:
5374 	mutex_unlock(&esw->state_lock);
5375 	return err;
5376 }
5377 
5378 int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable,
5379 					  struct netlink_ext_ack *extack)
5380 {
5381 	struct mlx5_eswitch *esw;
5382 	struct mlx5_vport *vport;
5383 	u16 vport_num;
5384 	int err;
5385 
5386 	esw = mlx5_devlink_eswitch_get(port->devlink);
5387 	if (IS_ERR(esw))
5388 		return PTR_ERR(esw);
5389 
5390 	vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
5391 	err = mlx5_esw_ipsec_vf_crypto_offload_supported(esw->dev, vport_num);
5392 	if (err) {
5393 		NL_SET_ERR_MSG_MOD(extack,
5394 				   "Device doesn't support IPsec crypto");
5395 		return err;
5396 	}
5397 
5398 	vport = mlx5_devlink_port_vport_get(port);
5399 
5400 	mutex_lock(&esw->state_lock);
5401 	if (!vport->enabled) {
5402 		err = -EOPNOTSUPP;
5403 		NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
5404 		goto unlock;
5405 	}
5406 
5407 	if (vport->info.ipsec_crypto_enabled == enable)
5408 		goto unlock;
5409 
5410 	if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) {
5411 		err = -EBUSY;
5412 		goto unlock;
5413 	}
5414 
5415 	err = mlx5_esw_ipsec_vf_crypto_offload_set(esw, vport, enable);
5416 	if (err) {
5417 		NL_SET_ERR_MSG_MOD(extack, "Failed to set IPsec crypto");
5418 		goto unlock;
5419 	}
5420 
5421 	vport->info.ipsec_crypto_enabled = enable;
5422 	if (enable)
5423 		esw->enabled_ipsec_vf_count++;
5424 	else
5425 		esw->enabled_ipsec_vf_count--;
5426 unlock:
5427 	mutex_unlock(&esw->state_lock);
5428 	return err;
5429 }
5430 
5431 int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_enabled,
5432 					  struct netlink_ext_ack *extack)
5433 {
5434 	struct mlx5_eswitch *esw;
5435 	struct mlx5_vport *vport;
5436 	int err = 0;
5437 
5438 	esw = mlx5_devlink_eswitch_get(port->devlink);
5439 	if (IS_ERR(esw))
5440 		return PTR_ERR(esw);
5441 
5442 	if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) {
5443 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet");
5444 		return -EOPNOTSUPP;
5445 	}
5446 
5447 	vport = mlx5_devlink_port_vport_get(port);
5448 
5449 	mutex_lock(&esw->state_lock);
5450 	if (!vport->enabled) {
5451 		err = -EOPNOTSUPP;
5452 		goto unlock;
5453 	}
5454 
5455 	*is_enabled = vport->info.ipsec_packet_enabled;
5456 unlock:
5457 	mutex_unlock(&esw->state_lock);
5458 	return err;
5459 }
5460 
5461 int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port,
5462 					  bool enable,
5463 					  struct netlink_ext_ack *extack)
5464 {
5465 	struct mlx5_eswitch *esw;
5466 	struct mlx5_vport *vport;
5467 	u16 vport_num;
5468 	int err;
5469 
5470 	esw = mlx5_devlink_eswitch_get(port->devlink);
5471 	if (IS_ERR(esw))
5472 		return PTR_ERR(esw);
5473 
5474 	vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
5475 	err = mlx5_esw_ipsec_vf_packet_offload_supported(esw->dev, vport_num);
5476 	if (err) {
5477 		NL_SET_ERR_MSG_MOD(extack,
5478 				   "Device doesn't support IPsec packet mode");
5479 		return err;
5480 	}
5481 
5482 	vport = mlx5_devlink_port_vport_get(port);
5483 	mutex_lock(&esw->state_lock);
5484 	if (!vport->enabled) {
5485 		err = -EOPNOTSUPP;
5486 		NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
5487 		goto unlock;
5488 	}
5489 
5490 	if (vport->info.ipsec_packet_enabled == enable)
5491 		goto unlock;
5492 
5493 	if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) {
5494 		err = -EBUSY;
5495 		goto unlock;
5496 	}
5497 
5498 	err = mlx5_esw_ipsec_vf_packet_offload_set(esw, vport, enable);
5499 	if (err) {
5500 		NL_SET_ERR_MSG_MOD(extack,
5501 				   "Failed to set IPsec packet mode");
5502 		goto unlock;
5503 	}
5504 
5505 	vport->info.ipsec_packet_enabled = enable;
5506 	if (enable)
5507 		esw->enabled_ipsec_vf_count++;
5508 	else
5509 		esw->enabled_ipsec_vf_count--;
5510 unlock:
5511 	mutex_unlock(&esw->state_lock);
5512 	return err;
5513 }
5514 #endif /* CONFIG_XFRM_OFFLOAD */
5515 
5516 int
5517 mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs,
5518 				    struct netlink_ext_ack *extack)
5519 {
5520 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5521 	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
5522 	u16 vport_num = vport->vport;
5523 	struct mlx5_eswitch *esw;
5524 	void *query_ctx;
5525 	void *hca_caps;
5526 	u32 max_eqs;
5527 	int err;
5528 
5529 	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5530 	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) {
5531 		NL_SET_ERR_MSG_MOD(extack,
5532 				   "Device doesn't support vport group management");
5533 		return -EOPNOTSUPP;
5534 	}
5535 
5536 	if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) {
5537 		NL_SET_ERR_MSG_MOD(extack,
5538 				   "Device doesn't support getting the max number of EQs");
5539 		return -EOPNOTSUPP;
5540 	}
5541 
5542 	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
5543 	if (!query_ctx)
5544 		return -ENOMEM;
5545 
5546 	mutex_lock(&esw->state_lock);
5547 	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
5548 					    MLX5_CAP_GENERAL_2);
5549 	if (err) {
5550 		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
5551 		goto out;
5552 	}
5553 
5554 	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
5555 	max_eqs = MLX5_GET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b);
5556 	if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
5557 		*max_io_eqs = 0;
5558 	else
5559 		*max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS;
5560 out:
5561 	mutex_unlock(&esw->state_lock);
5562 	kfree(query_ctx);
5563 	return err;
5564 }
5565 
5566 int
5567 mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
5568 				    struct netlink_ext_ack *extack)
5569 {
5570 	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
5571 	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
5572 	u16 vport_num = vport->vport;
5573 	struct mlx5_eswitch *esw;
5574 	void *query_ctx;
5575 	void *hca_caps;
5576 	u16 max_eqs;
5577 	int err;
5578 
5579 	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
5580 	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
5581 		NL_SET_ERR_MSG_MOD(extack,
5582 				   "Device doesn't support VHCA management");
5583 		return -EOPNOTSUPP;
5584 	}
5585 
5586 	if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) {
5587 		NL_SET_ERR_MSG_MOD(extack,
5588 				   "Device doesn't support changing the max number of EQs");
5589 		return -EOPNOTSUPP;
5590 	}
5591 
5592 	if (check_add_overflow(max_io_eqs, MLX5_ESW_MAX_CTRL_EQS, &max_eqs)) {
5593 		NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range");
5594 		return -EINVAL;
5595 	}
5596 
5597 	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
5598 	if (!query_ctx)
5599 		return -ENOMEM;
5600 
5601 	mutex_lock(&esw->state_lock);
5602 	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
5603 					    MLX5_CAP_GENERAL_2);
5604 	if (err) {
5605 		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
5606 		goto out;
5607 	}
5608 
5609 	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
5610 	MLX5_SET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b, max_eqs);
5611 
5612 	if (mlx5_esw_is_sf_vport(esw, vport_num))
5613 		MLX5_SET(cmd_hca_cap_2, hca_caps, sf_eq_usage, 1);
5614 
5615 	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
5616 					    MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
5617 	if (err)
5618 		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
5619 	vport->max_eqs_set = true;
5620 out:
5621 	mutex_unlock(&esw->state_lock);
5622 	kfree(query_ctx);
5623 	return err;
5624 }
5625 
/*
 * Apply the default IO EQ count to the function behind devlink @port by
 * calling mlx5_devlink_port_fn_max_io_eqs_set() with
 * MLX5_ESW_DEFAULT_SF_COMP_EQS; that call's result is returned unchanged.
 */
int
mlx5_devlink_port_fn_max_io_eqs_set_sf_default(struct devlink_port *port,
					       struct netlink_ext_ack *extack)
{
	return mlx5_devlink_port_fn_max_io_eqs_set(port,
						   MLX5_ESW_DEFAULT_SF_COMP_EQS,
						   extack);
}
5634