xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c (revision 7bcfb19465fca99efd09ecb5d3ef8f91179d7ff1)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3 
4 #include "lib/sd.h"
5 #include "../lag/lag.h"
6 #include "mlx5_core.h"
7 #include "lib/mlx5.h"
8 #include "devlink.h"
9 #include "eswitch.h"
10 #include "fs_cmd.h"
11 #include <linux/mlx5/eswitch.h>
12 #include <linux/mlx5/vport.h>
13 #include <linux/debugfs.h>
14 
/* Logging helpers: emit an info/warning message against the struct device
 * of the given mlx5 core device, with every message prefixed by
 * "Socket-Direct: ".
 */
#define sd_info(__dev, format, ...) \
	dev_info((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__)
#define sd_warn(__dev, format, ...) \
	dev_warn((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__)
19 
/* Per-device Socket-Direct context, attached to a core device through
 * mlx5_set_sd() and looked up with mlx5_get_sd().
 *
 * The anonymous union holds role-specific data: the first member is used
 * while the device is the group's primary, the second while it is a
 * secondary. The two views share storage, so only the one matching the
 * current role (see @primary) is meaningful.
 */
struct mlx5_sd {
	/* Group identifier; also used as the devcom match key. */
	u32 group_id;
	/* Number of host buses reported by the MPIR register; the group is
	 * only processed once the devcom component reaches this size.
	 */
	u8 host_buses;
	/* devcom component handle obtained in sd_register(). */
	struct mlx5_devcom_comp_dev *devcom;
	/* debugfs directory ("multi-pf") created on the primary. */
	struct dentry *dfs;
	/* enum mlx5_sd_state value; tracked on the primary's context. */
	u8 state;
	/* True when this device is the group's primary. */
	bool primary;
	/* Set when a FW silent-mode query reported a silent device in the
	 * group; in that case the driver does not set the L2 table silent
	 * entry on secondaries itself.
	 */
	bool fw_silents_secondaries;
	union {
		struct { /* primary */
			struct mlx5_core_dev *secondaries[MLX5_SD_MAX_GROUP_SZ - 1];
			struct mlx5_flow_table *tx_ft;
			/* Next index for secondary registration */
			u8 next_secondary_idx;
		};
		struct { /* secondary */
			struct mlx5_core_dev *primary_dev;
			u32 alias_obj_id;
			/* TX flow table root in switchdev (silent) config */
			bool tx_root_silent;
		};
	};
};
43 
/* Combined state of an SD group, stored in the primary's mlx5_sd::state.
 * DOWN is the zero-initialized default; UP is set at the end of a
 * successful mlx5_sd_init() and reverted in mlx5_sd_cleanup().
 */
enum mlx5_sd_state {
	MLX5_SD_STATE_DOWN = 0,
	MLX5_SD_STATE_UP,
};
48 
49 static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
50 {
51 	struct mlx5_sd *sd = mlx5_get_sd(dev);
52 
53 	if (!sd)
54 		return 1;
55 
56 	return sd->host_buses;
57 }
58 
59 struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev)
60 {
61 	struct mlx5_sd *sd = mlx5_get_sd(dev);
62 
63 	if (!sd)
64 		return dev;
65 
66 	if (!mlx5_devcom_comp_is_ready(sd->devcom))
67 		return NULL;
68 
69 	return sd->primary ? dev : sd->primary_dev;
70 }
71 
72 struct mlx5_devcom_comp_dev *mlx5_sd_get_devcom(struct mlx5_core_dev *dev)
73 {
74 	struct mlx5_sd *sd = mlx5_get_sd(dev);
75 
76 	if (!sd)
77 		return NULL;
78 
79 	return sd->devcom;
80 }
81 
82 bool mlx5_sd_is_primary(struct mlx5_core_dev *dev)
83 {
84 	struct mlx5_sd *sd = mlx5_get_sd(dev);
85 
86 	if (!sd)
87 		return true;
88 
89 	return sd->primary;
90 }
91 
92 int mlx5_sd_pf_num_get(struct mlx5_core_dev *dev)
93 {
94 	struct mlx5_sd *sd = mlx5_get_sd(dev);
95 	int pf_num = mlx5_get_dev_index(dev);
96 	struct mlx5_core_dev *pos;
97 	int i;
98 
99 	if (!sd)
100 		return pf_num;
101 
102 	mlx5_devcom_comp_assert_locked(sd->devcom);
103 	if (!mlx5_devcom_comp_is_ready(sd->devcom))
104 		return -ENODEV;
105 
106 	mlx5_sd_for_each_dev(i, mlx5_sd_get_primary(dev), pos)
107 		if (pos == dev)
108 			break;
109 
110 	return pf_num * sd->host_buses + i;
111 }
112 
113 struct mlx5_core_dev *
114 mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx)
115 {
116 	struct mlx5_sd *sd;
117 
118 	if (idx == 0)
119 		return primary;
120 
121 	if (idx >= mlx5_sd_get_host_buses(primary))
122 		return NULL;
123 
124 	sd = mlx5_get_sd(primary);
125 	return sd->secondaries[idx - 1];
126 }
127 
/* Map a channel index to the index of the group device serving it.
 * In switchdev mode every channel maps to device 0; otherwise channels
 * are distributed round-robin over the host buses.
 */
int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix)
{
	int buses;

	if (is_mdev_switchdev_mode(dev))
		return 0;

	buses = mlx5_sd_get_host_buses(dev);
	return ch_ix % buses;
}
135 
/* Map a channel index to a vector index on its device. In switchdev
 * mode the channel index is used as is; otherwise it is divided by the
 * host bus count.
 */
int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix)
{
	int buses;

	if (is_mdev_switchdev_mode(dev))
		return ch_ix;

	buses = mlx5_sd_get_host_buses(dev);
	return ch_ix / buses;
}
143 
/* Return the group device that serves channel ch_ix, resolved from the
 * primary via the channel-to-device index mapping.
 */
struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int ch_ix)
{
	return mlx5_sd_primary_get_peer(primary,
					mlx5_sd_ch_ix_get_dev_ix(primary, ch_ix));
}
150 
151 static bool ft_create_alias_supported(struct mlx5_core_dev *dev)
152 {
153 	u64 obj_allowed = MLX5_CAP_GEN_2_64(dev, allowed_object_for_other_vhca_access);
154 	u32 obj_supp = MLX5_CAP_GEN_2(dev, cross_vhca_object_to_object_supported);
155 
156 	if (!(obj_supp &
157 	    MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE))
158 		return false;
159 
160 	if (!(obj_allowed & MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE))
161 		return false;
162 
163 	return true;
164 }
165 
166 static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm,
167 			 u8 *host_buses)
168 {
169 	u32 out[MLX5_ST_SZ_DW(mpir_reg)];
170 	int err;
171 
172 	err = mlx5_query_mpir_reg(dev, out);
173 	if (err)
174 		return err;
175 
176 	*sdm = MLX5_GET(mpir_reg, out, sdm);
177 	*host_buses = MLX5_GET(mpir_reg, out, host_buses);
178 
179 	return 0;
180 }
181 
182 static u32 mlx5_sd_group_id(struct mlx5_core_dev *dev, u8 sd_group)
183 {
184 	return (u32)((MLX5_CAP_GEN(dev, native_port_num) << 8) | sd_group);
185 }
186 
187 static bool mlx5_sd_caps_supported(struct mlx5_core_dev *dev, u8 host_buses)
188 {
189 	/* Honor the SW implementation limit */
190 	if (host_buses > MLX5_SD_MAX_GROUP_SZ)
191 		return false;
192 
193 	/* Disconnect secondaries from the network */
194 	if (!MLX5_CAP_GEN(dev, eswitch_manager))
195 		return false;
196 	if (!MLX5_CAP_GEN(dev, silent_mode_set) &&
197 	    !MLX5_CAP_GEN(dev, silent_mode_query))
198 		return false;
199 
200 	/* RX steering from primary to secondaries */
201 	if (!MLX5_CAP_GEN(dev, cross_vhca_rqt))
202 		return false;
203 	if (host_buses > MLX5_CAP_GEN_2(dev, max_rqt_vhca_id))
204 		return false;
205 
206 	/* TX steering from secondaries to primary */
207 	if (!ft_create_alias_supported(dev))
208 		return false;
209 	if (!MLX5_CAP_FLOWTABLE_NIC_TX(dev, reset_root_to_default))
210 		return false;
211 
212 	return true;
213 }
214 
215 bool mlx5_sd_is_supported(struct mlx5_core_dev *dev)
216 {
217 	u8 host_buses, sd_group;
218 	bool sdm;
219 	int err;
220 
221 	/* Feature is currently implemented for PFs only */
222 	if (!mlx5_core_is_pf(dev))
223 		return false;
224 
225 	err = mlx5_query_nic_vport_sd_group(dev, &sd_group);
226 	if (err || !sd_group)
227 		return false;
228 
229 	if (!MLX5_CAP_MCAM_REG(dev, mpir))
230 		return false;
231 
232 	err = mlx5_query_sd(dev, &sdm, &host_buses);
233 	if (err || !sdm)
234 		return false;
235 
236 	return mlx5_sd_caps_supported(dev, host_buses);
237 }
238 
239 static int sd_init(struct mlx5_core_dev *dev)
240 {
241 	u8 host_buses, sd_group;
242 	struct mlx5_sd *sd;
243 	u32 group_id;
244 	bool sdm;
245 	int err;
246 
247 	/* Feature is currently implemented for PFs only */
248 	if (!mlx5_core_is_pf(dev))
249 		return 0;
250 
251 	err = mlx5_query_nic_vport_sd_group(dev, &sd_group);
252 	if (err)
253 		return err;
254 
255 	if (!sd_group)
256 		return 0;
257 
258 	if (!MLX5_CAP_MCAM_REG(dev, mpir))
259 		return 0;
260 
261 	err = mlx5_query_sd(dev, &sdm, &host_buses);
262 	if (err)
263 		return err;
264 
265 	if (!sdm)
266 		return 0;
267 
268 	group_id = mlx5_sd_group_id(dev, sd_group);
269 
270 	if (!mlx5_sd_caps_supported(dev, host_buses)) {
271 		sd_warn(dev, "can't support requested netdev combining for group id 0x%x, skipping\n",
272 			group_id);
273 		return 0;
274 	}
275 
276 	sd = kzalloc_obj(*sd);
277 	if (!sd)
278 		return -ENOMEM;
279 
280 	sd->host_buses = host_buses;
281 	sd->group_id = group_id;
282 
283 	mlx5_set_sd(dev, sd);
284 
285 	return 0;
286 }
287 
288 static void sd_cleanup(struct mlx5_core_dev *dev)
289 {
290 	struct mlx5_sd *sd = mlx5_get_sd(dev);
291 
292 	mlx5_set_sd(dev, NULL);
293 	kfree(sd);
294 }
295 
296 static int sd_lag_state_show(struct seq_file *file, void *priv)
297 {
298 	struct mlx5_core_dev *dev = file->private;
299 	struct mlx5_lag *ldev;
300 	struct lag_func *pf;
301 	bool active = false;
302 	int i;
303 
304 	ldev = mlx5_lag_dev(dev);
305 	if (!ldev)
306 		return -EINVAL;
307 
308 	mutex_lock(&ldev->lock);
309 	mlx5_ldev_for_each(i, 0, ldev) {
310 		pf = mlx5_lag_pf(ldev, i);
311 		if (pf->dev == dev) {
312 			active = pf->sd_fdb_active;
313 			break;
314 		}
315 	}
316 	mutex_unlock(&ldev->lock);
317 
318 	seq_printf(file, "%s\n", active ? "active" : "disabled");
319 	return 0;
320 }
321 
322 DEFINE_SHOW_ATTRIBUTE(sd_lag_state);
323 
/* SD LAG integration is optional. If LAG isn't available on this device
 * (e.g. lag caps are off), or registering secondaries fails, just warn
 * and continue - SD can operate without the LAG-side bookkeeping.
 *
 * Tags the primary's LAG function with the SD group id and adds every
 * secondary to the LAG device under ldev->lock. If adding any secondary
 * fails, the ones added so far are removed again and the primary's
 * group id is reset to 0. No error is reported to the caller.
 */
static void sd_lag_init(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *primary = mlx5_sd_get_primary(dev);
	struct mlx5_sd *sd = mlx5_get_sd(primary);
	struct mlx5_core_dev *pos, *to;
	struct mlx5_lag *ldev;
	struct lag_func *pf;
	int err;
	int i;

	ldev = mlx5_lag_dev(primary);
	if (!ldev) {
		sd_warn(primary, "%s: no ldev (LAG caps off?), skipping\n",
			__func__);
		return;
	}

	mutex_lock(&ldev->lock);
	pf = mlx5_lag_pf_by_dev(ldev, primary);
	if (!pf) {
		sd_warn(primary, "%s: primary not registered in ldev, skipping\n",
			__func__);
		goto out;
	}

	pf->group_id = sd->group_id;

	mlx5_sd_for_each_secondary(i, primary, pos) {
		err = mlx5_ldev_add_mdev(ldev, pos, sd->group_id);
		if (err) {
			sd_warn(primary, "%s: failed to add secondary %s to ldev: %d\n",
				__func__, dev_name(pos->device), err);
			goto err;
		}
	}

out:
	mutex_unlock(&ldev->lock);
	return;

err:
	/* 'pos' is the secondary whose add failed; use it as the bound
	 * for unwinding the secondaries that were added before it.
	 */
	to = pos;
	mlx5_sd_for_each_secondary_to(i, primary, to, pos)
		mlx5_ldev_remove_mdev(ldev, pos);
	pf->group_id = 0;
	mutex_unlock(&ldev->lock);
}
375 
376 static void sd_lag_cleanup(struct mlx5_core_dev *dev)
377 {
378 	struct mlx5_core_dev *primary = mlx5_sd_get_primary(dev);
379 	struct mlx5_core_dev *pos;
380 	struct mlx5_lag *ldev;
381 	struct lag_func *pf;
382 	int i;
383 
384 	ldev = mlx5_lag_dev(primary);
385 	if (!ldev)
386 		return;
387 
388 	mutex_lock(&ldev->lock);
389 	mlx5_sd_for_each_secondary(i, primary, pos)
390 		mlx5_ldev_remove_mdev(ldev, pos);
391 
392 	pf = mlx5_lag_pf_by_dev(ldev, primary);
393 	if (pf)
394 		pf->group_id = 0;
395 	mutex_unlock(&ldev->lock);
396 }
397 
/* Events exchanged between SD group members over devcom; dispatched in
 * mlx5_sd_devcom_event().
 */
enum {
	/* Elect the group's primary (see sd_handle_primary_set()). */
	SD_PRIMARY_SET,
	/* Secondaries register with the primary
	 * (see sd_handle_secondaries_set()).
	 */
	SD_SECONDARIES_SET,
	/* Propagate the FW silent-mode query result
	 * (see sd_handle_fw_silent_check()).
	 */
	SD_FW_SILENT_CHECK,
};
403 
404 static int sd_handle_fw_silent_check(struct mlx5_core_dev *dev,
405 				     struct mlx5_core_dev *peer)
406 {
407 	struct mlx5_sd *peer_sd = mlx5_get_sd(peer);
408 	struct mlx5_sd *sd = mlx5_get_sd(dev);
409 	u8 dev_silent = 0, peer_silent = 0;
410 	int err;
411 
412 	if (peer_sd->fw_silents_secondaries) {
413 		sd->fw_silents_secondaries = true;
414 		return 0;
415 	}
416 
417 	err = mlx5_fs_cmd_query_l2table_silent(dev, &dev_silent);
418 	if (err) {
419 		sd_warn(dev, "Failed to query silent mode for dev: %d\n", err);
420 		return err;
421 	}
422 
423 	err = mlx5_fs_cmd_query_l2table_silent(peer, &peer_silent);
424 	if (err) {
425 		sd_warn(dev, "Failed to query silent mode for peer: %d\n", err);
426 		return err;
427 	}
428 
429 	if (dev_silent || peer_silent) {
430 		sd->fw_silents_secondaries = true;
431 		peer_sd->fw_silents_secondaries = true;
432 		sd_info(dev, "FW indicates at least one device is silent\n");
433 	}
434 	return 0;
435 }
436 
437 static int sd_handle_primary_set(struct mlx5_core_dev *dev,
438 				 struct mlx5_core_dev *peer)
439 {
440 	struct mlx5_sd *peer_sd = mlx5_get_sd(peer);
441 	struct mlx5_sd *sd = mlx5_get_sd(dev);
442 	struct mlx5_core_dev *candidate;
443 	struct mlx5_sd *candidate_sd;
444 	bool dev_should_be_primary;
445 
446 	/* Peer is the device that being sent to all the other devices in the
447 	 * group. Hence, use peer to get the candidate device.
448 	 */
449 	candidate = peer_sd->primary ? peer : peer_sd->primary_dev;
450 
451 	if (sd->fw_silents_secondaries) {
452 		u8 candidate_silent = 0;
453 		int err;
454 
455 		err = mlx5_fs_cmd_query_l2table_silent(candidate,
456 						       &candidate_silent);
457 		if (err) {
458 			sd_warn(candidate, "Failed to query silent mode for dev: %d\n",
459 				err);
460 			return err;
461 		}
462 		/* Candidate is silent, dev should be primary */
463 		dev_should_be_primary = candidate_silent;
464 	} else {
465 		/* No FW silent mode, use bus number */
466 		dev_should_be_primary =
467 			dev->pdev->bus->number < candidate->pdev->bus->number;
468 	}
469 
470 	if (!dev_should_be_primary)
471 		return 0;
472 
473 	candidate_sd = mlx5_get_sd(candidate);
474 
475 	sd->primary = true;
476 	candidate_sd->primary = false;
477 	candidate_sd->primary_dev = dev;
478 	peer_sd->primary = false;
479 	peer_sd->primary_dev = dev;
480 	return 0;
481 }
482 
483 static void sd_handle_secondaries_set(struct mlx5_core_dev *dev,
484 				      struct mlx5_core_dev *peer)
485 {
486 	struct mlx5_sd *peer_sd = mlx5_get_sd(peer);
487 	struct mlx5_sd *sd = mlx5_get_sd(dev);
488 	u8 idx;
489 
490 	/* Primary has nothing to register with itself. */
491 	if (sd->primary)
492 		return;
493 
494 	/* dev is a secondary device, peer is the primary device.
495 	 * Secondary registers itself with the primary.
496 	 */
497 	idx = peer_sd->next_secondary_idx++;
498 	peer_sd->secondaries[idx] = dev;
499 	sd->primary_dev = peer;
500 }
501 
502 static int mlx5_sd_devcom_event(int event, void *my_data, void *event_data)
503 {
504 	struct mlx5_core_dev *peer = event_data;
505 	struct mlx5_core_dev *dev = my_data;
506 
507 	switch (event) {
508 	case SD_FW_SILENT_CHECK:
509 		return sd_handle_fw_silent_check(dev, peer);
510 	case SD_PRIMARY_SET:
511 		return sd_handle_primary_set(dev, peer);
512 	case SD_SECONDARIES_SET:
513 		sd_handle_secondaries_set(dev, peer);
514 		return 0;
515 	}
516 
517 	return 0;
518 }
519 
/* Register dev in the devcom SD component keyed by its group id and, once
 * the component holds host_buses devices, run the group election:
 *   1. optional FW silent check (when silent_mode_query is supported),
 *   2. primary election (SD_PRIMARY_SET),
 *   3. secondaries registering with the primary (SD_SECONDARIES_SET).
 * The component is marked ready when the primary has registered
 * host_buses - 1 secondaries.
 *
 * Return: 0 on success (including the case where the group is not yet
 * complete or is already ready), -EINVAL when the component could not be
 * registered, or the error returned by a failed event; in the latter case
 * the component is unregistered again.
 */
static int sd_register(struct mlx5_core_dev *dev)
{
	struct mlx5_devcom_match_attr attr = {};
	struct mlx5_devcom_comp_dev *devcom;
	struct mlx5_core_dev *primary;
	struct mlx5_sd *primary_sd;
	struct mlx5_sd *sd;
	int err;

	sd = mlx5_get_sd(dev);
	attr.key.val = sd->group_id;
	attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS;
	attr.net = mlx5_core_net(dev);
	devcom = mlx5_devcom_register_component(dev->priv.devc,
						MLX5_DEVCOM_SD_GROUP,
						&attr, mlx5_sd_devcom_event,
						dev);
	if (!devcom)
		return -EINVAL;

	sd->devcom = devcom;

	mlx5_devcom_comp_lock(devcom);
	/* Nothing to do until the whole group has registered, or if an
	 * earlier registration already made the group ready.
	 */
	if (mlx5_devcom_comp_get_size(devcom) != sd->host_buses ||
	    mlx5_devcom_comp_is_ready(devcom))
		goto out;

	/* If silent mode query is supported, ask each device whether it is
	 * silent and propagate the result to the whole group. In each group
	 * only one device is not silent
	 */
	if (MLX5_CAP_GEN(dev, silent_mode_query)) {
		err = mlx5_devcom_locked_send_event(devcom, SD_FW_SILENT_CHECK,
						    SD_FW_SILENT_CHECK, dev);
		if (err)
			goto err_devcom_unreg;
	}

	/* Send SD_PRIMARY_SET event with this device.
	 * All peers will receive this event and compare to this device.
	 * If fw_silents_secondaries is set, choose non-silent device.
	 * Otherwise use bus number.
	 */
	sd->primary = true;
	err = mlx5_devcom_locked_send_event(devcom, SD_PRIMARY_SET,
					    SD_PRIMARY_SET, dev);
	if (err)
		goto err_devcom_unreg;

	/* Broadcast SD_SECONDARIES_SET. Each non-sender peer's handler runs;
	 * the primary's handler returns early so only secondaries register.
	 */
	primary = sd->primary ? dev : sd->primary_dev;
	/* The sender does not handle its own event, so when this device
	 * ended up a secondary it registers with the primary directly.
	 */
	if (!sd->primary)
		sd_handle_secondaries_set(dev, primary);
	mlx5_devcom_locked_send_event(devcom, SD_SECONDARIES_SET,
				      DEVCOM_CANT_FAIL, primary);

	primary_sd = mlx5_get_sd(primary);
	/* Ready only when every non-primary member has registered. */
	if (primary_sd->next_secondary_idx + 1 == sd->host_buses)
		mlx5_devcom_comp_set_ready(devcom, true);
out:
	mlx5_devcom_comp_unlock(devcom);
	return 0;

err_devcom_unreg:
	mlx5_devcom_comp_unlock(devcom);
	mlx5_devcom_unregister_component(devcom);
	return err;
}
590 
591 static void sd_unregister(struct mlx5_core_dev *dev)
592 {
593 	struct mlx5_sd *sd = mlx5_get_sd(dev);
594 
595 	mlx5_devcom_unregister_component(sd->devcom);
596 }
597 
598 static int sd_cmd_set_primary(struct mlx5_core_dev *primary, u8 *alias_key)
599 {
600 	struct mlx5_cmd_allow_other_vhca_access_attr allow_attr = {};
601 	struct mlx5_sd *sd = mlx5_get_sd(primary);
602 	struct mlx5_flow_table_attr ft_attr = {};
603 	struct mlx5_flow_namespace *nic_ns;
604 	struct mlx5_flow_table *ft;
605 	int err;
606 
607 	nic_ns = mlx5_get_flow_namespace(primary, MLX5_FLOW_NAMESPACE_EGRESS);
608 	if (!nic_ns)
609 		return -EOPNOTSUPP;
610 
611 	ft = mlx5_create_flow_table(nic_ns, &ft_attr);
612 	if (IS_ERR(ft)) {
613 		err = PTR_ERR(ft);
614 		return err;
615 	}
616 	sd->tx_ft = ft;
617 	memcpy(allow_attr.access_key, alias_key, ACCESS_KEY_LEN);
618 	allow_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS;
619 	allow_attr.obj_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id;
620 
621 	err = mlx5_cmd_allow_other_vhca_access(primary, &allow_attr);
622 	if (err) {
623 		mlx5_core_err(primary, "Failed to allow other vhca access err=%d\n",
624 			      err);
625 		mlx5_destroy_flow_table(ft);
626 		return err;
627 	}
628 
629 	return 0;
630 }
631 
632 static void sd_cmd_unset_primary(struct mlx5_core_dev *primary)
633 {
634 	struct mlx5_sd *sd = mlx5_get_sd(primary);
635 
636 	mlx5_destroy_flow_table(sd->tx_ft);
637 }
638 
639 static int sd_secondary_create_alias_ft(struct mlx5_core_dev *secondary,
640 					struct mlx5_core_dev *primary,
641 					struct mlx5_flow_table *ft,
642 					u32 *obj_id, u8 *alias_key)
643 {
644 	u32 aliased_object_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id;
645 	u16 vhca_id_to_be_accessed = MLX5_CAP_GEN(primary, vhca_id);
646 	struct mlx5_cmd_alias_obj_create_attr alias_attr = {};
647 	int ret;
648 
649 	memcpy(alias_attr.access_key, alias_key, ACCESS_KEY_LEN);
650 	alias_attr.obj_id = aliased_object_id;
651 	alias_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS;
652 	alias_attr.vhca_id = vhca_id_to_be_accessed;
653 	ret = mlx5_cmd_alias_obj_create(secondary, &alias_attr, obj_id);
654 	if (ret) {
655 		mlx5_core_err(secondary, "Failed to create alias object err=%d\n",
656 			      ret);
657 		return ret;
658 	}
659 
660 	return 0;
661 }
662 
663 static void sd_secondary_destroy_alias_ft(struct mlx5_core_dev *secondary)
664 {
665 	struct mlx5_sd *sd = mlx5_get_sd(secondary);
666 
667 	mlx5_cmd_alias_obj_destroy(secondary, sd->alias_obj_id,
668 				   MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS);
669 }
670 
671 static int mlx5_sd_secondary_conf_tx_root(struct mlx5_core_dev *secondary,
672 					  bool disconnect)
673 {
674 	struct mlx5_sd *sd = mlx5_get_sd(secondary);
675 	int err;
676 
677 	/* Idempotent: skip if TX root is already in the requested state. */
678 	if (sd->tx_root_silent == disconnect)
679 		return 0;
680 
681 	if (disconnect)
682 		err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true);
683 	else
684 		err = mlx5_fs_cmd_set_tx_flow_table_root(secondary,
685 							 sd->alias_obj_id,
686 							 false);
687 	if (err)
688 		return err;
689 
690 	sd->tx_root_silent = disconnect;
691 	return 0;
692 }
693 
694 static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary,
695 				struct mlx5_core_dev *primary,
696 				u8 *alias_key)
697 {
698 	struct mlx5_sd *primary_sd = mlx5_get_sd(primary);
699 	struct mlx5_sd *sd = mlx5_get_sd(secondary);
700 	int err;
701 
702 	if (!primary_sd->fw_silents_secondaries) {
703 		err = mlx5_fs_cmd_set_l2table_entry_silent(secondary, 1);
704 		if (err)
705 			return err;
706 	}
707 
708 	err = sd_secondary_create_alias_ft(secondary, primary, primary_sd->tx_ft,
709 					   &sd->alias_obj_id, alias_key);
710 	if (err)
711 		goto err_unset_silent;
712 
713 	err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, sd->alias_obj_id,
714 						 false);
715 	if (err)
716 		goto err_destroy_alias_ft;
717 	sd->tx_root_silent = false;
718 
719 	return 0;
720 
721 err_destroy_alias_ft:
722 	sd_secondary_destroy_alias_ft(secondary);
723 err_unset_silent:
724 	if (!primary_sd->fw_silents_secondaries)
725 		mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
726 	return err;
727 }
728 
729 static void sd_cmd_unset_secondary(struct mlx5_core_dev *secondary)
730 {
731 	struct mlx5_sd *primary_sd;
732 
733 	primary_sd = mlx5_get_sd(mlx5_sd_get_primary(secondary));
734 	mlx5_sd_secondary_conf_tx_root(secondary, true);
735 	sd_secondary_destroy_alias_ft(secondary);
736 	if (!primary_sd->fw_silents_secondaries)
737 		mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
738 }
739 
740 static void sd_print_group(struct mlx5_core_dev *primary)
741 {
742 	struct mlx5_sd *sd = mlx5_get_sd(primary);
743 	struct mlx5_core_dev *pos;
744 	int i;
745 
746 	sd_info(primary, "group id %#x, primary %s, vhca %#x\n",
747 		sd->group_id, pci_name(primary->pdev),
748 		MLX5_CAP_GEN(primary, vhca_id));
749 	mlx5_sd_for_each_secondary(i, primary, pos)
750 		sd_info(primary, "group id %#x, secondary_%d %s, vhca %#x\n",
751 			sd->group_id, i - 1, pci_name(pos->pdev),
752 			MLX5_CAP_GEN(pos, vhca_id));
753 }
754 
755 static ssize_t dev_read(struct file *filp, char __user *buf, size_t count,
756 			loff_t *pos)
757 {
758 	struct mlx5_core_dev *dev;
759 	char tbuf[32];
760 	int ret;
761 
762 	dev = filp->private_data;
763 	ret = snprintf(tbuf, sizeof(tbuf), "%s vhca %#x\n", pci_name(dev->pdev),
764 		       MLX5_CAP_GEN(dev, vhca_id));
765 
766 	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
767 }
768 
/* File operations for the per-device debugfs entries ("primary" and
 * "secondary_<n>") created in mlx5_sd_init(); reads are served by
 * dev_read().
 */
static const struct file_operations dev_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.read	= dev_read,
};
774 
/* Set up Socket-Direct for dev.
 *
 * Allocates the SD context (sd_init()) and registers dev in its devcom
 * group (sd_register()). When this registration completes the group and
 * the group is still DOWN, the combined configuration is applied under
 * the devcom lock: the primary's TX flow table is created, every
 * secondary is configured against it, the optional LAG bookkeeping is
 * set up, debugfs entries are created on the primary and the group state
 * becomes UP.
 *
 * Return: 0 on success, including when SD does not apply to dev or the
 * group is not complete yet; a negative errno on failure, in which case
 * the group membership is reset and dev's SD context is released.
 */
int mlx5_sd_init(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *primary, *pos, *to;
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	u8 alias_key[ACCESS_KEY_LEN];
	struct mlx5_sd *primary_sd;
	int err, i;

	err = sd_init(dev);
	if (err)
		return err;

	/* sd_init() attaches a context only when SD applies to dev. */
	sd = mlx5_get_sd(dev);
	if (!sd)
		return 0;

	err = sd_register(dev);
	if (err)
		goto err_sd_cleanup;

	mlx5_devcom_comp_lock(sd->devcom);
	if (!mlx5_devcom_comp_is_ready(sd->devcom))
		goto out;

	primary = mlx5_sd_get_primary(dev);
	if (!primary)
		goto out;

	primary_sd = mlx5_get_sd(primary);
	if (primary_sd->state != MLX5_SD_STATE_DOWN)
		goto out;

	/* Random key shared by the primary (allow access) and the
	 * secondaries (alias creation).
	 */
	for (i = 0; i < ACCESS_KEY_LEN; i++)
		alias_key[i] = get_random_u8();

	err = sd_cmd_set_primary(primary, alias_key);
	if (err)
		goto err_sd_unregister;

	mlx5_sd_for_each_secondary(i, primary, pos) {
		err = sd_cmd_set_secondary(pos, primary, alias_key);
		if (err)
			goto err_unset_secondaries;
	}

	/* Best effort: sd_lag_init() only warns on failure. */
	sd_lag_init(primary);

	primary_sd->dfs =
		debugfs_create_dir("multi-pf",
				   mlx5_debugfs_get_dev_root(primary));
	mlx5_sd_for_each_secondary(i, primary, pos) {
		char name[32];

		snprintf(name, sizeof(name), "secondary_%d", i - 1);
		debugfs_create_file(name, 0400, primary_sd->dfs, pos,
				    &dev_fops);
	}

	debugfs_create_file("sd_lag_state", 0400, primary_sd->dfs, primary,
			    &sd_lag_state_fops);
	debugfs_create_x32("group_id", 0400, primary_sd->dfs,
			   &primary_sd->group_id);
	debugfs_create_file("primary", 0400, primary_sd->dfs, primary,
			    &dev_fops);

	sd_info(primary, "group id %#x, size %d, combined\n",
		sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
	sd_print_group(primary);

	primary_sd->state = MLX5_SD_STATE_UP;
out:
	mlx5_devcom_comp_unlock(sd->devcom);
	return 0;

err_unset_secondaries:
	/* 'pos' is the secondary that failed; undo the ones before it. */
	to = pos;
	mlx5_sd_for_each_secondary_to(i, primary, to, pos)
		sd_cmd_unset_secondary(pos);
	sd_cmd_unset_primary(primary);
err_sd_unregister:
	/* Dissolve the elected roles so the group is no longer ready. */
	mlx5_sd_for_each_secondary(i, primary, pos) {
		struct mlx5_sd *peer_sd = mlx5_get_sd(pos);

		primary_sd->secondaries[i - 1] = NULL;
		peer_sd->primary_dev = NULL;
	}
	primary_sd->primary = false;
	primary_sd->next_secondary_idx = 0;
	mlx5_devcom_comp_set_ready(sd->devcom, false);
	mlx5_devcom_comp_unlock(sd->devcom);
	sd_unregister(dev);
err_sd_cleanup:
	sd_cleanup(dev);
	return err;
}
870 
/* Tear down Socket-Direct for dev.
 *
 * If the group is ready and UP, the combined configuration is undone
 * under the devcom lock (debugfs, LAG bookkeeping, secondaries, primary)
 * and the state returns to DOWN. In any ready case the elected roles are
 * cleared and the component is marked not ready. Finally dev is
 * unregistered from devcom and its SD context is freed. Does nothing for
 * a device without SD context.
 */
void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	struct mlx5_core_dev *primary, *pos;
	struct mlx5_sd *primary_sd;
	int i;

	if (!sd)
		return;

	mlx5_devcom_comp_lock(sd->devcom);
	if (!mlx5_devcom_comp_is_ready(sd->devcom))
		goto out_unlock;

	primary = mlx5_sd_get_primary(dev);
	if (!primary)
		goto out_ready_false;

	primary_sd = mlx5_get_sd(primary);
	/* Roles were elected but the group never came UP: only the role
	 * bookkeeping needs to be cleared.
	 */
	if (primary_sd->state != MLX5_SD_STATE_UP)
		goto out_clear_peers;

	debugfs_remove_recursive(primary_sd->dfs);
	primary_sd->dfs = NULL;
	sd_lag_cleanup(primary);
	mlx5_sd_for_each_secondary(i, primary, pos)
		sd_cmd_unset_secondary(pos);
	sd_cmd_unset_primary(primary);

	sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
	primary_sd->state = MLX5_SD_STATE_DOWN;
out_clear_peers:
	mlx5_sd_for_each_secondary(i, primary, pos) {
		struct mlx5_sd *peer_sd = mlx5_get_sd(pos);

		primary_sd->secondaries[i - 1] = NULL;
		peer_sd->primary_dev = NULL;
	}
	primary_sd->primary = false;
	primary_sd->next_secondary_idx = 0;
out_ready_false:
	mlx5_devcom_comp_set_ready(sd->devcom, false);
out_unlock:
	mlx5_devcom_comp_unlock(sd->devcom);
	sd_unregister(dev);
	sd_cleanup(dev);
}
918 
/* Lock order:
 *   primary:   actual_adev_lock -> SD devcom comp lock
 *   secondary: SD devcom comp lock -> (drop) -> actual_adev_lock
 * The two locks are never held together, so no ABBA.
 *
 * Resolve the auxiliary device that should act on behalf of dev.
 *
 * Return:
 *   - adev unchanged when dev has no SD context, when dev is the primary,
 *     or when no primary could be resolved;
 *   - NULL when the SD group is not ready (checked both under the devcom
 *     lock and again after taking the primary adev's device lock);
 *   - otherwise the primary's auxiliary device at index idx, returned
 *     with a device reference taken and its device lock held. The caller
 *     releases both through mlx5_sd_put_adev().
 */
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
					  struct auxiliary_device *adev,
					  int idx)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	struct mlx5_core_dev *primary;
	struct mlx5_adev *primary_adev;

	if (!sd)
		return adev;

	mlx5_devcom_comp_lock(sd->devcom);
	if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
		mlx5_devcom_comp_unlock(sd->devcom);
		return NULL;
	}

	primary = mlx5_sd_get_primary(dev);
	if (!primary || dev == primary) {
		mlx5_devcom_comp_unlock(sd->devcom);
		return adev;
	}

	/* NOTE(review): primary->priv.adev[idx] is used without a NULL
	 * check - confirm it is always populated while the group is ready.
	 */
	primary_adev = primary->priv.adev[idx];
	/* Pin the primary's adev before dropping the devcom lock so it
	 * stays valid while we wait for its device lock.
	 */
	get_device(&primary_adev->adev.dev);
	mlx5_devcom_comp_unlock(sd->devcom);

	device_lock(&primary_adev->adev.dev);
	/* Primary may have completed remove between dropping devcom and
	 * acquiring device_lock; recheck.
	 */
	if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
		device_unlock(&primary_adev->adev.dev);
		put_device(&primary_adev->adev.dev);
		return NULL;
	}
	return &primary_adev->adev;
}
962 
963 #ifdef CONFIG_MLX5_ESWITCH
964 /* All SD members must have completed esw_offloads_enable (i.e., reached
965  * mlx5_esw_offloads_devcom_init) and become eswitch-peers of the primary.
966  * Until then, mlx5_eswitch_is_peer() returns false for the not-yet-paired
967  * member and shared_fdb_supported_filter would reject. When all PFs transition
968  * in parallel, only the last one to finish satisfies this gate; the earlier
969  * ones return 0 silently here.
970  */
971 static bool mlx5_sd_all_paired(struct mlx5_core_dev *primary)
972 {
973 	struct mlx5_eswitch *primary_esw = primary->priv.eswitch;
974 	struct mlx5_core_dev *pos;
975 	int i;
976 
977 	mlx5_sd_for_each_secondary(i, primary, pos) {
978 		if (!mlx5_eswitch_is_peer(primary_esw, pos->priv.eswitch))
979 			return false;
980 	}
981 	return true;
982 }
983 
/* Try to create the shared FDB for the primary's SD group.
 *
 * Runs under ldev->lock and silently backs off when a LAG mode change is
 * in progress, when not all secondaries are eswitch peers yet, or when
 * the shared FDB is already active for the group. Failures are logged
 * but not reported to the caller.
 */
static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
{
	struct mlx5_sd *sd = mlx5_get_sd(primary);
	struct mlx5_core_dev *pos;
	struct mlx5_lag *ldev;
	struct lag_func *pf;
	int err;
	int i;

	ldev = mlx5_lag_dev(primary);
	if (!ldev) {
		sd_warn(primary, "Shared FDB MUST have ldev\n");
		return;
	}

	mutex_lock(&ldev->lock);

	if (ldev->mode_changes_in_progress)
		goto unlock;

	if (!mlx5_sd_all_paired(primary))
		goto unlock;

	/* Check if SD FDB is already active for this group */
	mlx5_lag_for_each(i, 0, ldev, sd->group_id) {
		pf = mlx5_lag_pf(ldev, i);
		if (pf->sd_fdb_active)
			goto unlock;
		/* Only the first function of the group is inspected. */
		break;
	}

	if (!mlx5_lag_shared_fdb_supported_filter(ldev, sd->group_id)) {
		sd_warn(primary, "Shared FDB not supported\n");
		goto unlock;
	}

	/* Initialize vport metadata for all group devices. This is deferred
	 * from esw_offloads_enable() because mlx5_sd_pf_num_get() requires
	 * the SD group to be ready.
	 */
	mlx5_sd_for_each_dev(i, primary, pos) {
		struct mlx5_eswitch *esw = pos->priv.eswitch;

		err = mlx5_esw_offloads_init_deferred_metadata(esw);
		if (err) {
			sd_warn(primary, "Failed to init metadata for %s: %d\n",
				dev_name(pos->device), err);
			goto unlock;
		}
	}

	err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, sd->group_id);
	if (err)
		sd_warn(primary, "Failed to create shared FDB: %d\n", err);
	else
		sd_info(primary, "Shared FDB created\n");

unlock:
	mutex_unlock(&ldev->lock);
}
1044 
1045 void mlx5_sd_eswitch_mode_set(struct mlx5_core_dev *dev, u16 mlx5_mode)
1046 {
1047 	struct mlx5_core_dev *primary;
1048 	struct mlx5_sd *sd;
1049 	int err;
1050 
1051 	sd = mlx5_get_sd(dev);
1052 	if (!sd || !mlx5_devcom_comp_is_ready(sd->devcom))
1053 		return;
1054 
1055 	mlx5_devcom_comp_lock(sd->devcom);
1056 	if (!mlx5_devcom_comp_is_ready(sd->devcom))
1057 		goto unlock;
1058 
1059 	primary = mlx5_sd_get_primary(dev);
1060 
1061 	/* Secondary devices need TX root reconfiguration */
1062 	if (dev != primary) {
1063 		bool disconnect = (mlx5_mode == MLX5_ESWITCH_OFFLOADS);
1064 
1065 		err = mlx5_sd_secondary_conf_tx_root(dev, disconnect);
1066 		if (err) {
1067 			sd_warn(dev, "Failed to set TX root: %d\n", err);
1068 			goto unlock;
1069 		}
1070 	}
1071 
1072 	/* Try to activate shared FDB when all devices are in switchdev.
1073 	 * Shared FDB is optional - failure here doesn't fail the transition.
1074 	 */
1075 	if (mlx5_mode == MLX5_ESWITCH_OFFLOADS)
1076 		mlx5_sd_activate_shared_fdb(primary);
1077 
1078 unlock:
1079 	mlx5_devcom_comp_unlock(sd->devcom);
1080 }
1081 
1082 #endif /* CONFIG_MLX5_ESWITCH */
1083 
1084 void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
1085 		      struct auxiliary_device *adev)
1086 {
1087 	if (actual_adev != adev) {
1088 		device_unlock(&actual_adev->dev);
1089 		put_device(&actual_adev->dev);
1090 	}
1091 }
1092