// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
 */

#include <linux/mlx5/lag.h>
#include <linux/mlx5/vport.h>
#include "ib_rep.h"
#include "srq.h"

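/* A vport rep that does not own an IB device of its own is attached as an
 * additional port of the IB device created for the uplink rep: look that
 * device up through the eswitch, bind the rep and its netdev to the given
 * port index, and, for vports that are not native to the LAG master
 * eswitch, install a demux rule on the rep's own core device.
 */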
static int
mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
		      struct mlx5_core_dev *rep_dev,
		      struct mlx5_eswitch_rep *rep,
		      int vport_index)
{
	struct mlx5_ib_dev *ibdev;
	struct net_device *ndev;
	int ret;

	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
	if (!ibdev)
		return -EINVAL;

	ibdev->port[vport_index].rep = rep;
	rep->rep_data[REP_IB].priv = ibdev;
	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);

	ret = ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
	if (ret)
		return ret;

	/* Only vports that are not native to the LAG master eswitch need a
	 * demux rule.
	 */
	if (mlx5_eswitch_get_total_vports(dev) > vport_index)
		return 0;

	return mlx5_lag_demux_rule_add(rep_dev, rep->vport, vport_index);
}

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);

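/* Accumulate the IB ports contributed by each LAG peer device. In MPESW
 * mode every peer vport gets its own IB port; otherwise a single IB port
 * represents all uplinks, so one port per peer is deducted.
 */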
static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
{
	struct mlx5_core_dev *peer_dev;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);

		if (mlx5_lag_is_mpesw(peer_dev))
			*num_ports += peer_num_ports;
		else
			/* Only 1 ib port is the representor for all uplinks */
			*num_ports += peer_num_ports - 1;
	}
}

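/* Transfer ownership of cur_owner's RDMA_TRANSPORT flow tables to
 * new_owner. A no-op unless cur_owner supports these tables and new_owner
 * is able to manage the RDMA transport of another eswitch. TX is moved
 * first; if moving RX fails, TX ownership is handed back to cur_owner.
 */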
static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
				       struct mlx5_core_dev *new_owner)
{
	int ret;

	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
		return 0;

	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
		return 0;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_TX);
	if (ret)
		return ret;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_RX);
	if (ret) {
		mlx5_fs_set_root_dev(cur_owner, cur_owner,
				     FS_FT_RDMA_TRANSPORT_TX);
		return ret;
	}

	return 0;
}

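/* Hand RDMA_TRANSPORT table ownership back to each LAG peer, making every
 * peer its own root device again.
 */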
static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int i, ret;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
		WARN_ON_ONCE(ret);
	}
}

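/* Claim RDMA_TRANSPORT table ownership from all LAG peers; on failure,
 * roll back any transfers that already succeeded.
 */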
static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int ret;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
		if (ret) {
			mlx5_ib_release_transport(dev);
			return ret;
		}
	}

	return 0;
}

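/* rep_ops .load callback. Only an uplink rep allocates a full mlx5_ib
 * device (see below); every other rep, and in shared-FDB mode the reps of
 * all LAG peers, is attached to that device as an additional port.
 */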
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
	struct mlx5_core_dev *lag_master = dev;
	const struct mlx5_ib_profile *profile;
	struct mlx5_core_dev *peer_dev;
	struct mlx5_ib_dev *ibdev;
	bool new_uplink = false;
	int vport_index;
	int ret;
	int i;

	vport_index = rep->vport_index;

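	/* In shared-FDB mode the LAG master exposes ports for all peers, so
	 * a rep loading on a non-master peer must offset its port index by
	 * the ports contributed by every peer that precedes its device.
	 */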
	if (mlx5_lag_is_shared_fdb(dev)) {
		if (mlx5_lag_is_master(dev)) {
			mlx5_ib_num_ports_update(dev, &num_ports);
		} else {
			if (rep->vport == MLX5_VPORT_UPLINK) {
				if (!mlx5_lag_is_mpesw(dev))
					return 0;
				new_uplink = true;
			}
			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);

				if (mlx5_lag_is_master(peer_dev))
					lag_master = peer_dev;
				else if (!mlx5_lag_is_mpesw(peer_dev))
					/* Only 1 ib port is the representor for all uplinks */
					peer_n_ports--;

				if (mlx5_get_dev_index(peer_dev) <
				    mlx5_get_dev_index(dev))
					vport_index += peer_n_ports;
			}
		}
	}

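	/* Only an uplink rep (excluding an MPESW peer uplink, which is
	 * attached as a regular port) creates the IB device with the raw
	 * Ethernet profile; all other reps register as ports on the
	 * existing device.
	 */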
	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
		profile = &raw_eth_profile;
	else
		return mlx5_ib_set_vport_rep(lag_master, dev, rep, vport_index);

	if (mlx5_lag_is_shared_fdb(dev)) {
		ret = mlx5_ib_take_transport(lag_master);
		if (ret)
			return ret;
	}

	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
					 mlx5_core_net(lag_master));
	if (!ibdev) {
		ret = -ENOMEM;
		goto release_transport;
	}

	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
			      GFP_KERNEL);
	if (!ibdev->port) {
		ret = -ENOMEM;
		goto fail_port;
	}

	ibdev->is_rep = true;
	vport_index = rep->vport_index;
	ibdev->port[vport_index].rep = rep;
	ibdev->mdev = lag_master;
	ibdev->num_ports = num_ports;
	ibdev->ib_dev.phys_port_cnt = num_ports;
	ret = ib_device_set_netdev(&ibdev->ib_dev,
			mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
					       rep->vport),
			vport_index + 1);
	if (ret)
		goto fail_add;

	ret = __mlx5_ib_add(ibdev, profile);
	if (ret)
		goto fail_add;

	rep->rep_data[REP_IB].priv = ibdev;
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_register_peer_vport_reps(lag_master);

	return 0;

fail_add:
	kfree(ibdev->port);
fail_port:
	ib_dealloc_device(&ibdev->ib_dev);
release_transport:
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_release_transport(lag_master);

	return ret;
}

static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
	return rep->rep_data[REP_IB].priv;
}

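/* rep_ops .unload callback, undoing mlx5_ib_vport_rep_load(). Reps that
 * were loaded on a shared-FDB peer recompute their port index by searching
 * the port array. For the uplink rep this also unregisters the peer reps,
 * releases RDMA_TRANSPORT ownership and tears down the IB device.
 */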
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
	int vport_index = rep->vport_index;
	struct mlx5_ib_port *port;
	int i;

	if (WARN_ON(!mdev))
		return;

	if (!dev)
		return;

	if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev)) {
		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
			return;
		for (i = 0; i < dev->num_ports; i++) {
			if (dev->port[i].rep == rep)
				break;
		}
		if (WARN_ON(i == dev->num_ports))
			return;
		vport_index = i;
	}

	mlx5_lag_demux_rule_del(mdev, vport_index);

	port = &dev->port[vport_index];

	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
	rep->rep_data[REP_IB].priv = NULL;
	port->rep = NULL;

	if (rep->vport == MLX5_VPORT_UPLINK) {
		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
			return;

		if (mlx5_lag_is_shared_fdb(mdev)) {
			struct mlx5_core_dev *peer_mdev;
			struct mlx5_eswitch *esw;

			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
				esw = peer_mdev->priv.eswitch;
				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
			}
			mlx5_ib_release_transport(mdev);
		}
		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
	}
}

static const struct mlx5_eswitch_rep_ops rep_ops = {
	.load = mlx5_ib_vport_rep_load,
	.unload = mlx5_ib_vport_rep_unload,
	.get_proto_dev = mlx5_ib_rep_to_dev,
};

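/* Make the reps of every LAG peer eswitch load into the shared-FDB master
 * device by registering the same rep ops on each peer.
 */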
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_core_dev *peer_mdev;
	struct mlx5_eswitch *esw;
	int i;

	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
		esw = peer_mdev->priv.eswitch;
		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	}
}

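/* The rep netdevice for a vport is whatever the Ethernet rep (REP_ETH)
 * registered for it.
 */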
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
					  u16 vport_num)
{
	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}

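/* Install the rule that steers traffic sent on a representor SQ to its
 * vport. Returns NULL when the device is not a rep (nothing to do), an
 * ERR_PTR when the port has no rep bound, and the send-to-vport rule for
 * the SQ's QP number otherwise.
 */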
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_sq *sq,
						   u32 port)
{
	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
	struct mlx5_eswitch_rep *rep;

	if (!dev->is_rep || !port)
		return NULL;

	if (!dev->port[port - 1].rep)
		return ERR_PTR(-EINVAL);

	rep = dev->port[port - 1].rep;

	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
						   sq->base.mqp.qpn);
}

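/* mlx5_core creates a ".rdma-rep" auxiliary device when the eswitch is in
 * switchdev mode; probe/remove only (un)register the IB rep ops on its
 * eswitch, and the rep_ops callbacks above do the actual work.
 */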
static int mlx5r_rep_probe(struct auxiliary_device *adev,
			   const struct auxiliary_device_id *id)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	return 0;
}

static void mlx5r_rep_remove(struct auxiliary_device *adev)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}

static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);

static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};

int mlx5r_rep_init(void)
{
	return auxiliary_driver_register(&mlx5r_rep_driver);
}

void mlx5r_rep_cleanup(void)
{
	auxiliary_driver_unregister(&mlx5r_rep_driver);
}