xref: /linux/drivers/infiniband/hw/mlx5/ib_rep.c (revision d30c1683aaecb93d2ab95685dc4300a33d3cea7a)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
4  */
5 
6 #include <linux/mlx5/vport.h>
7 #include "ib_rep.h"
8 #include "srq.h"
9 
10 static int
11 mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
12 		      struct mlx5_eswitch_rep *rep,
13 		      int vport_index)
14 {
15 	struct mlx5_ib_dev *ibdev;
16 	struct net_device *ndev;
17 
18 	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
19 	if (!ibdev)
20 		return -EINVAL;
21 
22 	ibdev->port[vport_index].rep = rep;
23 	rep->rep_data[REP_IB].priv = ibdev;
24 	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
25 
26 	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
27 }
28 
29 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
30 
31 static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
32 {
33 	struct mlx5_core_dev *peer_dev;
34 	int i;
35 
36 	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
37 		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
38 
39 		if (mlx5_lag_is_mpesw(peer_dev))
40 			*num_ports += peer_num_ports;
41 		else
42 			/* Only 1 ib port is the representor for all uplinks */
43 			*num_ports += peer_num_ports - 1;
44 	}
45 }
46 
47 static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
48 					struct mlx5_core_dev *new_owner)
49 {
50 	int ret;
51 
52 	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
53 	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
54 		return 0;
55 
56 	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
57 	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
58 		return 0;
59 
60 	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
61 				   FS_FT_RDMA_TRANSPORT_TX);
62 	if (ret)
63 		return ret;
64 
65 	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
66 				   FS_FT_RDMA_TRANSPORT_RX);
67 	if (ret) {
68 		mlx5_fs_set_root_dev(cur_owner, cur_owner,
69 				     FS_FT_RDMA_TRANSPORT_TX);
70 		return ret;
71 	}
72 
73 	return 0;
74 }
75 
76 static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
77 {
78 	struct mlx5_core_dev *peer_dev;
79 	int i, ret;
80 
81 	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
82 		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
83 		WARN_ON_ONCE(ret);
84 	}
85 }
86 
87 static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
88 {
89 	struct mlx5_core_dev *peer_dev;
90 	int ret;
91 	int i;
92 
93 	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
94 		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
95 		if (ret) {
96 			mlx5_ib_release_transport(dev);
97 			return ret;
98 		}
99 	}
100 
101 	return 0;
102 }
103 
/* Eswitch rep_ops .load callback: bind an eswitch representor to IB.
 *
 * For a non-uplink rep (or a non-MPESW uplink on a shared-FDB slave) the
 * rep is simply attached to the existing uplink IB device of the LAG
 * master. Only an uplink rep (on the master, or any port under MPESW)
 * allocates and registers a brand new IB device covering all ports.
 *
 * Returns 0 on success or a negative errno.
 */
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
	struct mlx5_core_dev *lag_master = dev;
	const struct mlx5_ib_profile *profile;
	struct mlx5_core_dev *peer_dev;
	struct mlx5_ib_dev *ibdev;
	int new_uplink = false;
	int vport_index;
	int ret;
	int i;

	vport_index = rep->vport_index;

	if (mlx5_lag_is_shared_fdb(dev)) {
		if (mlx5_lag_is_master(dev)) {
			/* The master's IB device exposes the peers' ports too */
			mlx5_ib_num_ports_update(dev, &num_ports);
		} else {
			if (rep->vport == MLX5_VPORT_UPLINK) {
				/* Non-MPESW: the master's uplink rep already
				 * represents this uplink; nothing to do.
				 */
				if (!mlx5_lag_is_mpesw(dev))
					return 0;
				new_uplink = true;
			}
			/* Offset our vport index past the ports contributed
			 * by lower-indexed peer devices, and locate the LAG
			 * master whose IB device hosts this rep.
			 */
			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);

				if (mlx5_lag_is_master(peer_dev))
					lag_master = peer_dev;
				else if (!mlx5_lag_is_mpesw(dev))
				/* Only 1 ib port is the representor for all uplinks */
					peer_n_ports--;

				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
					vport_index += peer_n_ports;
			}
		}
	}

	/* Only an uplink rep (except the MPESW pseudo-uplink case above)
	 * creates a full IB device; everything else just attaches to it.
	 */
	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
		profile = &raw_eth_profile;
	else
		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);

	if (mlx5_lag_is_shared_fdb(dev)) {
		/* The new IB device needs ownership of the peers' RDMA
		 * transport flow tables.
		 */
		ret = mlx5_ib_take_transport(lag_master);
		if (ret)
			return ret;
	}

	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
					 mlx5_core_net(lag_master));
	if (!ibdev) {
		ret = -ENOMEM;
		goto release_transport;
	}

	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
			      GFP_KERNEL);
	if (!ibdev->port) {
		ret = -ENOMEM;
		goto fail_port;
	}

	ibdev->is_rep = true;
	/* The uplink rep always sits at its own (un-offset) vport index */
	vport_index = rep->vport_index;
	ibdev->port[vport_index].rep = rep;
	ibdev->mdev = lag_master;
	ibdev->num_ports = num_ports;
	ibdev->ib_dev.phys_port_cnt = num_ports;
	ret = ib_device_set_netdev(&ibdev->ib_dev,
			mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
					       rep->vport),
			vport_index + 1);
	if (ret)
		goto fail_add;

	ret = __mlx5_ib_add(ibdev, profile);
	if (ret)
		goto fail_add;

	rep->rep_data[REP_IB].priv = ibdev;
	/* With a shared FDB the peers' reps must be registered too so they
	 * get loaded onto this freshly created device.
	 */
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_register_peer_vport_reps(lag_master);

	return 0;

fail_add:
	kfree(ibdev->port);
fail_port:
	ib_dealloc_device(&ibdev->ib_dev);
release_transport:
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_release_transport(lag_master);

	return ret;
}
201 
202 static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
203 {
204 	return rep->rep_data[REP_IB].priv;
205 }
206 
/* Eswitch rep_ops .unload callback: detach @rep from its IB device and,
 * for an uplink rep that owns the device, tear the whole device down
 * (including unregistering shared-FDB peer reps first).
 */
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
	int vport_index = rep->vport_index;
	struct mlx5_ib_port *port;
	int i;

	if (WARN_ON(!mdev))
		return;

	/* The load callback never attached this rep (e.g. load failed) */
	if (!dev)
		return;

	if (mlx5_lag_is_shared_fdb(mdev) &&
	    !mlx5_lag_is_master(mdev)) {
		/* Non-MPESW slave uplinks were skipped at load time */
		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
			return;
		/* On a slave the rep's own vport_index is not where it was
		 * placed in the master's port array (see the offset math in
		 * the load path); find it by scanning.
		 */
		for (i = 0; i < dev->num_ports; i++) {
			if (dev->port[i].rep == rep)
				break;
		}
		if (WARN_ON(i == dev->num_ports))
			return;
		vport_index = i;
	}

	port = &dev->port[vport_index];

	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
	rep->rep_data[REP_IB].priv = NULL;
	port->rep = NULL;

	if (rep->vport == MLX5_VPORT_UPLINK) {

		/* A shared-FDB slave uplink does not own the IB device */
		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
			return;

		if (mlx5_lag_is_shared_fdb(mdev)) {
			struct mlx5_core_dev *peer_mdev;
			struct mlx5_eswitch *esw;

			/* Unregister the peers' reps (unloading them from
			 * this device) before releasing the transport tables
			 * and destroying the device itself.
			 */
			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
				esw = peer_mdev->priv.eswitch;
				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
			}
			mlx5_ib_release_transport(mdev);
		}
		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
	}
}
259 
/* Callbacks the eswitch core invokes per representor for REP_IB */
static const struct mlx5_eswitch_rep_ops rep_ops = {
	.load = mlx5_ib_vport_rep_load,
	.unload = mlx5_ib_vport_rep_unload,
	.get_proto_dev = mlx5_ib_rep_to_dev,
};
265 
266 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
267 {
268 	struct mlx5_core_dev *peer_mdev;
269 	struct mlx5_eswitch *esw;
270 	int i;
271 
272 	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
273 		esw = peer_mdev->priv.eswitch;
274 		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
275 	}
276 }
277 
278 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
279 					  u16 vport_num)
280 {
281 	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
282 }
283 
284 struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
285 						   struct mlx5_ib_sq *sq,
286 						   u32 port)
287 {
288 	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
289 	struct mlx5_eswitch_rep *rep;
290 
291 	if (!dev->is_rep || !port)
292 		return NULL;
293 
294 	if (!dev->port[port - 1].rep)
295 		return ERR_PTR(-EINVAL);
296 
297 	rep = dev->port[port - 1].rep;
298 
299 	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
300 }
301 
302 static int mlx5r_rep_probe(struct auxiliary_device *adev,
303 			   const struct auxiliary_device_id *id)
304 {
305 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
306 	struct mlx5_core_dev *mdev = idev->mdev;
307 	struct mlx5_eswitch *esw;
308 
309 	esw = mdev->priv.eswitch;
310 	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
311 	return 0;
312 }
313 
314 static void mlx5r_rep_remove(struct auxiliary_device *adev)
315 {
316 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
317 	struct mlx5_core_dev *mdev = idev->mdev;
318 	struct mlx5_eswitch *esw;
319 
320 	esw = mdev->priv.eswitch;
321 	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
322 }
323 
/* Match the ".rdma-rep" auxiliary device created by the mlx5 core driver */
static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);
330 
/* Auxiliary driver binding the IB representor support to mlx5 core */
static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};
337 
/* Register the rep auxiliary driver; called from module init. */
int mlx5r_rep_init(void)
{
	return auxiliary_driver_register(&mlx5r_rep_driver);
}
342 
/* Unregister the rep auxiliary driver; called from module exit. */
void mlx5r_rep_cleanup(void)
{
	auxiliary_driver_unregister(&mlx5r_rep_driver);
}
347