xref: /linux/drivers/infiniband/hw/mlx5/ib_rep.c (revision 0b364cf53b20204e92bac7c6ebd1ee7d3ec62931)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
4  */
5 
6 #include <linux/mlx5/vport.h>
7 #include "ib_rep.h"
8 #include "srq.h"
9 
10 static int
11 mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
12 		      struct mlx5_eswitch_rep *rep,
13 		      int vport_index)
14 {
15 	struct mlx5_ib_dev *ibdev;
16 	struct net_device *ndev;
17 
18 	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
19 	if (!ibdev)
20 		return -EINVAL;
21 
22 	ibdev->port[vport_index].rep = rep;
23 	rep->rep_data[REP_IB].priv = ibdev;
24 	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
25 
26 	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
27 }
28 
/*
 * Forward declaration: the definition sits further down because it refers
 * to rep_ops, while mlx5_ib_vport_rep_load() needs to call it before that
 * point in the file.
 */
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
30 
31 static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
32 {
33 	struct mlx5_core_dev *peer_dev;
34 	int i;
35 
36 	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
37 		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
38 
39 		if (mlx5_lag_is_mpesw(peer_dev))
40 			*num_ports += peer_num_ports;
41 		else
42 			/* Only 1 ib port is the representor for all uplinks */
43 			*num_ports += peer_num_ports - 1;
44 	}
45 }
46 
47 static int
48 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
49 {
50 	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
51 	struct mlx5_core_dev *lag_master = dev;
52 	const struct mlx5_ib_profile *profile;
53 	struct mlx5_core_dev *peer_dev;
54 	struct mlx5_ib_dev *ibdev;
55 	int new_uplink = false;
56 	int vport_index;
57 	int ret;
58 	int i;
59 
60 	vport_index = rep->vport_index;
61 
62 	if (mlx5_lag_is_shared_fdb(dev)) {
63 		if (mlx5_lag_is_master(dev)) {
64 			mlx5_ib_num_ports_update(dev, &num_ports);
65 		} else {
66 			if (rep->vport == MLX5_VPORT_UPLINK) {
67 				if (!mlx5_lag_is_mpesw(dev))
68 					return 0;
69 				new_uplink = true;
70 			}
71 			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
72 				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
73 
74 				if (mlx5_lag_is_master(peer_dev))
75 					lag_master = peer_dev;
76 				else if (!mlx5_lag_is_mpesw(dev))
77 				/* Only 1 ib port is the representor for all uplinks */
78 					peer_n_ports--;
79 
80 				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
81 					vport_index += peer_n_ports;
82 			}
83 		}
84 	}
85 
86 	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
87 		profile = &raw_eth_profile;
88 	else
89 		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
90 
91 	ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
92 	if (!ibdev)
93 		return -ENOMEM;
94 
95 	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
96 			      GFP_KERNEL);
97 	if (!ibdev->port) {
98 		ret = -ENOMEM;
99 		goto fail_port;
100 	}
101 
102 	ibdev->is_rep = true;
103 	vport_index = rep->vport_index;
104 	ibdev->port[vport_index].rep = rep;
105 	ibdev->mdev = lag_master;
106 	ibdev->num_ports = num_ports;
107 	ibdev->ib_dev.phys_port_cnt = num_ports;
108 	ret = ib_device_set_netdev(&ibdev->ib_dev,
109 			mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
110 					       rep->vport),
111 			vport_index + 1);
112 	if (ret)
113 		goto fail_add;
114 
115 	ret = __mlx5_ib_add(ibdev, profile);
116 	if (ret)
117 		goto fail_add;
118 
119 	rep->rep_data[REP_IB].priv = ibdev;
120 	if (mlx5_lag_is_shared_fdb(lag_master))
121 		mlx5_ib_register_peer_vport_reps(lag_master);
122 
123 	return 0;
124 
125 fail_add:
126 	kfree(ibdev->port);
127 fail_port:
128 	ib_dealloc_device(&ibdev->ib_dev);
129 	return ret;
130 }
131 
132 static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
133 {
134 	return rep->rep_data[REP_IB].priv;
135 }
136 
137 static void
138 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
139 {
140 	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
141 	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
142 	int vport_index = rep->vport_index;
143 	struct mlx5_ib_port *port;
144 	int i;
145 
146 	if (WARN_ON(!mdev))
147 		return;
148 
149 	if (!dev)
150 		return;
151 
152 	if (mlx5_lag_is_shared_fdb(mdev) &&
153 	    !mlx5_lag_is_master(mdev)) {
154 		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
155 			return;
156 		for (i = 0; i < dev->num_ports; i++) {
157 			if (dev->port[i].rep == rep)
158 				break;
159 		}
160 		if (WARN_ON(i == dev->num_ports))
161 			return;
162 		vport_index = i;
163 	}
164 
165 	port = &dev->port[vport_index];
166 
167 	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
168 	rep->rep_data[REP_IB].priv = NULL;
169 	port->rep = NULL;
170 
171 	if (rep->vport == MLX5_VPORT_UPLINK) {
172 
173 		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
174 			return;
175 
176 		if (mlx5_lag_is_shared_fdb(mdev)) {
177 			struct mlx5_core_dev *peer_mdev;
178 			struct mlx5_eswitch *esw;
179 
180 			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
181 				esw = peer_mdev->priv.eswitch;
182 				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
183 			}
184 		}
185 		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
186 	}
187 }
188 
189 static const struct mlx5_eswitch_rep_ops rep_ops = {
190 	.load = mlx5_ib_vport_rep_load,
191 	.unload = mlx5_ib_vport_rep_unload,
192 	.get_proto_dev = mlx5_ib_rep_to_dev,
193 };
194 
195 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
196 {
197 	struct mlx5_core_dev *peer_mdev;
198 	struct mlx5_eswitch *esw;
199 	int i;
200 
201 	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
202 		esw = peer_mdev->priv.eswitch;
203 		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
204 	}
205 }
206 
207 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
208 					  u16 vport_num)
209 {
210 	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
211 }
212 
213 struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
214 						   struct mlx5_ib_sq *sq,
215 						   u32 port)
216 {
217 	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
218 	struct mlx5_eswitch_rep *rep;
219 
220 	if (!dev->is_rep || !port)
221 		return NULL;
222 
223 	if (!dev->port[port - 1].rep)
224 		return ERR_PTR(-EINVAL);
225 
226 	rep = dev->port[port - 1].rep;
227 
228 	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
229 }
230 
231 static int mlx5r_rep_probe(struct auxiliary_device *adev,
232 			   const struct auxiliary_device_id *id)
233 {
234 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
235 	struct mlx5_core_dev *mdev = idev->mdev;
236 	struct mlx5_eswitch *esw;
237 
238 	esw = mdev->priv.eswitch;
239 	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
240 	return 0;
241 }
242 
243 static void mlx5r_rep_remove(struct auxiliary_device *adev)
244 {
245 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
246 	struct mlx5_core_dev *mdev = idev->mdev;
247 	struct mlx5_eswitch *esw;
248 
249 	esw = mdev->priv.eswitch;
250 	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
251 }
252 
253 static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
254 	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
255 	{},
256 };
257 
258 MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);
259 
260 static struct auxiliary_driver mlx5r_rep_driver = {
261 	.name = "rep",
262 	.probe = mlx5r_rep_probe,
263 	.remove = mlx5r_rep_remove,
264 	.id_table = mlx5r_rep_id_table,
265 };
266 
267 int mlx5r_rep_init(void)
268 {
269 	return auxiliary_driver_register(&mlx5r_rep_driver);
270 }
271 
272 void mlx5r_rep_cleanup(void)
273 {
274 	auxiliary_driver_unregister(&mlx5r_rep_driver);
275 }
276