// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
 */

#include <linux/mlx5/lag.h>
#include <linux/mlx5/vport.h>
#include "ib_rep.h"
#include "srq.h"

static int
mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
		      struct mlx5_core_dev *rep_dev,
		      struct mlx5_eswitch_rep *rep,
		      int vport_index)
{
	struct mlx5_ib_dev *ibdev;
	struct net_device *ndev;
	int ret;

	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
	if (!ibdev)
		return -EINVAL;

	ibdev->port[vport_index].rep = rep;
	rep->rep_data[REP_IB].priv = ibdev;
	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);

	ret = ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
	if (ret)
		return ret;

	/* Only vports that are not native to the LAG master eswitch need
	 * a demux rule.
	 */
	if (mlx5_eswitch_get_total_vports(dev) > vport_index)
		return 0;

	return mlx5_lag_demux_rule_add(rep_dev, rep->vport, vport_index);
}

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);

/* Add the port count contributed by each LAG peer to the master's IB device */
static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
{
	struct mlx5_core_dev *peer_dev;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);

		if (mlx5_lag_is_mpesw(peer_dev))
			*num_ports += peer_num_ports;
		else
			/* Only 1 ib port is the representor for all uplinks */
			*num_ports += peer_num_ports - 1;
	}
}

static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
				       struct mlx5_core_dev *new_owner)
{
	int ret;

	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
		return 0;

	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
		return 0;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_TX);
	if (ret)
		return ret;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_RX);
	if (ret) {
		/* Roll back TX ownership if RX handover fails */
		mlx5_fs_set_root_dev(cur_owner, cur_owner,
				     FS_FT_RDMA_TRANSPORT_TX);
		return ret;
	}

	return 0;
}

/* Return each peer to being the owner of its own RDMA transport tables */
static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int i, ret;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
		WARN_ON_ONCE(ret);
	}
}

/* Make @dev the owner of every LAG peer's RDMA transport tables */
static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int ret;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
		if (ret) {
			mlx5_ib_release_transport(dev);
			return ret;
		}
	}

	return 0;
}
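
/*
 * Under a shared FDB LAG the master device exposes a single IB device
 * whose ports span the vports of every LAG member, so a rep loaded on a
 * slave shifts its vport_index past the ports contributed by each peer
 * that precedes it in LAG sequence order. For example (illustrative
 * numbers only): two non-MPESW PFs with three vports each yield a
 * master IB device of 3 + (3 - 1) = 5 ports, the peer's uplink being
 * folded into the single shared uplink port.
 */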
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
	struct mlx5_core_dev *lag_master = dev;
	const struct mlx5_ib_profile *profile;
	struct mlx5_core_dev *peer_dev;
	struct mlx5_ib_dev *ibdev;
	int new_uplink = false;
	int vport_index;
	int ret;
	int i;

	vport_index = rep->vport_index;

	if (mlx5_lag_is_shared_fdb(dev)) {
		if (mlx5_lag_is_master(dev)) {
			mlx5_ib_num_ports_update(dev, &num_ports);
		} else {
			if (rep->vport == MLX5_VPORT_UPLINK) {
				if (!mlx5_lag_is_mpesw(dev))
					return 0;
				new_uplink = true;
			}
			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);

				if (mlx5_lag_is_master(peer_dev))
					lag_master = peer_dev;
				else if (!mlx5_lag_is_mpesw(peer_dev))
					/* Only 1 ib port is the representor for all uplinks */
					peer_n_ports--;

				if (mlx5_lag_get_dev_seq(peer_dev) <
				    mlx5_lag_get_dev_seq(dev))
					vport_index += peer_n_ports;
			}
		}
	}

	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
		profile = &raw_eth_profile;
	else
		return mlx5_ib_set_vport_rep(lag_master, dev, rep,
					     vport_index);

	if (mlx5_lag_is_shared_fdb(dev)) {
		ret = mlx5_ib_take_transport(lag_master);
		if (ret)
			return ret;
	}

	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
					 mlx5_core_net(lag_master));
	if (!ibdev) {
		ret = -ENOMEM;
		goto release_transport;
	}

	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
			      GFP_KERNEL);
	if (!ibdev->port) {
		ret = -ENOMEM;
		goto fail_port;
	}

	ibdev->is_rep = true;
	vport_index = rep->vport_index;
	ibdev->port[vport_index].rep = rep;
	ibdev->mdev = lag_master;
	ibdev->num_ports = num_ports;
	ibdev->ib_dev.phys_port_cnt = num_ports;
	ret = ib_device_set_netdev(&ibdev->ib_dev,
				   mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
							  rep->vport),
				   vport_index + 1);
	if (ret)
		goto fail_add;

	ret = __mlx5_ib_add(ibdev, profile);
	if (ret)
		goto fail_add;

	rep->rep_data[REP_IB].priv = ibdev;
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_register_peer_vport_reps(lag_master);

	return 0;

fail_add:
	kfree(ibdev->port);
fail_port:
	ib_dealloc_device(&ibdev->ib_dev);
release_transport:
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_release_transport(lag_master);

	return ret;
}

static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
	return rep->rep_data[REP_IB].priv;
}

static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
	int vport_index = rep->vport_index;
	struct mlx5_ib_port *port;
	int i;

	if (WARN_ON(!mdev))
		return;

	if (!dev)
		return;

	if (mlx5_lag_is_shared_fdb(mdev) &&
	    !mlx5_lag_is_master(mdev)) {
		if (rep->vport == MLX5_VPORT_UPLINK &&
		    !mlx5_lag_is_mpesw(mdev))
			return;
		/* On a slave the rep's port index was shifted at load time,
		 * so find it by scanning the master's port array.
		 */
		for (i = 0; i < dev->num_ports; i++) {
			if (dev->port[i].rep == rep)
				break;
		}
		if (WARN_ON(i == dev->num_ports))
			return;
		vport_index = i;
	}

	mlx5_lag_demux_rule_del(mdev, vport_index);

	port = &dev->port[vport_index];

	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
	rep->rep_data[REP_IB].priv = NULL;
	port->rep = NULL;

	if (rep->vport == MLX5_VPORT_UPLINK) {
		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
			return;

		if (mlx5_lag_is_shared_fdb(mdev)) {
			struct mlx5_core_dev *peer_mdev;
			struct mlx5_eswitch *esw;

			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
				esw = peer_mdev->priv.eswitch;
				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
			}
			mlx5_ib_release_transport(mdev);
		}
		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
	}
}
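
/*
 * Representor callbacks invoked by the eswitch offloads core when a
 * vport representor comes or goes; get_proto_dev resolves a rep to the
 * mlx5_ib device that backs it.
 */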
static const struct mlx5_eswitch_rep_ops rep_ops = {
	.load = mlx5_ib_vport_rep_load,
	.unload = mlx5_ib_vport_rep_unload,
	.get_proto_dev = mlx5_ib_rep_to_dev,
};

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_core_dev *peer_mdev;
	struct mlx5_eswitch *esw;
	int i;

	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
		esw = peer_mdev->priv.eswitch;
		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	}
}

struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
					  u16 vport_num)
{
	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}

struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_sq *sq,
						   u32 port)
{
	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
	struct mlx5_eswitch_rep *rep;

	if (!dev->is_rep || !port)
		return NULL;

	if (!dev->port[port - 1].rep)
		return ERR_PTR(-EINVAL);

	rep = dev->port[port - 1].rep;

	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
						   sq->base.mqp.qpn);
}

static int mlx5r_rep_probe(struct auxiliary_device *adev,
			   const struct auxiliary_device_id *id)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	return 0;
}

static void mlx5r_rep_remove(struct auxiliary_device *adev)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}

static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);

static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};

int mlx5r_rep_init(void)
{
	return auxiliary_driver_register(&mlx5r_rep_driver);
}

void mlx5r_rep_cleanup(void)
{
	auxiliary_driver_unregister(&mlx5r_rep_driver);
}