// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved.
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

/*
 * rxe_mcast.c implements driver support for multicast transport.
 * It is based on two data structures, struct rxe_mcg ('mcg') and
 * struct rxe_mca ('mca'). An mcg is allocated the first time a qp
 * is attached to a new mgid. The mcg's are indexed by a red-black
 * tree keyed on the mgid. This tree is searched for the mcg when a
 * multicast packet is received and when another qp is attached to
 * the same mgid. The mcg is cleaned up when the last qp is detached
 * from it. Each time a qp is attached to an mcg an mca is created.
 * It holds a pointer to the qp and is added to the list of qp's that
 * are attached to the mcg. The qp_list is used to replicate mcast
 * packets in the rxe receive path.
 */

#include "rxe.h"

/**
 * rxe_mcast_add - add multicast address to rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns 0 on success else an error
 */
static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];
	struct net_device *ndev;
	int ret;

	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
	if (!ndev)
		return -ENODEV;

	/* map the mgid to an ethernet multicast MAC address */
	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	ret = dev_mc_add(ndev, ll_addr);
	dev_put(ndev);

	return ret;
}

/**
 * rxe_mcast_del - delete multicast address from rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns 0 on success else an error
 */
static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];
	struct net_device *ndev;
	int ret;

	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
	if (!ndev)
		return -ENODEV;

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	ret = dev_mc_del(ndev, ll_addr);
	dev_put(ndev);

	return ret;
}
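/*
 * Worked example (illustration only, not driver logic): ipv6_eth_mc_map()
 * builds the link-layer address from the fixed 33:33 prefix followed by
 * the low 32 bits of the mgid. So, assuming an mgid of ff0e::0102:0304,
 * the address passed to dev_mc_add()/dev_mc_del() above would be
 * 33:33:01:02:03:04.
 */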
/**
 * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
 * @mcg: mcg object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock and
 * is responsible for not adding the same mcg twice to the tree.
 */
static void __rxe_insert_mcg(struct rxe_mcg *mcg)
{
	struct rb_root *tree = &mcg->rxe->mcg_tree;
	struct rb_node **link = &tree->rb_node;
	struct rb_node *node = NULL;
	struct rxe_mcg *tmp;
	int cmp;

	while (*link) {
		node = *link;
		tmp = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
		if (cmp > 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	rb_link_node(&mcg->node, node, link);
	rb_insert_color(&mcg->node, tree);
}

/**
 * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
 * @mcg: mcast group object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_remove_mcg(struct rxe_mcg *mcg)
{
	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
}

/**
 * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Context: caller must hold rxe->mcg_lock
 * Returns: mcg on success and takes a ref to mcg else NULL
 */
static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
					union ib_gid *mgid)
{
	struct rb_root *tree = &rxe->mcg_tree;
	struct rxe_mcg *mcg;
	struct rb_node *node;
	int cmp;

	node = tree->rb_node;

	while (node) {
		mcg = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));

		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else
			break;
	}

	if (node) {
		kref_get(&mcg->ref_cnt);
		return mcg;
	}

	return NULL;
}

/**
 * rxe_lookup_mcg - look up mcg in red-black tree
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Returns: mcg if found else NULL
 */
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg;

	spin_lock_bh(&rxe->mcg_lock);
	mcg = __rxe_lookup_mcg(rxe, mgid);
	spin_unlock_bh(&rxe->mcg_lock);

	return mcg;
}
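/*
 * Usage sketch (illustration only, modeled on the rxe receive path):
 * the reference returned by rxe_lookup_mcg() belongs to the caller,
 * who must drop it with kref_put() when done with the group:
 *
 *	mcg = rxe_lookup_mcg(rxe, &dgid);
 *	if (mcg) {
 *		... replicate the packet to each qp on mcg->qp_list ...
 *		kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
 *	}
 */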
/**
 * __rxe_init_mcg - initialize a new mcg
 * @rxe: rxe device
 * @mgid: multicast address as a gid
 * @mcg: new mcg object
 *
 * Context: caller should hold rxe->mcg_lock
 */
static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
			   struct rxe_mcg *mcg)
{
	kref_init(&mcg->ref_cnt);
	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
	INIT_LIST_HEAD(&mcg->qp_list);
	mcg->rxe = rxe;

	/* caller holds a ref on mcg but that will be
	 * dropped when mcg goes out of scope. We need to take a ref
	 * on the pointer that will be saved in the red-black tree
	 * by __rxe_insert_mcg and used to look up mcg from mgid later.
	 * Inserting mcg makes it visible to the outside, so this
	 * should be done last, after the object is ready.
	 */
	kref_get(&mcg->ref_cnt);
	__rxe_insert_mcg(mcg);
}

/**
 * rxe_get_mcg - look up or allocate an mcg
 * @rxe: rxe device object
 * @mgid: multicast IP address as a gid
 *
 * Returns: mcg on success else ERR_PTR(error)
 */
static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg, *tmp;
	int err;

	if (rxe->attr.max_mcast_grp == 0)
		return ERR_PTR(-EINVAL);

	/* check to see if mcg already exists */
	mcg = rxe_lookup_mcg(rxe, mgid);
	if (mcg)
		return mcg;

	/* check to see if we have reached limit */
	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
		err = -ENOMEM;
		goto err_dec;
	}

	/* speculative alloc of new mcg */
	mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
	if (!mcg) {
		err = -ENOMEM;
		goto err_dec;
	}

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just added it */
	tmp = __rxe_lookup_mcg(rxe, mgid);
	if (tmp) {
		spin_unlock_bh(&rxe->mcg_lock);
		atomic_dec(&rxe->mcg_num);
		kfree(mcg);
		return tmp;
	}

	__rxe_init_mcg(rxe, mgid, mcg);
	spin_unlock_bh(&rxe->mcg_lock);

	/* add mcast address outside of lock */
	err = rxe_mcast_add(rxe, mgid);
	if (!err)
		return mcg;

	kfree(mcg);
err_dec:
	atomic_dec(&rxe->mcg_num);
	return ERR_PTR(err);
}

/**
 * rxe_cleanup_mcg - cleanup mcg for kref_put
 * @kref: struct kref embedded in mcg
 */
void rxe_cleanup_mcg(struct kref *kref)
{
	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);

	kfree(mcg);
}

/**
 * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock
 * @mcg: the mcg object
 *
 * Context: caller is holding rxe->mcg_lock,
 * no qp's are attached to mcg
 */
static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	struct rxe_dev *rxe = mcg->rxe;

	/* remove mcg from red-black tree then drop ref */
	__rxe_remove_mcg(mcg);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	atomic_dec(&rxe->mcg_num);
}

/**
 * rxe_destroy_mcg - destroy mcg object
 * @mcg: the mcg object
 *
 * Context: no qp's are attached to mcg
 */
static void rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	/* delete mcast address outside of lock */
	rxe_mcast_del(mcg->rxe, &mcg->mgid);

	spin_lock_bh(&mcg->rxe->mcg_lock);
	__rxe_destroy_mcg(mcg);
	spin_unlock_bh(&mcg->rxe->mcg_lock);
}

/**
 * __rxe_init_mca - initialize a new mca holding lock
 * @qp: qp object
 * @mcg: mcg object
 * @mca: empty space for new mca
 *
 * Context: caller must hold references on qp and mcg, rxe->mcg_lock
 * and pass memory for new mca
 *
 * Returns: 0 on success else an error
 */
static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
			  struct rxe_mca *mca)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	int n;

	/* enforce the device-wide attach limit first ... */
	n = atomic_inc_return(&rxe->mcg_attach);
	if (n > rxe->attr.max_total_mcast_qp_attach) {
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	/* ... then the per-group attach limit */
	n = atomic_inc_return(&mcg->qp_num);
	if (n > rxe->attr.max_mcast_qp_attach) {
		atomic_dec(&mcg->qp_num);
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	atomic_inc(&qp->mcg_num);

	rxe_get(qp);
	mca->qp = qp;

	list_add_tail(&mca->qp_list, &mcg->qp_list);

	return 0;
}
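/*
 * Note on the pattern used in rxe_get_mcg() above and rxe_attach_mcg()
 * below (descriptive only): rxe->mcg_lock is a spinlock taken with
 * bottom halves disabled, so a sleeping GFP_KERNEL allocation cannot
 * happen while it is held. Both functions therefore allocate
 * speculatively outside the lock, retake the lock and re-check for a
 * concurrent insertion; if another thread won the race, the speculative
 * allocation is simply freed and the existing object is used instead.
 */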
/**
 * rxe_attach_mcg - attach qp to mcg if not already attached
 * @mcg: mcg object
 * @qp: qp object
 *
 * Context: caller must hold reference on qp and mcg.
 * Returns: 0 on success else an error
 */
static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;
	int err;

	/* check to see if the qp is already a member of the group */
	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}
	spin_unlock_bh(&rxe->mcg_lock);

	/* speculative alloc new mca without using GFP_ATOMIC */
	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
	if (!mca)
		return -ENOMEM;

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just attached qp */
	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
		if (tmp->qp == qp) {
			kfree(mca);
			err = 0;
			goto out;
		}
	}

	err = __rxe_init_mca(qp, mcg, mca);
	if (err)
		kfree(mca);
out:
	spin_unlock_bh(&rxe->mcg_lock);
	return err;
}

/**
 * __rxe_cleanup_mca - cleanup mca object holding lock
 * @mca: mca object
 * @mcg: mcg object
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
{
	list_del(&mca->qp_list);

	atomic_dec(&mcg->qp_num);
	atomic_dec(&mcg->rxe->mcg_attach);
	atomic_dec(&mca->qp->mcg_num);
	rxe_put(mca->qp);

	kfree(mca);
}
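/*
 * Invariant (descriptive note only): __rxe_cleanup_mca() undoes exactly
 * what __rxe_init_mca() did - one decrement each of rxe->mcg_attach,
 * mcg->qp_num and qp->mcg_num plus dropping the qp reference - so the
 * counters stay balanced across any sequence of attaches and detaches.
 */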
/**
 * rxe_detach_mcg - detach qp from mcg
 * @mcg: mcg object
 * @qp: qp object
 *
 * Returns: 0 on success else an error if qp is not attached.
 */
static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;

	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			__rxe_cleanup_mca(mca, mcg);

			/* if the number of qp's attached to the
			 * mcast group falls to zero go ahead and
			 * tear it down. This will not free the
			 * object since we are still holding a ref
			 * from the caller
			 */
			if (atomic_read(&mcg->qp_num) <= 0)
				__rxe_destroy_mcg(mcg);

			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}

	/* we didn't find the qp on the list */
	spin_unlock_bh(&rxe->mcg_lock);
	return -EINVAL;
}

/**
 * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1)
 * @ibqp: (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;

	/* takes a ref on mcg if successful */
	mcg = rxe_get_mcg(rxe, mgid);
	if (IS_ERR(mcg))
		return PTR_ERR(mcg);

	err = rxe_attach_mcg(mcg, qp);

	/* if we failed to attach the first qp to mcg tear it down */
	if (atomic_read(&mcg->qp_num) == 0)
		rxe_destroy_mcg(mcg);

	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}

/**
 * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2)
 * @ibqp: address of (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;
	int err;

	mcg = rxe_lookup_mcg(rxe, mgid);
	if (!mcg)
		return -EINVAL;

	err = rxe_detach_mcg(mcg, qp);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}
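/*
 * Usage sketch (illustration only, not part of this file): the two
 * verbs above are reached from user space through libibverbs, e.g.
 *
 *	union ibv_gid mgid = { ... };	/- the multicast GID to join -/
 *	err = ibv_attach_mcast(qp, &mgid, 0);	/- lands in rxe_attach_mcast() -/
 *	...
 *	err = ibv_detach_mcast(qp, &mgid, 0);	/- lands in rxe_detach_mcast() -/
 *
 * (inner comment delimiters written as /- -/ to keep this block a
 * valid C comment). The lid argument is ignored for RoCEv2, matching
 * the mlid handling documented in the kernel-doc above.
 */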