1 /* 2 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/completion.h> 34 #include <linux/file.h> 35 #include <linux/mutex.h> 36 #include <linux/poll.h> 37 #include <linux/sched.h> 38 #include <linux/idr.h> 39 #include <linux/in.h> 40 #include <linux/in6.h> 41 #include <linux/miscdevice.h> 42 #include <linux/slab.h> 43 #include <linux/sysctl.h> 44 #include <linux/module.h> 45 #include <linux/nsproxy.h> 46 47 #include <linux/nospec.h> 48 49 #include <rdma/rdma_user_cm.h> 50 #include <rdma/ib_marshall.h> 51 #include <rdma/rdma_cm.h> 52 #include <rdma/rdma_cm_ib.h> 53 #include <rdma/ib_addr.h> 54 #include <rdma/ib.h> 55 #include <rdma/ib_cm.h> 56 #include <rdma/rdma_netlink.h> 57 #include "core_priv.h" 58 59 MODULE_AUTHOR("Sean Hefty"); 60 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); 61 MODULE_LICENSE("Dual BSD/GPL"); 62 63 static unsigned int max_backlog = 1024; 64 65 static struct ctl_table_header *ucma_ctl_table_hdr; 66 static struct ctl_table ucma_ctl_table[] = { 67 { 68 .procname = "max_backlog", 69 .data = &max_backlog, 70 .maxlen = sizeof max_backlog, 71 .mode = 0644, 72 .proc_handler = proc_dointvec, 73 }, 74 { } 75 }; 76 77 struct ucma_file { 78 struct mutex mut; 79 struct file *filp; 80 struct list_head ctx_list; 81 struct list_head event_list; 82 wait_queue_head_t poll_wait; 83 }; 84 85 struct ucma_context { 86 u32 id; 87 struct completion comp; 88 refcount_t ref; 89 int events_reported; 90 atomic_t backlog; 91 92 struct ucma_file *file; 93 struct rdma_cm_id *cm_id; 94 struct mutex mutex; 95 u64 uid; 96 97 struct list_head list; 98 struct list_head mc_list; 99 struct work_struct close_work; 100 }; 101 102 struct ucma_multicast { 103 struct ucma_context *ctx; 104 u32 id; 105 int events_reported; 106 107 u64 uid; 108 u8 join_state; 109 struct list_head list; 110 struct sockaddr_storage addr; 111 }; 112 113 struct ucma_event { 114 struct ucma_context *ctx; 115 struct ucma_context *conn_req_ctx; 116 struct ucma_multicast *mc; 117 struct list_head list; 118 struct rdma_ucm_event_resp resp; 119 }; 120 121 static DEFINE_XARRAY_ALLOC(ctx_table); 122 static DEFINE_XARRAY_ALLOC(multicast_table); 123 124 
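/*
 * Note on the two xarrays defined above (comment added for clarity, derived
 * from the code below): ctx_table maps the u32 id handed out to userspace to
 * its ucma_context, and multicast_table does the same for ucma_multicast.
 * Entries are allocated with a NULL/zero entry first and only store the real
 * pointer once the object is fully initialized, so concurrent lookups never
 * observe a partially constructed context.
 */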
static const struct file_operations ucma_fops;
static int ucma_destroy_private_ctx(struct ucma_context *ctx);

static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx))
		if (!refcount_inc_not_zero(&ctx->ref))
			ctx = ERR_PTR(-ENXIO);
	xa_unlock(&ctx_table);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx() but also requires that ->cm_id->device is valid,
 * i.e. that the CM_ID is bound to a device.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/*
	 * Once all in-flight tasks are finished, we close all underlying
	 * resources. The context stays alive until it is explicitly destroyed
	 * by its creator. This puts back the xarray's reference.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id.
*/ 187 rdma_destroy_id(ctx->cm_id); 188 189 /* Reading the cm_id without holding a positive ref is not allowed */ 190 ctx->cm_id = NULL; 191 } 192 193 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) 194 { 195 struct ucma_context *ctx; 196 197 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 198 if (!ctx) 199 return NULL; 200 201 INIT_WORK(&ctx->close_work, ucma_close_id); 202 init_completion(&ctx->comp); 203 INIT_LIST_HEAD(&ctx->mc_list); 204 /* So list_del() will work if we don't do ucma_finish_ctx() */ 205 INIT_LIST_HEAD(&ctx->list); 206 ctx->file = file; 207 mutex_init(&ctx->mutex); 208 209 if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) { 210 kfree(ctx); 211 return NULL; 212 } 213 return ctx; 214 } 215 216 static void ucma_set_ctx_cm_id(struct ucma_context *ctx, 217 struct rdma_cm_id *cm_id) 218 { 219 refcount_set(&ctx->ref, 1); 220 ctx->cm_id = cm_id; 221 } 222 223 static void ucma_finish_ctx(struct ucma_context *ctx) 224 { 225 lockdep_assert_held(&ctx->file->mut); 226 list_add_tail(&ctx->list, &ctx->file->ctx_list); 227 xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL); 228 } 229 230 static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, 231 struct rdma_conn_param *src) 232 { 233 if (src->private_data_len) 234 memcpy(dst->private_data, src->private_data, 235 src->private_data_len); 236 dst->private_data_len = src->private_data_len; 237 dst->responder_resources = src->responder_resources; 238 dst->initiator_depth = src->initiator_depth; 239 dst->flow_control = src->flow_control; 240 dst->retry_count = src->retry_count; 241 dst->rnr_retry_count = src->rnr_retry_count; 242 dst->srq = src->srq; 243 dst->qp_num = src->qp_num; 244 } 245 246 static void ucma_copy_ud_event(struct ib_device *device, 247 struct rdma_ucm_ud_param *dst, 248 struct rdma_ud_param *src) 249 { 250 if (src->private_data_len) 251 memcpy(dst->private_data, src->private_data, 252 src->private_data_len); 253 dst->private_data_len = src->private_data_len; 254 ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); 255 dst->qp_num = src->qp_num; 256 dst->qkey = src->qkey; 257 } 258 259 static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, 260 struct rdma_cm_event *event) 261 { 262 struct ucma_event *uevent; 263 264 uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); 265 if (!uevent) 266 return NULL; 267 268 uevent->ctx = ctx; 269 switch (event->event) { 270 case RDMA_CM_EVENT_MULTICAST_JOIN: 271 case RDMA_CM_EVENT_MULTICAST_ERROR: 272 uevent->mc = (struct ucma_multicast *) 273 event->param.ud.private_data; 274 uevent->resp.uid = uevent->mc->uid; 275 uevent->resp.id = uevent->mc->id; 276 break; 277 default: 278 uevent->resp.uid = ctx->uid; 279 uevent->resp.id = ctx->id; 280 break; 281 } 282 uevent->resp.event = event->event; 283 uevent->resp.status = event->status; 284 if (ctx->cm_id->qp_type == IB_QPT_UD) 285 ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud, 286 &event->param.ud); 287 else 288 ucma_copy_conn_event(&uevent->resp.param.conn, 289 &event->param.conn); 290 291 uevent->resp.ece.vendor_id = event->ece.vendor_id; 292 uevent->resp.ece.attr_mod = event->ece.attr_mod; 293 return uevent; 294 } 295 296 static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, 297 struct rdma_cm_event *event) 298 { 299 struct ucma_context *listen_ctx = cm_id->context; 300 struct ucma_context *ctx; 301 struct ucma_event *uevent; 302 303 if (!atomic_add_unless(&listen_ctx->backlog, -1, 0)) 304 return -ENOMEM; 305 ctx = ucma_alloc_ctx(listen_ctx->file); 306 if 
(!ctx) 307 goto err_backlog; 308 ucma_set_ctx_cm_id(ctx, cm_id); 309 310 uevent = ucma_create_uevent(listen_ctx, event); 311 if (!uevent) 312 goto err_alloc; 313 uevent->conn_req_ctx = ctx; 314 uevent->resp.id = ctx->id; 315 316 ctx->cm_id->context = ctx; 317 318 mutex_lock(&ctx->file->mut); 319 ucma_finish_ctx(ctx); 320 list_add_tail(&uevent->list, &ctx->file->event_list); 321 mutex_unlock(&ctx->file->mut); 322 wake_up_interruptible(&ctx->file->poll_wait); 323 return 0; 324 325 err_alloc: 326 ucma_destroy_private_ctx(ctx); 327 err_backlog: 328 atomic_inc(&listen_ctx->backlog); 329 /* Returning error causes the new ID to be destroyed */ 330 return -ENOMEM; 331 } 332 333 static int ucma_event_handler(struct rdma_cm_id *cm_id, 334 struct rdma_cm_event *event) 335 { 336 struct ucma_event *uevent; 337 struct ucma_context *ctx = cm_id->context; 338 339 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) 340 return ucma_connect_event_handler(cm_id, event); 341 342 /* 343 * We ignore events for new connections until userspace has set their 344 * context. This can only happen if an error occurs on a new connection 345 * before the user accepts it. This is okay, since the accept will just 346 * fail later. However, we do need to release the underlying HW 347 * resources in case of a device removal event. 348 */ 349 if (ctx->uid) { 350 uevent = ucma_create_uevent(ctx, event); 351 if (!uevent) 352 return 0; 353 354 mutex_lock(&ctx->file->mut); 355 list_add_tail(&uevent->list, &ctx->file->event_list); 356 mutex_unlock(&ctx->file->mut); 357 wake_up_interruptible(&ctx->file->poll_wait); 358 } 359 360 if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { 361 xa_lock(&ctx_table); 362 if (xa_load(&ctx_table, ctx->id) == ctx) 363 queue_work(system_unbound_wq, &ctx->close_work); 364 xa_unlock(&ctx_table); 365 } 366 return 0; 367 } 368 369 static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, 370 int in_len, int out_len) 371 { 372 struct rdma_ucm_get_event cmd; 373 struct ucma_event *uevent; 374 375 /* 376 * Old 32 bit user space does not send the 4 byte padding in the 377 * reserved field. We don't care, allow it to keep working. 
378 */ 379 if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) - 380 sizeof(uevent->resp.ece)) 381 return -ENOSPC; 382 383 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 384 return -EFAULT; 385 386 mutex_lock(&file->mut); 387 while (list_empty(&file->event_list)) { 388 mutex_unlock(&file->mut); 389 390 if (file->filp->f_flags & O_NONBLOCK) 391 return -EAGAIN; 392 393 if (wait_event_interruptible(file->poll_wait, 394 !list_empty(&file->event_list))) 395 return -ERESTARTSYS; 396 397 mutex_lock(&file->mut); 398 } 399 400 uevent = list_first_entry(&file->event_list, struct ucma_event, list); 401 402 if (copy_to_user(u64_to_user_ptr(cmd.response), 403 &uevent->resp, 404 min_t(size_t, out_len, sizeof(uevent->resp)))) { 405 mutex_unlock(&file->mut); 406 return -EFAULT; 407 } 408 409 list_del(&uevent->list); 410 uevent->ctx->events_reported++; 411 if (uevent->mc) 412 uevent->mc->events_reported++; 413 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) 414 atomic_inc(&uevent->ctx->backlog); 415 mutex_unlock(&file->mut); 416 417 kfree(uevent); 418 return 0; 419 } 420 421 static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) 422 { 423 switch (cmd->ps) { 424 case RDMA_PS_TCP: 425 *qp_type = IB_QPT_RC; 426 return 0; 427 case RDMA_PS_UDP: 428 case RDMA_PS_IPOIB: 429 *qp_type = IB_QPT_UD; 430 return 0; 431 case RDMA_PS_IB: 432 *qp_type = cmd->qp_type; 433 return 0; 434 default: 435 return -EINVAL; 436 } 437 } 438 439 static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, 440 int in_len, int out_len) 441 { 442 struct rdma_ucm_create_id cmd; 443 struct rdma_ucm_create_id_resp resp; 444 struct ucma_context *ctx; 445 struct rdma_cm_id *cm_id; 446 enum ib_qp_type qp_type; 447 int ret; 448 449 if (out_len < sizeof(resp)) 450 return -ENOSPC; 451 452 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 453 return -EFAULT; 454 455 ret = ucma_get_qp_type(&cmd, &qp_type); 456 if (ret) 457 return ret; 458 459 ctx = ucma_alloc_ctx(file); 460 if (!ctx) 461 return -ENOMEM; 462 463 ctx->uid = cmd.uid; 464 cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type); 465 if (IS_ERR(cm_id)) { 466 ret = PTR_ERR(cm_id); 467 goto err1; 468 } 469 ucma_set_ctx_cm_id(ctx, cm_id); 470 471 resp.id = ctx->id; 472 if (copy_to_user(u64_to_user_ptr(cmd.response), 473 &resp, sizeof(resp))) { 474 ret = -EFAULT; 475 goto err1; 476 } 477 478 mutex_lock(&file->mut); 479 ucma_finish_ctx(ctx); 480 mutex_unlock(&file->mut); 481 return 0; 482 483 err1: 484 ucma_destroy_private_ctx(ctx); 485 return ret; 486 } 487 488 static void ucma_cleanup_multicast(struct ucma_context *ctx) 489 { 490 struct ucma_multicast *mc, *tmp; 491 492 xa_lock(&multicast_table); 493 list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { 494 list_del(&mc->list); 495 /* 496 * At this point mc->ctx->ref is 0 so the mc cannot leave the 497 * lock on the reader and this is enough serialization 498 */ 499 __xa_erase(&multicast_table, mc->id); 500 kfree(mc); 501 } 502 xa_unlock(&multicast_table); 503 } 504 505 static void ucma_cleanup_mc_events(struct ucma_multicast *mc) 506 { 507 struct ucma_event *uevent, *tmp; 508 509 rdma_lock_handler(mc->ctx->cm_id); 510 mutex_lock(&mc->ctx->file->mut); 511 list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { 512 if (uevent->mc != mc) 513 continue; 514 515 list_del(&uevent->list); 516 kfree(uevent); 517 } 518 mutex_unlock(&mc->ctx->file->mut); 519 rdma_unlock_handler(mc->ctx->cm_id); 520 } 521 522 static int 
ucma_cleanup_ctx_events(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	/* Clean up events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx != ctx)
			continue;

		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
		    xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
			       uevent->conn_req_ctx, XA_ZERO_ENTRY,
			       GFP_KERNEL) == uevent->conn_req_ctx) {
			list_move_tail(&uevent->list, &list);
			continue;
		}
		list_del(&uevent->list);
		kfree(uevent);
	}
	list_del(&ctx->list);
	events_reported = ctx->events_reported;
	mutex_unlock(&ctx->file->mut);

	/*
	 * If this was a listening ID then any connections spawned from it that
	 * have not been delivered to userspace are cleaned up too. Must be done
	 * outside any locks.
	 */
	list_for_each_entry_safe(uevent, tmp, &list, list) {
		ucma_destroy_private_ctx(uevent->conn_req_ctx);
		kfree(uevent);
	}
	return events_reported;
}

/*
 * When this is called the xarray must have an XA_ZERO_ENTRY at ctx->id (i.e.
 * the ctx is not visible to userspace). This is either because:
 * - ucma_finish_ctx() hasn't been called
 * - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
 */
static int ucma_destroy_private_ctx(struct ucma_context *ctx)
{
	int events_reported;

	/*
	 * Destroy the underlying cm_id. New work queuing is prevented now by
	 * the removal from the xarray. Once the work is cancelled ref will either
	 * be 0 because the work ran to completion and consumed the ref from the
	 * xarray, or it will be positive because we still have the ref from the
	 * xarray.
This can also be 0 in cases where cm_id was never set 576 */ 577 cancel_work_sync(&ctx->close_work); 578 if (refcount_read(&ctx->ref)) 579 ucma_close_id(&ctx->close_work); 580 581 events_reported = ucma_cleanup_ctx_events(ctx); 582 ucma_cleanup_multicast(ctx); 583 584 WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, 585 GFP_KERNEL) != NULL); 586 mutex_destroy(&ctx->mutex); 587 kfree(ctx); 588 return events_reported; 589 } 590 591 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, 592 int in_len, int out_len) 593 { 594 struct rdma_ucm_destroy_id cmd; 595 struct rdma_ucm_destroy_id_resp resp; 596 struct ucma_context *ctx; 597 int ret = 0; 598 599 if (out_len < sizeof(resp)) 600 return -ENOSPC; 601 602 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 603 return -EFAULT; 604 605 xa_lock(&ctx_table); 606 ctx = _ucma_find_context(cmd.id, file); 607 if (!IS_ERR(ctx)) { 608 if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, 609 GFP_KERNEL) != ctx) 610 ctx = ERR_PTR(-ENOENT); 611 } 612 xa_unlock(&ctx_table); 613 614 if (IS_ERR(ctx)) 615 return PTR_ERR(ctx); 616 617 resp.events_reported = ucma_destroy_private_ctx(ctx); 618 if (copy_to_user(u64_to_user_ptr(cmd.response), 619 &resp, sizeof(resp))) 620 ret = -EFAULT; 621 622 return ret; 623 } 624 625 static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, 626 int in_len, int out_len) 627 { 628 struct rdma_ucm_bind_ip cmd; 629 struct ucma_context *ctx; 630 int ret; 631 632 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 633 return -EFAULT; 634 635 if (!rdma_addr_size_in6(&cmd.addr)) 636 return -EINVAL; 637 638 ctx = ucma_get_ctx(file, cmd.id); 639 if (IS_ERR(ctx)) 640 return PTR_ERR(ctx); 641 642 mutex_lock(&ctx->mutex); 643 ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); 644 mutex_unlock(&ctx->mutex); 645 646 ucma_put_ctx(ctx); 647 return ret; 648 } 649 650 static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, 651 int in_len, int out_len) 652 { 653 struct rdma_ucm_bind cmd; 654 struct ucma_context *ctx; 655 int ret; 656 657 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 658 return -EFAULT; 659 660 if (cmd.reserved || !cmd.addr_size || 661 cmd.addr_size != rdma_addr_size_kss(&cmd.addr)) 662 return -EINVAL; 663 664 ctx = ucma_get_ctx(file, cmd.id); 665 if (IS_ERR(ctx)) 666 return PTR_ERR(ctx); 667 668 mutex_lock(&ctx->mutex); 669 ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); 670 mutex_unlock(&ctx->mutex); 671 ucma_put_ctx(ctx); 672 return ret; 673 } 674 675 static ssize_t ucma_resolve_ip(struct ucma_file *file, 676 const char __user *inbuf, 677 int in_len, int out_len) 678 { 679 struct rdma_ucm_resolve_ip cmd; 680 struct ucma_context *ctx; 681 int ret; 682 683 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 684 return -EFAULT; 685 686 if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) || 687 !rdma_addr_size_in6(&cmd.dst_addr)) 688 return -EINVAL; 689 690 ctx = ucma_get_ctx(file, cmd.id); 691 if (IS_ERR(ctx)) 692 return PTR_ERR(ctx); 693 694 mutex_lock(&ctx->mutex); 695 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 696 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 697 mutex_unlock(&ctx->mutex); 698 ucma_put_ctx(ctx); 699 return ret; 700 } 701 702 static ssize_t ucma_resolve_addr(struct ucma_file *file, 703 const char __user *inbuf, 704 int in_len, int out_len) 705 { 706 struct rdma_ucm_resolve_addr cmd; 707 struct ucma_context *ctx; 708 int ret; 709 710 if (copy_from_user(&cmd, inbuf, 
sizeof(cmd))) 711 return -EFAULT; 712 713 if (cmd.reserved || 714 (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) || 715 !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr))) 716 return -EINVAL; 717 718 ctx = ucma_get_ctx(file, cmd.id); 719 if (IS_ERR(ctx)) 720 return PTR_ERR(ctx); 721 722 mutex_lock(&ctx->mutex); 723 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 724 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 725 mutex_unlock(&ctx->mutex); 726 ucma_put_ctx(ctx); 727 return ret; 728 } 729 730 static ssize_t ucma_resolve_route(struct ucma_file *file, 731 const char __user *inbuf, 732 int in_len, int out_len) 733 { 734 struct rdma_ucm_resolve_route cmd; 735 struct ucma_context *ctx; 736 int ret; 737 738 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 739 return -EFAULT; 740 741 ctx = ucma_get_ctx_dev(file, cmd.id); 742 if (IS_ERR(ctx)) 743 return PTR_ERR(ctx); 744 745 mutex_lock(&ctx->mutex); 746 ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); 747 mutex_unlock(&ctx->mutex); 748 ucma_put_ctx(ctx); 749 return ret; 750 } 751 752 static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, 753 struct rdma_route *route) 754 { 755 struct rdma_dev_addr *dev_addr; 756 757 resp->num_paths = route->num_paths; 758 switch (route->num_paths) { 759 case 0: 760 dev_addr = &route->addr.dev_addr; 761 rdma_addr_get_dgid(dev_addr, 762 (union ib_gid *) &resp->ib_route[0].dgid); 763 rdma_addr_get_sgid(dev_addr, 764 (union ib_gid *) &resp->ib_route[0].sgid); 765 resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 766 break; 767 case 2: 768 ib_copy_path_rec_to_user(&resp->ib_route[1], 769 &route->path_rec[1]); 770 fallthrough; 771 case 1: 772 ib_copy_path_rec_to_user(&resp->ib_route[0], 773 &route->path_rec[0]); 774 break; 775 default: 776 break; 777 } 778 } 779 780 static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, 781 struct rdma_route *route) 782 { 783 784 resp->num_paths = route->num_paths; 785 switch (route->num_paths) { 786 case 0: 787 rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, 788 (union ib_gid *)&resp->ib_route[0].dgid); 789 rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, 790 (union ib_gid *)&resp->ib_route[0].sgid); 791 resp->ib_route[0].pkey = cpu_to_be16(0xffff); 792 break; 793 case 2: 794 ib_copy_path_rec_to_user(&resp->ib_route[1], 795 &route->path_rec[1]); 796 fallthrough; 797 case 1: 798 ib_copy_path_rec_to_user(&resp->ib_route[0], 799 &route->path_rec[0]); 800 break; 801 default: 802 break; 803 } 804 } 805 806 static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, 807 struct rdma_route *route) 808 { 809 struct rdma_dev_addr *dev_addr; 810 811 dev_addr = &route->addr.dev_addr; 812 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); 813 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); 814 } 815 816 static ssize_t ucma_query_route(struct ucma_file *file, 817 const char __user *inbuf, 818 int in_len, int out_len) 819 { 820 struct rdma_ucm_query cmd; 821 struct rdma_ucm_query_route_resp resp; 822 struct ucma_context *ctx; 823 struct sockaddr *addr; 824 int ret = 0; 825 826 if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index)) 827 return -ENOSPC; 828 829 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 830 return -EFAULT; 831 832 ctx = ucma_get_ctx(file, cmd.id); 833 if (IS_ERR(ctx)) 834 return PTR_ERR(ctx); 835 836 mutex_lock(&ctx->mutex); 837 memset(&resp, 0, sizeof resp); 838 addr = (struct 
sockaddr *) &ctx->cm_id->route.addr.src_addr; 839 memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? 840 sizeof(struct sockaddr_in) : 841 sizeof(struct sockaddr_in6)); 842 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; 843 memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? 844 sizeof(struct sockaddr_in) : 845 sizeof(struct sockaddr_in6)); 846 if (!ctx->cm_id->device) 847 goto out; 848 849 resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; 850 resp.ibdev_index = ctx->cm_id->device->index; 851 resp.port_num = ctx->cm_id->port_num; 852 853 if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) 854 ucma_copy_ib_route(&resp, &ctx->cm_id->route); 855 else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) 856 ucma_copy_iboe_route(&resp, &ctx->cm_id->route); 857 else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) 858 ucma_copy_iw_route(&resp, &ctx->cm_id->route); 859 860 out: 861 mutex_unlock(&ctx->mutex); 862 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, 863 min_t(size_t, out_len, sizeof(resp)))) 864 ret = -EFAULT; 865 866 ucma_put_ctx(ctx); 867 return ret; 868 } 869 870 static void ucma_query_device_addr(struct rdma_cm_id *cm_id, 871 struct rdma_ucm_query_addr_resp *resp) 872 { 873 if (!cm_id->device) 874 return; 875 876 resp->node_guid = (__force __u64) cm_id->device->node_guid; 877 resp->ibdev_index = cm_id->device->index; 878 resp->port_num = cm_id->port_num; 879 resp->pkey = (__force __u16) cpu_to_be16( 880 ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); 881 } 882 883 static ssize_t ucma_query_addr(struct ucma_context *ctx, 884 void __user *response, int out_len) 885 { 886 struct rdma_ucm_query_addr_resp resp; 887 struct sockaddr *addr; 888 int ret = 0; 889 890 if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) 891 return -ENOSPC; 892 893 memset(&resp, 0, sizeof resp); 894 895 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; 896 resp.src_size = rdma_addr_size(addr); 897 memcpy(&resp.src_addr, addr, resp.src_size); 898 899 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; 900 resp.dst_size = rdma_addr_size(addr); 901 memcpy(&resp.dst_addr, addr, resp.dst_size); 902 903 ucma_query_device_addr(ctx->cm_id, &resp); 904 905 if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) 906 ret = -EFAULT; 907 908 return ret; 909 } 910 911 static ssize_t ucma_query_path(struct ucma_context *ctx, 912 void __user *response, int out_len) 913 { 914 struct rdma_ucm_query_path_resp *resp; 915 int i, ret = 0; 916 917 if (out_len < sizeof(*resp)) 918 return -ENOSPC; 919 920 resp = kzalloc(out_len, GFP_KERNEL); 921 if (!resp) 922 return -ENOMEM; 923 924 resp->num_paths = ctx->cm_id->route.num_paths; 925 for (i = 0, out_len -= sizeof(*resp); 926 i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); 927 i++, out_len -= sizeof(struct ib_path_rec_data)) { 928 struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i]; 929 930 resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | 931 IB_PATH_BIDIRECTIONAL; 932 if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { 933 struct sa_path_rec ib; 934 935 sa_convert_path_opa_to_ib(&ib, rec); 936 ib_sa_pack_path(&ib, &resp->path_data[i].path_rec); 937 938 } else { 939 ib_sa_pack_path(rec, &resp->path_data[i].path_rec); 940 } 941 } 942 943 if (copy_to_user(response, resp, struct_size(resp, path_data, i))) 944 ret = -EFAULT; 945 946 kfree(resp); 947 return ret; 948 } 949 950 static ssize_t ucma_query_gid(struct ucma_context 
*ctx, 951 void __user *response, int out_len) 952 { 953 struct rdma_ucm_query_addr_resp resp; 954 struct sockaddr_ib *addr; 955 int ret = 0; 956 957 if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) 958 return -ENOSPC; 959 960 memset(&resp, 0, sizeof resp); 961 962 ucma_query_device_addr(ctx->cm_id, &resp); 963 964 addr = (struct sockaddr_ib *) &resp.src_addr; 965 resp.src_size = sizeof(*addr); 966 if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { 967 memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); 968 } else { 969 addr->sib_family = AF_IB; 970 addr->sib_pkey = (__force __be16) resp.pkey; 971 rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr, 972 NULL); 973 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 974 &ctx->cm_id->route.addr.src_addr); 975 } 976 977 addr = (struct sockaddr_ib *) &resp.dst_addr; 978 resp.dst_size = sizeof(*addr); 979 if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { 980 memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); 981 } else { 982 addr->sib_family = AF_IB; 983 addr->sib_pkey = (__force __be16) resp.pkey; 984 rdma_read_gids(ctx->cm_id, NULL, 985 (union ib_gid *)&addr->sib_addr); 986 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 987 &ctx->cm_id->route.addr.dst_addr); 988 } 989 990 if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) 991 ret = -EFAULT; 992 993 return ret; 994 } 995 996 static ssize_t ucma_query(struct ucma_file *file, 997 const char __user *inbuf, 998 int in_len, int out_len) 999 { 1000 struct rdma_ucm_query cmd; 1001 struct ucma_context *ctx; 1002 void __user *response; 1003 int ret; 1004 1005 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1006 return -EFAULT; 1007 1008 response = u64_to_user_ptr(cmd.response); 1009 ctx = ucma_get_ctx(file, cmd.id); 1010 if (IS_ERR(ctx)) 1011 return PTR_ERR(ctx); 1012 1013 mutex_lock(&ctx->mutex); 1014 switch (cmd.option) { 1015 case RDMA_USER_CM_QUERY_ADDR: 1016 ret = ucma_query_addr(ctx, response, out_len); 1017 break; 1018 case RDMA_USER_CM_QUERY_PATH: 1019 ret = ucma_query_path(ctx, response, out_len); 1020 break; 1021 case RDMA_USER_CM_QUERY_GID: 1022 ret = ucma_query_gid(ctx, response, out_len); 1023 break; 1024 default: 1025 ret = -ENOSYS; 1026 break; 1027 } 1028 mutex_unlock(&ctx->mutex); 1029 1030 ucma_put_ctx(ctx); 1031 return ret; 1032 } 1033 1034 static void ucma_copy_conn_param(struct rdma_cm_id *id, 1035 struct rdma_conn_param *dst, 1036 struct rdma_ucm_conn_param *src) 1037 { 1038 dst->private_data = src->private_data; 1039 dst->private_data_len = src->private_data_len; 1040 dst->responder_resources = src->responder_resources; 1041 dst->initiator_depth = src->initiator_depth; 1042 dst->flow_control = src->flow_control; 1043 dst->retry_count = src->retry_count; 1044 dst->rnr_retry_count = src->rnr_retry_count; 1045 dst->srq = src->srq; 1046 dst->qp_num = src->qp_num & 0xFFFFFF; 1047 dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? 
src->qkey : 0; 1048 } 1049 1050 static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, 1051 int in_len, int out_len) 1052 { 1053 struct rdma_conn_param conn_param; 1054 struct rdma_ucm_ece ece = {}; 1055 struct rdma_ucm_connect cmd; 1056 struct ucma_context *ctx; 1057 size_t in_size; 1058 int ret; 1059 1060 if (in_len < offsetofend(typeof(cmd), reserved)) 1061 return -EINVAL; 1062 in_size = min_t(size_t, in_len, sizeof(cmd)); 1063 if (copy_from_user(&cmd, inbuf, in_size)) 1064 return -EFAULT; 1065 1066 if (!cmd.conn_param.valid) 1067 return -EINVAL; 1068 1069 ctx = ucma_get_ctx_dev(file, cmd.id); 1070 if (IS_ERR(ctx)) 1071 return PTR_ERR(ctx); 1072 1073 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1074 if (offsetofend(typeof(cmd), ece) <= in_size) { 1075 ece.vendor_id = cmd.ece.vendor_id; 1076 ece.attr_mod = cmd.ece.attr_mod; 1077 } 1078 1079 mutex_lock(&ctx->mutex); 1080 ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece); 1081 mutex_unlock(&ctx->mutex); 1082 ucma_put_ctx(ctx); 1083 return ret; 1084 } 1085 1086 static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, 1087 int in_len, int out_len) 1088 { 1089 struct rdma_ucm_listen cmd; 1090 struct ucma_context *ctx; 1091 int ret; 1092 1093 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1094 return -EFAULT; 1095 1096 ctx = ucma_get_ctx(file, cmd.id); 1097 if (IS_ERR(ctx)) 1098 return PTR_ERR(ctx); 1099 1100 if (cmd.backlog <= 0 || cmd.backlog > max_backlog) 1101 cmd.backlog = max_backlog; 1102 atomic_set(&ctx->backlog, cmd.backlog); 1103 1104 mutex_lock(&ctx->mutex); 1105 ret = rdma_listen(ctx->cm_id, cmd.backlog); 1106 mutex_unlock(&ctx->mutex); 1107 ucma_put_ctx(ctx); 1108 return ret; 1109 } 1110 1111 static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, 1112 int in_len, int out_len) 1113 { 1114 struct rdma_ucm_accept cmd; 1115 struct rdma_conn_param conn_param; 1116 struct rdma_ucm_ece ece = {}; 1117 struct ucma_context *ctx; 1118 size_t in_size; 1119 int ret; 1120 1121 if (in_len < offsetofend(typeof(cmd), reserved)) 1122 return -EINVAL; 1123 in_size = min_t(size_t, in_len, sizeof(cmd)); 1124 if (copy_from_user(&cmd, inbuf, in_size)) 1125 return -EFAULT; 1126 1127 ctx = ucma_get_ctx_dev(file, cmd.id); 1128 if (IS_ERR(ctx)) 1129 return PTR_ERR(ctx); 1130 1131 if (offsetofend(typeof(cmd), ece) <= in_size) { 1132 ece.vendor_id = cmd.ece.vendor_id; 1133 ece.attr_mod = cmd.ece.attr_mod; 1134 } 1135 1136 if (cmd.conn_param.valid) { 1137 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1138 mutex_lock(&ctx->mutex); 1139 rdma_lock_handler(ctx->cm_id); 1140 ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece); 1141 if (!ret) { 1142 /* The uid must be set atomically with the handler */ 1143 ctx->uid = cmd.uid; 1144 } 1145 rdma_unlock_handler(ctx->cm_id); 1146 mutex_unlock(&ctx->mutex); 1147 } else { 1148 mutex_lock(&ctx->mutex); 1149 rdma_lock_handler(ctx->cm_id); 1150 ret = rdma_accept_ece(ctx->cm_id, NULL, &ece); 1151 rdma_unlock_handler(ctx->cm_id); 1152 mutex_unlock(&ctx->mutex); 1153 } 1154 ucma_put_ctx(ctx); 1155 return ret; 1156 } 1157 1158 static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, 1159 int in_len, int out_len) 1160 { 1161 struct rdma_ucm_reject cmd; 1162 struct ucma_context *ctx; 1163 int ret; 1164 1165 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1166 return -EFAULT; 1167 1168 if (!cmd.reason) 1169 cmd.reason = IB_CM_REJ_CONSUMER_DEFINED; 1170 1171 switch (cmd.reason) { 1172 case 
IB_CM_REJ_CONSUMER_DEFINED: 1173 case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED: 1174 break; 1175 default: 1176 return -EINVAL; 1177 } 1178 1179 ctx = ucma_get_ctx_dev(file, cmd.id); 1180 if (IS_ERR(ctx)) 1181 return PTR_ERR(ctx); 1182 1183 mutex_lock(&ctx->mutex); 1184 ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len, 1185 cmd.reason); 1186 mutex_unlock(&ctx->mutex); 1187 ucma_put_ctx(ctx); 1188 return ret; 1189 } 1190 1191 static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, 1192 int in_len, int out_len) 1193 { 1194 struct rdma_ucm_disconnect cmd; 1195 struct ucma_context *ctx; 1196 int ret; 1197 1198 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1199 return -EFAULT; 1200 1201 ctx = ucma_get_ctx_dev(file, cmd.id); 1202 if (IS_ERR(ctx)) 1203 return PTR_ERR(ctx); 1204 1205 mutex_lock(&ctx->mutex); 1206 ret = rdma_disconnect(ctx->cm_id); 1207 mutex_unlock(&ctx->mutex); 1208 ucma_put_ctx(ctx); 1209 return ret; 1210 } 1211 1212 static ssize_t ucma_init_qp_attr(struct ucma_file *file, 1213 const char __user *inbuf, 1214 int in_len, int out_len) 1215 { 1216 struct rdma_ucm_init_qp_attr cmd; 1217 struct ib_uverbs_qp_attr resp; 1218 struct ucma_context *ctx; 1219 struct ib_qp_attr qp_attr; 1220 int ret; 1221 1222 if (out_len < sizeof(resp)) 1223 return -ENOSPC; 1224 1225 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1226 return -EFAULT; 1227 1228 if (cmd.qp_state > IB_QPS_ERR) 1229 return -EINVAL; 1230 1231 ctx = ucma_get_ctx_dev(file, cmd.id); 1232 if (IS_ERR(ctx)) 1233 return PTR_ERR(ctx); 1234 1235 resp.qp_attr_mask = 0; 1236 memset(&qp_attr, 0, sizeof qp_attr); 1237 qp_attr.qp_state = cmd.qp_state; 1238 mutex_lock(&ctx->mutex); 1239 ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); 1240 mutex_unlock(&ctx->mutex); 1241 if (ret) 1242 goto out; 1243 1244 ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); 1245 if (copy_to_user(u64_to_user_ptr(cmd.response), 1246 &resp, sizeof(resp))) 1247 ret = -EFAULT; 1248 1249 out: 1250 ucma_put_ctx(ctx); 1251 return ret; 1252 } 1253 1254 static int ucma_set_option_id(struct ucma_context *ctx, int optname, 1255 void *optval, size_t optlen) 1256 { 1257 int ret = 0; 1258 1259 switch (optname) { 1260 case RDMA_OPTION_ID_TOS: 1261 if (optlen != sizeof(u8)) { 1262 ret = -EINVAL; 1263 break; 1264 } 1265 rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); 1266 break; 1267 case RDMA_OPTION_ID_REUSEADDR: 1268 if (optlen != sizeof(int)) { 1269 ret = -EINVAL; 1270 break; 1271 } 1272 ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); 1273 break; 1274 case RDMA_OPTION_ID_AFONLY: 1275 if (optlen != sizeof(int)) { 1276 ret = -EINVAL; 1277 break; 1278 } 1279 ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 
1 : 0); 1280 break; 1281 case RDMA_OPTION_ID_ACK_TIMEOUT: 1282 if (optlen != sizeof(u8)) { 1283 ret = -EINVAL; 1284 break; 1285 } 1286 ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval)); 1287 break; 1288 default: 1289 ret = -ENOSYS; 1290 } 1291 1292 return ret; 1293 } 1294 1295 static int ucma_set_ib_path(struct ucma_context *ctx, 1296 struct ib_path_rec_data *path_data, size_t optlen) 1297 { 1298 struct sa_path_rec sa_path; 1299 struct rdma_cm_event event; 1300 int ret; 1301 1302 if (optlen % sizeof(*path_data)) 1303 return -EINVAL; 1304 1305 for (; optlen; optlen -= sizeof(*path_data), path_data++) { 1306 if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | 1307 IB_PATH_BIDIRECTIONAL)) 1308 break; 1309 } 1310 1311 if (!optlen) 1312 return -EINVAL; 1313 1314 if (!ctx->cm_id->device) 1315 return -EINVAL; 1316 1317 memset(&sa_path, 0, sizeof(sa_path)); 1318 1319 sa_path.rec_type = SA_PATH_REC_TYPE_IB; 1320 ib_sa_unpack_path(path_data->path_rec, &sa_path); 1321 1322 if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) { 1323 struct sa_path_rec opa; 1324 1325 sa_convert_path_ib_to_opa(&opa, &sa_path); 1326 mutex_lock(&ctx->mutex); 1327 ret = rdma_set_ib_path(ctx->cm_id, &opa); 1328 mutex_unlock(&ctx->mutex); 1329 } else { 1330 mutex_lock(&ctx->mutex); 1331 ret = rdma_set_ib_path(ctx->cm_id, &sa_path); 1332 mutex_unlock(&ctx->mutex); 1333 } 1334 if (ret) 1335 return ret; 1336 1337 memset(&event, 0, sizeof event); 1338 event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 1339 return ucma_event_handler(ctx->cm_id, &event); 1340 } 1341 1342 static int ucma_set_option_ib(struct ucma_context *ctx, int optname, 1343 void *optval, size_t optlen) 1344 { 1345 int ret; 1346 1347 switch (optname) { 1348 case RDMA_OPTION_IB_PATH: 1349 ret = ucma_set_ib_path(ctx, optval, optlen); 1350 break; 1351 default: 1352 ret = -ENOSYS; 1353 } 1354 1355 return ret; 1356 } 1357 1358 static int ucma_set_option_level(struct ucma_context *ctx, int level, 1359 int optname, void *optval, size_t optlen) 1360 { 1361 int ret; 1362 1363 switch (level) { 1364 case RDMA_OPTION_ID: 1365 mutex_lock(&ctx->mutex); 1366 ret = ucma_set_option_id(ctx, optname, optval, optlen); 1367 mutex_unlock(&ctx->mutex); 1368 break; 1369 case RDMA_OPTION_IB: 1370 ret = ucma_set_option_ib(ctx, optname, optval, optlen); 1371 break; 1372 default: 1373 ret = -ENOSYS; 1374 } 1375 1376 return ret; 1377 } 1378 1379 static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, 1380 int in_len, int out_len) 1381 { 1382 struct rdma_ucm_set_option cmd; 1383 struct ucma_context *ctx; 1384 void *optval; 1385 int ret; 1386 1387 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1388 return -EFAULT; 1389 1390 if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) 1391 return -EINVAL; 1392 1393 ctx = ucma_get_ctx(file, cmd.id); 1394 if (IS_ERR(ctx)) 1395 return PTR_ERR(ctx); 1396 1397 optval = memdup_user(u64_to_user_ptr(cmd.optval), 1398 cmd.optlen); 1399 if (IS_ERR(optval)) { 1400 ret = PTR_ERR(optval); 1401 goto out; 1402 } 1403 1404 ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, 1405 cmd.optlen); 1406 kfree(optval); 1407 1408 out: 1409 ucma_put_ctx(ctx); 1410 return ret; 1411 } 1412 1413 static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, 1414 int in_len, int out_len) 1415 { 1416 struct rdma_ucm_notify cmd; 1417 struct ucma_context *ctx; 1418 int ret = -EINVAL; 1419 1420 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1421 return -EFAULT; 1422 1423 ctx = ucma_get_ctx(file, cmd.id); 1424 if (IS_ERR(ctx)) 
1425 return PTR_ERR(ctx); 1426 1427 mutex_lock(&ctx->mutex); 1428 if (ctx->cm_id->device) 1429 ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); 1430 mutex_unlock(&ctx->mutex); 1431 1432 ucma_put_ctx(ctx); 1433 return ret; 1434 } 1435 1436 static ssize_t ucma_process_join(struct ucma_file *file, 1437 struct rdma_ucm_join_mcast *cmd, int out_len) 1438 { 1439 struct rdma_ucm_create_id_resp resp; 1440 struct ucma_context *ctx; 1441 struct ucma_multicast *mc; 1442 struct sockaddr *addr; 1443 int ret; 1444 u8 join_state; 1445 1446 if (out_len < sizeof(resp)) 1447 return -ENOSPC; 1448 1449 addr = (struct sockaddr *) &cmd->addr; 1450 if (cmd->addr_size != rdma_addr_size(addr)) 1451 return -EINVAL; 1452 1453 if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) 1454 join_state = BIT(FULLMEMBER_JOIN); 1455 else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) 1456 join_state = BIT(SENDONLY_FULLMEMBER_JOIN); 1457 else 1458 return -EINVAL; 1459 1460 ctx = ucma_get_ctx_dev(file, cmd->id); 1461 if (IS_ERR(ctx)) 1462 return PTR_ERR(ctx); 1463 1464 mc = kzalloc(sizeof(*mc), GFP_KERNEL); 1465 if (!mc) { 1466 ret = -ENOMEM; 1467 goto err_put_ctx; 1468 } 1469 1470 mc->ctx = ctx; 1471 mc->join_state = join_state; 1472 mc->uid = cmd->uid; 1473 memcpy(&mc->addr, addr, cmd->addr_size); 1474 1475 xa_lock(&multicast_table); 1476 if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, 1477 GFP_KERNEL)) { 1478 ret = -ENOMEM; 1479 goto err_free_mc; 1480 } 1481 1482 list_add_tail(&mc->list, &ctx->mc_list); 1483 xa_unlock(&multicast_table); 1484 1485 mutex_lock(&ctx->mutex); 1486 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, 1487 join_state, mc); 1488 mutex_unlock(&ctx->mutex); 1489 if (ret) 1490 goto err_xa_erase; 1491 1492 resp.id = mc->id; 1493 if (copy_to_user(u64_to_user_ptr(cmd->response), 1494 &resp, sizeof(resp))) { 1495 ret = -EFAULT; 1496 goto err_leave_multicast; 1497 } 1498 1499 xa_store(&multicast_table, mc->id, mc, 0); 1500 1501 ucma_put_ctx(ctx); 1502 return 0; 1503 1504 err_leave_multicast: 1505 mutex_lock(&ctx->mutex); 1506 rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); 1507 mutex_unlock(&ctx->mutex); 1508 ucma_cleanup_mc_events(mc); 1509 err_xa_erase: 1510 xa_lock(&multicast_table); 1511 list_del(&mc->list); 1512 __xa_erase(&multicast_table, mc->id); 1513 err_free_mc: 1514 xa_unlock(&multicast_table); 1515 kfree(mc); 1516 err_put_ctx: 1517 ucma_put_ctx(ctx); 1518 return ret; 1519 } 1520 1521 static ssize_t ucma_join_ip_multicast(struct ucma_file *file, 1522 const char __user *inbuf, 1523 int in_len, int out_len) 1524 { 1525 struct rdma_ucm_join_ip_mcast cmd; 1526 struct rdma_ucm_join_mcast join_cmd; 1527 1528 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1529 return -EFAULT; 1530 1531 join_cmd.response = cmd.response; 1532 join_cmd.uid = cmd.uid; 1533 join_cmd.id = cmd.id; 1534 join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr); 1535 if (!join_cmd.addr_size) 1536 return -EINVAL; 1537 1538 join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; 1539 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); 1540 1541 return ucma_process_join(file, &join_cmd, out_len); 1542 } 1543 1544 static ssize_t ucma_join_multicast(struct ucma_file *file, 1545 const char __user *inbuf, 1546 int in_len, int out_len) 1547 { 1548 struct rdma_ucm_join_mcast cmd; 1549 1550 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1551 return -EFAULT; 1552 1553 if (!rdma_addr_size_kss(&cmd.addr)) 1554 return -EINVAL; 1555 1556 return ucma_process_join(file, &cmd, 
out_len); 1557 } 1558 1559 static ssize_t ucma_leave_multicast(struct ucma_file *file, 1560 const char __user *inbuf, 1561 int in_len, int out_len) 1562 { 1563 struct rdma_ucm_destroy_id cmd; 1564 struct rdma_ucm_destroy_id_resp resp; 1565 struct ucma_multicast *mc; 1566 int ret = 0; 1567 1568 if (out_len < sizeof(resp)) 1569 return -ENOSPC; 1570 1571 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1572 return -EFAULT; 1573 1574 xa_lock(&multicast_table); 1575 mc = xa_load(&multicast_table, cmd.id); 1576 if (!mc) 1577 mc = ERR_PTR(-ENOENT); 1578 else if (READ_ONCE(mc->ctx->file) != file) 1579 mc = ERR_PTR(-EINVAL); 1580 else if (!refcount_inc_not_zero(&mc->ctx->ref)) 1581 mc = ERR_PTR(-ENXIO); 1582 1583 if (IS_ERR(mc)) { 1584 xa_unlock(&multicast_table); 1585 ret = PTR_ERR(mc); 1586 goto out; 1587 } 1588 1589 list_del(&mc->list); 1590 __xa_erase(&multicast_table, mc->id); 1591 xa_unlock(&multicast_table); 1592 1593 mutex_lock(&mc->ctx->mutex); 1594 rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); 1595 mutex_unlock(&mc->ctx->mutex); 1596 1597 ucma_cleanup_mc_events(mc); 1598 1599 ucma_put_ctx(mc->ctx); 1600 resp.events_reported = mc->events_reported; 1601 kfree(mc); 1602 1603 if (copy_to_user(u64_to_user_ptr(cmd.response), 1604 &resp, sizeof(resp))) 1605 ret = -EFAULT; 1606 out: 1607 return ret; 1608 } 1609 1610 static ssize_t ucma_migrate_id(struct ucma_file *new_file, 1611 const char __user *inbuf, 1612 int in_len, int out_len) 1613 { 1614 struct rdma_ucm_migrate_id cmd; 1615 struct rdma_ucm_migrate_resp resp; 1616 struct ucma_event *uevent, *tmp; 1617 struct ucma_context *ctx; 1618 LIST_HEAD(event_list); 1619 struct fd f; 1620 struct ucma_file *cur_file; 1621 int ret = 0; 1622 1623 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1624 return -EFAULT; 1625 1626 /* Get current fd to protect against it being closed */ 1627 f = fdget(cmd.fd); 1628 if (!f.file) 1629 return -ENOENT; 1630 if (f.file->f_op != &ucma_fops) { 1631 ret = -EINVAL; 1632 goto file_put; 1633 } 1634 cur_file = f.file->private_data; 1635 1636 /* Validate current fd and prevent destruction of id. */ 1637 ctx = ucma_get_ctx(cur_file, cmd.id); 1638 if (IS_ERR(ctx)) { 1639 ret = PTR_ERR(ctx); 1640 goto file_put; 1641 } 1642 1643 rdma_lock_handler(ctx->cm_id); 1644 /* 1645 * ctx->file can only be changed under the handler & xa_lock. xa_load() 1646 * must be checked again to ensure the ctx hasn't begun destruction 1647 * since the ucma_get_ctx(). 1648 */ 1649 xa_lock(&ctx_table); 1650 if (_ucma_find_context(cmd.id, cur_file) != ctx) { 1651 xa_unlock(&ctx_table); 1652 ret = -ENOENT; 1653 goto err_unlock; 1654 } 1655 ctx->file = new_file; 1656 xa_unlock(&ctx_table); 1657 1658 mutex_lock(&cur_file->mut); 1659 list_del(&ctx->list); 1660 /* 1661 * At this point lock_handler() prevents addition of new uevents for 1662 * this ctx. 
1663 */ 1664 list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list) 1665 if (uevent->ctx == ctx) 1666 list_move_tail(&uevent->list, &event_list); 1667 resp.events_reported = ctx->events_reported; 1668 mutex_unlock(&cur_file->mut); 1669 1670 mutex_lock(&new_file->mut); 1671 list_add_tail(&ctx->list, &new_file->ctx_list); 1672 list_splice_tail(&event_list, &new_file->event_list); 1673 mutex_unlock(&new_file->mut); 1674 1675 if (copy_to_user(u64_to_user_ptr(cmd.response), 1676 &resp, sizeof(resp))) 1677 ret = -EFAULT; 1678 1679 err_unlock: 1680 rdma_unlock_handler(ctx->cm_id); 1681 ucma_put_ctx(ctx); 1682 file_put: 1683 fdput(f); 1684 return ret; 1685 } 1686 1687 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 1688 const char __user *inbuf, 1689 int in_len, int out_len) = { 1690 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, 1691 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, 1692 [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, 1693 [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, 1694 [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, 1695 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, 1696 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, 1697 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, 1698 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, 1699 [RDMA_USER_CM_CMD_REJECT] = ucma_reject, 1700 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, 1701 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, 1702 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, 1703 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 1704 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, 1705 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1706 [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, 1707 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1708 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, 1709 [RDMA_USER_CM_CMD_QUERY] = ucma_query, 1710 [RDMA_USER_CM_CMD_BIND] = ucma_bind, 1711 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, 1712 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast 1713 }; 1714 1715 static ssize_t ucma_write(struct file *filp, const char __user *buf, 1716 size_t len, loff_t *pos) 1717 { 1718 struct ucma_file *file = filp->private_data; 1719 struct rdma_ucm_cmd_hdr hdr; 1720 ssize_t ret; 1721 1722 if (!ib_safe_file_access(filp)) { 1723 pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", 1724 __func__, task_tgid_vnr(current), current->comm); 1725 return -EACCES; 1726 } 1727 1728 if (len < sizeof(hdr)) 1729 return -EINVAL; 1730 1731 if (copy_from_user(&hdr, buf, sizeof(hdr))) 1732 return -EFAULT; 1733 1734 if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) 1735 return -EINVAL; 1736 hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); 1737 1738 if (hdr.in + sizeof(hdr) > len) 1739 return -EINVAL; 1740 1741 if (!ucma_cmd_table[hdr.cmd]) 1742 return -ENOSYS; 1743 1744 ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); 1745 if (!ret) 1746 ret = len; 1747 1748 return ret; 1749 } 1750 1751 static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait) 1752 { 1753 struct ucma_file *file = filp->private_data; 1754 __poll_t mask = 0; 1755 1756 poll_wait(filp, &file->poll_wait, wait); 1757 1758 if (!list_empty(&file->event_list)) 1759 mask = EPOLLIN | EPOLLRDNORM; 1760 1761 return mask; 1762 } 1763 1764 /* 1765 * ucma_open() does not need the BKL: 1766 * 1767 * - no global state is referred to; 1768 * - there is no ioctl method to race against; 1769 * - no further module initialization is required 
for open to work
 *   after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;

	/*
	 * All paths that touch ctx_list or event_list starting from write()
	 * are prevented by this being a FD release function. The
	 * list_add_tail() in ucma_connect_event_handler() can run
	 * concurrently, however it only adds to the list *after* a listening
	 * ID. By only reading the first of the list, and relying on
	 * ucma_destroy_private_ctx() to block ucma_connect_event_handler(),
	 * no additional locking is needed.
	 */
	while (!list_empty(&file->ctx_list)) {
		struct ucma_context *ctx = list_first_entry(
			&file->ctx_list, struct ucma_context, list);

		WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
				   GFP_KERNEL) != ctx);
		ucma_destroy_private_ctx(ctx);
	}
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll	 = ucma_poll,
	.llseek	 = no_llseek,
};

static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
	.fops		= &ucma_fops,
};

static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
	res->abi = RDMA_USER_CM_ABI_VERSION;
	res->cdev = ucma_misc.this_device;
	return 0;
}

static struct ib_client rdma_cma_client = {
	.name = "rdma_cm",
	.get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");

static ssize_t abi_version_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR_RO(abi_version);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}

	ret = ib_register_client(&rdma_cma_client);
	if (ret)
		goto err3;

	return 0;
err3:
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	ib_unregister_client(&rdma_cma_client);
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
misc_deregister(&ucma_misc); 1893 } 1894 1895 module_init(ucma_init); 1896 module_exit(ucma_cleanup); 1897