/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>

#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
#include <rdma/ib_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname	= "max_backlog",
		.data		= &max_backlog,
		.maxlen		= sizeof max_backlog,
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
};

struct ucma_context {
	u32			id;
	struct completion	comp;
	refcount_t		ref;
	int			events_reported;
	atomic_t		backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	struct mutex		mutex;
	u64			uid;

	struct list_head	list;
	struct list_head	mc_list;
	struct work_struct	close_work;
};

struct ucma_multicast {
	struct ucma_context	*ctx;
	u32			id;
	int			events_reported;

	u64			uid;
	u8			join_state;
	struct list_head	list;
	struct sockaddr_storage	addr;
};

struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_context	*conn_req_ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_ucm_event_resp resp;
};

static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);
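
/*
 * ctx_table maps the user-visible context id to its ucma_context, and
 * multicast_table does the same for multicast joins.  Entries are allocated
 * with a NULL value first, published with xa_store() once the object is fully
 * initialized, and swapped to XA_ZERO_ENTRY while being torn down so that
 * lookups fail but the id stays reserved until the final kfree().
 */
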
static const struct file_operations ucma_fops;
static int ucma_destroy_private_ctx(struct ucma_context *ctx);

static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx))
		if (!refcount_inc_not_zero(&ctx->ref))
			ctx = ERR_PTR(-ENXIO);
	xa_unlock(&ctx_table);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx but requires that ->cm_id->device is valid, e.g. that
 * the CM_ID is bound.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* once all inflight tasks are finished, we close all underlying
	 * resources. The context is still alive till its explicit destroying
	 * by its creator. This puts back the xarray's reference.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);

	/* Reading the cm_id without holding a positive ref is not allowed */
	ctx->cm_id = NULL;
}

static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	init_completion(&ctx->comp);
	INIT_LIST_HEAD(&ctx->mc_list);
	/* So list_del() will work if we don't do ucma_finish_ctx() */
	INIT_LIST_HEAD(&ctx->list);
	ctx->file = file;
	mutex_init(&ctx->mutex);

	if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
		kfree(ctx);
		return NULL;
	}
	return ctx;
}

static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
			       struct rdma_cm_id *cm_id)
{
	refcount_set(&ctx->ref, 1);
	ctx->cm_id = cm_id;
}

static void ucma_finish_ctx(struct ucma_context *ctx)
{
	lockdep_assert_held(&ctx->file->mut);
	list_add_tail(&ctx->list, &ctx->file->ctx_list);
	xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}
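
/*
 * The two helpers below translate the kernel rdma_cm event parameters into
 * the UAPI structures that are reported back to userspace through
 * RDMA_USER_CM_CMD_GET_EVENT.
 */
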
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
					     struct rdma_cm_event *event)
{
	struct ucma_event *uevent;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return NULL;

	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (ctx->cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	uevent->resp.ece.vendor_id = event->ece.vendor_id;
	uevent->resp.ece.attr_mod = event->ece.attr_mod;
	return uevent;
}

static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
				      struct rdma_cm_event *event)
{
	struct ucma_context *listen_ctx = cm_id->context;
	struct ucma_context *ctx;
	struct ucma_event *uevent;

	if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
		return -ENOMEM;
	ctx = ucma_alloc_ctx(listen_ctx->file);
	if (!ctx)
		goto err_backlog;
	ucma_set_ctx_cm_id(ctx, cm_id);

	uevent = ucma_create_uevent(listen_ctx, event);
	if (!uevent)
		goto err_alloc;
	uevent->conn_req_ctx = ctx;
	uevent->resp.id = ctx->id;

	ctx->cm_id->context = ctx;

	mutex_lock(&ctx->file->mut);
	ucma_finish_ctx(ctx);
	list_add_tail(&uevent->list, &ctx->file->event_list);
	mutex_unlock(&ctx->file->mut);
	wake_up_interruptible(&ctx->file->poll_wait);
	return 0;

err_alloc:
	ucma_destroy_private_ctx(ctx);
err_backlog:
	atomic_inc(&listen_ctx->backlog);
	/* Returning error causes the new ID to be destroyed */
	return -ENOMEM;
}

static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
		return ucma_connect_event_handler(cm_id, event);

	/*
	 * We ignore events for new connections until userspace has set their
	 * context. This can only happen if an error occurs on a new connection
	 * before the user accepts it. This is okay, since the accept will just
	 * fail later. However, we do need to release the underlying HW
	 * resources in case of a device removal event.
	 */
	if (ctx->uid) {
		uevent = ucma_create_uevent(ctx, event);
		if (!uevent)
			return 0;

		mutex_lock(&ctx->file->mut);
		list_add_tail(&uevent->list, &ctx->file->event_list);
		mutex_unlock(&ctx->file->mut);
		wake_up_interruptible(&ctx->file->poll_wait);
	}

	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
		xa_lock(&ctx_table);
		if (xa_load(&ctx_table, ctx->id) == ctx)
			queue_work(system_unbound_wq, &ctx->close_work);
		xa_unlock(&ctx_table);
	}
	return 0;
}

static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
			      sizeof(uevent->resp.ece))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_first_entry(&file->event_list, struct ucma_event, list);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		mutex_unlock(&file->mut);
		return -EFAULT;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
		atomic_inc(&uevent->ctx->backlog);
	mutex_unlock(&file->mut);

	kfree(uevent);
	return 0;
}

static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	ctx = ucma_alloc_ctx(file);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}
	ucma_set_ctx_cm_id(ctx, cm_id);

	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err1;
	}

	mutex_lock(&file->mut);
	ucma_finish_ctx(ctx);
	mutex_unlock(&file->mut);
	return 0;

err1:
	ucma_destroy_private_ctx(ctx);
	return ret;
}
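
/*
 * Teardown helpers: ucma_cleanup_multicast() erases any multicast joins still
 * attached to a context that is being destroyed, ucma_cleanup_mc_events()
 * drops queued events that refer to a multicast join being removed, and
 * ucma_cleanup_ctx_events() drops events queued for a context that is being
 * destroyed (including connect requests whose child contexts must then be
 * destroyed as well).
 */
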
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc, *tmp;

	xa_lock(&multicast_table);
	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
		list_del(&mc->list);
		/*
		 * At this point mc->ctx->ref is 0 so no reader can still be
		 * using the mc; holding the xa_lock is enough serialization
		 * to erase it here.
		 */
		__xa_erase(&multicast_table, mc->id);
		kfree(mc);
	}
	xa_unlock(&multicast_table);
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	rdma_lock_handler(mc->ctx->cm_id);
	mutex_lock(&mc->ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
	mutex_unlock(&mc->ctx->file->mut);
	rdma_unlock_handler(mc->ctx->cm_id);
}

static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx != ctx)
			continue;

		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
		    xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
			       uevent->conn_req_ctx, XA_ZERO_ENTRY,
			       GFP_KERNEL) == uevent->conn_req_ctx) {
			list_move_tail(&uevent->list, &list);
			continue;
		}
		list_del(&uevent->list);
		kfree(uevent);
	}
	list_del(&ctx->list);
	events_reported = ctx->events_reported;
	mutex_unlock(&ctx->file->mut);

	/*
	 * If this was a listening ID then any connections spawned from it that
	 * have not been delivered to userspace are cleaned up too. Must be done
	 * outside any locks.
	 */
	list_for_each_entry_safe(uevent, tmp, &list, list) {
		ucma_destroy_private_ctx(uevent->conn_req_ctx);
		kfree(uevent);
	}
	return events_reported;
}

/*
 * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie
 * the ctx is not public to the user). This is either because:
 * - ucma_finish_ctx() hasn't been called
 * - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
 */
static int ucma_destroy_private_ctx(struct ucma_context *ctx)
{
	int events_reported;

	/*
	 * Destroy the underlying cm_id. New work queuing is prevented now by
	 * the removal from the xarray. Once the work is cancelled ref will
	 * either be 0 because the work ran to completion and consumed the ref
	 * from the xarray, or it will be positive because we still have the
	 * ref from the xarray. This can also be 0 in cases where cm_id was
	 * never set.
	 */
	cancel_work_sync(&ctx->close_work);
	if (refcount_read(&ctx->ref))
		ucma_close_id(&ctx->close_work);

	events_reported = ucma_cleanup_ctx_events(ctx);
	ucma_cleanup_multicast(ctx);

	WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
			   GFP_KERNEL) != NULL);
	mutex_destroy(&ctx->mutex);
	kfree(ctx);
	return events_reported;
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx)) {
		if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
				 GFP_KERNEL) != ctx)
			ctx = ERR_PTR(-ENOENT);
	}
	xa_unlock(&ctx_table);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.events_reported = ucma_destroy_private_ctx(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
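
/*
 * ucma_resolve_addr() is the newer variant of ucma_resolve_ip(): the command
 * carries explicitly sized, sockaddr_storage based addresses, which also
 * allows AF_IB addresses rather than only IPv4/IPv6.
 */
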
static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_pri_alt_paths;
	switch (route->num_pri_alt_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{
	resp->num_paths = route->num_pri_alt_paths;
	switch (route->num_pri_alt_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}
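
/*
 * Legacy query: returns the bound addresses, device identity and the resolved
 * path records in one response.  The response is truncated to out_len with
 * min_t() so that old userspace built without the ibdev_index field keeps
 * working.
 */
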
static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.ibdev_index = ctx->cm_id->device->index;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	mutex_unlock(&ctx->mutex);
	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
			 min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->ibdev_index = cm_id->device->index;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_pri_alt_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);

		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}
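
/*
 * RDMA_USER_CM_QUERY_GID reports the endpoints as AF_IB sockaddr_ib entries:
 * if the cm_id was bound with IP addresses, the GIDs, pkey and service id are
 * synthesized from the resolved route rather than copied verbatim.
 */
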
static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
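
/*
 * Copy the user supplied connection parameters into the in-kernel form used
 * by rdma_connect()/rdma_accept().  qp_num is masked to the 24 bits that are
 * meaningful on the wire, and the qkey is only honoured for AF_IB bound IDs.
 */
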
static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num & 0xFFFFFF;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct rdma_ucm_connect cmd;
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	mutex_lock(&ctx->mutex);
	ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
		cmd.backlog = max_backlog;
	atomic_set(&ctx->backlog, cmd.backlog);

	mutex_lock(&ctx->mutex);
	ret = rdma_listen(ctx->cm_id, cmd.backlog);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
		if (!ret) {
			/* The uid must be set atomically with the handler */
			ctx->uid = cmd.uid;
		}
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	}
	ucma_put_ctx(ctx);
	return ret;
}
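
/*
 * Only two reject reasons may be generated from userspace: the classic
 * consumer-defined reject and IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED, which is
 * used to tell the peer that the proposed ECE options were not accepted.
 */
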
static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.reason)
		cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;

	switch (cmd.reason) {
	case IB_CM_REJ_CONSUMER_DEFINED:
	case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
		break;
	default:
		return -EINVAL;
	}

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
			  cmd.reason);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_disconnect(ctx->cm_id);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	mutex_lock(&ctx->mutex);
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_ACK_TIMEOUT:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
		mutex_unlock(&ctx->mutex);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		mutex_lock(&ctx->mutex);
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		mutex_unlock(&ctx->mutex);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc) {
		ret = -ENOMEM;
		goto err_put_ctx;
	}

	mc->ctx = ctx;
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);

	xa_lock(&multicast_table);
	if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
		       GFP_KERNEL)) {
		ret = -ENOMEM;
		goto err_free_mc;
	}

	list_add_tail(&mc->list, &ctx->mc_list);
	xa_unlock(&multicast_table);

	mutex_lock(&ctx->mutex);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto err_xa_erase;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err_leave_multicast;
	}

	xa_store(&multicast_table, mc->id, mc, 0);

	ucma_put_ctx(ctx);
	return 0;

err_leave_multicast:
	mutex_lock(&ctx->mutex);
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&ctx->mutex);
	ucma_cleanup_mc_events(mc);
err_xa_erase:
	xa_lock(&multicast_table);
	list_del(&mc->list);
	__xa_erase(&multicast_table, mc->id);
err_free_mc:
	xa_unlock(&multicast_table);
	kfree(mc);
err_put_ctx:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&multicast_table);
	mc = xa_load(&multicast_table, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (READ_ONCE(mc->ctx->file) != file)
		mc = ERR_PTR(-EINVAL);
	else if (!refcount_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);

	if (IS_ERR(mc)) {
		xa_unlock(&multicast_table);
		ret = PTR_ERR(mc);
		goto out;
	}

	list_del(&mc->list);
	__xa_erase(&multicast_table, mc->id);
	xa_unlock(&multicast_table);

	mutex_lock(&mc->ctx->mutex);
	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&mc->ctx->mutex);

	ucma_cleanup_mc_events(mc);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}
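
/*
 * RDMA_USER_CM_CMD_MIGRATE_ID moves an existing context, together with any
 * events still queued for it, from the file it was created on to the file
 * issuing this command.  Userspace (e.g. librdmacm's rdma_migrate_id()) uses
 * this to hand a cm_id over to a different event channel.
 */
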
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_event *uevent, *tmp;
	struct ucma_context *ctx;
	LIST_HEAD(event_list);
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	CLASS(fd, f)(cmd.fd);
	if (fd_empty(f))
		return -ENOENT;
	if (fd_file(f)->f_op != &ucma_fops)
		return -EINVAL;
	cur_file = fd_file(f)->private_data;

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(cur_file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	rdma_lock_handler(ctx->cm_id);
	/*
	 * ctx->file can only be changed under the handler & xa_lock. xa_load()
	 * must be checked again to ensure the ctx hasn't begun destruction
	 * since the ucma_get_ctx().
	 */
	xa_lock(&ctx_table);
	if (_ucma_find_context(cmd.id, cur_file) != ctx) {
		xa_unlock(&ctx_table);
		ret = -ENOENT;
		goto err_unlock;
	}
	ctx->file = new_file;
	xa_unlock(&ctx_table);

	mutex_lock(&cur_file->mut);
	list_del(&ctx->list);
	/*
	 * At this point lock_handler() prevents addition of new uevents for
	 * this ctx.
	 */
	list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &event_list);
	resp.events_reported = ctx->events_reported;
	mutex_unlock(&cur_file->mut);

	mutex_lock(&new_file->mut);
	list_add_tail(&ctx->list, &new_file->ctx_list);
	list_splice_tail(&event_list, &new_file->event_list);
	mutex_unlock(&new_file->mut);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

err_unlock:
	rdma_unlock_handler(ctx->cm_id);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID]	 = ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast
};
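
/*
 * Commands are issued as a single write() per command: a struct
 * rdma_ucm_cmd_hdr (cmd, in, out) followed by the command payload, with the
 * response written back through the user pointer embedded in the payload.
 * A minimal userspace sketch, illustrative only and not part of this file
 * (my_cookie, resp and fd are placeholders; real applications go through
 * librdmacm instead):
 *
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} msg = {
 *		.hdr = { .cmd = RDMA_USER_CM_CMD_CREATE_ID,
 *			 .in  = sizeof(msg.cmd),
 *			 .out = sizeof(struct rdma_ucm_create_id_resp) },
 *		.cmd = { .uid = my_cookie, .ps = RDMA_PS_TCP,
 *			 .response = (__u64)(uintptr_t)&resp },
 *	};
 *	if (write(fd, &msg, sizeof(msg)) != sizeof(msg))
 *		... the command failed; errno holds the error ...
 */
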
static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    __func__, task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;
	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}

static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	__poll_t mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = EPOLLIN | EPOLLRDNORM;

	return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;

	/*
	 * All paths that touch ctx_list or event_list starting from write()
	 * are prevented by this being a FD release function. The
	 * list_add_tail() in ucma_connect_event_handler() can run
	 * concurrently, however it only adds to the list *after* a listening
	 * ID. By only reading the first of the list, and relying on
	 * ucma_destroy_private_ctx() to block ucma_connect_event_handler(),
	 * no additional locking is needed.
	 */
	while (!list_empty(&file->ctx_list)) {
		struct ucma_context *ctx = list_first_entry(
			&file->ctx_list, struct ucma_context, list);

		WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
				   GFP_KERNEL) != ctx);
		ucma_destroy_private_ctx(ctx);
	}
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll	 = ucma_poll,
};

static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
	.fops		= &ucma_fops,
};

static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
	res->abi = RDMA_USER_CM_ABI_VERSION;
	res->cdev = ucma_misc.this_device;
	return 0;
}

static struct ib_client rdma_cma_client = {
	.name = "rdma_cm",
	.get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");

static ssize_t abi_version_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR_RO(abi_version);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}

	ret = ib_register_client(&rdma_cma_client);
	if (ret)
		goto err3;

	return 0;
err3:
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	ib_unregister_client(&rdma_cma_client);
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
}

module_init(ucma_init);
module_exit(ucma_cleanup);