1 /* 2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 6 * Copyright (c) 2005 PathScale, Inc. All rights reserved. 7 * 8 * This software is available to you under a choice of one of two 9 * licenses. You may choose to be licensed under the terms of the GNU 10 * General Public License (GPL) Version 2, available from the file 11 * COPYING in the main directory of this source tree, or the 12 * OpenIB.org BSD license below: 13 * 14 * Redistribution and use in source and binary forms, with or 15 * without modification, are permitted provided that the following 16 * conditions are met: 17 * 18 * - Redistributions of source code must retain the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer. 21 * 22 * - Redistributions in binary form must reproduce the above 23 * copyright notice, this list of conditions and the following 24 * disclaimer in the documentation and/or other materials 25 * provided with the distribution. 26 * 27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * SOFTWARE. 35 */ 36 37 #include <linux/module.h> 38 #include <linux/init.h> 39 #include <linux/device.h> 40 #include <linux/err.h> 41 #include <linux/fs.h> 42 #include <linux/poll.h> 43 #include <linux/sched.h> 44 #include <linux/file.h> 45 #include <linux/cdev.h> 46 #include <linux/anon_inodes.h> 47 #include <linux/slab.h> 48 #include <linux/sched/mm.h> 49 50 #include <linux/uaccess.h> 51 52 #include <rdma/ib.h> 53 #include <rdma/uverbs_std_types.h> 54 55 #include "uverbs.h" 56 #include "core_priv.h" 57 #include "rdma_core.h" 58 59 MODULE_AUTHOR("Roland Dreier"); 60 MODULE_DESCRIPTION("InfiniBand userspace verbs access"); 61 MODULE_LICENSE("Dual BSD/GPL"); 62 63 enum { 64 IB_UVERBS_MAJOR = 231, 65 IB_UVERBS_BASE_MINOR = 192, 66 IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS, 67 IB_UVERBS_NUM_FIXED_MINOR = 32, 68 IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR, 69 }; 70 71 #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) 72 73 static dev_t dynamic_uverbs_dev; 74 static struct class *uverbs_class; 75 76 static DEFINE_IDA(uverbs_ida); 77 static void ib_uverbs_add_one(struct ib_device *device); 78 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); 79 80 /* 81 * Must be called with the ufile->device->disassociate_srcu held, and the lock 82 * must be held until use of the ucontext is finished. 83 */ 84 struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile) 85 { 86 /* 87 * We do not hold the hw_destroy_rwsem lock for this flow, instead 88 * srcu is used. It does not matter if someone races this with 89 * get_context, we get NULL or valid ucontext. 90 */ 91 struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext); 92 93 if (!srcu_dereference(ufile->device->ib_dev, 94 &ufile->device->disassociate_srcu)) 95 return ERR_PTR(-EIO); 96 97 if (!ucontext) 98 return ERR_PTR(-EINVAL); 99 100 return ucontext; 101 } 102 EXPORT_SYMBOL(ib_uverbs_get_ucontext_file); 103 104 int uverbs_dealloc_mw(struct ib_mw *mw) 105 { 106 struct ib_pd *pd = mw->pd; 107 int ret; 108 109 ret = mw->device->ops.dealloc_mw(mw); 110 if (!ret) 111 atomic_dec(&pd->usecnt); 112 return ret; 113 } 114 115 static void ib_uverbs_release_dev(struct device *device) 116 { 117 struct ib_uverbs_device *dev = 118 container_of(device, struct ib_uverbs_device, dev); 119 120 uverbs_destroy_api(dev->uapi); 121 cleanup_srcu_struct(&dev->disassociate_srcu); 122 kfree(dev); 123 } 124 125 static void ib_uverbs_release_async_event_file(struct kref *ref) 126 { 127 struct ib_uverbs_async_event_file *file = 128 container_of(ref, struct ib_uverbs_async_event_file, ref); 129 130 kfree(file); 131 } 132 133 void ib_uverbs_release_ucq(struct ib_uverbs_file *file, 134 struct ib_uverbs_completion_event_file *ev_file, 135 struct ib_ucq_object *uobj) 136 { 137 struct ib_uverbs_event *evt, *tmp; 138 139 if (ev_file) { 140 spin_lock_irq(&ev_file->ev_queue.lock); 141 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { 142 list_del(&evt->list); 143 kfree(evt); 144 } 145 spin_unlock_irq(&ev_file->ev_queue.lock); 146 147 uverbs_uobject_put(&ev_file->uobj); 148 } 149 150 spin_lock_irq(&file->async_file->ev_queue.lock); 151 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { 152 list_del(&evt->list); 153 kfree(evt); 154 } 155 spin_unlock_irq(&file->async_file->ev_queue.lock); 156 } 157 158 void ib_uverbs_release_uevent(struct ib_uverbs_file *file, 159 struct ib_uevent_object *uobj) 160 { 161 struct ib_uverbs_event *evt, *tmp; 162 163 spin_lock_irq(&file->async_file->ev_queue.lock); 164 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { 165 list_del(&evt->list); 166 kfree(evt); 167 } 168 spin_unlock_irq(&file->async_file->ev_queue.lock); 169 } 170 171 void ib_uverbs_detach_umcast(struct ib_qp *qp, 172 struct ib_uqp_object *uobj) 173 { 174 struct ib_uverbs_mcast_entry *mcast, *tmp; 175 176 list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { 177 ib_detach_mcast(qp, &mcast->gid, mcast->lid); 178 list_del(&mcast->list); 179 kfree(mcast); 180 } 181 } 182 183 static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) 184 { 185 complete(&dev->comp); 186 } 187 188 void ib_uverbs_release_file(struct kref *ref) 189 { 190 struct ib_uverbs_file *file = 191 container_of(ref, struct ib_uverbs_file, ref); 192 struct ib_device *ib_dev; 193 int srcu_key; 194 195 release_ufile_idr_uobject(file); 196 197 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 198 ib_dev = srcu_dereference(file->device->ib_dev, 199 &file->device->disassociate_srcu); 200 if (ib_dev && !ib_dev->ops.disassociate_ucontext) 201 module_put(ib_dev->owner); 202 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 203 204 if (atomic_dec_and_test(&file->device->refcount)) 205 ib_uverbs_comp_dev(file->device); 206 207 if (file->async_file) 208 kref_put(&file->async_file->ref, 209 ib_uverbs_release_async_event_file); 210 put_device(&file->device->dev); 211 212 if (file->disassociate_page) 213 __free_pages(file->disassociate_page, 0); 214 kfree(file); 215 } 216 217 static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, 218 struct ib_uverbs_file *uverbs_file, 219 struct file *filp, char __user *buf, 220 size_t count, loff_t *pos, 221 size_t eventsz) 222 { 223 struct ib_uverbs_event *event; 224 int ret = 0; 225 226 spin_lock_irq(&ev_queue->lock); 227 228 while (list_empty(&ev_queue->event_list)) { 229 spin_unlock_irq(&ev_queue->lock); 230 231 if (filp->f_flags & O_NONBLOCK) 232 return -EAGAIN; 233 234 if (wait_event_interruptible(ev_queue->poll_wait, 235 (!list_empty(&ev_queue->event_list) || 236 /* The barriers built into wait_event_interruptible() 237 * and wake_up() guarentee this will see the null set 238 * without using RCU 239 */ 240 !uverbs_file->device->ib_dev))) 241 return -ERESTARTSYS; 242 243 /* If device was disassociated and no event exists set an error */ 244 if (list_empty(&ev_queue->event_list) && 245 !uverbs_file->device->ib_dev) 246 return -EIO; 247 248 spin_lock_irq(&ev_queue->lock); 249 } 250 251 event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); 252 253 if (eventsz > count) { 254 ret = -EINVAL; 255 event = NULL; 256 } else { 257 list_del(ev_queue->event_list.next); 258 if (event->counter) { 259 ++(*event->counter); 260 list_del(&event->obj_list); 261 } 262 } 263 264 spin_unlock_irq(&ev_queue->lock); 265 266 if (event) { 267 if (copy_to_user(buf, event, eventsz)) 268 ret = -EFAULT; 269 else 270 ret = eventsz; 271 } 272 273 kfree(event); 274 275 return ret; 276 } 277 278 static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, 279 size_t count, loff_t *pos) 280 { 281 struct ib_uverbs_async_event_file *file = filp->private_data; 282 283 return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp, 284 buf, count, pos, 285 sizeof(struct ib_uverbs_async_event_desc)); 286 } 287 288 static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, 289 size_t count, loff_t *pos) 290 { 291 struct ib_uverbs_completion_event_file *comp_ev_file = 292 filp->private_data; 293 294 return ib_uverbs_event_read(&comp_ev_file->ev_queue, 295 comp_ev_file->uobj.ufile, filp, 296 buf, count, pos, 297 sizeof(struct ib_uverbs_comp_event_desc)); 298 } 299 300 static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, 301 struct file *filp, 302 struct poll_table_struct *wait) 303 { 304 __poll_t pollflags = 0; 305 306 poll_wait(filp, &ev_queue->poll_wait, wait); 307 308 spin_lock_irq(&ev_queue->lock); 309 if (!list_empty(&ev_queue->event_list)) 310 pollflags = EPOLLIN | EPOLLRDNORM; 311 spin_unlock_irq(&ev_queue->lock); 312 313 return pollflags; 314 } 315 316 static __poll_t ib_uverbs_async_event_poll(struct file *filp, 317 struct poll_table_struct *wait) 318 { 319 return ib_uverbs_event_poll(filp->private_data, filp, wait); 320 } 321 322 static __poll_t ib_uverbs_comp_event_poll(struct file *filp, 323 struct poll_table_struct *wait) 324 { 325 struct ib_uverbs_completion_event_file *comp_ev_file = 326 filp->private_data; 327 328 return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait); 329 } 330 331 static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) 332 { 333 struct ib_uverbs_event_queue *ev_queue = filp->private_data; 334 335 return fasync_helper(fd, filp, on, &ev_queue->async_queue); 336 } 337 338 static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) 339 { 340 struct ib_uverbs_completion_event_file *comp_ev_file = 341 filp->private_data; 342 343 return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); 344 } 345 346 static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) 347 { 348 struct ib_uverbs_async_event_file *file = filp->private_data; 349 struct ib_uverbs_file *uverbs_file = file->uverbs_file; 350 struct ib_uverbs_event *entry, *tmp; 351 int closed_already = 0; 352 353 mutex_lock(&uverbs_file->device->lists_mutex); 354 spin_lock_irq(&file->ev_queue.lock); 355 closed_already = file->ev_queue.is_closed; 356 file->ev_queue.is_closed = 1; 357 list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { 358 if (entry->counter) 359 list_del(&entry->obj_list); 360 kfree(entry); 361 } 362 spin_unlock_irq(&file->ev_queue.lock); 363 if (!closed_already) { 364 list_del(&file->list); 365 ib_unregister_event_handler(&uverbs_file->event_handler); 366 } 367 mutex_unlock(&uverbs_file->device->lists_mutex); 368 369 kref_put(&uverbs_file->ref, ib_uverbs_release_file); 370 kref_put(&file->ref, ib_uverbs_release_async_event_file); 371 372 return 0; 373 } 374 375 static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) 376 { 377 struct ib_uobject *uobj = filp->private_data; 378 struct ib_uverbs_completion_event_file *file = container_of( 379 uobj, struct ib_uverbs_completion_event_file, uobj); 380 struct ib_uverbs_event *entry, *tmp; 381 382 spin_lock_irq(&file->ev_queue.lock); 383 list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { 384 if (entry->counter) 385 list_del(&entry->obj_list); 386 kfree(entry); 387 } 388 file->ev_queue.is_closed = 1; 389 spin_unlock_irq(&file->ev_queue.lock); 390 391 uverbs_close_fd(filp); 392 393 return 0; 394 } 395 396 const struct file_operations uverbs_event_fops = { 397 .owner = THIS_MODULE, 398 .read = ib_uverbs_comp_event_read, 399 .poll = ib_uverbs_comp_event_poll, 400 .release = ib_uverbs_comp_event_close, 401 .fasync = ib_uverbs_comp_event_fasync, 402 .llseek = no_llseek, 403 }; 404 405 static const struct file_operations uverbs_async_event_fops = { 406 .owner = THIS_MODULE, 407 .read = ib_uverbs_async_event_read, 408 .poll = ib_uverbs_async_event_poll, 409 .release = ib_uverbs_async_event_close, 410 .fasync = ib_uverbs_async_event_fasync, 411 .llseek = no_llseek, 412 }; 413 414 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) 415 { 416 struct ib_uverbs_event_queue *ev_queue = cq_context; 417 struct ib_ucq_object *uobj; 418 struct ib_uverbs_event *entry; 419 unsigned long flags; 420 421 if (!ev_queue) 422 return; 423 424 spin_lock_irqsave(&ev_queue->lock, flags); 425 if (ev_queue->is_closed) { 426 spin_unlock_irqrestore(&ev_queue->lock, flags); 427 return; 428 } 429 430 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 431 if (!entry) { 432 spin_unlock_irqrestore(&ev_queue->lock, flags); 433 return; 434 } 435 436 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); 437 438 entry->desc.comp.cq_handle = cq->uobject->user_handle; 439 entry->counter = &uobj->comp_events_reported; 440 441 list_add_tail(&entry->list, &ev_queue->event_list); 442 list_add_tail(&entry->obj_list, &uobj->comp_list); 443 spin_unlock_irqrestore(&ev_queue->lock, flags); 444 445 wake_up_interruptible(&ev_queue->poll_wait); 446 kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); 447 } 448 449 static void ib_uverbs_async_handler(struct ib_uverbs_file *file, 450 __u64 element, __u64 event, 451 struct list_head *obj_list, 452 u32 *counter) 453 { 454 struct ib_uverbs_event *entry; 455 unsigned long flags; 456 457 spin_lock_irqsave(&file->async_file->ev_queue.lock, flags); 458 if (file->async_file->ev_queue.is_closed) { 459 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); 460 return; 461 } 462 463 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 464 if (!entry) { 465 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); 466 return; 467 } 468 469 entry->desc.async.element = element; 470 entry->desc.async.event_type = event; 471 entry->desc.async.reserved = 0; 472 entry->counter = counter; 473 474 list_add_tail(&entry->list, &file->async_file->ev_queue.event_list); 475 if (obj_list) 476 list_add_tail(&entry->obj_list, obj_list); 477 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); 478 479 wake_up_interruptible(&file->async_file->ev_queue.poll_wait); 480 kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN); 481 } 482 483 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) 484 { 485 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, 486 struct ib_ucq_object, uobject); 487 488 ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, 489 event->event, &uobj->async_list, 490 &uobj->async_events_reported); 491 } 492 493 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) 494 { 495 struct ib_uevent_object *uobj; 496 497 /* for XRC target qp's, check that qp is live */ 498 if (!event->element.qp->uobject) 499 return; 500 501 uobj = container_of(event->element.qp->uobject, 502 struct ib_uevent_object, uobject); 503 504 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, 505 event->event, &uobj->event_list, 506 &uobj->events_reported); 507 } 508 509 void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) 510 { 511 struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, 512 struct ib_uevent_object, uobject); 513 514 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, 515 event->event, &uobj->event_list, 516 &uobj->events_reported); 517 } 518 519 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) 520 { 521 struct ib_uevent_object *uobj; 522 523 uobj = container_of(event->element.srq->uobject, 524 struct ib_uevent_object, uobject); 525 526 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, 527 event->event, &uobj->event_list, 528 &uobj->events_reported); 529 } 530 531 void ib_uverbs_event_handler(struct ib_event_handler *handler, 532 struct ib_event *event) 533 { 534 struct ib_uverbs_file *file = 535 container_of(handler, struct ib_uverbs_file, event_handler); 536 537 ib_uverbs_async_handler(file, event->element.port_num, event->event, 538 NULL, NULL); 539 } 540 541 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) 542 { 543 kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); 544 file->async_file = NULL; 545 } 546 547 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) 548 { 549 spin_lock_init(&ev_queue->lock); 550 INIT_LIST_HEAD(&ev_queue->event_list); 551 init_waitqueue_head(&ev_queue->poll_wait); 552 ev_queue->is_closed = 0; 553 ev_queue->async_queue = NULL; 554 } 555 556 struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, 557 struct ib_device *ib_dev) 558 { 559 struct ib_uverbs_async_event_file *ev_file; 560 struct file *filp; 561 562 ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); 563 if (!ev_file) 564 return ERR_PTR(-ENOMEM); 565 566 ib_uverbs_init_event_queue(&ev_file->ev_queue); 567 ev_file->uverbs_file = uverbs_file; 568 kref_get(&ev_file->uverbs_file->ref); 569 kref_init(&ev_file->ref); 570 filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops, 571 ev_file, O_RDONLY); 572 if (IS_ERR(filp)) 573 goto err_put_refs; 574 575 mutex_lock(&uverbs_file->device->lists_mutex); 576 list_add_tail(&ev_file->list, 577 &uverbs_file->device->uverbs_events_file_list); 578 mutex_unlock(&uverbs_file->device->lists_mutex); 579 580 WARN_ON(uverbs_file->async_file); 581 uverbs_file->async_file = ev_file; 582 kref_get(&uverbs_file->async_file->ref); 583 INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, 584 ib_dev, 585 ib_uverbs_event_handler); 586 ib_register_event_handler(&uverbs_file->event_handler); 587 /* At that point async file stuff was fully set */ 588 589 return filp; 590 591 err_put_refs: 592 kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); 593 kref_put(&ev_file->ref, ib_uverbs_release_async_event_file); 594 return filp; 595 } 596 597 static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, 598 struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count, 599 const struct uverbs_api_write_method *method_elm) 600 { 601 if (method_elm->is_ex) { 602 count -= sizeof(*hdr) + sizeof(*ex_hdr); 603 604 if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) 605 return -EINVAL; 606 607 if (hdr->in_words * 8 < method_elm->req_size) 608 return -ENOSPC; 609 610 if (ex_hdr->cmd_hdr_reserved) 611 return -EINVAL; 612 613 if (ex_hdr->response) { 614 if (!hdr->out_words && !ex_hdr->provider_out_words) 615 return -EINVAL; 616 617 if (hdr->out_words * 8 < method_elm->resp_size) 618 return -ENOSPC; 619 620 if (!access_ok(u64_to_user_ptr(ex_hdr->response), 621 (hdr->out_words + ex_hdr->provider_out_words) * 8)) 622 return -EFAULT; 623 } else { 624 if (hdr->out_words || ex_hdr->provider_out_words) 625 return -EINVAL; 626 } 627 628 return 0; 629 } 630 631 /* not extended command */ 632 if (hdr->in_words * 4 != count) 633 return -EINVAL; 634 635 if (count < method_elm->req_size + sizeof(hdr)) { 636 /* 637 * rdma-core v18 and v19 have a bug where they send DESTROY_CQ 638 * with a 16 byte write instead of 24. Old kernels didn't 639 * check the size so they allowed this. Now that the size is 640 * checked provide a compatibility work around to not break 641 * those userspaces. 642 */ 643 if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ && 644 count == 16) { 645 hdr->in_words = 6; 646 return 0; 647 } 648 return -ENOSPC; 649 } 650 if (hdr->out_words * 4 < method_elm->resp_size) 651 return -ENOSPC; 652 653 return 0; 654 } 655 656 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, 657 size_t count, loff_t *pos) 658 { 659 struct ib_uverbs_file *file = filp->private_data; 660 const struct uverbs_api_write_method *method_elm; 661 struct uverbs_api *uapi = file->device->uapi; 662 struct ib_uverbs_ex_cmd_hdr ex_hdr; 663 struct ib_uverbs_cmd_hdr hdr; 664 struct uverbs_attr_bundle bundle; 665 int srcu_key; 666 ssize_t ret; 667 668 if (!ib_safe_file_access(filp)) { 669 pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", 670 task_tgid_vnr(current), current->comm); 671 return -EACCES; 672 } 673 674 if (count < sizeof(hdr)) 675 return -EINVAL; 676 677 if (copy_from_user(&hdr, buf, sizeof(hdr))) 678 return -EFAULT; 679 680 method_elm = uapi_get_method(uapi, hdr.command); 681 if (IS_ERR(method_elm)) 682 return PTR_ERR(method_elm); 683 684 if (method_elm->is_ex) { 685 if (count < (sizeof(hdr) + sizeof(ex_hdr))) 686 return -EINVAL; 687 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) 688 return -EFAULT; 689 } 690 691 ret = verify_hdr(&hdr, &ex_hdr, count, method_elm); 692 if (ret) 693 return ret; 694 695 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 696 697 buf += sizeof(hdr); 698 699 memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); 700 bundle.ufile = file; 701 bundle.context = NULL; /* only valid if bundle has uobject */ 702 if (!method_elm->is_ex) { 703 size_t in_len = hdr.in_words * 4 - sizeof(hdr); 704 size_t out_len = hdr.out_words * 4; 705 u64 response = 0; 706 707 if (method_elm->has_udata) { 708 bundle.driver_udata.inlen = 709 in_len - method_elm->req_size; 710 in_len = method_elm->req_size; 711 if (bundle.driver_udata.inlen) 712 bundle.driver_udata.inbuf = buf + in_len; 713 else 714 bundle.driver_udata.inbuf = NULL; 715 } else { 716 memset(&bundle.driver_udata, 0, 717 sizeof(bundle.driver_udata)); 718 } 719 720 if (method_elm->has_resp) { 721 /* 722 * The macros check that if has_resp is set 723 * then the command request structure starts 724 * with a '__aligned u64 response' member. 725 */ 726 ret = get_user(response, (const u64 *)buf); 727 if (ret) 728 goto out_unlock; 729 730 if (method_elm->has_udata) { 731 bundle.driver_udata.outlen = 732 out_len - method_elm->resp_size; 733 out_len = method_elm->resp_size; 734 if (bundle.driver_udata.outlen) 735 bundle.driver_udata.outbuf = 736 u64_to_user_ptr(response + 737 out_len); 738 else 739 bundle.driver_udata.outbuf = NULL; 740 } 741 } else { 742 bundle.driver_udata.outlen = 0; 743 bundle.driver_udata.outbuf = NULL; 744 } 745 746 ib_uverbs_init_udata_buf_or_null( 747 &bundle.ucore, buf, u64_to_user_ptr(response), 748 in_len, out_len); 749 } else { 750 buf += sizeof(ex_hdr); 751 752 ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf, 753 u64_to_user_ptr(ex_hdr.response), 754 hdr.in_words * 8, hdr.out_words * 8); 755 756 ib_uverbs_init_udata_buf_or_null( 757 &bundle.driver_udata, buf + bundle.ucore.inlen, 758 u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen, 759 ex_hdr.provider_in_words * 8, 760 ex_hdr.provider_out_words * 8); 761 762 } 763 764 ret = method_elm->handler(&bundle); 765 out_unlock: 766 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 767 return (ret) ? : count; 768 } 769 770 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) 771 { 772 struct ib_uverbs_file *file = filp->private_data; 773 struct ib_ucontext *ucontext; 774 int ret = 0; 775 int srcu_key; 776 777 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 778 ucontext = ib_uverbs_get_ucontext_file(file); 779 if (IS_ERR(ucontext)) { 780 ret = PTR_ERR(ucontext); 781 goto out; 782 } 783 784 ret = ucontext->device->ops.mmap(ucontext, vma); 785 out: 786 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 787 return ret; 788 } 789 790 /* 791 * Each time we map IO memory into user space this keeps track of the mapping. 792 * When the device is hot-unplugged we 'zap' the mmaps in user space to point 793 * to the zero page and allow the hot unplug to proceed. 794 * 795 * This is necessary for cases like PCI physical hot unplug as the actual BAR 796 * memory may vanish after this and access to it from userspace could MCE. 797 * 798 * RDMA drivers supporting disassociation must have their user space designed 799 * to cope in some way with their IO pages going to the zero page. 800 */ 801 struct rdma_umap_priv { 802 struct vm_area_struct *vma; 803 struct list_head list; 804 }; 805 806 static const struct vm_operations_struct rdma_umap_ops; 807 808 static void rdma_umap_priv_init(struct rdma_umap_priv *priv, 809 struct vm_area_struct *vma) 810 { 811 struct ib_uverbs_file *ufile = vma->vm_file->private_data; 812 813 priv->vma = vma; 814 vma->vm_private_data = priv; 815 vma->vm_ops = &rdma_umap_ops; 816 817 mutex_lock(&ufile->umap_lock); 818 list_add(&priv->list, &ufile->umaps); 819 mutex_unlock(&ufile->umap_lock); 820 } 821 822 /* 823 * The VMA has been dup'd, initialize the vm_private_data with a new tracking 824 * struct 825 */ 826 static void rdma_umap_open(struct vm_area_struct *vma) 827 { 828 struct ib_uverbs_file *ufile = vma->vm_file->private_data; 829 struct rdma_umap_priv *opriv = vma->vm_private_data; 830 struct rdma_umap_priv *priv; 831 832 if (!opriv) 833 return; 834 835 /* We are racing with disassociation */ 836 if (!down_read_trylock(&ufile->hw_destroy_rwsem)) 837 goto out_zap; 838 /* 839 * Disassociation already completed, the VMA should already be zapped. 840 */ 841 if (!ufile->ucontext) 842 goto out_unlock; 843 844 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 845 if (!priv) 846 goto out_unlock; 847 rdma_umap_priv_init(priv, vma); 848 849 up_read(&ufile->hw_destroy_rwsem); 850 return; 851 852 out_unlock: 853 up_read(&ufile->hw_destroy_rwsem); 854 out_zap: 855 /* 856 * We can't allow the VMA to be created with the actual IO pages, that 857 * would break our API contract, and it can't be stopped at this 858 * point, so zap it. 859 */ 860 vma->vm_private_data = NULL; 861 zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); 862 } 863 864 static void rdma_umap_close(struct vm_area_struct *vma) 865 { 866 struct ib_uverbs_file *ufile = vma->vm_file->private_data; 867 struct rdma_umap_priv *priv = vma->vm_private_data; 868 869 if (!priv) 870 return; 871 872 /* 873 * The vma holds a reference on the struct file that created it, which 874 * in turn means that the ib_uverbs_file is guaranteed to exist at 875 * this point. 876 */ 877 mutex_lock(&ufile->umap_lock); 878 list_del(&priv->list); 879 mutex_unlock(&ufile->umap_lock); 880 kfree(priv); 881 } 882 883 /* 884 * Once the zap_vma_ptes has been called touches to the VMA will come here and 885 * we return a dummy writable zero page for all the pfns. 886 */ 887 static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) 888 { 889 struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; 890 struct rdma_umap_priv *priv = vmf->vma->vm_private_data; 891 vm_fault_t ret = 0; 892 893 if (!priv) 894 return VM_FAULT_SIGBUS; 895 896 /* Read only pages can just use the system zero page. */ 897 if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { 898 vmf->page = ZERO_PAGE(vmf->address); 899 get_page(vmf->page); 900 return 0; 901 } 902 903 mutex_lock(&ufile->umap_lock); 904 if (!ufile->disassociate_page) 905 ufile->disassociate_page = 906 alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); 907 908 if (ufile->disassociate_page) { 909 /* 910 * This VMA is forced to always be shared so this doesn't have 911 * to worry about COW. 912 */ 913 vmf->page = ufile->disassociate_page; 914 get_page(vmf->page); 915 } else { 916 ret = VM_FAULT_SIGBUS; 917 } 918 mutex_unlock(&ufile->umap_lock); 919 920 return ret; 921 } 922 923 static const struct vm_operations_struct rdma_umap_ops = { 924 .open = rdma_umap_open, 925 .close = rdma_umap_close, 926 .fault = rdma_umap_fault, 927 }; 928 929 static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, 930 struct vm_area_struct *vma, 931 unsigned long size) 932 { 933 struct ib_uverbs_file *ufile = ucontext->ufile; 934 struct rdma_umap_priv *priv; 935 936 if (!(vma->vm_flags & VM_SHARED)) 937 return ERR_PTR(-EINVAL); 938 939 if (vma->vm_end - vma->vm_start != size) 940 return ERR_PTR(-EINVAL); 941 942 /* Driver is using this wrong, must be called by ib_uverbs_mmap */ 943 if (WARN_ON(!vma->vm_file || 944 vma->vm_file->private_data != ufile)) 945 return ERR_PTR(-EINVAL); 946 lockdep_assert_held(&ufile->device->disassociate_srcu); 947 948 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 949 if (!priv) 950 return ERR_PTR(-ENOMEM); 951 return priv; 952 } 953 954 /* 955 * Map IO memory into a process. This is to be called by drivers as part of 956 * their mmap() functions if they wish to send something like PCI-E BAR memory 957 * to userspace. 958 */ 959 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, 960 unsigned long pfn, unsigned long size, pgprot_t prot) 961 { 962 struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); 963 964 if (IS_ERR(priv)) 965 return PTR_ERR(priv); 966 967 vma->vm_page_prot = prot; 968 if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { 969 kfree(priv); 970 return -EAGAIN; 971 } 972 973 rdma_umap_priv_init(priv, vma); 974 return 0; 975 } 976 EXPORT_SYMBOL(rdma_user_mmap_io); 977 978 /* 979 * The page case is here for a slightly different reason, the driver expects 980 * to be able to free the page it is sharing to user space when it destroys 981 * its ucontext, which means we need to zap the user space references. 982 * 983 * We could handle this differently by providing an API to allocate a shared 984 * page and then only freeing the shared page when the last ufile is 985 * destroyed. 986 */ 987 int rdma_user_mmap_page(struct ib_ucontext *ucontext, 988 struct vm_area_struct *vma, struct page *page, 989 unsigned long size) 990 { 991 struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); 992 993 if (IS_ERR(priv)) 994 return PTR_ERR(priv); 995 996 if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, 997 vma->vm_page_prot)) { 998 kfree(priv); 999 return -EAGAIN; 1000 } 1001 1002 rdma_umap_priv_init(priv, vma); 1003 return 0; 1004 } 1005 EXPORT_SYMBOL(rdma_user_mmap_page); 1006 1007 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) 1008 { 1009 struct rdma_umap_priv *priv, *next_priv; 1010 1011 lockdep_assert_held(&ufile->hw_destroy_rwsem); 1012 1013 while (1) { 1014 struct mm_struct *mm = NULL; 1015 1016 /* Get an arbitrary mm pointer that hasn't been cleaned yet */ 1017 mutex_lock(&ufile->umap_lock); 1018 while (!list_empty(&ufile->umaps)) { 1019 int ret; 1020 1021 priv = list_first_entry(&ufile->umaps, 1022 struct rdma_umap_priv, list); 1023 mm = priv->vma->vm_mm; 1024 ret = mmget_not_zero(mm); 1025 if (!ret) { 1026 list_del_init(&priv->list); 1027 mm = NULL; 1028 continue; 1029 } 1030 break; 1031 } 1032 mutex_unlock(&ufile->umap_lock); 1033 if (!mm) 1034 return; 1035 1036 /* 1037 * The umap_lock is nested under mmap_sem since it used within 1038 * the vma_ops callbacks, so we have to clean the list one mm 1039 * at a time to get the lock ordering right. Typically there 1040 * will only be one mm, so no big deal. 1041 */ 1042 down_read(&mm->mmap_sem); 1043 if (!mmget_still_valid(mm)) 1044 goto skip_mm; 1045 mutex_lock(&ufile->umap_lock); 1046 list_for_each_entry_safe (priv, next_priv, &ufile->umaps, 1047 list) { 1048 struct vm_area_struct *vma = priv->vma; 1049 1050 if (vma->vm_mm != mm) 1051 continue; 1052 list_del_init(&priv->list); 1053 1054 zap_vma_ptes(vma, vma->vm_start, 1055 vma->vm_end - vma->vm_start); 1056 } 1057 mutex_unlock(&ufile->umap_lock); 1058 skip_mm: 1059 up_read(&mm->mmap_sem); 1060 mmput(mm); 1061 } 1062 } 1063 1064 /* 1065 * ib_uverbs_open() does not need the BKL: 1066 * 1067 * - the ib_uverbs_device structures are properly reference counted and 1068 * everything else is purely local to the file being created, so 1069 * races against other open calls are not a problem; 1070 * - there is no ioctl method to race against; 1071 * - the open method will either immediately run -ENXIO, or all 1072 * required initialization will be done. 1073 */ 1074 static int ib_uverbs_open(struct inode *inode, struct file *filp) 1075 { 1076 struct ib_uverbs_device *dev; 1077 struct ib_uverbs_file *file; 1078 struct ib_device *ib_dev; 1079 int ret; 1080 int module_dependent; 1081 int srcu_key; 1082 1083 dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); 1084 if (!atomic_inc_not_zero(&dev->refcount)) 1085 return -ENXIO; 1086 1087 get_device(&dev->dev); 1088 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1089 mutex_lock(&dev->lists_mutex); 1090 ib_dev = srcu_dereference(dev->ib_dev, 1091 &dev->disassociate_srcu); 1092 if (!ib_dev) { 1093 ret = -EIO; 1094 goto err; 1095 } 1096 1097 /* In case IB device supports disassociate ucontext, there is no hard 1098 * dependency between uverbs device and its low level device. 1099 */ 1100 module_dependent = !(ib_dev->ops.disassociate_ucontext); 1101 1102 if (module_dependent) { 1103 if (!try_module_get(ib_dev->owner)) { 1104 ret = -ENODEV; 1105 goto err; 1106 } 1107 } 1108 1109 file = kzalloc(sizeof(*file), GFP_KERNEL); 1110 if (!file) { 1111 ret = -ENOMEM; 1112 if (module_dependent) 1113 goto err_module; 1114 1115 goto err; 1116 } 1117 1118 file->device = dev; 1119 kref_init(&file->ref); 1120 mutex_init(&file->ucontext_lock); 1121 1122 spin_lock_init(&file->uobjects_lock); 1123 INIT_LIST_HEAD(&file->uobjects); 1124 init_rwsem(&file->hw_destroy_rwsem); 1125 mutex_init(&file->umap_lock); 1126 INIT_LIST_HEAD(&file->umaps); 1127 1128 filp->private_data = file; 1129 list_add_tail(&file->list, &dev->uverbs_file_list); 1130 mutex_unlock(&dev->lists_mutex); 1131 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1132 1133 setup_ufile_idr_uobject(file); 1134 1135 return nonseekable_open(inode, filp); 1136 1137 err_module: 1138 module_put(ib_dev->owner); 1139 1140 err: 1141 mutex_unlock(&dev->lists_mutex); 1142 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1143 if (atomic_dec_and_test(&dev->refcount)) 1144 ib_uverbs_comp_dev(dev); 1145 1146 put_device(&dev->dev); 1147 return ret; 1148 } 1149 1150 static int ib_uverbs_close(struct inode *inode, struct file *filp) 1151 { 1152 struct ib_uverbs_file *file = filp->private_data; 1153 1154 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); 1155 1156 mutex_lock(&file->device->lists_mutex); 1157 list_del_init(&file->list); 1158 mutex_unlock(&file->device->lists_mutex); 1159 1160 kref_put(&file->ref, ib_uverbs_release_file); 1161 1162 return 0; 1163 } 1164 1165 static const struct file_operations uverbs_fops = { 1166 .owner = THIS_MODULE, 1167 .write = ib_uverbs_write, 1168 .open = ib_uverbs_open, 1169 .release = ib_uverbs_close, 1170 .llseek = no_llseek, 1171 .unlocked_ioctl = ib_uverbs_ioctl, 1172 .compat_ioctl = ib_uverbs_ioctl, 1173 }; 1174 1175 static const struct file_operations uverbs_mmap_fops = { 1176 .owner = THIS_MODULE, 1177 .write = ib_uverbs_write, 1178 .mmap = ib_uverbs_mmap, 1179 .open = ib_uverbs_open, 1180 .release = ib_uverbs_close, 1181 .llseek = no_llseek, 1182 .unlocked_ioctl = ib_uverbs_ioctl, 1183 .compat_ioctl = ib_uverbs_ioctl, 1184 }; 1185 1186 static struct ib_client uverbs_client = { 1187 .name = "uverbs", 1188 .no_kverbs_req = true, 1189 .add = ib_uverbs_add_one, 1190 .remove = ib_uverbs_remove_one 1191 }; 1192 1193 static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, 1194 char *buf) 1195 { 1196 struct ib_uverbs_device *dev = 1197 container_of(device, struct ib_uverbs_device, dev); 1198 int ret = -ENODEV; 1199 int srcu_key; 1200 struct ib_device *ib_dev; 1201 1202 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1203 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1204 if (ib_dev) 1205 ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); 1206 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1207 1208 return ret; 1209 } 1210 static DEVICE_ATTR_RO(ibdev); 1211 1212 static ssize_t abi_version_show(struct device *device, 1213 struct device_attribute *attr, char *buf) 1214 { 1215 struct ib_uverbs_device *dev = 1216 container_of(device, struct ib_uverbs_device, dev); 1217 int ret = -ENODEV; 1218 int srcu_key; 1219 struct ib_device *ib_dev; 1220 1221 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1222 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1223 if (ib_dev) 1224 ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); 1225 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1226 1227 return ret; 1228 } 1229 static DEVICE_ATTR_RO(abi_version); 1230 1231 static struct attribute *ib_dev_attrs[] = { 1232 &dev_attr_abi_version.attr, 1233 &dev_attr_ibdev.attr, 1234 NULL, 1235 }; 1236 1237 static const struct attribute_group dev_attr_group = { 1238 .attrs = ib_dev_attrs, 1239 }; 1240 1241 static CLASS_ATTR_STRING(abi_version, S_IRUGO, 1242 __stringify(IB_USER_VERBS_ABI_VERSION)); 1243 1244 static int ib_uverbs_create_uapi(struct ib_device *device, 1245 struct ib_uverbs_device *uverbs_dev) 1246 { 1247 struct uverbs_api *uapi; 1248 1249 uapi = uverbs_alloc_api(device); 1250 if (IS_ERR(uapi)) 1251 return PTR_ERR(uapi); 1252 1253 uverbs_dev->uapi = uapi; 1254 return 0; 1255 } 1256 1257 static void ib_uverbs_add_one(struct ib_device *device) 1258 { 1259 int devnum; 1260 dev_t base; 1261 struct ib_uverbs_device *uverbs_dev; 1262 int ret; 1263 1264 if (!device->ops.alloc_ucontext) 1265 return; 1266 1267 uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); 1268 if (!uverbs_dev) 1269 return; 1270 1271 ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); 1272 if (ret) { 1273 kfree(uverbs_dev); 1274 return; 1275 } 1276 1277 device_initialize(&uverbs_dev->dev); 1278 uverbs_dev->dev.class = uverbs_class; 1279 uverbs_dev->dev.parent = device->dev.parent; 1280 uverbs_dev->dev.release = ib_uverbs_release_dev; 1281 uverbs_dev->groups[0] = &dev_attr_group; 1282 uverbs_dev->dev.groups = uverbs_dev->groups; 1283 atomic_set(&uverbs_dev->refcount, 1); 1284 init_completion(&uverbs_dev->comp); 1285 uverbs_dev->xrcd_tree = RB_ROOT; 1286 mutex_init(&uverbs_dev->xrcd_tree_mutex); 1287 mutex_init(&uverbs_dev->lists_mutex); 1288 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); 1289 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); 1290 rcu_assign_pointer(uverbs_dev->ib_dev, device); 1291 uverbs_dev->num_comp_vectors = device->num_comp_vectors; 1292 1293 devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, 1294 GFP_KERNEL); 1295 if (devnum < 0) 1296 goto err; 1297 uverbs_dev->devnum = devnum; 1298 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) 1299 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; 1300 else 1301 base = IB_UVERBS_BASE_DEV + devnum; 1302 1303 if (ib_uverbs_create_uapi(device, uverbs_dev)) 1304 goto err_uapi; 1305 1306 uverbs_dev->dev.devt = base; 1307 dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); 1308 1309 cdev_init(&uverbs_dev->cdev, 1310 device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops); 1311 uverbs_dev->cdev.owner = THIS_MODULE; 1312 1313 ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); 1314 if (ret) 1315 goto err_uapi; 1316 1317 ib_set_client_data(device, &uverbs_client, uverbs_dev); 1318 return; 1319 1320 err_uapi: 1321 ida_free(&uverbs_ida, devnum); 1322 err: 1323 if (atomic_dec_and_test(&uverbs_dev->refcount)) 1324 ib_uverbs_comp_dev(uverbs_dev); 1325 wait_for_completion(&uverbs_dev->comp); 1326 put_device(&uverbs_dev->dev); 1327 return; 1328 } 1329 1330 static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, 1331 struct ib_device *ib_dev) 1332 { 1333 struct ib_uverbs_file *file; 1334 struct ib_uverbs_async_event_file *event_file; 1335 struct ib_event event; 1336 1337 /* Pending running commands to terminate */ 1338 uverbs_disassociate_api_pre(uverbs_dev); 1339 event.event = IB_EVENT_DEVICE_FATAL; 1340 event.element.port_num = 0; 1341 event.device = ib_dev; 1342 1343 mutex_lock(&uverbs_dev->lists_mutex); 1344 while (!list_empty(&uverbs_dev->uverbs_file_list)) { 1345 file = list_first_entry(&uverbs_dev->uverbs_file_list, 1346 struct ib_uverbs_file, list); 1347 list_del_init(&file->list); 1348 kref_get(&file->ref); 1349 1350 /* We must release the mutex before going ahead and calling 1351 * uverbs_cleanup_ufile, as it might end up indirectly calling 1352 * uverbs_close, for example due to freeing the resources (e.g 1353 * mmput). 1354 */ 1355 mutex_unlock(&uverbs_dev->lists_mutex); 1356 1357 ib_uverbs_event_handler(&file->event_handler, &event); 1358 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); 1359 kref_put(&file->ref, ib_uverbs_release_file); 1360 1361 mutex_lock(&uverbs_dev->lists_mutex); 1362 } 1363 1364 while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { 1365 event_file = list_first_entry(&uverbs_dev-> 1366 uverbs_events_file_list, 1367 struct ib_uverbs_async_event_file, 1368 list); 1369 spin_lock_irq(&event_file->ev_queue.lock); 1370 event_file->ev_queue.is_closed = 1; 1371 spin_unlock_irq(&event_file->ev_queue.lock); 1372 1373 list_del(&event_file->list); 1374 ib_unregister_event_handler( 1375 &event_file->uverbs_file->event_handler); 1376 event_file->uverbs_file->event_handler.device = 1377 NULL; 1378 1379 wake_up_interruptible(&event_file->ev_queue.poll_wait); 1380 kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); 1381 } 1382 mutex_unlock(&uverbs_dev->lists_mutex); 1383 1384 uverbs_disassociate_api(uverbs_dev->uapi); 1385 } 1386 1387 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) 1388 { 1389 struct ib_uverbs_device *uverbs_dev = client_data; 1390 int wait_clients = 1; 1391 1392 if (!uverbs_dev) 1393 return; 1394 1395 cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); 1396 ida_free(&uverbs_ida, uverbs_dev->devnum); 1397 1398 if (device->ops.disassociate_ucontext) { 1399 /* We disassociate HW resources and immediately return. 1400 * Userspace will see a EIO errno for all future access. 1401 * Upon returning, ib_device may be freed internally and is not 1402 * valid any more. 1403 * uverbs_device is still available until all clients close 1404 * their files, then the uverbs device ref count will be zero 1405 * and its resources will be freed. 1406 * Note: At this point no more files can be opened since the 1407 * cdev was deleted, however active clients can still issue 1408 * commands and close their open files. 1409 */ 1410 ib_uverbs_free_hw_resources(uverbs_dev, device); 1411 wait_clients = 0; 1412 } 1413 1414 if (atomic_dec_and_test(&uverbs_dev->refcount)) 1415 ib_uverbs_comp_dev(uverbs_dev); 1416 if (wait_clients) 1417 wait_for_completion(&uverbs_dev->comp); 1418 1419 put_device(&uverbs_dev->dev); 1420 } 1421 1422 static char *uverbs_devnode(struct device *dev, umode_t *mode) 1423 { 1424 if (mode) 1425 *mode = 0666; 1426 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); 1427 } 1428 1429 static int __init ib_uverbs_init(void) 1430 { 1431 int ret; 1432 1433 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, 1434 IB_UVERBS_NUM_FIXED_MINOR, 1435 "infiniband_verbs"); 1436 if (ret) { 1437 pr_err("user_verbs: couldn't register device number\n"); 1438 goto out; 1439 } 1440 1441 ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0, 1442 IB_UVERBS_NUM_DYNAMIC_MINOR, 1443 "infiniband_verbs"); 1444 if (ret) { 1445 pr_err("couldn't register dynamic device number\n"); 1446 goto out_alloc; 1447 } 1448 1449 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); 1450 if (IS_ERR(uverbs_class)) { 1451 ret = PTR_ERR(uverbs_class); 1452 pr_err("user_verbs: couldn't create class infiniband_verbs\n"); 1453 goto out_chrdev; 1454 } 1455 1456 uverbs_class->devnode = uverbs_devnode; 1457 1458 ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); 1459 if (ret) { 1460 pr_err("user_verbs: couldn't create abi_version attribute\n"); 1461 goto out_class; 1462 } 1463 1464 ret = ib_register_client(&uverbs_client); 1465 if (ret) { 1466 pr_err("user_verbs: couldn't register client\n"); 1467 goto out_class; 1468 } 1469 1470 return 0; 1471 1472 out_class: 1473 class_destroy(uverbs_class); 1474 1475 out_chrdev: 1476 unregister_chrdev_region(dynamic_uverbs_dev, 1477 IB_UVERBS_NUM_DYNAMIC_MINOR); 1478 1479 out_alloc: 1480 unregister_chrdev_region(IB_UVERBS_BASE_DEV, 1481 IB_UVERBS_NUM_FIXED_MINOR); 1482 1483 out: 1484 return ret; 1485 } 1486 1487 static void __exit ib_uverbs_cleanup(void) 1488 { 1489 ib_unregister_client(&uverbs_client); 1490 class_destroy(uverbs_class); 1491 unregister_chrdev_region(IB_UVERBS_BASE_DEV, 1492 IB_UVERBS_NUM_FIXED_MINOR); 1493 unregister_chrdev_region(dynamic_uverbs_dev, 1494 IB_UVERBS_NUM_DYNAMIC_MINOR); 1495 } 1496 1497 module_init(ib_uverbs_init); 1498 module_exit(ib_uverbs_cleanup); 1499