xref: /linux/drivers/infiniband/core/rdma_core.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies inc.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/file.h>
34 #include <linux/anon_inodes.h>
35 #include <linux/sched/mm.h>
36 #include <rdma/ib_verbs.h>
37 #include <rdma/uverbs_types.h>
38 #include <linux/rcupdate.h>
39 #include <rdma/uverbs_ioctl.h>
40 #include <rdma/rdma_user_ioctl.h>
41 #include "uverbs.h"
42 #include "core_priv.h"
43 #include "rdma_core.h"
44 
45 static void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
46 
47 void ib_uverbs_release_file(struct kref *ref)
48 {
49 	struct ib_uverbs_file *file =
50 		container_of(ref, struct ib_uverbs_file, ref);
51 	struct ib_device *ib_dev;
52 	int srcu_key;
53 
54 	release_ufile_idr_uobject(file);
55 
56 	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
57 	ib_dev = srcu_dereference(file->device->ib_dev,
58 				  &file->device->disassociate_srcu);
59 	if (ib_dev && !ib_dev->ops.disassociate_ucontext)
60 		module_put(ib_dev->ops.owner);
61 	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
62 
63 	if (refcount_dec_and_test(&file->device->refcount))
64 		ib_uverbs_comp_dev(file->device);
65 
66 	if (file->default_async_file)
67 		uverbs_uobject_put(&file->default_async_file->uobj);
68 	put_device(&file->device->dev);
69 
70 	if (file->disassociate_page)
71 		__free_pages(file->disassociate_page, 0);
72 	mutex_destroy(&file->disassociation_lock);
73 	mutex_destroy(&file->umap_lock);
74 	mutex_destroy(&file->ucontext_lock);
75 	kfree(file);
76 }
77 EXPORT_SYMBOL_NS_GPL(ib_uverbs_release_file, "rdma_core");
78 
79 static void uverbs_uobject_free(struct kref *ref)
80 {
81 	kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu);
82 }
83 
84 /*
85  * In order to indicate we no longer needs this uobject, uverbs_uobject_put
86  * is called. When the reference count is decreased, the uobject is freed.
87  * For example, this is used when attaching a completion channel to a CQ.
88  */
89 void uverbs_uobject_put(struct ib_uobject *uobject)
90 {
91 	kref_put(&uobject->ref, uverbs_uobject_free);
92 }
93 EXPORT_SYMBOL(uverbs_uobject_put);
94 
95 int uverbs_try_lock_object(struct ib_uobject *uobj,
96 			   enum rdma_lookup_mode mode)
97 {
98 	/*
99 	 * When a shared access is required, we use a positive counter. Each
100 	 * shared access request checks that the value != -1 and increment it.
101 	 * Exclusive access is required for operations like write or destroy.
102 	 * In exclusive access mode, we check that the counter is zero (nobody
103 	 * claimed this object) and we set it to -1. Releasing a shared access
104 	 * lock is done simply by decreasing the counter. As for exclusive
105 	 * access locks, since only a single one of them is allowed
106 	 * concurrently, setting the counter to zero is enough for releasing
107 	 * this lock.
108 	 */
109 	switch (mode) {
110 	case UVERBS_LOOKUP_READ:
111 		return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
112 			-EBUSY : 0;
113 	case UVERBS_LOOKUP_WRITE:
114 		/* lock is exclusive */
115 		return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
116 	case UVERBS_LOOKUP_DESTROY:
117 		return 0;
118 	}
119 	return 0;
120 }
121 EXPORT_SYMBOL(uverbs_try_lock_object);
122 
123 static void assert_uverbs_usecnt(struct ib_uobject *uobj,
124 				 enum rdma_lookup_mode mode)
125 {
126 #ifdef CONFIG_LOCKDEP
127 	switch (mode) {
128 	case UVERBS_LOOKUP_READ:
129 		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
130 		break;
131 	case UVERBS_LOOKUP_WRITE:
132 		WARN_ON(atomic_read(&uobj->usecnt) != -1);
133 		break;
134 	case UVERBS_LOOKUP_DESTROY:
135 		break;
136 	}
137 #endif
138 }
139 
140 /*
141  * This must be called with the hw_destroy_rwsem locked for read or write,
142  * also the uobject itself must be locked for write.
143  *
144  * Upon return the HW object is guaranteed to be destroyed.
145  *
146  * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
147  * however the type's allocat_commit function cannot have been called and the
148  * uobject cannot be on the uobjects_lists
149  *
150  * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
151  * rdma_lookup_get_uobject) and the object is left in a state where the caller
152  * needs to call rdma_lookup_put_uobject.
153  *
154  * For all other destroy modes this function internally unlocks the uobject
155  * and consumes the kref on the uobj.
156  */
157 static int uverbs_destroy_uobject(struct ib_uobject *uobj,
158 				  enum rdma_remove_reason reason,
159 				  struct uverbs_attr_bundle *attrs)
160 {
161 	struct ib_uverbs_file *ufile = attrs->ufile;
162 	unsigned long flags;
163 	int ret;
164 
165 	lockdep_assert_held(&ufile->hw_destroy_rwsem);
166 	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
167 
168 	if (reason == RDMA_REMOVE_ABORT) {
169 		WARN_ON(!list_empty(&uobj->list));
170 		WARN_ON(!uobj->context);
171 		uobj->uapi_object->type_class->alloc_abort(uobj);
172 	} else if (uobj->object) {
173 		ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
174 								attrs);
175 		if (ret)
176 			/* Nothing to be done, wait till ucontext will clean it */
177 			return ret;
178 
179 		uobj->object = NULL;
180 	}
181 
182 	uobj->context = NULL;
183 
184 	/*
185 	 * For DESTROY the usecnt is not changed, the caller is expected to
186 	 * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
187 	 * handle.
188 	 */
189 	if (reason != RDMA_REMOVE_DESTROY)
190 		atomic_set(&uobj->usecnt, 0);
191 	else
192 		uobj->uapi_object->type_class->remove_handle(uobj);
193 
194 	if (!list_empty(&uobj->list)) {
195 		spin_lock_irqsave(&ufile->uobjects_lock, flags);
196 		list_del_init(&uobj->list);
197 		spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
198 
199 		/*
200 		 * Pairs with the get in rdma_alloc_commit_uobject(), could
201 		 * destroy uobj.
202 		 */
203 		uverbs_uobject_put(uobj);
204 	}
205 
206 	/*
207 	 * When aborting the stack kref remains owned by the core code, and is
208 	 * not transferred into the type. Pairs with the get in alloc_uobj
209 	 */
210 	if (reason == RDMA_REMOVE_ABORT)
211 		uverbs_uobject_put(uobj);
212 
213 	return 0;
214 }
215 
216 /*
217  * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
218  * sequence. It should only be used from command callbacks. On success the
219  * caller must pair this with uobj_put_destroy(). This
220  * version requires the caller to have already obtained an
221  * LOOKUP_DESTROY uobject kref.
222  */
223 int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
224 {
225 	struct ib_uverbs_file *ufile = attrs->ufile;
226 	int ret;
227 
228 	down_read(&ufile->hw_destroy_rwsem);
229 
230 	/*
231 	 * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
232 	 * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
233 	 * This is because any other concurrent thread can still see the object
234 	 * in the xarray due to RCU. Leaving it locked ensures nothing else will
235 	 * touch it.
236 	 */
237 	ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
238 	if (ret)
239 		goto out_unlock;
240 
241 	ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs);
242 	if (ret) {
243 		atomic_set(&uobj->usecnt, 0);
244 		goto out_unlock;
245 	}
246 
247 out_unlock:
248 	up_read(&ufile->hw_destroy_rwsem);
249 	return ret;
250 }
251 EXPORT_SYMBOL_NS_GPL(uobj_destroy, "rdma_core");
252 
253 /*
254  * uobj_get_destroy destroys the HW object and returns a handle to the uobj
255  * with a NULL object pointer. The caller must pair this with
256  * uobj_put_destroy().
257  */
258 struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
259 				      u32 id, struct uverbs_attr_bundle *attrs)
260 {
261 	struct ib_uobject *uobj;
262 	int ret;
263 
264 	uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
265 				       UVERBS_LOOKUP_DESTROY, attrs);
266 	if (IS_ERR(uobj))
267 		return uobj;
268 
269 	ret = uobj_destroy(uobj, attrs);
270 	if (ret) {
271 		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
272 		return ERR_PTR(ret);
273 	}
274 
275 	return uobj;
276 }
277 EXPORT_SYMBOL_NS_GPL(__uobj_get_destroy, "rdma_core");
278 
279 /*
280  * Does both uobj_get_destroy() and uobj_put_destroy().  Returns 0 on success
281  * (negative errno on failure). For use by callers that do not need the uobj.
282  */
283 int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
284 			   struct uverbs_attr_bundle *attrs)
285 {
286 	struct ib_uobject *uobj;
287 
288 	uobj = __uobj_get_destroy(obj, id, attrs);
289 	if (IS_ERR(uobj))
290 		return PTR_ERR(uobj);
291 	uobj_put_destroy(uobj);
292 	return 0;
293 }
294 EXPORT_SYMBOL_NS_GPL(__uobj_perform_destroy, "rdma_core");
295 
296 /* alloc_uobj must be undone by uverbs_destroy_uobject() */
297 static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs,
298 				     const struct uverbs_api_object *obj)
299 {
300 	struct ib_uverbs_file *ufile = attrs->ufile;
301 	struct ib_uobject *uobj;
302 
303 	if (!attrs->context) {
304 		struct ib_ucontext *ucontext =
305 			ib_uverbs_get_ucontext_file(ufile);
306 
307 		if (IS_ERR(ucontext))
308 			return ERR_CAST(ucontext);
309 		attrs->context = ucontext;
310 	}
311 
312 	uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
313 	if (!uobj)
314 		return ERR_PTR(-ENOMEM);
315 	/*
316 	 * user_handle should be filled by the handler,
317 	 * The object is added to the list in the commit stage.
318 	 */
319 	uobj->ufile = ufile;
320 	uobj->context = attrs->context;
321 	INIT_LIST_HEAD(&uobj->list);
322 	uobj->uapi_object = obj;
323 	/*
324 	 * Allocated objects start out as write locked to deny any other
325 	 * syscalls from accessing them until they are committed. See
326 	 * rdma_alloc_commit_uobject
327 	 */
328 	atomic_set(&uobj->usecnt, -1);
329 	kref_init(&uobj->ref);
330 
331 	return uobj;
332 }
333 
334 static int idr_add_uobj(struct ib_uobject *uobj)
335 {
336        /*
337         * We start with allocating an idr pointing to NULL. This represents an
338         * object which isn't initialized yet. We'll replace it later on with
339         * the real object once we commit.
340         */
341 	return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL, xa_limit_32b,
342 			GFP_KERNEL);
343 }
344 
345 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
346 static struct ib_uobject *
347 lookup_get_idr_uobject(const struct uverbs_api_object *obj,
348 		       struct ib_uverbs_file *ufile, s64 id,
349 		       enum rdma_lookup_mode mode)
350 {
351 	struct ib_uobject *uobj;
352 
353 	if (id < 0 || id > ULONG_MAX)
354 		return ERR_PTR(-EINVAL);
355 
356 	rcu_read_lock();
357 	/*
358 	 * The idr_find is guaranteed to return a pointer to something that
359 	 * isn't freed yet, or NULL, as the free after idr_remove goes through
360 	 * kfree_rcu(). However the object may still have been released and
361 	 * kfree() could be called at any time.
362 	 */
363 	uobj = xa_load(&ufile->idr, id);
364 	if (!uobj || !kref_get_unless_zero(&uobj->ref))
365 		uobj = ERR_PTR(-ENOENT);
366 	rcu_read_unlock();
367 	return uobj;
368 }
369 
370 static struct ib_uobject *
371 lookup_get_fd_uobject(const struct uverbs_api_object *obj,
372 		      struct ib_uverbs_file *ufile, s64 id,
373 		      enum rdma_lookup_mode mode)
374 {
375 	const struct uverbs_obj_fd_type *fd_type;
376 	struct file *f;
377 	struct ib_uobject *uobject;
378 	int fdno = id;
379 
380 	if (fdno != id)
381 		return ERR_PTR(-EINVAL);
382 
383 	if (mode != UVERBS_LOOKUP_READ)
384 		return ERR_PTR(-EOPNOTSUPP);
385 
386 	if (!obj->type_attrs)
387 		return ERR_PTR(-EIO);
388 	fd_type =
389 		container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
390 
391 	f = fget(fdno);
392 	if (!f)
393 		return ERR_PTR(-EBADF);
394 
395 	uobject = f->private_data;
396 	/*
397 	 * fget(id) ensures we are not currently running
398 	 * uverbs_uobject_fd_release(), and the caller is expected to ensure
399 	 * that release is never done while a call to lookup is possible.
400 	 */
401 	if (f->f_op != fd_type->fops || uobject->ufile != ufile) {
402 		fput(f);
403 		return ERR_PTR(-EBADF);
404 	}
405 
406 	uverbs_uobject_get(uobject);
407 	return uobject;
408 }
409 
410 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
411 					   struct ib_uverbs_file *ufile, s64 id,
412 					   enum rdma_lookup_mode mode,
413 					   struct uverbs_attr_bundle *attrs)
414 {
415 	struct ib_uobject *uobj;
416 	int ret;
417 
418 	if (obj == ERR_PTR(-ENOMSG)) {
419 		/* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
420 		uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
421 		if (IS_ERR(uobj))
422 			return uobj;
423 	} else {
424 		if (IS_ERR(obj))
425 			return ERR_PTR(-EINVAL);
426 
427 		uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
428 		if (IS_ERR(uobj))
429 			return uobj;
430 
431 		if (uobj->uapi_object != obj) {
432 			ret = -EINVAL;
433 			goto free;
434 		}
435 	}
436 
437 	/*
438 	 * If we have been disassociated block every command except for
439 	 * DESTROY based commands.
440 	 */
441 	if (mode != UVERBS_LOOKUP_DESTROY &&
442 	    !srcu_dereference(ufile->device->ib_dev,
443 			      &ufile->device->disassociate_srcu)) {
444 		ret = -EIO;
445 		goto free;
446 	}
447 
448 	ret = uverbs_try_lock_object(uobj, mode);
449 	if (ret)
450 		goto free;
451 	if (attrs)
452 		attrs->context = uobj->context;
453 
454 	return uobj;
455 free:
456 	uobj->uapi_object->type_class->lookup_put(uobj, mode);
457 	uverbs_uobject_put(uobj);
458 	return ERR_PTR(ret);
459 }
460 EXPORT_SYMBOL_NS_GPL(rdma_lookup_get_uobject, "rdma_core");
461 
462 static struct ib_uobject *
463 alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
464 			struct uverbs_attr_bundle *attrs)
465 {
466 	int ret;
467 	struct ib_uobject *uobj;
468 
469 	uobj = alloc_uobj(attrs, obj);
470 	if (IS_ERR(uobj))
471 		return uobj;
472 
473 	ret = idr_add_uobj(uobj);
474 	if (ret)
475 		goto uobj_put;
476 
477 	ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
478 				   RDMACG_RESOURCE_HCA_OBJECT);
479 	if (ret)
480 		goto remove;
481 
482 	return uobj;
483 
484 remove:
485 	xa_erase(&attrs->ufile->idr, uobj->id);
486 uobj_put:
487 	uverbs_uobject_put(uobj);
488 	return ERR_PTR(ret);
489 }
490 
491 static struct ib_uobject *
492 alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
493 		       struct uverbs_attr_bundle *attrs)
494 {
495 	const struct uverbs_obj_fd_type *fd_type;
496 	int new_fd;
497 	struct ib_uobject *uobj, *ret;
498 	struct file *filp;
499 
500 	uobj = alloc_uobj(attrs, obj);
501 	if (IS_ERR(uobj))
502 		return uobj;
503 
504 	fd_type =
505 		container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
506 	if (WARN_ON(fd_type->fops &&
507 		    fd_type->fops->release != &uverbs_uobject_fd_release)) {
508 		ret = ERR_PTR(-EINVAL);
509 		goto err_fd;
510 	}
511 
512 	new_fd = get_unused_fd_flags(O_CLOEXEC);
513 	if (new_fd < 0) {
514 		ret = ERR_PTR(new_fd);
515 		goto err_fd;
516 	}
517 
518 	if (fd_type->fops) {
519 		/* Note that uverbs_uobject_fd_release() is called during abort */
520 		filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
521 					  fd_type->flags);
522 		if (IS_ERR(filp)) {
523 			ret = ERR_CAST(filp);
524 			goto err_getfile;
525 		}
526 		uobj->object = filp;
527 	}
528 
529 	uobj->id = new_fd;
530 	return uobj;
531 
532 err_getfile:
533 	put_unused_fd(new_fd);
534 err_fd:
535 	uverbs_uobject_put(uobj);
536 	return ret;
537 }
538 
539 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
540 					    struct uverbs_attr_bundle *attrs)
541 {
542 	struct ib_uverbs_file *ufile = attrs->ufile;
543 	struct ib_uobject *ret;
544 
545 	if (IS_ERR(obj))
546 		return ERR_PTR(-EINVAL);
547 
548 	/*
549 	 * The hw_destroy_rwsem is held across the entire object creation and
550 	 * released during rdma_alloc_commit_uobject or
551 	 * rdma_alloc_abort_uobject
552 	 */
553 	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
554 		return ERR_PTR(-EIO);
555 
556 	ret = obj->type_class->alloc_begin(obj, attrs);
557 	if (IS_ERR(ret)) {
558 		up_read(&ufile->hw_destroy_rwsem);
559 		return ret;
560 	}
561 	return ret;
562 }
563 EXPORT_SYMBOL_NS_GPL(rdma_alloc_begin_uobject, "rdma_core");
564 
565 static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
566 {
567 	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
568 			   RDMACG_RESOURCE_HCA_OBJECT);
569 
570 	xa_erase(&uobj->ufile->idr, uobj->id);
571 }
572 
573 static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
574 					       enum rdma_remove_reason why,
575 					       struct uverbs_attr_bundle *attrs)
576 {
577 	const struct uverbs_obj_idr_type *idr_type =
578 		container_of(uobj->uapi_object->type_attrs,
579 			     struct uverbs_obj_idr_type, type);
580 	int ret = idr_type->destroy_object(uobj, why, attrs);
581 
582 	if (ret)
583 		return ret;
584 
585 	if (why == RDMA_REMOVE_ABORT)
586 		return 0;
587 
588 	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
589 			   RDMACG_RESOURCE_HCA_OBJECT);
590 
591 	return 0;
592 }
593 
594 static void remove_handle_idr_uobject(struct ib_uobject *uobj)
595 {
596 	xa_erase(&uobj->ufile->idr, uobj->id);
597 	/* Matches the kref in alloc_commit_idr_uobject */
598 	uverbs_uobject_put(uobj);
599 }
600 
601 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
602 {
603 	struct file *filp = uobj->object;
604 
605 	if (filp)
606 		fput(filp);
607 
608 	put_unused_fd(uobj->id);
609 }
610 
611 static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
612 					      enum rdma_remove_reason why,
613 					      struct uverbs_attr_bundle *attrs)
614 {
615 	const struct uverbs_obj_fd_type *fd_type = container_of(
616 		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
617 
618 	fd_type->destroy_object(uobj, why);
619 	return 0;
620 }
621 
/* remove_handle for FD based types: intentionally a no-op. */
static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
	/* nothing to do */
}
625 
626 static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
627 {
628 	struct ib_uverbs_file *ufile = uobj->ufile;
629 	void *old;
630 
631 	/*
632 	 * We already allocated this XArray entry with a NULL pointer, so
633 	 * this shouldn't fail.
634 	 *
635 	 * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
636 	 * It will be put by remove_handle_idr_uobject()
637 	 */
638 	old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
639 	WARN_ON(old != NULL);
640 }
641 
642 static void swap_idr_uobjects(struct ib_uobject *obj_old,
643 			     struct ib_uobject *obj_new)
644 {
645 	struct ib_uverbs_file *ufile = obj_old->ufile;
646 	void *old;
647 
648 	/*
649 	 * New must be an object that been allocated but not yet committed, this
650 	 * moves the pre-committed state to obj_old, new still must be comitted.
651 	 */
652 	old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY,
653 			 GFP_KERNEL);
654 	if (WARN_ON(old != obj_old))
655 		return;
656 
657 	swap(obj_old->id, obj_new->id);
658 
659 	old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL);
660 	WARN_ON(old != NULL);
661 }
662 
663 static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
664 {
665 	int fd = uobj->id;
666 	struct file *filp = uobj->object;
667 
668 	/* Matching put will be done in uverbs_uobject_fd_release() */
669 	kref_get(&uobj->ufile->ref);
670 
671 	/* This shouldn't be used anymore. Use the file object instead */
672 	uobj->id = 0;
673 
674 	if (!filp->private_data) {
675 		/*
676 		 * NOTE: Once we install the file we loose ownership of our kref on
677 		 * uobj. It will be put by uverbs_uobject_fd_release()
678 		 */
679 		filp->private_data = uobj;
680 	}
681 
682 	fd_install(fd, filp);
683 }
684 
685 /*
686  * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
687  * caller can no longer assume uobj is valid. If this function fails it
688  * destroys the uboject, including the attached HW object.
689  */
690 void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
691 			       struct uverbs_attr_bundle *attrs)
692 {
693 	struct ib_uverbs_file *ufile = attrs->ufile;
694 
695 	/* kref is held so long as the uobj is on the uobj list. */
696 	uverbs_uobject_get(uobj);
697 	spin_lock_irq(&ufile->uobjects_lock);
698 	list_add(&uobj->list, &ufile->uobjects);
699 	spin_unlock_irq(&ufile->uobjects_lock);
700 
701 	/* matches atomic_set(-1) in alloc_uobj */
702 	atomic_set(&uobj->usecnt, 0);
703 
704 	/* alloc_commit consumes the uobj kref */
705 	uobj->uapi_object->type_class->alloc_commit(uobj);
706 
707 	/* Matches the down_read in rdma_alloc_begin_uobject */
708 	up_read(&ufile->hw_destroy_rwsem);
709 }
710 EXPORT_SYMBOL_NS_GPL(rdma_alloc_commit_uobject, "rdma_core");
711 
712 /*
713  * new_uobj will be assigned to the handle currently used by to_uobj, and
714  * to_uobj will be destroyed.
715  *
716  * Upon return the caller must do:
717  *    rdma_alloc_commit_uobject(new_uobj)
718  *    uobj_put_destroy(to_uobj)
719  *
720  * to_uobj must have a write get but the put mode switches to destroy once
721  * this is called.
722  */
723 void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj,
724 			struct uverbs_attr_bundle *attrs)
725 {
726 	assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE);
727 
728 	if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object ||
729 		    !to_uobj->uapi_object->type_class->swap_uobjects))
730 		return;
731 
732 	to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj);
733 
734 	/*
735 	 * If this fails then the uobject is still completely valid (though with
736 	 * a new ID) and we leak it until context close.
737 	 */
738 	uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs);
739 }
740 EXPORT_SYMBOL_NS_GPL(rdma_assign_uobject, "rdma_core");
741 
742 /*
743  * This consumes the kref for uobj. It is up to the caller to unwind the HW
744  * object and anything else connected to uobj before calling this.
745  */
746 void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
747 			      struct uverbs_attr_bundle *attrs,
748 			      bool hw_obj_valid)
749 {
750 	struct ib_uverbs_file *ufile = uobj->ufile;
751 	int ret;
752 
753 	if (hw_obj_valid) {
754 		ret = uobj->uapi_object->type_class->destroy_hw(
755 			uobj, RDMA_REMOVE_ABORT, attrs);
756 		/*
757 		 * If the driver couldn't destroy the object then go ahead and
758 		 * commit it. Leaking objects that can't be destroyed is only
759 		 * done during FD close after the driver has a few more tries to
760 		 * destroy it.
761 		 */
762 		if (WARN_ON(ret))
763 			return rdma_alloc_commit_uobject(uobj, attrs);
764 	}
765 
766 	uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
767 
768 	/* Matches the down_read in rdma_alloc_begin_uobject */
769 	up_read(&ufile->hw_destroy_rwsem);
770 }
771 EXPORT_SYMBOL_NS_GPL(rdma_alloc_abort_uobject, "rdma_core");
772 
773 static void lookup_put_idr_uobject(struct ib_uobject *uobj,
774 				   enum rdma_lookup_mode mode)
775 {
776 }
777 
778 static void lookup_put_fd_uobject(struct ib_uobject *uobj,
779 				  enum rdma_lookup_mode mode)
780 {
781 	struct file *filp = uobj->object;
782 
783 	WARN_ON(mode != UVERBS_LOOKUP_READ);
784 	/*
785 	 * This indirectly calls uverbs_uobject_fd_release() and free the
786 	 * object
787 	 */
788 	fput(filp);
789 }
790 
791 void rdma_lookup_put_uobject(struct ib_uobject *uobj,
792 			     enum rdma_lookup_mode mode)
793 {
794 	assert_uverbs_usecnt(uobj, mode);
795 	/*
796 	 * In order to unlock an object, either decrease its usecnt for
797 	 * read access or zero it in case of exclusive access. See
798 	 * uverbs_try_lock_object for locking schema information.
799 	 */
800 	switch (mode) {
801 	case UVERBS_LOOKUP_READ:
802 		atomic_dec(&uobj->usecnt);
803 		break;
804 	case UVERBS_LOOKUP_WRITE:
805 		atomic_set(&uobj->usecnt, 0);
806 		break;
807 	case UVERBS_LOOKUP_DESTROY:
808 		break;
809 	}
810 
811 	uobj->uapi_object->type_class->lookup_put(uobj, mode);
812 	/* Pairs with the kref obtained by type->lookup_get */
813 	uverbs_uobject_put(uobj);
814 }
815 EXPORT_SYMBOL_NS_GPL(rdma_lookup_put_uobject, "rdma_core");
816 
817 void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
818 {
819 	xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
820 }
821 EXPORT_SYMBOL_NS_GPL(setup_ufile_idr_uobject, "rdma_core");
822 
823 static void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
824 {
825 	struct ib_uobject *entry;
826 	unsigned long id;
827 
828 	/*
829 	 * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
830 	 * there are no HW objects left, however the xarray is still populated
831 	 * with anything that has not been cleaned up by userspace. Since the
832 	 * kref on ufile is 0, nothing is allowed to call lookup_get.
833 	 *
834 	 * This is an optimized equivalent to remove_handle_idr_uobject
835 	 */
836 	xa_for_each(&ufile->idr, id, entry) {
837 		WARN_ON(entry->object);
838 		uverbs_uobject_put(entry);
839 	}
840 
841 	xa_destroy(&ufile->idr);
842 }
843 
844 const struct uverbs_obj_type_class uverbs_idr_class = {
845 	.alloc_begin = alloc_begin_idr_uobject,
846 	.lookup_get = lookup_get_idr_uobject,
847 	.alloc_commit = alloc_commit_idr_uobject,
848 	.alloc_abort = alloc_abort_idr_uobject,
849 	.lookup_put = lookup_put_idr_uobject,
850 	.destroy_hw = destroy_hw_idr_uobject,
851 	.remove_handle = remove_handle_idr_uobject,
852 	.swap_uobjects = swap_idr_uobjects,
853 };
854 EXPORT_SYMBOL(uverbs_idr_class);
855 
856 int uverbs_uobject_release(struct ib_uobject *uobj)
857 {
858 	struct ib_uverbs_file *ufile;
859 
860 	ufile = uobj->ufile;
861 
862 	if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
863 		struct uverbs_attr_bundle attrs = {
864 			.context = uobj->context,
865 			.ufile = ufile,
866 		};
867 
868 		/*
869 		 * lookup_get_fd_uobject holds the kref on the struct file any
870 		 * time a FD uobj is locked, which prevents this release
871 		 * method from being invoked. Meaning we can always get the
872 		 * write lock here, or we have a kernel bug.
873 		 */
874 		WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
875 		uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs);
876 		up_read(&ufile->hw_destroy_rwsem);
877 	}
878 
879 	/* Matches the get in alloc_commit_fd_uobject() */
880 	kref_put(&ufile->ref, ib_uverbs_release_file);
881 
882 	/* Pairs with filp->private_data in alloc_begin_fd_uobject */
883 	uverbs_uobject_put(uobj);
884 	return 0;
885 }
886 EXPORT_SYMBOL_NS_GPL(uverbs_uobject_release, "rdma_core");
887 
888 /*
889  * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
890  * file_operations release method.
891  */
892 int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
893 {
894 	void (*release_cleanup)(struct ib_uobject *uobj) = NULL;
895 	struct ib_uobject *uobj = filp->private_data;
896 	const struct uverbs_obj_type *type_attrs;
897 	int ret;
898 
899 	/*
900 	 * This can only happen if the fput came from alloc_abort_fd_uobject()
901 	 */
902 	if (!uobj)
903 		return 0;
904 
905 	/*
906 	 * uverbs_disassociate_api() can NULL type_attrs after disassociate, but
907 	 * it won't if release_cleanup is used.
908 	 */
909 	type_attrs = READ_ONCE(uobj->uapi_object->type_attrs);
910 	if (type_attrs)
911 		release_cleanup = container_of(type_attrs,
912 					       struct uverbs_obj_fd_type, type)
913 					  ->release_cleanup;
914 	if (release_cleanup)
915 		uverbs_uobject_get(uobj);
916 
917 	ret = uverbs_uobject_release(uobj);
918 
919 	if (release_cleanup) {
920 		release_cleanup(uobj);
921 		uverbs_uobject_put(uobj);
922 	}
923 
924 	return ret;
925 }
926 EXPORT_SYMBOL(uverbs_uobject_fd_release);
927 
928 int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
929 			   enum rdma_remove_reason reason)
930 {
931 	struct uverbs_attr_bundle attrs = { .ufile = ufile };
932 	struct ib_ucontext *ucontext = ufile->ucontext;
933 	struct ib_device *ib_dev = ucontext->device;
934 	struct ib_uobject *obj, *next_obj;
935 	int ret = -EINVAL;
936 
937 	if (ib_dev->ops.ufile_hw_cleanup)
938 		ib_dev->ops.ufile_hw_cleanup(ufile);
939 
940 	/*
941 	 * This shouldn't run while executing other commands on this
942 	 * context. Thus, the only thing we should take care of is
943 	 * releasing a FD while traversing this list. The FD could be
944 	 * closed and released from the _release fop of this FD.
945 	 * In order to mitigate this, we add a lock.
946 	 * We take and release the lock per traversal in order to let
947 	 * other threads (which might still use the FDs) chance to run.
948 	 */
949 	list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
950 		attrs.context = obj->context;
951 		/*
952 		 * if we hit this WARN_ON, that means we are
953 		 * racing with a lookup_get.
954 		 */
955 		WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
956 		if (reason == RDMA_REMOVE_DRIVER_FAILURE)
957 			obj->object = NULL;
958 		if (!uverbs_destroy_uobject(obj, reason, &attrs))
959 			ret = 0;
960 		else
961 			atomic_set(&obj->usecnt, 0);
962 	}
963 
964 	if (reason == RDMA_REMOVE_DRIVER_FAILURE) {
965 		WARN_ON(!list_empty(&ufile->uobjects));
966 		return 0;
967 	}
968 	return ret;
969 }
970 EXPORT_SYMBOL_NS_GPL(__uverbs_cleanup_ufile, "rdma_core");
971 
972 const struct uverbs_obj_type_class uverbs_fd_class = {
973 	.alloc_begin = alloc_begin_fd_uobject,
974 	.lookup_get = lookup_get_fd_uobject,
975 	.alloc_commit = alloc_commit_fd_uobject,
976 	.alloc_abort = alloc_abort_fd_uobject,
977 	.lookup_put = lookup_put_fd_uobject,
978 	.destroy_hw = destroy_hw_fd_uobject,
979 	.remove_handle = remove_handle_fd_uobject,
980 };
981 EXPORT_SYMBOL(uverbs_fd_class);
982 
983 struct ib_uobject *
984 uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
985 			     s64 id, struct uverbs_attr_bundle *attrs)
986 {
987 	const struct uverbs_api_object *obj =
988 		uapi_get_object(attrs->ufile->device->uapi, object_id);
989 
990 	switch (access) {
991 	case UVERBS_ACCESS_READ:
992 		return rdma_lookup_get_uobject(obj, attrs->ufile, id,
993 					       UVERBS_LOOKUP_READ, attrs);
994 	case UVERBS_ACCESS_DESTROY:
995 		/* Actual destruction is done inside uverbs_handle_method */
996 		return rdma_lookup_get_uobject(obj, attrs->ufile, id,
997 					       UVERBS_LOOKUP_DESTROY, attrs);
998 	case UVERBS_ACCESS_WRITE:
999 		return rdma_lookup_get_uobject(obj, attrs->ufile, id,
1000 					       UVERBS_LOOKUP_WRITE, attrs);
1001 	case UVERBS_ACCESS_NEW:
1002 		return rdma_alloc_begin_uobject(obj, attrs);
1003 	default:
1004 		WARN_ON(true);
1005 		return ERR_PTR(-EOPNOTSUPP);
1006 	}
1007 }
1008 EXPORT_SYMBOL_NS_GPL(uverbs_get_uobject_from_file, "rdma_core");
1009 
1010 void uverbs_finalize_object(struct ib_uobject *uobj,
1011 			    enum uverbs_obj_access access, bool hw_obj_valid,
1012 			    bool commit, struct uverbs_attr_bundle *attrs)
1013 {
1014 	/*
1015 	 * refcounts should be handled at the object level and not at the
1016 	 * uobject level. Refcounts of the objects themselves are done in
1017 	 * handlers.
1018 	 */
1019 
1020 	switch (access) {
1021 	case UVERBS_ACCESS_READ:
1022 		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
1023 		break;
1024 	case UVERBS_ACCESS_WRITE:
1025 		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
1026 		break;
1027 	case UVERBS_ACCESS_DESTROY:
1028 		if (uobj)
1029 			rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
1030 		break;
1031 	case UVERBS_ACCESS_NEW:
1032 		if (commit)
1033 			rdma_alloc_commit_uobject(uobj, attrs);
1034 		else
1035 			rdma_alloc_abort_uobject(uobj, attrs, hw_obj_valid);
1036 		break;
1037 	default:
1038 		WARN_ON(true);
1039 	}
1040 }
1041 EXPORT_SYMBOL_NS_GPL(uverbs_finalize_object, "rdma_core");
1042 
1043 /**
1044  * rdma_uattrs_has_raw_cap() - Returns whether a rdma device linked to the
1045  *			       uverbs attributes file has CAP_NET_RAW
1046  *			       capability or not.
1047  *
1048  * @attrs:       Pointer to uverbs attributes
1049  *
1050  * Returns true if a rdma device's owning user namespace has CAP_NET_RAW
1051  * capability, otherwise false.
1052  */
1053 bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs)
1054 {
1055 	struct ib_uverbs_file *ufile = attrs->ufile;
1056 	struct ib_ucontext *ucontext;
1057 	bool has_cap = false;
1058 	int srcu_key;
1059 
1060 	srcu_key = srcu_read_lock(&ufile->device->disassociate_srcu);
1061 	ucontext = ib_uverbs_get_ucontext_file(ufile);
1062 	if (IS_ERR(ucontext))
1063 		goto out;
1064 	has_cap = rdma_dev_has_raw_cap(ucontext->device);
1065 
1066 out:
1067 	srcu_read_unlock(&ufile->device->disassociate_srcu, srcu_key);
1068 	return has_cap;
1069 }
1070 EXPORT_SYMBOL(rdma_uattrs_has_raw_cap);
1071 
1072 MODULE_DESCRIPTION("InfiniBand uverbs objects");
1073 MODULE_LICENSE("Dual BSD/GPL");
1074