xref: /linux/drivers/misc/vmw_vmci/vmci_context.c (revision bfd5bb6f90af092aa345b15cd78143956a13c2a8)
1 /*
2  * VMware VMCI Driver
3  *
4  * Copyright (C) 2012 VMware, Inc. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation version 2 and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * for more details.
14  */
15 
16 #include <linux/vmw_vmci_defs.h>
17 #include <linux/vmw_vmci_api.h>
18 #include <linux/highmem.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/sched.h>
22 #include <linux/cred.h>
23 #include <linux/slab.h>
24 
25 #include "vmci_queue_pair.h"
26 #include "vmci_datagram.h"
27 #include "vmci_doorbell.h"
28 #include "vmci_context.h"
29 #include "vmci_driver.h"
30 #include "vmci_event.h"
31 
32 /*
33  * List of current VMCI contexts.  Contexts can be added by
34  * vmci_ctx_create() and removed via vmci_ctx_destroy().
35  * These, along with context lookup, are protected by the
36  * list structure's lock.
37  */
38 static struct {
39 	struct list_head head;
40 	spinlock_t lock; /* Spinlock for context list operations */
41 } ctx_list = {
42 	.head = LIST_HEAD_INIT(ctx_list.head),
43 	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
44 };
45 
46 /* Used by contexts that did not set up notify flag pointers */
47 static bool ctx_dummy_notify;
48 
49 static void ctx_signal_notify(struct vmci_ctx *context)
50 {
51 	*context->notify = true;
52 }
53 
54 static void ctx_clear_notify(struct vmci_ctx *context)
55 {
56 	*context->notify = false;
57 }
58 
59 /*
60  * If nothing requires the attention of the guest, clears both
61  * notify flag and call.
62  */
63 static void ctx_clear_notify_call(struct vmci_ctx *context)
64 {
65 	if (context->pending_datagrams == 0 &&
66 	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
67 		ctx_clear_notify(context);
68 }
69 
70 /*
71  * Sets the context's notify flag iff datagrams are pending for this
72  * context.  Called from vmci_setup_notify().
73  */
74 void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
75 {
76 	spin_lock(&context->lock);
77 	if (context->pending_datagrams)
78 		ctx_signal_notify(context);
79 	spin_unlock(&context->lock);
80 }
81 
82 /*
83  * Allocates and initializes a VMCI context.
84  */
85 struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
86 				 uintptr_t event_hnd,
87 				 int user_version,
88 				 const struct cred *cred)
89 {
90 	struct vmci_ctx *context;
91 	int error;
92 
93 	if (cid == VMCI_INVALID_ID) {
94 		pr_devel("Invalid context ID for VMCI context\n");
95 		error = -EINVAL;
96 		goto err_out;
97 	}
98 
99 	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
100 		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
101 			 priv_flags);
102 		error = -EINVAL;
103 		goto err_out;
104 	}
105 
106 	if (user_version == 0) {
107 		pr_devel("Invalid suer_version %d\n", user_version);
108 		error = -EINVAL;
109 		goto err_out;
110 	}
111 
112 	context = kzalloc(sizeof(*context), GFP_KERNEL);
113 	if (!context) {
114 		pr_warn("Failed to allocate memory for VMCI context\n");
115 		error = -EINVAL;
116 		goto err_out;
117 	}
118 
119 	kref_init(&context->kref);
120 	spin_lock_init(&context->lock);
121 	INIT_LIST_HEAD(&context->list_item);
122 	INIT_LIST_HEAD(&context->datagram_queue);
123 	INIT_LIST_HEAD(&context->notifier_list);
124 
125 	/* Initialize host-specific VMCI context. */
126 	init_waitqueue_head(&context->host_context.wait_queue);
127 
128 	context->queue_pair_array = vmci_handle_arr_create(0);
129 	if (!context->queue_pair_array) {
130 		error = -ENOMEM;
131 		goto err_free_ctx;
132 	}
133 
134 	context->doorbell_array = vmci_handle_arr_create(0);
135 	if (!context->doorbell_array) {
136 		error = -ENOMEM;
137 		goto err_free_qp_array;
138 	}
139 
140 	context->pending_doorbell_array = vmci_handle_arr_create(0);
141 	if (!context->pending_doorbell_array) {
142 		error = -ENOMEM;
143 		goto err_free_db_array;
144 	}
145 
146 	context->user_version = user_version;
147 
148 	context->priv_flags = priv_flags;
149 
150 	if (cred)
151 		context->cred = get_cred(cred);
152 
153 	context->notify = &ctx_dummy_notify;
154 	context->notify_page = NULL;
155 
156 	/*
157 	 * If we collide with an existing context we generate a new
158 	 * and use it instead. The VMX will determine if regeneration
159 	 * is okay. Since there isn't 4B - 16 VMs running on a given
160 	 * host, the below loop will terminate.
161 	 */
162 	spin_lock(&ctx_list.lock);
163 
164 	while (vmci_ctx_exists(cid)) {
165 		/* We reserve the lowest 16 ids for fixed contexts. */
166 		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
167 		if (cid == VMCI_INVALID_ID)
168 			cid = VMCI_RESERVED_CID_LIMIT;
169 	}
170 	context->cid = cid;
171 
172 	list_add_tail_rcu(&context->list_item, &ctx_list.head);
173 	spin_unlock(&ctx_list.lock);
174 
175 	return context;
176 
177  err_free_db_array:
178 	vmci_handle_arr_destroy(context->doorbell_array);
179  err_free_qp_array:
180 	vmci_handle_arr_destroy(context->queue_pair_array);
181  err_free_ctx:
182 	kfree(context);
183  err_out:
184 	return ERR_PTR(error);
185 }
186 
187 /*
188  * Destroy VMCI context.
189  */
190 void vmci_ctx_destroy(struct vmci_ctx *context)
191 {
192 	spin_lock(&ctx_list.lock);
193 	list_del_rcu(&context->list_item);
194 	spin_unlock(&ctx_list.lock);
195 	synchronize_rcu();
196 
197 	vmci_ctx_put(context);
198 }
199 
200 /*
201  * Fire notification for all contexts interested in given cid.
202  */
203 static int ctx_fire_notification(u32 context_id, u32 priv_flags)
204 {
205 	u32 i, array_size;
206 	struct vmci_ctx *sub_ctx;
207 	struct vmci_handle_arr *subscriber_array;
208 	struct vmci_handle context_handle =
209 		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
210 
211 	/*
212 	 * We create an array to hold the subscribers we find when
213 	 * scanning through all contexts.
214 	 */
215 	subscriber_array = vmci_handle_arr_create(0);
216 	if (subscriber_array == NULL)
217 		return VMCI_ERROR_NO_MEM;
218 
219 	/*
220 	 * Scan all contexts to find who is interested in being
221 	 * notified about given contextID.
222 	 */
223 	rcu_read_lock();
224 	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
225 		struct vmci_handle_list *node;
226 
227 		/*
228 		 * We only deliver notifications of the removal of
229 		 * contexts, if the two contexts are allowed to
230 		 * interact.
231 		 */
232 		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
233 			continue;
234 
235 		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
236 			if (!vmci_handle_is_equal(node->handle, context_handle))
237 				continue;
238 
239 			vmci_handle_arr_append_entry(&subscriber_array,
240 					vmci_make_handle(sub_ctx->cid,
241 							 VMCI_EVENT_HANDLER));
242 		}
243 	}
244 	rcu_read_unlock();
245 
246 	/* Fire event to all subscribers. */
247 	array_size = vmci_handle_arr_get_size(subscriber_array);
248 	for (i = 0; i < array_size; i++) {
249 		int result;
250 		struct vmci_event_ctx ev;
251 
252 		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
253 		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
254 						  VMCI_CONTEXT_RESOURCE_ID);
255 		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
256 		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
257 		ev.payload.context_id = context_id;
258 
259 		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
260 						&ev.msg.hdr, false);
261 		if (result < VMCI_SUCCESS) {
262 			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
263 				 ev.msg.event_data.event,
264 				 ev.msg.hdr.dst.context);
265 			/* We continue to enqueue on next subscriber. */
266 		}
267 	}
268 	vmci_handle_arr_destroy(subscriber_array);
269 
270 	return VMCI_SUCCESS;
271 }
272 
273 /*
274  * Returns the current number of pending datagrams. The call may
275  * also serve as a synchronization point for the datagram queue,
276  * as no enqueue operations can occur concurrently.
277  */
278 int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
279 {
280 	struct vmci_ctx *context;
281 
282 	context = vmci_ctx_get(cid);
283 	if (context == NULL)
284 		return VMCI_ERROR_INVALID_ARGS;
285 
286 	spin_lock(&context->lock);
287 	if (pending)
288 		*pending = context->pending_datagrams;
289 	spin_unlock(&context->lock);
290 	vmci_ctx_put(context);
291 
292 	return VMCI_SUCCESS;
293 }
294 
295 /*
296  * Queues a VMCI datagram for the appropriate target VM context.
297  */
298 int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
299 {
300 	struct vmci_datagram_queue_entry *dq_entry;
301 	struct vmci_ctx *context;
302 	struct vmci_handle dg_src;
303 	size_t vmci_dg_size;
304 
305 	vmci_dg_size = VMCI_DG_SIZE(dg);
306 	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
307 		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
308 		return VMCI_ERROR_INVALID_ARGS;
309 	}
310 
311 	/* Get the target VM's VMCI context. */
312 	context = vmci_ctx_get(cid);
313 	if (!context) {
314 		pr_devel("Invalid context (ID=0x%x)\n", cid);
315 		return VMCI_ERROR_INVALID_ARGS;
316 	}
317 
318 	/* Allocate guest call entry and add it to the target VM's queue. */
319 	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
320 	if (dq_entry == NULL) {
321 		pr_warn("Failed to allocate memory for datagram\n");
322 		vmci_ctx_put(context);
323 		return VMCI_ERROR_NO_MEM;
324 	}
325 	dq_entry->dg = dg;
326 	dq_entry->dg_size = vmci_dg_size;
327 	dg_src = dg->src;
328 	INIT_LIST_HEAD(&dq_entry->list_item);
329 
330 	spin_lock(&context->lock);
331 
332 	/*
333 	 * We put a higher limit on datagrams from the hypervisor.  If
334 	 * the pending datagram is not from hypervisor, then we check
335 	 * if enqueueing it would exceed the
336 	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
337 	 * the pending datagram is from hypervisor, we allow it to be
338 	 * queued at the destination side provided we don't reach the
339 	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
340 	 */
341 	if (context->datagram_queue_size + vmci_dg_size >=
342 	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
343 	    (!vmci_handle_is_equal(dg_src,
344 				vmci_make_handle
345 				(VMCI_HYPERVISOR_CONTEXT_ID,
346 				 VMCI_CONTEXT_RESOURCE_ID)) ||
347 	     context->datagram_queue_size + vmci_dg_size >=
348 	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
349 		spin_unlock(&context->lock);
350 		vmci_ctx_put(context);
351 		kfree(dq_entry);
352 		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
353 		return VMCI_ERROR_NO_RESOURCES;
354 	}
355 
356 	list_add(&dq_entry->list_item, &context->datagram_queue);
357 	context->pending_datagrams++;
358 	context->datagram_queue_size += vmci_dg_size;
359 	ctx_signal_notify(context);
360 	wake_up(&context->host_context.wait_queue);
361 	spin_unlock(&context->lock);
362 	vmci_ctx_put(context);
363 
364 	return vmci_dg_size;
365 }
366 
367 /*
368  * Verifies whether a context with the specified context ID exists.
369  * FIXME: utility is dubious as no decisions can be reliably made
370  * using this data as context can appear and disappear at any time.
371  */
372 bool vmci_ctx_exists(u32 cid)
373 {
374 	struct vmci_ctx *context;
375 	bool exists = false;
376 
377 	rcu_read_lock();
378 
379 	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
380 		if (context->cid == cid) {
381 			exists = true;
382 			break;
383 		}
384 	}
385 
386 	rcu_read_unlock();
387 	return exists;
388 }
389 
390 /*
391  * Retrieves VMCI context corresponding to the given cid.
392  */
393 struct vmci_ctx *vmci_ctx_get(u32 cid)
394 {
395 	struct vmci_ctx *c, *context = NULL;
396 
397 	if (cid == VMCI_INVALID_ID)
398 		return NULL;
399 
400 	rcu_read_lock();
401 	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
402 		if (c->cid == cid) {
403 			/*
404 			 * The context owner drops its own reference to the
405 			 * context only after removing it from the list and
406 			 * waiting for RCU grace period to expire. This
407 			 * means that we are not about to increase the
408 			 * reference count of something that is in the
409 			 * process of being destroyed.
410 			 */
411 			context = c;
412 			kref_get(&context->kref);
413 			break;
414 		}
415 	}
416 	rcu_read_unlock();
417 
418 	return context;
419 }
420 
421 /*
422  * Deallocates all parts of a context data structure. This
423  * function doesn't lock the context, because it assumes that
424  * the caller was holding the last reference to context.
425  */
426 static void ctx_free_ctx(struct kref *kref)
427 {
428 	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
429 	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
430 	struct vmci_handle temp_handle;
431 	struct vmci_handle_list *notifier, *tmp;
432 
433 	/*
434 	 * Fire event to all contexts interested in knowing this
435 	 * context is dying.
436 	 */
437 	ctx_fire_notification(context->cid, context->priv_flags);
438 
439 	/*
440 	 * Cleanup all queue pair resources attached to context.  If
441 	 * the VM dies without cleaning up, this code will make sure
442 	 * that no resources are leaked.
443 	 */
444 	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
445 	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
446 		if (vmci_qp_broker_detach(temp_handle,
447 					  context) < VMCI_SUCCESS) {
448 			/*
449 			 * When vmci_qp_broker_detach() succeeds it
450 			 * removes the handle from the array.  If
451 			 * detach fails, we must remove the handle
452 			 * ourselves.
453 			 */
454 			vmci_handle_arr_remove_entry(context->queue_pair_array,
455 						     temp_handle);
456 		}
457 		temp_handle =
458 		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
459 	}
460 
461 	/*
462 	 * It is fine to destroy this without locking the callQueue, as
463 	 * this is the only thread having a reference to the context.
464 	 */
465 	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
466 				 &context->datagram_queue, list_item) {
467 		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
468 		list_del(&dq_entry->list_item);
469 		kfree(dq_entry->dg);
470 		kfree(dq_entry);
471 	}
472 
473 	list_for_each_entry_safe(notifier, tmp,
474 				 &context->notifier_list, node) {
475 		list_del(&notifier->node);
476 		kfree(notifier);
477 	}
478 
479 	vmci_handle_arr_destroy(context->queue_pair_array);
480 	vmci_handle_arr_destroy(context->doorbell_array);
481 	vmci_handle_arr_destroy(context->pending_doorbell_array);
482 	vmci_ctx_unset_notify(context);
483 	if (context->cred)
484 		put_cred(context->cred);
485 	kfree(context);
486 }
487 
488 /*
489  * Drops reference to VMCI context. If this is the last reference to
490  * the context it will be deallocated. A context is created with
491  * a reference count of one, and on destroy, it is removed from
492  * the context list before its reference count is decremented. Thus,
493  * if we reach zero, we are sure that nobody else are about to increment
494  * it (they need the entry in the context list for that), and so there
495  * is no need for locking.
496  */
497 void vmci_ctx_put(struct vmci_ctx *context)
498 {
499 	kref_put(&context->kref, ctx_free_ctx);
500 }
501 
502 /*
503  * Dequeues the next datagram and returns it to caller.
504  * The caller passes in a pointer to the max size datagram
505  * it can handle and the datagram is only unqueued if the
506  * size is less than max_size. If larger max_size is set to
507  * the size of the datagram to give the caller a chance to
508  * set up a larger buffer for the guestcall.
509  */
510 int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
511 			      size_t *max_size,
512 			      struct vmci_datagram **dg)
513 {
514 	struct vmci_datagram_queue_entry *dq_entry;
515 	struct list_head *list_item;
516 	int rv;
517 
518 	/* Dequeue the next datagram entry. */
519 	spin_lock(&context->lock);
520 	if (context->pending_datagrams == 0) {
521 		ctx_clear_notify_call(context);
522 		spin_unlock(&context->lock);
523 		pr_devel("No datagrams pending\n");
524 		return VMCI_ERROR_NO_MORE_DATAGRAMS;
525 	}
526 
527 	list_item = context->datagram_queue.next;
528 
529 	dq_entry =
530 	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
531 
532 	/* Check size of caller's buffer. */
533 	if (*max_size < dq_entry->dg_size) {
534 		*max_size = dq_entry->dg_size;
535 		spin_unlock(&context->lock);
536 		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
537 			 (u32) *max_size);
538 		return VMCI_ERROR_NO_MEM;
539 	}
540 
541 	list_del(list_item);
542 	context->pending_datagrams--;
543 	context->datagram_queue_size -= dq_entry->dg_size;
544 	if (context->pending_datagrams == 0) {
545 		ctx_clear_notify_call(context);
546 		rv = VMCI_SUCCESS;
547 	} else {
548 		/*
549 		 * Return the size of the next datagram.
550 		 */
551 		struct vmci_datagram_queue_entry *next_entry;
552 
553 		list_item = context->datagram_queue.next;
554 		next_entry =
555 		    list_entry(list_item, struct vmci_datagram_queue_entry,
556 			       list_item);
557 
558 		/*
559 		 * The following size_t -> int truncation is fine as
560 		 * the maximum size of a (routable) datagram is 68KB.
561 		 */
562 		rv = (int)next_entry->dg_size;
563 	}
564 	spin_unlock(&context->lock);
565 
566 	/* Caller must free datagram. */
567 	*dg = dq_entry->dg;
568 	dq_entry->dg = NULL;
569 	kfree(dq_entry);
570 
571 	return rv;
572 }
573 
574 /*
575  * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
576  * page mapped/locked by vmci_setup_notify().
577  */
578 void vmci_ctx_unset_notify(struct vmci_ctx *context)
579 {
580 	struct page *notify_page;
581 
582 	spin_lock(&context->lock);
583 
584 	notify_page = context->notify_page;
585 	context->notify = &ctx_dummy_notify;
586 	context->notify_page = NULL;
587 
588 	spin_unlock(&context->lock);
589 
590 	if (notify_page) {
591 		kunmap(notify_page);
592 		put_page(notify_page);
593 	}
594 }
595 
596 /*
597  * Add remote_cid to list of contexts current contexts wants
598  * notifications from/about.
599  */
600 int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
601 {
602 	struct vmci_ctx *context;
603 	struct vmci_handle_list *notifier, *n;
604 	int result;
605 	bool exists = false;
606 
607 	context = vmci_ctx_get(context_id);
608 	if (!context)
609 		return VMCI_ERROR_NOT_FOUND;
610 
611 	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
612 		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
613 			 context_id, remote_cid);
614 		result = VMCI_ERROR_DST_UNREACHABLE;
615 		goto out;
616 	}
617 
618 	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
619 		result = VMCI_ERROR_NO_ACCESS;
620 		goto out;
621 	}
622 
623 	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
624 	if (!notifier) {
625 		result = VMCI_ERROR_NO_MEM;
626 		goto out;
627 	}
628 
629 	INIT_LIST_HEAD(&notifier->node);
630 	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
631 
632 	spin_lock(&context->lock);
633 
634 	list_for_each_entry(n, &context->notifier_list, node) {
635 		if (vmci_handle_is_equal(n->handle, notifier->handle)) {
636 			exists = true;
637 			break;
638 		}
639 	}
640 
641 	if (exists) {
642 		kfree(notifier);
643 		result = VMCI_ERROR_ALREADY_EXISTS;
644 	} else {
645 		list_add_tail_rcu(&notifier->node, &context->notifier_list);
646 		context->n_notifiers++;
647 		result = VMCI_SUCCESS;
648 	}
649 
650 	spin_unlock(&context->lock);
651 
652  out:
653 	vmci_ctx_put(context);
654 	return result;
655 }
656 
657 /*
658  * Remove remote_cid from current context's list of contexts it is
659  * interested in getting notifications from/about.
660  */
661 int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
662 {
663 	struct vmci_ctx *context;
664 	struct vmci_handle_list *notifier, *tmp;
665 	struct vmci_handle handle;
666 	bool found = false;
667 
668 	context = vmci_ctx_get(context_id);
669 	if (!context)
670 		return VMCI_ERROR_NOT_FOUND;
671 
672 	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
673 
674 	spin_lock(&context->lock);
675 	list_for_each_entry_safe(notifier, tmp,
676 				 &context->notifier_list, node) {
677 		if (vmci_handle_is_equal(notifier->handle, handle)) {
678 			list_del_rcu(&notifier->node);
679 			context->n_notifiers--;
680 			found = true;
681 			break;
682 		}
683 	}
684 	spin_unlock(&context->lock);
685 
686 	if (found) {
687 		synchronize_rcu();
688 		kfree(notifier);
689 	}
690 
691 	vmci_ctx_put(context);
692 
693 	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
694 }
695 
696 static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
697 					u32 *buf_size, void **pbuf)
698 {
699 	u32 *notifiers;
700 	size_t data_size;
701 	struct vmci_handle_list *entry;
702 	int i = 0;
703 
704 	if (context->n_notifiers == 0) {
705 		*buf_size = 0;
706 		*pbuf = NULL;
707 		return VMCI_SUCCESS;
708 	}
709 
710 	data_size = context->n_notifiers * sizeof(*notifiers);
711 	if (*buf_size < data_size) {
712 		*buf_size = data_size;
713 		return VMCI_ERROR_MORE_DATA;
714 	}
715 
716 	notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
717 	if (!notifiers)
718 		return VMCI_ERROR_NO_MEM;
719 
720 	list_for_each_entry(entry, &context->notifier_list, node)
721 		notifiers[i++] = entry->handle.context;
722 
723 	*buf_size = data_size;
724 	*pbuf = notifiers;
725 	return VMCI_SUCCESS;
726 }
727 
728 static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
729 					u32 *buf_size, void **pbuf)
730 {
731 	struct dbell_cpt_state *dbells;
732 	size_t n_doorbells;
733 	int i;
734 
735 	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
736 	if (n_doorbells > 0) {
737 		size_t data_size = n_doorbells * sizeof(*dbells);
738 		if (*buf_size < data_size) {
739 			*buf_size = data_size;
740 			return VMCI_ERROR_MORE_DATA;
741 		}
742 
743 		dbells = kmalloc(data_size, GFP_ATOMIC);
744 		if (!dbells)
745 			return VMCI_ERROR_NO_MEM;
746 
747 		for (i = 0; i < n_doorbells; i++)
748 			dbells[i].handle = vmci_handle_arr_get_entry(
749 						context->doorbell_array, i);
750 
751 		*buf_size = data_size;
752 		*pbuf = dbells;
753 	} else {
754 		*buf_size = 0;
755 		*pbuf = NULL;
756 	}
757 
758 	return VMCI_SUCCESS;
759 }
760 
761 /*
762  * Get current context's checkpoint state of given type.
763  */
764 int vmci_ctx_get_chkpt_state(u32 context_id,
765 			     u32 cpt_type,
766 			     u32 *buf_size,
767 			     void **pbuf)
768 {
769 	struct vmci_ctx *context;
770 	int result;
771 
772 	context = vmci_ctx_get(context_id);
773 	if (!context)
774 		return VMCI_ERROR_NOT_FOUND;
775 
776 	spin_lock(&context->lock);
777 
778 	switch (cpt_type) {
779 	case VMCI_NOTIFICATION_CPT_STATE:
780 		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
781 		break;
782 
783 	case VMCI_WELLKNOWN_CPT_STATE:
784 		/*
785 		 * For compatibility with VMX'en with VM to VM communication, we
786 		 * always return zero wellknown handles.
787 		 */
788 
789 		*buf_size = 0;
790 		*pbuf = NULL;
791 		result = VMCI_SUCCESS;
792 		break;
793 
794 	case VMCI_DOORBELL_CPT_STATE:
795 		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
796 		break;
797 
798 	default:
799 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
800 		result = VMCI_ERROR_INVALID_ARGS;
801 		break;
802 	}
803 
804 	spin_unlock(&context->lock);
805 	vmci_ctx_put(context);
806 
807 	return result;
808 }
809 
810 /*
811  * Set current context's checkpoint state of given type.
812  */
813 int vmci_ctx_set_chkpt_state(u32 context_id,
814 			     u32 cpt_type,
815 			     u32 buf_size,
816 			     void *cpt_buf)
817 {
818 	u32 i;
819 	u32 current_id;
820 	int result = VMCI_SUCCESS;
821 	u32 num_ids = buf_size / sizeof(u32);
822 
823 	if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
824 		/*
825 		 * We would end up here if VMX with VM to VM communication
826 		 * attempts to restore a checkpoint with wellknown handles.
827 		 */
828 		pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
829 		return VMCI_ERROR_OBSOLETE;
830 	}
831 
832 	if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
833 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
834 		return VMCI_ERROR_INVALID_ARGS;
835 	}
836 
837 	for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
838 		current_id = ((u32 *)cpt_buf)[i];
839 		result = vmci_ctx_add_notification(context_id, current_id);
840 		if (result != VMCI_SUCCESS)
841 			break;
842 	}
843 	if (result != VMCI_SUCCESS)
844 		pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
845 			 cpt_type, result);
846 
847 	return result;
848 }
849 
850 /*
851  * Retrieves the specified context's pending notifications in the
852  * form of a handle array. The handle arrays returned are the
853  * actual data - not a copy and should not be modified by the
854  * caller. They must be released using
855  * vmci_ctx_rcv_notifications_release.
856  */
857 int vmci_ctx_rcv_notifications_get(u32 context_id,
858 				   struct vmci_handle_arr **db_handle_array,
859 				   struct vmci_handle_arr **qp_handle_array)
860 {
861 	struct vmci_ctx *context;
862 	int result = VMCI_SUCCESS;
863 
864 	context = vmci_ctx_get(context_id);
865 	if (context == NULL)
866 		return VMCI_ERROR_NOT_FOUND;
867 
868 	spin_lock(&context->lock);
869 
870 	*db_handle_array = context->pending_doorbell_array;
871 	context->pending_doorbell_array = vmci_handle_arr_create(0);
872 	if (!context->pending_doorbell_array) {
873 		context->pending_doorbell_array = *db_handle_array;
874 		*db_handle_array = NULL;
875 		result = VMCI_ERROR_NO_MEM;
876 	}
877 	*qp_handle_array = NULL;
878 
879 	spin_unlock(&context->lock);
880 	vmci_ctx_put(context);
881 
882 	return result;
883 }
884 
885 /*
886  * Releases handle arrays with pending notifications previously
887  * retrieved using vmci_ctx_rcv_notifications_get. If the
888  * notifications were not successfully handed over to the guest,
889  * success must be false.
890  */
891 void vmci_ctx_rcv_notifications_release(u32 context_id,
892 					struct vmci_handle_arr *db_handle_array,
893 					struct vmci_handle_arr *qp_handle_array,
894 					bool success)
895 {
896 	struct vmci_ctx *context = vmci_ctx_get(context_id);
897 
898 	spin_lock(&context->lock);
899 	if (!success) {
900 		struct vmci_handle handle;
901 
902 		/*
903 		 * New notifications may have been added while we were not
904 		 * holding the context lock, so we transfer any new pending
905 		 * doorbell notifications to the old array, and reinstate the
906 		 * old array.
907 		 */
908 
909 		handle = vmci_handle_arr_remove_tail(
910 					context->pending_doorbell_array);
911 		while (!vmci_handle_is_invalid(handle)) {
912 			if (!vmci_handle_arr_has_entry(db_handle_array,
913 						       handle)) {
914 				vmci_handle_arr_append_entry(
915 						&db_handle_array, handle);
916 			}
917 			handle = vmci_handle_arr_remove_tail(
918 					context->pending_doorbell_array);
919 		}
920 		vmci_handle_arr_destroy(context->pending_doorbell_array);
921 		context->pending_doorbell_array = db_handle_array;
922 		db_handle_array = NULL;
923 	} else {
924 		ctx_clear_notify_call(context);
925 	}
926 	spin_unlock(&context->lock);
927 	vmci_ctx_put(context);
928 
929 	if (db_handle_array)
930 		vmci_handle_arr_destroy(db_handle_array);
931 
932 	if (qp_handle_array)
933 		vmci_handle_arr_destroy(qp_handle_array);
934 }
935 
936 /*
937  * Registers that a new doorbell handle has been allocated by the
938  * context. Only doorbell handles registered can be notified.
939  */
940 int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
941 {
942 	struct vmci_ctx *context;
943 	int result;
944 
945 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
946 		return VMCI_ERROR_INVALID_ARGS;
947 
948 	context = vmci_ctx_get(context_id);
949 	if (context == NULL)
950 		return VMCI_ERROR_NOT_FOUND;
951 
952 	spin_lock(&context->lock);
953 	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
954 		vmci_handle_arr_append_entry(&context->doorbell_array, handle);
955 		result = VMCI_SUCCESS;
956 	} else {
957 		result = VMCI_ERROR_DUPLICATE_ENTRY;
958 	}
959 
960 	spin_unlock(&context->lock);
961 	vmci_ctx_put(context);
962 
963 	return result;
964 }
965 
966 /*
967  * Unregisters a doorbell handle that was previously registered
968  * with vmci_ctx_dbell_create.
969  */
970 int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
971 {
972 	struct vmci_ctx *context;
973 	struct vmci_handle removed_handle;
974 
975 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
976 		return VMCI_ERROR_INVALID_ARGS;
977 
978 	context = vmci_ctx_get(context_id);
979 	if (context == NULL)
980 		return VMCI_ERROR_NOT_FOUND;
981 
982 	spin_lock(&context->lock);
983 	removed_handle =
984 	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
985 	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
986 	spin_unlock(&context->lock);
987 
988 	vmci_ctx_put(context);
989 
990 	return vmci_handle_is_invalid(removed_handle) ?
991 	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
992 }
993 
994 /*
995  * Unregisters all doorbell handles that were previously
996  * registered with vmci_ctx_dbell_create.
997  */
998 int vmci_ctx_dbell_destroy_all(u32 context_id)
999 {
1000 	struct vmci_ctx *context;
1001 	struct vmci_handle handle;
1002 
1003 	if (context_id == VMCI_INVALID_ID)
1004 		return VMCI_ERROR_INVALID_ARGS;
1005 
1006 	context = vmci_ctx_get(context_id);
1007 	if (context == NULL)
1008 		return VMCI_ERROR_NOT_FOUND;
1009 
1010 	spin_lock(&context->lock);
1011 	do {
1012 		struct vmci_handle_arr *arr = context->doorbell_array;
1013 		handle = vmci_handle_arr_remove_tail(arr);
1014 	} while (!vmci_handle_is_invalid(handle));
1015 	do {
1016 		struct vmci_handle_arr *arr = context->pending_doorbell_array;
1017 		handle = vmci_handle_arr_remove_tail(arr);
1018 	} while (!vmci_handle_is_invalid(handle));
1019 	spin_unlock(&context->lock);
1020 
1021 	vmci_ctx_put(context);
1022 
1023 	return VMCI_SUCCESS;
1024 }
1025 
1026 /*
1027  * Registers a notification of a doorbell handle initiated by the
1028  * specified source context. The notification of doorbells are
1029  * subject to the same isolation rules as datagram delivery. To
1030  * allow host side senders of notifications a finer granularity
1031  * of sender rights than those assigned to the sending context
1032  * itself, the host context is required to specify a different
1033  * set of privilege flags that will override the privileges of
1034  * the source context.
1035  */
1036 int vmci_ctx_notify_dbell(u32 src_cid,
1037 			  struct vmci_handle handle,
1038 			  u32 src_priv_flags)
1039 {
1040 	struct vmci_ctx *dst_context;
1041 	int result;
1042 
1043 	if (vmci_handle_is_invalid(handle))
1044 		return VMCI_ERROR_INVALID_ARGS;
1045 
1046 	/* Get the target VM's VMCI context. */
1047 	dst_context = vmci_ctx_get(handle.context);
1048 	if (!dst_context) {
1049 		pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1050 		return VMCI_ERROR_NOT_FOUND;
1051 	}
1052 
1053 	if (src_cid != handle.context) {
1054 		u32 dst_priv_flags;
1055 
1056 		if (VMCI_CONTEXT_IS_VM(src_cid) &&
1057 		    VMCI_CONTEXT_IS_VM(handle.context)) {
1058 			pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1059 				 src_cid, handle.context);
1060 			result = VMCI_ERROR_DST_UNREACHABLE;
1061 			goto out;
1062 		}
1063 
1064 		result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1065 		if (result < VMCI_SUCCESS) {
1066 			pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1067 				handle.context, handle.resource);
1068 			goto out;
1069 		}
1070 
1071 		if (src_cid != VMCI_HOST_CONTEXT_ID ||
1072 		    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1073 			src_priv_flags = vmci_context_get_priv_flags(src_cid);
1074 		}
1075 
1076 		if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1077 			result = VMCI_ERROR_NO_ACCESS;
1078 			goto out;
1079 		}
1080 	}
1081 
1082 	if (handle.context == VMCI_HOST_CONTEXT_ID) {
1083 		result = vmci_dbell_host_context_notify(src_cid, handle);
1084 	} else {
1085 		spin_lock(&dst_context->lock);
1086 
1087 		if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1088 					       handle)) {
1089 			result = VMCI_ERROR_NOT_FOUND;
1090 		} else {
1091 			if (!vmci_handle_arr_has_entry(
1092 					dst_context->pending_doorbell_array,
1093 					handle)) {
1094 				vmci_handle_arr_append_entry(
1095 					&dst_context->pending_doorbell_array,
1096 					handle);
1097 
1098 				ctx_signal_notify(dst_context);
1099 				wake_up(&dst_context->host_context.wait_queue);
1100 
1101 			}
1102 			result = VMCI_SUCCESS;
1103 		}
1104 		spin_unlock(&dst_context->lock);
1105 	}
1106 
1107  out:
1108 	vmci_ctx_put(dst_context);
1109 
1110 	return result;
1111 }
1112 
1113 bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1114 {
1115 	return context && context->user_version >= VMCI_VERSION_HOSTQP;
1116 }
1117 
1118 /*
1119  * Registers that a new queue pair handle has been allocated by
1120  * the context.
1121  */
1122 int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1123 {
1124 	int result;
1125 
1126 	if (context == NULL || vmci_handle_is_invalid(handle))
1127 		return VMCI_ERROR_INVALID_ARGS;
1128 
1129 	if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
1130 		vmci_handle_arr_append_entry(&context->queue_pair_array,
1131 					     handle);
1132 		result = VMCI_SUCCESS;
1133 	} else {
1134 		result = VMCI_ERROR_DUPLICATE_ENTRY;
1135 	}
1136 
1137 	return result;
1138 }
1139 
1140 /*
1141  * Unregisters a queue pair handle that was previously registered
1142  * with vmci_ctx_qp_create.
1143  */
1144 int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1145 {
1146 	struct vmci_handle hndl;
1147 
1148 	if (context == NULL || vmci_handle_is_invalid(handle))
1149 		return VMCI_ERROR_INVALID_ARGS;
1150 
1151 	hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1152 
1153 	return vmci_handle_is_invalid(hndl) ?
1154 		VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1155 }
1156 
1157 /*
1158  * Determines whether a given queue pair handle is registered
1159  * with the given context.
1160  */
1161 bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1162 {
1163 	if (context == NULL || vmci_handle_is_invalid(handle))
1164 		return false;
1165 
1166 	return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1167 }
1168 
1169 /*
1170  * vmci_context_get_priv_flags() - Retrieve privilege flags.
1171  * @context_id: The context ID of the VMCI context.
1172  *
1173  * Retrieves privilege flags of the given VMCI context ID.
1174  */
1175 u32 vmci_context_get_priv_flags(u32 context_id)
1176 {
1177 	if (vmci_host_code_active()) {
1178 		u32 flags;
1179 		struct vmci_ctx *context;
1180 
1181 		context = vmci_ctx_get(context_id);
1182 		if (!context)
1183 			return VMCI_LEAST_PRIVILEGE_FLAGS;
1184 
1185 		flags = context->priv_flags;
1186 		vmci_ctx_put(context);
1187 		return flags;
1188 	}
1189 	return VMCI_NO_PRIVILEGE_FLAGS;
1190 }
1191 EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1192 
1193 /*
1194  * vmci_is_context_owner() - Determimnes if user is the context owner
1195  * @context_id: The context ID of the VMCI context.
1196  * @uid:        The host user id (real kernel value).
1197  *
1198  * Determines whether a given UID is the owner of given VMCI context.
1199  */
1200 bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1201 {
1202 	bool is_owner = false;
1203 
1204 	if (vmci_host_code_active()) {
1205 		struct vmci_ctx *context = vmci_ctx_get(context_id);
1206 		if (context) {
1207 			if (context->cred)
1208 				is_owner = uid_eq(context->cred->uid, uid);
1209 			vmci_ctx_put(context);
1210 		}
1211 	}
1212 
1213 	return is_owner;
1214 }
1215 EXPORT_SYMBOL_GPL(vmci_is_context_owner);
1216