xref: /linux/drivers/misc/vmw_vmci/vmci_context.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * VMware VMCI Driver
3  *
4  * Copyright (C) 2012 VMware, Inc. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation version 2 and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * for more details.
14  */
15 
16 #include <linux/vmw_vmci_defs.h>
17 #include <linux/vmw_vmci_api.h>
18 #include <linux/highmem.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/sched.h>
22 #include <linux/slab.h>
23 
24 #include "vmci_queue_pair.h"
25 #include "vmci_datagram.h"
26 #include "vmci_doorbell.h"
27 #include "vmci_context.h"
28 #include "vmci_driver.h"
29 #include "vmci_event.h"
30 
/*
 * List of current VMCI contexts.  Contexts are added by
 * vmci_ctx_create() and removed by vmci_ctx_destroy().  Both of
 * those modify the list while holding ctx_list.lock and use the
 * _rcu list primitives; the lookup helpers in this file walk the
 * list under rcu_read_lock() instead of taking the spinlock.
 */
static struct {
	struct list_head head; /* Anchor of the list of contexts */
	spinlock_t lock; /* Spinlock for context list operations */
} ctx_list = {
	.head = LIST_HEAD_INIT(ctx_list.head),
	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
};
44 
45 /* Used by contexts that did not set up notify flag pointers */
46 static bool ctx_dummy_notify;
47 
48 static void ctx_signal_notify(struct vmci_ctx *context)
49 {
50 	*context->notify = true;
51 }
52 
53 static void ctx_clear_notify(struct vmci_ctx *context)
54 {
55 	*context->notify = false;
56 }
57 
58 /*
59  * If nothing requires the attention of the guest, clears both
60  * notify flag and call.
61  */
62 static void ctx_clear_notify_call(struct vmci_ctx *context)
63 {
64 	if (context->pending_datagrams == 0 &&
65 	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
66 		ctx_clear_notify(context);
67 }
68 
69 /*
70  * Sets the context's notify flag iff datagrams are pending for this
71  * context.  Called from vmci_setup_notify().
72  */
73 void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
74 {
75 	spin_lock(&context->lock);
76 	if (context->pending_datagrams)
77 		ctx_signal_notify(context);
78 	spin_unlock(&context->lock);
79 }
80 
81 /*
82  * Allocates and initializes a VMCI context.
83  */
84 struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
85 				 uintptr_t event_hnd,
86 				 int user_version,
87 				 const struct cred *cred)
88 {
89 	struct vmci_ctx *context;
90 	int error;
91 
92 	if (cid == VMCI_INVALID_ID) {
93 		pr_devel("Invalid context ID for VMCI context\n");
94 		error = -EINVAL;
95 		goto err_out;
96 	}
97 
98 	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
99 		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
100 			 priv_flags);
101 		error = -EINVAL;
102 		goto err_out;
103 	}
104 
105 	if (user_version == 0) {
106 		pr_devel("Invalid suer_version %d\n", user_version);
107 		error = -EINVAL;
108 		goto err_out;
109 	}
110 
111 	context = kzalloc(sizeof(*context), GFP_KERNEL);
112 	if (!context) {
113 		pr_warn("Failed to allocate memory for VMCI context\n");
114 		error = -EINVAL;
115 		goto err_out;
116 	}
117 
118 	kref_init(&context->kref);
119 	spin_lock_init(&context->lock);
120 	INIT_LIST_HEAD(&context->list_item);
121 	INIT_LIST_HEAD(&context->datagram_queue);
122 	INIT_LIST_HEAD(&context->notifier_list);
123 
124 	/* Initialize host-specific VMCI context. */
125 	init_waitqueue_head(&context->host_context.wait_queue);
126 
127 	context->queue_pair_array = vmci_handle_arr_create(0);
128 	if (!context->queue_pair_array) {
129 		error = -ENOMEM;
130 		goto err_free_ctx;
131 	}
132 
133 	context->doorbell_array = vmci_handle_arr_create(0);
134 	if (!context->doorbell_array) {
135 		error = -ENOMEM;
136 		goto err_free_qp_array;
137 	}
138 
139 	context->pending_doorbell_array = vmci_handle_arr_create(0);
140 	if (!context->pending_doorbell_array) {
141 		error = -ENOMEM;
142 		goto err_free_db_array;
143 	}
144 
145 	context->user_version = user_version;
146 
147 	context->priv_flags = priv_flags;
148 
149 	if (cred)
150 		context->cred = get_cred(cred);
151 
152 	context->notify = &ctx_dummy_notify;
153 	context->notify_page = NULL;
154 
155 	/*
156 	 * If we collide with an existing context we generate a new
157 	 * and use it instead. The VMX will determine if regeneration
158 	 * is okay. Since there isn't 4B - 16 VMs running on a given
159 	 * host, the below loop will terminate.
160 	 */
161 	spin_lock(&ctx_list.lock);
162 
163 	while (vmci_ctx_exists(cid)) {
164 		/* We reserve the lowest 16 ids for fixed contexts. */
165 		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
166 		if (cid == VMCI_INVALID_ID)
167 			cid = VMCI_RESERVED_CID_LIMIT;
168 	}
169 	context->cid = cid;
170 
171 	list_add_tail_rcu(&context->list_item, &ctx_list.head);
172 	spin_unlock(&ctx_list.lock);
173 
174 	return context;
175 
176  err_free_db_array:
177 	vmci_handle_arr_destroy(context->doorbell_array);
178  err_free_qp_array:
179 	vmci_handle_arr_destroy(context->queue_pair_array);
180  err_free_ctx:
181 	kfree(context);
182  err_out:
183 	return ERR_PTR(error);
184 }
185 
/*
 * Destroy VMCI context: unlink it from the global context list, wait
 * for an RCU grace period and then drop the caller's reference.  The
 * context itself is freed once the last reference is gone (see
 * vmci_ctx_put()).
 */
void vmci_ctx_destroy(struct vmci_ctx *context)
{
	/* Unpublish the context so that new lookups can no longer find it. */
	spin_lock(&ctx_list.lock);
	list_del_rcu(&context->list_item);
	spin_unlock(&ctx_list.lock);
	/* Let RCU readers that may still be walking the list finish. */
	synchronize_rcu();

	vmci_ctx_put(context);
}
198 
199 /*
200  * Fire notification for all contexts interested in given cid.
201  */
202 static int ctx_fire_notification(u32 context_id, u32 priv_flags)
203 {
204 	u32 i, array_size;
205 	struct vmci_ctx *sub_ctx;
206 	struct vmci_handle_arr *subscriber_array;
207 	struct vmci_handle context_handle =
208 		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
209 
210 	/*
211 	 * We create an array to hold the subscribers we find when
212 	 * scanning through all contexts.
213 	 */
214 	subscriber_array = vmci_handle_arr_create(0);
215 	if (subscriber_array == NULL)
216 		return VMCI_ERROR_NO_MEM;
217 
218 	/*
219 	 * Scan all contexts to find who is interested in being
220 	 * notified about given contextID.
221 	 */
222 	rcu_read_lock();
223 	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
224 		struct vmci_handle_list *node;
225 
226 		/*
227 		 * We only deliver notifications of the removal of
228 		 * contexts, if the two contexts are allowed to
229 		 * interact.
230 		 */
231 		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
232 			continue;
233 
234 		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
235 			if (!vmci_handle_is_equal(node->handle, context_handle))
236 				continue;
237 
238 			vmci_handle_arr_append_entry(&subscriber_array,
239 					vmci_make_handle(sub_ctx->cid,
240 							 VMCI_EVENT_HANDLER));
241 		}
242 	}
243 	rcu_read_unlock();
244 
245 	/* Fire event to all subscribers. */
246 	array_size = vmci_handle_arr_get_size(subscriber_array);
247 	for (i = 0; i < array_size; i++) {
248 		int result;
249 		struct vmci_event_ctx ev;
250 
251 		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
252 		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
253 						  VMCI_CONTEXT_RESOURCE_ID);
254 		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
255 		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
256 		ev.payload.context_id = context_id;
257 
258 		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
259 						&ev.msg.hdr, false);
260 		if (result < VMCI_SUCCESS) {
261 			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
262 				 ev.msg.event_data.event,
263 				 ev.msg.hdr.dst.context);
264 			/* We continue to enqueue on next subscriber. */
265 		}
266 	}
267 	vmci_handle_arr_destroy(subscriber_array);
268 
269 	return VMCI_SUCCESS;
270 }
271 
272 /*
273  * Returns the current number of pending datagrams. The call may
274  * also serve as a synchronization point for the datagram queue,
275  * as no enqueue operations can occur concurrently.
276  */
277 int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
278 {
279 	struct vmci_ctx *context;
280 
281 	context = vmci_ctx_get(cid);
282 	if (context == NULL)
283 		return VMCI_ERROR_INVALID_ARGS;
284 
285 	spin_lock(&context->lock);
286 	if (pending)
287 		*pending = context->pending_datagrams;
288 	spin_unlock(&context->lock);
289 	vmci_ctx_put(context);
290 
291 	return VMCI_SUCCESS;
292 }
293 
294 /*
295  * Queues a VMCI datagram for the appropriate target VM context.
296  */
297 int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
298 {
299 	struct vmci_datagram_queue_entry *dq_entry;
300 	struct vmci_ctx *context;
301 	struct vmci_handle dg_src;
302 	size_t vmci_dg_size;
303 
304 	vmci_dg_size = VMCI_DG_SIZE(dg);
305 	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
306 		pr_devel("Datagram too large (bytes=%Zu)\n", vmci_dg_size);
307 		return VMCI_ERROR_INVALID_ARGS;
308 	}
309 
310 	/* Get the target VM's VMCI context. */
311 	context = vmci_ctx_get(cid);
312 	if (!context) {
313 		pr_devel("Invalid context (ID=0x%x)\n", cid);
314 		return VMCI_ERROR_INVALID_ARGS;
315 	}
316 
317 	/* Allocate guest call entry and add it to the target VM's queue. */
318 	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
319 	if (dq_entry == NULL) {
320 		pr_warn("Failed to allocate memory for datagram\n");
321 		vmci_ctx_put(context);
322 		return VMCI_ERROR_NO_MEM;
323 	}
324 	dq_entry->dg = dg;
325 	dq_entry->dg_size = vmci_dg_size;
326 	dg_src = dg->src;
327 	INIT_LIST_HEAD(&dq_entry->list_item);
328 
329 	spin_lock(&context->lock);
330 
331 	/*
332 	 * We put a higher limit on datagrams from the hypervisor.  If
333 	 * the pending datagram is not from hypervisor, then we check
334 	 * if enqueueing it would exceed the
335 	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
336 	 * the pending datagram is from hypervisor, we allow it to be
337 	 * queued at the destination side provided we don't reach the
338 	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
339 	 */
340 	if (context->datagram_queue_size + vmci_dg_size >=
341 	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
342 	    (!vmci_handle_is_equal(dg_src,
343 				vmci_make_handle
344 				(VMCI_HYPERVISOR_CONTEXT_ID,
345 				 VMCI_CONTEXT_RESOURCE_ID)) ||
346 	     context->datagram_queue_size + vmci_dg_size >=
347 	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
348 		spin_unlock(&context->lock);
349 		vmci_ctx_put(context);
350 		kfree(dq_entry);
351 		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
352 		return VMCI_ERROR_NO_RESOURCES;
353 	}
354 
355 	list_add(&dq_entry->list_item, &context->datagram_queue);
356 	context->pending_datagrams++;
357 	context->datagram_queue_size += vmci_dg_size;
358 	ctx_signal_notify(context);
359 	wake_up(&context->host_context.wait_queue);
360 	spin_unlock(&context->lock);
361 	vmci_ctx_put(context);
362 
363 	return vmci_dg_size;
364 }
365 
366 /*
367  * Verifies whether a context with the specified context ID exists.
368  * FIXME: utility is dubious as no decisions can be reliably made
369  * using this data as context can appear and disappear at any time.
370  */
371 bool vmci_ctx_exists(u32 cid)
372 {
373 	struct vmci_ctx *context;
374 	bool exists = false;
375 
376 	rcu_read_lock();
377 
378 	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
379 		if (context->cid == cid) {
380 			exists = true;
381 			break;
382 		}
383 	}
384 
385 	rcu_read_unlock();
386 	return exists;
387 }
388 
389 /*
390  * Retrieves VMCI context corresponding to the given cid.
391  */
392 struct vmci_ctx *vmci_ctx_get(u32 cid)
393 {
394 	struct vmci_ctx *c, *context = NULL;
395 
396 	if (cid == VMCI_INVALID_ID)
397 		return NULL;
398 
399 	rcu_read_lock();
400 	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
401 		if (c->cid == cid) {
402 			/*
403 			 * The context owner drops its own reference to the
404 			 * context only after removing it from the list and
405 			 * waiting for RCU grace period to expire. This
406 			 * means that we are not about to increase the
407 			 * reference count of something that is in the
408 			 * process of being destroyed.
409 			 */
410 			context = c;
411 			kref_get(&context->kref);
412 			break;
413 		}
414 	}
415 	rcu_read_unlock();
416 
417 	return context;
418 }
419 
420 /*
421  * Deallocates all parts of a context data structure. This
422  * function doesn't lock the context, because it assumes that
423  * the caller was holding the last reference to context.
424  */
425 static void ctx_free_ctx(struct kref *kref)
426 {
427 	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
428 	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
429 	struct vmci_handle temp_handle;
430 	struct vmci_handle_list *notifier, *tmp;
431 
432 	/*
433 	 * Fire event to all contexts interested in knowing this
434 	 * context is dying.
435 	 */
436 	ctx_fire_notification(context->cid, context->priv_flags);
437 
438 	/*
439 	 * Cleanup all queue pair resources attached to context.  If
440 	 * the VM dies without cleaning up, this code will make sure
441 	 * that no resources are leaked.
442 	 */
443 	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
444 	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
445 		if (vmci_qp_broker_detach(temp_handle,
446 					  context) < VMCI_SUCCESS) {
447 			/*
448 			 * When vmci_qp_broker_detach() succeeds it
449 			 * removes the handle from the array.  If
450 			 * detach fails, we must remove the handle
451 			 * ourselves.
452 			 */
453 			vmci_handle_arr_remove_entry(context->queue_pair_array,
454 						     temp_handle);
455 		}
456 		temp_handle =
457 		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
458 	}
459 
460 	/*
461 	 * It is fine to destroy this without locking the callQueue, as
462 	 * this is the only thread having a reference to the context.
463 	 */
464 	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
465 				 &context->datagram_queue, list_item) {
466 		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
467 		list_del(&dq_entry->list_item);
468 		kfree(dq_entry->dg);
469 		kfree(dq_entry);
470 	}
471 
472 	list_for_each_entry_safe(notifier, tmp,
473 				 &context->notifier_list, node) {
474 		list_del(&notifier->node);
475 		kfree(notifier);
476 	}
477 
478 	vmci_handle_arr_destroy(context->queue_pair_array);
479 	vmci_handle_arr_destroy(context->doorbell_array);
480 	vmci_handle_arr_destroy(context->pending_doorbell_array);
481 	vmci_ctx_unset_notify(context);
482 	if (context->cred)
483 		put_cred(context->cred);
484 	kfree(context);
485 }
486 
/*
 * Drops reference to VMCI context. If this is the last reference to
 * the context it will be deallocated through ctx_free_ctx(). A context
 * is created with a reference count of one, and on destroy, it is
 * removed from the context list before its reference count is
 * decremented. Thus, if we reach zero, we are sure that nobody else is
 * about to increment it (they need the entry in the context list for
 * that), and so there is no need for locking.
 */
void vmci_ctx_put(struct vmci_ctx *context)
{
	kref_put(&context->kref, ctx_free_ctx);
}
500 
501 /*
502  * Dequeues the next datagram and returns it to caller.
503  * The caller passes in a pointer to the max size datagram
504  * it can handle and the datagram is only unqueued if the
505  * size is less than max_size. If larger max_size is set to
506  * the size of the datagram to give the caller a chance to
507  * set up a larger buffer for the guestcall.
508  */
509 int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
510 			      size_t *max_size,
511 			      struct vmci_datagram **dg)
512 {
513 	struct vmci_datagram_queue_entry *dq_entry;
514 	struct list_head *list_item;
515 	int rv;
516 
517 	/* Dequeue the next datagram entry. */
518 	spin_lock(&context->lock);
519 	if (context->pending_datagrams == 0) {
520 		ctx_clear_notify_call(context);
521 		spin_unlock(&context->lock);
522 		pr_devel("No datagrams pending\n");
523 		return VMCI_ERROR_NO_MORE_DATAGRAMS;
524 	}
525 
526 	list_item = context->datagram_queue.next;
527 
528 	dq_entry =
529 	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
530 
531 	/* Check size of caller's buffer. */
532 	if (*max_size < dq_entry->dg_size) {
533 		*max_size = dq_entry->dg_size;
534 		spin_unlock(&context->lock);
535 		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
536 			 (u32) *max_size);
537 		return VMCI_ERROR_NO_MEM;
538 	}
539 
540 	list_del(list_item);
541 	context->pending_datagrams--;
542 	context->datagram_queue_size -= dq_entry->dg_size;
543 	if (context->pending_datagrams == 0) {
544 		ctx_clear_notify_call(context);
545 		rv = VMCI_SUCCESS;
546 	} else {
547 		/*
548 		 * Return the size of the next datagram.
549 		 */
550 		struct vmci_datagram_queue_entry *next_entry;
551 
552 		list_item = context->datagram_queue.next;
553 		next_entry =
554 		    list_entry(list_item, struct vmci_datagram_queue_entry,
555 			       list_item);
556 
557 		/*
558 		 * The following size_t -> int truncation is fine as
559 		 * the maximum size of a (routable) datagram is 68KB.
560 		 */
561 		rv = (int)next_entry->dg_size;
562 	}
563 	spin_unlock(&context->lock);
564 
565 	/* Caller must free datagram. */
566 	*dg = dq_entry->dg;
567 	dq_entry->dg = NULL;
568 	kfree(dq_entry);
569 
570 	return rv;
571 }
572 
573 /*
574  * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
575  * page mapped/locked by vmci_setup_notify().
576  */
577 void vmci_ctx_unset_notify(struct vmci_ctx *context)
578 {
579 	struct page *notify_page;
580 
581 	spin_lock(&context->lock);
582 
583 	notify_page = context->notify_page;
584 	context->notify = &ctx_dummy_notify;
585 	context->notify_page = NULL;
586 
587 	spin_unlock(&context->lock);
588 
589 	if (notify_page) {
590 		kunmap(notify_page);
591 		put_page(notify_page);
592 	}
593 }
594 
595 /*
596  * Add remote_cid to list of contexts current contexts wants
597  * notifications from/about.
598  */
599 int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
600 {
601 	struct vmci_ctx *context;
602 	struct vmci_handle_list *notifier, *n;
603 	int result;
604 	bool exists = false;
605 
606 	context = vmci_ctx_get(context_id);
607 	if (!context)
608 		return VMCI_ERROR_NOT_FOUND;
609 
610 	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
611 		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
612 			 context_id, remote_cid);
613 		result = VMCI_ERROR_DST_UNREACHABLE;
614 		goto out;
615 	}
616 
617 	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
618 		result = VMCI_ERROR_NO_ACCESS;
619 		goto out;
620 	}
621 
622 	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
623 	if (!notifier) {
624 		result = VMCI_ERROR_NO_MEM;
625 		goto out;
626 	}
627 
628 	INIT_LIST_HEAD(&notifier->node);
629 	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
630 
631 	spin_lock(&context->lock);
632 
633 	list_for_each_entry(n, &context->notifier_list, node) {
634 		if (vmci_handle_is_equal(n->handle, notifier->handle)) {
635 			exists = true;
636 			break;
637 		}
638 	}
639 
640 	if (exists) {
641 		kfree(notifier);
642 		result = VMCI_ERROR_ALREADY_EXISTS;
643 	} else {
644 		list_add_tail_rcu(&notifier->node, &context->notifier_list);
645 		context->n_notifiers++;
646 		result = VMCI_SUCCESS;
647 	}
648 
649 	spin_unlock(&context->lock);
650 
651  out:
652 	vmci_ctx_put(context);
653 	return result;
654 }
655 
656 /*
657  * Remove remote_cid from current context's list of contexts it is
658  * interested in getting notifications from/about.
659  */
660 int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
661 {
662 	struct vmci_ctx *context;
663 	struct vmci_handle_list *notifier, *tmp;
664 	struct vmci_handle handle;
665 	bool found = false;
666 
667 	context = vmci_ctx_get(context_id);
668 	if (!context)
669 		return VMCI_ERROR_NOT_FOUND;
670 
671 	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
672 
673 	spin_lock(&context->lock);
674 	list_for_each_entry_safe(notifier, tmp,
675 				 &context->notifier_list, node) {
676 		if (vmci_handle_is_equal(notifier->handle, handle)) {
677 			list_del_rcu(&notifier->node);
678 			context->n_notifiers--;
679 			found = true;
680 			break;
681 		}
682 	}
683 	spin_unlock(&context->lock);
684 
685 	if (found) {
686 		synchronize_rcu();
687 		kfree(notifier);
688 	}
689 
690 	vmci_ctx_put(context);
691 
692 	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
693 }
694 
695 static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
696 					u32 *buf_size, void **pbuf)
697 {
698 	u32 *notifiers;
699 	size_t data_size;
700 	struct vmci_handle_list *entry;
701 	int i = 0;
702 
703 	if (context->n_notifiers == 0) {
704 		*buf_size = 0;
705 		*pbuf = NULL;
706 		return VMCI_SUCCESS;
707 	}
708 
709 	data_size = context->n_notifiers * sizeof(*notifiers);
710 	if (*buf_size < data_size) {
711 		*buf_size = data_size;
712 		return VMCI_ERROR_MORE_DATA;
713 	}
714 
715 	notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
716 	if (!notifiers)
717 		return VMCI_ERROR_NO_MEM;
718 
719 	list_for_each_entry(entry, &context->notifier_list, node)
720 		notifiers[i++] = entry->handle.context;
721 
722 	*buf_size = data_size;
723 	*pbuf = notifiers;
724 	return VMCI_SUCCESS;
725 }
726 
727 static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
728 					u32 *buf_size, void **pbuf)
729 {
730 	struct dbell_cpt_state *dbells;
731 	size_t n_doorbells;
732 	int i;
733 
734 	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
735 	if (n_doorbells > 0) {
736 		size_t data_size = n_doorbells * sizeof(*dbells);
737 		if (*buf_size < data_size) {
738 			*buf_size = data_size;
739 			return VMCI_ERROR_MORE_DATA;
740 		}
741 
742 		dbells = kmalloc(data_size, GFP_ATOMIC);
743 		if (!dbells)
744 			return VMCI_ERROR_NO_MEM;
745 
746 		for (i = 0; i < n_doorbells; i++)
747 			dbells[i].handle = vmci_handle_arr_get_entry(
748 						context->doorbell_array, i);
749 
750 		*buf_size = data_size;
751 		*pbuf = dbells;
752 	} else {
753 		*buf_size = 0;
754 		*pbuf = NULL;
755 	}
756 
757 	return VMCI_SUCCESS;
758 }
759 
760 /*
761  * Get current context's checkpoint state of given type.
762  */
763 int vmci_ctx_get_chkpt_state(u32 context_id,
764 			     u32 cpt_type,
765 			     u32 *buf_size,
766 			     void **pbuf)
767 {
768 	struct vmci_ctx *context;
769 	int result;
770 
771 	context = vmci_ctx_get(context_id);
772 	if (!context)
773 		return VMCI_ERROR_NOT_FOUND;
774 
775 	spin_lock(&context->lock);
776 
777 	switch (cpt_type) {
778 	case VMCI_NOTIFICATION_CPT_STATE:
779 		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
780 		break;
781 
782 	case VMCI_WELLKNOWN_CPT_STATE:
783 		/*
784 		 * For compatibility with VMX'en with VM to VM communication, we
785 		 * always return zero wellknown handles.
786 		 */
787 
788 		*buf_size = 0;
789 		*pbuf = NULL;
790 		result = VMCI_SUCCESS;
791 		break;
792 
793 	case VMCI_DOORBELL_CPT_STATE:
794 		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
795 		break;
796 
797 	default:
798 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
799 		result = VMCI_ERROR_INVALID_ARGS;
800 		break;
801 	}
802 
803 	spin_unlock(&context->lock);
804 	vmci_ctx_put(context);
805 
806 	return result;
807 }
808 
809 /*
810  * Set current context's checkpoint state of given type.
811  */
812 int vmci_ctx_set_chkpt_state(u32 context_id,
813 			     u32 cpt_type,
814 			     u32 buf_size,
815 			     void *cpt_buf)
816 {
817 	u32 i;
818 	u32 current_id;
819 	int result = VMCI_SUCCESS;
820 	u32 num_ids = buf_size / sizeof(u32);
821 
822 	if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
823 		/*
824 		 * We would end up here if VMX with VM to VM communication
825 		 * attempts to restore a checkpoint with wellknown handles.
826 		 */
827 		pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
828 		return VMCI_ERROR_OBSOLETE;
829 	}
830 
831 	if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
832 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
833 		return VMCI_ERROR_INVALID_ARGS;
834 	}
835 
836 	for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
837 		current_id = ((u32 *)cpt_buf)[i];
838 		result = vmci_ctx_add_notification(context_id, current_id);
839 		if (result != VMCI_SUCCESS)
840 			break;
841 	}
842 	if (result != VMCI_SUCCESS)
843 		pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
844 			 cpt_type, result);
845 
846 	return result;
847 }
848 
849 /*
850  * Retrieves the specified context's pending notifications in the
851  * form of a handle array. The handle arrays returned are the
852  * actual data - not a copy and should not be modified by the
853  * caller. They must be released using
854  * vmci_ctx_rcv_notifications_release.
855  */
856 int vmci_ctx_rcv_notifications_get(u32 context_id,
857 				   struct vmci_handle_arr **db_handle_array,
858 				   struct vmci_handle_arr **qp_handle_array)
859 {
860 	struct vmci_ctx *context;
861 	int result = VMCI_SUCCESS;
862 
863 	context = vmci_ctx_get(context_id);
864 	if (context == NULL)
865 		return VMCI_ERROR_NOT_FOUND;
866 
867 	spin_lock(&context->lock);
868 
869 	*db_handle_array = context->pending_doorbell_array;
870 	context->pending_doorbell_array = vmci_handle_arr_create(0);
871 	if (!context->pending_doorbell_array) {
872 		context->pending_doorbell_array = *db_handle_array;
873 		*db_handle_array = NULL;
874 		result = VMCI_ERROR_NO_MEM;
875 	}
876 	*qp_handle_array = NULL;
877 
878 	spin_unlock(&context->lock);
879 	vmci_ctx_put(context);
880 
881 	return result;
882 }
883 
884 /*
885  * Releases handle arrays with pending notifications previously
886  * retrieved using vmci_ctx_rcv_notifications_get. If the
887  * notifications were not successfully handed over to the guest,
888  * success must be false.
889  */
890 void vmci_ctx_rcv_notifications_release(u32 context_id,
891 					struct vmci_handle_arr *db_handle_array,
892 					struct vmci_handle_arr *qp_handle_array,
893 					bool success)
894 {
895 	struct vmci_ctx *context = vmci_ctx_get(context_id);
896 
897 	spin_lock(&context->lock);
898 	if (!success) {
899 		struct vmci_handle handle;
900 
901 		/*
902 		 * New notifications may have been added while we were not
903 		 * holding the context lock, so we transfer any new pending
904 		 * doorbell notifications to the old array, and reinstate the
905 		 * old array.
906 		 */
907 
908 		handle = vmci_handle_arr_remove_tail(
909 					context->pending_doorbell_array);
910 		while (!vmci_handle_is_invalid(handle)) {
911 			if (!vmci_handle_arr_has_entry(db_handle_array,
912 						       handle)) {
913 				vmci_handle_arr_append_entry(
914 						&db_handle_array, handle);
915 			}
916 			handle = vmci_handle_arr_remove_tail(
917 					context->pending_doorbell_array);
918 		}
919 		vmci_handle_arr_destroy(context->pending_doorbell_array);
920 		context->pending_doorbell_array = db_handle_array;
921 		db_handle_array = NULL;
922 	} else {
923 		ctx_clear_notify_call(context);
924 	}
925 	spin_unlock(&context->lock);
926 	vmci_ctx_put(context);
927 
928 	if (db_handle_array)
929 		vmci_handle_arr_destroy(db_handle_array);
930 
931 	if (qp_handle_array)
932 		vmci_handle_arr_destroy(qp_handle_array);
933 }
934 
935 /*
936  * Registers that a new doorbell handle has been allocated by the
937  * context. Only doorbell handles registered can be notified.
938  */
939 int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
940 {
941 	struct vmci_ctx *context;
942 	int result;
943 
944 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
945 		return VMCI_ERROR_INVALID_ARGS;
946 
947 	context = vmci_ctx_get(context_id);
948 	if (context == NULL)
949 		return VMCI_ERROR_NOT_FOUND;
950 
951 	spin_lock(&context->lock);
952 	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
953 		vmci_handle_arr_append_entry(&context->doorbell_array, handle);
954 		result = VMCI_SUCCESS;
955 	} else {
956 		result = VMCI_ERROR_DUPLICATE_ENTRY;
957 	}
958 
959 	spin_unlock(&context->lock);
960 	vmci_ctx_put(context);
961 
962 	return result;
963 }
964 
965 /*
966  * Unregisters a doorbell handle that was previously registered
967  * with vmci_ctx_dbell_create.
968  */
969 int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
970 {
971 	struct vmci_ctx *context;
972 	struct vmci_handle removed_handle;
973 
974 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
975 		return VMCI_ERROR_INVALID_ARGS;
976 
977 	context = vmci_ctx_get(context_id);
978 	if (context == NULL)
979 		return VMCI_ERROR_NOT_FOUND;
980 
981 	spin_lock(&context->lock);
982 	removed_handle =
983 	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
984 	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
985 	spin_unlock(&context->lock);
986 
987 	vmci_ctx_put(context);
988 
989 	return vmci_handle_is_invalid(removed_handle) ?
990 	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
991 }
992 
993 /*
994  * Unregisters all doorbell handles that were previously
995  * registered with vmci_ctx_dbell_create.
996  */
997 int vmci_ctx_dbell_destroy_all(u32 context_id)
998 {
999 	struct vmci_ctx *context;
1000 	struct vmci_handle handle;
1001 
1002 	if (context_id == VMCI_INVALID_ID)
1003 		return VMCI_ERROR_INVALID_ARGS;
1004 
1005 	context = vmci_ctx_get(context_id);
1006 	if (context == NULL)
1007 		return VMCI_ERROR_NOT_FOUND;
1008 
1009 	spin_lock(&context->lock);
1010 	do {
1011 		struct vmci_handle_arr *arr = context->doorbell_array;
1012 		handle = vmci_handle_arr_remove_tail(arr);
1013 	} while (!vmci_handle_is_invalid(handle));
1014 	do {
1015 		struct vmci_handle_arr *arr = context->pending_doorbell_array;
1016 		handle = vmci_handle_arr_remove_tail(arr);
1017 	} while (!vmci_handle_is_invalid(handle));
1018 	spin_unlock(&context->lock);
1019 
1020 	vmci_ctx_put(context);
1021 
1022 	return VMCI_SUCCESS;
1023 }
1024 
1025 /*
1026  * Registers a notification of a doorbell handle initiated by the
1027  * specified source context. The notification of doorbells are
1028  * subject to the same isolation rules as datagram delivery. To
1029  * allow host side senders of notifications a finer granularity
1030  * of sender rights than those assigned to the sending context
1031  * itself, the host context is required to specify a different
1032  * set of privilege flags that will override the privileges of
1033  * the source context.
1034  */
1035 int vmci_ctx_notify_dbell(u32 src_cid,
1036 			  struct vmci_handle handle,
1037 			  u32 src_priv_flags)
1038 {
1039 	struct vmci_ctx *dst_context;
1040 	int result;
1041 
1042 	if (vmci_handle_is_invalid(handle))
1043 		return VMCI_ERROR_INVALID_ARGS;
1044 
1045 	/* Get the target VM's VMCI context. */
1046 	dst_context = vmci_ctx_get(handle.context);
1047 	if (!dst_context) {
1048 		pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1049 		return VMCI_ERROR_NOT_FOUND;
1050 	}
1051 
1052 	if (src_cid != handle.context) {
1053 		u32 dst_priv_flags;
1054 
1055 		if (VMCI_CONTEXT_IS_VM(src_cid) &&
1056 		    VMCI_CONTEXT_IS_VM(handle.context)) {
1057 			pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1058 				 src_cid, handle.context);
1059 			result = VMCI_ERROR_DST_UNREACHABLE;
1060 			goto out;
1061 		}
1062 
1063 		result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1064 		if (result < VMCI_SUCCESS) {
1065 			pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1066 				handle.context, handle.resource);
1067 			goto out;
1068 		}
1069 
1070 		if (src_cid != VMCI_HOST_CONTEXT_ID ||
1071 		    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1072 			src_priv_flags = vmci_context_get_priv_flags(src_cid);
1073 		}
1074 
1075 		if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1076 			result = VMCI_ERROR_NO_ACCESS;
1077 			goto out;
1078 		}
1079 	}
1080 
1081 	if (handle.context == VMCI_HOST_CONTEXT_ID) {
1082 		result = vmci_dbell_host_context_notify(src_cid, handle);
1083 	} else {
1084 		spin_lock(&dst_context->lock);
1085 
1086 		if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1087 					       handle)) {
1088 			result = VMCI_ERROR_NOT_FOUND;
1089 		} else {
1090 			if (!vmci_handle_arr_has_entry(
1091 					dst_context->pending_doorbell_array,
1092 					handle)) {
1093 				vmci_handle_arr_append_entry(
1094 					&dst_context->pending_doorbell_array,
1095 					handle);
1096 
1097 				ctx_signal_notify(dst_context);
1098 				wake_up(&dst_context->host_context.wait_queue);
1099 
1100 			}
1101 			result = VMCI_SUCCESS;
1102 		}
1103 		spin_unlock(&dst_context->lock);
1104 	}
1105 
1106  out:
1107 	vmci_ctx_put(dst_context);
1108 
1109 	return result;
1110 }
1111 
1112 bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1113 {
1114 	return context && context->user_version >= VMCI_VERSION_HOSTQP;
1115 }
1116 
1117 /*
1118  * Registers that a new queue pair handle has been allocated by
1119  * the context.
1120  */
1121 int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1122 {
1123 	int result;
1124 
1125 	if (context == NULL || vmci_handle_is_invalid(handle))
1126 		return VMCI_ERROR_INVALID_ARGS;
1127 
1128 	if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
1129 		vmci_handle_arr_append_entry(&context->queue_pair_array,
1130 					     handle);
1131 		result = VMCI_SUCCESS;
1132 	} else {
1133 		result = VMCI_ERROR_DUPLICATE_ENTRY;
1134 	}
1135 
1136 	return result;
1137 }
1138 
1139 /*
1140  * Unregisters a queue pair handle that was previously registered
1141  * with vmci_ctx_qp_create.
1142  */
1143 int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1144 {
1145 	struct vmci_handle hndl;
1146 
1147 	if (context == NULL || vmci_handle_is_invalid(handle))
1148 		return VMCI_ERROR_INVALID_ARGS;
1149 
1150 	hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1151 
1152 	return vmci_handle_is_invalid(hndl) ?
1153 		VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1154 }
1155 
1156 /*
1157  * Determines whether a given queue pair handle is registered
1158  * with the given context.
1159  */
1160 bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1161 {
1162 	if (context == NULL || vmci_handle_is_invalid(handle))
1163 		return false;
1164 
1165 	return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1166 }
1167 
1168 /*
1169  * vmci_context_get_priv_flags() - Retrieve privilege flags.
1170  * @context_id: The context ID of the VMCI context.
1171  *
1172  * Retrieves privilege flags of the given VMCI context ID.
1173  */
1174 u32 vmci_context_get_priv_flags(u32 context_id)
1175 {
1176 	if (vmci_host_code_active()) {
1177 		u32 flags;
1178 		struct vmci_ctx *context;
1179 
1180 		context = vmci_ctx_get(context_id);
1181 		if (!context)
1182 			return VMCI_LEAST_PRIVILEGE_FLAGS;
1183 
1184 		flags = context->priv_flags;
1185 		vmci_ctx_put(context);
1186 		return flags;
1187 	}
1188 	return VMCI_NO_PRIVILEGE_FLAGS;
1189 }
1190 EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1191 
1192 /*
1193  * vmci_is_context_owner() - Determimnes if user is the context owner
1194  * @context_id: The context ID of the VMCI context.
1195  * @uid:        The host user id (real kernel value).
1196  *
1197  * Determines whether a given UID is the owner of given VMCI context.
1198  */
1199 bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1200 {
1201 	bool is_owner = false;
1202 
1203 	if (vmci_host_code_active()) {
1204 		struct vmci_ctx *context = vmci_ctx_get(context_id);
1205 		if (context) {
1206 			if (context->cred)
1207 				is_owner = uid_eq(context->cred->uid, uid);
1208 			vmci_ctx_put(context);
1209 		}
1210 	}
1211 
1212 	return is_owner;
1213 }
1214 EXPORT_SYMBOL_GPL(vmci_is_context_owner);
1215