xref: /linux/drivers/hv/channel_mgmt.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * Copyright (c) 2009, Microsoft Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Authors:
18  *   Haiyang Zhang <haiyangz@microsoft.com>
19  *   Hank Janssen  <hjanssen@microsoft.com>
20  */
21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/wait.h>
26 #include <linux/mm.h>
27 #include <linux/slab.h>
28 #include <linux/list.h>
29 #include <linux/module.h>
30 #include <linux/completion.h>
31 #include <linux/delay.h>
32 #include <linux/hyperv.h>
33 
34 #include "hyperv_vmbus.h"
35 
/* Forward declaration: defined further down, called by vmbus_process_offer(). */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

/*
 * Table of the device classes this file distinguishes, one entry per class.
 *
 * hv_get_dev_type() returns an index into this table and init_vp_index()
 * reads vmbus_devs[dev_type].perf_device, so the position of an entry is
 * what the rest of the file uses as the device type.
 * NOTE(review): this presumably requires each entry's position to equal its
 * .dev_type enumerator value -- confirm against the enum definition before
 * reordering or inserting entries.
 *
 * .perf_device is the flag init_vp_index() checks: channels of a class with
 * perf_device == false are always bound to CPU 0.
 */
static const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	},

	/* Synthetic Mouse */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	},

	/* Time Sync */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	},

	/*
	 * Unknown GUID: catch-all entry without a GUID.  hv_get_dev_type()
	 * stops its search before HV_UNKOWN, so this entry is never compared
	 * against an offer; it only supplies perf_device for unmatched GUIDs.
	 */
	{ .dev_type = HV_UNKOWN,
	  .perf_device = false,
	},
};
140 
141 static u16 hv_get_dev_type(const uuid_le *guid)
142 {
143 	u16 i;
144 
145 	for (i = HV_IDE; i < HV_UNKOWN; i++) {
146 		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
147 			return i;
148 	}
149 	pr_info("Unknown GUID: %pUl\n", guid);
150 	return i;
151 }
152 
153 /**
154  * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
155  * @icmsghdrp: Pointer to msg header structure
156  * @icmsg_negotiate: Pointer to negotiate message structure
157  * @buf: Raw buffer channel data
158  *
159  * @icmsghdrp is of type &struct icmsg_hdr.
160  * @negop is of type &struct icmsg_negotiate.
161  * Set up and fill in default negotiate response message.
162  *
163  * The fw_version specifies the  framework version that
164  * we can support and srv_version specifies the service
165  * version we can support.
166  *
167  * Mainly used by Hyper-V drivers.
168  */
169 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
170 				struct icmsg_negotiate *negop, u8 *buf,
171 				int fw_version, int srv_version)
172 {
173 	int icframe_major, icframe_minor;
174 	int icmsg_major, icmsg_minor;
175 	int fw_major, fw_minor;
176 	int srv_major, srv_minor;
177 	int i;
178 	bool found_match = false;
179 
180 	icmsghdrp->icmsgsize = 0x10;
181 	fw_major = (fw_version >> 16);
182 	fw_minor = (fw_version & 0xFFFF);
183 
184 	srv_major = (srv_version >> 16);
185 	srv_minor = (srv_version & 0xFFFF);
186 
187 	negop = (struct icmsg_negotiate *)&buf[
188 		sizeof(struct vmbuspipe_hdr) +
189 		sizeof(struct icmsg_hdr)];
190 
191 	icframe_major = negop->icframe_vercnt;
192 	icframe_minor = 0;
193 
194 	icmsg_major = negop->icmsg_vercnt;
195 	icmsg_minor = 0;
196 
197 	/*
198 	 * Select the framework version number we will
199 	 * support.
200 	 */
201 
202 	for (i = 0; i < negop->icframe_vercnt; i++) {
203 		if ((negop->icversion_data[i].major == fw_major) &&
204 		   (negop->icversion_data[i].minor == fw_minor)) {
205 			icframe_major = negop->icversion_data[i].major;
206 			icframe_minor = negop->icversion_data[i].minor;
207 			found_match = true;
208 		}
209 	}
210 
211 	if (!found_match)
212 		goto fw_error;
213 
214 	found_match = false;
215 
216 	for (i = negop->icframe_vercnt;
217 		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
218 		if ((negop->icversion_data[i].major == srv_major) &&
219 		   (negop->icversion_data[i].minor == srv_minor)) {
220 			icmsg_major = negop->icversion_data[i].major;
221 			icmsg_minor = negop->icversion_data[i].minor;
222 			found_match = true;
223 		}
224 	}
225 
226 	/*
227 	 * Respond with the framework and service
228 	 * version numbers we can support.
229 	 */
230 
231 fw_error:
232 	if (!found_match) {
233 		negop->icframe_vercnt = 0;
234 		negop->icmsg_vercnt = 0;
235 	} else {
236 		negop->icframe_vercnt = 1;
237 		negop->icmsg_vercnt = 1;
238 	}
239 
240 	negop->icversion_data[0].major = icframe_major;
241 	negop->icversion_data[0].minor = icframe_minor;
242 	negop->icversion_data[1].major = icmsg_major;
243 	negop->icversion_data[1].minor = icmsg_minor;
244 	return found_match;
245 }
246 
247 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
248 
249 /*
250  * alloc_channel - Allocate and initialize a vmbus channel object
251  */
252 static struct vmbus_channel *alloc_channel(void)
253 {
254 	static atomic_t chan_num = ATOMIC_INIT(0);
255 	struct vmbus_channel *channel;
256 
257 	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
258 	if (!channel)
259 		return NULL;
260 
261 	channel->id = atomic_inc_return(&chan_num);
262 	channel->acquire_ring_lock = true;
263 	spin_lock_init(&channel->inbound_lock);
264 	spin_lock_init(&channel->lock);
265 
266 	INIT_LIST_HEAD(&channel->sc_list);
267 	INIT_LIST_HEAD(&channel->percpu_list);
268 
269 	return channel;
270 }
271 
/*
 * free_channel - Release the resources used by the vmbus channel object
 * @channel: Channel to free; handed straight to kfree().
 *
 * Only the memory of the object itself is released here; callers in this
 * file take the channel off its lists before calling this.
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}
279 
280 static void percpu_channel_enq(void *arg)
281 {
282 	struct vmbus_channel *channel = arg;
283 	int cpu = smp_processor_id();
284 
285 	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
286 }
287 
288 static void percpu_channel_deq(void *arg)
289 {
290 	struct vmbus_channel *channel = arg;
291 
292 	list_del(&channel->percpu_list);
293 }
294 
295 
296 static void vmbus_release_relid(u32 relid)
297 {
298 	struct vmbus_channel_relid_released msg;
299 
300 	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
301 	msg.child_relid = relid;
302 	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
303 	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
304 }
305 
306 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
307 {
308 	unsigned long flags;
309 	struct vmbus_channel *primary_channel;
310 
311 	vmbus_release_relid(relid);
312 
313 	BUG_ON(!channel->rescind);
314 	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
315 
316 	if (channel->target_cpu != get_cpu()) {
317 		put_cpu();
318 		smp_call_function_single(channel->target_cpu,
319 					 percpu_channel_deq, channel, true);
320 	} else {
321 		percpu_channel_deq(channel);
322 		put_cpu();
323 	}
324 
325 	if (channel->primary_channel == NULL) {
326 		list_del(&channel->listentry);
327 
328 		primary_channel = channel;
329 	} else {
330 		primary_channel = channel->primary_channel;
331 		spin_lock_irqsave(&primary_channel->lock, flags);
332 		list_del(&channel->sc_list);
333 		primary_channel->num_sc--;
334 		spin_unlock_irqrestore(&primary_channel->lock, flags);
335 	}
336 
337 	/*
338 	 * We need to free the bit for init_vp_index() to work in the case
339 	 * of sub-channel, when we reload drivers like hv_netvsc.
340 	 */
341 	cpumask_clear_cpu(channel->target_cpu,
342 			  &primary_channel->alloced_cpus_in_node);
343 
344 	free_channel(channel);
345 }
346 
347 void vmbus_free_channels(void)
348 {
349 	struct vmbus_channel *channel, *tmp;
350 
351 	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
352 		listentry) {
353 		/* hv_process_channel_removal() needs this */
354 		channel->rescind = true;
355 
356 		vmbus_device_unregister(channel->device_obj);
357 	}
358 }
359 
/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 * @newchannel: Freshly allocated channel that already carries the offer
 *              message.  Ownership passes to this function: on every
 *              failure path the channel is freed here.
 *
 * An offer whose interface type and instance GUIDs are not yet on the global
 * channel list is a new primary channel: it is added to the list and a
 * device is created and registered for it.  An offer that matches an
 * existing channel is accepted only as a sub-channel (non-zero
 * sub_channel_index) of that channel; otherwise it is dropped.
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;
	u16 dev_type;
	int ret;

	/* Make sure this is a new offer */
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * After this loop 'channel' is only meaningful when fnew is false;
	 * it then points at the existing channel with the same GUIDs.
	 */
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	mutex_unlock(&vmbus_connection.channel_mutex);

	/*
	 * NOTE(review): from here on 'channel' is used without
	 * channel_mutex held -- confirm nothing can remove it meanwhile.
	 */
	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			/* Duplicate offer for a primary channel: drop it. */
			goto err_free_chan;
	}

	dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);

	/* Picks target_cpu/target_vp; needs primary_channel set (see above). */
	init_vp_index(newchannel, dev_type);

	/* Add the channel to the per-CPU list of its target CPU. */
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	/* Sub-channel: notify the primary's callback, if any, and finish. */
	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver.
	 * The device object has to be created and stored in the channel
	 * before it is registered below.
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = dev_type;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_device_register(newchannel->device_obj);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

	/*
	 * Undo for a new primary channel: report the relid as released,
	 * then take the channel off the global and per-CPU lists again.
	 */
err_deq_chan:
	vmbus_release_relid(newchannel->offermsg.child_relid);

	mutex_lock(&vmbus_connection.channel_mutex);
	list_del(&newchannel->listentry);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

err_free_chan:
	free_channel(newchannel);
}
484 
/*
 * We use this state to statically distribute the channel interrupt load.
 * It is the NUMA node id that init_vp_index() tries next for a primary
 * channel; it wraps to 0 when it reaches nr_node_ids.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 *
 * @channel:  Channel whose numa_node (primary channels only), target_cpu
 *            and target_vp are filled in.  channel->primary_channel must
 *            already be set for a sub-channel.
 * @dev_type: Index into vmbus_devs[], used to look up perf_device.
 */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
	u32 cur_cpu;
	bool perf_chn = vmbus_devs[dev_type].perf_device;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	/* NOTE(review): a struct cpumask on the stack -- its size presumably
	 * grows with the configured CPU count; confirm this is acceptable. */
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * We distribute primary channels evenly across all the available
	 * NUMA nodes and within the assigned NUMA node we will assign the
	 * first available CPU to the primary channel.
	 * The sub-channels will be assigned to the CPUs available in the
	 * NUMA node evenly.
	 */
	if (!primary) {
		/* Round-robin over the node ids, skipping nodes without CPUs. */
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids)
				next_node = next_numa_node_id = 0;
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		/* A primary channel acts as its own primary below. */
		primary = channel;
	}
	/* CPUs of this node already handed out, shared by all channels. */
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	/*
	 * Candidate CPUs: bits that differ between the node's CPU mask and
	 * alloced_mask.  Presumably alloced_mask only ever holds CPUs of
	 * this node, which makes this "node CPUs not handed out yet".
	 */
	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	/*
	 * Start value for the cpumask_next() scan below.
	 * NOTE(review): cur_cpu is a u32, so this stores the all-ones value;
	 * presumably it reaches cpumask_next() as -1 -- confirm.
	 */
	cur_cpu = -1;

	/*
	 * Normally Hyper-V host doesn't create more subchannels than there
	 * are VCPUs on the node but it is possible when not all present VCPUs
	 * on the node are initialized by guest. Clear the alloced_cpus_in_node
	 * to start over.
	 */
	if (cpumask_equal(&primary->alloced_cpus_in_node,
			  cpumask_of_node(primary->numa_node)))
		cpumask_clear(&primary->alloced_cpus_in_node);

	while (true) {
		cur_cpu = cpumask_next(cur_cpu, &available_mask);
		if (cur_cpu >= nr_cpu_ids) {
			/*
			 * Candidates exhausted: widen the candidate set to
			 * every CPU of the node and restart the scan.
			 */
			cur_cpu = -1;
			cpumask_copy(&available_mask,
				     cpumask_of_node(primary->numa_node));
			continue;
		}

		/*
		 * NOTE: in the case of sub-channel, we clear the sub-channel
		 * related bit(s) in primary->alloced_cpus_in_node in
		 * hv_process_channel_removal(), so when we reload drivers
		 * like hv_netvsc in SMP guest, here we're able to re-allocate
		 * bit from primary->alloced_cpus_in_node.
		 */
		if (!cpumask_test_cpu(cur_cpu,
				&primary->alloced_cpus_in_node)) {
			/* Record the pick per primary and per node. */
			cpumask_set_cpu(cur_cpu,
					&primary->alloced_cpus_in_node);
			cpumask_set_cpu(cur_cpu, alloced_mask);
			break;
		}
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}
597 
598 static void vmbus_wait_for_unload(void)
599 {
600 	int cpu;
601 	void *page_addr;
602 	struct hv_message *msg;
603 	struct vmbus_channel_message_header *hdr;
604 	u32 message_type;
605 
606 	/*
607 	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
608 	 * used for initial contact or to CPU0 depending on host version. When
609 	 * we're crashing on a different CPU let's hope that IRQ handler on
610 	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
611 	 * functional and vmbus_unload_response() will complete
612 	 * vmbus_connection.unload_event. If not, the last thing we can do is
613 	 * read message pages for all CPUs directly.
614 	 */
615 	while (1) {
616 		if (completion_done(&vmbus_connection.unload_event))
617 			break;
618 
619 		for_each_online_cpu(cpu) {
620 			page_addr = hv_context.synic_message_page[cpu];
621 			msg = (struct hv_message *)page_addr +
622 				VMBUS_MESSAGE_SINT;
623 
624 			message_type = READ_ONCE(msg->header.message_type);
625 			if (message_type == HVMSG_NONE)
626 				continue;
627 
628 			hdr = (struct vmbus_channel_message_header *)
629 				msg->u.payload;
630 
631 			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
632 				complete(&vmbus_connection.unload_event);
633 
634 			vmbus_signal_eom(msg, message_type);
635 		}
636 
637 		mdelay(10);
638 	}
639 
640 	/*
641 	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
642 	 * maybe-pending messages on all CPUs to be able to receive new
643 	 * messages after we reconnect.
644 	 */
645 	for_each_online_cpu(cpu) {
646 		page_addr = hv_context.synic_message_page[cpu];
647 		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
648 		msg->header.message_type = HVMSG_NONE;
649 	}
650 }
651 
/*
 * vmbus_unload_response - Handler for the unload response.
 * @hdr: The received message; not inspected.
 *
 * Completes vmbus_connection.unload_event, which vmbus_initiate_unload()
 * and vmbus_wait_for_unload() wait on.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}
663 
664 void vmbus_initiate_unload(bool crash)
665 {
666 	struct vmbus_channel_message_header hdr;
667 
668 	/* Pre-Win2012R2 hosts don't support reconnect */
669 	if (vmbus_proto_version < VERSION_WIN8_1)
670 		return;
671 
672 	init_completion(&vmbus_connection.unload_event);
673 	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
674 	hdr.msgtype = CHANNELMSG_UNLOAD;
675 	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));
676 
677 	/*
678 	 * vmbus_initiate_unload() is also called on crash and the crash can be
679 	 * happening in an interrupt context, where scheduling is impossible.
680 	 */
681 	if (!crash)
682 		wait_for_completion(&vmbus_connection.unload_event);
683 	else
684 		vmbus_wait_for_unload();
685 }
686 
687 /*
688  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
689  *
690  */
691 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
692 {
693 	struct vmbus_channel_offer_channel *offer;
694 	struct vmbus_channel *newchannel;
695 
696 	offer = (struct vmbus_channel_offer_channel *)hdr;
697 
698 	/* Allocate the channel object and save this offer. */
699 	newchannel = alloc_channel();
700 	if (!newchannel) {
701 		pr_err("Unable to allocate channel object\n");
702 		return;
703 	}
704 
705 	/*
706 	 * By default we setup state to enable batched
707 	 * reading. A specific service can choose to
708 	 * disable this prior to opening the channel.
709 	 */
710 	newchannel->batched_reading = true;
711 
712 	/*
713 	 * Setup state for signalling the host.
714 	 */
715 	newchannel->sig_event = (struct hv_input_signal_event *)
716 				(ALIGN((unsigned long)
717 				&newchannel->sig_buf,
718 				HV_HYPERCALL_PARAM_ALIGN));
719 
720 	newchannel->sig_event->connectionid.asu32 = 0;
721 	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
722 	newchannel->sig_event->flag_number = 0;
723 	newchannel->sig_event->rsvdz = 0;
724 
725 	if (vmbus_proto_version != VERSION_WS2008) {
726 		newchannel->is_dedicated_interrupt =
727 				(offer->is_dedicated_interrupt != 0);
728 		newchannel->sig_event->connectionid.u.id =
729 				offer->connection_id;
730 	}
731 
732 	memcpy(&newchannel->offermsg, offer,
733 	       sizeof(struct vmbus_channel_offer_channel));
734 	newchannel->monitor_grp = (u8)offer->monitorid / 32;
735 	newchannel->monitor_bit = (u8)offer->monitorid % 32;
736 
737 	vmbus_process_offer(newchannel);
738 }
739 
740 /*
741  * vmbus_onoffer_rescind - Rescind offer handler.
742  *
743  * We queue a work item to process this offer synchronously
744  */
745 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
746 {
747 	struct vmbus_channel_rescind_offer *rescind;
748 	struct vmbus_channel *channel;
749 	unsigned long flags;
750 	struct device *dev;
751 
752 	rescind = (struct vmbus_channel_rescind_offer *)hdr;
753 
754 	mutex_lock(&vmbus_connection.channel_mutex);
755 	channel = relid2channel(rescind->child_relid);
756 
757 	if (channel == NULL) {
758 		/*
759 		 * This is very impossible, because in
760 		 * vmbus_process_offer(), we have already invoked
761 		 * vmbus_release_relid() on error.
762 		 */
763 		goto out;
764 	}
765 
766 	spin_lock_irqsave(&channel->lock, flags);
767 	channel->rescind = true;
768 	spin_unlock_irqrestore(&channel->lock, flags);
769 
770 	if (channel->device_obj) {
771 		if (channel->chn_rescind_callback) {
772 			channel->chn_rescind_callback(channel);
773 			goto out;
774 		}
775 		/*
776 		 * We will have to unregister this device from the
777 		 * driver core.
778 		 */
779 		dev = get_device(&channel->device_obj->device);
780 		if (dev) {
781 			vmbus_device_unregister(channel->device_obj);
782 			put_device(dev);
783 		}
784 	} else {
785 		hv_process_channel_removal(channel,
786 			channel->offermsg.child_relid);
787 	}
788 
789 out:
790 	mutex_unlock(&vmbus_connection.channel_mutex);
791 }
792 
/*
 * vmbus_hvsock_device_unregister - Unregister the device of an hvsock channel.
 * @channel: Channel to tear down; must satisfy is_hvsock_channel(), which is
 *           enforced with BUG_ON.
 *
 * Takes vmbus_connection.channel_mutex, flags the channel as rescinded and
 * unregisters its device.  The same flag is set before unregistering in
 * vmbus_free_channels(), where it is noted that
 * hv_process_channel_removal() needs it.
 */
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	mutex_lock(&vmbus_connection.channel_mutex);

	BUG_ON(!is_hvsock_channel(channel));

	channel->rescind = true;
	vmbus_device_unregister(channel->device_obj);

	mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
805 
806 
/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 * @hdr: The received message; unused.
 *
 * Nothing to do here.  The handler exists so that
 * CHANNELMSG_ALLOFFERS_DELIVERED has a non-NULL entry in
 * channel_message_table and is not reported as unhandled by
 * vmbus_onmessage().
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}
817 
818 /*
819  * vmbus_onopen_result - Open result handler.
820  *
821  * This is invoked when we received a response to our channel open request.
822  * Find the matching request, copy the response and signal the requesting
823  * thread.
824  */
825 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
826 {
827 	struct vmbus_channel_open_result *result;
828 	struct vmbus_channel_msginfo *msginfo;
829 	struct vmbus_channel_message_header *requestheader;
830 	struct vmbus_channel_open_channel *openmsg;
831 	unsigned long flags;
832 
833 	result = (struct vmbus_channel_open_result *)hdr;
834 
835 	/*
836 	 * Find the open msg, copy the result and signal/unblock the wait event
837 	 */
838 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
839 
840 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
841 				msglistentry) {
842 		requestheader =
843 			(struct vmbus_channel_message_header *)msginfo->msg;
844 
845 		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
846 			openmsg =
847 			(struct vmbus_channel_open_channel *)msginfo->msg;
848 			if (openmsg->child_relid == result->child_relid &&
849 			    openmsg->openid == result->openid) {
850 				memcpy(&msginfo->response.open_result,
851 				       result,
852 				       sizeof(
853 					struct vmbus_channel_open_result));
854 				complete(&msginfo->waitevent);
855 				break;
856 			}
857 		}
858 	}
859 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
860 }
861 
862 /*
863  * vmbus_ongpadl_created - GPADL created handler.
864  *
865  * This is invoked when we received a response to our gpadl create request.
866  * Find the matching request, copy the response and signal the requesting
867  * thread.
868  */
869 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
870 {
871 	struct vmbus_channel_gpadl_created *gpadlcreated;
872 	struct vmbus_channel_msginfo *msginfo;
873 	struct vmbus_channel_message_header *requestheader;
874 	struct vmbus_channel_gpadl_header *gpadlheader;
875 	unsigned long flags;
876 
877 	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
878 
879 	/*
880 	 * Find the establish msg, copy the result and signal/unblock the wait
881 	 * event
882 	 */
883 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
884 
885 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
886 				msglistentry) {
887 		requestheader =
888 			(struct vmbus_channel_message_header *)msginfo->msg;
889 
890 		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
891 			gpadlheader =
892 			(struct vmbus_channel_gpadl_header *)requestheader;
893 
894 			if ((gpadlcreated->child_relid ==
895 			     gpadlheader->child_relid) &&
896 			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
897 				memcpy(&msginfo->response.gpadl_created,
898 				       gpadlcreated,
899 				       sizeof(
900 					struct vmbus_channel_gpadl_created));
901 				complete(&msginfo->waitevent);
902 				break;
903 			}
904 		}
905 	}
906 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
907 }
908 
909 /*
910  * vmbus_ongpadl_torndown - GPADL torndown handler.
911  *
912  * This is invoked when we received a response to our gpadl teardown request.
913  * Find the matching request, copy the response and signal the requesting
914  * thread.
915  */
916 static void vmbus_ongpadl_torndown(
917 			struct vmbus_channel_message_header *hdr)
918 {
919 	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
920 	struct vmbus_channel_msginfo *msginfo;
921 	struct vmbus_channel_message_header *requestheader;
922 	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
923 	unsigned long flags;
924 
925 	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
926 
927 	/*
928 	 * Find the open msg, copy the result and signal/unblock the wait event
929 	 */
930 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
931 
932 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
933 				msglistentry) {
934 		requestheader =
935 			(struct vmbus_channel_message_header *)msginfo->msg;
936 
937 		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
938 			gpadl_teardown =
939 			(struct vmbus_channel_gpadl_teardown *)requestheader;
940 
941 			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
942 				memcpy(&msginfo->response.gpadl_torndown,
943 				       gpadl_torndown,
944 				       sizeof(
945 					struct vmbus_channel_gpadl_torndown));
946 				complete(&msginfo->waitevent);
947 				break;
948 			}
949 		}
950 	}
951 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
952 }
953 
954 /*
955  * vmbus_onversion_response - Version response handler
956  *
957  * This is invoked when we received a response to our initiate contact request.
958  * Find the matching request, copy the response and signal the requesting
959  * thread.
960  */
961 static void vmbus_onversion_response(
962 		struct vmbus_channel_message_header *hdr)
963 {
964 	struct vmbus_channel_msginfo *msginfo;
965 	struct vmbus_channel_message_header *requestheader;
966 	struct vmbus_channel_version_response *version_response;
967 	unsigned long flags;
968 
969 	version_response = (struct vmbus_channel_version_response *)hdr;
970 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
971 
972 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
973 				msglistentry) {
974 		requestheader =
975 			(struct vmbus_channel_message_header *)msginfo->msg;
976 
977 		if (requestheader->msgtype ==
978 		    CHANNELMSG_INITIATE_CONTACT) {
979 			memcpy(&msginfo->response.version_response,
980 			      version_response,
981 			      sizeof(struct vmbus_channel_version_response));
982 			complete(&msginfo->waitevent);
983 		}
984 	}
985 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
986 }
987 
/*
 * Channel message dispatch table
 *
 * vmbus_onmessage() indexes this table directly with hdr->msgtype, so every
 * row has to sit at the position equal to its message type value; keep the
 * rows in that order when adding new ones.
 *
 * Each row is { message type, flag, handler }.  A NULL handler makes
 * vmbus_onmessage() log the message as unhandled.
 * NOTE(review): the middle 0/1 field is not used anywhere in this file; it
 * presumably tells the dispatcher how the handler may be invoked -- confirm
 * against the definition of struct vmbus_channel_message_table_entry.
 */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
	{CHANNELMSG_18,				0, NULL},
	{CHANNELMSG_19,				0, NULL},
	{CHANNELMSG_20,				0, NULL},
	{CHANNELMSG_TL_CONNECT_REQUEST,		0, NULL},
};
1014 
1015 /*
1016  * vmbus_onmessage - Handler for channel protocol messages.
1017  *
1018  * This is invoked in the vmbus worker thread context.
1019  */
1020 void vmbus_onmessage(void *context)
1021 {
1022 	struct hv_message *msg = context;
1023 	struct vmbus_channel_message_header *hdr;
1024 	int size;
1025 
1026 	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
1027 	size = msg->header.payload_size;
1028 
1029 	if (hdr->msgtype >= CHANNELMSG_COUNT) {
1030 		pr_err("Received invalid channel message type %d size %d\n",
1031 			   hdr->msgtype, size);
1032 		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
1033 				     (unsigned char *)msg->u.payload, size);
1034 		return;
1035 	}
1036 
1037 	if (channel_message_table[hdr->msgtype].message_handler)
1038 		channel_message_table[hdr->msgtype].message_handler(hdr);
1039 	else
1040 		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
1041 }
1042 
1043 /*
1044  * vmbus_request_offers - Send a request to get all our pending offers.
1045  */
1046 int vmbus_request_offers(void)
1047 {
1048 	struct vmbus_channel_message_header *msg;
1049 	struct vmbus_channel_msginfo *msginfo;
1050 	int ret;
1051 
1052 	msginfo = kmalloc(sizeof(*msginfo) +
1053 			  sizeof(struct vmbus_channel_message_header),
1054 			  GFP_KERNEL);
1055 	if (!msginfo)
1056 		return -ENOMEM;
1057 
1058 	msg = (struct vmbus_channel_message_header *)msginfo->msg;
1059 
1060 	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1061 
1062 
1063 	ret = vmbus_post_msg(msg,
1064 			       sizeof(struct vmbus_channel_message_header));
1065 	if (ret != 0) {
1066 		pr_err("Unable to request offers - %d\n", ret);
1067 
1068 		goto cleanup;
1069 	}
1070 
1071 cleanup:
1072 	kfree(msginfo);
1073 
1074 	return ret;
1075 }
1076 
1077 /*
1078  * Retrieve the (sub) channel on which to send an outgoing request.
1079  * When a primary channel has multiple sub-channels, we try to
1080  * distribute the load equally amongst all available channels.
1081  */
1082 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
1083 {
1084 	struct list_head *cur, *tmp;
1085 	int cur_cpu;
1086 	struct vmbus_channel *cur_channel;
1087 	struct vmbus_channel *outgoing_channel = primary;
1088 	int next_channel;
1089 	int i = 1;
1090 
1091 	if (list_empty(&primary->sc_list))
1092 		return outgoing_channel;
1093 
1094 	next_channel = primary->next_oc++;
1095 
1096 	if (next_channel > (primary->num_sc)) {
1097 		primary->next_oc = 0;
1098 		return outgoing_channel;
1099 	}
1100 
1101 	cur_cpu = hv_context.vp_index[get_cpu()];
1102 	put_cpu();
1103 	list_for_each_safe(cur, tmp, &primary->sc_list) {
1104 		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1105 		if (cur_channel->state != CHANNEL_OPENED_STATE)
1106 			continue;
1107 
1108 		if (cur_channel->target_vp == cur_cpu)
1109 			return cur_channel;
1110 
1111 		if (i == next_channel)
1112 			return cur_channel;
1113 
1114 		i++;
1115 	}
1116 
1117 	return outgoing_channel;
1118 }
1119 EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
1120 
1121 static void invoke_sc_cb(struct vmbus_channel *primary_channel)
1122 {
1123 	struct list_head *cur, *tmp;
1124 	struct vmbus_channel *cur_channel;
1125 
1126 	if (primary_channel->sc_creation_callback == NULL)
1127 		return;
1128 
1129 	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
1130 		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1131 
1132 		primary_channel->sc_creation_callback(cur_channel);
1133 	}
1134 }
1135 
/*
 * vmbus_set_sc_create_callback - Install the sub-channel creation callback.
 * @primary_channel: Primary channel the callback is stored in.
 * @sc_cr_cb: Callback to store.  In this file it is invoked with the new
 *            sub-channel from vmbus_process_offer() and, for sub-channels
 *            already on the list, from vmbus_are_subchannels_present().
 *
 * Plain assignment; no locking is done here.
 */
void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1142 
1143 bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
1144 {
1145 	bool ret;
1146 
1147 	ret = !list_empty(&primary->sc_list);
1148 
1149 	if (ret) {
1150 		/*
1151 		 * Invoke the callback on sub-channel creation.
1152 		 * This will present a uniform interface to the
1153 		 * clients.
1154 		 */
1155 		invoke_sc_cb(primary);
1156 	}
1157 
1158 	return ret;
1159 }
1160 EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
1161 
/*
 * vmbus_set_chn_rescind_callback - Install the rescind callback of a channel.
 * @channel: Channel the callback is stored in.
 * @chn_rescind_cb: Callback to store.  vmbus_onoffer_rescind() calls it, in
 *                  place of unregistering the device, when an offer for a
 *                  channel that has a device object is rescinded.
 *
 * Plain assignment; no locking is done here.
 */
void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
		void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
1168