xref: /linux/drivers/hv/hv_util.c (revision e5c86679d5e864947a52fb31e45a425dea3e7fa9)
1 /*
2  * Copyright (c) 2010, Microsoft Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Authors:
18  *   Haiyang Zhang <haiyangz@microsoft.com>
19  *   Hank Janssen  <hjanssen@microsoft.com>
20  */
21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 
23 #include <linux/kernel.h>
24 #include <linux/init.h>
25 #include <linux/module.h>
26 #include <linux/slab.h>
27 #include <linux/sysctl.h>
28 #include <linux/reboot.h>
29 #include <linux/hyperv.h>
30 #include <linux/clockchips.h>
31 #include <linux/ptp_clock_kernel.h>
32 #include <asm/mshyperv.h>
33 
34 #include "hyperv_vmbus.h"
35 
/*
 * Protocol versions of the services handled in this file. A version is
 * packed as (major << 16 | minor); the channel callbacks unpack it the same
 * way (>> 16 and & 0xFFFF) when logging the negotiated version.
 */

/* Shutdown service: 3.0 and 1.0. */
#define SD_MAJOR	3
#define SD_MINOR	0
#define SD_VERSION	(SD_MAJOR << 16 | SD_MINOR)

#define SD_MAJOR_1	1
#define SD_VERSION_1	(SD_MAJOR_1 << 16 | SD_MINOR)

/* TimeSync service: 4.0, 3.0 and 1.0. */
#define TS_MAJOR	4
#define TS_MINOR	0
#define TS_VERSION	(TS_MAJOR << 16 | TS_MINOR)

#define TS_MAJOR_1	1
#define TS_VERSION_1	(TS_MAJOR_1 << 16 | TS_MINOR)

#define TS_MAJOR_3	3
#define TS_VERSION_3	(TS_MAJOR_3 << 16 | TS_MINOR)

/* Heartbeat service: 3.0 and 1.0. */
#define HB_MAJOR	3
#define HB_MINOR	0
#define HB_VERSION	(HB_MAJOR << 16 | HB_MINOR)

#define HB_MAJOR_1	1
#define HB_VERSION_1	(HB_MAJOR_1 << 16 | HB_MINOR)

/*
 * Per-service version variables. The address of each one is handed to
 * vmbus_prep_negotiate_resp() by the matching channel callback, which then
 * logs the value when that call reports success.
 */
static int sd_srv_version;
static int ts_srv_version;
static int hb_srv_version;

/*
 * Version tables passed to vmbus_prep_negotiate_resp() together with their
 * *_VER_COUNT. Each *_VER_COUNT must be kept equal to the number of entries
 * in its table. Entries are listed from the highest version to the lowest;
 * presumably this expresses preference order - confirm against
 * vmbus_prep_negotiate_resp().
 */
#define SD_VER_COUNT 2
static const int sd_versions[] = {
	SD_VERSION,
	SD_VERSION_1
};

#define TS_VER_COUNT 3
static const int ts_versions[] = {
	TS_VERSION,
	TS_VERSION_3,
	TS_VERSION_1
};

#define HB_VER_COUNT 2
static const int hb_versions[] = {
	HB_VERSION,
	HB_VERSION_1
};

/* Framework versions, shared by all three negotiating callbacks below. */
#define FW_VER_COUNT 2
static const int fw_versions[] = {
	UTIL_FW_VERSION,
	UTIL_WS2K8_FW_VERSION
};
88 
/*
 * Service descriptors, one per utility device handled by this driver.
 *
 * util_probe() receives the matching descriptor through the id_table
 * driver_data, allocates its recv_buffer, calls util_init when it is set and
 * opens the channel with util_cb as the channel callback. util_deinit, when
 * set, is called from util_remove() and from the util_probe() error path.
 */

/* Shutdown: callback only, no init/deinit hooks. */
static void shutdown_onchannelcallback(void *context);
static struct hv_util_service util_shutdown = {
	.util_cb = shutdown_onchannelcallback,
};

static int hv_timesync_init(struct hv_util_service *srv);
static void hv_timesync_deinit(void);

/* TimeSync: needs init/deinit for its work item and PTP clock. */
static void timesync_onchannelcallback(void *context);
static struct hv_util_service util_timesynch = {
	.util_cb = timesync_onchannelcallback,
	.util_init = hv_timesync_init,
	.util_deinit = hv_timesync_deinit,
};

/* Heartbeat: callback only, no init/deinit hooks. */
static void heartbeat_onchannelcallback(void *context);
static struct hv_util_service util_heartbeat = {
	.util_cb = heartbeat_onchannelcallback,
};

/*
 * KVP, VSS and file copy: the handlers are not defined in this file
 * (presumably declared in hyperv_vmbus.h - confirm).
 */
static struct hv_util_service util_kvp = {
	.util_cb = hv_kvp_onchannelcallback,
	.util_init = hv_kvp_init,
	.util_deinit = hv_kvp_deinit,
};

static struct hv_util_service util_vss = {
	.util_cb = hv_vss_onchannelcallback,
	.util_init = hv_vss_init,
	.util_deinit = hv_vss_deinit,
};

static struct hv_util_service util_fcopy = {
	.util_cb = hv_fcopy_onchannelcallback,
	.util_init = hv_fcopy_init,
	.util_deinit = hv_fcopy_deinit,
};
126 
127 static void perform_shutdown(struct work_struct *dummy)
128 {
129 	orderly_poweroff(true);
130 }
131 
132 /*
133  * Perform the shutdown operation in a thread context.
134  */
135 static DECLARE_WORK(shutdown_work, perform_shutdown);
136 
137 static void shutdown_onchannelcallback(void *context)
138 {
139 	struct vmbus_channel *channel = context;
140 	u32 recvlen;
141 	u64 requestid;
142 	bool execute_shutdown = false;
143 	u8  *shut_txf_buf = util_shutdown.recv_buffer;
144 
145 	struct shutdown_msg_data *shutdown_msg;
146 
147 	struct icmsg_hdr *icmsghdrp;
148 
149 	vmbus_recvpacket(channel, shut_txf_buf,
150 			 PAGE_SIZE, &recvlen, &requestid);
151 
152 	if (recvlen > 0) {
153 		icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[
154 			sizeof(struct vmbuspipe_hdr)];
155 
156 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
157 			if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf,
158 					fw_versions, FW_VER_COUNT,
159 					sd_versions, SD_VER_COUNT,
160 					NULL, &sd_srv_version)) {
161 				pr_info("Shutdown IC version %d.%d\n",
162 					sd_srv_version >> 16,
163 					sd_srv_version & 0xFFFF);
164 			}
165 		} else {
166 			shutdown_msg =
167 				(struct shutdown_msg_data *)&shut_txf_buf[
168 					sizeof(struct vmbuspipe_hdr) +
169 					sizeof(struct icmsg_hdr)];
170 
171 			switch (shutdown_msg->flags) {
172 			case 0:
173 			case 1:
174 				icmsghdrp->status = HV_S_OK;
175 				execute_shutdown = true;
176 
177 				pr_info("Shutdown request received -"
178 					    " graceful shutdown initiated\n");
179 				break;
180 			default:
181 				icmsghdrp->status = HV_E_FAIL;
182 				execute_shutdown = false;
183 
184 				pr_info("Shutdown request received -"
185 					    " Invalid request\n");
186 				break;
187 			}
188 		}
189 
190 		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
191 			| ICMSGHDRFLAG_RESPONSE;
192 
193 		vmbus_sendpacket(channel, shut_txf_buf,
194 				       recvlen, requestid,
195 				       VM_PKT_DATA_INBAND, 0);
196 	}
197 
198 	if (execute_shutdown == true)
199 		schedule_work(&shutdown_work);
200 }
201 
202 /*
203  * Set the host time in a process context.
204  */
205 
/*
 * A deferred request to set the guest clock, filled in by adj_guesttime()
 * and consumed by hv_set_host_time().
 */
struct adj_time_work {
	struct work_struct work;	/* runs hv_set_host_time() */
	u64	host_time;	/* host time sample; multiplied by 100 to get ns */
	u64	ref_time;	/* reference counter value the sample refers to */
	u8	flags;		/* flags of the message that caused the request */
};
212 
213 static void hv_set_host_time(struct work_struct *work)
214 {
215 	struct adj_time_work *wrk;
216 	struct timespec64 host_ts;
217 	u64 reftime, newtime;
218 
219 	wrk = container_of(work, struct adj_time_work, work);
220 
221 	reftime = hyperv_cs->read(hyperv_cs);
222 	newtime = wrk->host_time + (reftime - wrk->ref_time);
223 	host_ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
224 
225 	do_settimeofday64(&host_ts);
226 }
227 
228 /*
229  * Synchronize time with host after reboot, restore, etc.
230  *
231  * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
232  * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
233  * message after the timesync channel is opened. Since the hv_utils module is
234  * loaded after hv_vmbus, the first message is usually missed. This bit is
235  * considered a hard request to discipline the clock.
236  *
237  * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
238  * typically used as a hint to the guest. The guest is under no obligation
239  * to discipline the clock.
240  */
/*
 * The single pending clock-set request. adj_guesttime() fills it in and
 * queues it; hv_timesync_init() initializes its work item and
 * hv_timesync_deinit() cancels it.
 */
static struct adj_time_work  wrk;

/*
 * The last time sample, received from the host. PTP device responds to
 * requests by using this data and the current partition-wide time reference
 * count.
 */
static struct {
	u64				host_time;	/* host time sample */
	u64				ref_time;	/* hyperv_cs reading taken when the sample was stored */
	struct system_time_snapshot	snap;		/* ktime_get_snapshot() taken with the sample */
	spinlock_t			lock;		/* protects the fields above */
} host_ts;
254 
255 static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
256 {
257 	unsigned long flags;
258 	u64 cur_reftime;
259 
260 	/*
261 	 * This check is safe since we are executing in the
262 	 * interrupt context and time synch messages are always
263 	 * delivered on the same CPU.
264 	 */
265 	if (adj_flags & ICTIMESYNCFLAG_SYNC) {
266 		/* Queue a job to do do_settimeofday64() */
267 		if (work_pending(&wrk.work))
268 			return;
269 
270 		wrk.host_time = hosttime;
271 		wrk.ref_time = reftime;
272 		wrk.flags = adj_flags;
273 		schedule_work(&wrk.work);
274 	} else {
275 		/*
276 		 * Save the adjusted time sample from the host and the snapshot
277 		 * of the current system time for PTP device.
278 		 */
279 		spin_lock_irqsave(&host_ts.lock, flags);
280 
281 		cur_reftime = hyperv_cs->read(hyperv_cs);
282 		host_ts.host_time = hosttime;
283 		host_ts.ref_time = cur_reftime;
284 		ktime_get_snapshot(&host_ts.snap);
285 
286 		/*
287 		 * TimeSync v4 messages contain reference time (guest's Hyper-V
288 		 * clocksource read when the time sample was generated), we can
289 		 * improve the precision by adding the delta between now and the
290 		 * time of generation.
291 		 */
292 		if (ts_srv_version > TS_VERSION_3)
293 			host_ts.host_time += (cur_reftime - reftime);
294 
295 		spin_unlock_irqrestore(&host_ts.lock, flags);
296 	}
297 }
298 
299 /*
300  * Time Sync Channel message handler.
301  */
302 static void timesync_onchannelcallback(void *context)
303 {
304 	struct vmbus_channel *channel = context;
305 	u32 recvlen;
306 	u64 requestid;
307 	struct icmsg_hdr *icmsghdrp;
308 	struct ictimesync_data *timedatap;
309 	struct ictimesync_ref_data *refdata;
310 	u8 *time_txf_buf = util_timesynch.recv_buffer;
311 
312 	vmbus_recvpacket(channel, time_txf_buf,
313 			 PAGE_SIZE, &recvlen, &requestid);
314 
315 	if (recvlen > 0) {
316 		icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[
317 				sizeof(struct vmbuspipe_hdr)];
318 
319 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
320 			if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf,
321 						fw_versions, FW_VER_COUNT,
322 						ts_versions, TS_VER_COUNT,
323 						NULL, &ts_srv_version)) {
324 				pr_info("TimeSync IC version %d.%d\n",
325 					ts_srv_version >> 16,
326 					ts_srv_version & 0xFFFF);
327 			}
328 		} else {
329 			if (ts_srv_version > TS_VERSION_3) {
330 				refdata = (struct ictimesync_ref_data *)
331 					&time_txf_buf[
332 					sizeof(struct vmbuspipe_hdr) +
333 					sizeof(struct icmsg_hdr)];
334 
335 				adj_guesttime(refdata->parenttime,
336 						refdata->vmreferencetime,
337 						refdata->flags);
338 			} else {
339 				timedatap = (struct ictimesync_data *)
340 					&time_txf_buf[
341 					sizeof(struct vmbuspipe_hdr) +
342 					sizeof(struct icmsg_hdr)];
343 				adj_guesttime(timedatap->parenttime,
344 						0,
345 						timedatap->flags);
346 			}
347 		}
348 
349 		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
350 			| ICMSGHDRFLAG_RESPONSE;
351 
352 		vmbus_sendpacket(channel, time_txf_buf,
353 				recvlen, requestid,
354 				VM_PKT_DATA_INBAND, 0);
355 	}
356 }
357 
358 /*
359  * Heartbeat functionality.
360  * Every two seconds, Hyper-V send us a heartbeat request message.
361  * we respond to this message, and Hyper-V knows we are alive.
362  */
363 static void heartbeat_onchannelcallback(void *context)
364 {
365 	struct vmbus_channel *channel = context;
366 	u32 recvlen;
367 	u64 requestid;
368 	struct icmsg_hdr *icmsghdrp;
369 	struct heartbeat_msg_data *heartbeat_msg;
370 	u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
371 
372 	while (1) {
373 
374 		vmbus_recvpacket(channel, hbeat_txf_buf,
375 				 PAGE_SIZE, &recvlen, &requestid);
376 
377 		if (!recvlen)
378 			break;
379 
380 		icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[
381 				sizeof(struct vmbuspipe_hdr)];
382 
383 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
384 			if (vmbus_prep_negotiate_resp(icmsghdrp,
385 					hbeat_txf_buf,
386 					fw_versions, FW_VER_COUNT,
387 					hb_versions, HB_VER_COUNT,
388 					NULL, &hb_srv_version)) {
389 
390 				pr_info("Heartbeat IC version %d.%d\n",
391 					hb_srv_version >> 16,
392 					hb_srv_version & 0xFFFF);
393 			}
394 		} else {
395 			heartbeat_msg =
396 				(struct heartbeat_msg_data *)&hbeat_txf_buf[
397 					sizeof(struct vmbuspipe_hdr) +
398 					sizeof(struct icmsg_hdr)];
399 
400 			heartbeat_msg->seq_num += 1;
401 		}
402 
403 		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
404 			| ICMSGHDRFLAG_RESPONSE;
405 
406 		vmbus_sendpacket(channel, hbeat_txf_buf,
407 				       recvlen, requestid,
408 				       VM_PKT_DATA_INBAND, 0);
409 	}
410 }
411 
412 static int util_probe(struct hv_device *dev,
413 			const struct hv_vmbus_device_id *dev_id)
414 {
415 	struct hv_util_service *srv =
416 		(struct hv_util_service *)dev_id->driver_data;
417 	int ret;
418 
419 	srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
420 	if (!srv->recv_buffer)
421 		return -ENOMEM;
422 	srv->channel = dev->channel;
423 	if (srv->util_init) {
424 		ret = srv->util_init(srv);
425 		if (ret) {
426 			ret = -ENODEV;
427 			goto error1;
428 		}
429 	}
430 
431 	/*
432 	 * The set of services managed by the util driver are not performance
433 	 * critical and do not need batched reading. Furthermore, some services
434 	 * such as KVP can only handle one message from the host at a time.
435 	 * Turn off batched reading for all util drivers before we open the
436 	 * channel.
437 	 */
438 	set_channel_read_mode(dev->channel, HV_CALL_DIRECT);
439 
440 	hv_set_drvdata(dev, srv);
441 
442 	ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
443 			srv->util_cb, dev->channel);
444 	if (ret)
445 		goto error;
446 
447 	return 0;
448 
449 error:
450 	if (srv->util_deinit)
451 		srv->util_deinit();
452 error1:
453 	kfree(srv->recv_buffer);
454 	return ret;
455 }
456 
/*
 * Remove one utility device: run the service's deinit hook (if any), close
 * the channel and free the receive buffer allocated in util_probe().
 *
 * Always returns 0.
 */
static int util_remove(struct hv_device *dev)
{
	/* The descriptor stored by util_probe() via hv_set_drvdata(). */
	struct hv_util_service *srv = hv_get_drvdata(dev);

	/*
	 * NOTE(review): the deinit hook runs while the channel is still open,
	 * so the channel callback could presumably still run after it -
	 * confirm that every deinit hook tolerates this.
	 */
	if (srv->util_deinit)
		srv->util_deinit();
	vmbus_close(dev->channel);
	/* The callbacks use recv_buffer, so it is freed only after the close. */
	kfree(srv->recv_buffer);

	return 0;
}
468 
/*
 * Devices handled by this driver. driver_data carries the address of the
 * service descriptor, which util_probe() casts back to
 * struct hv_util_service *. The empty entry terminates the table.
 */
static const struct hv_vmbus_device_id id_table[] = {
	/* Shutdown guid */
	{ HV_SHUTDOWN_GUID,
	  .driver_data = (unsigned long)&util_shutdown
	},
	/* Time synch guid */
	{ HV_TS_GUID,
	  .driver_data = (unsigned long)&util_timesynch
	},
	/* Heartbeat guid */
	{ HV_HEART_BEAT_GUID,
	  .driver_data = (unsigned long)&util_heartbeat
	},
	/* KVP guid */
	{ HV_KVP_GUID,
	  .driver_data = (unsigned long)&util_kvp
	},
	/* VSS GUID */
	{ HV_VSS_GUID,
	  .driver_data = (unsigned long)&util_vss
	},
	/* File copy GUID */
	{ HV_FCOPY_GUID,
	  .driver_data = (unsigned long)&util_fcopy
	},
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);
498 
/*
 * The one and only one driver instance: it serves every device listed in
 * id_table and is registered in init_hyperv_utils().
 */
static  struct hv_driver util_drv = {
	.name = "hv_util",
	.id_table = id_table,
	.probe =  util_probe,
	.remove =  util_remove,
};
506 
/*
 * PTP clock operations that this clock does not implement: enabling
 * ancillary features, setting the time, and frequency or time adjustment.
 * Each one ignores its arguments and returns -EOPNOTSUPP. They are hooked
 * into ptp_hyperv_info below.
 */
static int hv_ptp_enable(struct ptp_clock_info *info,
			 struct ptp_clock_request *request, int on)
{
	return -EOPNOTSUPP;
}

static int hv_ptp_settime(struct ptp_clock_info *p, const struct timespec64 *ts)
{
	return -EOPNOTSUPP;
}

static int hv_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
{
	return -EOPNOTSUPP;
}
static int hv_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
	return -EOPNOTSUPP;
}
526 
527 static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
528 {
529 	unsigned long flags;
530 	u64 newtime, reftime;
531 
532 	spin_lock_irqsave(&host_ts.lock, flags);
533 	reftime = hyperv_cs->read(hyperv_cs);
534 	newtime = host_ts.host_time + (reftime - host_ts.ref_time);
535 	*ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
536 	spin_unlock_irqrestore(&host_ts.lock, flags);
537 
538 	return 0;
539 }
540 
541 static int hv_ptp_get_syncdevicetime(ktime_t *device,
542 				     struct system_counterval_t *system,
543 				     void *ctx)
544 {
545 	system->cs = hyperv_cs;
546 	system->cycles = host_ts.ref_time;
547 	*device = ns_to_ktime((host_ts.host_time - WLTIMEDELTA) * 100);
548 
549 	return 0;
550 }
551 
552 static int hv_ptp_getcrosststamp(struct ptp_clock_info *ptp,
553 				 struct system_device_crosststamp *xtstamp)
554 {
555 	unsigned long flags;
556 	int ret;
557 
558 	spin_lock_irqsave(&host_ts.lock, flags);
559 
560 	/*
561 	 * host_ts contains the last time sample from the host and the snapshot
562 	 * of system time. We don't need to calculate the time delta between
563 	 * the reception and now as get_device_system_crosststamp() does the
564 	 * required interpolation.
565 	 */
566 	ret = get_device_system_crosststamp(hv_ptp_get_syncdevicetime,
567 					    NULL, &host_ts.snap, xtstamp);
568 
569 	spin_unlock_irqrestore(&host_ts.lock, flags);
570 
571 	return ret;
572 }
573 
/*
 * Description of the PTP clock registered in hv_timesync_init(). Only
 * gettime64 and getcrosststamp do real work; the remaining operations
 * return -EOPNOTSUPP.
 */
static struct ptp_clock_info ptp_hyperv_info = {
	.name		= "hyperv",
	.enable         = hv_ptp_enable,
	.adjtime        = hv_ptp_adjtime,
	.adjfreq        = hv_ptp_adjfreq,
	.gettime64      = hv_ptp_gettime,
	.getcrosststamp = hv_ptp_getcrosststamp,
	.settime64      = hv_ptp_settime,
	.owner		= THIS_MODULE,
};

/*
 * The registered PTP clock. hv_timesync_init() leaves it NULL when no clock
 * could be registered; hv_timesync_deinit() checks for that.
 */
static struct ptp_clock *hv_ptp_clock;
586 
587 static int hv_timesync_init(struct hv_util_service *srv)
588 {
589 	/* TimeSync requires Hyper-V clocksource. */
590 	if (!hyperv_cs)
591 		return -ENODEV;
592 
593 	spin_lock_init(&host_ts.lock);
594 
595 	INIT_WORK(&wrk.work, hv_set_host_time);
596 
597 	/*
598 	 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
599 	 * disabled but the driver is still useful without the PTP device
600 	 * as it still handles the ICTIMESYNCFLAG_SYNC case.
601 	 */
602 	hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
603 	if (IS_ERR_OR_NULL(hv_ptp_clock)) {
604 		pr_err("cannot register PTP clock: %ld\n",
605 		       PTR_ERR(hv_ptp_clock));
606 		hv_ptp_clock = NULL;
607 	}
608 
609 	return 0;
610 }
611 
/*
 * Deinit hook of the TimeSync service: unregister the PTP clock if one was
 * registered, then cancel the clock-set work and wait for it to finish.
 *
 * NOTE(review): util_remove() calls this before closing the channel, so the
 * channel callback could presumably queue the work again afterwards -
 * confirm against the vmbus teardown rules.
 */
static void hv_timesync_deinit(void)
{
	/* hv_ptp_clock is NULL when hv_timesync_init() registered no clock. */
	if (hv_ptp_clock)
		ptp_clock_unregister(hv_ptp_clock);
	cancel_work_sync(&wrk.work);
}
618 
619 static int __init init_hyperv_utils(void)
620 {
621 	pr_info("Registering HyperV Utility Driver\n");
622 
623 	return vmbus_driver_register(&util_drv);
624 }
625 
626 static void exit_hyperv_utils(void)
627 {
628 	pr_info("De-Registered HyperV Utility Driver\n");
629 
630 	vmbus_driver_unregister(&util_drv);
631 }
632 
633 module_init(init_hyperv_utils);
634 module_exit(exit_hyperv_utils);
635 
636 MODULE_DESCRIPTION("Hyper-V Utilities");
637 MODULE_LICENSE("GPL");
638