xref: /linux/drivers/ptp/ptp_vmclock.c (revision bc484a5096732cd858771cccd3164ec985bdc03d)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtual PTP 1588 clock for use with LM-safe VMclock device.
4  *
5  * Copyright © 2024 Amazon.com, Inc. or its affiliates.
6  */
7 
8 #include "linux/poll.h"
9 #include "linux/types.h"
10 #include "linux/wait.h"
11 #include <linux/acpi.h>
12 #include <linux/device.h>
13 #include <linux/err.h>
14 #include <linux/file.h>
15 #include <linux/fs.h>
16 #include <linux/init.h>
17 #include <linux/io.h>
18 #include <linux/interrupt.h>
19 #include <linux/kernel.h>
20 #include <linux/miscdevice.h>
21 #include <linux/mm.h>
22 #include <linux/module.h>
23 #include <linux/of.h>
24 #include <linux/platform_device.h>
25 #include <linux/slab.h>
26 
27 #include <uapi/linux/vmclock-abi.h>
28 
29 #include <linux/ptp_clock_kernel.h>
30 
31 #ifdef CONFIG_X86
32 #include <asm/pvclock.h>
33 #include <asm/kvmclock.h>
34 #endif
35 
36 #ifdef CONFIG_KVM_GUEST
37 #define SUPPORT_KVMCLOCK
38 #endif
39 
40 static DEFINE_IDA(vmclock_ida);
41 
42 ACPI_MODULE_NAME("vmclock");
43 
44 struct vmclock_state {
45 	struct resource res;
46 	struct vmclock_abi *clk;
47 	struct miscdevice miscdev;
48 	wait_queue_head_t disrupt_wait;
49 	struct ptp_clock_info ptp_clock_info;
50 	struct ptp_clock *ptp_clock;
51 	enum clocksource_ids cs_id, sys_cs_id;
52 	int index;
53 	char *name;
54 };
55 
/* Upper bound on how long a reader keeps retrying for a consistent snapshot. */
#define VMCLOCK_MAX_WAIT	ms_to_ktime(100)

/* Require at least the flags field to be present. All else can be optional. */
#define VMCLOCK_MIN_SIZE	offsetof(struct vmclock_abi, pad)

/* True if the size reported in the structure covers the whole of field _f. */
#define VMCLOCK_FIELD_PRESENT(_c, _f)					\
	(le32_to_cpu((_c)->size) >=					\
	 (offsetof(struct vmclock_abi, _f) + sizeof((_c)->_f)))
64 
/*
 * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64,
 * shift the 128-bit product right by 'shift' bits, and add the fractional
 * second part of the reference time.
 *
 * The result is a 128-bit value, the top 64 bits of which are seconds, and
 * the low 64 bits are (seconds >> 64).
 *
 * @res_hi:   receives the top 64 bits of the result
 * @delta:    counter ticks to convert
 * @period:   tick period
 * @shift:    right shift applied to the product before the addition
 * @frac_sec: value added to the shifted product (carries into *res_hi)
 *
 * Returns the low 64 bits of the result.
 */
static uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
					uint64_t period, uint8_t shift,
					uint64_t frac_sec)
{
	unsigned __int128 res = (unsigned __int128)delta * period;

	/*
	 * The caller passes a shift read from the device. Shifting a 128-bit
	 * value by 128 or more bits is undefined behaviour in C, so produce
	 * the mathematically expected zero explicitly in that case.
	 */
	if (shift >= 128)
		res = 0;
	else
		res >>= shift;

	res += frac_sec;
	*res_hi = (uint64_t)(res >> 64);
	return (uint64_t)res;
}
83 
84 static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
85 {
86 	if (clk->time_type == VMCLOCK_TIME_TAI)
87 		return true;
88 
89 	if (clk->time_type == VMCLOCK_TIME_UTC &&
90 	    (le64_to_cpu(clk->flags) & VMCLOCK_FLAG_TAI_OFFSET_VALID)) {
91 		if (sec)
92 			*sec -= (int16_t)le16_to_cpu(clk->tai_offset_sec);
93 		return true;
94 	}
95 	return false;
96 }
97 
98 static int vmclock_get_crosststamp(struct vmclock_state *st,
99 				   struct ptp_system_timestamp *sts,
100 				   struct system_counterval_t *system_counter,
101 				   struct timespec64 *tspec)
102 {
103 	ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
104 	uint64_t cycle, delta, seq, frac_sec;
105 
106 #ifdef CONFIG_X86
107 	/*
108 	 * We'd expect the hypervisor to know this and to report the clock
109 	 * status as VMCLOCK_STATUS_UNRELIABLE. But be paranoid.
110 	 */
111 	if (check_tsc_unstable())
112 		return -EINVAL;
113 #endif
114 
115 	while (1) {
116 		seq = le32_to_cpu(st->clk->seq_count) & ~1ULL;
117 
118 		/*
119 		 * This pairs with a write barrier in the hypervisor
120 		 * which populates this structure.
121 		 */
122 		virt_rmb();
123 
124 		if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE)
125 			return -EINVAL;
126 
127 		/*
128 		 * When invoked for gettimex64(), fill in the pre/post system
129 		 * times. The simple case is when system time is based on the
130 		 * same counter as st->cs_id, in which case all three times
131 		 * will be derived from the *same* counter value.
132 		 *
133 		 * If the system isn't using the same counter, then the value
134 		 * from ptp_read_system_prets() will still be used as pre_ts,
135 		 * and ptp_read_system_postts() is called to populate postts
136 		 * after calling get_cycles().
137 		 */
138 		if (sts) {
139 			ptp_read_system_prets(sts);
140 			if (sts->pre_sts.cs_id == st->cs_id) {
141 				cycle = sts->pre_sts.cycles;
142 				sts->post_sts = sts->pre_sts;
143 			} else if (sts->pre_sts.hw_csid == st->cs_id &&
144 				   sts->pre_sts.hw_cycles) {
145 				cycle = sts->pre_sts.hw_cycles;
146 				sts->post_sts = sts->pre_sts;
147 			} else {
148 				cycle = get_cycles();
149 				ptp_read_system_postts(sts);
150 			}
151 		} else {
152 			cycle = get_cycles();
153 		}
154 
155 		delta = cycle - le64_to_cpu(st->clk->counter_value);
156 
157 		frac_sec = mul_u64_u64_shr_add_u64(&tspec->tv_sec, delta,
158 						   le64_to_cpu(st->clk->counter_period_frac_sec),
159 						   st->clk->counter_period_shift,
160 						   le64_to_cpu(st->clk->time_frac_sec));
161 		tspec->tv_nsec = mul_u64_u64_shr(frac_sec, NSEC_PER_SEC, 64);
162 		tspec->tv_sec += le64_to_cpu(st->clk->time_sec);
163 
164 		if (!tai_adjust(st->clk, &tspec->tv_sec))
165 			return -EINVAL;
166 
167 		/*
168 		 * This pairs with a write barrier in the hypervisor
169 		 * which populates this structure.
170 		 */
171 		virt_rmb();
172 		if (seq == le32_to_cpu(st->clk->seq_count))
173 			break;
174 
175 		if (ktime_after(ktime_get(), deadline))
176 			return -ETIMEDOUT;
177 	}
178 
179 	if (system_counter) {
180 		system_counter->cycles = cycle;
181 		system_counter->cs_id = st->cs_id;
182 	}
183 
184 	return 0;
185 }
186 
#ifdef SUPPORT_KVMCLOCK
/*
 * In the case where the system is using the KVM clock for timekeeping, convert
 * the TSC value into a KVM clock time in order to return a paired reading that
 * get_device_system_crosststamp() can cope with.
 *
 * @system_counter must be non-NULL: on success its cycles are rewritten to
 * the KVM clock value and its cs_id is set to CSID_X86_KVM_CLK.
 *
 * Returns whatever vmclock_get_crosststamp() returned.
 */
static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st,
					    struct ptp_system_timestamp *sts,
					    struct system_counterval_t *system_counter,
					    struct timespec64 *tspec)
{
	struct pvclock_vcpu_time_info *pvti;
	unsigned int pvti_ver;
	int ret;

	preempt_disable_notrace();

	/*
	 * Look up this CPU's time info only after preemption is disabled;
	 * doing it earlier would allow the task to migrate and then keep
	 * using another CPU's structure.
	 */
	pvti = this_cpu_pvti();

	do {
		pvti_ver = pvclock_read_begin(pvti);

		ret = vmclock_get_crosststamp(st, sts, system_counter, tspec);
		if (ret)
			break;

		system_counter->cycles = __pvclock_read_cycles(pvti,
							       system_counter->cycles);
		system_counter->cs_id = CSID_X86_KVM_CLK;

		/*
		 * This retry should never really happen; if the TSC is
		 * stable and reliable enough across vCPUs that it is sane
		 * for the hypervisor to expose a VMCLOCK device which uses
		 * it as the reference counter, then the KVM clock should be
		 * in 'master clock mode' and basically never changed. But
		 * the KVM clock is a fickle and often broken thing, so do
		 * it "properly" just in case.
		 */
	} while (pvclock_read_retry(pvti, pvti_ver));

	preempt_enable_notrace();

	return ret;
}
#endif
231 
232 static int ptp_vmclock_get_time_fn(ktime_t *device_time,
233 				   struct system_counterval_t *system_counter,
234 				   void *ctx)
235 {
236 	struct vmclock_state *st = ctx;
237 	struct timespec64 tspec;
238 	int ret;
239 
240 #ifdef SUPPORT_KVMCLOCK
241 	if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK)
242 		ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter,
243 						       &tspec);
244 	else
245 #endif
246 		ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec);
247 
248 	if (!ret)
249 		*device_time = timespec64_to_ktime(tspec);
250 
251 	return ret;
252 }
253 
254 static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
255 				      struct system_device_crosststamp *xtstamp)
256 {
257 	struct vmclock_state *st = container_of(ptp, struct vmclock_state,
258 						ptp_clock_info);
259 	int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, st,
260 						NULL, xtstamp);
261 #ifdef SUPPORT_KVMCLOCK
262 	/*
263 	 * On x86, the KVM clock may be used for the system time. We can
264 	 * actually convert a TSC reading to that, and return a paired
265 	 * timestamp that get_device_system_crosststamp() *can* handle.
266 	 */
267 	if (ret == -ENODEV) {
268 		struct system_time_snapshot systime_snapshot;
269 
270 		ktime_get_snapshot_id(CLOCK_REALTIME, &systime_snapshot);
271 
272 		if (systime_snapshot.cs_id == CSID_X86_TSC ||
273 		    systime_snapshot.cs_id == CSID_X86_KVM_CLK) {
274 			WRITE_ONCE(st->sys_cs_id, systime_snapshot.cs_id);
275 			ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn,
276 							    st, NULL, xtstamp);
277 		}
278 	}
279 #endif
280 	return ret;
281 }
282 
283 /*
284  * PTP clock operations
285  */
286 
/* Frequency adjustment is not supported; always returns -EOPNOTSUPP. */
static int ptp_vmclock_adjfine(struct ptp_clock_info *ptp, long delta)
{
	return -EOPNOTSUPP;
}
291 
292 static int ptp_vmclock_adjtime(struct ptp_clock_info *ptp, s64 delta)
293 {
294 	return -EOPNOTSUPP;
295 }
296 
/* Setting the clock is not supported; always returns -EOPNOTSUPP. */
static int ptp_vmclock_settime(struct ptp_clock_info *ptp,
			       const struct timespec64 *ts)
{
	return -EOPNOTSUPP;
}
302 
303 static int ptp_vmclock_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
304 				struct ptp_system_timestamp *sts)
305 {
306 	struct vmclock_state *st = container_of(ptp, struct vmclock_state,
307 						ptp_clock_info);
308 
309 	return vmclock_get_crosststamp(st, sts, NULL, ts);
310 }
311 
/* No ancillary features can be enabled; always returns -EOPNOTSUPP. */
static int ptp_vmclock_enable(struct ptp_clock_info *ptp,
			      struct ptp_clock_request *rq, int on)
{
	return -EOPNOTSUPP;
}
317 
318 static const struct ptp_clock_info ptp_vmclock_info = {
319 	.owner		= THIS_MODULE,
320 	.max_adj	= 0,
321 	.n_ext_ts	= 0,
322 	.n_pins		= 0,
323 	.pps		= 0,
324 	.adjfine	= ptp_vmclock_adjfine,
325 	.adjtime	= ptp_vmclock_adjtime,
326 	.gettimex64	= ptp_vmclock_gettimex,
327 	.settime64	= ptp_vmclock_settime,
328 	.enable		= ptp_vmclock_enable,
329 	.getcrosststamp = ptp_vmclock_getcrosststamp,
330 };
331 
332 static struct ptp_clock *vmclock_ptp_register(struct device *dev,
333 					      struct vmclock_state *st)
334 {
335 	enum clocksource_ids cs_id;
336 
337 	if (IS_ENABLED(CONFIG_ARM64) &&
338 	    st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) {
339 		/* Can we check it's the virtual counter? */
340 		cs_id = CSID_ARM_ARCH_COUNTER;
341 	} else if (IS_ENABLED(CONFIG_X86) &&
342 		   st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) {
343 		cs_id = CSID_X86_TSC;
344 	} else {
345 		return NULL;
346 	}
347 
348 	/* Accept TAI directly, or UTC with valid offset for conversion to TAI */
349 	if (!tai_adjust(st->clk, NULL)) {
350 		dev_info(dev, "vmclock does not provide unambiguous time\n");
351 		return NULL;
352 	}
353 
354 	st->sys_cs_id = cs_id;
355 	st->cs_id = cs_id;
356 	st->ptp_clock_info = ptp_vmclock_info;
357 	strscpy(st->ptp_clock_info.name, st->name);
358 
359 	return ptp_clock_register(&st->ptp_clock_info, dev);
360 }
361 
362 struct vmclock_file_state {
363 	struct vmclock_state *st;
364 	atomic_t seq;
365 };
366 
367 static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
368 {
369 	struct vmclock_file_state *fst = fp->private_data;
370 	struct vmclock_state *st = fst->st;
371 
372 	if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ)
373 		return -EROFS;
374 
375 	if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff)
376 		return -EINVAL;
377 
378 	if (io_remap_pfn_range(vma, vma->vm_start,
379 			       st->res.start >> PAGE_SHIFT, PAGE_SIZE,
380 			       vma->vm_page_prot))
381 		return -EAGAIN;
382 
383 	return 0;
384 }
385 
386 static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
387 				    size_t count, loff_t *ppos)
388 {
389 	ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
390 	struct vmclock_file_state *fst = fp->private_data;
391 	struct vmclock_state *st = fst->st;
392 	uint32_t seq, old_seq;
393 	size_t max_count;
394 
395 	if (*ppos >= PAGE_SIZE)
396 		return 0;
397 
398 	max_count = PAGE_SIZE - *ppos;
399 	if (count > max_count)
400 		count = max_count;
401 
402 	old_seq = atomic_read(&fst->seq);
403 	while (1) {
404 		seq = le32_to_cpu(st->clk->seq_count) & ~1U;
405 		/* Pairs with hypervisor wmb */
406 		virt_rmb();
407 
408 		if (copy_to_user(buf, ((char *)st->clk) + *ppos, count))
409 			return -EFAULT;
410 
411 		/* Pairs with hypervisor wmb */
412 		virt_rmb();
413 		if (seq == le32_to_cpu(st->clk->seq_count)) {
414 			/*
415 			 * Either we updated fst->seq to seq (the latest version we observed)
416 			 * or someone else did (old_seq == seq), so we can break.
417 			 */
418 			if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) ||
419 			    old_seq == seq) {
420 				break;
421 			}
422 		}
423 
424 		if (ktime_after(ktime_get(), deadline))
425 			return -ETIMEDOUT;
426 	}
427 
428 	*ppos += count;
429 	return count;
430 }
431 
432 static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait)
433 {
434 	struct vmclock_file_state *fst = fp->private_data;
435 	struct vmclock_state *st = fst->st;
436 	uint32_t seq;
437 
438 	/*
439 	 * Hypervisor will not send us any notifications, so fail immediately
440 	 * to avoid having caller sleeping for ever.
441 	 */
442 	if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
443 		return POLLHUP;
444 
445 	poll_wait(fp, &st->disrupt_wait, wait);
446 
447 	seq = le32_to_cpu(st->clk->seq_count);
448 	if (atomic_read(&fst->seq) != seq)
449 		return POLLIN | POLLRDNORM;
450 
451 	return 0;
452 }
453 
454 static int vmclock_miscdev_open(struct inode *inode, struct file *fp)
455 {
456 	struct vmclock_state *st = container_of(fp->private_data,
457 						struct vmclock_state, miscdev);
458 	struct vmclock_file_state *fst = kzalloc_obj(*fst);
459 
460 	if (!fst)
461 		return -ENOMEM;
462 
463 	fst->st = st;
464 	atomic_set(&fst->seq, 0);
465 
466 	fp->private_data = fst;
467 
468 	return 0;
469 }
470 
471 static int vmclock_miscdev_release(struct inode *inode, struct file *fp)
472 {
473 	kfree(fp->private_data);
474 	return 0;
475 }
476 
477 static const struct file_operations vmclock_miscdev_fops = {
478 	.owner = THIS_MODULE,
479 	.open = vmclock_miscdev_open,
480 	.release = vmclock_miscdev_release,
481 	.mmap = vmclock_miscdev_mmap,
482 	.read = vmclock_miscdev_read,
483 	.poll = vmclock_miscdev_poll,
484 };
485 
486 /* module operations */
487 
488 #if IS_ENABLED(CONFIG_ACPI)
489 static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data)
490 {
491 	struct vmclock_state *st = data;
492 	struct resource_win win;
493 	struct resource *res = &win.res;
494 
495 	if (ares->type == ACPI_RESOURCE_TYPE_END_TAG)
496 		return AE_OK;
497 
498 	/* There can be only one */
499 	if (resource_type(&st->res) == IORESOURCE_MEM)
500 		return AE_ERROR;
501 
502 	if (acpi_dev_resource_memory(ares, res) ||
503 	    acpi_dev_resource_address_space(ares, &win)) {
504 
505 		if (resource_type(res) != IORESOURCE_MEM ||
506 		    resource_size(res) < sizeof(st->clk))
507 			return AE_ERROR;
508 
509 		st->res = *res;
510 		return AE_OK;
511 	}
512 
513 	return AE_ERROR;
514 }
515 
516 static void
517 vmclock_acpi_notification_handler(acpi_handle __always_unused handle,
518 				  u32 __always_unused event, void *dev)
519 {
520 	struct device *device = dev;
521 	struct vmclock_state *st = device->driver_data;
522 
523 	wake_up_interruptible(&st->disrupt_wait);
524 }
525 
526 static int vmclock_setup_acpi_notification(struct device *dev)
527 {
528 	struct acpi_device *adev = ACPI_COMPANION(dev);
529 	acpi_status status;
530 
531 	/*
532 	 * This should never happen as this function is only called when
533 	 * has_acpi_companion(dev) is true, but the logic is sufficiently
534 	 * complex that Coverity can't see the tautology.
535 	 */
536 	if (!adev)
537 		return -ENODEV;
538 
539 	status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
540 					     vmclock_acpi_notification_handler,
541 					     dev);
542 	if (ACPI_FAILURE(status)) {
543 		dev_err(dev, "failed to install notification handler");
544 		return -ENODEV;
545 	}
546 
547 	return 0;
548 }
549 
550 static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st)
551 {
552 	struct acpi_device *adev = ACPI_COMPANION(dev);
553 	acpi_status status;
554 
555 	/*
556 	 * This should never happen as this function is only called when
557 	 * has_acpi_companion(dev) is true, but the logic is sufficiently
558 	 * complex that Coverity can't see the tautology.
559 	 */
560 	if (!adev)
561 		return -ENODEV;
562 
563 	status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
564 				     vmclock_acpi_resources, st);
565 	if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
566 		dev_err(dev, "failed to get resources\n");
567 		return -ENODEV;
568 	}
569 
570 	return 0;
571 }
572 #endif /* CONFIG_ACPI */
573 
574 static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *_st)
575 {
576 	struct vmclock_state *st = _st;
577 
578 	wake_up_interruptible(&st->disrupt_wait);
579 	return IRQ_HANDLED;
580 }
581 
582 static int vmclock_probe_dt(struct device *dev, struct vmclock_state *st)
583 {
584 	struct platform_device *pdev = to_platform_device(dev);
585 	struct resource *res;
586 
587 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
588 	if (!res)
589 		return -ENODEV;
590 
591 	st->res = *res;
592 
593 	return 0;
594 }
595 
596 static int vmclock_setup_of_notification(struct device *dev)
597 {
598 	struct platform_device *pdev = to_platform_device(dev);
599 	int irq;
600 
601 	irq = platform_get_irq(pdev, 0);
602 	if (irq < 0)
603 		return irq;
604 
605 	return devm_request_irq(dev, irq, vmclock_of_irq_handler, IRQF_SHARED,
606 				"vmclock", dev->driver_data);
607 }
608 
609 static int vmclock_setup_notification(struct device *dev,
610 				      struct vmclock_state *st)
611 {
612 	/* The device does not support notifications. Nothing else to do */
613 	if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
614 		return 0;
615 
616 #if IS_ENABLED(CONFIG_ACPI)
617 	if (has_acpi_companion(dev))
618 		return vmclock_setup_acpi_notification(dev);
619 #endif
620 	return vmclock_setup_of_notification(dev);
621 }
622 
623 static void vmclock_remove(void *data)
624 {
625 	struct device *dev = data;
626 	struct vmclock_state *st = dev->driver_data;
627 
628 	if (!st) {
629 		dev_err(dev, "%s called with NULL driver_data", __func__);
630 		return;
631 	}
632 
633 #if IS_ENABLED(CONFIG_ACPI)
634 	if (has_acpi_companion(dev))
635 		acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle,
636 					   ACPI_DEVICE_NOTIFY,
637 					   vmclock_acpi_notification_handler);
638 #endif
639 
640 	if (st->ptp_clock)
641 		ptp_clock_unregister(st->ptp_clock);
642 
643 	if (st->miscdev.minor != MISC_DYNAMIC_MINOR)
644 		misc_deregister(&st->miscdev);
645 
646 	dev->driver_data = NULL;
647 }
648 
649 static void vmclock_put_idx(void *data)
650 {
651 	struct vmclock_state *st = data;
652 
653 	ida_free(&vmclock_ida, st->index);
654 }
655 
656 static int vmclock_probe(struct platform_device *pdev)
657 {
658 	struct device *dev = &pdev->dev;
659 	struct vmclock_state *st;
660 	int ret;
661 
662 	st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL);
663 	if (!st)
664 		return -ENOMEM;
665 
666 #if IS_ENABLED(CONFIG_ACPI)
667 	if (has_acpi_companion(dev))
668 		ret = vmclock_probe_acpi(dev, st);
669 	else
670 #endif
671 		ret = vmclock_probe_dt(dev, st);
672 
673 	if (ret) {
674 		dev_info(dev, "Failed to obtain physical address: %d\n", ret);
675 		return ret;
676 	}
677 
678 	if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) {
679 		dev_info(dev, "Region too small (0x%llx)\n",
680 			 resource_size(&st->res));
681 		return -EINVAL;
682 	}
683 	st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res),
684 				MEMREMAP_WB | MEMREMAP_DEC);
685 	if (IS_ERR(st->clk)) {
686 		ret = PTR_ERR(st->clk);
687 		dev_info(dev, "failed to map shared memory\n");
688 		st->clk = NULL;
689 		return ret;
690 	}
691 
692 	if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC ||
693 	    le32_to_cpu(st->clk->size) > resource_size(&st->res) ||
694 	    le16_to_cpu(st->clk->version) != 1) {
695 		dev_info(dev, "vmclock magic fields invalid\n");
696 		return -EINVAL;
697 	}
698 
699 	ret = ida_alloc(&vmclock_ida, GFP_KERNEL);
700 	if (ret < 0)
701 		return ret;
702 
703 	st->index = ret;
704 	ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st);
705 	if (ret)
706 		return ret;
707 
708 	st->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "vmclock%d", st->index);
709 	if (!st->name)
710 		return -ENOMEM;
711 
712 	st->miscdev.minor = MISC_DYNAMIC_MINOR;
713 
714 	init_waitqueue_head(&st->disrupt_wait);
715 	dev->driver_data = st;
716 
717 	ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, dev);
718 	if (ret)
719 		return ret;
720 
721 	ret = vmclock_setup_notification(dev, st);
722 	if (ret)
723 		return ret;
724 
725 	/*
726 	 * If the structure is big enough, it can be mapped to userspace.
727 	 * Theoretically a guest OS even using larger pages could still
728 	 * use 4KiB PTEs to map smaller MMIO regions like this, but let's
729 	 * cross that bridge if/when we come to it.
730 	 */
731 	if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) {
732 		st->miscdev.fops = &vmclock_miscdev_fops;
733 		st->miscdev.name = st->name;
734 
735 		ret = misc_register(&st->miscdev);
736 		if (ret)
737 			return ret;
738 	}
739 
740 	/* If there is valid clock information, register a PTP clock */
741 	if (VMCLOCK_FIELD_PRESENT(st->clk, time_frac_sec)) {
742 		/* Can return a silent NULL, or an error. */
743 		st->ptp_clock = vmclock_ptp_register(dev, st);
744 		if (IS_ERR(st->ptp_clock)) {
745 			ret = PTR_ERR(st->ptp_clock);
746 			st->ptp_clock = NULL;
747 			return ret;
748 		}
749 	}
750 
751 	if (!st->miscdev.minor && !st->ptp_clock) {
752 		/* Neither miscdev nor PTP registered */
753 		dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n");
754 		return -ENODEV;
755 	}
756 
757 	dev_info(dev, "%s: registered %s%s%s\n", st->name,
758 		 st->miscdev.minor ? "miscdev" : "",
759 		 (st->miscdev.minor && st->ptp_clock) ? ", " : "",
760 		 st->ptp_clock ? "PTP" : "");
761 
762 	return 0;
763 }
764 
765 static const struct acpi_device_id vmclock_acpi_ids[] = {
766 	{ "AMZNC10C", 0 },
767 	{ "VMCLOCK", 0 },
768 	{}
769 };
770 MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids);
771 
772 static const struct of_device_id vmclock_of_ids[] = {
773 	{ .compatible = "amazon,vmclock", },
774 	{ },
775 };
776 MODULE_DEVICE_TABLE(of, vmclock_of_ids);
777 
778 static struct platform_driver vmclock_platform_driver = {
779 	.probe		= vmclock_probe,
780 	.driver	= {
781 		.name	= "vmclock",
782 		.acpi_match_table = vmclock_acpi_ids,
783 		.of_match_table = vmclock_of_ids,
784 	},
785 };
786 
787 module_platform_driver(vmclock_platform_driver)
788 
789 MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
790 MODULE_DESCRIPTION("PTP clock using VMCLOCK");
791 MODULE_LICENSE("GPL");
792