// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtual PTP 1588 clock for use with LM-safe VMclock device.
 *
 * Copyright © 2024 Amazon.com, Inc. or its affiliates.
 */

#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/wait.h>

#include <uapi/linux/vmclock-abi.h>

#include <linux/ptp_clock_kernel.h>

#ifdef CONFIG_X86
#include <asm/pvclock.h>
#include <asm/kvmclock.h>
#endif

#ifdef CONFIG_KVM_GUEST
#define SUPPORT_KVMCLOCK
#endif

static DEFINE_IDA(vmclock_ida);

ACPI_MODULE_NAME("vmclock");

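/* Driver state for a single vmclock device. */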
struct vmclock_state {
	struct resource res;
	struct vmclock_abi *clk;
	struct miscdevice miscdev;
	wait_queue_head_t disrupt_wait;
	struct ptp_clock_info ptp_clock_info;
	struct ptp_clock *ptp_clock;
	enum clocksource_ids cs_id, sys_cs_id;
	int index;
	char *name;
};

#define VMCLOCK_MAX_WAIT ms_to_ktime(100)

/* Require at least the flags field to be present. All else can be optional. */
#define VMCLOCK_MIN_SIZE offsetof(struct vmclock_abi, pad)

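/*
 * A field is present only if the structure size advertised by the
 * hypervisor in 'size' is large enough to cover it entirely.
 */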
#define VMCLOCK_FIELD_PRESENT(_c, _f)			  \
	(le32_to_cpu((_c)->size) >= (offsetof(struct vmclock_abi, _f) +	\
				     sizeof((_c)->_f)))

/*
 * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64
 * and add the fractional second part of the reference time.
 *
 * The result is a 128-bit value; the top 64 bits are whole seconds, stored
 * in *res_hi, and the low 64 bits, returned to the caller, are the
 * fractional part in units of (seconds >> 64).
 */
static uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
					uint64_t period, uint8_t shift,
					uint64_t frac_sec)
{
	unsigned __int128 res = (unsigned __int128)delta * period;

	res >>= shift;
	res += frac_sec;
	*res_hi = res >> 64;
	return (uint64_t)res;
}

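/*
 * Check that the clock can be expressed in TAI: either it already is, or it
 * is UTC with a valid TAI offset, which is applied to *sec when non-NULL.
 */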
static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
{
	if (clk->time_type == VMCLOCK_TIME_TAI)
		return true;

	if (clk->time_type == VMCLOCK_TIME_UTC &&
	    (le64_to_cpu(clk->flags) & VMCLOCK_FLAG_TAI_OFFSET_VALID)) {
		if (sec)
			*sec -= (int16_t)le16_to_cpu(clk->tai_offset_sec);
		return true;
	}
	return false;
}

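/*
 * Read a consistent snapshot of the shared structure (even-numbered
 * seq_count, unchanged across the reads) and convert the reference counter
 * value to a TAI timespec64, giving up after VMCLOCK_MAX_WAIT.
 */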
static int vmclock_get_crosststamp(struct vmclock_state *st,
				   struct ptp_system_timestamp *sts,
				   struct system_counterval_t *system_counter,
				   struct timespec64 *tspec)
{
	ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
	struct system_time_snapshot systime_snapshot;
	uint64_t cycle, delta, seq, frac_sec;

#ifdef CONFIG_X86
	/*
	 * We'd expect the hypervisor to know this and to report the clock
	 * status as VMCLOCK_STATUS_UNRELIABLE. But be paranoid.
	 */
	if (check_tsc_unstable())
		return -EINVAL;
#endif

	while (1) {
		seq = le32_to_cpu(st->clk->seq_count) & ~1ULL;

		/*
		 * This pairs with a write barrier in the hypervisor
		 * which populates this structure.
		 */
		virt_rmb();

		if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE)
			return -EINVAL;

		/*
		 * When invoked for gettimex64(), fill in the pre/post system
		 * times. The simple case is when system time is based on the
		 * same counter as st->cs_id, in which case all three times
		 * will be derived from the *same* counter value.
		 *
		 * If the system isn't using the same counter, then the value
		 * from ktime_get_snapshot() will still be used as pre_ts, and
		 * ptp_read_system_postts() is called to populate postts after
		 * calling get_cycles().
		 *
		 * The conversion to timespec64 happens further down, outside
		 * the seq_count loop.
		 */
		if (sts) {
			ktime_get_snapshot(&systime_snapshot);
			if (systime_snapshot.cs_id == st->cs_id) {
				cycle = systime_snapshot.cycles;
			} else {
				cycle = get_cycles();
				ptp_read_system_postts(sts);
			}
		} else {
			cycle = get_cycles();
		}

		delta = cycle - le64_to_cpu(st->clk->counter_value);

		frac_sec = mul_u64_u64_shr_add_u64(&tspec->tv_sec, delta,
						   le64_to_cpu(st->clk->counter_period_frac_sec),
						   st->clk->counter_period_shift,
						   le64_to_cpu(st->clk->time_frac_sec));
		tspec->tv_nsec = mul_u64_u64_shr(frac_sec, NSEC_PER_SEC, 64);
		tspec->tv_sec += le64_to_cpu(st->clk->time_sec);

		if (!tai_adjust(st->clk, &tspec->tv_sec))
			return -EINVAL;

		/*
		 * This pairs with a write barrier in the hypervisor
		 * which populates this structure.
		 */
		virt_rmb();
		if (seq == le32_to_cpu(st->clk->seq_count))
			break;

		if (ktime_after(ktime_get(), deadline))
			return -ETIMEDOUT;
	}

	if (system_counter) {
		system_counter->cycles = cycle;
		system_counter->cs_id = st->cs_id;
	}

	if (sts) {
		sts->pre_ts = ktime_to_timespec64(systime_snapshot.real);
		if (systime_snapshot.cs_id == st->cs_id)
			sts->post_ts = sts->pre_ts;
	}

	return 0;
}

#ifdef SUPPORT_KVMCLOCK
/*
 * In the case where the system is using the KVM clock for timekeeping, convert
 * the TSC value into a KVM clock time in order to return a paired reading that
 * get_device_system_crosststamp() can cope with.
 */
static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st,
					    struct ptp_system_timestamp *sts,
					    struct system_counterval_t *system_counter,
					    struct timespec64 *tspec)
{
	struct pvclock_vcpu_time_info *pvti = this_cpu_pvti();
	unsigned int pvti_ver;
	int ret;

	preempt_disable_notrace();

	do {
		pvti_ver = pvclock_read_begin(pvti);

		ret = vmclock_get_crosststamp(st, sts, system_counter, tspec);
		if (ret)
			break;

		system_counter->cycles = __pvclock_read_cycles(pvti,
							       system_counter->cycles);
		system_counter->cs_id = CSID_X86_KVM_CLK;

		/*
		 * This retry should never really happen; if the TSC is
		 * stable and reliable enough across vCPUs that it is sane
		 * for the hypervisor to expose a VMCLOCK device which uses
		 * it as the reference counter, then the KVM clock should be
		 * in 'master clock mode' and basically never change. But
		 * the KVM clock is a fickle and often broken thing, so do
		 * it "properly" just in case.
		 */
	} while (pvclock_read_retry(pvti, pvti_ver));

	preempt_enable_notrace();

	return ret;
}
#endif

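/*
 * Callback for get_device_system_crosststamp(), filling in the device time
 * and the matching system counter value.
 */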
static int ptp_vmclock_get_time_fn(ktime_t *device_time,
				   struct system_counterval_t *system_counter,
				   void *ctx)
{
	struct vmclock_state *st = ctx;
	struct timespec64 tspec;
	int ret;

#ifdef SUPPORT_KVMCLOCK
	if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK)
		ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter,
						       &tspec);
	else
#endif
		ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec);

	if (!ret)
		*device_time = timespec64_to_ktime(tspec);

	return ret;
}

static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
				      struct system_device_crosststamp *xtstamp)
{
	struct vmclock_state *st = container_of(ptp, struct vmclock_state,
						ptp_clock_info);
	int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, st,
						NULL, xtstamp);
#ifdef SUPPORT_KVMCLOCK
	/*
	 * On x86, the KVM clock may be used for the system time. We can
	 * actually convert a TSC reading to that, and return a paired
	 * timestamp that get_device_system_crosststamp() *can* handle.
	 */
	if (ret == -ENODEV) {
		struct system_time_snapshot systime_snapshot;

		ktime_get_snapshot(&systime_snapshot);

		if (systime_snapshot.cs_id == CSID_X86_TSC ||
		    systime_snapshot.cs_id == CSID_X86_KVM_CLK) {
			WRITE_ONCE(st->sys_cs_id, systime_snapshot.cs_id);
			ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn,
							    st, NULL, xtstamp);
		}
	}
#endif
	return ret;
}

/*
 * PTP clock operations
 */

static int ptp_vmclock_adjfine(struct ptp_clock_info *ptp, long delta)
{
	return -EOPNOTSUPP;
}

static int ptp_vmclock_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
	return -EOPNOTSUPP;
}

static int ptp_vmclock_settime(struct ptp_clock_info *ptp,
			       const struct timespec64 *ts)
{
	return -EOPNOTSUPP;
}

static int ptp_vmclock_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
				struct ptp_system_timestamp *sts)
{
	struct vmclock_state *st = container_of(ptp, struct vmclock_state,
						ptp_clock_info);

	return vmclock_get_crosststamp(st, sts, NULL, ts);
}

static int ptp_vmclock_enable(struct ptp_clock_info *ptp,
			      struct ptp_clock_request *rq, int on)
{
	return -EOPNOTSUPP;
}

static const struct ptp_clock_info ptp_vmclock_info = {
	.owner		= THIS_MODULE,
	.max_adj	= 0,
	.n_ext_ts	= 0,
	.n_pins		= 0,
	.pps		= 0,
	.adjfine	= ptp_vmclock_adjfine,
	.adjtime	= ptp_vmclock_adjtime,
	.gettimex64	= ptp_vmclock_gettimex,
	.settime64	= ptp_vmclock_settime,
	.enable		= ptp_vmclock_enable,
	.getcrosststamp = ptp_vmclock_getcrosststamp,
};

static struct ptp_clock *vmclock_ptp_register(struct device *dev,
					      struct vmclock_state *st)
{
	enum clocksource_ids cs_id;

	if (IS_ENABLED(CONFIG_ARM64) &&
	    st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) {
		/* Can we check it's the virtual counter? */
		cs_id = CSID_ARM_ARCH_COUNTER;
	} else if (IS_ENABLED(CONFIG_X86) &&
		   st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) {
		cs_id = CSID_X86_TSC;
	} else {
		return NULL;
	}

	/* Accept TAI directly, or UTC with valid offset for conversion to TAI */
	if (!tai_adjust(st->clk, NULL)) {
		dev_info(dev, "vmclock does not provide unambiguous time\n");
		return NULL;
	}

	st->sys_cs_id = cs_id;
	st->cs_id = cs_id;
	st->ptp_clock_info = ptp_vmclock_info;
	strscpy(st->ptp_clock_info.name, st->name);

	return ptp_clock_register(&st->ptp_clock_info, dev);
}

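/*
 * Per-file state: 'seq' records the last seq_count that read() returned to
 * userspace, so that poll() can report when newer data is available.
 */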
struct vmclock_file_state {
	struct vmclock_state *st;
	atomic_t seq;
};

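/* Allow the shared structure to be mapped read-only, as a single page. */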
static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct vmclock_file_state *fst = fp->private_data;
	struct vmclock_state *st = fst->st;

	if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ)
		return -EROFS;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff)
		return -EINVAL;

	if (io_remap_pfn_range(vma, vma->vm_start,
			       st->res.start >> PAGE_SHIFT, PAGE_SIZE,
			       vma->vm_page_prot))
		return -EAGAIN;

	return 0;
}

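/*
 * As in vmclock_get_crosststamp(), loop until a consistent snapshot has been
 * copied out, then record the seq_count that was seen for use by poll().
 */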
static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
				    size_t count, loff_t *ppos)
{
	ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
	struct vmclock_file_state *fst = fp->private_data;
	struct vmclock_state *st = fst->st;
	uint32_t seq, old_seq;
	size_t max_count;

	if (*ppos >= PAGE_SIZE)
		return 0;

	max_count = PAGE_SIZE - *ppos;
	if (count > max_count)
		count = max_count;

	old_seq = atomic_read(&fst->seq);
	while (1) {
		seq = le32_to_cpu(st->clk->seq_count) & ~1U;
		/* Pairs with hypervisor wmb */
		virt_rmb();

		if (copy_to_user(buf, ((char *)st->clk) + *ppos, count))
			return -EFAULT;

		/* Pairs with hypervisor wmb */
		virt_rmb();
		if (seq == le32_to_cpu(st->clk->seq_count)) {
			/*
			 * Either we updated fst->seq to seq (the latest version we observed)
			 * or someone else did (old_seq == seq), so we can break.
			 */
			if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) ||
			    old_seq == seq) {
				break;
			}
		}

		if (ktime_after(ktime_get(), deadline))
			return -ETIMEDOUT;
	}

	*ppos += count;
	return count;
}

static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait)
{
	struct vmclock_file_state *fst = fp->private_data;
	struct vmclock_state *st = fst->st;
	uint32_t seq;

	/*
	 * The hypervisor will not send us any notifications, so fail
	 * immediately to avoid the caller sleeping forever.
	 */
	if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
		return EPOLLHUP;

	poll_wait(fp, &st->disrupt_wait, wait);

	seq = le32_to_cpu(st->clk->seq_count);
	if (atomic_read(&fst->seq) != seq)
		return EPOLLIN | EPOLLRDNORM;

	return 0;
}

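/*
 * On open, fp->private_data points at the miscdevice; replace it with a
 * per-file state structure for use by read() and poll().
 */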
static int vmclock_miscdev_open(struct inode *inode, struct file *fp)
{
	struct vmclock_state *st = container_of(fp->private_data,
						struct vmclock_state, miscdev);
	struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL);

	if (!fst)
		return -ENOMEM;

	fst->st = st;
	atomic_set(&fst->seq, 0);

	fp->private_data = fst;

	return 0;
}

static int vmclock_miscdev_release(struct inode *inode, struct file *fp)
{
	kfree(fp->private_data);
	return 0;
}

static const struct file_operations vmclock_miscdev_fops = {
	.owner = THIS_MODULE,
	.open = vmclock_miscdev_open,
	.release = vmclock_miscdev_release,
	.mmap = vmclock_miscdev_mmap,
	.read = vmclock_miscdev_read,
	.poll = vmclock_miscdev_poll,
};

/* module operations */

#if IS_ENABLED(CONFIG_ACPI)
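/*
 * Callback for acpi_walk_resources(): accept exactly one memory resource,
 * which must be at least VMCLOCK_MIN_SIZE.
 */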
static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data)
{
	struct vmclock_state *st = data;
	struct resource_win win;
	struct resource *res = &win.res;

	if (ares->type == ACPI_RESOURCE_TYPE_END_TAG)
		return AE_OK;

	/* There can be only one */
	if (resource_type(&st->res) == IORESOURCE_MEM)
		return AE_ERROR;

	if (acpi_dev_resource_memory(ares, res) ||
	    acpi_dev_resource_address_space(ares, &win)) {
		if (resource_type(res) != IORESOURCE_MEM ||
		    resource_size(res) < VMCLOCK_MIN_SIZE)
			return AE_ERROR;

		st->res = *res;
		return AE_OK;
	}

	return AE_ERROR;
}

static void
vmclock_acpi_notification_handler(acpi_handle __always_unused handle,
				  u32 __always_unused event, void *dev)
{
	struct device *device = dev;
	struct vmclock_state *st = device->driver_data;

	wake_up_interruptible(&st->disrupt_wait);
}

static int vmclock_setup_acpi_notification(struct device *dev)
{
	struct acpi_device *adev = ACPI_COMPANION(dev);
	acpi_status status;

	/*
	 * This should never happen as this function is only called when
	 * has_acpi_companion(dev) is true, but the logic is sufficiently
	 * complex that Coverity can't see the tautology.
	 */
	if (!adev)
		return -ENODEV;

	status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
					     vmclock_acpi_notification_handler,
					     dev);
	if (ACPI_FAILURE(status)) {
		dev_err(dev, "failed to install notification handler\n");
		return -ENODEV;
	}

	return 0;
}

static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st)
{
	struct acpi_device *adev = ACPI_COMPANION(dev);
	acpi_status status;

	/*
	 * This should never happen as this function is only called when
	 * has_acpi_companion(dev) is true, but the logic is sufficiently
	 * complex that Coverity can't see the tautology.
	 */
	if (!adev)
		return -ENODEV;

	status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
				     vmclock_acpi_resources, st);
	if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
		dev_err(dev, "failed to get resources\n");
		return -ENODEV;
	}

	return 0;
}
#endif /* CONFIG_ACPI */

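/*
 * On the device-tree path, disruption notifications arrive as an interrupt
 * rather than as an ACPI notification.
 */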
static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *_st)
{
	struct vmclock_state *st = _st;

	wake_up_interruptible(&st->disrupt_wait);
	return IRQ_HANDLED;
}

static int vmclock_probe_dt(struct device *dev, struct vmclock_state *st)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct resource *res;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	st->res = *res;

	return 0;
}

static int vmclock_setup_of_notification(struct device *dev)
{
	struct platform_device *pdev = to_platform_device(dev);
	int irq;

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	return devm_request_irq(dev, irq, vmclock_of_irq_handler, IRQF_SHARED,
				"vmclock", dev->driver_data);
}

static int vmclock_setup_notification(struct device *dev,
				      struct vmclock_state *st)
{
	/* The device does not support notifications. Nothing else to do */
	if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
		return 0;

#if IS_ENABLED(CONFIG_ACPI)
	if (has_acpi_companion(dev))
		return vmclock_setup_acpi_notification(dev);
#endif
	return vmclock_setup_of_notification(dev);
}

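/*
 * devm action: tear down the ACPI notification handler and unregister the
 * PTP clock and the miscdev, if they were registered.
 */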
static void vmclock_remove(void *data)
{
	struct device *dev = data;
	struct vmclock_state *st = dev->driver_data;

	if (!st) {
		dev_err(dev, "%s called with NULL driver_data\n", __func__);
		return;
	}

#if IS_ENABLED(CONFIG_ACPI)
	if (has_acpi_companion(dev))
		acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle,
					   ACPI_DEVICE_NOTIFY,
					   vmclock_acpi_notification_handler);
#endif

	if (st->ptp_clock)
		ptp_clock_unregister(st->ptp_clock);

	if (st->miscdev.minor != MISC_DYNAMIC_MINOR)
		misc_deregister(&st->miscdev);

	dev->driver_data = NULL;
}

static void vmclock_put_idx(void *data)
{
	struct vmclock_state *st = data;

	ida_free(&vmclock_ida, st->index);
}

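/*
 * Map and validate the shared structure, then register the miscdev (if the
 * structure fills a whole page) and the PTP clock (if clock information is
 * present). At least one of the two must succeed.
 */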
static int vmclock_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct vmclock_state *st;
	int ret;

	st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL);
	if (!st)
		return -ENOMEM;

#if IS_ENABLED(CONFIG_ACPI)
	if (has_acpi_companion(dev))
		ret = vmclock_probe_acpi(dev, st);
	else
#endif
		ret = vmclock_probe_dt(dev, st);

	if (ret) {
		dev_info(dev, "Failed to obtain physical address: %d\n", ret);
		return ret;
	}

	if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) {
		dev_info(dev, "Region too small (0x%llx)\n",
			 resource_size(&st->res));
		return -EINVAL;
	}
	st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res),
				MEMREMAP_WB | MEMREMAP_DEC);
	if (IS_ERR(st->clk)) {
		ret = PTR_ERR(st->clk);
		dev_info(dev, "failed to map shared memory\n");
		st->clk = NULL;
		return ret;
	}

	if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC ||
	    le32_to_cpu(st->clk->size) > resource_size(&st->res) ||
	    le16_to_cpu(st->clk->version) != 1) {
		dev_info(dev, "vmclock magic fields invalid\n");
		return -EINVAL;
	}

	ret = ida_alloc(&vmclock_ida, GFP_KERNEL);
	if (ret < 0)
		return ret;

	st->index = ret;
	ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st);
	if (ret)
		return ret;

	st->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "vmclock%d", st->index);
	if (!st->name)
		return -ENOMEM;

	st->miscdev.minor = MISC_DYNAMIC_MINOR;

	init_waitqueue_head(&st->disrupt_wait);
	dev->driver_data = st;

	ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, dev);
	if (ret)
		return ret;

	ret = vmclock_setup_notification(dev, st);
	if (ret)
		return ret;

	/*
	 * If the structure is big enough, it can be mapped to userspace.
	 * Theoretically a guest OS even using larger pages could still
	 * use 4KiB PTEs to map smaller MMIO regions like this, but let's
	 * cross that bridge if/when we come to it.
	 */
	if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) {
		st->miscdev.fops = &vmclock_miscdev_fops;
		st->miscdev.name = st->name;

		ret = misc_register(&st->miscdev);
		if (ret)
			return ret;
	}

	/* If there is valid clock information, register a PTP clock */
	if (VMCLOCK_FIELD_PRESENT(st->clk, time_frac_sec)) {
		/* Can return a silent NULL, or an error. */
		st->ptp_clock = vmclock_ptp_register(dev, st);
		if (IS_ERR(st->ptp_clock)) {
			ret = PTR_ERR(st->ptp_clock);
			st->ptp_clock = NULL;
			return ret;
		}
	}

	if (st->miscdev.minor == MISC_DYNAMIC_MINOR && !st->ptp_clock) {
		/* Neither miscdev nor PTP registered */
		dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n");
		return -ENODEV;
	}

	dev_info(dev, "%s: registered %s%s%s\n", st->name,
		 (st->miscdev.minor != MISC_DYNAMIC_MINOR) ? "miscdev" : "",
		 (st->miscdev.minor != MISC_DYNAMIC_MINOR && st->ptp_clock) ? ", " : "",
		 st->ptp_clock ? "PTP" : "");

	return 0;
}

static const struct acpi_device_id vmclock_acpi_ids[] = {
	{ "AMZNC10C", 0 },
	{ "VMCLOCK", 0 },
	{}
};
MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids);

static const struct of_device_id vmclock_of_ids[] = {
	{ .compatible = "amazon,vmclock", },
	{ },
};
MODULE_DEVICE_TABLE(of, vmclock_of_ids);

static struct platform_driver vmclock_platform_driver = {
	.probe		= vmclock_probe,
	.driver	= {
		.name	= "vmclock",
		.acpi_match_table = vmclock_acpi_ids,
		.of_match_table = vmclock_of_ids,
	},
};

module_platform_driver(vmclock_platform_driver);

MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
MODULE_DESCRIPTION("PTP clock using VMCLOCK");
MODULE_LICENSE("GPL");