xref: /linux/tools/testing/selftests/mm/hmm-tests.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * HMM stands for Heterogeneous Memory Management, it is a helper layer inside
4  * the linux kernel to help device drivers mirror a process address space in
5  * the device. This allows the device to use the same address space which
6  * makes communication and data exchange a lot easier.
7  *
8  * This framework's sole purpose is to exercise various code paths inside
9  * the kernel to make sure that HMM performs as expected and to flush out any
10  * bugs.
11  */
12 
13 #include "kselftest_harness.h"
14 
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <stdint.h>
20 #include <unistd.h>
21 #include <strings.h>
22 #include <time.h>
23 #include <pthread.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <sys/mman.h>
27 #include <sys/ioctl.h>
28 #include <sys/time.h>
29 
30 
31 /*
32  * This is a private UAPI to the kernel test module so it isn't exported
33  * in the usual include/uapi/... directory.
34  */
35 #include <lib/test_hmm_uapi.h>
36 #include <mm/gup_test.h>
37 #include <mm/vm_util.h>
38 
/*
 * Bookkeeping for one test buffer: the mapping under test, the heap copy
 * exchanged with the dmirror driver, and the counters reported back by the
 * most recent dmirror command.
 */
struct hmm_buffer {
	void *ptr;		/* mapped region; munmap()ed by hmm_buffer_free() */
	void *mirror;		/* malloc()ed area passed to the driver as cmd.ptr */
	unsigned long size;	/* length used when unmapping ptr */
	int fd;			/* descriptor passed to mmap(), -1 for anonymous memory */
	uint64_t cpages;	/* cmd.cpages copied out after the last ioctl */
	uint64_t faults;	/* cmd.faults copied out after the last ioctl */
};
47 
/*
 * Device unit numbers; hmm_open() turns each of them into the N of
 * /dev/hmm_dmirrorN.
 */
enum {
	HMM_PRIVATE_DEVICE_ONE = 0,
	HMM_PRIVATE_DEVICE_TWO = 1,
	HMM_COHERENCE_DEVICE_ONE = 2,
	HMM_COHERENCE_DEVICE_TWO = 3,
};
54 
/* Size constants used by the tests. */
#define ONEKB		(1 << 10)
#define ONEMEG		(1 << 20)
#define TWOMEG		(1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX    64
#define NTIMES		10

/*
 * Round x up to the next multiple of a, where a is a power of two.
 *
 * Every use of an argument is parenthesized so that an expression argument
 * such as "2 << 1" is not regrouped by operator precedence. Note that a is
 * still evaluated twice, so it must not have side effects.
 */
#define ALIGN(x, a) (((x) + ((a) - 1)) & (~((a) - 1)))
/* Just the flags we need, copied from mm.h: */

#ifndef FOLL_WRITE
#define FOLL_WRITE	0x01	/* check pte is writable */
#endif

#ifndef FOLL_LONGTERM
#define FOLL_LONGTERM   0x100 /* mapping lifetime is indefinite */
#endif
/* Per-test state for the single-device tests. */
FIXTURE(hmm)
{
	int fd;				/* dmirror device opened in FIXTURE_SETUP */
	unsigned int page_size;		/* sysconf(_SC_PAGE_SIZE) */
	unsigned int page_shift;	/* ffs(page_size) - 1 */
};
78 
/* Parameters that distinguish the variants of the hmm fixture. */
FIXTURE_VARIANT(hmm)
{
	int device_number;	/* unit handed to hmm_open() in FIXTURE_SETUP */
};
83 
84 FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
85 {
86 	.device_number = HMM_PRIVATE_DEVICE_ONE,
87 };
88 
89 FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
90 {
91 	.device_number = HMM_COHERENCE_DEVICE_ONE,
92 };
93 
/* Per-test state for the tests that use two dmirror devices. */
FIXTURE(hmm2)
{
	int fd0;			/* first device, opened in FIXTURE_SETUP */
	int fd1;			/* second device, opened in FIXTURE_SETUP */
	unsigned int page_size;		/* sysconf(_SC_PAGE_SIZE) */
	unsigned int page_shift;	/* ffs(page_size) - 1 */
};
101 
/* Parameters that distinguish the variants of the hmm2 fixture. */
FIXTURE_VARIANT(hmm2)
{
	int device_number0;	/* unit opened as fd0 */
	int device_number1;	/* unit opened as fd1 */
};
107 
108 FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
109 {
110 	.device_number0 = HMM_PRIVATE_DEVICE_ONE,
111 	.device_number1 = HMM_PRIVATE_DEVICE_TWO,
112 };
113 
114 FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
115 {
116 	.device_number0 = HMM_COHERENCE_DEVICE_ONE,
117 	.device_number1 = HMM_COHERENCE_DEVICE_TWO,
118 };
119 
120 static int hmm_open(int unit)
121 {
122 	char pathname[HMM_PATH_MAX];
123 	int fd;
124 
125 	snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit);
126 	fd = open(pathname, O_RDWR, 0);
127 	if (fd < 0)
128 		fprintf(stderr, "could not open hmm dmirror driver (%s)\n",
129 			pathname);
130 	return fd;
131 }
132 
133 static bool hmm_is_coherent_type(int dev_num)
134 {
135 	return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
136 }
137 
138 FIXTURE_SETUP(hmm)
139 {
140 	self->page_size = sysconf(_SC_PAGE_SIZE);
141 	self->page_shift = ffs(self->page_size) - 1;
142 
143 	self->fd = hmm_open(variant->device_number);
144 	if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
145 		SKIP(return, "DEVICE_COHERENT not available");
146 	ASSERT_GE(self->fd, 0);
147 }
148 
149 FIXTURE_SETUP(hmm2)
150 {
151 	self->page_size = sysconf(_SC_PAGE_SIZE);
152 	self->page_shift = ffs(self->page_size) - 1;
153 
154 	self->fd0 = hmm_open(variant->device_number0);
155 	if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
156 		SKIP(return, "DEVICE_COHERENT not available");
157 	ASSERT_GE(self->fd0, 0);
158 	self->fd1 = hmm_open(variant->device_number1);
159 	ASSERT_GE(self->fd1, 0);
160 }
161 
162 FIXTURE_TEARDOWN(hmm)
163 {
164 	int ret = close(self->fd);
165 
166 	ASSERT_EQ(ret, 0);
167 	self->fd = -1;
168 }
169 
170 FIXTURE_TEARDOWN(hmm2)
171 {
172 	int ret = close(self->fd0);
173 
174 	ASSERT_EQ(ret, 0);
175 	self->fd0 = -1;
176 
177 	ret = close(self->fd1);
178 	ASSERT_EQ(ret, 0);
179 	self->fd1 = -1;
180 }
181 
182 static int hmm_dmirror_cmd(int fd,
183 			   unsigned long request,
184 			   struct hmm_buffer *buffer,
185 			   unsigned long npages)
186 {
187 	struct hmm_dmirror_cmd cmd;
188 	int ret;
189 
190 	/* Simulate a device reading system memory. */
191 	cmd.addr = (__u64)buffer->ptr;
192 	cmd.ptr = (__u64)buffer->mirror;
193 	cmd.npages = npages;
194 
195 	for (;;) {
196 		ret = ioctl(fd, request, &cmd);
197 		if (ret == 0)
198 			break;
199 		if (errno == EINTR)
200 			continue;
201 		return -errno;
202 	}
203 	buffer->cpages = cmd.cpages;
204 	buffer->faults = cmd.faults;
205 
206 	return 0;
207 }
208 
209 static void hmm_buffer_free(struct hmm_buffer *buffer)
210 {
211 	if (buffer == NULL)
212 		return;
213 
214 	if (buffer->ptr) {
215 		munmap(buffer->ptr, buffer->size);
216 		buffer->ptr = NULL;
217 	}
218 	free(buffer->mirror);
219 	free(buffer);
220 }
221 
222 /*
223  * Create a temporary file that will be deleted on close.
224  */
225 static int hmm_create_file(unsigned long size)
226 {
227 	char path[HMM_PATH_MAX];
228 	int fd;
229 
230 	strcpy(path, "/tmp");
231 	fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600);
232 	if (fd >= 0) {
233 		int r;
234 
235 		do {
236 			r = ftruncate(fd, size);
237 		} while (r == -1 && errno == EINTR);
238 		if (!r)
239 			return fd;
240 		close(fd);
241 	}
242 	return -1;
243 }
244 
/*
 * Return a random unsigned number read from /dev/urandom.
 *
 * The descriptor is opened on first use and kept open for later calls.
 * If the device cannot be opened, or the read fails or hits end of file,
 * a message is printed to stderr and ~0U is returned, so the caller never
 * sees an uninitialized value.
 */
static unsigned int hmm_random(void)
{
	static int fd = -1;
	unsigned char *dst;
	size_t left;
	unsigned int r;

	if (fd < 0) {
		fd = open("/dev/urandom", O_RDONLY);
		if (fd < 0) {
			fprintf(stderr, "%s:%d failed to open /dev/urandom\n",
					__FILE__, __LINE__);
			return ~0U;
		}
	}

	/* Loop so that short reads and EINTR still fill all of r. */
	dst = (unsigned char *)&r;
	left = sizeof(r);
	while (left > 0) {
		ssize_t got = read(fd, dst, left);

		if (got < 0 && errno == EINTR)
			continue;
		if (got <= 0) {
			fprintf(stderr, "%s:%d failed to read /dev/urandom\n",
					__FILE__, __LINE__);
			return ~0U;
		}
		dst += got;
		left -= got;
	}
	return r;
}
264 
/*
 * Sleep for n nanoseconds.
 *
 * n is split into whole seconds and a sub-second remainder because
 * nanosleep() rejects a tv_nsec of one second or more, and an unsigned int
 * can hold values above that. The call is not restarted if it is
 * interrupted, and its return value is ignored.
 */
static void hmm_nanosleep(unsigned int n)
{
	const unsigned int nsec_per_sec = 1000000000U;
	struct timespec t;

	t.tv_sec = n / nsec_per_sec;
	t.tv_nsec = n % nsec_per_sec;
	nanosleep(&t, NULL);
}
273 
274 static int hmm_migrate_sys_to_dev(int fd,
275 				   struct hmm_buffer *buffer,
276 				   unsigned long npages)
277 {
278 	return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
279 }
280 
281 static int hmm_migrate_dev_to_sys(int fd,
282 				   struct hmm_buffer *buffer,
283 				   unsigned long npages)
284 {
285 	return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
286 }
287 
/*
 * Simple NULL test of device open/close: the body is empty on purpose, so
 * only the fixture setup (open) and teardown (close) are exercised.
 */
TEST_F(hmm, open_close)
{
	/* Nothing to do. */
}
294 
295 /*
296  * Read private anonymous memory.
297  */
298 TEST_F(hmm, anon_read)
299 {
300 	struct hmm_buffer *buffer;
301 	unsigned long npages;
302 	unsigned long size;
303 	unsigned long i;
304 	int *ptr;
305 	int ret;
306 	int val;
307 
308 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
309 	ASSERT_NE(npages, 0);
310 	size = npages << self->page_shift;
311 
312 	buffer = malloc(sizeof(*buffer));
313 	ASSERT_NE(buffer, NULL);
314 
315 	buffer->fd = -1;
316 	buffer->size = size;
317 	buffer->mirror = malloc(size);
318 	ASSERT_NE(buffer->mirror, NULL);
319 
320 	buffer->ptr = mmap(NULL, size,
321 			   PROT_READ | PROT_WRITE,
322 			   MAP_PRIVATE | MAP_ANONYMOUS,
323 			   buffer->fd, 0);
324 	ASSERT_NE(buffer->ptr, MAP_FAILED);
325 
326 	/*
327 	 * Initialize buffer in system memory but leave the first two pages
328 	 * zero (pte_none and pfn_zero).
329 	 */
330 	i = 2 * self->page_size / sizeof(*ptr);
331 	for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
332 		ptr[i] = i;
333 
334 	/* Set buffer permission to read-only. */
335 	ret = mprotect(buffer->ptr, size, PROT_READ);
336 	ASSERT_EQ(ret, 0);
337 
338 	/* Populate the CPU page table with a special zero page. */
339 	val = *(int *)(buffer->ptr + self->page_size);
340 	ASSERT_EQ(val, 0);
341 
342 	/* Simulate a device reading system memory. */
343 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
344 	ASSERT_EQ(ret, 0);
345 	ASSERT_EQ(buffer->cpages, npages);
346 	ASSERT_EQ(buffer->faults, 1);
347 
348 	/* Check what the device read. */
349 	ptr = buffer->mirror;
350 	for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i)
351 		ASSERT_EQ(ptr[i], 0);
352 	for (; i < size / sizeof(*ptr); ++i)
353 		ASSERT_EQ(ptr[i], i);
354 
355 	hmm_buffer_free(buffer);
356 }
357 
358 /*
359  * Read private anonymous memory which has been protected with
360  * mprotect() PROT_NONE.
361  */
362 TEST_F(hmm, anon_read_prot)
363 {
364 	struct hmm_buffer *buffer;
365 	unsigned long npages;
366 	unsigned long size;
367 	unsigned long i;
368 	int *ptr;
369 	int ret;
370 
371 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
372 	ASSERT_NE(npages, 0);
373 	size = npages << self->page_shift;
374 
375 	buffer = malloc(sizeof(*buffer));
376 	ASSERT_NE(buffer, NULL);
377 
378 	buffer->fd = -1;
379 	buffer->size = size;
380 	buffer->mirror = malloc(size);
381 	ASSERT_NE(buffer->mirror, NULL);
382 
383 	buffer->ptr = mmap(NULL, size,
384 			   PROT_READ | PROT_WRITE,
385 			   MAP_PRIVATE | MAP_ANONYMOUS,
386 			   buffer->fd, 0);
387 	ASSERT_NE(buffer->ptr, MAP_FAILED);
388 
389 	/* Initialize buffer in system memory. */
390 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
391 		ptr[i] = i;
392 
393 	/* Initialize mirror buffer so we can verify it isn't written. */
394 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
395 		ptr[i] = -i;
396 
397 	/* Protect buffer from reading. */
398 	ret = mprotect(buffer->ptr, size, PROT_NONE);
399 	ASSERT_EQ(ret, 0);
400 
401 	/* Simulate a device reading system memory. */
402 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
403 	ASSERT_EQ(ret, -EFAULT);
404 
405 	/* Allow CPU to read the buffer so we can check it. */
406 	ret = mprotect(buffer->ptr, size, PROT_READ);
407 	ASSERT_EQ(ret, 0);
408 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
409 		ASSERT_EQ(ptr[i], i);
410 
411 	/* Check what the device read. */
412 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
413 		ASSERT_EQ(ptr[i], -i);
414 
415 	hmm_buffer_free(buffer);
416 }
417 
418 /*
419  * Write private anonymous memory.
420  */
421 TEST_F(hmm, anon_write)
422 {
423 	struct hmm_buffer *buffer;
424 	unsigned long npages;
425 	unsigned long size;
426 	unsigned long i;
427 	int *ptr;
428 	int ret;
429 
430 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
431 	ASSERT_NE(npages, 0);
432 	size = npages << self->page_shift;
433 
434 	buffer = malloc(sizeof(*buffer));
435 	ASSERT_NE(buffer, NULL);
436 
437 	buffer->fd = -1;
438 	buffer->size = size;
439 	buffer->mirror = malloc(size);
440 	ASSERT_NE(buffer->mirror, NULL);
441 
442 	buffer->ptr = mmap(NULL, size,
443 			   PROT_READ | PROT_WRITE,
444 			   MAP_PRIVATE | MAP_ANONYMOUS,
445 			   buffer->fd, 0);
446 	ASSERT_NE(buffer->ptr, MAP_FAILED);
447 
448 	/* Initialize data that the device will write to buffer->ptr. */
449 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
450 		ptr[i] = i;
451 
452 	/* Simulate a device writing system memory. */
453 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
454 	ASSERT_EQ(ret, 0);
455 	ASSERT_EQ(buffer->cpages, npages);
456 	ASSERT_EQ(buffer->faults, 1);
457 
458 	/* Check what the device wrote. */
459 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
460 		ASSERT_EQ(ptr[i], i);
461 
462 	hmm_buffer_free(buffer);
463 }
464 
465 /*
466  * Write private anonymous memory which has been protected with
467  * mprotect() PROT_READ.
468  */
469 TEST_F(hmm, anon_write_prot)
470 {
471 	struct hmm_buffer *buffer;
472 	unsigned long npages;
473 	unsigned long size;
474 	unsigned long i;
475 	int *ptr;
476 	int ret;
477 
478 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
479 	ASSERT_NE(npages, 0);
480 	size = npages << self->page_shift;
481 
482 	buffer = malloc(sizeof(*buffer));
483 	ASSERT_NE(buffer, NULL);
484 
485 	buffer->fd = -1;
486 	buffer->size = size;
487 	buffer->mirror = malloc(size);
488 	ASSERT_NE(buffer->mirror, NULL);
489 
490 	buffer->ptr = mmap(NULL, size,
491 			   PROT_READ,
492 			   MAP_PRIVATE | MAP_ANONYMOUS,
493 			   buffer->fd, 0);
494 	ASSERT_NE(buffer->ptr, MAP_FAILED);
495 
496 	/* Simulate a device reading a zero page of memory. */
497 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1);
498 	ASSERT_EQ(ret, 0);
499 	ASSERT_EQ(buffer->cpages, 1);
500 	ASSERT_EQ(buffer->faults, 1);
501 
502 	/* Initialize data that the device will write to buffer->ptr. */
503 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
504 		ptr[i] = i;
505 
506 	/* Simulate a device writing system memory. */
507 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
508 	ASSERT_EQ(ret, -EPERM);
509 
510 	/* Check what the device wrote. */
511 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
512 		ASSERT_EQ(ptr[i], 0);
513 
514 	/* Now allow writing and see that the zero page is replaced. */
515 	ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ);
516 	ASSERT_EQ(ret, 0);
517 
518 	/* Simulate a device writing system memory. */
519 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
520 	ASSERT_EQ(ret, 0);
521 	ASSERT_EQ(buffer->cpages, npages);
522 	ASSERT_EQ(buffer->faults, 1);
523 
524 	/* Check what the device wrote. */
525 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
526 		ASSERT_EQ(ptr[i], i);
527 
528 	hmm_buffer_free(buffer);
529 }
530 
531 /*
532  * Check that a device writing an anonymous private mapping
533  * will copy-on-write if a child process inherits the mapping.
534  *
535  * Also verifies after fork() memory the device can be read by child.
536  */
537 TEST_F(hmm, anon_write_child)
538 {
539 	struct hmm_buffer *buffer;
540 	unsigned long npages;
541 	unsigned long size;
542 	unsigned long i;
543 	void *old_ptr;
544 	void *map;
545 	int *ptr;
546 	pid_t pid;
547 	int child_fd;
548 	int ret, use_thp, migrate;
549 
550 	for (migrate = 0; migrate < 2; ++migrate) {
551 		for (use_thp = 0; use_thp < 2; ++use_thp) {
552 			npages = ALIGN(use_thp ? read_pmd_pagesize() : HMM_BUFFER_SIZE,
553 				       self->page_size) >> self->page_shift;
554 			ASSERT_NE(npages, 0);
555 			size = npages << self->page_shift;
556 
557 			buffer = malloc(sizeof(*buffer));
558 			ASSERT_NE(buffer, NULL);
559 
560 			buffer->fd = -1;
561 			buffer->size = size * 2;
562 			buffer->mirror = malloc(size);
563 			ASSERT_NE(buffer->mirror, NULL);
564 
565 			buffer->ptr = mmap(NULL, size * 2,
566 					   PROT_READ | PROT_WRITE,
567 					   MAP_PRIVATE | MAP_ANONYMOUS,
568 					   buffer->fd, 0);
569 			ASSERT_NE(buffer->ptr, MAP_FAILED);
570 
571 			old_ptr = buffer->ptr;
572 			if (use_thp) {
573 				map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
574 				ret = madvise(map, size, MADV_HUGEPAGE);
575 				ASSERT_EQ(ret, 0);
576 				buffer->ptr = map;
577 			}
578 
579 			/* Initialize buffer->ptr so we can tell if it is written. */
580 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
581 				ptr[i] = i;
582 
583 			/* Initialize data that the device will write to buffer->ptr. */
584 			for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
585 				ptr[i] = -i;
586 
587 			if (migrate) {
588 				ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
589 				ASSERT_EQ(ret, 0);
590 				ASSERT_EQ(buffer->cpages, npages);
591 
592 			}
593 
594 			pid = fork();
595 			if (pid == -1)
596 				ASSERT_EQ(pid, 0);
597 			if (pid != 0) {
598 				waitpid(pid, &ret, 0);
599 				ASSERT_EQ(WIFEXITED(ret), 1);
600 
601 				/* Check that the parent's buffer did not change. */
602 				for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
603 					ASSERT_EQ(ptr[i], i);
604 
605 				buffer->ptr = old_ptr;
606 				hmm_buffer_free(buffer);
607 				continue;
608 			}
609 
610 			/* Check that we see the parent's values. */
611 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
612 				ASSERT_EQ(ptr[i], i);
613 			if (!migrate) {
614 				for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
615 					ASSERT_EQ(ptr[i], -i);
616 			}
617 
618 			/* The child process needs its own mirror to its own mm. */
619 			child_fd = hmm_open(0);
620 			ASSERT_GE(child_fd, 0);
621 
622 			/* Simulate a device writing system memory. */
623 			ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
624 			ASSERT_EQ(ret, 0);
625 			ASSERT_EQ(buffer->cpages, npages);
626 			ASSERT_EQ(buffer->faults, 1);
627 
628 			/* Check what the device wrote. */
629 			if (!migrate) {
630 				for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
631 					ASSERT_EQ(ptr[i], -i);
632 			}
633 
634 			close(child_fd);
635 			exit(0);
636 		}
637 	}
638 }
639 
640 /*
641  * Check that a device writing an anonymous shared mapping
642  * will not copy-on-write if a child process inherits the mapping.
643  */
644 TEST_F(hmm, anon_write_child_shared)
645 {
646 	struct hmm_buffer *buffer;
647 	unsigned long npages;
648 	unsigned long size;
649 	unsigned long i;
650 	int *ptr;
651 	pid_t pid;
652 	int child_fd;
653 	int ret;
654 
655 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
656 	ASSERT_NE(npages, 0);
657 	size = npages << self->page_shift;
658 
659 	buffer = malloc(sizeof(*buffer));
660 	ASSERT_NE(buffer, NULL);
661 
662 	buffer->fd = -1;
663 	buffer->size = size;
664 	buffer->mirror = malloc(size);
665 	ASSERT_NE(buffer->mirror, NULL);
666 
667 	buffer->ptr = mmap(NULL, size,
668 			   PROT_READ | PROT_WRITE,
669 			   MAP_SHARED | MAP_ANONYMOUS,
670 			   buffer->fd, 0);
671 	ASSERT_NE(buffer->ptr, MAP_FAILED);
672 
673 	/* Initialize buffer->ptr so we can tell if it is written. */
674 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
675 		ptr[i] = i;
676 
677 	/* Initialize data that the device will write to buffer->ptr. */
678 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
679 		ptr[i] = -i;
680 
681 	pid = fork();
682 	if (pid == -1)
683 		ASSERT_EQ(pid, 0);
684 	if (pid != 0) {
685 		waitpid(pid, &ret, 0);
686 		ASSERT_EQ(WIFEXITED(ret), 1);
687 
688 		/* Check that the parent's buffer did change. */
689 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
690 			ASSERT_EQ(ptr[i], -i);
691 		return;
692 	}
693 
694 	/* Check that we see the parent's values. */
695 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
696 		ASSERT_EQ(ptr[i], i);
697 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
698 		ASSERT_EQ(ptr[i], -i);
699 
700 	/* The child process needs its own mirror to its own mm. */
701 	child_fd = hmm_open(0);
702 	ASSERT_GE(child_fd, 0);
703 
704 	/* Simulate a device writing system memory. */
705 	ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
706 	ASSERT_EQ(ret, 0);
707 	ASSERT_EQ(buffer->cpages, npages);
708 	ASSERT_EQ(buffer->faults, 1);
709 
710 	/* Check what the device wrote. */
711 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
712 		ASSERT_EQ(ptr[i], -i);
713 
714 	close(child_fd);
715 	exit(0);
716 }
717 
718 /*
719  * Write private anonymous huge page.
720  */
721 TEST_F(hmm, anon_write_huge)
722 {
723 	struct hmm_buffer *buffer;
724 	unsigned long npages;
725 	unsigned long size;
726 	unsigned long i;
727 	void *old_ptr;
728 	void *map;
729 	int *ptr;
730 	int ret;
731 
732 	size = 2 * read_pmd_pagesize();
733 
734 	buffer = malloc(sizeof(*buffer));
735 	ASSERT_NE(buffer, NULL);
736 
737 	buffer->fd = -1;
738 	buffer->size = size;
739 	buffer->mirror = malloc(size);
740 	ASSERT_NE(buffer->mirror, NULL);
741 
742 	buffer->ptr = mmap(NULL, size,
743 			   PROT_READ | PROT_WRITE,
744 			   MAP_PRIVATE | MAP_ANONYMOUS,
745 			   buffer->fd, 0);
746 	ASSERT_NE(buffer->ptr, MAP_FAILED);
747 
748 	size /= 2;
749 	npages = size >> self->page_shift;
750 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
751 	ret = madvise(map, size, MADV_HUGEPAGE);
752 	ASSERT_EQ(ret, 0);
753 	old_ptr = buffer->ptr;
754 	buffer->ptr = map;
755 
756 	/* Initialize data that the device will write to buffer->ptr. */
757 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
758 		ptr[i] = i;
759 
760 	/* Simulate a device writing system memory. */
761 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
762 	ASSERT_EQ(ret, 0);
763 	ASSERT_EQ(buffer->cpages, npages);
764 	ASSERT_EQ(buffer->faults, 1);
765 
766 	/* Check what the device wrote. */
767 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
768 		ASSERT_EQ(ptr[i], i);
769 
770 	buffer->ptr = old_ptr;
771 	hmm_buffer_free(buffer);
772 }
773 
774 /*
775  * Write huge TLBFS page.
776  */
777 TEST_F(hmm, anon_write_hugetlbfs)
778 {
779 	struct hmm_buffer *buffer;
780 	unsigned long npages;
781 	unsigned long size;
782 	unsigned long default_hsize = default_huge_page_size();
783 	unsigned long i;
784 	int *ptr;
785 	int ret;
786 
787 	if (!default_hsize)
788 		SKIP(return, "Huge page size could not be determined");
789 
790 	size = ALIGN(TWOMEG, default_hsize);
791 	npages = size >> self->page_shift;
792 
793 	buffer = malloc(sizeof(*buffer));
794 	ASSERT_NE(buffer, NULL);
795 
796 	buffer->ptr = mmap(NULL, size,
797 				   PROT_READ | PROT_WRITE,
798 				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
799 				   -1, 0);
800 	if (buffer->ptr == MAP_FAILED) {
801 		free(buffer);
802 		SKIP(return, "Huge page could not be allocated");
803 	}
804 
805 	buffer->fd = -1;
806 	buffer->size = size;
807 	buffer->mirror = malloc(size);
808 	ASSERT_NE(buffer->mirror, NULL);
809 
810 	/* Initialize data that the device will write to buffer->ptr. */
811 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
812 		ptr[i] = i;
813 
814 	/* Simulate a device writing system memory. */
815 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
816 	ASSERT_EQ(ret, 0);
817 	ASSERT_EQ(buffer->cpages, npages);
818 	ASSERT_EQ(buffer->faults, 1);
819 
820 	/* Check what the device wrote. */
821 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
822 		ASSERT_EQ(ptr[i], i);
823 
824 	munmap(buffer->ptr, buffer->size);
825 	buffer->ptr = NULL;
826 	hmm_buffer_free(buffer);
827 }
828 
829 /*
830  * Read mmap'ed file memory.
831  */
832 TEST_F(hmm, file_read)
833 {
834 	struct hmm_buffer *buffer;
835 	unsigned long npages;
836 	unsigned long size;
837 	unsigned long i;
838 	int *ptr;
839 	int ret;
840 	int fd;
841 	ssize_t len;
842 
843 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
844 	ASSERT_NE(npages, 0);
845 	size = npages << self->page_shift;
846 
847 	fd = hmm_create_file(size);
848 	ASSERT_GE(fd, 0);
849 
850 	buffer = malloc(sizeof(*buffer));
851 	ASSERT_NE(buffer, NULL);
852 
853 	buffer->fd = fd;
854 	buffer->size = size;
855 	buffer->mirror = malloc(size);
856 	ASSERT_NE(buffer->mirror, NULL);
857 
858 	/* Write initial contents of the file. */
859 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
860 		ptr[i] = i;
861 	len = pwrite(fd, buffer->mirror, size, 0);
862 	ASSERT_EQ(len, size);
863 	memset(buffer->mirror, 0, size);
864 
865 	buffer->ptr = mmap(NULL, size,
866 			   PROT_READ,
867 			   MAP_SHARED,
868 			   buffer->fd, 0);
869 	ASSERT_NE(buffer->ptr, MAP_FAILED);
870 
871 	/* Simulate a device reading system memory. */
872 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
873 	ASSERT_EQ(ret, 0);
874 	ASSERT_EQ(buffer->cpages, npages);
875 	ASSERT_EQ(buffer->faults, 1);
876 
877 	/* Check what the device read. */
878 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
879 		ASSERT_EQ(ptr[i], i);
880 
881 	hmm_buffer_free(buffer);
882 }
883 
884 /*
885  * Write mmap'ed file memory.
886  */
887 TEST_F(hmm, file_write)
888 {
889 	struct hmm_buffer *buffer;
890 	unsigned long npages;
891 	unsigned long size;
892 	unsigned long i;
893 	int *ptr;
894 	int ret;
895 	int fd;
896 	ssize_t len;
897 
898 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
899 	ASSERT_NE(npages, 0);
900 	size = npages << self->page_shift;
901 
902 	fd = hmm_create_file(size);
903 	ASSERT_GE(fd, 0);
904 
905 	buffer = malloc(sizeof(*buffer));
906 	ASSERT_NE(buffer, NULL);
907 
908 	buffer->fd = fd;
909 	buffer->size = size;
910 	buffer->mirror = malloc(size);
911 	ASSERT_NE(buffer->mirror, NULL);
912 
913 	buffer->ptr = mmap(NULL, size,
914 			   PROT_READ | PROT_WRITE,
915 			   MAP_SHARED,
916 			   buffer->fd, 0);
917 	ASSERT_NE(buffer->ptr, MAP_FAILED);
918 
919 	/* Initialize data that the device will write to buffer->ptr. */
920 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
921 		ptr[i] = i;
922 
923 	/* Simulate a device writing system memory. */
924 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
925 	ASSERT_EQ(ret, 0);
926 	ASSERT_EQ(buffer->cpages, npages);
927 	ASSERT_EQ(buffer->faults, 1);
928 
929 	/* Check what the device wrote. */
930 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
931 		ASSERT_EQ(ptr[i], i);
932 
933 	/* Check that the device also wrote the file. */
934 	len = pread(fd, buffer->mirror, size, 0);
935 	ASSERT_EQ(len, size);
936 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
937 		ASSERT_EQ(ptr[i], i);
938 
939 	hmm_buffer_free(buffer);
940 }
941 
942 /*
943  * Migrate anonymous memory to device private memory.
944  */
945 TEST_F(hmm, migrate)
946 {
947 	struct hmm_buffer *buffer;
948 	unsigned long npages;
949 	unsigned long size;
950 	unsigned long i;
951 	int *ptr;
952 	int ret;
953 
954 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
955 	ASSERT_NE(npages, 0);
956 	size = npages << self->page_shift;
957 
958 	buffer = malloc(sizeof(*buffer));
959 	ASSERT_NE(buffer, NULL);
960 
961 	buffer->fd = -1;
962 	buffer->size = size;
963 	buffer->mirror = malloc(size);
964 	ASSERT_NE(buffer->mirror, NULL);
965 
966 	buffer->ptr = mmap(NULL, size,
967 			   PROT_READ | PROT_WRITE,
968 			   MAP_PRIVATE | MAP_ANONYMOUS,
969 			   buffer->fd, 0);
970 	ASSERT_NE(buffer->ptr, MAP_FAILED);
971 
972 	/* Initialize buffer in system memory. */
973 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
974 		ptr[i] = i;
975 
976 	/* Migrate memory to device. */
977 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
978 	ASSERT_EQ(ret, 0);
979 	ASSERT_EQ(buffer->cpages, npages);
980 
981 	/* Check what the device read. */
982 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
983 		ASSERT_EQ(ptr[i], i);
984 
985 	hmm_buffer_free(buffer);
986 }
987 
988 /*
989  * Migrate private file memory to device private memory.
990  */
991 TEST_F(hmm, migrate_file_private)
992 {
993 	struct hmm_buffer *buffer;
994 	unsigned long npages;
995 	unsigned long size;
996 	unsigned long i;
997 	int *ptr;
998 	int ret;
999 	int fd;
1000 
1001 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1002 	ASSERT_NE(npages, 0);
1003 	size = npages << self->page_shift;
1004 
1005 	fd = hmm_create_file(size);
1006 	ASSERT_GE(fd, 0);
1007 
1008 	buffer = malloc(sizeof(*buffer));
1009 	ASSERT_NE(buffer, NULL);
1010 
1011 	buffer->fd = fd;
1012 	buffer->size = size;
1013 	buffer->mirror = malloc(size);
1014 	ASSERT_NE(buffer->mirror, NULL);
1015 
1016 	buffer->ptr = mmap(NULL, size,
1017 			   PROT_READ | PROT_WRITE,
1018 			   MAP_PRIVATE,
1019 			   buffer->fd, 0);
1020 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1021 
1022 	/* Initialize buffer in system memory. */
1023 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1024 		ptr[i] = i;
1025 
1026 	/* Migrate memory to device. */
1027 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1028 	ASSERT_EQ(ret, 0);
1029 	ASSERT_EQ(buffer->cpages, npages);
1030 
1031 	/* Check what the device read. */
1032 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1033 		ASSERT_EQ(ptr[i], i);
1034 
1035 	hmm_buffer_free(buffer);
1036 }
1037 
1038 /*
1039  * Migrate anonymous memory to device private memory and fault some of it back
1040  * to system memory, then try migrating the resulting mix of system and device
1041  * private memory to the device.
1042  */
1043 TEST_F(hmm, migrate_fault)
1044 {
1045 	struct hmm_buffer *buffer;
1046 	unsigned long npages;
1047 	unsigned long size;
1048 	unsigned long i;
1049 	int *ptr;
1050 	int ret;
1051 
1052 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1053 	ASSERT_NE(npages, 0);
1054 	size = npages << self->page_shift;
1055 
1056 	buffer = malloc(sizeof(*buffer));
1057 	ASSERT_NE(buffer, NULL);
1058 
1059 	buffer->fd = -1;
1060 	buffer->size = size;
1061 	buffer->mirror = malloc(size);
1062 	ASSERT_NE(buffer->mirror, NULL);
1063 
1064 	buffer->ptr = mmap(NULL, size,
1065 			   PROT_READ | PROT_WRITE,
1066 			   MAP_PRIVATE | MAP_ANONYMOUS,
1067 			   buffer->fd, 0);
1068 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1069 
1070 	/* Initialize buffer in system memory. */
1071 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1072 		ptr[i] = i;
1073 
1074 	/* Migrate memory to device. */
1075 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1076 	ASSERT_EQ(ret, 0);
1077 	ASSERT_EQ(buffer->cpages, npages);
1078 
1079 	/* Check what the device read. */
1080 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1081 		ASSERT_EQ(ptr[i], i);
1082 
1083 	/* Fault half the pages back to system memory and check them. */
1084 	for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
1085 		ASSERT_EQ(ptr[i], i);
1086 
1087 	/* Migrate memory to the device again. */
1088 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1089 	ASSERT_EQ(ret, 0);
1090 	ASSERT_EQ(buffer->cpages, npages);
1091 
1092 	/* Check what the device read. */
1093 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1094 		ASSERT_EQ(ptr[i], i);
1095 
1096 	hmm_buffer_free(buffer);
1097 }
1098 
1099 TEST_F(hmm, migrate_release)
1100 {
1101 	struct hmm_buffer *buffer;
1102 	unsigned long npages;
1103 	unsigned long size;
1104 	unsigned long i;
1105 	int *ptr;
1106 	int ret;
1107 
1108 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1109 	ASSERT_NE(npages, 0);
1110 	size = npages << self->page_shift;
1111 
1112 	buffer = malloc(sizeof(*buffer));
1113 	ASSERT_NE(buffer, NULL);
1114 
1115 	buffer->fd = -1;
1116 	buffer->size = size;
1117 	buffer->mirror = malloc(size);
1118 	ASSERT_NE(buffer->mirror, NULL);
1119 
1120 	buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
1121 			   MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
1122 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1123 
1124 	/* Initialize buffer in system memory. */
1125 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1126 		ptr[i] = i;
1127 
1128 	/* Migrate memory to device. */
1129 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1130 	ASSERT_EQ(ret, 0);
1131 	ASSERT_EQ(buffer->cpages, npages);
1132 
1133 	/* Check what the device read. */
1134 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1135 		ASSERT_EQ(ptr[i], i);
1136 
1137 	/* Release device memory. */
1138 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages);
1139 	ASSERT_EQ(ret, 0);
1140 
1141 	/* Fault pages back to system memory and check them. */
1142 	for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
1143 		ASSERT_EQ(ptr[i], i);
1144 
1145 	hmm_buffer_free(buffer);
1146 }
1147 
1148 /*
1149  * Migrate anonymous shared memory to device private memory.
1150  */
1151 TEST_F(hmm, migrate_shared)
1152 {
1153 	struct hmm_buffer *buffer;
1154 	unsigned long npages;
1155 	unsigned long size;
1156 	int ret;
1157 
1158 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1159 	ASSERT_NE(npages, 0);
1160 	size = npages << self->page_shift;
1161 
1162 	buffer = malloc(sizeof(*buffer));
1163 	ASSERT_NE(buffer, NULL);
1164 
1165 	buffer->fd = -1;
1166 	buffer->size = size;
1167 	buffer->mirror = malloc(size);
1168 	ASSERT_NE(buffer->mirror, NULL);
1169 
1170 	buffer->ptr = mmap(NULL, size,
1171 			   PROT_READ | PROT_WRITE,
1172 			   MAP_SHARED | MAP_ANONYMOUS,
1173 			   buffer->fd, 0);
1174 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1175 
1176 	/* Migrate memory to device. */
1177 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1178 	ASSERT_EQ(ret, -ENOENT);
1179 
1180 	hmm_buffer_free(buffer);
1181 }
1182 
1183 /*
1184  * Try to migrate various memory types to device private memory.
1185  */
1186 TEST_F(hmm2, migrate_mixed)
1187 {
1188 	struct hmm_buffer *buffer;
1189 	unsigned long npages;
1190 	unsigned long size;
1191 	int *ptr;
1192 	unsigned char *p;
1193 	int ret;
1194 	int val;
1195 
1196 	npages = 6;
1197 	size = npages << self->page_shift;
1198 
1199 	buffer = malloc(sizeof(*buffer));
1200 	ASSERT_NE(buffer, NULL);
1201 
1202 	buffer->fd = -1;
1203 	buffer->size = size;
1204 	buffer->mirror = malloc(size);
1205 	ASSERT_NE(buffer->mirror, NULL);
1206 
1207 	/* Reserve a range of addresses. */
1208 	buffer->ptr = mmap(NULL, size,
1209 			   PROT_NONE,
1210 			   MAP_PRIVATE | MAP_ANONYMOUS,
1211 			   buffer->fd, 0);
1212 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1213 	p = buffer->ptr;
1214 
1215 	/* Migrating a protected area should be an error. */
1216 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
1217 	ASSERT_EQ(ret, -EINVAL);
1218 
1219 	/* Punch a hole after the first page address. */
1220 	ret = munmap(buffer->ptr + self->page_size, self->page_size);
1221 	ASSERT_EQ(ret, 0);
1222 
1223 	/* We expect an error if the vma doesn't cover the range. */
1224 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
1225 	ASSERT_EQ(ret, -EINVAL);
1226 
1227 	/* Page 2 will be a read-only zero page. */
1228 	ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
1229 				PROT_READ);
1230 	ASSERT_EQ(ret, 0);
1231 	ptr = (int *)(buffer->ptr + 2 * self->page_size);
1232 	val = *ptr + 3;
1233 	ASSERT_EQ(val, 3);
1234 
1235 	/* Page 3 will be read-only. */
1236 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1237 				PROT_READ | PROT_WRITE);
1238 	ASSERT_EQ(ret, 0);
1239 	ptr = (int *)(buffer->ptr + 3 * self->page_size);
1240 	*ptr = val;
1241 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1242 				PROT_READ);
1243 	ASSERT_EQ(ret, 0);
1244 
1245 	/* Page 4-5 will be read-write. */
1246 	ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size,
1247 				PROT_READ | PROT_WRITE);
1248 	ASSERT_EQ(ret, 0);
1249 	ptr = (int *)(buffer->ptr + 4 * self->page_size);
1250 	*ptr = val;
1251 	ptr = (int *)(buffer->ptr + 5 * self->page_size);
1252 	*ptr = val;
1253 
1254 	/* Now try to migrate pages 2-5 to device 1. */
1255 	buffer->ptr = p + 2 * self->page_size;
1256 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
1257 	ASSERT_EQ(ret, 0);
1258 	ASSERT_EQ(buffer->cpages, 4);
1259 
1260 	/* Page 5 won't be migrated to device 0 because it's on device 1. */
1261 	buffer->ptr = p + 5 * self->page_size;
1262 	ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
1263 	ASSERT_EQ(ret, -ENOENT);
1264 	buffer->ptr = p;
1265 
1266 	buffer->ptr = p;
1267 	hmm_buffer_free(buffer);
1268 }
1269 
1270 /*
1271  * Migrate anonymous memory to device memory and back to system memory
1272  * multiple times. In case of private zone configuration, this is done
1273  * through fault pages accessed by CPU. In case of coherent zone configuration,
1274  * the pages from the device should be explicitly migrated back to system memory.
1275  * The reason is Coherent device zone has coherent access by CPU, therefore
1276  * it will not generate any page fault.
1277  */
1278 TEST_F(hmm, migrate_multiple)
1279 {
1280 	struct hmm_buffer *buffer;
1281 	unsigned long npages;
1282 	unsigned long size;
1283 	unsigned long i;
1284 	unsigned long c;
1285 	int *ptr;
1286 	int ret;
1287 
1288 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1289 	ASSERT_NE(npages, 0);
1290 	size = npages << self->page_shift;
1291 
1292 	for (c = 0; c < NTIMES; c++) {
1293 		buffer = malloc(sizeof(*buffer));
1294 		ASSERT_NE(buffer, NULL);
1295 
1296 		buffer->fd = -1;
1297 		buffer->size = size;
1298 		buffer->mirror = malloc(size);
1299 		ASSERT_NE(buffer->mirror, NULL);
1300 
1301 		buffer->ptr = mmap(NULL, size,
1302 				   PROT_READ | PROT_WRITE,
1303 				   MAP_PRIVATE | MAP_ANONYMOUS,
1304 				   buffer->fd, 0);
1305 		ASSERT_NE(buffer->ptr, MAP_FAILED);
1306 
1307 		/* Initialize buffer in system memory. */
1308 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1309 			ptr[i] = i;
1310 
1311 		/* Migrate memory to device. */
1312 		ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1313 		ASSERT_EQ(ret, 0);
1314 		ASSERT_EQ(buffer->cpages, npages);
1315 
1316 		/* Check what the device read. */
1317 		for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1318 			ASSERT_EQ(ptr[i], i);
1319 
1320 		/* Migrate back to system memory and check them. */
1321 		if (hmm_is_coherent_type(variant->device_number)) {
1322 			ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
1323 			ASSERT_EQ(ret, 0);
1324 			ASSERT_EQ(buffer->cpages, npages);
1325 		}
1326 
1327 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1328 			ASSERT_EQ(ptr[i], i);
1329 
1330 		hmm_buffer_free(buffer);
1331 	}
1332 }
1333 
1334 /*
1335  * Read anonymous memory multiple times.
1336  */
1337 TEST_F(hmm, anon_read_multiple)
1338 {
1339 	struct hmm_buffer *buffer;
1340 	unsigned long npages;
1341 	unsigned long size;
1342 	unsigned long i;
1343 	unsigned long c;
1344 	int *ptr;
1345 	int ret;
1346 
1347 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1348 	ASSERT_NE(npages, 0);
1349 	size = npages << self->page_shift;
1350 
1351 	for (c = 0; c < NTIMES; c++) {
1352 		buffer = malloc(sizeof(*buffer));
1353 		ASSERT_NE(buffer, NULL);
1354 
1355 		buffer->fd = -1;
1356 		buffer->size = size;
1357 		buffer->mirror = malloc(size);
1358 		ASSERT_NE(buffer->mirror, NULL);
1359 
1360 		buffer->ptr = mmap(NULL, size,
1361 				   PROT_READ | PROT_WRITE,
1362 				   MAP_PRIVATE | MAP_ANONYMOUS,
1363 				   buffer->fd, 0);
1364 		ASSERT_NE(buffer->ptr, MAP_FAILED);
1365 
1366 		/* Initialize buffer in system memory. */
1367 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1368 			ptr[i] = i + c;
1369 
1370 		/* Simulate a device reading system memory. */
1371 		ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
1372 				      npages);
1373 		ASSERT_EQ(ret, 0);
1374 		ASSERT_EQ(buffer->cpages, npages);
1375 		ASSERT_EQ(buffer->faults, 1);
1376 
1377 		/* Check what the device read. */
1378 		for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1379 			ASSERT_EQ(ptr[i], i + c);
1380 
1381 		hmm_buffer_free(buffer);
1382 	}
1383 }
1384 
1385 void *unmap_buffer(void *p)
1386 {
1387 	struct hmm_buffer *buffer = p;
1388 
1389 	/* Delay for a bit and then unmap buffer while it is being read. */
1390 	hmm_nanosleep(hmm_random() % 32000);
1391 	munmap(buffer->ptr + buffer->size / 2, buffer->size / 2);
1392 	buffer->ptr = NULL;
1393 
1394 	return NULL;
1395 }
1396 
1397 /*
1398  * Try reading anonymous memory while it is being unmapped.
1399  */
1400 TEST_F(hmm, anon_teardown)
1401 {
1402 	unsigned long npages;
1403 	unsigned long size;
1404 	unsigned long c;
1405 	void *ret;
1406 
1407 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1408 	ASSERT_NE(npages, 0);
1409 	size = npages << self->page_shift;
1410 
1411 	for (c = 0; c < NTIMES; ++c) {
1412 		pthread_t thread;
1413 		struct hmm_buffer *buffer;
1414 		unsigned long i;
1415 		int *ptr;
1416 		int rc;
1417 
1418 		buffer = malloc(sizeof(*buffer));
1419 		ASSERT_NE(buffer, NULL);
1420 
1421 		buffer->fd = -1;
1422 		buffer->size = size;
1423 		buffer->mirror = malloc(size);
1424 		ASSERT_NE(buffer->mirror, NULL);
1425 
1426 		buffer->ptr = mmap(NULL, size,
1427 				   PROT_READ | PROT_WRITE,
1428 				   MAP_PRIVATE | MAP_ANONYMOUS,
1429 				   buffer->fd, 0);
1430 		ASSERT_NE(buffer->ptr, MAP_FAILED);
1431 
1432 		/* Initialize buffer in system memory. */
1433 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1434 			ptr[i] = i + c;
1435 
1436 		rc = pthread_create(&thread, NULL, unmap_buffer, buffer);
1437 		ASSERT_EQ(rc, 0);
1438 
1439 		/* Simulate a device reading system memory. */
1440 		rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
1441 				     npages);
1442 		if (rc == 0) {
1443 			ASSERT_EQ(buffer->cpages, npages);
1444 			ASSERT_EQ(buffer->faults, 1);
1445 
1446 			/* Check what the device read. */
1447 			for (i = 0, ptr = buffer->mirror;
1448 			     i < size / sizeof(*ptr);
1449 			     ++i)
1450 				ASSERT_EQ(ptr[i], i + c);
1451 		}
1452 
1453 		pthread_join(thread, &ret);
1454 		hmm_buffer_free(buffer);
1455 	}
1456 }
1457 
1458 /*
1459  * Test memory snapshot without faulting in pages accessed by the device.
1460  */
1461 TEST_F(hmm, mixedmap)
1462 {
1463 	struct hmm_buffer *buffer;
1464 	unsigned long npages;
1465 	unsigned long size;
1466 	unsigned char *m;
1467 	int ret;
1468 
1469 	npages = 1;
1470 	size = npages << self->page_shift;
1471 
1472 	buffer = malloc(sizeof(*buffer));
1473 	ASSERT_NE(buffer, NULL);
1474 
1475 	buffer->fd = -1;
1476 	buffer->size = size;
1477 	buffer->mirror = malloc(npages);
1478 	ASSERT_NE(buffer->mirror, NULL);
1479 
1480 
1481 	/* Reserve a range of addresses. */
1482 	buffer->ptr = mmap(NULL, size,
1483 			   PROT_READ | PROT_WRITE,
1484 			   MAP_PRIVATE,
1485 			   self->fd, 0);
1486 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1487 
1488 	/* Simulate a device snapshotting CPU pagetables. */
1489 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1490 	ASSERT_EQ(ret, 0);
1491 	ASSERT_EQ(buffer->cpages, npages);
1492 
1493 	/* Check what the device saw. */
1494 	m = buffer->mirror;
1495 	ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
1496 
1497 	hmm_buffer_free(buffer);
1498 }
1499 
1500 /*
1501  * Test memory snapshot without faulting in pages accessed by the device.
1502  */
1503 TEST_F(hmm2, snapshot)
1504 {
1505 	struct hmm_buffer *buffer;
1506 	unsigned long npages;
1507 	unsigned long size;
1508 	int *ptr;
1509 	unsigned char *p;
1510 	unsigned char *m;
1511 	int ret;
1512 	int val;
1513 
1514 	npages = 7;
1515 	size = npages << self->page_shift;
1516 
1517 	buffer = malloc(sizeof(*buffer));
1518 	ASSERT_NE(buffer, NULL);
1519 
1520 	buffer->fd = -1;
1521 	buffer->size = size;
1522 	buffer->mirror = malloc(npages);
1523 	ASSERT_NE(buffer->mirror, NULL);
1524 
1525 	/* Reserve a range of addresses. */
1526 	buffer->ptr = mmap(NULL, size,
1527 			   PROT_NONE,
1528 			   MAP_PRIVATE | MAP_ANONYMOUS,
1529 			   buffer->fd, 0);
1530 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1531 	p = buffer->ptr;
1532 
1533 	/* Punch a hole after the first page address. */
1534 	ret = munmap(buffer->ptr + self->page_size, self->page_size);
1535 	ASSERT_EQ(ret, 0);
1536 
1537 	/* Page 2 will be read-only zero page. */
1538 	ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
1539 				PROT_READ);
1540 	ASSERT_EQ(ret, 0);
1541 	ptr = (int *)(buffer->ptr + 2 * self->page_size);
1542 	val = *ptr + 3;
1543 	ASSERT_EQ(val, 3);
1544 
1545 	/* Page 3 will be read-only. */
1546 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1547 				PROT_READ | PROT_WRITE);
1548 	ASSERT_EQ(ret, 0);
1549 	ptr = (int *)(buffer->ptr + 3 * self->page_size);
1550 	*ptr = val;
1551 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1552 				PROT_READ);
1553 	ASSERT_EQ(ret, 0);
1554 
1555 	/* Page 4-6 will be read-write. */
1556 	ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size,
1557 				PROT_READ | PROT_WRITE);
1558 	ASSERT_EQ(ret, 0);
1559 	ptr = (int *)(buffer->ptr + 4 * self->page_size);
1560 	*ptr = val;
1561 
1562 	/* Page 5 will be migrated to device 0. */
1563 	buffer->ptr = p + 5 * self->page_size;
1564 	ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
1565 	ASSERT_EQ(ret, 0);
1566 	ASSERT_EQ(buffer->cpages, 1);
1567 
1568 	/* Page 6 will be migrated to device 1. */
1569 	buffer->ptr = p + 6 * self->page_size;
1570 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1);
1571 	ASSERT_EQ(ret, 0);
1572 	ASSERT_EQ(buffer->cpages, 1);
1573 
1574 	/* Simulate a device snapshotting CPU pagetables. */
1575 	buffer->ptr = p;
1576 	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1577 	ASSERT_EQ(ret, 0);
1578 	ASSERT_EQ(buffer->cpages, npages);
1579 
1580 	/* Check what the device saw. */
1581 	m = buffer->mirror;
1582 	ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR);
1583 	ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR);
1584 	ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
1585 	ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
1586 	ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
1587 	if (!hmm_is_coherent_type(variant->device_number0)) {
1588 		ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
1589 				HMM_DMIRROR_PROT_WRITE);
1590 		ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
1591 	} else {
1592 		ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL |
1593 				HMM_DMIRROR_PROT_WRITE);
1594 		ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE |
1595 				HMM_DMIRROR_PROT_WRITE);
1596 	}
1597 
1598 	hmm_buffer_free(buffer);
1599 }
1600 
1601 /*
1602  * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
1603  * should be mapped by a large page table entry.
1604  */
1605 TEST_F(hmm, compound)
1606 {
1607 	struct hmm_buffer *buffer;
1608 	unsigned long npages;
1609 	unsigned long size;
1610 	unsigned long default_hsize = default_huge_page_size();
1611 	int *ptr;
1612 	unsigned char *m;
1613 	int ret;
1614 	unsigned long i;
1615 
1616 	/* Skip test if we can't allocate a hugetlbfs page. */
1617 
1618 	if (!default_hsize)
1619 		SKIP(return, "Huge page size could not be determined");
1620 
1621 	size = ALIGN(TWOMEG, default_hsize);
1622 	npages = size >> self->page_shift;
1623 
1624 	buffer = malloc(sizeof(*buffer));
1625 	ASSERT_NE(buffer, NULL);
1626 
1627 	buffer->ptr = mmap(NULL, size,
1628 				   PROT_READ | PROT_WRITE,
1629 				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
1630 				   -1, 0);
1631 	if (buffer->ptr == MAP_FAILED) {
1632 		free(buffer);
1633 		return;
1634 	}
1635 
1636 	buffer->size = size;
1637 	buffer->mirror = malloc(npages);
1638 	ASSERT_NE(buffer->mirror, NULL);
1639 
1640 	/* Initialize the pages the device will snapshot in buffer->ptr. */
1641 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1642 		ptr[i] = i;
1643 
1644 	/* Simulate a device snapshotting CPU pagetables. */
1645 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1646 	ASSERT_EQ(ret, 0);
1647 	ASSERT_EQ(buffer->cpages, npages);
1648 
1649 	/* Check what the device saw. */
1650 	m = buffer->mirror;
1651 	for (i = 0; i < npages; ++i)
1652 		ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE |
1653 				HMM_DMIRROR_PROT_PMD);
1654 
1655 	/* Make the region read-only. */
1656 	ret = mprotect(buffer->ptr, size, PROT_READ);
1657 	ASSERT_EQ(ret, 0);
1658 
1659 	/* Simulate a device snapshotting CPU pagetables. */
1660 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1661 	ASSERT_EQ(ret, 0);
1662 	ASSERT_EQ(buffer->cpages, npages);
1663 
1664 	/* Check what the device saw. */
1665 	m = buffer->mirror;
1666 	for (i = 0; i < npages; ++i)
1667 		ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
1668 				HMM_DMIRROR_PROT_PMD);
1669 
1670 	munmap(buffer->ptr, buffer->size);
1671 	buffer->ptr = NULL;
1672 	hmm_buffer_free(buffer);
1673 }
1674 
1675 /*
1676  * Test two devices reading the same memory (double mapped).
1677  */
1678 TEST_F(hmm2, double_map)
1679 {
1680 	struct hmm_buffer *buffer;
1681 	unsigned long npages;
1682 	unsigned long size;
1683 	unsigned long i;
1684 	int *ptr;
1685 	int ret;
1686 
1687 	npages = 6;
1688 	size = npages << self->page_shift;
1689 
1690 	buffer = malloc(sizeof(*buffer));
1691 	ASSERT_NE(buffer, NULL);
1692 
1693 	buffer->fd = -1;
1694 	buffer->size = size;
1695 	buffer->mirror = malloc(size);
1696 	ASSERT_NE(buffer->mirror, NULL);
1697 
1698 	/* Reserve a range of addresses. */
1699 	buffer->ptr = mmap(NULL, size,
1700 			   PROT_READ | PROT_WRITE,
1701 			   MAP_PRIVATE | MAP_ANONYMOUS,
1702 			   buffer->fd, 0);
1703 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1704 
1705 	/* Initialize buffer in system memory. */
1706 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1707 		ptr[i] = i;
1708 
1709 	/* Make region read-only. */
1710 	ret = mprotect(buffer->ptr, size, PROT_READ);
1711 	ASSERT_EQ(ret, 0);
1712 
1713 	/* Simulate device 0 reading system memory. */
1714 	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
1715 	ASSERT_EQ(ret, 0);
1716 	ASSERT_EQ(buffer->cpages, npages);
1717 	ASSERT_EQ(buffer->faults, 1);
1718 
1719 	/* Check what the device read. */
1720 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1721 		ASSERT_EQ(ptr[i], i);
1722 
1723 	/* Simulate device 1 reading system memory. */
1724 	ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages);
1725 	ASSERT_EQ(ret, 0);
1726 	ASSERT_EQ(buffer->cpages, npages);
1727 	ASSERT_EQ(buffer->faults, 1);
1728 
1729 	/* Check what the device read. */
1730 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1731 		ASSERT_EQ(ptr[i], i);
1732 
1733 	/* Migrate pages to device 1 and try to read from device 0. */
1734 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
1735 	ASSERT_EQ(ret, 0);
1736 	ASSERT_EQ(buffer->cpages, npages);
1737 
1738 	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
1739 	ASSERT_EQ(ret, 0);
1740 	ASSERT_EQ(buffer->cpages, npages);
1741 	ASSERT_EQ(buffer->faults, 1);
1742 
1743 	/* Check what device 0 read. */
1744 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1745 		ASSERT_EQ(ptr[i], i);
1746 
1747 	hmm_buffer_free(buffer);
1748 }
1749 
1750 /*
1751  * Basic check of exclusive faulting.
1752  */
1753 TEST_F(hmm, exclusive)
1754 {
1755 	struct hmm_buffer *buffer;
1756 	unsigned long npages;
1757 	unsigned long size;
1758 	unsigned long i;
1759 	int *ptr;
1760 	int ret;
1761 
1762 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1763 	ASSERT_NE(npages, 0);
1764 	size = npages << self->page_shift;
1765 
1766 	buffer = malloc(sizeof(*buffer));
1767 	ASSERT_NE(buffer, NULL);
1768 
1769 	buffer->fd = -1;
1770 	buffer->size = size;
1771 	buffer->mirror = malloc(size);
1772 	ASSERT_NE(buffer->mirror, NULL);
1773 
1774 	buffer->ptr = mmap(NULL, size,
1775 			   PROT_READ | PROT_WRITE,
1776 			   MAP_PRIVATE | MAP_ANONYMOUS,
1777 			   buffer->fd, 0);
1778 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1779 
1780 	/* Initialize buffer in system memory. */
1781 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1782 		ptr[i] = i;
1783 
1784 	/* Map memory exclusively for device access. */
1785 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
1786 	ASSERT_EQ(ret, 0);
1787 	ASSERT_EQ(buffer->cpages, npages);
1788 
1789 	/* Check what the device read. */
1790 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1791 		ASSERT_EQ(ptr[i], i);
1792 
1793 	/* Fault pages back to system memory and check them. */
1794 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1795 		ASSERT_EQ(ptr[i]++, i);
1796 
1797 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1798 		ASSERT_EQ(ptr[i], i+1);
1799 
1800 	/* Check atomic access revoked */
1801 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
1802 	ASSERT_EQ(ret, 0);
1803 
1804 	hmm_buffer_free(buffer);
1805 }
1806 
1807 TEST_F(hmm, exclusive_mprotect)
1808 {
1809 	struct hmm_buffer *buffer;
1810 	unsigned long npages;
1811 	unsigned long size;
1812 	unsigned long i;
1813 	int *ptr;
1814 	int ret;
1815 
1816 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1817 	ASSERT_NE(npages, 0);
1818 	size = npages << self->page_shift;
1819 
1820 	buffer = malloc(sizeof(*buffer));
1821 	ASSERT_NE(buffer, NULL);
1822 
1823 	buffer->fd = -1;
1824 	buffer->size = size;
1825 	buffer->mirror = malloc(size);
1826 	ASSERT_NE(buffer->mirror, NULL);
1827 
1828 	buffer->ptr = mmap(NULL, size,
1829 			   PROT_READ | PROT_WRITE,
1830 			   MAP_PRIVATE | MAP_ANONYMOUS,
1831 			   buffer->fd, 0);
1832 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1833 
1834 	/* Initialize buffer in system memory. */
1835 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1836 		ptr[i] = i;
1837 
1838 	/* Map memory exclusively for device access. */
1839 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
1840 	ASSERT_EQ(ret, 0);
1841 	ASSERT_EQ(buffer->cpages, npages);
1842 
1843 	/* Check what the device read. */
1844 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1845 		ASSERT_EQ(ptr[i], i);
1846 
1847 	ret = mprotect(buffer->ptr, size, PROT_READ);
1848 	ASSERT_EQ(ret, 0);
1849 
1850 	/* Simulate a device writing system memory. */
1851 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
1852 	ASSERT_EQ(ret, -EPERM);
1853 
1854 	hmm_buffer_free(buffer);
1855 }
1856 
1857 /*
1858  * Check copy-on-write works.
1859  */
1860 TEST_F(hmm, exclusive_cow)
1861 {
1862 	struct hmm_buffer *buffer;
1863 	unsigned long npages;
1864 	unsigned long size;
1865 	unsigned long i;
1866 	int *ptr;
1867 	int ret;
1868 
1869 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1870 	ASSERT_NE(npages, 0);
1871 	size = npages << self->page_shift;
1872 
1873 	buffer = malloc(sizeof(*buffer));
1874 	ASSERT_NE(buffer, NULL);
1875 
1876 	buffer->fd = -1;
1877 	buffer->size = size;
1878 	buffer->mirror = malloc(size);
1879 	ASSERT_NE(buffer->mirror, NULL);
1880 
1881 	buffer->ptr = mmap(NULL, size,
1882 			   PROT_READ | PROT_WRITE,
1883 			   MAP_PRIVATE | MAP_ANONYMOUS,
1884 			   buffer->fd, 0);
1885 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1886 
1887 	/* Initialize buffer in system memory. */
1888 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1889 		ptr[i] = i;
1890 
1891 	/* Map memory exclusively for device access. */
1892 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
1893 	ASSERT_EQ(ret, 0);
1894 	ASSERT_EQ(buffer->cpages, npages);
1895 
1896 	fork();
1897 
1898 	/* Fault pages back to system memory and check them. */
1899 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1900 		ASSERT_EQ(ptr[i]++, i);
1901 
1902 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1903 		ASSERT_EQ(ptr[i], i+1);
1904 
1905 	hmm_buffer_free(buffer);
1906 }
1907 
1908 static int gup_test_exec(int gup_fd, unsigned long addr, int cmd,
1909 			 int npages, int size, int flags)
1910 {
1911 	struct gup_test gup = {
1912 		.nr_pages_per_call	= npages,
1913 		.addr			= addr,
1914 		.gup_flags		= FOLL_WRITE | flags,
1915 		.size			= size,
1916 	};
1917 
1918 	if (ioctl(gup_fd, cmd, &gup)) {
1919 		perror("ioctl on error\n");
1920 		return errno;
1921 	}
1922 
1923 	return 0;
1924 }
1925 
1926 /*
1927  * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
1928  * This should trigger a migration back to system memory for both, private
1929  * and coherent type pages.
1930  * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added
1931  * to your configuration before you run it.
1932  */
1933 TEST_F(hmm, hmm_gup_test)
1934 {
1935 	struct hmm_buffer *buffer;
1936 	int gup_fd;
1937 	unsigned long npages;
1938 	unsigned long size;
1939 	unsigned long i;
1940 	int *ptr;
1941 	int ret;
1942 	unsigned char *m;
1943 
1944 	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
1945 	if (gup_fd == -1)
1946 		SKIP(return, "Skipping test, could not find gup_test driver");
1947 
1948 	npages = 4;
1949 	size = npages << self->page_shift;
1950 
1951 	buffer = malloc(sizeof(*buffer));
1952 	ASSERT_NE(buffer, NULL);
1953 
1954 	buffer->fd = -1;
1955 	buffer->size = size;
1956 	buffer->mirror = malloc(size);
1957 	ASSERT_NE(buffer->mirror, NULL);
1958 
1959 	buffer->ptr = mmap(NULL, size,
1960 			   PROT_READ | PROT_WRITE,
1961 			   MAP_PRIVATE | MAP_ANONYMOUS,
1962 			   buffer->fd, 0);
1963 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1964 
1965 	/* Initialize buffer in system memory. */
1966 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1967 		ptr[i] = i;
1968 
1969 	/* Migrate memory to device. */
1970 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1971 	ASSERT_EQ(ret, 0);
1972 	ASSERT_EQ(buffer->cpages, npages);
1973 	/* Check what the device read. */
1974 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1975 		ASSERT_EQ(ptr[i], i);
1976 
1977 	ASSERT_EQ(gup_test_exec(gup_fd,
1978 				(unsigned long)buffer->ptr,
1979 				GUP_BASIC_TEST, 1, self->page_size, 0), 0);
1980 	ASSERT_EQ(gup_test_exec(gup_fd,
1981 				(unsigned long)buffer->ptr + 1 * self->page_size,
1982 				GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0);
1983 	ASSERT_EQ(gup_test_exec(gup_fd,
1984 				(unsigned long)buffer->ptr + 2 * self->page_size,
1985 				PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0);
1986 	ASSERT_EQ(gup_test_exec(gup_fd,
1987 				(unsigned long)buffer->ptr + 3 * self->page_size,
1988 				PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0);
1989 
1990 	/* Take snapshot to CPU pagetables */
1991 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1992 	ASSERT_EQ(ret, 0);
1993 	ASSERT_EQ(buffer->cpages, npages);
1994 	m = buffer->mirror;
1995 	if (hmm_is_coherent_type(variant->device_number)) {
1996 		ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]);
1997 		ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]);
1998 	} else {
1999 		ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
2000 		ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
2001 	}
2002 	ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
2003 	ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]);
2004 	/*
2005 	 * Check again the content on the pages. Make sure there's no
2006 	 * corrupted data.
2007 	 */
2008 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2009 		ASSERT_EQ(ptr[i], i);
2010 
2011 	close(gup_fd);
2012 	hmm_buffer_free(buffer);
2013 }
2014 
2015 /*
2016  * Test copy-on-write in device pages.
2017  * In case of writing to COW private page(s), a page fault will migrate pages
2018  * back to system memory first. Then, these pages will be duplicated. In case
2019  * of COW device coherent type, pages are duplicated directly from device
2020  * memory.
2021  */
2022 TEST_F(hmm, hmm_cow_in_device)
2023 {
2024 	struct hmm_buffer *buffer;
2025 	unsigned long npages;
2026 	unsigned long size;
2027 	unsigned long i;
2028 	int *ptr;
2029 	int ret;
2030 	unsigned char *m;
2031 	pid_t pid;
2032 	int status;
2033 
2034 	npages = 4;
2035 	size = npages << self->page_shift;
2036 
2037 	buffer = malloc(sizeof(*buffer));
2038 	ASSERT_NE(buffer, NULL);
2039 
2040 	buffer->fd = -1;
2041 	buffer->size = size;
2042 	buffer->mirror = malloc(size);
2043 	ASSERT_NE(buffer->mirror, NULL);
2044 
2045 	buffer->ptr = mmap(NULL, size,
2046 			   PROT_READ | PROT_WRITE,
2047 			   MAP_PRIVATE | MAP_ANONYMOUS,
2048 			   buffer->fd, 0);
2049 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2050 
2051 	/* Initialize buffer in system memory. */
2052 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2053 		ptr[i] = i;
2054 
2055 	/* Migrate memory to device. */
2056 
2057 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2058 	ASSERT_EQ(ret, 0);
2059 	ASSERT_EQ(buffer->cpages, npages);
2060 
2061 	pid = fork();
2062 	if (pid == -1)
2063 		ASSERT_EQ(pid, 0);
2064 	if (!pid) {
2065 		/* Child process waits for SIGTERM from the parent. */
2066 		while (1) {
2067 		}
2068 		/* Should not reach this */
2069 	}
2070 	/* Parent process writes to COW pages(s) and gets a
2071 	 * new copy in system. In case of device private pages,
2072 	 * this write causes a migration to system mem first.
2073 	 */
2074 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2075 		ptr[i] = i;
2076 
2077 	/* Terminate child and wait */
2078 	EXPECT_EQ(0, kill(pid, SIGTERM));
2079 	EXPECT_EQ(pid, waitpid(pid, &status, 0));
2080 	EXPECT_NE(0, WIFSIGNALED(status));
2081 	EXPECT_EQ(SIGTERM, WTERMSIG(status));
2082 
2083 	/* Take snapshot to CPU pagetables */
2084 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
2085 	ASSERT_EQ(ret, 0);
2086 	ASSERT_EQ(buffer->cpages, npages);
2087 	m = buffer->mirror;
2088 	for (i = 0; i < npages; i++)
2089 		ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
2090 
2091 	hmm_buffer_free(buffer);
2092 }
2093 
2094 /*
2095  * Migrate private anonymous huge empty page.
2096  */
2097 TEST_F(hmm, migrate_anon_huge_empty)
2098 {
2099 	struct hmm_buffer *buffer;
2100 	unsigned long npages;
2101 	unsigned long size;
2102 	unsigned long i;
2103 	void *old_ptr;
2104 	void *map;
2105 	int *ptr;
2106 	int ret;
2107 
2108 	size = read_pmd_pagesize();
2109 
2110 	buffer = malloc(sizeof(*buffer));
2111 	ASSERT_NE(buffer, NULL);
2112 
2113 	buffer->fd = -1;
2114 	buffer->size = 2 * size;
2115 	buffer->mirror = malloc(size);
2116 	ASSERT_NE(buffer->mirror, NULL);
2117 	memset(buffer->mirror, 0xFF, size);
2118 
2119 	buffer->ptr = mmap(NULL, 2 * size,
2120 			   PROT_READ,
2121 			   MAP_PRIVATE | MAP_ANONYMOUS,
2122 			   buffer->fd, 0);
2123 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2124 
2125 	npages = size >> self->page_shift;
2126 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2127 	ret = madvise(map, size, MADV_HUGEPAGE);
2128 	ASSERT_EQ(ret, 0);
2129 	old_ptr = buffer->ptr;
2130 	buffer->ptr = map;
2131 
2132 	/* Migrate memory to device. */
2133 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2134 	ASSERT_EQ(ret, 0);
2135 	ASSERT_EQ(buffer->cpages, npages);
2136 
2137 	/* Check what the device read. */
2138 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2139 		ASSERT_EQ(ptr[i], 0);
2140 
2141 	buffer->ptr = old_ptr;
2142 	hmm_buffer_free(buffer);
2143 }
2144 
2145 /*
2146  * Migrate private anonymous huge zero page.
2147  */
2148 TEST_F(hmm, migrate_anon_huge_zero)
2149 {
2150 	struct hmm_buffer *buffer;
2151 	unsigned long npages;
2152 	unsigned long size;
2153 	unsigned long i;
2154 	void *old_ptr;
2155 	void *map;
2156 	int *ptr;
2157 	int ret;
2158 	int val;
2159 
2160 	size = read_pmd_pagesize();
2161 
2162 	buffer = malloc(sizeof(*buffer));
2163 	ASSERT_NE(buffer, NULL);
2164 
2165 	buffer->fd = -1;
2166 	buffer->size = 2 * size;
2167 	buffer->mirror = malloc(size);
2168 	ASSERT_NE(buffer->mirror, NULL);
2169 	memset(buffer->mirror, 0xFF, size);
2170 
2171 	buffer->ptr = mmap(NULL, 2 * size,
2172 			   PROT_READ,
2173 			   MAP_PRIVATE | MAP_ANONYMOUS,
2174 			   buffer->fd, 0);
2175 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2176 
2177 	npages = size >> self->page_shift;
2178 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2179 	ret = madvise(map, size, MADV_HUGEPAGE);
2180 	ASSERT_EQ(ret, 0);
2181 	old_ptr = buffer->ptr;
2182 	buffer->ptr = map;
2183 
2184 	/* Initialize a read-only zero huge page. */
2185 	val = *(int *)buffer->ptr;
2186 	ASSERT_EQ(val, 0);
2187 
2188 	/* Migrate memory to device. */
2189 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2190 	ASSERT_EQ(ret, 0);
2191 	ASSERT_EQ(buffer->cpages, npages);
2192 
2193 	/* Check what the device read. */
2194 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2195 		ASSERT_EQ(ptr[i], 0);
2196 
2197 	/* Fault pages back to system memory and check them. */
2198 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) {
2199 		ASSERT_EQ(ptr[i], 0);
2200 		/* If it asserts once, it probably will 500,000 times */
2201 		if (ptr[i] != 0)
2202 			break;
2203 	}
2204 
2205 	buffer->ptr = old_ptr;
2206 	hmm_buffer_free(buffer);
2207 }
2208 
2209 /*
2210  * Migrate private anonymous huge page and free.
2211  */
2212 TEST_F(hmm, migrate_anon_huge_free)
2213 {
2214 	struct hmm_buffer *buffer;
2215 	unsigned long npages;
2216 	unsigned long size;
2217 	unsigned long i;
2218 	void *old_ptr;
2219 	void *map;
2220 	int *ptr;
2221 	int ret;
2222 
2223 	size = read_pmd_pagesize();
2224 
2225 	buffer = malloc(sizeof(*buffer));
2226 	ASSERT_NE(buffer, NULL);
2227 
2228 	buffer->fd = -1;
2229 	buffer->size = 2 * size;
2230 	buffer->mirror = malloc(size);
2231 	ASSERT_NE(buffer->mirror, NULL);
2232 	memset(buffer->mirror, 0xFF, size);
2233 
2234 	buffer->ptr = mmap(NULL, 2 * size,
2235 			   PROT_READ | PROT_WRITE,
2236 			   MAP_PRIVATE | MAP_ANONYMOUS,
2237 			   buffer->fd, 0);
2238 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2239 
2240 	npages = size >> self->page_shift;
2241 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2242 	ret = madvise(map, size, MADV_HUGEPAGE);
2243 	ASSERT_EQ(ret, 0);
2244 	old_ptr = buffer->ptr;
2245 	buffer->ptr = map;
2246 
2247 	/* Initialize buffer in system memory. */
2248 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2249 		ptr[i] = i;
2250 
2251 	/* Migrate memory to device. */
2252 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2253 	ASSERT_EQ(ret, 0);
2254 	ASSERT_EQ(buffer->cpages, npages);
2255 
2256 	/* Check what the device read. */
2257 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2258 		ASSERT_EQ(ptr[i], i);
2259 
2260 	/* Try freeing it. */
2261 	ret = madvise(map, size, MADV_FREE);
2262 	ASSERT_EQ(ret, 0);
2263 
2264 	buffer->ptr = old_ptr;
2265 	hmm_buffer_free(buffer);
2266 }
2267 
2268 /*
2269  * Migrate private anonymous huge page and fault back to sysmem.
2270  */
2271 TEST_F(hmm, migrate_anon_huge_fault)
2272 {
2273 	struct hmm_buffer *buffer;
2274 	unsigned long npages;
2275 	unsigned long size;
2276 	unsigned long i;
2277 	unsigned char *m;
2278 	uint64_t entry;
2279 	void *old_ptr;
2280 	void *map;
2281 	int pagemap_fd;
2282 	int *ptr;
2283 	int ret;
2284 
2285 	size = read_pmd_pagesize();
2286 
2287 	buffer = malloc(sizeof(*buffer));
2288 	ASSERT_NE(buffer, NULL);
2289 
2290 	buffer->fd = -1;
2291 	buffer->size = 2 * size;
2292 	buffer->mirror = malloc(size);
2293 	ASSERT_NE(buffer->mirror, NULL);
2294 	memset(buffer->mirror, 0xFF, size);
2295 
2296 	buffer->ptr = mmap(NULL, 2 * size,
2297 			   PROT_READ | PROT_WRITE,
2298 			   MAP_PRIVATE | MAP_ANONYMOUS,
2299 			   buffer->fd, 0);
2300 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2301 
2302 	npages = size >> self->page_shift;
2303 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2304 	old_ptr = buffer->ptr;
2305 	buffer->ptr = map;
2306 
2307 	/* Initialize buffer in system memory. */
2308 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2309 		ptr[i] = i;
2310 
2311 	ret = madvise(map, size, MADV_COLLAPSE);
2312 	ASSERT_EQ(ret, 0);
2313 
2314 	/* Migrate memory to device. */
2315 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2316 	ASSERT_EQ(ret, 0);
2317 	ASSERT_EQ(buffer->cpages, npages);
2318 
2319 	/* Check what the device read. */
2320 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2321 		ASSERT_EQ(ptr[i], i);
2322 
2323 	if (!hmm_is_coherent_type(variant->device_number)) {
2324 		ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT,
2325 				      buffer, npages);
2326 		ASSERT_EQ(ret, 0);
2327 		ASSERT_EQ(buffer->cpages, npages);
2328 
2329 		m = buffer->mirror;
2330 		for (i = 0; i < npages; ++i)
2331 			ASSERT_EQ(m[i], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
2332 					HMM_DMIRROR_PROT_WRITE |
2333 					HMM_DMIRROR_PROT_PMD);
2334 
2335 		pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
2336 		ASSERT_GE(pagemap_fd, 0);
2337 
2338 		for (i = 0; i < npages; ++i) {
2339 			entry = pagemap_get_entry(pagemap_fd,
2340 					(char *)buffer->ptr + i * self->page_size);
2341 
2342 			ASSERT_NE(entry & PM_SWAP, 0);
2343 			ASSERT_FALSE(PAGEMAP_PRESENT(entry));
2344 		}
2345 
2346 		close(pagemap_fd);
2347 	}
2348 
2349 	/* Fault pages back to system memory and check them. */
2350 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2351 		ASSERT_EQ(ptr[i], i);
2352 
2353 	buffer->ptr = old_ptr;
2354 	hmm_buffer_free(buffer);
2355 }
2356 
2357 /*
2358  * Migrate memory and fault back to sysmem after partially unmapping.
2359  */
2360 TEST_F(hmm, migrate_partial_unmap_fault)
2361 {
2362 	struct hmm_buffer *buffer;
2363 	unsigned long npages;
2364 	unsigned long size = read_pmd_pagesize();
2365 	unsigned long i;
2366 	void *old_ptr;
2367 	void *map;
2368 	int *ptr;
2369 	int ret, j, use_thp;
2370 	int offsets[] = { 0, 512 * ONEKB, ONEMEG };
2371 
2372 	for (use_thp = 0; use_thp < 2; ++use_thp) {
2373 		for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
2374 			buffer = malloc(sizeof(*buffer));
2375 			ASSERT_NE(buffer, NULL);
2376 
2377 			buffer->fd = -1;
2378 			buffer->size = 2 * size;
2379 			buffer->mirror = malloc(size);
2380 			ASSERT_NE(buffer->mirror, NULL);
2381 			memset(buffer->mirror, 0xFF, size);
2382 
2383 			buffer->ptr = mmap(NULL, 2 * size,
2384 					   PROT_READ | PROT_WRITE,
2385 					   MAP_PRIVATE | MAP_ANONYMOUS,
2386 					   buffer->fd, 0);
2387 			ASSERT_NE(buffer->ptr, MAP_FAILED);
2388 
2389 			npages = size >> self->page_shift;
2390 			map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2391 			if (use_thp)
2392 				ret = madvise(map, size, MADV_HUGEPAGE);
2393 			else
2394 				ret = madvise(map, size, MADV_NOHUGEPAGE);
2395 			ASSERT_EQ(ret, 0);
2396 			old_ptr = buffer->ptr;
2397 			buffer->ptr = map;
2398 
2399 			/* Initialize buffer in system memory. */
2400 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2401 				ptr[i] = i;
2402 
2403 			/* Migrate memory to device. */
2404 			ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2405 			ASSERT_EQ(ret, 0);
2406 			ASSERT_EQ(buffer->cpages, npages);
2407 
2408 			/* Check what the device read. */
2409 			for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2410 				ASSERT_EQ(ptr[i], i);
2411 
2412 			munmap(buffer->ptr + offsets[j], ONEMEG);
2413 
2414 			/* Fault pages back to system memory and check them. */
2415 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2416 				if (i * sizeof(int) < offsets[j] ||
2417 				    i * sizeof(int) >= offsets[j] + ONEMEG)
2418 					ASSERT_EQ(ptr[i], i);
2419 
2420 			buffer->ptr = old_ptr;
2421 			hmm_buffer_free(buffer);
2422 		}
2423 	}
2424 }
2425 
2426 TEST_F(hmm, migrate_remap_fault)
2427 {
2428 	struct hmm_buffer *buffer;
2429 	unsigned long npages;
2430 	unsigned long size = read_pmd_pagesize();
2431 	unsigned long i;
2432 	void *old_ptr, *new_ptr = NULL;
2433 	void *map;
2434 	int *ptr;
2435 	int ret, j, use_thp, dont_unmap, before;
2436 	int offsets[] = { 0, 512 * ONEKB, ONEMEG };
2437 
2438 	for (before = 0; before < 2; ++before) {
2439 		for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) {
2440 			for (use_thp = 0; use_thp < 2; ++use_thp) {
2441 				for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
2442 					int flags = MREMAP_MAYMOVE | MREMAP_FIXED;
2443 
2444 					if (dont_unmap)
2445 						flags |= MREMAP_DONTUNMAP;
2446 
2447 					buffer = malloc(sizeof(*buffer));
2448 					ASSERT_NE(buffer, NULL);
2449 
2450 					buffer->fd = -1;
2451 					buffer->size = 8 * size;
2452 					buffer->mirror = malloc(size);
2453 					ASSERT_NE(buffer->mirror, NULL);
2454 					memset(buffer->mirror, 0xFF, size);
2455 
2456 					buffer->ptr = mmap(NULL, buffer->size,
2457 							   PROT_READ | PROT_WRITE,
2458 							   MAP_PRIVATE | MAP_ANONYMOUS,
2459 							   buffer->fd, 0);
2460 					ASSERT_NE(buffer->ptr, MAP_FAILED);
2461 
2462 					npages = size >> self->page_shift;
2463 					map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2464 					if (use_thp)
2465 						ret = madvise(map, size, MADV_HUGEPAGE);
2466 					else
2467 						ret = madvise(map, size, MADV_NOHUGEPAGE);
2468 					ASSERT_EQ(ret, 0);
2469 					old_ptr = buffer->ptr;
2470 					munmap(map + size, size * 2);
2471 					buffer->ptr = map;
2472 
2473 					/* Initialize buffer in system memory. */
2474 					for (i = 0, ptr = buffer->ptr;
2475 					     i < size / sizeof(*ptr); ++i)
2476 						ptr[i] = i;
2477 
2478 					if (before) {
2479 						new_ptr = mremap((void *)map, size, size, flags,
2480 								 map + size + offsets[j]);
2481 						ASSERT_NE(new_ptr, MAP_FAILED);
2482 						buffer->ptr = new_ptr;
2483 					}
2484 
2485 					/* Migrate memory to device. */
2486 					ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2487 					ASSERT_EQ(ret, 0);
2488 					ASSERT_EQ(buffer->cpages, npages);
2489 
2490 					/* Check what the device read. */
2491 					for (i = 0, ptr = buffer->mirror;
2492 					     i < size / sizeof(*ptr); ++i)
2493 						ASSERT_EQ(ptr[i], i);
2494 
2495 					if (!before) {
2496 						new_ptr = mremap((void *)map, size, size, flags,
2497 								 map + size + offsets[j]);
2498 						ASSERT_NE(new_ptr, MAP_FAILED);
2499 						buffer->ptr = new_ptr;
2500 					}
2501 
2502 					/* Fault pages back to system memory and check them. */
2503 					for (i = 0, ptr = buffer->ptr;
2504 					     i < size / sizeof(*ptr); ++i)
2505 						ASSERT_EQ(ptr[i], i);
2506 
2507 					munmap(new_ptr, size);
2508 					buffer->ptr = old_ptr;
2509 					hmm_buffer_free(buffer);
2510 				}
2511 			}
2512 		}
2513 	}
2514 }
2515 
2516 /*
2517  * Migrate private anonymous huge page with allocation errors.
2518  */
2519 TEST_F(hmm, migrate_anon_huge_err)
2520 {
2521 	struct hmm_buffer *buffer;
2522 	unsigned long npages;
2523 	unsigned long size;
2524 	unsigned long i;
2525 	void *old_ptr;
2526 	void *map;
2527 	int *ptr;
2528 	int ret;
2529 
2530 	size = read_pmd_pagesize();
2531 
2532 	buffer = malloc(sizeof(*buffer));
2533 	ASSERT_NE(buffer, NULL);
2534 
2535 	buffer->fd = -1;
2536 	buffer->size = 2 * size;
2537 	buffer->mirror = malloc(2 * size);
2538 	ASSERT_NE(buffer->mirror, NULL);
2539 	memset(buffer->mirror, 0xFF, 2 * size);
2540 
2541 	old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
2542 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2543 	ASSERT_NE(old_ptr, MAP_FAILED);
2544 
2545 	npages = size >> self->page_shift;
2546 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2547 	ret = madvise(map, size, MADV_HUGEPAGE);
2548 	ASSERT_EQ(ret, 0);
2549 	buffer->ptr = map;
2550 
2551 	/* Initialize buffer in system memory. */
2552 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2553 		ptr[i] = i;
2554 
2555 	/* Migrate memory to device but force a THP allocation error. */
2556 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2557 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2558 	ASSERT_EQ(ret, 0);
2559 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2560 	ASSERT_EQ(ret, 0);
2561 	ASSERT_EQ(buffer->cpages, npages);
2562 
2563 	/* Check what the device read. */
2564 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) {
2565 		ASSERT_EQ(ptr[i], i);
2566 		if (ptr[i] != i)
2567 			break;
2568 	}
2569 
2570 	/* Try faulting back a single (PAGE_SIZE) page. */
2571 	ptr = buffer->ptr;
2572 	ASSERT_EQ(ptr[2048], 2048);
2573 
2574 	/* unmap and remap the region to reset things. */
2575 	ret = munmap(old_ptr, 2 * size);
2576 	ASSERT_EQ(ret, 0);
2577 	old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
2578 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2579 	ASSERT_NE(old_ptr, MAP_FAILED);
2580 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2581 	ret = madvise(map, size, MADV_HUGEPAGE);
2582 	ASSERT_EQ(ret, 0);
2583 	buffer->ptr = map;
2584 
2585 	/* Initialize buffer in system memory. */
2586 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2587 		ptr[i] = i;
2588 
2589 	/* Migrate THP to device. */
2590 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2591 	ASSERT_EQ(ret, 0);
2592 	ASSERT_EQ(buffer->cpages, npages);
2593 
2594 	/*
2595 	 * Force an allocation error when faulting back a THP resident in the
2596 	 * device.
2597 	 */
2598 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2599 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2600 	ASSERT_EQ(ret, 0);
2601 
2602 	ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
2603 	ASSERT_EQ(ret, 0);
2604 	ptr = buffer->ptr;
2605 	ASSERT_EQ(ptr[2048], 2048);
2606 
2607 	buffer->ptr = old_ptr;
2608 	hmm_buffer_free(buffer);
2609 }
2610 
2611 /*
2612  * Migrate private anonymous huge zero page with allocation errors.
2613  */
2614 TEST_F(hmm, migrate_anon_huge_zero_err)
2615 {
2616 	struct hmm_buffer *buffer;
2617 	unsigned long npages;
2618 	unsigned long size;
2619 	unsigned long i;
2620 	void *old_ptr;
2621 	void *map;
2622 	int *ptr;
2623 	int ret;
2624 
2625 	size = read_pmd_pagesize();
2626 
2627 	buffer = malloc(sizeof(*buffer));
2628 	ASSERT_NE(buffer, NULL);
2629 
2630 	buffer->fd = -1;
2631 	buffer->size = 2 * size;
2632 	buffer->mirror = malloc(2 * size);
2633 	ASSERT_NE(buffer->mirror, NULL);
2634 	memset(buffer->mirror, 0xFF, 2 * size);
2635 
2636 	old_ptr = mmap(NULL, 2 * size, PROT_READ,
2637 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2638 	ASSERT_NE(old_ptr, MAP_FAILED);
2639 
2640 	npages = size >> self->page_shift;
2641 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2642 	ret = madvise(map, size, MADV_HUGEPAGE);
2643 	ASSERT_EQ(ret, 0);
2644 	buffer->ptr = map;
2645 
2646 	/* Migrate memory to device but force a THP allocation error. */
2647 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2648 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2649 	ASSERT_EQ(ret, 0);
2650 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2651 	ASSERT_EQ(ret, 0);
2652 	ASSERT_EQ(buffer->cpages, npages);
2653 
2654 	/* Check what the device read. */
2655 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2656 		ASSERT_EQ(ptr[i], 0);
2657 
2658 	/* Try faulting back a single (PAGE_SIZE) page. */
2659 	ptr = buffer->ptr;
2660 	ASSERT_EQ(ptr[2048], 0);
2661 
2662 	/* unmap and remap the region to reset things. */
2663 	ret = munmap(old_ptr, 2 * size);
2664 	ASSERT_EQ(ret, 0);
2665 	old_ptr = mmap(NULL, 2 * size, PROT_READ,
2666 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2667 	ASSERT_NE(old_ptr, MAP_FAILED);
2668 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2669 	ret = madvise(map, size, MADV_HUGEPAGE);
2670 	ASSERT_EQ(ret, 0);
2671 	buffer->ptr = map;
2672 
2673 	/* Initialize buffer in system memory (zero THP page). */
2674 	ret = ptr[0];
2675 	ASSERT_EQ(ret, 0);
2676 
2677 	/* Migrate memory to device but force a THP allocation error. */
2678 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2679 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2680 	ASSERT_EQ(ret, 0);
2681 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2682 	ASSERT_EQ(ret, 0);
2683 	ASSERT_EQ(buffer->cpages, npages);
2684 
2685 	/* Fault the device memory back and check it. */
2686 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2687 		ASSERT_EQ(ptr[i], 0);
2688 
2689 	buffer->ptr = old_ptr;
2690 	hmm_buffer_free(buffer);
2691 }
2692 
/*
 * Averaged outcome of one migration benchmark run, as filled in by
 * run_migration_benchmark() and reported by print_benchmark_results().
 */
struct benchmark_results {
	/* Mean system -> device migration time, in milliseconds. */
	double sys_to_dev_time;
	/* Mean device -> system migration time, in milliseconds. */
	double dev_to_sys_time;
	/* System -> device throughput, reported as GB/s. */
	double throughput_s2d;
	/* Device -> system throughput, reported as GB/s. */
	double throughput_d2s;
};
2699 
/*
 * Return the current gettimeofday() time expressed in milliseconds,
 * keeping the sub-millisecond part as a fraction.
 */
static double get_time_ms(void)
{
	struct timeval now;
	double from_seconds;
	double from_usecs;

	gettimeofday(&now, NULL);

	from_seconds = now.tv_sec * 1000.0;
	from_usecs = now.tv_usec / 1000.0;
	return from_seconds + from_usecs;
}
2707 
2708 static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size)
2709 {
2710 	struct hmm_buffer *buffer;
2711 
2712 	buffer = malloc(sizeof(*buffer));
2713 
2714 	buffer->fd = -1;
2715 	buffer->size = size;
2716 	buffer->mirror = malloc(size);
2717 	memset(buffer->mirror, 0xFF, size);
2718 	return buffer;
2719 }
2720 
2721 static void print_benchmark_results(const char *test_name, size_t buffer_size,
2722 				     struct benchmark_results *thp,
2723 				     struct benchmark_results *regular)
2724 {
2725 	double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) /
2726 				 regular->sys_to_dev_time) * 100.0;
2727 	double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) /
2728 				 regular->dev_to_sys_time) * 100.0;
2729 	double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) /
2730 					    regular->throughput_s2d) * 100.0;
2731 	double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) /
2732 					    regular->throughput_d2s) * 100.0;
2733 
2734 	printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0));
2735 	printf("                     | With THP        | Without THP     | Improvement\n");
2736 	printf("---------------------------------------------------------------------\n");
2737 	printf("Sys->Dev Migration   | %.3f ms        | %.3f ms        | %.1f%%\n",
2738 	       thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement);
2739 	printf("Dev->Sys Migration   | %.3f ms        | %.3f ms        | %.1f%%\n",
2740 	       thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement);
2741 	printf("S->D Throughput      | %.2f GB/s      | %.2f GB/s      | %.1f%%\n",
2742 	       thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement);
2743 	printf("D->S Throughput      | %.2f GB/s      | %.2f GB/s      | %.1f%%\n",
2744 	       thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement);
2745 }
2746 
2747 /*
2748  * Run a single migration benchmark
2749  * fd: file descriptor for hmm device
2750  * use_thp: whether to use THP
2751  * buffer_size: size of buffer to allocate
2752  * iterations: number of iterations
2753  * results: where to store results
2754  */
2755 static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size,
2756 					   int iterations, struct benchmark_results *results)
2757 {
2758 	struct hmm_buffer *buffer;
2759 	unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE);
2760 	double start, end;
2761 	double s2d_total = 0, d2s_total = 0;
2762 	int ret, i;
2763 	int *ptr;
2764 
2765 	buffer = hmm_buffer_alloc(buffer_size);
2766 
2767 	/* Map memory */
2768 	buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE,
2769 			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
2770 
2771 	if (buffer->ptr == MAP_FAILED)
2772 		return -1;
2773 
2774 	/* Apply THP hint if requested */
2775 	if (use_thp)
2776 		ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE);
2777 	else
2778 		ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE);
2779 
2780 	if (ret)
2781 		return ret;
2782 
2783 	/* Initialize memory to make sure pages are allocated */
2784 	ptr = (int *)buffer->ptr;
2785 	for (i = 0; i < buffer_size / sizeof(int); i++)
2786 		ptr[i] = i & 0xFF;
2787 
2788 	/* Warmup iteration */
2789 	ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
2790 	if (ret)
2791 		return ret;
2792 
2793 	ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
2794 	if (ret)
2795 		return ret;
2796 
2797 	/* Benchmark iterations */
2798 	for (i = 0; i < iterations; i++) {
2799 		/* System to device migration */
2800 		start = get_time_ms();
2801 
2802 		ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
2803 		if (ret)
2804 			return ret;
2805 
2806 		end = get_time_ms();
2807 		s2d_total += (end - start);
2808 
2809 		/* Device to system migration */
2810 		start = get_time_ms();
2811 
2812 		ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
2813 		if (ret)
2814 			return ret;
2815 
2816 		end = get_time_ms();
2817 		d2s_total += (end - start);
2818 	}
2819 
2820 	/* Calculate average times and throughput */
2821 	results->sys_to_dev_time = s2d_total / iterations;
2822 	results->dev_to_sys_time = d2s_total / iterations;
2823 	results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
2824 				 (results->sys_to_dev_time / 1000.0);
2825 	results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
2826 				 (results->dev_to_sys_time / 1000.0);
2827 
2828 	/* Cleanup */
2829 	hmm_buffer_free(buffer);
2830 	return 0;
2831 }
2832 
2833 /*
2834  * Benchmark THP migration with different buffer sizes
2835  */
2836 TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120)
2837 {
2838 	struct benchmark_results thp_results, regular_results;
2839 	size_t thp_size = 2 * 1024 * 1024; /* 2MB - typical THP size */
2840 	int iterations = 5;
2841 
2842 	printf("\nHMM THP Migration Benchmark\n");
2843 	printf("---------------------------\n");
2844 	printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
2845 
2846 	/* Test different buffer sizes */
2847 	size_t test_sizes[] = {
2848 		thp_size / 4,      /* 512KB - smaller than THP */
2849 		thp_size / 2,      /* 1MB - half THP */
2850 		thp_size,          /* 2MB - single THP */
2851 		thp_size * 2,      /* 4MB - two THPs */
2852 		thp_size * 4,      /* 8MB - four THPs */
2853 		thp_size * 8,       /* 16MB - eight THPs */
2854 		thp_size * 128,       /* 256MB - one twenty eight THPs */
2855 	};
2856 
2857 	static const char *const test_names[] = {
2858 		"Small Buffer (512KB)",
2859 		"Half THP Size (1MB)",
2860 		"Single THP Size (2MB)",
2861 		"Two THP Size (4MB)",
2862 		"Four THP Size (8MB)",
2863 		"Eight THP Size (16MB)",
2864 		"One twenty eight THP Size (256MB)"
2865 	};
2866 
2867 	int num_tests = ARRAY_SIZE(test_sizes);
2868 
2869 	/* Run all tests */
2870 	for (int i = 0; i < num_tests; i++) {
2871 		/* Test with THP */
2872 		ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i],
2873 					iterations, &thp_results), 0);
2874 
2875 		/* Test without THP */
2876 		ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i],
2877 					iterations, &regular_results), 0);
2878 
2879 		/* Print results */
2880 		print_benchmark_results(test_names[i], test_sizes[i],
2881 					&thp_results, &regular_results);
2882 	}
2883 }
/*
 * NOTE(review): TEST_HARNESS_MAIN comes from kselftest_harness.h, which is
 * not part of this file; it presumably supplies the program entry point for
 * the TEST_F()/TEST_F_TIMEOUT() cases above. Confirm against the harness.
 */
TEST_HARNESS_MAIN
2885