xref: /linux/tools/testing/selftests/mm/hmm-tests.c (revision cea5702144615878600d3a39b5d8b3cc34719012)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * HMM stands for Heterogeneous Memory Management, it is a helper layer inside
4  * the linux kernel to help device drivers mirror a process address space in
5  * the device. This allows the device to use the same address space which
6  * makes communication and data exchange a lot easier.
7  *
8  * This framework's sole purpose is to exercise various code paths inside
9  * the kernel to make sure that HMM performs as expected and to flush out any
10  * bugs.
11  */
12 
13 #include "kselftest_harness.h"
14 #include "hugepage_settings.h"
15 
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <strings.h>
23 #include <time.h>
24 #include <pthread.h>
25 #include <limits.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <sys/mman.h>
29 #include <sys/ioctl.h>
30 #include <sys/time.h>
31 
32 /*
33  * This is a private UAPI to the kernel test module so it isn't exported
34  * in the usual include/uapi/... directory.
35  */
36 #include <lib/test_hmm_uapi.h>
37 #include <mm/gup_test.h>
38 #include <mm/vm_util.h>
39 
/*
 * Bookkeeping for one test buffer: a CPU mapping together with the
 * user-space staging area that is handed to the dmirror driver with it.
 */
struct hmm_buffer {
	/* Start of the mmap()ed range; passed to the driver as cmd.addr. */
	void *ptr;
	/* malloc()ed staging area; passed to the driver as cmd.ptr. */
	void *mirror;
	/* Length in bytes used when ptr is unmapped. */
	unsigned long size;
	/* Backing file descriptor, or -1 for anonymous mappings. */
	int fd;
	/* cpages value copied back from the last dmirror command. */
	uint64_t cpages;
	/* faults value copied back from the last dmirror command. */
	uint64_t faults;
};
48 
/* Unit numbers of the dmirror device nodes, as passed to hmm_open(). */
enum {
	HMM_PRIVATE_DEVICE_ONE = 0,
	HMM_PRIVATE_DEVICE_TWO = 1,
	HMM_COHERENCE_DEVICE_ONE = 2,
	HMM_COHERENCE_DEVICE_TWO = 3,
};
55 
/* Byte-size constants. */
#define ONEKB		(1 << 10)
#define ONEMEG		(1 << 20)
#define TWOMEG		(1 << 21)
/* Default test buffer size: 4 MiB. */
#define HMM_BUFFER_SIZE (1024 << 12)
/* Capacity of the on-stack path buffers. */
#define HMM_PATH_MAX    64
/* NOTE(review): not used in the part of the file visible here; presumably a repeat count. */
#define NTIMES		10

/*
 * Round x up to the next multiple of a, which must be a power of two.
 * Both arguments are fully parenthesized so that expression arguments
 * such as "1 << n" expand with the intended precedence.
 */
#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
/*
 * Local fallbacks for the two FOLL_* flags needed here (values copied from
 * mm.h), used only when no included header already defines them.
 */

#if !defined(FOLL_WRITE)
# define FOLL_WRITE	0x01	/* check pte is writable */
#endif

#if !defined(FOLL_LONGTERM)
# define FOLL_LONGTERM	0x100	/* mapping lifetime is indefinite */
#endif
73 
/*
 * NOTE(review): this macro is not defined in this file (presumably it comes
 * from hugepage_settings.h) and looks like it reserves one default-size huge
 * page for the hugetlbfs test -- confirm against the header.
 */
HUGETLB_SETUP_DEFAULT_PAGES(1)
75 
/* Per-test state of the single-device "hmm" fixture, filled in by its setup. */
FIXTURE(hmm)
{
	/* Descriptor returned by hmm_open() for variant->device_number. */
	int		fd;
	/* Value of sysconf(_SC_PAGE_SIZE). */
	unsigned int	page_size;
	/* ffs(page_size) - 1, used to convert between bytes and pages. */
	unsigned int	page_shift;
};
82 
/* Parameters that distinguish the variants of the "hmm" fixture. */
FIXTURE_VARIANT(hmm)
{
	/* dmirror unit number that the fixture setup passes to hmm_open(). */
	int     device_number;
};
87 
/* Run the "hmm" tests against the first private dmirror unit. */
FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
{
	.device_number = HMM_PRIVATE_DEVICE_ONE,
};
92 
/*
 * Run the "hmm" tests against the first coherent dmirror unit; the fixture
 * setup skips this variant when that device cannot be opened.
 */
FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
{
	.device_number = HMM_COHERENCE_DEVICE_ONE,
};
97 
/* Per-test state of the two-device "hmm2" fixture, filled in by its setup. */
FIXTURE(hmm2)
{
	/* Descriptor returned by hmm_open() for variant->device_number0. */
	int		fd0;
	/* Descriptor returned by hmm_open() for variant->device_number1. */
	int		fd1;
	/* Value of sysconf(_SC_PAGE_SIZE). */
	unsigned int	page_size;
	/* ffs(page_size) - 1, used to convert between bytes and pages. */
	unsigned int	page_shift;
};
105 
/* Parameters that distinguish the variants of the "hmm2" fixture. */
FIXTURE_VARIANT(hmm2)
{
	/* dmirror unit number opened as self->fd0. */
	int     device_number0;
	/* dmirror unit number opened as self->fd1. */
	int     device_number1;
};
111 
/* Run the "hmm2" tests against the two private dmirror units. */
FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
{
	.device_number0 = HMM_PRIVATE_DEVICE_ONE,
	.device_number1 = HMM_PRIVATE_DEVICE_TWO,
};
117 
/*
 * Run the "hmm2" tests against the two coherent dmirror units; the fixture
 * setup skips this variant when the first of them cannot be opened.
 */
FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
{
	.device_number0 = HMM_COHERENCE_DEVICE_ONE,
	.device_number1 = HMM_COHERENCE_DEVICE_TWO,
};
123 
124 static int hmm_open(int unit)
125 {
126 	char pathname[HMM_PATH_MAX];
127 	int fd;
128 
129 	snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit);
130 	fd = open(pathname, O_RDWR, 0);
131 	if (fd < 0)
132 		fprintf(stderr, "could not open hmm dmirror driver (%s)\n",
133 			pathname);
134 	return fd;
135 }
136 
137 static bool hmm_is_coherent_type(int dev_num)
138 {
139 	return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
140 }
141 
142 FIXTURE_SETUP(hmm)
143 {
144 	self->page_size = sysconf(_SC_PAGE_SIZE);
145 	self->page_shift = ffs(self->page_size) - 1;
146 
147 	self->fd = hmm_open(variant->device_number);
148 	if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
149 		SKIP(return, "DEVICE_COHERENT not available");
150 	ASSERT_GE(self->fd, 0);
151 }
152 
153 FIXTURE_SETUP(hmm2)
154 {
155 	self->page_size = sysconf(_SC_PAGE_SIZE);
156 	self->page_shift = ffs(self->page_size) - 1;
157 
158 	self->fd0 = hmm_open(variant->device_number0);
159 	if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
160 		SKIP(return, "DEVICE_COHERENT not available");
161 	ASSERT_GE(self->fd0, 0);
162 	self->fd1 = hmm_open(variant->device_number1);
163 	ASSERT_GE(self->fd1, 0);
164 }
165 
166 FIXTURE_TEARDOWN(hmm)
167 {
168 	int ret = close(self->fd);
169 
170 	ASSERT_EQ(ret, 0);
171 	self->fd = -1;
172 }
173 
174 FIXTURE_TEARDOWN(hmm2)
175 {
176 	int ret = close(self->fd0);
177 
178 	ASSERT_EQ(ret, 0);
179 	self->fd0 = -1;
180 
181 	ret = close(self->fd1);
182 	ASSERT_EQ(ret, 0);
183 	self->fd1 = -1;
184 }
185 
186 static int hmm_dmirror_cmd(int fd,
187 			   unsigned long request,
188 			   struct hmm_buffer *buffer,
189 			   unsigned long npages)
190 {
191 	struct hmm_dmirror_cmd cmd;
192 	int ret;
193 
194 	/* Simulate a device reading system memory. */
195 	cmd.addr = (__u64)buffer->ptr;
196 	cmd.ptr = (__u64)buffer->mirror;
197 	cmd.npages = npages;
198 
199 	for (;;) {
200 		ret = ioctl(fd, request, &cmd);
201 		if (ret == 0)
202 			break;
203 		if (errno == EINTR)
204 			continue;
205 		return -errno;
206 	}
207 	buffer->cpages = cmd.cpages;
208 	buffer->faults = cmd.faults;
209 
210 	return 0;
211 }
212 
213 static void hmm_buffer_free(struct hmm_buffer *buffer)
214 {
215 	if (buffer == NULL)
216 		return;
217 
218 	if (buffer->ptr) {
219 		munmap(buffer->ptr, buffer->size);
220 		buffer->ptr = NULL;
221 	}
222 	free(buffer->mirror);
223 	free(buffer);
224 }
225 
226 /*
227  * Create a temporary file that will be deleted on close.
228  */
229 static int hmm_create_file(unsigned long size)
230 {
231 	char path[HMM_PATH_MAX];
232 	int fd;
233 
234 	strcpy(path, "/tmp");
235 	fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600);
236 	if (fd >= 0) {
237 		int r;
238 
239 		do {
240 			r = ftruncate(fd, size);
241 		} while (r == -1 && errno == EINTR);
242 		if (!r)
243 			return fd;
244 		close(fd);
245 	}
246 	return -1;
247 }
248 
/*
 * Return a random unsigned number read from /dev/urandom.
 *
 * The descriptor is opened on first use and kept open for later calls.
 * Returns ~0U, after printing a diagnostic to stderr, when the device cannot
 * be opened or does not deliver a complete value.
 */
static unsigned int hmm_random(void)
{
	static int fd = -1;
	unsigned int r;
	size_t filled = 0;

	if (fd < 0) {
		fd = open("/dev/urandom", O_RDONLY);
		if (fd < 0) {
			fprintf(stderr, "%s:%d failed to open /dev/urandom\n",
					__FILE__, __LINE__);
			return ~0U;
		}
	}

	/*
	 * Keep reading until all of r is filled, so that a short or
	 * interrupted read can never leave part of the result uninitialized.
	 */
	while (filled < sizeof(r)) {
		ssize_t got = read(fd, (unsigned char *)&r + filled,
				   sizeof(r) - filled);

		if (got > 0) {
			filled += (size_t)got;
			continue;
		}
		if (got < 0 && errno == EINTR)
			continue;

		fprintf(stderr, "%s:%d failed to read /dev/urandom\n",
				__FILE__, __LINE__);
		return ~0U;
	}
	return r;
}
268 
/*
 * Sleep for @n nanoseconds.  The value is placed directly in tv_nsec and
 * the result of nanosleep() is not checked.
 */
static void hmm_nanosleep(unsigned int n)
{
	struct timespec delay = {
		.tv_sec = 0,
		.tv_nsec = n,
	};

	nanosleep(&delay, NULL);
}
277 
278 static int hmm_migrate_sys_to_dev(int fd,
279 				   struct hmm_buffer *buffer,
280 				   unsigned long npages)
281 {
282 	return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
283 }
284 
285 static int hmm_migrate_dev_to_sys(int fd,
286 				   struct hmm_buffer *buffer,
287 				   unsigned long npages)
288 {
289 	return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
290 }
291 
/*
 * Simple NULL test of device open/close.
 */
TEST_F(hmm, open_close)
{
	/*
	 * Intentionally empty: the fixture setup opens the device and the
	 * fixture teardown closes it, which is all this test exercises.
	 */
}
298 
299 /*
300  * Read private anonymous memory.
301  */
302 TEST_F(hmm, anon_read)
303 {
304 	struct hmm_buffer *buffer;
305 	unsigned long npages;
306 	unsigned long size;
307 	unsigned long i;
308 	int *ptr;
309 	int ret;
310 	int val;
311 
312 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
313 	ASSERT_NE(npages, 0);
314 	size = npages << self->page_shift;
315 
316 	buffer = malloc(sizeof(*buffer));
317 	ASSERT_NE(buffer, NULL);
318 
319 	buffer->fd = -1;
320 	buffer->size = size;
321 	buffer->mirror = malloc(size);
322 	ASSERT_NE(buffer->mirror, NULL);
323 
324 	buffer->ptr = mmap(NULL, size,
325 			   PROT_READ | PROT_WRITE,
326 			   MAP_PRIVATE | MAP_ANONYMOUS,
327 			   buffer->fd, 0);
328 	ASSERT_NE(buffer->ptr, MAP_FAILED);
329 
330 	/*
331 	 * Initialize buffer in system memory but leave the first two pages
332 	 * zero (pte_none and pfn_zero).
333 	 */
334 	i = 2 * self->page_size / sizeof(*ptr);
335 	for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
336 		ptr[i] = i;
337 
338 	/* Set buffer permission to read-only. */
339 	ret = mprotect(buffer->ptr, size, PROT_READ);
340 	ASSERT_EQ(ret, 0);
341 
342 	/* Populate the CPU page table with a special zero page. */
343 	val = *(int *)(buffer->ptr + self->page_size);
344 	ASSERT_EQ(val, 0);
345 
346 	/* Simulate a device reading system memory. */
347 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
348 	ASSERT_EQ(ret, 0);
349 	ASSERT_EQ(buffer->cpages, npages);
350 	ASSERT_EQ(buffer->faults, 1);
351 
352 	/* Check what the device read. */
353 	ptr = buffer->mirror;
354 	for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i)
355 		ASSERT_EQ(ptr[i], 0);
356 	for (; i < size / sizeof(*ptr); ++i)
357 		ASSERT_EQ(ptr[i], i);
358 
359 	hmm_buffer_free(buffer);
360 }
361 
362 /*
363  * Read private anonymous memory which has been protected with
364  * mprotect() PROT_NONE.
365  */
366 TEST_F(hmm, anon_read_prot)
367 {
368 	struct hmm_buffer *buffer;
369 	unsigned long npages;
370 	unsigned long size;
371 	unsigned long i;
372 	int *ptr;
373 	int ret;
374 
375 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
376 	ASSERT_NE(npages, 0);
377 	size = npages << self->page_shift;
378 
379 	buffer = malloc(sizeof(*buffer));
380 	ASSERT_NE(buffer, NULL);
381 
382 	buffer->fd = -1;
383 	buffer->size = size;
384 	buffer->mirror = malloc(size);
385 	ASSERT_NE(buffer->mirror, NULL);
386 
387 	buffer->ptr = mmap(NULL, size,
388 			   PROT_READ | PROT_WRITE,
389 			   MAP_PRIVATE | MAP_ANONYMOUS,
390 			   buffer->fd, 0);
391 	ASSERT_NE(buffer->ptr, MAP_FAILED);
392 
393 	/* Initialize buffer in system memory. */
394 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
395 		ptr[i] = i;
396 
397 	/* Initialize mirror buffer so we can verify it isn't written. */
398 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
399 		ptr[i] = -i;
400 
401 	/* Protect buffer from reading. */
402 	ret = mprotect(buffer->ptr, size, PROT_NONE);
403 	ASSERT_EQ(ret, 0);
404 
405 	/* Simulate a device reading system memory. */
406 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
407 	ASSERT_EQ(ret, -EFAULT);
408 
409 	/* Allow CPU to read the buffer so we can check it. */
410 	ret = mprotect(buffer->ptr, size, PROT_READ);
411 	ASSERT_EQ(ret, 0);
412 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
413 		ASSERT_EQ(ptr[i], i);
414 
415 	/* Check what the device read. */
416 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
417 		ASSERT_EQ(ptr[i], -i);
418 
419 	hmm_buffer_free(buffer);
420 }
421 
422 /*
423  * Write private anonymous memory.
424  */
425 TEST_F(hmm, anon_write)
426 {
427 	struct hmm_buffer *buffer;
428 	unsigned long npages;
429 	unsigned long size;
430 	unsigned long i;
431 	int *ptr;
432 	int ret;
433 
434 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
435 	ASSERT_NE(npages, 0);
436 	size = npages << self->page_shift;
437 
438 	buffer = malloc(sizeof(*buffer));
439 	ASSERT_NE(buffer, NULL);
440 
441 	buffer->fd = -1;
442 	buffer->size = size;
443 	buffer->mirror = malloc(size);
444 	ASSERT_NE(buffer->mirror, NULL);
445 
446 	buffer->ptr = mmap(NULL, size,
447 			   PROT_READ | PROT_WRITE,
448 			   MAP_PRIVATE | MAP_ANONYMOUS,
449 			   buffer->fd, 0);
450 	ASSERT_NE(buffer->ptr, MAP_FAILED);
451 
452 	/* Initialize data that the device will write to buffer->ptr. */
453 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
454 		ptr[i] = i;
455 
456 	/* Simulate a device writing system memory. */
457 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
458 	ASSERT_EQ(ret, 0);
459 	ASSERT_EQ(buffer->cpages, npages);
460 	ASSERT_EQ(buffer->faults, 1);
461 
462 	/* Check what the device wrote. */
463 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
464 		ASSERT_EQ(ptr[i], i);
465 
466 	hmm_buffer_free(buffer);
467 }
468 
469 /*
470  * Write private anonymous memory which has been protected with
471  * mprotect() PROT_READ.
472  */
473 TEST_F(hmm, anon_write_prot)
474 {
475 	struct hmm_buffer *buffer;
476 	unsigned long npages;
477 	unsigned long size;
478 	unsigned long i;
479 	int *ptr;
480 	int ret;
481 
482 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
483 	ASSERT_NE(npages, 0);
484 	size = npages << self->page_shift;
485 
486 	buffer = malloc(sizeof(*buffer));
487 	ASSERT_NE(buffer, NULL);
488 
489 	buffer->fd = -1;
490 	buffer->size = size;
491 	buffer->mirror = malloc(size);
492 	ASSERT_NE(buffer->mirror, NULL);
493 
494 	buffer->ptr = mmap(NULL, size,
495 			   PROT_READ,
496 			   MAP_PRIVATE | MAP_ANONYMOUS,
497 			   buffer->fd, 0);
498 	ASSERT_NE(buffer->ptr, MAP_FAILED);
499 
500 	/* Simulate a device reading a zero page of memory. */
501 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1);
502 	ASSERT_EQ(ret, 0);
503 	ASSERT_EQ(buffer->cpages, 1);
504 	ASSERT_EQ(buffer->faults, 1);
505 
506 	/* Initialize data that the device will write to buffer->ptr. */
507 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
508 		ptr[i] = i;
509 
510 	/* Simulate a device writing system memory. */
511 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
512 	ASSERT_EQ(ret, -EPERM);
513 
514 	/* Check what the device wrote. */
515 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
516 		ASSERT_EQ(ptr[i], 0);
517 
518 	/* Now allow writing and see that the zero page is replaced. */
519 	ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ);
520 	ASSERT_EQ(ret, 0);
521 
522 	/* Simulate a device writing system memory. */
523 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
524 	ASSERT_EQ(ret, 0);
525 	ASSERT_EQ(buffer->cpages, npages);
526 	ASSERT_EQ(buffer->faults, 1);
527 
528 	/* Check what the device wrote. */
529 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
530 		ASSERT_EQ(ptr[i], i);
531 
532 	hmm_buffer_free(buffer);
533 }
534 
535 /*
536  * Check that a device writing an anonymous private mapping
537  * will copy-on-write if a child process inherits the mapping.
538  *
539  * Also verifies after fork() memory the device can be read by child.
540  */
541 TEST_F(hmm, anon_write_child)
542 {
543 	struct hmm_buffer *buffer;
544 	unsigned long npages;
545 	unsigned long size;
546 	unsigned long i;
547 	void *old_ptr;
548 	void *map;
549 	int *ptr;
550 	pid_t pid;
551 	int child_fd;
552 	int ret, use_thp, migrate;
553 
554 	for (migrate = 0; migrate < 2; ++migrate) {
555 		for (use_thp = 0; use_thp < 2; ++use_thp) {
556 			npages = ALIGN(use_thp ? read_pmd_pagesize() : HMM_BUFFER_SIZE,
557 				       self->page_size) >> self->page_shift;
558 			ASSERT_NE(npages, 0);
559 			size = npages << self->page_shift;
560 
561 			buffer = malloc(sizeof(*buffer));
562 			ASSERT_NE(buffer, NULL);
563 
564 			buffer->fd = -1;
565 			buffer->size = size * 2;
566 			buffer->mirror = malloc(size);
567 			ASSERT_NE(buffer->mirror, NULL);
568 
569 			buffer->ptr = mmap(NULL, size * 2,
570 					   PROT_READ | PROT_WRITE,
571 					   MAP_PRIVATE | MAP_ANONYMOUS,
572 					   buffer->fd, 0);
573 			ASSERT_NE(buffer->ptr, MAP_FAILED);
574 
575 			old_ptr = buffer->ptr;
576 			if (use_thp) {
577 				map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
578 				ret = madvise(map, size, MADV_HUGEPAGE);
579 				ASSERT_EQ(ret, 0);
580 				buffer->ptr = map;
581 			}
582 
583 			/* Initialize buffer->ptr so we can tell if it is written. */
584 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
585 				ptr[i] = i;
586 
587 			/* Initialize data that the device will write to buffer->ptr. */
588 			for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
589 				ptr[i] = -i;
590 
591 			if (migrate) {
592 				ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
593 				ASSERT_EQ(ret, 0);
594 				ASSERT_EQ(buffer->cpages, npages);
595 
596 			}
597 
598 			pid = fork();
599 			if (pid == -1)
600 				ASSERT_EQ(pid, 0);
601 			if (pid != 0) {
602 				waitpid(pid, &ret, 0);
603 				ASSERT_EQ(WIFEXITED(ret), 1);
604 
605 				/* Check that the parent's buffer did not change. */
606 				for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
607 					ASSERT_EQ(ptr[i], i);
608 
609 				buffer->ptr = old_ptr;
610 				hmm_buffer_free(buffer);
611 				continue;
612 			}
613 
614 			/* Check that we see the parent's values. */
615 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
616 				ASSERT_EQ(ptr[i], i);
617 			if (!migrate) {
618 				for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
619 					ASSERT_EQ(ptr[i], -i);
620 			}
621 
622 			/* The child process needs its own mirror to its own mm. */
623 			child_fd = hmm_open(0);
624 			ASSERT_GE(child_fd, 0);
625 
626 			/* Simulate a device writing system memory. */
627 			ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
628 			ASSERT_EQ(ret, 0);
629 			ASSERT_EQ(buffer->cpages, npages);
630 			ASSERT_EQ(buffer->faults, 1);
631 
632 			/* Check what the device wrote. */
633 			if (!migrate) {
634 				for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
635 					ASSERT_EQ(ptr[i], -i);
636 			}
637 
638 			close(child_fd);
639 			_exit(0);
640 		}
641 	}
642 }
643 
644 /*
645  * Check that a device writing an anonymous shared mapping
646  * will not copy-on-write if a child process inherits the mapping.
647  */
648 TEST_F(hmm, anon_write_child_shared)
649 {
650 	struct hmm_buffer *buffer;
651 	unsigned long npages;
652 	unsigned long size;
653 	unsigned long i;
654 	int *ptr;
655 	pid_t pid;
656 	int child_fd;
657 	int ret;
658 
659 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
660 	ASSERT_NE(npages, 0);
661 	size = npages << self->page_shift;
662 
663 	buffer = malloc(sizeof(*buffer));
664 	ASSERT_NE(buffer, NULL);
665 
666 	buffer->fd = -1;
667 	buffer->size = size;
668 	buffer->mirror = malloc(size);
669 	ASSERT_NE(buffer->mirror, NULL);
670 
671 	buffer->ptr = mmap(NULL, size,
672 			   PROT_READ | PROT_WRITE,
673 			   MAP_SHARED | MAP_ANONYMOUS,
674 			   buffer->fd, 0);
675 	ASSERT_NE(buffer->ptr, MAP_FAILED);
676 
677 	/* Initialize buffer->ptr so we can tell if it is written. */
678 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
679 		ptr[i] = i;
680 
681 	/* Initialize data that the device will write to buffer->ptr. */
682 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
683 		ptr[i] = -i;
684 
685 	pid = fork();
686 	if (pid == -1)
687 		ASSERT_EQ(pid, 0);
688 	if (pid != 0) {
689 		waitpid(pid, &ret, 0);
690 		ASSERT_EQ(WIFEXITED(ret), 1);
691 
692 		/* Check that the parent's buffer did change. */
693 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
694 			ASSERT_EQ(ptr[i], -i);
695 		return;
696 	}
697 
698 	/* Check that we see the parent's values. */
699 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
700 		ASSERT_EQ(ptr[i], i);
701 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
702 		ASSERT_EQ(ptr[i], -i);
703 
704 	/* The child process needs its own mirror to its own mm. */
705 	child_fd = hmm_open(0);
706 	ASSERT_GE(child_fd, 0);
707 
708 	/* Simulate a device writing system memory. */
709 	ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
710 	ASSERT_EQ(ret, 0);
711 	ASSERT_EQ(buffer->cpages, npages);
712 	ASSERT_EQ(buffer->faults, 1);
713 
714 	/* Check what the device wrote. */
715 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
716 		ASSERT_EQ(ptr[i], -i);
717 
718 	close(child_fd);
719 	_exit(0);
720 }
721 
722 /*
723  * Write private anonymous huge page.
724  */
725 TEST_F(hmm, anon_write_huge)
726 {
727 	struct hmm_buffer *buffer;
728 	unsigned long npages;
729 	unsigned long size;
730 	unsigned long i;
731 	void *old_ptr;
732 	void *map;
733 	int *ptr;
734 	int ret;
735 
736 	size = 2 * read_pmd_pagesize();
737 
738 	buffer = malloc(sizeof(*buffer));
739 	ASSERT_NE(buffer, NULL);
740 
741 	buffer->fd = -1;
742 	buffer->size = size;
743 	buffer->mirror = malloc(size);
744 	ASSERT_NE(buffer->mirror, NULL);
745 
746 	buffer->ptr = mmap(NULL, size,
747 			   PROT_READ | PROT_WRITE,
748 			   MAP_PRIVATE | MAP_ANONYMOUS,
749 			   buffer->fd, 0);
750 	ASSERT_NE(buffer->ptr, MAP_FAILED);
751 
752 	size /= 2;
753 	npages = size >> self->page_shift;
754 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
755 	ret = madvise(map, size, MADV_HUGEPAGE);
756 	ASSERT_EQ(ret, 0);
757 	old_ptr = buffer->ptr;
758 	buffer->ptr = map;
759 
760 	/* Initialize data that the device will write to buffer->ptr. */
761 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
762 		ptr[i] = i;
763 
764 	/* Simulate a device writing system memory. */
765 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
766 	ASSERT_EQ(ret, 0);
767 	ASSERT_EQ(buffer->cpages, npages);
768 	ASSERT_EQ(buffer->faults, 1);
769 
770 	/* Check what the device wrote. */
771 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
772 		ASSERT_EQ(ptr[i], i);
773 
774 	buffer->ptr = old_ptr;
775 	hmm_buffer_free(buffer);
776 }
777 
778 /*
779  * Write huge TLBFS page.
780  */
781 TEST_F(hmm, anon_write_hugetlbfs)
782 {
783 	struct hmm_buffer *buffer;
784 	unsigned long npages;
785 	unsigned long size;
786 	unsigned long default_hsize = default_huge_page_size();
787 	unsigned long i;
788 	int *ptr;
789 	int ret;
790 
791 	if (!hugetlb_free_default_pages())
792 		SKIP(return, "Not enough huge pages");
793 
794 	size = ALIGN(TWOMEG, default_hsize);
795 	npages = size >> self->page_shift;
796 
797 	buffer = malloc(sizeof(*buffer));
798 	ASSERT_NE(buffer, NULL);
799 
800 	buffer->ptr = mmap(NULL, size,
801 				   PROT_READ | PROT_WRITE,
802 				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
803 				   -1, 0);
804 	if (buffer->ptr == MAP_FAILED) {
805 		free(buffer);
806 		SKIP(return, "Huge page could not be allocated");
807 	}
808 
809 	buffer->fd = -1;
810 	buffer->size = size;
811 	buffer->mirror = malloc(size);
812 	ASSERT_NE(buffer->mirror, NULL);
813 
814 	/* Initialize data that the device will write to buffer->ptr. */
815 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
816 		ptr[i] = i;
817 
818 	/* Simulate a device writing system memory. */
819 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
820 	ASSERT_EQ(ret, 0);
821 	ASSERT_EQ(buffer->cpages, npages);
822 	ASSERT_EQ(buffer->faults, 1);
823 
824 	/* Check what the device wrote. */
825 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
826 		ASSERT_EQ(ptr[i], i);
827 
828 	munmap(buffer->ptr, buffer->size);
829 	buffer->ptr = NULL;
830 	hmm_buffer_free(buffer);
831 }
832 
833 /*
834  * Read mmap'ed file memory.
835  */
836 TEST_F(hmm, file_read)
837 {
838 	struct hmm_buffer *buffer;
839 	unsigned long npages;
840 	unsigned long size;
841 	unsigned long i;
842 	int *ptr;
843 	int ret;
844 	int fd;
845 	ssize_t len;
846 
847 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
848 	ASSERT_NE(npages, 0);
849 	size = npages << self->page_shift;
850 
851 	fd = hmm_create_file(size);
852 	ASSERT_GE(fd, 0);
853 
854 	buffer = malloc(sizeof(*buffer));
855 	ASSERT_NE(buffer, NULL);
856 
857 	buffer->fd = fd;
858 	buffer->size = size;
859 	buffer->mirror = malloc(size);
860 	ASSERT_NE(buffer->mirror, NULL);
861 
862 	/* Write initial contents of the file. */
863 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
864 		ptr[i] = i;
865 	len = pwrite(fd, buffer->mirror, size, 0);
866 	ASSERT_EQ(len, size);
867 	memset(buffer->mirror, 0, size);
868 
869 	buffer->ptr = mmap(NULL, size,
870 			   PROT_READ,
871 			   MAP_SHARED,
872 			   buffer->fd, 0);
873 	ASSERT_NE(buffer->ptr, MAP_FAILED);
874 
875 	/* Simulate a device reading system memory. */
876 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
877 	ASSERT_EQ(ret, 0);
878 	ASSERT_EQ(buffer->cpages, npages);
879 	ASSERT_EQ(buffer->faults, 1);
880 
881 	/* Check what the device read. */
882 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
883 		ASSERT_EQ(ptr[i], i);
884 
885 	hmm_buffer_free(buffer);
886 }
887 
888 /*
889  * Write mmap'ed file memory.
890  */
891 TEST_F(hmm, file_write)
892 {
893 	struct hmm_buffer *buffer;
894 	unsigned long npages;
895 	unsigned long size;
896 	unsigned long i;
897 	int *ptr;
898 	int ret;
899 	int fd;
900 	ssize_t len;
901 
902 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
903 	ASSERT_NE(npages, 0);
904 	size = npages << self->page_shift;
905 
906 	fd = hmm_create_file(size);
907 	ASSERT_GE(fd, 0);
908 
909 	buffer = malloc(sizeof(*buffer));
910 	ASSERT_NE(buffer, NULL);
911 
912 	buffer->fd = fd;
913 	buffer->size = size;
914 	buffer->mirror = malloc(size);
915 	ASSERT_NE(buffer->mirror, NULL);
916 
917 	buffer->ptr = mmap(NULL, size,
918 			   PROT_READ | PROT_WRITE,
919 			   MAP_SHARED,
920 			   buffer->fd, 0);
921 	ASSERT_NE(buffer->ptr, MAP_FAILED);
922 
923 	/* Initialize data that the device will write to buffer->ptr. */
924 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
925 		ptr[i] = i;
926 
927 	/* Simulate a device writing system memory. */
928 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
929 	ASSERT_EQ(ret, 0);
930 	ASSERT_EQ(buffer->cpages, npages);
931 	ASSERT_EQ(buffer->faults, 1);
932 
933 	/* Check what the device wrote. */
934 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
935 		ASSERT_EQ(ptr[i], i);
936 
937 	/* Check that the device also wrote the file. */
938 	len = pread(fd, buffer->mirror, size, 0);
939 	ASSERT_EQ(len, size);
940 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
941 		ASSERT_EQ(ptr[i], i);
942 
943 	hmm_buffer_free(buffer);
944 }
945 
946 /*
947  * Migrate anonymous memory to device private memory.
948  */
949 TEST_F(hmm, migrate)
950 {
951 	struct hmm_buffer *buffer;
952 	unsigned long npages;
953 	unsigned long size;
954 	unsigned long i;
955 	int *ptr;
956 	int ret;
957 
958 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
959 	ASSERT_NE(npages, 0);
960 	size = npages << self->page_shift;
961 
962 	buffer = malloc(sizeof(*buffer));
963 	ASSERT_NE(buffer, NULL);
964 
965 	buffer->fd = -1;
966 	buffer->size = size;
967 	buffer->mirror = malloc(size);
968 	ASSERT_NE(buffer->mirror, NULL);
969 
970 	buffer->ptr = mmap(NULL, size,
971 			   PROT_READ | PROT_WRITE,
972 			   MAP_PRIVATE | MAP_ANONYMOUS,
973 			   buffer->fd, 0);
974 	ASSERT_NE(buffer->ptr, MAP_FAILED);
975 
976 	/* Initialize buffer in system memory. */
977 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
978 		ptr[i] = i;
979 
980 	/* Migrate memory to device. */
981 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
982 	ASSERT_EQ(ret, 0);
983 	ASSERT_EQ(buffer->cpages, npages);
984 
985 	/* Check what the device read. */
986 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
987 		ASSERT_EQ(ptr[i], i);
988 
989 	hmm_buffer_free(buffer);
990 }
991 
992 /*
993  * Migrate private file memory to device private memory.
994  */
995 TEST_F(hmm, migrate_file_private)
996 {
997 	struct hmm_buffer *buffer;
998 	unsigned long npages;
999 	unsigned long size;
1000 	unsigned long i;
1001 	int *ptr;
1002 	int ret;
1003 	int fd;
1004 
1005 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1006 	ASSERT_NE(npages, 0);
1007 	size = npages << self->page_shift;
1008 
1009 	fd = hmm_create_file(size);
1010 	ASSERT_GE(fd, 0);
1011 
1012 	buffer = malloc(sizeof(*buffer));
1013 	ASSERT_NE(buffer, NULL);
1014 
1015 	buffer->fd = fd;
1016 	buffer->size = size;
1017 	buffer->mirror = malloc(size);
1018 	ASSERT_NE(buffer->mirror, NULL);
1019 
1020 	buffer->ptr = mmap(NULL, size,
1021 			   PROT_READ | PROT_WRITE,
1022 			   MAP_PRIVATE,
1023 			   buffer->fd, 0);
1024 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1025 
1026 	/* Initialize buffer in system memory. */
1027 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1028 		ptr[i] = i;
1029 
1030 	/* Migrate memory to device. */
1031 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1032 	ASSERT_EQ(ret, 0);
1033 	ASSERT_EQ(buffer->cpages, npages);
1034 
1035 	/* Check what the device read. */
1036 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1037 		ASSERT_EQ(ptr[i], i);
1038 
1039 	hmm_buffer_free(buffer);
1040 }
1041 
1042 /*
1043  * Migrate anonymous memory to device private memory and fault some of it back
1044  * to system memory, then try migrating the resulting mix of system and device
1045  * private memory to the device.
1046  */
1047 TEST_F(hmm, migrate_fault)
1048 {
1049 	struct hmm_buffer *buffer;
1050 	unsigned long npages;
1051 	unsigned long size;
1052 	unsigned long i;
1053 	int *ptr;
1054 	int ret;
1055 
1056 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1057 	ASSERT_NE(npages, 0);
1058 	size = npages << self->page_shift;
1059 
1060 	buffer = malloc(sizeof(*buffer));
1061 	ASSERT_NE(buffer, NULL);
1062 
1063 	buffer->fd = -1;
1064 	buffer->size = size;
1065 	buffer->mirror = malloc(size);
1066 	ASSERT_NE(buffer->mirror, NULL);
1067 
1068 	buffer->ptr = mmap(NULL, size,
1069 			   PROT_READ | PROT_WRITE,
1070 			   MAP_PRIVATE | MAP_ANONYMOUS,
1071 			   buffer->fd, 0);
1072 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1073 
1074 	/* Initialize buffer in system memory. */
1075 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1076 		ptr[i] = i;
1077 
1078 	/* Migrate memory to device. */
1079 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1080 	ASSERT_EQ(ret, 0);
1081 	ASSERT_EQ(buffer->cpages, npages);
1082 
1083 	/* Check what the device read. */
1084 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1085 		ASSERT_EQ(ptr[i], i);
1086 
1087 	/* Fault half the pages back to system memory and check them. */
1088 	for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
1089 		ASSERT_EQ(ptr[i], i);
1090 
1091 	/* Migrate memory to the device again. */
1092 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1093 	ASSERT_EQ(ret, 0);
1094 	ASSERT_EQ(buffer->cpages, npages);
1095 
1096 	/* Check what the device read. */
1097 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1098 		ASSERT_EQ(ptr[i], i);
1099 
1100 	hmm_buffer_free(buffer);
1101 }
1102 
1103 TEST_F(hmm, migrate_release)
1104 {
1105 	struct hmm_buffer *buffer;
1106 	unsigned long npages;
1107 	unsigned long size;
1108 	unsigned long i;
1109 	int *ptr;
1110 	int ret;
1111 
1112 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1113 	ASSERT_NE(npages, 0);
1114 	size = npages << self->page_shift;
1115 
1116 	buffer = malloc(sizeof(*buffer));
1117 	ASSERT_NE(buffer, NULL);
1118 
1119 	buffer->fd = -1;
1120 	buffer->size = size;
1121 	buffer->mirror = malloc(size);
1122 	ASSERT_NE(buffer->mirror, NULL);
1123 
1124 	buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
1125 			   MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
1126 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1127 
1128 	/* Initialize buffer in system memory. */
1129 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1130 		ptr[i] = i;
1131 
1132 	/* Migrate memory to device. */
1133 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1134 	ASSERT_EQ(ret, 0);
1135 	ASSERT_EQ(buffer->cpages, npages);
1136 
1137 	/* Check what the device read. */
1138 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1139 		ASSERT_EQ(ptr[i], i);
1140 
1141 	/* Release device memory. */
1142 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages);
1143 	ASSERT_EQ(ret, 0);
1144 
1145 	/* Fault pages back to system memory and check them. */
1146 	for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
1147 		ASSERT_EQ(ptr[i], i);
1148 
1149 	hmm_buffer_free(buffer);
1150 }
1151 
1152 /*
1153  * Migrate anonymous shared memory to device private memory.
1154  */
1155 TEST_F(hmm, migrate_shared)
1156 {
1157 	struct hmm_buffer *buffer;
1158 	unsigned long npages;
1159 	unsigned long size;
1160 	int ret;
1161 
1162 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1163 	ASSERT_NE(npages, 0);
1164 	size = npages << self->page_shift;
1165 
1166 	buffer = malloc(sizeof(*buffer));
1167 	ASSERT_NE(buffer, NULL);
1168 
1169 	buffer->fd = -1;
1170 	buffer->size = size;
1171 	buffer->mirror = malloc(size);
1172 	ASSERT_NE(buffer->mirror, NULL);
1173 
1174 	buffer->ptr = mmap(NULL, size,
1175 			   PROT_READ | PROT_WRITE,
1176 			   MAP_SHARED | MAP_ANONYMOUS,
1177 			   buffer->fd, 0);
1178 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1179 
1180 	/* Migrate memory to device. */
1181 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1182 	ASSERT_EQ(ret, -ENOENT);
1183 
1184 	hmm_buffer_free(buffer);
1185 }
1186 
1187 /*
1188  * Try to migrate various memory types to device private memory.
1189  */
1190 TEST_F(hmm2, migrate_mixed)
1191 {
1192 	struct hmm_buffer *buffer;
1193 	unsigned long npages;
1194 	unsigned long size;
1195 	int *ptr;
1196 	unsigned char *p;
1197 	int ret;
1198 	int val;
1199 
1200 	npages = 6;
1201 	size = npages << self->page_shift;
1202 
1203 	buffer = malloc(sizeof(*buffer));
1204 	ASSERT_NE(buffer, NULL);
1205 
1206 	buffer->fd = -1;
1207 	buffer->size = size;
1208 	buffer->mirror = malloc(size);
1209 	ASSERT_NE(buffer->mirror, NULL);
1210 
1211 	/* Reserve a range of addresses. */
1212 	buffer->ptr = mmap(NULL, size,
1213 			   PROT_NONE,
1214 			   MAP_PRIVATE | MAP_ANONYMOUS,
1215 			   buffer->fd, 0);
1216 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1217 	p = buffer->ptr;
1218 
1219 	/* Migrating a protected area should be an error. */
1220 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
1221 	ASSERT_EQ(ret, -EINVAL);
1222 
1223 	/* Punch a hole after the first page address. */
1224 	ret = munmap(buffer->ptr + self->page_size, self->page_size);
1225 	ASSERT_EQ(ret, 0);
1226 
1227 	/* We expect an error if the vma doesn't cover the range. */
1228 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
1229 	ASSERT_EQ(ret, -EINVAL);
1230 
1231 	/* Page 2 will be a read-only zero page. */
1232 	ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
1233 				PROT_READ);
1234 	ASSERT_EQ(ret, 0);
1235 	ptr = (int *)(buffer->ptr + 2 * self->page_size);
1236 	val = *ptr + 3;
1237 	ASSERT_EQ(val, 3);
1238 
1239 	/* Page 3 will be read-only. */
1240 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1241 				PROT_READ | PROT_WRITE);
1242 	ASSERT_EQ(ret, 0);
1243 	ptr = (int *)(buffer->ptr + 3 * self->page_size);
1244 	*ptr = val;
1245 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1246 				PROT_READ);
1247 	ASSERT_EQ(ret, 0);
1248 
1249 	/* Page 4-5 will be read-write. */
1250 	ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size,
1251 				PROT_READ | PROT_WRITE);
1252 	ASSERT_EQ(ret, 0);
1253 	ptr = (int *)(buffer->ptr + 4 * self->page_size);
1254 	*ptr = val;
1255 	ptr = (int *)(buffer->ptr + 5 * self->page_size);
1256 	*ptr = val;
1257 
1258 	/* Now try to migrate pages 2-5 to device 1. */
1259 	buffer->ptr = p + 2 * self->page_size;
1260 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
1261 	ASSERT_EQ(ret, 0);
1262 	ASSERT_EQ(buffer->cpages, 4);
1263 
1264 	/* Page 5 won't be migrated to device 0 because it's on device 1. */
1265 	buffer->ptr = p + 5 * self->page_size;
1266 	ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
1267 	ASSERT_EQ(ret, -ENOENT);
1268 	buffer->ptr = p;
1269 
1270 	buffer->ptr = p;
1271 	hmm_buffer_free(buffer);
1272 }
1273 
1274 /*
1275  * Migrate anonymous memory to device memory and back to system memory
1276  * multiple times. In case of private zone configuration, this is done
1277  * through fault pages accessed by CPU. In case of coherent zone configuration,
1278  * the pages from the device should be explicitly migrated back to system memory.
1279  * The reason is Coherent device zone has coherent access by CPU, therefore
1280  * it will not generate any page fault.
1281  */
1282 TEST_F(hmm, migrate_multiple)
1283 {
1284 	struct hmm_buffer *buffer;
1285 	unsigned long npages;
1286 	unsigned long size;
1287 	unsigned long i;
1288 	unsigned long c;
1289 	int *ptr;
1290 	int ret;
1291 
1292 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1293 	ASSERT_NE(npages, 0);
1294 	size = npages << self->page_shift;
1295 
1296 	for (c = 0; c < NTIMES; c++) {
1297 		buffer = malloc(sizeof(*buffer));
1298 		ASSERT_NE(buffer, NULL);
1299 
1300 		buffer->fd = -1;
1301 		buffer->size = size;
1302 		buffer->mirror = malloc(size);
1303 		ASSERT_NE(buffer->mirror, NULL);
1304 
1305 		buffer->ptr = mmap(NULL, size,
1306 				   PROT_READ | PROT_WRITE,
1307 				   MAP_PRIVATE | MAP_ANONYMOUS,
1308 				   buffer->fd, 0);
1309 		ASSERT_NE(buffer->ptr, MAP_FAILED);
1310 
1311 		/* Initialize buffer in system memory. */
1312 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1313 			ptr[i] = i;
1314 
1315 		/* Migrate memory to device. */
1316 		ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
1317 		ASSERT_EQ(ret, 0);
1318 		ASSERT_EQ(buffer->cpages, npages);
1319 
1320 		/* Check what the device read. */
1321 		for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1322 			ASSERT_EQ(ptr[i], i);
1323 
1324 		/* Migrate back to system memory and check them. */
1325 		if (hmm_is_coherent_type(variant->device_number)) {
1326 			ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
1327 			ASSERT_EQ(ret, 0);
1328 			ASSERT_EQ(buffer->cpages, npages);
1329 		}
1330 
1331 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1332 			ASSERT_EQ(ptr[i], i);
1333 
1334 		hmm_buffer_free(buffer);
1335 	}
1336 }
1337 
1338 /*
1339  * Read anonymous memory multiple times.
1340  */
1341 TEST_F(hmm, anon_read_multiple)
1342 {
1343 	struct hmm_buffer *buffer;
1344 	unsigned long npages;
1345 	unsigned long size;
1346 	unsigned long i;
1347 	unsigned long c;
1348 	int *ptr;
1349 	int ret;
1350 
1351 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1352 	ASSERT_NE(npages, 0);
1353 	size = npages << self->page_shift;
1354 
1355 	for (c = 0; c < NTIMES; c++) {
1356 		buffer = malloc(sizeof(*buffer));
1357 		ASSERT_NE(buffer, NULL);
1358 
1359 		buffer->fd = -1;
1360 		buffer->size = size;
1361 		buffer->mirror = malloc(size);
1362 		ASSERT_NE(buffer->mirror, NULL);
1363 
1364 		buffer->ptr = mmap(NULL, size,
1365 				   PROT_READ | PROT_WRITE,
1366 				   MAP_PRIVATE | MAP_ANONYMOUS,
1367 				   buffer->fd, 0);
1368 		ASSERT_NE(buffer->ptr, MAP_FAILED);
1369 
1370 		/* Initialize buffer in system memory. */
1371 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1372 			ptr[i] = i + c;
1373 
1374 		/* Simulate a device reading system memory. */
1375 		ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
1376 				      npages);
1377 		ASSERT_EQ(ret, 0);
1378 		ASSERT_EQ(buffer->cpages, npages);
1379 		ASSERT_EQ(buffer->faults, 1);
1380 
1381 		/* Check what the device read. */
1382 		for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1383 			ASSERT_EQ(ptr[i], i + c);
1384 
1385 		hmm_buffer_free(buffer);
1386 	}
1387 }
1388 
1389 void *unmap_buffer(void *p)
1390 {
1391 	struct hmm_buffer *buffer = p;
1392 
1393 	/* Delay for a bit and then unmap buffer while it is being read. */
1394 	hmm_nanosleep(hmm_random() % 32000);
1395 	munmap(buffer->ptr + buffer->size / 2, buffer->size / 2);
1396 	buffer->ptr = NULL;
1397 
1398 	return NULL;
1399 }
1400 
1401 /*
1402  * Try reading anonymous memory while it is being unmapped.
1403  */
1404 TEST_F(hmm, anon_teardown)
1405 {
1406 	unsigned long npages;
1407 	unsigned long size;
1408 	unsigned long c;
1409 	void *ret;
1410 
1411 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1412 	ASSERT_NE(npages, 0);
1413 	size = npages << self->page_shift;
1414 
1415 	for (c = 0; c < NTIMES; ++c) {
1416 		pthread_t thread;
1417 		struct hmm_buffer *buffer;
1418 		unsigned long i;
1419 		int *ptr;
1420 		int rc;
1421 
1422 		buffer = malloc(sizeof(*buffer));
1423 		ASSERT_NE(buffer, NULL);
1424 
1425 		buffer->fd = -1;
1426 		buffer->size = size;
1427 		buffer->mirror = malloc(size);
1428 		ASSERT_NE(buffer->mirror, NULL);
1429 
1430 		buffer->ptr = mmap(NULL, size,
1431 				   PROT_READ | PROT_WRITE,
1432 				   MAP_PRIVATE | MAP_ANONYMOUS,
1433 				   buffer->fd, 0);
1434 		ASSERT_NE(buffer->ptr, MAP_FAILED);
1435 
1436 		/* Initialize buffer in system memory. */
1437 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1438 			ptr[i] = i + c;
1439 
1440 		rc = pthread_create(&thread, NULL, unmap_buffer, buffer);
1441 		ASSERT_EQ(rc, 0);
1442 
1443 		/* Simulate a device reading system memory. */
1444 		rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
1445 				     npages);
1446 		if (rc == 0) {
1447 			ASSERT_EQ(buffer->cpages, npages);
1448 			ASSERT_EQ(buffer->faults, 1);
1449 
1450 			/* Check what the device read. */
1451 			for (i = 0, ptr = buffer->mirror;
1452 			     i < size / sizeof(*ptr);
1453 			     ++i)
1454 				ASSERT_EQ(ptr[i], i + c);
1455 		}
1456 
1457 		pthread_join(thread, &ret);
1458 		hmm_buffer_free(buffer);
1459 	}
1460 }
1461 
1462 /*
1463  * Test memory snapshot without faulting in pages accessed by the device.
1464  */
1465 TEST_F(hmm, mixedmap)
1466 {
1467 	struct hmm_buffer *buffer;
1468 	unsigned long npages;
1469 	unsigned long size;
1470 	unsigned char *m;
1471 	int ret;
1472 
1473 	npages = 1;
1474 	size = npages << self->page_shift;
1475 
1476 	buffer = malloc(sizeof(*buffer));
1477 	ASSERT_NE(buffer, NULL);
1478 
1479 	buffer->fd = -1;
1480 	buffer->size = size;
1481 	buffer->mirror = malloc(npages);
1482 	ASSERT_NE(buffer->mirror, NULL);
1483 
1484 
1485 	/* Reserve a range of addresses. */
1486 	buffer->ptr = mmap(NULL, size,
1487 			   PROT_READ | PROT_WRITE,
1488 			   MAP_PRIVATE,
1489 			   self->fd, 0);
1490 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1491 
1492 	/* Simulate a device snapshotting CPU pagetables. */
1493 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1494 	ASSERT_EQ(ret, 0);
1495 	ASSERT_EQ(buffer->cpages, npages);
1496 
1497 	/* Check what the device saw. */
1498 	m = buffer->mirror;
1499 	ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
1500 
1501 	hmm_buffer_free(buffer);
1502 }
1503 
1504 /*
1505  * Test memory snapshot without faulting in pages accessed by the device.
1506  */
1507 TEST_F(hmm2, snapshot)
1508 {
1509 	struct hmm_buffer *buffer;
1510 	unsigned long npages;
1511 	unsigned long size;
1512 	int *ptr;
1513 	unsigned char *p;
1514 	unsigned char *m;
1515 	int ret;
1516 	int val;
1517 
1518 	npages = 7;
1519 	size = npages << self->page_shift;
1520 
1521 	buffer = malloc(sizeof(*buffer));
1522 	ASSERT_NE(buffer, NULL);
1523 
1524 	buffer->fd = -1;
1525 	buffer->size = size;
1526 	buffer->mirror = malloc(npages);
1527 	ASSERT_NE(buffer->mirror, NULL);
1528 
1529 	/* Reserve a range of addresses. */
1530 	buffer->ptr = mmap(NULL, size,
1531 			   PROT_NONE,
1532 			   MAP_PRIVATE | MAP_ANONYMOUS,
1533 			   buffer->fd, 0);
1534 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1535 	p = buffer->ptr;
1536 
1537 	/* Punch a hole after the first page address. */
1538 	ret = munmap(buffer->ptr + self->page_size, self->page_size);
1539 	ASSERT_EQ(ret, 0);
1540 
1541 	/* Page 2 will be read-only zero page. */
1542 	ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
1543 				PROT_READ);
1544 	ASSERT_EQ(ret, 0);
1545 	ptr = (int *)(buffer->ptr + 2 * self->page_size);
1546 	val = *ptr + 3;
1547 	ASSERT_EQ(val, 3);
1548 
1549 	/* Page 3 will be read-only. */
1550 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1551 				PROT_READ | PROT_WRITE);
1552 	ASSERT_EQ(ret, 0);
1553 	ptr = (int *)(buffer->ptr + 3 * self->page_size);
1554 	*ptr = val;
1555 	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
1556 				PROT_READ);
1557 	ASSERT_EQ(ret, 0);
1558 
1559 	/* Page 4-6 will be read-write. */
1560 	ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size,
1561 				PROT_READ | PROT_WRITE);
1562 	ASSERT_EQ(ret, 0);
1563 	ptr = (int *)(buffer->ptr + 4 * self->page_size);
1564 	*ptr = val;
1565 
1566 	/* Page 5 will be migrated to device 0. */
1567 	buffer->ptr = p + 5 * self->page_size;
1568 	ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
1569 	ASSERT_EQ(ret, 0);
1570 	ASSERT_EQ(buffer->cpages, 1);
1571 
1572 	/* Page 6 will be migrated to device 1. */
1573 	buffer->ptr = p + 6 * self->page_size;
1574 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1);
1575 	ASSERT_EQ(ret, 0);
1576 	ASSERT_EQ(buffer->cpages, 1);
1577 
1578 	/* Simulate a device snapshotting CPU pagetables. */
1579 	buffer->ptr = p;
1580 	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1581 	ASSERT_EQ(ret, 0);
1582 	ASSERT_EQ(buffer->cpages, npages);
1583 
1584 	/* Check what the device saw. */
1585 	m = buffer->mirror;
1586 	ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR);
1587 	ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR);
1588 	ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
1589 	ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
1590 	ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
1591 	if (!hmm_is_coherent_type(variant->device_number0)) {
1592 		ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
1593 				HMM_DMIRROR_PROT_WRITE);
1594 		ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
1595 	} else {
1596 		ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL |
1597 				HMM_DMIRROR_PROT_WRITE);
1598 		ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE |
1599 				HMM_DMIRROR_PROT_WRITE);
1600 	}
1601 
1602 	hmm_buffer_free(buffer);
1603 }
1604 
1605 /*
1606  * Test the hmm_range_fault() handling of large pages (PMD or PUD)
1607  * that should be mapped by a large page table entry.
1608  */
1609 TEST_F(hmm, compound)
1610 {
1611 	struct hmm_buffer *buffer;
1612 	unsigned long npages;
1613 	unsigned long size;
1614 	unsigned long default_hsize = default_huge_page_size();
1615 	int *ptr;
1616 	unsigned char *m;
1617 	unsigned char prot;
1618 	int ret;
1619 	unsigned long i;
1620 
1621 	/* Skip test if we can't allocate a hugetlbfs page. */
1622 	if (!hugetlb_free_default_pages())
1623 		SKIP(return, "Not enough huge pages");
1624 
1625 	size = ALIGN(TWOMEG, default_hsize);
1626 	npages = size >> self->page_shift;
1627 
1628 	buffer = malloc(sizeof(*buffer));
1629 	ASSERT_NE(buffer, NULL);
1630 
1631 	buffer->ptr = mmap(NULL, size,
1632 				   PROT_READ | PROT_WRITE,
1633 				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
1634 				   -1, 0);
1635 	if (buffer->ptr == MAP_FAILED) {
1636 		free(buffer);
1637 		return;
1638 	}
1639 
1640 	buffer->size = size;
1641 	buffer->mirror = malloc(npages);
1642 	ASSERT_NE(buffer->mirror, NULL);
1643 
1644 	/* Initialize the pages the device will snapshot in buffer->ptr. */
1645 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1646 		ptr[i] = i;
1647 
1648 	/* Simulate a device snapshotting CPU pagetables. */
1649 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1650 	ASSERT_EQ(ret, 0);
1651 	ASSERT_EQ(buffer->cpages, npages);
1652 
1653 	/*
1654 	 * Check what the device saw.  The region is backed by a single huge
1655 	 * page that the device reports either at PMD or at PUD level depending
1656 	 * on the configured default hugepage size.  Determine that level from
1657 	 * the first page and require every page in the range to match it
1658 	 * exactly, so that a fragmented mapping mixing levels (or a missing
1659 	 * large-page bit) is still caught and reported with its actual value.
1660 	 */
1661 	m = buffer->mirror;
1662 	prot = HMM_DMIRROR_PROT_WRITE |
1663 	       ((m[0] & HMM_DMIRROR_PROT_PUD) ? HMM_DMIRROR_PROT_PUD :
1664 						HMM_DMIRROR_PROT_PMD);
1665 	for (i = 0; i < npages; ++i)
1666 		ASSERT_EQ(m[i], prot);
1667 
1668 	/* Make the region read-only. */
1669 	ret = mprotect(buffer->ptr, size, PROT_READ);
1670 	ASSERT_EQ(ret, 0);
1671 
1672 	/* Simulate a device snapshotting CPU pagetables. */
1673 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
1674 	ASSERT_EQ(ret, 0);
1675 	ASSERT_EQ(buffer->cpages, npages);
1676 
1677 	/*
1678 	 * Check what the device saw after mprotect(PROT_READ).  Same
1679 	 * approach as above: determine the mapping level from the first
1680 	 * page and require every page to match it exactly.
1681 	 */
1682 	m = buffer->mirror;
1683 	prot = HMM_DMIRROR_PROT_READ |
1684 	       ((m[0] & HMM_DMIRROR_PROT_PUD) ? HMM_DMIRROR_PROT_PUD :
1685 						HMM_DMIRROR_PROT_PMD);
1686 	for (i = 0; i < npages; ++i)
1687 		ASSERT_EQ(m[i], prot);
1688 
1689 	munmap(buffer->ptr, buffer->size);
1690 	buffer->ptr = NULL;
1691 	hmm_buffer_free(buffer);
1692 }
1693 
1694 /*
1695  * Test two devices reading the same memory (double mapped).
1696  */
1697 TEST_F(hmm2, double_map)
1698 {
1699 	struct hmm_buffer *buffer;
1700 	unsigned long npages;
1701 	unsigned long size;
1702 	unsigned long i;
1703 	int *ptr;
1704 	int ret;
1705 
1706 	npages = 6;
1707 	size = npages << self->page_shift;
1708 
1709 	buffer = malloc(sizeof(*buffer));
1710 	ASSERT_NE(buffer, NULL);
1711 
1712 	buffer->fd = -1;
1713 	buffer->size = size;
1714 	buffer->mirror = malloc(size);
1715 	ASSERT_NE(buffer->mirror, NULL);
1716 
1717 	/* Reserve a range of addresses. */
1718 	buffer->ptr = mmap(NULL, size,
1719 			   PROT_READ | PROT_WRITE,
1720 			   MAP_PRIVATE | MAP_ANONYMOUS,
1721 			   buffer->fd, 0);
1722 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1723 
1724 	/* Initialize buffer in system memory. */
1725 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1726 		ptr[i] = i;
1727 
1728 	/* Make region read-only. */
1729 	ret = mprotect(buffer->ptr, size, PROT_READ);
1730 	ASSERT_EQ(ret, 0);
1731 
1732 	/* Simulate device 0 reading system memory. */
1733 	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
1734 	ASSERT_EQ(ret, 0);
1735 	ASSERT_EQ(buffer->cpages, npages);
1736 	ASSERT_EQ(buffer->faults, 1);
1737 
1738 	/* Check what the device read. */
1739 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1740 		ASSERT_EQ(ptr[i], i);
1741 
1742 	/* Simulate device 1 reading system memory. */
1743 	ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages);
1744 	ASSERT_EQ(ret, 0);
1745 	ASSERT_EQ(buffer->cpages, npages);
1746 	ASSERT_EQ(buffer->faults, 1);
1747 
1748 	/* Check what the device read. */
1749 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1750 		ASSERT_EQ(ptr[i], i);
1751 
1752 	/* Migrate pages to device 1 and try to read from device 0. */
1753 	ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
1754 	ASSERT_EQ(ret, 0);
1755 	ASSERT_EQ(buffer->cpages, npages);
1756 
1757 	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
1758 	ASSERT_EQ(ret, 0);
1759 	ASSERT_EQ(buffer->cpages, npages);
1760 	ASSERT_EQ(buffer->faults, 1);
1761 
1762 	/* Check what device 0 read. */
1763 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1764 		ASSERT_EQ(ptr[i], i);
1765 
1766 	hmm_buffer_free(buffer);
1767 }
1768 
1769 /*
1770  * Basic check of exclusive faulting.
1771  */
1772 TEST_F(hmm, exclusive)
1773 {
1774 	struct hmm_buffer *buffer;
1775 	unsigned long npages;
1776 	unsigned long size;
1777 	unsigned long i;
1778 	int *ptr;
1779 	int ret;
1780 
1781 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1782 	ASSERT_NE(npages, 0);
1783 	size = npages << self->page_shift;
1784 
1785 	buffer = malloc(sizeof(*buffer));
1786 	ASSERT_NE(buffer, NULL);
1787 
1788 	buffer->fd = -1;
1789 	buffer->size = size;
1790 	buffer->mirror = malloc(size);
1791 	ASSERT_NE(buffer->mirror, NULL);
1792 
1793 	buffer->ptr = mmap(NULL, size,
1794 			   PROT_READ | PROT_WRITE,
1795 			   MAP_PRIVATE | MAP_ANONYMOUS,
1796 			   buffer->fd, 0);
1797 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1798 
1799 	/* Initialize buffer in system memory. */
1800 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1801 		ptr[i] = i;
1802 
1803 	/* Map memory exclusively for device access. */
1804 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
1805 	ASSERT_EQ(ret, 0);
1806 	ASSERT_EQ(buffer->cpages, npages);
1807 
1808 	/* Check what the device read. */
1809 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1810 		ASSERT_EQ(ptr[i], i);
1811 
1812 	/* Fault pages back to system memory and check them. */
1813 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1814 		ASSERT_EQ(ptr[i]++, i);
1815 
1816 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1817 		ASSERT_EQ(ptr[i], i+1);
1818 
1819 	/* Check atomic access revoked */
1820 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
1821 	ASSERT_EQ(ret, 0);
1822 
1823 	hmm_buffer_free(buffer);
1824 }
1825 
1826 TEST_F(hmm, exclusive_mprotect)
1827 {
1828 	struct hmm_buffer *buffer;
1829 	unsigned long npages;
1830 	unsigned long size;
1831 	unsigned long i;
1832 	int *ptr;
1833 	int ret;
1834 
1835 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1836 	ASSERT_NE(npages, 0);
1837 	size = npages << self->page_shift;
1838 
1839 	buffer = malloc(sizeof(*buffer));
1840 	ASSERT_NE(buffer, NULL);
1841 
1842 	buffer->fd = -1;
1843 	buffer->size = size;
1844 	buffer->mirror = malloc(size);
1845 	ASSERT_NE(buffer->mirror, NULL);
1846 
1847 	buffer->ptr = mmap(NULL, size,
1848 			   PROT_READ | PROT_WRITE,
1849 			   MAP_PRIVATE | MAP_ANONYMOUS,
1850 			   buffer->fd, 0);
1851 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1852 
1853 	/* Initialize buffer in system memory. */
1854 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1855 		ptr[i] = i;
1856 
1857 	/* Map memory exclusively for device access. */
1858 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
1859 	ASSERT_EQ(ret, 0);
1860 	ASSERT_EQ(buffer->cpages, npages);
1861 
1862 	/* Check what the device read. */
1863 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
1864 		ASSERT_EQ(ptr[i], i);
1865 
1866 	ret = mprotect(buffer->ptr, size, PROT_READ);
1867 	ASSERT_EQ(ret, 0);
1868 
1869 	/* Simulate a device writing system memory. */
1870 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
1871 	ASSERT_EQ(ret, -EPERM);
1872 
1873 	hmm_buffer_free(buffer);
1874 }
1875 
1876 /*
1877  * Check copy-on-write works.
1878  */
1879 TEST_F(hmm, exclusive_cow)
1880 {
1881 	struct hmm_buffer *buffer;
1882 	unsigned long npages;
1883 	unsigned long size;
1884 	unsigned long i;
1885 	int *ptr;
1886 	int ret;
1887 	pid_t pid;
1888 	int status;
1889 
1890 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
1891 	ASSERT_NE(npages, 0);
1892 	size = npages << self->page_shift;
1893 
1894 	buffer = malloc(sizeof(*buffer));
1895 	ASSERT_NE(buffer, NULL);
1896 
1897 	buffer->fd = -1;
1898 	buffer->size = size;
1899 	buffer->mirror = malloc(size);
1900 	ASSERT_NE(buffer->mirror, NULL);
1901 
1902 	buffer->ptr = mmap(NULL, size,
1903 			   PROT_READ | PROT_WRITE,
1904 			   MAP_PRIVATE | MAP_ANONYMOUS,
1905 			   buffer->fd, 0);
1906 	ASSERT_NE(buffer->ptr, MAP_FAILED);
1907 
1908 	/* Initialize buffer in system memory. */
1909 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1910 		ptr[i] = i;
1911 
1912 	/* Map memory exclusively for device access. */
1913 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
1914 	ASSERT_EQ(ret, 0);
1915 	ASSERT_EQ(buffer->cpages, npages);
1916 
1917 	pid = fork();
1918 	if (pid == -1)
1919 		ASSERT_EQ(pid, 0);
1920 
1921 	if (pid == 0) {
1922 		/*
1923 		 * Child verifies COW independently, then _exit(0)s so it does
1924 		 * not run the test teardown.  A failed ASSERT_* here makes the
1925 		 * harness abort() the child, so the parent sees
1926 		 * !WIFEXITED(status) below and fails in turn.
1927 		 */
1928 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1929 			ASSERT_EQ(ptr[i]++, i);
1930 
1931 		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1932 			ASSERT_EQ(ptr[i], i + 1);
1933 
1934 		_exit(0);
1935 	}
1936 
1937 	/* Parent: also increment to verify COW works for both processes. */
1938 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1939 		ASSERT_EQ(ptr[i]++, i);
1940 
1941 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
1942 		ASSERT_EQ(ptr[i], i + 1);
1943 
1944 	/* Parent: wait for child and then free the buffer. */
1945 	ASSERT_EQ(waitpid(pid, &status, 0), pid);
1946 	ASSERT_TRUE(WIFEXITED(status));
1947 	ASSERT_EQ(WEXITSTATUS(status), 0);
1948 
1949 	hmm_buffer_free(buffer);
1950 }
1951 
1952 static int gup_test_exec(int gup_fd, unsigned long addr, int cmd,
1953 			 int npages, int size, int flags)
1954 {
1955 	struct gup_test gup = {
1956 		.nr_pages_per_call	= npages,
1957 		.addr			= addr,
1958 		.gup_flags		= FOLL_WRITE | flags,
1959 		.size			= size,
1960 	};
1961 
1962 	if (ioctl(gup_fd, cmd, &gup)) {
1963 		perror("ioctl on error\n");
1964 		return errno;
1965 	}
1966 
1967 	return 0;
1968 }
1969 
1970 /*
1971  * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
1972  * This should trigger a migration back to system memory for both, private
1973  * and coherent type pages.
1974  * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added
1975  * to your configuration before you run it.
1976  */
1977 TEST_F(hmm, hmm_gup_test)
1978 {
1979 	struct hmm_buffer *buffer;
1980 	int gup_fd;
1981 	unsigned long npages;
1982 	unsigned long size;
1983 	unsigned long i;
1984 	int *ptr;
1985 	int ret;
1986 	unsigned char *m;
1987 
1988 	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
1989 	if (gup_fd == -1)
1990 		SKIP(return, "Skipping test, could not find gup_test driver");
1991 
1992 	npages = 4;
1993 	size = npages << self->page_shift;
1994 
1995 	buffer = malloc(sizeof(*buffer));
1996 	ASSERT_NE(buffer, NULL);
1997 
1998 	buffer->fd = -1;
1999 	buffer->size = size;
2000 	buffer->mirror = malloc(size);
2001 	ASSERT_NE(buffer->mirror, NULL);
2002 
2003 	buffer->ptr = mmap(NULL, size,
2004 			   PROT_READ | PROT_WRITE,
2005 			   MAP_PRIVATE | MAP_ANONYMOUS,
2006 			   buffer->fd, 0);
2007 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2008 
2009 	/* Initialize buffer in system memory. */
2010 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2011 		ptr[i] = i;
2012 
2013 	/* Migrate memory to device. */
2014 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2015 	ASSERT_EQ(ret, 0);
2016 	ASSERT_EQ(buffer->cpages, npages);
2017 	/* Check what the device read. */
2018 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2019 		ASSERT_EQ(ptr[i], i);
2020 
2021 	ASSERT_EQ(gup_test_exec(gup_fd,
2022 				(unsigned long)buffer->ptr,
2023 				GUP_BASIC_TEST, 1, self->page_size, 0), 0);
2024 	ASSERT_EQ(gup_test_exec(gup_fd,
2025 				(unsigned long)buffer->ptr + 1 * self->page_size,
2026 				GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0);
2027 	ASSERT_EQ(gup_test_exec(gup_fd,
2028 				(unsigned long)buffer->ptr + 2 * self->page_size,
2029 				PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0);
2030 	ASSERT_EQ(gup_test_exec(gup_fd,
2031 				(unsigned long)buffer->ptr + 3 * self->page_size,
2032 				PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0);
2033 
2034 	/* Take snapshot to CPU pagetables */
2035 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
2036 	ASSERT_EQ(ret, 0);
2037 	ASSERT_EQ(buffer->cpages, npages);
2038 	m = buffer->mirror;
2039 	if (hmm_is_coherent_type(variant->device_number)) {
2040 		ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]);
2041 		ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]);
2042 	} else {
2043 		ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
2044 		ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
2045 	}
2046 	ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
2047 	ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]);
2048 	/*
2049 	 * Check again the content on the pages. Make sure there's no
2050 	 * corrupted data.
2051 	 */
2052 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2053 		ASSERT_EQ(ptr[i], i);
2054 
2055 	close(gup_fd);
2056 	hmm_buffer_free(buffer);
2057 }
2058 
2059 /*
2060  * Test copy-on-write in device pages.
2061  * In case of writing to COW private page(s), a page fault will migrate pages
2062  * back to system memory first. Then, these pages will be duplicated. In case
2063  * of COW device coherent type, pages are duplicated directly from device
2064  * memory.
2065  */
2066 TEST_F(hmm, hmm_cow_in_device)
2067 {
2068 	struct hmm_buffer *buffer;
2069 	unsigned long npages;
2070 	unsigned long size;
2071 	unsigned long i;
2072 	int *ptr;
2073 	int ret;
2074 	unsigned char *m;
2075 	pid_t pid;
2076 	int status;
2077 
2078 	npages = 4;
2079 	size = npages << self->page_shift;
2080 
2081 	buffer = malloc(sizeof(*buffer));
2082 	ASSERT_NE(buffer, NULL);
2083 
2084 	buffer->fd = -1;
2085 	buffer->size = size;
2086 	buffer->mirror = malloc(size);
2087 	ASSERT_NE(buffer->mirror, NULL);
2088 
2089 	buffer->ptr = mmap(NULL, size,
2090 			   PROT_READ | PROT_WRITE,
2091 			   MAP_PRIVATE | MAP_ANONYMOUS,
2092 			   buffer->fd, 0);
2093 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2094 
2095 	/* Initialize buffer in system memory. */
2096 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2097 		ptr[i] = i;
2098 
2099 	/* Migrate memory to device. */
2100 
2101 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2102 	ASSERT_EQ(ret, 0);
2103 	ASSERT_EQ(buffer->cpages, npages);
2104 
2105 	pid = fork();
2106 	if (pid == -1)
2107 		ASSERT_EQ(pid, 0);
2108 	if (!pid) {
2109 		/* Child process waits for SIGKILL from the parent. */
2110 		while (1) {
2111 		}
2112 		/* Should not reach this */
2113 	}
2114 	/* Parent process writes to COW pages(s) and gets a
2115 	 * new copy in system. In case of device private pages,
2116 	 * this write causes a migration to system mem first.
2117 	 */
2118 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2119 		ptr[i] = i;
2120 
2121 	/* Terminate child and wait */
2122 	EXPECT_EQ(0, kill(pid, SIGKILL));
2123 	EXPECT_EQ(pid, waitpid(pid, &status, 0));
2124 	EXPECT_NE(0, WIFSIGNALED(status));
2125 	EXPECT_EQ(SIGKILL, WTERMSIG(status));
2126 
2127 	/* Take snapshot to CPU pagetables */
2128 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
2129 	ASSERT_EQ(ret, 0);
2130 	ASSERT_EQ(buffer->cpages, npages);
2131 	m = buffer->mirror;
2132 	for (i = 0; i < npages; i++)
2133 		ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
2134 
2135 	hmm_buffer_free(buffer);
2136 }
2137 
2138 /*
2139  * Migrate private anonymous huge empty page.
2140  */
2141 TEST_F(hmm, migrate_anon_huge_empty)
2142 {
2143 	struct hmm_buffer *buffer;
2144 	unsigned long npages;
2145 	unsigned long size;
2146 	unsigned long i;
2147 	void *old_ptr;
2148 	void *map;
2149 	int *ptr;
2150 	int ret;
2151 
2152 	size = read_pmd_pagesize();
2153 
2154 	buffer = malloc(sizeof(*buffer));
2155 	ASSERT_NE(buffer, NULL);
2156 
2157 	buffer->fd = -1;
2158 	buffer->size = 2 * size;
2159 	buffer->mirror = malloc(size);
2160 	ASSERT_NE(buffer->mirror, NULL);
2161 	memset(buffer->mirror, 0xFF, size);
2162 
2163 	buffer->ptr = mmap(NULL, 2 * size,
2164 			   PROT_READ,
2165 			   MAP_PRIVATE | MAP_ANONYMOUS,
2166 			   buffer->fd, 0);
2167 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2168 
2169 	npages = size >> self->page_shift;
2170 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2171 	ret = madvise(map, size, MADV_HUGEPAGE);
2172 	ASSERT_EQ(ret, 0);
2173 	old_ptr = buffer->ptr;
2174 	buffer->ptr = map;
2175 
2176 	/* Migrate memory to device. */
2177 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2178 	ASSERT_EQ(ret, 0);
2179 	ASSERT_EQ(buffer->cpages, npages);
2180 
2181 	/* Check what the device read. */
2182 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2183 		ASSERT_EQ(ptr[i], 0);
2184 
2185 	buffer->ptr = old_ptr;
2186 	hmm_buffer_free(buffer);
2187 }
2188 
2189 /*
2190  * Migrate private anonymous huge zero page.
2191  */
2192 TEST_F(hmm, migrate_anon_huge_zero)
2193 {
2194 	struct hmm_buffer *buffer;
2195 	unsigned long npages;
2196 	unsigned long size;
2197 	unsigned long i;
2198 	void *old_ptr;
2199 	void *map;
2200 	int *ptr;
2201 	int ret;
2202 	int val;
2203 
2204 	size = read_pmd_pagesize();
2205 
2206 	buffer = malloc(sizeof(*buffer));
2207 	ASSERT_NE(buffer, NULL);
2208 
2209 	buffer->fd = -1;
2210 	buffer->size = 2 * size;
2211 	buffer->mirror = malloc(size);
2212 	ASSERT_NE(buffer->mirror, NULL);
2213 	memset(buffer->mirror, 0xFF, size);
2214 
2215 	buffer->ptr = mmap(NULL, 2 * size,
2216 			   PROT_READ,
2217 			   MAP_PRIVATE | MAP_ANONYMOUS,
2218 			   buffer->fd, 0);
2219 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2220 
2221 	npages = size >> self->page_shift;
2222 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2223 	ret = madvise(map, size, MADV_HUGEPAGE);
2224 	ASSERT_EQ(ret, 0);
2225 	old_ptr = buffer->ptr;
2226 	buffer->ptr = map;
2227 
2228 	/* Initialize a read-only zero huge page. */
2229 	val = *(int *)buffer->ptr;
2230 	ASSERT_EQ(val, 0);
2231 
2232 	/* Migrate memory to device. */
2233 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2234 	ASSERT_EQ(ret, 0);
2235 	ASSERT_EQ(buffer->cpages, npages);
2236 
2237 	/* Check what the device read. */
2238 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2239 		ASSERT_EQ(ptr[i], 0);
2240 
2241 	/* Fault pages back to system memory and check them. */
2242 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) {
2243 		ASSERT_EQ(ptr[i], 0);
2244 		/* If it asserts once, it probably will 500,000 times */
2245 		if (ptr[i] != 0)
2246 			break;
2247 	}
2248 
2249 	buffer->ptr = old_ptr;
2250 	hmm_buffer_free(buffer);
2251 }
2252 
2253 /*
2254  * Migrate private anonymous huge page and free.
2255  */
2256 TEST_F(hmm, migrate_anon_huge_free)
2257 {
2258 	struct hmm_buffer *buffer;
2259 	unsigned long npages;
2260 	unsigned long size;
2261 	unsigned long i;
2262 	void *old_ptr;
2263 	void *map;
2264 	int *ptr;
2265 	int ret;
2266 
2267 	size = read_pmd_pagesize();
2268 
2269 	buffer = malloc(sizeof(*buffer));
2270 	ASSERT_NE(buffer, NULL);
2271 
2272 	buffer->fd = -1;
2273 	buffer->size = 2 * size;
2274 	buffer->mirror = malloc(size);
2275 	ASSERT_NE(buffer->mirror, NULL);
2276 	memset(buffer->mirror, 0xFF, size);
2277 
2278 	buffer->ptr = mmap(NULL, 2 * size,
2279 			   PROT_READ | PROT_WRITE,
2280 			   MAP_PRIVATE | MAP_ANONYMOUS,
2281 			   buffer->fd, 0);
2282 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2283 
2284 	npages = size >> self->page_shift;
2285 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2286 	ret = madvise(map, size, MADV_HUGEPAGE);
2287 	ASSERT_EQ(ret, 0);
2288 	old_ptr = buffer->ptr;
2289 	buffer->ptr = map;
2290 
2291 	/* Initialize buffer in system memory. */
2292 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2293 		ptr[i] = i;
2294 
2295 	/* Migrate memory to device. */
2296 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2297 	ASSERT_EQ(ret, 0);
2298 	ASSERT_EQ(buffer->cpages, npages);
2299 
2300 	/* Check what the device read. */
2301 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2302 		ASSERT_EQ(ptr[i], i);
2303 
2304 	/* Try freeing it. */
2305 	ret = madvise(map, size, MADV_FREE);
2306 	ASSERT_EQ(ret, 0);
2307 
2308 	buffer->ptr = old_ptr;
2309 	hmm_buffer_free(buffer);
2310 }
2311 
2312 /*
2313  * Migrate private anonymous huge page and fault back to sysmem.
2314  */
2315 TEST_F(hmm, migrate_anon_huge_fault)
2316 {
2317 	struct hmm_buffer *buffer;
2318 	unsigned long npages;
2319 	unsigned long size;
2320 	unsigned long i;
2321 	unsigned char *m;
2322 	uint64_t entry;
2323 	void *old_ptr;
2324 	void *map;
2325 	int pagemap_fd;
2326 	int *ptr;
2327 	int ret;
2328 
2329 	size = read_pmd_pagesize();
2330 
2331 	buffer = malloc(sizeof(*buffer));
2332 	ASSERT_NE(buffer, NULL);
2333 
2334 	buffer->fd = -1;
2335 	buffer->size = 2 * size;
2336 	buffer->mirror = malloc(size);
2337 	ASSERT_NE(buffer->mirror, NULL);
2338 	memset(buffer->mirror, 0xFF, size);
2339 
2340 	buffer->ptr = mmap(NULL, 2 * size,
2341 			   PROT_READ | PROT_WRITE,
2342 			   MAP_PRIVATE | MAP_ANONYMOUS,
2343 			   buffer->fd, 0);
2344 	ASSERT_NE(buffer->ptr, MAP_FAILED);
2345 
2346 	npages = size >> self->page_shift;
2347 	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2348 	old_ptr = buffer->ptr;
2349 	buffer->ptr = map;
2350 
2351 	/* Initialize buffer in system memory. */
2352 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2353 		ptr[i] = i;
2354 
2355 	ret = madvise(map, size, MADV_COLLAPSE);
2356 	ASSERT_EQ(ret, 0);
2357 
2358 	/* Migrate memory to device. */
2359 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2360 	ASSERT_EQ(ret, 0);
2361 	ASSERT_EQ(buffer->cpages, npages);
2362 
2363 	/* Check what the device read. */
2364 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2365 		ASSERT_EQ(ptr[i], i);
2366 
2367 	if (!hmm_is_coherent_type(variant->device_number)) {
2368 		ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT,
2369 				      buffer, npages);
2370 		ASSERT_EQ(ret, 0);
2371 		ASSERT_EQ(buffer->cpages, npages);
2372 
2373 		m = buffer->mirror;
2374 		for (i = 0; i < npages; ++i)
2375 			ASSERT_EQ(m[i], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
2376 					HMM_DMIRROR_PROT_WRITE |
2377 					HMM_DMIRROR_PROT_PMD);
2378 
2379 		pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
2380 		ASSERT_GE(pagemap_fd, 0);
2381 
2382 		for (i = 0; i < npages; ++i) {
2383 			entry = pagemap_get_entry(pagemap_fd,
2384 					(char *)buffer->ptr + i * self->page_size);
2385 
2386 			ASSERT_NE(entry & PM_SWAP, 0);
2387 			ASSERT_FALSE(PAGEMAP_PRESENT(entry));
2388 		}
2389 
2390 		close(pagemap_fd);
2391 	}
2392 
2393 	/* Fault pages back to system memory and check them. */
2394 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2395 		ASSERT_EQ(ptr[i], i);
2396 
2397 	buffer->ptr = old_ptr;
2398 	hmm_buffer_free(buffer);
2399 }
2400 
2401 /*
2402  * Migrate memory and fault back to sysmem after partially unmapping.
2403  */
2404 TEST_F(hmm, migrate_partial_unmap_fault)
2405 {
2406 	struct hmm_buffer *buffer;
2407 	unsigned long npages;
2408 	unsigned long size = read_pmd_pagesize();
2409 	unsigned long unmap_size;
2410 	unsigned long offsets[3];
2411 	unsigned long i;
2412 	void *old_ptr;
2413 	void *map;
2414 	int *ptr;
2415 	int ret, j, use_thp;
2416 
2417 	if (!size)
2418 		size = TWOMEG;
2419 
2420 	unmap_size = size / 2;
2421 	offsets[0] = 0;
2422 	offsets[1] = size / 4;
2423 	offsets[2] = size / 2;
2424 
2425 	for (use_thp = 0; use_thp < 2; ++use_thp) {
2426 		for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
2427 			buffer = malloc(sizeof(*buffer));
2428 			ASSERT_NE(buffer, NULL);
2429 
2430 			buffer->fd = -1;
2431 			buffer->size = 2 * size;
2432 			buffer->mirror = malloc(size);
2433 			ASSERT_NE(buffer->mirror, NULL);
2434 			memset(buffer->mirror, 0xFF, size);
2435 
2436 			buffer->ptr = mmap(NULL, 2 * size,
2437 					   PROT_READ | PROT_WRITE,
2438 					   MAP_PRIVATE | MAP_ANONYMOUS,
2439 					   buffer->fd, 0);
2440 			ASSERT_NE(buffer->ptr, MAP_FAILED);
2441 
2442 			npages = size >> self->page_shift;
2443 			map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2444 			if (use_thp)
2445 				ret = madvise(map, size, MADV_HUGEPAGE);
2446 			else
2447 				ret = madvise(map, size, MADV_NOHUGEPAGE);
2448 			ASSERT_EQ(ret, 0);
2449 			old_ptr = buffer->ptr;
2450 			buffer->ptr = map;
2451 
2452 			/* Initialize buffer in system memory. */
2453 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2454 				ptr[i] = i;
2455 
2456 			/* Migrate memory to device. */
2457 			ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2458 			ASSERT_EQ(ret, 0);
2459 			ASSERT_EQ(buffer->cpages, npages);
2460 
2461 			/* Check what the device read. */
2462 			for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2463 				ASSERT_EQ(ptr[i], i);
2464 
2465 			munmap(buffer->ptr + offsets[j], unmap_size);
2466 
2467 			/* Fault pages back to system memory and check them. */
2468 			for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2469 				if (i * sizeof(int) < offsets[j] ||
2470 				    i * sizeof(int) >= offsets[j] + unmap_size)
2471 					ASSERT_EQ(ptr[i], i);
2472 
2473 			buffer->ptr = old_ptr;
2474 			hmm_buffer_free(buffer);
2475 		}
2476 	}
2477 }
2478 
2479 TEST_F(hmm, migrate_remap_fault)
2480 {
2481 	struct hmm_buffer *buffer;
2482 	unsigned long npages;
2483 	unsigned long size = read_pmd_pagesize();
2484 	unsigned long offsets[3];
2485 	unsigned long i;
2486 	void *old_ptr, *new_ptr = NULL;
2487 	void *map;
2488 	int *ptr;
2489 	int ret, j, use_thp, dont_unmap, before;
2490 
2491 	if (!size)
2492 		size = TWOMEG;
2493 
2494 	offsets[0] = 0;
2495 	offsets[1] = size / 4;
2496 	offsets[2] = size / 2;
2497 
2498 	for (before = 0; before < 2; ++before) {
2499 		for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) {
2500 			for (use_thp = 0; use_thp < 2; ++use_thp) {
2501 				for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
2502 					int flags = MREMAP_MAYMOVE | MREMAP_FIXED;
2503 
2504 					if (dont_unmap)
2505 						flags |= MREMAP_DONTUNMAP;
2506 
2507 					buffer = malloc(sizeof(*buffer));
2508 					ASSERT_NE(buffer, NULL);
2509 
2510 					buffer->fd = -1;
2511 					buffer->size = 8 * size;
2512 					buffer->mirror = malloc(size);
2513 					ASSERT_NE(buffer->mirror, NULL);
2514 					memset(buffer->mirror, 0xFF, size);
2515 
2516 					buffer->ptr = mmap(NULL, buffer->size,
2517 							   PROT_READ | PROT_WRITE,
2518 							   MAP_PRIVATE | MAP_ANONYMOUS,
2519 							   buffer->fd, 0);
2520 					ASSERT_NE(buffer->ptr, MAP_FAILED);
2521 
2522 					npages = size >> self->page_shift;
2523 					map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
2524 					if (use_thp)
2525 						ret = madvise(map, size, MADV_HUGEPAGE);
2526 					else
2527 						ret = madvise(map, size, MADV_NOHUGEPAGE);
2528 					ASSERT_EQ(ret, 0);
2529 					old_ptr = buffer->ptr;
2530 					munmap(map + size, size * 2);
2531 					buffer->ptr = map;
2532 
2533 					/* Initialize buffer in system memory. */
2534 					for (i = 0, ptr = buffer->ptr;
2535 					     i < size / sizeof(*ptr); ++i)
2536 						ptr[i] = i;
2537 
2538 					if (before) {
2539 						new_ptr = mremap((void *)map, size, size, flags,
2540 								 map + size + offsets[j]);
2541 						ASSERT_NE(new_ptr, MAP_FAILED);
2542 						buffer->ptr = new_ptr;
2543 					}
2544 
2545 					/* Migrate memory to device. */
2546 					ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2547 					ASSERT_EQ(ret, 0);
2548 					ASSERT_EQ(buffer->cpages, npages);
2549 
2550 					/* Check what the device read. */
2551 					for (i = 0, ptr = buffer->mirror;
2552 					     i < size / sizeof(*ptr); ++i)
2553 						ASSERT_EQ(ptr[i], i);
2554 
2555 					if (!before) {
2556 						new_ptr = mremap((void *)map, size, size, flags,
2557 								 map + size + offsets[j]);
2558 						ASSERT_NE(new_ptr, MAP_FAILED);
2559 						buffer->ptr = new_ptr;
2560 					}
2561 
2562 					/* Fault pages back to system memory and check them. */
2563 					for (i = 0, ptr = buffer->ptr;
2564 					     i < size / sizeof(*ptr); ++i)
2565 						ASSERT_EQ(ptr[i], i);
2566 
2567 					munmap(new_ptr, size);
2568 					buffer->ptr = old_ptr;
2569 					hmm_buffer_free(buffer);
2570 				}
2571 			}
2572 		}
2573 	}
2574 }
2575 
2576 /*
2577  * Migrate private anonymous huge page with allocation errors.
2578  */
2579 TEST_F(hmm, migrate_anon_huge_err)
2580 {
2581 	struct hmm_buffer *buffer;
2582 	unsigned long npages;
2583 	unsigned long size;
2584 	unsigned long i;
2585 	void *old_ptr;
2586 	void *map;
2587 	int *ptr;
2588 	int ret;
2589 
2590 	size = read_pmd_pagesize();
2591 
2592 	buffer = malloc(sizeof(*buffer));
2593 	ASSERT_NE(buffer, NULL);
2594 
2595 	buffer->fd = -1;
2596 	buffer->size = 2 * size;
2597 	buffer->mirror = malloc(2 * size);
2598 	ASSERT_NE(buffer->mirror, NULL);
2599 	memset(buffer->mirror, 0xFF, 2 * size);
2600 
2601 	old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
2602 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2603 	ASSERT_NE(old_ptr, MAP_FAILED);
2604 
2605 	npages = size >> self->page_shift;
2606 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2607 	ret = madvise(map, size, MADV_HUGEPAGE);
2608 	ASSERT_EQ(ret, 0);
2609 	buffer->ptr = map;
2610 
2611 	/* Initialize buffer in system memory. */
2612 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2613 		ptr[i] = i;
2614 
2615 	/* Migrate memory to device but force a THP allocation error. */
2616 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2617 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2618 	ASSERT_EQ(ret, 0);
2619 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2620 	ASSERT_EQ(ret, 0);
2621 	ASSERT_EQ(buffer->cpages, npages);
2622 
2623 	/* Check what the device read. */
2624 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) {
2625 		ASSERT_EQ(ptr[i], i);
2626 		if (ptr[i] != i)
2627 			break;
2628 	}
2629 
2630 	/* Try faulting back a single (PAGE_SIZE) page. */
2631 	ptr = buffer->ptr;
2632 	ASSERT_EQ(ptr[2048], 2048);
2633 
2634 	/* unmap and remap the region to reset things. */
2635 	ret = munmap(old_ptr, 2 * size);
2636 	ASSERT_EQ(ret, 0);
2637 	old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
2638 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2639 	ASSERT_NE(old_ptr, MAP_FAILED);
2640 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2641 	ret = madvise(map, size, MADV_HUGEPAGE);
2642 	ASSERT_EQ(ret, 0);
2643 	buffer->ptr = map;
2644 
2645 	/* Initialize buffer in system memory. */
2646 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2647 		ptr[i] = i;
2648 
2649 	/* Migrate THP to device. */
2650 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2651 	ASSERT_EQ(ret, 0);
2652 	ASSERT_EQ(buffer->cpages, npages);
2653 
2654 	/*
2655 	 * Force an allocation error when faulting back a THP resident in the
2656 	 * device.
2657 	 */
2658 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2659 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2660 	ASSERT_EQ(ret, 0);
2661 
2662 	ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
2663 	ASSERT_EQ(ret, 0);
2664 	ptr = buffer->ptr;
2665 	ASSERT_EQ(ptr[2048], 2048);
2666 
2667 	buffer->ptr = old_ptr;
2668 	hmm_buffer_free(buffer);
2669 }
2670 
2671 /*
2672  * Migrate private anonymous huge zero page with allocation errors.
2673  */
2674 TEST_F(hmm, migrate_anon_huge_zero_err)
2675 {
2676 	struct hmm_buffer *buffer;
2677 	unsigned long npages;
2678 	unsigned long size;
2679 	unsigned long i;
2680 	void *old_ptr;
2681 	void *map;
2682 	int *ptr;
2683 	int ret;
2684 
2685 	size = read_pmd_pagesize();
2686 
2687 	buffer = malloc(sizeof(*buffer));
2688 	ASSERT_NE(buffer, NULL);
2689 
2690 	buffer->fd = -1;
2691 	buffer->size = 2 * size;
2692 	buffer->mirror = malloc(2 * size);
2693 	ASSERT_NE(buffer->mirror, NULL);
2694 	memset(buffer->mirror, 0xFF, 2 * size);
2695 
2696 	old_ptr = mmap(NULL, 2 * size, PROT_READ,
2697 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2698 	ASSERT_NE(old_ptr, MAP_FAILED);
2699 
2700 	npages = size >> self->page_shift;
2701 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2702 	ret = madvise(map, size, MADV_HUGEPAGE);
2703 	ASSERT_EQ(ret, 0);
2704 	buffer->ptr = map;
2705 
2706 	/* Migrate memory to device but force a THP allocation error. */
2707 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2708 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2709 	ASSERT_EQ(ret, 0);
2710 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2711 	ASSERT_EQ(ret, 0);
2712 	ASSERT_EQ(buffer->cpages, npages);
2713 
2714 	/* Check what the device read. */
2715 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
2716 		ASSERT_EQ(ptr[i], 0);
2717 
2718 	/* Try faulting back a single (PAGE_SIZE) page. */
2719 	ptr = buffer->ptr;
2720 	ASSERT_EQ(ptr[2048], 0);
2721 
2722 	/* unmap and remap the region to reset things. */
2723 	ret = munmap(old_ptr, 2 * size);
2724 	ASSERT_EQ(ret, 0);
2725 	old_ptr = mmap(NULL, 2 * size, PROT_READ,
2726 			MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
2727 	ASSERT_NE(old_ptr, MAP_FAILED);
2728 	map = (void *)ALIGN((uintptr_t)old_ptr, size);
2729 	ret = madvise(map, size, MADV_HUGEPAGE);
2730 	ASSERT_EQ(ret, 0);
2731 	buffer->ptr = map;
2732 
2733 	/* Initialize buffer in system memory (zero THP page). */
2734 	ret = ptr[0];
2735 	ASSERT_EQ(ret, 0);
2736 
2737 	/* Migrate memory to device but force a THP allocation error. */
2738 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
2739 			      HMM_DMIRROR_FLAG_FAIL_ALLOC);
2740 	ASSERT_EQ(ret, 0);
2741 	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
2742 	ASSERT_EQ(ret, 0);
2743 	ASSERT_EQ(buffer->cpages, npages);
2744 
2745 	/* Fault the device memory back and check it. */
2746 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
2747 		ASSERT_EQ(ptr[i], 0);
2748 
2749 	buffer->ptr = old_ptr;
2750 	hmm_buffer_free(buffer);
2751 }
2752 
/* Averaged outcome of one migration benchmark run. */
struct benchmark_results {
	/* Mean duration of one system -> device migration, in milliseconds. */
	double sys_to_dev_time;
	/* Mean duration of one device -> system migration, in milliseconds. */
	double dev_to_sys_time;
	/* System -> device throughput; reported as GB/s. */
	double throughput_s2d;
	/* Device -> system throughput; reported as GB/s. */
	double throughput_d2s;
};
2759 
/*
 * Return the current time of day from gettimeofday(), expressed in
 * milliseconds as a double (whole seconds plus the microsecond part).
 */
static double get_time_ms(void)
{
	struct timeval now;
	double sec_part_ms;
	double usec_part_ms;

	gettimeofday(&now, NULL);

	sec_part_ms = now.tv_sec * 1000.0;
	usec_part_ms = now.tv_usec / 1000.0;

	return sec_part_ms + usec_part_ms;
}
2767 
2768 static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size)
2769 {
2770 	struct hmm_buffer *buffer;
2771 
2772 	buffer = malloc(sizeof(*buffer));
2773 
2774 	buffer->fd = -1;
2775 	buffer->size = size;
2776 	buffer->mirror = malloc(size);
2777 	memset(buffer->mirror, 0xFF, size);
2778 	return buffer;
2779 }
2780 
2781 static void print_benchmark_results(const char *test_name, size_t buffer_size,
2782 				     struct benchmark_results *thp,
2783 				     struct benchmark_results *regular)
2784 {
2785 	double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) /
2786 				 regular->sys_to_dev_time) * 100.0;
2787 	double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) /
2788 				 regular->dev_to_sys_time) * 100.0;
2789 	double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) /
2790 					    regular->throughput_s2d) * 100.0;
2791 	double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) /
2792 					    regular->throughput_d2s) * 100.0;
2793 
2794 	printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0));
2795 	printf("                     | With THP        | Without THP     | Improvement\n");
2796 	printf("---------------------------------------------------------------------\n");
2797 	printf("Sys->Dev Migration   | %.3f ms        | %.3f ms        | %.1f%%\n",
2798 	       thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement);
2799 	printf("Dev->Sys Migration   | %.3f ms        | %.3f ms        | %.1f%%\n",
2800 	       thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement);
2801 	printf("S->D Throughput      | %.2f GB/s      | %.2f GB/s      | %.1f%%\n",
2802 	       thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement);
2803 	printf("D->S Throughput      | %.2f GB/s      | %.2f GB/s      | %.1f%%\n",
2804 	       thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement);
2805 }
2806 
2807 /*
2808  * Run a single migration benchmark
2809  * fd: file descriptor for hmm device
2810  * use_thp: whether to use THP
2811  * buffer_size: size of buffer to allocate
2812  * iterations: number of iterations
2813  * results: where to store results
2814  */
2815 static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size,
2816 					   int iterations, struct benchmark_results *results)
2817 {
2818 	struct hmm_buffer *buffer;
2819 	unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE);
2820 	double start, end;
2821 	double s2d_total = 0, d2s_total = 0;
2822 	int ret, i;
2823 	int *ptr;
2824 
2825 	buffer = hmm_buffer_alloc(buffer_size);
2826 
2827 	/* Map memory */
2828 	buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE,
2829 			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
2830 
2831 	if (buffer->ptr == MAP_FAILED)
2832 		return -1;
2833 
2834 	/* Apply THP hint if requested */
2835 	if (use_thp)
2836 		ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE);
2837 	else
2838 		ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE);
2839 
2840 	if (ret)
2841 		return ret;
2842 
2843 	/* Initialize memory to make sure pages are allocated */
2844 	ptr = (int *)buffer->ptr;
2845 	for (i = 0; i < buffer_size / sizeof(int); i++)
2846 		ptr[i] = i & 0xFF;
2847 
2848 	/* Warmup iteration */
2849 	ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
2850 	if (ret)
2851 		return ret;
2852 
2853 	ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
2854 	if (ret)
2855 		return ret;
2856 
2857 	/* Benchmark iterations */
2858 	for (i = 0; i < iterations; i++) {
2859 		/* System to device migration */
2860 		start = get_time_ms();
2861 
2862 		ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
2863 		if (ret)
2864 			return ret;
2865 
2866 		end = get_time_ms();
2867 		s2d_total += (end - start);
2868 
2869 		/* Device to system migration */
2870 		start = get_time_ms();
2871 
2872 		ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
2873 		if (ret)
2874 			return ret;
2875 
2876 		end = get_time_ms();
2877 		d2s_total += (end - start);
2878 	}
2879 
2880 	/* Calculate average times and throughput */
2881 	results->sys_to_dev_time = s2d_total / iterations;
2882 	results->dev_to_sys_time = d2s_total / iterations;
2883 	results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
2884 				 (results->sys_to_dev_time / 1000.0);
2885 	results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
2886 				 (results->dev_to_sys_time / 1000.0);
2887 
2888 	/* Cleanup */
2889 	hmm_buffer_free(buffer);
2890 	return 0;
2891 }
2892 
2893 /*
2894  * Benchmark THP migration with different buffer sizes
2895  */
2896 TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120)
2897 {
2898 	struct benchmark_results thp_results, regular_results;
2899 	size_t thp_size = read_pmd_pagesize();
2900 	int iterations = 5;
2901 
2902 	if (!thp_size)
2903 		thp_size = TWOMEG;
2904 
2905 	printf("\nHMM THP Migration Benchmark\n");
2906 	printf("---------------------------\n");
2907 	printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
2908 
2909 	/* Test different buffer sizes */
2910 	size_t test_sizes[] = {
2911 		thp_size / 4,      /* quarter THP */
2912 		thp_size / 2,      /* half THP */
2913 		thp_size,          /* single THP */
2914 		thp_size * 2,      /* two THPs */
2915 		thp_size * 4,      /* four THPs */
2916 		thp_size * 8,      /* eight THPs */
2917 		thp_size * 128,    /* one twenty eight THPs */
2918 	};
2919 
2920 	static const char *const test_names[] = {
2921 		"Small Buffer",
2922 		"Half THP Size",
2923 		"Single THP Size",
2924 		"Two THP Size",
2925 		"Four THP Size",
2926 		"Eight THP Size",
2927 		"One twenty eight THP Size"
2928 	};
2929 
2930 	int num_tests = ARRAY_SIZE(test_sizes);
2931 
2932 	/* Run all tests */
2933 	for (int i = 0; i < num_tests; i++) {
2934 		/* Skip test sizes exceeding INT_MAX to avoid overflow */
2935 		if (test_sizes[i] > INT_MAX)
2936 			break;
2937 
2938 		/* Test with THP */
2939 		ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i],
2940 					iterations, &thp_results), 0);
2941 
2942 		/* Test without THP */
2943 		ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i],
2944 					iterations, &regular_results), 0);
2945 
2946 		/* Print results */
2947 		print_benchmark_results(test_names[i], test_sizes[i],
2948 					&thp_results, &regular_results);
2949 	}
2950 }
2951 TEST_HARNESS_MAIN
2952