xref: /linux/tools/testing/selftests/proc/proc-empty-vm.c (revision 20e34aa7e08dbac5d7f757fea81fae8df462aa42)
1 #if defined __amd64__ || defined __i386__
2 /*
3  * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 /*
18  * Create a process without mappings by unmapping everything at once and
19  * holding it with ptrace(2). See what happens to
20  *
21  *	/proc/${pid}/maps
22  *	/proc/${pid}/numa_maps
23  *	/proc/${pid}/smaps
24  *	/proc/${pid}/smaps_rollup
25  */
26 #undef NDEBUG
27 #include <assert.h>
28 #include <errno.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <sys/mman.h>
35 #include <sys/ptrace.h>
36 #include <sys/resource.h>
37 #include <sys/types.h>
38 #include <sys/wait.h>
39 #include <unistd.h>
40 
41 #ifdef __amd64__
42 #define TEST_VSYSCALL
43 #endif
44 
/*
 * Detected vsyscall mode. It selects which expected strings the
 * /proc/${pid}/maps and /proc/${pid}/smaps checks compare against.
 *
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is --xp		vsyscall=xonly
 * 2: vsyscall VMA is r-xp		vsyscall=emulate
 *
 * volatile: the probing child stores its progress here right before each
 * access that may fault, and its SIGSEGV handler reads the value back.
 */
static volatile int g_vsyscall;
/* Expected /proc/${pid}/maps content for the detected mode, set in main(). */
static const char *g_proc_pid_maps_vsyscall;
/* Expected leading part of /proc/${pid}/smaps for the detected mode, set in main(). */
static const char *g_proc_pid_smaps_vsyscall;
53 
/*
 * Expected /proc/${pid}/maps and /proc/${pid}/smaps text for a process whose
 * only remaining VMA is the vsyscall page. The "_0", "_1" and "_2" suffixes
 * match the g_vsyscall modes: no VMA, "--xp" VMA, "r-xp" VMA.
 */

/* The VMA line, identical in maps and smaps; only the permissions vary. */
#define VSYSCALL_VMA_LINE(perms) \
	"ffffffffff600000-ffffffffff601000 " perms " 00000000 00:00 0                  [vsyscall]\n"

/*
 * smaps fields following the VMA line; they do not depend on permissions.
 *
 * "ProtectionKey:" field is conditional, so it is not part of the expected
 * text: the smaps check compares only this leading part of the file.
 */
#define VSYSCALL_SMAPS_FIELDS \
	"Size:                  4 kB\n" \
	"KernelPageSize:        4 kB\n" \
	"MMUPageSize:           4 kB\n" \
	"Rss:                   0 kB\n" \
	"Pss:                   0 kB\n" \
	"Pss_Dirty:             0 kB\n" \
	"Shared_Clean:          0 kB\n" \
	"Shared_Dirty:          0 kB\n" \
	"Private_Clean:         0 kB\n" \
	"Private_Dirty:         0 kB\n" \
	"Referenced:            0 kB\n" \
	"Anonymous:             0 kB\n" \
	"KSM:                   0 kB\n" \
	"LazyFree:              0 kB\n" \
	"AnonHugePages:         0 kB\n" \
	"ShmemPmdMapped:        0 kB\n" \
	"FilePmdMapped:         0 kB\n" \
	"Shared_Hugetlb:        0 kB\n" \
	"Private_Hugetlb:       0 kB\n" \
	"Swap:                  0 kB\n" \
	"SwapPss:               0 kB\n" \
	"Locked:                0 kB\n" \
	"THPeligible:           0\n"

static const char proc_pid_maps_vsyscall_0[] = "";
static const char proc_pid_maps_vsyscall_1[] = VSYSCALL_VMA_LINE("--xp");
static const char proc_pid_maps_vsyscall_2[] = VSYSCALL_VMA_LINE("r-xp");

static const char proc_pid_smaps_vsyscall_0[] = "";
static const char proc_pid_smaps_vsyscall_1[] =
	VSYSCALL_VMA_LINE("--xp") VSYSCALL_SMAPS_FIELDS;
static const char proc_pid_smaps_vsyscall_2[] =
	VSYSCALL_VMA_LINE("r-xp") VSYSCALL_SMAPS_FIELDS;

#undef VSYSCALL_SMAPS_FIELDS
#undef VSYSCALL_VMA_LINE
123 
124 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
125 {
126 	_exit(EXIT_FAILURE);
127 }
128 
129 #ifdef TEST_VSYSCALL
130 static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
131 {
132 	_exit(g_vsyscall);
133 }
134 
135 /*
136  * vsyscall page can't be unmapped, probe it directly.
137  */
138 static void vsyscall(void)
139 {
140 	pid_t pid;
141 	int wstatus;
142 
143 	pid = fork();
144 	if (pid < 0) {
145 		fprintf(stderr, "fork, errno %d\n", errno);
146 		exit(1);
147 	}
148 	if (pid == 0) {
149 		setrlimit(RLIMIT_CORE, &(struct rlimit){});
150 
151 		/* Hide "segfault at ffffffffff600000" messages. */
152 		struct sigaction act = {};
153 		act.sa_flags = SA_SIGINFO;
154 		act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
155 		sigaction(SIGSEGV, &act, NULL);
156 
157 		g_vsyscall = 0;
158 		/* gettimeofday(NULL, NULL); */
159 		uint64_t rax = 0xffffffffff600000;
160 		asm volatile (
161 			"call *%[rax]"
162 			: [rax] "+a" (rax)
163 			: "D" (NULL), "S" (NULL)
164 			: "rcx", "r11"
165 		);
166 
167 		g_vsyscall = 1;
168 		*(volatile int *)0xffffffffff600000UL;
169 
170 		g_vsyscall = 2;
171 		exit(g_vsyscall);
172 	}
173 	waitpid(pid, &wstatus, 0);
174 	if (WIFEXITED(wstatus)) {
175 		g_vsyscall = WEXITSTATUS(wstatus);
176 	} else {
177 		fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
178 		exit(1);
179 	}
180 }
181 #endif
182 
183 static int test_proc_pid_maps(pid_t pid)
184 {
185 	char buf[4096];
186 	snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
187 	int fd = open(buf, O_RDONLY);
188 	if (fd == -1) {
189 		perror("open /proc/${pid}/maps");
190 		return EXIT_FAILURE;
191 	} else {
192 		ssize_t rv = read(fd, buf, sizeof(buf));
193 		close(fd);
194 		if (g_vsyscall == 0) {
195 			assert(rv == 0);
196 		} else {
197 			size_t len = strlen(g_proc_pid_maps_vsyscall);
198 			assert(rv == len);
199 			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
200 		}
201 		return EXIT_SUCCESS;
202 	}
203 }
204 
/*
 * Check that /proc/${pid}/numa_maps of the mapping-less child is empty.
 *
 * Returns EXIT_SUCCESS if the file is empty or doesn't exist, EXIT_FAILURE
 * if it can't be opened for any other reason. Non-empty content trips an
 * assertion.
 */
static int test_proc_pid_numa_maps(pid_t pid)
{
	char buf[4096];

	snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid);
	const int fd = open(buf, O_RDONLY);
	if (fd != -1) {
		const ssize_t nread = read(fd, buf, sizeof(buf));
		close(fd);
		assert(nread == 0);
		return EXIT_SUCCESS;
	}

	/*
	 * /proc/${pid}/numa_maps is under CONFIG_NUMA,
	 * it doesn't necessarily exist.
	 */
	if (errno == ENOENT) {
		return EXIT_SUCCESS;
	}

	perror("open /proc/${pid}/numa_maps");
	return EXIT_FAILURE;
}
227 
228 static int test_proc_pid_smaps(pid_t pid)
229 {
230 	char buf[4096];
231 	snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
232 	int fd = open(buf, O_RDONLY);
233 	if (fd == -1) {
234 		if (errno == ENOENT) {
235 			/*
236 			 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
237 			 * it doesn't necessarily exist.
238 			 */
239 			return EXIT_SUCCESS;
240 		}
241 		perror("open /proc/${pid}/smaps");
242 		return EXIT_FAILURE;
243 	} else {
244 		ssize_t rv = read(fd, buf, sizeof(buf));
245 		close(fd);
246 		if (g_vsyscall == 0) {
247 			assert(rv == 0);
248 		} else {
249 			size_t len = strlen(g_proc_pid_smaps_vsyscall);
250 			/* TODO "ProtectionKey:" */
251 			assert(rv > len);
252 			assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0);
253 		}
254 		return EXIT_SUCCESS;
255 	}
256 }
257 
/* Expected /proc/${pid}/smaps_rollup content for a process without mappings. */
static const char g_smaps_rollup[] =
"00000000-00000000 ---p 00000000 00:00 0                                  [rollup]\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Pss_Anon:              0 kB\n"
"Pss_File:              0 kB\n"
"Pss_Shmem:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"KSM:                   0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
;

/*
 * Check that /proc/${pid}/smaps_rollup of the mapping-less child is exactly
 * g_smaps_rollup.
 *
 * Returns EXIT_SUCCESS if the content matches or the file doesn't exist,
 * EXIT_FAILURE if it can't be opened for another reason. A mismatch trips
 * an assertion.
 */
static int test_proc_pid_smaps_rollup(pid_t pid)
{
	/* Expected length, without the terminating NUL. */
	const size_t want = sizeof(g_smaps_rollup) - 1;
	char buf[4096];

	snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
	const int fd = open(buf, O_RDONLY);
	if (fd != -1) {
		const ssize_t nread = read(fd, buf, sizeof(buf));
		close(fd);
		assert(nread == want);
		assert(memcmp(buf, g_smaps_rollup, want) == 0);
		return EXIT_SUCCESS;
	}

	/*
	 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
	 * it doesn't necessarily exist.
	 */
	if (errno == ENOENT) {
		return EXIT_SUCCESS;
	}

	perror("open /proc/${pid}/smaps_rollup");
	return EXIT_FAILURE;
}
307 
/*
 * Parse an unsigned decimal number at the start of [p, end).
 *
 * p:   first byte to parse
 * end: one past the last byte of the buffer
 * rv:  receives the parsed value
 *
 * Returns a pointer to the first byte after the digits.
 *
 * Malformed input trips an assertion: there must be at least one digit, the
 * value must fit into uint64_t, and the number must be followed by at least
 * one more byte before "end".
 *
 * The arithmetic is done outside of assert() so that parsing doesn't depend
 * on assertions being compiled in.
 */
static const char *parse_u64(const char *p, const char *const end, uint64_t *rv)
{
	const char *const start = p;
	uint64_t val = 0;

	while (p != end && '0' <= *p && *p <= '9') {
		int overflow = __builtin_mul_overflow(val, 10, &val);
		overflow |= __builtin_add_overflow(val, *p - '0', &val);
		assert(!overflow);
		p += 1;
	}

	/* An empty digit sequence is not a number. */
	assert(p != start);
	/* Every field is followed by a separator or a newline. */
	assert(p != end);

	*rv = val;
	return p;
}
322 
/*
 * Consume one byte from [*pos, end) and assert that it equals "expected".
 * Advances *pos past the byte.
 */
static void statm_expect_char(const char **pos, const char *const end, char expected)
{
	assert(*pos != end);
	assert(**pos == expected);
	*pos += 1;
}

/*
 * Check /proc/${pid}/statm of the mapping-less child.
 *
 * There seem to be 2 types of valid output:
 * "0 A A B 0 0 0\n" for dynamic executables,
 * "0 0 0 B 0 0 0\n" for static executables.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file can't be opened.
 * Unexpected content trips an assertion.
 */
static int test_proc_pid_statm(pid_t pid)
{
	char buf[4096];

	snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
	const int fd = open(buf, O_RDONLY);
	if (fd == -1) {
		perror("open /proc/${pid}/statm");
		return EXIT_FAILURE;
	}

	const ssize_t nread = read(fd, buf, sizeof(buf));
	close(fd);

	assert(nread >= 0);
	assert(nread <= sizeof(buf));
	/* Debugging aid: change to 1 to dump the raw file content. */
	if (0) {
		write(1, buf, nread);
	}

	const char *cur = buf;
	const char *const end = buf + nread;
	uint64_t resident;
	uint64_t shared;
	uint64_t text;

	/* size */
	statm_expect_char(&cur, end, '0');
	statm_expect_char(&cur, end, ' ');

	cur = parse_u64(cur, end, &resident);
	statm_expect_char(&cur, end, ' ');

	cur = parse_u64(cur, end, &shared);
	statm_expect_char(&cur, end, ' ');

	cur = parse_u64(cur, end, &text);
	statm_expect_char(&cur, end, ' ');

	/* fifth field */
	statm_expect_char(&cur, end, '0');
	statm_expect_char(&cur, end, ' ');

	/* data */
	statm_expect_char(&cur, end, '0');
	statm_expect_char(&cur, end, ' ');

	/* seventh field, which ends the line */
	statm_expect_char(&cur, end, '0');
	statm_expect_char(&cur, end, '\n');

	/* Nothing may follow the single line. */
	assert(cur == end);

	/*
	 * "text" is "mm->end_code - mm->start_code" at execve(2) time.
	 * munmap() doesn't change it. It can be anything (just link
	 * statically). It can't be 0 because executing to this point
	 * implies at least 1 page of code.
	 */
	assert(text > 0);

	/*
	 * These two are always equal. Always 0 for statically linked
	 * executables and sometimes 0 for dynamically linked executables.
	 * There is no way to tell one from another without parsing ELF
	 * which is too much for this test.
	 */
	assert(resident == shared);

	return EXIT_SUCCESS;
}
396 
397 int main(void)
398 {
399 	int rv = EXIT_SUCCESS;
400 
401 #ifdef TEST_VSYSCALL
402 	vsyscall();
403 #endif
404 
405 	switch (g_vsyscall) {
406 	case 0:
407 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_0;
408 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
409 		break;
410 	case 1:
411 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_1;
412 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
413 		break;
414 	case 2:
415 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_2;
416 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
417 		break;
418 	default:
419 		abort();
420 	}
421 
422 	pid_t pid = fork();
423 	if (pid == -1) {
424 		perror("fork");
425 		return EXIT_FAILURE;
426 	} else if (pid == 0) {
427 		rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
428 		if (rv != 0) {
429 			if (errno == EPERM) {
430 				fprintf(stderr,
431 "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
432 				);
433 				kill(getppid(), SIGTERM);
434 				return EXIT_FAILURE;
435 			}
436 			perror("ptrace PTRACE_TRACEME");
437 			return EXIT_FAILURE;
438 		}
439 
440 		/*
441 		 * Hide "segfault at ..." messages. Signal handler won't run.
442 		 */
443 		struct sigaction act = {};
444 		act.sa_flags = SA_SIGINFO;
445 		act.sa_sigaction = sigaction_SIGSEGV;
446 		sigaction(SIGSEGV, &act, NULL);
447 
448 #ifdef __amd64__
449 		munmap(NULL, ((size_t)1 << 47) - 4096);
450 #elif defined __i386__
451 		{
452 			size_t len;
453 
454 			for (len = -4096;; len -= 4096) {
455 				munmap(NULL, len);
456 			}
457 		}
458 #else
459 #error "implement 'unmap everything'"
460 #endif
461 		return EXIT_FAILURE;
462 	} else {
463 		/*
464 		 * TODO find reliable way to signal parent that munmap(2) completed.
465 		 * Child can't do it directly because it effectively doesn't exist
466 		 * anymore. Looking at child's VM files isn't 100% reliable either:
467 		 * due to a bug they may not become empty or empty-like.
468 		 */
469 		sleep(1);
470 
471 		if (rv == EXIT_SUCCESS) {
472 			rv = test_proc_pid_maps(pid);
473 		}
474 		if (rv == EXIT_SUCCESS) {
475 			rv = test_proc_pid_numa_maps(pid);
476 		}
477 		if (rv == EXIT_SUCCESS) {
478 			rv = test_proc_pid_smaps(pid);
479 		}
480 		if (rv == EXIT_SUCCESS) {
481 			rv = test_proc_pid_smaps_rollup(pid);
482 		}
483 		if (rv == EXIT_SUCCESS) {
484 			rv = test_proc_pid_statm(pid);
485 		}
486 
487 		/* Cut the rope. */
488 		int wstatus;
489 		waitpid(pid, &wstatus, 0);
490 		assert(WIFSTOPPED(wstatus));
491 		assert(WSTOPSIG(wstatus) == SIGSEGV);
492 	}
493 
494 	return rv;
495 }
496 #else
/*
 * Fallback for architectures other than amd64 and i386: nothing is tested.
 * NOTE(review): exit code 4 is presumably the kselftest "skip" code; confirm
 * against kselftest.h.
 */
int main(void)
{
	return 4;
}
501 #endif
502