xref: /linux/tools/testing/selftests/proc/proc-pid-vm.c (revision e406d57be7bd2a4e73ea512c1ae36a40a44e499e)
1 /*
2  * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 /*
17  * Fork and exec tiny 1 page executable which precisely controls its VM.
18  * Test /proc/$PID/maps
19  * Test /proc/$PID/smaps
20  * Test /proc/$PID/smaps_rollup
21  * Test /proc/$PID/statm
22  *
23  * FIXME require CONFIG_TMPFS which can be disabled
24  * FIXME test other values from "smaps"
25  * FIXME support other archs
26  */
27 #undef NDEBUG
28 #include <assert.h>
29 #include <errno.h>
30 #include <sched.h>
31 #include <signal.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <sys/mount.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <sys/syscall.h>
44 #include <sys/uio.h>
45 #include <linux/kdev_t.h>
46 #include <sys/time.h>
47 #include <sys/resource.h>
48 #include <linux/fs.h>
49 
50 #ifndef __maybe_unused
51 #define __maybe_unused __attribute__((__unused__))
52 #endif
53 
54 #include "../kselftest.h"
55 
/*
 * Thin wrapper that issues the execveat system call directly through
 * syscall(). All arguments are forwarded unchanged and the raw result of
 * syscall() is returned to the caller.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	const long rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);

	return rv;
}
60 
/*
 * Enter a new mount namespace, make every mount in it private and put a fresh
 * tmpfs on /tmp.
 *
 * Never returns on failure: exits with 4 when unshare() reports ENOSYS or
 * EPERM, and with 1 for any other error.
 * NOTE(review): 4 looks like the kselftest "skip" exit code -- confirm.
 */
static void make_private_tmp(void)
{
	if (unshare(CLONE_NEWNS) == -1) {
		exit((errno == ENOSYS || errno == EPERM) ? 4 : 1);
	}

	/* "||" short-circuits, so /tmp is mounted only after "/" went private. */
	if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1 ||
	    mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
		exit(1);
	}
}
76 
/* Starts out as -1; main() stores the result of fork() here. */
static pid_t pid = -1;

/*
 * Exit hook: send SIGTERM to the process recorded in "pid".
 * Does nothing while "pid" is not a positive process ID.
 */
static void ate(void)
{
	if (pid <= 0) {
		return;
	}
	kill(pid, SIGTERM);
}
84 
/*
 * Local definition of the 64-bit ELF file header, filled in by make_exe()
 * and written at the very start of the generated executable.
 */
struct elf64_hdr {
	uint8_t e_ident[16];	/* magic bytes and identification */
	uint16_t e_type;	/* object file type */
	uint16_t e_machine;	/* target architecture */
	uint32_t e_version;	/* object file version */
	uint64_t e_entry;	/* virtual address of the entry point */
	uint64_t e_phoff;	/* file offset of the program header table */
	uint64_t e_shoff;	/* file offset of the section header table */
	uint32_t e_flags;	/* processor-specific flags */
	uint16_t e_ehsize;	/* size of this header */
	uint16_t e_phentsize;	/* size of one program header entry */
	uint16_t e_phnum;	/* number of program header entries */
	uint16_t e_shentsize;	/* size of one section header entry */
	uint16_t e_shnum;	/* number of section header entries */
	uint16_t e_shstrndx;	/* section name string table index */
};
101 
/*
 * Local definition of a 64-bit ELF program header. make_exe() emits exactly
 * one of these, directly after the ELF header.
 */
struct elf64_phdr {
	uint32_t p_type;	/* segment type */
	uint32_t p_flags;	/* segment permission flags */
	uint64_t p_offset;	/* file offset of the segment */
	uint64_t p_vaddr;	/* virtual address of the segment */
	uint64_t p_paddr;	/* physical address */
	uint64_t p_filesz;	/* bytes of the segment present in the file */
	uint64_t p_memsz;	/* bytes of the segment in memory */
	uint64_t p_align;	/* segment alignment */
};
112 
113 #ifdef __x86_64__
/* Page size the expected /proc output is computed with. */
#define PAGE_SIZE 4096
/* Virtual address the generated executable is linked and mapped at. */
#define VADDR (1UL << 32)
/* Column at which the pathname field is placed in the expected maps line. */
#define MAPS_OFFSET 73

/*
 * Tiny "assembler": each macro expands to a comma-separated list of bytes so
 * it can be used directly inside a uint8_t array initializer.
 *
 * NOTE: from here on "syscall" names the 2-byte instruction below, not the
 * libc function.
 */
#define syscall	0x0f, 0x05
/* Load a 64-bit immediate into rdi, emitted least significant byte first. */
#define mov_rdi(x)	\
	0x48, 0xbf,	\
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff,	\
	((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff

/* Load a 64-bit immediate into rsi, emitted least significant byte first. */
#define mov_rsi(x)	\
	0x48, 0xbe,	\
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff,	\
	((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff

/* Load a 32-bit immediate into eax, emitted least significant byte first. */
#define mov_eax(x)	\
	0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
131 
/*
 * Hand-assembled machine code that make_exe() places right after the ELF and
 * program headers; the ELF entry point is set to its first byte.
 *
 * It unmaps everything above its own page, writes one byte to fd 0 to tell
 * the parent it is ready, and then sits in pause() forever.
 */
static const uint8_t payload[] = {
	/* Casually unmap stack, vDSO and everything else. */
	/* munmap */
	/* Range starts one page above VADDR and ends at (1 << 47) - 4096. */
	mov_rdi(VADDR + 4096),
	mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
	mov_eax(11),
	syscall,

	/* Ping parent. */
	/* write(0, &c, 1); */
	0x31, 0xff,					/* xor edi, edi */
	/* The byte written is whatever follows the lea; only its arrival matters. */
	0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,	/* lea rsi, [rip] */
	0xba, 0x01, 0x00, 0x00, 0x00,			/* mov edx, 1 */
	mov_eax(1),
	syscall,

	/* 1: pause(); */
	mov_eax(34),
	syscall,

	/* -9 bytes: back over the 5-byte mov, 2-byte syscall and this 2-byte jmp. */
	0xeb, 0xf7,	/* jmp 1b */
};
154 
make_exe(const uint8_t * payload,size_t len)155 static int make_exe(const uint8_t *payload, size_t len)
156 {
157 	struct elf64_hdr h;
158 	struct elf64_phdr ph;
159 
160 	struct iovec iov[3] = {
161 		{&h, sizeof(struct elf64_hdr)},
162 		{&ph, sizeof(struct elf64_phdr)},
163 		{(void *)payload, len},
164 	};
165 	int fd, fd1;
166 	char buf[64];
167 
168 	memset(&h, 0, sizeof(h));
169 	h.e_ident[0] = 0x7f;
170 	h.e_ident[1] = 'E';
171 	h.e_ident[2] = 'L';
172 	h.e_ident[3] = 'F';
173 	h.e_ident[4] = 2;
174 	h.e_ident[5] = 1;
175 	h.e_ident[6] = 1;
176 	h.e_ident[7] = 0;
177 	h.e_type = 2;
178 	h.e_machine = 0x3e;
179 	h.e_version = 1;
180 	h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
181 	h.e_phoff = sizeof(struct elf64_hdr);
182 	h.e_shoff = 0;
183 	h.e_flags = 0;
184 	h.e_ehsize = sizeof(struct elf64_hdr);
185 	h.e_phentsize = sizeof(struct elf64_phdr);
186 	h.e_phnum = 1;
187 	h.e_shentsize = 0;
188 	h.e_shnum = 0;
189 	h.e_shstrndx = 0;
190 
191 	memset(&ph, 0, sizeof(ph));
192 	ph.p_type = 1;
193 	ph.p_flags = (1<<2)|1;
194 	ph.p_offset = 0;
195 	ph.p_vaddr = VADDR;
196 	ph.p_paddr = 0;
197 	ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
198 	ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
199 	ph.p_align = 4096;
200 
201 	fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
202 	if (fd == -1) {
203 		exit(1);
204 	}
205 
206 	if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
207 		exit(1);
208 	}
209 
210 	/* Avoid ETXTBSY on exec. */
211 	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
212 	fd1 = open(buf, O_RDONLY|O_CLOEXEC);
213 	close(fd);
214 
215 	return fd1;
216 }
217 #endif
218 
/*
 * Detected vsyscall mode, set by vsyscall() from the probe child's exit
 * status:
 *
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is --xp		vsyscall=xonly
 * 2: vsyscall VMA is r-xp		vsyscall=emulate
 */
static volatile int g_vsyscall;
/* Points at whichever str_vsyscall_N below matches g_vsyscall. */
static const char *str_vsyscall __maybe_unused;

/* Expected /proc/$PID/maps line for the vsyscall page, one per mode. */
static const char str_vsyscall_0[] __maybe_unused = "";
static const char str_vsyscall_1[] __maybe_unused =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
static const char str_vsyscall_2[] __maybe_unused =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";
232 
233 #ifdef __x86_64__
sigaction_SIGSEGV(int _,siginfo_t * __,void * ___)234 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
235 {
236 	_exit(g_vsyscall);
237 }
238 
239 /*
240  * vsyscall page can't be unmapped, probe it directly.
241  */
vsyscall(void)242 static void vsyscall(void)
243 {
244 	pid_t pid;
245 	int wstatus;
246 
247 	pid = fork();
248 	if (pid < 0) {
249 		fprintf(stderr, "fork, errno %d\n", errno);
250 		exit(1);
251 	}
252 	if (pid == 0) {
253 		struct rlimit rlim = {0, 0};
254 		(void)setrlimit(RLIMIT_CORE, &rlim);
255 
256 		/* Hide "segfault at ffffffffff600000" messages. */
257 		struct sigaction act;
258 		memset(&act, 0, sizeof(struct sigaction));
259 		act.sa_flags = SA_SIGINFO;
260 		act.sa_sigaction = sigaction_SIGSEGV;
261 		(void)sigaction(SIGSEGV, &act, NULL);
262 
263 		g_vsyscall = 0;
264 		/* gettimeofday(NULL, NULL); */
265 		uint64_t rax = 0xffffffffff600000;
266 		asm volatile (
267 			"call *%[rax]"
268 			: [rax] "+a" (rax)
269 			: "D" (NULL), "S" (NULL)
270 			: "rcx", "r11"
271 		);
272 
273 		g_vsyscall = 1;
274 		*(volatile int *)0xffffffffff600000UL;
275 
276 		g_vsyscall = 2;
277 		exit(g_vsyscall);
278 	}
279 	waitpid(pid, &wstatus, 0);
280 	if (WIFEXITED(wstatus)) {
281 		g_vsyscall = WEXITSTATUS(wstatus);
282 	} else {
283 		fprintf(stderr, "error: wstatus %08x\n", wstatus);
284 		exit(1);
285 	}
286 }
287 
main(void)288 int main(void)
289 {
290 	int pipefd[2];
291 	int exec_fd;
292 
293 	vsyscall();
294 	switch (g_vsyscall) {
295 	case 0:
296 		str_vsyscall = str_vsyscall_0;
297 		break;
298 	case 1:
299 		str_vsyscall = str_vsyscall_1;
300 		break;
301 	case 2:
302 		str_vsyscall = str_vsyscall_2;
303 		break;
304 	default:
305 		abort();
306 	}
307 
308 	atexit(ate);
309 
310 	make_private_tmp();
311 
312 	/* Reserve fd 0 for 1-byte pipe ping from child. */
313 	close(0);
314 	if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
315 		return 1;
316 	}
317 
318 	exec_fd = make_exe(payload, sizeof(payload));
319 
320 	if (pipe(pipefd) == -1) {
321 		return 1;
322 	}
323 	if (dup2(pipefd[1], 0) != 0) {
324 		return 1;
325 	}
326 
327 	pid = fork();
328 	if (pid == -1) {
329 		return 1;
330 	}
331 	if (pid == 0) {
332 		sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
333 		return 1;
334 	}
335 
336 	char _;
337 	if (read(pipefd[0], &_, 1) != 1) {
338 		return 1;
339 	}
340 
341 	struct stat st;
342 	if (fstat(exec_fd, &st) == -1) {
343 		return 1;
344 	}
345 
346 	/* Generate "head -n1 /proc/$PID/maps" */
347 	char buf0[256];
348 	memset(buf0, ' ', sizeof(buf0));
349 	int len = snprintf(buf0, sizeof(buf0),
350 			"%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
351 			VADDR, VADDR + PAGE_SIZE,
352 			MAJOR(st.st_dev), MINOR(st.st_dev),
353 			(unsigned long long)st.st_ino);
354 	buf0[len] = ' ';
355 	snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
356 		 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
357 
358 	/* Test /proc/$PID/maps */
359 	{
360 		const size_t len = strlen(buf0) + strlen(str_vsyscall);
361 		char buf[256];
362 		ssize_t rv;
363 		int fd;
364 
365 		snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
366 		fd = open(buf, O_RDONLY);
367 		if (fd == -1) {
368 			return 1;
369 		}
370 		rv = read(fd, buf, sizeof(buf));
371 		assert(rv == len);
372 		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
373 		if (g_vsyscall > 0) {
374 			assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
375 		}
376 	}
377 
378 	/* Test /proc/$PID/smaps */
379 	{
380 		char buf[4096];
381 		ssize_t rv;
382 		int fd;
383 
384 		snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
385 		fd = open(buf, O_RDONLY);
386 		if (fd == -1) {
387 			return 1;
388 		}
389 		rv = read(fd, buf, sizeof(buf));
390 		assert(0 <= rv && rv <= sizeof(buf));
391 
392 		assert(rv >= strlen(buf0));
393 		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
394 
395 #define RSS1 "Rss:                   4 kB\n"
396 #define RSS2 "Rss:                   0 kB\n"
397 #define PSS1 "Pss:                   4 kB\n"
398 #define PSS2 "Pss:                   0 kB\n"
399 		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
400 		       memmem(buf, rv, RSS2, strlen(RSS2)));
401 		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
402 		       memmem(buf, rv, PSS2, strlen(PSS2)));
403 
404 		static const char *S[] = {
405 			"Size:                  4 kB\n",
406 			"KernelPageSize:        4 kB\n",
407 			"MMUPageSize:           4 kB\n",
408 			"Anonymous:             0 kB\n",
409 			"AnonHugePages:         0 kB\n",
410 			"Shared_Hugetlb:        0 kB\n",
411 			"Private_Hugetlb:       0 kB\n",
412 			"Locked:                0 kB\n",
413 		};
414 		int i;
415 
416 		for (i = 0; i < ARRAY_SIZE(S); i++) {
417 			assert(memmem(buf, rv, S[i], strlen(S[i])));
418 		}
419 
420 		if (g_vsyscall > 0) {
421 			assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
422 		}
423 	}
424 
425 	/* Test /proc/$PID/smaps_rollup */
426 	{
427 		char bufr[256];
428 		memset(bufr, ' ', sizeof(bufr));
429 		len = snprintf(bufr, sizeof(bufr),
430 				"%08lx-%08lx ---p 00000000 00:00 0",
431 				VADDR, VADDR + PAGE_SIZE);
432 		bufr[len] = ' ';
433 		snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
434 			 "[rollup]\n");
435 
436 		char buf[1024];
437 		ssize_t rv;
438 		int fd;
439 
440 		snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
441 		fd = open(buf, O_RDONLY);
442 		if (fd == -1) {
443 			return 1;
444 		}
445 		rv = read(fd, buf, sizeof(buf));
446 		assert(0 <= rv && rv <= sizeof(buf));
447 
448 		assert(rv >= strlen(bufr));
449 		assert(memcmp(buf, bufr, strlen(bufr)) == 0);
450 
451 		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
452 		       memmem(buf, rv, RSS2, strlen(RSS2)));
453 		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
454 		       memmem(buf, rv, PSS2, strlen(PSS2)));
455 
456 		static const char *S[] = {
457 			"Anonymous:             0 kB\n",
458 			"AnonHugePages:         0 kB\n",
459 			"Shared_Hugetlb:        0 kB\n",
460 			"Private_Hugetlb:       0 kB\n",
461 			"Locked:                0 kB\n",
462 		};
463 		int i;
464 
465 		for (i = 0; i < ARRAY_SIZE(S); i++) {
466 			assert(memmem(buf, rv, S[i], strlen(S[i])));
467 		}
468 	}
469 
470 	/* Test /proc/$PID/statm */
471 	{
472 		char buf[64];
473 		ssize_t rv;
474 		int fd;
475 
476 		snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
477 		fd = open(buf, O_RDONLY);
478 		if (fd == -1) {
479 			return 1;
480 		}
481 		rv = read(fd, buf, sizeof(buf));
482 		assert(rv == 7 * 2);
483 
484 		assert(buf[0] == '1');	/* ->total_vm */
485 		assert(buf[1] == ' ');
486 		assert(buf[2] == '0' || buf[2] == '1');	/* rss */
487 		assert(buf[3] == ' ');
488 		assert(buf[4] == '0' || buf[2] == '1');	/* file rss */
489 		assert(buf[5] == ' ');
490 		assert(buf[6] == '1');	/* ELF executable segments */
491 		assert(buf[7] == ' ');
492 		assert(buf[8] == '0');
493 		assert(buf[9] == ' ');
494 		assert(buf[10] == '0');	/* ->data_vm + ->stack_vm */
495 		assert(buf[11] == ' ');
496 		assert(buf[12] == '0');
497 		assert(buf[13] == '\n');
498 	}
499 
500 	/* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */
501 	{
502 		char path_buf[256], exp_path_buf[256];
503 		struct procmap_query q;
504 		int fd, err;
505 
506 		snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid);
507 		fd = open(path_buf, O_RDONLY);
508 		if (fd == -1)
509 			return 1;
510 
511 		/* CASE 1: exact MATCH at VADDR */
512 		memset(&q, 0, sizeof(q));
513 		q.size = sizeof(q);
514 		q.query_addr = VADDR;
515 		q.query_flags = 0;
516 		q.vma_name_addr = (__u64)(unsigned long)path_buf;
517 		q.vma_name_size = sizeof(path_buf);
518 
519 		err = ioctl(fd, PROCMAP_QUERY, &q);
520 		assert(err == 0);
521 
522 		assert(q.query_addr == VADDR);
523 		assert(q.query_flags == 0);
524 
525 		assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE));
526 		assert(q.vma_start == VADDR);
527 		assert(q.vma_end == VADDR + PAGE_SIZE);
528 		assert(q.vma_page_size == PAGE_SIZE);
529 
530 		assert(q.vma_offset == 0);
531 		assert(q.inode == st.st_ino);
532 		assert(q.dev_major == MAJOR(st.st_dev));
533 		assert(q.dev_minor == MINOR(st.st_dev));
534 
535 		snprintf(exp_path_buf, sizeof(exp_path_buf),
536 			"/tmp/#%llu (deleted)", (unsigned long long)st.st_ino);
537 		assert(q.vma_name_size == strlen(exp_path_buf) + 1);
538 		assert(strcmp(path_buf, exp_path_buf) == 0);
539 
540 		/* CASE 2: NO MATCH at VADDR-1 */
541 		memset(&q, 0, sizeof(q));
542 		q.size = sizeof(q);
543 		q.query_addr = VADDR - 1;
544 		q.query_flags = 0; /* exact match */
545 
546 		err = ioctl(fd, PROCMAP_QUERY, &q);
547 		err = err < 0 ? -errno : 0;
548 		assert(err == -ENOENT);
549 
550 		/* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */
551 		memset(&q, 0, sizeof(q));
552 		q.size = sizeof(q);
553 		q.query_addr = VADDR - 1;
554 		q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA;
555 
556 		err = ioctl(fd, PROCMAP_QUERY, &q);
557 		assert(err == 0);
558 
559 		assert(q.query_addr == VADDR - 1);
560 		assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA);
561 		assert(q.vma_start == VADDR);
562 		assert(q.vma_end == VADDR + PAGE_SIZE);
563 
564 		/* CASE 4: NO MATCH at VADDR + PAGE_SIZE */
565 		memset(&q, 0, sizeof(q));
566 		q.size = sizeof(q);
567 		q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */
568 		q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA;
569 
570 		err = ioctl(fd, PROCMAP_QUERY, &q);
571 		err = err < 0 ? -errno : 0;
572 		assert(err == -ENOENT);
573 
574 		/* CASE 5: NO MATCH WRITABLE at VADDR */
575 		memset(&q, 0, sizeof(q));
576 		q.size = sizeof(q);
577 		q.query_addr = VADDR;
578 		q.query_flags = PROCMAP_QUERY_VMA_WRITABLE;
579 
580 		err = ioctl(fd, PROCMAP_QUERY, &q);
581 		err = err < 0 ? -errno : 0;
582 		assert(err == -ENOENT);
583 	}
584 
585 	return 0;
586 }
587 #else
/*
 * Fallback entry point for builds where __x86_64__ is not defined: nothing
 * is tested, the program only reports a fixed exit status.
 */
int main(void)
{
	/* NOTE(review): 4 looks like the kselftest "skip" exit code -- confirm. */
	enum { exit_status = 4 };

	return exit_status;
}
592 #endif
593