1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * syscall_numbering.c - test calling the x86-64 kernel with various 4 * valid and invalid system call numbers. 5 * 6 * Copyright (c) 2018 Andrew Lutomirski 7 */ 8 9 #define _GNU_SOURCE 10 11 #include <stdlib.h> 12 #include <stdio.h> 13 #include <stdbool.h> 14 #include <errno.h> 15 #include <unistd.h> 16 #include <string.h> 17 #include <fcntl.h> 18 #include <limits.h> 19 #include <signal.h> 20 #include <sysexits.h> 21 22 #include <sys/ptrace.h> 23 #include <sys/user.h> 24 #include <sys/wait.h> 25 #include <sys/mman.h> 26 27 #include <linux/ptrace.h> 28 #include "../kselftest.h" 29 30 /* Common system call numbers */ 31 #define SYS_READ 0 32 #define SYS_WRITE 1 33 #define SYS_GETPID 39 34 /* x64-only system call numbers */ 35 #define X64_IOCTL 16 36 #define X64_READV 19 37 #define X64_WRITEV 20 38 /* x32-only system call numbers (without X32_BIT) */ 39 #define X32_IOCTL 514 40 #define X32_READV 515 41 #define X32_WRITEV 516 42 43 #define X32_BIT 0x40000000 44 45 static int nullfd = -1; /* File descriptor for /dev/null */ 46 static bool with_x32; /* x32 supported on this kernel? */ 47 48 enum ptrace_pass { 49 PTP_NOTHING, 50 PTP_GETREGS, 51 PTP_WRITEBACK, 52 PTP_FUZZRET, 53 PTP_FUZZHIGH, 54 PTP_INTNUM, 55 PTP_DONE 56 }; 57 58 static const char * const ptrace_pass_name[] = 59 { 60 [PTP_NOTHING] = "just stop, no data read", 61 [PTP_GETREGS] = "only getregs", 62 [PTP_WRITEBACK] = "getregs, unmodified setregs", 63 [PTP_FUZZRET] = "modifying the default return", 64 [PTP_FUZZHIGH] = "clobbering the top 32 bits", 65 [PTP_INTNUM] = "sign-extending the syscall number", 66 }; 67 68 /* 69 * Shared memory block between tracer and test 70 */ 71 struct shared { 72 unsigned int nerr; /* Total error count */ 73 unsigned int indent; /* Message indentation level */ 74 enum ptrace_pass ptrace_pass; 75 bool probing_syscall; /* In probe_syscall() */ 76 }; 77 static volatile struct shared *sh; 78 79 static inline unsigned int offset(void) 80 { 81 unsigned int level = sh ? sh->indent : 0; 82 83 return 8 + level * 4; 84 } 85 86 #define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \ 87 ## __VA_ARGS__) 88 89 #define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__) 90 #define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__) 91 #define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__) 92 93 #define fail(fmt, ...) \ 94 do { \ 95 msg(FAIL, fmt, ## __VA_ARGS__); \ 96 sh->nerr++; \ 97 } while (0) 98 99 #define crit(fmt, ...) \ 100 do { \ 101 sh->indent = 0; \ 102 msg(FAIL, fmt, ## __VA_ARGS__); \ 103 msg(SKIP, "Unable to run test\n"); \ 104 exit(EX_OSERR); \ 105 } while (0) 106 107 /* Sentinel for ptrace-modified return value */ 108 #define MODIFIED_BY_PTRACE -9999 109 110 /* 111 * Directly invokes the given syscall with nullfd as the first argument 112 * and the rest zero. Avoids involving glibc wrappers in case they ever 113 * end up intercepting some system calls for some reason, or modify 114 * the system call number itself. 115 */ 116 static long long probe_syscall(int msb, int lsb) 117 { 118 register long long arg1 asm("rdi") = nullfd; 119 register long long arg2 asm("rsi") = 0; 120 register long long arg3 asm("rdx") = 0; 121 register long long arg4 asm("r10") = 0; 122 register long long arg5 asm("r8") = 0; 123 register long long arg6 asm("r9") = 0; 124 long long nr = ((long long)msb << 32) | (unsigned int)lsb; 125 long long ret; 126 127 /* 128 * We pass in an extra copy of the extended system call number 129 * in %rbx, so we can examine it from the ptrace handler without 130 * worrying about it being possibly modified. This is to test 131 * the validity of struct user regs.orig_rax a.k.a. 132 * struct pt_regs.orig_ax. 133 */ 134 sh->probing_syscall = true; 135 asm volatile("syscall" 136 : "=a" (ret) 137 : "a" (nr), "b" (nr), 138 "r" (arg1), "r" (arg2), "r" (arg3), 139 "r" (arg4), "r" (arg5), "r" (arg6) 140 : "rcx", "r11", "memory", "cc"); 141 sh->probing_syscall = false; 142 143 return ret; 144 } 145 146 static const char *syscall_str(int msb, int start, int end) 147 { 148 static char buf[64]; 149 const char * const type = (start & X32_BIT) ? "x32" : "x64"; 150 int lsb = start; 151 152 /* 153 * Improve readability by stripping the x32 bit, but round 154 * toward zero so we don't display -1 as -1073741825. 155 */ 156 if (lsb < 0) 157 lsb |= X32_BIT; 158 else 159 lsb &= ~X32_BIT; 160 161 if (start == end) 162 snprintf(buf, sizeof buf, "%s syscall %d:%d", 163 type, msb, lsb); 164 else 165 snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d", 166 type, msb, lsb, lsb + (end-start)); 167 168 return buf; 169 } 170 171 static unsigned int _check_for(int msb, int start, int end, long long expect, 172 const char *expect_str) 173 { 174 unsigned int err = 0; 175 176 sh->indent++; 177 if (start != end) 178 sh->indent++; 179 180 for (int nr = start; nr <= end; nr++) { 181 long long ret = probe_syscall(msb, nr); 182 183 if (ret != expect) { 184 fail("%s returned %lld, but it should have returned %s\n", 185 syscall_str(msb, nr, nr), 186 ret, expect_str); 187 err++; 188 } 189 } 190 191 if (start != end) 192 sh->indent--; 193 194 if (err) { 195 if (start != end) 196 fail("%s had %u failure%s\n", 197 syscall_str(msb, start, end), 198 err, err == 1 ? "s" : ""); 199 } else { 200 ok("%s returned %s as expected\n", 201 syscall_str(msb, start, end), expect_str); 202 } 203 204 sh->indent--; 205 206 return err; 207 } 208 209 #define check_for(msb,start,end,expect) \ 210 _check_for(msb,start,end,expect,#expect) 211 212 static bool check_zero(int msb, int nr) 213 { 214 return check_for(msb, nr, nr, 0); 215 } 216 217 static bool check_enosys(int msb, int nr) 218 { 219 return check_for(msb, nr, nr, -ENOSYS); 220 } 221 222 /* 223 * Anyone diagnosing a failure will want to know whether the kernel 224 * supports x32. Tell them. This can also be used to conditionalize 225 * tests based on existence or nonexistence of x32. 226 */ 227 static bool test_x32(void) 228 { 229 long long ret; 230 pid_t mypid = getpid(); 231 232 run("Checking for x32 by calling x32 getpid()\n"); 233 ret = probe_syscall(0, SYS_GETPID | X32_BIT); 234 235 sh->indent++; 236 if (ret == mypid) { 237 info("x32 is supported\n"); 238 with_x32 = true; 239 } else if (ret == -ENOSYS) { 240 info("x32 is not supported\n"); 241 with_x32 = false; 242 } else { 243 fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid); 244 with_x32 = false; 245 } 246 sh->indent--; 247 return with_x32; 248 } 249 250 static void test_syscalls_common(int msb) 251 { 252 enum ptrace_pass pass = sh->ptrace_pass; 253 254 run("Checking some common syscalls as 64 bit\n"); 255 check_zero(msb, SYS_READ); 256 check_zero(msb, SYS_WRITE); 257 258 run("Checking some 64-bit only syscalls as 64 bit\n"); 259 check_zero(msb, X64_READV); 260 check_zero(msb, X64_WRITEV); 261 262 run("Checking out of range system calls\n"); 263 check_for(msb, -64, -2, -ENOSYS); 264 if (pass >= PTP_FUZZRET) 265 check_for(msb, -1, -1, MODIFIED_BY_PTRACE); 266 else 267 check_for(msb, -1, -1, -ENOSYS); 268 check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS); 269 check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS); 270 check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS); 271 } 272 273 static void test_syscalls_with_x32(int msb) 274 { 275 /* 276 * Syscalls 512-547 are "x32" syscalls. They are 277 * intended to be called with the x32 (0x40000000) bit 278 * set. Calling them without the x32 bit set is 279 * nonsense and should not work. 280 */ 281 run("Checking x32 syscalls as 64 bit\n"); 282 check_for(msb, 512, 547, -ENOSYS); 283 284 run("Checking some common syscalls as x32\n"); 285 check_zero(msb, SYS_READ | X32_BIT); 286 check_zero(msb, SYS_WRITE | X32_BIT); 287 288 run("Checking some x32 syscalls as x32\n"); 289 check_zero(msb, X32_READV | X32_BIT); 290 check_zero(msb, X32_WRITEV | X32_BIT); 291 292 run("Checking some 64-bit syscalls as x32\n"); 293 check_enosys(msb, X64_IOCTL | X32_BIT); 294 check_enosys(msb, X64_READV | X32_BIT); 295 check_enosys(msb, X64_WRITEV | X32_BIT); 296 } 297 298 static void test_syscalls_without_x32(int msb) 299 { 300 run("Checking for absence of x32 system calls\n"); 301 check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS); 302 } 303 304 static void test_syscall_numbering(void) 305 { 306 static const int msbs[] = { 307 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX, 308 INT_MIN, INT_MIN+1 309 }; 310 311 sh->indent++; 312 313 /* 314 * The MSB is supposed to be ignored, so we loop over a few 315 * to test that out. 316 */ 317 for (size_t i = 0; i < ARRAY_SIZE(msbs); i++) { 318 int msb = msbs[i]; 319 run("Checking system calls with msb = %d (0x%x)\n", 320 msb, msb); 321 322 sh->indent++; 323 324 test_syscalls_common(msb); 325 if (with_x32) 326 test_syscalls_with_x32(msb); 327 else 328 test_syscalls_without_x32(msb); 329 330 sh->indent--; 331 } 332 333 sh->indent--; 334 } 335 336 static void syscall_numbering_tracee(void) 337 { 338 enum ptrace_pass pass; 339 340 if (ptrace(PTRACE_TRACEME, 0, 0, 0)) { 341 crit("Failed to request tracing\n"); 342 return; 343 } 344 raise(SIGSTOP); 345 346 for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE; 347 sh->ptrace_pass = ++pass) { 348 run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]); 349 test_syscall_numbering(); 350 } 351 } 352 353 static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass) 354 { 355 struct user_regs_struct regs; 356 357 sh->probing_syscall = false; /* Do this on entry only */ 358 359 /* For these, don't even getregs */ 360 if (pass == PTP_NOTHING || pass == PTP_DONE) 361 return; 362 363 ptrace(PTRACE_GETREGS, testpid, NULL, ®s); 364 365 if (regs.orig_rax != regs.rbx) { 366 fail("orig_rax %#llx doesn't match syscall number %#llx\n", 367 (unsigned long long)regs.orig_rax, 368 (unsigned long long)regs.rbx); 369 } 370 371 switch (pass) { 372 case PTP_GETREGS: 373 /* Just read, no writeback */ 374 return; 375 case PTP_WRITEBACK: 376 /* Write back the same register state verbatim */ 377 break; 378 case PTP_FUZZRET: 379 regs.rax = MODIFIED_BY_PTRACE; 380 break; 381 case PTP_FUZZHIGH: 382 regs.rax = MODIFIED_BY_PTRACE; 383 regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL; 384 break; 385 case PTP_INTNUM: 386 regs.rax = MODIFIED_BY_PTRACE; 387 regs.orig_rax = (int)regs.orig_rax; 388 break; 389 default: 390 crit("invalid ptrace_pass\n"); 391 break; 392 } 393 394 ptrace(PTRACE_SETREGS, testpid, NULL, ®s); 395 } 396 397 static void syscall_numbering_tracer(pid_t testpid) 398 { 399 int wstatus; 400 401 do { 402 pid_t wpid = waitpid(testpid, &wstatus, 0); 403 if (wpid < 0 && errno != EINTR) 404 break; 405 if (wpid != testpid) 406 continue; 407 if (!WIFSTOPPED(wstatus)) 408 break; /* Thread exited? */ 409 410 if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP) 411 mess_with_syscall(testpid, sh->ptrace_pass); 412 } while (sh->ptrace_pass != PTP_DONE && 413 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL)); 414 415 ptrace(PTRACE_DETACH, testpid, NULL, NULL); 416 417 /* Wait for the child process to terminate */ 418 while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus)) 419 /* wait some more */; 420 } 421 422 static void test_traced_syscall_numbering(void) 423 { 424 pid_t testpid; 425 426 /* Launch the test thread; this thread continues as the tracer thread */ 427 testpid = fork(); 428 429 if (testpid < 0) { 430 crit("Unable to launch tracer process\n"); 431 } else if (testpid == 0) { 432 syscall_numbering_tracee(); 433 _exit(0); 434 } else { 435 syscall_numbering_tracer(testpid); 436 } 437 } 438 439 int main(void) 440 { 441 unsigned int nerr; 442 443 /* 444 * It is quite likely to get a segfault on a failure, so make 445 * sure the message gets out by setting stdout to nonbuffered. 446 */ 447 setvbuf(stdout, NULL, _IONBF, 0); 448 449 /* 450 * Harmless file descriptor to work on... 451 */ 452 nullfd = open("/dev/null", O_RDWR); 453 if (nullfd < 0) { 454 crit("Unable to open /dev/null: %s\n", strerror(errno)); 455 } 456 457 /* 458 * Set up a block of shared memory... 459 */ 460 sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE, 461 MAP_ANONYMOUS|MAP_SHARED, 0, 0); 462 if (sh == MAP_FAILED) { 463 crit("Unable to allocated shared memory block: %s\n", 464 strerror(errno)); 465 } 466 467 with_x32 = test_x32(); 468 469 run("Running tests without ptrace...\n"); 470 test_syscall_numbering(); 471 472 test_traced_syscall_numbering(); 473 474 nerr = sh->nerr; 475 if (!nerr) { 476 ok("All system calls succeeded or failed as expected\n"); 477 return 0; 478 } else { 479 fail("A total of %u system call%s had incorrect behavior\n", 480 nerr, nerr != 1 ? "s" : ""); 481 return 1; 482 } 483 } 484