1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <stdio.h> 6 #include <sys/time.h> 7 #include <time.h> 8 #include <stdlib.h> 9 #include <sys/syscall.h> 10 #include <unistd.h> 11 #include <dlfcn.h> 12 #include <string.h> 13 #include <inttypes.h> 14 #include <signal.h> 15 #include <sys/ucontext.h> 16 #include <errno.h> 17 #include <err.h> 18 #include <sched.h> 19 #include <stdbool.h> 20 #include <setjmp.h> 21 #include <sys/uio.h> 22 23 #include "helpers.h" 24 #include "../kselftest.h" 25 26 #ifdef __x86_64__ 27 #define TOTAL_TESTS 13 28 #else 29 #define TOTAL_TESTS 8 30 #endif 31 32 #ifdef __x86_64__ 33 # define VSYS(x) (x) 34 #else 35 # define VSYS(x) 0 36 #endif 37 38 #ifndef SYS_getcpu 39 # ifdef __x86_64__ 40 # define SYS_getcpu 309 41 # else 42 # define SYS_getcpu 318 43 # endif 44 #endif 45 46 /* max length of lines in /proc/self/maps - anything longer is skipped here */ 47 #define MAPS_LINE_LEN 128 48 49 /* vsyscalls and vDSO */ 50 bool vsyscall_map_r = false, vsyscall_map_x = false; 51 52 typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); 53 const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); 54 gtod_t vdso_gtod; 55 56 typedef int (*vgettime_t)(clockid_t, struct timespec *); 57 vgettime_t vdso_gettime; 58 59 typedef long (*time_func_t)(time_t *t); 60 const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); 61 time_func_t vdso_time; 62 63 typedef long (*getcpu_t)(unsigned *, unsigned *, void *); 64 const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); 65 getcpu_t vdso_getcpu; 66 67 static void init_vdso(void) 68 { 69 void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); 70 if (!vdso) 71 vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); 72 if (!vdso) { 73 ksft_print_msg("[WARN] failed to find vDSO\n"); 74 return; 75 } 76 77 vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); 78 if (!vdso_gtod) 79 ksft_print_msg("[WARN] failed to find gettimeofday in vDSO\n"); 80 81 vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); 82 if (!vdso_gettime) 83 ksft_print_msg("[WARN] failed to find clock_gettime in vDSO\n"); 84 85 vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); 86 if (!vdso_time) 87 ksft_print_msg("[WARN] failed to find time in vDSO\n"); 88 89 vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); 90 if (!vdso_getcpu) 91 ksft_print_msg("[WARN] failed to find getcpu in vDSO\n"); 92 } 93 94 /* syscalls */ 95 static inline long sys_gtod(struct timeval *tv, struct timezone *tz) 96 { 97 return syscall(SYS_gettimeofday, tv, tz); 98 } 99 100 static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) 101 { 102 return syscall(SYS_clock_gettime, id, ts); 103 } 104 105 static inline long sys_time(time_t *t) 106 { 107 return syscall(SYS_time, t); 108 } 109 110 static inline long sys_getcpu(unsigned * cpu, unsigned * node, 111 void* cache) 112 { 113 return syscall(SYS_getcpu, cpu, node, cache); 114 } 115 116 static double tv_diff(const struct timeval *a, const struct timeval *b) 117 { 118 return (double)(a->tv_sec - b->tv_sec) + 119 (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; 120 } 121 122 static void check_gtod(const struct timeval *tv_sys1, 123 const struct timeval *tv_sys2, 124 const struct timezone *tz_sys, 125 const char *which, 126 const struct timeval *tv_other, 127 const struct timezone *tz_other) 128 { 129 double d1, d2; 130 131 if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || 132 tz_sys->tz_dsttime != tz_other->tz_dsttime)) 133 ksft_print_msg("%s tz mismatch\n", which); 134 135 d1 = tv_diff(tv_other, tv_sys1); 136 d2 = tv_diff(tv_sys2, tv_other); 137 138 ksft_print_msg("%s time offsets: %lf %lf\n", which, d1, d2); 139 140 ksft_test_result(!(d1 < 0 || d2 < 0), "%s gettimeofday()'s timeval\n", which); 141 } 142 143 static void test_gtod(void) 144 { 145 struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; 146 struct timezone tz_sys, tz_vdso, tz_vsys; 147 long ret_vdso = -1; 148 long ret_vsys = -1; 149 150 ksft_print_msg("test gettimeofday()\n"); 151 152 if (sys_gtod(&tv_sys1, &tz_sys) != 0) 153 ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno)); 154 if (vdso_gtod) 155 ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); 156 if (vsyscall_map_x) 157 ret_vsys = vgtod(&tv_vsys, &tz_vsys); 158 if (sys_gtod(&tv_sys2, &tz_sys) != 0) 159 ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno)); 160 161 if (vdso_gtod) { 162 if (ret_vdso == 0) 163 check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); 164 else 165 ksft_test_result_fail("vDSO gettimeofday() failed: %ld\n", ret_vdso); 166 } else { 167 ksft_test_result_skip("vdso_gtod isn't set\n"); 168 } 169 170 if (vsyscall_map_x) { 171 if (ret_vsys == 0) 172 check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); 173 else 174 ksft_test_result_fail("vsys gettimeofday() failed: %ld\n", ret_vsys); 175 } else { 176 ksft_test_result_skip("vsyscall_map_x isn't set\n"); 177 } 178 } 179 180 static void test_time(void) 181 { 182 long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; 183 long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; 184 185 ksft_print_msg("test time()\n"); 186 t_sys1 = sys_time(&t2_sys1); 187 if (vdso_time) 188 t_vdso = vdso_time(&t2_vdso); 189 if (vsyscall_map_x) 190 t_vsys = vtime(&t2_vsys); 191 t_sys2 = sys_time(&t2_sys2); 192 if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { 193 ksft_print_msg("syscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", 194 t_sys1, t2_sys1, t_sys2, t2_sys2); 195 ksft_test_result_skip("vdso_time\n"); 196 ksft_test_result_skip("vdso_time\n"); 197 return; 198 } 199 200 if (vdso_time) { 201 if (t_vdso < 0 || t_vdso != t2_vdso) 202 ksft_test_result_fail("vDSO failed (ret:%ld output:%ld)\n", 203 t_vdso, t2_vdso); 204 else if (t_vdso < t_sys1 || t_vdso > t_sys2) 205 ksft_test_result_fail("vDSO returned the wrong time (%ld %ld %ld)\n", 206 t_sys1, t_vdso, t_sys2); 207 else 208 ksft_test_result_pass("vDSO time() is okay\n"); 209 } else { 210 ksft_test_result_skip("vdso_time isn't set\n"); 211 } 212 213 if (vsyscall_map_x) { 214 if (t_vsys < 0 || t_vsys != t2_vsys) 215 ksft_test_result_fail("vsyscall failed (ret:%ld output:%ld)\n", 216 t_vsys, t2_vsys); 217 else if (t_vsys < t_sys1 || t_vsys > t_sys2) 218 ksft_test_result_fail("vsyscall returned the wrong time (%ld %ld %ld)\n", 219 t_sys1, t_vsys, t_sys2); 220 else 221 ksft_test_result_pass("vsyscall time() is okay\n"); 222 } else { 223 ksft_test_result_skip("vsyscall_map_x isn't set\n"); 224 } 225 } 226 227 static void test_getcpu(int cpu) 228 { 229 unsigned int cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; 230 long ret_sys, ret_vdso = -1, ret_vsys = -1; 231 unsigned int node = 0; 232 bool have_node = false; 233 cpu_set_t cpuset; 234 235 ksft_print_msg("getcpu() on CPU %d\n", cpu); 236 237 CPU_ZERO(&cpuset); 238 CPU_SET(cpu, &cpuset); 239 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 240 ksft_print_msg("failed to force CPU %d\n", cpu); 241 ksft_test_result_skip("vdso_getcpu\n"); 242 ksft_test_result_skip("vsyscall_map_x\n"); 243 244 return; 245 } 246 247 ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); 248 if (vdso_getcpu) 249 ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); 250 if (vsyscall_map_x) 251 ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); 252 253 if (ret_sys == 0) { 254 if (cpu_sys != cpu) 255 ksft_print_msg("syscall reported CPU %hu but should be %d\n", 256 cpu_sys, cpu); 257 258 have_node = true; 259 node = node_sys; 260 } 261 262 if (vdso_getcpu) { 263 if (ret_vdso) { 264 ksft_test_result_fail("vDSO getcpu() failed\n"); 265 } else { 266 if (!have_node) { 267 have_node = true; 268 node = node_vdso; 269 } 270 271 if (cpu_vdso != cpu || node_vdso != node) { 272 if (cpu_vdso != cpu) 273 ksft_print_msg("vDSO reported CPU %hu but should be %d\n", 274 cpu_vdso, cpu); 275 if (node_vdso != node) 276 ksft_print_msg("vDSO reported node %hu but should be %hu\n", 277 node_vdso, node); 278 ksft_test_result_fail("Wrong values\n"); 279 } else { 280 ksft_test_result_pass("vDSO reported correct CPU and node\n"); 281 } 282 } 283 } else { 284 ksft_test_result_skip("vdso_getcpu isn't set\n"); 285 } 286 287 if (vsyscall_map_x) { 288 if (ret_vsys) { 289 ksft_test_result_fail("vsyscall getcpu() failed\n"); 290 } else { 291 if (!have_node) { 292 have_node = true; 293 node = node_vsys; 294 } 295 296 if (cpu_vsys != cpu || node_vsys != node) { 297 if (cpu_vsys != cpu) 298 ksft_print_msg("vsyscall reported CPU %hu but should be %d\n", 299 cpu_vsys, cpu); 300 if (node_vsys != node) 301 ksft_print_msg("vsyscall reported node %hu but should be %hu\n", 302 node_vsys, node); 303 ksft_test_result_fail("Wrong values\n"); 304 } else { 305 ksft_test_result_pass("vsyscall reported correct CPU and node\n"); 306 } 307 } 308 } else { 309 ksft_test_result_skip("vsyscall_map_x isn't set\n"); 310 } 311 } 312 313 #ifdef __x86_64__ 314 315 static jmp_buf jmpbuf; 316 static volatile unsigned long segv_err; 317 318 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 319 int flags) 320 { 321 struct sigaction sa; 322 323 memset(&sa, 0, sizeof(sa)); 324 sa.sa_sigaction = handler; 325 sa.sa_flags = SA_SIGINFO | flags; 326 sigemptyset(&sa.sa_mask); 327 if (sigaction(sig, &sa, 0)) 328 ksft_exit_fail_msg("sigaction failed\n"); 329 } 330 331 static void sigsegv(int sig, siginfo_t *info, void *ctx_void) 332 { 333 ucontext_t *ctx = (ucontext_t *)ctx_void; 334 335 segv_err = ctx->uc_mcontext.gregs[REG_ERR]; 336 siglongjmp(jmpbuf, 1); 337 } 338 339 static void test_vsys_r(void) 340 { 341 ksft_print_msg("Checking read access to the vsyscall page\n"); 342 bool can_read; 343 if (sigsetjmp(jmpbuf, 1) == 0) { 344 *(volatile int *)0xffffffffff600000; 345 can_read = true; 346 } else { 347 can_read = false; 348 } 349 350 if (can_read && !vsyscall_map_r) 351 ksft_test_result_fail("We have read access, but we shouldn't\n"); 352 else if (!can_read && vsyscall_map_r) 353 ksft_test_result_fail("We don't have read access, but we should\n"); 354 else if (can_read) 355 ksft_test_result_pass("We have read access\n"); 356 else 357 ksft_test_result_pass("We do not have read access: #PF(0x%lx)\n", segv_err); 358 } 359 360 static void test_vsys_x(void) 361 { 362 if (vsyscall_map_x) { 363 /* We already tested this adequately. */ 364 ksft_test_result_pass("vsyscall_map_x is true\n"); 365 return; 366 } 367 368 ksft_print_msg("Make sure that vsyscalls really page fault\n"); 369 370 bool can_exec; 371 if (sigsetjmp(jmpbuf, 1) == 0) { 372 vgtod(NULL, NULL); 373 can_exec = true; 374 } else { 375 can_exec = false; 376 } 377 378 if (can_exec) 379 ksft_test_result_fail("Executing the vsyscall did not page fault\n"); 380 else if (segv_err & (1 << 4)) /* INSTR */ 381 ksft_test_result_pass("Executing the vsyscall page failed: #PF(0x%lx)\n", 382 segv_err); 383 else 384 ksft_test_result_fail("Execution failed with the wrong error: #PF(0x%lx)\n", 385 segv_err); 386 } 387 388 /* 389 * Debuggers expect ptrace() to be able to peek at the vsyscall page. 390 * Use process_vm_readv() as a proxy for ptrace() to test this. We 391 * want it to work in the vsyscall=emulate case and to fail in the 392 * vsyscall=xonly case. 393 * 394 * It's worth noting that this ABI is a bit nutty. write(2) can't 395 * read from the vsyscall page on any kernel version or mode. The 396 * fact that ptrace() ever worked was a nice courtesy of old kernels, 397 * but the code to support it is fairly gross. 398 */ 399 static void test_process_vm_readv(void) 400 { 401 char buf[4096]; 402 struct iovec local, remote; 403 int ret; 404 405 ksft_print_msg("process_vm_readv() from vsyscall page\n"); 406 407 local.iov_base = buf; 408 local.iov_len = 4096; 409 remote.iov_base = (void *)0xffffffffff600000; 410 remote.iov_len = 4096; 411 ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); 412 if (ret != 4096) { 413 /* 414 * We expect process_vm_readv() to work if and only if the 415 * vsyscall page is readable. 416 */ 417 ksft_test_result(!vsyscall_map_r, 418 "process_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno); 419 return; 420 } 421 422 if (vsyscall_map_r) 423 ksft_test_result(!memcmp(buf, remote.iov_base, sizeof(buf)), "Read data\n"); 424 else 425 ksft_test_result_fail("process_rm_readv() succeeded, but it should have failed in this configuration\n"); 426 } 427 428 static void init_vsys(void) 429 { 430 int nerrs = 0; 431 FILE *maps; 432 char line[MAPS_LINE_LEN]; 433 bool found = false; 434 435 maps = fopen("/proc/self/maps", "r"); 436 if (!maps) { 437 ksft_test_result_skip("Could not open /proc/self/maps -- assuming vsyscall is r-x\n"); 438 vsyscall_map_r = true; 439 return; 440 } 441 442 while (fgets(line, MAPS_LINE_LEN, maps)) { 443 char r, x; 444 void *start, *end; 445 char name[MAPS_LINE_LEN]; 446 447 /* sscanf() is safe here as strlen(name) >= strlen(line) */ 448 if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", 449 &start, &end, &r, &x, name) != 5) 450 continue; 451 452 if (strcmp(name, "[vsyscall]")) 453 continue; 454 455 ksft_print_msg("vsyscall map: %s", line); 456 457 if (start != (void *)0xffffffffff600000 || 458 end != (void *)0xffffffffff601000) { 459 ksft_print_msg("address range is nonsense\n"); 460 nerrs++; 461 } 462 463 ksft_print_msg("vsyscall permissions are %c-%c\n", r, x); 464 vsyscall_map_r = (r == 'r'); 465 vsyscall_map_x = (x == 'x'); 466 467 found = true; 468 break; 469 } 470 471 fclose(maps); 472 473 if (!found) { 474 ksft_print_msg("no vsyscall map in /proc/self/maps\n"); 475 vsyscall_map_r = false; 476 vsyscall_map_x = false; 477 } 478 479 ksft_test_result(!nerrs, "vsyscall map\n"); 480 } 481 482 static volatile sig_atomic_t num_vsyscall_traps; 483 484 static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 485 { 486 ucontext_t *ctx = (ucontext_t *)ctx_void; 487 unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; 488 489 if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) 490 num_vsyscall_traps++; 491 } 492 493 static void test_emulation(void) 494 { 495 time_t tmp; 496 bool is_native; 497 498 if (!vsyscall_map_x) { 499 ksft_test_result_skip("vsyscall_map_x isn't set\n"); 500 return; 501 } 502 503 ksft_print_msg("checking that vsyscalls are emulated\n"); 504 sethandler(SIGTRAP, sigtrap, 0); 505 set_eflags(get_eflags() | X86_EFLAGS_TF); 506 vtime(&tmp); 507 set_eflags(get_eflags() & ~X86_EFLAGS_TF); 508 509 /* 510 * If vsyscalls are emulated, we expect a single trap in the 511 * vsyscall page -- the call instruction will trap with RIP 512 * pointing to the entry point before emulation takes over. 513 * In native mode, we expect two traps, since whatever code 514 * the vsyscall page contains will be more than just a ret 515 * instruction. 516 */ 517 is_native = (num_vsyscall_traps > 1); 518 519 ksft_test_result(!is_native, "vsyscalls are %s (%d instructions in vsyscall page)\n", 520 (is_native ? "native" : "emulated"), (int)num_vsyscall_traps); 521 } 522 #endif 523 524 int main(int argc, char **argv) 525 { 526 int total_tests = TOTAL_TESTS; 527 528 ksft_print_header(); 529 ksft_set_plan(total_tests); 530 531 init_vdso(); 532 #ifdef __x86_64__ 533 init_vsys(); 534 #endif 535 536 test_gtod(); 537 test_time(); 538 test_getcpu(0); 539 test_getcpu(1); 540 541 #ifdef __x86_64__ 542 sethandler(SIGSEGV, sigsegv, 0); 543 test_vsys_r(); 544 test_vsys_x(); 545 test_process_vm_readv(); 546 test_emulation(); 547 #endif 548 549 ksft_finished(); 550 } 551