// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020 Collabora Ltd.
 *
 * Benchmark and test syscall user dispatch
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <sys/sysinfo.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

/* Fallback definitions for libc headers that predate syscall user dispatch. */
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH	59
# define PR_SYS_DISPATCH_OFF	0
# define PR_SYS_DISPATCH_ON	1
#endif

#ifdef __NR_syscalls
# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
#else
# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
#endif

/*
 * To test returning from a sigsys with selector blocked, the test
 * requires some per-architecture support (i.e. knowledge about the
 * signal trampoline address).  On i386, we know it is on the vdso, and
 * a small trampoline is open-coded for x86_64.  Other architectures
 * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
 * out of the box, but don't enable them until they support syscall user
 * dispatch.
 */
#if defined(__x86_64__) || defined(__i386__)
#define TEST_BLOCKED_RETURN
#endif

/*
 * Bounds of the region handed to prctl() in main().  On x86_64 these
 * are the assembler labels emitted inside handle_sigsys(); elsewhere
 * they are plain zero-valued variables.
 */
#ifdef __x86_64__
void* (syscall_dispatcher_start)(void);
void* (syscall_dispatcher_end)(void);
#else
unsigned long syscall_dispatcher_start = 0;
unsigned long syscall_dispatcher_end = 0;
#endif

/* Number of SIGSYS deliveries for MAGIC_SYSCALL_1 / for any other syscall. */
unsigned long trapped_call_count = 0;
unsigned long native_call_count = 0;

/* Selector byte whose address is registered with the kernel via prctl(). */
char selector;
#define SYSCALL_BLOCK   (selector = PR_SYS_DISPATCH_ON)
#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)

#define CALIBRATION_STEP 100000
#define CALIBRATE_TO_SECS 5
/* Number of CALIBRATION_STEP-sized batches per measurement; set by calibrate_set(). */
int factor;

/*
 * Time one batch of CALIBRATION_STEP sysinfo() calls.
 *
 * Returns the elapsed CLOCK_MONOTONIC time of the batch, in seconds.
 */
static double one_sysinfo_step(void)
{
	struct timespec t1, t2;
	int i;
	struct sysinfo info;

	clock_gettime(CLOCK_MONOTONIC, &t1);
	for (i = 0; i < CALIBRATION_STEP; i++)
		sysinfo(&info);
	clock_gettime(CLOCK_MONOTONIC, &t2);
	return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
}

/*
 * Pick the global 'factor' so that a measurement lasts roughly
 * CALIBRATE_TO_SECS seconds: batches are run until one second has
 * accumulated, and each batch adds CALIBRATE_TO_SECS to 'factor'.
 */
static void calibrate_set(void)
{
	double elapsed = 0;

	printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);

	while (elapsed < 1) {
		elapsed += one_sysinfo_step();
		factor += CALIBRATE_TO_SECS;
	}

	printf("test iterations = %d\n", CALIBRATION_STEP * factor);
}

/*
 * Run 'factor' batches and return the average time of a single
 * sysinfo() call, in seconds.  calibrate_set() must have run first so
 * that 'factor' is non-zero.
 */
static double perf_syscall(void)
{
	/* Signed index: 'factor' is an int, avoid a signed/unsigned compare. */
	int i;
	double partial = 0;

	for (i = 0; i < factor; ++i)
		partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
	return partial;
}

/*
 * SIGSYS handler: account for the trapped syscall and, where supported,
 * return with the selector set back to "blocked".
 *
 * @sig:      signal number (unused)
 * @info:     siginfo; si_syscall holds the number of the trapped syscall
 * @ucontext: signal context (unused)
 */
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
	char buf[1024];
	int len;

	/* Allow syscalls again so the write() below is not itself trapped. */
	SYSCALL_UNBLOCK;

	/* printf and friends are not signal-safe. */
	len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
	write(1, buf, len);

	if (info->si_syscall == MAGIC_SYSCALL_1)
		trapped_call_count++;
	else
		native_call_count++;

#ifdef TEST_BLOCKED_RETURN
	SYSCALL_BLOCK;
#endif

#ifdef __x86_64__
	/*
	 * Open-coded signal return trampoline: load 0xf (rt_sigreturn on
	 * x86_64) into %rax, undo this function's frame and issue the
	 * syscall from between the two dispatcher labels, i.e. from the
	 * region main() registers with prctl().  The statement order must
	 * not change.
	 * NOTE(review): 'leaveq' assumes the compiler set up a frame
	 * pointer for this function - confirm the build flags ensure it.
	 */
	__asm__ volatile("movq $0xf, %rax");
	__asm__ volatile("leaveq");
	__asm__ volatile("add $0x8, %rsp");
	__asm__ volatile("syscall_dispatcher_start:");
	__asm__ volatile("syscall");
	__asm__ volatile("nop"); /* Landing pad within dispatcher area */
	__asm__ volatile("syscall_dispatcher_end:");
#endif

}

/*
 * Measure the average sysinfo() cost, enable syscall user dispatch,
 * check that a bogus syscall is trapped exactly as expected, then
 * measure again with dispatch enabled (selector unblocked) and report
 * the overhead.
 *
 * Returns 0 on success; exits with -1 on any failure.
 */
int main(void)
{
	struct sigaction act;
	double time1, time2;
	int ret;
	sigset_t mask;

	memset(&act, 0, sizeof(act));
	sigemptyset(&mask);

	act.sa_sigaction = handle_sigsys;
	act.sa_flags = SA_SIGINFO;
	act.sa_mask = mask;

	calibrate_set();

	/* Baseline: dispatch not yet enabled. */
	time1 = perf_syscall();
	printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);

	ret = sigaction(SIGSYS, &act, NULL);
	if (ret) {
		perror("Error sigaction:");
		exit(-1);
	}

	fprintf(stderr, "Enabling syscall trapping.\n");

	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
		  syscall_dispatcher_start,
		  (syscall_dispatcher_end - syscall_dispatcher_start + 1),
		  &selector)) {
		/* No trailing newline: perror() appends ": <error>\n" itself. */
		perror("prctl failed");
		exit(-1);
	}

	SYSCALL_BLOCK;
	syscall(MAGIC_SYSCALL_1);

#ifdef TEST_BLOCKED_RETURN
	/* The handler re-blocks before returning; OFF means that was lost. */
	if (selector == PR_SYS_DISPATCH_OFF) {
		fprintf(stderr, "Failed to return with selector blocked.\n");
		exit(-1);
	}
#endif

	SYSCALL_UNBLOCK;

	if (!trapped_call_count) {
		fprintf(stderr, "syscall trapping does not work.\n");
		exit(-1);
	}

	/* Dispatch enabled but selector unblocked: nothing should trap. */
	time2 = perf_syscall();

	if (native_call_count) {
		/* Not an errno-style failure, so do not use perror() here. */
		fprintf(stderr,
			"syscall trapping intercepted more syscalls than expected\n");
		exit(-1);
	}

	printf("trapped_call_count %lu, native_call_count %lu.\n",
	       trapped_call_count, native_call_count);
	printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
	printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
	       100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
	return 0;

}