/*
 * Minimal portability layer for system call differences between
 * Capsicum OSes.
 *
 * Each OS section below provides the same family of names with a trailing
 * underscore (getpid_, sendfile_, mq_open_, ...), either as a macro alias
 * for the native function or as a small inline wrapper, plus a set of
 * HAVE_* feature macros and *_REQUIRES_ROOT policy constants.
 */
#ifndef __SYSCALLS_H__
#define __SYSCALLS_H__

/************************************************************
 * FreeBSD
 ************************************************************/
#ifdef __FreeBSD__

/* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */
#define umount2(T, F) unmount(T, F)

/* Map sighandler_t (Linux) to sig_t (FreeBSD) */
#define sighandler_t sig_t

/* profil(2) has a first argument of char* */
#define profil_arg1_t char

/* FreeBSD has getdents(2) available; only the buffer type needs adapting. */
#include <sys/types.h>
#include <dirent.h>
inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  return getdents(fd, (char*)dirp, count);
}

/* mincore(2) takes a char* result vector here; accept unsigned char* as on Linux. */
#include <sys/mman.h>
inline int mincore_(void *addr, size_t length, unsigned char *vec) {
  return mincore(addr, length, (char*)vec);
}
#define getpid_ getpid

/*
 * Map Linux-style sendfile to FreeBSD sendfile.
 *
 * The Linux argument order (out_fd, in_fd) is swapped into the FreeBSD
 * order.  `offset` is dereferenced unconditionally, so unlike on Linux it
 * must not be NULL.  The same pointer is also passed as FreeBSD's `sbytes`
 * output argument, and the return value is FreeBSD's, passed through as-is.
 * NOTE(review): this means *offset and the return value do not follow the
 * Linux sendfile(2) conventions -- confirm callers only check for failure.
 */
#include <sys/socket.h>
#include <sys/uio.h>
inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) {
  return sendfile(in_fd, out_fd, *offset, count, NULL, offset, 0);
}

/* A sample mount(2) call */
#include <sys/param.h>
#include <sys/mount.h>
inline int bogus_mount_() {
  return mount("procfs", "/not_mounted", 0, NULL);
}

/*
 * Mappings for extended attribute functions: the Linux-style f*xattr calls
 * are forwarded to the extattr_*_fd family in the user namespace.
 */
#include <sys/extattr.h>
inline ssize_t flistxattr_(int fd, char *list, size_t size) {
  return extattr_list_fd(fd, EXTATTR_NAMESPACE_USER, list, size);
}
inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) {
  return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
}
/* The trailing (Linux "flags") argument is accepted but ignored. */
inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int) {
  return extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
}
inline int fremovexattr_(int fd, const char *name) {
  return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, name);
}

/* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */
#include <sys/syscall.h>
extern "C" {
extern int __sys_kmq_notify(int, const struct sigevent *);
extern int __sys_kmq_open(const char *, int, mode_t, const struct mq_attr *);
extern int __sys_kmq_setattr(int, const struct mq_attr *__restrict, struct mq_attr *__restrict);
extern ssize_t __sys_kmq_timedreceive(int, char *__restrict, size_t,
                                      unsigned *__restrict, const struct timespec *__restrict);
extern int __sys_kmq_timedsend(int, const char *, size_t, unsigned,
                               const struct timespec *);
extern int __sys_kmq_unlink(const char *);
}
#define mq_notify_ __sys_kmq_notify
#define mq_open_ __sys_kmq_open
#define mq_setattr_ __sys_kmq_setattr
/* Reading attributes is a setattr call with no new attributes supplied. */
#define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B)
#define mq_timedreceive_ __sys_kmq_timedreceive
#define mq_timedsend_ __sys_kmq_timedsend
#define mq_unlink_ __sys_kmq_unlink
#define mq_close_ close

/*
 * Linux-shaped ptrace: FreeBSD takes a caddr_t address and an int data
 * argument, so the data pointer is narrowed to int.
 */
#include <sys/ptrace.h>
inline long ptrace_(int request, pid_t pid, void *addr, void *data) {
  return ptrace(request, pid, (caddr_t)addr, static_cast<int>((long)data));
}
#define PTRACE_PEEKDATA_ PT_READ_D

/* These need no special handling on FreeBSD: alias the libc functions. */
#define getegid_ getegid
#define getgid_ getgid
#define geteuid_ geteuid
#define getuid_ getuid
#define getgroups_ getgroups
#define getrlimit_ getrlimit
#define bind_ bind
#define connect_ connect

/* Features available */
#if __FreeBSD_version >= 1000000
#define HAVE_CHFLAGSAT
#define HAVE_BINDAT
#define HAVE_CONNECTAT
#endif
#define HAVE_CHFLAGS
#define HAVE_GETFSSTAT
#define HAVE_REVOKE
#define HAVE_GETLOGIN
#define HAVE_MKFIFOAT
#define HAVE_SYSARCH
#include <machine/sysarch.h>
#define HAVE_STAT_BIRTHTIME
#define HAVE_SYSCTL
#define HAVE_FPATHCONF
#define HAVE_F_DUP2FD
#define HAVE_PSELECT
#define HAVE_SCTP

/* FreeBSD only allows root to call mlock[all]/munlock[all] */
#define MLOCK_REQUIRES_ROOT 1
/* FreeBSD effectively only allows root to call sched_setscheduler */
#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1

#endif  /* FreeBSD */

/************************************************************
 * Linux
 ************************************************************/
#ifdef __linux__
#include <fcntl.h>
#include <stdint.h>      /* intptr_t, used by the socketcall(2) fallbacks */
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/socket.h>  /* struct sockaddr, socklen_t for bind_/connect_ */
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/sendfile.h>
#include <sys/statfs.h>
#include <sys/xattr.h>
#include <sys/mount.h>
#include <linux/net.h>

/* profil(2) has a first argument of unsigned short* */
#define profil_arg1_t unsigned short

/* Raw getdents(2); `dirp` receives `count` bytes of kernel dirent records. */
static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  return syscall(__NR_getdents, fd, dirp, count);
}

/* A sample mount(2) call */
static inline int bogus_mount_() {
  return mount("/dev/bogus", "/bogus", "debugfs", MS_RDONLY, "");
}

/* libc's getpid() wrapper caches the pid value, and doesn't invalidate
 * the cached value on pdfork(), so directly syscall. */
static inline pid_t getpid_() {
  return syscall(__NR_getpid);
}

/*
 * Raw execveat(2).
 *
 * glibc 2.34 and later declare their own execveat() in <unistd.h> when
 * _GNU_SOURCE is in effect (always the case for g++), and a static function
 * of the same name then fails to compile.  The wrapper is therefore defined
 * under the file's usual trailing-underscore name and the plain spelling is
 * redirected to it, so existing callers of execveat() keep working with
 * both old and new C libraries.
 */
static inline int execveat_(int fd, const char *path,
                            char *const argv[], char *const envp[], int flags) {
  return syscall(__NR_execveat, fd, path, argv, envp, flags);
}
#define execveat execveat_

/* <fcntl.h> only exposes AT_EMPTY_PATH under _GNU_SOURCE; this is the kernel ABI value. */
#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH 0x1000
#endif

/*
 * Linux glibc includes an fexecve() function, implemented via the /proc
 * filesystem.  Bypass this and go directly to the execveat(2) syscall.
 */
static inline int fexecve_(int fd, char *const argv[], char *const envp[]) {
  return execveat_(fd, "", argv, envp, AT_EMPTY_PATH);
}

/*
 * Linux glibc attempts to be clever and intercepts various uid/gid functions.
 * Bypass by calling the syscalls directly.
 */
static inline gid_t getegid_(void) { return syscall(__NR_getegid); }
static inline gid_t getgid_(void) { return syscall(__NR_getgid); }
static inline uid_t geteuid_(void) { return syscall(__NR_geteuid); }
static inline uid_t getuid_(void) { return syscall(__NR_getuid); }
static inline int getgroups_(int size, gid_t list[]) { return syscall(__NR_getgroups, size, list); }
static inline int getrlimit_(int resource, struct rlimit *rlim) {
  return syscall(__NR_getrlimit, resource, rlim);
}

/*
 * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation,
 * so use the raw syscall for those operations that are disallowed in capability mode.
 * Architectures with dedicated bind/connect syscalls just use the libc functions.
 */
#ifdef __NR_bind
#define bind_ bind
#else
static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  /* socketcall(2) takes its arguments packed into an array of longs. */
  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
  return syscall(__NR_socketcall, SYS_BIND, args);
}
#endif
#ifdef __NR_connect
#define connect_ connect
#else
static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  /* socketcall(2) takes its arguments packed into an array of longs. */
  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
  return syscall(__NR_socketcall, SYS_CONNECT, args);
}
#endif

/* These need no special handling on Linux: alias the libc functions. */
#define mincore_ mincore
#define sendfile_ sendfile
#define flistxattr_ flistxattr
#define fgetxattr_ fgetxattr
#define fsetxattr_ fsetxattr
#define fremovexattr_ fremovexattr
#define mq_notify_ mq_notify
#define mq_open_ mq_open
#define mq_setattr_ mq_setattr
#define mq_getattr_ mq_getattr
#define mq_timedreceive_ mq_timedreceive
#define mq_timedsend_ mq_timedsend
#define mq_unlink_ mq_unlink
#define mq_close_ mq_close
#define ptrace_ ptrace
#define PTRACE_PEEKDATA_ PTRACE_PEEKDATA

/* Features available */
#define HAVE_DUP3
#define HAVE_PIPE2
#include <sys/fsuid.h>  /* for setfsgid()/setfsuid() */
#define HAVE_SETFSUID
#define HAVE_SETFSGID
#define HAVE_READAHEAD
#define HAVE_SEND_RECV_MMSG
#define HAVE_SYNCFS
#define HAVE_SYNC_FILE_RANGE
#include <sys/uio.h>  /* for vmsplice */
#define HAVE_TEE
#define HAVE_SPLICE
#define HAVE_VMSPLICE
#define HAVE_PSELECT
#define HAVE_PPOLL
#define HAVE_EXECVEAT
#define HAVE_SYSCALL
#define HAVE_MKNOD_REG
#define HAVE_MKNOD_SOCKET
/*
 * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file
 * and the normal <fcntl.h> as they have some clashing definitions.  Bypass by directly
 * defining O_BENEATH, using the current proposed x86 value.  (This will therefore not
 * work for non-x86, and may need changing in future if a different value gets merged.)
 */
#ifndef O_BENEATH
#define O_BENEATH 040000000 /* no / or .. in openat path */
#endif


/* Linux allows anyone to call mlock[all]/munlock[all] */
#define MLOCK_REQUIRES_ROOT 0
/*
 * sched_setscheduler is treated as requiring root on Linux: switching to a
 * real-time policy normally needs CAP_SYS_NICE (see sched(7)).
 */
#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1

#endif  /* Linux */

#endif /*__SYSCALLS_H__*/