/*
 * Minimal portability layer for system call differences between
 * Capsicum OSes.
 *
 * Each supported OS section provides the same set of names with a trailing
 * underscore (getdents_, sendfile_, mq_open_, ...), either as a macro alias
 * for the native function or as a small static inline wrapper, plus a set of
 * HAVE_* feature macros describing what the platform offers.
 */
#ifndef __SYSCALLS_H__
#define __SYSCALLS_H__

/************************************************************
 * FreeBSD
 ************************************************************/
#ifdef __FreeBSD__

/* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */
#define umount2(T, F) unmount(T, F)

/* Map sighandler_t (Linux) to sig_t (FreeBSD) */
#define sighandler_t sig_t

/* profil(2) has a first argument of char* */
#define profil_arg1_t char

/*
 * All wrappers below are static inline, matching the Linux section.  Some of
 * them call the file-local helper fbsd_extattr_skip_prefix(), which an
 * inline function with external linkage must not refer to.
 */

/* FreeBSD has getdents(2) available; only the buffer type differs. */
#include <sys/types.h>
#include <dirent.h>
static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  return getdents(fd, (char*)dirp, count);
}

/* mincore(2) takes a char* vector on FreeBSD. */
#include <sys/mman.h>
static inline int mincore_(void *addr, size_t length, unsigned char *vec) {
  return mincore(addr, length, (char*)vec);
}
#define getpid_ getpid

/*
 * Map Linux-style sendfile to FreeBSD sendfile.
 *
 * Linux semantics are reproduced: on success the number of bytes sent is
 * returned and *offset is advanced by that amount.  FreeBSD reports the byte
 * count through a separate out-parameter, so it is collected in a local
 * rather than being written over *offset.  If the call fails after some
 * data has already been sent, the partial count is returned.
 *
 * offset must not be NULL.  Returns -1 with errno set on failure.
 */
#include <sys/socket.h>
#include <sys/uio.h>
static inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) {
  off_t sent = 0;
  int rc = sendfile(in_fd, out_fd, *offset, count, NULL, &sent, 0);
  if (rc < 0 && sent == 0)
    return -1;
  *offset += sent;
  return (ssize_t)sent;
}

/* A sample mount(2) call */
#include <sys/param.h>
#include <sys/mount.h>
static inline int bogus_mount_() {
  return mount("procfs", "/not_mounted", 0, NULL);
}

/* Mappings for extended attribute functions */
#include <sys/extattr.h>
#include <errno.h>

/*
 * Strip the leading "user." from a Linux-style attribute name.
 * Returns a pointer just past the prefix, or NULL with errno set to EINVAL
 * when the name does not start with "user.".
 */
static inline const char *fbsd_extattr_skip_prefix(const char *p) {
  if (*p++ == 'u' && *p++ == 's' && *p++ == 'e' && *p++ == 'r' && *p++ == '.')
    return p;
  errno = EINVAL;
  return NULL;
}
static inline ssize_t flistxattr_(int fd, char *list, size_t size) {
  return extattr_list_fd(fd, EXTATTR_NAMESPACE_USER, list, size);
}
static inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) {
  name = fbsd_extattr_skip_prefix(name);
  if (name == NULL)
    return -1;
  return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
}
/* The trailing (unnamed) flags argument of the Linux call is ignored. */
static inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int) {
  name = fbsd_extattr_skip_prefix(name);
  if (name == NULL)
    return -1;
  return extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
}
static inline int fremovexattr_(int fd, const char *name) {
  name = fbsd_extattr_skip_prefix(name);
  if (name == NULL)
    return -1;
  return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, name);
}

/* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */
#include <sys/syscall.h>
extern "C" {
extern int __sys_kmq_notify(int, const struct sigevent *);
extern int __sys_kmq_open(const char *, int, mode_t, const struct mq_attr *);
extern int __sys_kmq_setattr(int, const struct mq_attr *__restrict, struct mq_attr *__restrict);
extern ssize_t __sys_kmq_timedreceive(int, char *__restrict, size_t,
                                      unsigned *__restrict, const struct timespec *__restrict);
extern int __sys_kmq_timedsend(int, const char *, size_t, unsigned,
                               const struct timespec *);
extern int __sys_kmq_unlink(const char *);
}
#define mq_notify_ __sys_kmq_notify
#define mq_open_ __sys_kmq_open
#define mq_setattr_ __sys_kmq_setattr
/* Reading attributes is a setattr call with no new attributes supplied. */
#define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B)
#define mq_timedreceive_ __sys_kmq_timedreceive
#define mq_timedsend_ __sys_kmq_timedsend
#define mq_unlink_ __sys_kmq_unlink
#define mq_close_ close

/* Adapt Linux-style ptrace arguments (void* addr/data) to FreeBSD's types. */
#include <sys/ptrace.h>
static inline long ptrace_(int request, pid_t pid, void *addr, void *data) {
  return ptrace(request, pid, (caddr_t)addr, static_cast<int>((long)data));
}
#define PTRACE_PEEKDATA_ PT_READ_D
#define getegid_ getegid
#define getgid_ getgid
#define geteuid_ geteuid
#define getuid_ getuid
#define getgroups_ getgroups
#define getrlimit_ getrlimit
#define bind_ bind
#define connect_ connect

/* Features available */
#if __FreeBSD_version >= 1000000
#define HAVE_CHFLAGSAT
#define HAVE_BINDAT
#define HAVE_CONNECTAT
#endif
#define HAVE_CHFLAGS
#define HAVE_GETFSSTAT
#define HAVE_REVOKE
#define HAVE_GETLOGIN
#define HAVE_MKFIFOAT
#define HAVE_SYSARCH
#include <machine/sysarch.h>
#define HAVE_STAT_BIRTHTIME
#define HAVE_SYSCTL
#define HAVE_FPATHCONF
#define HAVE_F_DUP2FD
#define HAVE_PSELECT
#define HAVE_SCTP

/* FreeBSD only allows root to call mlock[all]/munlock[all] */
#define MLOCK_REQUIRES_ROOT 1
/* FreeBSD effectively only allows root to call sched_setscheduler */
#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1

#endif  /* FreeBSD */

/************************************************************
 * Linux
 ************************************************************/
#ifdef __linux__
#include <fcntl.h>
#include <stdint.h>      /* for intptr_t in the socketcall fallbacks */
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/socket.h>  /* for struct sockaddr / socklen_t */
#include <sys/wait.h>
#include <sys/sendfile.h>
#include <sys/statfs.h>
#include <sys/xattr.h>
#include <sys/mount.h>
#include <linux/net.h>

/* profil(2) has a first argument of unsigned short* */
#define profil_arg1_t unsigned short

static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  return syscall(__NR_getdents, fd, dirp, count);
}
/* A sample mount(2) call */
static inline int bogus_mount_() {
  return mount("/dev/bogus", "/bogus", "debugfs", MS_RDONLY, "");
}

/* libc's getpid() wrapper caches the pid value, and doesn't invalidate
 * the cached value on pdfork(), so directly syscall. */
static inline pid_t getpid_() {
  return syscall(__NR_getpid);
}

/*
 * Raw execveat(2).  The wrapper is defined under a private name and the
 * public spelling is redirected to it, because a C library that declares its
 * own execveat() would clash with a static function of the same name.
 */
static inline int execveat_(int fd, const char *path,
                            char *const argv[], char *const envp[], int flags) {
  return syscall(__NR_execveat, fd, path, argv, envp, flags);
}
#define execveat execveat_

/* Fallback for environments whose <fcntl.h> does not expose AT_EMPTY_PATH. */
#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH 0x1000
#endif

/*
 * Linux glibc includes an fexecve() function, implemented via the /proc
 * filesystem.  Bypass this and go directly to the execveat(2) syscall.
 */
static inline int fexecve_(int fd, char *const argv[], char *const envp[]) {
  return execveat_(fd, "", argv, envp, AT_EMPTY_PATH);
}
/*
 * Linux glibc attempts to be clever and intercepts various uid/gid functions.
 * Bypass by calling the syscalls directly.
 */
static inline gid_t getegid_(void) { return syscall(__NR_getegid); }
static inline gid_t getgid_(void) { return syscall(__NR_getgid); }
static inline uid_t geteuid_(void) { return syscall(__NR_geteuid); }
static inline uid_t getuid_(void) { return syscall(__NR_getuid); }
static inline int getgroups_(int size, gid_t list[]) { return syscall(__NR_getgroups, size, list); }
static inline int getrlimit_(int resource, struct rlimit *rlim) {
  return syscall(__NR_getrlimit, resource, rlim);
}

/*
 * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation,
 * so use the raw syscall for those operations that are disallowed in capability mode.
 */
#ifdef __NR_bind
#define bind_ bind
#else
static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
  return syscall(__NR_socketcall, SYS_BIND, args);
}
#endif
#ifdef __NR_connect
#define connect_ connect
#else
static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
  return syscall(__NR_socketcall, SYS_CONNECT, args);
}
#endif

#define mincore_ mincore
#define sendfile_ sendfile
#define flistxattr_ flistxattr
#define fgetxattr_ fgetxattr
#define fsetxattr_ fsetxattr
#define fremovexattr_ fremovexattr
#define mq_notify_ mq_notify
#define mq_open_ mq_open
#define mq_setattr_ mq_setattr
#define mq_getattr_ mq_getattr
#define mq_timedreceive_ mq_timedreceive
#define mq_timedsend_ mq_timedsend
#define mq_unlink_ mq_unlink
#define mq_close_ mq_close
#define ptrace_ ptrace
#define PTRACE_PEEKDATA_ PTRACE_PEEKDATA

/* Features available */
#define HAVE_DUP3
#define HAVE_PIPE2
#include <sys/fsuid.h>  /* for setfsgid()/setfsuid() */
#define HAVE_SETFSUID
#define HAVE_SETFSGID
#define HAVE_READAHEAD
#define HAVE_SEND_RECV_MMSG
#define HAVE_SYNCFS
#define HAVE_SYNC_FILE_RANGE
#include <sys/uio.h>  /* for vmsplice */
#define HAVE_TEE
#define HAVE_SPLICE
#define HAVE_VMSPLICE
#define HAVE_PSELECT
#define HAVE_PPOLL
#define HAVE_EXECVEAT
#define HAVE_SYSCALL
#define HAVE_MKNOD_REG
#define HAVE_MKNOD_SOCKET
/*
 * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file
 * and the normal <fcntl.h> as they have some clashing definitions.  Bypass by directly
 * defining O_BENEATH, using the current proposed x86 value.  (This will therefore not
 * work for non-x86, and may need changing in future if a different value gets merged.)
 */
#ifndef O_BENEATH
#define O_BENEATH 040000000 /* no / or .. in openat path */
#endif


/* Linux allows anyone to call mlock[all]/munlock[all] */
#define MLOCK_REQUIRES_ROOT 0
/*
 * sched_setscheduler is treated as requiring root on Linux as well.
 * NOTE(review): presumably because switching to a real-time policy needs
 * privilege; an earlier comment here claimed anyone may call it, which
 * contradicted the value below.  The value itself is unchanged.
 */
#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1

#endif  /* Linux */

#endif /*__SYSCALLS_H__*/