1 /*
2 * Minimal portability layer for system call differences between
3 * Capsicum OSes.
4 */
5 #ifndef __SYSCALLS_H__
6 #define __SYSCALLS_H__
7
8 /************************************************************
9 * FreeBSD
10 ************************************************************/
11 #ifdef __FreeBSD__
12
13 /* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */
14 #define umount2(T, F) unmount(T, F)
15
/* Map sighandler_t (Linux) to sig_t (FreeBSD) */
17 #define sighandler_t sig_t
18
19 /* profil(2) has a first argument of char* */
20 #define profil_arg1_t char
21
22 /* FreeBSD has getdents(2) available */
23 #include <sys/types.h>
24 #include <dirent.h>
getdents_(unsigned int fd,void * dirp,unsigned int count)25 inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
26 return getdents(fd, (char*)dirp, count);
27 }
28 #include <sys/mman.h>
mincore_(void * addr,size_t length,unsigned char * vec)29 inline int mincore_(void *addr, size_t length, unsigned char *vec) {
30 return mincore(addr, length, (char*)vec);
31 }
32 #define getpid_ getpid
33
34 /* Map Linux-style sendfile to FreeBSD sendfile */
35 #include <sys/socket.h>
36 #include <sys/uio.h>
sendfile_(int out_fd,int in_fd,off_t * offset,size_t count)37 inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) {
38 return sendfile(in_fd, out_fd, *offset, count, NULL, offset, 0);
39 }
40
41 /* A sample mount(2) call */
42 #include <sys/param.h>
43 #include <sys/mount.h>
bogus_mount_()44 inline int bogus_mount_() {
45 return mount("procfs", "/not_mounted", 0, NULL);
46 }
47
48 /* Mappings for extended attribute functions */
49 #include <sys/extattr.h>
50 #include <errno.h>
/*
 * Strip a leading "user." from an attribute name.
 *
 * Returns a pointer to the first character after the prefix, or NULL with
 * errno set to EINVAL when p does not begin with "user.".  Comparison stops
 * at the first mismatching character, so a short string is never read past
 * its terminator.
 */
static const char *fbsd_extattr_skip_prefix(const char *p) {
  static const char user_prefix[] = "user.";
  const char *expected = user_prefix;

  while (*expected != '\0') {
    if (*p != *expected) {
      errno = EINVAL;
      return NULL;
    }
    ++p;
    ++expected;
  }
  return p;
}
/*
 * List the extended attributes of fd in the user namespace by forwarding to
 * extattr_list_fd().
 * NOTE(review): the buffer is returned exactly as extattr_list_fd() fills it;
 * its layout may differ from Linux flistxattr() output -- confirm before
 * parsing the list.
 */
inline ssize_t flistxattr_(int fd, char *list, size_t size) {
  const int attr_namespace = EXTATTR_NAMESPACE_USER;
  return extattr_list_fd(fd, attr_namespace, list, size);
}
/*
 * Linux-style fgetxattr: read the attribute called name (which must carry a
 * "user." prefix) from fd into value.  Returns -1 when the prefix is missing
 * (fbsd_extattr_skip_prefix() sets errno to EINVAL); otherwise returns
 * whatever extattr_get_fd() returns for the user namespace.
 */
inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) {
  const char *attr = fbsd_extattr_skip_prefix(name);
  if (attr == NULL) {
    return -1;
  }
  return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, attr, value, size);
}
/*
 * Linux-style fsetxattr: store value under the attribute called name (which
 * must carry a "user." prefix) on fd.  The trailing int argument is accepted
 * for signature compatibility and is not used.  Returns -1 when the prefix
 * is missing (fbsd_extattr_skip_prefix() sets errno to EINVAL); otherwise
 * returns the result of extattr_set_fd() for the user namespace.
 */
inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int) {
  const char *attr = fbsd_extattr_skip_prefix(name);
  if (attr == NULL) {
    return -1;
  }
  return extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, attr, value, size);
}
/*
 * Linux-style fremovexattr: delete the attribute called name (which must
 * carry a "user." prefix) from fd.  Returns -1 when the prefix is missing
 * (fbsd_extattr_skip_prefix() sets errno to EINVAL); otherwise returns the
 * result of extattr_delete_fd() for the user namespace.
 */
inline int fremovexattr_(int fd, const char *name) {
  const char *attr = fbsd_extattr_skip_prefix(name);
  if (attr == NULL) {
    return -1;
  }
  return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, attr);
}
75
76 /* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */
77 #include <sys/syscall.h>
/*
 * Prototypes for the underlying FreeBSD message-queue syscall entry points,
 * declared with C linkage so the mq_*_ macros can call them directly.
 * Parameter names are descriptive only.
 */
extern "C" {
extern int __sys_kmq_notify(int mqd, const struct sigevent *sevp);
extern int __sys_kmq_open(const char *path, int flags, mode_t mode,
                          const struct mq_attr *attr);
extern int __sys_kmq_setattr(int mqd,
                             const struct mq_attr *__restrict new_attr,
                             struct mq_attr *__restrict old_attr);
extern ssize_t __sys_kmq_timedreceive(int mqd, char *__restrict buf, size_t len,
                                      unsigned *__restrict prio,
                                      const struct timespec *__restrict abs_timeout);
extern int __sys_kmq_timedsend(int mqd, const char *buf, size_t len,
                               unsigned prio,
                               const struct timespec *abs_timeout);
extern int __sys_kmq_unlink(const char *path);
}
88 #define mq_notify_ __sys_kmq_notify
89 #define mq_open_ __sys_kmq_open
90 #define mq_setattr_ __sys_kmq_setattr
91 #define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B)
92 #define mq_timedreceive_ __sys_kmq_timedreceive
93 #define mq_timedsend_ __sys_kmq_timedsend
94 #define mq_unlink_ __sys_kmq_unlink
95 #define mq_close_ close
96 #include <sys/ptrace.h>
ptrace_(int request,pid_t pid,void * addr,void * data)97 inline long ptrace_(int request, pid_t pid, void *addr, void *data) {
98 return ptrace(request, pid, (caddr_t)addr, static_cast<int>((long)data));
99 }
100 #define PTRACE_PEEKDATA_ PT_READ_D
101 #define getegid_ getegid
102 #define getgid_ getgid
103 #define geteuid_ geteuid
104 #define getuid_ getuid
105 #define getgroups_ getgroups
106 #define getrlimit_ getrlimit
107 #define bind_ bind
108 #define connect_ connect
109
110 /* Features available */
111 #if __FreeBSD_version >= 1000000
112 #define HAVE_CHFLAGSAT
113 #define HAVE_BINDAT
114 #define HAVE_CONNECTAT
115 #endif
116 #define HAVE_CHFLAGS
117 #define HAVE_GETFSSTAT
118 #define HAVE_REVOKE
119 #define HAVE_GETLOGIN
120 #define HAVE_MKFIFOAT
121 #define HAVE_SYSARCH
122 #include <machine/sysarch.h>
123 #define HAVE_STAT_BIRTHTIME
124 #define HAVE_SYSCTL
125 #define HAVE_FPATHCONF
126 #define HAVE_F_DUP2FD
127 #define HAVE_PSELECT
128 #define HAVE_SCTP
129
130 /* FreeBSD only allows root to call mlock[all]/munlock[all] */
131 #define MLOCK_REQUIRES_ROOT 1
132 /* FreeBSD effectively only allows root to call sched_setscheduler */
133 #define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
134
135 #endif /* FreeBSD */
136
137 /************************************************************
138 * Linux
139 ************************************************************/
140 #ifdef __linux__
141 #include <fcntl.h>
142 #include <unistd.h>
143 #include <sys/prctl.h>
144 #include <sys/syscall.h>
145 #include <sys/types.h>
146 #include <sys/time.h>
147 #include <sys/resource.h>
148 #include <sys/wait.h>
149 #include <sys/sendfile.h>
150 #include <sys/statfs.h>
151 #include <sys/xattr.h>
152 #include <sys/mount.h>
153 #include <linux/net.h>
154
155 /* profil(2) has a first argument of unsigned short* */
156 #define profil_arg1_t unsigned short
157
/*
 * Read directory entries from fd into dirp (count is passed as the buffer
 * size) by invoking the getdents system call directly through syscall(2).
 */
static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  long rc = syscall(__NR_getdents, fd, dirp, count);
  return (int)rc;
}
161 /* A sample mount(2) call */
/*
 * Issue a sample Linux mount(2) call: read-only "debugfs" from source
 * "/dev/bogus" onto "/bogus", with an empty data string.
 */
static inline int bogus_mount_() {
  const char *source = "/dev/bogus";
  const char *target = "/bogus";
  const char *fs_type = "debugfs";
  return mount(source, target, fs_type, MS_RDONLY, "");
}
165
166 /* libc's getpid() wrapper caches the pid value, and doesn't invalidate
167 * the cached value on pdfork(), so directly syscall. */
/*
 * Return the caller's process ID straight from the getpid system call,
 * bypassing the libc getpid() wrapper.
 */
static inline pid_t getpid_() {
  long raw_pid = syscall(__NR_getpid);
  return (pid_t)raw_pid;
}
execveat(int fd,const char * path,char * const argv[],char * const envp[],int flags)171 static inline int execveat(int fd, const char *path,
172 char *const argv[], char *const envp[], int flags) {
173 return syscall(__NR_execveat, fd, path, argv, envp, flags);
174 }
175
176 /*
177 * Linux glibc includes an fexecve() function, implemented via the /proc
178 * filesystem. Bypass this and go directly to the execveat(2) syscall.
179 */
/*
 * Execute the program referred to by fd: calls execveat() with an empty
 * path and AT_EMPTY_PATH so that fd itself is the target.
 */
static inline int fexecve_(int fd, char *const argv[], char *const envp[]) {
  const char *no_path = "";
  return execveat(fd, no_path, argv, envp, AT_EMPTY_PATH);
}
183 /*
184 * Linux glibc attempts to be clever and intercepts various uid/gid functions.
185 * Bypass by calling the syscalls directly.
186 */
/* Effective group ID, fetched with a direct getegid system call. */
static inline gid_t getegid_(void) {
  long raw = syscall(__NR_getegid);
  return (gid_t)raw;
}
/* Real group ID, fetched with a direct getgid system call. */
static inline gid_t getgid_(void) {
  long raw = syscall(__NR_getgid);
  return (gid_t)raw;
}
/* Effective user ID, fetched with a direct geteuid system call. */
static inline uid_t geteuid_(void) {
  long raw = syscall(__NR_geteuid);
  return (uid_t)raw;
}
/* Real user ID, fetched with a direct getuid system call. */
static inline uid_t getuid_(void) {
  long raw = syscall(__NR_getuid);
  return (uid_t)raw;
}
/*
 * Fetch the supplementary group list with a direct getgroups system call;
 * size and list are passed through unchanged.
 */
static inline int getgroups_(int size, gid_t list[]) {
  long rc = syscall(__NR_getgroups, size, list);
  return (int)rc;
}
/*
 * Query a resource limit with a direct getrlimit system call; the result is
 * written to *rlim by the kernel.
 */
static inline int getrlimit_(int resource, struct rlimit *rlim) {
  long rc = syscall(__NR_getrlimit, resource, rlim);
  return (int)rc;
}
195
196 /*
197 * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation,
198 * so use the raw syscall for those operations that are disallowed in capability mode.
199 */
200 #ifdef __NR_bind
201 #define bind_ bind
202 #else
bind_(int sockfd,const struct sockaddr * addr,socklen_t addrlen)203 static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
204 unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
205 return syscall(__NR_socketcall, SYS_BIND, args);
206 }
207 #endif
208 #ifdef __NR_connect
209 #define connect_ connect
210 #else
connect_(int sockfd,const struct sockaddr * addr,socklen_t addrlen)211 static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
212 unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
213 return syscall(__NR_socketcall, SYS_CONNECT, args);
214 }
215 #endif
216
217 #define mincore_ mincore
218 #define sendfile_ sendfile
219 #define flistxattr_ flistxattr
220 #define fgetxattr_ fgetxattr
221 #define fsetxattr_ fsetxattr
222 #define fremovexattr_ fremovexattr
223 #define mq_notify_ mq_notify
224 #define mq_open_ mq_open
225 #define mq_setattr_ mq_setattr
226 #define mq_getattr_ mq_getattr
227 #define mq_timedreceive_ mq_timedreceive
228 #define mq_timedsend_ mq_timedsend
229 #define mq_unlink_ mq_unlink
230 #define mq_close_ mq_close
231 #define ptrace_ ptrace
232 #define PTRACE_PEEKDATA_ PTRACE_PEEKDATA
233
234 /* Features available */
235 #define HAVE_DUP3
236 #define HAVE_PIPE2
237 #include <sys/fsuid.h> /* for setfsgid()/setfsuid() */
238 #define HAVE_SETFSUID
239 #define HAVE_SETFSGID
240 #define HAVE_READAHEAD
241 #define HAVE_SEND_RECV_MMSG
242 #define HAVE_SYNCFS
243 #define HAVE_SYNC_FILE_RANGE
244 #include <sys/uio.h> /* for vmsplice */
245 #define HAVE_TEE
246 #define HAVE_SPLICE
247 #define HAVE_VMSPLICE
248 #define HAVE_PSELECT
249 #define HAVE_PPOLL
250 #define HAVE_EXECVEAT
251 #define HAVE_SYSCALL
252 #define HAVE_MKNOD_REG
253 #define HAVE_MKNOD_SOCKET
254 /*
255 * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file
256 * and the normal <fcntl.h> as they have some clashing definitions. Bypass by directly
257 * defining O_BENEATH, using the current proposed x86 value. (This will therefore not
258 * work for non-x86, and may need changing in future if a different value gets merged.)
259 */
260 #ifndef O_BENEATH
261 #define O_BENEATH 040000000 /* no / or .. in openat path */
262 #endif
263
264
265 /* Linux allows anyone to call mlock[all]/munlock[all] */
266 #define MLOCK_REQUIRES_ROOT 0
/* Linux effectively only allows privileged callers (CAP_SYS_NICE) to switch to a real-time policy via sched_setscheduler */
268 #define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
269
270 #endif /* Linux */
271
272 #endif /*__SYSCALLS_H__*/
273