xref: /freebsd/contrib/capsicum-test/syscalls.h (revision 9729f076e4d93c5a37e78d427bfe0f1ab99bbcc6)
1 /*
2  * Minimal portability layer for system call differences between
3  * Capsicum OSes.
4  */
5 #ifndef __SYSCALLS_H__
6 #define __SYSCALLS_H__
7 
8 /************************************************************
9  * FreeBSD
10  ************************************************************/
11 #ifdef __FreeBSD__
12 
13 /* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */
14 #define umount2(T, F) unmount(T, F)
15 
/* Map sighandler_t (Linux) to sig_t (FreeBSD) */
17 #define sighandler_t sig_t
18 
19 /* profil(2) has a first argument of char* */
20 #define profil_arg1_t char
21 
22 /* FreeBSD has getdents(2) available */
23 #include <sys/types.h>
24 #include <dirent.h>
25 inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
26   return getdents(fd, (char*)dirp, count);
27 }
28 #include <sys/mman.h>
29 inline int mincore_(void *addr, size_t length, unsigned char *vec) {
30   return mincore(addr, length, (char*)vec);
31 }
/* getpid_ is a plain alias for getpid() here; the Linux section of this
 * header defines it as a raw-syscall wrapper instead. */
#define getpid_ getpid
33 
34 /* Map Linux-style sendfile to FreeBSD sendfile */
35 #include <sys/socket.h>
36 #include <sys/uio.h>
37 inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) {
38   return sendfile(in_fd, out_fd, *offset, count, NULL, offset, 0);
39 }
40 
41 /* A sample mount(2) call */
42 #include <sys/param.h>
43 #include <sys/mount.h>
44 inline int bogus_mount_() {
45   return mount("procfs", "/not_mounted", 0, NULL);
46 }
47 
48 /* Mappings for extended attribute functions */
49 #include <sys/extattr.h>
50 #include <errno.h>
/*
 * fbsd_extattr_skip_prefix: strip a leading "user." from an attribute name.
 *
 * Returns a pointer to the character following the prefix.  If the name does
 * not begin with "user.", sets errno to EINVAL and returns NULL.  The name is
 * examined one character at a time and scanning stops at the first mismatch,
 * so nothing beyond the name's terminating NUL is read.
 */
static const char *fbsd_extattr_skip_prefix(const char *p) {
  const char *expected = "user.";
  while (*expected != '\0') {
    if (*p != *expected) {
      errno = EINVAL;
      return NULL;
    }
    ++p;
    ++expected;
  }
  return p;
}
/*
 * flistxattr_: list the attributes of fd in the user namespace by
 * forwarding to extattr_list_fd(); the result is returned unchanged.
 */
inline ssize_t flistxattr_(int fd, char *list, size_t size) {
  const int attr_namespace = EXTATTR_NAMESPACE_USER;
  return extattr_list_fd(fd, attr_namespace, list, size);
}
/*
 * fgetxattr_: read a user-namespace attribute of fd.
 *
 * name must start with "user."; the prefix is removed before the call to
 * extattr_get_fd().  If the prefix is missing, returns -1 (errno is set to
 * EINVAL by the prefix helper).  Otherwise returns extattr_get_fd()'s result.
 */
inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) {
  const char *attr = fbsd_extattr_skip_prefix(name);
  if (attr == NULL)
    return -1;
  return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, attr, value, size);
}
/*
 * fsetxattr_: Linux-style fsetxattr() built on extattr_set_fd().
 *
 * name must start with "user."; the prefix is removed before the call.
 * The final (flags) argument is accepted for signature compatibility and
 * ignored.
 *
 * Returns 0 on success, or -1 with errno set on failure (EINVAL if the
 * "user." prefix is missing).  extattr_set_fd() reports the number of bytes
 * written on success, so its result is collapsed to the 0/-1 convention
 * callers of fsetxattr() expect rather than being passed through.
 */
inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int) {
  const char *attr = fbsd_extattr_skip_prefix(name);
  if (attr == NULL)
    return -1;
  if (extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, attr, value, size) < 0)
    return -1;
  return 0;
}
/*
 * fremovexattr_: delete a user-namespace attribute of fd.
 *
 * name must start with "user."; the prefix is removed before the call to
 * extattr_delete_fd().  If the prefix is missing, returns -1 (errno is set
 * to EINVAL by the prefix helper).
 */
inline int fremovexattr_(int fd, const char *name) {
  const char *attr = fbsd_extattr_skip_prefix(name);
  if (attr == NULL)
    return -1;
  return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, attr);
}
75 
76 /* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */
77 #include <sys/syscall.h>
/*
 * Prototypes for the __sys_kmq_* entry points that the mq_*_ macros in this
 * section expand to.  They are declared by hand here, with C linkage, rather
 * than being pulled in from a header.
 */
extern "C" {
extern int __sys_kmq_notify(int, const struct sigevent *);
extern int __sys_kmq_open(const char *, int, mode_t, const struct mq_attr *);
extern int __sys_kmq_setattr(int, const struct mq_attr *__restrict, struct mq_attr *__restrict);
extern ssize_t __sys_kmq_timedreceive(int, char *__restrict, size_t,
                                      unsigned *__restrict, const struct timespec *__restrict);
extern int __sys_kmq_timedsend(int, const char *, size_t, unsigned,
                               const struct timespec *);
extern int  __sys_kmq_unlink(const char *);
}
/* Route the portable mq_*_ names to the __sys_kmq_* entry points. */
#define mq_notify_ __sys_kmq_notify
#define mq_open_ __sys_kmq_open
#define mq_setattr_ __sys_kmq_setattr
/* No getattr entry point is declared in this header; mq_getattr_ calls the
 * setattr entry point with NULL as its second argument and B as its third
 * (presumably so the current attributes are returned in B without being
 * changed -- confirm). */
#define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B)
#define mq_timedreceive_ __sys_kmq_timedreceive
#define mq_timedsend_ __sys_kmq_timedsend
#define mq_unlink_ __sys_kmq_unlink
/* Closing a message queue descriptor maps to plain close(). */
#define mq_close_ close
96 #include <sys/ptrace.h>
97 inline long ptrace_(int request, pid_t pid, void *addr, void *data) {
98   return ptrace(request, pid, (caddr_t)addr, static_cast<int>((long)data));
99 }
/* Portable name for the ptrace request spelled PT_READ_D here and
 * PTRACE_PEEKDATA in the Linux section. */
#define PTRACE_PEEKDATA_ PT_READ_D
/* The underscore-suffixed names below are plain aliases for the libc
 * functions here; the Linux section of this header defines several of them
 * as raw-syscall wrappers instead. */
#define getegid_ getegid
#define getgid_ getgid
#define geteuid_ geteuid
#define getuid_ getuid
#define getgroups_ getgroups
#define getrlimit_ getrlimit
#define bind_ bind
#define connect_ connect
109 
/* Features available */
/* These three are only advertised when __FreeBSD_version is at least
 * 1000000. */
#if __FreeBSD_version >= 1000000
#define HAVE_CHFLAGSAT
#define HAVE_BINDAT
#define HAVE_CONNECTAT
#endif
/* The remaining HAVE_* macros are defined unconditionally for FreeBSD. */
#define HAVE_CHFLAGS
#define HAVE_GETFSSTAT
#define HAVE_REVOKE
#define HAVE_GETLOGIN
#define HAVE_MKFIFOAT
#define HAVE_SYSARCH
/* Included alongside HAVE_SYSARCH (presumably for the sysarch() declaration
 * -- confirm). */
#include <machine/sysarch.h>
#define HAVE_STAT_BIRTHTIME
#define HAVE_SYSCTL
#define HAVE_FPATHCONF
#define HAVE_F_DUP2FD
#define HAVE_PSELECT
#define HAVE_SCTP
129 
130 /* FreeBSD only allows root to call mlock[all]/munlock[all] */
131 #define MLOCK_REQUIRES_ROOT 1
132 /* FreeBSD effectively only allows root to call sched_setscheduler */
133 #define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
134 
135 #endif  /* FreeBSD */
136 
137 /************************************************************
138  * Linux
139  ************************************************************/
140 #ifdef __linux__
141 #include <fcntl.h>
142 #include <unistd.h>
143 #include <sys/prctl.h>
144 #include <sys/syscall.h>
145 #include <sys/types.h>
146 #include <sys/time.h>
147 #include <sys/resource.h>
148 #include <sys/wait.h>
149 #include <sys/sendfile.h>
150 #include <sys/statfs.h>
151 #include <sys/xattr.h>
152 #include <sys/mount.h>
153 #include <linux/net.h>
154 
155 /* profil(2) has a first argument of unsigned short* */
156 #define profil_arg1_t unsigned short
157 
/*
 * getdents_: invoke the getdents system call directly through syscall(2)
 * and return its result narrowed to int.
 */
static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  long rc = syscall(__NR_getdents, fd, dirp, count);
  return (int)rc;
}
161 /* A sample mount(2) call */
/*
 * bogus_mount_: issue a sample read-only mount(2) call with fixed arguments
 * and return its result unchanged.
 */
static inline int bogus_mount_() {
  const char *device = "/dev/bogus";
  const char *mount_point = "/bogus";
  const char *fs_type = "debugfs";
  return mount(device, mount_point, fs_type, MS_RDONLY, "");
}
165 
166 /* libc's getpid() wrapper caches the pid value, and doesn't invalidate
167  * the cached value on pdfork(), so directly syscall. */
/*
 * getpid_: return the process ID by issuing the getpid system call
 * directly, without going through libc's getpid() wrapper.
 */
static inline pid_t getpid_() {
  long raw_pid = syscall(__NR_getpid);
  return (pid_t)raw_pid;
}
/*
 * execveat: execute the program identified by fd/path via the execveat
 * system call.  Returns -1 with errno set on failure.
 *
 * glibc 2.34 and later declare a non-static execveat() in <unistd.h> when GNU
 * extensions are enabled (the default for C++ compilers on Linux).  A static
 * definition of the same name then fails to compile with a linkage conflict,
 * so the local wrapper is only supplied when libc does not declare one.
 * Callers use the name execveat() either way.
 */
#if defined(__GLIBC__) && defined(__USE_GNU) && \
    (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 34))
/* libc's own execveat() declaration is used as-is. */
#else
static inline int execveat(int fd, const char *path,
                           char *const argv[], char *const envp[], int flags) {
  return (int)syscall(__NR_execveat, fd, path, argv, envp, flags);
}
#endif
175 
176 /*
177  * Linux glibc includes an fexecve() function, implemented via the /proc
178  * filesystem.  Bypass this and go directly to the execveat(2) syscall.
179  */
/*
 * fexecve_: execute the program referred to by fd, by calling execveat()
 * with an empty path and AT_EMPTY_PATH.
 */
static inline int fexecve_(int fd, char *const argv[], char *const envp[]) {
  const char *empty_path = "";
  return execveat(fd, empty_path, argv, envp, AT_EMPTY_PATH);
}
183 /*
184  * Linux glibc attempts to be clever and intercepts various uid/gid functions.
185  * Bypass by calling the syscalls directly.
186  */
/* Effective group ID, fetched with a direct system call. */
static inline gid_t getegid_(void) {
  return (gid_t)syscall(__NR_getegid);
}

/* Real group ID, fetched with a direct system call. */
static inline gid_t getgid_(void) {
  return (gid_t)syscall(__NR_getgid);
}

/* Effective user ID, fetched with a direct system call. */
static inline uid_t geteuid_(void) {
  return (uid_t)syscall(__NR_geteuid);
}

/* Real user ID, fetched with a direct system call. */
static inline uid_t getuid_(void) {
  return (uid_t)syscall(__NR_getuid);
}
/*
 * getgroups_: fetch the supplementary group list with a direct system call;
 * size and list are passed through unchanged.
 */
static inline int getgroups_(int size, gid_t list[]) {
  long rc = syscall(__NR_getgroups, size, list);
  return (int)rc;
}
/*
 * getrlimit_: query a resource limit with a direct system call; resource
 * and rlim are passed through unchanged.
 */
static inline int getrlimit_(int resource, struct rlimit *rlim) {
  long rc = syscall(__NR_getrlimit, resource, rlim);
  return (int)rc;
}
195 
196 /*
197  * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation,
198  * so use the raw syscall for those operations that are disallowed in capability mode.
199  */
/*
 * bind_: plain bind() where a dedicated bind system call number exists;
 * otherwise a direct socketcall(2) invocation with the three arguments
 * packed into an array of unsigned long.
 */
#if defined(__NR_bind)
#define bind_ bind
#else
static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  unsigned long packed[3];
  packed[0] = (unsigned long)sockfd;
  packed[1] = (unsigned long)(intptr_t)addr;
  packed[2] = (unsigned long)addrlen;
  return syscall(__NR_socketcall, SYS_BIND, packed);
}
#endif
/*
 * connect_: plain connect() where a dedicated connect system call number
 * exists; otherwise a direct socketcall(2) invocation with the three
 * arguments packed into an array of unsigned long.
 */
#if defined(__NR_connect)
#define connect_ connect
#else
static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  unsigned long packed[3];
  packed[0] = (unsigned long)sockfd;
  packed[1] = (unsigned long)(intptr_t)addr;
  packed[2] = (unsigned long)addrlen;
  return syscall(__NR_socketcall, SYS_CONNECT, packed);
}
#endif
216 
/* On Linux the underscore-suffixed names below are plain aliases for the
 * identically named libc functions; the FreeBSD section of this header maps
 * them to wrappers or to __sys_kmq_* entry points instead. */
#define mincore_ mincore
#define sendfile_ sendfile
#define flistxattr_ flistxattr
#define fgetxattr_ fgetxattr
#define fsetxattr_ fsetxattr
#define fremovexattr_ fremovexattr
#define mq_notify_ mq_notify
#define mq_open_ mq_open
#define mq_setattr_ mq_setattr
#define mq_getattr_ mq_getattr
#define mq_timedreceive_ mq_timedreceive
#define mq_timedsend_ mq_timedsend
#define mq_unlink_ mq_unlink
#define mq_close_ mq_close
#define ptrace_ ptrace
/* Portable name for the ptrace request spelled PTRACE_PEEKDATA here and
 * PT_READ_D in the FreeBSD section. */
#define PTRACE_PEEKDATA_ PTRACE_PEEKDATA
233 
/* Features available */
/* Every HAVE_* macro in this list is defined unconditionally for Linux
 * (presumably tested with #ifdef by code that includes this header --
 * confirm against the users). */
#define HAVE_DUP3
#define HAVE_PIPE2
#include <sys/fsuid.h>  /* for setfsgid()/setfsuid() */
#define HAVE_SETFSUID
#define HAVE_SETFSGID
#define HAVE_READAHEAD
#define HAVE_SEND_RECV_MMSG
#define HAVE_SYNCFS
#define HAVE_SYNC_FILE_RANGE
#include <sys/uio.h>  /* for vmsplice */
#define HAVE_TEE
#define HAVE_SPLICE
#define HAVE_VMSPLICE
#define HAVE_PSELECT
#define HAVE_PPOLL
#define HAVE_EXECVEAT
#define HAVE_SYSCALL
#define HAVE_MKNOD_REG
#define HAVE_MKNOD_SOCKET
254 /*
255  * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file
256  * and the normal <fcntl.h> as they have some clashing definitions.  Bypass by directly
257  * defining O_BENEATH, using the current proposed x86 value.  (This will therefore not
258  * work for non-x86, and may need changing in future if a different value gets merged.)
259  */
260 #ifndef O_BENEATH
261 #define O_BENEATH	040000000	/* no / or .. in openat path */
262 #endif
263 
264 
265 /* Linux allows anyone to call mlock[all]/munlock[all] */
266 #define MLOCK_REQUIRES_ROOT 0
/* Linux requires privilege (e.g. CAP_SYS_NICE) to select a real-time policy via sched_setscheduler */
268 #define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
269 
270 #endif  /* Linux */
271 
272 #endif /*__SYSCALLS_H__*/
273