xref: /linux/tools/testing/selftests/filesystems/utils.c (revision 2487b6b9bf2874cfca7efb59c95650c5b1d88d43)
1c68946eeSChristian Brauner // SPDX-License-Identifier: GPL-2.0
2c68946eeSChristian Brauner #ifndef _GNU_SOURCE
3c68946eeSChristian Brauner #define _GNU_SOURCE
4c68946eeSChristian Brauner #endif
5c68946eeSChristian Brauner #include <fcntl.h>
6c68946eeSChristian Brauner #include <sys/types.h>
7c68946eeSChristian Brauner #include <dirent.h>
8c68946eeSChristian Brauner #include <grp.h>
9c68946eeSChristian Brauner #include <linux/limits.h>
10c68946eeSChristian Brauner #include <sched.h>
11c68946eeSChristian Brauner #include <stdio.h>
12c68946eeSChristian Brauner #include <stdlib.h>
13c68946eeSChristian Brauner #include <sys/eventfd.h>
14c68946eeSChristian Brauner #include <sys/fsuid.h>
15c68946eeSChristian Brauner #include <sys/prctl.h>
16c68946eeSChristian Brauner #include <sys/socket.h>
17c68946eeSChristian Brauner #include <sys/stat.h>
18c68946eeSChristian Brauner #include <sys/types.h>
19c68946eeSChristian Brauner #include <sys/wait.h>
20c68946eeSChristian Brauner #include <sys/xattr.h>
21c68946eeSChristian Brauner 
22c68946eeSChristian Brauner #include "utils.h"
23c68946eeSChristian Brauner 
24c68946eeSChristian Brauner #define MAX_USERNS_LEVEL 32
25c68946eeSChristian Brauner 
/*
 * syserror - log a message prefixed with the description of errno
 *
 * Prints "<errno text> - <formatted message>\n" to stderr and evaluates to
 * -errno. errno is sampled before fprintf() runs and restored afterwards so
 * that a failing write to stderr cannot change the value that is returned
 * or the value the caller observes in errno.
 */
#define syserror(format, ...)                                        \
	({                                                           \
		int __syserror_errno__ = errno;                      \
		fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
		errno = __syserror_errno__;                          \
		(-__syserror_errno__);                               \
	})
31c68946eeSChristian Brauner 
/*
 * syserror_set - set errno from an error code, log a message, yield the code
 *
 * @__ret__ is evaluated exactly once. errno is set to its absolute value so
 * that the "%m" prefix describes that error, the formatted message is
 * printed to stderr, and the expression evaluates to the unmodified
 * @__ret__.
 */
#define syserror_set(__ret__, format, ...)                           \
	({                                                           \
		__typeof__(__ret__) __internal_ret__ = (__ret__);    \
		errno = labs(__internal_ret__);                      \
		fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
		__internal_ret__;                                    \
	})
39c68946eeSChristian Brauner 
/*
 * Length of a string literal without its terminating NUL byte. The empty
 * literals pasted on both sides only compile when @x is itself a literal.
 */
#define STRLITERALLEN(x) (sizeof("" x "") - 1)
41c68946eeSChristian Brauner 
/*
 * Upper bound on the number of characters needed to print an integer of
 * @type in decimal: the maximum digit count for 1, 2, 4 or 8 byte types
 * (3, 5, 10, 20) plus 2 extra bytes (presumably sign and terminator).
 * A type wider than 8 bytes yields a negative array size, i.e. a compile
 * error.
 */
#define INTTYPE_TO_STRLEN(type)                        \
	(2 + ((sizeof(type) <= 1) ? 3 :                \
	      (sizeof(type) <= 2) ? 5 :                \
	      (sizeof(type) <= 4) ? 10 :               \
	      (sizeof(type) <= 8) ? 20 :               \
	      sizeof(int[-2 * (sizeof(type) > 8)])))
50c68946eeSChristian Brauner 
/*
 * list_for_each - visit every entry of a circular list
 * @__iterator: pointer variable used as the cursor
 * @__list:     list head; the head itself is not visited
 *
 * Both arguments are parenthesized on every use so that arbitrary
 * expressions (e.g. a conditional) can be passed as the head. @__list is
 * evaluated on every iteration, so it must be free of side effects.
 */
#define list_for_each(__iterator, __list)       \
	for ((__iterator) = (__list)->next;     \
	     (__iterator) != (__list);          \
	     (__iterator) = (__iterator)->next)
53c68946eeSChristian Brauner 
/*
 * Kind of id mapping. The values are consecutive so that code can loop from
 * ID_TYPE_UID to ID_TYPE_GID.
 */
typedef enum idmap_type_t {
	ID_TYPE_UID = 0,
	ID_TYPE_GID = 1
} idmap_type_t;
58c68946eeSChristian Brauner 
/*
 * One id mapping entry. Entries are serialized as "<nsid> <hostid> <range>\n"
 * when they are written to /proc/<pid>/{u,g}id_map.
 */
struct id_map {
	idmap_type_t map_type;	/* uid or gid mapping */
	__u32 nsid;		/* first field of the mapping line */
	__u32 hostid;		/* second field of the mapping line */
	__u32 range;		/* third field of the mapping line */
};
65c68946eeSChristian Brauner 
/*
 * Node of a circular doubly-linked list. The same type is used for the list
 * head; a head set up by list_init() points at itself and has elem == NULL.
 */
struct list {
	void *elem;		/* payload carried by this node */
	struct list *next;	/* following node, or the head */
	struct list *prev;	/* preceding node, or the head */
};
71c68946eeSChristian Brauner 
/*
 * Description of one level in a chain of nested user namespaces. The code
 * that walks such a chain advances with pointer arithmetic and stops at an
 * entry whose level equals MAX_USERNS_LEVEL.
 */
struct userns_hierarchy {
	int fd_userns;		/* receives an fd for /proc/<pid>/ns/user of this level */
	int fd_event;		/* socketpair end the cloned child reads from and writes to */
	unsigned int level;	/* nesting level; MAX_USERNS_LEVEL terminates the chain */
	struct list id_map;	/* struct id_map entries to write for this level */
};
78c68946eeSChristian Brauner 
79c68946eeSChristian Brauner static inline void list_init(struct list *list)
80c68946eeSChristian Brauner {
81c68946eeSChristian Brauner 	list->elem = NULL;
82c68946eeSChristian Brauner 	list->next = list->prev = list;
83c68946eeSChristian Brauner }
84c68946eeSChristian Brauner 
85c68946eeSChristian Brauner static inline int list_empty(const struct list *list)
86c68946eeSChristian Brauner {
87c68946eeSChristian Brauner 	return list == list->next;
88c68946eeSChristian Brauner }
89c68946eeSChristian Brauner 
90c68946eeSChristian Brauner static inline void __list_add(struct list *new, struct list *prev, struct list *next)
91c68946eeSChristian Brauner {
92c68946eeSChristian Brauner 	next->prev = new;
93c68946eeSChristian Brauner 	new->next = next;
94c68946eeSChristian Brauner 	new->prev = prev;
95c68946eeSChristian Brauner 	prev->next = new;
96c68946eeSChristian Brauner }
97c68946eeSChristian Brauner 
98c68946eeSChristian Brauner static inline void list_add_tail(struct list *head, struct list *list)
99c68946eeSChristian Brauner {
100c68946eeSChristian Brauner 	__list_add(list, head->prev, head);
101c68946eeSChristian Brauner }
102c68946eeSChristian Brauner 
103c68946eeSChristian Brauner static inline void list_del(struct list *list)
104c68946eeSChristian Brauner {
105c68946eeSChristian Brauner 	struct list *next, *prev;
106c68946eeSChristian Brauner 
107c68946eeSChristian Brauner 	next = list->next;
108c68946eeSChristian Brauner 	prev = list->prev;
109c68946eeSChristian Brauner 	next->prev = prev;
110c68946eeSChristian Brauner 	prev->next = next;
111c68946eeSChristian Brauner }
112c68946eeSChristian Brauner 
/*
 * read() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final read() returned.
 */
static ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
123c68946eeSChristian Brauner 
/*
 * write() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final write() returned; short writes are not retried.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
134c68946eeSChristian Brauner 
#define __STACK_SIZE (8 * 1024 * 1024)

/*
 * Run @fn(@arg) in a new process created with clone(), using a freshly
 * allocated stack of __STACK_SIZE bytes. SIGCHLD is always added to @flags.
 *
 * Returns the child's pid, -ENOMEM if the stack cannot be allocated, or
 * clone()'s own return value on failure. The stack is not freed here.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack_base = malloc(__STACK_SIZE);

	if (stack_base == NULL)
		return -ENOMEM;

	flags |= SIGCHLD;

#ifdef __ia64__
	/* __clone2() takes the stack base and its size. */
	return __clone2(fn, stack_base, __STACK_SIZE, flags, arg, NULL);
#else
	/* clone() is handed the address one past the end of the allocation. */
	return clone(fn, stack_base + __STACK_SIZE, flags, arg, NULL);
#endif
}
150c68946eeSChristian Brauner 
/*
 * Child body used while a user namespace fd is being obtained: sleeps in
 * pause() forever. @data is unused. The loop never terminates on its own,
 * so the trailing _exit() is not reached.
 */
static int get_userns_fd_cb(void *data)
{
	while (1)
		pause();
	_exit(0);
}
157c68946eeSChristian Brauner 
/*
 * Wait for @pid to terminate, restarting waitpid() on EINTR.
 *
 * Returns the child's exit status if it exited normally, and -1 if
 * waitpid() failed or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
176c68946eeSChristian Brauner 
177c68946eeSChristian Brauner static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
178c68946eeSChristian Brauner {
179c68946eeSChristian Brauner 	int fd = -EBADF, setgroups_fd = -EBADF;
180c68946eeSChristian Brauner 	int fret = -1;
181c68946eeSChristian Brauner 	int ret;
182c68946eeSChristian Brauner 	char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
183c68946eeSChristian Brauner 		  STRLITERALLEN("/setgroups") + 1];
184c68946eeSChristian Brauner 
185c68946eeSChristian Brauner 	if (geteuid() != 0 && map_type == ID_TYPE_GID) {
186c68946eeSChristian Brauner 		ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
187c68946eeSChristian Brauner 		if (ret < 0 || ret >= sizeof(path))
188c68946eeSChristian Brauner 			goto out;
189c68946eeSChristian Brauner 
190c68946eeSChristian Brauner 		setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
191c68946eeSChristian Brauner 		if (setgroups_fd < 0 && errno != ENOENT) {
192c68946eeSChristian Brauner 			syserror("Failed to open \"%s\"", path);
193c68946eeSChristian Brauner 			goto out;
194c68946eeSChristian Brauner 		}
195c68946eeSChristian Brauner 
196c68946eeSChristian Brauner 		if (setgroups_fd >= 0) {
197c68946eeSChristian Brauner 			ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
198c68946eeSChristian Brauner 			if (ret != STRLITERALLEN("deny\n")) {
199c68946eeSChristian Brauner 				syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
200c68946eeSChristian Brauner 				goto out;
201c68946eeSChristian Brauner 			}
202c68946eeSChristian Brauner 		}
203c68946eeSChristian Brauner 	}
204c68946eeSChristian Brauner 
205c68946eeSChristian Brauner 	ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
206c68946eeSChristian Brauner 	if (ret < 0 || ret >= sizeof(path))
207c68946eeSChristian Brauner 		goto out;
208c68946eeSChristian Brauner 
209c68946eeSChristian Brauner 	fd = open(path, O_WRONLY | O_CLOEXEC);
210c68946eeSChristian Brauner 	if (fd < 0) {
211c68946eeSChristian Brauner 		syserror("Failed to open \"%s\"", path);
212c68946eeSChristian Brauner 		goto out;
213c68946eeSChristian Brauner 	}
214c68946eeSChristian Brauner 
215c68946eeSChristian Brauner 	ret = write_nointr(fd, buf, buf_size);
216c68946eeSChristian Brauner 	if (ret != buf_size) {
217c68946eeSChristian Brauner 		syserror("Failed to write %cid mapping to \"%s\"",
218c68946eeSChristian Brauner 			 map_type == ID_TYPE_UID ? 'u' : 'g', path);
219c68946eeSChristian Brauner 		goto out;
220c68946eeSChristian Brauner 	}
221c68946eeSChristian Brauner 
222c68946eeSChristian Brauner 	fret = 0;
223c68946eeSChristian Brauner out:
224c68946eeSChristian Brauner 	close(fd);
225c68946eeSChristian Brauner 	close(setgroups_fd);
226c68946eeSChristian Brauner 
227c68946eeSChristian Brauner 	return fret;
228c68946eeSChristian Brauner }
229c68946eeSChristian Brauner 
230c68946eeSChristian Brauner static int map_ids_from_idmap(struct list *idmap, pid_t pid)
231c68946eeSChristian Brauner {
232c68946eeSChristian Brauner 	int fill, left;
233c68946eeSChristian Brauner 	char mapbuf[4096] = {};
234c68946eeSChristian Brauner 	bool had_entry = false;
235c68946eeSChristian Brauner 	idmap_type_t map_type, u_or_g;
236c68946eeSChristian Brauner 
237c68946eeSChristian Brauner 	if (list_empty(idmap))
238c68946eeSChristian Brauner 		return 0;
239c68946eeSChristian Brauner 
240c68946eeSChristian Brauner 	for (map_type = ID_TYPE_UID, u_or_g = 'u';
241c68946eeSChristian Brauner 	     map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
242c68946eeSChristian Brauner 		char *pos = mapbuf;
243c68946eeSChristian Brauner 		int ret;
244c68946eeSChristian Brauner 		struct list *iterator;
245c68946eeSChristian Brauner 
246c68946eeSChristian Brauner 
247c68946eeSChristian Brauner 		list_for_each(iterator, idmap) {
248c68946eeSChristian Brauner 			struct id_map *map = iterator->elem;
249c68946eeSChristian Brauner 			if (map->map_type != map_type)
250c68946eeSChristian Brauner 				continue;
251c68946eeSChristian Brauner 
252c68946eeSChristian Brauner 			had_entry = true;
253c68946eeSChristian Brauner 
254c68946eeSChristian Brauner 			left = 4096 - (pos - mapbuf);
255c68946eeSChristian Brauner 			fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
256c68946eeSChristian Brauner 			/*
257c68946eeSChristian Brauner 			 * The kernel only takes <= 4k for writes to
258c68946eeSChristian Brauner 			 * /proc/<pid>/{g,u}id_map
259c68946eeSChristian Brauner 			 */
260c68946eeSChristian Brauner 			if (fill <= 0 || fill >= left)
261c68946eeSChristian Brauner 				return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
262c68946eeSChristian Brauner 
263c68946eeSChristian Brauner 			pos += fill;
264c68946eeSChristian Brauner 		}
265c68946eeSChristian Brauner 		if (!had_entry)
266c68946eeSChristian Brauner 			continue;
267c68946eeSChristian Brauner 
268c68946eeSChristian Brauner 		ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
269c68946eeSChristian Brauner 		if (ret < 0)
270c68946eeSChristian Brauner 			return syserror("Failed to write mapping: %s", mapbuf);
271c68946eeSChristian Brauner 
272c68946eeSChristian Brauner 		memset(mapbuf, 0, sizeof(mapbuf));
273c68946eeSChristian Brauner 	}
274c68946eeSChristian Brauner 
275c68946eeSChristian Brauner 	return 0;
276c68946eeSChristian Brauner }
277c68946eeSChristian Brauner 
278c68946eeSChristian Brauner static int get_userns_fd_from_idmap(struct list *idmap)
279c68946eeSChristian Brauner {
280c68946eeSChristian Brauner 	int ret;
281c68946eeSChristian Brauner 	pid_t pid;
282c68946eeSChristian Brauner 	char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
283c68946eeSChristian Brauner 		     STRLITERALLEN("/ns/user") + 1];
284c68946eeSChristian Brauner 
285c68946eeSChristian Brauner 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
286c68946eeSChristian Brauner 	if (pid < 0)
287c68946eeSChristian Brauner 		return -errno;
288c68946eeSChristian Brauner 
289c68946eeSChristian Brauner 	ret = map_ids_from_idmap(idmap, pid);
290c68946eeSChristian Brauner 	if (ret < 0)
291c68946eeSChristian Brauner 		return ret;
292c68946eeSChristian Brauner 
293c68946eeSChristian Brauner 	ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
294c68946eeSChristian Brauner 	if (ret < 0 || (size_t)ret >= sizeof(path_ns))
295c68946eeSChristian Brauner 		ret = -EIO;
296c68946eeSChristian Brauner 	else
297c68946eeSChristian Brauner 		ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
298c68946eeSChristian Brauner 
299c68946eeSChristian Brauner 	(void)kill(pid, SIGKILL);
300c68946eeSChristian Brauner 	(void)wait_for_pid(pid);
301c68946eeSChristian Brauner 	return ret;
302c68946eeSChristian Brauner }
303c68946eeSChristian Brauner 
304c68946eeSChristian Brauner int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
305c68946eeSChristian Brauner {
306c68946eeSChristian Brauner 	struct list head, uid_mapl, gid_mapl;
307c68946eeSChristian Brauner 	struct id_map uid_map = {
308c68946eeSChristian Brauner 		.map_type	= ID_TYPE_UID,
309c68946eeSChristian Brauner 		.nsid		= nsid,
310c68946eeSChristian Brauner 		.hostid		= hostid,
311c68946eeSChristian Brauner 		.range		= range,
312c68946eeSChristian Brauner 	};
313c68946eeSChristian Brauner 	struct id_map gid_map = {
314c68946eeSChristian Brauner 		.map_type	= ID_TYPE_GID,
315c68946eeSChristian Brauner 		.nsid		= nsid,
316c68946eeSChristian Brauner 		.hostid		= hostid,
317c68946eeSChristian Brauner 		.range		= range,
318c68946eeSChristian Brauner 	};
319c68946eeSChristian Brauner 
320c68946eeSChristian Brauner 	list_init(&head);
321c68946eeSChristian Brauner 	uid_mapl.elem = &uid_map;
322c68946eeSChristian Brauner 	gid_mapl.elem = &gid_map;
323c68946eeSChristian Brauner 	list_add_tail(&head, &uid_mapl);
324c68946eeSChristian Brauner 	list_add_tail(&head, &gid_mapl);
325c68946eeSChristian Brauner 
326c68946eeSChristian Brauner 	return get_userns_fd_from_idmap(&head);
327c68946eeSChristian Brauner }
328c68946eeSChristian Brauner 
/*
 * switch_ids - drop supplementary groups and switch to @uid / @gid
 * @uid: value installed as real, effective and saved uid
 * @gid: value installed as real, effective and saved gid
 *
 * Returns true on success. On the first failing step the error is logged
 * and false is returned. The log call's result must not be returned
 * directly: it is a non-zero -errno, which converts to true.
 */
bool switch_ids(uid_t uid, gid_t gid)
{
	if (setgroups(0, NULL)) {
		syserror("failure: setgroups");
		return false;
	}

	if (setresgid(gid, gid, gid)) {
		syserror("failure: setresgid");
		return false;
	}

	if (setresuid(uid, uid, uid)) {
		syserror("failure: setresuid");
		return false;
	}

	/* Ensure we can access proc files from processes we can ptrace. */
	if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0)) {
		syserror("failure: make dumpable");
		return false;
	}

	return true;
}
346c68946eeSChristian Brauner 
347c68946eeSChristian Brauner static int create_userns_hierarchy(struct userns_hierarchy *h);
348c68946eeSChristian Brauner 
349c68946eeSChristian Brauner static int userns_fd_cb(void *data)
350c68946eeSChristian Brauner {
351c68946eeSChristian Brauner 	struct userns_hierarchy *h = data;
352c68946eeSChristian Brauner 	char c;
353c68946eeSChristian Brauner 	int ret;
354c68946eeSChristian Brauner 
355c68946eeSChristian Brauner 	ret = read_nointr(h->fd_event, &c, 1);
356c68946eeSChristian Brauner 	if (ret < 0)
357c68946eeSChristian Brauner 		return syserror("failure: read from socketpair");
358c68946eeSChristian Brauner 
359c68946eeSChristian Brauner 	/* Only switch ids if someone actually wrote a mapping for us. */
360c68946eeSChristian Brauner 	if (c == '1') {
361c68946eeSChristian Brauner 		if (!switch_ids(0, 0))
362c68946eeSChristian Brauner 			return syserror("failure: switch ids to 0");
363c68946eeSChristian Brauner 	}
364c68946eeSChristian Brauner 
365c68946eeSChristian Brauner 	ret = write_nointr(h->fd_event, "1", 1);
366c68946eeSChristian Brauner 	if (ret < 0)
367c68946eeSChristian Brauner 		return syserror("failure: write to socketpair");
368c68946eeSChristian Brauner 
369c68946eeSChristian Brauner 	ret = create_userns_hierarchy(++h);
370c68946eeSChristian Brauner 	if (ret < 0)
371c68946eeSChristian Brauner 		return syserror("failure: userns level %d", h->level);
372c68946eeSChristian Brauner 
373c68946eeSChristian Brauner 	return 0;
374c68946eeSChristian Brauner }
375c68946eeSChristian Brauner 
376c68946eeSChristian Brauner static int create_userns_hierarchy(struct userns_hierarchy *h)
377c68946eeSChristian Brauner {
378c68946eeSChristian Brauner 	int fret = -1;
379c68946eeSChristian Brauner 	char c;
380c68946eeSChristian Brauner 	int fd_socket[2];
381c68946eeSChristian Brauner 	int fd_userns = -EBADF, ret = -1;
382c68946eeSChristian Brauner 	ssize_t bytes;
383c68946eeSChristian Brauner 	pid_t pid;
384c68946eeSChristian Brauner 	char path[256];
385c68946eeSChristian Brauner 
386c68946eeSChristian Brauner 	if (h->level == MAX_USERNS_LEVEL)
387c68946eeSChristian Brauner 		return 0;
388c68946eeSChristian Brauner 
389c68946eeSChristian Brauner 	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
390c68946eeSChristian Brauner 	if (ret < 0)
391c68946eeSChristian Brauner 		return syserror("failure: create socketpair");
392c68946eeSChristian Brauner 
393c68946eeSChristian Brauner 	/* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
394c68946eeSChristian Brauner 	h->fd_event = fd_socket[1];
395c68946eeSChristian Brauner 	pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
396c68946eeSChristian Brauner 	if (pid < 0) {
397c68946eeSChristian Brauner 		syserror("failure: userns level %d", h->level);
398c68946eeSChristian Brauner 		goto out_close;
399c68946eeSChristian Brauner 	}
400c68946eeSChristian Brauner 
401c68946eeSChristian Brauner 	ret = map_ids_from_idmap(&h->id_map, pid);
402c68946eeSChristian Brauner 	if (ret < 0) {
403c68946eeSChristian Brauner 		kill(pid, SIGKILL);
404c68946eeSChristian Brauner 		syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
405c68946eeSChristian Brauner 		goto out_wait;
406c68946eeSChristian Brauner 	}
407c68946eeSChristian Brauner 
408c68946eeSChristian Brauner 	if (!list_empty(&h->id_map))
409c68946eeSChristian Brauner 		bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
410c68946eeSChristian Brauner 	else
411c68946eeSChristian Brauner 		bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
412c68946eeSChristian Brauner 	if (bytes < 0) {
413c68946eeSChristian Brauner 		kill(pid, SIGKILL);
414c68946eeSChristian Brauner 		syserror("failure: write to socketpair");
415c68946eeSChristian Brauner 		goto out_wait;
416c68946eeSChristian Brauner 	}
417c68946eeSChristian Brauner 
418c68946eeSChristian Brauner 	/* Wait for child to set*id() and become dumpable. */
419c68946eeSChristian Brauner 	bytes = read_nointr(fd_socket[0], &c, 1);
420c68946eeSChristian Brauner 	if (bytes < 0) {
421c68946eeSChristian Brauner 		kill(pid, SIGKILL);
422c68946eeSChristian Brauner 		syserror("failure: read from socketpair");
423c68946eeSChristian Brauner 		goto out_wait;
424c68946eeSChristian Brauner 	}
425c68946eeSChristian Brauner 
426c68946eeSChristian Brauner 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
427c68946eeSChristian Brauner 	fd_userns = open(path, O_RDONLY | O_CLOEXEC);
428c68946eeSChristian Brauner 	if (fd_userns < 0) {
429c68946eeSChristian Brauner 		kill(pid, SIGKILL);
430c68946eeSChristian Brauner 		syserror("failure: open userns level %d for %d", h->level, pid);
431c68946eeSChristian Brauner 		goto out_wait;
432c68946eeSChristian Brauner 	}
433c68946eeSChristian Brauner 
434c68946eeSChristian Brauner 	fret = 0;
435c68946eeSChristian Brauner 
436c68946eeSChristian Brauner out_wait:
437c68946eeSChristian Brauner 	if (!wait_for_pid(pid) && !fret) {
438c68946eeSChristian Brauner 		h->fd_userns = fd_userns;
439c68946eeSChristian Brauner 		fd_userns = -EBADF;
440c68946eeSChristian Brauner 	}
441c68946eeSChristian Brauner 
442c68946eeSChristian Brauner out_close:
443c68946eeSChristian Brauner 	if (fd_userns >= 0)
444c68946eeSChristian Brauner 		close(fd_userns);
445c68946eeSChristian Brauner 	close(fd_socket[0]);
446c68946eeSChristian Brauner 	close(fd_socket[1]);
447c68946eeSChristian Brauner 	return fret;
448c68946eeSChristian Brauner }
449c68946eeSChristian Brauner 
450c68946eeSChristian Brauner /* caps_down - lower all effective caps */
451c68946eeSChristian Brauner int caps_down(void)
452c68946eeSChristian Brauner {
453c68946eeSChristian Brauner 	bool fret = false;
454c68946eeSChristian Brauner 	cap_t caps = NULL;
455c68946eeSChristian Brauner 	int ret = -1;
456c68946eeSChristian Brauner 
457c68946eeSChristian Brauner 	caps = cap_get_proc();
458c68946eeSChristian Brauner 	if (!caps)
459c68946eeSChristian Brauner 		goto out;
460c68946eeSChristian Brauner 
461c68946eeSChristian Brauner 	ret = cap_clear_flag(caps, CAP_EFFECTIVE);
462c68946eeSChristian Brauner 	if (ret)
463c68946eeSChristian Brauner 		goto out;
464c68946eeSChristian Brauner 
465c68946eeSChristian Brauner 	ret = cap_set_proc(caps);
466c68946eeSChristian Brauner 	if (ret)
467c68946eeSChristian Brauner 		goto out;
468c68946eeSChristian Brauner 
469c68946eeSChristian Brauner 	fret = true;
470c68946eeSChristian Brauner 
471c68946eeSChristian Brauner out:
472c68946eeSChristian Brauner 	cap_free(caps);
473c68946eeSChristian Brauner 	return fret;
474c68946eeSChristian Brauner }
475*a1579f6bSChristian Brauner 
476*a1579f6bSChristian Brauner /* cap_down - lower an effective cap */
477*a1579f6bSChristian Brauner int cap_down(cap_value_t down)
478*a1579f6bSChristian Brauner {
479*a1579f6bSChristian Brauner 	bool fret = false;
480*a1579f6bSChristian Brauner 	cap_t caps = NULL;
481*a1579f6bSChristian Brauner 	cap_value_t cap = down;
482*a1579f6bSChristian Brauner 	int ret = -1;
483*a1579f6bSChristian Brauner 
484*a1579f6bSChristian Brauner 	caps = cap_get_proc();
485*a1579f6bSChristian Brauner 	if (!caps)
486*a1579f6bSChristian Brauner 		goto out;
487*a1579f6bSChristian Brauner 
488*a1579f6bSChristian Brauner 	ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0);
489*a1579f6bSChristian Brauner 	if (ret)
490*a1579f6bSChristian Brauner 		goto out;
491*a1579f6bSChristian Brauner 
492*a1579f6bSChristian Brauner 	ret = cap_set_proc(caps);
493*a1579f6bSChristian Brauner 	if (ret)
494*a1579f6bSChristian Brauner 		goto out;
495*a1579f6bSChristian Brauner 
496*a1579f6bSChristian Brauner 	fret = true;
497*a1579f6bSChristian Brauner 
498*a1579f6bSChristian Brauner out:
499*a1579f6bSChristian Brauner 	cap_free(caps);
500*a1579f6bSChristian Brauner 	return fret;
501*a1579f6bSChristian Brauner }
502