xref: /linux/kernel/bpf/helpers.c (revision 88a8e278ff0b6b461bf39d4ace17384e976a3f3f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <linux/bpf.h>
5 #include <linux/rcupdate.h>
6 #include <linux/random.h>
7 #include <linux/smp.h>
8 #include <linux/topology.h>
9 #include <linux/ktime.h>
10 #include <linux/sched.h>
11 #include <linux/uidgid.h>
12 #include <linux/filter.h>
13 #include <linux/ctype.h>
14 #include <linux/jiffies.h>
15 #include <linux/pid_namespace.h>
16 #include <linux/proc_ns.h>
17 
18 #include "../../lib/kstrtox.h"
19 
20 /* If kernel subsystem is allowing eBPF programs to call this function,
21  * inside its own verifier_ops->get_func_proto() callback it should return
22  * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
23  *
24  * Different map implementations will rely on rcu in map methods
25  * lookup/update/delete, therefore eBPF programs must run under rcu lock
26  * if program is allowed to access maps, so check rcu_read_lock_held in
27  * all three functions.
28  */
29 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
30 {
31 	WARN_ON_ONCE(!rcu_read_lock_held());
32 	return (unsigned long) map->ops->map_lookup_elem(map, key);
33 }
34 
35 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
36 	.func		= bpf_map_lookup_elem,
37 	.gpl_only	= false,
38 	.pkt_access	= true,
39 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
40 	.arg1_type	= ARG_CONST_MAP_PTR,
41 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
42 };
43 
44 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
45 	   void *, value, u64, flags)
46 {
47 	WARN_ON_ONCE(!rcu_read_lock_held());
48 	return map->ops->map_update_elem(map, key, value, flags);
49 }
50 
51 const struct bpf_func_proto bpf_map_update_elem_proto = {
52 	.func		= bpf_map_update_elem,
53 	.gpl_only	= false,
54 	.pkt_access	= true,
55 	.ret_type	= RET_INTEGER,
56 	.arg1_type	= ARG_CONST_MAP_PTR,
57 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
58 	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
59 	.arg4_type	= ARG_ANYTHING,
60 };
61 
62 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
63 {
64 	WARN_ON_ONCE(!rcu_read_lock_held());
65 	return map->ops->map_delete_elem(map, key);
66 }
67 
68 const struct bpf_func_proto bpf_map_delete_elem_proto = {
69 	.func		= bpf_map_delete_elem,
70 	.gpl_only	= false,
71 	.pkt_access	= true,
72 	.ret_type	= RET_INTEGER,
73 	.arg1_type	= ARG_CONST_MAP_PTR,
74 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
75 };
76 
77 BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
78 {
79 	return map->ops->map_push_elem(map, value, flags);
80 }
81 
82 const struct bpf_func_proto bpf_map_push_elem_proto = {
83 	.func		= bpf_map_push_elem,
84 	.gpl_only	= false,
85 	.pkt_access	= true,
86 	.ret_type	= RET_INTEGER,
87 	.arg1_type	= ARG_CONST_MAP_PTR,
88 	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
89 	.arg3_type	= ARG_ANYTHING,
90 };
91 
92 BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
93 {
94 	return map->ops->map_pop_elem(map, value);
95 }
96 
97 const struct bpf_func_proto bpf_map_pop_elem_proto = {
98 	.func		= bpf_map_pop_elem,
99 	.gpl_only	= false,
100 	.ret_type	= RET_INTEGER,
101 	.arg1_type	= ARG_CONST_MAP_PTR,
102 	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
103 };
104 
105 BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
106 {
107 	return map->ops->map_peek_elem(map, value);
108 }
109 
110 const struct bpf_func_proto bpf_map_peek_elem_proto = {
111 	.func		= bpf_map_pop_elem,
112 	.gpl_only	= false,
113 	.ret_type	= RET_INTEGER,
114 	.arg1_type	= ARG_CONST_MAP_PTR,
115 	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
116 };
117 
118 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
119 	.func		= bpf_user_rnd_u32,
120 	.gpl_only	= false,
121 	.ret_type	= RET_INTEGER,
122 };
123 
124 BPF_CALL_0(bpf_get_smp_processor_id)
125 {
126 	return smp_processor_id();
127 }
128 
129 const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
130 	.func		= bpf_get_smp_processor_id,
131 	.gpl_only	= false,
132 	.ret_type	= RET_INTEGER,
133 };
134 
135 BPF_CALL_0(bpf_get_numa_node_id)
136 {
137 	return numa_node_id();
138 }
139 
140 const struct bpf_func_proto bpf_get_numa_node_id_proto = {
141 	.func		= bpf_get_numa_node_id,
142 	.gpl_only	= false,
143 	.ret_type	= RET_INTEGER,
144 };
145 
146 BPF_CALL_0(bpf_ktime_get_ns)
147 {
148 	/* NMI safe access to clock monotonic */
149 	return ktime_get_mono_fast_ns();
150 }
151 
152 const struct bpf_func_proto bpf_ktime_get_ns_proto = {
153 	.func		= bpf_ktime_get_ns,
154 	.gpl_only	= true,
155 	.ret_type	= RET_INTEGER,
156 };
157 
158 BPF_CALL_0(bpf_get_current_pid_tgid)
159 {
160 	struct task_struct *task = current;
161 
162 	if (unlikely(!task))
163 		return -EINVAL;
164 
165 	return (u64) task->tgid << 32 | task->pid;
166 }
167 
168 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
169 	.func		= bpf_get_current_pid_tgid,
170 	.gpl_only	= false,
171 	.ret_type	= RET_INTEGER,
172 };
173 
174 BPF_CALL_0(bpf_get_current_uid_gid)
175 {
176 	struct task_struct *task = current;
177 	kuid_t uid;
178 	kgid_t gid;
179 
180 	if (unlikely(!task))
181 		return -EINVAL;
182 
183 	current_uid_gid(&uid, &gid);
184 	return (u64) from_kgid(&init_user_ns, gid) << 32 |
185 		     from_kuid(&init_user_ns, uid);
186 }
187 
188 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
189 	.func		= bpf_get_current_uid_gid,
190 	.gpl_only	= false,
191 	.ret_type	= RET_INTEGER,
192 };
193 
194 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
195 {
196 	struct task_struct *task = current;
197 
198 	if (unlikely(!task))
199 		goto err_clear;
200 
201 	strncpy(buf, task->comm, size);
202 
203 	/* Verifier guarantees that size > 0. For task->comm exceeding
204 	 * size, guarantee that buf is %NUL-terminated. Unconditionally
205 	 * done here to save the size test.
206 	 */
207 	buf[size - 1] = 0;
208 	return 0;
209 err_clear:
210 	memset(buf, 0, size);
211 	return -EINVAL;
212 }
213 
214 const struct bpf_func_proto bpf_get_current_comm_proto = {
215 	.func		= bpf_get_current_comm,
216 	.gpl_only	= false,
217 	.ret_type	= RET_INTEGER,
218 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
219 	.arg2_type	= ARG_CONST_SIZE,
220 };
221 
222 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
223 
224 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
225 {
226 	arch_spinlock_t *l = (void *)lock;
227 	union {
228 		__u32 val;
229 		arch_spinlock_t lock;
230 	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
231 
232 	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
233 	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
234 	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
235 	arch_spin_lock(l);
236 }
237 
238 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
239 {
240 	arch_spinlock_t *l = (void *)lock;
241 
242 	arch_spin_unlock(l);
243 }
244 
245 #else
246 
247 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
248 {
249 	atomic_t *l = (void *)lock;
250 
251 	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
252 	do {
253 		atomic_cond_read_relaxed(l, !VAL);
254 	} while (atomic_xchg(l, 1));
255 }
256 
257 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
258 {
259 	atomic_t *l = (void *)lock;
260 
261 	atomic_set_release(l, 0);
262 }
263 
264 #endif
265 
266 static DEFINE_PER_CPU(unsigned long, irqsave_flags);
267 
268 notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
269 {
270 	unsigned long flags;
271 
272 	local_irq_save(flags);
273 	__bpf_spin_lock(lock);
274 	__this_cpu_write(irqsave_flags, flags);
275 	return 0;
276 }
277 
278 const struct bpf_func_proto bpf_spin_lock_proto = {
279 	.func		= bpf_spin_lock,
280 	.gpl_only	= false,
281 	.ret_type	= RET_VOID,
282 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
283 };
284 
285 notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
286 {
287 	unsigned long flags;
288 
289 	flags = __this_cpu_read(irqsave_flags);
290 	__bpf_spin_unlock(lock);
291 	local_irq_restore(flags);
292 	return 0;
293 }
294 
295 const struct bpf_func_proto bpf_spin_unlock_proto = {
296 	.func		= bpf_spin_unlock,
297 	.gpl_only	= false,
298 	.ret_type	= RET_VOID,
299 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
300 };
301 
302 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
303 			   bool lock_src)
304 {
305 	struct bpf_spin_lock *lock;
306 
307 	if (lock_src)
308 		lock = src + map->spin_lock_off;
309 	else
310 		lock = dst + map->spin_lock_off;
311 	preempt_disable();
312 	____bpf_spin_lock(lock);
313 	copy_map_value(map, dst, src);
314 	____bpf_spin_unlock(lock);
315 	preempt_enable();
316 }
317 
318 BPF_CALL_0(bpf_jiffies64)
319 {
320 	return get_jiffies_64();
321 }
322 
323 const struct bpf_func_proto bpf_jiffies64_proto = {
324 	.func		= bpf_jiffies64,
325 	.gpl_only	= false,
326 	.ret_type	= RET_INTEGER,
327 };
328 
329 #ifdef CONFIG_CGROUPS
330 BPF_CALL_0(bpf_get_current_cgroup_id)
331 {
332 	struct cgroup *cgrp = task_dfl_cgroup(current);
333 
334 	return cgroup_id(cgrp);
335 }
336 
337 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
338 	.func		= bpf_get_current_cgroup_id,
339 	.gpl_only	= false,
340 	.ret_type	= RET_INTEGER,
341 };
342 
343 BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
344 {
345 	struct cgroup *cgrp = task_dfl_cgroup(current);
346 	struct cgroup *ancestor;
347 
348 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
349 	if (!ancestor)
350 		return 0;
351 	return cgroup_id(ancestor);
352 }
353 
354 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
355 	.func		= bpf_get_current_ancestor_cgroup_id,
356 	.gpl_only	= false,
357 	.ret_type	= RET_INTEGER,
358 	.arg1_type	= ARG_ANYTHING,
359 };
360 
361 #ifdef CONFIG_CGROUP_BPF
362 DECLARE_PER_CPU(struct bpf_cgroup_storage*,
363 		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
364 
365 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
366 {
367 	/* flags argument is not used now,
368 	 * but provides an ability to extend the API.
369 	 * verifier checks that its value is correct.
370 	 */
371 	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
372 	struct bpf_cgroup_storage *storage;
373 	void *ptr;
374 
375 	storage = this_cpu_read(bpf_cgroup_storage[stype]);
376 
377 	if (stype == BPF_CGROUP_STORAGE_SHARED)
378 		ptr = &READ_ONCE(storage->buf)->data[0];
379 	else
380 		ptr = this_cpu_ptr(storage->percpu_buf);
381 
382 	return (unsigned long)ptr;
383 }
384 
385 const struct bpf_func_proto bpf_get_local_storage_proto = {
386 	.func		= bpf_get_local_storage,
387 	.gpl_only	= false,
388 	.ret_type	= RET_PTR_TO_MAP_VALUE,
389 	.arg1_type	= ARG_CONST_MAP_PTR,
390 	.arg2_type	= ARG_ANYTHING,
391 };
392 #endif
393 
394 #define BPF_STRTOX_BASE_MASK 0x1F
395 
396 static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
397 			  unsigned long long *res, bool *is_negative)
398 {
399 	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
400 	const char *cur_buf = buf;
401 	size_t cur_len = buf_len;
402 	unsigned int consumed;
403 	size_t val_len;
404 	char str[64];
405 
406 	if (!buf || !buf_len || !res || !is_negative)
407 		return -EINVAL;
408 
409 	if (base != 0 && base != 8 && base != 10 && base != 16)
410 		return -EINVAL;
411 
412 	if (flags & ~BPF_STRTOX_BASE_MASK)
413 		return -EINVAL;
414 
415 	while (cur_buf < buf + buf_len && isspace(*cur_buf))
416 		++cur_buf;
417 
418 	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
419 	if (*is_negative)
420 		++cur_buf;
421 
422 	consumed = cur_buf - buf;
423 	cur_len -= consumed;
424 	if (!cur_len)
425 		return -EINVAL;
426 
427 	cur_len = min(cur_len, sizeof(str) - 1);
428 	memcpy(str, cur_buf, cur_len);
429 	str[cur_len] = '\0';
430 	cur_buf = str;
431 
432 	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
433 	val_len = _parse_integer(cur_buf, base, res);
434 
435 	if (val_len & KSTRTOX_OVERFLOW)
436 		return -ERANGE;
437 
438 	if (val_len == 0)
439 		return -EINVAL;
440 
441 	cur_buf += val_len;
442 	consumed += cur_buf - str;
443 
444 	return consumed;
445 }
446 
447 static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
448 			 long long *res)
449 {
450 	unsigned long long _res;
451 	bool is_negative;
452 	int err;
453 
454 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
455 	if (err < 0)
456 		return err;
457 	if (is_negative) {
458 		if ((long long)-_res > 0)
459 			return -ERANGE;
460 		*res = -_res;
461 	} else {
462 		if ((long long)_res < 0)
463 			return -ERANGE;
464 		*res = _res;
465 	}
466 	return err;
467 }
468 
469 BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
470 	   long *, res)
471 {
472 	long long _res;
473 	int err;
474 
475 	err = __bpf_strtoll(buf, buf_len, flags, &_res);
476 	if (err < 0)
477 		return err;
478 	if (_res != (long)_res)
479 		return -ERANGE;
480 	*res = _res;
481 	return err;
482 }
483 
484 const struct bpf_func_proto bpf_strtol_proto = {
485 	.func		= bpf_strtol,
486 	.gpl_only	= false,
487 	.ret_type	= RET_INTEGER,
488 	.arg1_type	= ARG_PTR_TO_MEM,
489 	.arg2_type	= ARG_CONST_SIZE,
490 	.arg3_type	= ARG_ANYTHING,
491 	.arg4_type	= ARG_PTR_TO_LONG,
492 };
493 
494 BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
495 	   unsigned long *, res)
496 {
497 	unsigned long long _res;
498 	bool is_negative;
499 	int err;
500 
501 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
502 	if (err < 0)
503 		return err;
504 	if (is_negative)
505 		return -EINVAL;
506 	if (_res != (unsigned long)_res)
507 		return -ERANGE;
508 	*res = _res;
509 	return err;
510 }
511 
512 const struct bpf_func_proto bpf_strtoul_proto = {
513 	.func		= bpf_strtoul,
514 	.gpl_only	= false,
515 	.ret_type	= RET_INTEGER,
516 	.arg1_type	= ARG_PTR_TO_MEM,
517 	.arg2_type	= ARG_CONST_SIZE,
518 	.arg3_type	= ARG_ANYTHING,
519 	.arg4_type	= ARG_PTR_TO_LONG,
520 };
521 #endif
522 
523 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
524 	   struct bpf_pidns_info *, nsdata, u32, size)
525 {
526 	struct task_struct *task = current;
527 	struct pid_namespace *pidns;
528 	int err = -EINVAL;
529 
530 	if (unlikely(size != sizeof(struct bpf_pidns_info)))
531 		goto clear;
532 
533 	if (unlikely((u64)(dev_t)dev != dev))
534 		goto clear;
535 
536 	if (unlikely(!task))
537 		goto clear;
538 
539 	pidns = task_active_pid_ns(task);
540 	if (unlikely(!pidns)) {
541 		err = -ENOENT;
542 		goto clear;
543 	}
544 
545 	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
546 		goto clear;
547 
548 	nsdata->pid = task_pid_nr_ns(task, pidns);
549 	nsdata->tgid = task_tgid_nr_ns(task, pidns);
550 	return 0;
551 clear:
552 	memset((void *)nsdata, 0, (size_t) size);
553 	return err;
554 }
555 
556 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
557 	.func		= bpf_get_ns_current_pid_tgid,
558 	.gpl_only	= false,
559 	.ret_type	= RET_INTEGER,
560 	.arg1_type	= ARG_ANYTHING,
561 	.arg2_type	= ARG_ANYTHING,
562 	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
563 	.arg4_type      = ARG_CONST_SIZE,
564 };
565