// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include <linux/security.h>

#include "../../lib/kstrtox.h"

/* If kernel subsystem is allowing eBPF programs to call this function,
 * inside its own verifier_ops->get_func_proto() callback it should return
 * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
 *
 * Different map implementations will rely on rcu in map methods
 * lookup/update/delete, therefore eBPF programs must run under rcu lock
 * if program is allowed to access maps, so check rcu_read_lock_held in
 * all three functions.
 */
/* Look up @key through the map's own map_lookup_elem op and return the
 * resulting pointer as an unsigned long. Warns once when called outside an
 * RCU read-side critical section.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

/* Forward an update of @key/@value with @flags to the map's map_update_elem
 * op and return its result. Warns once when called without RCU read lock.
 */
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

/* Forward a delete of @key to the map's map_delete_elem op and return its
 * result. Warns once when called without RCU read lock.
 */
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

/* Forward a push of @value with @flags to the map's map_push_elem op.
 * Unlike lookup/update/delete above, no RCU check is performed here.
 */
BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};

/* Forward a pop into @value to the map's map_pop_elem op. */
BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

/* @value is declared as an uninitialized map value: the helper is the writer. */
const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};

/* Forward a peek into @value to the map's map_peek_elem op. */
BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};

/* The helper body, bpf_user_rnd_u32, is defined outside this file section. */
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return smp_processor_id() for the CPU running the program. */
BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return numa_node_id() for the CPU running the program. */
BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return the monotonic clock in nanoseconds. */
BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return the boottime clock in nanoseconds. */
BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return ktime_get_coarse_ns(). */
BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
	return ktime_get_coarse_ns();
}

const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
	.func		= bpf_ktime_get_coarse_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return the current task's ids packed into one u64: tgid in the upper
 * 32 bits, pid in the lower 32 bits. Returns -EINVAL if there is no
 * current task.
 */
BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return the current credentials packed into one u64: gid in the upper
 * 32 bits, uid in the lower 32 bits, both translated through init_user_ns.
 * Returns -EINVAL if there is no current task.
 */
BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Copy the current task's comm into @buf (at most @size bytes) and force
 * NUL-termination. Returns 0 on success; if there is no current task the
 * whole buffer is zeroed and -EINVAL is returned.
 */
BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	strncpy(buf, task->comm, size);

	/* Verifier guarantees that size > 0. For task->comm exceeding
	 * size, guarantee that buf is %NUL-terminated. Unconditionally
	 * done here to save the size test.
	 */
	buf[size - 1] = 0;
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

/* Two implementations of the raw bpf_spin_lock primitives follow: one that
 * reinterprets the 32-bit bpf_spin_lock as an arch_spinlock_t, and a fallback
 * built on atomic_t.
 */
#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

/* Acquire @lock by treating it as an arch_spinlock_t. The compile-time checks
 * require that the unlocked arch value is all-zero and that both lock types
 * are exactly 4 bytes.
 */
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	arch_spin_lock(l);
}

/* Release @lock via arch_spin_unlock(). */
static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;

	arch_spin_unlock(l);
}

#else

/* Fallback lock: treat @lock as an atomic_t, wait with a relaxed read until
 * it reads 0, then try to claim it with atomic_xchg(); retry while the
 * exchange returns a non-zero previous value.
 */
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

/* Fallback unlock: store 0 with release ordering. */
static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	atomic_set_release(l, 0);
}

#endif

/* IRQ flags saved by bpf_spin_lock() and restored by bpf_spin_unlock(). */
static DEFINE_PER_CPU(unsigned long, irqsave_flags);

/* Disable local interrupts, take @lock, then stash the saved IRQ flags in the
 * per-CPU irqsave_flags slot for the matching unlock. Always returns 0.
 */
notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	local_irq_save(flags);
	__bpf_spin_lock(lock);
	__this_cpu_write(irqsave_flags, flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

/* Read back the IRQ flags saved by bpf_spin_lock() (before dropping the
 * lock), release @lock, then restore interrupts. Always returns 0.
 */
notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	flags = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

/* Copy a map value from @src to @dst while holding the bpf_spin_lock embedded
 * in one of them at offset map->spin_lock_off: the lock inside @src when
 * @lock_src is true, otherwise the lock inside @dst. Preemption is disabled
 * around the locked copy.
 *
 * NOTE(review): ____bpf_spin_lock()/____bpf_spin_unlock() (four underscores)
 * are not defined by name in this file; presumably they are the inner
 * functions generated by BPF_CALL_1() for bpf_spin_lock/bpf_spin_unlock
 * above - confirm against the BPF_CALL_x macro definition.
 */
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src)
{
	struct bpf_spin_lock *lock;

	if (lock_src)
		lock = src + map->spin_lock_off;
	else
		lock = dst + map->spin_lock_off;
	preempt_disable();
	____bpf_spin_lock(lock);
	copy_map_value(map, dst, src);
	____bpf_spin_unlock(lock);
	preempt_enable();
}

/* Return get_jiffies_64(). */
BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
	.func		= bpf_jiffies64,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

#ifdef CONFIG_CGROUPS
/* Return the id of the current task's cgroup as given by task_dfl_cgroup(). */
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);

	return cgroup_id(cgrp);
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Return the id of the ancestor of the current task's cgroup at
 * @ancestor_level, or 0 when cgroup_ancestor() finds none.
 */
BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);
	struct cgroup *ancestor;

	ancestor = cgroup_ancestor(cgrp, ancestor_level);
	if (!ancestor)
		return 0;
	return cgroup_id(ancestor);
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
	.func		= bpf_get_current_ancestor_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

#ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);

/* Return a pointer (as unsigned long) to the cgroup local storage that
 * belongs to @map for the currently running program: the shared buffer's
 * data for BPF_CGROUP_STORAGE_SHARED, otherwise this CPU's per-CPU buffer.
 */
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* flags argument is not used now,
	 * but provides an ability to extend the API.
	 * verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage = NULL;
	void *ptr;
	int i;

	/* Find this CPU's storage_info slot that was recorded for the
	 * current task and take its storage of the requested type.
	 */
	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
			continue;

		storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
		break;
	}

	/* NOTE(review): storage stays NULL when no slot matches current and
	 * is dereferenced below without a check; presumably the caller that
	 * runs the program always populates a slot first - confirm.
	 */
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		ptr = &READ_ONCE(storage->buf)->data[0];
	else
		ptr = this_cpu_ptr(storage->percpu_buf);

	return (unsigned long)ptr;
}

const struct bpf_func_proto bpf_get_local_storage_proto = {
	.func		= bpf_get_local_storage,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
#endif

/* Low 5 bits of the strtox flags carry the numeric base. */
#define BPF_STRTOX_BASE_MASK 0x1F

/* Parse an unsigned integer from the (not necessarily NUL-terminated) buffer
 * @buf of @buf_len bytes.
 *
 * @flags:       base in the low 5 bits (0, 8, 10 or 16); any other bit set
 *               is rejected.
 * @res:         receives the parsed magnitude.
 * @is_negative: set to true when a '-' precedes the digits.
 *
 * Leading whitespace and an optional '-' are skipped, then at most
 * sizeof(str) - 1 bytes are copied into a local NUL-terminated buffer and
 * handed to _parse_integer_fixup_radix()/_parse_integer().
 *
 * Returns the number of bytes consumed from @buf (whitespace, sign and
 * digits), -EINVAL on bad arguments or when no digits are found, or
 * -ERANGE when the parser reports overflow.
 */
static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *cur_buf = buf;
	size_t cur_len = buf_len;
	unsigned int consumed;
	size_t val_len;
	char str[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	if (base != 0 && base != 8 && base != 10 && base != 16)
		return -EINVAL;

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	while (cur_buf < buf + buf_len && isspace(*cur_buf))
		++cur_buf;

	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
	if (*is_negative)
		++cur_buf;

	/* Bytes skipped so far; nothing left means there is no number. */
	consumed = cur_buf - buf;
	cur_len -= consumed;
	if (!cur_len)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated copy of the remaining input. */
	cur_len = min(cur_len, sizeof(str) - 1);
	memcpy(str, cur_buf, cur_len);
	str[cur_len] = '\0';
	cur_buf = str;

	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
	val_len = _parse_integer(cur_buf, base, res);

	if (val_len & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (val_len == 0)
		return -EINVAL;

	/* Add what was consumed inside the copy (radix prefix + digits). */
	cur_buf += val_len;
	consumed += cur_buf - str;

	return consumed;
}

/* Signed wrapper around __bpf_strtoull(): applies the sign and returns
 * -ERANGE when the magnitude does not fit in a long long. On success
 * returns the number of bytes consumed and stores the value in @res.
 */
static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative) {
		/* A negated magnitude that reads as positive overflowed. */
		if ((long long)-_res > 0)
			return -ERANGE;
		*res = -_res;
	} else {
		if ((long long)_res < 0)
			return -ERANGE;
		*res = _res;
	}
	return err;
}

/* Parse a signed long from @buf. Returns the number of bytes consumed, or a
 * negative error; -ERANGE additionally covers values that fit in long long
 * but not in long.
 */
BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long _res;
	int err;

	err = __bpf_strtoll(buf, buf_len, flags, &_res);
	if (err < 0)
		return err;
	if (_res != (long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

/* Parse an unsigned long from @buf. Returns the number of bytes consumed, or
 * a negative error: -EINVAL when the input carries a '-' sign, -ERANGE when
 * the value does not fit in unsigned long.
 */
BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative)
		return -EINVAL;
	if (_res != (unsigned long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};
552 #endif 553 554 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, 555 struct bpf_pidns_info *, nsdata, u32, size) 556 { 557 struct task_struct *task = current; 558 struct pid_namespace *pidns; 559 int err = -EINVAL; 560 561 if (unlikely(size != sizeof(struct bpf_pidns_info))) 562 goto clear; 563 564 if (unlikely((u64)(dev_t)dev != dev)) 565 goto clear; 566 567 if (unlikely(!task)) 568 goto clear; 569 570 pidns = task_active_pid_ns(task); 571 if (unlikely(!pidns)) { 572 err = -ENOENT; 573 goto clear; 574 } 575 576 if (!ns_match(&pidns->ns, (dev_t)dev, ino)) 577 goto clear; 578 579 nsdata->pid = task_pid_nr_ns(task, pidns); 580 nsdata->tgid = task_tgid_nr_ns(task, pidns); 581 return 0; 582 clear: 583 memset((void *)nsdata, 0, (size_t) size); 584 return err; 585 } 586 587 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { 588 .func = bpf_get_ns_current_pid_tgid, 589 .gpl_only = false, 590 .ret_type = RET_INTEGER, 591 .arg1_type = ARG_ANYTHING, 592 .arg2_type = ARG_ANYTHING, 593 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 594 .arg4_type = ARG_CONST_SIZE, 595 }; 596 597 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { 598 .func = bpf_get_raw_cpu_id, 599 .gpl_only = false, 600 .ret_type = RET_INTEGER, 601 }; 602 603 BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, 604 u64, flags, void *, data, u64, size) 605 { 606 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 607 return -EINVAL; 608 609 return bpf_event_output(map, flags, data, size, NULL, 0, NULL); 610 } 611 612 const struct bpf_func_proto bpf_event_output_data_proto = { 613 .func = bpf_event_output_data, 614 .gpl_only = true, 615 .ret_type = RET_INTEGER, 616 .arg1_type = ARG_PTR_TO_CTX, 617 .arg2_type = ARG_CONST_MAP_PTR, 618 .arg3_type = ARG_ANYTHING, 619 .arg4_type = ARG_PTR_TO_MEM, 620 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 621 }; 622 623 BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, 624 const void __user *, user_ptr) 625 { 626 int ret = 
copy_from_user(dst, user_ptr, size); 627 628 if (unlikely(ret)) { 629 memset(dst, 0, size); 630 ret = -EFAULT; 631 } 632 633 return ret; 634 } 635 636 const struct bpf_func_proto bpf_copy_from_user_proto = { 637 .func = bpf_copy_from_user, 638 .gpl_only = false, 639 .ret_type = RET_INTEGER, 640 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 641 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 642 .arg3_type = ARG_ANYTHING, 643 }; 644 645 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) 646 { 647 if (cpu >= nr_cpu_ids) 648 return (unsigned long)NULL; 649 650 return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); 651 } 652 653 const struct bpf_func_proto bpf_per_cpu_ptr_proto = { 654 .func = bpf_per_cpu_ptr, 655 .gpl_only = false, 656 .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, 657 .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, 658 .arg2_type = ARG_ANYTHING, 659 }; 660 661 BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) 662 { 663 return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); 664 } 665 666 const struct bpf_func_proto bpf_this_cpu_ptr_proto = { 667 .func = bpf_this_cpu_ptr, 668 .gpl_only = false, 669 .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, 670 .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, 671 }; 672 673 static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, 674 size_t bufsz) 675 { 676 void __user *user_ptr = (__force void __user *)unsafe_ptr; 677 678 buf[0] = 0; 679 680 switch (fmt_ptype) { 681 case 's': 682 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 683 if ((unsigned long)unsafe_ptr < TASK_SIZE) 684 return strncpy_from_user_nofault(buf, user_ptr, bufsz); 685 fallthrough; 686 #endif 687 case 'k': 688 return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz); 689 case 'u': 690 return strncpy_from_user_nofault(buf, user_ptr, bufsz); 691 } 692 693 return -EINVAL; 694 } 695 696 /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary 697 * arguments representation. 
698 */ 699 #define MAX_BPRINTF_BUF_LEN 512 700 701 /* Support executing three nested bprintf helper calls on a given CPU */ 702 #define MAX_BPRINTF_NEST_LEVEL 3 703 struct bpf_bprintf_buffers { 704 char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; 705 }; 706 static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); 707 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); 708 709 static int try_get_fmt_tmp_buf(char **tmp_buf) 710 { 711 struct bpf_bprintf_buffers *bufs; 712 int nest_level; 713 714 preempt_disable(); 715 nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); 716 if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) { 717 this_cpu_dec(bpf_bprintf_nest_level); 718 preempt_enable(); 719 return -EBUSY; 720 } 721 bufs = this_cpu_ptr(&bpf_bprintf_bufs); 722 *tmp_buf = bufs->tmp_bufs[nest_level - 1]; 723 724 return 0; 725 } 726 727 void bpf_bprintf_cleanup(void) 728 { 729 if (this_cpu_read(bpf_bprintf_nest_level)) { 730 this_cpu_dec(bpf_bprintf_nest_level); 731 preempt_enable(); 732 } 733 } 734 735 /* 736 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers 737 * 738 * Returns a negative value if fmt is an invalid format string or 0 otherwise. 739 * 740 * This can be used in two ways: 741 * - Format string verification only: when bin_args is NULL 742 * - Arguments preparation: in addition to the above verification, it writes in 743 * bin_args a binary representation of arguments usable by bstr_printf where 744 * pointers from BPF have been sanitized. 745 * 746 * In argument preparation mode, if 0 is returned, safe temporary buffers are 747 * allocated and bpf_bprintf_cleanup should be called to free them after use. 
748 */ 749 int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, 750 u32 **bin_args, u32 num_args) 751 { 752 char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; 753 size_t sizeof_cur_arg, sizeof_cur_ip; 754 int err, i, num_spec = 0; 755 u64 cur_arg; 756 char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX"; 757 758 fmt_end = strnchr(fmt, fmt_size, 0); 759 if (!fmt_end) 760 return -EINVAL; 761 fmt_size = fmt_end - fmt; 762 763 if (bin_args) { 764 if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) 765 return -EBUSY; 766 767 tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; 768 *bin_args = (u32 *)tmp_buf; 769 } 770 771 for (i = 0; i < fmt_size; i++) { 772 if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { 773 err = -EINVAL; 774 goto out; 775 } 776 777 if (fmt[i] != '%') 778 continue; 779 780 if (fmt[i + 1] == '%') { 781 i++; 782 continue; 783 } 784 785 if (num_spec >= num_args) { 786 err = -EINVAL; 787 goto out; 788 } 789 790 /* The string is zero-terminated so if fmt[i] != 0, we can 791 * always access fmt[i + 1], in the worst case it will be a 0 792 */ 793 i++; 794 795 /* skip optional "[0 +-][num]" width formatting field */ 796 while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || 797 fmt[i] == ' ') 798 i++; 799 if (fmt[i] >= '1' && fmt[i] <= '9') { 800 i++; 801 while (fmt[i] >= '0' && fmt[i] <= '9') 802 i++; 803 } 804 805 if (fmt[i] == 'p') { 806 sizeof_cur_arg = sizeof(long); 807 808 if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && 809 fmt[i + 2] == 's') { 810 fmt_ptype = fmt[i + 1]; 811 i += 2; 812 goto fmt_str; 813 } 814 815 if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || 816 ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' || 817 fmt[i + 1] == 'x' || fmt[i + 1] == 's' || 818 fmt[i + 1] == 'S') { 819 /* just kernel pointers */ 820 if (tmp_buf) 821 cur_arg = raw_args[num_spec]; 822 i++; 823 goto nocopy_fmt; 824 } 825 826 if (fmt[i + 1] == 'B') { 827 if (tmp_buf) { 828 err = snprintf(tmp_buf, 829 (tmp_buf_end - tmp_buf), 830 "%pB", 831 (void 
*)(long)raw_args[num_spec]); 832 tmp_buf += (err + 1); 833 } 834 835 i++; 836 num_spec++; 837 continue; 838 } 839 840 /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ 841 if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') || 842 (fmt[i + 2] != '4' && fmt[i + 2] != '6')) { 843 err = -EINVAL; 844 goto out; 845 } 846 847 i += 2; 848 if (!tmp_buf) 849 goto nocopy_fmt; 850 851 sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16; 852 if (tmp_buf_end - tmp_buf < sizeof_cur_ip) { 853 err = -ENOSPC; 854 goto out; 855 } 856 857 unsafe_ptr = (char *)(long)raw_args[num_spec]; 858 err = copy_from_kernel_nofault(cur_ip, unsafe_ptr, 859 sizeof_cur_ip); 860 if (err < 0) 861 memset(cur_ip, 0, sizeof_cur_ip); 862 863 /* hack: bstr_printf expects IP addresses to be 864 * pre-formatted as strings, ironically, the easiest way 865 * to do that is to call snprintf. 866 */ 867 ip_spec[2] = fmt[i - 1]; 868 ip_spec[3] = fmt[i]; 869 err = snprintf(tmp_buf, tmp_buf_end - tmp_buf, 870 ip_spec, &cur_ip); 871 872 tmp_buf += err + 1; 873 num_spec++; 874 875 continue; 876 } else if (fmt[i] == 's') { 877 fmt_ptype = fmt[i]; 878 fmt_str: 879 if (fmt[i + 1] != 0 && 880 !isspace(fmt[i + 1]) && 881 !ispunct(fmt[i + 1])) { 882 err = -EINVAL; 883 goto out; 884 } 885 886 if (!tmp_buf) 887 goto nocopy_fmt; 888 889 if (tmp_buf_end == tmp_buf) { 890 err = -ENOSPC; 891 goto out; 892 } 893 894 unsafe_ptr = (char *)(long)raw_args[num_spec]; 895 err = bpf_trace_copy_string(tmp_buf, unsafe_ptr, 896 fmt_ptype, 897 tmp_buf_end - tmp_buf); 898 if (err < 0) { 899 tmp_buf[0] = '\0'; 900 err = 1; 901 } 902 903 tmp_buf += err; 904 num_spec++; 905 906 continue; 907 } 908 909 sizeof_cur_arg = sizeof(int); 910 911 if (fmt[i] == 'l') { 912 sizeof_cur_arg = sizeof(long); 913 i++; 914 } 915 if (fmt[i] == 'l') { 916 sizeof_cur_arg = sizeof(long long); 917 i++; 918 } 919 920 if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && 921 fmt[i] != 'x' && fmt[i] != 'X') { 922 err = -EINVAL; 923 goto out; 924 } 925 926 if (tmp_buf) 927 cur_arg = 
raw_args[num_spec]; 928 nocopy_fmt: 929 if (tmp_buf) { 930 tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32)); 931 if (tmp_buf_end - tmp_buf < sizeof_cur_arg) { 932 err = -ENOSPC; 933 goto out; 934 } 935 936 if (sizeof_cur_arg == 8) { 937 *(u32 *)tmp_buf = *(u32 *)&cur_arg; 938 *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1); 939 } else { 940 *(u32 *)tmp_buf = (u32)(long)cur_arg; 941 } 942 tmp_buf += sizeof_cur_arg; 943 } 944 num_spec++; 945 } 946 947 err = 0; 948 out: 949 if (err) 950 bpf_bprintf_cleanup(); 951 return err; 952 } 953 954 #define MAX_SNPRINTF_VARARGS 12 955 956 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, 957 const void *, data, u32, data_len) 958 { 959 int err, num_args; 960 u32 *bin_args; 961 962 if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 || 963 (data_len && !data)) 964 return -EINVAL; 965 num_args = data_len / 8; 966 967 /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we 968 * can safely give an unbounded size. 969 */ 970 err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); 971 if (err < 0) 972 return err; 973 974 err = bstr_printf(str, str_size, fmt, bin_args); 975 976 bpf_bprintf_cleanup(); 977 978 return err + 1; 979 } 980 981 const struct bpf_func_proto bpf_snprintf_proto = { 982 .func = bpf_snprintf, 983 .gpl_only = true, 984 .ret_type = RET_INTEGER, 985 .arg1_type = ARG_PTR_TO_MEM_OR_NULL, 986 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 987 .arg3_type = ARG_PTR_TO_CONST_STR, 988 .arg4_type = ARG_PTR_TO_MEM_OR_NULL, 989 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 990 }; 991 992 const struct bpf_func_proto bpf_get_current_task_proto __weak; 993 const struct bpf_func_proto bpf_probe_read_user_proto __weak; 994 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; 995 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; 996 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; 997 998 const struct bpf_func_proto * 999 bpf_base_func_proto(enum bpf_func_id func_id) 
1000 { 1001 switch (func_id) { 1002 case BPF_FUNC_map_lookup_elem: 1003 return &bpf_map_lookup_elem_proto; 1004 case BPF_FUNC_map_update_elem: 1005 return &bpf_map_update_elem_proto; 1006 case BPF_FUNC_map_delete_elem: 1007 return &bpf_map_delete_elem_proto; 1008 case BPF_FUNC_map_push_elem: 1009 return &bpf_map_push_elem_proto; 1010 case BPF_FUNC_map_pop_elem: 1011 return &bpf_map_pop_elem_proto; 1012 case BPF_FUNC_map_peek_elem: 1013 return &bpf_map_peek_elem_proto; 1014 case BPF_FUNC_get_prandom_u32: 1015 return &bpf_get_prandom_u32_proto; 1016 case BPF_FUNC_get_smp_processor_id: 1017 return &bpf_get_raw_smp_processor_id_proto; 1018 case BPF_FUNC_get_numa_node_id: 1019 return &bpf_get_numa_node_id_proto; 1020 case BPF_FUNC_tail_call: 1021 return &bpf_tail_call_proto; 1022 case BPF_FUNC_ktime_get_ns: 1023 return &bpf_ktime_get_ns_proto; 1024 case BPF_FUNC_ktime_get_boot_ns: 1025 return &bpf_ktime_get_boot_ns_proto; 1026 case BPF_FUNC_ktime_get_coarse_ns: 1027 return &bpf_ktime_get_coarse_ns_proto; 1028 case BPF_FUNC_ringbuf_output: 1029 return &bpf_ringbuf_output_proto; 1030 case BPF_FUNC_ringbuf_reserve: 1031 return &bpf_ringbuf_reserve_proto; 1032 case BPF_FUNC_ringbuf_submit: 1033 return &bpf_ringbuf_submit_proto; 1034 case BPF_FUNC_ringbuf_discard: 1035 return &bpf_ringbuf_discard_proto; 1036 case BPF_FUNC_ringbuf_query: 1037 return &bpf_ringbuf_query_proto; 1038 case BPF_FUNC_for_each_map_elem: 1039 return &bpf_for_each_map_elem_proto; 1040 default: 1041 break; 1042 } 1043 1044 if (!bpf_capable()) 1045 return NULL; 1046 1047 switch (func_id) { 1048 case BPF_FUNC_spin_lock: 1049 return &bpf_spin_lock_proto; 1050 case BPF_FUNC_spin_unlock: 1051 return &bpf_spin_unlock_proto; 1052 case BPF_FUNC_jiffies64: 1053 return &bpf_jiffies64_proto; 1054 case BPF_FUNC_per_cpu_ptr: 1055 return &bpf_per_cpu_ptr_proto; 1056 case BPF_FUNC_this_cpu_ptr: 1057 return &bpf_this_cpu_ptr_proto; 1058 default: 1059 break; 1060 } 1061 1062 if (!perfmon_capable()) 1063 return NULL; 
1064 1065 switch (func_id) { 1066 case BPF_FUNC_trace_printk: 1067 return bpf_get_trace_printk_proto(); 1068 case BPF_FUNC_get_current_task: 1069 return &bpf_get_current_task_proto; 1070 case BPF_FUNC_probe_read_user: 1071 return &bpf_probe_read_user_proto; 1072 case BPF_FUNC_probe_read_kernel: 1073 return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? 1074 NULL : &bpf_probe_read_kernel_proto; 1075 case BPF_FUNC_probe_read_user_str: 1076 return &bpf_probe_read_user_str_proto; 1077 case BPF_FUNC_probe_read_kernel_str: 1078 return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? 1079 NULL : &bpf_probe_read_kernel_str_proto; 1080 case BPF_FUNC_snprintf_btf: 1081 return &bpf_snprintf_btf_proto; 1082 case BPF_FUNC_snprintf: 1083 return &bpf_snprintf_proto; 1084 default: 1085 return NULL; 1086 } 1087 } 1088