// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>

#include "../../lib/kstrtox.h"

/* If a kernel subsystem allows eBPF programs to call this function, it
 * should return bpf_map_lookup_elem_proto from its own
 * verifier_ops->get_func_proto() callback, so that the verifier can
 * properly check the arguments.
 *
 * Different map implementations rely on RCU in their lookup/update/delete
 * methods, therefore eBPF programs must run under an RCU read lock if they
 * are allowed to access maps; check rcu_read_lock_held() in all three
 * functions.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};
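
/* Illustrative sketch, not part of this file: from the BPF program side,
 * the map helpers above are reached through the uapi wrappers in libbpf's
 * bpf_helpers.h. Assuming a hash map named "counters" declared with the
 * usual libbpf conventions, a program could bump a per-key counter as
 * follows; the verifier checks each argument against the protos above:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 128);
 *		__type(key, u32);
 *		__type(value, u64);
 *	} counters SEC(".maps");
 *
 *	u32 key = 0;
 *	u64 init = 1, *val;
 *
 *	val = bpf_map_lookup_elem(&counters, &key);
 *	if (val)
 *		__sync_fetch_and_add(val, 1);
 *	else
 *		bpf_map_update_elem(&counters, &key, &init, BPF_ANY);
 */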

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
	return ktime_get_coarse_ns();
}

const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
	.func		= bpf_ktime_get_coarse_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
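
/* Illustrative sketch, not part of this file: the two helpers above pack a
 * pair of 32-bit ids into one u64, so callers split the return value with a
 * shift and a truncating cast:
 *
 *	u64 pid_tgid = bpf_get_current_pid_tgid();
 *	u32 tgid = pid_tgid >> 32;	// userspace "PID" (thread group id)
 *	u32 pid  = (u32)pid_tgid;	// userspace "TID" (thread id)
 *
 *	u64 uid_gid = bpf_get_current_uid_gid();
 *	u32 gid = uid_gid >> 32;
 *	u32 uid = (u32)uid_gid;
 */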

BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	strncpy(buf, task->comm, size);

	/* Verifier guarantees that size > 0. For task->comm exceeding
	 * size, guarantee that buf is %NUL-terminated. Unconditionally
	 * done here to save the size test.
	 */
	buf[size - 1] = 0;
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;

	arch_spin_unlock(l);
}

#else

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	atomic_set_release(l, 0);
}

#endif

static DEFINE_PER_CPU(unsigned long, irqsave_flags);

notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	local_irq_save(flags);
	__bpf_spin_lock(lock);
	__this_cpu_write(irqsave_flags, flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	flags = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src)
{
	struct bpf_spin_lock *lock;

	if (lock_src)
		lock = src + map->spin_lock_off;
	else
		lock = dst + map->spin_lock_off;
	preempt_disable();
	____bpf_spin_lock(lock);
	copy_map_value(map, dst, src);
	____bpf_spin_unlock(lock);
	preempt_enable();
}

BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
	.func		= bpf_jiffies64,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
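
/* Illustrative sketch, not part of this file: bpf_spin_lock protects fields
 * inside a single map value. Assuming a map value type declared in a BPF
 * program as:
 *
 *	struct val {
 *		struct bpf_spin_lock lock;
 *		u64 counter;
 *	};
 *
 * the guarded update looks like this; the verifier enforces that the lock
 * is released before the program exits and that no helper is called while
 * it is held:
 *
 *	struct val *v = bpf_map_lookup_elem(&m, &key);
 *	if (v) {
 *		bpf_spin_lock(&v->lock);
 *		v->counter++;
 *		bpf_spin_unlock(&v->lock);
 *	}
 */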

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);

	return cgroup_id(cgrp);
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);
	struct cgroup *ancestor;

	ancestor = cgroup_ancestor(cgrp, ancestor_level);
	if (!ancestor)
		return 0;
	return cgroup_id(ancestor);
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
	.func		= bpf_get_current_ancestor_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

#ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* The flags argument is currently unused, but provides the ability
	 * to extend the API. The verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage = NULL;
	void *ptr;
	int i;

	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
			continue;

		storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
		break;
	}

	if (stype == BPF_CGROUP_STORAGE_SHARED)
		ptr = &READ_ONCE(storage->buf)->data[0];
	else
		ptr = this_cpu_ptr(storage->percpu_buf);

	return (unsigned long)ptr;
}

const struct bpf_func_proto bpf_get_local_storage_proto = {
	.func		= bpf_get_local_storage,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
#endif
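
/* Illustrative sketch, not part of this file: the cgroup-id helpers above
 * are typically used to scope a program to one cgroup subtree. Assuming
 * target_cgroup_id is a global filled in by userspace (hypothetical name):
 *
 *	u64 id = bpf_get_current_cgroup_id();
 *	if (id != target_cgroup_id)
 *		return 0;
 *
 * bpf_get_current_ancestor_cgroup_id(level) similarly returns the id of the
 * ancestor at the given level of the default hierarchy, or 0 when
 * ancestor_level is out of range (cgroup_ancestor() returned NULL).
 */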

#define BPF_STRTOX_BASE_MASK 0x1F

static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *cur_buf = buf;
	size_t cur_len = buf_len;
	unsigned int consumed;
	size_t val_len;
	char str[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	if (base != 0 && base != 8 && base != 10 && base != 16)
		return -EINVAL;

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	while (cur_buf < buf + buf_len && isspace(*cur_buf))
		++cur_buf;

	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
	if (*is_negative)
		++cur_buf;

	consumed = cur_buf - buf;
	cur_len -= consumed;
	if (!cur_len)
		return -EINVAL;

	cur_len = min(cur_len, sizeof(str) - 1);
	memcpy(str, cur_buf, cur_len);
	str[cur_len] = '\0';
	cur_buf = str;

	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
	val_len = _parse_integer(cur_buf, base, res);

	if (val_len & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (val_len == 0)
		return -EINVAL;

	cur_buf += val_len;
	consumed += cur_buf - str;

	return consumed;
}

static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative) {
		if ((long long)-_res > 0)
			return -ERANGE;
		*res = -_res;
	} else {
		if ((long long)_res < 0)
			return -ERANGE;
		*res = _res;
	}
	return err;
}

BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long _res;
	int err;

	err = __bpf_strtoll(buf, buf_len, flags, &_res);
	if (err < 0)
		return err;
	if (_res != (long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative)
		return -EINVAL;
	if (_res != (unsigned long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};
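
/* Illustrative sketch, not part of this file: the low 5 bits of flags carry
 * the numeric base (BPF_STRTOX_BASE_MASK), mirroring kstrtol(); base 0
 * auto-detects the 0x/0 prefix. On success the helpers return the number of
 * bytes consumed, e.g.:
 *
 *	long val;
 *	const char buf[] = "  -0x1f";
 *
 *	long n = bpf_strtol(buf, sizeof(buf) - 1, 0, &val);
 *	// expected: n == 7 (two spaces, '-', "0x1f"), val == -31
 */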
#endif

BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
	   struct bpf_pidns_info *, nsdata, u32, size)
{
	struct task_struct *task = current;
	struct pid_namespace *pidns;
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_pidns_info)))
		goto clear;

	if (unlikely((u64)(dev_t)dev != dev))
		goto clear;

	if (unlikely(!task))
		goto clear;

	pidns = task_active_pid_ns(task);
	if (unlikely(!pidns)) {
		err = -ENOENT;
		goto clear;
	}

	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
		goto clear;

	nsdata->pid = task_pid_nr_ns(task, pidns);
	nsdata->tgid = task_tgid_nr_ns(task, pidns);
	return 0;
clear:
	memset((void *)nsdata, 0, (size_t) size);
	return err;
}

const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
	.func		= bpf_get_ns_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}

const struct bpf_func_proto bpf_event_output_data_proto = {
	.func		= bpf_event_output_data,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
	   const void __user *, user_ptr)
{
	int ret = copy_from_user(dst, user_ptr, size);

	if (unlikely(ret)) {
		memset(dst, 0, size);
		ret = -EFAULT;
	}

	return ret;
}

const struct bpf_func_proto bpf_copy_from_user_proto = {
	.func		= bpf_copy_from_user,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
	if (cpu >= nr_cpu_ids)
		return (unsigned long)NULL;

	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
}

const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
	.func		= bpf_per_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
}

const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
	.func		= bpf_this_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
};

static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
				 size_t bufsz)
{
	void __user *user_ptr = (__force void __user *)unsafe_ptr;

	buf[0] = 0;

	switch (fmt_ptype) {
	case 's':
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
		if ((unsigned long)unsafe_ptr < TASK_SIZE)
			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
		fallthrough;
#endif
	case 'k':
		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
	case 'u':
		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
	}

	return -EINVAL;
}

/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
 * arguments representation.
 */
#define MAX_BPRINTF_BUF_LEN	512

/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL	3
struct bpf_bprintf_buffers {
	char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
};
static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);

static int try_get_fmt_tmp_buf(char **tmp_buf)
{
	struct bpf_bprintf_buffers *bufs;
	int nest_level;

	preempt_disable();
	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
		this_cpu_dec(bpf_bprintf_nest_level);
		preempt_enable();
		return -EBUSY;
	}
	bufs = this_cpu_ptr(&bpf_bprintf_bufs);
	*tmp_buf = bufs->tmp_bufs[nest_level - 1];

	return 0;
}

void bpf_bprintf_cleanup(void)
{
	if (this_cpu_read(bpf_bprintf_nest_level)) {
		this_cpu_dec(bpf_bprintf_nest_level);
		preempt_enable();
	}
}
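
/* Illustrative sketch, not part of this file: callers pair
 * bpf_bprintf_prepare() with bpf_bprintf_cleanup() once argument
 * preparation succeeded, exactly as bpf_snprintf() does below:
 *
 *	err = bpf_bprintf_prepare(fmt, fmt_size, raw_args, &bin_args, num_args);
 *	if (err < 0)
 *		return err;
 *	err = bstr_printf(buf, sizeof(buf), fmt, bin_args);
 *	bpf_bprintf_cleanup();
 */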

/*
 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
 *
 * Returns a negative value if fmt is an invalid format string or 0 otherwise.
 *
 * This can be used in two ways:
 * - Format string verification only: when bin_args is NULL
 * - Arguments preparation: in addition to the above verification, it writes in
 *   bin_args a binary representation of arguments usable by bstr_printf where
 *   pointers from BPF have been sanitized.
 *
 * In argument preparation mode, if 0 is returned, safe temporary buffers are
 * allocated and bpf_bprintf_cleanup should be called to free them after use.
 */
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
			u32 **bin_args, u32 num_args)
{
	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
	size_t sizeof_cur_arg, sizeof_cur_ip;
	int err, i, num_spec = 0;
	u64 cur_arg;
	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";

	fmt_end = strnchr(fmt, fmt_size, 0);
	if (!fmt_end)
		return -EINVAL;
	fmt_size = fmt_end - fmt;

	if (bin_args) {
		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
			return -EBUSY;

		tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
		*bin_args = (u32 *)tmp_buf;
	}

	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
			err = -EINVAL;
			goto out;
		}

		if (fmt[i] != '%')
			continue;

		if (fmt[i + 1] == '%') {
			i++;
			continue;
		}

		if (num_spec >= num_args) {
			err = -EINVAL;
			goto out;
		}

		/* The string is zero-terminated, so if fmt[i] != 0, we can
		 * always access fmt[i + 1]; in the worst case it will be a 0.
		 */
		i++;

		/* skip optional "[0 +-][num]" width formatting field */
		while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
		       fmt[i] == ' ')
			i++;
		if (fmt[i] >= '1' && fmt[i] <= '9') {
			i++;
			while (fmt[i] >= '0' && fmt[i] <= '9')
				i++;
		}

		if (fmt[i] == 'p') {
			sizeof_cur_arg = sizeof(long);

			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
			    fmt[i + 2] == 's') {
				fmt_ptype = fmt[i + 1];
				i += 2;
				goto fmt_str;
			}

			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
			    fmt[i + 1] == 'S') {
				/* just kernel pointers */
				if (tmp_buf)
					cur_arg = raw_args[num_spec];
				i++;
				goto nocopy_fmt;
			}

			if (fmt[i + 1] == 'B') {
				if (tmp_buf)  {
					err = snprintf(tmp_buf,
						       (tmp_buf_end - tmp_buf),
						       "%pB",
						       (void *)(long)raw_args[num_spec]);
					tmp_buf += (err + 1);
				}

				i++;
				num_spec++;
				continue;
			}

			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
				err = -EINVAL;
				goto out;
			}

			i += 2;
			if (!tmp_buf)
				goto nocopy_fmt;

			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
				err = -ENOSPC;
				goto out;
			}

			unsafe_ptr = (char *)(long)raw_args[num_spec];
			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
						       sizeof_cur_ip);
			if (err < 0)
				memset(cur_ip, 0, sizeof_cur_ip);

			/* hack: bstr_printf expects IP addresses to be
			 * pre-formatted as strings; ironically, the easiest way
			 * to do that is to call snprintf.
			 */
			ip_spec[2] = fmt[i - 1];
			ip_spec[3] = fmt[i];
			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
				       ip_spec, &cur_ip);

			tmp_buf += err + 1;
			num_spec++;

			continue;
		} else if (fmt[i] == 's') {
			fmt_ptype = fmt[i];
fmt_str:
			if (fmt[i + 1] != 0 &&
			    !isspace(fmt[i + 1]) &&
			    !ispunct(fmt[i + 1])) {
				err = -EINVAL;
				goto out;
			}

			if (!tmp_buf)
				goto nocopy_fmt;

			if (tmp_buf_end == tmp_buf) {
				err = -ENOSPC;
				goto out;
			}

			unsafe_ptr = (char *)(long)raw_args[num_spec];
			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
						    fmt_ptype,
						    tmp_buf_end - tmp_buf);
			if (err < 0) {
				tmp_buf[0] = '\0';
				err = 1;
			}

			tmp_buf += err;
			num_spec++;

			continue;
		}

		sizeof_cur_arg = sizeof(int);

		if (fmt[i] == 'l') {
			sizeof_cur_arg = sizeof(long);
			i++;
		}
		if (fmt[i] == 'l') {
			sizeof_cur_arg = sizeof(long long);
			i++;
		}

		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
		    fmt[i] != 'x' && fmt[i] != 'X') {
			err = -EINVAL;
			goto out;
		}

		if (tmp_buf)
			cur_arg = raw_args[num_spec];
nocopy_fmt:
		if (tmp_buf) {
			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
				err = -ENOSPC;
				goto out;
			}

			if (sizeof_cur_arg == 8) {
				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
			} else {
				*(u32 *)tmp_buf = (u32)(long)cur_arg;
			}
			tmp_buf += sizeof_cur_arg;
		}
		num_spec++;
	}

	err = 0;
out:
	if (err)
		bpf_bprintf_cleanup();
	return err;
}
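
/* Illustrative sketch, not part of this file: the parser above accepts a
 * restricted printf subset: %d/%i/%u/%x/%X with optional l/ll, %s, %pks/%pus,
 * plain %p variants, %pB and %pI4/%pi4/%pI6/%pi6. From a BPF program the
 * usual entry point is libbpf's BPF_SNPRINTF() convenience macro (assumed
 * here), which packs the variadic arguments into the raw_args/num_args pair
 * this function expects:
 *
 *	BPF_SNPRINTF(out, sizeof(out), "comm=%s ip=%pI4 cpu=%lu",
 *		     task->comm, &ip_be32, cpu);
 *
 * String and IP pointers are copied with the _nofault() accessors, so a bad
 * pointer produces an empty string instead of a fault.
 */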

#define MAX_SNPRINTF_VARARGS		12

BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
	   const void *, data, u32, data_len)
{
	int err, num_args;
	u32 *bin_args;

	if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
	    (data_len && !data))
		return -EINVAL;
	num_args = data_len / 8;

	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
	 * can safely give an unbounded size.
	 */
	err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
	if (err < 0)
		return err;

	err = bstr_printf(str, str_size, fmt, bin_args);

	bpf_bprintf_cleanup();

	return err + 1;
}

const struct bpf_func_proto bpf_snprintf_proto = {
	.func		= bpf_snprintf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_CONST_STR,
	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
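
/* Illustrative sketch, not part of this file: data must point at an array
 * of u64 cells (one per conversion, at most MAX_SNPRINTF_VARARGS) and
 * data_len is its size in bytes, a multiple of 8. The return value counts
 * the bytes that would have been written including the trailing NUL, so
 * truncation shows up as a value larger than str_size:
 *
 *	static char out[64];
 *	u64 args[] = { pid, (u64)(long)name };	// hypothetical values
 *
 *	long n = bpf_snprintf(out, sizeof(out), "%d:%s", args, sizeof(args));
 *	if (n > sizeof(out))
 *		;	// output was truncated
 */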

const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
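
/* Illustrative sketch, not part of this file: as the comment at the top of
 * this file describes, a subsystem exposes these helpers by returning their
 * protos from its verifier_ops->get_func_proto() callback, usually falling
 * back to bpf_base_func_proto() below for anything it does not handle
 * itself (foo_func_proto is a hypothetical example):
 *
 *	static const struct bpf_func_proto *
 *	foo_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_get_current_comm:
 *			return &bpf_get_current_comm_proto;
 *		default:
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 */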

const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_raw_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_ktime_get_coarse_ns:
		return &bpf_ktime_get_coarse_ns_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	default:
		break;
	}

	if (!bpf_capable())
		return NULL;

	switch (func_id) {
	case BPF_FUNC_spin_lock:
		return &bpf_spin_lock_proto;
	case BPF_FUNC_spin_unlock:
		return &bpf_spin_unlock_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	default:
		break;
	}

	if (!perfmon_capable())
		return NULL;

	switch (func_id) {
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		return &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return &bpf_probe_read_kernel_str_proto;
	case BPF_FUNC_snprintf_btf:
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_snprintf:
		return &bpf_snprintf_proto;
	default:
		return NULL;
	}
}