1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, but 8 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * General Public License for more details. 11 */ 12 #include <linux/bpf.h> 13 #include <linux/syscalls.h> 14 #include <linux/slab.h> 15 #include <linux/anon_inodes.h> 16 #include <linux/file.h> 17 #include <linux/license.h> 18 #include <linux/filter.h> 19 #include <linux/version.h> 20 21 static LIST_HEAD(bpf_map_types); 22 23 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 24 { 25 struct bpf_map_type_list *tl; 26 struct bpf_map *map; 27 28 list_for_each_entry(tl, &bpf_map_types, list_node) { 29 if (tl->type == attr->map_type) { 30 map = tl->ops->map_alloc(attr); 31 if (IS_ERR(map)) 32 return map; 33 map->ops = tl->ops; 34 map->map_type = attr->map_type; 35 return map; 36 } 37 } 38 return ERR_PTR(-EINVAL); 39 } 40 41 /* boot time registration of different map implementations */ 42 void bpf_register_map_type(struct bpf_map_type_list *tl) 43 { 44 list_add(&tl->list_node, &bpf_map_types); 45 } 46 47 /* called from workqueue */ 48 static void bpf_map_free_deferred(struct work_struct *work) 49 { 50 struct bpf_map *map = container_of(work, struct bpf_map, work); 51 52 /* implementation dependent freeing */ 53 map->ops->map_free(map); 54 } 55 56 /* decrement map refcnt and schedule it for freeing via workqueue 57 * (unrelying map implementation ops->map_free() might sleep) 58 */ 59 void bpf_map_put(struct bpf_map *map) 60 { 61 if (atomic_dec_and_test(&map->refcnt)) { 62 INIT_WORK(&map->work, bpf_map_free_deferred); 63 schedule_work(&map->work); 64 } 65 } 66 67 static int bpf_map_release(struct inode *inode, struct file *filp) 68 { 69 struct bpf_map *map = filp->private_data; 70 71 bpf_map_put(map); 72 return 0; 73 } 74 75 static const struct file_operations bpf_map_fops = { 76 .release = bpf_map_release, 77 }; 78 79 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 80 #define CHECK_ATTR(CMD) \ 81 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 82 sizeof(attr->CMD##_LAST_FIELD), 0, \ 83 sizeof(*attr) - \ 84 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 85 sizeof(attr->CMD##_LAST_FIELD)) != NULL 86 87 #define BPF_MAP_CREATE_LAST_FIELD max_entries 88 /* called via syscall */ 89 static int map_create(union bpf_attr *attr) 90 { 91 struct bpf_map *map; 92 int err; 93 94 err = CHECK_ATTR(BPF_MAP_CREATE); 95 if (err) 96 return -EINVAL; 97 98 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 99 map = find_and_alloc_map(attr); 100 if (IS_ERR(map)) 101 return PTR_ERR(map); 102 103 atomic_set(&map->refcnt, 1); 104 105 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); 106 107 if (err < 0) 108 /* failed to allocate fd */ 109 goto free_map; 110 111 return err; 112 113 free_map: 114 map->ops->map_free(map); 115 return err; 116 } 117 118 /* if error is returned, fd is released. 119 * On success caller should complete fd access with matching fdput() 120 */ 121 struct bpf_map *bpf_map_get(struct fd f) 122 { 123 struct bpf_map *map; 124 125 if (!f.file) 126 return ERR_PTR(-EBADF); 127 128 if (f.file->f_op != &bpf_map_fops) { 129 fdput(f); 130 return ERR_PTR(-EINVAL); 131 } 132 133 map = f.file->private_data; 134 135 return map; 136 } 137 138 /* helper to convert user pointers passed inside __aligned_u64 fields */ 139 static void __user *u64_to_ptr(__u64 val) 140 { 141 return (void __user *) (unsigned long) val; 142 } 143 144 /* last field in 'union bpf_attr' used by this command */ 145 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 146 147 static int map_lookup_elem(union bpf_attr *attr) 148 { 149 void __user *ukey = u64_to_ptr(attr->key); 150 void __user *uvalue = u64_to_ptr(attr->value); 151 int ufd = attr->map_fd; 152 struct fd f = fdget(ufd); 153 struct bpf_map *map; 154 void *key, *value, *ptr; 155 int err; 156 157 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 158 return -EINVAL; 159 160 map = bpf_map_get(f); 161 if (IS_ERR(map)) 162 return PTR_ERR(map); 163 164 err = -ENOMEM; 165 key = kmalloc(map->key_size, GFP_USER); 166 if (!key) 167 goto err_put; 168 169 err = -EFAULT; 170 if (copy_from_user(key, ukey, map->key_size) != 0) 171 goto free_key; 172 173 err = -ENOMEM; 174 value = kmalloc(map->value_size, GFP_USER); 175 if (!value) 176 goto free_key; 177 178 rcu_read_lock(); 179 ptr = map->ops->map_lookup_elem(map, key); 180 if (ptr) 181 memcpy(value, ptr, map->value_size); 182 rcu_read_unlock(); 183 184 err = -ENOENT; 185 if (!ptr) 186 goto free_value; 187 188 err = -EFAULT; 189 if (copy_to_user(uvalue, value, map->value_size) != 0) 190 goto free_value; 191 192 err = 0; 193 194 free_value: 195 kfree(value); 196 free_key: 197 kfree(key); 198 err_put: 199 fdput(f); 200 return err; 201 } 202 203 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 204 205 static int map_update_elem(union bpf_attr *attr) 206 { 207 void __user *ukey = u64_to_ptr(attr->key); 208 void __user *uvalue = u64_to_ptr(attr->value); 209 int ufd = attr->map_fd; 210 struct fd f = fdget(ufd); 211 struct bpf_map *map; 212 void *key, *value; 213 int err; 214 215 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 216 return -EINVAL; 217 218 map = bpf_map_get(f); 219 if (IS_ERR(map)) 220 return PTR_ERR(map); 221 222 err = -ENOMEM; 223 key = kmalloc(map->key_size, GFP_USER); 224 if (!key) 225 goto err_put; 226 227 err = -EFAULT; 228 if (copy_from_user(key, ukey, map->key_size) != 0) 229 goto free_key; 230 231 err = -ENOMEM; 232 value = kmalloc(map->value_size, GFP_USER); 233 if (!value) 234 goto free_key; 235 236 err = -EFAULT; 237 if (copy_from_user(value, uvalue, map->value_size) != 0) 238 goto free_value; 239 240 /* eBPF program that use maps are running under rcu_read_lock(), 241 * therefore all map accessors rely on this fact, so do the same here 242 */ 243 rcu_read_lock(); 244 err = map->ops->map_update_elem(map, key, value, attr->flags); 245 rcu_read_unlock(); 246 247 free_value: 248 kfree(value); 249 free_key: 250 kfree(key); 251 err_put: 252 fdput(f); 253 return err; 254 } 255 256 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 257 258 static int map_delete_elem(union bpf_attr *attr) 259 { 260 void __user *ukey = u64_to_ptr(attr->key); 261 int ufd = attr->map_fd; 262 struct fd f = fdget(ufd); 263 struct bpf_map *map; 264 void *key; 265 int err; 266 267 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 268 return -EINVAL; 269 270 map = bpf_map_get(f); 271 if (IS_ERR(map)) 272 return PTR_ERR(map); 273 274 err = -ENOMEM; 275 key = kmalloc(map->key_size, GFP_USER); 276 if (!key) 277 goto err_put; 278 279 err = -EFAULT; 280 if (copy_from_user(key, ukey, map->key_size) != 0) 281 goto free_key; 282 283 rcu_read_lock(); 284 err = map->ops->map_delete_elem(map, key); 285 rcu_read_unlock(); 286 287 free_key: 288 kfree(key); 289 err_put: 290 fdput(f); 291 return err; 292 } 293 294 /* last field in 'union bpf_attr' used by this command */ 295 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 296 297 static int map_get_next_key(union bpf_attr *attr) 298 { 299 void __user *ukey = u64_to_ptr(attr->key); 300 void __user *unext_key = u64_to_ptr(attr->next_key); 301 int ufd = attr->map_fd; 302 struct fd f = fdget(ufd); 303 struct bpf_map *map; 304 void *key, *next_key; 305 int err; 306 307 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 308 return -EINVAL; 309 310 map = bpf_map_get(f); 311 if (IS_ERR(map)) 312 return PTR_ERR(map); 313 314 err = -ENOMEM; 315 key = kmalloc(map->key_size, GFP_USER); 316 if (!key) 317 goto err_put; 318 319 err = -EFAULT; 320 if (copy_from_user(key, ukey, map->key_size) != 0) 321 goto free_key; 322 323 err = -ENOMEM; 324 next_key = kmalloc(map->key_size, GFP_USER); 325 if (!next_key) 326 goto free_key; 327 328 rcu_read_lock(); 329 err = map->ops->map_get_next_key(map, key, next_key); 330 rcu_read_unlock(); 331 if (err) 332 goto free_next_key; 333 334 err = -EFAULT; 335 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 336 goto free_next_key; 337 338 err = 0; 339 340 free_next_key: 341 kfree(next_key); 342 free_key: 343 kfree(key); 344 err_put: 345 fdput(f); 346 return err; 347 } 348 349 static LIST_HEAD(bpf_prog_types); 350 351 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 352 { 353 struct bpf_prog_type_list *tl; 354 355 list_for_each_entry(tl, &bpf_prog_types, list_node) { 356 if (tl->type == type) { 357 prog->aux->ops = tl->ops; 358 prog->aux->prog_type = type; 359 return 0; 360 } 361 } 362 return -EINVAL; 363 } 364 365 void bpf_register_prog_type(struct bpf_prog_type_list *tl) 366 { 367 list_add(&tl->list_node, &bpf_prog_types); 368 } 369 370 /* fixup insn->imm field of bpf_call instructions: 371 * if (insn->imm == BPF_FUNC_map_lookup_elem) 372 * insn->imm = bpf_map_lookup_elem - __bpf_call_base; 373 * else if (insn->imm == BPF_FUNC_map_update_elem) 374 * insn->imm = bpf_map_update_elem - __bpf_call_base; 375 * else ... 376 * 377 * this function is called after eBPF program passed verification 378 */ 379 static void fixup_bpf_calls(struct bpf_prog *prog) 380 { 381 const struct bpf_func_proto *fn; 382 int i; 383 384 for (i = 0; i < prog->len; i++) { 385 struct bpf_insn *insn = &prog->insnsi[i]; 386 387 if (insn->code == (BPF_JMP | BPF_CALL)) { 388 /* we reach here when program has bpf_call instructions 389 * and it passed bpf_check(), means that 390 * ops->get_func_proto must have been supplied, check it 391 */ 392 BUG_ON(!prog->aux->ops->get_func_proto); 393 394 fn = prog->aux->ops->get_func_proto(insn->imm); 395 /* all functions that have prototype and verifier allowed 396 * programs to call them, must be real in-kernel functions 397 */ 398 BUG_ON(!fn->func); 399 insn->imm = fn->func - __bpf_call_base; 400 } 401 } 402 } 403 404 /* drop refcnt on maps used by eBPF program and free auxilary data */ 405 static void free_used_maps(struct bpf_prog_aux *aux) 406 { 407 int i; 408 409 for (i = 0; i < aux->used_map_cnt; i++) 410 bpf_map_put(aux->used_maps[i]); 411 412 kfree(aux->used_maps); 413 } 414 415 void bpf_prog_put(struct bpf_prog *prog) 416 { 417 if (atomic_dec_and_test(&prog->aux->refcnt)) { 418 free_used_maps(prog->aux); 419 bpf_prog_free(prog); 420 } 421 } 422 423 static int bpf_prog_release(struct inode *inode, struct file *filp) 424 { 425 struct bpf_prog *prog = filp->private_data; 426 427 bpf_prog_put(prog); 428 return 0; 429 } 430 431 static const struct file_operations bpf_prog_fops = { 432 .release = bpf_prog_release, 433 }; 434 435 static struct bpf_prog *get_prog(struct fd f) 436 { 437 struct bpf_prog *prog; 438 439 if (!f.file) 440 return ERR_PTR(-EBADF); 441 442 if (f.file->f_op != &bpf_prog_fops) { 443 fdput(f); 444 return ERR_PTR(-EINVAL); 445 } 446 447 prog = f.file->private_data; 448 449 return prog; 450 } 451 452 /* called by sockets/tracing/seccomp before attaching program to an event 453 * pairs with bpf_prog_put() 454 */ 455 struct bpf_prog *bpf_prog_get(u32 ufd) 456 { 457 struct fd f = fdget(ufd); 458 struct bpf_prog *prog; 459 460 prog = get_prog(f); 461 462 if (IS_ERR(prog)) 463 return prog; 464 465 atomic_inc(&prog->aux->refcnt); 466 fdput(f); 467 return prog; 468 } 469 470 /* last field in 'union bpf_attr' used by this command */ 471 #define BPF_PROG_LOAD_LAST_FIELD kern_version 472 473 static int bpf_prog_load(union bpf_attr *attr) 474 { 475 enum bpf_prog_type type = attr->prog_type; 476 struct bpf_prog *prog; 477 int err; 478 char license[128]; 479 bool is_gpl; 480 481 if (CHECK_ATTR(BPF_PROG_LOAD)) 482 return -EINVAL; 483 484 /* copy eBPF program license from user space */ 485 if (strncpy_from_user(license, u64_to_ptr(attr->license), 486 sizeof(license) - 1) < 0) 487 return -EFAULT; 488 license[sizeof(license) - 1] = 0; 489 490 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 491 is_gpl = license_is_gpl_compatible(license); 492 493 if (attr->insn_cnt >= BPF_MAXINSNS) 494 return -EINVAL; 495 496 if (type == BPF_PROG_TYPE_KPROBE && 497 attr->kern_version != LINUX_VERSION_CODE) 498 return -EINVAL; 499 500 /* plain bpf_prog allocation */ 501 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 502 if (!prog) 503 return -ENOMEM; 504 505 prog->len = attr->insn_cnt; 506 507 err = -EFAULT; 508 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), 509 prog->len * sizeof(struct bpf_insn)) != 0) 510 goto free_prog; 511 512 prog->orig_prog = NULL; 513 prog->jited = false; 514 515 atomic_set(&prog->aux->refcnt, 1); 516 prog->aux->is_gpl_compatible = is_gpl; 517 518 /* find program type: socket_filter vs tracing_filter */ 519 err = find_prog_type(type, prog); 520 if (err < 0) 521 goto free_prog; 522 523 /* run eBPF verifier */ 524 err = bpf_check(prog, attr); 525 526 if (err < 0) 527 goto free_used_maps; 528 529 /* fixup BPF_CALL->imm field */ 530 fixup_bpf_calls(prog); 531 532 /* eBPF program is ready to be JITed */ 533 bpf_prog_select_runtime(prog); 534 535 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); 536 537 if (err < 0) 538 /* failed to allocate fd */ 539 goto free_used_maps; 540 541 return err; 542 543 free_used_maps: 544 free_used_maps(prog->aux); 545 free_prog: 546 bpf_prog_free(prog); 547 return err; 548 } 549 550 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 551 { 552 union bpf_attr attr = {}; 553 int err; 554 555 /* the syscall is limited to root temporarily. This restriction will be 556 * lifted when security audit is clean. Note that eBPF+tracing must have 557 * this restriction, since it may pass kernel data to user space 558 */ 559 if (!capable(CAP_SYS_ADMIN)) 560 return -EPERM; 561 562 if (!access_ok(VERIFY_READ, uattr, 1)) 563 return -EFAULT; 564 565 if (size > PAGE_SIZE) /* silly large */ 566 return -E2BIG; 567 568 /* If we're handed a bigger struct than we know of, 569 * ensure all the unknown bits are 0 - i.e. new 570 * user-space does not rely on any kernel feature 571 * extensions we dont know about yet. 572 */ 573 if (size > sizeof(attr)) { 574 unsigned char __user *addr; 575 unsigned char __user *end; 576 unsigned char val; 577 578 addr = (void __user *)uattr + sizeof(attr); 579 end = (void __user *)uattr + size; 580 581 for (; addr < end; addr++) { 582 err = get_user(val, addr); 583 if (err) 584 return err; 585 if (val) 586 return -E2BIG; 587 } 588 size = sizeof(attr); 589 } 590 591 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 592 if (copy_from_user(&attr, uattr, size) != 0) 593 return -EFAULT; 594 595 switch (cmd) { 596 case BPF_MAP_CREATE: 597 err = map_create(&attr); 598 break; 599 case BPF_MAP_LOOKUP_ELEM: 600 err = map_lookup_elem(&attr); 601 break; 602 case BPF_MAP_UPDATE_ELEM: 603 err = map_update_elem(&attr); 604 break; 605 case BPF_MAP_DELETE_ELEM: 606 err = map_delete_elem(&attr); 607 break; 608 case BPF_MAP_GET_NEXT_KEY: 609 err = map_get_next_key(&attr); 610 break; 611 case BPF_PROG_LOAD: 612 err = bpf_prog_load(&attr); 613 break; 614 default: 615 err = -EINVAL; 616 break; 617 } 618 619 return err; 620 } 621