1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2021 Facebook */ 3 4 #define _GNU_SOURCE /* See feature_test_macros(7) */ 5 #include <unistd.h> 6 #include <sched.h> 7 #include <pthread.h> 8 #include <sys/syscall.h> /* For SYS_xxx definitions */ 9 #include <sys/types.h> 10 #include <sys/eventfd.h> 11 #include <sys/mman.h> 12 #include <test_progs.h> 13 #include <bpf/btf.h> 14 #include "task_local_storage_helpers.h" 15 #include "task_local_storage.skel.h" 16 #include "task_local_storage_exit_creds.skel.h" 17 #include "task_ls_recursion.skel.h" 18 #include "task_storage_nodeadlock.skel.h" 19 #include "uptr_test_common.h" 20 #include "task_ls_uptr.skel.h" 21 #include "uptr_update_failure.skel.h" 22 #include "uptr_failure.skel.h" 23 #include "uptr_map_failure.skel.h" 24 25 static void test_sys_enter_exit(void) 26 { 27 struct task_local_storage *skel; 28 pid_t pid = sys_gettid(); 29 int err; 30 31 skel = task_local_storage__open_and_load(); 32 if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 33 return; 34 35 err = task_local_storage__attach(skel); 36 if (!ASSERT_OK(err, "skel_attach")) 37 goto out; 38 39 /* Set target_pid after attach so that syscalls made during 40 * attach are not counted. 41 */ 42 skel->bss->target_pid = pid; 43 44 sys_gettid(); 45 sys_gettid(); 46 47 skel->bss->target_pid = 0; 48 49 /* 2x gettid syscalls */ 50 ASSERT_EQ(skel->bss->update_err, 0, "update_err"); 51 ASSERT_EQ(skel->bss->enter_cnt, 2, "enter_cnt"); 52 ASSERT_EQ(skel->bss->exit_cnt, 2, "exit_cnt"); 53 ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); 54 out: 55 task_local_storage__destroy(skel); 56 } 57 58 static void test_exit_creds(void) 59 { 60 struct task_local_storage_exit_creds *skel; 61 int err, run_count, sync_rcu_calls = 0; 62 const int MAX_SYNC_RCU_CALLS = 1000; 63 64 skel = task_local_storage_exit_creds__open_and_load(); 65 if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 66 return; 67 68 err = task_local_storage_exit_creds__attach(skel); 69 if (!ASSERT_OK(err, "skel_attach")) 70 goto out; 71 72 /* trigger at least one exit_creds() */ 73 if (CHECK_FAIL(system("ls > /dev/null"))) 74 goto out; 75 76 /* kern_sync_rcu is not enough on its own as the read section we want 77 * to wait for may start after we enter synchronize_rcu, so our call 78 * won't wait for the section to finish. Loop on the run counter 79 * as well to ensure the program has run. 80 */ 81 do { 82 kern_sync_rcu(); 83 run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST); 84 } while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS); 85 86 ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS, 87 "sync_rcu count too high"); 88 ASSERT_NEQ(run_count, 0, "run_count"); 89 ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count"); 90 ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count"); 91 out: 92 task_local_storage_exit_creds__destroy(skel); 93 } 94 95 static void test_recursion(void) 96 { 97 int err, map_fd, prog_fd, task_fd; 98 struct task_ls_recursion *skel; 99 struct bpf_prog_info info; 100 __u32 info_len = sizeof(info); 101 long value; 102 103 task_fd = sys_pidfd_open(getpid(), 0); 104 if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open")) 105 return; 106 107 skel = task_ls_recursion__open_and_load(); 108 if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 109 goto out; 110 111 err = task_ls_recursion__attach(skel); 112 if (!ASSERT_OK(err, "skel_attach")) 113 goto out; 114 115 /* trigger sys_enter, make sure it does not cause deadlock */ 116 skel->bss->test_pid = getpid(); 117 sys_gettid(); 118 skel->bss->test_pid = 0; 119 task_ls_recursion__detach(skel); 120 121 /* Refer to the comment in BPF_PROG(on_update) for 122 * the explanation on the value 200 and 1. 123 */ 124 map_fd = bpf_map__fd(skel->maps.map_a); 125 err = bpf_map_lookup_elem(map_fd, &task_fd, &value); 126 ASSERT_OK(err, "lookup map_a"); 127 ASSERT_EQ(value, 200, "map_a value"); 128 ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); 129 130 map_fd = bpf_map__fd(skel->maps.map_b); 131 err = bpf_map_lookup_elem(map_fd, &task_fd, &value); 132 ASSERT_OK(err, "lookup map_b"); 133 ASSERT_EQ(value, 1, "map_b value"); 134 135 prog_fd = bpf_program__fd(skel->progs.on_update); 136 memset(&info, 0, sizeof(info)); 137 err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); 138 ASSERT_OK(err, "get prog info"); 139 ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion"); 140 141 prog_fd = bpf_program__fd(skel->progs.on_enter); 142 memset(&info, 0, sizeof(info)); 143 err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); 144 ASSERT_OK(err, "get prog info"); 145 ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion"); 146 147 out: 148 close(task_fd); 149 task_ls_recursion__destroy(skel); 150 } 151 152 static bool stop; 153 154 static void waitall(const pthread_t *tids, int nr) 155 { 156 int i; 157 158 stop = true; 159 for (i = 0; i < nr; i++) 160 pthread_join(tids[i], NULL); 161 } 162 163 static void *sock_create_loop(void *arg) 164 { 165 struct task_storage_nodeadlock *skel = arg; 166 int fd; 167 168 while (!stop) { 169 fd = socket(AF_INET, SOCK_STREAM, 0); 170 close(fd); 171 if (skel->bss->nr_get_errs || skel->bss->nr_del_errs) 172 stop = true; 173 } 174 175 return NULL; 176 } 177 178 static void test_nodeadlock(void) 179 { 180 struct task_storage_nodeadlock *skel; 181 struct bpf_prog_info info = {}; 182 __u32 info_len = sizeof(info); 183 const int nr_threads = 32; 184 pthread_t tids[nr_threads]; 185 int i, prog_fd, err; 186 cpu_set_t old, new; 187 188 /* Pin all threads to one cpu to increase the chance of preemption 189 * in a sleepable bpf prog. 190 */ 191 CPU_ZERO(&new); 192 CPU_SET(0, &new); 193 err = sched_getaffinity(getpid(), sizeof(old), &old); 194 if (!ASSERT_OK(err, "getaffinity")) 195 return; 196 err = sched_setaffinity(getpid(), sizeof(new), &new); 197 if (!ASSERT_OK(err, "setaffinity")) 198 return; 199 200 skel = task_storage_nodeadlock__open_and_load(); 201 if (!ASSERT_OK_PTR(skel, "open_and_load")) 202 goto done; 203 204 /* Unnecessary recursion and deadlock detection are reproducible 205 * in the preemptible kernel. 206 */ 207 if (!skel->kconfig->CONFIG_PREEMPTION) { 208 test__skip(); 209 goto done; 210 } 211 212 err = task_storage_nodeadlock__attach(skel); 213 ASSERT_OK(err, "attach prog"); 214 215 for (i = 0; i < nr_threads; i++) { 216 err = pthread_create(&tids[i], NULL, sock_create_loop, skel); 217 if (err) { 218 /* Only assert once here to avoid excessive 219 * PASS printing during test failure. 220 */ 221 ASSERT_OK(err, "pthread_create"); 222 waitall(tids, i); 223 goto done; 224 } 225 } 226 227 /* With 32 threads, 1s is enough to reproduce the issue */ 228 sleep(1); 229 waitall(tids, nr_threads); 230 231 info_len = sizeof(info); 232 prog_fd = bpf_program__fd(skel->progs.socket_post_create); 233 err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); 234 ASSERT_OK(err, "get prog info"); 235 ASSERT_EQ(info.recursion_misses, 0, "prog recursion"); 236 237 ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy"); 238 ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); 239 240 done: 241 task_storage_nodeadlock__destroy(skel); 242 sched_setaffinity(getpid(), sizeof(old), &old); 243 } 244 245 static struct user_data udata __attribute__((aligned(16))) = { 246 .a = 1, 247 .b = 2, 248 }; 249 250 static struct user_data udata2 __attribute__((aligned(16))) = { 251 .a = 3, 252 .b = 4, 253 }; 254 255 static void check_udata2(int expected) 256 { 257 udata2.result = udata2.nested_result = 0; 258 usleep(1); 259 ASSERT_EQ(udata2.result, expected, "udata2.result"); 260 ASSERT_EQ(udata2.nested_result, expected, "udata2.nested_result"); 261 } 262 263 static void test_uptr_basic(void) 264 { 265 int map_fd, parent_task_fd, ev_fd; 266 struct value_type value = {}; 267 struct task_ls_uptr *skel; 268 pid_t child_pid, my_tid; 269 __u64 ev_dummy_data = 1; 270 int err; 271 272 my_tid = sys_gettid(); 273 parent_task_fd = sys_pidfd_open(my_tid, 0); 274 if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd")) 275 return; 276 277 ev_fd = eventfd(0, 0); 278 if (!ASSERT_OK_FD(ev_fd, "ev_fd")) { 279 close(parent_task_fd); 280 return; 281 } 282 283 skel = task_ls_uptr__open_and_load(); 284 if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 285 goto out; 286 287 map_fd = bpf_map__fd(skel->maps.datamap); 288 value.udata = &udata; 289 value.nested.udata = &udata; 290 err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST); 291 if (!ASSERT_OK(err, "update_elem(udata)")) 292 goto out; 293 294 err = task_ls_uptr__attach(skel); 295 if (!ASSERT_OK(err, "skel_attach")) 296 goto out; 297 298 child_pid = fork(); 299 if (!ASSERT_NEQ(child_pid, -1, "fork")) 300 goto out; 301 302 /* Call syscall in the child process, but access the map value of 303 * the parent process in the BPF program to check if the user kptr 304 * is translated/mapped correctly. 305 */ 306 if (child_pid == 0) { 307 /* child */ 308 309 /* Overwrite the user_data in the child process to check if 310 * the BPF program accesses the user_data of the parent. 311 */ 312 udata.a = 0; 313 udata.b = 0; 314 315 /* Wait for the parent to set child_pid */ 316 read(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data)); 317 exit(0); 318 } 319 320 skel->bss->parent_pid = my_tid; 321 skel->bss->target_pid = child_pid; 322 323 write(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data)); 324 325 err = waitpid(child_pid, NULL, 0); 326 ASSERT_EQ(err, child_pid, "waitpid"); 327 ASSERT_EQ(udata.result, MAGIC_VALUE + udata.a + udata.b, "udata.result"); 328 ASSERT_EQ(udata.nested_result, MAGIC_VALUE + udata.a + udata.b, "udata.nested_result"); 329 330 skel->bss->target_pid = my_tid; 331 332 /* update_elem: uptr changes from udata1 to udata2 */ 333 value.udata = &udata2; 334 value.nested.udata = &udata2; 335 err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); 336 if (!ASSERT_OK(err, "update_elem(udata2)")) 337 goto out; 338 check_udata2(MAGIC_VALUE + udata2.a + udata2.b); 339 340 /* update_elem: uptr changes from udata2 uptr to NULL */ 341 memset(&value, 0, sizeof(value)); 342 err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); 343 if (!ASSERT_OK(err, "update_elem(udata2)")) 344 goto out; 345 check_udata2(0); 346 347 /* update_elem: uptr changes from NULL to udata2 */ 348 value.udata = &udata2; 349 value.nested.udata = &udata2; 350 err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); 351 if (!ASSERT_OK(err, "update_elem(udata2)")) 352 goto out; 353 check_udata2(MAGIC_VALUE + udata2.a + udata2.b); 354 355 /* Check if user programs can access the value of user kptrs 356 * through bpf_map_lookup_elem(). Make sure the kernel value is not 357 * leaked. 358 */ 359 err = bpf_map_lookup_elem(map_fd, &parent_task_fd, &value); 360 if (!ASSERT_OK(err, "bpf_map_lookup_elem")) 361 goto out; 362 ASSERT_EQ(value.udata, NULL, "value.udata"); 363 ASSERT_EQ(value.nested.udata, NULL, "value.nested.udata"); 364 365 /* delete_elem */ 366 err = bpf_map_delete_elem(map_fd, &parent_task_fd); 367 ASSERT_OK(err, "delete_elem(udata2)"); 368 check_udata2(0); 369 370 /* update_elem: add uptr back to test map_free */ 371 value.udata = &udata2; 372 value.nested.udata = &udata2; 373 err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST); 374 ASSERT_OK(err, "update_elem(udata2)"); 375 376 out: 377 task_ls_uptr__destroy(skel); 378 close(ev_fd); 379 close(parent_task_fd); 380 } 381 382 static void test_uptr_across_pages(void) 383 { 384 int page_size = getpagesize(); 385 struct value_type value = {}; 386 struct task_ls_uptr *skel; 387 int err, task_fd, map_fd; 388 void *mem; 389 390 task_fd = sys_pidfd_open(getpid(), 0); 391 if (!ASSERT_OK_FD(task_fd, "task_fd")) 392 return; 393 394 mem = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, 395 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 396 if (!ASSERT_OK_PTR(mem, "mmap(page_size * 2)")) { 397 close(task_fd); 398 return; 399 } 400 401 skel = task_ls_uptr__open_and_load(); 402 if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 403 goto out; 404 405 map_fd = bpf_map__fd(skel->maps.datamap); 406 value.udata = mem + page_size - offsetof(struct user_data, b); 407 err = bpf_map_update_elem(map_fd, &task_fd, &value, 0); 408 if (!ASSERT_ERR(err, "update_elem(udata)")) 409 goto out; 410 ASSERT_EQ(errno, EOPNOTSUPP, "errno"); 411 412 value.udata = mem + page_size - sizeof(struct user_data); 413 err = bpf_map_update_elem(map_fd, &task_fd, &value, 0); 414 ASSERT_OK(err, "update_elem(udata)"); 415 416 out: 417 task_ls_uptr__destroy(skel); 418 close(task_fd); 419 munmap(mem, page_size * 2); 420 } 421 422 static void test_uptr_update_failure(void) 423 { 424 struct value_lock_type value = {}; 425 struct uptr_update_failure *skel; 426 int err, task_fd, map_fd; 427 428 task_fd = sys_pidfd_open(getpid(), 0); 429 if (!ASSERT_OK_FD(task_fd, "task_fd")) 430 return; 431 432 skel = uptr_update_failure__open_and_load(); 433 if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 434 goto out; 435 436 map_fd = bpf_map__fd(skel->maps.datamap); 437 438 value.udata = &udata; 439 err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_F_LOCK); 440 if (!ASSERT_ERR(err, "update_elem(udata, BPF_F_LOCK)")) 441 goto out; 442 ASSERT_EQ(errno, EOPNOTSUPP, "errno"); 443 444 err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_EXIST); 445 if (!ASSERT_ERR(err, "update_elem(udata, BPF_EXIST)")) 446 goto out; 447 ASSERT_EQ(errno, ENOENT, "errno"); 448 449 err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST); 450 if (!ASSERT_OK(err, "update_elem(udata, BPF_NOEXIST)")) 451 goto out; 452 453 value.udata = &udata2; 454 err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST); 455 if (!ASSERT_ERR(err, "update_elem(udata2, BPF_NOEXIST)")) 456 goto out; 457 ASSERT_EQ(errno, EEXIST, "errno"); 458 459 out: 460 uptr_update_failure__destroy(skel); 461 close(task_fd); 462 } 463 464 static void test_uptr_map_failure(const char *map_name, int expected_errno) 465 { 466 LIBBPF_OPTS(bpf_map_create_opts, create_attr); 467 struct uptr_map_failure *skel; 468 struct bpf_map *map; 469 struct btf *btf; 470 int map_fd, err; 471 472 skel = uptr_map_failure__open(); 473 if (!ASSERT_OK_PTR(skel, "uptr_map_failure__open")) 474 return; 475 476 map = bpf_object__find_map_by_name(skel->obj, map_name); 477 btf = bpf_object__btf(skel->obj); 478 err = btf__load_into_kernel(btf); 479 if (!ASSERT_OK(err, "btf__load_into_kernel")) 480 goto done; 481 482 create_attr.map_flags = bpf_map__map_flags(map); 483 create_attr.btf_fd = btf__fd(btf); 484 create_attr.btf_key_type_id = bpf_map__btf_key_type_id(map); 485 create_attr.btf_value_type_id = bpf_map__btf_value_type_id(map); 486 map_fd = bpf_map_create(bpf_map__type(map), map_name, 487 bpf_map__key_size(map), bpf_map__value_size(map), 488 0, &create_attr); 489 if (ASSERT_ERR_FD(map_fd, "map_create")) 490 ASSERT_EQ(errno, expected_errno, "errno"); 491 else 492 close(map_fd); 493 494 done: 495 uptr_map_failure__destroy(skel); 496 } 497 498 void test_task_local_storage(void) 499 { 500 if (test__start_subtest("sys_enter_exit")) 501 test_sys_enter_exit(); 502 if (test__start_subtest("exit_creds")) 503 test_exit_creds(); 504 if (test__start_subtest("recursion")) 505 test_recursion(); 506 if (test__start_subtest("nodeadlock")) 507 test_nodeadlock(); 508 if (test__start_subtest("uptr_basic")) 509 test_uptr_basic(); 510 if (test__start_subtest("uptr_across_pages")) 511 test_uptr_across_pages(); 512 if (test__start_subtest("uptr_update_failure")) 513 test_uptr_update_failure(); 514 if (test__start_subtest("uptr_map_failure_e2big")) { 515 if (getpagesize() == PAGE_SIZE) 516 test_uptr_map_failure("large_uptr_map", E2BIG); 517 else 518 test__skip(); 519 } 520 if (test__start_subtest("uptr_map_failure_size0")) 521 test_uptr_map_failure("empty_uptr_map", EINVAL); 522 if (test__start_subtest("uptr_map_failure_kstruct")) 523 test_uptr_map_failure("kstruct_uptr_map", EINVAL); 524 RUN_TESTS(uptr_failure); 525 } 526