// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
	      struct bpf_local_storage_elem *selem)
{
	return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
}

static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
	struct bpf_map *map = &smap->map;

	if (!map->ops->map_local_storage_charge)
		return 0;

	return map->ops->map_local_storage_charge(smap, owner, size);
}

static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
			 u32 size)
{
	struct bpf_map *map = &smap->map;

	if (map->ops->map_local_storage_uncharge)
		map->ops->map_local_storage_uncharge(smap, owner, size);
}

static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
	struct bpf_map *map = &smap->map;

	return map->ops->map_owner_storage_ptr(owner);
}

static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed_lockless(&selem->snode);
}

static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed(&selem->snode);
}

static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed_lockless(&selem->map_node);
}

static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed(&selem->map_node);
}

struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
		void *value, bool swap_uptrs, gfp_t gfp_flags)
{
	struct bpf_local_storage_elem *selem;

	if (mem_charge(smap, owner, smap->elem_size))
		return NULL;

	if (smap->use_kmalloc_nolock) {
		selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
					       __GFP_ZERO, NUMA_NO_NODE);
	} else {
		selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
					gfp_flags | __GFP_NOWARN);
	}

	if (selem) {
		RCU_INIT_POINTER(SDATA(selem)->smap, smap);

		if (value) {
			/* No need to call check_and_init_map_value as memory is zero init */
			copy_map_value(&smap->map, SDATA(selem)->data, value);
			if (swap_uptrs)
				bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value);
		}
		return selem;
	}

	mem_uncharge(smap, owner, smap->elem_size);

	return NULL;
}

/* rcu tasks trace callback for use_kmalloc_nolock == false */
static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage *local_storage;

	/* If RCU Tasks Trace grace period implies RCU grace period, do
	 * kfree(), else do kfree_rcu().
	 */
	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
	if (rcu_trace_implies_rcu_gp())
		kfree(local_storage);
	else
		kfree_rcu(local_storage, rcu);
}

/* Handle use_kmalloc_nolock == false */
static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
				     bool vanilla_rcu)
{
	if (vanilla_rcu)
		kfree_rcu(local_storage, rcu);
	else
		call_rcu_tasks_trace(&local_storage->rcu,
				     __bpf_local_storage_free_trace_rcu);
}

static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage *local_storage;

	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
	kfree_nolock(local_storage);
}

static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
	if (rcu_trace_implies_rcu_gp())
		bpf_local_storage_free_rcu(rcu);
	else
		call_rcu(rcu, bpf_local_storage_free_rcu);
}

static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
				   bool reuse_now)
{
	if (!local_storage)
		return;

	if (!local_storage->use_kmalloc_nolock) {
		__bpf_local_storage_free(local_storage, reuse_now);
		return;
	}

	if (reuse_now) {
		call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
		return;
	}

	call_rcu_tasks_trace(&local_storage->rcu,
			     bpf_local_storage_free_trace_rcu);
}

/* rcu tasks trace callback for use_kmalloc_nolock == false */
static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage_elem *selem;

	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	if (rcu_trace_implies_rcu_gp())
		kfree(selem);
	else
		kfree_rcu(selem, rcu);
}

/* Handle use_kmalloc_nolock == false */
static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
			     bool vanilla_rcu)
{
	if (vanilla_rcu)
		kfree_rcu(selem, rcu);
	else
		call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;

	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	/* The bpf_local_storage_map_free will wait for rcu_barrier */
	smap = rcu_dereference_check(SDATA(selem)->smap, 1);

	migrate_disable();
	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
	migrate_enable();
	kfree_nolock(selem);
}

static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
	if (rcu_trace_implies_rcu_gp())
		bpf_selem_free_rcu(rcu);
	else
		call_rcu(rcu, bpf_selem_free_rcu);
}

void bpf_selem_free(struct bpf_local_storage_elem *selem,
		    bool reuse_now)
{
	struct bpf_local_storage_map *smap;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());

	if (!smap->use_kmalloc_nolock) {
		/*
		 * No uptr will be unpinned even when reuse_now == false, since
		 * uptr is only supported in task local storage, where
		 * smap->use_kmalloc_nolock == true.
		 */
		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
		__bpf_selem_free(selem, reuse_now);
		return;
	}

	if (reuse_now) {
		/*
		 * While it is okay to call bpf_obj_free_fields() (which unpins uptrs)
		 * when reuse_now == true, keep it in bpf_selem_free_rcu() for simplicity.
		 */
		call_rcu(&selem->rcu, bpf_selem_free_rcu);
		return;
	}

	call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
}

static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
{
	struct bpf_local_storage_elem *selem;
	struct hlist_node *n;

	/* The "_safe" iteration is needed.
	 * The loop is not removing the selem from the list
	 * but bpf_selem_free will use the selem->rcu_head
	 * which is union-ized with the selem->free_node.
	 */
	hlist_for_each_entry_safe(selem, n, list, free_node)
		bpf_selem_free(selem, reuse_now);
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 */
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
					    struct bpf_local_storage_elem *selem,
					    struct hlist_head *free_selem_list)
{
	struct bpf_local_storage_map *smap;
	bool free_local_storage;
	void *owner;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	owner = local_storage->owner;

	/* All uncharging on the owner must be done first.
	 * The owner may be freed once the last selem is unlinked
	 * from local_storage.
	 */
	mem_uncharge(smap, owner, smap->elem_size);

	free_local_storage = hlist_is_singular_node(&selem->snode,
						    &local_storage->list);
	if (free_local_storage) {
		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
		local_storage->owner = NULL;

		/* After this RCU_INIT, owner may be freed and cannot be used */
		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

		/* local_storage is not freed now. local_storage->lock is
		 * still held and raw_spin_unlock_bh(&local_storage->lock)
		 * will be done by the caller.
		 *
		 * Although the unlock will be done under
		 * rcu_read_lock(), it is more intuitive to
		 * read if the freeing of the storage is done
		 * after the raw_spin_unlock_bh(&local_storage->lock).
		 *
		 * Hence, a "bool free_local_storage" is returned
		 * to the caller, which then frees the storage after
		 * all the RCU grace periods have expired.
		 */
	}
	hlist_del_init_rcu(&selem->snode);
	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
	    SDATA(selem))
		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

	hlist_add_head(&selem->free_node, free_selem_list);

	if (rcu_access_pointer(local_storage->smap) == smap)
		RCU_INIT_POINTER(local_storage->smap, NULL);

	return free_local_storage;
}

static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
				     bool reuse_now)
{
	struct bpf_local_storage *local_storage;
	bool free_local_storage = false;
	HLIST_HEAD(selem_free_list);
	unsigned long flags;

	if (unlikely(!selem_linked_to_storage_lockless(selem)))
		/* selem has already been unlinked from sk */
		return;

	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());

	raw_spin_lock_irqsave(&local_storage->lock, flags);
	if (likely(selem_linked_to_storage(selem)))
		free_local_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, &selem_free_list);
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	bpf_selem_free_list(&selem_free_list, reuse_now);

	if (free_local_storage)
		bpf_local_storage_free(local_storage, reuse_now);
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
				   struct bpf_local_storage_elem *selem)
{
	RCU_INIT_POINTER(selem->local_storage, local_storage);
	hlist_add_head_rcu(&selem->snode, &local_storage->list);
}

static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map *smap;
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;

	if (unlikely(!selem_linked_to_map_lockless(selem)))
		/* selem has already been unlinked from smap */
		return;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	b = select_bucket(smap, selem);
	raw_spin_lock_irqsave(&b->lock, flags);
	if (likely(selem_linked_to_map(selem)))
		hlist_del_init_rcu(&selem->map_node);
	raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_link_map(struct bpf_local_storage_map *smap,
			struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
	unsigned long flags;

	raw_spin_lock_irqsave(&b->lock, flags);
	hlist_add_head_rcu(&selem->map_node, &b->list);
	raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
{
	/* Always unlink from map before unlinking from local_storage
	 * because selem will be freed after successfully unlinked from
	 * the local_storage.
	 */
	bpf_selem_unlink_map(selem);
	bpf_selem_unlink_storage(selem, reuse_now);
}

void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
				      struct bpf_local_storage_map *smap,
				      struct bpf_local_storage_elem *selem)
{
	unsigned long flags;

	/* spinlock is needed to avoid racing with the
	 * parallel delete. Otherwise, publishing an already
	 * deleted sdata to the cache will become a use-after-free
	 * problem in the next bpf_local_storage_lookup().
	 */
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	if (selem_linked_to_storage(selem))
		rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}

static int check_flags(const struct bpf_local_storage_data *old_sdata,
		       u64 map_flags)
{
	if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}

int bpf_local_storage_alloc(void *owner,
			    struct bpf_local_storage_map *smap,
			    struct bpf_local_storage_elem *first_selem,
			    gfp_t gfp_flags)
{
	struct bpf_local_storage *prev_storage, *storage;
	struct bpf_local_storage **owner_storage_ptr;
	int err;

	err = mem_charge(smap, owner, sizeof(*storage));
	if (err)
		return err;

	if (smap->use_kmalloc_nolock)
		storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
						 __GFP_ZERO, NUMA_NO_NODE);
	else
		storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
					  gfp_flags | __GFP_NOWARN);
	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
	}

	RCU_INIT_POINTER(storage->smap, smap);
	INIT_HLIST_HEAD(&storage->list);
	raw_spin_lock_init(&storage->lock);
	storage->owner = owner;
	storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;

	bpf_selem_link_storage_nolock(storage, first_selem);
	bpf_selem_link_map(smap, first_selem);

	owner_storage_ptr =
		(struct bpf_local_storage **)owner_storage(smap, owner);
	/* Publish storage to the owner.
	 * Instead of using any lock of the kernel object (i.e. owner),
	 * cmpxchg will work with any kernel object regardless of what
	 * the running context is: bh, irq, etc.
	 *
	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
	 * is protected by the storage->lock. Hence, when freeing
	 * the owner->storage, the storage->lock must be held before
	 * setting owner->storage ptr to NULL.
	 */
	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
	if (unlikely(prev_storage)) {
		bpf_selem_unlink_map(first_selem);
		err = -EAGAIN;
		goto uncharge;
	}

	return 0;

uncharge:
	bpf_local_storage_free(storage, true);
	mem_uncharge(smap, owner, sizeof(*storage));
	return err;
}

/* sk cannot be going away because it is linking a new elem
 * to sk->sk_bpf_storage (i.e. sk->sk_refcnt cannot be 0).
 * Otherwise, it will become a leak (and cause other memory issues
 * during map destruction).
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags)
{
	struct bpf_local_storage_data *old_sdata = NULL;
	struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
	struct bpf_local_storage *local_storage;
	HLIST_HEAD(old_selem_free_list);
	unsigned long flags;
	int err;

	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
	    /* BPF_F_LOCK can only be used in a value with spin_lock */
	    unlikely((map_flags & BPF_F_LOCK) &&
		     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
		return ERR_PTR(-EINVAL);

	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
		return ERR_PTR(-EINVAL);

	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
					      bpf_rcu_lock_held());
	if (!local_storage || hlist_empty(&local_storage->list)) {
		/* Very first elem for the owner */
		err = check_flags(NULL, map_flags);
		if (err)
			return ERR_PTR(err);

		selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
		if (!selem)
			return ERR_PTR(-ENOMEM);

		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
		if (err) {
			bpf_selem_free(selem, true);
			mem_uncharge(smap, owner, smap->elem_size);
			return ERR_PTR(err);
		}

		return SDATA(selem);
	}

	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
		/* Hoping to find an old_sdata to do inline update
		 * such that it can avoid taking the local_storage->lock
		 * and changing the lists.
		 */
		old_sdata =
			bpf_local_storage_lookup(local_storage, smap, false);
		err = check_flags(old_sdata, map_flags);
		if (err)
			return ERR_PTR(err);
		if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
			copy_map_value_locked(&smap->map, old_sdata->data,
					      value, false);
			return old_sdata;
		}
	}

	/* A lookup has just been done before and concluded a new selem is
	 * needed. The chance of an unnecessary alloc is unlikely.
	 */
	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
	if (!alloc_selem)
		return ERR_PTR(-ENOMEM);

	raw_spin_lock_irqsave(&local_storage->lock, flags);

	/* Recheck local_storage->list under local_storage->lock */
	if (unlikely(hlist_empty(&local_storage->list))) {
		/* A parallel del is happening and local_storage is going
		 * away. It has just been checked before, so very
		 * unlikely. Return instead of retry to keep things
		 * simple.
		 */
		err = -EAGAIN;
		goto unlock;
	}

	old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
	err = check_flags(old_sdata, map_flags);
	if (err)
		goto unlock;

	if (old_sdata && (map_flags & BPF_F_LOCK)) {
		copy_map_value_locked(&smap->map, old_sdata->data, value,
				      false);
		selem = SELEM(old_sdata);
		goto unlock;
	}

	alloc_selem = NULL;
	/* First, link the new selem to the map */
	bpf_selem_link_map(smap, selem);

	/* Second, link (and publish) the new selem to local_storage */
	bpf_selem_link_storage_nolock(local_storage, selem);

	/* Third, remove old selem, SELEM(old_sdata) */
	if (old_sdata) {
		bpf_selem_unlink_map(SELEM(old_sdata));
		bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
						&old_selem_free_list);
	}

unlock:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	bpf_selem_free_list(&old_selem_free_list, false);
	if (alloc_selem) {
		mem_uncharge(smap, owner, smap->elem_size);
		bpf_selem_free(alloc_selem, true);
	}
	return err ? ERR_PTR(err) : SDATA(selem);
}

/* Pick the cache slot currently shared by the fewest maps so that maps
 * are spread across local_storage->cache[] and are less likely to evict
 * each other's cached sdata.
 */
static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
{
	u64 min_usage = U64_MAX;
	u16 i, res = 0;

	spin_lock(&cache->idx_lock);

	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
		if (cache->idx_usage_counts[i] < min_usage) {
			min_usage = cache->idx_usage_counts[i];
			res = i;

			/* Found a free cache_idx */
			if (!min_usage)
				break;
		}
	}
	cache->idx_usage_counts[res]++;

	spin_unlock(&cache->idx_lock);

	return res;
}

static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
					     u16 idx)
{
	spin_lock(&cache->idx_lock);
	cache->idx_usage_counts[idx]--;
	spin_unlock(&cache->idx_lock);
}

int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
	    attr->max_entries ||
	    attr->key_size != sizeof(int) || !attr->value_size ||
	    /* Enforce BTF for userspace sk dumping */
	    !attr->btf_key_type_id || !attr->btf_value_type_id)
		return -EINVAL;

	if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
		return -E2BIG;

	return 0;
}

int bpf_local_storage_map_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	if (!btf_type_is_i32(key_type))
		return -EINVAL;

	return 0;
}

void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
	struct bpf_local_storage_elem *selem;
	bool free_storage = false;
	HLIST_HEAD(free_selem_list);
	struct hlist_node *n;
	unsigned long flags;

	/* Neither the bpf_prog nor the bpf_map's syscall
	 * could be modifying the local_storage->list now.
	 * Thus, no elem can be added to or deleted from the
	 * local_storage->list by the bpf_prog or by the bpf_map's syscall.
	 *
	 * It is racing with bpf_local_storage_map_free() alone
	 * when unlinking elem from the local_storage->list and
	 * the map's bucket->list.
	 */
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
		/* Always unlink from map before unlinking from
		 * local_storage.
		 */
		bpf_selem_unlink_map(selem);
		/* If local_storage list has only one element, the
		 * bpf_selem_unlink_storage_nolock() will return true.
		 * Otherwise, it will return false. The current loop iteration
		 * intends to remove all local storage. So the last iteration
		 * of the loop will set free_storage to true.
		 */
		free_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, &free_selem_list);
	}
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	bpf_selem_free_list(&free_selem_list, true);

	if (free_storage)
		bpf_local_storage_free(local_storage, true);
}

u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map;
	u64 usage = sizeof(*smap);

	/* The dynamically allocated selems are not counted currently. */
	usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log);
	return usage;
}

struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
			    struct bpf_local_storage_cache *cache,
			    bool use_kmalloc_nolock)
{
	struct bpf_local_storage_map *smap;
	unsigned int i;
	u32 nbuckets;
	int err;

	smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
	if (!smap)
		return ERR_PTR(-ENOMEM);
	bpf_map_init_from_attr(&smap->map, attr);

	nbuckets = roundup_pow_of_two(num_possible_cpus());
	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
	nbuckets = max_t(u32, 2, nbuckets);
	smap->bucket_log = ilog2(nbuckets);

	smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
					 sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
	if (!smap->buckets) {
		err = -ENOMEM;
		goto free_smap;
	}

	for (i = 0; i < nbuckets; i++) {
		INIT_HLIST_HEAD(&smap->buckets[i].list);
		raw_spin_lock_init(&smap->buckets[i].lock);
	}

	smap->elem_size = offsetof(struct bpf_local_storage_elem,
				   sdata.data[attr->value_size]);

	/* In PREEMPT_RT, kmalloc(GFP_ATOMIC) is still not safe in a
	 * non-preemptible context. Thus, enforce all storages to use
	 * kmalloc_nolock() when CONFIG_PREEMPT_RT is enabled.
	 */
	smap->use_kmalloc_nolock = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : use_kmalloc_nolock;

	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
	return &smap->map;

free_smap:
	kvfree(smap->buckets);
	bpf_map_area_free(smap);
	return ERR_PTR(err);
}

void bpf_local_storage_map_free(struct bpf_map *map,
				struct bpf_local_storage_cache *cache,
				int __percpu *busy_counter)
{
	struct bpf_local_storage_map_bucket *b;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	unsigned int i;

	smap = (struct bpf_local_storage_map *)map;
	bpf_local_storage_cache_idx_free(cache, smap->cache_idx);

	/* Note that this map might be concurrently cloned from
	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
	 * RCU read section to finish before proceeding. New RCU
	 * read sections should be prevented via bpf_map_inc_not_zero.
	 */
	synchronize_rcu();

	/* bpf prog and the userspace can no longer access this map
	 * now. No new selem (of this map) can be added
	 * to the owner->storage or to the map bucket's list.
	 *
	 * The elem of this map can be cleaned up here
	 * or when the storage is freed e.g.
	 * by bpf_sk_storage_free() during __sk_destruct().
	 */
	for (i = 0; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];

		rcu_read_lock();
		/* No one is adding to b->list now */
		while ((selem = hlist_entry_safe(
				rcu_dereference_raw(hlist_first_rcu(&b->list)),
				struct bpf_local_storage_elem, map_node))) {
			if (busy_counter)
				this_cpu_inc(*busy_counter);
			bpf_selem_unlink(selem, true);
			if (busy_counter)
				this_cpu_dec(*busy_counter);
			cond_resched_rcu();
		}
		rcu_read_unlock();
	}

	/* While freeing the storage we may still need to access the map.
	 *
	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
	 * which then made the above while((selem = ...)) loop
	 * exit immediately.
	 *
	 * However, while freeing the storage one still needs to access the
	 * smap->elem_size to do the uncharging in
	 * bpf_selem_unlink_storage_nolock().
	 *
	 * Hence, wait another rcu grace period for the storage to be freed.
	 */
	synchronize_rcu();

	if (smap->use_kmalloc_nolock) {
		rcu_barrier_tasks_trace();
		rcu_barrier();
	}
	kvfree(smap->buckets);
	bpf_map_area_free(smap);
}