// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

/* Hash the local_storage pointer to pick the map bucket (list + lock)
 * that selems of this (map, storage) pair are linked under.
 */
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
	      struct bpf_local_storage *local_storage)
{
	return &smap->buckets[hash_ptr(local_storage, smap->bucket_log)];
}

/* Charge "size" bytes against the owner's memory accounting, if the map
 * type implements charging. Returns 0 on success or when no charge op
 * exists; otherwise the (negative) error from the charge op.
 */
static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
	struct bpf_map *map = &smap->map;

	if (!map->ops->map_local_storage_charge)
		return 0;

	return map->ops->map_local_storage_charge(smap, owner, size);
}

/* Undo a previous mem_charge() of "size" bytes, if the map type
 * implements uncharging.
 */
static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
			 u32 size)
{
	struct bpf_map *map = &smap->map;

	if (map->ops->map_local_storage_uncharge)
		map->ops->map_local_storage_uncharge(smap, owner, size);
}

/* Return the address of the owner's storage pointer
 * (e.g. &sk->sk_bpf_storage for a socket owner).
 */
static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
	struct bpf_map *map = &smap->map;

	return map->ops->map_owner_storage_ptr(owner);
}

/* Lockless variant: safe to call without holding local_storage->lock. */
static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed_lockless(&selem->snode);
}

/* Caller is expected to hold local_storage->lock. */
static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed(&selem->snode);
}

/* Caller is expected to hold the map bucket lock. */
static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed(&selem->map_node);
}

/* Allocate and initialize a new selem, charging the owner for elem_size
 * first. Returns NULL on charge or allocation failure (the charge is
 * rolled back on allocation failure). If "value" is non-NULL it is copied
 * into the selem's data; "swap_uptrs" additionally moves any uptr fields
 * from "value" into the selem.
 */
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
		void *value, bool swap_uptrs)
{
	struct bpf_local_storage_elem *selem;

	if (mem_charge(smap, owner, smap->elem_size))
		return NULL;

	selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
				       __GFP_ZERO, NUMA_NO_NODE);

	if (selem) {
		RCU_INIT_POINTER(SDATA(selem)->smap, smap);
		atomic_set(&selem->state, 0);

		if (value) {
			/* No need to call check_and_init_map_value as memory is zero init */
			copy_map_value(&smap->map, SDATA(selem)->data, value);
			if (swap_uptrs)
				bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value);
		}
		return selem;
	}

	/* Allocation failed: give the charged bytes back to the owner. */
	mem_uncharge(smap, owner, smap->elem_size);

	return NULL;
}

/* RCU Tasks Trace callback to free a local_storage. */
static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage *local_storage;

	/*
	 * RCU Tasks Trace grace period implies RCU grace period, do
	 * kfree() directly.
	 */
	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
	kfree(local_storage);
}

/* Free a local_storage. With "reuse_now" only a regular RCU grace period
 * is waited for (kfree_rcu); otherwise an RCU Tasks Trace grace period is
 * waited for as well before freeing.
 */
static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
				   bool reuse_now)
{
	if (!local_storage)
		return;

	if (reuse_now) {
		kfree_rcu(local_storage, rcu);
		return;
	}

	call_rcu_tasks_trace(&local_storage->rcu,
			     bpf_local_storage_free_trace_rcu);
}

/* RCU Tasks Trace callback to free an selem, releasing any special
 * fields (kptr, timer, etc.) recorded in the map's btf_record first.
 */
static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;

	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	/* The bpf_local_storage_map_free will wait for rcu_barrier */
	smap = rcu_dereference_check(SDATA(selem)->smap, 1);

	if (smap)
		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
	/*
	 * RCU Tasks Trace grace period implies RCU grace period, do
	 * kfree() directly.
	 */
	kfree(selem);
}

/* Free an selem. With "reuse_now" the special fields are released
 * immediately and only a regular RCU grace period is waited for;
 * otherwise defer to bpf_selem_free_trace_rcu() after an RCU Tasks
 * Trace grace period.
 */
void bpf_selem_free(struct bpf_local_storage_elem *selem,
		    bool reuse_now)
{
	struct bpf_local_storage_map *smap;

	smap = rcu_dereference_check(SDATA(selem)->smap, 1);

	if (reuse_now) {
		if (smap)
			bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
		kfree_rcu(selem, rcu);
		return;
	}

	call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
}

/* Free every selem collected on "list" (linked via selem->free_node). */
static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
{
	struct bpf_local_storage_elem *selem;
	struct hlist_node *n;

	/* The "_safe" iteration is needed.
	 * The loop is not removing the selem from the list
	 * but bpf_selem_free will use the selem->rcu_head
	 * which is union-ized with the selem->free_node.
	 */
	hlist_for_each_entry_safe(selem, n, list, free_node)
		bpf_selem_free(selem, reuse_now);
}

/* Bookkeeping shared by the locked and nofail unlink paths: drop the
 * cache entry if it points at this selem, uncharge memory, and clear the
 * owner's storage pointer when the last selem is going away. With
 * "pin_owner", take a temporary owner_refcnt reference so a racing
 * destroy() waits for the uncharge to finish (bails out if the refcount
 * already hit zero).
 */
static void bpf_selem_unlink_storage_nolock_misc(struct bpf_local_storage_elem *selem,
						 struct bpf_local_storage_map *smap,
						 struct bpf_local_storage *local_storage,
						 bool free_local_storage, bool pin_owner)
{
	void *owner = local_storage->owner;
	u32 uncharge = smap->elem_size;

	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
	    SDATA(selem))
		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

	if (pin_owner && !refcount_inc_not_zero(&local_storage->owner_refcnt))
		return;

	/* The storage itself is uncharged together with its last selem. */
	uncharge += free_local_storage ? sizeof(*local_storage) : 0;
	mem_uncharge(smap, local_storage->owner, uncharge);
	local_storage->mem_charge -= uncharge;

	if (free_local_storage) {
		local_storage->owner = NULL;

		/* After this RCU_INIT, owner may be freed and cannot be used */
		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
	}

	if (pin_owner)
		refcount_dec(&local_storage->owner_refcnt);
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 * Returns true when this was the last selem, i.e. the caller should free
 * the local_storage after dropping the lock. The selem is queued on
 * free_selem_list instead of being freed here (the lock is still held).
 */
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
					    struct bpf_local_storage_elem *selem,
					    struct hlist_head *free_selem_list)
{
	struct bpf_local_storage_map *smap;
	bool free_local_storage;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());

	free_local_storage = hlist_is_singular_node(&selem->snode,
						    &local_storage->list);

	bpf_selem_unlink_storage_nolock_misc(selem, smap, local_storage,
					     free_local_storage, false);

	hlist_del_init_rcu(&selem->snode);

	hlist_add_head(&selem->free_node, free_selem_list);

	return free_local_storage;
}

/* Link selem into local_storage->list and account its size.
 * Caller holds local_storage->lock (or owns the storage exclusively,
 * as in bpf_local_storage_alloc()).
 */
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
				   struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map *smap;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	local_storage->mem_charge += smap->elem_size;

	RCU_INIT_POINTER(selem->local_storage, local_storage);
	hlist_add_head_rcu(&selem->snode, &local_storage->list);
}

/* Remove selem from its map bucket list under the bucket lock.
 * Returns 0 on success or the rqspinlock error (e.g. -ETIMEDOUT).
 */
static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage *local_storage;
	struct bpf_local_storage_map *smap;
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;
	int err;

	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());
	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	b = select_bucket(smap, local_storage);
	err = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (err)
		return err;

	hlist_del_init_rcu(&selem->map_node);
	raw_res_spin_unlock_irqrestore(&b->lock, flags);

	return 0;
}

/* Like bpf_selem_unlink_map() but the caller already holds the bucket lock. */
static void bpf_selem_unlink_map_nolock(struct bpf_local_storage_elem *selem)
{
	hlist_del_init_rcu(&selem->map_node);
}

/* Link selem into the map bucket list under the bucket lock.
 * Returns 0 on success or the rqspinlock error.
 */
int bpf_selem_link_map(struct bpf_local_storage_map *smap,
		       struct bpf_local_storage *local_storage,
		       struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;
	int err;

	b = select_bucket(smap, local_storage);

	err = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (err)
		return err;

	hlist_add_head_rcu(&selem->map_node, &b->list);
	raw_res_spin_unlock_irqrestore(&b->lock, flags);

	return 0;
}

/* Like bpf_selem_link_map() but the caller already holds b->lock. */
static void bpf_selem_link_map_nolock(struct bpf_local_storage_map_bucket *b,
				      struct bpf_local_storage_elem *selem)
{
	hlist_add_head_rcu(&selem->map_node, &b->list);
}

/*
 * Unlink an selem from map and local storage with lock held.
 * This is the common path used by local storages to delete an selem.
 */
int bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage *local_storage;
	bool free_local_storage = false;
	HLIST_HEAD(selem_free_list);
	unsigned long flags;
	int err;

	/* rqspinlock cannot be taken from NMI context. */
	if (in_nmi())
		return -EOPNOTSUPP;

	if (unlikely(!selem_linked_to_storage_lockless(selem)))
		/* selem has already been unlinked from sk */
		return 0;

	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());

	err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
	if (err)
		return err;

	if (likely(selem_linked_to_storage(selem))) {
		/* Always unlink from map before unlinking from local_storage
		 * because selem will be freed after successfully unlinked from
		 * the local_storage.
		 */
		err = bpf_selem_unlink_map(selem);
		if (err)
			goto out;

		free_local_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, &selem_free_list);
	}
out:
	raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);

	bpf_selem_free_list(&selem_free_list, false);

	if (free_local_storage)
		bpf_local_storage_free(local_storage, false);

	return err;
}

/*
 * Unlink an selem from map and local storage with lockless fallback if callers
 * are racing or rqspinlock returns error. It should only be called by
 * bpf_local_storage_destroy() or bpf_local_storage_map_free().
 */
static void bpf_selem_unlink_nofail(struct bpf_local_storage_elem *selem,
				    struct bpf_local_storage_map_bucket *b)
{
	/* A non-NULL bucket means we were called from map_free(). */
	bool in_map_free = !!b, free_storage = false;
	struct bpf_local_storage *local_storage;
	struct bpf_local_storage_map *smap;
	unsigned long flags;
	int err, unlink = 0;

	local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held());
	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());

	if (smap) {
		b = b ? : select_bucket(smap, local_storage);
		err = raw_res_spin_lock_irqsave(&b->lock, flags);
		if (!err) {
			/*
			 * Call bpf_obj_free_fields() under b->lock to make sure it is done
			 * exactly once for an selem. Safe to free special fields immediately
			 * as no BPF program should be referencing the selem.
			 */
			if (likely(selem_linked_to_map(selem))) {
				hlist_del_init_rcu(&selem->map_node);
				bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
				unlink++;
			}
			raw_res_spin_unlock_irqrestore(&b->lock, flags);
		}
		/*
		 * Highly unlikely scenario: resource leak
		 *
		 * When map_free(selem1), destroy(selem1) and destroy(selem2) are racing
		 * and both selem belong to the same bucket, if destroy(selem2) acquired
		 * b->lock and block for too long, neither map_free(selem1) and
		 * destroy(selem1) will be able to free the special field associated
		 * with selem1 as raw_res_spin_lock_irqsave() returns -ETIMEDOUT.
		 */
		WARN_ON_ONCE(err && in_map_free);
		if (!err || in_map_free)
			RCU_INIT_POINTER(SDATA(selem)->smap, NULL);
	}

	if (local_storage) {
		err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
		if (!err) {
			if (likely(selem_linked_to_storage(selem))) {
				free_storage = hlist_is_singular_node(&selem->snode,
								      &local_storage->list);
				/*
				 * Okay to skip clearing owner_storage and storage->owner in
				 * destroy() since the owner is going away. No user or bpf
				 * programs should be able to reference it.
				 */
				if (smap && in_map_free)
					bpf_selem_unlink_storage_nolock_misc(
						selem, smap, local_storage,
						free_storage, true);
				hlist_del_init_rcu(&selem->snode);
				unlink++;
			}
			raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
		}
		/*
		 * Highly unlikely scenario: memory leak
		 *
		 * When destroy() fails to acquire local_storage->lock and initializes
		 * selem->local_storage to NULL before any racing map_free() sees the same
		 * selem, no one will free the local storage.
		 */
		WARN_ON_ONCE(err && !in_map_free);
		if (!err || !in_map_free)
			RCU_INIT_POINTER(selem->local_storage, NULL);
	}

	/* unlink == 2 means both the map and storage links were removed here. */
	if (unlink != 2)
		atomic_or(in_map_free ? SELEM_MAP_UNLINKED : SELEM_STORAGE_UNLINKED, &selem->state);

	/*
	 * Normally, an selem can be unlinked under local_storage->lock and b->lock, and
	 * then freed after an RCU grace period. However, if destroy() and map_free() are
	 * racing or rqspinlock returns errors in unlikely situations (unlink != 2), free
	 * the selem only after both map_free() and destroy() see the selem.
	 */
	if (unlink == 2 ||
	    atomic_cmpxchg(&selem->state, SELEM_UNLINKED, SELEM_TOFREE) == SELEM_UNLINKED)
		bpf_selem_free(selem, true);

	if (free_storage)
		bpf_local_storage_free(local_storage, true);
}

/* Publish SDATA(selem) into the per-storage lookup cache slot for this
 * map, so the next bpf_local_storage_lookup() can skip the list walk.
 * Best effort: silently gives up if the lock cannot be taken.
 */
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
				      struct bpf_local_storage_map *smap,
				      struct bpf_local_storage_elem *selem)
{
	unsigned long flags;
	int err;

	/* spinlock is needed to avoid racing with the
	 * parallel delete. Otherwise, publishing an already
	 * deleted sdata to the cache will become a use-after-free
	 * problem in the next bpf_local_storage_lookup().
	 */
	err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
	if (err)
		return;

	if (selem_linked_to_storage(selem))
		rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
	raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
}

/* Validate BPF_EXIST/BPF_NOEXIST in map_flags against whether an old
 * element exists.
 */
static int check_flags(const struct bpf_local_storage_data *old_sdata,
		       u64 map_flags)
{
	if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}

/* Allocate the owner's very first local_storage, link first_selem into
 * it, and publish it to the owner via cmpxchg. Returns 0 on success,
 * -EAGAIN if another storage was published concurrently (caller may
 * retry), or another negative error. On failure the caller still owns
 * first_selem and must free/uncharge it.
 */
int bpf_local_storage_alloc(void *owner,
			    struct bpf_local_storage_map *smap,
			    struct bpf_local_storage_elem *first_selem)
{
	struct bpf_local_storage *prev_storage, *storage;
	struct bpf_local_storage **owner_storage_ptr;
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;
	int err;

	err = mem_charge(smap, owner, sizeof(*storage));
	if (err)
		return err;

	storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
					 __GFP_ZERO, NUMA_NO_NODE);
	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
	}

	INIT_HLIST_HEAD(&storage->list);
	raw_res_spin_lock_init(&storage->lock);
	storage->owner = owner;
	storage->mem_charge = sizeof(*storage);
	refcount_set(&storage->owner_refcnt, 1);

	bpf_selem_link_storage_nolock(storage, first_selem);

	b = select_bucket(smap, storage);
	err = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (err)
		goto uncharge;

	bpf_selem_link_map_nolock(b, first_selem);

	owner_storage_ptr =
		(struct bpf_local_storage **)owner_storage(smap, owner);
	/* Publish storage to the owner.
	 * Instead of using any lock of the kernel object (i.e. owner),
	 * cmpxchg will work with any kernel object regardless what
	 * the running context is, bh, irq...etc.
	 *
	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
	 * is protected by the storage->lock. Hence, when freeing
	 * the owner->storage, the storage->lock must be held before
	 * setting owner->storage ptr to NULL.
	 */
	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
	if (unlikely(prev_storage)) {
		/* Lost the race: another storage was published first. */
		bpf_selem_unlink_map_nolock(first_selem);
		raw_res_spin_unlock_irqrestore(&b->lock, flags);
		err = -EAGAIN;
		goto uncharge;
	}
	raw_res_spin_unlock_irqrestore(&b->lock, flags);

	return 0;

uncharge:
	bpf_local_storage_free(storage, true);
	mem_uncharge(smap, owner, sizeof(*storage));
	return err;
}

/* sk cannot be going away because it is linking new elem
 * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
 * Otherwise, it will become a leak (and other memory issues
 * during map destruction).
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags, bool swap_uptrs)
{
	struct bpf_local_storage_data *old_sdata = NULL;
	struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
	struct bpf_local_storage *local_storage;
	struct bpf_local_storage_map_bucket *b;
	HLIST_HEAD(old_selem_free_list);
	unsigned long flags, b_flags;
	int err;

	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
	    /* BPF_F_LOCK can only be used in a value with spin_lock */
	    unlikely((map_flags & BPF_F_LOCK) &&
		     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
		return ERR_PTR(-EINVAL);

	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
					      bpf_rcu_lock_held());
	if (!local_storage || hlist_empty(&local_storage->list)) {
		/* Very first elem for the owner */
		err = check_flags(NULL, map_flags);
		if (err)
			return ERR_PTR(err);

		selem = bpf_selem_alloc(smap, owner, value, swap_uptrs);
		if (!selem)
			return ERR_PTR(-ENOMEM);

		err = bpf_local_storage_alloc(owner, smap, selem);
		if (err) {
			bpf_selem_free(selem, true);
			mem_uncharge(smap, owner, smap->elem_size);
			return ERR_PTR(err);
		}

		return SDATA(selem);
	}

	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
		/* Hoping to find an old_sdata to do inline update
		 * such that it can avoid taking the local_storage->lock
		 * and changing the lists.
		 */
		old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
		err = check_flags(old_sdata, map_flags);
		if (err)
			return ERR_PTR(err);
		if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
			copy_map_value_locked(&smap->map, old_sdata->data,
					      value, false);
			return old_sdata;
		}
	}

	/* A lookup has just been done before and concluded a new selem is
	 * needed. The chance of an unnecessary alloc is unlikely.
	 */
	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs);
	if (!alloc_selem)
		return ERR_PTR(-ENOMEM);

	err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
	if (err)
		goto free_selem;

	/* Recheck local_storage->list under local_storage->lock */
	if (unlikely(hlist_empty(&local_storage->list))) {
		/* A parallel del is happening and local_storage is going
		 * away. It has just been checked before, so very
		 * unlikely. Return instead of retry to keep things
		 * simple.
		 */
		err = -EAGAIN;
		goto unlock;
	}

	old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
	err = check_flags(old_sdata, map_flags);
	if (err)
		goto unlock;

	if (old_sdata && (map_flags & BPF_F_LOCK)) {
		copy_map_value_locked(&smap->map, old_sdata->data, value,
				      false);
		selem = SELEM(old_sdata);
		goto unlock;
	}

	b = select_bucket(smap, local_storage);

	err = raw_res_spin_lock_irqsave(&b->lock, b_flags);
	if (err)
		goto unlock;

	/* The new selem is now owned by the lists; don't free it below. */
	alloc_selem = NULL;
	/* First, link the new selem to the map */
	bpf_selem_link_map_nolock(b, selem);

	/* Second, link (and publish) the new selem to local_storage */
	bpf_selem_link_storage_nolock(local_storage, selem);

	/* Third, remove old selem, SELEM(old_sdata) */
	if (old_sdata) {
		bpf_selem_unlink_map_nolock(SELEM(old_sdata));
		bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
						&old_selem_free_list);
	}

	raw_res_spin_unlock_irqrestore(&b->lock, b_flags);
unlock:
	raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
free_selem:
	bpf_selem_free_list(&old_selem_free_list, false);
	if (alloc_selem) {
		/* Unused allocation (error or inline update): roll back. */
		mem_uncharge(smap, owner, smap->elem_size);
		bpf_selem_free(alloc_selem, true);
	}
	return err ? ERR_PTR(err) : SDATA(selem);
}

/* Pick the least-used cache index for a new map, preferring a completely
 * free slot, and bump its usage count.
 */
static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
{
	u64 min_usage = U64_MAX;
	u16 i, res = 0;

	spin_lock(&cache->idx_lock);

	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
		if (cache->idx_usage_counts[i] < min_usage) {
			min_usage = cache->idx_usage_counts[i];
			res = i;

			/* Found a free cache_idx */
			if (!min_usage)
				break;
		}
	}
	cache->idx_usage_counts[res]++;

	spin_unlock(&cache->idx_lock);

	return res;
}

/* Release a cache index obtained from bpf_local_storage_cache_idx_get(). */
static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
					     u16 idx)
{
	spin_lock(&cache->idx_lock);
	cache->idx_usage_counts[idx]--;
	spin_unlock(&cache->idx_lock);
}

/* Validate map_create attributes common to all local storage map types. */
int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
	    attr->max_entries ||
	    attr->key_size != sizeof(int) || !attr->value_size ||
	    /* Enforce BTF for userspace sk dumping */
	    !attr->btf_key_type_id || !attr->btf_value_type_id)
		return -EINVAL;

	if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
		return -E2BIG;

	return 0;
}

/* BTF check: the key must be a 32-bit integer type. */
int bpf_local_storage_map_check_btf(struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	if (!btf_type_is_i32(key_type))
		return -EINVAL;

	return 0;
}

/*
 * Destroy local storage when the owner is going away. Caller must uncharge memory
 * if memory charging is used.
 */
u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
	struct bpf_local_storage_elem *selem;

	/* Neither the bpf_prog nor the bpf_map's syscall
	 * could be modifying the local_storage->list now.
	 * Thus, no elem can be added to or deleted from the
	 * local_storage->list by the bpf_prog or by the bpf_map's syscall.
	 *
	 * It is racing with bpf_local_storage_map_free() alone
	 * when unlinking elem from the local_storage->list and
	 * the map's bucket->list.
	 */
	hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
		bpf_selem_unlink_nofail(selem, NULL);

	if (!refcount_dec_and_test(&local_storage->owner_refcnt)) {
		/* A racing map_free() pinned the owner in
		 * bpf_selem_unlink_storage_nolock_misc(); spin until its
		 * uncharge is done so mem_charge below is accurate.
		 */
		while (refcount_read(&local_storage->owner_refcnt))
			cpu_relax();
		/*
		 * Paired with refcount_dec() in bpf_selem_unlink_nofail()
		 * to make sure destroy() sees the correct local_storage->mem_charge.
		 */
		smp_mb();
	}

	return local_storage->mem_charge;
}

/* Report the map's own memory footprint (struct + bucket array). */
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map;
	u64 usage = sizeof(*smap);

	/* The dynamically allocated selems are not counted currently. */
	usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log);
	return usage;
}

/* Allocate a local storage map: one bucket (list + lock) per possible
 * CPU (rounded up to a power of two, minimum 2), and a cache index
 * shared across maps of this storage type.
 */
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
			    struct bpf_local_storage_cache *cache)
{
	struct bpf_local_storage_map *smap;
	unsigned int i;
	u32 nbuckets;
	int err;

	smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
	if (!smap)
		return ERR_PTR(-ENOMEM);
	bpf_map_init_from_attr(&smap->map, attr);

	nbuckets = roundup_pow_of_two(num_possible_cpus());
	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
	nbuckets = max_t(u32, 2, nbuckets);
	smap->bucket_log = ilog2(nbuckets);

	smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
					 sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
	if (!smap->buckets) {
		err = -ENOMEM;
		goto free_smap;
	}

	for (i = 0; i < nbuckets; i++) {
		INIT_HLIST_HEAD(&smap->buckets[i].list);
		raw_res_spin_lock_init(&smap->buckets[i].lock);
	}

	/* elem_size covers the header plus the map's value bytes. */
	smap->elem_size = offsetof(struct bpf_local_storage_elem,
				   sdata.data[attr->value_size]);

	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
	return &smap->map;

free_smap:
	kvfree(smap->buckets);
	bpf_map_area_free(smap);
	return ERR_PTR(err);
}

/* Tear down a local storage map: unlink every remaining selem from every
 * bucket, then wait for the RCU grace periods needed before the bucket
 * array and map can be freed.
 */
void bpf_local_storage_map_free(struct bpf_map *map,
				struct bpf_local_storage_cache *cache)
{
	struct bpf_local_storage_map_bucket *b;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	unsigned int i;

	smap = (struct bpf_local_storage_map *)map;
	bpf_local_storage_cache_idx_free(cache, smap->cache_idx);

	/* Note that this map might be concurrently cloned from
	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
	 * RCU read section to finish before proceeding. New RCU
	 * read sections should be prevented via bpf_map_inc_not_zero.
	 */
	synchronize_rcu();

	/* bpf prog and the userspace can no longer access this map
	 * now. No new selem (of this map) can be added
	 * to the owner->storage or to the map bucket's list.
	 *
	 * The elem of this map can be cleaned up here
	 * or when the storage is freed e.g.
	 * by bpf_sk_storage_free() during __sk_destruct().
	 */
	for (i = 0; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];

		rcu_read_lock();
		/* No one is adding to b->list now */
restart:
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			bpf_selem_unlink_nofail(selem, b);

			if (need_resched()) {
				cond_resched_rcu();
				goto restart;
			}
		}
		rcu_read_unlock();
	}

	/* While freeing the storage we may still need to access the map.
	 *
	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
	 * which then made the above while((selem = ...)) loop
	 * exit immediately.
	 *
	 * However, while freeing the storage one still needs to access the
	 * smap->elem_size to do the uncharging in
	 * bpf_selem_unlink_storage_nolock().
	 *
	 * Hence, wait another rcu grace period for the storage to be freed.
	 */
	synchronize_rcu();

	/* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */
	rcu_barrier_tasks_trace();
	rcu_barrier();
	kvfree(smap->buckets);
	bpf_map_area_free(smap);
}