// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata), false);

	return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;

	migrate_disable();
	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage)
		goto out;

	bpf_local_storage_destroy(sk_storage);
out:
	rcu_read_unlock();
	migrate_enable();
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache, false);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}

static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}
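/* Userspace view of the syscall-side callbacks above and below: the map key
 * is a socket fd owned by the calling process, resolved here via
 * sockfd_lookup().  A minimal libbpf-based sketch (map_fd, sock_fd and
 * struct my_val are illustrative, not part of this file):
 *
 *	struct my_val v;
 *	int err;
 *
 *	err = bpf_map_lookup_elem(map_fd, &sock_fd, &v);	// lookup_elem
 *	err = bpf_map_update_elem(map_fd, &sock_fd, &v, 0);	// update_elem
 *	err = bpf_map_delete_elem(map_fd, &sock_fd);		// delete_elem
 *
 * bpf_map_get_next_key() is rejected with -ENOTSUPP since sk_storage has no
 * enumerable key space; enumeration goes through the bpf_iter target further
 * down in this file.
 */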
static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, false, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}

static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	migrate_disable();
	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding new element
		 * here can race with cleanup in bpf_local_storage_map_free.
		 * Try to grab map refcnt to make sure that it's still
		 * alive and prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				bpf_selem_free(copy_selem, smap, true);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();
	migrate_enable();

	/* In case of an error, don't free anything explicitly here, the
	 * caller is responsible to call bpf_sk_storage_free.
	 */

	return ret;
}
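/* bpf_sk_storage_clone() only copies elements whose map was created with
 * BPF_F_CLONE, so storage can follow a child socket out of accept().
 * A sketch of such a map in a BPF program, using libbpf's BTF-defined map
 * syntax (the value type and map name are illustrative):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
 *		__type(key, int);
 *		__type(value, struct my_storage);
 *	} sk_stg_map SEC(".maps");
 */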
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add new elem to a going away sk.
	     * Otherwise, the new elem may become a leak
	     * (and also other memory issues during map
	     * destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, false, gfp_flags);
		/* sk must be a fullsock (guaranteed by verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	struct sock *sk = (struct sock *)owner;
	int optmem_max;

	optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
	.map_mem_usage = bpf_local_storage_map_mem_usage,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func		= bpf_sk_storage_get,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};
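/* How the two helpers above are typically called from a BPF program
 * (e.g. a sockops or cgroup/sock program); a minimal sketch, with the map
 * and value names purely illustrative:
 *
 *	struct my_storage *stg;
 *
 *	stg = bpf_sk_storage_get(&sk_stg_map, sk, NULL,
 *				 BPF_SK_STORAGE_GET_F_CREATE);
 *	if (!stg)
 *		return 0;
 *	stg->pkts++;
 *	bpf_sk_storage_delete(&sk_stg_map, sk);
 *
 * With F_CREATE the helper allocates a new element (charged against the
 * socket's optmem, see bpf_sk_storage_charge() above) when none exists;
 * without it the helper only looks up an existing element.
 */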
const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func		= bpf_sk_storage_get,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func		= bpf_sk_storage_delete,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};

static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	const struct btf *btf_vmlinux;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;

	if (prog->aux->dst_prog)
		return false;

	/* Ensure the tracing program is not tracing
	 * any bpf_sk_storage*() function and also
	 * use the bpf_sk_storage_(get|delete) helper.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no trace point */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		btf_vmlinux = bpf_get_btf_vmlinux();
		if (IS_ERR_OR_NULL(btf_vmlinux))
			return false;
		btf_id = prog->aux->attach_btf_id;
		t = btf_type_by_id(btf_vmlinux, btf_id);
		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
		return !!strncmp(tname, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
						     gfp_flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func		= bpf_sk_storage_get_tracing,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
	.allowed	= bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func		= bpf_sk_storage_delete_tracing,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed	= bpf_sk_storage_tracing_allowed,
};

struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(value_size);
}

void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
	 * the map_alloc_check() side also does.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		if (nla_len(nla) != sizeof(u32))
			return ERR_PTR(-EINVAL);
		nr_maps++;
	}

	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		int map_fd = nla_get_u32(nla);
		struct bpf_map *map = bpf_map_get(map_fd);

		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* It cannot exceed max nlattr's payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}
static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified.  Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
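/* What follows implements the "bpf_sk_storage_map" bpf_iter target: the
 * seq_file operations walk every bucket of one sk_storage map under RCU and
 * hand each (sk, value) pair to the attached iterator program.
 */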
struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned skip_elems;
};

static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)
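/* An iterator program sees the context structure above.  A minimal sketch of
 * such a program (the value type and printed fields are illustrative):
 *
 *	SEC("iter/bpf_sk_storage_map")
 *	int dump_sk_storage(struct bpf_iter__bpf_sk_storage_map *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct sock *sk = ctx->sk;
 *		struct my_storage *stg = ctx->value;
 *
 *		if (!sk || !stg)
 *			return 0;
 *		BPF_SEQ_PRINTF(seq, "sk %p pkts %u\n", sk, stg->pkts);
 *		return 0;
 *	}
 */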
static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}

static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start = bpf_sk_storage_map_seq_start,
	.next = bpf_sk_storage_map_seq_next,
	.stop = bpf_sk_storage_map_seq_stop,
	.show = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops		= &bpf_sk_storage_map_seq_ops,
	.init_seq_private	= bpf_iter_init_sk_storage_map,
	.fini_seq_private	= bpf_iter_fini_sk_storage_map,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target			= "bpf_sk_storage_map",
	.attach_target		= bpf_iter_attach_map,
	.detach_target		= bpf_iter_detach_map,
	.show_fdinfo		= bpf_iter_map_show_fdinfo,
	.fill_link_info		= bpf_iter_map_fill_link_info,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info		= &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_iter_init ? &bpf_sk_storage_map_reg_info : &bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);
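/* bpf_iter_attach_map() above requires the attaching link to name a
 * BPF_MAP_TYPE_SK_STORAGE map by fd.  A minimal userspace sketch with libbpf
 * (prog, map_fd and error handling are illustrative):
 *
 *	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 *	union bpf_iter_link_info linfo = {};
 *	struct bpf_link *link;
 *	int iter_fd;
 *
 *	linfo.map.map_fd = map_fd;
 *	opts.link_info = &linfo;
 *	opts.link_info_len = sizeof(linfo);
 *	link = bpf_program__attach_iter(prog, &opts);
 *	iter_fd = bpf_iter_create(bpf_link__fd(link));
 *	// read(iter_fd, buf, sizeof(buf)) then streams the program's output
 */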