// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	return bpf_selem_unlink(SELEM(sdata));
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;
	u32 uncharge;

	rcu_read_lock_dont_migrate();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage)
		goto out;

	uncharge = bpf_local_storage_destroy(sk_storage);
	if (uncharge)
		atomic_sub(uncharge, &sk->sk_omem_alloc);
out:
	rcu_read_unlock_migrate();
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}

static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}

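/* Syscall-side update: as in the lookup above, the map key is a socket
 * fd in the calling process, resolved to its struct sock via
 * sockfd_lookup().
 */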
static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, false);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}

static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, false);
	if (!copy_selem)
		return NULL;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	rcu_read_lock_dont_migrate();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners, adding a new element
		 * here can race with cleanup in bpf_local_storage_map_free().
		 * Try to grab the map refcnt to make sure that it's still
		 * alive and to prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			ret = bpf_selem_link_map(smap, new_sk_storage, copy_selem);
			if (ret) {
				bpf_selem_free(copy_selem, true);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
			if (ret) {
				bpf_selem_free(copy_selem, true);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock_migrate();

	/* In case of an error, don't free anything explicitly here;
	 * the caller is responsible for calling bpf_sk_storage_free().
	 */

	return ret;
}

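/* BPF-side accessor. A minimal usage sketch from a BPF program (the map
 * and variable names here are illustrative only):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, __u64);
 *	} sk_stg SEC(".maps");
 *
 *	__u64 *val;
 *
 *	val = bpf_sk_storage_get(&sk_stg, sk, NULL,
 *				 BPF_SK_STORAGE_GET_F_CREATE);
 *
 * With BPF_SK_STORAGE_GET_F_CREATE, a missing element is created on the
 * fly, initialized from the third argument if it is non-NULL and zeroed
 * otherwise.
 */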
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add a new elem to a going-away sk.
	     * Otherwise, the new elem may leak (and cause
	     * other memory issues during map destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, false);
		/* sk must be a fullsock (guaranteed by the verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	struct sock *sk = (struct sock *)owner;
	int optmem_max;

	optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
	.map_mem_usage = bpf_local_storage_map_mem_usage,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

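/* Variant for cgroup sock programs, where the program context itself is
 * the 'struct sock' (hence ARG_PTR_TO_CTX for arg2).
 */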
const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func = bpf_sk_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};

static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	if (prog->aux->dst_prog)
		return false;

	/* Ensure that a tracing program is not tracing
	 * any bpf_sk_storage*() function while also using
	 * the bpf_sk_storage_(get|delete) helpers.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no tracepoint */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
	case BPF_TRACE_FSESSION:
		return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func = bpf_sk_storage_get_tracing,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
	.allowed = bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func = bpf_sk_storage_delete_tracing,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed = bpf_sk_storage_tracing_allowed,
};

struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(value_size);
}

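/* Release the map references taken by bpf_sk_storage_diag_alloc() and
 * free the request.
 */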
void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN,
	 * matching the check done on the map_alloc_check() side.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		if (nla_len(nla) != sizeof(u32))
			return ERR_PTR(-EINVAL);
		nr_maps++;
	}

	diag = kzalloc_flex(*diag, maps, nr_maps);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		int map_fd = nla_get_u32(nla);
		struct bpf_map *map = bpf_map_get(map_fd);

		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* The value cannot exceed the max payload of an nlattr */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}

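/* Dump every storage attached to sk. If the skb runs out of room, keep
 * walking anyway so that *res_diag_size still accumulates the total
 * size the caller would need, and report -EMSGSIZE.
 */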
static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified. Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);

struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned int skip_elems;
};

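/* Find the next element for the map iterator. RCU protects the walk
 * within a bucket; the read lock is dropped and re-acquired when moving
 * to the next bucket, so the resume position is tracked as a
 * (bucket_id, skip_elems) pair rather than as a pointer that could go
 * stale across the unlock.
 */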
static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)

static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}

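/* The iterator program reads the map value through ctx->value, so
 * refuse to attach a program whose verified read/write range exceeds
 * this map's value_size.
 */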
static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start = bpf_sk_storage_map_seq_start,
	.next = bpf_sk_storage_map_seq_next,
	.stop = bpf_sk_storage_map_seq_stop,
	.show = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_sk_storage_map_seq_ops,
	.init_seq_private = bpf_iter_init_sk_storage_map,
	.fini_seq_private = bpf_iter_fini_sk_storage_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target = "bpf_sk_storage_map",
	.attach_target = bpf_iter_attach_map,
	.detach_target = bpf_iter_detach_map,
	.show_fdinfo = bpf_iter_map_show_fdinfo,
	.fill_link_info = bpf_iter_map_fill_link_info,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info = &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);