// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>

#include <net/net_namespace.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>

#include <linux/uaccess.h>

#define FL_MIN_LINGER	6	/* Minimal linger. It is set to 6sec specified
				   in old IPv6 RFC. Well, it was reasonable value.
				 */
#define FL_MAX_LINGER	150	/* Maximal linger timeout */

/* FL hash table */

#define FL_MAX_PER_SOCK	32
#define FL_MAX_SIZE	8192
#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

/* Total number of labels installed in fl_ht, across all namespaces. */
static int fl_size;
/* Global hash table of flow labels, chained via fl->next, RCU-protected. */
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];

static void ip6_fl_gc(struct timer_list *unused);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);

/* FL hash table lock: it protects only writers / the GC walk. */

static DEFINE_SPINLOCK(ip6_fl_lock);

/* Protects the per-socket ipv6_fl_list chains. */

static DEFINE_SPINLOCK(ip6_sk_fl_lock);

DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);

/* Iterate one hash chain of the global table under RCU. */
#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))
/* Continue a for_each_fl_rcu() walk from the entry after @fl. */
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))

/* Iterate a socket's flow-label list under RCU. */
#define for_each_sk_fl_rcu(sk, sfl)				\
	for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list);	\
	     sfl != NULL;					\
	     sfl = rcu_dereference(sfl->next))

/* Look up @label for @net in the global table. Caller must be in an RCU
 * read-side section (or hold ip6_fl_lock); no reference is taken.
 */
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	for_each_fl_rcu(FL_HASH(label), fl) {
		if (fl->label == label && net_eq(fl->fl_net, net))
			return fl;
	}
	return NULL;
}

/* Like __fl_lookup(), but self-contained: takes the RCU read lock and
 * returns the entry with fl->users incremented, or NULL if absent or
 * already dying (refcount hit zero).
 */
static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	rcu_read_lock();
	fl = __fl_lookup(net, label);
	if (fl && !atomic_inc_not_zero(&fl->users))
		fl = NULL;
	rcu_read_unlock();
	return fl;
}

/* True for sharing modes that restrict who may attach to the label
 * (anything but IPV6_FL_S_ANY / IPV6_FL_S_NONE).
 */
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	return fl->share == IPV6_FL_S_EXCL ||
	       fl->share == IPV6_FL_S_PROCESS ||
	       fl->share == IPV6_FL_S_USER;
}

/* RCU callback: final teardown of a flow label. */
static void fl_free_rcu(struct rcu_head *head)
{
	struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);

	/* Drop the pid reference taken by fl_create() for per-process labels. */
	if (fl->share == IPV6_FL_S_PROCESS)
		put_pid(fl->owner.pid);
	kfree(fl->opt);
	kfree(fl);
}


/* Free @fl after a grace period, undoing the static-key increment done
 * in fl_create() for exclusive-ish labels. NULL-safe.
 */
static void fl_free(struct ip6_flowlabel *fl)
{
	if (!fl)
		return;

	if (fl_shared_exclusive(fl) || fl->opt)
		static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);

	call_rcu(&fl->rcu, fl_free_rcu);
}

/* Drop one reference on @fl. On the last put, extend the expiry by the
 * linger interval and (re)arm the GC timer so the label is eventually
 * collected by ip6_fl_gc().
 */
static void fl_release(struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_fl_lock);

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;
		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
	spin_unlock_bh(&ip6_fl_lock);
}

/* GC timer callback: unlink and free expired, unreferenced labels, then
 * rearm for the earliest remaining expiry (or FL_MAX_LINGER as a
 * fallback while any label is still installed).
 */
static void ip6_fl_gc(struct timer_list *unused)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;

	spin_lock(&ip6_fl_lock);

	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;
				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					*flp = fl->next;
					fl_size--;
					fl->fl_net->ipv6.flowlabel_count--;
					fl_free(fl);
					continue;
				}
				/* Not yet expired: remember the soonest deadline. */
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && fl_size)
		sched = now + FL_MAX_LINGER;
	if (sched) {
		mod_timer(&ip6_fl_gc_timer, sched);
	}
	spin_unlock(&ip6_fl_lock);
}

/* Namespace teardown: unlink and free every unreferenced label that
 * belongs to @net.
 */
static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				*flp = fl->next;
				fl_free(fl);
				fl_size--;
				net->ipv6.flowlabel_count--;
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}

/* Insert @fl into the global table under @label, or under a random free
 * label when @label is 0. Caller holds ip6_fl_lock. Returns NULL on
 * successful insertion, or an existing entry (with a reference taken)
 * when @label is already in use — the caller then rechecks sharing.
 */
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *lfl;

	lockdep_assert_held(&ip6_fl_lock);

	fl->label = label & IPV6_FLOWLABEL_MASK;

	if (label == 0) {
		for (;;) {
			fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK;
			if (fl->label) {
				lfl = __fl_lookup(net, fl->label);
				if (!lfl)
					break;
			}
		}
	} else {
		/*
		 * we dropped the ip6_fl_lock, so this entry could reappear
		 * and we need to recheck with it.
		 *
		 * OTOH no need to search the active socket first, like it is
		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
		 * with the same label can only appear on another sock
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl) {
			atomic_inc(&lfl->users);
			return lfl;
		}
	}

	fl->lastuse = jiffies;
	fl->next = fl_ht[FL_HASH(fl->label)];
	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
	fl_size++;
	net->ipv6.flowlabel_count++;
	return NULL;
}



/* Socket flowlabel lists */

/* Find @label on @sk's per-socket list; return it with a reference
 * taken and lastuse refreshed, or NULL if not attached to this socket.
 */
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock();
	for_each_sk_fl_rcu(sk, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock();
			return fl;
		}
	}
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);

/* Detach and release every flow label linked to @sk (socket destroy
 * path). The list lock is dropped around fl_release() for each entry.
 */
void fl6_free_socklist(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (!rcu_access_pointer(inet->ipv6_fl_list))
		return;

	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		inet->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);

		fl_release(sfl->fl);
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/* Service routines */


/*
   It is the only difficult place. flowlabel enforces equal headers
   before and including routing header, however user may supply options
   following rthdr.
*/

/* Merge flow-label options @fl->opt with per-packet options @fopt into
 * the caller-supplied scratch space @opt_space. Headers up to and
 * including the routing header always come from the label; only
 * dst1opt/opt_flen may come from @fopt. Returns whichever of the three
 * structs holds the effective result.
 */
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
					 struct ip6_flowlabel *fl,
					 struct ipv6_txoptions *fopt)
{
	struct ipv6_txoptions *fl_opt = fl->opt;

	if (!fopt || fopt->opt_flen == 0)
		return fl_opt;

	if (fl_opt) {
		opt_space->hopopt = fl_opt->hopopt;
		opt_space->dst0opt = fl_opt->dst0opt;
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	opt_space->tot_len = fopt->tot_len;
	return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);

/* Clamp a user-supplied linger/expire value (seconds) into jiffies.
 * Values below FL_MIN_LINGER are raised to it; values above
 * FL_MAX_LINGER require CAP_NET_ADMIN (0 is returned to signal EPERM).
 */
static unsigned long check_linger(unsigned long ttl)
{
	if (ttl < FL_MIN_LINGER)
		return FL_MIN_LINGER*HZ;
	if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;
	return ttl*HZ;
}

/* Extend @fl's linger and expiry times; both values only ever grow.
 * Returns 0, or -EPERM when a value exceeds the unprivileged limit.
 */
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	linger = check_linger(linger);
	if (!linger)
		return -EPERM;
	expires = check_linger(expires);
	if (!expires)
		return -EPERM;

	spin_lock_bh(&ip6_fl_lock);
	fl->lastuse = jiffies;
	if (time_before(fl->linger, linger))
		fl->linger = linger;
	if (time_before(expires, fl->linger))
		expires = fl->linger;
	if (time_before(fl->expires, fl->lastuse + expires))
		fl->expires = fl->lastuse + expires;
	spin_unlock_bh(&ip6_fl_lock);

	return 0;
}

/* Build a new flow label from a userspace in6_flowlabel_req plus any
 * trailing cmsg-encoded options in @optval. On success returns the
 * label with users == 1 (not yet in the hash table); on failure frees
 * everything, stores the errno in *err_p and returns NULL.
 */
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;
	int addr_type;
	int err;

	/* Bytes of ancillary data following the request struct, if any. */
	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc_obj(*fl);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
					     CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		/* Parse the raw cmsg blob into fl->opt. */
		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt+1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		/* Options after the routing header are not allowed on a label. */
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		/* Reference dropped in fl_free_rcu(). */
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	if (fl_shared_exclusive(fl) || fl->opt) {
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}

/* Admission control for creating a new label: enforces global and
 * per-socket/per-namespace limits, relaxed for CAP_NET_ADMIN.
 * Returns 0 if a new label may be created, -ENOBUFS otherwise.
 */
static int mem_check(struct sock *sk)
{
	const int unpriv_total_limit = FL_MAX_SIZE - (FL_MAX_SIZE / 4);
	const int unpriv_user_limit = unpriv_total_limit / 2;
	struct net *net = sock_net(sk);
	int room;
	struct ipv6_fl_socklist *sfl;
	int count = 0;

	lockdep_assert_held(&ip6_fl_lock);

	room = FL_MAX_SIZE - fl_size;

	/* Fast path: plenty of room, no need to count this socket's labels. */
	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

	rcu_read_lock();
	for_each_sk_fl_rcu(sk, sfl)
		count++;
	rcu_read_unlock();

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
	      (count > 0 && room < FL_MAX_SIZE / 2) ||
	      room < FL_MAX_SIZE / 4 ||
	      net->ipv6.flowlabel_count >= unpriv_user_limit) &&
	     !capable(CAP_NET_ADMIN)))
		return -ENOBUFS;

	return 0;
}

/* Link @fl onto @sk's per-socket list via the pre-allocated node @sfl. */
static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl,
			   struct ip6_flowlabel *fl)
{
	struct inet_sock *inet = inet_sk(sk);

	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->fl = fl;
	sfl->next = inet->ipv6_fl_list;
	rcu_assign_pointer(inet->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/* getsockopt(IPV6_FLOWLABEL_MGR): report the label attached to @sk —
 * the peer's label (IPV6_FL_F_REMOTE), the reflected label (REPFLOW),
 * or the entry on the socket list matching the socket's flow label.
 */
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
			   int flags)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (flags & IPV6_FL_F_REMOTE) {
		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
		return 0;
	}

	if (inet6_test_bit(REPFLOW, sk)) {
		freq->flr_label = np->flow_label;
		return 0;
	}

	rcu_read_lock();

	for_each_sk_fl_rcu(sk, sfl) {
		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
			/* ip6_fl_lock keeps expires/linger consistent while read. */
			spin_lock_bh(&ip6_fl_lock);
			freq->flr_label = sfl->fl->label;
			freq->flr_dst = sfl->fl->dst;
			freq->flr_share = sfl->fl->share;
			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
			freq->flr_linger = sfl->fl->linger / HZ;

			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock();
			return 0;
		}
	}
	rcu_read_unlock();

	return -ENOENT;
}

#define socklist_dereference(__sflp) \
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))

/* IPV6_FL_A_PUT: detach a label from @sk (or stop flow-label reflection
 * when IPV6_FL_F_REFLECT is set) and release its reference.
 */
static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist __rcu **sflp;
	struct ipv6_fl_socklist *sfl;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		if (!inet6_test_bit(REPFLOW, sk))
			return -ESRCH;
		np->flow_label = 0;
		inet6_clear_bit(REPFLOW, sk);
		return 0;
	}

	spin_lock_bh(&ip6_sk_fl_lock);
	for (sflp = &inet_sk(sk)->ipv6_fl_list;
	     (sfl = socklist_dereference(*sflp)) != NULL;
	     sflp = &sfl->next) {
		if (sfl->fl->label == freq->flr_label)
			goto found;
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
	return -ESRCH;
found:
	/* Clear the label from the socket if it was the one in use. */
	if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
		np->flow_label &= ~IPV6_FLOWLABEL_MASK;
	*sflp = sfl->next;
	spin_unlock_bh(&ip6_sk_fl_lock);
	fl_release(sfl->fl);
	kfree_rcu(sfl, rcu);
	return 0;
}

/* IPV6_FL_A_RENEW: extend the lifetime of a label attached to @sk, or
 * (for CAP_NET_ADMIN with share == IPV6_FL_S_NONE) of any label in the
 * namespace.
 */
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct net *net = sock_net(sk);
	struct ipv6_fl_socklist *sfl;
	int err;

	rcu_read_lock();
	for_each_sk_fl_rcu(sk, sfl) {
		if (sfl->fl->label == freq->flr_label) {
			err = fl6_renew(sfl->fl, freq->flr_linger,
					freq->flr_expires);
			rcu_read_unlock();
			return err;
		}
	}
	rcu_read_unlock();

	if (freq->flr_share == IPV6_FL_S_NONE &&
	    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
		struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);

		if (fl) {
			err = fl6_renew(fl, freq->flr_linger,
					freq->flr_expires);
			fl_release(fl);
			return err;
		}
	}
	return -ESRCH;
}

/* IPV6_FL_A_GET: attach a label to @sk — enable reflection, join an
 * existing shared label (subject to the owner's sharing policy), or
 * create and install a new one (IPV6_FL_F_CREATE). A label of 0 asks
 * the kernel to pick one; it is copied back to userspace on success.
 */
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
			      sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		inet6_set_bit(REPFLOW, sk);
		return 0;
	}

	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	/* Pre-allocate the socket-list node; failure handled where needed. */
	sfl1 = kmalloc_obj(*sfl1);

	if (freq->flr_label) {
		err = -EEXIST;
		rcu_read_lock();
		for_each_sk_fl_rcu(sk, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock();

		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
recheck:
			err = -EEXIST;
			if (freq->flr_flags&IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			/* Sharing policy: mode and owner must match. */
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(sk, sfl1, fl1);
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	rcu_read_lock();
	spin_lock_bh(&ip6_fl_lock);
	err = mem_check(sk);
	if (err == 0)
		fl1 = fl_intern(net, fl, freq->flr_label);
	else
		fl1 = NULL;
	spin_unlock_bh(&ip6_fl_lock);
	rcu_read_unlock();

	if (err != 0)
		goto done;

	/* Somebody installed the same label concurrently: try to share it. */
	if (fl1)
		goto recheck;

	if (!freq->flr_label) {
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		if (copy_to_sockptr_offset(optval, offset, &fl->label,
					   sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(sk, sfl1, fl);
	return 0;
done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

/* setsockopt(IPV6_FLOWLABEL_MGR) entry point: dispatch on flr_action. */
int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
{
	struct in6_flowlabel_req freq;

	if (optlen < sizeof(freq))
		return -EINVAL;
	if (copy_from_sockptr(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		return ipv6_flowlabel_put(sk, &freq);
	case IPV6_FL_A_RENEW:
		return ipv6_flowlabel_renew(sk, &freq);
	case IPV6_FL_A_GET:
		return ipv6_flowlabel_get(sk, &freq, optval, optlen);
	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_FS

/* Iterator state for /proc/net/ip6_flowlabel. */
struct ip6fl_iter_state {
	struct seq_net_private p;
	struct pid_namespace *pid_ns;
	int bucket;	/* current fl_ht bucket */
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

/* First label of this netns, scanning buckets from 0. Needs RCU. */
static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
	}
	fl = NULL;
out:
	return fl;
}

/* Next label of this netns after @fl, advancing buckets as needed. */
static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

try_again:
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
	}
	fl = NULL;

out:
	return fl;
}

/* Skip to the @pos'th label for seq_file restarts. */
static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);
	if (fl)
		while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
			--pos;
	return pos ? NULL : fl;
}

static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock();
	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	if (v == SEQ_START_TOKEN)
		fl = ip6fl_get_first(seq);
	else
		fl = ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

/* Emit one /proc/net/ip6_flowlabel row (or the header line). */
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Label S Owner  Users  Linger Expires  Dst                              Opt\n");
	} else {
		struct ip6_flowlabel *fl = v;
		seq_printf(seq,
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
			   (unsigned int)ntohl(fl->label),
			   fl->share,
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
			   &fl->dst,
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

static const struct seq_operations ip6fl_seq_ops = {
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
			     &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
		return -ENOMEM;
	return 0;
}

static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
#endif

static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};

int ip6_flowlabel_init(void)
{
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
}

void ip6_flowlabel_cleanup(void)
{
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	timer_delete(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}