// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>

#include <net/net_namespace.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>

#include <linux/uaccess.h>

#define FL_MIN_LINGER	6	/* Minimal linger, in seconds. The 6s value
				   comes from the old IPv6 RFC; still a
				   reasonable default. */
#define FL_MAX_LINGER	150	/* Maximal linger timeout, in seconds, for
				   callers without CAP_NET_ADMIN. */

/* FL hash table */

#define FL_MAX_PER_SOCK	32	/* soft cap on labels attached to one socket */
#define FL_MAX_SIZE	4096	/* soft cap on labels in the whole table */
#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

/* Total number of labels currently interned in fl_ht. */
static atomic_t fl_size = ATOMIC_INIT(0);
/* Hash table of all flow labels, chained via fl->next, traversed under RCU. */
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];

static void ip6_fl_gc(struct timer_list *unused);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);

/*
 * FL hash table lock: serializes writers to fl_ht and the GC timer state;
 * readers use RCU.
 */

static DEFINE_SPINLOCK(ip6_fl_lock);

/* Protects the per-socket flowlabel lists (inet_sk(sk)->ipv6_fl_list). */

static DEFINE_SPINLOCK(ip6_sk_fl_lock);

DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);

/* Walk one hash bucket; caller must be in an RCU read-side section. */
#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))
/* Continue a bucket walk from an already-positioned element. */
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))

/* Walk the per-socket flowlabel list; caller must hold RCU read lock. */
#define for_each_sk_fl_rcu(sk, sfl)				\
	for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list);	\
	     sfl != NULL;					\
	     sfl = rcu_dereference(sfl->next))

/*
 * Find a flowlabel by @label in @net. Caller must be in an RCU read-side
 * section (or hold ip6_fl_lock); no reference is taken on the result.
 */
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	for_each_fl_rcu(FL_HASH(label), fl) {
		if (fl->label == label && net_eq(fl->fl_net, net))
			return fl;
	}
	return NULL;
}

/*
 * Like __fl_lookup(), but takes a user reference. Returns NULL if the label
 * is absent or already dying (users has dropped to zero).
 */
static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	rcu_read_lock();
	fl = __fl_lookup(net, label);
	if (fl && !atomic_inc_not_zero(&fl->users))
		fl = NULL;
	rcu_read_unlock();
	return fl;
}

/*
 * True for share modes that restrict who may attach to the label; such
 * labels are counted in the ipv6_flowlabel_exclusive static key.
 */
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	return fl->share == IPV6_FL_S_EXCL ||
	       fl->share == IPV6_FL_S_PROCESS ||
	       fl->share == IPV6_FL_S_USER;
}

/*
 * RCU callback: drop the pid reference held by process-shared labels, then
 * free the label and its cached options.
 */
static void fl_free_rcu(struct rcu_head *head)
{
	struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);

	if (fl->share == IPV6_FL_S_PROCESS)
		put_pid(fl->owner.pid);
	kfree(fl->opt);
	kfree(fl);
}


/*
 * Undo the static-key increment taken in fl_create() and free @fl after an
 * RCU grace period, so concurrent RCU readers stay safe.
 */
static void fl_free(struct ip6_flowlabel *fl)
{
	if (!fl)
		return;

	if (fl_shared_exclusive(fl) || fl->opt)
		static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);

	call_rcu(&fl->rcu, fl_free_rcu);
}

/*
 * Drop one user reference on @fl. When the last reference goes away, push
 * the expiry out by the linger time and (re)arm the GC timer so it fires no
 * later than that expiry.
 */
static void fl_release(struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_fl_lock);

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;
		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
	spin_unlock_bh(&ip6_fl_lock);
}

/*
 * GC timer callback: reap unreferenced, expired labels from every hash
 * bucket, then re-arm the timer for the earliest remaining expiry (or
 * FL_MAX_LINGER ahead if entries remain but none supplied a deadline).
 */
static void ip6_fl_gc(struct timer_list *unused)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;

	spin_lock(&ip6_fl_lock);

	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;
				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					/* Unlink and free; *flp stays valid,
					 * so do not advance flp. */
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched) {
		mod_timer(&ip6_fl_gc_timer, sched);
	}
	spin_unlock(&ip6_fl_lock);
}

/*
 * Net-namespace exit: unlink and free every unreferenced label that belongs
 * to @net, regardless of expiry.
 */
static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				*flp = fl->next;
				fl_free(fl);
				atomic_dec(&fl_size);
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}

/*
 * Insert @fl into the hash table under @label, or under a freshly chosen
 * random label when @label is zero. Returns NULL on success; if an entry
 * with the same non-zero label was interned concurrently, returns that
 * entry with a reference taken and leaves @fl untouched (caller frees it).
 */
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *lfl;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	rcu_read_lock();
	spin_lock_bh(&ip6_fl_lock);
	if (label == 0) {
		/* Keep drawing random 20-bit labels until we find a free,
		 * non-zero one. */
		for (;;) {
			fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK;
			if (fl->label) {
				lfl = __fl_lookup(net, fl->label);
				if (!lfl)
					break;
			}
		}
	} else {
		/*
		 * we dropped the ip6_fl_lock, so this entry could reappear
		 * and we need to recheck with it.
		 *
		 * OTOH no need to search the active socket first, like it is
		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
		 * with the same label can only appear on another sock
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl) {
			atomic_inc(&lfl->users);
			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock();
			return lfl;
		}
	}

	fl->lastuse = jiffies;
	fl->next = fl_ht[FL_HASH(fl->label)];
	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	rcu_read_unlock();
	return NULL;
}



/* Socket flowlabel lists */

/*
 * Find @label on @sk's per-socket list and take a user reference on it.
 * Returns NULL if the label is not attached to this socket or is dying.
 */
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock();
	for_each_sk_fl_rcu(sk, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock();
			return fl;
		}
	}
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);

/*
 * Detach and release every flowlabel linked on @sk's list. The lock is
 * dropped around fl_release()/kfree_rcu() for each entry and retaken to
 * fetch the next list head.
 */
void fl6_free_socklist(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (!rcu_access_pointer(inet->ipv6_fl_list))
		return;

	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		inet->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);

		fl_release(sfl->fl);
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/* Service routines */


/*
 * This is the only difficult place: the flowlabel enforces equal headers
 * before and including the routing header, yet the user may also supply
 * options following the rthdr. Merge the label's pinned headers with the
 * caller-supplied trailing options into @opt_space.
 */

struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
					 struct ip6_flowlabel *fl,
					 struct ipv6_txoptions *fopt)
{
	struct ipv6_txoptions *fl_opt = fl->opt;

	/* No trailing (post-rthdr) user options: the label's own options
	 * (possibly NULL) can be used as-is. */
	if (!fopt || fopt->opt_flen == 0)
		return fl_opt;

	if (fl_opt) {
		/* Headers up to and including rthdr come from the label. */
		opt_space->hopopt = fl_opt->hopopt;
		opt_space->dst0opt = fl_opt->dst0opt;
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	/* Trailing options always come from the caller. */
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	opt_space->tot_len = fopt->tot_len;
	return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);

/*
 * Validate a user-supplied timeout in seconds and convert it to jiffies.
 * Values below the minimum are raised to FL_MIN_LINGER; values above
 * FL_MAX_LINGER require CAP_NET_ADMIN. Returns 0 to signal rejection.
 */
static unsigned long check_linger(unsigned long ttl)
{
	if (ttl < FL_MIN_LINGER)
		return FL_MIN_LINGER*HZ;
	if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;
	return ttl*HZ;
}

/*
 * Extend @fl's linger and expiry from user-supplied second values. Timers
 * only ever grow here, never shrink. Returns -EPERM if check_linger()
 * rejected either value.
 */
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	linger = check_linger(linger);
	if (!linger)
		return -EPERM;
	expires = check_linger(expires);
	if (!expires)
		return -EPERM;

	spin_lock_bh(&ip6_fl_lock);
	fl->lastuse = jiffies;
	if (time_before(fl->linger, linger))
		fl->linger = linger;
	if (time_before(expires, fl->linger))
		expires = fl->linger;
	if (time_before(fl->expires, fl->lastuse + expires))
		fl->expires = fl->lastuse + expires;
	spin_unlock_bh(&ip6_fl_lock);

	return 0;
}

/*
 * Allocate and populate a new flowlabel from the user request @freq plus
 * any ancillary-data options trailing it in @optval. Takes the initial
 * user reference and, for exclusive-ish labels or labels carrying options,
 * bumps the ipv6_flowlabel_exclusive static key. On failure frees any
 * partial state, stores the error in *err_p and returns NULL.
 */
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;
	int addr_type;
	int err;

	/* Bytes following the aligned request header are cmsg options. */
	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc_obj(*fl);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
					     CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		/* Parse the raw cmsg buffer into fl->opt. */
		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt+1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		/* Options after the routing header cannot be pinned by a
		 * flowlabel; reject them. */
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	if (fl_shared_exclusive(fl) || fl->opt) {
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}

/*
 * Admission control for new labels: bounds the global table size and the
 * per-socket count. CAP_NET_ADMIN may exceed the soft limits, but never
 * the hard table capacity (room <= 0).
 */
static int mem_check(struct sock *sk)
{
	int room = FL_MAX_SIZE - atomic_read(&fl_size);
	struct ipv6_fl_socklist *sfl;
	int count = 0;

	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

	rcu_read_lock();
	for_each_sk_fl_rcu(sk, sfl)
		count++;
	rcu_read_unlock();

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
	     !capable(CAP_NET_ADMIN)))
		return -ENOBUFS;

	return 0;
}

/* Prepend @sfl (carrying @fl) onto @sk's RCU-protected flowlabel list. */
static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl,
			   struct ip6_flowlabel *fl)
{
	struct inet_sock *inet = inet_sk(sk);

	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->fl = fl;
	sfl->next = inet->ipv6_fl_list;
	rcu_assign_pointer(inet->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/*
 * getsockopt() side of the flowlabel manager: fill @freq with the label the
 * socket currently uses (received label, reflected label, or the attached
 * entry matching np->flow_label). Returns -ENOENT if none matches.
 */
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
			   int flags)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (flags & IPV6_FL_F_REMOTE) {
		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
		return 0;
	}

	if (inet6_test_bit(REPFLOW, sk)) {
		freq->flr_label = np->flow_label;
		return 0;
	}

	rcu_read_lock();

	for_each_sk_fl_rcu(sk, sfl) {
		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
			/* ip6_fl_lock keeps expires/linger consistent while
			 * we snapshot them. */
			spin_lock_bh(&ip6_fl_lock);
			freq->flr_label = sfl->fl->label;
			freq->flr_dst = sfl->fl->dst;
			freq->flr_share = sfl->fl->share;
			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
			freq->flr_linger = sfl->fl->linger / HZ;

			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock();
			return 0;
		}
	}
	rcu_read_unlock();

	return -ENOENT;
}

#define socklist_dereference(__sflp) \
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))

/*
 * IPV6_FL_A_PUT: detach @freq->flr_label from @sk (or clear TCP flowlabel
 * reflection when IPV6_FL_F_REFLECT is set). Returns -ESRCH if the label
 * is not attached to this socket.
 */
static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist __rcu **sflp;
	struct ipv6_fl_socklist *sfl;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		if (!inet6_test_bit(REPFLOW, sk))
			return -ESRCH;
		np->flow_label = 0;
		inet6_clear_bit(REPFLOW, sk);
		return 0;
	}

	spin_lock_bh(&ip6_sk_fl_lock);
	for (sflp = &inet_sk(sk)->ipv6_fl_list;
	     (sfl = socklist_dereference(*sflp)) != NULL;
	     sflp = &sfl->next) {
		if (sfl->fl->label == freq->flr_label)
			goto found;
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
	return -ESRCH;
found:
	/* If this was the label stamped on outgoing packets, stop using it. */
	if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
		np->flow_label &= ~IPV6_FLOWLABEL_MASK;
	*sflp = sfl->next;
	spin_unlock_bh(&ip6_sk_fl_lock);
	fl_release(sfl->fl);
	kfree_rcu(sfl, rcu);
	return 0;
}

/*
 * IPV6_FL_A_RENEW: renew a label attached to @sk; failing that, a
 * CAP_NET_ADMIN caller with share mode NONE may renew any label in the
 * namespace. Returns -ESRCH when no matching label exists.
 */
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct net *net = sock_net(sk);
	struct ipv6_fl_socklist *sfl;
	int err;

	rcu_read_lock();
	for_each_sk_fl_rcu(sk, sfl) {
		if (sfl->fl->label == freq->flr_label) {
			err = fl6_renew(sfl->fl, freq->flr_linger,
					freq->flr_expires);
			rcu_read_unlock();
			return err;
		}
	}
	rcu_read_unlock();

	if (freq->flr_share == IPV6_FL_S_NONE &&
	    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
		struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);

		if (fl) {
			err = fl6_renew(fl, freq->flr_linger,
					freq->flr_expires);
			fl_release(fl);
			return err;
		}
	}
	return -ESRCH;
}

/*
 * IPV6_FL_A_GET: attach an existing shared label to @sk, or create and
 * intern a new one (IPV6_FL_F_CREATE). With a zero flr_label the kernel
 * picks a random label and copies it back to userspace.
 */
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
			      sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		inet6_set_bit(REPFLOW, sk);
		return 0;
	}

	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	/* Allocation failure is tolerated here; sfl1 is only required on the
	 * paths that actually link a label, which check it below. */
	sfl1 = kmalloc_obj(*sfl1);

	if (freq->flr_label) {
		err = -EEXIST;
		rcu_read_lock();
		for_each_sk_fl_rcu(sk, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock();

		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
recheck:
			/* An entry with this label already exists: enforce
			 * exclusivity and share-mode compatibility, then
			 * attach the existing entry instead of @fl. */
			err = -EEXIST;
			if (freq->flr_flags&IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(sk, sfl1, fl1);
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	err = mem_check(sk);
	if (err != 0)
		goto done;

	/* A non-NULL return means someone interned the same label first;
	 * re-run the compatibility checks against that entry. */
	fl1 = fl_intern(net, fl, freq->flr_label);
	if (fl1)
		goto recheck;

	if (!freq->flr_label) {
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		if (copy_to_sockptr_offset(optval, offset, &fl->label,
					   sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(sk, sfl1, fl);
	return 0;

done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

/*
 * setsockopt(IPV6_FLOWLABEL_MGR) entry point: copy in the request and
 * dispatch on flr_action.
 */
int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
{
	struct in6_flowlabel_req freq;

	if (optlen < sizeof(freq))
		return -EINVAL;
	if (copy_from_sockptr(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		return ipv6_flowlabel_put(sk, &freq);
	case IPV6_FL_A_RENEW:
		return ipv6_flowlabel_renew(sk, &freq);
	case IPV6_FL_A_GET:
		return ipv6_flowlabel_get(sk, &freq, optval, optlen);
	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_FS

/* Iterator state for /proc/net/ip6_flowlabel. */
struct ip6fl_iter_state {
	struct seq_net_private p;
	struct pid_namespace *pid_ns;	/* for translating owner pids */
	int bucket;			/* current fl_ht bucket */
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

/* First label in this netns, scanning buckets from zero. Needs RCU. */
static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
	}
	fl = NULL;
out:
	return fl;
}

/* Next label in this netns after @fl, moving to later buckets as needed. */
static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

try_again:
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
	}
	fl = NULL;

out:
	return fl;
}

/* Advance from the first label to position @pos (0-based). */
static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);
	if (fl)
		while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
			--pos;
	return pos ? NULL : fl;
}

static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock();
	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	if (v == SEQ_START_TOKEN)
		fl = ip6fl_get_first(seq);
	else
		fl = ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

/* Emit one table row (or the header for SEQ_START_TOKEN). */
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Label S Owner  Users  Linger Expires  Dst                              Opt\n");
	} else {
		struct ip6_flowlabel *fl = v;
		seq_printf(seq,
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
			   (unsigned int)ntohl(fl->label),
			   fl->share,
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
			   &fl->dst,
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

static const struct seq_operations ip6fl_seq_ops = {
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
			     &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
		return -ENOMEM;
	return 0;
}

static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
#endif

static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};

int ip6_flowlabel_init(void)
{
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
}

void ip6_flowlabel_cleanup(void)
{
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	timer_delete(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}