// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>

#include <net/net_namespace.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>

#include <linux/uaccess.h>

#define FL_MIN_LINGER	6	/* Minimal linger. It is set to 6sec specified
				   in old IPv6 RFC. Well, it was reasonable value.
				 */
#define FL_MAX_LINGER	150	/* Maximal linger timeout */

/* FL hash table */

#define FL_MAX_PER_SOCK	32	/* per-socket label limit (see mem_check()) */
#define FL_MAX_SIZE	4096	/* global label limit (see mem_check()) */
#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

/* Number of labels currently interned in fl_ht, bounded by FL_MAX_SIZE. */
static atomic_t fl_size = ATOMIC_INIT(0);
/* Global label hash table; buckets are RCU-protected singly linked lists. */
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];

static void ip6_fl_gc(struct timer_list *unused);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);

/* FL hash table lock: it protects hash table updates and the GC */

static DEFINE_SPINLOCK(ip6_fl_lock);

/* Big socket sock: protects the per-socket flow label lists */

static DEFINE_SPINLOCK(ip6_sk_fl_lock);

/*
 * Enabled (via fl_create()) whenever any label with options or an
 * exclusive-ish share mode exists; deferred by HZ to rate-limit
 * branch patching.
 */
DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);

/* Walk one global hash bucket; caller must be in an RCU read-side section. */
#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))

/* Walk a socket's flow label list; caller must be in an RCU read-side section. */
#define for_each_sk_fl_rcu(np, sfl)				\
	for (sfl = rcu_dereference(np->ipv6_fl_list);		\
	     sfl != NULL;					\
	     sfl = rcu_dereference(sfl->next))

/*
 * Bucket scan for @label in @net. No reference is taken; the caller
 * must hold RCU or ip6_fl_lock and handle refcounting itself.
 */
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	for_each_fl_rcu(FL_HASH(label), fl) {
		if (fl->label == label && net_eq(fl->fl_net, net))
			return fl;
	}
	return NULL;
}

/*
 * Lookup that takes a user reference. Returns NULL when the label is
 * absent or already has zero users (i.e. is on its way to being freed).
 */
static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	rcu_read_lock();
	fl = __fl_lookup(net, label);
	if (fl && !atomic_inc_not_zero(&fl->users))
		fl = NULL;
	rcu_read_unlock();
	return fl;
}

/* True for share modes that restrict who may attach to the label. */
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	return fl->share == IPV6_FL_S_EXCL ||
	       fl->share == IPV6_FL_S_PROCESS ||
	       fl->share == IPV6_FL_S_USER;
}

/* RCU callback: final teardown of a label after the grace period. */
static void fl_free_rcu(struct rcu_head *head)
{
	struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);

	/* a pid reference is only held for process-shared labels */
	if (fl->share == IPV6_FL_S_PROCESS)
		put_pid(fl->owner.pid);
	kfree(fl->opt);
	kfree(fl);
}


/* Free a label (NULL-safe), dropping the static-key ref taken in fl_create(). */
static void fl_free(struct ip6_flowlabel *fl)
{
	if (!fl)
		return;

	if (fl_shared_exclusive(fl) || fl->opt)
		static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);

	call_rcu(&fl->rcu, fl_free_rcu);
}

/*
 * Drop one user reference. On the last put, push the expiry out to at
 * least lastuse + linger and (re)arm the GC timer to fire by then.
 */
static void fl_release(struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_fl_lock);

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;
		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		/* exclusive labels have no other users left: drop options now */
		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
			struct ipv6_txoptions *opt = fl->opt;
			fl->opt = NULL;
			kfree(opt);
		}
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
	spin_unlock_bh(&ip6_fl_lock);
}

/* Timer callback: reap expired labels that have no remaining users. */
static void ip6_fl_gc(struct timer_list *unused)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;

	spin_lock(&ip6_fl_lock);

	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;
				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					/* expired: unlink from the bucket and free */
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				/* remember the earliest future expiry */
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	/* labels remain but none gave a deadline: re-poll after FL_MAX_LINGER */
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched) {
		mod_timer(&ip6_fl_gc_timer, sched);
	}
	spin_unlock(&ip6_fl_lock);
}

/* Release every unreferenced label belonging to a dying network namespace. */
static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				*flp = fl->next;
				fl_free(fl);
				atomic_dec(&fl_size);
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}

/*
 * Insert @fl into the global hash. @label == 0 means "pick an unused
 * random label". Returns NULL on success. If an entry with the same
 * label already exists, returns that entry with a user reference taken
 * and leaves @fl untouched for the caller to dispose of.
 */
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *lfl;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	rcu_read_lock();
	spin_lock_bh(&ip6_fl_lock);
	if (label == 0) {
		for (;;) {
			fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK;
			if (fl->label) {
				lfl = __fl_lookup(net, fl->label);
				if (!lfl)
					break;
			}
		}
	} else {
		/*
		 * we dropped the ip6_fl_lock, so this entry could reappear
		 * and we need to recheck with it.
		 *
		 * OTOH no need to search the active socket first, like it is
		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
		 * with the same label can only appear on another sock
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl) {
			atomic_inc(&lfl->users);
			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock();
			return lfl;
		}
	}

	fl->lastuse = jiffies;
	/* publish: set next before rcu_assign_pointer makes fl visible */
	fl->next = fl_ht[FL_HASH(fl->label)];
	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	rcu_read_unlock();
	return NULL;
}



/* Socket flowlabel lists */

/*
 * Find @label on @sk's list and take a user reference.
 * Returns NULL if the socket does not have that label attached.
 */
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;
	struct ipv6_pinfo *np = inet6_sk(sk);

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock();
			return fl;
		}
	}
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);

/* Detach and release every flow label attached to @sk (socket teardown). */
void fl6_free_socklist(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (!rcu_access_pointer(np->ipv6_fl_list))
		return;

	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		np->ipv6_fl_list = sfl->next;
		/* release the label outside ip6_sk_fl_lock */
		spin_unlock_bh(&ip6_sk_fl_lock);

		fl_release(sfl->fl);
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/* Service routines */


/*
   It is the only difficult place. flowlabel enforces equal headers
   before and including routing header, however user may supply options
   following rthdr.
 */

/*
 * Merge a label's stored options (headers up to and including rthdr)
 * with per-packet options (@fopt, options following rthdr) into the
 * caller-provided @opt_space. Returns whichever of the three structs
 * holds the combined result; no allocation is performed.
 */
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
					 struct ip6_flowlabel *fl,
					 struct ipv6_txoptions *fopt)
{
	struct ipv6_txoptions *fl_opt = fl->opt;

	/* no per-packet dst1 options: the label's options stand alone */
	if (!fopt || fopt->opt_flen == 0)
		return fl_opt;

	if (fl_opt) {
		opt_space->hopopt = fl_opt->hopopt;
		opt_space->dst0opt = fl_opt->dst0opt;
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	opt_space->tot_len = fopt->tot_len;
	return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);

/*
 * Clamp a user-supplied linger/expires value (seconds) and convert it
 * to jiffies. Values above FL_MAX_LINGER need CAP_NET_ADMIN; a return
 * of 0 means "not permitted".
 */
static unsigned long check_linger(unsigned long ttl)
{
	if (ttl < FL_MIN_LINGER)
		return FL_MIN_LINGER*HZ;
	if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;
	return ttl*HZ;
}

/*
 * Extend a label's linger and expiry times. Returns -EPERM when the
 * requested values exceed what check_linger() allows.
 */
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	linger = check_linger(linger);
	if (!linger)
		return -EPERM;
	expires = check_linger(expires);
	if (!expires)
		return -EPERM;

	spin_lock_bh(&ip6_fl_lock);
	fl->lastuse = jiffies;
	/* linger and expiry only ever grow, never shrink */
	if (time_before(fl->linger, linger))
		fl->linger = linger;
	if (time_before(expires, fl->linger))
		expires = fl->linger;
	if (time_before(fl->expires, fl->lastuse + expires))
		fl->expires = fl->lastuse + expires;
	spin_unlock_bh(&ip6_fl_lock);

	return 0;
}

/*
 * Allocate and initialize a new flow label from a user request.
 * cmsg data trailing the request struct in @optval supplies the IPv6
 * tx options to store in the label. On failure returns NULL with the
 * error code stored in *err_p.
 */
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;
	int addr_type;
	int err;

	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		/* raw cmsg blob is copied just past the ipv6_txoptions header */
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
					     CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt+1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		/* options after rthdr cannot be stored in a label */
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	/* fl_free() drops this static-key reference again */
	if (fl_shared_exclusive(fl) || fl->opt) {
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}

/* Enforce per-socket and global label limits (relaxed for CAP_NET_ADMIN). */
static int mem_check(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;
	int room = FL_MAX_SIZE - atomic_read(&fl_size);
	int count = 0;

	/* fast path: plenty of global room, skip counting this socket */
	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

	rcu_read_lock();
	for_each_sk_fl_rcu(np, sfl)
		count++;
	rcu_read_unlock();

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
	     !capable(CAP_NET_ADMIN)))
		return -ENOBUFS;

	return 0;
}

/* Link @fl onto @np's socket list; publication is via rcu_assign_pointer. */
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
			   struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->fl = fl;
	sfl->next = np->ipv6_fl_list;
	rcu_assign_pointer(np->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/*
 * getsockopt side of the flow label manager: report the label currently
 * associated with the socket (the remote or reflected label when the
 * corresponding flag/mode is active).
 */
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
			   int flags)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (flags & IPV6_FL_F_REMOTE) {
		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
		return 0;
	}

	if (inet6_test_bit(REPFLOW, sk)) {
		freq->flr_label = np->flow_label;
		return 0;
	}

	rcu_read_lock();

	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
			/* ip6_fl_lock keeps expires/linger consistent */
			spin_lock_bh(&ip6_fl_lock);
			freq->flr_label = sfl->fl->label;
			freq->flr_dst = sfl->fl->dst;
			freq->flr_share = sfl->fl->share;
			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
			freq->flr_linger = sfl->fl->linger / HZ;

			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock();
			return 0;
		}
	}
	rcu_read_unlock();

	return -ENOENT;
}

#define socklist_dereference(__sflp) \
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))

/* IPV6_FL_A_PUT: detach a label from the socket and drop its reference. */
static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist __rcu **sflp;
	struct ipv6_fl_socklist *sfl;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		if (!inet6_test_bit(REPFLOW, sk))
			return -ESRCH;
		np->flow_label = 0;
		inet6_clear_bit(REPFLOW, sk);
		return 0;
	}

	spin_lock_bh(&ip6_sk_fl_lock);
	for (sflp = &np->ipv6_fl_list;
	     (sfl = socklist_dereference(*sflp)) != NULL;
	     sflp = &sfl->next) {
		if (sfl->fl->label == freq->flr_label)
			goto found;
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
	return -ESRCH;
found:
	/* stop stamping outgoing packets with the label being removed */
	if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
		np->flow_label &= ~IPV6_FLOWLABEL_MASK;
	*sflp = sfl->next;
	spin_unlock_bh(&ip6_sk_fl_lock);
	fl_release(sfl->fl);
	kfree_rcu(sfl, rcu);
	return 0;
}

/* IPV6_FL_A_RENEW: extend a label attached to the socket (or, for
 * CAP_NET_ADMIN, any unshared label in the namespace).
 */
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_fl_socklist *sfl;
	int err;

	rcu_read_lock();
	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == freq->flr_label) {
			err = fl6_renew(sfl->fl, freq->flr_linger,
					freq->flr_expires);
			rcu_read_unlock();
			return err;
		}
	}
	rcu_read_unlock();

	if (freq->flr_share == IPV6_FL_S_NONE &&
	    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
		struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);

		if (fl) {
			err = fl6_renew(fl, freq->flr_linger,
					freq->flr_expires);
			fl_release(fl);
			return err;
		}
	}
	return -ESRCH;
}

/*
 * IPV6_FL_A_GET: attach a label to the socket, joining an existing
 * label when the share mode permits or creating a new one.
 */
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
			      sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if
		    (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		inet6_set_bit(REPFLOW, sk);
		return 0;
	}

	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	/* allocation failure is only fatal if we end up needing sfl1 */
	sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

	if (freq->flr_label) {
		err = -EEXIST;
		rcu_read_lock();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock();

		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
recheck:
			err = -EEXIST;
			if (freq->flr_flags&IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			/* joining is only allowed within the same share scope */
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			/* the new request may extend the existing label */
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(np, sfl1, fl1);
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	err = mem_check(sk);
	if (err != 0)
		goto done;

	/* a racing creator may have interned the same label: join it */
	fl1 = fl_intern(net, fl, freq->flr_label);
	if (fl1)
		goto recheck;

	if (!freq->flr_label) {
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		/* report the kernel-chosen random label back to userspace */
		if (copy_to_sockptr_offset(optval, offset, &fl->label,
					   sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(np, sfl1, fl);
	return 0;
done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

/* setsockopt(IPV6_FLOWLABEL_MGR) entry point: dispatch on flr_action. */
int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
{
	struct in6_flowlabel_req freq;

	if (optlen < sizeof(freq))
		return -EINVAL;
	if (copy_from_sockptr(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		return ipv6_flowlabel_put(sk, &freq);
	case IPV6_FL_A_RENEW:
		return ipv6_flowlabel_renew(sk, &freq);
	case IPV6_FL_A_GET:
		return ipv6_flowlabel_get(sk, &freq, optval, optlen);
	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_FS

/* Iterator state for /proc/net/ip6_flowlabel */
struct ip6fl_iter_state {
	struct seq_net_private p;
	struct pid_namespace *pid_ns;
	int bucket;	/* current fl_ht bucket */
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

/* First label belonging to this netns, scanning buckets from 0. */
static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
	}
	fl = NULL;
out:
	return fl;
}

/* Next label after @fl in this netns, crossing buckets as needed. */
static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

try_again:
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
	}
	fl = NULL;

out:
	return fl;
}

/* Skip forward @pos entries from the first label of this netns. */
static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);
	if (fl)
		while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
			--pos;
	return pos ? NULL : fl;
}

/* seq_file start: enter the RCU read-side section for the whole walk. */
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock();
	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	if (v == SEQ_START_TOKEN)
		fl = ip6fl_get_first(seq);
	else
		fl = ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

/* Emit one table row (or the column header for SEQ_START_TOKEN). */
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n");
	} else {
		struct ip6_flowlabel *fl = v;
		seq_printf(seq,
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
			   (unsigned int)ntohl(fl->label),
			   fl->share,
			   /* Owner column: pid, uid, or 0 depending on share mode */
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
			   &fl->dst,
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

static const struct seq_operations ip6fl_seq_ops = {
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
			     &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
		return -ENOMEM;
	return 0;
}

static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
#endif

/* Per-netns teardown: drop this netns's labels, then its proc entry. */
static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};

int ip6_flowlabel_init(void)
{
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
}

void ip6_flowlabel_cleanup(void)
{
	/* flush pending deferred static-key work before the timer goes away */
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	del_timer(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}