1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * The IP to API glue. 8 * 9 * Authors: see ip.c 10 * 11 * Fixes: 12 * Many : Split from ip.c , see ip.c for history. 13 * Martin Mares : TOS setting fixed. 14 * Alan Cox : Fixed a couple of oopses in Martin's 15 * TOS tweaks. 16 * Mike McLagan : Routing by source 17 */ 18 19 #include <linux/module.h> 20 #include <linux/types.h> 21 #include <linux/mm.h> 22 #include <linux/skbuff.h> 23 #include <linux/ip.h> 24 #include <linux/icmp.h> 25 #include <linux/inetdevice.h> 26 #include <linux/netdevice.h> 27 #include <linux/slab.h> 28 #include <net/sock.h> 29 #include <net/ip.h> 30 #include <net/icmp.h> 31 #include <net/tcp_states.h> 32 #include <linux/udp.h> 33 #include <linux/igmp.h> 34 #include <linux/netfilter.h> 35 #include <linux/route.h> 36 #include <linux/mroute.h> 37 #include <net/inet_ecn.h> 38 #include <net/route.h> 39 #include <net/xfrm.h> 40 #include <net/compat.h> 41 #include <net/checksum.h> 42 #if IS_ENABLED(CONFIG_IPV6) 43 #include <net/transp_v6.h> 44 #endif 45 #include <net/ip_fib.h> 46 47 #include <linux/errqueue.h> 48 #include <linux/uaccess.h> 49 50 #include <linux/bpfilter.h> 51 52 /* 53 * SOL_IP control messages. 54 */ 55 56 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 57 { 58 struct in_pktinfo info = *PKTINFO_SKB_CB(skb); 59 60 info.ipi_addr.s_addr = ip_hdr(skb)->daddr; 61 62 put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 63 } 64 65 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) 66 { 67 int ttl = ip_hdr(skb)->ttl; 68 put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); 69 } 70 71 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) 72 { 73 put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos); 74 } 75 76 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) 77 { 78 if (IPCB(skb)->opt.optlen == 0) 79 return; 80 81 put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, 82 ip_hdr(skb) + 1); 83 } 84 85 86 static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg, 87 struct sk_buff *skb) 88 { 89 unsigned char optbuf[sizeof(struct ip_options) + 40]; 90 struct ip_options *opt = (struct ip_options *)optbuf; 91 92 if (IPCB(skb)->opt.optlen == 0) 93 return; 94 95 if (ip_options_echo(net, opt, skb)) { 96 msg->msg_flags |= MSG_CTRUNC; 97 return; 98 } 99 ip_options_undo(opt); 100 101 put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); 102 } 103 104 static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb) 105 { 106 int val; 107 108 if (IPCB(skb)->frag_max_size == 0) 109 return; 110 111 val = IPCB(skb)->frag_max_size; 112 put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val); 113 } 114 115 static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, 116 int tlen, int offset) 117 { 118 __wsum csum = skb->csum; 119 120 if (skb->ip_summed != CHECKSUM_COMPLETE) 121 return; 122 123 if (offset != 0) { 124 int tend_off = skb_transport_offset(skb) + tlen; 125 csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0)); 126 } 127 128 put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); 129 } 130 131 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) 132 { 133 char *secdata; 134 u32 seclen, secid; 135 int err; 136 137 err = security_socket_getpeersec_dgram(NULL, skb, &secid); 138 if (err) 139 return; 140 141 err = security_secid_to_secctx(secid, &secdata, &seclen); 142 if (err) 143 return; 144 145 put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); 146 security_release_secctx(secdata, seclen); 147 } 148 149 static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) 150 { 151 __be16 _ports[2], *ports; 152 struct sockaddr_in sin; 153 154 /* All current transport protocols have the port numbers in the 155 * first four bytes of the transport header and this function is 156 * written with this assumption in mind. 157 */ 158 ports = skb_header_pointer(skb, skb_transport_offset(skb), 159 sizeof(_ports), &_ports); 160 if (!ports) 161 return; 162 163 sin.sin_family = AF_INET; 164 sin.sin_addr.s_addr = ip_hdr(skb)->daddr; 165 sin.sin_port = ports[1]; 166 memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 167 168 put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); 169 } 170 171 void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, 172 struct sk_buff *skb, int tlen, int offset) 173 { 174 unsigned long flags = inet_cmsg_flags(inet_sk(sk)); 175 176 if (!flags) 177 return; 178 179 /* Ordered by supposed usage frequency */ 180 if (flags & IP_CMSG_PKTINFO) { 181 ip_cmsg_recv_pktinfo(msg, skb); 182 183 flags &= ~IP_CMSG_PKTINFO; 184 if (!flags) 185 return; 186 } 187 188 if (flags & IP_CMSG_TTL) { 189 ip_cmsg_recv_ttl(msg, skb); 190 191 flags &= ~IP_CMSG_TTL; 192 if (!flags) 193 return; 194 } 195 196 if (flags & IP_CMSG_TOS) { 197 ip_cmsg_recv_tos(msg, skb); 198 199 flags &= ~IP_CMSG_TOS; 200 if (!flags) 201 return; 202 } 203 204 if (flags & IP_CMSG_RECVOPTS) { 205 ip_cmsg_recv_opts(msg, skb); 206 207 flags &= ~IP_CMSG_RECVOPTS; 208 if (!flags) 209 return; 210 } 211 212 if (flags & IP_CMSG_RETOPTS) { 213 ip_cmsg_recv_retopts(sock_net(sk), msg, skb); 214 215 flags &= ~IP_CMSG_RETOPTS; 216 if (!flags) 217 return; 218 } 219 220 if (flags & IP_CMSG_PASSSEC) { 221 ip_cmsg_recv_security(msg, skb); 222 223 flags &= ~IP_CMSG_PASSSEC; 224 if (!flags) 225 return; 226 } 227 228 if (flags & IP_CMSG_ORIGDSTADDR) { 229 ip_cmsg_recv_dstaddr(msg, skb); 230 231 flags &= ~IP_CMSG_ORIGDSTADDR; 232 if (!flags) 233 return; 234 } 235 236 if (flags & IP_CMSG_CHECKSUM) 237 ip_cmsg_recv_checksum(msg, skb, tlen, offset); 238 239 if (flags & IP_CMSG_RECVFRAGSIZE) 240 ip_cmsg_recv_fragsize(msg, skb); 241 } 242 EXPORT_SYMBOL(ip_cmsg_recv_offset); 243 244 int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, 245 bool allow_ipv6) 246 { 247 int err, val; 248 struct cmsghdr *cmsg; 249 struct net *net = sock_net(sk); 250 251 for_each_cmsghdr(cmsg, msg) { 252 if (!CMSG_OK(msg, cmsg)) 253 return -EINVAL; 254 #if IS_ENABLED(CONFIG_IPV6) 255 if (allow_ipv6 && 256 cmsg->cmsg_level == SOL_IPV6 && 257 cmsg->cmsg_type == IPV6_PKTINFO) { 258 struct in6_pktinfo *src_info; 259 260 if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info))) 261 return -EINVAL; 262 src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); 263 if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) 264 return -EINVAL; 265 if (src_info->ipi6_ifindex) 266 ipc->oif = src_info->ipi6_ifindex; 267 ipc->addr = src_info->ipi6_addr.s6_addr32[3]; 268 continue; 269 } 270 #endif 271 if (cmsg->cmsg_level == SOL_SOCKET) { 272 err = __sock_cmsg_send(sk, cmsg, &ipc->sockc); 273 if (err) 274 return err; 275 continue; 276 } 277 278 if (cmsg->cmsg_level != SOL_IP) 279 continue; 280 switch (cmsg->cmsg_type) { 281 case IP_RETOPTS: 282 err = cmsg->cmsg_len - sizeof(struct cmsghdr); 283 284 /* Our caller is responsible for freeing ipc->opt */ 285 err = ip_options_get(net, &ipc->opt, 286 KERNEL_SOCKPTR(CMSG_DATA(cmsg)), 287 err < 40 ? err : 40); 288 if (err) 289 return err; 290 break; 291 case IP_PKTINFO: 292 { 293 struct in_pktinfo *info; 294 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) 295 return -EINVAL; 296 info = (struct in_pktinfo *)CMSG_DATA(cmsg); 297 if (info->ipi_ifindex) 298 ipc->oif = info->ipi_ifindex; 299 ipc->addr = info->ipi_spec_dst.s_addr; 300 break; 301 } 302 case IP_TTL: 303 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) 304 return -EINVAL; 305 val = *(int *)CMSG_DATA(cmsg); 306 if (val < 1 || val > 255) 307 return -EINVAL; 308 ipc->ttl = val; 309 break; 310 case IP_TOS: 311 if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) 312 val = *(int *)CMSG_DATA(cmsg); 313 else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8))) 314 val = *(u8 *)CMSG_DATA(cmsg); 315 else 316 return -EINVAL; 317 if (val < 0 || val > 255) 318 return -EINVAL; 319 ipc->tos = val; 320 ipc->priority = rt_tos2priority(ipc->tos); 321 break; 322 case IP_PROTOCOL: 323 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) 324 return -EINVAL; 325 val = *(int *)CMSG_DATA(cmsg); 326 if (val < 1 || val > 255) 327 return -EINVAL; 328 ipc->protocol = val; 329 break; 330 default: 331 return -EINVAL; 332 } 333 } 334 return 0; 335 } 336 337 static void ip_ra_destroy_rcu(struct rcu_head *head) 338 { 339 struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); 340 341 sock_put(ra->saved_sk); 342 kfree(ra); 343 } 344 345 int ip_ra_control(struct sock *sk, unsigned char on, 346 void (*destructor)(struct sock *)) 347 { 348 struct ip_ra_chain *ra, *new_ra; 349 struct ip_ra_chain __rcu **rap; 350 struct net *net = sock_net(sk); 351 352 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) 353 return -EINVAL; 354 355 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 356 if (on && !new_ra) 357 return -ENOMEM; 358 359 mutex_lock(&net->ipv4.ra_mutex); 360 for (rap = &net->ipv4.ra_chain; 361 (ra = rcu_dereference_protected(*rap, 362 lockdep_is_held(&net->ipv4.ra_mutex))) != NULL; 363 rap = &ra->next) { 364 if (ra->sk == sk) { 365 if (on) { 366 mutex_unlock(&net->ipv4.ra_mutex); 367 kfree(new_ra); 368 return -EADDRINUSE; 369 } 370 /* dont let ip_call_ra_chain() use sk again */ 371 ra->sk = NULL; 372 RCU_INIT_POINTER(*rap, ra->next); 373 mutex_unlock(&net->ipv4.ra_mutex); 374 375 if (ra->destructor) 376 ra->destructor(sk); 377 /* 378 * Delay sock_put(sk) and kfree(ra) after one rcu grace 379 * period. This guarantee ip_call_ra_chain() dont need 380 * to mess with socket refcounts. 381 */ 382 ra->saved_sk = sk; 383 call_rcu(&ra->rcu, ip_ra_destroy_rcu); 384 return 0; 385 } 386 } 387 if (!new_ra) { 388 mutex_unlock(&net->ipv4.ra_mutex); 389 return -ENOBUFS; 390 } 391 new_ra->sk = sk; 392 new_ra->destructor = destructor; 393 394 RCU_INIT_POINTER(new_ra->next, ra); 395 rcu_assign_pointer(*rap, new_ra); 396 sock_hold(sk); 397 mutex_unlock(&net->ipv4.ra_mutex); 398 399 return 0; 400 } 401 402 static void ipv4_icmp_error_rfc4884(const struct sk_buff *skb, 403 struct sock_ee_data_rfc4884 *out) 404 { 405 switch (icmp_hdr(skb)->type) { 406 case ICMP_DEST_UNREACH: 407 case ICMP_TIME_EXCEEDED: 408 case ICMP_PARAMETERPROB: 409 ip_icmp_error_rfc4884(skb, out, sizeof(struct icmphdr), 410 icmp_hdr(skb)->un.reserved[1] * 4); 411 } 412 } 413 414 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 415 __be16 port, u32 info, u8 *payload) 416 { 417 struct sock_exterr_skb *serr; 418 419 skb = skb_clone(skb, GFP_ATOMIC); 420 if (!skb) 421 return; 422 423 serr = SKB_EXT_ERR(skb); 424 serr->ee.ee_errno = err; 425 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; 426 serr->ee.ee_type = icmp_hdr(skb)->type; 427 serr->ee.ee_code = icmp_hdr(skb)->code; 428 serr->ee.ee_pad = 0; 429 serr->ee.ee_info = info; 430 serr->ee.ee_data = 0; 431 serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) - 432 skb_network_header(skb); 433 serr->port = port; 434 435 if (skb_pull(skb, payload - skb->data)) { 436 if (inet_test_bit(RECVERR_RFC4884, sk)) 437 ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); 438 439 skb_reset_transport_header(skb); 440 if (sock_queue_err_skb(sk, skb) == 0) 441 return; 442 } 443 kfree_skb(skb); 444 } 445 EXPORT_SYMBOL_GPL(ip_icmp_error); 446 447 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) 448 { 449 struct sock_exterr_skb *serr; 450 struct iphdr *iph; 451 struct sk_buff *skb; 452 453 if (!inet_test_bit(RECVERR, sk)) 454 return; 455 456 skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC); 457 if (!skb) 458 return; 459 460 skb_put(skb, sizeof(struct iphdr)); 461 skb_reset_network_header(skb); 462 iph = ip_hdr(skb); 463 iph->daddr = daddr; 464 465 serr = SKB_EXT_ERR(skb); 466 serr->ee.ee_errno = err; 467 serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; 468 serr->ee.ee_type = 0; 469 serr->ee.ee_code = 0; 470 serr->ee.ee_pad = 0; 471 serr->ee.ee_info = info; 472 serr->ee.ee_data = 0; 473 serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); 474 serr->port = port; 475 476 __skb_pull(skb, skb_tail_pointer(skb) - skb->data); 477 skb_reset_transport_header(skb); 478 479 if (sock_queue_err_skb(sk, skb)) 480 kfree_skb(skb); 481 } 482 483 /* For some errors we have valid addr_offset even with zero payload and 484 * zero port. Also, addr_offset should be supported if port is set. 485 */ 486 static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr) 487 { 488 return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || 489 serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; 490 } 491 492 /* IPv4 supports cmsg on all imcp errors and some timestamps 493 * 494 * Timestamp code paths do not initialize the fields expected by cmsg: 495 * the PKTINFO fields in skb->cb[]. Fill those in here. 496 */ 497 static bool ipv4_datagram_support_cmsg(const struct sock *sk, 498 struct sk_buff *skb, 499 int ee_origin) 500 { 501 struct in_pktinfo *info; 502 503 if (ee_origin == SO_EE_ORIGIN_ICMP) 504 return true; 505 506 if (ee_origin == SO_EE_ORIGIN_LOCAL) 507 return false; 508 509 /* Support IP_PKTINFO on tstamp packets if requested, to correlate 510 * timestamp with egress dev. Not possible for packets without iif 511 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). 512 */ 513 info = PKTINFO_SKB_CB(skb); 514 if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) || 515 !info->ipi_ifindex) 516 return false; 517 518 info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; 519 return true; 520 } 521 522 /* 523 * Handle MSG_ERRQUEUE 524 */ 525 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) 526 { 527 struct sock_exterr_skb *serr; 528 struct sk_buff *skb; 529 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); 530 struct { 531 struct sock_extended_err ee; 532 struct sockaddr_in offender; 533 } errhdr; 534 int err; 535 int copied; 536 537 err = -EAGAIN; 538 skb = sock_dequeue_err_skb(sk); 539 if (!skb) 540 goto out; 541 542 copied = skb->len; 543 if (copied > len) { 544 msg->msg_flags |= MSG_TRUNC; 545 copied = len; 546 } 547 err = skb_copy_datagram_msg(skb, 0, msg, copied); 548 if (unlikely(err)) { 549 kfree_skb(skb); 550 return err; 551 } 552 sock_recv_timestamp(msg, sk, skb); 553 554 serr = SKB_EXT_ERR(skb); 555 556 if (sin && ipv4_datagram_support_addr(serr)) { 557 sin->sin_family = AF_INET; 558 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + 559 serr->addr_offset); 560 sin->sin_port = serr->port; 561 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 562 *addr_len = sizeof(*sin); 563 } 564 565 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); 566 sin = &errhdr.offender; 567 memset(sin, 0, sizeof(*sin)); 568 569 if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { 570 sin->sin_family = AF_INET; 571 sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 572 if (inet_cmsg_flags(inet_sk(sk))) 573 ip_cmsg_recv(msg, skb); 574 } 575 576 put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr); 577 578 /* Now we could try to dump offended packet options */ 579 580 msg->msg_flags |= MSG_ERRQUEUE; 581 err = copied; 582 583 consume_skb(skb); 584 out: 585 return err; 586 } 587 588 void ip_sock_set_tos(struct sock *sk, int val) 589 { 590 u8 old_tos = READ_ONCE(inet_sk(sk)->tos); 591 592 if (sk->sk_type == SOCK_STREAM) { 593 val &= ~INET_ECN_MASK; 594 val |= old_tos & INET_ECN_MASK; 595 } 596 if (old_tos != val) { 597 WRITE_ONCE(inet_sk(sk)->tos, val); 598 WRITE_ONCE(sk->sk_priority, rt_tos2priority(val)); 599 sk_dst_reset(sk); 600 } 601 } 602 EXPORT_SYMBOL(ip_sock_set_tos); 603 604 void ip_sock_set_freebind(struct sock *sk) 605 { 606 inet_set_bit(FREEBIND, sk); 607 } 608 EXPORT_SYMBOL(ip_sock_set_freebind); 609 610 void ip_sock_set_recverr(struct sock *sk) 611 { 612 inet_set_bit(RECVERR, sk); 613 } 614 EXPORT_SYMBOL(ip_sock_set_recverr); 615 616 int ip_sock_set_mtu_discover(struct sock *sk, int val) 617 { 618 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) 619 return -EINVAL; 620 WRITE_ONCE(inet_sk(sk)->pmtudisc, val); 621 return 0; 622 } 623 EXPORT_SYMBOL(ip_sock_set_mtu_discover); 624 625 void ip_sock_set_pktinfo(struct sock *sk) 626 { 627 inet_set_bit(PKTINFO, sk); 628 } 629 EXPORT_SYMBOL(ip_sock_set_pktinfo); 630 631 /* 632 * Socket option code for IP. This is the end of the line after any 633 * TCP,UDP etc options on an IP socket. 634 */ 635 static bool setsockopt_needs_rtnl(int optname) 636 { 637 switch (optname) { 638 case IP_ADD_MEMBERSHIP: 639 case IP_ADD_SOURCE_MEMBERSHIP: 640 case IP_BLOCK_SOURCE: 641 case IP_DROP_MEMBERSHIP: 642 case IP_DROP_SOURCE_MEMBERSHIP: 643 case IP_MSFILTER: 644 case IP_UNBLOCK_SOURCE: 645 case MCAST_BLOCK_SOURCE: 646 case MCAST_MSFILTER: 647 case MCAST_JOIN_GROUP: 648 case MCAST_JOIN_SOURCE_GROUP: 649 case MCAST_LEAVE_GROUP: 650 case MCAST_LEAVE_SOURCE_GROUP: 651 case MCAST_UNBLOCK_SOURCE: 652 return true; 653 } 654 return false; 655 } 656 657 static int set_mcast_msfilter(struct sock *sk, int ifindex, 658 int numsrc, int fmode, 659 struct sockaddr_storage *group, 660 struct sockaddr_storage *list) 661 { 662 struct ip_msfilter *msf; 663 struct sockaddr_in *psin; 664 int err, i; 665 666 msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL); 667 if (!msf) 668 return -ENOBUFS; 669 670 psin = (struct sockaddr_in *)group; 671 if (psin->sin_family != AF_INET) 672 goto Eaddrnotavail; 673 msf->imsf_multiaddr = psin->sin_addr.s_addr; 674 msf->imsf_interface = 0; 675 msf->imsf_fmode = fmode; 676 msf->imsf_numsrc = numsrc; 677 for (i = 0; i < numsrc; ++i) { 678 psin = (struct sockaddr_in *)&list[i]; 679 680 if (psin->sin_family != AF_INET) 681 goto Eaddrnotavail; 682 msf->imsf_slist_flex[i] = psin->sin_addr.s_addr; 683 } 684 err = ip_mc_msfilter(sk, msf, ifindex); 685 kfree(msf); 686 return err; 687 688 Eaddrnotavail: 689 kfree(msf); 690 return -EADDRNOTAVAIL; 691 } 692 693 static int copy_group_source_from_sockptr(struct group_source_req *greqs, 694 sockptr_t optval, int optlen) 695 { 696 if (in_compat_syscall()) { 697 struct compat_group_source_req gr32; 698 699 if (optlen != sizeof(gr32)) 700 return -EINVAL; 701 if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) 702 return -EFAULT; 703 greqs->gsr_interface = gr32.gsr_interface; 704 greqs->gsr_group = gr32.gsr_group; 705 greqs->gsr_source = gr32.gsr_source; 706 } else { 707 if (optlen != sizeof(*greqs)) 708 return -EINVAL; 709 if (copy_from_sockptr(greqs, optval, sizeof(*greqs))) 710 return -EFAULT; 711 } 712 713 return 0; 714 } 715 716 static int do_mcast_group_source(struct sock *sk, int optname, 717 sockptr_t optval, int optlen) 718 { 719 struct group_source_req greqs; 720 struct ip_mreq_source mreqs; 721 struct sockaddr_in *psin; 722 int omode, add, err; 723 724 err = copy_group_source_from_sockptr(&greqs, optval, optlen); 725 if (err) 726 return err; 727 728 if (greqs.gsr_group.ss_family != AF_INET || 729 greqs.gsr_source.ss_family != AF_INET) 730 return -EADDRNOTAVAIL; 731 732 psin = (struct sockaddr_in *)&greqs.gsr_group; 733 mreqs.imr_multiaddr = psin->sin_addr.s_addr; 734 psin = (struct sockaddr_in *)&greqs.gsr_source; 735 mreqs.imr_sourceaddr = psin->sin_addr.s_addr; 736 mreqs.imr_interface = 0; /* use index for mc_source */ 737 738 if (optname == MCAST_BLOCK_SOURCE) { 739 omode = MCAST_EXCLUDE; 740 add = 1; 741 } else if (optname == MCAST_UNBLOCK_SOURCE) { 742 omode = MCAST_EXCLUDE; 743 add = 0; 744 } else if (optname == MCAST_JOIN_SOURCE_GROUP) { 745 struct ip_mreqn mreq; 746 747 psin = (struct sockaddr_in *)&greqs.gsr_group; 748 mreq.imr_multiaddr = psin->sin_addr; 749 mreq.imr_address.s_addr = 0; 750 mreq.imr_ifindex = greqs.gsr_interface; 751 err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); 752 if (err && err != -EADDRINUSE) 753 return err; 754 greqs.gsr_interface = mreq.imr_ifindex; 755 omode = MCAST_INCLUDE; 756 add = 1; 757 } else /* MCAST_LEAVE_SOURCE_GROUP */ { 758 omode = MCAST_INCLUDE; 759 add = 0; 760 } 761 return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface); 762 } 763 764 static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) 765 { 766 struct group_filter *gsf = NULL; 767 int err; 768 769 if (optlen < GROUP_FILTER_SIZE(0)) 770 return -EINVAL; 771 if (optlen > READ_ONCE(sysctl_optmem_max)) 772 return -ENOBUFS; 773 774 gsf = memdup_sockptr(optval, optlen); 775 if (IS_ERR(gsf)) 776 return PTR_ERR(gsf); 777 778 /* numsrc >= (4G-140)/128 overflow in 32 bits */ 779 err = -ENOBUFS; 780 if (gsf->gf_numsrc >= 0x1ffffff || 781 gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) 782 goto out_free_gsf; 783 784 err = -EINVAL; 785 if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) 786 goto out_free_gsf; 787 788 err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc, 789 gsf->gf_fmode, &gsf->gf_group, 790 gsf->gf_slist_flex); 791 out_free_gsf: 792 kfree(gsf); 793 return err; 794 } 795 796 static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, 797 int optlen) 798 { 799 const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); 800 struct compat_group_filter *gf32; 801 unsigned int n; 802 void *p; 803 int err; 804 805 if (optlen < size0) 806 return -EINVAL; 807 if (optlen > READ_ONCE(sysctl_optmem_max) - 4) 808 return -ENOBUFS; 809 810 p = kmalloc(optlen + 4, GFP_KERNEL); 811 if (!p) 812 return -ENOMEM; 813 gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ 814 815 err = -EFAULT; 816 if (copy_from_sockptr(gf32, optval, optlen)) 817 goto out_free_gsf; 818 819 /* numsrc >= (4G-140)/128 overflow in 32 bits */ 820 n = gf32->gf_numsrc; 821 err = -ENOBUFS; 822 if (n >= 0x1ffffff) 823 goto out_free_gsf; 824 825 err = -EINVAL; 826 if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) 827 goto out_free_gsf; 828 829 /* numsrc >= (4G-140)/128 overflow in 32 bits */ 830 err = -ENOBUFS; 831 if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) 832 goto out_free_gsf; 833 err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, 834 &gf32->gf_group, gf32->gf_slist_flex); 835 out_free_gsf: 836 kfree(p); 837 return err; 838 } 839 840 static int ip_mcast_join_leave(struct sock *sk, int optname, 841 sockptr_t optval, int optlen) 842 { 843 struct ip_mreqn mreq = { }; 844 struct sockaddr_in *psin; 845 struct group_req greq; 846 847 if (optlen < sizeof(struct group_req)) 848 return -EINVAL; 849 if (copy_from_sockptr(&greq, optval, sizeof(greq))) 850 return -EFAULT; 851 852 psin = (struct sockaddr_in *)&greq.gr_group; 853 if (psin->sin_family != AF_INET) 854 return -EINVAL; 855 mreq.imr_multiaddr = psin->sin_addr; 856 mreq.imr_ifindex = greq.gr_interface; 857 if (optname == MCAST_JOIN_GROUP) 858 return ip_mc_join_group(sk, &mreq); 859 return ip_mc_leave_group(sk, &mreq); 860 } 861 862 static int compat_ip_mcast_join_leave(struct sock *sk, int optname, 863 sockptr_t optval, int optlen) 864 { 865 struct compat_group_req greq; 866 struct ip_mreqn mreq = { }; 867 struct sockaddr_in *psin; 868 869 if (optlen < sizeof(struct compat_group_req)) 870 return -EINVAL; 871 if (copy_from_sockptr(&greq, optval, sizeof(greq))) 872 return -EFAULT; 873 874 psin = (struct sockaddr_in *)&greq.gr_group; 875 if (psin->sin_family != AF_INET) 876 return -EINVAL; 877 mreq.imr_multiaddr = psin->sin_addr; 878 mreq.imr_ifindex = greq.gr_interface; 879 880 if (optname == MCAST_JOIN_GROUP) 881 return ip_mc_join_group(sk, &mreq); 882 return ip_mc_leave_group(sk, &mreq); 883 } 884 885 DEFINE_STATIC_KEY_FALSE(ip4_min_ttl); 886 887 int do_ip_setsockopt(struct sock *sk, int level, int optname, 888 sockptr_t optval, unsigned int optlen) 889 { 890 struct inet_sock *inet = inet_sk(sk); 891 struct net *net = sock_net(sk); 892 int val = 0, err; 893 bool needs_rtnl = setsockopt_needs_rtnl(optname); 894 895 switch (optname) { 896 case IP_PKTINFO: 897 case IP_RECVTTL: 898 case IP_RECVOPTS: 899 case IP_RECVTOS: 900 case IP_RETOPTS: 901 case IP_TOS: 902 case IP_TTL: 903 case IP_HDRINCL: 904 case IP_MTU_DISCOVER: 905 case IP_RECVERR: 906 case IP_ROUTER_ALERT: 907 case IP_FREEBIND: 908 case IP_PASSSEC: 909 case IP_TRANSPARENT: 910 case IP_MINTTL: 911 case IP_NODEFRAG: 912 case IP_BIND_ADDRESS_NO_PORT: 913 case IP_UNICAST_IF: 914 case IP_MULTICAST_TTL: 915 case IP_MULTICAST_ALL: 916 case IP_MULTICAST_LOOP: 917 case IP_RECVORIGDSTADDR: 918 case IP_CHECKSUM: 919 case IP_RECVFRAGSIZE: 920 case IP_RECVERR_RFC4884: 921 case IP_LOCAL_PORT_RANGE: 922 if (optlen >= sizeof(int)) { 923 if (copy_from_sockptr(&val, optval, sizeof(val))) 924 return -EFAULT; 925 } else if (optlen >= sizeof(char)) { 926 unsigned char ucval; 927 928 if (copy_from_sockptr(&ucval, optval, sizeof(ucval))) 929 return -EFAULT; 930 val = (int) ucval; 931 } 932 } 933 934 /* If optlen==0, it is equivalent to val == 0 */ 935 936 if (optname == IP_ROUTER_ALERT) 937 return ip_ra_control(sk, val ? 1 : 0, NULL); 938 if (ip_mroute_opt(optname)) 939 return ip_mroute_setsockopt(sk, optname, optval, optlen); 940 941 /* Handle options that can be set without locking the socket. */ 942 switch (optname) { 943 case IP_PKTINFO: 944 inet_assign_bit(PKTINFO, sk, val); 945 return 0; 946 case IP_RECVTTL: 947 inet_assign_bit(TTL, sk, val); 948 return 0; 949 case IP_RECVTOS: 950 inet_assign_bit(TOS, sk, val); 951 return 0; 952 case IP_RECVOPTS: 953 inet_assign_bit(RECVOPTS, sk, val); 954 return 0; 955 case IP_RETOPTS: 956 inet_assign_bit(RETOPTS, sk, val); 957 return 0; 958 case IP_PASSSEC: 959 inet_assign_bit(PASSSEC, sk, val); 960 return 0; 961 case IP_RECVORIGDSTADDR: 962 inet_assign_bit(ORIGDSTADDR, sk, val); 963 return 0; 964 case IP_RECVFRAGSIZE: 965 if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) 966 return -EINVAL; 967 inet_assign_bit(RECVFRAGSIZE, sk, val); 968 return 0; 969 case IP_RECVERR: 970 inet_assign_bit(RECVERR, sk, val); 971 if (!val) 972 skb_errqueue_purge(&sk->sk_error_queue); 973 return 0; 974 case IP_RECVERR_RFC4884: 975 if (val < 0 || val > 1) 976 return -EINVAL; 977 inet_assign_bit(RECVERR_RFC4884, sk, val); 978 return 0; 979 case IP_FREEBIND: 980 if (optlen < 1) 981 return -EINVAL; 982 inet_assign_bit(FREEBIND, sk, val); 983 return 0; 984 case IP_HDRINCL: 985 if (sk->sk_type != SOCK_RAW) 986 return -ENOPROTOOPT; 987 inet_assign_bit(HDRINCL, sk, val); 988 return 0; 989 case IP_MULTICAST_LOOP: 990 if (optlen < 1) 991 return -EINVAL; 992 inet_assign_bit(MC_LOOP, sk, val); 993 return 0; 994 case IP_MULTICAST_ALL: 995 if (optlen < 1) 996 return -EINVAL; 997 if (val != 0 && val != 1) 998 return -EINVAL; 999 inet_assign_bit(MC_ALL, sk, val); 1000 return 0; 1001 case IP_TRANSPARENT: 1002 if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && 1003 !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 1004 return -EPERM; 1005 if (optlen < 1) 1006 return -EINVAL; 1007 inet_assign_bit(TRANSPARENT, sk, val); 1008 return 0; 1009 case IP_NODEFRAG: 1010 if (sk->sk_type != SOCK_RAW) 1011 return -ENOPROTOOPT; 1012 inet_assign_bit(NODEFRAG, sk, val); 1013 return 0; 1014 case IP_BIND_ADDRESS_NO_PORT: 1015 inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val); 1016 return 0; 1017 case IP_TTL: 1018 if (optlen < 1) 1019 return -EINVAL; 1020 if (val != -1 && (val < 1 || val > 255)) 1021 return -EINVAL; 1022 WRITE_ONCE(inet->uc_ttl, val); 1023 return 0; 1024 case IP_MINTTL: 1025 if (optlen < 1) 1026 return -EINVAL; 1027 if (val < 0 || val > 255) 1028 return -EINVAL; 1029 1030 if (val) 1031 static_branch_enable(&ip4_min_ttl); 1032 1033 WRITE_ONCE(inet->min_ttl, val); 1034 return 0; 1035 case IP_MULTICAST_TTL: 1036 if (sk->sk_type == SOCK_STREAM) 1037 return -EINVAL; 1038 if (optlen < 1) 1039 return -EINVAL; 1040 if (val == -1) 1041 val = 1; 1042 if (val < 0 || val > 255) 1043 return -EINVAL; 1044 WRITE_ONCE(inet->mc_ttl, val); 1045 return 0; 1046 case IP_MTU_DISCOVER: 1047 return ip_sock_set_mtu_discover(sk, val); 1048 case IP_TOS: /* This sets both TOS and Precedence */ 1049 ip_sock_set_tos(sk, val); 1050 return 0; 1051 } 1052 1053 err = 0; 1054 if (needs_rtnl) 1055 rtnl_lock(); 1056 sockopt_lock_sock(sk); 1057 1058 switch (optname) { 1059 case IP_OPTIONS: 1060 { 1061 struct ip_options_rcu *old, *opt = NULL; 1062 1063 if (optlen > 40) 1064 goto e_inval; 1065 err = ip_options_get(sock_net(sk), &opt, optval, optlen); 1066 if (err) 1067 break; 1068 old = rcu_dereference_protected(inet->inet_opt, 1069 lockdep_sock_is_held(sk)); 1070 if (inet_test_bit(IS_ICSK, sk)) { 1071 struct inet_connection_sock *icsk = inet_csk(sk); 1072 #if IS_ENABLED(CONFIG_IPV6) 1073 if (sk->sk_family == PF_INET || 1074 (!((1 << sk->sk_state) & 1075 (TCPF_LISTEN | TCPF_CLOSE)) && 1076 inet->inet_daddr != LOOPBACK4_IPV6)) { 1077 #endif 1078 if (old) 1079 icsk->icsk_ext_hdr_len -= old->opt.optlen; 1080 if (opt) 1081 icsk->icsk_ext_hdr_len += opt->opt.optlen; 1082 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 1083 #if IS_ENABLED(CONFIG_IPV6) 1084 } 1085 #endif 1086 } 1087 rcu_assign_pointer(inet->inet_opt, opt); 1088 if (old) 1089 kfree_rcu(old, rcu); 1090 break; 1091 } 1092 case IP_CHECKSUM: 1093 if (val) { 1094 if (!(inet_test_bit(CHECKSUM, sk))) { 1095 inet_inc_convert_csum(sk); 1096 inet_set_bit(CHECKSUM, sk); 1097 } 1098 } else { 1099 if (inet_test_bit(CHECKSUM, sk)) { 1100 inet_dec_convert_csum(sk); 1101 inet_clear_bit(CHECKSUM, sk); 1102 } 1103 } 1104 break; 1105 case IP_UNICAST_IF: 1106 { 1107 struct net_device *dev = NULL; 1108 int ifindex; 1109 int midx; 1110 1111 if (optlen != sizeof(int)) 1112 goto e_inval; 1113 1114 ifindex = (__force int)ntohl((__force __be32)val); 1115 if (ifindex == 0) { 1116 WRITE_ONCE(inet->uc_index, 0); 1117 err = 0; 1118 break; 1119 } 1120 1121 dev = dev_get_by_index(sock_net(sk), ifindex); 1122 err = -EADDRNOTAVAIL; 1123 if (!dev) 1124 break; 1125 1126 midx = l3mdev_master_ifindex(dev); 1127 dev_put(dev); 1128 1129 err = -EINVAL; 1130 if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if) 1131 break; 1132 1133 WRITE_ONCE(inet->uc_index, ifindex); 1134 err = 0; 1135 break; 1136 } 1137 case IP_MULTICAST_IF: 1138 { 1139 struct ip_mreqn mreq; 1140 struct net_device *dev = NULL; 1141 int midx; 1142 1143 if (sk->sk_type == SOCK_STREAM) 1144 goto e_inval; 1145 /* 1146 * Check the arguments are allowable 1147 */ 1148 1149 if (optlen < sizeof(struct in_addr)) 1150 goto e_inval; 1151 1152 err = -EFAULT; 1153 if (optlen >= sizeof(struct ip_mreqn)) { 1154 if (copy_from_sockptr(&mreq, optval, sizeof(mreq))) 1155 break; 1156 } else { 1157 memset(&mreq, 0, sizeof(mreq)); 1158 if (optlen >= sizeof(struct ip_mreq)) { 1159 if (copy_from_sockptr(&mreq, optval, 1160 sizeof(struct ip_mreq))) 1161 break; 1162 } else if (optlen >= sizeof(struct in_addr)) { 1163 if (copy_from_sockptr(&mreq.imr_address, optval, 1164 sizeof(struct in_addr))) 1165 break; 1166 } 1167 } 1168 1169 if (!mreq.imr_ifindex) { 1170 if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { 1171 WRITE_ONCE(inet->mc_index, 0); 1172 WRITE_ONCE(inet->mc_addr, 0); 1173 err = 0; 1174 break; 1175 } 1176 dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); 1177 if (dev) 1178 mreq.imr_ifindex = dev->ifindex; 1179 } else 1180 dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); 1181 1182 1183 err = -EADDRNOTAVAIL; 1184 if (!dev) 1185 break; 1186 1187 midx = l3mdev_master_ifindex(dev); 1188 1189 dev_put(dev); 1190 1191 err = -EINVAL; 1192 if (sk->sk_bound_dev_if && 1193 mreq.imr_ifindex != sk->sk_bound_dev_if && 1194 midx != sk->sk_bound_dev_if) 1195 break; 1196 1197 WRITE_ONCE(inet->mc_index, mreq.imr_ifindex); 1198 WRITE_ONCE(inet->mc_addr, mreq.imr_address.s_addr); 1199 err = 0; 1200 break; 1201 } 1202 1203 case IP_ADD_MEMBERSHIP: 1204 case IP_DROP_MEMBERSHIP: 1205 { 1206 struct ip_mreqn mreq; 1207 1208 err = -EPROTO; 1209 if (inet_test_bit(IS_ICSK, sk)) 1210 break; 1211 1212 if (optlen < sizeof(struct ip_mreq)) 1213 goto e_inval; 1214 err = -EFAULT; 1215 if (optlen >= sizeof(struct ip_mreqn)) { 1216 if (copy_from_sockptr(&mreq, optval, sizeof(mreq))) 1217 break; 1218 } else { 1219 memset(&mreq, 0, sizeof(mreq)); 1220 if (copy_from_sockptr(&mreq, optval, 1221 sizeof(struct ip_mreq))) 1222 break; 1223 } 1224 1225 if (optname == IP_ADD_MEMBERSHIP) 1226 err = ip_mc_join_group(sk, &mreq); 1227 else 1228 err = ip_mc_leave_group(sk, &mreq); 1229 break; 1230 } 1231 case IP_MSFILTER: 1232 { 1233 struct ip_msfilter *msf; 1234 1235 if (optlen < IP_MSFILTER_SIZE(0)) 1236 goto e_inval; 1237 if (optlen > READ_ONCE(sysctl_optmem_max)) { 1238 err = -ENOBUFS; 1239 break; 1240 } 1241 msf = memdup_sockptr(optval, optlen); 1242 if (IS_ERR(msf)) { 1243 err = PTR_ERR(msf); 1244 break; 1245 } 1246 /* numsrc >= (1G-4) overflow in 32 bits */ 1247 if (msf->imsf_numsrc >= 0x3ffffffcU || 1248 msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { 1249 kfree(msf); 1250 err = -ENOBUFS; 1251 break; 1252 } 1253 if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) { 1254 kfree(msf); 1255 err = -EINVAL; 1256 break; 1257 } 1258 err = ip_mc_msfilter(sk, msf, 0); 1259 kfree(msf); 1260 break; 1261 } 1262 case IP_BLOCK_SOURCE: 1263 case IP_UNBLOCK_SOURCE: 1264 case IP_ADD_SOURCE_MEMBERSHIP: 1265 case IP_DROP_SOURCE_MEMBERSHIP: 1266 { 1267 struct ip_mreq_source mreqs; 1268 int omode, add; 1269 1270 if (optlen != sizeof(struct ip_mreq_source)) 1271 goto e_inval; 1272 if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) { 1273 err = -EFAULT; 1274 break; 1275 } 1276 if (optname == IP_BLOCK_SOURCE) { 1277 omode = MCAST_EXCLUDE; 1278 add = 1; 1279 } else if (optname == IP_UNBLOCK_SOURCE) { 1280 omode = MCAST_EXCLUDE; 1281 add = 0; 1282 } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) { 1283 struct ip_mreqn mreq; 1284 1285 mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; 1286 mreq.imr_address.s_addr = mreqs.imr_interface; 1287 mreq.imr_ifindex = 0; 1288 err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); 1289 if (err && err != -EADDRINUSE) 1290 break; 1291 omode = MCAST_INCLUDE; 1292 add = 1; 1293 } else /* IP_DROP_SOURCE_MEMBERSHIP */ { 1294 omode = MCAST_INCLUDE; 1295 add = 0; 1296 } 1297 err = ip_mc_source(add, omode, sk, &mreqs, 0); 1298 break; 1299 } 1300 case MCAST_JOIN_GROUP: 1301 case MCAST_LEAVE_GROUP: 1302 if (in_compat_syscall()) 1303 err = compat_ip_mcast_join_leave(sk, optname, optval, 1304 optlen); 1305 else 1306 err = ip_mcast_join_leave(sk, optname, optval, optlen); 1307 break; 1308 case MCAST_JOIN_SOURCE_GROUP: 1309 case MCAST_LEAVE_SOURCE_GROUP: 1310 case MCAST_BLOCK_SOURCE: 1311 case MCAST_UNBLOCK_SOURCE: 1312 err = do_mcast_group_source(sk, optname, optval, optlen); 1313 break; 1314 case MCAST_MSFILTER: 1315 if (in_compat_syscall()) 1316 err = compat_ip_set_mcast_msfilter(sk, optval, optlen); 1317 else 1318 err = ip_set_mcast_msfilter(sk, optval, optlen); 1319 break; 1320 case IP_IPSEC_POLICY: 1321 case IP_XFRM_POLICY: 1322 err = -EPERM; 1323 if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 1324 break; 1325 err = xfrm_user_policy(sk, optname, optval, optlen); 1326 break; 1327 1328 case IP_LOCAL_PORT_RANGE: 1329 { 1330 const __u16 lo = val; 1331 const __u16 hi = val >> 16; 1332 1333 if (optlen != sizeof(__u32)) 1334 goto e_inval; 1335 if (lo != 0 && hi != 0 && lo > hi) 1336 goto e_inval; 1337 1338 inet->local_port_range.lo = lo; 1339 inet->local_port_range.hi = hi; 1340 break; 1341 } 1342 default: 1343 err = -ENOPROTOOPT; 1344 break; 1345 } 1346 sockopt_release_sock(sk); 1347 if (needs_rtnl) 1348 rtnl_unlock(); 1349 return err; 1350 1351 e_inval: 1352 sockopt_release_sock(sk); 1353 if (needs_rtnl) 1354 rtnl_unlock(); 1355 return -EINVAL; 1356 } 1357 1358 /** 1359 * ipv4_pktinfo_prepare - transfer some info from rtable to skb 1360 * @sk: socket 1361 * @skb: buffer 1362 * 1363 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific 1364 * destination in skb->cb[] before dst drop. 1365 * This way, receiver doesn't make cache line misses to read rtable. 1366 */ 1367 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) 1368 { 1369 struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); 1370 bool prepare = inet_test_bit(PKTINFO, sk) || 1371 ipv6_sk_rxinfo(sk); 1372 1373 if (prepare && skb_rtable(skb)) { 1374 /* skb->cb is overloaded: prior to this point it is IP{6}CB 1375 * which has interface index (iif) as the first member of the 1376 * underlying inet{6}_skb_parm struct. This code then overlays 1377 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first 1378 * element so the iif is picked up from the prior IPCB. If iif 1379 * is the loopback interface, then return the sending interface 1380 * (e.g., process binds socket to eth0 for Tx which is 1381 * redirected to loopback in the rtable/dst). 1382 */ 1383 struct rtable *rt = skb_rtable(skb); 1384 bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); 1385 1386 if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) 1387 pktinfo->ipi_ifindex = inet_iif(skb); 1388 else if (l3slave && rt && rt->rt_iif) 1389 pktinfo->ipi_ifindex = rt->rt_iif; 1390 1391 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); 1392 } else { 1393 pktinfo->ipi_ifindex = 0; 1394 pktinfo->ipi_spec_dst.s_addr = 0; 1395 } 1396 skb_dst_drop(skb); 1397 } 1398 1399 int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, 1400 unsigned int optlen) 1401 { 1402 int err; 1403 1404 if (level != SOL_IP) 1405 return -ENOPROTOOPT; 1406 1407 err = do_ip_setsockopt(sk, level, optname, optval, optlen); 1408 #if IS_ENABLED(CONFIG_BPFILTER_UMH) 1409 if (optname >= BPFILTER_IPT_SO_SET_REPLACE && 1410 optname < BPFILTER_IPT_SET_MAX) 1411 err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); 1412 #endif 1413 #ifdef CONFIG_NETFILTER 1414 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1415 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1416 optname != IP_IPSEC_POLICY && 1417 optname != IP_XFRM_POLICY && 1418 !ip_mroute_opt(optname)) 1419 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); 1420 #endif 1421 return err; 1422 } 1423 EXPORT_SYMBOL(ip_setsockopt); 1424 1425 /* 1426 * Get the options. Note for future reference. The GET of IP options gets 1427 * the _received_ ones. The set sets the _sent_ ones. 1428 */ 1429 1430 static bool getsockopt_needs_rtnl(int optname) 1431 { 1432 switch (optname) { 1433 case IP_MSFILTER: 1434 case MCAST_MSFILTER: 1435 return true; 1436 } 1437 return false; 1438 } 1439 1440 static int ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval, 1441 sockptr_t optlen, int len) 1442 { 1443 const int size0 = offsetof(struct group_filter, gf_slist_flex); 1444 struct group_filter gsf; 1445 int num, gsf_size; 1446 int err; 1447 1448 if (len < size0) 1449 return -EINVAL; 1450 if (copy_from_sockptr(&gsf, optval, size0)) 1451 return -EFAULT; 1452 1453 num = gsf.gf_numsrc; 1454 err = ip_mc_gsfget(sk, &gsf, optval, 1455 offsetof(struct group_filter, gf_slist_flex)); 1456 if (err) 1457 return err; 1458 if (gsf.gf_numsrc < num) 1459 num = gsf.gf_numsrc; 1460 gsf_size = GROUP_FILTER_SIZE(num); 1461 if (copy_to_sockptr(optlen, &gsf_size, sizeof(int)) || 1462 copy_to_sockptr(optval, &gsf, size0)) 1463 return -EFAULT; 1464 return 0; 1465 } 1466 1467 static int compat_ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval, 1468 sockptr_t optlen, int len) 1469 { 1470 const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); 1471 struct compat_group_filter gf32; 1472 struct group_filter gf; 1473 int num; 1474 int err; 1475 1476 if (len < size0) 1477 return -EINVAL; 1478 if (copy_from_sockptr(&gf32, optval, size0)) 1479 return -EFAULT; 1480 1481 gf.gf_interface = gf32.gf_interface; 1482 gf.gf_fmode = gf32.gf_fmode; 1483 num = gf.gf_numsrc = gf32.gf_numsrc; 1484 gf.gf_group = gf32.gf_group; 1485 1486 err = ip_mc_gsfget(sk, &gf, optval, 1487 offsetof(struct compat_group_filter, gf_slist_flex)); 1488 if (err) 1489 return err; 1490 if (gf.gf_numsrc < num) 1491 num = gf.gf_numsrc; 1492 len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); 1493 if (copy_to_sockptr(optlen, &len, sizeof(int)) || 1494 copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode), 1495 &gf.gf_fmode, sizeof(gf.gf_fmode)) || 1496 copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc), 1497 &gf.gf_numsrc, sizeof(gf.gf_numsrc))) 1498 return -EFAULT; 1499 return 0; 1500 } 1501 1502 int do_ip_getsockopt(struct sock *sk, int level, int optname, 1503 sockptr_t optval, sockptr_t optlen) 1504 { 1505 struct inet_sock *inet = inet_sk(sk); 1506 bool needs_rtnl = getsockopt_needs_rtnl(optname); 1507 int val, err = 0; 1508 int len; 1509 1510 if (level != SOL_IP) 1511 return -EOPNOTSUPP; 1512 1513 if (ip_mroute_opt(optname)) 1514 return ip_mroute_getsockopt(sk, optname, optval, optlen); 1515 1516 if (copy_from_sockptr(&len, optlen, sizeof(int))) 1517 return -EFAULT; 1518 if (len < 0) 1519 return -EINVAL; 1520 1521 /* Handle options that can be read without locking the socket. */ 1522 switch (optname) { 1523 case IP_PKTINFO: 1524 val = inet_test_bit(PKTINFO, sk); 1525 goto copyval; 1526 case IP_RECVTTL: 1527 val = inet_test_bit(TTL, sk); 1528 goto copyval; 1529 case IP_RECVTOS: 1530 val = inet_test_bit(TOS, sk); 1531 goto copyval; 1532 case IP_RECVOPTS: 1533 val = inet_test_bit(RECVOPTS, sk); 1534 goto copyval; 1535 case IP_RETOPTS: 1536 val = inet_test_bit(RETOPTS, sk); 1537 goto copyval; 1538 case IP_PASSSEC: 1539 val = inet_test_bit(PASSSEC, sk); 1540 goto copyval; 1541 case IP_RECVORIGDSTADDR: 1542 val = inet_test_bit(ORIGDSTADDR, sk); 1543 goto copyval; 1544 case IP_CHECKSUM: 1545 val = inet_test_bit(CHECKSUM, sk); 1546 goto copyval; 1547 case IP_RECVFRAGSIZE: 1548 val = inet_test_bit(RECVFRAGSIZE, sk); 1549 goto copyval; 1550 case IP_RECVERR: 1551 val = inet_test_bit(RECVERR, sk); 1552 goto copyval; 1553 case IP_RECVERR_RFC4884: 1554 val = inet_test_bit(RECVERR_RFC4884, sk); 1555 goto copyval; 1556 case IP_FREEBIND: 1557 val = inet_test_bit(FREEBIND, sk); 1558 goto copyval; 1559 case IP_HDRINCL: 1560 val = inet_test_bit(HDRINCL, sk); 1561 goto copyval; 1562 case IP_MULTICAST_LOOP: 1563 val = inet_test_bit(MC_LOOP, sk); 1564 goto copyval; 1565 case IP_MULTICAST_ALL: 1566 val = inet_test_bit(MC_ALL, sk); 1567 goto copyval; 1568 case IP_TRANSPARENT: 1569 val = inet_test_bit(TRANSPARENT, sk); 1570 goto copyval; 1571 case IP_NODEFRAG: 1572 val = inet_test_bit(NODEFRAG, sk); 1573 goto copyval; 1574 case IP_BIND_ADDRESS_NO_PORT: 1575 val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); 1576 goto copyval; 1577 case IP_TTL: 1578 val = READ_ONCE(inet->uc_ttl); 1579 if (val < 0) 1580 val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl); 1581 goto copyval; 1582 case IP_MINTTL: 1583 val = READ_ONCE(inet->min_ttl); 1584 goto copyval; 1585 case IP_MULTICAST_TTL: 1586 val = READ_ONCE(inet->mc_ttl); 1587 goto copyval; 1588 case IP_MTU_DISCOVER: 1589 val = READ_ONCE(inet->pmtudisc); 1590 goto copyval; 1591 case IP_TOS: 1592 val = READ_ONCE(inet->tos); 1593 goto copyval; 1594 case IP_OPTIONS: 1595 { 1596 unsigned char optbuf[sizeof(struct ip_options)+40]; 1597 struct ip_options *opt = (struct ip_options *)optbuf; 1598 struct ip_options_rcu *inet_opt; 1599 1600 rcu_read_lock(); 1601 inet_opt = rcu_dereference(inet->inet_opt); 1602 opt->optlen = 0; 1603 if (inet_opt) 1604 memcpy(optbuf, &inet_opt->opt, 1605 sizeof(struct ip_options) + 1606 inet_opt->opt.optlen); 1607 rcu_read_unlock(); 1608 1609 if (opt->optlen == 0) { 1610 len = 0; 1611 return copy_to_sockptr(optlen, &len, sizeof(int)); 1612 } 1613 1614 ip_options_undo(opt); 1615 1616 len = min_t(unsigned int, len, opt->optlen); 1617 if (copy_to_sockptr(optlen, &len, sizeof(int))) 1618 return -EFAULT; 1619 if (copy_to_sockptr(optval, opt->__data, len)) 1620 return -EFAULT; 1621 return 0; 1622 } 1623 case IP_MTU: 1624 { 1625 struct dst_entry *dst; 1626 val = 0; 1627 dst = sk_dst_get(sk); 1628 if (dst) { 1629 val = dst_mtu(dst); 1630 dst_release(dst); 1631 } 1632 if (!val) 1633 return -ENOTCONN; 1634 goto copyval; 1635 } 1636 case IP_PKTOPTIONS: 1637 { 1638 struct msghdr msg; 1639 1640 if (sk->sk_type != SOCK_STREAM) 1641 return -ENOPROTOOPT; 1642 1643 if (optval.is_kernel) { 1644 msg.msg_control_is_user = false; 1645 msg.msg_control = optval.kernel; 1646 } else { 1647 msg.msg_control_is_user = true; 1648 msg.msg_control_user = optval.user; 1649 } 1650 msg.msg_controllen = len; 1651 msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0; 1652 1653 if (inet_test_bit(PKTINFO, sk)) { 1654 struct in_pktinfo info; 1655 1656 info.ipi_addr.s_addr = READ_ONCE(inet->inet_rcv_saddr); 1657 info.ipi_spec_dst.s_addr = READ_ONCE(inet->inet_rcv_saddr); 1658 info.ipi_ifindex = READ_ONCE(inet->mc_index); 1659 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 1660 } 1661 if (inet_test_bit(TTL, sk)) { 1662 int hlim = READ_ONCE(inet->mc_ttl); 1663 1664 put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); 1665 } 1666 if (inet_test_bit(TOS, sk)) { 1667 int tos = READ_ONCE(inet->rcv_tos); 1668 put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); 1669 } 1670 len -= msg.msg_controllen; 1671 return copy_to_sockptr(optlen, &len, sizeof(int)); 1672 } 1673 case IP_UNICAST_IF: 1674 val = (__force int)htonl((__u32) READ_ONCE(inet->uc_index)); 1675 goto copyval; 1676 case IP_MULTICAST_IF: 1677 { 1678 struct in_addr addr; 1679 len = min_t(unsigned int, len, sizeof(struct in_addr)); 1680 addr.s_addr = READ_ONCE(inet->mc_addr); 1681 1682 if (copy_to_sockptr(optlen, &len, sizeof(int))) 1683 return -EFAULT; 1684 if (copy_to_sockptr(optval, &addr, len)) 1685 return -EFAULT; 1686 return 0; 1687 } 1688 } 1689 1690 if (needs_rtnl) 1691 rtnl_lock(); 1692 sockopt_lock_sock(sk); 1693 1694 switch (optname) { 1695 case IP_MSFILTER: 1696 { 1697 struct ip_msfilter msf; 1698 1699 if (len < IP_MSFILTER_SIZE(0)) { 1700 err = -EINVAL; 1701 goto out; 1702 } 1703 if (copy_from_sockptr(&msf, optval, IP_MSFILTER_SIZE(0))) { 1704 err = -EFAULT; 1705 goto out; 1706 } 1707 err = ip_mc_msfget(sk, &msf, optval, optlen); 1708 goto out; 1709 } 1710 case MCAST_MSFILTER: 1711 if (in_compat_syscall()) 1712 err = compat_ip_get_mcast_msfilter(sk, optval, optlen, 1713 len); 1714 else 1715 err = ip_get_mcast_msfilter(sk, optval, optlen, len); 1716 goto out; 1717 case IP_LOCAL_PORT_RANGE: 1718 val = inet->local_port_range.hi << 16 | inet->local_port_range.lo; 1719 break; 1720 case IP_PROTOCOL: 1721 val = inet_sk(sk)->inet_num; 1722 break; 1723 default: 1724 sockopt_release_sock(sk); 1725 return -ENOPROTOOPT; 1726 } 1727 sockopt_release_sock(sk); 1728 copyval: 1729 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1730 unsigned char ucval = (unsigned char)val; 1731 len = 1; 1732 if (copy_to_sockptr(optlen, &len, sizeof(int))) 1733 return -EFAULT; 1734 if (copy_to_sockptr(optval, &ucval, 1)) 1735 return -EFAULT; 1736 } else { 1737 len = min_t(unsigned int, sizeof(int), len); 1738 if (copy_to_sockptr(optlen, &len, sizeof(int))) 1739 return -EFAULT; 1740 if (copy_to_sockptr(optval, &val, len)) 1741 return -EFAULT; 1742 } 1743 return 0; 1744 1745 out: 1746 sockopt_release_sock(sk); 1747 if (needs_rtnl) 1748 rtnl_unlock(); 1749 return err; 1750 } 1751 1752 int ip_getsockopt(struct sock *sk, int level, 1753 int optname, char __user *optval, int __user *optlen) 1754 { 1755 int err; 1756 1757 err = do_ip_getsockopt(sk, level, optname, 1758 USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); 1759 1760 #if IS_ENABLED(CONFIG_BPFILTER_UMH) 1761 if (optname >= BPFILTER_IPT_SO_GET_INFO && 1762 optname < BPFILTER_IPT_GET_MAX) 1763 err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); 1764 #endif 1765 #ifdef CONFIG_NETFILTER 1766 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1767 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1768 !ip_mroute_opt(optname)) { 1769 int len; 1770 1771 if (get_user(len, optlen)) 1772 return -EFAULT; 1773 1774 err = nf_getsockopt(sk, PF_INET, optname, optval, &len); 1775 if (err >= 0) 1776 err = put_user(len, optlen); 1777 return err; 1778 } 1779 #endif 1780 return err; 1781 } 1782 EXPORT_SYMBOL(ip_getsockopt); 1783