1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * The IP to API glue. 8 * 9 * Authors: see ip.c 10 * 11 * Fixes: 12 * Many : Split from ip.c , see ip.c for history. 13 * Martin Mares : TOS setting fixed. 14 * Alan Cox : Fixed a couple of oopses in Martin's 15 * TOS tweaks. 16 * Mike McLagan : Routing by source 17 */ 18 19 #include <linux/module.h> 20 #include <linux/types.h> 21 #include <linux/mm.h> 22 #include <linux/skbuff.h> 23 #include <linux/ip.h> 24 #include <linux/icmp.h> 25 #include <linux/inetdevice.h> 26 #include <linux/netdevice.h> 27 #include <linux/slab.h> 28 #include <net/sock.h> 29 #include <net/ip.h> 30 #include <net/icmp.h> 31 #include <net/tcp_states.h> 32 #include <linux/udp.h> 33 #include <linux/igmp.h> 34 #include <linux/netfilter.h> 35 #include <linux/route.h> 36 #include <linux/mroute.h> 37 #include <net/inet_ecn.h> 38 #include <net/route.h> 39 #include <net/xfrm.h> 40 #include <net/compat.h> 41 #include <net/checksum.h> 42 #if IS_ENABLED(CONFIG_IPV6) 43 #include <net/transp_v6.h> 44 #endif 45 #include <net/ip_fib.h> 46 47 #include <linux/errqueue.h> 48 #include <linux/uaccess.h> 49 50 #include <linux/bpfilter.h> 51 52 /* 53 * SOL_IP control messages. 
 */

/* Emit IP_PKTINFO: ifindex and source stashed in skb->cb[] by
 * ipv4_pktinfo_prepare(); the destination is taken from the IP header.
 */
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct in_pktinfo info = *PKTINFO_SKB_CB(skb);

	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;

	put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}

/* Emit IP_TTL: the received packet's TTL, widened to int for userspace. */
static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
{
	int ttl = ip_hdr(skb)->ttl;
	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
}

/* Emit IP_TOS: a single byte, straight from the IP header. */
static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
{
	put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
}

/* Emit IP_RECVOPTS: raw IP options exactly as received (the bytes that
 * follow the fixed IP header).
 */
static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
{
	if (IPCB(skb)->opt.optlen == 0)
		return;

	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
		 ip_hdr(skb) + 1);
}


/* Emit IP_RETOPTS: options echoed (reversed for replying) and then
 * undone back to wire form.  If echoing fails, flag the control data
 * as truncated rather than dropping the message.
 */
static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
				 struct sk_buff *skb)
{
	/* 40 == maximum size of IPv4 options */
	unsigned char optbuf[sizeof(struct ip_options) + 40];
	struct ip_options *opt = (struct ip_options *)optbuf;

	if (IPCB(skb)->opt.optlen == 0)
		return;

	if (ip_options_echo(net, opt, skb)) {
		msg->msg_flags |= MSG_CTRUNC;
		return;
	}
	ip_options_undo(opt);

	put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}

/* Emit IP_RECVFRAGSIZE: largest fragment size recorded during reassembly;
 * silent no-op for packets that were never fragmented (frag_max_size == 0).
 */
static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
{
	int val;

	if (IPCB(skb)->frag_max_size == 0)
		return;

	val = IPCB(skb)->frag_max_size;
	put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
}

/* Emit IP_CHECKSUM: the CHECKSUM_COMPLETE value, with the bytes between
 * the transport header end (+tlen) and @offset subtracted out so the
 * checksum corresponds to the data userspace actually reads.
 */
static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
				  int tlen, int offset)
{
	__wsum csum = skb->csum;

	/* Only meaningful when the NIC provided a full packet checksum. */
	if (skb->ip_summed != CHECKSUM_COMPLETE)
		return;

	if (offset != 0) {
		int tend_off = skb_transport_offset(skb) + tlen;
		csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
	}

	put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
}

/* Emit SCM_SECURITY: the sender's security context, if the active LSM
 * can map the skb's secid to a context string.  Errors are silently
 * ignored (the cmsg is simply not emitted).
 */
static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
{
	char *secdata;
	u32 seclen, secid;
	int err;

	err = security_socket_getpeersec_dgram(NULL, skb, &secid);
	if (err)
		return;

	err = security_secid_to_secctx(secid, &secdata, &seclen);
	if (err)
		return;

	put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
	security_release_secctx(secdata, seclen);
}

/* Emit IP_ORIGDSTADDR: original destination address and port (the second
 * 16-bit field of the transport header), e.g. for transparent proxying.
 */
static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
	__be16 _ports[2], *ports;
	struct sockaddr_in sin;

	/* All current transport protocols have the port numbers in the
	 * first four bytes of the transport header and this function is
	 * written with this assumption in mind.
	 */
	ports = skb_header_pointer(skb, skb_transport_offset(skb),
				   sizeof(_ports), &_ports);
	if (!ports)
		return;

	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
	sin.sin_port = ports[1];
	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

	put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
}

/* Append every SOL_IP control message requested via inet->cmsg_flags for
 * a received skb.  @tlen and @offset locate the payload relative to the
 * transport header for the IP_CHECKSUM adjustment.  Each flag is cleared
 * after handling so the walk can stop early once none remain.
 */
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
			 struct sk_buff *skb, int tlen, int offset)
{
	struct inet_sock *inet = inet_sk(sk);
	unsigned int flags = inet->cmsg_flags;

	/* Ordered by supposed usage frequency */
	if (flags & IP_CMSG_PKTINFO) {
		ip_cmsg_recv_pktinfo(msg, skb);

		flags &= ~IP_CMSG_PKTINFO;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TTL) {
		ip_cmsg_recv_ttl(msg, skb);

		flags &= ~IP_CMSG_TTL;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TOS) {
		ip_cmsg_recv_tos(msg, skb);

		flags &= ~IP_CMSG_TOS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RECVOPTS) {
		ip_cmsg_recv_opts(msg, skb);

		flags &= ~IP_CMSG_RECVOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RETOPTS) {
		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);

		flags &= ~IP_CMSG_RETOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_PASSSEC) {
		ip_cmsg_recv_security(msg, skb);

		flags &= ~IP_CMSG_PASSSEC;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_ORIGDSTADDR) {
		ip_cmsg_recv_dstaddr(msg, skb);

		flags &= ~IP_CMSG_ORIGDSTADDR;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_CHECKSUM)
		ip_cmsg_recv_checksum(msg, skb, tlen, offset);

	if (flags & IP_CMSG_RECVFRAGSIZE)
		ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);

/* Parse sendmsg() control messages into @ipc.  SOL_SOCKET cmsgs are
 * delegated to __sock_cmsg_send(); with @allow_ipv6, a v4-mapped
 * IPV6_PKTINFO is accepted on dual-stack sockets.  The caller owns and
 * must free ipc->opt on both success and failure.
 */
int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
		 bool allow_ipv6)
{
	int err, val;
	struct cmsghdr *cmsg;
	struct net *net = sock_net(sk);

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
#if IS_ENABLED(CONFIG_IPV6)
		if (allow_ipv6 &&
		    cmsg->cmsg_level == SOL_IPV6 &&
		    cmsg->cmsg_type == IPV6_PKTINFO) {
			struct in6_pktinfo *src_info;

			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
				return -EINVAL;
			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
			/* Only v4-mapped addresses make sense on SOL_IP. */
			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
				return -EINVAL;
			if (src_info->ipi6_ifindex)
				ipc->oif = src_info->ipi6_ifindex;
			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
			continue;
		}
#endif
		if (cmsg->cmsg_level == SOL_SOCKET) {
			err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
			if (err)
				return err;
			continue;
		}

		if (cmsg->cmsg_level != SOL_IP)
			continue;
		switch (cmsg->cmsg_type) {
		case IP_RETOPTS:
			err = cmsg->cmsg_len - sizeof(struct cmsghdr);

			/* Our caller is responsible for freeing ipc->opt */
			err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
					     err < 40 ? err : 40);
			if (err)
				return err;
			break;
		case IP_PKTINFO:
		{
			struct in_pktinfo *info;
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
				return -EINVAL;
			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
			if (info->ipi_ifindex)
				ipc->oif = info->ipi_ifindex;
			ipc->addr = info->ipi_spec_dst.s_addr;
			break;
		}
		case IP_TTL:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
				return -EINVAL;
			val = *(int *)CMSG_DATA(cmsg);
			if (val < 1 || val > 255)
				return -EINVAL;
			ipc->ttl = val;
			break;
		case IP_TOS:
			/* Accept both int- and byte-sized TOS values. */
			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
				val = *(int *)CMSG_DATA(cmsg);
			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
				val = *(u8 *)CMSG_DATA(cmsg);
			else
				return -EINVAL;
			if (val < 0 || val > 255)
				return -EINVAL;
			ipc->tos = val;
			ipc->priority = rt_tos2priority(ipc->tos);
			break;

		default:
			return -EINVAL;
		}
	}
	return 0;
}

/* RCU callback: release the socket reference and the chain entry once no
 * reader can still see them.
 */
static void ip_ra_destroy_rcu(struct rcu_head *head)
{
	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);

	sock_put(ra->saved_sk);
	kfree(ra);
}

/* Add (@on != 0) or remove @sk from the per-netns router-alert chain.
 * Raw sockets only, except IPPROTO_RAW itself.  Removal defers freeing
 * to an RCU grace period so ip_call_ra_chain() can walk the list without
 * taking socket references.
 */
int ip_ra_control(struct sock *sk, unsigned char on,
		  void (*destructor)(struct sock *))
{
	struct ip_ra_chain *ra, *new_ra;
	struct ip_ra_chain __rcu **rap;
	struct net *net = sock_net(sk);

	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
		return -EINVAL;

	/* Allocate before taking the mutex to keep the critical section short. */
	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
	if (on && !new_ra)
		return -ENOMEM;

	mutex_lock(&net->ipv4.ra_mutex);
	for (rap = &net->ipv4.ra_chain;
	     (ra = rcu_dereference_protected(*rap,
			lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
	     rap = &ra->next) {
		if (ra->sk == sk) {
			if (on) {
				mutex_unlock(&net->ipv4.ra_mutex);
				kfree(new_ra);
				return -EADDRINUSE;
			}
			/* dont let ip_call_ra_chain() use sk again */
			ra->sk = NULL;
			RCU_INIT_POINTER(*rap, ra->next);
			mutex_unlock(&net->ipv4.ra_mutex);

			if (ra->destructor)
				ra->destructor(sk);
			/*
			 * Delay sock_put(sk) and kfree(ra) after one rcu grace
			 * period. This guarantee ip_call_ra_chain() dont need
			 * to mess with socket refcounts.
			 */
			ra->saved_sk = sk;
			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
			return 0;
		}
	}
	if (!new_ra) {
		mutex_unlock(&net->ipv4.ra_mutex);
		return -ENOBUFS;
	}
	new_ra->sk = sk;
	new_ra->destructor = destructor;

	/* Publish the new entry at the tail; ra is NULL here (end of list). */
	RCU_INIT_POINTER(new_ra->next, ra);
	rcu_assign_pointer(*rap, new_ra);
	sock_hold(sk);
	mutex_unlock(&net->ipv4.ra_mutex);

	return 0;
}

/* Queue a received ICMP error on @sk's error queue for MSG_ERRQUEUE.
 * @payload points at the start of the data returned to the user; the
 * offended packet's daddr offset is recorded for ip_recv_error().
 */
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
		   __be16 port, u32 info, u8 *payload)
{
	struct sock_exterr_skb *serr;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
	serr->ee.ee_type = icmp_hdr(skb)->type;
	serr->ee.ee_code = icmp_hdr(skb)->code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	/* Offset of the embedded (offending) IP header's daddr field. */
	serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
				   skb_network_header(skb);
	serr->port = port;

	if (skb_pull(skb, payload - skb->data)) {
		skb_reset_transport_header(skb);
		if (sock_queue_err_skb(sk, skb) == 0)
			return;
	}
	kfree_skb(skb);
}

/* Queue a locally generated error (no ICMP packet involved) on @sk's
 * error queue.  A minimal IP header is fabricated so addr_offset points
 * at a valid daddr.  No-op unless IP_RECVERR is enabled.
 */
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sock_exterr_skb *serr;
	struct iphdr *iph;
	struct sk_buff *skb;

	if (!inet->recverr)
		return;

	skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
	if (!skb)
		return;

	skb_put(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->daddr = daddr;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = 0;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
	serr->port = port;

	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
	skb_reset_transport_header(skb);

	if (sock_queue_err_skb(sk, skb))
		kfree_skb(skb);
}

/* For some errors we have valid addr_offset even with zero payload and
 * zero port. Also, addr_offset should be supported if port is set.
 */
static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
{
	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
}

/* IPv4 supports cmsg on all imcp errors and some timestamps
 *
 * Timestamp code paths do not initialize the fields expected by cmsg:
 * the PKTINFO fields in skb->cb[]. Fill those in here.
 */
static bool ipv4_datagram_support_cmsg(const struct sock *sk,
				       struct sk_buff *skb,
				       int ee_origin)
{
	struct in_pktinfo *info;

	if (ee_origin == SO_EE_ORIGIN_ICMP)
		return true;

	if (ee_origin == SO_EE_ORIGIN_LOCAL)
		return false;

	/* Support IP_PKTINFO on tstamp packets if requested, to correlate
	 * timestamp with egress dev. Not possible for packets without iif
	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
	 */
	info = PKTINFO_SKB_CB(skb);
	if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
	    !info->ipi_ifindex)
		return false;

	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
	return true;
}

/*
 *	Handle MSG_ERRQUEUE
 */
int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
	struct {
		struct sock_extended_err ee;
		struct sockaddr_in	 offender;
	} errhdr;
	int err;
	int copied;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (!skb)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}
	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);

	/* Fill msg_name with the offended packet's destination, if valid. */
	if (sin && ipv4_datagram_support_addr(serr)) {
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
						   serr->addr_offset);
		sin->sin_port = serr->port;
		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
		*addr_len = sizeof(*sin);
	}

	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
	sin = &errhdr.offender;
	memset(sin, 0, sizeof(*sin));

	if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		if (inet_sk(sk)->cmsg_flags)
			ip_cmsg_recv(msg, skb);
	}

	put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);

	/* Now we could try to dump offended packet options */

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	consume_skb(skb);
out:
	return err;
}


/*
 *	Socket
option code for IP. This is the end of the line after any
 *	TCP,UDP etc options on an IP socket.
 */

/* Multicast membership and source-filter options manipulate device state
 * and therefore must be run under RTNL (taken before lock_sock below).
 */
static bool setsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_MSFILTER:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_MSFILTER:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_UNBLOCK_SOURCE:
		return true;
	}
	return false;
}

/* Convert a group_filter-style request (protocol-independent sockaddrs)
 * into a struct ip_msfilter and apply it via ip_mc_msfilter().  All
 * addresses must be AF_INET; otherwise -EADDRNOTAVAIL.
 */
static int set_mcast_msfilter(struct sock *sk, int ifindex,
			      int numsrc, int fmode,
			      struct sockaddr_storage *group,
			      struct sockaddr_storage *list)
{
	int msize = IP_MSFILTER_SIZE(numsrc);
	struct ip_msfilter *msf;
	struct sockaddr_in *psin;
	int err, i;

	msf = kmalloc(msize, GFP_KERNEL);
	if (!msf)
		return -ENOBUFS;

	psin = (struct sockaddr_in *)group;
	if (psin->sin_family != AF_INET)
		goto Eaddrnotavail;
	msf->imsf_multiaddr = psin->sin_addr.s_addr;
	msf->imsf_interface = 0;
	msf->imsf_fmode = fmode;
	msf->imsf_numsrc = numsrc;
	for (i = 0; i < numsrc; ++i) {
		psin = (struct sockaddr_in *)&list[i];

		if (psin->sin_family != AF_INET)
			goto Eaddrnotavail;
		msf->imsf_slist[i] = psin->sin_addr.s_addr;
	}
	err = ip_mc_msfilter(sk, msf, ifindex);
	kfree(msf);
	return err;

Eaddrnotavail:
	kfree(msf);
	return -EADDRNOTAVAIL;
}

/* Shared MCAST_{JOIN,LEAVE,BLOCK,UNBLOCK}_SOURCE(_GROUP) handler for the
 * native and compat setsockopt paths.  MCAST_JOIN_SOURCE_GROUP implies a
 * group join first (EADDRINUSE from an existing join is tolerated).
 */
static int do_mcast_group_source(struct sock *sk, int optname,
				 struct group_source_req *greqs)
{
	struct ip_mreq_source mreqs;
	struct sockaddr_in *psin;
	int omode, add, err;

	if (greqs->gsr_group.ss_family != AF_INET ||
	    greqs->gsr_source.ss_family != AF_INET)
		return -EADDRNOTAVAIL;

	psin = (struct sockaddr_in *)&greqs->gsr_group;
	mreqs.imr_multiaddr = psin->sin_addr.s_addr;
	psin = (struct sockaddr_in *)&greqs->gsr_source;
	mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
	mreqs.imr_interface = 0; /* use index for mc_source */

	if (optname == MCAST_BLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 1;
	} else if (optname == MCAST_UNBLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 0;
	} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
		struct ip_mreqn mreq;

		psin = (struct sockaddr_in *)&greqs->gsr_group;
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_address.s_addr = 0;
		mreq.imr_ifindex = greqs->gsr_interface;
		err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
		if (err && err != -EADDRINUSE)
			return err;
		greqs->gsr_interface = mreq.imr_ifindex;
		omode = MCAST_INCLUDE;
		add = 1;
	} else /* MCAST_LEAVE_SOURCE_GROUP */ {
		omode = MCAST_INCLUDE;
		add = 0;
	}
	return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface);
}

/* Core SOL_IP setsockopt.  Copies and validates @optval from userspace,
 * then dispatches per option under lock_sock() (and RTNL when the option
 * touches multicast state).  Router-alert and mroute options are handed
 * off before any locking.
 */
static int do_ip_setsockopt(struct sock *sk, int level,
			    int optname, char __user *optval, unsigned int optlen)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	int val = 0, err;
	bool needs_rtnl = setsockopt_needs_rtnl(optname);

	/* For simple integer-valued options, fetch val up front; a bare
	 * byte is accepted and widened for historical compatibility.
	 */
	switch (optname) {
	case IP_PKTINFO:
	case IP_RECVTTL:
	case IP_RECVOPTS:
	case IP_RECVTOS:
	case IP_RETOPTS:
	case IP_TOS:
	case IP_TTL:
	case IP_HDRINCL:
	case IP_MTU_DISCOVER:
	case IP_RECVERR:
	case IP_ROUTER_ALERT:
	case IP_FREEBIND:
	case IP_PASSSEC:
	case IP_TRANSPARENT:
	case IP_MINTTL:
	case IP_NODEFRAG:
	case IP_BIND_ADDRESS_NO_PORT:
	case IP_UNICAST_IF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_ALL:
	case IP_MULTICAST_LOOP:
	case IP_RECVORIGDSTADDR:
	case IP_CHECKSUM:
	case IP_RECVFRAGSIZE:
		if (optlen >= sizeof(int)) {
			if (get_user(val, (int __user *) optval))
				return -EFAULT;
		} else if (optlen >= sizeof(char)) {
			unsigned char ucval;

			if (get_user(ucval, (unsigned char __user *) optval))
				return -EFAULT;
			val = (int) ucval;
		}
	}

	/* If optlen==0, it is equivalent to val == 0 */

	if (optname == IP_ROUTER_ALERT)
		return ip_ra_control(sk, val ? 1 : 0, NULL);
	if (ip_mroute_opt(optname))
		return ip_mroute_setsockopt(sk, optname, optval, optlen);

	err = 0;
	if (needs_rtnl)
		rtnl_lock();
	lock_sock(sk);

	switch (optname) {
	case IP_OPTIONS:
	{
		struct ip_options_rcu *old, *opt = NULL;

		if (optlen > 40)
			goto e_inval;
		err = ip_options_get_from_user(sock_net(sk), &opt,
					       optval, optlen);
		if (err)
			break;
		old = rcu_dereference_protected(inet->inet_opt,
						lockdep_sock_is_held(sk));
		if (inet->is_icsk) {
			struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
			if (sk->sk_family == PF_INET ||
			    (!((1 << sk->sk_state) &
			       (TCPF_LISTEN | TCPF_CLOSE)) &&
			     inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
				/* Options change the IP header size; keep the
				 * cached ext header length and MSS in sync.
				 */
				if (old)
					icsk->icsk_ext_hdr_len -= old->opt.optlen;
				if (opt)
					icsk->icsk_ext_hdr_len += opt->opt.optlen;
				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if IS_ENABLED(CONFIG_IPV6)
			}
#endif
		}
		rcu_assign_pointer(inet->inet_opt, opt);
		if (old)
			kfree_rcu(old, rcu);
		break;
	}
	case IP_PKTINFO:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PKTINFO;
		else
			inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
		break;
	case IP_RECVTTL:
		if (val)
			inet->cmsg_flags |= IP_CMSG_TTL;
		else
			inet->cmsg_flags &= ~IP_CMSG_TTL;
		break;
	case IP_RECVTOS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_TOS;
		else
			inet->cmsg_flags &= ~IP_CMSG_TOS;
		break;
	case IP_RECVOPTS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_RECVOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
		break;
	case IP_RETOPTS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_RETOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
		break;
	case IP_PASSSEC:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PASSSEC;
		else
			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
		break;
	case IP_RECVORIGDSTADDR:
		if (val)
			inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
		else
			inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
		break;
	case IP_CHECKSUM:
		/* Keep the per-socket convert-csum count balanced on toggle. */
		if (val) {
			if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
				inet_inc_convert_csum(sk);
				inet->cmsg_flags |= IP_CMSG_CHECKSUM;
			}
		} else {
			if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
				inet_dec_convert_csum(sk);
				inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
			}
		}
		break;
	case IP_RECVFRAGSIZE:
		if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
			goto e_inval;
		if (val)
			inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
		break;
	case IP_TOS:	/* This sets both TOS and Precedence */
		if (sk->sk_type == SOCK_STREAM) {
			/* TCP owns the ECN bits; preserve them. */
			val &= ~INET_ECN_MASK;
			val |= inet->tos & INET_ECN_MASK;
		}
		if (inet->tos != val) {
			inet->tos = val;
			sk->sk_priority = rt_tos2priority(val);
			sk_dst_reset(sk);
		}
		break;
	case IP_TTL:
		if (optlen < 1)
			goto e_inval;
		if (val != -1 && (val < 1 || val > 255))
			goto e_inval;
		inet->uc_ttl = val;
		break;
	case IP_HDRINCL:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->hdrincl = val ? 1 : 0;
		break;
	case IP_NODEFRAG:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->nodefrag = val ? 1 : 0;
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		inet->bind_address_no_port = val ? 1 : 0;
		break;
	case IP_MTU_DISCOVER:
		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
			goto e_inval;
		inet->pmtudisc = val;
		break;
	case IP_RECVERR:
		inet->recverr = !!val;
		if (!val)
			skb_queue_purge(&sk->sk_error_queue);
		break;
	case IP_MULTICAST_TTL:
		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		if (optlen < 1)
			goto e_inval;
		if (val == -1)
			val = 1;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->mc_ttl = val;
		break;
	case IP_MULTICAST_LOOP:
		if (optlen < 1)
			goto e_inval;
		inet->mc_loop = !!val;
		break;
	case IP_UNICAST_IF:
	{
		struct net_device *dev = NULL;
		int ifindex;
		int midx;

		if (optlen != sizeof(int))
			goto e_inval;

		/* The ifindex is passed in network byte order. */
		ifindex = (__force int)ntohl((__force __be32)val);
		if (ifindex == 0) {
			inet->uc_index = 0;
			err = 0;
			break;
		}

		dev = dev_get_by_index(sock_net(sk), ifindex);
		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);
		dev_put(dev);

		/* A bound socket may only select a device enslaved to its
		 * bound L3 master device.
		 */
		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->uc_index = ifindex;
		err = 0;
		break;
	}
	case IP_MULTICAST_IF:
	{
		struct ip_mreqn mreq;
		struct net_device *dev = NULL;
		int midx;

		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		/*
		 *	Check the arguments are allowable
		 */

		if (optlen < sizeof(struct in_addr))
			goto e_inval;

		/* Accept ip_mreqn, ip_mreq or a bare in_addr, by size. */
		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_user(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			memset(&mreq, 0, sizeof(mreq));
			if (optlen >= sizeof(struct ip_mreq)) {
				if (copy_from_user(&mreq, optval,
						   sizeof(struct ip_mreq)))
					break;
			} else if (optlen >= sizeof(struct in_addr)) {
				if (copy_from_user(&mreq.imr_address, optval,
						   sizeof(struct in_addr)))
					break;
			}
		}

		if (!mreq.imr_ifindex) {
			if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
				inet->mc_index = 0;
				inet->mc_addr  = 0;
				err = 0;
				break;
			}
			dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
			if (dev)
				mreq.imr_ifindex = dev->ifindex;
		} else
			dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);


		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);

		dev_put(dev);

		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    mreq.imr_ifindex != sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->mc_index = mreq.imr_ifindex;
		inet->mc_addr  = mreq.imr_address.s_addr;
		err = 0;
		break;
	}

	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	{
		struct ip_mreqn mreq;

		err = -EPROTO;
		if (inet_sk(sk)->is_icsk)
			break;

		if (optlen < sizeof(struct ip_mreq))
			goto e_inval;
		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_user(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			memset(&mreq, 0, sizeof(mreq));
			if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
				break;
		}

		if (optname == IP_ADD_MEMBERSHIP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		break;
	}
	case IP_MSFILTER:
	{
		struct ip_msfilter *msf;

		if (optlen < IP_MSFILTER_SIZE(0))
			goto e_inval;
		if (optlen > sysctl_optmem_max) {
			err = -ENOBUFS;
			break;
		}
		msf = memdup_user(optval, optlen);
		if (IS_ERR(msf)) {
			err = PTR_ERR(msf);
			break;
		}
		/* numsrc >= (1G-4) overflow in 32 bits */
		if (msf->imsf_numsrc >= 0x3ffffffcU ||
		    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
			kfree(msf);
			err = -ENOBUFS;
			break;
		}
		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
			kfree(msf);
			err = -EINVAL;
			break;
		}
		err = ip_mc_msfilter(sk, msf, 0);
		kfree(msf);
		break;
	}
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	{
		struct ip_mreq_source mreqs;
		int omode, add;

		if (optlen != sizeof(struct ip_mreq_source))
			goto e_inval;
		if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
			err = -EFAULT;
			break;
		}
		if (optname == IP_BLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 1;
		} else if (optname == IP_UNBLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 0;
		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
			struct ip_mreqn mreq;

			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
			mreq.imr_address.s_addr = mreqs.imr_interface;
			mreq.imr_ifindex = 0;
			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
			if (err && err != -EADDRINUSE)
				break;
			omode = MCAST_INCLUDE;
			add = 1;
		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
			omode = MCAST_INCLUDE;
			add = 0;
		}
		err = ip_mc_source(add, omode, sk, &mreqs, 0);
		break;
	}
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	{
		struct group_req greq;
		struct sockaddr_in *psin;
		struct ip_mreqn mreq;

		if (optlen < sizeof(struct group_req))
			goto e_inval;
		err = -EFAULT;
		if (copy_from_user(&greq, optval, sizeof(greq)))
			break;
		psin = (struct sockaddr_in *)&greq.gr_group;
		if (psin->sin_family != AF_INET)
			goto e_inval;
		memset(&mreq, 0, sizeof(mreq));
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_ifindex = greq.gr_interface;

		if (optname == MCAST_JOIN_GROUP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		break;
	}
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	{
		struct group_source_req greqs;

		if (optlen != sizeof(struct group_source_req))
			goto e_inval;
		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
			err = -EFAULT;
			break;
		}
		err = do_mcast_group_source(sk, optname, &greqs);
		break;
	}
	case MCAST_MSFILTER:
	{
		struct group_filter *gsf = NULL;

		if (optlen < GROUP_FILTER_SIZE(0))
			goto e_inval;
		if (optlen > sysctl_optmem_max) {
			err = -ENOBUFS;
			break;
		}
		gsf = memdup_user(optval, optlen);
		if (IS_ERR(gsf)) {
			err = PTR_ERR(gsf);
			break;
		}
		/* numsrc >= (4G-140)/128 overflow in 32 bits */
		if (gsf->gf_numsrc >= 0x1ffffff ||
		    gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
			err = -ENOBUFS;
			goto mc_msf_out;
		}
		if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
			err = -EINVAL;
			goto mc_msf_out;
		}
		err = set_mcast_msfilter(sk, gsf->gf_interface,
					 gsf->gf_numsrc, gsf->gf_fmode,
					 &gsf->gf_group, gsf->gf_slist);
mc_msf_out:
		kfree(gsf);
		break;
	}
	case IP_MULTICAST_ALL:
		if (optlen < 1)
			goto e_inval;
		if (val != 0 && val != 1)
			goto e_inval;
		inet->mc_all = val;
		break;

	case IP_FREEBIND:
		if (optlen < 1)
			goto e_inval;
		inet->freebind = !!val;
		break;

	case IP_IPSEC_POLICY:
	case IP_XFRM_POLICY:
		err = -EPERM;
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			break;
		err = xfrm_user_policy(sk, optname, optval, optlen);
		break;

	case IP_TRANSPARENT:
		if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			err = -EPERM;
			break;
		}
		if (optlen < 1)
			goto e_inval;
		inet->transparent = !!val;
		break;

	case IP_MINTTL:
		if (optlen < 1)
			goto e_inval;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->min_ttl = val;
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return err;

e_inval:
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return -EINVAL;
}

/**
 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
 * @sk: socket
 * @skb: buffer
 *
 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
 * destination in skb->cb[] before dst drop.
 * This way, receiver doesn't make cache line misses to read rtable.
 */
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
	bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
		       ipv6_sk_rxinfo(sk);

	if (prepare && skb_rtable(skb)) {
		/* skb->cb is overloaded: prior to this point it is IP{6}CB
		 * which has interface index (iif) as the first member of the
		 * underlying inet{6}_skb_parm struct. This code then overlays
		 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
		 * element so the iif is picked up from the prior IPCB. If iif
		 * is the loopback interface, then return the sending interface
		 * (e.g., process binds socket to eth0 for Tx which is
		 * redirected to loopback in the rtable/dst).
		 */
		struct rtable *rt = skb_rtable(skb);
		bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);

		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
			pktinfo->ipi_ifindex = inet_iif(skb);
		else if (l3slave && rt && rt->rt_iif)
			pktinfo->ipi_ifindex = rt->rt_iif;

		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
	} else {
		pktinfo->ipi_ifindex = 0;
		pktinfo->ipi_spec_dst.s_addr = 0;
	}
	skb_dst_drop(skb);
}

/* SOL_IP setsockopt entry point: core handler first, then bpfilter and
 * netfilter get a chance at option ranges the core does not implement.
 */
int ip_setsockopt(struct sock *sk, int level,
		  int optname, char __user *optval, unsigned int optlen)
{
	int err;

	if (level != SOL_IP)
		return -ENOPROTOOPT;

	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
	    optname < BPFILTER_IPT_SET_MAX)
		err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
	    optname != IP_IPSEC_POLICY &&
	    optname != IP_XFRM_POLICY &&
	    !ip_mroute_opt(optname))
		err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
#endif
	return err;
}
EXPORT_SYMBOL(ip_setsockopt);

#ifdef CONFIG_COMPAT
/* 32-bit compat SOL_IP setsockopt: the MCAST_* group/filter structures
 * have a different layout in the compat ABI, so they are translated here
 * before reusing the native helpers; everything else falls through to
 * do_ip_setsockopt().
 */
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
			 char __user *optval, unsigned int optlen)
{
	int err;

	if (level != SOL_IP)
		return -ENOPROTOOPT;

	switch (optname) {
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	{
		struct compat_group_req __user *gr32 = (void __user *)optval;
		struct group_req greq;
		struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group;
		struct ip_mreqn mreq;

		if (optlen < sizeof(struct compat_group_req))
			return -EINVAL;

		if (get_user(greq.gr_interface, &gr32->gr_interface) ||
		    copy_from_user(&greq.gr_group, &gr32->gr_group,
				sizeof(greq.gr_group)))
			return -EFAULT;

		if (psin->sin_family != AF_INET)
			return -EINVAL;

		memset(&mreq, 0, sizeof(mreq));
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_ifindex = greq.gr_interface;

		rtnl_lock();
		lock_sock(sk);
		if (optname == MCAST_JOIN_GROUP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		release_sock(sk);
		rtnl_unlock();
		return err;
	}
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	{
		struct compat_group_source_req __user *gsr32 = (void __user *)optval;
		struct group_source_req greqs;

		if (optlen != sizeof(struct compat_group_source_req))
			return -EINVAL;

		if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
		    copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
				sizeof(greqs.gsr_group)) ||
		    copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
				sizeof(greqs.gsr_source)))
			return -EFAULT;

		rtnl_lock();
		lock_sock(sk);
		err = do_mcast_group_source(sk, optname, &greqs);
		release_sock(sk);
		rtnl_unlock();
		return err;
	}
	case MCAST_MSFILTER:
	{
		const int size0 = offsetof(struct compat_group_filter, gf_slist);
		struct compat_group_filter *gf32;
		void *p;
		int n;

		if (optlen < size0)
			return -EINVAL;
		if (optlen > sysctl_optmem_max - 4)
			return -ENOBUFS;

		p = kmalloc(optlen + 4, GFP_KERNEL);
		if (!p)
			return -ENOMEM;
		gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
		if (copy_from_user(gf32, optval, optlen)) {
			err = -EFAULT;
			goto mc_msf_out;
		}

		n = gf32->gf_numsrc;
		/* numsrc >= (4G-140)/128 overflow in 32 bits */
		if (n >= 0x1ffffff) {
			err = -ENOBUFS;
			goto mc_msf_out;
		}
		if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
			err = -EINVAL;
			goto mc_msf_out;
		}

		rtnl_lock();
		lock_sock(sk);
		/* numsrc >= (4G-140)/128 overflow in 32 bits */
		if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
			err = -ENOBUFS;
		else
			err = set_mcast_msfilter(sk, gf32->gf_interface,
						 n, gf32->gf_fmode,
						 &gf32->gf_group, gf32->gf_slist);
		release_sock(sk);
		rtnl_unlock();
mc_msf_out:
		kfree(p);
		return err;
	}
	}

	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
	    optname != IP_IPSEC_POLICY &&
	    optname != IP_XFRM_POLICY &&
	    !ip_mroute_opt(optname))
		err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
					   optlen);
#endif
	return err;
}
EXPORT_SYMBOL(compat_ip_setsockopt);
#endif

/*
 *	Get the options. Note for future reference. The GET of IP options gets
 *	the _received_ ones. The set sets the _sent_ ones.
1413 */ 1414 1415 static bool getsockopt_needs_rtnl(int optname) 1416 { 1417 switch (optname) { 1418 case IP_MSFILTER: 1419 case MCAST_MSFILTER: 1420 return true; 1421 } 1422 return false; 1423 } 1424 1425 static int do_ip_getsockopt(struct sock *sk, int level, int optname, 1426 char __user *optval, int __user *optlen, unsigned int flags) 1427 { 1428 struct inet_sock *inet = inet_sk(sk); 1429 bool needs_rtnl = getsockopt_needs_rtnl(optname); 1430 int val, err = 0; 1431 int len; 1432 1433 if (level != SOL_IP) 1434 return -EOPNOTSUPP; 1435 1436 if (ip_mroute_opt(optname)) 1437 return ip_mroute_getsockopt(sk, optname, optval, optlen); 1438 1439 if (get_user(len, optlen)) 1440 return -EFAULT; 1441 if (len < 0) 1442 return -EINVAL; 1443 1444 if (needs_rtnl) 1445 rtnl_lock(); 1446 lock_sock(sk); 1447 1448 switch (optname) { 1449 case IP_OPTIONS: 1450 { 1451 unsigned char optbuf[sizeof(struct ip_options)+40]; 1452 struct ip_options *opt = (struct ip_options *)optbuf; 1453 struct ip_options_rcu *inet_opt; 1454 1455 inet_opt = rcu_dereference_protected(inet->inet_opt, 1456 lockdep_sock_is_held(sk)); 1457 opt->optlen = 0; 1458 if (inet_opt) 1459 memcpy(optbuf, &inet_opt->opt, 1460 sizeof(struct ip_options) + 1461 inet_opt->opt.optlen); 1462 release_sock(sk); 1463 1464 if (opt->optlen == 0) 1465 return put_user(0, optlen); 1466 1467 ip_options_undo(opt); 1468 1469 len = min_t(unsigned int, len, opt->optlen); 1470 if (put_user(len, optlen)) 1471 return -EFAULT; 1472 if (copy_to_user(optval, opt->__data, len)) 1473 return -EFAULT; 1474 return 0; 1475 } 1476 case IP_PKTINFO: 1477 val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; 1478 break; 1479 case IP_RECVTTL: 1480 val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; 1481 break; 1482 case IP_RECVTOS: 1483 val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; 1484 break; 1485 case IP_RECVOPTS: 1486 val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; 1487 break; 1488 case IP_RETOPTS: 1489 val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; 1490 break; 
1491 case IP_PASSSEC: 1492 val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; 1493 break; 1494 case IP_RECVORIGDSTADDR: 1495 val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; 1496 break; 1497 case IP_CHECKSUM: 1498 val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; 1499 break; 1500 case IP_RECVFRAGSIZE: 1501 val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; 1502 break; 1503 case IP_TOS: 1504 val = inet->tos; 1505 break; 1506 case IP_TTL: 1507 { 1508 struct net *net = sock_net(sk); 1509 val = (inet->uc_ttl == -1 ? 1510 net->ipv4.sysctl_ip_default_ttl : 1511 inet->uc_ttl); 1512 break; 1513 } 1514 case IP_HDRINCL: 1515 val = inet->hdrincl; 1516 break; 1517 case IP_NODEFRAG: 1518 val = inet->nodefrag; 1519 break; 1520 case IP_BIND_ADDRESS_NO_PORT: 1521 val = inet->bind_address_no_port; 1522 break; 1523 case IP_MTU_DISCOVER: 1524 val = inet->pmtudisc; 1525 break; 1526 case IP_MTU: 1527 { 1528 struct dst_entry *dst; 1529 val = 0; 1530 dst = sk_dst_get(sk); 1531 if (dst) { 1532 val = dst_mtu(dst); 1533 dst_release(dst); 1534 } 1535 if (!val) { 1536 release_sock(sk); 1537 return -ENOTCONN; 1538 } 1539 break; 1540 } 1541 case IP_RECVERR: 1542 val = inet->recverr; 1543 break; 1544 case IP_MULTICAST_TTL: 1545 val = inet->mc_ttl; 1546 break; 1547 case IP_MULTICAST_LOOP: 1548 val = inet->mc_loop; 1549 break; 1550 case IP_UNICAST_IF: 1551 val = (__force int)htonl((__u32) inet->uc_index); 1552 break; 1553 case IP_MULTICAST_IF: 1554 { 1555 struct in_addr addr; 1556 len = min_t(unsigned int, len, sizeof(struct in_addr)); 1557 addr.s_addr = inet->mc_addr; 1558 release_sock(sk); 1559 1560 if (put_user(len, optlen)) 1561 return -EFAULT; 1562 if (copy_to_user(optval, &addr, len)) 1563 return -EFAULT; 1564 return 0; 1565 } 1566 case IP_MSFILTER: 1567 { 1568 struct ip_msfilter msf; 1569 1570 if (len < IP_MSFILTER_SIZE(0)) { 1571 err = -EINVAL; 1572 goto out; 1573 } 1574 if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { 1575 err = -EFAULT; 1576 goto out; 1577 } 1578 err = 
ip_mc_msfget(sk, &msf, 1579 (struct ip_msfilter __user *)optval, optlen); 1580 goto out; 1581 } 1582 case MCAST_MSFILTER: 1583 { 1584 struct group_filter __user *p = (void __user *)optval; 1585 struct group_filter gsf; 1586 const int size0 = offsetof(struct group_filter, gf_slist); 1587 int num; 1588 1589 if (len < size0) { 1590 err = -EINVAL; 1591 goto out; 1592 } 1593 if (copy_from_user(&gsf, p, size0)) { 1594 err = -EFAULT; 1595 goto out; 1596 } 1597 num = gsf.gf_numsrc; 1598 err = ip_mc_gsfget(sk, &gsf, p->gf_slist); 1599 if (err) 1600 goto out; 1601 if (gsf.gf_numsrc < num) 1602 num = gsf.gf_numsrc; 1603 if (put_user(GROUP_FILTER_SIZE(num), optlen) || 1604 copy_to_user(p, &gsf, size0)) 1605 err = -EFAULT; 1606 goto out; 1607 } 1608 case IP_MULTICAST_ALL: 1609 val = inet->mc_all; 1610 break; 1611 case IP_PKTOPTIONS: 1612 { 1613 struct msghdr msg; 1614 1615 release_sock(sk); 1616 1617 if (sk->sk_type != SOCK_STREAM) 1618 return -ENOPROTOOPT; 1619 1620 msg.msg_control_is_user = true; 1621 msg.msg_control_user = optval; 1622 msg.msg_controllen = len; 1623 msg.msg_flags = flags; 1624 1625 if (inet->cmsg_flags & IP_CMSG_PKTINFO) { 1626 struct in_pktinfo info; 1627 1628 info.ipi_addr.s_addr = inet->inet_rcv_saddr; 1629 info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; 1630 info.ipi_ifindex = inet->mc_index; 1631 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 1632 } 1633 if (inet->cmsg_flags & IP_CMSG_TTL) { 1634 int hlim = inet->mc_ttl; 1635 put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); 1636 } 1637 if (inet->cmsg_flags & IP_CMSG_TOS) { 1638 int tos = inet->rcv_tos; 1639 put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); 1640 } 1641 len -= msg.msg_controllen; 1642 return put_user(len, optlen); 1643 } 1644 case IP_FREEBIND: 1645 val = inet->freebind; 1646 break; 1647 case IP_TRANSPARENT: 1648 val = inet->transparent; 1649 break; 1650 case IP_MINTTL: 1651 val = inet->min_ttl; 1652 break; 1653 default: 1654 release_sock(sk); 1655 return -ENOPROTOOPT; 1656 
} 1657 release_sock(sk); 1658 1659 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1660 unsigned char ucval = (unsigned char)val; 1661 len = 1; 1662 if (put_user(len, optlen)) 1663 return -EFAULT; 1664 if (copy_to_user(optval, &ucval, 1)) 1665 return -EFAULT; 1666 } else { 1667 len = min_t(unsigned int, sizeof(int), len); 1668 if (put_user(len, optlen)) 1669 return -EFAULT; 1670 if (copy_to_user(optval, &val, len)) 1671 return -EFAULT; 1672 } 1673 return 0; 1674 1675 out: 1676 release_sock(sk); 1677 if (needs_rtnl) 1678 rtnl_unlock(); 1679 return err; 1680 } 1681 1682 int ip_getsockopt(struct sock *sk, int level, 1683 int optname, char __user *optval, int __user *optlen) 1684 { 1685 int err; 1686 1687 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); 1688 #if IS_ENABLED(CONFIG_BPFILTER_UMH) 1689 if (optname >= BPFILTER_IPT_SO_GET_INFO && 1690 optname < BPFILTER_IPT_GET_MAX) 1691 err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); 1692 #endif 1693 #ifdef CONFIG_NETFILTER 1694 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1695 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1696 !ip_mroute_opt(optname)) { 1697 int len; 1698 1699 if (get_user(len, optlen)) 1700 return -EFAULT; 1701 1702 err = nf_getsockopt(sk, PF_INET, optname, optval, &len); 1703 if (err >= 0) 1704 err = put_user(len, optlen); 1705 return err; 1706 } 1707 #endif 1708 return err; 1709 } 1710 EXPORT_SYMBOL(ip_getsockopt); 1711 1712 #ifdef CONFIG_COMPAT 1713 int compat_ip_getsockopt(struct sock *sk, int level, int optname, 1714 char __user *optval, int __user *optlen) 1715 { 1716 int err; 1717 1718 if (optname == MCAST_MSFILTER) { 1719 const int size0 = offsetof(struct compat_group_filter, gf_slist); 1720 struct compat_group_filter __user *p = (void __user *)optval; 1721 struct compat_group_filter gf32; 1722 struct group_filter gf; 1723 int ulen, err; 1724 int num; 1725 1726 if (level != SOL_IP) 1727 return -EOPNOTSUPP; 1728 1729 if 
(get_user(ulen, optlen)) 1730 return -EFAULT; 1731 1732 if (ulen < size0) 1733 return -EINVAL; 1734 1735 if (copy_from_user(&gf32, p, size0)) 1736 return -EFAULT; 1737 1738 gf.gf_interface = gf32.gf_interface; 1739 gf.gf_fmode = gf32.gf_fmode; 1740 num = gf.gf_numsrc = gf32.gf_numsrc; 1741 gf.gf_group = gf32.gf_group; 1742 1743 rtnl_lock(); 1744 lock_sock(sk); 1745 err = ip_mc_gsfget(sk, &gf, p->gf_slist); 1746 release_sock(sk); 1747 rtnl_unlock(); 1748 if (err) 1749 return err; 1750 if (gf.gf_numsrc < num) 1751 num = gf.gf_numsrc; 1752 ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); 1753 if (put_user(ulen, optlen) || 1754 put_user(gf.gf_fmode, &p->gf_fmode) || 1755 put_user(gf.gf_numsrc, &p->gf_numsrc)) 1756 return -EFAULT; 1757 return 0; 1758 } 1759 1760 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 1761 MSG_CMSG_COMPAT); 1762 1763 #if IS_ENABLED(CONFIG_BPFILTER_UMH) 1764 if (optname >= BPFILTER_IPT_SO_GET_INFO && 1765 optname < BPFILTER_IPT_GET_MAX) 1766 err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); 1767 #endif 1768 #ifdef CONFIG_NETFILTER 1769 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1770 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1771 !ip_mroute_opt(optname)) { 1772 int len; 1773 1774 if (get_user(len, optlen)) 1775 return -EFAULT; 1776 1777 err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len); 1778 if (err >= 0) 1779 err = put_user(len, optlen); 1780 return err; 1781 } 1782 #endif 1783 return err; 1784 } 1785 EXPORT_SYMBOL(compat_ip_getsockopt); 1786 #endif 1787