xref: /linux/net/ipv6/af_inet6.c (revision 4ce06406958b67fdddcc2e6948237dd6ff6ba112)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	PF_INET6 socket protocol family
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Adapted from linux/net/ipv4/af_inet.c
10  *
11  *	Fixes:
12  *	piggy, Karl Knutson	:	Socket protocol table
13  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
14  *	Arnaldo Melo		:	check proc_net_create return, cleanups
15  */
16 
17 #define pr_fmt(fmt) "IPv6: " fmt
18 
19 #include <linux/module.h>
20 #include <linux/capability.h>
21 #include <linux/errno.h>
22 #include <linux/types.h>
23 #include <linux/socket.h>
24 #include <linux/in.h>
25 #include <linux/kernel.h>
26 #include <linux/timer.h>
27 #include <linux/string.h>
28 #include <linux/sockios.h>
29 #include <linux/net.h>
30 #include <linux/fcntl.h>
31 #include <linux/mm.h>
32 #include <linux/interrupt.h>
33 #include <linux/proc_fs.h>
34 #include <linux/stat.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 
38 #include <linux/inet.h>
39 #include <linux/netdevice.h>
40 #include <linux/icmpv6.h>
41 
42 #include <net/ip.h>
43 #include <net/ipv6.h>
44 #include <net/udp.h>
45 #include <net/tcp.h>
46 #include <net/ping.h>
47 #include <net/protocol.h>
48 #include <net/inet_common.h>
49 #include <net/route.h>
50 #include <net/transp_v6.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/ndisc.h>
54 #ifdef CONFIG_IPV6_TUNNEL
55 #include <net/ip6_tunnel.h>
56 #endif
57 #include <net/calipso.h>
58 #include <net/seg6.h>
59 #include <net/rpl.h>
60 #include <net/compat.h>
61 #include <net/xfrm.h>
62 #include <net/ioam6.h>
63 #include <net/rawv6.h>
64 #include <net/rps.h>
65 
66 #include <linux/uaccess.h>
67 #include <linux/mroute6.h>
68 
69 #include "ip6_offload.h"
70 
/* The inetsw6 table contains everything that inet6_create needs to
 * build a new socket.
 *
 * It is an array of lists indexed by socket type; each list holds the
 * struct inet_protosw entries registered for that type.  inet6_create()
 * walks a list under rcu_read_lock(), while inet6_register_protosw() and
 * inet6_unregister_protosw() modify it with inetsw6_lock held.
 */
static struct list_head inetsw6[SOCK_MAX];
/* Serialises writers of the inetsw6 lists. */
static DEFINE_SPINLOCK(inetsw6_lock);
76 
77 struct ipv6_params ipv6_defaults = {
78 	.disable_ipv6 = 0,
79 	.autoconf = 1,
80 };
81 
/* "disable": backed by disable_ipv6_mod; when set, inet6_init() stops
 * right after basic table setup and registers nothing.
 */
module_param_named(disable, disable_ipv6_mod, int, 0444);
MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");

/* "disable_ipv6": backed by ipv6_defaults.disable_ipv6 (read-only, 0444). */
module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");

/* "autoconf": backed by ipv6_defaults.autoconf (read-only, 0444). */
module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
90 
91 static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
92 {
93 	const int offset = sk->sk_prot->ipv6_pinfo_offset;
94 
95 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
96 }
97 
/* sk_destruct callback installed by inet6_create(): drop the IPv6-only
 * state first, then run the common inet destructor.
 */
void inet6_sock_destruct(struct sock *sk)
{
	/* IPv6 part: rx/tx options and flow labels. */
	inet6_cleanup_sock(sk);

	/* Shared inet part. */
	inet_sock_destruct(sk);
}
EXPORT_SYMBOL_GPL(inet6_sock_destruct);
104 
105 static int inet6_create(struct net *net, struct socket *sock, int protocol,
106 			int kern)
107 {
108 	struct inet_sock *inet;
109 	struct ipv6_pinfo *np;
110 	struct sock *sk;
111 	struct inet_protosw *answer;
112 	struct proto *answer_prot;
113 	unsigned char answer_flags;
114 	int try_loading_module = 0;
115 	int err;
116 
117 	if (protocol < 0 || protocol >= IPPROTO_MAX)
118 		return -EINVAL;
119 
120 	/* Look for the requested type/protocol pair. */
121 lookup_protocol:
122 	err = -ESOCKTNOSUPPORT;
123 	rcu_read_lock();
124 	list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
125 
126 		err = 0;
127 		/* Check the non-wild match. */
128 		if (protocol == answer->protocol) {
129 			if (protocol != IPPROTO_IP)
130 				break;
131 		} else {
132 			/* Check for the two wild cases. */
133 			if (IPPROTO_IP == protocol) {
134 				protocol = answer->protocol;
135 				break;
136 			}
137 			if (IPPROTO_IP == answer->protocol)
138 				break;
139 		}
140 		err = -EPROTONOSUPPORT;
141 	}
142 
143 	if (err) {
144 		if (try_loading_module < 2) {
145 			rcu_read_unlock();
146 			/*
147 			 * Be more specific, e.g. net-pf-10-proto-132-type-1
148 			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
149 			 */
150 			if (++try_loading_module == 1)
151 				request_module("net-pf-%d-proto-%d-type-%d",
152 						PF_INET6, protocol, sock->type);
153 			/*
154 			 * Fall back to generic, e.g. net-pf-10-proto-132
155 			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
156 			 */
157 			else
158 				request_module("net-pf-%d-proto-%d",
159 						PF_INET6, protocol);
160 			goto lookup_protocol;
161 		} else
162 			goto out_rcu_unlock;
163 	}
164 
165 	err = -EPERM;
166 	if (sock->type == SOCK_RAW && !kern &&
167 	    !ns_capable(net->user_ns, CAP_NET_RAW))
168 		goto out_rcu_unlock;
169 
170 	sock->ops = answer->ops;
171 	answer_prot = answer->prot;
172 	answer_flags = answer->flags;
173 	rcu_read_unlock();
174 
175 	WARN_ON(!answer_prot->slab);
176 
177 	err = -ENOBUFS;
178 	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
179 	if (!sk)
180 		goto out;
181 
182 	sock_init_data(sock, sk);
183 
184 	err = 0;
185 	if (INET_PROTOSW_REUSE & answer_flags)
186 		sk->sk_reuse = SK_CAN_REUSE;
187 
188 	if (INET_PROTOSW_ICSK & answer_flags)
189 		inet_init_csk_locks(sk);
190 
191 	inet = inet_sk(sk);
192 	inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
193 
194 	if (SOCK_RAW == sock->type) {
195 		inet->inet_num = protocol;
196 		if (IPPROTO_RAW == protocol)
197 			inet_set_bit(HDRINCL, sk);
198 	}
199 
200 	sk->sk_destruct		= inet6_sock_destruct;
201 	sk->sk_family		= PF_INET6;
202 	sk->sk_protocol		= protocol;
203 
204 	sk->sk_backlog_rcv	= answer->prot->backlog_rcv;
205 
206 	inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
207 	np->hop_limit	= -1;
208 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
209 	inet6_set_bit(MC6_LOOP, sk);
210 	inet6_set_bit(MC6_ALL, sk);
211 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
212 	inet6_assign_bit(REPFLOW, sk, READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
213 				      FLOWLABEL_REFLECT_ESTABLISHED);
214 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
215 	sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
216 
217 	/* Init the ipv4 part of the socket since we can have sockets
218 	 * using v6 API for ipv4.
219 	 */
220 	inet->uc_ttl	= -1;
221 
222 	inet_set_bit(MC_LOOP, sk);
223 	inet->mc_ttl	= 1;
224 	inet->mc_index	= 0;
225 	RCU_INIT_POINTER(inet->mc_list, NULL);
226 	inet->rcv_tos	= 0;
227 
228 	if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
229 		inet->pmtudisc = IP_PMTUDISC_DONT;
230 	else
231 		inet->pmtudisc = IP_PMTUDISC_WANT;
232 
233 	if (inet->inet_num) {
234 		/* It assumes that any protocol which allows
235 		 * the user to assign a number at socket
236 		 * creation time automatically shares.
237 		 */
238 		inet->inet_sport = htons(inet->inet_num);
239 		err = sk->sk_prot->hash(sk);
240 		if (err)
241 			goto out_sk_release;
242 	}
243 	if (sk->sk_prot->init) {
244 		err = sk->sk_prot->init(sk);
245 		if (err)
246 			goto out_sk_release;
247 	}
248 
249 	if (!kern) {
250 		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
251 		if (err)
252 			goto out_sk_release;
253 	}
254 out:
255 	return err;
256 out_rcu_unlock:
257 	rcu_read_unlock();
258 	goto out;
259 out_sk_release:
260 	sk_common_release(sk);
261 	sock->sk = NULL;
262 	goto out;
263 }
264 
265 int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
266 		 u32 flags)
267 {
268 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
269 	struct inet_sock *inet = inet_sk(sk);
270 	struct ipv6_pinfo *np = inet6_sk(sk);
271 	struct net *net = sock_net(sk);
272 	__be32 v4addr = 0;
273 	unsigned short snum;
274 	bool saved_ipv6only;
275 	int addr_type = 0;
276 	int err = 0;
277 
278 	if (addr->sin6_family != AF_INET6)
279 		return -EAFNOSUPPORT;
280 
281 	addr_type = ipv6_addr_type(&addr->sin6_addr);
282 	if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
283 		return -EINVAL;
284 
285 	snum = ntohs(addr->sin6_port);
286 	if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
287 	    snum && inet_port_requires_bind_service(net, snum) &&
288 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
289 		return -EACCES;
290 
291 	if (flags & BIND_WITH_LOCK)
292 		lock_sock(sk);
293 
294 	/* Check these errors (active socket, double bind). */
295 	if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
296 		err = -EINVAL;
297 		goto out;
298 	}
299 
300 	/* Check if the address belongs to the host. */
301 	if (addr_type == IPV6_ADDR_MAPPED) {
302 		struct net_device *dev = NULL;
303 		int chk_addr_ret;
304 
305 		/* Binding to v4-mapped address on a v6-only socket
306 		 * makes no sense
307 		 */
308 		if (ipv6_only_sock(sk)) {
309 			err = -EINVAL;
310 			goto out;
311 		}
312 
313 		rcu_read_lock();
314 		if (sk->sk_bound_dev_if) {
315 			dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
316 			if (!dev) {
317 				err = -ENODEV;
318 				goto out_unlock;
319 			}
320 		}
321 
322 		/* Reproduce AF_INET checks to make the bindings consistent */
323 		v4addr = addr->sin6_addr.s6_addr32[3];
324 		chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
325 		rcu_read_unlock();
326 
327 		if (!inet_addr_valid_or_nonlocal(net, inet, v4addr,
328 						 chk_addr_ret)) {
329 			err = -EADDRNOTAVAIL;
330 			goto out;
331 		}
332 	} else {
333 		if (addr_type != IPV6_ADDR_ANY) {
334 			struct net_device *dev = NULL;
335 
336 			rcu_read_lock();
337 			if (__ipv6_addr_needs_scope_id(addr_type)) {
338 				if (addr_len >= sizeof(struct sockaddr_in6) &&
339 				    addr->sin6_scope_id) {
340 					/* Override any existing binding, if another one
341 					 * is supplied by user.
342 					 */
343 					sk->sk_bound_dev_if = addr->sin6_scope_id;
344 				}
345 
346 				/* Binding to link-local address requires an interface */
347 				if (!sk->sk_bound_dev_if) {
348 					err = -EINVAL;
349 					goto out_unlock;
350 				}
351 			}
352 
353 			if (sk->sk_bound_dev_if) {
354 				dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
355 				if (!dev) {
356 					err = -ENODEV;
357 					goto out_unlock;
358 				}
359 			}
360 
361 			/* ipv4 addr of the socket is invalid.  Only the
362 			 * unspecified and mapped address have a v4 equivalent.
363 			 */
364 			v4addr = LOOPBACK4_IPV6;
365 			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
366 				if (!ipv6_can_nonlocal_bind(net, inet) &&
367 				    !ipv6_chk_addr(net, &addr->sin6_addr,
368 						   dev, 0)) {
369 					err = -EADDRNOTAVAIL;
370 					goto out_unlock;
371 				}
372 			}
373 			rcu_read_unlock();
374 		}
375 	}
376 
377 	inet->inet_rcv_saddr = v4addr;
378 	inet->inet_saddr = v4addr;
379 
380 	sk->sk_v6_rcv_saddr = addr->sin6_addr;
381 
382 	if (!(addr_type & IPV6_ADDR_MULTICAST))
383 		np->saddr = addr->sin6_addr;
384 
385 	saved_ipv6only = sk->sk_ipv6only;
386 	if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
387 		sk->sk_ipv6only = 1;
388 
389 	/* Make sure we are allowed to bind here. */
390 	if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
391 		      (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
392 		err = sk->sk_prot->get_port(sk, snum);
393 		if (err) {
394 			sk->sk_ipv6only = saved_ipv6only;
395 			inet_reset_saddr(sk);
396 			goto out;
397 		}
398 		if (!(flags & BIND_FROM_BPF)) {
399 			err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
400 			if (err) {
401 				sk->sk_ipv6only = saved_ipv6only;
402 				inet_reset_saddr(sk);
403 				if (sk->sk_prot->put_port)
404 					sk->sk_prot->put_port(sk);
405 				goto out;
406 			}
407 		}
408 	}
409 
410 	if (addr_type != IPV6_ADDR_ANY)
411 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
412 	if (snum)
413 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
414 	inet->inet_sport = htons(inet->inet_num);
415 	inet->inet_dport = 0;
416 	inet->inet_daddr = 0;
417 out:
418 	if (flags & BIND_WITH_LOCK)
419 		release_sock(sk);
420 	return err;
421 out_unlock:
422 	rcu_read_unlock();
423 	goto out;
424 }
425 
426 int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len)
427 {
428 	u32 flags = BIND_WITH_LOCK;
429 	const struct proto *prot;
430 	int err = 0;
431 
432 	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
433 	prot = READ_ONCE(sk->sk_prot);
434 	/* If the socket has its own bind function then use it. */
435 	if (prot->bind)
436 		return prot->bind(sk, uaddr, addr_len);
437 
438 	if (addr_len < SIN6_LEN_RFC2133)
439 		return -EINVAL;
440 
441 	/* BPF prog is run before any checks are done so that if the prog
442 	 * changes context in a wrong way it will be caught.
443 	 */
444 	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
445 						 CGROUP_INET6_BIND, &flags);
446 	if (err)
447 		return err;
448 
449 	return __inet6_bind(sk, uaddr, addr_len, flags);
450 }
451 
452 /* bind for INET6 API */
453 int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
454 {
455 	return inet6_bind_sk(sock->sk, uaddr, addr_len);
456 }
457 EXPORT_SYMBOL(inet6_bind);
458 
459 int inet6_release(struct socket *sock)
460 {
461 	struct sock *sk = sock->sk;
462 
463 	if (!sk)
464 		return -EINVAL;
465 
466 	/* Free mc lists */
467 	ipv6_sock_mc_close(sk);
468 
469 	/* Free ac lists */
470 	ipv6_sock_ac_close(sk);
471 
472 	return inet_release(sock);
473 }
474 EXPORT_SYMBOL(inet6_release);
475 
476 void inet6_cleanup_sock(struct sock *sk)
477 {
478 	struct ipv6_pinfo *np = inet6_sk(sk);
479 	struct sk_buff *skb;
480 	struct ipv6_txoptions *opt;
481 
482 	/* Release rx options */
483 
484 	skb = xchg(&np->pktoptions, NULL);
485 	kfree_skb(skb);
486 
487 	skb = xchg(&np->rxpmtu, NULL);
488 	kfree_skb(skb);
489 
490 	/* Free flowlabels */
491 	fl6_free_socklist(sk);
492 
493 	/* Free tx options */
494 
495 	opt = unrcu_pointer(xchg(&np->opt, NULL));
496 	if (opt) {
497 		atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
498 		txopt_put(opt);
499 	}
500 }
501 EXPORT_SYMBOL_GPL(inet6_cleanup_sock);
502 
503 /*
504  *	This does both peername and sockname.
505  */
506 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
507 		  int peer)
508 {
509 	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
510 	int sin_addr_len = sizeof(*sin);
511 	struct sock *sk = sock->sk;
512 	struct inet_sock *inet = inet_sk(sk);
513 	struct ipv6_pinfo *np = inet6_sk(sk);
514 
515 	sin->sin6_family = AF_INET6;
516 	sin->sin6_flowinfo = 0;
517 	sin->sin6_scope_id = 0;
518 	lock_sock(sk);
519 	if (peer) {
520 		if (!inet->inet_dport ||
521 		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
522 		    peer == 1)) {
523 			release_sock(sk);
524 			return -ENOTCONN;
525 		}
526 		sin->sin6_port = inet->inet_dport;
527 		sin->sin6_addr = sk->sk_v6_daddr;
528 		if (inet6_test_bit(SNDFLOW, sk))
529 			sin->sin6_flowinfo = np->flow_label;
530 		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
531 				       CGROUP_INET6_GETPEERNAME);
532 	} else {
533 		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
534 			sin->sin6_addr = np->saddr;
535 		else
536 			sin->sin6_addr = sk->sk_v6_rcv_saddr;
537 		sin->sin6_port = inet->inet_sport;
538 		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
539 				       CGROUP_INET6_GETSOCKNAME);
540 	}
541 	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
542 						 sk->sk_bound_dev_if);
543 	release_sock(sk);
544 	return sin_addr_len;
545 }
546 EXPORT_SYMBOL(inet6_getname);
547 
548 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
549 {
550 	void __user *argp = (void __user *)arg;
551 	struct sock *sk = sock->sk;
552 	struct net *net = sock_net(sk);
553 	const struct proto *prot;
554 
555 	switch (cmd) {
556 	case SIOCADDRT:
557 	case SIOCDELRT: {
558 		struct in6_rtmsg rtmsg;
559 
560 		if (copy_from_user(&rtmsg, argp, sizeof(rtmsg)))
561 			return -EFAULT;
562 		return ipv6_route_ioctl(net, cmd, &rtmsg);
563 	}
564 	case SIOCSIFADDR:
565 		return addrconf_add_ifaddr(net, argp);
566 	case SIOCDIFADDR:
567 		return addrconf_del_ifaddr(net, argp);
568 	case SIOCSIFDSTADDR:
569 		return addrconf_set_dstaddr(net, argp);
570 	default:
571 		/* IPV6_ADDRFORM can change sk->sk_prot under us. */
572 		prot = READ_ONCE(sk->sk_prot);
573 		if (!prot->ioctl)
574 			return -ENOIOCTLCMD;
575 		return sk_ioctl(sk, cmd, (void __user *)arg);
576 	}
577 	/*NOTREACHED*/
578 	return 0;
579 }
580 EXPORT_SYMBOL(inet6_ioctl);
581 
582 #ifdef CONFIG_COMPAT
/* Compat-ioctl view of the route message used for SIOCADDRT/SIOCDELRT.
 *
 * inet6_compat_routing_ioctl() copies the three leading addresses in one
 * block and then fetches the remaining fields one by one into a native
 * struct in6_rtmsg, so the field order here must not change.
 */
struct compat_in6_rtmsg {
	struct in6_addr		rtmsg_dst;
	struct in6_addr		rtmsg_src;
	struct in6_addr		rtmsg_gateway;
	u32			rtmsg_type;
	u16			rtmsg_dst_len;
	u16			rtmsg_src_len;
	u32			rtmsg_metric;
	u32			rtmsg_info;
	u32			rtmsg_flags;
	s32			rtmsg_ifindex;
};
595 
596 static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
597 		struct compat_in6_rtmsg __user *ur)
598 {
599 	struct in6_rtmsg rt;
600 
601 	if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst,
602 			3 * sizeof(struct in6_addr)) ||
603 	    get_user(rt.rtmsg_type, &ur->rtmsg_type) ||
604 	    get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) ||
605 	    get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) ||
606 	    get_user(rt.rtmsg_metric, &ur->rtmsg_metric) ||
607 	    get_user(rt.rtmsg_info, &ur->rtmsg_info) ||
608 	    get_user(rt.rtmsg_flags, &ur->rtmsg_flags) ||
609 	    get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex))
610 		return -EFAULT;
611 
612 
613 	return ipv6_route_ioctl(sock_net(sk), cmd, &rt);
614 }
615 
616 int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
617 {
618 	void __user *argp = compat_ptr(arg);
619 	struct sock *sk = sock->sk;
620 
621 	switch (cmd) {
622 	case SIOCADDRT:
623 	case SIOCDELRT:
624 		return inet6_compat_routing_ioctl(sk, cmd, argp);
625 	default:
626 		return -ENOIOCTLCMD;
627 	}
628 }
629 EXPORT_SYMBOL_GPL(inet6_compat_ioctl);
630 #endif /* CONFIG_COMPAT */
631 
632 int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
633 {
634 	struct sock *sk = sock->sk;
635 	const struct proto *prot;
636 
637 	if (unlikely(inet_send_prepare(sk)))
638 		return -EAGAIN;
639 
640 	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
641 	prot = READ_ONCE(sk->sk_prot);
642 	return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
643 			       sk, msg, size);
644 }
645 
646 int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
647 		  int flags)
648 {
649 	struct sock *sk = sock->sk;
650 	const struct proto *prot;
651 
652 	if (likely(!(flags & MSG_ERRQUEUE)))
653 		sock_rps_record_flow(sk);
654 
655 	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
656 	prot = READ_ONCE(sk->sk_prot);
657 	return INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
658 			       sk, msg, size, flags);
659 }
660 
661 const struct proto_ops inet6_stream_ops = {
662 	.family		   = PF_INET6,
663 	.owner		   = THIS_MODULE,
664 	.release	   = inet6_release,
665 	.bind		   = inet6_bind,
666 	.connect	   = inet_stream_connect,	/* ok		*/
667 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
668 	.accept		   = inet_accept,		/* ok		*/
669 	.getname	   = inet6_getname,
670 	.poll		   = tcp_poll,			/* ok		*/
671 	.ioctl		   = inet6_ioctl,		/* must change  */
672 	.gettstamp	   = sock_gettstamp,
673 	.listen		   = inet_listen,		/* ok		*/
674 	.shutdown	   = inet_shutdown,		/* ok		*/
675 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
676 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
677 	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
678 	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
679 #ifdef CONFIG_MMU
680 	.mmap		   = tcp_mmap,
681 #endif
682 	.splice_eof	   = inet_splice_eof,
683 	.sendmsg_locked    = tcp_sendmsg_locked,
684 	.splice_read	   = tcp_splice_read,
685 	.set_peek_off      = sk_set_peek_off,
686 	.read_sock	   = tcp_read_sock,
687 	.read_skb	   = tcp_read_skb,
688 	.peek_len	   = tcp_peek_len,
689 #ifdef CONFIG_COMPAT
690 	.compat_ioctl	   = inet6_compat_ioctl,
691 #endif
692 	.set_rcvlowat	   = tcp_set_rcvlowat,
693 };
694 EXPORT_SYMBOL_GPL(inet6_stream_ops);
695 
696 const struct proto_ops inet6_dgram_ops = {
697 	.family		   = PF_INET6,
698 	.owner		   = THIS_MODULE,
699 	.release	   = inet6_release,
700 	.bind		   = inet6_bind,
701 	.connect	   = inet_dgram_connect,	/* ok		*/
702 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
703 	.accept		   = sock_no_accept,		/* a do nothing	*/
704 	.getname	   = inet6_getname,
705 	.poll		   = udp_poll,			/* ok		*/
706 	.ioctl		   = inet6_ioctl,		/* must change  */
707 	.gettstamp	   = sock_gettstamp,
708 	.listen		   = sock_no_listen,		/* ok		*/
709 	.shutdown	   = inet_shutdown,		/* ok		*/
710 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
711 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
712 	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
713 	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
714 	.read_skb	   = udp_read_skb,
715 	.mmap		   = sock_no_mmap,
716 	.set_peek_off	   = udp_set_peek_off,
717 #ifdef CONFIG_COMPAT
718 	.compat_ioctl	   = inet6_compat_ioctl,
719 #endif
720 };
721 
722 static const struct net_proto_family inet6_family_ops = {
723 	.family = PF_INET6,
724 	.create = inet6_create,
725 	.owner	= THIS_MODULE,
726 };
727 
728 int inet6_register_protosw(struct inet_protosw *p)
729 {
730 	struct list_head *lh;
731 	struct inet_protosw *answer;
732 	struct list_head *last_perm;
733 	int protocol = p->protocol;
734 	int ret;
735 
736 	spin_lock_bh(&inetsw6_lock);
737 
738 	ret = -EINVAL;
739 	if (p->type >= SOCK_MAX)
740 		goto out_illegal;
741 
742 	/* If we are trying to override a permanent protocol, bail. */
743 	answer = NULL;
744 	ret = -EPERM;
745 	last_perm = &inetsw6[p->type];
746 	list_for_each(lh, &inetsw6[p->type]) {
747 		answer = list_entry(lh, struct inet_protosw, list);
748 
749 		/* Check only the non-wild match. */
750 		if (INET_PROTOSW_PERMANENT & answer->flags) {
751 			if (protocol == answer->protocol)
752 				break;
753 			last_perm = lh;
754 		}
755 
756 		answer = NULL;
757 	}
758 	if (answer)
759 		goto out_permanent;
760 
761 	/* Add the new entry after the last permanent entry if any, so that
762 	 * the new entry does not override a permanent entry when matched with
763 	 * a wild-card protocol. But it is allowed to override any existing
764 	 * non-permanent entry.  This means that when we remove this entry, the
765 	 * system automatically returns to the old behavior.
766 	 */
767 	list_add_rcu(&p->list, last_perm);
768 	ret = 0;
769 out:
770 	spin_unlock_bh(&inetsw6_lock);
771 	return ret;
772 
773 out_permanent:
774 	pr_err("Attempt to override permanent protocol %d\n", protocol);
775 	goto out;
776 
777 out_illegal:
778 	pr_err("Ignoring attempt to register invalid socket type %d\n",
779 	       p->type);
780 	goto out;
781 }
782 EXPORT_SYMBOL(inet6_register_protosw);
783 
784 void
785 inet6_unregister_protosw(struct inet_protosw *p)
786 {
787 	if (INET_PROTOSW_PERMANENT & p->flags) {
788 		pr_err("Attempt to unregister permanent protocol %d\n",
789 		       p->protocol);
790 	} else {
791 		spin_lock_bh(&inetsw6_lock);
792 		list_del_rcu(&p->list);
793 		spin_unlock_bh(&inetsw6_lock);
794 
795 		synchronize_net();
796 	}
797 }
798 EXPORT_SYMBOL(inet6_unregister_protosw);
799 
800 int inet6_sk_rebuild_header(struct sock *sk)
801 {
802 	struct ipv6_pinfo *np = inet6_sk(sk);
803 	struct inet_sock *inet = inet_sk(sk);
804 	struct in6_addr *final_p;
805 	struct dst_entry *dst;
806 	struct flowi6 *fl6;
807 
808 	dst = __sk_dst_check(sk, np->dst_cookie);
809 	if (dst)
810 		return 0;
811 
812 	fl6 = &inet->cork.fl.u.ip6;
813 	memset(fl6, 0, sizeof(*fl6));
814 	fl6->flowi6_proto = sk->sk_protocol;
815 	fl6->daddr = sk->sk_v6_daddr;
816 	fl6->saddr = np->saddr;
817 	fl6->flowlabel = np->flow_label;
818 	fl6->flowi6_oif = sk->sk_bound_dev_if;
819 	fl6->flowi6_mark = sk->sk_mark;
820 	fl6->fl6_dport = inet->inet_dport;
821 	fl6->fl6_sport = inet->inet_sport;
822 	fl6->flowi6_uid = sk_uid(sk);
823 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
824 
825 	rcu_read_lock();
826 	final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &np->final);
827 	rcu_read_unlock();
828 
829 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
830 	if (IS_ERR(dst)) {
831 		sk->sk_route_caps = 0;
832 		WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst));
833 		return PTR_ERR(dst);
834 	}
835 
836 	ip6_dst_store(sk, dst, false, false);
837 	return 0;
838 }
839 
840 bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
841 		       const struct inet6_skb_parm *opt)
842 {
843 	const struct ipv6_pinfo *np = inet6_sk(sk);
844 
845 	if (np->rxopt.all) {
846 		if (((opt->flags & IP6SKB_HOPBYHOP) &&
847 		     (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
848 		    (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
849 		     np->rxopt.bits.rxflow) ||
850 		    (opt->srcrt && (np->rxopt.bits.srcrt ||
851 		     np->rxopt.bits.osrcrt)) ||
852 		    ((opt->dst1 || opt->dst0) &&
853 		     (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
854 			return true;
855 	}
856 	return false;
857 }
858 
859 static struct packet_type ipv6_packet_type __read_mostly = {
860 	.type = cpu_to_be16(ETH_P_IPV6),
861 	.func = ipv6_rcv,
862 	.list_func = ipv6_list_rcv,
863 };
864 
865 static int __init ipv6_packet_init(void)
866 {
867 	dev_add_pack(&ipv6_packet_type);
868 	return 0;
869 }
870 
871 static void ipv6_packet_cleanup(void)
872 {
873 	dev_remove_pack(&ipv6_packet_type);
874 }
875 
876 static int __net_init ipv6_init_mibs(struct net *net)
877 {
878 	int i;
879 
880 	net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
881 	if (!net->mib.udp_stats_in6)
882 		return -ENOMEM;
883 
884 	net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
885 	if (!net->mib.ipv6_statistics)
886 		goto err_ip_mib;
887 
888 	for_each_possible_cpu(i) {
889 		struct ipstats_mib *af_inet6_stats;
890 		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
891 		u64_stats_init(&af_inet6_stats->syncp);
892 	}
893 
894 	net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
895 	if (!net->mib.icmpv6_statistics)
896 		goto err_icmp_mib;
897 
898 	net->mib.icmpv6msg_statistics = kzalloc_obj(struct icmpv6msg_mib);
899 	if (!net->mib.icmpv6msg_statistics)
900 		goto err_icmpmsg_mib;
901 	return 0;
902 
903 err_icmpmsg_mib:
904 	free_percpu(net->mib.icmpv6_statistics);
905 err_icmp_mib:
906 	free_percpu(net->mib.ipv6_statistics);
907 err_ip_mib:
908 	free_percpu(net->mib.udp_stats_in6);
909 	return -ENOMEM;
910 }
911 
912 static void ipv6_cleanup_mibs(struct net *net)
913 {
914 	free_percpu(net->mib.udp_stats_in6);
915 	free_percpu(net->mib.ipv6_statistics);
916 	free_percpu(net->mib.icmpv6_statistics);
917 	kfree(net->mib.icmpv6msg_statistics);
918 }
919 
920 static int __net_init inet6_net_init(struct net *net)
921 {
922 	int err = 0;
923 
924 	net->ipv6.sysctl.bindv6only = 0;
925 	net->ipv6.sysctl.icmpv6_time = HZ / 10;
926 	net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
927 	net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
928 	net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
929 	net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0;
930 	net->ipv6.sysctl.icmpv6_errors_extension_mask = 0;
931 
932 	/* By default, rate limit error messages.
933 	 * Except for pmtu discovery, it would break it.
934 	 * proc_do_large_bitmap needs pointer to the bitmap.
935 	 */
936 	bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
937 	bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
938 	net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
939 
940 	net->ipv6.sysctl.flowlabel_consistency = 1;
941 	net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
942 	net->ipv6.sysctl.idgen_retries = 3;
943 	net->ipv6.sysctl.idgen_delay = 1 * HZ;
944 	net->ipv6.sysctl.flowlabel_state_ranges = 0;
945 	net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
946 	net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
947 	net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
948 	net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
949 	net->ipv6.sysctl.fib_notify_on_flag_change = 0;
950 	atomic_set(&net->ipv6.fib6_sernum, 1);
951 
952 	net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
953 	net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
954 
955 	err = ipv6_init_mibs(net);
956 	if (err)
957 		return err;
958 #ifdef CONFIG_PROC_FS
959 	err = udp6_proc_init(net);
960 	if (err)
961 		goto out;
962 	err = tcp6_proc_init(net);
963 	if (err)
964 		goto proc_tcp6_fail;
965 	err = ac6_proc_init(net);
966 	if (err)
967 		goto proc_ac6_fail;
968 #endif
969 	return err;
970 
971 #ifdef CONFIG_PROC_FS
972 proc_ac6_fail:
973 	tcp6_proc_exit(net);
974 proc_tcp6_fail:
975 	udp6_proc_exit(net);
976 out:
977 	ipv6_cleanup_mibs(net);
978 	return err;
979 #endif
980 }
981 
982 static void __net_exit inet6_net_exit(struct net *net)
983 {
984 #ifdef CONFIG_PROC_FS
985 	udp6_proc_exit(net);
986 	tcp6_proc_exit(net);
987 	ac6_proc_exit(net);
988 #endif
989 	ipv6_cleanup_mibs(net);
990 }
991 
992 static struct pernet_operations inet6_net_ops = {
993 	.init = inet6_net_init,
994 	.exit = inet6_net_exit,
995 };
996 
/*
 * Initcall that brings up the IPv6 stack.
 *
 * Initialises the inetsw6 lists and the raw socket hash table, then -
 * unless disable_ipv6_mod is set - registers the TCP/UDP/RAW/PING protos,
 * the PF_INET6 socket family, the per-namespace operations and every IPv6
 * sub-component, one after the other.
 *
 * Each step that fails jumps to its own label in the chain at the bottom
 * of the function; from there execution falls through the remaining
 * labels, undoing the steps that had already succeeded in reverse order.
 * The order of the labels therefore mirrors the order of the init calls
 * and must be kept in sync with it.
 *
 * Returns 0 on success (also when IPv6 is administratively disabled), or
 * the negative errno of the failing step (-ENOMEM for the proc entries).
 */
static int __init inet6_init(void)
{
	struct list_head *r;
	int err = 0;

	sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));

	/* Register the socket-side information for inet6_create.  */
	for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	raw_hashinfo_init(&raw_v6_hashinfo);

	/* "disable" module parameter: stop here, returning success. */
	if (disable_ipv6_mod) {
		pr_info("Loaded, but administratively disabled, reboot required to enable\n");
		goto out;
	}

	err = proto_register(&tcpv6_prot, 1);
	if (err)
		goto out;

	err = proto_register(&udpv6_prot, 1);
	if (err)
		goto out_unregister_tcp_proto;

	err = proto_register(&rawv6_prot, 1);
	if (err)
		goto out_unregister_udp_proto;

	err = proto_register(&pingv6_prot, 1);
	if (err)
		goto out_unregister_raw_proto;

	/* We MUST register RAW sockets before we create the ICMP6,
	 * IGMP6, or NDISC control sockets.
	 */
	err = rawv6_init();
	if (err)
		goto out_unregister_ping_proto;

	/* Register the family here so that the init calls below will
	 * be able to create sockets. (?? is this dangerous ??)
	 */
	err = sock_register(&inet6_family_ops);
	if (err)
		goto out_sock_register_fail;

	/*
	 *	ipngwg API draft makes clear that the correct semantics
	 *	for TCP and UDP is to consider one TCP and UDP instance
	 *	in a host available by both INET and INET6 APIs and
	 *	able to communicate via both network protocols.
	 */

	err = register_pernet_subsys(&inet6_net_ops);
	if (err)
		goto register_pernet_fail;
	err = ip6_mr_init();
	if (err)
		goto ipmr_fail;
	err = icmpv6_init();
	if (err)
		goto icmp_fail;
	err = ndisc_init();
	if (err)
		goto ndisc_fail;
	err = igmp6_init();
	if (err)
		goto igmp_fail;

	/* Create /proc/foo6 entries. */
#ifdef CONFIG_PROC_FS
	/* The proc helpers are only tested for failure, so report any
	 * failure as -ENOMEM.
	 */
	err = -ENOMEM;
	if (raw6_proc_init())
		goto proc_raw6_fail;
	if (ipv6_misc_proc_init())
		goto proc_misc6_fail;
	if (if6_proc_init())
		goto proc_if6_fail;
#endif
	err = ip6_route_init();
	if (err)
		goto ip6_route_fail;
	err = ndisc_late_init();
	if (err)
		goto ndisc_late_fail;
	err = ip6_flowlabel_init();
	if (err)
		goto ip6_flowlabel_fail;
	err = ipv6_anycast_init();
	if (err)
		goto ipv6_anycast_fail;
	err = addrconf_init();
	if (err)
		goto addrconf_fail;

	/* Init v6 extension headers. */
	err = ipv6_exthdrs_init();
	if (err)
		goto ipv6_exthdrs_fail;

	err = ipv6_frag_init();
	if (err)
		goto ipv6_frag_fail;

	/* Init v6 transport protocols. */
	err = udpv6_init();
	if (err)
		goto udpv6_fail;

	err = udpv6_offload_init();
	if (err)
		goto udpv6_offload_fail;

	err = tcpv6_init();
	if (err)
		goto tcpv6_fail;

	err = ipv6_packet_init();
	if (err)
		goto ipv6_packet_fail;

	err = pingv6_init();
	if (err)
		goto pingv6_fail;

	err = calipso_init();
	if (err)
		goto calipso_fail;

	err = seg6_init();
	if (err)
		goto seg6_fail;

	err = rpl_init();
	if (err)
		goto rpl_fail;

	err = ioam6_init();
	if (err)
		goto ioam6_fail;

	err = igmp6_late_init();
	if (err)
		goto igmp6_late_err;

#ifdef CONFIG_SYSCTL
	err = ipv6_sysctl_register();
	if (err)
		goto sysctl_fail;
#endif

out:
	return err;

	/* Error unwinding: every label undoes the step that precedes the
	 * failing one and falls through to the labels below it.
	 */
#ifdef CONFIG_SYSCTL
sysctl_fail:
	igmp6_late_cleanup();
#endif
igmp6_late_err:
	ioam6_exit();
ioam6_fail:
	rpl_exit();
rpl_fail:
	seg6_exit();
seg6_fail:
	calipso_exit();
calipso_fail:
	pingv6_exit();
pingv6_fail:
	ipv6_packet_cleanup();
ipv6_packet_fail:
	tcpv6_exit();
tcpv6_fail:
	udpv6_offload_exit();
udpv6_offload_fail:
	udpv6_exit();
udpv6_fail:
	ipv6_frag_exit();
ipv6_frag_fail:
	ipv6_exthdrs_exit();
ipv6_exthdrs_fail:
	addrconf_cleanup();
addrconf_fail:
	ipv6_anycast_cleanup();
ipv6_anycast_fail:
	ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
	ndisc_late_cleanup();
ndisc_late_fail:
	ip6_route_cleanup();
ip6_route_fail:
#ifdef CONFIG_PROC_FS
	if6_proc_exit();
proc_if6_fail:
	ipv6_misc_proc_exit();
proc_misc6_fail:
	raw6_proc_exit();
proc_raw6_fail:
#endif
	igmp6_cleanup();
igmp_fail:
	ndisc_cleanup();
ndisc_fail:
	icmpv6_cleanup();
icmp_fail:
	ip6_mr_cleanup();
ipmr_fail:
	unregister_pernet_subsys(&inet6_net_ops);
register_pernet_fail:
	sock_unregister(PF_INET6);
	rtnl_unregister_all(PF_INET6);
out_sock_register_fail:
	rawv6_exit();
out_unregister_ping_proto:
	proto_unregister(&pingv6_prot);
out_unregister_raw_proto:
	proto_unregister(&rawv6_prot);
out_unregister_udp_proto:
	proto_unregister(&udpv6_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcpv6_prot);
	goto out;
}
device_initcall(inet6_init);
1223