xref: /linux/net/ipv6/raw.c (revision 606d099cdd1080bbb50ea50dc52d98252f8f10a1)
1 /*
2  *	RAW sockets for IPv6
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Adapted from linux/net/ipv4/raw.c
9  *
10  *	$Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $
11  *
12  *	Fixes:
13  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
14  *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance)
15  *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
16  *
17  *	This program is free software; you can redistribute it and/or
18  *      modify it under the terms of the GNU General Public License
19  *      as published by the Free Software Foundation; either version
20  *      2 of the License, or (at your option) any later version.
21  */
22 
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/sched.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/skbuff.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#include <net/transp_v6.h>
#include <net/udp.h>
#include <net/inet_common.h>
#include <net/tcp_states.h>
#ifdef CONFIG_IPV6_MIP6
#include <net/mip6.h>
#endif

#include <net/rawv6.h>
#include <net/xfrm.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
62 
/*
 * Hash table of raw IPv6 sockets.  raw_v6_hash() picks the bucket from
 * inet_sk(sk)->num masked with (RAWV6_HTABLE_SIZE - 1).
 */
struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
/* Reader/writer lock taken around every access to raw_v6_htable in this file. */
DEFINE_RWLOCK(raw_v6_lock);
65 
66 static void raw_v6_hash(struct sock *sk)
67 {
68 	struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num &
69 						 (RAWV6_HTABLE_SIZE - 1)];
70 
71 	write_lock_bh(&raw_v6_lock);
72 	sk_add_node(sk, list);
73 	sock_prot_inc_use(sk->sk_prot);
74  	write_unlock_bh(&raw_v6_lock);
75 }
76 
77 static void raw_v6_unhash(struct sock *sk)
78 {
79  	write_lock_bh(&raw_v6_lock);
80 	if (sk_del_node_init(sk))
81 		sock_prot_dec_use(sk->sk_prot);
82 	write_unlock_bh(&raw_v6_lock);
83 }
84 
85 
86 /* Grumble... icmp and ip_input want to get at this... */
87 struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
88 			     struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
89 			     int dif)
90 {
91 	struct hlist_node *node;
92 	int is_multicast = ipv6_addr_is_multicast(loc_addr);
93 
94 	sk_for_each_from(sk, node)
95 		if (inet_sk(sk)->num == num) {
96 			struct ipv6_pinfo *np = inet6_sk(sk);
97 
98 			if (!ipv6_addr_any(&np->daddr) &&
99 			    !ipv6_addr_equal(&np->daddr, rmt_addr))
100 				continue;
101 
102 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
103 				continue;
104 
105 			if (!ipv6_addr_any(&np->rcv_saddr)) {
106 				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
107 					goto found;
108 				if (is_multicast &&
109 				    inet6_mc_check(sk, loc_addr, rmt_addr))
110 					goto found;
111 				continue;
112 			}
113 			goto found;
114 		}
115 	sk = NULL;
116 found:
117 	return sk;
118 }
119 
120 /*
121  *	0 - deliver
122  *	1 - block
123  */
124 static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
125 {
126 	struct icmp6hdr *icmph;
127 	struct raw6_sock *rp = raw6_sk(sk);
128 
129 	if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
130 		__u32 *data = &rp->filter.data[0];
131 		int bit_nr;
132 
133 		icmph = (struct icmp6hdr *) skb->data;
134 		bit_nr = icmph->icmp6_type;
135 
136 		return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
137 	}
138 	return 0;
139 }
140 
141 /*
142  *	demultiplex raw sockets.
143  *	(should consider queueing the skb in the sock receive_queue
144  *	without calling rawv6.c)
145  *
146  *	Caller owns SKB so we must make clones.
147  */
148 int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
149 {
150 	struct in6_addr *saddr;
151 	struct in6_addr *daddr;
152 	struct sock *sk;
153 	int delivered = 0;
154 	__u8 hash;
155 
156 	saddr = &skb->nh.ipv6h->saddr;
157 	daddr = saddr + 1;
158 
159 	hash = nexthdr & (MAX_INET_PROTOS - 1);
160 
161 	read_lock(&raw_v6_lock);
162 	sk = sk_head(&raw_v6_htable[hash]);
163 
164 	/*
165 	 *	The first socket found will be delivered after
166 	 *	delivery to transport protocols.
167 	 */
168 
169 	if (sk == NULL)
170 		goto out;
171 
172 	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
173 
174 	while (sk) {
175 		int filtered;
176 
177 		delivered = 1;
178 		switch (nexthdr) {
179 		case IPPROTO_ICMPV6:
180 			filtered = icmpv6_filter(sk, skb);
181 			break;
182 #ifdef CONFIG_IPV6_MIP6
183 		case IPPROTO_MH:
184 			/* XXX: To validate MH only once for each packet,
185 			 * this is placed here. It should be after checking
186 			 * xfrm policy, however it doesn't. The checking xfrm
187 			 * policy is placed in rawv6_rcv() because it is
188 			 * required for each socket.
189 			 */
190 			filtered = mip6_mh_filter(sk, skb);
191 			break;
192 #endif
193 		default:
194 			filtered = 0;
195 			break;
196 		}
197 
198 		if (filtered < 0)
199 			break;
200 		if (filtered == 0) {
201 			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
202 
203 			/* Not releasing hash table! */
204 			if (clone) {
205 				nf_reset(clone);
206 				rawv6_rcv(sk, clone);
207 			}
208 		}
209 		sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
210 				     IP6CB(skb)->iif);
211 	}
212 out:
213 	read_unlock(&raw_v6_lock);
214 	return delivered;
215 }
216 
217 /* This cleans up af_inet6 a bit. -DaveM */
218 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
219 {
220 	struct inet_sock *inet = inet_sk(sk);
221 	struct ipv6_pinfo *np = inet6_sk(sk);
222 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
223 	__be32 v4addr = 0;
224 	int addr_type;
225 	int err;
226 
227 	if (addr_len < SIN6_LEN_RFC2133)
228 		return -EINVAL;
229 	addr_type = ipv6_addr_type(&addr->sin6_addr);
230 
231 	/* Raw sockets are IPv6 only */
232 	if (addr_type == IPV6_ADDR_MAPPED)
233 		return(-EADDRNOTAVAIL);
234 
235 	lock_sock(sk);
236 
237 	err = -EINVAL;
238 	if (sk->sk_state != TCP_CLOSE)
239 		goto out;
240 
241 	/* Check if the address belongs to the host. */
242 	if (addr_type != IPV6_ADDR_ANY) {
243 		struct net_device *dev = NULL;
244 
245 		if (addr_type & IPV6_ADDR_LINKLOCAL) {
246 			if (addr_len >= sizeof(struct sockaddr_in6) &&
247 			    addr->sin6_scope_id) {
248 				/* Override any existing binding, if another
249 				 * one is supplied by user.
250 				 */
251 				sk->sk_bound_dev_if = addr->sin6_scope_id;
252 			}
253 
254 			/* Binding to link-local address requires an interface */
255 			if (!sk->sk_bound_dev_if)
256 				goto out;
257 
258 			dev = dev_get_by_index(sk->sk_bound_dev_if);
259 			if (!dev) {
260 				err = -ENODEV;
261 				goto out;
262 			}
263 		}
264 
265 		/* ipv4 addr of the socket is invalid.  Only the
266 		 * unspecified and mapped address have a v4 equivalent.
267 		 */
268 		v4addr = LOOPBACK4_IPV6;
269 		if (!(addr_type & IPV6_ADDR_MULTICAST))	{
270 			err = -EADDRNOTAVAIL;
271 			if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
272 				if (dev)
273 					dev_put(dev);
274 				goto out;
275 			}
276 		}
277 		if (dev)
278 			dev_put(dev);
279 	}
280 
281 	inet->rcv_saddr = inet->saddr = v4addr;
282 	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
283 	if (!(addr_type & IPV6_ADDR_MULTICAST))
284 		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
285 	err = 0;
286 out:
287 	release_sock(sk);
288 	return err;
289 }
290 
291 void rawv6_err(struct sock *sk, struct sk_buff *skb,
292 	       struct inet6_skb_parm *opt,
293 	       int type, int code, int offset, __be32 info)
294 {
295 	struct inet_sock *inet = inet_sk(sk);
296 	struct ipv6_pinfo *np = inet6_sk(sk);
297 	int err;
298 	int harderr;
299 
300 	/* Report error on raw socket, if:
301 	   1. User requested recverr.
302 	   2. Socket is connected (otherwise the error indication
303 	      is useless without recverr and error is hard.
304 	 */
305 	if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
306 		return;
307 
308 	harderr = icmpv6_err_convert(type, code, &err);
309 	if (type == ICMPV6_PKT_TOOBIG)
310 		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
311 
312 	if (np->recverr) {
313 		u8 *payload = skb->data;
314 		if (!inet->hdrincl)
315 			payload += offset;
316 		ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
317 	}
318 
319 	if (np->recverr || harderr) {
320 		sk->sk_err = err;
321 		sk->sk_error_report(sk);
322 	}
323 }
324 
325 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
326 {
327 	if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
328 	    skb_checksum_complete(skb)) {
329 		/* FIXME: increment a raw6 drops counter here */
330 		kfree_skb(skb);
331 		return 0;
332 	}
333 
334 	/* Charge it to the socket. */
335 	if (sock_queue_rcv_skb(sk,skb)<0) {
336 		/* FIXME: increment a raw6 drops counter here */
337 		kfree_skb(skb);
338 		return 0;
339 	}
340 
341 	return 0;
342 }
343 
344 /*
345  *	This is next to useless...
346  *	if we demultiplex in network layer we don't need the extra call
347  *	just to queue the skb...
348  *	maybe we could have the network decide upon a hint if it
349  *	should call raw_rcv for demultiplexing
350  */
351 int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
352 {
353 	struct inet_sock *inet = inet_sk(sk);
354 	struct raw6_sock *rp = raw6_sk(sk);
355 
356         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
357                 kfree_skb(skb);
358                 return NET_RX_DROP;
359         }
360 
361 	if (!rp->checksum)
362 		skb->ip_summed = CHECKSUM_UNNECESSARY;
363 
364 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
365 		skb_postpull_rcsum(skb, skb->nh.raw,
366 		                   skb->h.raw - skb->nh.raw);
367 		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
368 				     &skb->nh.ipv6h->daddr,
369 				     skb->len, inet->num, skb->csum))
370 			skb->ip_summed = CHECKSUM_UNNECESSARY;
371 	}
372 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
373 		skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
374 					     &skb->nh.ipv6h->daddr,
375 					     skb->len, inet->num, 0));
376 
377 	if (inet->hdrincl) {
378 		if (skb_checksum_complete(skb)) {
379 			/* FIXME: increment a raw6 drops counter here */
380 			kfree_skb(skb);
381 			return 0;
382 		}
383 	}
384 
385 	rawv6_rcv_skb(sk, skb);
386 	return 0;
387 }
388 
389 
390 /*
391  *	This should be easy, if there is something there
392  *	we return it, otherwise we block.
393  */
394 
395 static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
396 		  struct msghdr *msg, size_t len,
397 		  int noblock, int flags, int *addr_len)
398 {
399 	struct ipv6_pinfo *np = inet6_sk(sk);
400 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
401 	struct sk_buff *skb;
402 	size_t copied;
403 	int err;
404 
405 	if (flags & MSG_OOB)
406 		return -EOPNOTSUPP;
407 
408 	if (addr_len)
409 		*addr_len=sizeof(*sin6);
410 
411 	if (flags & MSG_ERRQUEUE)
412 		return ipv6_recv_error(sk, msg, len);
413 
414 	skb = skb_recv_datagram(sk, flags, noblock, &err);
415 	if (!skb)
416 		goto out;
417 
418 	copied = skb->len;
419   	if (copied > len) {
420   		copied = len;
421   		msg->msg_flags |= MSG_TRUNC;
422   	}
423 
424 	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
425 		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
426 	} else if (msg->msg_flags&MSG_TRUNC) {
427 		if (__skb_checksum_complete(skb))
428 			goto csum_copy_err;
429 		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
430 	} else {
431 		err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
432 		if (err == -EINVAL)
433 			goto csum_copy_err;
434 	}
435 	if (err)
436 		goto out_free;
437 
438 	/* Copy the address. */
439 	if (sin6) {
440 		sin6->sin6_family = AF_INET6;
441 		sin6->sin6_port = 0;
442 		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
443 		sin6->sin6_flowinfo = 0;
444 		sin6->sin6_scope_id = 0;
445 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
446 			sin6->sin6_scope_id = IP6CB(skb)->iif;
447 	}
448 
449 	sock_recv_timestamp(msg, sk, skb);
450 
451 	if (np->rxopt.all)
452 		datagram_recv_ctl(sk, msg, skb);
453 
454 	err = copied;
455 	if (flags & MSG_TRUNC)
456 		err = skb->len;
457 
458 out_free:
459 	skb_free_datagram(sk, skb);
460 out:
461 	return err;
462 
463 csum_copy_err:
464 	skb_kill_datagram(sk, skb, flags);
465 
466 	/* Error for blocking case is chosen to masquerade
467 	   as some normal condition.
468 	 */
469 	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
470 	/* FIXME: increment a raw6 drops counter here */
471 	goto out;
472 }
473 
/*
 * Finish the pending (corked) frames of @sk and push them out.
 *
 * When the socket has checksumming enabled (rp->checksum), the 16-bit
 * checksum is computed over the queued data plus the IPv6 pseudo-header
 * taken from @fl and stored at byte offset rp->offset of the payload
 * before ip6_push_pending_frames() is called.
 *
 * Returns the result of ip6_push_pending_frames(), 0 if nothing is queued,
 * or -EINVAL (after flushing the queue) when the checksum offset does not
 * leave room for two bytes inside the payload.
 */
static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
				     struct raw6_sock *rp)
{
	struct sk_buff *skb;
	int err = 0;
	int offset;
	int len;
	int total_len;
	__wsum tmp_csum;
	__sum16 csum;

	if (!rp->checksum)
		goto send;

	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
		goto out;

	offset = rp->offset;
	/* Corked length minus whatever sits in front of the network header. */
	total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
	/* The two checksum bytes must lie completely inside the data. */
	if (offset >= total_len - 1) {
		err = -EINVAL;
		ip6_flush_pending_frames(sk);
		goto out;
	}

	/* should be check HW csum miyazawa */
	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/*
		 * Only one fragment on the socket.
		 */
		tmp_csum = skb->csum;
	} else {
		struct sk_buff *csum_skb = NULL;
		tmp_csum = 0;

		/*
		 * Sum the partial checksums of all queued skbs and, in the
		 * same pass, find the skb that holds the checksum field;
		 * offset is reduced so it ends up relative to that skb.
		 */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);

			if (csum_skb)
				continue;

			len = skb->len - (skb->h.raw - skb->data);
			if (offset >= len) {
				offset -= len;
				continue;
			}

			csum_skb = skb;
		}

		skb = csum_skb;
	}

	/* Make offset relative to skb->data instead of the transport header. */
	offset += skb->h.raw - skb->data;
	if (skb_copy_bits(skb, offset, &csum, 2))
		BUG();

	/* in case cksum was not initialized */
	if (unlikely(csum))
		tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));

	csum = csum_ipv6_magic(&fl->fl6_src,
				   &fl->fl6_dst,
				   total_len, fl->proto, tmp_csum);

	/* For UDP a computed checksum of zero is sent as CSUM_MANGLED_0. */
	if (csum == 0 && fl->proto == IPPROTO_UDP)
		csum = CSUM_MANGLED_0;

	if (skb_store_bits(skb, offset, &csum, 2))
		BUG();

send:
	err = ip6_push_pending_frames(sk);
out:
	return err;
}
550 
551 static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
552 			struct flowi *fl, struct rt6_info *rt,
553 			unsigned int flags)
554 {
555 	struct ipv6_pinfo *np = inet6_sk(sk);
556 	struct ipv6hdr *iph;
557 	struct sk_buff *skb;
558 	unsigned int hh_len;
559 	int err;
560 
561 	if (length > rt->u.dst.dev->mtu) {
562 		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
563 		return -EMSGSIZE;
564 	}
565 	if (flags&MSG_PROBE)
566 		goto out;
567 
568 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
569 
570 	skb = sock_alloc_send_skb(sk, length+hh_len+15,
571 				  flags&MSG_DONTWAIT, &err);
572 	if (skb == NULL)
573 		goto error;
574 	skb_reserve(skb, hh_len);
575 
576 	skb->priority = sk->sk_priority;
577 	skb->dst = dst_clone(&rt->u.dst);
578 
579 	skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
580 
581 	skb->ip_summed = CHECKSUM_NONE;
582 
583 	skb->h.raw = skb->nh.raw;
584 	err = memcpy_fromiovecend((void *)iph, from, 0, length);
585 	if (err)
586 		goto error_fault;
587 
588 	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
589 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
590 		      dst_output);
591 	if (err > 0)
592 		err = np->recverr ? net_xmit_errno(err) : 0;
593 	if (err)
594 		goto error;
595 out:
596 	return 0;
597 
598 error_fault:
599 	err = -EFAULT;
600 	kfree_skb(skb);
601 error:
602 	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
603 	return err;
604 }
605 
/*
 * Peek into the user's iovec to fill protocol-specific fields of @fl:
 * ICMPv6 type and code for IPPROTO_ICMPV6, and the MH type for IPPROTO_MH
 * (CONFIG_IPV6_MIP6 only).  For any other protocol nothing is read.
 *
 * The wanted bytes may be spread over several iovec entries, so entries
 * are scanned until the fields have been read.
 *
 * Returns 0, or -EFAULT when reading from user space fails.
 */
static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
{
	struct iovec *iov;
	u8 __user *type = NULL;
	u8 __user *code = NULL;
#ifdef CONFIG_IPV6_MIP6
	/* Bytes of the MH header skipped in earlier iovec entries. */
	u8 len = 0;
#endif
	int probed = 0;
	int i;

	if (!msg->msg_iov)
		return 0;

	for (i = 0; i < msg->msg_iovlen; i++) {
		iov = &msg->msg_iov[i];
		/* NOTE(review): iov is the address of an array element, so
		 * this test looks like it can never be true - confirm.
		 */
		if (!iov)
			continue;

		switch (fl->proto) {
		case IPPROTO_ICMPV6:
			/* check if one-byte field is readable or not. */
			if (iov->iov_base && iov->iov_len < 1)
				break;

			/* type is the first byte seen, code the byte after
			 * it; code may live in the next iovec entry.
			 */
			if (!type) {
				type = iov->iov_base;
				/* check if code field is readable or not. */
				if (iov->iov_len > 1)
					code = type + 1;
			} else if (!code)
				code = iov->iov_base;

			if (type && code) {
				if (get_user(fl->fl_icmp_type, type) ||
				    get_user(fl->fl_icmp_code, code))
					return -EFAULT;
				probed = 1;
			}
			break;
#ifdef CONFIG_IPV6_MIP6
		case IPPROTO_MH:
			if (iov->iov_base && iov->iov_len < 1)
				break;
			/* check if type field is readable or not. */
			/* The MH type is read from byte offset 2 of the
			 * message; len accounts for bytes already skipped.
			 */
			if (iov->iov_len > 2 - len) {
				u8 __user *p = iov->iov_base;
				if (get_user(fl->fl_mh_type, &p[2 - len]))
					return -EFAULT;
				probed = 1;
			} else
				len += iov->iov_len;

			break;
#endif
		default:
			probed = 1;
			break;
		}
		if (probed)
			break;
	}
	return 0;
}
670 
671 static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
672 		   struct msghdr *msg, size_t len)
673 {
674 	struct ipv6_txoptions opt_space;
675 	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
676 	struct in6_addr *daddr, *final_p = NULL, final;
677 	struct inet_sock *inet = inet_sk(sk);
678 	struct ipv6_pinfo *np = inet6_sk(sk);
679 	struct raw6_sock *rp = raw6_sk(sk);
680 	struct ipv6_txoptions *opt = NULL;
681 	struct ip6_flowlabel *flowlabel = NULL;
682 	struct dst_entry *dst = NULL;
683 	struct flowi fl;
684 	int addr_len = msg->msg_namelen;
685 	int hlimit = -1;
686 	int tclass = -1;
687 	u16 proto;
688 	int err;
689 
690 	/* Rough check on arithmetic overflow,
691 	   better check is made in ip6_build_xmit
692 	 */
693 	if (len < 0)
694 		return -EMSGSIZE;
695 
696 	/* Mirror BSD error message compatibility */
697 	if (msg->msg_flags & MSG_OOB)
698 		return -EOPNOTSUPP;
699 
700 	/*
701 	 *	Get and verify the address.
702 	 */
703 	memset(&fl, 0, sizeof(fl));
704 
705 	if (sin6) {
706 		if (addr_len < SIN6_LEN_RFC2133)
707 			return -EINVAL;
708 
709 		if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
710 			return(-EAFNOSUPPORT);
711 
712 		/* port is the proto value [0..255] carried in nexthdr */
713 		proto = ntohs(sin6->sin6_port);
714 
715 		if (!proto)
716 			proto = inet->num;
717 		else if (proto != inet->num)
718 			return(-EINVAL);
719 
720 		if (proto > 255)
721 			return(-EINVAL);
722 
723 		daddr = &sin6->sin6_addr;
724 		if (np->sndflow) {
725 			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
726 			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
727 				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
728 				if (flowlabel == NULL)
729 					return -EINVAL;
730 				daddr = &flowlabel->dst;
731 			}
732 		}
733 
734 		/*
735 		 * Otherwise it will be difficult to maintain
736 		 * sk->sk_dst_cache.
737 		 */
738 		if (sk->sk_state == TCP_ESTABLISHED &&
739 		    ipv6_addr_equal(daddr, &np->daddr))
740 			daddr = &np->daddr;
741 
742 		if (addr_len >= sizeof(struct sockaddr_in6) &&
743 		    sin6->sin6_scope_id &&
744 		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
745 			fl.oif = sin6->sin6_scope_id;
746 	} else {
747 		if (sk->sk_state != TCP_ESTABLISHED)
748 			return -EDESTADDRREQ;
749 
750 		proto = inet->num;
751 		daddr = &np->daddr;
752 		fl.fl6_flowlabel = np->flow_label;
753 	}
754 
755 	if (ipv6_addr_any(daddr)) {
756 		/*
757 		 * unspecified destination address
758 		 * treated as error... is this correct ?
759 		 */
760 		fl6_sock_release(flowlabel);
761 		return(-EINVAL);
762 	}
763 
764 	if (fl.oif == 0)
765 		fl.oif = sk->sk_bound_dev_if;
766 
767 	if (msg->msg_controllen) {
768 		opt = &opt_space;
769 		memset(opt, 0, sizeof(struct ipv6_txoptions));
770 		opt->tot_len = sizeof(struct ipv6_txoptions);
771 
772 		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
773 		if (err < 0) {
774 			fl6_sock_release(flowlabel);
775 			return err;
776 		}
777 		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
778 			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
779 			if (flowlabel == NULL)
780 				return -EINVAL;
781 		}
782 		if (!(opt->opt_nflen|opt->opt_flen))
783 			opt = NULL;
784 	}
785 	if (opt == NULL)
786 		opt = np->opt;
787 	if (flowlabel)
788 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
789 	opt = ipv6_fixup_options(&opt_space, opt);
790 
791 	fl.proto = proto;
792 	err = rawv6_probe_proto_opt(&fl, msg);
793 	if (err)
794 		goto out;
795 
796 	ipv6_addr_copy(&fl.fl6_dst, daddr);
797 	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
798 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
799 
800 	/* merge ip6_build_xmit from ip6_output */
801 	if (opt && opt->srcrt) {
802 		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
803 		ipv6_addr_copy(&final, &fl.fl6_dst);
804 		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
805 		final_p = &final;
806 	}
807 
808 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
809 		fl.oif = np->mcast_oif;
810 	security_sk_classify_flow(sk, &fl);
811 
812 	err = ip6_dst_lookup(sk, &dst, &fl);
813 	if (err)
814 		goto out;
815 	if (final_p)
816 		ipv6_addr_copy(&fl.fl6_dst, final_p);
817 
818 	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
819 		goto out;
820 
821 	if (hlimit < 0) {
822 		if (ipv6_addr_is_multicast(&fl.fl6_dst))
823 			hlimit = np->mcast_hops;
824 		else
825 			hlimit = np->hop_limit;
826 		if (hlimit < 0)
827 			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
828 		if (hlimit < 0)
829 			hlimit = ipv6_get_hoplimit(dst->dev);
830 	}
831 
832 	if (tclass < 0) {
833 		tclass = np->tclass;
834 		if (tclass < 0)
835 			tclass = 0;
836 	}
837 
838 	if (msg->msg_flags&MSG_CONFIRM)
839 		goto do_confirm;
840 
841 back_from_confirm:
842 	if (inet->hdrincl) {
843 		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
844 	} else {
845 		lock_sock(sk);
846 		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
847 			len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
848 			msg->msg_flags);
849 
850 		if (err)
851 			ip6_flush_pending_frames(sk);
852 		else if (!(msg->msg_flags & MSG_MORE))
853 			err = rawv6_push_pending_frames(sk, &fl, rp);
854 	}
855 done:
856 	dst_release(dst);
857 	if (!inet->hdrincl)
858 		release_sock(sk);
859 out:
860 	fl6_sock_release(flowlabel);
861 	return err<0?err:len;
862 do_confirm:
863 	dst_confirm(dst);
864 	if (!(msg->msg_flags & MSG_PROBE) || len)
865 		goto back_from_confirm;
866 	err = 0;
867 	goto done;
868 }
869 
870 static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
871 			       char __user *optval, int optlen)
872 {
873 	switch (optname) {
874 	case ICMPV6_FILTER:
875 		if (optlen > sizeof(struct icmp6_filter))
876 			optlen = sizeof(struct icmp6_filter);
877 		if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
878 			return -EFAULT;
879 		return 0;
880 	default:
881 		return -ENOPROTOOPT;
882 	};
883 
884 	return 0;
885 }
886 
887 static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
888 			       char __user *optval, int __user *optlen)
889 {
890 	int len;
891 
892 	switch (optname) {
893 	case ICMPV6_FILTER:
894 		if (get_user(len, optlen))
895 			return -EFAULT;
896 		if (len < 0)
897 			return -EINVAL;
898 		if (len > sizeof(struct icmp6_filter))
899 			len = sizeof(struct icmp6_filter);
900 		if (put_user(len, optlen))
901 			return -EFAULT;
902 		if (copy_to_user(optval, &raw6_sk(sk)->filter, len))
903 			return -EFAULT;
904 		return 0;
905 	default:
906 		return -ENOPROTOOPT;
907 	};
908 
909 	return 0;
910 }
911 
912 
913 static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
914 			    char __user *optval, int optlen)
915 {
916 	struct raw6_sock *rp = raw6_sk(sk);
917 	int val;
918 
919   	if (get_user(val, (int __user *)optval))
920 		return -EFAULT;
921 
922 	switch (optname) {
923 		case IPV6_CHECKSUM:
924 			/* You may get strange result with a positive odd offset;
925 			   RFC2292bis agrees with me. */
926 			if (val > 0 && (val&1))
927 				return(-EINVAL);
928 			if (val < 0) {
929 				rp->checksum = 0;
930 			} else {
931 				rp->checksum = 1;
932 				rp->offset = val;
933 			}
934 
935 			return 0;
936 			break;
937 
938 		default:
939 			return(-ENOPROTOOPT);
940 	}
941 }
942 
943 static int rawv6_setsockopt(struct sock *sk, int level, int optname,
944 			  char __user *optval, int optlen)
945 {
946 	switch(level) {
947 		case SOL_RAW:
948 			break;
949 
950 		case SOL_ICMPV6:
951 			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
952 				return -EOPNOTSUPP;
953 			return rawv6_seticmpfilter(sk, level, optname, optval,
954 						   optlen);
955 		case SOL_IPV6:
956 			if (optname == IPV6_CHECKSUM)
957 				break;
958 		default:
959 			return ipv6_setsockopt(sk, level, optname, optval,
960 					       optlen);
961 	};
962 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
963 }
964 
965 #ifdef CONFIG_COMPAT
966 static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
967 				   char __user *optval, int optlen)
968 {
969 	switch (level) {
970 	case SOL_RAW:
971 		break;
972 	case SOL_ICMPV6:
973 		if (inet_sk(sk)->num != IPPROTO_ICMPV6)
974 			return -EOPNOTSUPP;
975 		return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
976 	case SOL_IPV6:
977 		if (optname == IPV6_CHECKSUM)
978 			break;
979 	default:
980 		return compat_ipv6_setsockopt(sk, level, optname,
981 					      optval, optlen);
982 	};
983 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
984 }
985 #endif
986 
987 static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
988 			    char __user *optval, int __user *optlen)
989 {
990 	struct raw6_sock *rp = raw6_sk(sk);
991 	int val, len;
992 
993 	if (get_user(len,optlen))
994 		return -EFAULT;
995 
996 	switch (optname) {
997 	case IPV6_CHECKSUM:
998 		if (rp->checksum == 0)
999 			val = -1;
1000 		else
1001 			val = rp->offset;
1002 		break;
1003 
1004 	default:
1005 		return -ENOPROTOOPT;
1006 	}
1007 
1008 	len = min_t(unsigned int, sizeof(int), len);
1009 
1010 	if (put_user(len, optlen))
1011 		return -EFAULT;
1012 	if (copy_to_user(optval,&val,len))
1013 		return -EFAULT;
1014 	return 0;
1015 }
1016 
1017 static int rawv6_getsockopt(struct sock *sk, int level, int optname,
1018 			  char __user *optval, int __user *optlen)
1019 {
1020 	switch(level) {
1021 		case SOL_RAW:
1022 			break;
1023 
1024 		case SOL_ICMPV6:
1025 			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
1026 				return -EOPNOTSUPP;
1027 			return rawv6_geticmpfilter(sk, level, optname, optval,
1028 						   optlen);
1029 		case SOL_IPV6:
1030 			if (optname == IPV6_CHECKSUM)
1031 				break;
1032 		default:
1033 			return ipv6_getsockopt(sk, level, optname, optval,
1034 					       optlen);
1035 	};
1036 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
1037 }
1038 
1039 #ifdef CONFIG_COMPAT
1040 static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
1041 				   char __user *optval, int __user *optlen)
1042 {
1043 	switch (level) {
1044 	case SOL_RAW:
1045 		break;
1046 	case SOL_ICMPV6:
1047 		if (inet_sk(sk)->num != IPPROTO_ICMPV6)
1048 			return -EOPNOTSUPP;
1049 		return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
1050 	case SOL_IPV6:
1051 		if (optname == IPV6_CHECKSUM)
1052 			break;
1053 	default:
1054 		return compat_ipv6_getsockopt(sk, level, optname,
1055 					      optval, optlen);
1056 	};
1057 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
1058 }
1059 #endif
1060 
1061 static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
1062 {
1063 	switch(cmd) {
1064 		case SIOCOUTQ:
1065 		{
1066 			int amount = atomic_read(&sk->sk_wmem_alloc);
1067 			return put_user(amount, (int __user *)arg);
1068 		}
1069 		case SIOCINQ:
1070 		{
1071 			struct sk_buff *skb;
1072 			int amount = 0;
1073 
1074 			spin_lock_bh(&sk->sk_receive_queue.lock);
1075 			skb = skb_peek(&sk->sk_receive_queue);
1076 			if (skb != NULL)
1077 				amount = skb->tail - skb->h.raw;
1078 			spin_unlock_bh(&sk->sk_receive_queue.lock);
1079 			return put_user(amount, (int __user *)arg);
1080 		}
1081 
1082 		default:
1083 			return -ENOIOCTLCMD;
1084 	}
1085 }
1086 
1087 static void rawv6_close(struct sock *sk, long timeout)
1088 {
1089 	if (inet_sk(sk)->num == IPPROTO_RAW)
1090 		ip6_ra_control(sk, -1, NULL);
1091 
1092 	sk_common_release(sk);
1093 }
1094 
1095 static int rawv6_init_sk(struct sock *sk)
1096 {
1097 	if (inet_sk(sk)->num == IPPROTO_ICMPV6) {
1098 		struct raw6_sock *rp = raw6_sk(sk);
1099 		rp->checksum = 1;
1100 		rp->offset   = 2;
1101 	}
1102 	return(0);
1103 }
1104 
/*
 * Protocol operations table for raw IPv6 sockets ("RAWv6").  Most
 * handlers are defined in this file; connect, disconnect and destroy use
 * helpers named ip6_datagram_connect, udp_disconnect and
 * inet6_destroy_sock, which are not defined here.
 */
struct proto rawv6_prot = {
	.name		   = "RAWv6",
	.owner		   = THIS_MODULE,
	.close		   = rawv6_close,
	.connect	   = ip6_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = rawv6_ioctl,
	.init		   = rawv6_init_sk,
	.destroy	   = inet6_destroy_sock,
	.setsockopt	   = rawv6_setsockopt,
	.getsockopt	   = rawv6_getsockopt,
	.sendmsg	   = rawv6_sendmsg,
	.recvmsg	   = rawv6_recvmsg,
	.bind		   = rawv6_bind,
	.backlog_rcv	   = rawv6_rcv_skb,
	.hash		   = raw_v6_hash,
	.unhash		   = raw_v6_unhash,
	/* Socket objects are allocated as struct raw6_sock. */
	.obj_size	   = sizeof(struct raw6_sock),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_rawv6_setsockopt,
	.compat_getsockopt = compat_rawv6_getsockopt,
#endif
};
1128 
1129 #ifdef CONFIG_PROC_FS
/* Iterator state of the raw6 seq_file, stored in seq->private. */
struct raw6_iter_state {
	/* Index into raw_v6_htable of the bucket currently being walked. */
	int bucket;
};

/* Fetch the iterator state attached to a seq_file. */
#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private)
1135 
1136 static struct sock *raw6_get_first(struct seq_file *seq)
1137 {
1138 	struct sock *sk;
1139 	struct hlist_node *node;
1140 	struct raw6_iter_state* state = raw6_seq_private(seq);
1141 
1142 	for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket)
1143 		sk_for_each(sk, node, &raw_v6_htable[state->bucket])
1144 			if (sk->sk_family == PF_INET6)
1145 				goto out;
1146 	sk = NULL;
1147 out:
1148 	return sk;
1149 }
1150 
1151 static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk)
1152 {
1153 	struct raw6_iter_state* state = raw6_seq_private(seq);
1154 
1155 	do {
1156 		sk = sk_next(sk);
1157 try_again:
1158 		;
1159 	} while (sk && sk->sk_family != PF_INET6);
1160 
1161 	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
1162 		sk = sk_head(&raw_v6_htable[state->bucket]);
1163 		goto try_again;
1164 	}
1165 	return sk;
1166 }
1167 
1168 static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
1169 {
1170 	struct sock *sk = raw6_get_first(seq);
1171 	if (sk)
1172 		while (pos && (sk = raw6_get_next(seq, sk)) != NULL)
1173 			--pos;
1174 	return pos ? NULL : sk;
1175 }
1176 
1177 static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
1178 {
1179 	read_lock(&raw_v6_lock);
1180 	return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1181 }
1182 
1183 static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1184 {
1185 	struct sock *sk;
1186 
1187 	if (v == SEQ_START_TOKEN)
1188 		sk = raw6_get_first(seq);
1189 	else
1190 		sk = raw6_get_next(seq, v);
1191 	++*pos;
1192 	return sk;
1193 }
1194 
/* seq_file stop: drop the table read lock taken in raw6_seq_start(). */
static void raw6_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&raw_v6_lock);
}
1199 
1200 static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1201 {
1202 	struct ipv6_pinfo *np = inet6_sk(sp);
1203 	struct in6_addr *dest, *src;
1204 	__u16 destp, srcp;
1205 
1206 	dest  = &np->daddr;
1207 	src   = &np->rcv_saddr;
1208 	destp = 0;
1209 	srcp  = inet_sk(sp)->num;
1210 	seq_printf(seq,
1211 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1212 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
1213 		   i,
1214 		   src->s6_addr32[0], src->s6_addr32[1],
1215 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1216 		   dest->s6_addr32[0], dest->s6_addr32[1],
1217 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1218 		   sp->sk_state,
1219 		   atomic_read(&sp->sk_wmem_alloc),
1220 		   atomic_read(&sp->sk_rmem_alloc),
1221 		   0, 0L, 0,
1222 		   sock_i_uid(sp), 0,
1223 		   sock_i_ino(sp),
1224 		   atomic_read(&sp->sk_refcnt), sp);
1225 }
1226 
1227 static int raw6_seq_show(struct seq_file *seq, void *v)
1228 {
1229 	if (v == SEQ_START_TOKEN)
1230 		seq_printf(seq,
1231 			   "  sl  "
1232 			   "local_address                         "
1233 			   "remote_address                        "
1234 			   "st tx_queue rx_queue tr tm->when retrnsmt"
1235 			   "   uid  timeout inode\n");
1236 	else
1237 		raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket);
1238 	return 0;
1239 }
1240 
/* seq_file iterator callbacks, passed to seq_open() in raw6_seq_open(). */
static struct seq_operations raw6_seq_ops = {
	.start =	raw6_seq_start,
	.next =		raw6_seq_next,
	.stop =		raw6_seq_stop,
	.show =		raw6_seq_show,
};
1247 
1248 static int raw6_seq_open(struct inode *inode, struct file *file)
1249 {
1250 	struct seq_file *seq;
1251 	int rc = -ENOMEM;
1252 	struct raw6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
1253 	if (!s)
1254 		goto out;
1255 	rc = seq_open(file, &raw6_seq_ops);
1256 	if (rc)
1257 		goto out_kfree;
1258 	seq = file->private_data;
1259 	seq->private = s;
1260 out:
1261 	return rc;
1262 out_kfree:
1263 	kfree(s);
1264 	goto out;
1265 }
1266 
/*
 * File operations of the raw6 proc entry.  Release uses
 * seq_release_private; the private iterator state is allocated in
 * raw6_seq_open().
 */
static struct file_operations raw6_seq_fops = {
	.owner =	THIS_MODULE,
	.open =		raw6_seq_open,
	.read =		seq_read,
	.llseek =	seq_lseek,
	.release =	seq_release_private,
};
1274 
1275 int __init raw6_proc_init(void)
1276 {
1277 	if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
1278 		return -ENOMEM;
1279 	return 0;
1280 }
1281 
/* Remove the "raw6" proc net entry created by raw6_proc_init(). */
void raw6_proc_exit(void)
{
	proc_net_remove("raw6");
}
1286 #endif	/* CONFIG_PROC_FS */
1287