xref: /linux/net/ipv4/ip_output.c (revision 20d0021394c1b070bf04b22c5bc8fdb437edd4c5)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		The Internet Protocol (IP) output module.
7  *
8  * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Donald Becker, <becker@super.org>
13  *		Alan Cox, <Alan.Cox@linux.org>
14  *		Richard Underwood
15  *		Stefan Becker, <stefanb@yello.ping.de>
16  *		Jorge Cwik, <jorge@laser.satlink.net>
17  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18  *		Hirokazu Takahashi, <taka@valinux.co.jp>
19  *
20  *	See ip_input.c for original log
21  *
22  *	Fixes:
23  *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
24  *		Mike Kilburn	:	htons() missing in ip_build_xmit.
25  *		Bradford Johnson:	Fix faulty handling of some frames when
26  *					no route is found.
27  *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
28  *					(in case the packet is not accepted by
29  *					output firewall rules)
30  *		Mike McLagan	:	Routing by source
31  *		Alexey Kuznetsov:	use new route cache
32  *		Andi Kleen:		Fix broken PMTU recovery and remove
33  *					some redundant tests.
34  *	Vitaly E. Lavrov	:	Transparent proxy revived after a year-long coma.
35  *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
36  *		Andi Kleen	:	Split fast and slow ip_build_xmit path
37  *					for decreased register pressure on x86
38  *					and more readability.
39  *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
40  *					silently drop skb instead of failing with -EPERM.
41  *		Detlev Wengorz	:	Copy protocol for fragments.
42  *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
43  *					datagrams.
44  *		Hirokazu Takahashi:	sendfile() on UDP works now.
45  */
46 
47 #include <asm/uaccess.h>
48 #include <asm/system.h>
49 #include <linux/module.h>
50 #include <linux/types.h>
51 #include <linux/kernel.h>
52 #include <linux/sched.h>
53 #include <linux/mm.h>
54 #include <linux/string.h>
55 #include <linux/errno.h>
56 #include <linux/config.h>
57 
58 #include <linux/socket.h>
59 #include <linux/sockios.h>
60 #include <linux/in.h>
61 #include <linux/inet.h>
62 #include <linux/netdevice.h>
63 #include <linux/etherdevice.h>
64 #include <linux/proc_fs.h>
65 #include <linux/stat.h>
66 #include <linux/init.h>
67 
68 #include <net/snmp.h>
69 #include <net/ip.h>
70 #include <net/protocol.h>
71 #include <net/route.h>
72 #include <net/tcp.h>
73 #include <net/udp.h>
74 #include <linux/skbuff.h>
75 #include <net/sock.h>
76 #include <net/arp.h>
77 #include <net/icmp.h>
78 #include <net/raw.h>
79 #include <net/checksum.h>
80 #include <net/inetpeer.h>
82 #include <linux/igmp.h>
83 #include <linux/netfilter_ipv4.h>
84 #include <linux/netfilter_bridge.h>
85 #include <linux/mroute.h>
86 #include <linux/netlink.h>
87 
88 /*
89  *      Shall we try to damage output packets if routing dev changes?
90  */
91 
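/*
 * sysctl_ip_dynaddr allows the source address of not yet established
 * connections to be rewritten when the outgoing interface address
 * changes (useful for dial-on-demand links); sysctl_ip_default_ttl is
 * the default TTL placed in outgoing datagrams.
 */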
92 int sysctl_ip_dynaddr;
93 int sysctl_ip_default_ttl = IPDEFTTL;
94 
95 /* Generate a checksum for an outgoing IP datagram. */
96 __inline__ void ip_send_check(struct iphdr *iph)
97 {
98 	iph->check = 0;
99 	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
100 }
101 
102 /* dev_loopback_xmit for use with netfilter. */
103 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
104 {
105 	newskb->mac.raw = newskb->data;
106 	__skb_pull(newskb, newskb->nh.raw - newskb->data);
107 	newskb->pkt_type = PACKET_LOOPBACK;
108 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
109 	BUG_TRAP(newskb->dst);
110 	netif_rx(newskb);
111 	return 0;
112 }
113 
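/*
 * Choose the TTL for a unicast packet: the per-socket value if the user
 * set one (IP_TTL), otherwise the route's hop-limit metric.
 */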
114 static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
115 {
116 	int ttl = inet->uc_ttl;
117 
118 	if (ttl < 0)
119 		ttl = dst_metric(dst, RTAX_HOPLIMIT);
120 	return ttl;
121 }
122 
123 /*
124  *		Add an IP header to an skbuff and send it out.
125  *
126  */
127 int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
128 			  u32 saddr, u32 daddr, struct ip_options *opt)
129 {
130 	struct inet_sock *inet = inet_sk(sk);
131 	struct rtable *rt = (struct rtable *)skb->dst;
132 	struct iphdr *iph;
133 
134 	/* Build the IP header. */
135 	if (opt)
136 		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
137 	else
138 		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
139 
140 	iph->version  = 4;
141 	iph->ihl      = 5;
142 	iph->tos      = inet->tos;
143 	if (ip_dont_fragment(sk, &rt->u.dst))
144 		iph->frag_off = htons(IP_DF);
145 	else
146 		iph->frag_off = 0;
147 	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
148 	iph->daddr    = rt->rt_dst;
149 	iph->saddr    = rt->rt_src;
150 	iph->protocol = sk->sk_protocol;
151 	iph->tot_len  = htons(skb->len);
152 	ip_select_ident(iph, &rt->u.dst, sk);
153 	skb->nh.iph   = iph;
154 
155 	if (opt && opt->optlen) {
156 		iph->ihl += opt->optlen>>2;
157 		ip_options_build(skb, opt, daddr, rt, 0);
158 	}
159 	ip_send_check(iph);
160 
161 	skb->priority = sk->sk_priority;
162 
163 	/* Send it out. */
164 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
165 		       dst_output);
166 }
167 
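/*
 * Finish transmission at layer 2: make sure there is enough headroom
 * for the hardware header, then hand the skb to the cached hard-header
 * path (hh_output) or to the neighbour's output function.
 */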
168 static inline int ip_finish_output2(struct sk_buff *skb)
169 {
170 	struct dst_entry *dst = skb->dst;
171 	struct hh_cache *hh = dst->hh;
172 	struct net_device *dev = dst->dev;
173 	int hh_len = LL_RESERVED_SPACE(dev);
174 
175 	/* Be paranoid, rather than too clever. */
176 	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
177 		struct sk_buff *skb2;
178 
179 		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
180 		if (skb2 == NULL) {
181 			kfree_skb(skb);
182 			return -ENOMEM;
183 		}
184 		if (skb->sk)
185 			skb_set_owner_w(skb2, skb->sk);
186 		kfree_skb(skb);
187 		skb = skb2;
188 	}
189 
190 	if (hh) {
191 		int hh_alen;
192 
193 		read_lock_bh(&hh->hh_lock);
194 		hh_alen = HH_DATA_ALIGN(hh->hh_len);
195 		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
196 		read_unlock_bh(&hh->hh_lock);
197 		skb_push(skb, hh->hh_len);
198 		return hh->hh_output(skb);
199 	} else if (dst->neighbour)
200 		return dst->neighbour->output(skb);
201 
202 	if (net_ratelimit())
203 		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
204 	kfree_skb(skb);
205 	return -EINVAL;
206 }
207 
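/*
 * Attach the output device and protocol, then pass the packet through
 * the POST_ROUTING netfilter hook on its way to ip_finish_output2().
 */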
208 int ip_finish_output(struct sk_buff *skb)
209 {
210 	struct net_device *dev = skb->dst->dev;
211 
212 	skb->dev = dev;
213 	skb->protocol = htons(ETH_P_IP);
214 
215 	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
216 		       ip_finish_output2);
217 }
218 
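/*
 * Output path for multicast and broadcast routes: loop a copy back to
 * local listeners where appropriate, honour TTL 0 for multicasts, and
 * fragment if the frame exceeds the path MTU.
 */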
219 int ip_mc_output(struct sk_buff *skb)
220 {
221 	struct sock *sk = skb->sk;
222 	struct rtable *rt = (struct rtable*)skb->dst;
223 	struct net_device *dev = rt->u.dst.dev;
224 
225 	/*
226 	 *	If the indicated interface is up and running, send the packet.
227 	 */
228 	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
229 
230 	skb->dev = dev;
231 	skb->protocol = htons(ETH_P_IP);
232 
233 	/*
234 	 *	Multicasts are looped back for other local users
235 	 */
236 
237 	if (rt->rt_flags&RTCF_MULTICAST) {
238 		if ((!sk || inet_sk(sk)->mc_loop)
239 #ifdef CONFIG_IP_MROUTE
240 		/* Small optimization: do not loop back non-local frames that
241 		   were returned after forwarding; ip_mr_input will drop them
242 		   in any case.
243 		   Note that local frames are looped back so that they are
244 		   delivered to local recipients.
245 
246 		   This check is duplicated in ip_mr_input at the moment.
247 		 */
248 		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
249 #endif
250 		) {
251 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
252 			if (newskb)
253 				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
254 					newskb->dev,
255 					ip_dev_loopback_xmit);
256 		}
257 
258 		/* Multicasts with ttl 0 must not go beyond the host */
259 
260 		if (skb->nh.iph->ttl == 0) {
261 			kfree_skb(skb);
262 			return 0;
263 		}
264 	}
265 
266 	if (rt->rt_flags&RTCF_BROADCAST) {
267 		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
268 		if (newskb)
269 			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
270 				newskb->dev, ip_dev_loopback_xmit);
271 	}
272 
273 	if (skb->len > dst_mtu(&rt->u.dst))
274 		return ip_fragment(skb, ip_finish_output);
275 	else
276 		return ip_finish_output(skb);
277 }
278 
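/*
 * Standard output path for routed, locally generated packets: fragment
 * if the frame exceeds the path MTU (unless TSO will segment it), then
 * finish transmission.
 */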
279 int ip_output(struct sk_buff *skb)
280 {
281 	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
282 
283 	if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
284 		return ip_fragment(skb, ip_finish_output);
285 	else
286 		return ip_finish_output(skb);
287 }
288 
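/*
 * Transmit a packet on a connected socket (the main TCP output path):
 * route it unless the caller has already attached a route (as SCTP may),
 * build the IP header in front of the transport header, and send it
 * through the LOCAL_OUT netfilter hook.
 */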
289 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
290 {
291 	struct sock *sk = skb->sk;
292 	struct inet_sock *inet = inet_sk(sk);
293 	struct ip_options *opt = inet->opt;
294 	struct rtable *rt;
295 	struct iphdr *iph;
296 
297 	/* Skip all of this if the packet is already routed,
298 	 * e.g. by something like SCTP.
299 	 */
300 	rt = (struct rtable *) skb->dst;
301 	if (rt != NULL)
302 		goto packet_routed;
303 
304 	/* Make sure we can route this packet. */
305 	rt = (struct rtable *)__sk_dst_check(sk, 0);
306 	if (rt == NULL) {
307 		u32 daddr;
308 
309 		/* Use correct destination address if we have options. */
310 		daddr = inet->daddr;
311 		if(opt && opt->srr)
312 			daddr = opt->faddr;
313 
314 		{
315 			struct flowi fl = { .oif = sk->sk_bound_dev_if,
316 					    .nl_u = { .ip4_u =
317 						      { .daddr = daddr,
318 							.saddr = inet->saddr,
319 							.tos = RT_CONN_FLAGS(sk) } },
320 					    .proto = sk->sk_protocol,
321 					    .uli_u = { .ports =
322 						       { .sport = inet->sport,
323 							 .dport = inet->dport } } };
324 
325 			/* If this fails, the transport layer's retransmit mechanism
326 			 * will keep trying until a route appears or the connection
327 			 * times itself out.
328 			 */
329 			if (ip_route_output_flow(&rt, &fl, sk, 0))
330 				goto no_route;
331 		}
332 		__sk_dst_set(sk, &rt->u.dst);
333 		tcp_v4_setup_caps(sk, &rt->u.dst);
334 	}
335 	skb->dst = dst_clone(&rt->u.dst);
336 
337 packet_routed:
338 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
339 		goto no_route;
340 
341 	/* OK, we know where to send it, allocate and build IP header. */
342 	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
343 	*((__u16 *)iph)	= htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
344 	iph->tot_len = htons(skb->len);
345 	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
346 		iph->frag_off = htons(IP_DF);
347 	else
348 		iph->frag_off = 0;
349 	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
350 	iph->protocol = sk->sk_protocol;
351 	iph->saddr    = rt->rt_src;
352 	iph->daddr    = rt->rt_dst;
353 	skb->nh.iph   = iph;
354 	/* The transport layer has set skb->h.foo itself. */
355 
356 	if (opt && opt->optlen) {
357 		iph->ihl += opt->optlen >> 2;
358 		ip_options_build(skb, opt, inet->daddr, rt, 0);
359 	}
360 
361 	ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
362 
363 	/* Add an IP checksum. */
364 	ip_send_check(iph);
365 
366 	skb->priority = sk->sk_priority;
367 
368 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
369 		       dst_output);
370 
371 no_route:
372 	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
373 	kfree_skb(skb);
374 	return -EHOSTUNREACH;
375 }
376 
377 
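/*
 * Copy per-packet metadata (priority, destination cache, traffic-control
 * and netfilter state) from the original skb to a freshly built fragment.
 */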
378 static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
379 {
380 	to->pkt_type = from->pkt_type;
381 	to->priority = from->priority;
382 	to->protocol = from->protocol;
383 	dst_release(to->dst);
384 	to->dst = dst_clone(from->dst);
385 	to->dev = from->dev;
386 
387 	/* Copy the flags to each fragment. */
388 	IPCB(to)->flags = IPCB(from)->flags;
389 
390 #ifdef CONFIG_NET_SCHED
391 	to->tc_index = from->tc_index;
392 #endif
393 #ifdef CONFIG_NETFILTER
394 	to->nfmark = from->nfmark;
395 	to->nfcache = from->nfcache;
396 	/* Connection association is the same as for the pre-fragmentation packet */
397 	nf_conntrack_put(to->nfct);
398 	to->nfct = from->nfct;
399 	nf_conntrack_get(to->nfct);
400 	to->nfctinfo = from->nfctinfo;
401 #ifdef CONFIG_BRIDGE_NETFILTER
402 	nf_bridge_put(to->nf_bridge);
403 	to->nf_bridge = from->nf_bridge;
404 	nf_bridge_get(to->nf_bridge);
405 #endif
406 #endif
407 }
408 
409 /*
410  *	This IP datagram is too large to be sent in one piece.  Break it up into
411  *	smaller pieces (each the size of the IP header plus a block of the
412  *	original datagram's data) so that each piece fits into a single
413  *	device frame, and queue each such frame for sending.
414  */
415 
416 int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
417 {
418 	struct iphdr *iph;
419 	int raw = 0;
420 	int ptr;
421 	struct net_device *dev;
422 	struct sk_buff *skb2;
423 	unsigned int mtu, hlen, left, len, ll_rs;
424 	int offset;
425 	int not_last_frag;
426 	struct rtable *rt = (struct rtable*)skb->dst;
427 	int err = 0;
428 
429 	dev = rt->u.dst.dev;
430 
431 	/*
432 	 *	Point into the IP datagram header.
433 	 */
434 
435 	iph = skb->nh.iph;
436 
437 	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
438 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
439 			  htonl(dst_mtu(&rt->u.dst)));
440 		kfree_skb(skb);
441 		return -EMSGSIZE;
442 	}
443 
444 	/*
445 	 *	Setup starting values.
446 	 */
447 
448 	hlen = iph->ihl * 4;
449 	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
450 
451 	/* When a frag_list is given, use it.  First, check its validity:
452 	 * some transformers may create a bogus frag_list or corrupt an existing
453 	 * one; that is not prohibited.  In that case fall back to copying.
454 	 *
455 	 * LATER: this step could be merged into the actual generation of
456 	 * fragments; we could switch to copying when the first bad fragment is seen.
457 	 */
458 	if (skb_shinfo(skb)->frag_list) {
459 		struct sk_buff *frag;
460 		int first_len = skb_pagelen(skb);
461 
462 		if (first_len - hlen > mtu ||
463 		    ((first_len - hlen) & 7) ||
464 		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
465 		    skb_cloned(skb))
466 			goto slow_path;
467 
468 		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
469 			/* Correct geometry. */
470 			if (frag->len > mtu ||
471 			    ((frag->len & 7) && frag->next) ||
472 			    skb_headroom(frag) < hlen)
473 			    goto slow_path;
474 
475 			/* Partially cloned skb? */
476 			if (skb_shared(frag))
477 				goto slow_path;
478 
479 			BUG_ON(frag->sk);
480 			if (skb->sk) {
481 				sock_hold(skb->sk);
482 				frag->sk = skb->sk;
483 				frag->destructor = sock_wfree;
484 				skb->truesize -= frag->truesize;
485 			}
486 		}
487 
488 		/* Everything is OK. Generate! */
489 
490 		err = 0;
491 		offset = 0;
492 		frag = skb_shinfo(skb)->frag_list;
493 		skb_shinfo(skb)->frag_list = NULL;
494 		skb->data_len = first_len - skb_headlen(skb);
495 		skb->len = first_len;
496 		iph->tot_len = htons(first_len);
497 		iph->frag_off = htons(IP_MF);
498 		ip_send_check(iph);
499 
500 		for (;;) {
501 			/* Prepare the header of the next frame
502 			 * before the previous one goes out. */
503 			if (frag) {
504 				frag->ip_summed = CHECKSUM_NONE;
505 				frag->h.raw = frag->data;
506 				frag->nh.raw = __skb_push(frag, hlen);
507 				memcpy(frag->nh.raw, iph, hlen);
508 				iph = frag->nh.iph;
509 				iph->tot_len = htons(frag->len);
510 				ip_copy_metadata(frag, skb);
511 				if (offset == 0)
512 					ip_options_fragment(frag);
513 				offset += skb->len - hlen;
514 				iph->frag_off = htons(offset>>3);
515 				if (frag->next != NULL)
516 					iph->frag_off |= htons(IP_MF);
517 				/* Ready, complete checksum */
518 				ip_send_check(iph);
519 			}
520 
521 			err = output(skb);
522 
523 			if (err || !frag)
524 				break;
525 
526 			skb = frag;
527 			frag = skb->next;
528 			skb->next = NULL;
529 		}
530 
531 		if (err == 0) {
532 			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
533 			return 0;
534 		}
535 
536 		while (frag) {
537 			skb = frag->next;
538 			kfree_skb(frag);
539 			frag = skb;
540 		}
541 		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
542 		return err;
543 	}
544 
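	/*
	 * Slow path: allocate a fresh skb for each fragment and copy the IP
	 * header plus a block of payload into it.
	 */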
545 slow_path:
546 	left = skb->len - hlen;		/* Space per frame */
547 	ptr = raw + hlen;		/* Where to start from */
548 
549 #ifdef CONFIG_BRIDGE_NETFILTER
550 	/* For bridged IP traffic encapsulated inside e.g. a VLAN header,
551 	 * we need to make room for the encapsulating header. */
552 	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
553 	mtu -= nf_bridge_pad(skb);
554 #else
555 	ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
556 #endif
557 	/*
558 	 *	Fragment the datagram.
559 	 */
560 
561 	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
562 	not_last_frag = iph->frag_off & htons(IP_MF);
563 
564 	/*
565 	 *	Keep copying data until we run out.
566 	 */
567 
568 	while(left > 0)	{
569 		len = left;
570 		/* IF: it doesn't fit, use 'mtu' - the data space left */
571 		if (len > mtu)
572 			len = mtu;
573 		/* IF: we are not sending up to and including the packet end,
574 		   then align the next start on an eight-byte boundary */
575 		if (len < left)	{
576 			len &= ~7;
577 		}
578 		/*
579 		 *	Allocate buffer.
580 		 */
581 
582 		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
583 			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
584 			err = -ENOMEM;
585 			goto fail;
586 		}
587 
588 		/*
589 		 *	Set up data on packet
590 		 */
591 
592 		ip_copy_metadata(skb2, skb);
593 		skb_reserve(skb2, ll_rs);
594 		skb_put(skb2, len + hlen);
595 		skb2->nh.raw = skb2->data;
596 		skb2->h.raw = skb2->data + hlen;
597 
598 		/*
599 		 *	Charge the memory for the fragment to any owner
600 		 *	it might possess
601 		 */
602 
603 		if (skb->sk)
604 			skb_set_owner_w(skb2, skb->sk);
605 
606 		/*
607 		 *	Copy the packet header into the new buffer.
608 		 */
609 
610 		memcpy(skb2->nh.raw, skb->data, hlen);
611 
612 		/*
613 		 *	Copy a block of the IP datagram.
614 		 */
615 		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
616 			BUG();
617 		left -= len;
618 
619 		/*
620 		 *	Fill in the new header fields.
621 		 */
622 		iph = skb2->nh.iph;
623 		iph->frag_off = htons((offset >> 3));
624 
625 		/* ANK: a dirty but effective trick.  Fix up the options only if
626 		 * the segment being fragmented was THE FIRST one (otherwise the
627 		 * options are already fixed), and do it ONCE,
628 		 * on the initial skb, so that all the following fragments
629 		 * inherit the fixed options.
630 		 */
631 		if (offset == 0)
632 			ip_options_fragment(skb);
633 
634 		/*
635 		 *	Added AC : If we are fragmenting a fragment that's not the
636 		 *		   last fragment, then keep MF set on each fragment
637 		 */
638 		if (left > 0 || not_last_frag)
639 			iph->frag_off |= htons(IP_MF);
640 		ptr += len;
641 		offset += len;
642 
643 		/*
644 		 *	Put this fragment into the sending queue.
645 		 */
646 
647 		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
648 
649 		iph->tot_len = htons(len + hlen);
650 
651 		ip_send_check(iph);
652 
653 		err = output(skb2);
654 		if (err)
655 			goto fail;
656 	}
657 	kfree_skb(skb);
658 	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
659 	return err;
660 
661 fail:
662 	kfree_skb(skb);
663 	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
664 	return err;
665 }
666 
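/*
 * Default getfrag callback for ip_append_data(): copy data from the
 * caller's iovec into the skb, folding a checksum into skb->csum unless
 * the hardware will checksum the packet (CHECKSUM_HW).
 */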
667 int
668 ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
669 {
670 	struct iovec *iov = from;
671 
672 	if (skb->ip_summed == CHECKSUM_HW) {
673 		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
674 			return -EFAULT;
675 	} else {
676 		unsigned int csum = 0;
677 		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
678 			return -EFAULT;
679 		skb->csum = csum_block_add(skb->csum, csum, odd);
680 	}
681 	return 0;
682 }
683 
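/* Checksum 'copy' bytes of the page starting at 'offset' (used by ip_append_page). */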
684 static inline unsigned int
685 csum_page(struct page *page, int offset, int copy)
686 {
687 	char *kaddr;
688 	unsigned int csum;
689 	kaddr = kmap(page);
690 	csum = csum_partial(kaddr + offset, copy, 0);
691 	kunmap(page);
692 	return csum;
693 }
694 
695 /*
696  *	ip_append_data() and ip_append_page() can build one large IP datagram
697  *	from many pieces of data.  Each piece is held on the socket
698  *	until ip_push_pending_frames() is called.  A piece can be a page
699  *	or non-page data.
700  *
701  *	Not only UDP but other transport protocols - e.g. raw sockets - can
702  *	potentially use this interface.
703  *
704  *	LATER: length must be adjusted by the tail padding when it is required.
705  */
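/*
 * Illustrative sketch (not taken from this file): roughly how a datagram
 * sender such as raw_sendmsg() is expected to drive this interface.
 * Locking, route lookup and error handling are simplified; 'rt', 'ipc',
 * 'msg' and 'len' are assumed to have been prepared by the caller.
 *
 *	err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
 *			     &ipc, rt, msg->msg_flags);
 *	if (err)
 *		ip_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip_push_pending_frames(sk);
 */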
706 int ip_append_data(struct sock *sk,
707 		   int getfrag(void *from, char *to, int offset, int len,
708 			       int odd, struct sk_buff *skb),
709 		   void *from, int length, int transhdrlen,
710 		   struct ipcm_cookie *ipc, struct rtable *rt,
711 		   unsigned int flags)
712 {
713 	struct inet_sock *inet = inet_sk(sk);
714 	struct sk_buff *skb;
715 
716 	struct ip_options *opt = NULL;
717 	int hh_len;
718 	int exthdrlen;
719 	int mtu;
720 	int copy;
721 	int err;
722 	int offset = 0;
723 	unsigned int maxfraglen, fragheaderlen;
724 	int csummode = CHECKSUM_NONE;
725 
726 	if (flags&MSG_PROBE)
727 		return 0;
728 
729 	if (skb_queue_empty(&sk->sk_write_queue)) {
730 		/*
731 		 * setup for corking.
732 		 */
733 		opt = ipc->opt;
734 		if (opt) {
735 			if (inet->cork.opt == NULL) {
736 				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
737 				if (unlikely(inet->cork.opt == NULL))
738 					return -ENOBUFS;
739 			}
740 			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
741 			inet->cork.flags |= IPCORK_OPT;
742 			inet->cork.addr = ipc->addr;
743 		}
744 		dst_hold(&rt->u.dst);
745 		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
746 		inet->cork.rt = rt;
747 		inet->cork.length = 0;
748 		sk->sk_sndmsg_page = NULL;
749 		sk->sk_sndmsg_off = 0;
750 		if ((exthdrlen = rt->u.dst.header_len) != 0) {
751 			length += exthdrlen;
752 			transhdrlen += exthdrlen;
753 		}
754 	} else {
755 		rt = inet->cork.rt;
756 		if (inet->cork.flags & IPCORK_OPT)
757 			opt = inet->cork.opt;
758 
759 		transhdrlen = 0;
760 		exthdrlen = 0;
761 		mtu = inet->cork.fragsize;
762 	}
763 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
764 
765 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
766 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
767 
768 	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
769 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
770 		return -EMSGSIZE;
771 	}
772 
773 	/*
774 	 * transhdrlen > 0 means that this is the first fragment and we wish
775 	 * it not to be fragmented later.
776 	 */
777 	if (transhdrlen &&
778 	    length + fragheaderlen <= mtu &&
779 	    rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
780 	    !exthdrlen)
781 		csummode = CHECKSUM_HW;
782 
783 	inet->cork.length += length;
784 
785 	/* So, what's going on in the loop below?
786 	 *
787 	 * We use the calculated fragment length to generate a chain of skbs;
788 	 * each segment is an IP fragment ready to be sent to the network once
789 	 * the appropriate IP header has been added.
790 	 */
791 
792 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
793 		goto alloc_new_skb;
794 
795 	while (length > 0) {
796 		/* Check if the remaining data fits into current packet. */
797 		copy = mtu - skb->len;
798 		if (copy < length)
799 			copy = maxfraglen - skb->len;
800 		if (copy <= 0) {
801 			char *data;
802 			unsigned int datalen;
803 			unsigned int fraglen;
804 			unsigned int fraggap;
805 			unsigned int alloclen;
806 			struct sk_buff *skb_prev;
807 alloc_new_skb:
808 			skb_prev = skb;
809 			if (skb_prev)
810 				fraggap = skb_prev->len - maxfraglen;
811 			else
812 				fraggap = 0;
813 
814 			/*
815 			 * If remaining data exceeds the mtu,
816 			 * we know we need more fragment(s).
817 			 */
818 			datalen = length + fraggap;
819 			if (datalen > mtu - fragheaderlen)
820 				datalen = maxfraglen - fragheaderlen;
821 			fraglen = datalen + fragheaderlen;
822 
823 			if ((flags & MSG_MORE) &&
824 			    !(rt->u.dst.dev->features&NETIF_F_SG))
825 				alloclen = mtu;
826 			else
827 				alloclen = datalen + fragheaderlen;
828 
829 			/* The last fragment gets additional space at tail.
830 			 * Note that with MSG_MORE we overallocate on fragments,
831 			 * because we have no idea which fragment will be
832 			 * the last.
833 			 */
834 			if (datalen == length)
835 				alloclen += rt->u.dst.trailer_len;
836 
837 			if (transhdrlen) {
838 				skb = sock_alloc_send_skb(sk,
839 						alloclen + hh_len + 15,
840 						(flags & MSG_DONTWAIT), &err);
841 			} else {
842 				skb = NULL;
843 				if (atomic_read(&sk->sk_wmem_alloc) <=
844 				    2 * sk->sk_sndbuf)
845 					skb = sock_wmalloc(sk,
846 							   alloclen + hh_len + 15, 1,
847 							   sk->sk_allocation);
848 				if (unlikely(skb == NULL))
849 					err = -ENOBUFS;
850 			}
851 			if (skb == NULL)
852 				goto error;
853 
854 			/*
855 			 *	Fill in the control structures
856 			 */
857 			skb->ip_summed = csummode;
858 			skb->csum = 0;
859 			skb_reserve(skb, hh_len);
860 
861 			/*
862 			 *	Find where to start putting bytes.
863 			 */
864 			data = skb_put(skb, fraglen);
865 			skb->nh.raw = data + exthdrlen;
866 			data += fragheaderlen;
867 			skb->h.raw = data + exthdrlen;
868 
869 			if (fraggap) {
870 				skb->csum = skb_copy_and_csum_bits(
871 					skb_prev, maxfraglen,
872 					data + transhdrlen, fraggap, 0);
873 				skb_prev->csum = csum_sub(skb_prev->csum,
874 							  skb->csum);
875 				data += fraggap;
876 				skb_trim(skb_prev, maxfraglen);
877 			}
878 
879 			copy = datalen - transhdrlen - fraggap;
880 			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
881 				err = -EFAULT;
882 				kfree_skb(skb);
883 				goto error;
884 			}
885 
886 			offset += copy;
887 			length -= datalen - fraggap;
888 			transhdrlen = 0;
889 			exthdrlen = 0;
890 			csummode = CHECKSUM_NONE;
891 
892 			/*
893 			 * Put the packet on the pending queue.
894 			 */
895 			__skb_queue_tail(&sk->sk_write_queue, skb);
896 			continue;
897 		}
898 
899 		if (copy > length)
900 			copy = length;
901 
902 		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
903 			unsigned int off;
904 
905 			off = skb->len;
906 			if (getfrag(from, skb_put(skb, copy),
907 					offset, copy, off, skb) < 0) {
908 				__skb_trim(skb, off);
909 				err = -EFAULT;
910 				goto error;
911 			}
912 		} else {
913 			int i = skb_shinfo(skb)->nr_frags;
914 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
915 			struct page *page = sk->sk_sndmsg_page;
916 			int off = sk->sk_sndmsg_off;
917 			unsigned int left;
918 
919 			if (page && (left = PAGE_SIZE - off) > 0) {
920 				if (copy >= left)
921 					copy = left;
922 				if (page != frag->page) {
923 					if (i == MAX_SKB_FRAGS) {
924 						err = -EMSGSIZE;
925 						goto error;
926 					}
927 					get_page(page);
928 					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
929 					frag = &skb_shinfo(skb)->frags[i];
930 				}
931 			} else if (i < MAX_SKB_FRAGS) {
932 				if (copy > PAGE_SIZE)
933 					copy = PAGE_SIZE;
934 				page = alloc_pages(sk->sk_allocation, 0);
935 				if (page == NULL)  {
936 					err = -ENOMEM;
937 					goto error;
938 				}
939 				sk->sk_sndmsg_page = page;
940 				sk->sk_sndmsg_off = 0;
941 
942 				skb_fill_page_desc(skb, i, page, 0, 0);
943 				frag = &skb_shinfo(skb)->frags[i];
944 				skb->truesize += PAGE_SIZE;
945 				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
946 			} else {
947 				err = -EMSGSIZE;
948 				goto error;
949 			}
950 			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
951 				err = -EFAULT;
952 				goto error;
953 			}
954 			sk->sk_sndmsg_off += copy;
955 			frag->size += copy;
956 			skb->len += copy;
957 			skb->data_len += copy;
958 		}
959 		offset += copy;
960 		length -= copy;
961 	}
962 
963 	return 0;
964 
965 error:
966 	inet->cork.length -= length;
967 	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
968 	return err;
969 }
970 
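/*
 * Page-based companion to ip_append_data(): append an existing page to
 * the pending datagram without copying, used for zero-copy paths such
 * as sendfile() on UDP.  Requires an SG-capable device and an already
 * corked (non-empty) write queue.
 */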
971 ssize_t	ip_append_page(struct sock *sk, struct page *page,
972 		       int offset, size_t size, int flags)
973 {
974 	struct inet_sock *inet = inet_sk(sk);
975 	struct sk_buff *skb;
976 	struct rtable *rt;
977 	struct ip_options *opt = NULL;
978 	int hh_len;
979 	int mtu;
980 	int len;
981 	int err;
982 	unsigned int maxfraglen, fragheaderlen, fraggap;
983 
984 	if (inet->hdrincl)
985 		return -EPERM;
986 
987 	if (flags&MSG_PROBE)
988 		return 0;
989 
990 	if (skb_queue_empty(&sk->sk_write_queue))
991 		return -EINVAL;
992 
993 	rt = inet->cork.rt;
994 	if (inet->cork.flags & IPCORK_OPT)
995 		opt = inet->cork.opt;
996 
997 	if (!(rt->u.dst.dev->features&NETIF_F_SG))
998 		return -EOPNOTSUPP;
999 
1000 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1001 	mtu = inet->cork.fragsize;
1002 
1003 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1004 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1005 
1006 	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1007 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1008 		return -EMSGSIZE;
1009 	}
1010 
1011 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1012 		return -EINVAL;
1013 
1014 	inet->cork.length += size;
1015 
1016 	while (size > 0) {
1017 		int i;
1018 
1019 		/* Check if the remaining data fits into current packet. */
1020 		len = mtu - skb->len;
1021 		if (len < size)
1022 			len = maxfraglen - skb->len;
1023 		if (len <= 0) {
1024 			struct sk_buff *skb_prev;
1025 			char *data;
1026 			struct iphdr *iph;
1027 			int alloclen;
1028 
1029 			skb_prev = skb;
1030 			if (skb_prev)
1031 				fraggap = skb_prev->len - maxfraglen;
1032 			else
1033 				fraggap = 0;
1034 
1035 			alloclen = fragheaderlen + hh_len + fraggap + 15;
1036 			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1037 			if (unlikely(!skb)) {
1038 				err = -ENOBUFS;
1039 				goto error;
1040 			}
1041 
1042 			/*
1043 			 *	Fill in the control structures
1044 			 */
1045 			skb->ip_summed = CHECKSUM_NONE;
1046 			skb->csum = 0;
1047 			skb_reserve(skb, hh_len);
1048 
1049 			/*
1050 			 *	Find where to start putting bytes.
1051 			 */
1052 			data = skb_put(skb, fragheaderlen + fraggap);
1053 			skb->nh.iph = iph = (struct iphdr *)data;
1054 			data += fragheaderlen;
1055 			skb->h.raw = data;
1056 
1057 			if (fraggap) {
1058 				skb->csum = skb_copy_and_csum_bits(
1059 					skb_prev, maxfraglen,
1060 					data, fraggap, 0);
1061 				skb_prev->csum = csum_sub(skb_prev->csum,
1062 							  skb->csum);
1063 				skb_trim(skb_prev, maxfraglen);
1064 			}
1065 
1066 			/*
1067 			 * Put the packet on the pending queue.
1068 			 */
1069 			__skb_queue_tail(&sk->sk_write_queue, skb);
1070 			continue;
1071 		}
1072 
1073 		i = skb_shinfo(skb)->nr_frags;
1074 		if (len > size)
1075 			len = size;
1076 		if (skb_can_coalesce(skb, i, page, offset)) {
1077 			skb_shinfo(skb)->frags[i-1].size += len;
1078 		} else if (i < MAX_SKB_FRAGS) {
1079 			get_page(page);
1080 			skb_fill_page_desc(skb, i, page, offset, len);
1081 		} else {
1082 			err = -EMSGSIZE;
1083 			goto error;
1084 		}
1085 
1086 		if (skb->ip_summed == CHECKSUM_NONE) {
1087 			unsigned int csum;
1088 			csum = csum_page(page, offset, len);
1089 			skb->csum = csum_block_add(skb->csum, csum, skb->len);
1090 		}
1091 
1092 		skb->len += len;
1093 		skb->data_len += len;
1094 		offset += len;
1095 		size -= len;
1096 	}
1097 	return 0;
1098 
1099 error:
1100 	inet->cork.length -= size;
1101 	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1102 	return err;
1103 }
1104 
1105 /*
1106  *	Combine all pending IP fragments on the socket into one IP datagram
1107  *	and push it out.
1108  */
1109 int ip_push_pending_frames(struct sock *sk)
1110 {
1111 	struct sk_buff *skb, *tmp_skb;
1112 	struct sk_buff **tail_skb;
1113 	struct inet_sock *inet = inet_sk(sk);
1114 	struct ip_options *opt = NULL;
1115 	struct rtable *rt = inet->cork.rt;
1116 	struct iphdr *iph;
1117 	int df = 0;
1118 	__u8 ttl;
1119 	int err = 0;
1120 
1121 	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1122 		goto out;
1123 	tail_skb = &(skb_shinfo(skb)->frag_list);
1124 
1125 	/* Move skb->data from the ext header to the IP header. */
1126 	if (skb->data < skb->nh.raw)
1127 		__skb_pull(skb, skb->nh.raw - skb->data);
1128 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1129 		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1130 		*tail_skb = tmp_skb;
1131 		tail_skb = &(tmp_skb->next);
1132 		skb->len += tmp_skb->len;
1133 		skb->data_len += tmp_skb->len;
1134 		skb->truesize += tmp_skb->truesize;
1135 		__sock_put(tmp_skb->sk);
1136 		tmp_skb->destructor = NULL;
1137 		tmp_skb->sk = NULL;
1138 	}
1139 
1140 	/* Unless the user demanded real PMTU discovery (IP_PMTUDISC_DO), we allow
1141 	 * the frame generated here to be fragmented.  No matter how transforms
1142 	 * change the size of the packet, it will go out.
1143 	 */
1144 	if (inet->pmtudisc != IP_PMTUDISC_DO)
1145 		skb->local_df = 1;
1146 
1147 	/* DF bit is set when we want to see DF on outgoing frames.
1148 	 * If local_df is also set, we still allow this frame to be fragmented
1149 	 * locally. */
1150 	if (inet->pmtudisc == IP_PMTUDISC_DO ||
1151 	    (skb->len <= dst_mtu(&rt->u.dst) &&
1152 	     ip_dont_fragment(sk, &rt->u.dst)))
1153 		df = htons(IP_DF);
1154 
1155 	if (inet->cork.flags & IPCORK_OPT)
1156 		opt = inet->cork.opt;
1157 
1158 	if (rt->rt_type == RTN_MULTICAST)
1159 		ttl = inet->mc_ttl;
1160 	else
1161 		ttl = ip_select_ttl(inet, &rt->u.dst);
1162 
1163 	iph = (struct iphdr *)skb->data;
1164 	iph->version = 4;
1165 	iph->ihl = 5;
1166 	if (opt) {
1167 		iph->ihl += opt->optlen>>2;
1168 		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1169 	}
1170 	iph->tos = inet->tos;
1171 	iph->tot_len = htons(skb->len);
1172 	iph->frag_off = df;
1173 	if (!df) {
1174 		__ip_select_ident(iph, &rt->u.dst, 0);
1175 	} else {
1176 		iph->id = htons(inet->id++);
1177 	}
1178 	iph->ttl = ttl;
1179 	iph->protocol = sk->sk_protocol;
1180 	iph->saddr = rt->rt_src;
1181 	iph->daddr = rt->rt_dst;
1182 	ip_send_check(iph);
1183 
1184 	skb->priority = sk->sk_priority;
1185 	skb->dst = dst_clone(&rt->u.dst);
1186 
1187 	/* Netfilter gets the whole, not yet fragmented skb. */
1188 	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1189 		      skb->dst->dev, dst_output);
1190 	if (err) {
1191 		if (err > 0)
1192 			err = inet->recverr ? net_xmit_errno(err) : 0;
1193 		if (err)
1194 			goto error;
1195 	}
1196 
1197 out:
1198 	inet->cork.flags &= ~IPCORK_OPT;
1199 	if (inet->cork.opt) {
1200 		kfree(inet->cork.opt);
1201 		inet->cork.opt = NULL;
1202 	}
1203 	if (inet->cork.rt) {
1204 		ip_rt_put(inet->cork.rt);
1205 		inet->cork.rt = NULL;
1206 	}
1207 	return err;
1208 
1209 error:
1210 	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1211 	goto out;
1212 }
1213 
1214 /*
1215  *	Throw away all pending data on the socket.
1216  */
1217 void ip_flush_pending_frames(struct sock *sk)
1218 {
1219 	struct inet_sock *inet = inet_sk(sk);
1220 	struct sk_buff *skb;
1221 
1222 	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1223 		kfree_skb(skb);
1224 
1225 	inet->cork.flags &= ~IPCORK_OPT;
1226 	if (inet->cork.opt) {
1227 		kfree(inet->cork.opt);
1228 		inet->cork.opt = NULL;
1229 	}
1230 	if (inet->cork.rt) {
1231 		ip_rt_put(inet->cork.rt);
1232 		inet->cork.rt = NULL;
1233 	}
1234 }
1235 
1236 
1237 /*
1238  *	Fetch data from kernel space and fill in checksum if needed.
1239  */
1240 static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1241 			      int len, int odd, struct sk_buff *skb)
1242 {
1243 	unsigned int csum;
1244 
1245 	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1246 	skb->csum = csum_block_add(skb->csum, csum, odd);
1247 	return 0;
1248 }
1249 
1250 /*
1251  *	Generic function to send a packet as a reply to another packet.
1252  *	Used to send TCP resets so far; ICMP should use this function too.
1253  *
1254  *	Should run single-threaded per socket because it uses the sock
1255  *	structure to pass arguments.
1256  *
1257  *	LATER: switch from ip_build_xmit to ip_append_*
1258  */
1259 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1260 		   unsigned int len)
1261 {
1262 	struct inet_sock *inet = inet_sk(sk);
1263 	struct {
1264 		struct ip_options	opt;
1265 		char			data[40];
1266 	} replyopts;
1267 	struct ipcm_cookie ipc;
1268 	u32 daddr;
1269 	struct rtable *rt = (struct rtable*)skb->dst;
1270 
1271 	if (ip_options_echo(&replyopts.opt, skb))
1272 		return;
1273 
1274 	daddr = ipc.addr = rt->rt_src;
1275 	ipc.opt = NULL;
1276 
1277 	if (replyopts.opt.optlen) {
1278 		ipc.opt = &replyopts.opt;
1279 
1280 		if (ipc.opt->srr)
1281 			daddr = replyopts.opt.faddr;
1282 	}
1283 
1284 	{
1285 		struct flowi fl = { .nl_u = { .ip4_u =
1286 					      { .daddr = daddr,
1287 						.saddr = rt->rt_spec_dst,
1288 						.tos = RT_TOS(skb->nh.iph->tos) } },
1289 				    /* Not quite clean, but right. */
1290 				    .uli_u = { .ports =
1291 					       { .sport = skb->h.th->dest,
1292 					         .dport = skb->h.th->source } },
1293 				    .proto = sk->sk_protocol };
1294 		if (ip_route_output_key(&rt, &fl))
1295 			return;
1296 	}
1297 
1298 	/* And let IP do all the hard work.
1299 
1300 	   This chunk is not reentrant, hence the spinlock.
1301 	   Note that it relies on the fact that this function is called
1302 	   with BHs locally disabled and that sk cannot already be spinlocked.
1303 	 */
1304 	bh_lock_sock(sk);
1305 	inet->tos = skb->nh.iph->tos;
1306 	sk->sk_priority = skb->priority;
1307 	sk->sk_protocol = skb->nh.iph->protocol;
1308 	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1309 		       &ipc, rt, MSG_DONTWAIT);
1310 	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1311 		if (arg->csumoffset >= 0)
1312 			*((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
1313 		skb->ip_summed = CHECKSUM_NONE;
1314 		ip_push_pending_frames(sk);
1315 	}
1316 
1317 	bh_unlock_sock(sk);
1318 
1319 	ip_rt_put(rt);
1320 }
1321 
1322 void __init ip_init(void)
1323 {
1324 	ip_rt_init();
1325 	inet_initpeers();
1326 
1327 #if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1328 	igmp_mc_proc_init();
1329 #endif
1330 }
1331 
1332 EXPORT_SYMBOL(ip_finish_output);
1333 EXPORT_SYMBOL(ip_fragment);
1334 EXPORT_SYMBOL(ip_generic_getfrag);
1335 EXPORT_SYMBOL(ip_queue_xmit);
1336 EXPORT_SYMBOL(ip_send_check);
1337 
1338 #ifdef CONFIG_SYSCTL
1339 EXPORT_SYMBOL(sysctl_ip_default_ttl);
1340 #endif
1341