xref: /linux/net/xfrm/xfrm_iptfs.c (revision 4f9786035f9e519db41375818e1d0b5f20da2f10)
14b3faf61SChristian Hopps // SPDX-License-Identifier: GPL-2.0
24b3faf61SChristian Hopps /* xfrm_iptfs: IPTFS encapsulation support
34b3faf61SChristian Hopps  *
44b3faf61SChristian Hopps  * April 21 2022, Christian Hopps <chopps@labn.net>
54b3faf61SChristian Hopps  *
64b3faf61SChristian Hopps  * Copyright (c) 2022, LabN Consulting, L.L.C.
74b3faf61SChristian Hopps  *
84b3faf61SChristian Hopps  */
94b3faf61SChristian Hopps 
104b3faf61SChristian Hopps #include <linux/kernel.h>
114b3faf61SChristian Hopps #include <linux/icmpv6.h>
12b96ba312SChristian Hopps #include <linux/skbuff_ref.h>
134b3faf61SChristian Hopps #include <net/gro.h>
144b3faf61SChristian Hopps #include <net/icmp.h>
154b3faf61SChristian Hopps #include <net/ip6_route.h>
164b3faf61SChristian Hopps #include <net/inet_ecn.h>
174b3faf61SChristian Hopps #include <net/xfrm.h>
184b3faf61SChristian Hopps 
194b3faf61SChristian Hopps #include <crypto/aead.h>
204b3faf61SChristian Hopps 
214b3faf61SChristian Hopps #include "xfrm_inout.h"
22ed58b186SChristian Hopps #include "trace_iptfs.h"
234b3faf61SChristian Hopps 
/* Sub-type values used in the IPTFS encapsulation header. */
#define IPTFS_SUBTYPE_BASIC 0
#define IPTFS_SUBTYPE_CC 1
276c82d243SChristian Hopps 
/* ----------------------------------------------- */
/* IP-TFS default SA values (tunnel egress/dir-in) */
/* ----------------------------------------------- */

/**
 * define IPTFS_DEFAULT_DROP_TIME_USECS - default drop time
 *
 * Default IPTFS drop time, expressed in microseconds. This is how long a
 * missing out-of-order IPTFS tunnel packet is waited for before it is
 * considered lost. Related to the reorder window below.
 *
 * Default 1s.
 */
#define IPTFS_DEFAULT_DROP_TIME_USECS 1000000

/**
 * define IPTFS_DEFAULT_REORDER_WINDOW - default reorder window size
 *
 * Default IPTFS reorder window size: the maximum number of IPTFS tunnel
 * packets in a sequence that may arrive out of order.
 *
 * Default 3. (tcp folks suggested)
 */
#define IPTFS_DEFAULT_REORDER_WINDOW 3
536be02e3eSChristian Hopps 
/* ------------------------------------------------ */
/* IPTFS default SA values (tunnel ingress/dir-out) */
/* ------------------------------------------------ */

/**
 * define IPTFS_DEFAULT_INIT_DELAY_USECS - default initial output delay
 *
 * The initial output delay is the amount of time to wait, after the first
 * packet is queued on an output queue that was previously empty, before that
 * queue is serviced.
 *
 * Default 0.
 */
#define IPTFS_DEFAULT_INIT_DELAY_USECS 0

/**
 * define IPTFS_DEFAULT_MAX_QUEUE_SIZE - default max output queue size.
 *
 * The default IPTFS max output queue size in octets. The output queue is where
 * received packets destined for output over an IPTFS tunnel are stored prior to
 * being output in aggregated/fragmented form over the IPTFS tunnel.
 *
 * Default 10M (1024 * 10240 = 10485760 octets).
 */
#define IPTFS_DEFAULT_MAX_QUEUE_SIZE (1024 * 10240)
790e4fbf01SChristian Hopps 
/* Headroom reservations. These assume skb->head is cache aligned.
 *
 * L2 header reserve: the goal is for a cacheline to begin at skb->data - 16 so
 * that the L2 header, once pushed, shares a cacheline with the resulting
 * `skb->data` (i.e., the L3 header). When the cacheline size is > 64, simply
 * reserving 64 bytes already keeps skb->data plus the pushed L2 header within
 * one cacheline.
 *
 * L3 header reserve: used when both L3 and L2 headers will be pushed (i.e.,
 * skb->data points at the IPTFS payload). Here `skb->data` should be cacheline
 * aligned, leaving the pushed L2/L3 headers in their own cacheline[s]. 128
 * achieves this for cachelines up to 128 bytes; with larger cachelines the
 * pushed headers simply share a cacheline with the start of the IPTFS payload
 * (skb->data).
 */
#define XFRM_IPTFS_MIN_L3HEADROOM 128
#define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16)
958579d342SChristian Hopps 
/* Smallest size for which sharing the outer iptfs skb data is attempted
 * instead of copying it into a new skb.
 */
#define IPTFS_PKT_SHARE_MIN 129

/* Nanoseconds per microsecond, for usec <-> nsec conversions. */
#define NSECS_IN_USEC 1000

/* hrtimer mode selected for the IPTFS timers. */
#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT
1020e4fbf01SChristian Hopps 
1034b3faf61SChristian Hopps /**
1044b3faf61SChristian Hopps  * struct xfrm_iptfs_config - configuration for the IPTFS tunnel.
1054b3faf61SChristian Hopps  * @pkt_size: size of the outer IP packet. 0 to use interface and MTU discovery,
1064b3faf61SChristian Hopps  *	otherwise the user specified value.
1070e4fbf01SChristian Hopps  * @max_queue_size: The maximum number of octets allowed to be queued to be sent
1080e4fbf01SChristian Hopps  *	over the IPTFS SA. The queue size is measured as the size of all the
1090e4fbf01SChristian Hopps  *	packets enqueued.
1106be02e3eSChristian Hopps  * @reorder_win_size: the number slots in the reorder window, thus the number of
1116be02e3eSChristian Hopps  *	packets that may arrive out of order.
1128579d342SChristian Hopps  * @dont_frag: true to inhibit fragmenting across IPTFS outer packets.
1134b3faf61SChristian Hopps  */
1144b3faf61SChristian Hopps struct xfrm_iptfs_config {
1154b3faf61SChristian Hopps 	u32 pkt_size;	    /* outer_packet_size or 0 */
1160e4fbf01SChristian Hopps 	u32 max_queue_size; /* octets */
1176be02e3eSChristian Hopps 	u16 reorder_win_size;
1188579d342SChristian Hopps 	u8 dont_frag : 1;
1194b3faf61SChristian Hopps };
1204b3faf61SChristian Hopps 
1216be02e3eSChristian Hopps struct skb_wseq {
1226be02e3eSChristian Hopps 	struct sk_buff *skb;
1236be02e3eSChristian Hopps 	u64 drop_time;
1246be02e3eSChristian Hopps };
1256be02e3eSChristian Hopps 
1264b3faf61SChristian Hopps /**
1274b3faf61SChristian Hopps  * struct xfrm_iptfs_data - mode specific xfrm state.
1284b3faf61SChristian Hopps  * @cfg: IPTFS tunnel config.
1294b3faf61SChristian Hopps  * @x: owning SA (xfrm_state).
1300e4fbf01SChristian Hopps  * @queue: queued user packets to send.
1310e4fbf01SChristian Hopps  * @queue_size: number of octets on queue (sum of packet sizes).
1320e4fbf01SChristian Hopps  * @ecn_queue_size: octets above with ECN mark.
1330e4fbf01SChristian Hopps  * @init_delay_ns: nanoseconds to wait to send initial IPTFS packet.
1340e4fbf01SChristian Hopps  * @iptfs_timer: output timer.
135ed58b186SChristian Hopps  * @iptfs_settime: time the output timer was set.
1364b3faf61SChristian Hopps  * @payload_mtu: max payload size.
1376be02e3eSChristian Hopps  * @w_seq_set: true after first seq received.
1386be02e3eSChristian Hopps  * @w_wantseq: waiting for this seq number as next to process (in order).
1396be02e3eSChristian Hopps  * @w_saved: the saved buf array (reorder window).
1406be02e3eSChristian Hopps  * @w_savedlen: the saved len (not size).
14107569476SChristian Hopps  * @drop_lock: lock to protect reorder queue.
14207569476SChristian Hopps  * @drop_timer: timer for considering next packet lost.
14307569476SChristian Hopps  * @drop_time_ns: timer intervan in nanoseconds.
14407569476SChristian Hopps  * @ra_newskb: new pkt being reassembled.
14507569476SChristian Hopps  * @ra_wantseq: expected next sequence for reassembly.
14607569476SChristian Hopps  * @ra_runt: last pkt bytes from very end of last skb.
14707569476SChristian Hopps  * @ra_runtlen: size of ra_runt.
1484b3faf61SChristian Hopps  */
1494b3faf61SChristian Hopps struct xfrm_iptfs_data {
1504b3faf61SChristian Hopps 	struct xfrm_iptfs_config cfg;
1514b3faf61SChristian Hopps 
1524b3faf61SChristian Hopps 	/* Ingress User Input */
1534b3faf61SChristian Hopps 	struct xfrm_state *x;	   /* owning state */
1540e4fbf01SChristian Hopps 	struct sk_buff_head queue; /* output queue */
1550e4fbf01SChristian Hopps 
1560e4fbf01SChristian Hopps 	u32 queue_size;		    /* octets */
1570e4fbf01SChristian Hopps 	u32 ecn_queue_size;	    /* octets above which ECN mark */
1580e4fbf01SChristian Hopps 	u64 init_delay_ns;	    /* nanoseconds */
1590e4fbf01SChristian Hopps 	struct hrtimer iptfs_timer; /* output timer */
160ed58b186SChristian Hopps 	time64_t iptfs_settime;	    /* time timer was set */
1614b3faf61SChristian Hopps 	u32 payload_mtu;	    /* max payload size */
16207569476SChristian Hopps 
1636be02e3eSChristian Hopps 	/* Tunnel input reordering */
1646be02e3eSChristian Hopps 	bool w_seq_set;		  /* true after first seq received */
1656be02e3eSChristian Hopps 	u64 w_wantseq;		  /* expected next sequence */
1666be02e3eSChristian Hopps 	struct skb_wseq *w_saved; /* the saved buf array */
1676be02e3eSChristian Hopps 	u32 w_savedlen;		  /* the saved len (not size) */
16807569476SChristian Hopps 	spinlock_t drop_lock;
16907569476SChristian Hopps 	struct hrtimer drop_timer;
17007569476SChristian Hopps 	u64 drop_time_ns;
17107569476SChristian Hopps 
1726be02e3eSChristian Hopps 	/* Tunnel input reassembly */
17307569476SChristian Hopps 	struct sk_buff *ra_newskb; /* new pkt being reassembled */
17407569476SChristian Hopps 	u64 ra_wantseq;		   /* expected next sequence */
17507569476SChristian Hopps 	u8 ra_runt[6];		   /* last pkt bytes from last skb */
17607569476SChristian Hopps 	u8 ra_runtlen;		   /* count of ra_runt */
1774b3faf61SChristian Hopps };
1784b3faf61SChristian Hopps 
1798579d342SChristian Hopps static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu);
1800e4fbf01SChristian Hopps static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me);
18107569476SChristian Hopps static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me);
18207569476SChristian Hopps 
18307569476SChristian Hopps /* ================= */
18407569476SChristian Hopps /* Utility Functions */
18507569476SChristian Hopps /* ================= */
18607569476SChristian Hopps 
187ed58b186SChristian Hopps #ifdef TRACEPOINTS_ENABLED
188ed58b186SChristian Hopps static u32 __trace_ip_proto(struct iphdr *iph)
189ed58b186SChristian Hopps {
190ed58b186SChristian Hopps 	if (iph->version == 4)
191ed58b186SChristian Hopps 		return iph->protocol;
192ed58b186SChristian Hopps 	return ((struct ipv6hdr *)iph)->nexthdr;
193ed58b186SChristian Hopps }
194ed58b186SChristian Hopps 
195ed58b186SChristian Hopps static u32 __trace_ip_proto_seq(struct iphdr *iph)
196ed58b186SChristian Hopps {
197ed58b186SChristian Hopps 	void *nexthdr;
198ed58b186SChristian Hopps 	u32 protocol = 0;
199ed58b186SChristian Hopps 
200ed58b186SChristian Hopps 	if (iph->version == 4) {
201ed58b186SChristian Hopps 		nexthdr = (void *)(iph + 1);
202ed58b186SChristian Hopps 		protocol = iph->protocol;
203ed58b186SChristian Hopps 	} else if (iph->version == 6) {
204ed58b186SChristian Hopps 		nexthdr = (void *)(((struct ipv6hdr *)(iph)) + 1);
205ed58b186SChristian Hopps 		protocol = ((struct ipv6hdr *)(iph))->nexthdr;
206ed58b186SChristian Hopps 	}
207ed58b186SChristian Hopps 	switch (protocol) {
208ed58b186SChristian Hopps 	case IPPROTO_ICMP:
209ed58b186SChristian Hopps 		return ntohs(((struct icmphdr *)nexthdr)->un.echo.sequence);
210ed58b186SChristian Hopps 	case IPPROTO_ICMPV6:
211ed58b186SChristian Hopps 		return ntohs(((struct icmp6hdr *)nexthdr)->icmp6_sequence);
212ed58b186SChristian Hopps 	case IPPROTO_TCP:
213ed58b186SChristian Hopps 		return ntohl(((struct tcphdr *)nexthdr)->seq);
214ed58b186SChristian Hopps 	case IPPROTO_UDP:
215ed58b186SChristian Hopps 		return ntohs(((struct udphdr *)nexthdr)->source);
216ed58b186SChristian Hopps 	default:
217ed58b186SChristian Hopps 		return 0;
218ed58b186SChristian Hopps 	}
219ed58b186SChristian Hopps }
220ed58b186SChristian Hopps #endif /*TRACEPOINTS_ENABLED*/
221ed58b186SChristian Hopps 
22207569476SChristian Hopps static u64 __esp_seq(struct sk_buff *skb)
22307569476SChristian Hopps {
22407569476SChristian Hopps 	u64 seq = ntohl(XFRM_SKB_CB(skb)->seq.input.low);
22507569476SChristian Hopps 
22607569476SChristian Hopps 	return seq | (u64)ntohl(XFRM_SKB_CB(skb)->seq.input.hi) << 32;
22707569476SChristian Hopps }
2280e4fbf01SChristian Hopps 
229b96ba312SChristian Hopps /* ======================= */
230b96ba312SChristian Hopps /* IPTFS SK_BUFF Functions */
231b96ba312SChristian Hopps /* ======================= */
232b96ba312SChristian Hopps 
233b96ba312SChristian Hopps /**
2348579d342SChristian Hopps  * iptfs_alloc_skb() - Allocate a new `skb`.
2358579d342SChristian Hopps  * @tpl: the skb to copy required meta-data from.
2368579d342SChristian Hopps  * @len: the linear length of the head data, zero is fine.
2378579d342SChristian Hopps  * @l3resv: true if skb reserve needs to support pushing L3 headers
2388579d342SChristian Hopps  *
2398579d342SChristian Hopps  * A new `skb` is allocated and required meta-data is copied from `tpl`, the
2408579d342SChristian Hopps  * head data is sized to `len` + reserved space set according to the @l3resv
2418579d342SChristian Hopps  * boolean.
2428579d342SChristian Hopps  *
2438579d342SChristian Hopps  * When @l3resv is false, resv is XFRM_IPTFS_MIN_L2HEADROOM which arranges for
2448579d342SChristian Hopps  * `skb->data - 16`  which is a good guess for good cache alignment (placing the
2458579d342SChristian Hopps  * to be pushed L2 header at the start of a cacheline.
2468579d342SChristian Hopps  *
2478579d342SChristian Hopps  * Otherwise, @l3resv is true and resv is set to the correct reserved space for
2488579d342SChristian Hopps  * dst->dev plus the calculated L3 overhead for the xfrm dst or
2498579d342SChristian Hopps  * XFRM_IPTFS_MIN_L3HEADROOM whichever is larger. This is then cache aligned so
2508579d342SChristian Hopps  * that all the headers will commonly fall in a cacheline when possible.
2518579d342SChristian Hopps  *
2528579d342SChristian Hopps  * l3resv=true is used on tunnel ingress (tx), because we need to reserve for
2538579d342SChristian Hopps  * the new IPTFS packet (i.e., L2+L3 headers). On tunnel egress (rx) the data
2548579d342SChristian Hopps  * being copied into the skb includes the user L3 headers already so we only
2558579d342SChristian Hopps  * need to reserve for L2.
2568579d342SChristian Hopps  *
2578579d342SChristian Hopps  * Return: the new skb or NULL.
2588579d342SChristian Hopps  */
2598579d342SChristian Hopps static struct sk_buff *iptfs_alloc_skb(struct sk_buff *tpl, u32 len, bool l3resv)
2608579d342SChristian Hopps {
2618579d342SChristian Hopps 	struct sk_buff *skb;
2628579d342SChristian Hopps 	u32 resv;
2638579d342SChristian Hopps 
2648579d342SChristian Hopps 	if (!l3resv) {
2658579d342SChristian Hopps 		resv = XFRM_IPTFS_MIN_L2HEADROOM;
2668579d342SChristian Hopps 	} else {
2678579d342SChristian Hopps 		struct dst_entry *dst = skb_dst(tpl);
2688579d342SChristian Hopps 
2698579d342SChristian Hopps 		resv = LL_RESERVED_SPACE(dst->dev) + dst->header_len;
2708579d342SChristian Hopps 		resv = max(resv, XFRM_IPTFS_MIN_L3HEADROOM);
2718579d342SChristian Hopps 		resv = L1_CACHE_ALIGN(resv);
2728579d342SChristian Hopps 	}
2738579d342SChristian Hopps 
2748579d342SChristian Hopps 	skb = alloc_skb(len + resv, GFP_ATOMIC | __GFP_NOWARN);
2758579d342SChristian Hopps 	if (!skb)
2768579d342SChristian Hopps 		return NULL;
2778579d342SChristian Hopps 
2788579d342SChristian Hopps 	skb_reserve(skb, resv);
2798579d342SChristian Hopps 
2808579d342SChristian Hopps 	if (!l3resv) {
2818579d342SChristian Hopps 		/* xfrm_input resume needs dev and xfrm ext from tunnel pkt */
2828579d342SChristian Hopps 		skb->dev = tpl->dev;
2838579d342SChristian Hopps 		__skb_ext_copy(skb, tpl);
2848579d342SChristian Hopps 	}
2858579d342SChristian Hopps 
2868579d342SChristian Hopps 	/* dropped by xfrm_input, used by xfrm_output */
2878579d342SChristian Hopps 	skb_dst_copy(skb, tpl);
2888579d342SChristian Hopps 
2898579d342SChristian Hopps 	return skb;
2908579d342SChristian Hopps }
2918579d342SChristian Hopps 
2928579d342SChristian Hopps /**
293b96ba312SChristian Hopps  * iptfs_skb_head_to_frag() - initialize a skb_frag_t based on skb head data
294b96ba312SChristian Hopps  * @skb: skb with the head data
295b96ba312SChristian Hopps  * @frag: frag to initialize
296b96ba312SChristian Hopps  */
297b96ba312SChristian Hopps static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag)
298b96ba312SChristian Hopps {
299b96ba312SChristian Hopps 	struct page *page = virt_to_head_page(skb->data);
300b96ba312SChristian Hopps 	unsigned char *addr = (unsigned char *)page_address(page);
301b96ba312SChristian Hopps 
302b96ba312SChristian Hopps 	skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb));
303b96ba312SChristian Hopps }
304b96ba312SChristian Hopps 
3055f2b6a90SChristian Hopps /**
3065f2b6a90SChristian Hopps  * struct iptfs_skb_frag_walk - use to track a walk through fragments
3075f2b6a90SChristian Hopps  * @fragi: current fragment index
3085f2b6a90SChristian Hopps  * @past: length of data in fragments before @fragi
3095f2b6a90SChristian Hopps  * @total: length of data in all fragments
3105f2b6a90SChristian Hopps  * @nr_frags: number of fragments present in array
3115f2b6a90SChristian Hopps  * @initial_offset: the value passed in to skb_prepare_frag_walk()
3125f2b6a90SChristian Hopps  * @frags: the page fragments inc. room for head page
3135f2b6a90SChristian Hopps  * @pp_recycle: copy of skb->pp_recycle
3145f2b6a90SChristian Hopps  */
3155f2b6a90SChristian Hopps struct iptfs_skb_frag_walk {
3165f2b6a90SChristian Hopps 	u32 fragi;
3175f2b6a90SChristian Hopps 	u32 past;
3185f2b6a90SChristian Hopps 	u32 total;
3195f2b6a90SChristian Hopps 	u32 nr_frags;
3205f2b6a90SChristian Hopps 	u32 initial_offset;
3215f2b6a90SChristian Hopps 	skb_frag_t frags[MAX_SKB_FRAGS + 1];
3225f2b6a90SChristian Hopps 	bool pp_recycle;
3235f2b6a90SChristian Hopps };
3245f2b6a90SChristian Hopps 
3255f2b6a90SChristian Hopps /**
3265f2b6a90SChristian Hopps  * iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb.
3275f2b6a90SChristian Hopps  * @skb: the skb to walk.
3285f2b6a90SChristian Hopps  * @initial_offset: start the walk @initial_offset into the skb.
3295f2b6a90SChristian Hopps  * @walk: the walk to initialize
3305f2b6a90SChristian Hopps  *
3315f2b6a90SChristian Hopps  * Future calls to skb_add_frags() will expect the @offset value to be at
3325f2b6a90SChristian Hopps  * least @initial_offset large.
3335f2b6a90SChristian Hopps  */
3345f2b6a90SChristian Hopps static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset,
3355f2b6a90SChristian Hopps 					struct iptfs_skb_frag_walk *walk)
3365f2b6a90SChristian Hopps {
3375f2b6a90SChristian Hopps 	struct skb_shared_info *shinfo = skb_shinfo(skb);
3385f2b6a90SChristian Hopps 	skb_frag_t *frag, *from;
3395f2b6a90SChristian Hopps 	u32 i;
3405f2b6a90SChristian Hopps 
3415f2b6a90SChristian Hopps 	walk->initial_offset = initial_offset;
3425f2b6a90SChristian Hopps 	walk->fragi = 0;
3435f2b6a90SChristian Hopps 	walk->past = 0;
3445f2b6a90SChristian Hopps 	walk->total = 0;
3455f2b6a90SChristian Hopps 	walk->nr_frags = 0;
3465f2b6a90SChristian Hopps 	walk->pp_recycle = skb->pp_recycle;
3475f2b6a90SChristian Hopps 
3485f2b6a90SChristian Hopps 	if (skb->head_frag) {
3495f2b6a90SChristian Hopps 		if (initial_offset >= skb_headlen(skb)) {
3505f2b6a90SChristian Hopps 			initial_offset -= skb_headlen(skb);
3515f2b6a90SChristian Hopps 		} else {
3525f2b6a90SChristian Hopps 			frag = &walk->frags[walk->nr_frags++];
3535f2b6a90SChristian Hopps 			iptfs_skb_head_to_frag(skb, frag);
3545f2b6a90SChristian Hopps 			frag->offset += initial_offset;
3555f2b6a90SChristian Hopps 			frag->len -= initial_offset;
3565f2b6a90SChristian Hopps 			walk->total += frag->len;
3575f2b6a90SChristian Hopps 			initial_offset = 0;
3585f2b6a90SChristian Hopps 		}
3595f2b6a90SChristian Hopps 	} else {
3605f2b6a90SChristian Hopps 		initial_offset -= skb_headlen(skb);
3615f2b6a90SChristian Hopps 	}
3625f2b6a90SChristian Hopps 
3635f2b6a90SChristian Hopps 	for (i = 0; i < shinfo->nr_frags; i++) {
3645f2b6a90SChristian Hopps 		from = &shinfo->frags[i];
3655f2b6a90SChristian Hopps 		if (initial_offset >= from->len) {
3665f2b6a90SChristian Hopps 			initial_offset -= from->len;
3675f2b6a90SChristian Hopps 			continue;
3685f2b6a90SChristian Hopps 		}
3695f2b6a90SChristian Hopps 		frag = &walk->frags[walk->nr_frags++];
3705f2b6a90SChristian Hopps 		*frag = *from;
3715f2b6a90SChristian Hopps 		if (initial_offset) {
3725f2b6a90SChristian Hopps 			frag->offset += initial_offset;
3735f2b6a90SChristian Hopps 			frag->len -= initial_offset;
3745f2b6a90SChristian Hopps 			initial_offset = 0;
3755f2b6a90SChristian Hopps 		}
3765f2b6a90SChristian Hopps 		walk->total += frag->len;
3775f2b6a90SChristian Hopps 	}
3785f2b6a90SChristian Hopps }
3795f2b6a90SChristian Hopps 
3805f2b6a90SChristian Hopps static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk,
3815f2b6a90SChristian Hopps 				     u32 offset)
3825f2b6a90SChristian Hopps {
3835f2b6a90SChristian Hopps 	/* Adjust offset to refer to internal walk values */
3845f2b6a90SChristian Hopps 	offset -= walk->initial_offset;
3855f2b6a90SChristian Hopps 
3865f2b6a90SChristian Hopps 	/* Get to the correct fragment for offset */
3875f2b6a90SChristian Hopps 	while (offset < walk->past) {
3885f2b6a90SChristian Hopps 		walk->past -= walk->frags[--walk->fragi].len;
3895f2b6a90SChristian Hopps 		if (offset >= walk->past)
3905f2b6a90SChristian Hopps 			break;
3915f2b6a90SChristian Hopps 	}
3925f2b6a90SChristian Hopps 	while (offset >= walk->past + walk->frags[walk->fragi].len)
3935f2b6a90SChristian Hopps 		walk->past += walk->frags[walk->fragi++].len;
3945f2b6a90SChristian Hopps 
3955f2b6a90SChristian Hopps 	/* offset now relative to this current frag */
3965f2b6a90SChristian Hopps 	offset -= walk->past;
3975f2b6a90SChristian Hopps 	return offset;
3985f2b6a90SChristian Hopps }
3995f2b6a90SChristian Hopps 
4005f2b6a90SChristian Hopps /**
4015f2b6a90SChristian Hopps  * iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb
4025f2b6a90SChristian Hopps  * @skb: skb to check for adding frags to
4035f2b6a90SChristian Hopps  * @walk: the walk that will be used as source for frags.
4045f2b6a90SChristian Hopps  * @offset: offset from beginning of original skb to start from.
4055f2b6a90SChristian Hopps  * @len: amount of data to add frag references to in @skb.
4065f2b6a90SChristian Hopps  *
4075f2b6a90SChristian Hopps  * Return: true if ok to add frags.
4085f2b6a90SChristian Hopps  */
4095f2b6a90SChristian Hopps static bool iptfs_skb_can_add_frags(const struct sk_buff *skb,
4105f2b6a90SChristian Hopps 				    struct iptfs_skb_frag_walk *walk,
4115f2b6a90SChristian Hopps 				    u32 offset, u32 len)
4125f2b6a90SChristian Hopps {
4135f2b6a90SChristian Hopps 	struct skb_shared_info *shinfo = skb_shinfo(skb);
4145f2b6a90SChristian Hopps 	u32 fragi, nr_frags, fraglen;
4155f2b6a90SChristian Hopps 
4165f2b6a90SChristian Hopps 	if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle)
4175f2b6a90SChristian Hopps 		return false;
4185f2b6a90SChristian Hopps 
4195f2b6a90SChristian Hopps 	/* Make offset relative to current frag after setting that */
4205f2b6a90SChristian Hopps 	offset = iptfs_skb_reset_frag_walk(walk, offset);
4215f2b6a90SChristian Hopps 
4225f2b6a90SChristian Hopps 	/* Verify we have array space for the fragments we need to add */
4235f2b6a90SChristian Hopps 	fragi = walk->fragi;
4245f2b6a90SChristian Hopps 	nr_frags = shinfo->nr_frags;
4255f2b6a90SChristian Hopps 	while (len && fragi < walk->nr_frags) {
4265f2b6a90SChristian Hopps 		skb_frag_t *frag = &walk->frags[fragi];
4275f2b6a90SChristian Hopps 
4285f2b6a90SChristian Hopps 		fraglen = frag->len;
4295f2b6a90SChristian Hopps 		if (offset) {
4305f2b6a90SChristian Hopps 			fraglen -= offset;
4315f2b6a90SChristian Hopps 			offset = 0;
4325f2b6a90SChristian Hopps 		}
4335f2b6a90SChristian Hopps 		if (++nr_frags > MAX_SKB_FRAGS)
4345f2b6a90SChristian Hopps 			return false;
4355f2b6a90SChristian Hopps 		if (len <= fraglen)
4365f2b6a90SChristian Hopps 			return true;
4375f2b6a90SChristian Hopps 		len -= fraglen;
4385f2b6a90SChristian Hopps 		fragi++;
4395f2b6a90SChristian Hopps 	}
4405f2b6a90SChristian Hopps 	/* We may not copy all @len but what we have will fit. */
4415f2b6a90SChristian Hopps 	return true;
4425f2b6a90SChristian Hopps }
4435f2b6a90SChristian Hopps 
4445f2b6a90SChristian Hopps /**
4455f2b6a90SChristian Hopps  * iptfs_skb_add_frags() - add a range of fragment references into an skb
4465f2b6a90SChristian Hopps  * @skb: skb to add references into
4475f2b6a90SChristian Hopps  * @walk: the walk to add referenced fragments from.
4485f2b6a90SChristian Hopps  * @offset: offset from beginning of original skb to start from.
4495f2b6a90SChristian Hopps  * @len: amount of data to add frag references to in @skb.
4505f2b6a90SChristian Hopps  *
4515f2b6a90SChristian Hopps  * iptfs_skb_can_add_frags() should be called before this function to verify
4525f2b6a90SChristian Hopps  * that the destination @skb is compatible with the walk and has space in the
4535f2b6a90SChristian Hopps  * array for the to be added frag references.
4545f2b6a90SChristian Hopps  *
4555f2b6a90SChristian Hopps  * Return: The number of bytes not added to @skb b/c we reached the end of the
4565f2b6a90SChristian Hopps  * walk before adding all of @len.
4575f2b6a90SChristian Hopps  */
4585f2b6a90SChristian Hopps static int iptfs_skb_add_frags(struct sk_buff *skb,
4595f2b6a90SChristian Hopps 			       struct iptfs_skb_frag_walk *walk, u32 offset,
4605f2b6a90SChristian Hopps 			       u32 len)
4615f2b6a90SChristian Hopps {
4625f2b6a90SChristian Hopps 	struct skb_shared_info *shinfo = skb_shinfo(skb);
4635f2b6a90SChristian Hopps 	u32 fraglen;
4645f2b6a90SChristian Hopps 
4655f2b6a90SChristian Hopps 	if (!walk->nr_frags || offset >= walk->total + walk->initial_offset)
4665f2b6a90SChristian Hopps 		return len;
4675f2b6a90SChristian Hopps 
4685f2b6a90SChristian Hopps 	/* make offset relative to current frag after setting that */
4695f2b6a90SChristian Hopps 	offset = iptfs_skb_reset_frag_walk(walk, offset);
4705f2b6a90SChristian Hopps 
4715f2b6a90SChristian Hopps 	while (len && walk->fragi < walk->nr_frags) {
4725f2b6a90SChristian Hopps 		skb_frag_t *frag = &walk->frags[walk->fragi];
4735f2b6a90SChristian Hopps 		skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags];
4745f2b6a90SChristian Hopps 
4755f2b6a90SChristian Hopps 		*tofrag = *frag;
4765f2b6a90SChristian Hopps 		if (offset) {
4775f2b6a90SChristian Hopps 			tofrag->offset += offset;
4785f2b6a90SChristian Hopps 			tofrag->len -= offset;
4795f2b6a90SChristian Hopps 			offset = 0;
4805f2b6a90SChristian Hopps 		}
4815f2b6a90SChristian Hopps 		__skb_frag_ref(tofrag);
4825f2b6a90SChristian Hopps 		shinfo->nr_frags++;
4835f2b6a90SChristian Hopps 
4845f2b6a90SChristian Hopps 		/* see if we are done */
4855f2b6a90SChristian Hopps 		fraglen = tofrag->len;
4865f2b6a90SChristian Hopps 		if (len < fraglen) {
4875f2b6a90SChristian Hopps 			tofrag->len = len;
4885f2b6a90SChristian Hopps 			skb->len += len;
4895f2b6a90SChristian Hopps 			skb->data_len += len;
4905f2b6a90SChristian Hopps 			return 0;
4915f2b6a90SChristian Hopps 		}
4925f2b6a90SChristian Hopps 		/* advance to next source fragment */
4935f2b6a90SChristian Hopps 		len -= fraglen;			/* careful, use dst bv_len */
4945f2b6a90SChristian Hopps 		skb->len += fraglen;		/* careful, "   "    "     */
4955f2b6a90SChristian Hopps 		skb->data_len += fraglen;	/* careful, "   "    "     */
4965f2b6a90SChristian Hopps 		walk->past += frag->len;	/* careful, use src bv_len */
4975f2b6a90SChristian Hopps 		walk->fragi++;
4985f2b6a90SChristian Hopps 	}
4995f2b6a90SChristian Hopps 	return len;
5005f2b6a90SChristian Hopps }
5015f2b6a90SChristian Hopps 
5026c82d243SChristian Hopps /* ================================== */
503ed58b186SChristian Hopps /* IPTFS Trace Event Definitions      */
504ed58b186SChristian Hopps /* ================================== */
505ed58b186SChristian Hopps 
506ed58b186SChristian Hopps #define CREATE_TRACE_POINTS
507ed58b186SChristian Hopps #include "trace_iptfs.h"
508ed58b186SChristian Hopps 
509ed58b186SChristian Hopps /* ================================== */
5106c82d243SChristian Hopps /* IPTFS Receiving (egress) Functions */
5116c82d243SChristian Hopps /* ================================== */
5126c82d243SChristian Hopps 
5136c82d243SChristian Hopps /**
5145f2b6a90SChristian Hopps  * iptfs_pskb_add_frags() - Create and add frags into a new sk_buff.
5155f2b6a90SChristian Hopps  * @tpl: template to create new skb from.
5165f2b6a90SChristian Hopps  * @walk: The source for fragments to add.
5175f2b6a90SChristian Hopps  * @off: The offset into @walk to add frags from, also used with @st and
5185f2b6a90SChristian Hopps  *       @copy_len.
5195f2b6a90SChristian Hopps  * @len: The length of data to add covering frags from @walk into @skb.
5205f2b6a90SChristian Hopps  *       This must be <= @skblen.
5215f2b6a90SChristian Hopps  * @st: The sequence state to copy from into the new head skb.
5225f2b6a90SChristian Hopps  * @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb
5235f2b6a90SChristian Hopps  *            linear space.
5245f2b6a90SChristian Hopps  *
5255f2b6a90SChristian Hopps  * Create a new sk_buff `skb` using the template @tpl. Copy @copy_len bytes from
5265f2b6a90SChristian Hopps  * @st into the new skb linear space, and then add shared fragments from the
5275f2b6a90SChristian Hopps  * frag walk for the remaining @len of data (i.e., @len - @copy_len bytes).
5285f2b6a90SChristian Hopps  *
5295f2b6a90SChristian Hopps  * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
5305f2b6a90SChristian Hopps  */
5315f2b6a90SChristian Hopps static struct sk_buff *
5325f2b6a90SChristian Hopps iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk,
5335f2b6a90SChristian Hopps 		     u32 off, u32 len, struct skb_seq_state *st, u32 copy_len)
5345f2b6a90SChristian Hopps {
5355f2b6a90SChristian Hopps 	struct sk_buff *skb;
5365f2b6a90SChristian Hopps 
5375f2b6a90SChristian Hopps 	skb = iptfs_alloc_skb(tpl, copy_len, false);
5385f2b6a90SChristian Hopps 	if (!skb)
5395f2b6a90SChristian Hopps 		return NULL;
5405f2b6a90SChristian Hopps 
5415f2b6a90SChristian Hopps 	/* this should not normally be happening */
5425f2b6a90SChristian Hopps 	if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len,
5435f2b6a90SChristian Hopps 				     len - copy_len)) {
5445f2b6a90SChristian Hopps 		kfree_skb(skb);
5455f2b6a90SChristian Hopps 		return NULL;
5465f2b6a90SChristian Hopps 	}
5475f2b6a90SChristian Hopps 
5485f2b6a90SChristian Hopps 	if (copy_len &&
5495f2b6a90SChristian Hopps 	    skb_copy_seq_read(st, off, skb_put(skb, copy_len), copy_len)) {
5505f2b6a90SChristian Hopps 		XFRM_INC_STATS(dev_net(st->root_skb->dev),
5515f2b6a90SChristian Hopps 			       LINUX_MIB_XFRMINERROR);
5525f2b6a90SChristian Hopps 		kfree_skb(skb);
5535f2b6a90SChristian Hopps 		return NULL;
5545f2b6a90SChristian Hopps 	}
5555f2b6a90SChristian Hopps 
5565f2b6a90SChristian Hopps 	iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len);
5575f2b6a90SChristian Hopps 	return skb;
5585f2b6a90SChristian Hopps }
5595f2b6a90SChristian Hopps 
5605f2b6a90SChristian Hopps /**
5616c82d243SChristian Hopps  * iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
5626c82d243SChristian Hopps  * @skblen: the total data size for `skb`.
5636c82d243SChristian Hopps  * @st: The source for the rest of the data to copy into `skb`.
5646c82d243SChristian Hopps  * @off: The offset into @st to copy data from.
5656c82d243SChristian Hopps  * @len: The length of data to copy from @st into `skb`. This must be <=
5666c82d243SChristian Hopps  *       @skblen.
5676c82d243SChristian Hopps  *
5686c82d243SChristian Hopps  * Create a new sk_buff `skb` with @skblen of packet data space. If non-zero,
5696c82d243SChristian Hopps  * copy @rlen bytes of @runt into `skb`. Then using seq functions copy @len
5706c82d243SChristian Hopps  * bytes from @st into `skb` starting from @off.
5716c82d243SChristian Hopps  *
5726c82d243SChristian Hopps  * It is an error for @len to be greater than the amount of data left in @st.
5736c82d243SChristian Hopps  *
5746c82d243SChristian Hopps  * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
5756c82d243SChristian Hopps  */
5766c82d243SChristian Hopps static struct sk_buff *
5776c82d243SChristian Hopps iptfs_pskb_extract_seq(u32 skblen, struct skb_seq_state *st, u32 off, int len)
5786c82d243SChristian Hopps {
5796c82d243SChristian Hopps 	struct sk_buff *skb = iptfs_alloc_skb(st->root_skb, skblen, false);
5806c82d243SChristian Hopps 
5816c82d243SChristian Hopps 	if (!skb)
5826c82d243SChristian Hopps 		return NULL;
5836c82d243SChristian Hopps 	if (skb_copy_seq_read(st, off, skb_put(skb, len), len)) {
5846c82d243SChristian Hopps 		XFRM_INC_STATS(dev_net(st->root_skb->dev), LINUX_MIB_XFRMINERROR);
5856c82d243SChristian Hopps 		kfree_skb(skb);
5866c82d243SChristian Hopps 		return NULL;
5876c82d243SChristian Hopps 	}
5886c82d243SChristian Hopps 	return skb;
5896c82d243SChristian Hopps }
5906c82d243SChristian Hopps 
5916c82d243SChristian Hopps /**
59207569476SChristian Hopps  * iptfs_input_save_runt() - save data in xtfs runt space.
59307569476SChristian Hopps  * @xtfs: xtfs state
59407569476SChristian Hopps  * @seq: the current sequence
59507569476SChristian Hopps  * @buf: packet data
59607569476SChristian Hopps  * @len: length of packet data
59707569476SChristian Hopps  *
59807569476SChristian Hopps  * Save the small (`len`) start of a fragmented packet in `buf` in the xtfs data
59907569476SChristian Hopps  * runt space.
60007569476SChristian Hopps  */
60107569476SChristian Hopps static void iptfs_input_save_runt(struct xfrm_iptfs_data *xtfs, u64 seq,
60207569476SChristian Hopps 				  u8 *buf, int len)
60307569476SChristian Hopps {
60407569476SChristian Hopps 	memcpy(xtfs->ra_runt, buf, len);
60507569476SChristian Hopps 
60607569476SChristian Hopps 	xtfs->ra_runtlen = len;
60707569476SChristian Hopps 	xtfs->ra_wantseq = seq + 1;
60807569476SChristian Hopps }
60907569476SChristian Hopps 
61007569476SChristian Hopps /**
61107569476SChristian Hopps  * __iptfs_iphlen() - return the v4/v6 header length using packet data.
61207569476SChristian Hopps  * @data: pointer at octet with version nibble
61307569476SChristian Hopps  *
61407569476SChristian Hopps  * The version data has been checked to be valid (i.e., either 4 or 6).
61507569476SChristian Hopps  *
61607569476SChristian Hopps  * Return: the IP header size based on the IP version.
61707569476SChristian Hopps  */
61807569476SChristian Hopps static u32 __iptfs_iphlen(u8 *data)
61907569476SChristian Hopps {
62007569476SChristian Hopps 	struct iphdr *iph = (struct iphdr *)data;
62107569476SChristian Hopps 
62207569476SChristian Hopps 	if (iph->version == 0x4)
62307569476SChristian Hopps 		return sizeof(*iph);
62407569476SChristian Hopps 	return sizeof(struct ipv6hdr);
62507569476SChristian Hopps }
62607569476SChristian Hopps 
62707569476SChristian Hopps /**
62807569476SChristian Hopps  * __iptfs_iplen() - return the v4/v6 length using packet data.
62907569476SChristian Hopps  * @data: pointer to ip (v4/v6) packet header
63007569476SChristian Hopps  *
63107569476SChristian Hopps  * Grab the IPv4 or IPv6 length value in the start of the inner packet header
63207569476SChristian Hopps  * pointed to by `data`. Assumes data len is enough for the length field only.
63307569476SChristian Hopps  *
63407569476SChristian Hopps  * The version data has been checked to be valid (i.e., either 4 or 6).
63507569476SChristian Hopps  *
63607569476SChristian Hopps  * Return: the length value.
63707569476SChristian Hopps  */
63807569476SChristian Hopps static u32 __iptfs_iplen(u8 *data)
63907569476SChristian Hopps {
64007569476SChristian Hopps 	struct iphdr *iph = (struct iphdr *)data;
64107569476SChristian Hopps 
64207569476SChristian Hopps 	if (iph->version == 0x4)
64307569476SChristian Hopps 		return ntohs(iph->tot_len);
64407569476SChristian Hopps 	return ntohs(((struct ipv6hdr *)iph)->payload_len) +
64507569476SChristian Hopps 		sizeof(struct ipv6hdr);
64607569476SChristian Hopps }
64707569476SChristian Hopps 
64807569476SChristian Hopps /**
6496c82d243SChristian Hopps  * iptfs_complete_inner_skb() - finish preparing the inner packet for gro recv.
6506c82d243SChristian Hopps  * @x: xfrm state
6516c82d243SChristian Hopps  * @skb: the inner packet
6526c82d243SChristian Hopps  *
6536c82d243SChristian Hopps  * Finish the standard xfrm processing on the inner packet prior to sending back
6546c82d243SChristian Hopps  * through gro_cells_receive. We do this separately b/c we are building a list
6556c82d243SChristian Hopps  * of packets in the hopes that one day a list will be taken by
6566c82d243SChristian Hopps  * xfrm_input.
6576c82d243SChristian Hopps  */
6586c82d243SChristian Hopps static void iptfs_complete_inner_skb(struct xfrm_state *x, struct sk_buff *skb)
6596c82d243SChristian Hopps {
6606c82d243SChristian Hopps 	skb_reset_network_header(skb);
6616c82d243SChristian Hopps 
6626c82d243SChristian Hopps 	/* The packet is going back through gro_cells_receive no need to
6636c82d243SChristian Hopps 	 * set this.
6646c82d243SChristian Hopps 	 */
6656c82d243SChristian Hopps 	skb_reset_transport_header(skb);
6666c82d243SChristian Hopps 
6676c82d243SChristian Hopps 	/* Packet already has checksum value set. */
6686c82d243SChristian Hopps 	skb->ip_summed = CHECKSUM_NONE;
6696c82d243SChristian Hopps 
6706c82d243SChristian Hopps 	/* Our skb will contain the header data copied when this outer packet
6716c82d243SChristian Hopps 	 * which contained the start of this inner packet. This is true
6726c82d243SChristian Hopps 	 * when we allocate a new skb as well as when we reuse the existing skb.
6736c82d243SChristian Hopps 	 */
6746c82d243SChristian Hopps 	if (ip_hdr(skb)->version == 0x4) {
6756c82d243SChristian Hopps 		struct iphdr *iph = ip_hdr(skb);
6766c82d243SChristian Hopps 
6776c82d243SChristian Hopps 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
6786c82d243SChristian Hopps 			ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph);
6796c82d243SChristian Hopps 		if (!(x->props.flags & XFRM_STATE_NOECN))
6806c82d243SChristian Hopps 			if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
6816c82d243SChristian Hopps 				IP_ECN_set_ce(iph);
6826c82d243SChristian Hopps 
6836c82d243SChristian Hopps 		skb->protocol = htons(ETH_P_IP);
6846c82d243SChristian Hopps 	} else {
6856c82d243SChristian Hopps 		struct ipv6hdr *iph = ipv6_hdr(skb);
6866c82d243SChristian Hopps 
6876c82d243SChristian Hopps 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
6886c82d243SChristian Hopps 			ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph);
6896c82d243SChristian Hopps 		if (!(x->props.flags & XFRM_STATE_NOECN))
6906c82d243SChristian Hopps 			if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
6916c82d243SChristian Hopps 				IP6_ECN_set_ce(skb, iph);
6926c82d243SChristian Hopps 
6936c82d243SChristian Hopps 		skb->protocol = htons(ETH_P_IPV6);
6946c82d243SChristian Hopps 	}
6956c82d243SChristian Hopps }
6966c82d243SChristian Hopps 
69707569476SChristian Hopps static void __iptfs_reassem_done(struct xfrm_iptfs_data *xtfs, bool free)
69807569476SChristian Hopps {
69907569476SChristian Hopps 	assert_spin_locked(&xtfs->drop_lock);
70007569476SChristian Hopps 
70107569476SChristian Hopps 	/* We don't care if it works locking takes care of things */
70207569476SChristian Hopps 	hrtimer_try_to_cancel(&xtfs->drop_timer);
70307569476SChristian Hopps 	if (free)
70407569476SChristian Hopps 		kfree_skb(xtfs->ra_newskb);
70507569476SChristian Hopps 	xtfs->ra_newskb = NULL;
70607569476SChristian Hopps }
70707569476SChristian Hopps 
70807569476SChristian Hopps /**
70907569476SChristian Hopps  * iptfs_reassem_abort() - In-progress packet is aborted free the state.
71007569476SChristian Hopps  * @xtfs: xtfs state
71107569476SChristian Hopps  */
71207569476SChristian Hopps static void iptfs_reassem_abort(struct xfrm_iptfs_data *xtfs)
71307569476SChristian Hopps {
71407569476SChristian Hopps 	__iptfs_reassem_done(xtfs, true);
71507569476SChristian Hopps }
71607569476SChristian Hopps 
71707569476SChristian Hopps /**
71807569476SChristian Hopps  * iptfs_reassem_done() - In-progress packet is complete, clear the state.
71907569476SChristian Hopps  * @xtfs: xtfs state
72007569476SChristian Hopps  */
72107569476SChristian Hopps static void iptfs_reassem_done(struct xfrm_iptfs_data *xtfs)
72207569476SChristian Hopps {
72307569476SChristian Hopps 	__iptfs_reassem_done(xtfs, false);
72407569476SChristian Hopps }
72507569476SChristian Hopps 
72607569476SChristian Hopps /**
72707569476SChristian Hopps  * iptfs_reassem_cont() - Continue the reassembly of an inner packets.
72807569476SChristian Hopps  * @xtfs: xtfs state
72907569476SChristian Hopps  * @seq: sequence of current packet
73007569476SChristian Hopps  * @st: seq read stat for current packet
73107569476SChristian Hopps  * @skb: current packet
73207569476SChristian Hopps  * @data: offset into sequential packet data
73307569476SChristian Hopps  * @blkoff: packet blkoff value
73407569476SChristian Hopps  * @list: list of skbs to enqueue completed packet on
73507569476SChristian Hopps  *
73607569476SChristian Hopps  * Process an IPTFS payload that has a non-zero `blkoff` or when we are
73707569476SChristian Hopps  * expecting the continuation b/c we have a runt or in-progress packet.
73807569476SChristian Hopps  *
73907569476SChristian Hopps  * Return: the new data offset to continue processing from.
74007569476SChristian Hopps  */
74107569476SChristian Hopps static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
74207569476SChristian Hopps 			      struct skb_seq_state *st, struct sk_buff *skb,
74307569476SChristian Hopps 			      u32 data, u32 blkoff, struct list_head *list)
74407569476SChristian Hopps {
7455f2b6a90SChristian Hopps 	struct iptfs_skb_frag_walk _fragwalk;
7465f2b6a90SChristian Hopps 	struct iptfs_skb_frag_walk *fragwalk = NULL;
74707569476SChristian Hopps 	struct sk_buff *newskb = xtfs->ra_newskb;
74807569476SChristian Hopps 	u32 remaining = skb->len - data;
74907569476SChristian Hopps 	u32 runtlen = xtfs->ra_runtlen;
75007569476SChristian Hopps 	u32 copylen, fraglen, ipremain, iphlen, iphremain, rrem;
75107569476SChristian Hopps 
75207569476SChristian Hopps 	/* Handle packet fragment we aren't expecting */
75307569476SChristian Hopps 	if (!runtlen && !xtfs->ra_newskb)
75407569476SChristian Hopps 		return data + min(blkoff, remaining);
75507569476SChristian Hopps 
75607569476SChristian Hopps 	/* Important to remember that input to this function is an ordered
75707569476SChristian Hopps 	 * packet stream (unless the user disabled the reorder window). Thus if
75807569476SChristian Hopps 	 * we are waiting for, and expecting the next packet so we can continue
75907569476SChristian Hopps 	 * assembly, a newer sequence number indicates older ones are not coming
76007569476SChristian Hopps 	 * (or if they do should be ignored). Technically we can receive older
76107569476SChristian Hopps 	 * ones when the reorder window is disabled; however, the user should
76207569476SChristian Hopps 	 * have disabled fragmentation in this case, and regardless we don't
76307569476SChristian Hopps 	 * deal with it.
76407569476SChristian Hopps 	 *
76507569476SChristian Hopps 	 * blkoff could be zero if the stream is messed up (or it's an all pad
76607569476SChristian Hopps 	 * insertion) be careful to handle that case in each of the below
76707569476SChristian Hopps 	 */
76807569476SChristian Hopps 
76907569476SChristian Hopps 	/* Too old case: This can happen when the reorder window is disabled so
77007569476SChristian Hopps 	 * ordering isn't actually guaranteed.
77107569476SChristian Hopps 	 */
77207569476SChristian Hopps 	if (seq < xtfs->ra_wantseq)
77307569476SChristian Hopps 		return data + remaining;
77407569476SChristian Hopps 
77507569476SChristian Hopps 	/* Too new case: We missed what we wanted cleanup. */
77607569476SChristian Hopps 	if (seq > xtfs->ra_wantseq) {
77707569476SChristian Hopps 		XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR);
77807569476SChristian Hopps 		goto abandon;
77907569476SChristian Hopps 	}
78007569476SChristian Hopps 
78107569476SChristian Hopps 	if (blkoff == 0) {
78207569476SChristian Hopps 		if ((*skb->data & 0xF0) != 0) {
78307569476SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x),
78407569476SChristian Hopps 				       LINUX_MIB_XFRMINIPTFSERROR);
78507569476SChristian Hopps 			goto abandon;
78607569476SChristian Hopps 		}
78707569476SChristian Hopps 		/* Handle all pad case, advance expected sequence number.
78807569476SChristian Hopps 		 * (RFC 9347 S2.2.3)
78907569476SChristian Hopps 		 */
79007569476SChristian Hopps 		xtfs->ra_wantseq++;
79107569476SChristian Hopps 		/* will end parsing */
79207569476SChristian Hopps 		return data + remaining;
79307569476SChristian Hopps 	}
79407569476SChristian Hopps 
79507569476SChristian Hopps 	if (runtlen) {
79607569476SChristian Hopps 		/* Regardless of what happens we're done with the runt */
79707569476SChristian Hopps 		xtfs->ra_runtlen = 0;
79807569476SChristian Hopps 
79907569476SChristian Hopps 		/* The start of this inner packet was at the very end of the last
80007569476SChristian Hopps 		 * iptfs payload which didn't include enough for the ip header
80107569476SChristian Hopps 		 * length field. We must have *at least* that now.
80207569476SChristian Hopps 		 */
80307569476SChristian Hopps 		rrem = sizeof(xtfs->ra_runt) - runtlen;
80407569476SChristian Hopps 		if (remaining < rrem || blkoff < rrem) {
80507569476SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x),
80607569476SChristian Hopps 				       LINUX_MIB_XFRMINIPTFSERROR);
80707569476SChristian Hopps 			goto abandon;
80807569476SChristian Hopps 		}
80907569476SChristian Hopps 
81007569476SChristian Hopps 		/* fill in the runt data */
81107569476SChristian Hopps 		if (skb_copy_seq_read(st, data, &xtfs->ra_runt[runtlen],
81207569476SChristian Hopps 				      rrem)) {
81307569476SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x),
81407569476SChristian Hopps 				       LINUX_MIB_XFRMINBUFFERERROR);
81507569476SChristian Hopps 			goto abandon;
81607569476SChristian Hopps 		}
81707569476SChristian Hopps 
81807569476SChristian Hopps 		/* We have enough data to get the ip length value now,
81907569476SChristian Hopps 		 * allocate an in progress skb
82007569476SChristian Hopps 		 */
82107569476SChristian Hopps 		ipremain = __iptfs_iplen(xtfs->ra_runt);
82207569476SChristian Hopps 		if (ipremain < sizeof(xtfs->ra_runt)) {
82307569476SChristian Hopps 			/* length has to be at least runtsize large */
82407569476SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x),
82507569476SChristian Hopps 				       LINUX_MIB_XFRMINIPTFSERROR);
82607569476SChristian Hopps 			goto abandon;
82707569476SChristian Hopps 		}
82807569476SChristian Hopps 
82907569476SChristian Hopps 		/* For the runt case we don't attempt sharing currently. NOTE:
83007569476SChristian Hopps 		 * Currently, this IPTFS implementation will not create runts.
83107569476SChristian Hopps 		 */
83207569476SChristian Hopps 
83307569476SChristian Hopps 		newskb = iptfs_alloc_skb(skb, ipremain, false);
83407569476SChristian Hopps 		if (!newskb) {
83507569476SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINERROR);
83607569476SChristian Hopps 			goto abandon;
83707569476SChristian Hopps 		}
83807569476SChristian Hopps 		xtfs->ra_newskb = newskb;
83907569476SChristian Hopps 
84007569476SChristian Hopps 		/* Copy the runt data into the buffer, but leave data
84107569476SChristian Hopps 		 * pointers the same as normal non-runt case. The extra `rrem`
84207569476SChristian Hopps 		 * recopied bytes are basically cacheline free. Allows using
84307569476SChristian Hopps 		 * same logic below to complete.
84407569476SChristian Hopps 		 */
84507569476SChristian Hopps 		memcpy(skb_put(newskb, runtlen), xtfs->ra_runt,
84607569476SChristian Hopps 		       sizeof(xtfs->ra_runt));
84707569476SChristian Hopps 	}
84807569476SChristian Hopps 
84907569476SChristian Hopps 	/* Continue reassembling the packet */
85007569476SChristian Hopps 	ipremain = __iptfs_iplen(newskb->data);
85107569476SChristian Hopps 	iphlen = __iptfs_iphlen(newskb->data);
85207569476SChristian Hopps 
85307569476SChristian Hopps 	ipremain -= newskb->len;
85407569476SChristian Hopps 	if (blkoff < ipremain) {
85507569476SChristian Hopps 		/* Corrupt data, we don't have enough to complete the packet */
85607569476SChristian Hopps 		XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR);
85707569476SChristian Hopps 		goto abandon;
85807569476SChristian Hopps 	}
85907569476SChristian Hopps 
86007569476SChristian Hopps 	/* We want the IP header in linear space */
86107569476SChristian Hopps 	if (newskb->len < iphlen) {
86207569476SChristian Hopps 		iphremain = iphlen - newskb->len;
86307569476SChristian Hopps 		if (blkoff < iphremain) {
86407569476SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x),
86507569476SChristian Hopps 				       LINUX_MIB_XFRMINIPTFSERROR);
86607569476SChristian Hopps 			goto abandon;
86707569476SChristian Hopps 		}
86807569476SChristian Hopps 		fraglen = min(blkoff, remaining);
86907569476SChristian Hopps 		copylen = min(fraglen, iphremain);
87007569476SChristian Hopps 		if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
87107569476SChristian Hopps 				      copylen)) {
87207569476SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x),
87307569476SChristian Hopps 				       LINUX_MIB_XFRMINBUFFERERROR);
87407569476SChristian Hopps 			goto abandon;
87507569476SChristian Hopps 		}
87607569476SChristian Hopps 		/* this is a silly condition that might occur anyway */
87707569476SChristian Hopps 		if (copylen < iphremain) {
87807569476SChristian Hopps 			xtfs->ra_wantseq++;
87907569476SChristian Hopps 			return data + fraglen;
88007569476SChristian Hopps 		}
88107569476SChristian Hopps 		/* update data and things derived from it */
88207569476SChristian Hopps 		data += copylen;
88307569476SChristian Hopps 		blkoff -= copylen;
88407569476SChristian Hopps 		remaining -= copylen;
88507569476SChristian Hopps 		ipremain -= copylen;
88607569476SChristian Hopps 	}
88707569476SChristian Hopps 
88807569476SChristian Hopps 	fraglen = min(blkoff, remaining);
88907569476SChristian Hopps 	copylen = min(fraglen, ipremain);
89007569476SChristian Hopps 
8915f2b6a90SChristian Hopps 	/* If we may have the opportunity to share prepare a fragwalk. */
8925f2b6a90SChristian Hopps 	if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) &&
8935f2b6a90SChristian Hopps 	    (skb->head_frag || skb->len == skb->data_len) &&
8945f2b6a90SChristian Hopps 	    skb->pp_recycle == newskb->pp_recycle) {
8955f2b6a90SChristian Hopps 		fragwalk = &_fragwalk;
8965f2b6a90SChristian Hopps 		iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
8975f2b6a90SChristian Hopps 	}
8985f2b6a90SChristian Hopps 
8995f2b6a90SChristian Hopps 	/* Try share then copy. */
9005f2b6a90SChristian Hopps 	if (fragwalk &&
9015f2b6a90SChristian Hopps 	    iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
9025f2b6a90SChristian Hopps 		iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
9035f2b6a90SChristian Hopps 	} else {
90407569476SChristian Hopps 		/* copy fragment data into newskb */
9055f2b6a90SChristian Hopps 		if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
9065f2b6a90SChristian Hopps 				      copylen)) {
9075f2b6a90SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x),
9085f2b6a90SChristian Hopps 				       LINUX_MIB_XFRMINBUFFERERROR);
90907569476SChristian Hopps 			goto abandon;
91007569476SChristian Hopps 		}
9115f2b6a90SChristian Hopps 	}
91207569476SChristian Hopps 
91307569476SChristian Hopps 	if (copylen < ipremain) {
91407569476SChristian Hopps 		xtfs->ra_wantseq++;
91507569476SChristian Hopps 	} else {
91607569476SChristian Hopps 		/* We are done with packet reassembly! */
91707569476SChristian Hopps 		iptfs_reassem_done(xtfs);
91807569476SChristian Hopps 		iptfs_complete_inner_skb(xtfs->x, newskb);
91907569476SChristian Hopps 		list_add_tail(&newskb->list, list);
92007569476SChristian Hopps 	}
92107569476SChristian Hopps 
92207569476SChristian Hopps 	/* will continue on to new data block or end */
92307569476SChristian Hopps 	return data + fraglen;
92407569476SChristian Hopps 
92507569476SChristian Hopps abandon:
92607569476SChristian Hopps 	if (xtfs->ra_newskb) {
92707569476SChristian Hopps 		iptfs_reassem_abort(xtfs);
92807569476SChristian Hopps 	} else {
92907569476SChristian Hopps 		xtfs->ra_runtlen = 0;
93007569476SChristian Hopps 		xtfs->ra_wantseq = 0;
93107569476SChristian Hopps 	}
93207569476SChristian Hopps 	/* skip past fragment, maybe to end */
93307569476SChristian Hopps 	return data + min(blkoff, remaining);
93407569476SChristian Hopps }
93507569476SChristian Hopps 
9366c82d243SChristian Hopps static bool __input_process_payload(struct xfrm_state *x, u32 data,
9376c82d243SChristian Hopps 				    struct skb_seq_state *skbseq,
9386c82d243SChristian Hopps 				    struct list_head *sublist)
9396c82d243SChristian Hopps {
9406c82d243SChristian Hopps 	u8 hbytes[sizeof(struct ipv6hdr)];
9415f2b6a90SChristian Hopps 	struct iptfs_skb_frag_walk _fragwalk;
9425f2b6a90SChristian Hopps 	struct iptfs_skb_frag_walk *fragwalk = NULL;
9433f333988SChristian Hopps 	struct sk_buff *defer, *first_skb, *next, *skb;
9446c82d243SChristian Hopps 	const unsigned char *old_mac;
94507569476SChristian Hopps 	struct xfrm_iptfs_data *xtfs;
9466c82d243SChristian Hopps 	struct iphdr *iph;
9476c82d243SChristian Hopps 	struct net *net;
9483f333988SChristian Hopps 	u32 first_iplen, iphlen, iplen, remaining, tail;
94907569476SChristian Hopps 	u32 capturelen;
95007569476SChristian Hopps 	u64 seq;
9516c82d243SChristian Hopps 
95207569476SChristian Hopps 	xtfs = x->mode_data;
9536c82d243SChristian Hopps 	net = xs_net(x);
9546c82d243SChristian Hopps 	skb = skbseq->root_skb;
9556c82d243SChristian Hopps 	first_skb = NULL;
9563f333988SChristian Hopps 	defer = NULL;
9576c82d243SChristian Hopps 
95807569476SChristian Hopps 	seq = __esp_seq(skb);
95907569476SChristian Hopps 
9606c82d243SChristian Hopps 	/* Save the old mac header if set */
9616c82d243SChristian Hopps 	old_mac = skb_mac_header_was_set(skb) ? skb_mac_header(skb) : NULL;
9626c82d243SChristian Hopps 
9636c82d243SChristian Hopps 	/* New packets */
9646c82d243SChristian Hopps 
9656c82d243SChristian Hopps 	tail = skb->len;
9666c82d243SChristian Hopps 	while (data < tail) {
9676c82d243SChristian Hopps 		__be16 protocol = 0;
9686c82d243SChristian Hopps 
9696c82d243SChristian Hopps 		/* Gather information on the next data block.
9706c82d243SChristian Hopps 		 * `data` points to the start of the data block.
9716c82d243SChristian Hopps 		 */
9726c82d243SChristian Hopps 		remaining = tail - data;
9736c82d243SChristian Hopps 
9746c82d243SChristian Hopps 		/* try and copy enough bytes to read length from ipv4/ipv6 */
9756c82d243SChristian Hopps 		iphlen = min_t(u32, remaining, 6);
9766c82d243SChristian Hopps 		if (skb_copy_seq_read(skbseq, data, hbytes, iphlen)) {
9776c82d243SChristian Hopps 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
9786c82d243SChristian Hopps 			goto done;
9796c82d243SChristian Hopps 		}
9806c82d243SChristian Hopps 
9816c82d243SChristian Hopps 		iph = (struct iphdr *)hbytes;
9826c82d243SChristian Hopps 		if (iph->version == 0x4) {
9836c82d243SChristian Hopps 			/* must have at least tot_len field present */
98407569476SChristian Hopps 			if (remaining < 4) {
98507569476SChristian Hopps 				/* save the bytes we have, advance data and exit */
98607569476SChristian Hopps 				iptfs_input_save_runt(xtfs, seq, hbytes,
98707569476SChristian Hopps 						      remaining);
98807569476SChristian Hopps 				data += remaining;
9896c82d243SChristian Hopps 				break;
99007569476SChristian Hopps 			}
9916c82d243SChristian Hopps 
9926c82d243SChristian Hopps 			iplen = be16_to_cpu(iph->tot_len);
9936c82d243SChristian Hopps 			iphlen = iph->ihl << 2;
9946c82d243SChristian Hopps 			protocol = cpu_to_be16(ETH_P_IP);
9956c82d243SChristian Hopps 			XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos;
9966c82d243SChristian Hopps 		} else if (iph->version == 0x6) {
9976c82d243SChristian Hopps 			/* must have at least payload_len field present */
99807569476SChristian Hopps 			if (remaining < 6) {
99907569476SChristian Hopps 				/* save the bytes we have, advance data and exit */
100007569476SChristian Hopps 				iptfs_input_save_runt(xtfs, seq, hbytes,
100107569476SChristian Hopps 						      remaining);
100207569476SChristian Hopps 				data += remaining;
10036c82d243SChristian Hopps 				break;
100407569476SChristian Hopps 			}
10056c82d243SChristian Hopps 
10066c82d243SChristian Hopps 			iplen = be16_to_cpu(((struct ipv6hdr *)hbytes)->payload_len);
10076c82d243SChristian Hopps 			iplen += sizeof(struct ipv6hdr);
10086c82d243SChristian Hopps 			iphlen = sizeof(struct ipv6hdr);
10096c82d243SChristian Hopps 			protocol = cpu_to_be16(ETH_P_IPV6);
10106c82d243SChristian Hopps 			XFRM_MODE_SKB_CB(skbseq->root_skb)->tos =
10116c82d243SChristian Hopps 				ipv6_get_dsfield((struct ipv6hdr *)iph);
10126c82d243SChristian Hopps 		} else if (iph->version == 0x0) {
10136c82d243SChristian Hopps 			/* pad */
101407569476SChristian Hopps 			data = tail;
10156c82d243SChristian Hopps 			break;
10166c82d243SChristian Hopps 		} else {
10176c82d243SChristian Hopps 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
10186c82d243SChristian Hopps 			goto done;
10196c82d243SChristian Hopps 		}
10206c82d243SChristian Hopps 
10216c82d243SChristian Hopps 		if (unlikely(skbseq->stepped_offset)) {
10226c82d243SChristian Hopps 			/* We need to reset our seq read, it can't backup at
10236c82d243SChristian Hopps 			 * this point.
10246c82d243SChristian Hopps 			 */
10256c82d243SChristian Hopps 			struct sk_buff *save = skbseq->root_skb;
10266c82d243SChristian Hopps 
10276c82d243SChristian Hopps 			skb_abort_seq_read(skbseq);
10286c82d243SChristian Hopps 			skb_prepare_seq_read(save, data, tail, skbseq);
10296c82d243SChristian Hopps 		}
10306c82d243SChristian Hopps 
10313f333988SChristian Hopps 		if (first_skb) {
10323f333988SChristian Hopps 			skb = NULL;
10333f333988SChristian Hopps 		} else {
10346c82d243SChristian Hopps 			first_skb = skb;
10353f333988SChristian Hopps 			first_iplen = iplen;
10365f2b6a90SChristian Hopps 			fragwalk = NULL;
10373f333988SChristian Hopps 
10383f333988SChristian Hopps 			/* We are going to skip over `data` bytes to reach the
10393f333988SChristian Hopps 			 * start of the IP header of `iphlen` len for `iplen`
10403f333988SChristian Hopps 			 * inner packet.
10413f333988SChristian Hopps 			 */
10423f333988SChristian Hopps 
10433f333988SChristian Hopps 			if (skb_has_frag_list(skb)) {
10443f333988SChristian Hopps 				defer = skb;
10453f333988SChristian Hopps 				skb = NULL;
10463f333988SChristian Hopps 			} else if (data + iphlen <= skb_headlen(skb) &&
10473f333988SChristian Hopps 				   /* make sure our header is 32-bit aligned? */
10483f333988SChristian Hopps 				   /* ((uintptr_t)(skb->data + data) & 0x3) == 0 && */
10493f333988SChristian Hopps 				   skb_tailroom(skb) + tail - data >= iplen) {
10503f333988SChristian Hopps 				/* Reuse the received skb.
10513f333988SChristian Hopps 				 *
10523f333988SChristian Hopps 				 * We have enough headlen to pull past any
10533f333988SChristian Hopps 				 * initial fragment data, leaving at least the
10543f333988SChristian Hopps 				 * IP header in the linear buffer space.
10553f333988SChristian Hopps 				 *
10563f333988SChristian Hopps 				 * For linear buffer space we only require that
10573f333988SChristian Hopps 				 * linear buffer space is large enough to
10583f333988SChristian Hopps 				 * eventually hold the entire reassembled
10593f333988SChristian Hopps 				 * packet (by including tailroom in the check).
10603f333988SChristian Hopps 				 *
10613f333988SChristian Hopps 				 * For non-linear tailroom is 0 and so we only
10623f333988SChristian Hopps 				 * re-use if the entire packet is present
10633f333988SChristian Hopps 				 * already.
10643f333988SChristian Hopps 				 *
10653f333988SChristian Hopps 				 * NOTE: there are many more options for
10663f333988SChristian Hopps 				 * sharing, KISS for now. Also, this can produce
10673f333988SChristian Hopps 				 * skb's with the IP header unaligned to 32
10683f333988SChristian Hopps 				 * bits. If that ends up being a problem then a
10693f333988SChristian Hopps 				 * check should be added to the conditional
10703f333988SChristian Hopps 				 * above that the header lies on a 32-bit
10713f333988SChristian Hopps 				 * boundary as well.
10723f333988SChristian Hopps 				 */
10733f333988SChristian Hopps 				skb_pull(skb, data);
10743f333988SChristian Hopps 
10753f333988SChristian Hopps 				/* our range just changed */
10763f333988SChristian Hopps 				data = 0;
10773f333988SChristian Hopps 				tail = skb->len;
10783f333988SChristian Hopps 				remaining = skb->len;
10793f333988SChristian Hopps 
10803f333988SChristian Hopps 				skb->protocol = protocol;
10813f333988SChristian Hopps 				skb_mac_header_rebuild(skb);
10823f333988SChristian Hopps 				if (skb->mac_len)
10833f333988SChristian Hopps 					eth_hdr(skb)->h_proto = skb->protocol;
10843f333988SChristian Hopps 
10853f333988SChristian Hopps 				/* all pointers could be changed now reset walk */
10863f333988SChristian Hopps 				skb_abort_seq_read(skbseq);
10873f333988SChristian Hopps 				skb_prepare_seq_read(skb, data, tail, skbseq);
10885f2b6a90SChristian Hopps 			} else if (skb->head_frag &&
10895f2b6a90SChristian Hopps 				   /* We have the IP header right now */
10905f2b6a90SChristian Hopps 				   remaining >= iphlen) {
10915f2b6a90SChristian Hopps 				fragwalk = &_fragwalk;
10925f2b6a90SChristian Hopps 				iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
10935f2b6a90SChristian Hopps 				defer = skb;
10945f2b6a90SChristian Hopps 				skb = NULL;
10953f333988SChristian Hopps 			} else {
10963f333988SChristian Hopps 				/* We couldn't reuse the input skb so allocate a
10973f333988SChristian Hopps 				 * new one.
10983f333988SChristian Hopps 				 */
10993f333988SChristian Hopps 				defer = skb;
11003f333988SChristian Hopps 				skb = NULL;
11013f333988SChristian Hopps 			}
11023f333988SChristian Hopps 
11033f333988SChristian Hopps 			/* Don't trim `first_skb` until the end as we are
11043f333988SChristian Hopps 			 * walking that data now.
11053f333988SChristian Hopps 			 */
11063f333988SChristian Hopps 		}
11076c82d243SChristian Hopps 
110807569476SChristian Hopps 		capturelen = min(iplen, remaining);
11093f333988SChristian Hopps 		if (!skb) {
11105f2b6a90SChristian Hopps 			if (!fragwalk ||
11115f2b6a90SChristian Hopps 			    /* Large enough to be worth sharing */
11125f2b6a90SChristian Hopps 			    iplen < IPTFS_PKT_SHARE_MIN ||
11135f2b6a90SChristian Hopps 			    /* Have IP header + some data to share. */
11145f2b6a90SChristian Hopps 			    capturelen <= iphlen ||
11155f2b6a90SChristian Hopps 			    /* Try creating skb and adding frags */
11165f2b6a90SChristian Hopps 			    !(skb = iptfs_pskb_add_frags(first_skb, fragwalk,
11175f2b6a90SChristian Hopps 							 data, capturelen,
11185f2b6a90SChristian Hopps 							 skbseq, iphlen))) {
11195f2b6a90SChristian Hopps 				skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen);
11205f2b6a90SChristian Hopps 			}
11216c82d243SChristian Hopps 			if (!skb) {
11226c82d243SChristian Hopps 				/* skip to next packet or done */
112307569476SChristian Hopps 				data += capturelen;
11246c82d243SChristian Hopps 				continue;
11256c82d243SChristian Hopps 			}
11266c82d243SChristian Hopps 
11276c82d243SChristian Hopps 			skb->protocol = protocol;
11286c82d243SChristian Hopps 			if (old_mac) {
11296c82d243SChristian Hopps 				/* rebuild the mac header */
11306c82d243SChristian Hopps 				skb_set_mac_header(skb, -first_skb->mac_len);
11316c82d243SChristian Hopps 				memcpy(skb_mac_header(skb), old_mac, first_skb->mac_len);
11326c82d243SChristian Hopps 				eth_hdr(skb)->h_proto = skb->protocol;
11336c82d243SChristian Hopps 			}
11343f333988SChristian Hopps 		}
11356c82d243SChristian Hopps 
113607569476SChristian Hopps 		data += capturelen;
113707569476SChristian Hopps 
113807569476SChristian Hopps 		if (skb->len < iplen) {
113907569476SChristian Hopps 			/* Start reassembly */
114007569476SChristian Hopps 			spin_lock(&xtfs->drop_lock);
114107569476SChristian Hopps 
114207569476SChristian Hopps 			xtfs->ra_newskb = skb;
114307569476SChristian Hopps 			xtfs->ra_wantseq = seq + 1;
114407569476SChristian Hopps 			if (!hrtimer_is_queued(&xtfs->drop_timer)) {
114507569476SChristian Hopps 				/* softirq blocked lest the timer fire and interrupt us */
114607569476SChristian Hopps 				hrtimer_start(&xtfs->drop_timer,
114707569476SChristian Hopps 					      xtfs->drop_time_ns,
114807569476SChristian Hopps 					      IPTFS_HRTIMER_MODE);
114907569476SChristian Hopps 			}
115007569476SChristian Hopps 
115107569476SChristian Hopps 			spin_unlock(&xtfs->drop_lock);
115207569476SChristian Hopps 
115307569476SChristian Hopps 			break;
115407569476SChristian Hopps 		}
115507569476SChristian Hopps 
11566c82d243SChristian Hopps 		iptfs_complete_inner_skb(x, skb);
11576c82d243SChristian Hopps 		list_add_tail(&skb->list, sublist);
11586c82d243SChristian Hopps 	}
11596c82d243SChristian Hopps 
116007569476SChristian Hopps 	if (data != tail)
116107569476SChristian Hopps 		/* this should not happen from the above code */
116207569476SChristian Hopps 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINIPTFSERROR);
116307569476SChristian Hopps 
11643f333988SChristian Hopps 	if (first_skb && first_iplen && !defer && first_skb != xtfs->ra_newskb) {
11653f333988SChristian Hopps 		/* first_skb is queued b/c !defer and not partial */
11663f333988SChristian Hopps 		if (pskb_trim(first_skb, first_iplen)) {
11673f333988SChristian Hopps 			/* error trimming */
11683f333988SChristian Hopps 			list_del(&first_skb->list);
11693f333988SChristian Hopps 			defer = first_skb;
11703f333988SChristian Hopps 		}
11713f333988SChristian Hopps 		first_skb->ip_summed = CHECKSUM_NONE;
11723f333988SChristian Hopps 	}
11733f333988SChristian Hopps 
11746c82d243SChristian Hopps 	/* Send the packets! */
11756c82d243SChristian Hopps 	list_for_each_entry_safe(skb, next, sublist, list) {
11766c82d243SChristian Hopps 		skb_list_del_init(skb);
11776c82d243SChristian Hopps 		if (xfrm_input(skb, 0, 0, -2))
11786c82d243SChristian Hopps 			kfree_skb(skb);
11796c82d243SChristian Hopps 	}
11806c82d243SChristian Hopps done:
11813f333988SChristian Hopps 	skb = skbseq->root_skb;
11823f333988SChristian Hopps 	skb_abort_seq_read(skbseq);
11833f333988SChristian Hopps 
11843f333988SChristian Hopps 	if (defer) {
11853f333988SChristian Hopps 		consume_skb(defer);
11863f333988SChristian Hopps 	} else if (!first_skb) {
11873f333988SChristian Hopps 		/* skb is the original passed in skb, but we didn't get far
11883f333988SChristian Hopps 		 * enough to process it as the first_skb, if we had it would
11893f333988SChristian Hopps 		 * either be save in ra_newskb, trimmed and sent on as an skb or
11903f333988SChristian Hopps 		 * placed in defer to be freed.
11913f333988SChristian Hopps 		 */
11923f333988SChristian Hopps 		kfree_skb(skb);
11933f333988SChristian Hopps 	}
11943f333988SChristian Hopps 	return true;
11956c82d243SChristian Hopps }
11966c82d243SChristian Hopps 
11976c82d243SChristian Hopps /**
11986be02e3eSChristian Hopps  * iptfs_input_ordered() - handle next in order IPTFS payload.
11996c82d243SChristian Hopps  * @x: xfrm state
12006be02e3eSChristian Hopps  * @skb: current packet
12016c82d243SChristian Hopps  *
12026c82d243SChristian Hopps  * Process the IPTFS payload in `skb` and consume it afterwards.
12036c82d243SChristian Hopps  */
12046be02e3eSChristian Hopps static void iptfs_input_ordered(struct xfrm_state *x, struct sk_buff *skb)
12056c82d243SChristian Hopps {
12066c82d243SChristian Hopps 	struct ip_iptfs_cc_hdr iptcch;
12076c82d243SChristian Hopps 	struct skb_seq_state skbseq;
12086c82d243SChristian Hopps 	struct list_head sublist; /* rename this it's just a list */
120907569476SChristian Hopps 	struct xfrm_iptfs_data *xtfs;
12106c82d243SChristian Hopps 	struct ip_iptfs_hdr *ipth;
12116c82d243SChristian Hopps 	struct net *net;
121207569476SChristian Hopps 	u32 blkoff, data, remaining;
12136c82d243SChristian Hopps 	bool consumed = false;
121407569476SChristian Hopps 	u64 seq;
12156c82d243SChristian Hopps 
121607569476SChristian Hopps 	xtfs = x->mode_data;
12176c82d243SChristian Hopps 	net = xs_net(x);
12186c82d243SChristian Hopps 
121907569476SChristian Hopps 	seq = __esp_seq(skb);
122007569476SChristian Hopps 
12216c82d243SChristian Hopps 	/* Large enough to hold both types of header */
12226c82d243SChristian Hopps 	ipth = (struct ip_iptfs_hdr *)&iptcch;
12236c82d243SChristian Hopps 
12246c82d243SChristian Hopps 	skb_prepare_seq_read(skb, 0, skb->len, &skbseq);
12256c82d243SChristian Hopps 
12266c82d243SChristian Hopps 	/* Get the IPTFS header and validate it */
12276c82d243SChristian Hopps 
12286c82d243SChristian Hopps 	if (skb_copy_seq_read(&skbseq, 0, ipth, sizeof(*ipth))) {
12296c82d243SChristian Hopps 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
12306c82d243SChristian Hopps 		goto done;
12316c82d243SChristian Hopps 	}
12326c82d243SChristian Hopps 	data = sizeof(*ipth);
12336c82d243SChristian Hopps 
1234ed58b186SChristian Hopps 	trace_iptfs_egress_recv(skb, xtfs, be16_to_cpu(ipth->block_offset));
1235ed58b186SChristian Hopps 
12366c82d243SChristian Hopps 	/* Set data past the basic header */
12376c82d243SChristian Hopps 	if (ipth->subtype == IPTFS_SUBTYPE_CC) {
12386c82d243SChristian Hopps 		/* Copy the rest of the CC header */
12396c82d243SChristian Hopps 		remaining = sizeof(iptcch) - sizeof(*ipth);
12406c82d243SChristian Hopps 		if (skb_copy_seq_read(&skbseq, data, ipth + 1, remaining)) {
12416c82d243SChristian Hopps 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
12426c82d243SChristian Hopps 			goto done;
12436c82d243SChristian Hopps 		}
12446c82d243SChristian Hopps 		data += remaining;
12456c82d243SChristian Hopps 	} else if (ipth->subtype != IPTFS_SUBTYPE_BASIC) {
12466c82d243SChristian Hopps 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
12476c82d243SChristian Hopps 		goto done;
12486c82d243SChristian Hopps 	}
12496c82d243SChristian Hopps 
12506c82d243SChristian Hopps 	if (ipth->flags != 0) {
12516c82d243SChristian Hopps 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
12526c82d243SChristian Hopps 		goto done;
12536c82d243SChristian Hopps 	}
12546c82d243SChristian Hopps 
12556c82d243SChristian Hopps 	INIT_LIST_HEAD(&sublist);
12566c82d243SChristian Hopps 
125707569476SChristian Hopps 	/* Handle fragment at start of payload, and/or waiting reassembly. */
125807569476SChristian Hopps 
125907569476SChristian Hopps 	blkoff = ntohs(ipth->block_offset);
126007569476SChristian Hopps 	/* check before locking i.e., maybe */
126107569476SChristian Hopps 	if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) {
126207569476SChristian Hopps 		spin_lock(&xtfs->drop_lock);
126307569476SChristian Hopps 
126407569476SChristian Hopps 		/* check again after lock */
126507569476SChristian Hopps 		if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) {
126607569476SChristian Hopps 			data = iptfs_reassem_cont(xtfs, seq, &skbseq, skb, data,
126707569476SChristian Hopps 						  blkoff, &sublist);
126807569476SChristian Hopps 		}
126907569476SChristian Hopps 
127007569476SChristian Hopps 		spin_unlock(&xtfs->drop_lock);
127107569476SChristian Hopps 	}
12726c82d243SChristian Hopps 
12736c82d243SChristian Hopps 	/* New packets */
12746c82d243SChristian Hopps 	consumed = __input_process_payload(x, data, &skbseq, &sublist);
12756c82d243SChristian Hopps done:
127607569476SChristian Hopps 	if (!consumed) {
12776c82d243SChristian Hopps 		skb = skbseq.root_skb;
12786c82d243SChristian Hopps 		skb_abort_seq_read(&skbseq);
12796c82d243SChristian Hopps 		kfree_skb(skb);
128007569476SChristian Hopps 	}
12816be02e3eSChristian Hopps }
12826c82d243SChristian Hopps 
12836be02e3eSChristian Hopps /* ------------------------------- */
12846be02e3eSChristian Hopps /* Input (Egress) Re-ordering Code */
12856be02e3eSChristian Hopps /* ------------------------------- */
12866be02e3eSChristian Hopps 
12876be02e3eSChristian Hopps static void __vec_shift(struct xfrm_iptfs_data *xtfs, u32 shift)
12886be02e3eSChristian Hopps {
12896be02e3eSChristian Hopps 	u32 savedlen = xtfs->w_savedlen;
12906be02e3eSChristian Hopps 
12916be02e3eSChristian Hopps 	if (shift > savedlen)
12926be02e3eSChristian Hopps 		shift = savedlen;
12936be02e3eSChristian Hopps 	if (shift != savedlen)
12946be02e3eSChristian Hopps 		memcpy(xtfs->w_saved, xtfs->w_saved + shift,
12956be02e3eSChristian Hopps 		       (savedlen - shift) * sizeof(*xtfs->w_saved));
12966be02e3eSChristian Hopps 	memset(xtfs->w_saved + savedlen - shift, 0,
12976be02e3eSChristian Hopps 	       shift * sizeof(*xtfs->w_saved));
12986be02e3eSChristian Hopps 	xtfs->w_savedlen -= shift;
12996be02e3eSChristian Hopps }
13006be02e3eSChristian Hopps 
13016be02e3eSChristian Hopps static void __reorder_past(struct xfrm_iptfs_data *xtfs, struct sk_buff *inskb,
13026be02e3eSChristian Hopps 			   struct list_head *freelist)
13036be02e3eSChristian Hopps {
13046be02e3eSChristian Hopps 	list_add_tail(&inskb->list, freelist);
13056be02e3eSChristian Hopps }
13066be02e3eSChristian Hopps 
13076be02e3eSChristian Hopps static u32 __reorder_drop(struct xfrm_iptfs_data *xtfs, struct list_head *list)
13086be02e3eSChristian Hopps 
13096be02e3eSChristian Hopps {
13106be02e3eSChristian Hopps 	struct skb_wseq *s, *se;
13116be02e3eSChristian Hopps 	const u32 savedlen = xtfs->w_savedlen;
13126be02e3eSChristian Hopps 	time64_t now = ktime_get_raw_fast_ns();
13136be02e3eSChristian Hopps 	u32 count = 0;
13146be02e3eSChristian Hopps 	u32 scount = 0;
13156be02e3eSChristian Hopps 
13166be02e3eSChristian Hopps 	if (xtfs->w_saved[0].drop_time > now)
13176be02e3eSChristian Hopps 		goto set_timer;
13186be02e3eSChristian Hopps 
13196be02e3eSChristian Hopps 	++xtfs->w_wantseq;
13206be02e3eSChristian Hopps 
13216be02e3eSChristian Hopps 	/* Keep flushing packets until we reach a drop time greater than now. */
13226be02e3eSChristian Hopps 	s = xtfs->w_saved;
13236be02e3eSChristian Hopps 	se = s + savedlen;
13246be02e3eSChristian Hopps 	do {
13256be02e3eSChristian Hopps 		/* Walking past empty slots until we reach a packet */
13266be02e3eSChristian Hopps 		for (; s < se && !s->skb; s++) {
13276be02e3eSChristian Hopps 			if (s->drop_time > now)
13286be02e3eSChristian Hopps 				goto outerdone;
13296be02e3eSChristian Hopps 		}
13306be02e3eSChristian Hopps 		/* Sending packets until we hit another empty slot. */
13316be02e3eSChristian Hopps 		for (; s < se && s->skb; scount++, s++)
13326be02e3eSChristian Hopps 			list_add_tail(&s->skb->list, list);
13336be02e3eSChristian Hopps 	} while (s < se);
13346be02e3eSChristian Hopps outerdone:
13356be02e3eSChristian Hopps 
13366be02e3eSChristian Hopps 	count = s - xtfs->w_saved;
13376be02e3eSChristian Hopps 	if (count) {
13386be02e3eSChristian Hopps 		xtfs->w_wantseq += count;
13396be02e3eSChristian Hopps 
13406be02e3eSChristian Hopps 		/* Shift handled slots plus final empty slot into slot 0. */
13416be02e3eSChristian Hopps 		__vec_shift(xtfs, count);
13426be02e3eSChristian Hopps 	}
13436be02e3eSChristian Hopps 
13446be02e3eSChristian Hopps 	if (xtfs->w_savedlen) {
13456be02e3eSChristian Hopps set_timer:
13466be02e3eSChristian Hopps 		/* Drifting is OK */
13476be02e3eSChristian Hopps 		hrtimer_start(&xtfs->drop_timer,
13486be02e3eSChristian Hopps 			      xtfs->w_saved[0].drop_time - now,
13496be02e3eSChristian Hopps 			      IPTFS_HRTIMER_MODE);
13506be02e3eSChristian Hopps 	}
13516be02e3eSChristian Hopps 	return scount;
13526be02e3eSChristian Hopps }
13536be02e3eSChristian Hopps 
13546be02e3eSChristian Hopps static void __reorder_this(struct xfrm_iptfs_data *xtfs, struct sk_buff *inskb,
13556be02e3eSChristian Hopps 			   struct list_head *list)
13566be02e3eSChristian Hopps {
13576be02e3eSChristian Hopps 	struct skb_wseq *s, *se;
13586be02e3eSChristian Hopps 	const u32 savedlen = xtfs->w_savedlen;
13596be02e3eSChristian Hopps 	u32 count = 0;
13606be02e3eSChristian Hopps 
13616be02e3eSChristian Hopps 	/* Got what we wanted. */
13626be02e3eSChristian Hopps 	list_add_tail(&inskb->list, list);
13636be02e3eSChristian Hopps 	++xtfs->w_wantseq;
13646be02e3eSChristian Hopps 	if (!savedlen)
13656be02e3eSChristian Hopps 		return;
13666be02e3eSChristian Hopps 
13676be02e3eSChristian Hopps 	/* Flush remaining consecutive packets. */
13686be02e3eSChristian Hopps 
13696be02e3eSChristian Hopps 	/* Keep sending until we hit another missed pkt. */
13706be02e3eSChristian Hopps 	for (s = xtfs->w_saved, se = s + savedlen; s < se && s->skb; s++)
13716be02e3eSChristian Hopps 		list_add_tail(&s->skb->list, list);
13726be02e3eSChristian Hopps 	count = s - xtfs->w_saved;
13736be02e3eSChristian Hopps 	if (count)
13746be02e3eSChristian Hopps 		xtfs->w_wantseq += count;
13756be02e3eSChristian Hopps 
13766be02e3eSChristian Hopps 	/* Shift handled slots plus final empty slot into slot 0. */
13776be02e3eSChristian Hopps 	__vec_shift(xtfs, count + 1);
13786be02e3eSChristian Hopps }
13796be02e3eSChristian Hopps 
13806be02e3eSChristian Hopps /* Set the slot's drop time and all the empty slots below it until reaching a
13816be02e3eSChristian Hopps  * filled slot which will already be set.
13826c82d243SChristian Hopps  */
13836be02e3eSChristian Hopps static void iptfs_set_window_drop_times(struct xfrm_iptfs_data *xtfs, int index)
13846be02e3eSChristian Hopps {
13856be02e3eSChristian Hopps 	const u32 savedlen = xtfs->w_savedlen;
13866be02e3eSChristian Hopps 	struct skb_wseq *s = xtfs->w_saved;
13876be02e3eSChristian Hopps 	time64_t drop_time;
13886be02e3eSChristian Hopps 
13896be02e3eSChristian Hopps 	assert_spin_locked(&xtfs->drop_lock);
13906be02e3eSChristian Hopps 
13916be02e3eSChristian Hopps 	if (savedlen > index + 1) {
13926be02e3eSChristian Hopps 		/* we are below another, our drop time and the timer are already set */
13936be02e3eSChristian Hopps 		return;
13946be02e3eSChristian Hopps 	}
13956be02e3eSChristian Hopps 	/* we are the most future so get a new drop time. */
13966be02e3eSChristian Hopps 	drop_time = ktime_get_raw_fast_ns();
13976be02e3eSChristian Hopps 	drop_time += xtfs->drop_time_ns;
13986be02e3eSChristian Hopps 
13996be02e3eSChristian Hopps 	/* Walk back through the array setting drop times as we go */
14006be02e3eSChristian Hopps 	s[index].drop_time = drop_time;
14016be02e3eSChristian Hopps 	while (index-- > 0 && !s[index].skb)
14026be02e3eSChristian Hopps 		s[index].drop_time = drop_time;
14036be02e3eSChristian Hopps 
14046be02e3eSChristian Hopps 	/* If we walked all the way back, schedule the drop timer if needed */
14056be02e3eSChristian Hopps 	if (index == -1 && !hrtimer_is_queued(&xtfs->drop_timer))
14066be02e3eSChristian Hopps 		hrtimer_start(&xtfs->drop_timer, xtfs->drop_time_ns,
14076be02e3eSChristian Hopps 			      IPTFS_HRTIMER_MODE);
14086be02e3eSChristian Hopps }
14096be02e3eSChristian Hopps 
14106be02e3eSChristian Hopps static void __reorder_future_fits(struct xfrm_iptfs_data *xtfs,
14116be02e3eSChristian Hopps 				  struct sk_buff *inskb,
14126be02e3eSChristian Hopps 				  struct list_head *freelist)
14136be02e3eSChristian Hopps {
14146be02e3eSChristian Hopps 	const u64 inseq = __esp_seq(inskb);
14156be02e3eSChristian Hopps 	const u64 wantseq = xtfs->w_wantseq;
14166be02e3eSChristian Hopps 	const u64 distance = inseq - wantseq;
14176be02e3eSChristian Hopps 	const u32 savedlen = xtfs->w_savedlen;
14186be02e3eSChristian Hopps 	const u32 index = distance - 1;
14196be02e3eSChristian Hopps 
14206be02e3eSChristian Hopps 	/* Handle future sequence number received which fits in the window.
14216be02e3eSChristian Hopps 	 *
14226be02e3eSChristian Hopps 	 * We know we don't have the seq we want so we won't be able to flush
14236be02e3eSChristian Hopps 	 * anything.
14246be02e3eSChristian Hopps 	 */
14256be02e3eSChristian Hopps 
14266be02e3eSChristian Hopps 	/* slot count is 4, saved size is 3 savedlen is 2
14276be02e3eSChristian Hopps 	 *
14286be02e3eSChristian Hopps 	 * "window boundary" is based on the fixed window size
14296be02e3eSChristian Hopps 	 * distance is also slot number
14306be02e3eSChristian Hopps 	 * index is an array index (i.e., - 1 of slot)
14316be02e3eSChristian Hopps 	 * : : - implicit NULL after array len
14326be02e3eSChristian Hopps 	 *
14336be02e3eSChristian Hopps 	 *          +--------- used length (savedlen == 2)
14346be02e3eSChristian Hopps 	 *          |   +----- array size (nslots - 1 == 3)
14356be02e3eSChristian Hopps 	 *          |   |   + window boundary (nslots == 4)
14366be02e3eSChristian Hopps 	 *          V   V | V
14376be02e3eSChristian Hopps 	 *                |
14386be02e3eSChristian Hopps 	 *  0   1   2   3 |   slot number
14396be02e3eSChristian Hopps 	 * ---  0   1   2 |   array index
14406be02e3eSChristian Hopps 	 *     [-] [b] : :|   array
14416be02e3eSChristian Hopps 	 *
14426be02e3eSChristian Hopps 	 * "2" "3" "4" *5*|   seq numbers
14436be02e3eSChristian Hopps 	 *
14446be02e3eSChristian Hopps 	 * We receive seq number 5
14456be02e3eSChristian Hopps 	 * distance == 3 [inseq(5) - w_wantseq(2)]
14466be02e3eSChristian Hopps 	 * index == 2 [distance(6) - 1]
14476be02e3eSChristian Hopps 	 */
14486be02e3eSChristian Hopps 
14496be02e3eSChristian Hopps 	if (xtfs->w_saved[index].skb) {
14506be02e3eSChristian Hopps 		/* a dup of a future */
14516be02e3eSChristian Hopps 		list_add_tail(&inskb->list, freelist);
14526be02e3eSChristian Hopps 		return;
14536be02e3eSChristian Hopps 	}
14546be02e3eSChristian Hopps 
14556be02e3eSChristian Hopps 	xtfs->w_saved[index].skb = inskb;
14566be02e3eSChristian Hopps 	xtfs->w_savedlen = max(savedlen, index + 1);
14576be02e3eSChristian Hopps 	iptfs_set_window_drop_times(xtfs, index);
14586be02e3eSChristian Hopps }
14596be02e3eSChristian Hopps 
14606be02e3eSChristian Hopps static void __reorder_future_shifts(struct xfrm_iptfs_data *xtfs,
14616be02e3eSChristian Hopps 				    struct sk_buff *inskb,
14626be02e3eSChristian Hopps 				    struct list_head *list)
14636be02e3eSChristian Hopps {
14646be02e3eSChristian Hopps 	const u32 nslots = xtfs->cfg.reorder_win_size + 1;
14656be02e3eSChristian Hopps 	const u64 inseq = __esp_seq(inskb);
14666be02e3eSChristian Hopps 	u32 savedlen = xtfs->w_savedlen;
14676be02e3eSChristian Hopps 	u64 wantseq = xtfs->w_wantseq;
14686be02e3eSChristian Hopps 	struct skb_wseq *wnext;
14696be02e3eSChristian Hopps 	struct sk_buff *slot0;
14706be02e3eSChristian Hopps 	u32 beyond, shifting, slot;
14716be02e3eSChristian Hopps 	u64 distance;
14726be02e3eSChristian Hopps 
14736be02e3eSChristian Hopps 	/* Handle future sequence number received.
14746be02e3eSChristian Hopps 	 *
14756be02e3eSChristian Hopps 	 * IMPORTANT: we are at least advancing w_wantseq (i.e., wantseq) by 1
14766be02e3eSChristian Hopps 	 * b/c we are beyond the window boundary.
14776be02e3eSChristian Hopps 	 *
14786be02e3eSChristian Hopps 	 * We know we don't have the wantseq so that counts as a drop.
14796be02e3eSChristian Hopps 	 */
14806be02e3eSChristian Hopps 
14816be02e3eSChristian Hopps 	/* example: slot count is 4, array size is 3 savedlen is 2, slot 0 is
14826be02e3eSChristian Hopps 	 * the missing sequence number.
14836be02e3eSChristian Hopps 	 *
14846be02e3eSChristian Hopps 	 * the final slot at savedlen (index savedlen - 1) is always occupied.
14856be02e3eSChristian Hopps 	 *
14866be02e3eSChristian Hopps 	 * beyond is "beyond array size" not savedlen.
14876be02e3eSChristian Hopps 	 *
14886be02e3eSChristian Hopps 	 *          +--------- array length (savedlen == 2)
14896be02e3eSChristian Hopps 	 *          |   +----- array size (nslots - 1 == 3)
14906be02e3eSChristian Hopps 	 *          |   | +- window boundary (nslots == 4)
14916be02e3eSChristian Hopps 	 *          V   V |
14926be02e3eSChristian Hopps 	 *                |
14936be02e3eSChristian Hopps 	 *  0   1   2   3 |   slot number
14946be02e3eSChristian Hopps 	 * ---  0   1   2 |   array index
14956be02e3eSChristian Hopps 	 *     [b] [c] : :|   array
14966be02e3eSChristian Hopps 	 *                |
14976be02e3eSChristian Hopps 	 * "2" "3" "4" "5"|*6*  seq numbers
14986be02e3eSChristian Hopps 	 *
14996be02e3eSChristian Hopps 	 * We receive seq number 6
15006be02e3eSChristian Hopps 	 * distance == 4 [inseq(6) - w_wantseq(2)]
15016be02e3eSChristian Hopps 	 * newslot == distance
15026be02e3eSChristian Hopps 	 * index == 3 [distance(4) - 1]
15036be02e3eSChristian Hopps 	 * beyond == 1 [newslot(4) - lastslot((nslots(4) - 1))]
15046be02e3eSChristian Hopps 	 * shifting == 1 [min(savedlen(2), beyond(1)]
15056be02e3eSChristian Hopps 	 * slot0_skb == [b], and should match w_wantseq
15066be02e3eSChristian Hopps 	 *
15076be02e3eSChristian Hopps 	 *                +--- window boundary (nslots == 4)
15086be02e3eSChristian Hopps 	 *  0   1   2   3 | 4   slot number
15096be02e3eSChristian Hopps 	 * ---  0   1   2 | 3   array index
15106be02e3eSChristian Hopps 	 *     [b] : : : :|     array
15116be02e3eSChristian Hopps 	 * "2" "3" "4" "5" *6*  seq numbers
15126be02e3eSChristian Hopps 	 *
15136be02e3eSChristian Hopps 	 * We receive seq number 6
15146be02e3eSChristian Hopps 	 * distance == 4 [inseq(6) - w_wantseq(2)]
15156be02e3eSChristian Hopps 	 * newslot == distance
15166be02e3eSChristian Hopps 	 * index == 3 [distance(4) - 1]
15176be02e3eSChristian Hopps 	 * beyond == 1 [newslot(4) - lastslot((nslots(4) - 1))]
15186be02e3eSChristian Hopps 	 * shifting == 1 [min(savedlen(1), beyond(1)]
15196be02e3eSChristian Hopps 	 * slot0_skb == [b] and should match w_wantseq
15206be02e3eSChristian Hopps 	 *
15216be02e3eSChristian Hopps 	 *                +-- window boundary (nslots == 4)
15226be02e3eSChristian Hopps 	 *  0   1   2   3 | 4   5   6   slot number
15236be02e3eSChristian Hopps 	 * ---  0   1   2 | 3   4   5   array index
15246be02e3eSChristian Hopps 	 *     [-] [c] : :|             array
15256be02e3eSChristian Hopps 	 * "2" "3" "4" "5" "6" "7" *8*  seq numbers
15266be02e3eSChristian Hopps 	 *
15276be02e3eSChristian Hopps 	 * savedlen = 2, beyond = 3
15286be02e3eSChristian Hopps 	 * iter 1: slot0 == NULL, missed++, lastdrop = 2 (2+1-1), slot0 = [-]
15296be02e3eSChristian Hopps 	 * iter 2: slot0 == NULL, missed++, lastdrop = 3 (2+2-1), slot0 = [c]
15306be02e3eSChristian Hopps 	 * 2 < 3, extra = 1 (3-2), missed += extra, lastdrop = 4 (2+2+1-1)
15316be02e3eSChristian Hopps 	 *
15326be02e3eSChristian Hopps 	 * We receive seq number 8
15336be02e3eSChristian Hopps 	 * distance == 6 [inseq(8) - w_wantseq(2)]
15346be02e3eSChristian Hopps 	 * newslot == distance
15356be02e3eSChristian Hopps 	 * index == 5 [distance(6) - 1]
15366be02e3eSChristian Hopps 	 * beyond == 3 [newslot(6) - lastslot((nslots(4) - 1))]
15376be02e3eSChristian Hopps 	 * shifting == 2 [min(savedlen(2), beyond(3)]
15386be02e3eSChristian Hopps 	 *
15396be02e3eSChristian Hopps 	 * slot0_skb == NULL changed from [b] when "savedlen < beyond" is true.
15406be02e3eSChristian Hopps 	 */
15416be02e3eSChristian Hopps 
15426be02e3eSChristian Hopps 	/* Now send any packets that are being shifted out of saved, and account
15436be02e3eSChristian Hopps 	 * for missing packets that are exiting the window as we shift it.
15446be02e3eSChristian Hopps 	 */
15456be02e3eSChristian Hopps 
15466be02e3eSChristian Hopps 	distance = inseq - wantseq;
15476be02e3eSChristian Hopps 	beyond = distance - (nslots - 1);
15486be02e3eSChristian Hopps 
15496be02e3eSChristian Hopps 	/* If savedlen > beyond we are shifting some, else all. */
15506be02e3eSChristian Hopps 	shifting = min(savedlen, beyond);
15516be02e3eSChristian Hopps 
15526be02e3eSChristian Hopps 	/* slot0 is the buf that just shifted out and into slot0 */
15536be02e3eSChristian Hopps 	slot0 = NULL;
15546be02e3eSChristian Hopps 	wnext = xtfs->w_saved;
15556be02e3eSChristian Hopps 	for (slot = 1; slot <= shifting; slot++, wnext++) {
15566be02e3eSChristian Hopps 		/* handle what was in slot0 before we occupy it */
15576be02e3eSChristian Hopps 		if (slot0)
15586be02e3eSChristian Hopps 			list_add_tail(&slot0->list, list);
15596be02e3eSChristian Hopps 		slot0 = wnext->skb;
15606be02e3eSChristian Hopps 		wnext->skb = NULL;
15616be02e3eSChristian Hopps 	}
15626be02e3eSChristian Hopps 
15636be02e3eSChristian Hopps 	/* slot0 is now either NULL (in which case it's what we now are waiting
15646be02e3eSChristian Hopps 	 * for, or a buf in which case we need to handle it like we received it;
15656be02e3eSChristian Hopps 	 * however, we may be advancing past that buffer as well..
15666be02e3eSChristian Hopps 	 */
15676be02e3eSChristian Hopps 
15686be02e3eSChristian Hopps 	/* Handle case where we need to shift more than we had saved, slot0 will
15696be02e3eSChristian Hopps 	 * be NULL iff savedlen is 0, otherwise slot0 will always be
15706be02e3eSChristian Hopps 	 * non-NULL b/c we shifted the final element, which is always set if
15716be02e3eSChristian Hopps 	 * there is any saved, into slot0.
15726be02e3eSChristian Hopps 	 */
15736be02e3eSChristian Hopps 	if (savedlen < beyond) {
15746be02e3eSChristian Hopps 		if (savedlen != 0)
15756be02e3eSChristian Hopps 			list_add_tail(&slot0->list, list);
15766be02e3eSChristian Hopps 		slot0 = NULL;
15776be02e3eSChristian Hopps 		/* slot0 has had an empty slot pushed into it */
15786be02e3eSChristian Hopps 	}
15796be02e3eSChristian Hopps 
15806be02e3eSChristian Hopps 	/* Remove the entries */
15816be02e3eSChristian Hopps 	__vec_shift(xtfs, beyond);
15826be02e3eSChristian Hopps 
15836be02e3eSChristian Hopps 	/* Advance want seq */
15846be02e3eSChristian Hopps 	xtfs->w_wantseq += beyond;
15856be02e3eSChristian Hopps 
15866be02e3eSChristian Hopps 	/* Process drops here when implementing congestion control */
15876be02e3eSChristian Hopps 
15886be02e3eSChristian Hopps 	/* We've shifted. plug the packet in at the end. */
15896be02e3eSChristian Hopps 	xtfs->w_savedlen = nslots - 1;
15906be02e3eSChristian Hopps 	xtfs->w_saved[xtfs->w_savedlen - 1].skb = inskb;
15916be02e3eSChristian Hopps 	iptfs_set_window_drop_times(xtfs, xtfs->w_savedlen - 1);
15926be02e3eSChristian Hopps 
15936be02e3eSChristian Hopps 	/* if we don't have a slot0 then we must wait for it */
15946be02e3eSChristian Hopps 	if (!slot0)
15956be02e3eSChristian Hopps 		return;
15966be02e3eSChristian Hopps 
15976be02e3eSChristian Hopps 	/* If slot0, seq must match new want seq */
15986be02e3eSChristian Hopps 
15996be02e3eSChristian Hopps 	/* slot0 is valid, treat like we received expected. */
16006be02e3eSChristian Hopps 	__reorder_this(xtfs, slot0, list);
16016be02e3eSChristian Hopps }
16026be02e3eSChristian Hopps 
16036be02e3eSChristian Hopps /* Receive a new packet into the reorder window. Return a list of ordered
16046be02e3eSChristian Hopps  * packets from the window.
16056be02e3eSChristian Hopps  */
16066be02e3eSChristian Hopps static void iptfs_input_reorder(struct xfrm_iptfs_data *xtfs,
16076be02e3eSChristian Hopps 				struct sk_buff *inskb, struct list_head *list,
16086be02e3eSChristian Hopps 				struct list_head *freelist)
16096be02e3eSChristian Hopps {
16106be02e3eSChristian Hopps 	const u32 nslots = xtfs->cfg.reorder_win_size + 1;
16116be02e3eSChristian Hopps 	u64 inseq = __esp_seq(inskb);
16126be02e3eSChristian Hopps 	u64 wantseq;
16136be02e3eSChristian Hopps 
16146be02e3eSChristian Hopps 	assert_spin_locked(&xtfs->drop_lock);
16156be02e3eSChristian Hopps 
16166be02e3eSChristian Hopps 	if (unlikely(!xtfs->w_seq_set)) {
16176be02e3eSChristian Hopps 		xtfs->w_seq_set = true;
16186be02e3eSChristian Hopps 		xtfs->w_wantseq = inseq;
16196be02e3eSChristian Hopps 	}
16206be02e3eSChristian Hopps 	wantseq = xtfs->w_wantseq;
16216be02e3eSChristian Hopps 
16226be02e3eSChristian Hopps 	if (likely(inseq == wantseq))
16236be02e3eSChristian Hopps 		__reorder_this(xtfs, inskb, list);
16246be02e3eSChristian Hopps 	else if (inseq < wantseq)
16256be02e3eSChristian Hopps 		__reorder_past(xtfs, inskb, freelist);
16266be02e3eSChristian Hopps 	else if ((inseq - wantseq) < nslots)
16276be02e3eSChristian Hopps 		__reorder_future_fits(xtfs, inskb, freelist);
16286be02e3eSChristian Hopps 	else
16296be02e3eSChristian Hopps 		__reorder_future_shifts(xtfs, inskb, list);
16306c82d243SChristian Hopps }
16316c82d243SChristian Hopps 
163207569476SChristian Hopps /**
163307569476SChristian Hopps  * iptfs_drop_timer() - Handle drop timer expiry.
163407569476SChristian Hopps  * @me: the timer
163507569476SChristian Hopps  *
163607569476SChristian Hopps  * This is similar to our input function.
163707569476SChristian Hopps  *
163807569476SChristian Hopps  * The drop timer is set when we start an in progress reassembly, and also when
163907569476SChristian Hopps  * we save a future packet in the window saved array.
164007569476SChristian Hopps  *
164107569476SChristian Hopps  * NOTE packets in the save window are always newer WRT drop times as
164207569476SChristian Hopps  * they get further in the future. i.e. for:
164307569476SChristian Hopps  *
164407569476SChristian Hopps  *    if slots (S0, S1, ... Sn) and `Dn` is the drop time for slot `Sn`,
164507569476SChristian Hopps  *    then D(n-1) <= D(n).
164607569476SChristian Hopps  *
164707569476SChristian Hopps  * So, regardless of why the timer is firing we can always discard any inprogress
164807569476SChristian Hopps  * fragment; either it's the reassembly timer, or slot 0 is going to be
164907569476SChristian Hopps  * dropped as S0 must have the most recent drop time, and slot 0 holds the
165007569476SChristian Hopps  * continuation fragment of the in progress packet.
165107569476SChristian Hopps  *
165207569476SChristian Hopps  * Returns HRTIMER_NORESTART.
165307569476SChristian Hopps  */
165407569476SChristian Hopps static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me)
165507569476SChristian Hopps {
16566be02e3eSChristian Hopps 	struct sk_buff *skb, *next;
16576be02e3eSChristian Hopps 	struct list_head list;
165807569476SChristian Hopps 	struct xfrm_iptfs_data *xtfs;
16596be02e3eSChristian Hopps 	struct xfrm_state *x;
16606be02e3eSChristian Hopps 	u32 count;
166107569476SChristian Hopps 
166207569476SChristian Hopps 	xtfs = container_of(me, typeof(*xtfs), drop_timer);
16636be02e3eSChristian Hopps 	x = xtfs->x;
16646be02e3eSChristian Hopps 
16656be02e3eSChristian Hopps 	INIT_LIST_HEAD(&list);
16666be02e3eSChristian Hopps 
16676be02e3eSChristian Hopps 	spin_lock(&xtfs->drop_lock);
166807569476SChristian Hopps 
166907569476SChristian Hopps 	/* Drop any in progress packet */
167007569476SChristian Hopps 	skb = xtfs->ra_newskb;
167107569476SChristian Hopps 	xtfs->ra_newskb = NULL;
16726be02e3eSChristian Hopps 
16736be02e3eSChristian Hopps 	/* Now drop as many packets as we should from the reordering window
16746be02e3eSChristian Hopps 	 * saved array
16756be02e3eSChristian Hopps 	 */
16766be02e3eSChristian Hopps 	count = xtfs->w_savedlen ? __reorder_drop(xtfs, &list) : 0;
16776be02e3eSChristian Hopps 
167807569476SChristian Hopps 	spin_unlock(&xtfs->drop_lock);
167907569476SChristian Hopps 
168007569476SChristian Hopps 	if (skb)
168107569476SChristian Hopps 		kfree_skb_reason(skb, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
168207569476SChristian Hopps 
16836be02e3eSChristian Hopps 	if (count) {
16846be02e3eSChristian Hopps 		list_for_each_entry_safe(skb, next, &list, list) {
16856be02e3eSChristian Hopps 			skb_list_del_init(skb);
16866be02e3eSChristian Hopps 			iptfs_input_ordered(x, skb);
16876be02e3eSChristian Hopps 		}
16886be02e3eSChristian Hopps 	}
16896be02e3eSChristian Hopps 
169007569476SChristian Hopps 	return HRTIMER_NORESTART;
169107569476SChristian Hopps }
169207569476SChristian Hopps 
16936be02e3eSChristian Hopps /**
16946be02e3eSChristian Hopps  * iptfs_input() - handle receipt of iptfs payload
16956be02e3eSChristian Hopps  * @x: xfrm state
16966be02e3eSChristian Hopps  * @skb: the packet
16976be02e3eSChristian Hopps  *
16986be02e3eSChristian Hopps  * We have an IPTFS payload order it if needed, then process newly in order
16996be02e3eSChristian Hopps  * packets.
17006be02e3eSChristian Hopps  *
17016be02e3eSChristian Hopps  * Return: -EINPROGRESS to inform xfrm_input to stop processing the skb.
17026be02e3eSChristian Hopps  */
17036be02e3eSChristian Hopps static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb)
17046be02e3eSChristian Hopps {
17056be02e3eSChristian Hopps 	struct list_head freelist, list;
17066be02e3eSChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
17076be02e3eSChristian Hopps 	struct sk_buff *next;
17086be02e3eSChristian Hopps 
17096be02e3eSChristian Hopps 	/* Fast path for no reorder window. */
17106be02e3eSChristian Hopps 	if (xtfs->cfg.reorder_win_size == 0) {
17116be02e3eSChristian Hopps 		iptfs_input_ordered(x, skb);
17126be02e3eSChristian Hopps 		goto done;
17136be02e3eSChristian Hopps 	}
17146be02e3eSChristian Hopps 
17156be02e3eSChristian Hopps 	/* Fetch list of in-order packets from the reordering window as well as
17166be02e3eSChristian Hopps 	 * a list of buffers we need to now free.
17176be02e3eSChristian Hopps 	 */
17186be02e3eSChristian Hopps 	INIT_LIST_HEAD(&list);
17196be02e3eSChristian Hopps 	INIT_LIST_HEAD(&freelist);
17206be02e3eSChristian Hopps 
17216be02e3eSChristian Hopps 	spin_lock(&xtfs->drop_lock);
17226be02e3eSChristian Hopps 	iptfs_input_reorder(xtfs, skb, &list, &freelist);
17236be02e3eSChristian Hopps 	spin_unlock(&xtfs->drop_lock);
17246be02e3eSChristian Hopps 
17256be02e3eSChristian Hopps 	list_for_each_entry_safe(skb, next, &list, list) {
17266be02e3eSChristian Hopps 		skb_list_del_init(skb);
17276be02e3eSChristian Hopps 		iptfs_input_ordered(x, skb);
17286be02e3eSChristian Hopps 	}
17296be02e3eSChristian Hopps 
17306be02e3eSChristian Hopps 	list_for_each_entry_safe(skb, next, &freelist, list) {
17316be02e3eSChristian Hopps 		skb_list_del_init(skb);
17326be02e3eSChristian Hopps 		kfree_skb(skb);
17336be02e3eSChristian Hopps 	}
17346be02e3eSChristian Hopps done:
17356be02e3eSChristian Hopps 	/* We always have dealt with the input SKB, either we are re-using it,
17366be02e3eSChristian Hopps 	 * or we have freed it. Return EINPROGRESS so that xfrm_input stops
17376be02e3eSChristian Hopps 	 * processing it.
17386be02e3eSChristian Hopps 	 */
17396be02e3eSChristian Hopps 	return -EINPROGRESS;
17406be02e3eSChristian Hopps }
17416be02e3eSChristian Hopps 
17420e4fbf01SChristian Hopps /* ================================= */
17430e4fbf01SChristian Hopps /* IPTFS Sending (ingress) Functions */
17440e4fbf01SChristian Hopps /* ================================= */
17450e4fbf01SChristian Hopps 
17460e4fbf01SChristian Hopps /* ------------------------- */
17470e4fbf01SChristian Hopps /* Enqueue to send functions */
17480e4fbf01SChristian Hopps /* ------------------------- */
17490e4fbf01SChristian Hopps 
17500e4fbf01SChristian Hopps /**
17510e4fbf01SChristian Hopps  * iptfs_enqueue() - enqueue packet if ok to send.
17520e4fbf01SChristian Hopps  * @xtfs: xtfs state
17530e4fbf01SChristian Hopps  * @skb: the packet
17540e4fbf01SChristian Hopps  *
17550e4fbf01SChristian Hopps  * Return: true if packet enqueued.
17560e4fbf01SChristian Hopps  */
17570e4fbf01SChristian Hopps static bool iptfs_enqueue(struct xfrm_iptfs_data *xtfs, struct sk_buff *skb)
17580e4fbf01SChristian Hopps {
17590e4fbf01SChristian Hopps 	u64 newsz = xtfs->queue_size + skb->len;
17600e4fbf01SChristian Hopps 	struct iphdr *iph;
17610e4fbf01SChristian Hopps 
17620e4fbf01SChristian Hopps 	assert_spin_locked(&xtfs->x->lock);
17630e4fbf01SChristian Hopps 
17640e4fbf01SChristian Hopps 	if (newsz > xtfs->cfg.max_queue_size)
17650e4fbf01SChristian Hopps 		return false;
17660e4fbf01SChristian Hopps 
17670e4fbf01SChristian Hopps 	/* Set ECN CE if we are above our ECN queue threshold */
17680e4fbf01SChristian Hopps 	if (newsz > xtfs->ecn_queue_size) {
17690e4fbf01SChristian Hopps 		iph = ip_hdr(skb);
17700e4fbf01SChristian Hopps 		if (iph->version == 4)
17710e4fbf01SChristian Hopps 			IP_ECN_set_ce(iph);
17720e4fbf01SChristian Hopps 		else if (iph->version == 6)
17730e4fbf01SChristian Hopps 			IP6_ECN_set_ce(skb, ipv6_hdr(skb));
17740e4fbf01SChristian Hopps 	}
17750e4fbf01SChristian Hopps 
17760e4fbf01SChristian Hopps 	__skb_queue_tail(&xtfs->queue, skb);
17770e4fbf01SChristian Hopps 	xtfs->queue_size += skb->len;
17780e4fbf01SChristian Hopps 	return true;
17790e4fbf01SChristian Hopps }
17800e4fbf01SChristian Hopps 
17810e4fbf01SChristian Hopps static int iptfs_get_cur_pmtu(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs,
17820e4fbf01SChristian Hopps 			      struct sk_buff *skb)
17830e4fbf01SChristian Hopps {
17840e4fbf01SChristian Hopps 	struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb);
17850e4fbf01SChristian Hopps 	u32 payload_mtu = xtfs->payload_mtu;
17868579d342SChristian Hopps 	u32 pmtu = __iptfs_get_inner_mtu(x, xdst->child_mtu_cached);
17870e4fbf01SChristian Hopps 
17880e4fbf01SChristian Hopps 	if (payload_mtu && payload_mtu < pmtu)
17890e4fbf01SChristian Hopps 		pmtu = payload_mtu;
17900e4fbf01SChristian Hopps 
17910e4fbf01SChristian Hopps 	return pmtu;
17920e4fbf01SChristian Hopps }
17930e4fbf01SChristian Hopps 
17940e4fbf01SChristian Hopps static int iptfs_is_too_big(struct sock *sk, struct sk_buff *skb, u32 pmtu)
17950e4fbf01SChristian Hopps {
17960e4fbf01SChristian Hopps 	if (skb->len <= pmtu)
17970e4fbf01SChristian Hopps 		return 0;
17980e4fbf01SChristian Hopps 
17990e4fbf01SChristian Hopps 	/* We only send ICMP too big if the user has configured us as
18000e4fbf01SChristian Hopps 	 * dont-fragment.
18010e4fbf01SChristian Hopps 	 */
18020e4fbf01SChristian Hopps 	if (skb->dev)
18030e4fbf01SChristian Hopps 		XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMOUTERROR);
18040e4fbf01SChristian Hopps 
18050e4fbf01SChristian Hopps 	if (sk)
18060e4fbf01SChristian Hopps 		xfrm_local_error(skb, pmtu);
18070e4fbf01SChristian Hopps 	else if (ip_hdr(skb)->version == 4)
18080e4fbf01SChristian Hopps 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(pmtu));
18090e4fbf01SChristian Hopps 	else
18100e4fbf01SChristian Hopps 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, pmtu);
18110e4fbf01SChristian Hopps 
18120e4fbf01SChristian Hopps 	return 1;
18130e4fbf01SChristian Hopps }
18140e4fbf01SChristian Hopps 
18150e4fbf01SChristian Hopps /* IPv4/IPv6 packet ingress to IPTFS tunnel, arrange to send in IPTFS payload
18160e4fbf01SChristian Hopps  * (i.e., aggregating or fragmenting as appropriate).
18170e4fbf01SChristian Hopps  * This is set in dst->output for an SA.
18180e4fbf01SChristian Hopps  */
18190e4fbf01SChristian Hopps static int iptfs_output_collect(struct net *net, struct sock *sk, struct sk_buff *skb)
18200e4fbf01SChristian Hopps {
18210e4fbf01SChristian Hopps 	struct dst_entry *dst = skb_dst(skb);
18220e4fbf01SChristian Hopps 	struct xfrm_state *x = dst->xfrm;
18230e4fbf01SChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
18240e4fbf01SChristian Hopps 	struct sk_buff *segs, *nskb;
18250e4fbf01SChristian Hopps 	u32 pmtu = 0;
18260e4fbf01SChristian Hopps 	bool ok = true;
18270e4fbf01SChristian Hopps 	bool was_gso;
18280e4fbf01SChristian Hopps 
18290e4fbf01SChristian Hopps 	/* We have hooked into dst_entry->output which means we have skipped the
18300e4fbf01SChristian Hopps 	 * protocol specific netfilter (see xfrm4_output, xfrm6_output).
18310e4fbf01SChristian Hopps 	 * when our timer runs we will end up calling xfrm_output directly on
18320e4fbf01SChristian Hopps 	 * the encapsulated traffic.
18330e4fbf01SChristian Hopps 	 *
18340e4fbf01SChristian Hopps 	 * For both cases this is the NF_INET_POST_ROUTING hook which allows
18350e4fbf01SChristian Hopps 	 * changing the skb->dst entry which then may not be xfrm based anymore
18360e4fbf01SChristian Hopps 	 * in which case a REROUTED flag is set. and dst_output is called.
18370e4fbf01SChristian Hopps 	 *
18380e4fbf01SChristian Hopps 	 * For IPv6 we are also skipping fragmentation handling for local
18390e4fbf01SChristian Hopps 	 * sockets, which may or may not be good depending on our tunnel DF
18400e4fbf01SChristian Hopps 	 * setting. Normally with fragmentation supported we want to skip this
18410e4fbf01SChristian Hopps 	 * fragmentation.
18420e4fbf01SChristian Hopps 	 */
18430e4fbf01SChristian Hopps 
18448579d342SChristian Hopps 	if (xtfs->cfg.dont_frag)
18450e4fbf01SChristian Hopps 		pmtu = iptfs_get_cur_pmtu(x, xtfs, skb);
18460e4fbf01SChristian Hopps 
18470e4fbf01SChristian Hopps 	/* Break apart GSO skbs. If the queue is nearing full then we want the
18480e4fbf01SChristian Hopps 	 * accounting and queuing to be based on the individual packets not on the
18490e4fbf01SChristian Hopps 	 * aggregate GSO buffer.
18500e4fbf01SChristian Hopps 	 */
18510e4fbf01SChristian Hopps 	was_gso = skb_is_gso(skb);
18520e4fbf01SChristian Hopps 	if (!was_gso) {
18530e4fbf01SChristian Hopps 		segs = skb;
18540e4fbf01SChristian Hopps 	} else {
18550e4fbf01SChristian Hopps 		segs = skb_gso_segment(skb, 0);
18560e4fbf01SChristian Hopps 		if (IS_ERR_OR_NULL(segs)) {
18570e4fbf01SChristian Hopps 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
18580e4fbf01SChristian Hopps 			kfree_skb(skb);
18590e4fbf01SChristian Hopps 			if (IS_ERR(segs))
18600e4fbf01SChristian Hopps 				return PTR_ERR(segs);
18610e4fbf01SChristian Hopps 			return -EINVAL;
18620e4fbf01SChristian Hopps 		}
18630e4fbf01SChristian Hopps 		consume_skb(skb);
18640e4fbf01SChristian Hopps 		skb = NULL;
18650e4fbf01SChristian Hopps 	}
18660e4fbf01SChristian Hopps 
18670e4fbf01SChristian Hopps 	/* We can be running on multiple cores and from the network softirq or
18680e4fbf01SChristian Hopps 	 * from user context depending on where the packet is coming from.
18690e4fbf01SChristian Hopps 	 */
18700e4fbf01SChristian Hopps 	spin_lock_bh(&x->lock);
18710e4fbf01SChristian Hopps 
18720e4fbf01SChristian Hopps 	skb_list_walk_safe(segs, skb, nskb) {
18730e4fbf01SChristian Hopps 		skb_mark_not_on_list(skb);
18740e4fbf01SChristian Hopps 
18750e4fbf01SChristian Hopps 		/* Once we drop due to no queue space we continue to drop the
18760e4fbf01SChristian Hopps 		 * rest of the packets from that GRO.
18770e4fbf01SChristian Hopps 		 */
18780e4fbf01SChristian Hopps 		if (!ok) {
18790e4fbf01SChristian Hopps nospace:
1880ed58b186SChristian Hopps 			trace_iptfs_no_queue_space(skb, xtfs, pmtu, was_gso);
18810e4fbf01SChristian Hopps 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOQSPACE);
18820e4fbf01SChristian Hopps 			kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING);
18830e4fbf01SChristian Hopps 			continue;
18840e4fbf01SChristian Hopps 		}
18850e4fbf01SChristian Hopps 
18868579d342SChristian Hopps 		/* If the user indicated no iptfs fragmenting check before
18878579d342SChristian Hopps 		 * enqueue.
18888579d342SChristian Hopps 		 */
18898579d342SChristian Hopps 		if (xtfs->cfg.dont_frag && iptfs_is_too_big(sk, skb, pmtu)) {
1890ed58b186SChristian Hopps 			trace_iptfs_too_big(skb, xtfs, pmtu, was_gso);
18910e4fbf01SChristian Hopps 			kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
18920e4fbf01SChristian Hopps 			continue;
18930e4fbf01SChristian Hopps 		}
18940e4fbf01SChristian Hopps 
18950e4fbf01SChristian Hopps 		/* Enqueue to send in tunnel */
18960e4fbf01SChristian Hopps 		ok = iptfs_enqueue(xtfs, skb);
18970e4fbf01SChristian Hopps 		if (!ok)
18980e4fbf01SChristian Hopps 			goto nospace;
1899ed58b186SChristian Hopps 
1900ed58b186SChristian Hopps 		trace_iptfs_enqueue(skb, xtfs, pmtu, was_gso);
19010e4fbf01SChristian Hopps 	}
19020e4fbf01SChristian Hopps 
19030e4fbf01SChristian Hopps 	/* Start a delay timer if we don't have one yet */
1904ed58b186SChristian Hopps 	if (!hrtimer_is_queued(&xtfs->iptfs_timer)) {
19050e4fbf01SChristian Hopps 		hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, IPTFS_HRTIMER_MODE);
1906ed58b186SChristian Hopps 		xtfs->iptfs_settime = ktime_get_raw_fast_ns();
1907ed58b186SChristian Hopps 		trace_iptfs_timer_start(xtfs, xtfs->init_delay_ns);
1908ed58b186SChristian Hopps 	}
19090e4fbf01SChristian Hopps 
19100e4fbf01SChristian Hopps 	spin_unlock_bh(&x->lock);
19110e4fbf01SChristian Hopps 	return 0;
19120e4fbf01SChristian Hopps }
19130e4fbf01SChristian Hopps 
19140e4fbf01SChristian Hopps /* -------------------------- */
19150e4fbf01SChristian Hopps /* Dequeue and send functions */
19160e4fbf01SChristian Hopps /* -------------------------- */
19170e4fbf01SChristian Hopps 
19180e4fbf01SChristian Hopps static void iptfs_output_prepare_skb(struct sk_buff *skb, u32 blkoff)
19190e4fbf01SChristian Hopps {
19200e4fbf01SChristian Hopps 	struct ip_iptfs_hdr *h;
19210e4fbf01SChristian Hopps 	size_t hsz = sizeof(*h);
19220e4fbf01SChristian Hopps 
19230e4fbf01SChristian Hopps 	/* now reset values to be pointing at the rest of the packets */
19240e4fbf01SChristian Hopps 	h = skb_push(skb, hsz);
19250e4fbf01SChristian Hopps 	memset(h, 0, hsz);
19260e4fbf01SChristian Hopps 	if (blkoff)
19270e4fbf01SChristian Hopps 		h->block_offset = htons(blkoff);
19280e4fbf01SChristian Hopps 
19290e4fbf01SChristian Hopps 	/* network_header current points at the inner IP packet
19300e4fbf01SChristian Hopps 	 * move it to the iptfs header
19310e4fbf01SChristian Hopps 	 */
19320e4fbf01SChristian Hopps 	skb->transport_header = skb->network_header;
19330e4fbf01SChristian Hopps 	skb->network_header -= hsz;
19340e4fbf01SChristian Hopps 
19350e4fbf01SChristian Hopps 	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
19360e4fbf01SChristian Hopps }
19370e4fbf01SChristian Hopps 
19388579d342SChristian Hopps /**
19398579d342SChristian Hopps  * iptfs_copy_create_frag() - create an inner fragment skb.
19408579d342SChristian Hopps  * @st: The source packet data.
19418579d342SChristian Hopps  * @offset: offset in @st of the new fragment data.
19428579d342SChristian Hopps  * @copy_len: the amount of data to copy from @st.
19438579d342SChristian Hopps  *
19448579d342SChristian Hopps  * Create a new skb holding a single IPTFS inner packet fragment. @copy_len must
19458579d342SChristian Hopps  * not be greater than the max fragment size.
19468579d342SChristian Hopps  *
19478579d342SChristian Hopps  * Return: the new fragment skb or an ERR_PTR().
19488579d342SChristian Hopps  */
19498579d342SChristian Hopps static struct sk_buff *iptfs_copy_create_frag(struct skb_seq_state *st, u32 offset, u32 copy_len)
19508579d342SChristian Hopps {
19518579d342SChristian Hopps 	struct sk_buff *src = st->root_skb;
19528579d342SChristian Hopps 	struct sk_buff *skb;
19538579d342SChristian Hopps 	int err;
19548579d342SChristian Hopps 
19558579d342SChristian Hopps 	skb = iptfs_alloc_skb(src, copy_len, true);
19568579d342SChristian Hopps 	if (!skb)
19578579d342SChristian Hopps 		return ERR_PTR(-ENOMEM);
19588579d342SChristian Hopps 
19598579d342SChristian Hopps 	/* Now copy `copy_len` data from src */
19608579d342SChristian Hopps 	err = skb_copy_seq_read(st, offset, skb_put(skb, copy_len), copy_len);
19618579d342SChristian Hopps 	if (err) {
19628579d342SChristian Hopps 		kfree_skb(skb);
19638579d342SChristian Hopps 		return ERR_PTR(err);
19648579d342SChristian Hopps 	}
19658579d342SChristian Hopps 
19668579d342SChristian Hopps 	return skb;
19678579d342SChristian Hopps }
19688579d342SChristian Hopps 
19698579d342SChristian Hopps /**
19708579d342SChristian Hopps  * iptfs_copy_create_frags() - create and send N-1 fragments of a larger skb.
19718579d342SChristian Hopps  * @skbp: the source packet skb (IN), skb holding the last fragment in
19728579d342SChristian Hopps  *        the fragment stream (OUT).
19738579d342SChristian Hopps  * @xtfs: IPTFS SA state.
19748579d342SChristian Hopps  * @mtu: the max IPTFS fragment size.
19758579d342SChristian Hopps  *
19768579d342SChristian Hopps  * This function is responsible for fragmenting a larger inner packet into a
19778579d342SChristian Hopps  * sequence of IPTFS payload packets. The last fragment is returned rather than
19788579d342SChristian Hopps  * being sent so that the caller can append more inner packets (aggregation) if
19798579d342SChristian Hopps  * there is room.
19808579d342SChristian Hopps  *
19818579d342SChristian Hopps  * Return: 0 on success or a negative error code on failure
19828579d342SChristian Hopps  */
19838579d342SChristian Hopps static int iptfs_copy_create_frags(struct sk_buff **skbp, struct xfrm_iptfs_data *xtfs, u32 mtu)
19848579d342SChristian Hopps {
19858579d342SChristian Hopps 	struct skb_seq_state skbseq;
19868579d342SChristian Hopps 	struct list_head sublist;
19878579d342SChristian Hopps 	struct sk_buff *skb = *skbp;
19888579d342SChristian Hopps 	struct sk_buff *nskb = *skbp;
19898579d342SChristian Hopps 	u32 copy_len, offset;
19908579d342SChristian Hopps 	u32 to_copy = skb->len - mtu;
1991ed58b186SChristian Hopps 	u32 blkoff = 0;
19928579d342SChristian Hopps 	int err = 0;
19938579d342SChristian Hopps 
19948579d342SChristian Hopps 	INIT_LIST_HEAD(&sublist);
19958579d342SChristian Hopps 
19968579d342SChristian Hopps 	skb_prepare_seq_read(skb, 0, skb->len, &skbseq);
19978579d342SChristian Hopps 
19988579d342SChristian Hopps 	/* A trimmed `skb` will be sent as the first fragment, later. */
19998579d342SChristian Hopps 	offset = mtu;
20008579d342SChristian Hopps 	to_copy = skb->len - offset;
20018579d342SChristian Hopps 	while (to_copy) {
20028579d342SChristian Hopps 		/* Send all but last fragment to allow agg. append */
2003ed58b186SChristian Hopps 		trace_iptfs_first_fragmenting(nskb, mtu, to_copy, NULL);
20048579d342SChristian Hopps 		list_add_tail(&nskb->list, &sublist);
20058579d342SChristian Hopps 
20068579d342SChristian Hopps 		/* FUTURE: if the packet has an odd/non-aligning length we could
20078579d342SChristian Hopps 		 * send less data in the penultimate fragment so that the last
20088579d342SChristian Hopps 		 * fragment then ends on an aligned boundary.
20098579d342SChristian Hopps 		 */
20108579d342SChristian Hopps 		copy_len = min(to_copy, mtu);
20118579d342SChristian Hopps 		nskb = iptfs_copy_create_frag(&skbseq, offset, copy_len);
20128579d342SChristian Hopps 		if (IS_ERR(nskb)) {
20138579d342SChristian Hopps 			XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMOUTERROR);
20148579d342SChristian Hopps 			skb_abort_seq_read(&skbseq);
20158579d342SChristian Hopps 			err = PTR_ERR(nskb);
20168579d342SChristian Hopps 			nskb = NULL;
20178579d342SChristian Hopps 			break;
20188579d342SChristian Hopps 		}
20198579d342SChristian Hopps 		iptfs_output_prepare_skb(nskb, to_copy);
20208579d342SChristian Hopps 		offset += copy_len;
20218579d342SChristian Hopps 		to_copy -= copy_len;
2022ed58b186SChristian Hopps 		blkoff = to_copy;
20238579d342SChristian Hopps 	}
20248579d342SChristian Hopps 	skb_abort_seq_read(&skbseq);
20258579d342SChristian Hopps 
20268579d342SChristian Hopps 	/* return last fragment that will be unsent (or NULL) */
20278579d342SChristian Hopps 	*skbp = nskb;
2028ed58b186SChristian Hopps 	if (nskb)
2029ed58b186SChristian Hopps 		trace_iptfs_first_final_fragment(nskb, mtu, blkoff, NULL);
20308579d342SChristian Hopps 
20318579d342SChristian Hopps 	/* trim the original skb to MTU */
20328579d342SChristian Hopps 	if (!err)
20338579d342SChristian Hopps 		err = pskb_trim(skb, mtu);
20348579d342SChristian Hopps 
20358579d342SChristian Hopps 	if (err) {
20368579d342SChristian Hopps 		/* Free all frags. Don't bother sending a partial packet we will
20378579d342SChristian Hopps 		 * never complete.
20388579d342SChristian Hopps 		 */
20398579d342SChristian Hopps 		kfree_skb(nskb);
20408579d342SChristian Hopps 		list_for_each_entry_safe(skb, nskb, &sublist, list) {
20418579d342SChristian Hopps 			skb_list_del_init(skb);
20428579d342SChristian Hopps 			kfree_skb(skb);
20438579d342SChristian Hopps 		}
20448579d342SChristian Hopps 		return err;
20458579d342SChristian Hopps 	}
20468579d342SChristian Hopps 
20478579d342SChristian Hopps 	/* prepare the initial fragment with an iptfs header */
20488579d342SChristian Hopps 	iptfs_output_prepare_skb(skb, 0);
20498579d342SChristian Hopps 
20508579d342SChristian Hopps 	/* Send all but last fragment, if we fail to send a fragment then free
20518579d342SChristian Hopps 	 * the rest -- no point in sending a packet that can't be reassembled.
20528579d342SChristian Hopps 	 */
20538579d342SChristian Hopps 	list_for_each_entry_safe(skb, nskb, &sublist, list) {
20548579d342SChristian Hopps 		skb_list_del_init(skb);
20558579d342SChristian Hopps 		if (!err)
20568579d342SChristian Hopps 			err = xfrm_output(NULL, skb);
20578579d342SChristian Hopps 		else
20588579d342SChristian Hopps 			kfree_skb(skb);
20598579d342SChristian Hopps 	}
20608579d342SChristian Hopps 	if (err)
20618579d342SChristian Hopps 		kfree_skb(*skbp);
20628579d342SChristian Hopps 	return err;
20638579d342SChristian Hopps }
20648579d342SChristian Hopps 
20658579d342SChristian Hopps /**
20668579d342SChristian Hopps  * iptfs_first_skb() - handle the first dequeued inner packet for output
20678579d342SChristian Hopps  * @skbp: the source packet skb (IN), skb holding the last fragment in
20688579d342SChristian Hopps  *        the fragment stream (OUT).
20698579d342SChristian Hopps  * @xtfs: IPTFS SA state.
20708579d342SChristian Hopps  * @mtu: the max IPTFS fragment size.
20718579d342SChristian Hopps  *
20728579d342SChristian Hopps  * This function is responsible for fragmenting a larger inner packet into a
20738579d342SChristian Hopps  * sequence of IPTFS payload packets.
20748579d342SChristian Hopps  *
20758579d342SChristian Hopps  * The last fragment is returned rather than being sent so that the caller can
20768579d342SChristian Hopps  * append more inner packets (aggregation) if there is room.
20778579d342SChristian Hopps  *
20788579d342SChristian Hopps  * Return: 0 on success or a negative error code on failure
20798579d342SChristian Hopps  */
20808579d342SChristian Hopps static int iptfs_first_skb(struct sk_buff **skbp, struct xfrm_iptfs_data *xtfs, u32 mtu)
20818579d342SChristian Hopps {
20828579d342SChristian Hopps 	struct sk_buff *skb = *skbp;
20838579d342SChristian Hopps 	int err;
20848579d342SChristian Hopps 
20858579d342SChristian Hopps 	/* Classic ESP skips the don't fragment ICMP error if DF is clear on
20868579d342SChristian Hopps 	 * the inner packet or ignore_df is set. Otherwise it will send an ICMP
20878579d342SChristian Hopps 	 * or local error if the inner packet won't fit it's MTU.
20888579d342SChristian Hopps 	 *
20898579d342SChristian Hopps 	 * With IPTFS we do not care about the inner packet DF bit. If the
20908579d342SChristian Hopps 	 * tunnel is configured to "don't fragment" we error back if things
20918579d342SChristian Hopps 	 * don't fit in our max packet size. Otherwise we iptfs-fragment as
20928579d342SChristian Hopps 	 * normal.
20938579d342SChristian Hopps 	 */
20948579d342SChristian Hopps 
20958579d342SChristian Hopps 	/* The opportunity for HW offload has ended */
20968579d342SChristian Hopps 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
20978579d342SChristian Hopps 		err = skb_checksum_help(skb);
20988579d342SChristian Hopps 		if (err)
20998579d342SChristian Hopps 			return err;
21008579d342SChristian Hopps 	}
21018579d342SChristian Hopps 
21028579d342SChristian Hopps 	/* We've split gso up before queuing */
21038579d342SChristian Hopps 
2104ed58b186SChristian Hopps 	trace_iptfs_first_dequeue(skb, mtu, 0, ip_hdr(skb));
2105ed58b186SChristian Hopps 
21068579d342SChristian Hopps 	/* Consider the buffer Tx'd and no longer owned */
21078579d342SChristian Hopps 	skb_orphan(skb);
21088579d342SChristian Hopps 
21098579d342SChristian Hopps 	/* Simple case -- it fits. `mtu` accounted for all the overhead
21108579d342SChristian Hopps 	 * including the basic IPTFS header.
21118579d342SChristian Hopps 	 */
21128579d342SChristian Hopps 	if (skb->len <= mtu) {
21138579d342SChristian Hopps 		iptfs_output_prepare_skb(skb, 0);
21148579d342SChristian Hopps 		return 0;
21158579d342SChristian Hopps 	}
21168579d342SChristian Hopps 
21178579d342SChristian Hopps 	return iptfs_copy_create_frags(skbp, xtfs, mtu);
21188579d342SChristian Hopps }
21198579d342SChristian Hopps 
21200e4fbf01SChristian Hopps static struct sk_buff **iptfs_rehome_fraglist(struct sk_buff **nextp, struct sk_buff *child)
21210e4fbf01SChristian Hopps {
21220e4fbf01SChristian Hopps 	u32 fllen = 0;
21230e4fbf01SChristian Hopps 
21240e4fbf01SChristian Hopps 	/* It might be possible to account for a frag list in addition to page
21250e4fbf01SChristian Hopps 	 * fragment if it's a valid state to be in. The page fragments size
21260e4fbf01SChristian Hopps 	 * should be kept as data_len so only the frag_list size is removed,
21270e4fbf01SChristian Hopps 	 * this must be done above as well.
21280e4fbf01SChristian Hopps 	 */
21290e4fbf01SChristian Hopps 	*nextp = skb_shinfo(child)->frag_list;
21300e4fbf01SChristian Hopps 	while (*nextp) {
21310e4fbf01SChristian Hopps 		fllen += (*nextp)->len;
21320e4fbf01SChristian Hopps 		nextp = &(*nextp)->next;
21330e4fbf01SChristian Hopps 	}
21340e4fbf01SChristian Hopps 	skb_frag_list_init(child);
21350e4fbf01SChristian Hopps 	child->len -= fllen;
21360e4fbf01SChristian Hopps 	child->data_len -= fllen;
21370e4fbf01SChristian Hopps 
21380e4fbf01SChristian Hopps 	return nextp;
21390e4fbf01SChristian Hopps }
21400e4fbf01SChristian Hopps 
2141b96ba312SChristian Hopps static void iptfs_consume_frags(struct sk_buff *to, struct sk_buff *from)
2142b96ba312SChristian Hopps {
2143b96ba312SChristian Hopps 	struct skb_shared_info *fromi = skb_shinfo(from);
2144b96ba312SChristian Hopps 	struct skb_shared_info *toi = skb_shinfo(to);
2145b96ba312SChristian Hopps 	unsigned int new_truesize;
2146b96ba312SChristian Hopps 
2147b96ba312SChristian Hopps 	/* If we have data in a head page, grab it */
2148b96ba312SChristian Hopps 	if (!skb_headlen(from)) {
2149b96ba312SChristian Hopps 		new_truesize = SKB_TRUESIZE(skb_end_offset(from));
2150b96ba312SChristian Hopps 	} else {
2151b96ba312SChristian Hopps 		iptfs_skb_head_to_frag(from, &toi->frags[toi->nr_frags]);
2152b96ba312SChristian Hopps 		skb_frag_ref(to, toi->nr_frags++);
2153b96ba312SChristian Hopps 		new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
2154b96ba312SChristian Hopps 	}
2155b96ba312SChristian Hopps 
2156b96ba312SChristian Hopps 	/* Move any other page fragments rather than copy */
2157b96ba312SChristian Hopps 	memcpy(&toi->frags[toi->nr_frags], fromi->frags,
2158b96ba312SChristian Hopps 	       sizeof(fromi->frags[0]) * fromi->nr_frags);
2159b96ba312SChristian Hopps 	toi->nr_frags += fromi->nr_frags;
2160b96ba312SChristian Hopps 	fromi->nr_frags = 0;
2161b96ba312SChristian Hopps 	from->data_len = 0;
2162b96ba312SChristian Hopps 	from->len = 0;
2163b96ba312SChristian Hopps 	to->truesize += from->truesize - new_truesize;
2164b96ba312SChristian Hopps 	from->truesize = new_truesize;
2165b96ba312SChristian Hopps 
2166b96ba312SChristian Hopps 	/* We are done with this SKB */
2167b96ba312SChristian Hopps 	consume_skb(from);
2168b96ba312SChristian Hopps }
2169b96ba312SChristian Hopps 
21700e4fbf01SChristian Hopps static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list)
21710e4fbf01SChristian Hopps {
21720e4fbf01SChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
21730e4fbf01SChristian Hopps 	struct sk_buff *skb, *skb2, **nextp;
2174b96ba312SChristian Hopps 	struct skb_shared_info *shi, *shi2;
21750e4fbf01SChristian Hopps 
21768579d342SChristian Hopps 	/* If we are fragmenting due to a large inner packet we will output all
21778579d342SChristian Hopps 	 * the outer IPTFS packets required to contain the fragments of the
21788579d342SChristian Hopps 	 * single large inner packet. These outer packets need to be sent
21798579d342SChristian Hopps 	 * consecutively (ESP seq-wise). Since this output function is always
21808579d342SChristian Hopps 	 * running from a timer we do not need a lock to provide this guarantee.
21818579d342SChristian Hopps 	 * We will output our packets consecutively before the timer is allowed
21828579d342SChristian Hopps 	 * to run again on some other CPU.
21838579d342SChristian Hopps 	 */
21848579d342SChristian Hopps 
21850e4fbf01SChristian Hopps 	while ((skb = __skb_dequeue(list))) {
21860e4fbf01SChristian Hopps 		u32 mtu = iptfs_get_cur_pmtu(x, xtfs, skb);
2187b96ba312SChristian Hopps 		bool share_ok = true;
21880e4fbf01SChristian Hopps 		int remaining;
21890e4fbf01SChristian Hopps 
21900e4fbf01SChristian Hopps 		/* protocol comes to us cleared sometimes */
21910e4fbf01SChristian Hopps 		skb->protocol = x->outer_mode.family == AF_INET ? htons(ETH_P_IP) :
21920e4fbf01SChristian Hopps 								  htons(ETH_P_IPV6);
21930e4fbf01SChristian Hopps 
21948579d342SChristian Hopps 		if (skb->len > mtu && xtfs->cfg.dont_frag) {
21950e4fbf01SChristian Hopps 			/* We handle this case before enqueueing so we are only
21960e4fbf01SChristian Hopps 			 * here b/c MTU changed after we enqueued before we
21970e4fbf01SChristian Hopps 			 * dequeued, just drop these.
21980e4fbf01SChristian Hopps 			 */
21990e4fbf01SChristian Hopps 			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTERROR);
22000e4fbf01SChristian Hopps 
2201ed58b186SChristian Hopps 			trace_iptfs_first_toobig(skb, mtu, 0, ip_hdr(skb));
22020e4fbf01SChristian Hopps 			kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
22030e4fbf01SChristian Hopps 			continue;
22040e4fbf01SChristian Hopps 		}
22050e4fbf01SChristian Hopps 
22068579d342SChristian Hopps 		/* Convert first inner packet into an outer IPTFS packet,
22078579d342SChristian Hopps 		 * dealing with any fragmentation into multiple outer packets
22088579d342SChristian Hopps 		 * if necessary.
22090e4fbf01SChristian Hopps 		 */
22108579d342SChristian Hopps 		if (iptfs_first_skb(&skb, xtfs, mtu))
22110e4fbf01SChristian Hopps 			continue;
22120e4fbf01SChristian Hopps 
22138579d342SChristian Hopps 		/* If fragmentation was required the returned skb is the last
22148579d342SChristian Hopps 		 * IPTFS fragment in the chain, and it's IPTFS header blkoff has
22158579d342SChristian Hopps 		 * been set just past the end of the fragment data.
22168579d342SChristian Hopps 		 *
22178579d342SChristian Hopps 		 * In either case the space remaining to send more inner packet
22188579d342SChristian Hopps 		 * data is `mtu` - (skb->len - sizeof iptfs header). This is b/c
22198579d342SChristian Hopps 		 * the `mtu` value has the basic IPTFS header len accounted for,
22208579d342SChristian Hopps 		 * and we added that header to the skb so it is a part of
22218579d342SChristian Hopps 		 * skb->len, thus we subtract it from the skb length.
22220e4fbf01SChristian Hopps 		 */
22230e4fbf01SChristian Hopps 		remaining = mtu - (skb->len - sizeof(struct ip_iptfs_hdr));
22240e4fbf01SChristian Hopps 
22250e4fbf01SChristian Hopps 		/* Re-home (un-nest) nested fragment lists. We need to do this
22260e4fbf01SChristian Hopps 		 * b/c we will simply be appending any following aggregated
2227b96ba312SChristian Hopps 		 * inner packets using the frag list.
22280e4fbf01SChristian Hopps 		 */
22290e4fbf01SChristian Hopps 		shi = skb_shinfo(skb);
22300e4fbf01SChristian Hopps 		nextp = &shi->frag_list;
22310e4fbf01SChristian Hopps 		while (*nextp) {
22320e4fbf01SChristian Hopps 			if (skb_has_frag_list(*nextp))
22330e4fbf01SChristian Hopps 				nextp = iptfs_rehome_fraglist(&(*nextp)->next, *nextp);
22340e4fbf01SChristian Hopps 			else
22350e4fbf01SChristian Hopps 				nextp = &(*nextp)->next;
22360e4fbf01SChristian Hopps 		}
22370e4fbf01SChristian Hopps 
2238b96ba312SChristian Hopps 		if (shi->frag_list || skb_cloned(skb) || skb_shared(skb))
2239b96ba312SChristian Hopps 			share_ok = false;
2240b96ba312SChristian Hopps 
22410e4fbf01SChristian Hopps 		/* See if we have enough space to simply append.
22420e4fbf01SChristian Hopps 		 *
22430e4fbf01SChristian Hopps 		 * NOTE: Maybe do not append if we will be mis-aligned,
22440e4fbf01SChristian Hopps 		 * SW-based endpoints will probably have to copy in this
22450e4fbf01SChristian Hopps 		 * case.
22460e4fbf01SChristian Hopps 		 */
22470e4fbf01SChristian Hopps 		while ((skb2 = skb_peek(list))) {
2248ed58b186SChristian Hopps 			trace_iptfs_ingress_nth_peek(skb2, remaining);
22490e4fbf01SChristian Hopps 			if (skb2->len > remaining)
22500e4fbf01SChristian Hopps 				break;
22510e4fbf01SChristian Hopps 
22520e4fbf01SChristian Hopps 			__skb_unlink(skb2, list);
22530e4fbf01SChristian Hopps 
22540e4fbf01SChristian Hopps 			/* Consider the buffer Tx'd and no longer owned */
22550e4fbf01SChristian Hopps 			skb_orphan(skb);
22560e4fbf01SChristian Hopps 
22570e4fbf01SChristian Hopps 			/* If we don't have a cksum in the packet we need to add
22580e4fbf01SChristian Hopps 			 * one before encapsulation.
22590e4fbf01SChristian Hopps 			 */
22600e4fbf01SChristian Hopps 			if (skb2->ip_summed == CHECKSUM_PARTIAL) {
22610e4fbf01SChristian Hopps 				if (skb_checksum_help(skb2)) {
22620e4fbf01SChristian Hopps 					XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTERROR);
22630e4fbf01SChristian Hopps 					kfree_skb(skb2);
22640e4fbf01SChristian Hopps 					continue;
22650e4fbf01SChristian Hopps 				}
22660e4fbf01SChristian Hopps 			}
22670e4fbf01SChristian Hopps 
2268b96ba312SChristian Hopps 			/* skb->pp_recycle is passed to __skb_flag_unref for all
2269b96ba312SChristian Hopps 			 * frag pages so we can only share pages with skb's who
2270b96ba312SChristian Hopps 			 * match ourselves.
2271b96ba312SChristian Hopps 			 */
2272b96ba312SChristian Hopps 			shi2 = skb_shinfo(skb2);
2273b96ba312SChristian Hopps 			if (share_ok &&
2274b96ba312SChristian Hopps 			    (shi2->frag_list ||
2275b96ba312SChristian Hopps 			     (!skb2->head_frag && skb_headlen(skb)) ||
2276b96ba312SChristian Hopps 			     skb->pp_recycle != skb2->pp_recycle ||
2277b96ba312SChristian Hopps 			     skb_zcopy(skb2) ||
2278b96ba312SChristian Hopps 			     (shi->nr_frags + shi2->nr_frags + 1 > MAX_SKB_FRAGS)))
2279b96ba312SChristian Hopps 				share_ok = false;
2280b96ba312SChristian Hopps 
22810e4fbf01SChristian Hopps 			/* Do accounting */
22820e4fbf01SChristian Hopps 			skb->data_len += skb2->len;
22830e4fbf01SChristian Hopps 			skb->len += skb2->len;
22840e4fbf01SChristian Hopps 			remaining -= skb2->len;
22850e4fbf01SChristian Hopps 
2286ed58b186SChristian Hopps 			trace_iptfs_ingress_nth_add(skb2, share_ok);
2287ed58b186SChristian Hopps 
2288b96ba312SChristian Hopps 			if (share_ok) {
2289b96ba312SChristian Hopps 				iptfs_consume_frags(skb, skb2);
2290b96ba312SChristian Hopps 			} else {
22910e4fbf01SChristian Hopps 				/* Append to the frag_list */
22920e4fbf01SChristian Hopps 				*nextp = skb2;
22930e4fbf01SChristian Hopps 				nextp = &skb2->next;
22940e4fbf01SChristian Hopps 				if (skb_has_frag_list(skb2))
2295b96ba312SChristian Hopps 					nextp = iptfs_rehome_fraglist(nextp,
2296b96ba312SChristian Hopps 								      skb2);
22970e4fbf01SChristian Hopps 				skb->truesize += skb2->truesize;
22980e4fbf01SChristian Hopps 			}
2299b96ba312SChristian Hopps 		}
23000e4fbf01SChristian Hopps 
23010e4fbf01SChristian Hopps 		xfrm_output(NULL, skb);
23020e4fbf01SChristian Hopps 	}
23030e4fbf01SChristian Hopps }
23040e4fbf01SChristian Hopps 
23050e4fbf01SChristian Hopps static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me)
23060e4fbf01SChristian Hopps {
23070e4fbf01SChristian Hopps 	struct sk_buff_head list;
23080e4fbf01SChristian Hopps 	struct xfrm_iptfs_data *xtfs;
23090e4fbf01SChristian Hopps 	struct xfrm_state *x;
2310ed58b186SChristian Hopps 	time64_t settime;
23110e4fbf01SChristian Hopps 
23120e4fbf01SChristian Hopps 	xtfs = container_of(me, typeof(*xtfs), iptfs_timer);
23130e4fbf01SChristian Hopps 	x = xtfs->x;
23140e4fbf01SChristian Hopps 
23150e4fbf01SChristian Hopps 	/* Process all the queued packets
23160e4fbf01SChristian Hopps 	 *
23170e4fbf01SChristian Hopps 	 * softirq execution order: timer > tasklet > hrtimer
23180e4fbf01SChristian Hopps 	 *
23190e4fbf01SChristian Hopps 	 * Network rx will have run before us giving one last chance to queue
23200e4fbf01SChristian Hopps 	 * ingress packets for us to process and transmit.
23210e4fbf01SChristian Hopps 	 */
23220e4fbf01SChristian Hopps 
23230e4fbf01SChristian Hopps 	spin_lock(&x->lock);
23240e4fbf01SChristian Hopps 	__skb_queue_head_init(&list);
23250e4fbf01SChristian Hopps 	skb_queue_splice_init(&xtfs->queue, &list);
23260e4fbf01SChristian Hopps 	xtfs->queue_size = 0;
2327ed58b186SChristian Hopps 	settime = xtfs->iptfs_settime;
23280e4fbf01SChristian Hopps 	spin_unlock(&x->lock);
23290e4fbf01SChristian Hopps 
23300e4fbf01SChristian Hopps 	/* After the above unlock, packets can begin queuing again, and the
23310e4fbf01SChristian Hopps 	 * timer can be set again, from another CPU either in softirq or user
23320e4fbf01SChristian Hopps 	 * context (not from this one since we are running at softirq level
23330e4fbf01SChristian Hopps 	 * already).
23340e4fbf01SChristian Hopps 	 */
23350e4fbf01SChristian Hopps 
2336ed58b186SChristian Hopps 	trace_iptfs_timer_expire(xtfs, (unsigned long long)(ktime_get_raw_fast_ns() - settime));
2337ed58b186SChristian Hopps 
23380e4fbf01SChristian Hopps 	iptfs_output_queued(x, &list);
23390e4fbf01SChristian Hopps 
23400e4fbf01SChristian Hopps 	return HRTIMER_NORESTART;
23410e4fbf01SChristian Hopps }
23420e4fbf01SChristian Hopps 
23430e4fbf01SChristian Hopps /**
23440e4fbf01SChristian Hopps  * iptfs_encap_add_ipv4() - add outer encaps
23450e4fbf01SChristian Hopps  * @x: xfrm state
23460e4fbf01SChristian Hopps  * @skb: the packet
23470e4fbf01SChristian Hopps  *
23480e4fbf01SChristian Hopps  * This was originally taken from xfrm4_tunnel_encap_add. The reason for the
23490e4fbf01SChristian Hopps  * copy is that IP-TFS/AGGFRAG can have different functionality for how to set
23500e4fbf01SChristian Hopps  * the TOS/DSCP bits. Sets the protocol to a different value and doesn't do
23510e4fbf01SChristian Hopps  * anything with inner headers as they aren't pointing into a normal IP
23520e4fbf01SChristian Hopps  * singleton inner packet.
23530e4fbf01SChristian Hopps  *
23540e4fbf01SChristian Hopps  * Return: 0 on success or a negative error code on failure
23550e4fbf01SChristian Hopps  */
23560e4fbf01SChristian Hopps static int iptfs_encap_add_ipv4(struct xfrm_state *x, struct sk_buff *skb)
23570e4fbf01SChristian Hopps {
23580e4fbf01SChristian Hopps 	struct dst_entry *dst = skb_dst(skb);
23590e4fbf01SChristian Hopps 	struct iphdr *top_iph;
23600e4fbf01SChristian Hopps 
23610e4fbf01SChristian Hopps 	skb_reset_inner_network_header(skb);
23620e4fbf01SChristian Hopps 	skb_reset_inner_transport_header(skb);
23630e4fbf01SChristian Hopps 
23640e4fbf01SChristian Hopps 	skb_set_network_header(skb, -(x->props.header_len - x->props.enc_hdr_len));
23650e4fbf01SChristian Hopps 	skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol);
23660e4fbf01SChristian Hopps 	skb->transport_header = skb->network_header + sizeof(*top_iph);
23670e4fbf01SChristian Hopps 
23680e4fbf01SChristian Hopps 	top_iph = ip_hdr(skb);
23690e4fbf01SChristian Hopps 	top_iph->ihl = 5;
23700e4fbf01SChristian Hopps 	top_iph->version = 4;
23710e4fbf01SChristian Hopps 	top_iph->protocol = IPPROTO_AGGFRAG;
23720e4fbf01SChristian Hopps 
23730e4fbf01SChristian Hopps 	/* As we have 0, fractional, 1 or N inner packets there's no obviously
23740e4fbf01SChristian Hopps 	 * correct DSCP mapping to inherit. ECN should be cleared per RFC9347
23750e4fbf01SChristian Hopps 	 * 3.1.
23760e4fbf01SChristian Hopps 	 */
23770e4fbf01SChristian Hopps 	top_iph->tos = 0;
23780e4fbf01SChristian Hopps 
23790e4fbf01SChristian Hopps 	top_iph->frag_off = htons(IP_DF);
23800e4fbf01SChristian Hopps 	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
23810e4fbf01SChristian Hopps 	top_iph->saddr = x->props.saddr.a4;
23820e4fbf01SChristian Hopps 	top_iph->daddr = x->id.daddr.a4;
23830e4fbf01SChristian Hopps 	ip_select_ident(dev_net(dst->dev), skb, NULL);
23840e4fbf01SChristian Hopps 
23850e4fbf01SChristian Hopps 	return 0;
23860e4fbf01SChristian Hopps }
23870e4fbf01SChristian Hopps 
23880e4fbf01SChristian Hopps #if IS_ENABLED(CONFIG_IPV6)
23890e4fbf01SChristian Hopps /**
23900e4fbf01SChristian Hopps  * iptfs_encap_add_ipv6() - add outer encaps
23910e4fbf01SChristian Hopps  * @x: xfrm state
23920e4fbf01SChristian Hopps  * @skb: the packet
23930e4fbf01SChristian Hopps  *
23940e4fbf01SChristian Hopps  * This was originally taken from xfrm6_tunnel_encap_add. The reason for the
23950e4fbf01SChristian Hopps  * copy is that IP-TFS/AGGFRAG can have different functionality for how to set
23960e4fbf01SChristian Hopps  * the flow label and TOS/DSCP bits. It also sets the protocol to a different
23970e4fbf01SChristian Hopps  * value and doesn't do anything with inner headers as they aren't pointing into
23980e4fbf01SChristian Hopps  * a normal IP singleton inner packet.
23990e4fbf01SChristian Hopps  *
24000e4fbf01SChristian Hopps  * Return: 0 on success or a negative error code on failure
24010e4fbf01SChristian Hopps  */
24020e4fbf01SChristian Hopps static int iptfs_encap_add_ipv6(struct xfrm_state *x, struct sk_buff *skb)
24030e4fbf01SChristian Hopps {
24040e4fbf01SChristian Hopps 	struct dst_entry *dst = skb_dst(skb);
24050e4fbf01SChristian Hopps 	struct ipv6hdr *top_iph;
24060e4fbf01SChristian Hopps 	int dsfield;
24070e4fbf01SChristian Hopps 
24080e4fbf01SChristian Hopps 	skb_reset_inner_network_header(skb);
24090e4fbf01SChristian Hopps 	skb_reset_inner_transport_header(skb);
24100e4fbf01SChristian Hopps 
24110e4fbf01SChristian Hopps 	skb_set_network_header(skb, -x->props.header_len + x->props.enc_hdr_len);
24120e4fbf01SChristian Hopps 	skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr);
24130e4fbf01SChristian Hopps 	skb->transport_header = skb->network_header + sizeof(*top_iph);
24140e4fbf01SChristian Hopps 
24150e4fbf01SChristian Hopps 	top_iph = ipv6_hdr(skb);
24160e4fbf01SChristian Hopps 	top_iph->version = 6;
24170e4fbf01SChristian Hopps 	top_iph->priority = 0;
24180e4fbf01SChristian Hopps 	memset(top_iph->flow_lbl, 0, sizeof(top_iph->flow_lbl));
24190e4fbf01SChristian Hopps 	top_iph->nexthdr = IPPROTO_AGGFRAG;
24200e4fbf01SChristian Hopps 
24210e4fbf01SChristian Hopps 	/* As we have 0, fractional, 1 or N inner packets there's no obviously
24220e4fbf01SChristian Hopps 	 * correct DSCP mapping to inherit. ECN should be cleared per RFC9347
24230e4fbf01SChristian Hopps 	 * 3.1.
24240e4fbf01SChristian Hopps 	 */
24250e4fbf01SChristian Hopps 	dsfield = 0;
24260e4fbf01SChristian Hopps 	ipv6_change_dsfield(top_iph, 0, dsfield);
24270e4fbf01SChristian Hopps 
24280e4fbf01SChristian Hopps 	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
24290e4fbf01SChristian Hopps 	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
24300e4fbf01SChristian Hopps 	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
24310e4fbf01SChristian Hopps 
24320e4fbf01SChristian Hopps 	return 0;
24330e4fbf01SChristian Hopps }
24340e4fbf01SChristian Hopps #endif
24350e4fbf01SChristian Hopps 
24360e4fbf01SChristian Hopps /**
24370e4fbf01SChristian Hopps  * iptfs_prepare_output() -  prepare the skb for output
24380e4fbf01SChristian Hopps  * @x: xfrm state
24390e4fbf01SChristian Hopps  * @skb: the packet
24400e4fbf01SChristian Hopps  *
24410e4fbf01SChristian Hopps  * Return: Error value, if 0 then skb values should be as follows:
24420e4fbf01SChristian Hopps  *    - transport_header should point at ESP header
24430e4fbf01SChristian Hopps  *    - network_header should point at Outer IP header
24440e4fbf01SChristian Hopps  *    - mac_header should point at protocol/nexthdr of the outer IP
24450e4fbf01SChristian Hopps  */
24460e4fbf01SChristian Hopps static int iptfs_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
24470e4fbf01SChristian Hopps {
24480e4fbf01SChristian Hopps 	if (x->outer_mode.family == AF_INET)
24490e4fbf01SChristian Hopps 		return iptfs_encap_add_ipv4(x, skb);
24500e4fbf01SChristian Hopps 	if (x->outer_mode.family == AF_INET6) {
24510e4fbf01SChristian Hopps #if IS_ENABLED(CONFIG_IPV6)
24520e4fbf01SChristian Hopps 		return iptfs_encap_add_ipv6(x, skb);
24530e4fbf01SChristian Hopps #else
24540e4fbf01SChristian Hopps 		return -EAFNOSUPPORT;
24550e4fbf01SChristian Hopps #endif
24560e4fbf01SChristian Hopps 	}
24570e4fbf01SChristian Hopps 	return -EOPNOTSUPP;
24580e4fbf01SChristian Hopps }
24590e4fbf01SChristian Hopps 
24604b3faf61SChristian Hopps /* ========================== */
24614b3faf61SChristian Hopps /* State Management Functions */
24624b3faf61SChristian Hopps /* ========================== */
24634b3faf61SChristian Hopps 
24644b3faf61SChristian Hopps /**
24658579d342SChristian Hopps  * __iptfs_get_inner_mtu() - return inner MTU with no fragmentation.
24664b3faf61SChristian Hopps  * @x: xfrm state.
24674b3faf61SChristian Hopps  * @outer_mtu: the outer mtu
24688579d342SChristian Hopps  *
24698579d342SChristian Hopps  * Return: Correct MTU taking in to account the encap overhead.
24704b3faf61SChristian Hopps  */
24718579d342SChristian Hopps static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu)
24724b3faf61SChristian Hopps {
24734b3faf61SChristian Hopps 	struct crypto_aead *aead;
24744b3faf61SChristian Hopps 	u32 blksize;
24754b3faf61SChristian Hopps 
24764b3faf61SChristian Hopps 	aead = x->data;
24774b3faf61SChristian Hopps 	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
24784b3faf61SChristian Hopps 	return ((outer_mtu - x->props.header_len - crypto_aead_authsize(aead)) &
24794b3faf61SChristian Hopps 		~(blksize - 1)) - 2;
24804b3faf61SChristian Hopps }
24814b3faf61SChristian Hopps 
24824b3faf61SChristian Hopps /**
24838579d342SChristian Hopps  * iptfs_get_inner_mtu() - return the inner MTU for an IPTFS xfrm.
24848579d342SChristian Hopps  * @x: xfrm state.
24858579d342SChristian Hopps  * @outer_mtu: Outer MTU for the encapsulated packet.
24868579d342SChristian Hopps  *
24878579d342SChristian Hopps  * Return: Correct MTU taking in to account the encap overhead.
24888579d342SChristian Hopps  */
24898579d342SChristian Hopps static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu)
24908579d342SChristian Hopps {
24918579d342SChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
24928579d342SChristian Hopps 
24938579d342SChristian Hopps 	/* If not dont-frag we have no MTU */
24948579d342SChristian Hopps 	if (!xtfs->cfg.dont_frag)
24958579d342SChristian Hopps 		return x->outer_mode.family == AF_INET ? IP_MAX_MTU : IP6_MAX_MTU;
24968579d342SChristian Hopps 	return __iptfs_get_inner_mtu(x, outer_mtu);
24978579d342SChristian Hopps }
24988579d342SChristian Hopps 
24998579d342SChristian Hopps /**
25004b3faf61SChristian Hopps  * iptfs_user_init() - initialize the SA with IPTFS options from netlink.
25014b3faf61SChristian Hopps  * @net: the net data
25024b3faf61SChristian Hopps  * @x: xfrm state
25034b3faf61SChristian Hopps  * @attrs: netlink attributes
25044b3faf61SChristian Hopps  * @extack: extack return data
25054b3faf61SChristian Hopps  *
25064b3faf61SChristian Hopps  * Return: 0 on success or a negative error code on failure
25074b3faf61SChristian Hopps  */
25084b3faf61SChristian Hopps static int iptfs_user_init(struct net *net, struct xfrm_state *x,
25094b3faf61SChristian Hopps 			   struct nlattr **attrs,
25104b3faf61SChristian Hopps 			   struct netlink_ext_ack *extack)
25114b3faf61SChristian Hopps {
25124b3faf61SChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
25134b3faf61SChristian Hopps 	struct xfrm_iptfs_config *xc;
25140e4fbf01SChristian Hopps 	u64 q;
25154b3faf61SChristian Hopps 
25164b3faf61SChristian Hopps 	xc = &xtfs->cfg;
25170e4fbf01SChristian Hopps 	xc->max_queue_size = IPTFS_DEFAULT_MAX_QUEUE_SIZE;
25186be02e3eSChristian Hopps 	xc->reorder_win_size = IPTFS_DEFAULT_REORDER_WINDOW;
251907569476SChristian Hopps 	xtfs->drop_time_ns = IPTFS_DEFAULT_DROP_TIME_USECS * NSECS_IN_USEC;
25200e4fbf01SChristian Hopps 	xtfs->init_delay_ns = IPTFS_DEFAULT_INIT_DELAY_USECS * NSECS_IN_USEC;
25214b3faf61SChristian Hopps 
25228579d342SChristian Hopps 	if (attrs[XFRMA_IPTFS_DONT_FRAG])
25238579d342SChristian Hopps 		xc->dont_frag = true;
25246be02e3eSChristian Hopps 	if (attrs[XFRMA_IPTFS_REORDER_WINDOW])
25256be02e3eSChristian Hopps 		xc->reorder_win_size =
25266be02e3eSChristian Hopps 			nla_get_u16(attrs[XFRMA_IPTFS_REORDER_WINDOW]);
25276be02e3eSChristian Hopps 	/* saved array is for saving 1..N seq nums from wantseq */
25286be02e3eSChristian Hopps 	if (xc->reorder_win_size) {
25296be02e3eSChristian Hopps 		xtfs->w_saved = kcalloc(xc->reorder_win_size,
25306be02e3eSChristian Hopps 					sizeof(*xtfs->w_saved), GFP_KERNEL);
25316be02e3eSChristian Hopps 		if (!xtfs->w_saved) {
25326be02e3eSChristian Hopps 			NL_SET_ERR_MSG(extack, "Cannot alloc reorder window");
25336be02e3eSChristian Hopps 			return -ENOMEM;
25346be02e3eSChristian Hopps 		}
25356be02e3eSChristian Hopps 	}
25364b3faf61SChristian Hopps 	if (attrs[XFRMA_IPTFS_PKT_SIZE]) {
25374b3faf61SChristian Hopps 		xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]);
25384b3faf61SChristian Hopps 		if (!xc->pkt_size) {
25394b3faf61SChristian Hopps 			xtfs->payload_mtu = 0;
25404b3faf61SChristian Hopps 		} else if (xc->pkt_size > x->props.header_len) {
25414b3faf61SChristian Hopps 			xtfs->payload_mtu = xc->pkt_size - x->props.header_len;
25424b3faf61SChristian Hopps 		} else {
25434b3faf61SChristian Hopps 			NL_SET_ERR_MSG(extack,
25444b3faf61SChristian Hopps 				       "Packet size must be 0 or greater than IPTFS/ESP header length");
25454b3faf61SChristian Hopps 			return -EINVAL;
25464b3faf61SChristian Hopps 		}
25474b3faf61SChristian Hopps 	}
25480e4fbf01SChristian Hopps 	if (attrs[XFRMA_IPTFS_MAX_QSIZE])
25490e4fbf01SChristian Hopps 		xc->max_queue_size = nla_get_u32(attrs[XFRMA_IPTFS_MAX_QSIZE]);
255007569476SChristian Hopps 	if (attrs[XFRMA_IPTFS_DROP_TIME])
255107569476SChristian Hopps 		xtfs->drop_time_ns =
255207569476SChristian Hopps 			(u64)nla_get_u32(attrs[XFRMA_IPTFS_DROP_TIME]) *
255307569476SChristian Hopps 			NSECS_IN_USEC;
25540e4fbf01SChristian Hopps 	if (attrs[XFRMA_IPTFS_INIT_DELAY])
25550e4fbf01SChristian Hopps 		xtfs->init_delay_ns =
25560e4fbf01SChristian Hopps 			(u64)nla_get_u32(attrs[XFRMA_IPTFS_INIT_DELAY]) * NSECS_IN_USEC;
25570e4fbf01SChristian Hopps 
25580e4fbf01SChristian Hopps 	q = (u64)xc->max_queue_size * 95;
25590e4fbf01SChristian Hopps 	do_div(q, 100);
25600e4fbf01SChristian Hopps 	xtfs->ecn_queue_size = (u32)q;
25610e4fbf01SChristian Hopps 
25624b3faf61SChristian Hopps 	return 0;
25634b3faf61SChristian Hopps }
25644b3faf61SChristian Hopps 
25654b3faf61SChristian Hopps static unsigned int iptfs_sa_len(const struct xfrm_state *x)
25664b3faf61SChristian Hopps {
25674b3faf61SChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
25684b3faf61SChristian Hopps 	struct xfrm_iptfs_config *xc = &xtfs->cfg;
25694b3faf61SChristian Hopps 	unsigned int l = 0;
25704b3faf61SChristian Hopps 
257107569476SChristian Hopps 	if (x->dir == XFRM_SA_DIR_IN) {
257207569476SChristian Hopps 		l += nla_total_size(sizeof(u32)); /* drop time usec */
25736be02e3eSChristian Hopps 		l += nla_total_size(sizeof(xc->reorder_win_size));
257407569476SChristian Hopps 	} else {
25758579d342SChristian Hopps 		if (xc->dont_frag)
25768579d342SChristian Hopps 			l += nla_total_size(0);	  /* dont-frag flag */
25770e4fbf01SChristian Hopps 		l += nla_total_size(sizeof(u32)); /* init delay usec */
25780e4fbf01SChristian Hopps 		l += nla_total_size(sizeof(xc->max_queue_size));
25794b3faf61SChristian Hopps 		l += nla_total_size(sizeof(xc->pkt_size));
25800e4fbf01SChristian Hopps 	}
25814b3faf61SChristian Hopps 
25824b3faf61SChristian Hopps 	return l;
25834b3faf61SChristian Hopps }
25844b3faf61SChristian Hopps 
25854b3faf61SChristian Hopps static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb)
25864b3faf61SChristian Hopps {
25874b3faf61SChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
25884b3faf61SChristian Hopps 	struct xfrm_iptfs_config *xc = &xtfs->cfg;
25894b3faf61SChristian Hopps 	int ret = 0;
25900e4fbf01SChristian Hopps 	u64 q;
25914b3faf61SChristian Hopps 
259207569476SChristian Hopps 	if (x->dir == XFRM_SA_DIR_IN) {
259307569476SChristian Hopps 		q = xtfs->drop_time_ns;
259407569476SChristian Hopps 		do_div(q, NSECS_IN_USEC);
259507569476SChristian Hopps 		ret = nla_put_u32(skb, XFRMA_IPTFS_DROP_TIME, q);
25966be02e3eSChristian Hopps 		if (ret)
25976be02e3eSChristian Hopps 			return ret;
25986be02e3eSChristian Hopps 
25996be02e3eSChristian Hopps 		ret = nla_put_u16(skb, XFRMA_IPTFS_REORDER_WINDOW,
26006be02e3eSChristian Hopps 				  xc->reorder_win_size);
260107569476SChristian Hopps 	} else {
26028579d342SChristian Hopps 		if (xc->dont_frag) {
26038579d342SChristian Hopps 			ret = nla_put_flag(skb, XFRMA_IPTFS_DONT_FRAG);
26048579d342SChristian Hopps 			if (ret)
26058579d342SChristian Hopps 				return ret;
26068579d342SChristian Hopps 		}
26078579d342SChristian Hopps 
26080e4fbf01SChristian Hopps 		q = xtfs->init_delay_ns;
26090e4fbf01SChristian Hopps 		do_div(q, NSECS_IN_USEC);
26100e4fbf01SChristian Hopps 		ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q);
26110e4fbf01SChristian Hopps 		if (ret)
26120e4fbf01SChristian Hopps 			return ret;
26130e4fbf01SChristian Hopps 
26140e4fbf01SChristian Hopps 		ret = nla_put_u32(skb, XFRMA_IPTFS_MAX_QSIZE, xc->max_queue_size);
26150e4fbf01SChristian Hopps 		if (ret)
26160e4fbf01SChristian Hopps 			return ret;
26170e4fbf01SChristian Hopps 
26184b3faf61SChristian Hopps 		ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size);
26190e4fbf01SChristian Hopps 	}
26204b3faf61SChristian Hopps 
26214b3faf61SChristian Hopps 	return ret;
26224b3faf61SChristian Hopps }
26234b3faf61SChristian Hopps 
26244b3faf61SChristian Hopps static void __iptfs_init_state(struct xfrm_state *x,
26254b3faf61SChristian Hopps 			       struct xfrm_iptfs_data *xtfs)
26264b3faf61SChristian Hopps {
26270e4fbf01SChristian Hopps 	__skb_queue_head_init(&xtfs->queue);
2628*1417c85dSNam Cao 	hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE);
26290e4fbf01SChristian Hopps 
263007569476SChristian Hopps 	spin_lock_init(&xtfs->drop_lock);
2631*1417c85dSNam Cao 	hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE);
263207569476SChristian Hopps 
26334b3faf61SChristian Hopps 	/* Modify type (esp) adjustment values */
26344b3faf61SChristian Hopps 
26354b3faf61SChristian Hopps 	if (x->props.family == AF_INET)
26364b3faf61SChristian Hopps 		x->props.header_len += sizeof(struct iphdr) + sizeof(struct ip_iptfs_hdr);
26374b3faf61SChristian Hopps 	else if (x->props.family == AF_INET6)
26384b3faf61SChristian Hopps 		x->props.header_len += sizeof(struct ipv6hdr) + sizeof(struct ip_iptfs_hdr);
26394b3faf61SChristian Hopps 	x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr);
26404b3faf61SChristian Hopps 
26414b3faf61SChristian Hopps 	/* Always keep a module reference when x->mode_data is set */
26424b3faf61SChristian Hopps 	__module_get(x->mode_cbs->owner);
26434b3faf61SChristian Hopps 
26444b3faf61SChristian Hopps 	x->mode_data = xtfs;
26454b3faf61SChristian Hopps 	xtfs->x = x;
26464b3faf61SChristian Hopps }
26474b3faf61SChristian Hopps 
26484b3faf61SChristian Hopps static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
26494b3faf61SChristian Hopps {
26504b3faf61SChristian Hopps 	struct xfrm_iptfs_data *xtfs;
26514b3faf61SChristian Hopps 
26524b3faf61SChristian Hopps 	xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
26534b3faf61SChristian Hopps 	if (!xtfs)
26544b3faf61SChristian Hopps 		return -ENOMEM;
26554b3faf61SChristian Hopps 
26564b3faf61SChristian Hopps 	x->mode_data = xtfs;
26574b3faf61SChristian Hopps 	xtfs->x = x;
26584b3faf61SChristian Hopps 
265907569476SChristian Hopps 	xtfs->ra_newskb = NULL;
26606be02e3eSChristian Hopps 	if (xtfs->cfg.reorder_win_size) {
26616be02e3eSChristian Hopps 		xtfs->w_saved = kcalloc(xtfs->cfg.reorder_win_size,
26626be02e3eSChristian Hopps 					sizeof(*xtfs->w_saved), GFP_KERNEL);
26636be02e3eSChristian Hopps 		if (!xtfs->w_saved) {
26646be02e3eSChristian Hopps 			kfree_sensitive(xtfs);
26656be02e3eSChristian Hopps 			return -ENOMEM;
26666be02e3eSChristian Hopps 		}
26676be02e3eSChristian Hopps 	}
266807569476SChristian Hopps 
26694b3faf61SChristian Hopps 	return 0;
26704b3faf61SChristian Hopps }
26714b3faf61SChristian Hopps 
26724b3faf61SChristian Hopps static int iptfs_init_state(struct xfrm_state *x)
26734b3faf61SChristian Hopps {
26744b3faf61SChristian Hopps 	struct xfrm_iptfs_data *xtfs;
26754b3faf61SChristian Hopps 
26764b3faf61SChristian Hopps 	if (x->mode_data) {
26774b3faf61SChristian Hopps 		/* We have arrived here from xfrm_state_clone() */
26784b3faf61SChristian Hopps 		xtfs = x->mode_data;
26794b3faf61SChristian Hopps 	} else {
26804b3faf61SChristian Hopps 		xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL);
26814b3faf61SChristian Hopps 		if (!xtfs)
26824b3faf61SChristian Hopps 			return -ENOMEM;
26834b3faf61SChristian Hopps 	}
26844b3faf61SChristian Hopps 
26854b3faf61SChristian Hopps 	__iptfs_init_state(x, xtfs);
26864b3faf61SChristian Hopps 
26874b3faf61SChristian Hopps 	return 0;
26884b3faf61SChristian Hopps }
26894b3faf61SChristian Hopps 
26904b3faf61SChristian Hopps static void iptfs_destroy_state(struct xfrm_state *x)
26914b3faf61SChristian Hopps {
26924b3faf61SChristian Hopps 	struct xfrm_iptfs_data *xtfs = x->mode_data;
26930e4fbf01SChristian Hopps 	struct sk_buff_head list;
26946be02e3eSChristian Hopps 	struct skb_wseq *s, *se;
26950e4fbf01SChristian Hopps 	struct sk_buff *skb;
26964b3faf61SChristian Hopps 
26974b3faf61SChristian Hopps 	if (!xtfs)
26984b3faf61SChristian Hopps 		return;
26994b3faf61SChristian Hopps 
27000e4fbf01SChristian Hopps 	spin_lock_bh(&xtfs->x->lock);
27010e4fbf01SChristian Hopps 	hrtimer_cancel(&xtfs->iptfs_timer);
27020e4fbf01SChristian Hopps 	__skb_queue_head_init(&list);
27030e4fbf01SChristian Hopps 	skb_queue_splice_init(&xtfs->queue, &list);
27040e4fbf01SChristian Hopps 	spin_unlock_bh(&xtfs->x->lock);
27050e4fbf01SChristian Hopps 
27060e4fbf01SChristian Hopps 	while ((skb = __skb_dequeue(&list)))
27070e4fbf01SChristian Hopps 		kfree_skb(skb);
27080e4fbf01SChristian Hopps 
270907569476SChristian Hopps 	spin_lock_bh(&xtfs->drop_lock);
271007569476SChristian Hopps 	hrtimer_cancel(&xtfs->drop_timer);
271107569476SChristian Hopps 	spin_unlock_bh(&xtfs->drop_lock);
271207569476SChristian Hopps 
271307569476SChristian Hopps 	if (xtfs->ra_newskb)
271407569476SChristian Hopps 		kfree_skb(xtfs->ra_newskb);
271507569476SChristian Hopps 
27166be02e3eSChristian Hopps 	for (s = xtfs->w_saved, se = s + xtfs->w_savedlen; s < se; s++) {
27176be02e3eSChristian Hopps 		if (s->skb)
27186be02e3eSChristian Hopps 			kfree_skb(s->skb);
27196be02e3eSChristian Hopps 	}
27206be02e3eSChristian Hopps 
27216be02e3eSChristian Hopps 	kfree_sensitive(xtfs->w_saved);
27224b3faf61SChristian Hopps 	kfree_sensitive(xtfs);
27234b3faf61SChristian Hopps 
27244b3faf61SChristian Hopps 	module_put(x->mode_cbs->owner);
27254b3faf61SChristian Hopps }
27264b3faf61SChristian Hopps 
27274b3faf61SChristian Hopps static const struct xfrm_mode_cbs iptfs_mode_cbs = {
27284b3faf61SChristian Hopps 	.owner = THIS_MODULE,
27294b3faf61SChristian Hopps 	.init_state = iptfs_init_state,
27304b3faf61SChristian Hopps 	.clone_state = iptfs_clone_state,
27314b3faf61SChristian Hopps 	.destroy_state = iptfs_destroy_state,
27324b3faf61SChristian Hopps 	.user_init = iptfs_user_init,
27334b3faf61SChristian Hopps 	.copy_to_user = iptfs_copy_to_user,
27344b3faf61SChristian Hopps 	.sa_len = iptfs_sa_len,
27354b3faf61SChristian Hopps 	.get_inner_mtu = iptfs_get_inner_mtu,
27366c82d243SChristian Hopps 	.input = iptfs_input,
27370e4fbf01SChristian Hopps 	.output = iptfs_output_collect,
27380e4fbf01SChristian Hopps 	.prepare_output = iptfs_prepare_output,
27394b3faf61SChristian Hopps };
27404b3faf61SChristian Hopps 
27414b3faf61SChristian Hopps static int __init xfrm_iptfs_init(void)
27424b3faf61SChristian Hopps {
27434b3faf61SChristian Hopps 	int err;
27444b3faf61SChristian Hopps 
27454b3faf61SChristian Hopps 	pr_info("xfrm_iptfs: IPsec IP-TFS tunnel mode module\n");
27464b3faf61SChristian Hopps 
27474b3faf61SChristian Hopps 	err = xfrm_register_mode_cbs(XFRM_MODE_IPTFS, &iptfs_mode_cbs);
27484b3faf61SChristian Hopps 	if (err < 0)
27494b3faf61SChristian Hopps 		pr_info("%s: can't register IP-TFS\n", __func__);
27504b3faf61SChristian Hopps 
27514b3faf61SChristian Hopps 	return err;
27524b3faf61SChristian Hopps }
27534b3faf61SChristian Hopps 
27544b3faf61SChristian Hopps static void __exit xfrm_iptfs_fini(void)
27554b3faf61SChristian Hopps {
27564b3faf61SChristian Hopps 	xfrm_unregister_mode_cbs(XFRM_MODE_IPTFS);
27574b3faf61SChristian Hopps }
27584b3faf61SChristian Hopps 
27594b3faf61SChristian Hopps module_init(xfrm_iptfs_init);
27604b3faf61SChristian Hopps module_exit(xfrm_iptfs_fini);
27614b3faf61SChristian Hopps MODULE_LICENSE("GPL");
27624b3faf61SChristian Hopps MODULE_DESCRIPTION("IP-TFS support for xfrm ipsec tunnels");
2763