xref: /linux/net/xfrm/xfrm_nat_keepalive.c (revision 1b98f357dadd6ea613a435fbaef1a5dd7b35fd21)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * xfrm_nat_keepalive.c
4  *
5  * (c) 2024 Eyal Birger <eyal.birger@gmail.com>
6  */
7 
8 #include <net/inet_common.h>
9 #include <net/ip6_checksum.h>
10 #include <net/xfrm.h>
11 
/*
 * Per-CPU raw control sockets used to transmit keepalive probes.  They
 * are created in &init_net (see nat_keepalive_sk_init()) and temporarily
 * re-targeted to the sending netns under the per-CPU bh_lock.
 */
static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv4) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
#if IS_ENABLED(CONFIG_IPV6)
static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv6) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
#endif
20 
/*
 * Snapshot of the xfrm_state parameters needed to build and route one
 * keepalive probe.  Filled under x->lock by nat_keepalive_init() so the
 * packet can be sent after the lock is dropped.
 */
struct nat_keepalive {
	struct net *net;	/* netns of the state (xs_net(x)) */
	u16 family;		/* AF_INET or AF_INET6 (x->props.family) */
	xfrm_address_t saddr;	/* from x->props.saddr */
	xfrm_address_t daddr;	/* from x->id.daddr */
	__be16 encap_sport;	/* UDP encapsulation source port */
	__be16 encap_dport;	/* UDP encapsulation destination port */
	__u32 smark;		/* skb->mark for the probe (xfrm_smark_get) */
};
30 
31 static void nat_keepalive_init(struct nat_keepalive *ka, struct xfrm_state *x)
32 {
33 	ka->net = xs_net(x);
34 	ka->family = x->props.family;
35 	ka->saddr = x->props.saddr;
36 	ka->daddr = x->id.daddr;
37 	ka->encap_sport = x->encap->encap_sport;
38 	ka->encap_dport = x->encap->encap_dport;
39 	ka->smark = xfrm_smark_get(0, x);
40 }
41 
/*
 * Route and transmit a prepared keepalive skb over IPv4.
 *
 * An output route is looked up using the state's addresses and UDP encap
 * ports, then the packet is sent through this CPU's raw control socket.
 * The socket lives in &init_net, so its netns is switched to ka->net for
 * the duration of the send, protected by the per-CPU bh_lock.
 *
 * Returns 0 on success or a negative errno; on route lookup failure the
 * skb is left for the caller to free.
 */
static int nat_keepalive_send_ipv4(struct sk_buff *skb,
				   struct nat_keepalive *ka)
{
	struct net *net = ka->net;
	struct flowi4 fl4;
	struct rtable *rt;
	struct sock *sk;
	__u8 tos = 0;
	int err;

	flowi4_init_output(&fl4, 0 /* oif */, skb->mark, tos,
			   RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0,
			   ka->daddr.a4, ka->saddr.a4, ka->encap_dport,
			   ka->encap_sport, sock_net_uid(net, NULL));

	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	skb_dst_set(skb, &rt->dst);

	local_lock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock);
	sk = this_cpu_read(nat_keepalive_sk_ipv4.sock);
	/* Borrow the init_net control socket for ka->net, restore after. */
	sock_net_set(sk, net);
	err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos);
	sock_net_set(sk, &init_net);
	local_unlock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock);
	return err;
}
71 
#if IS_ENABLED(CONFIG_IPV6)
/*
 * Route and transmit a prepared keepalive skb over IPv6.
 *
 * Unlike IPv4, the UDP checksum is mandatory for IPv6, so it is computed
 * here over the already-built UDP header/payload before the route lookup.
 * The per-CPU control socket is borrowed for ka->net under the per-CPU
 * bh_lock, mirroring nat_keepalive_send_ipv4().
 *
 * Returns 0 on success or a negative errno; on dst lookup failure the
 * lock is dropped and the skb is left for the caller to free.
 */
static int nat_keepalive_send_ipv6(struct sk_buff *skb,
				   struct nat_keepalive *ka,
				   struct udphdr *uh)
{
	struct net *net = ka->net;
	struct dst_entry *dst;
	struct flowi6 fl6;
	struct sock *sk;
	__wsum csum;
	int err;

	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = csum_ipv6_magic(&ka->saddr.in6, &ka->daddr.in6,
				    skb->len, IPPROTO_UDP, csum);
	/* A computed checksum of zero must be sent as all-ones (RFC 768). */
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.saddr = ka->saddr.in6;
	fl6.daddr = ka->daddr.in6;
	fl6.flowi6_proto = IPPROTO_UDP;
	fl6.fl6_sport = ka->encap_sport;
	fl6.fl6_dport = ka->encap_dport;

	local_lock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
	sk = this_cpu_read(nat_keepalive_sk_ipv6.sock);
	sock_net_set(sk, net);
	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL);
	if (IS_ERR(dst)) {
		local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
		return PTR_ERR(dst);
	}

	skb_dst_set(skb, dst);
	err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0);
	sock_net_set(sk, &init_net);
	local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
	return err;
}
#endif
114 
/*
 * Build and send one NAT-T keepalive probe: a UDP datagram carrying the
 * single byte 0xFF between the state's encap ports (the keepalive format
 * of RFC 3948), then hand it to the family-specific transmit helper.
 *
 * Allocation is GFP_ATOMIC (may run from workqueue context with the
 * state walk in progress); on any failure the probe is simply dropped.
 */
static void nat_keepalive_send(struct nat_keepalive *ka)
{
	/* Reserve enough headroom for the larger of the two IP headers. */
	const int nat_ka_hdrs_len = max(sizeof(struct iphdr),
					sizeof(struct ipv6hdr)) +
				    sizeof(struct udphdr);
	const u8 nat_ka_payload = 0xFF;
	int err = -EAFNOSUPPORT;
	struct sk_buff *skb;
	struct udphdr *uh;

	skb = alloc_skb(nat_ka_hdrs_len + sizeof(nat_ka_payload), GFP_ATOMIC);
	if (unlikely(!skb))
		return;

	skb_reserve(skb, nat_ka_hdrs_len);

	/* Payload first, then push the UDP header in front of it. */
	skb_put_u8(skb, nat_ka_payload);

	uh = skb_push(skb, sizeof(*uh));
	uh->source = ka->encap_sport;
	uh->dest = ka->encap_dport;
	uh->len = htons(skb->len);
	uh->check = 0;	/* filled in for IPv6 by nat_keepalive_send_ipv6() */

	skb->mark = ka->smark;

	switch (ka->family) {
	case AF_INET:
		err = nat_keepalive_send_ipv4(skb, ka);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		err = nat_keepalive_send_ipv6(skb, ka, uh);
		break;
#endif
	}
	/* Unknown family keeps err == -EAFNOSUPPORT and frees the skb. */
	if (err)
		kfree_skb(skb);
}
154 
/* Scan state shared across one xfrm_state_walk() pass. */
struct nat_keepalive_work_ctx {
	time64_t next_run;	/* earliest pending expiration seen; 0 = none */
	time64_t now;		/* wall-clock seconds when the scan started */
};
159 
/*
 * Per-state callback for the keepalive scan (xfrm_state_walk).
 *
 * Decides, under x->lock, whether a keepalive is due for @x and tracks
 * the earliest next expiration in the walk context.  The probe itself is
 * sent after dropping the lock, from a snapshot taken while it was held.
 *
 * Always returns 0 so the walk continues over all states.
 */
static int nat_keepalive_work_single(struct xfrm_state *x, int count, void *ptr)
{
	struct nat_keepalive_work_ctx *ctx = ptr;
	bool send_keepalive = false;
	struct nat_keepalive ka;
	time64_t next_run;
	u32 interval;
	int delta;

	interval = x->nat_keepalive_interval;
	if (!interval)
		return 0;	/* keepalives not enabled for this state */

	spin_lock(&x->lock);

	delta = (int)(ctx->now - x->lastused);
	if (delta < interval) {
		/* State carried traffic recently: push expiration forward. */
		x->nat_keepalive_expiration = ctx->now + interval - delta;
		next_run = x->nat_keepalive_expiration;
	} else if (x->nat_keepalive_expiration > ctx->now) {
		/* Idle, but a previously-set expiration is still pending. */
		next_run = x->nat_keepalive_expiration;
	} else {
		/* Idle and expired: snapshot the state and send a probe. */
		next_run = ctx->now + interval;
		nat_keepalive_init(&ka, x);
		send_keepalive = true;
	}

	spin_unlock(&x->lock);

	if (send_keepalive)
		nat_keepalive_send(&ka);

	if (!ctx->next_run || next_run < ctx->next_run)
		ctx->next_run = next_run;
	return 0;
}
196 
/*
 * Delayed-work handler: walk every ESP state in the netns, sending
 * keepalives where due (nat_keepalive_work_single), then re-arm the
 * work for the earliest upcoming expiration, if any state needs one.
 */
static void nat_keepalive_work(struct work_struct *work)
{
	struct nat_keepalive_work_ctx ctx;
	struct xfrm_state_walk walk;
	struct net *net;

	ctx.next_run = 0;
	ctx.now = ktime_get_real_seconds();

	net = container_of(work, struct net, xfrm.nat_keepalive_work.work);
	xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL);
	xfrm_state_walk(net, &walk, nat_keepalive_work_single, &ctx);
	xfrm_state_walk_done(&walk, net);
	if (ctx.next_run)
		schedule_delayed_work(&net->xfrm.nat_keepalive_work,
				      (ctx.next_run - ctx.now) * HZ);
}
214 
215 static int nat_keepalive_sk_init(struct sock_bh_locked __percpu *socks,
216 				 unsigned short family)
217 {
218 	struct sock *sk;
219 	int err, i;
220 
221 	for_each_possible_cpu(i) {
222 		err = inet_ctl_sock_create(&sk, family, SOCK_RAW, IPPROTO_UDP,
223 					   &init_net);
224 		if (err < 0)
225 			goto err;
226 
227 		per_cpu_ptr(socks, i)->sock = sk;
228 	}
229 
230 	return 0;
231 err:
232 	for_each_possible_cpu(i)
233 		inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock);
234 	return err;
235 }
236 
237 static void nat_keepalive_sk_fini(struct sock_bh_locked __percpu *socks)
238 {
239 	int i;
240 
241 	for_each_possible_cpu(i)
242 		inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock);
243 }
244 
245 void xfrm_nat_keepalive_state_updated(struct xfrm_state *x)
246 {
247 	struct net *net;
248 
249 	if (!x->nat_keepalive_interval)
250 		return;
251 
252 	net = xs_net(x);
253 	schedule_delayed_work(&net->xfrm.nat_keepalive_work, 0);
254 }
255 
/* Per-netns setup: prepare (but do not arm) the keepalive work item. */
int __net_init xfrm_nat_keepalive_net_init(struct net *net)
{
	INIT_DELAYED_WORK(&net->xfrm.nat_keepalive_work, nat_keepalive_work);
	return 0;
}
261 
/* Per-netns teardown: stop the keepalive work and wait for it to finish. */
int xfrm_nat_keepalive_net_fini(struct net *net)
{
	cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work);
	return 0;
}
267 
268 int xfrm_nat_keepalive_init(unsigned short family)
269 {
270 	int err = -EAFNOSUPPORT;
271 
272 	switch (family) {
273 	case AF_INET:
274 		err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET);
275 		break;
276 #if IS_ENABLED(CONFIG_IPV6)
277 	case AF_INET6:
278 		err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv6, PF_INET6);
279 		break;
280 #endif
281 	}
282 
283 	if (err)
284 		pr_err("xfrm nat keepalive init: failed to init err:%d\n", err);
285 	return err;
286 }
287 EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_init);
288 
289 void xfrm_nat_keepalive_fini(unsigned short family)
290 {
291 	switch (family) {
292 	case AF_INET:
293 		nat_keepalive_sk_fini(&nat_keepalive_sk_ipv4);
294 		break;
295 #if IS_ENABLED(CONFIG_IPV6)
296 	case AF_INET6:
297 		nat_keepalive_sk_fini(&nat_keepalive_sk_ipv6);
298 		break;
299 #endif
300 	}
301 }
302 EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_fini);
303