xref: /linux/net/netfilter/ipvs/ip_vs_app.c (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * ip_vs_app.c: Application module support for IPVS
4  *
5  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
6  *
7  * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
8  * is that ip_vs_app module handles the reverse direction (incoming requests
9  * and outgoing responses).
10  *
11  *		IP_MASQ_APP application masquerading module
12  *
13  * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
14  */
15 
16 #define pr_fmt(fmt) "IPVS: " fmt
17 
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <linux/in.h>
22 #include <linux/ip.h>
23 #include <linux/netfilter.h>
24 #include <linux/slab.h>
25 #include <net/net_namespace.h>
26 #include <net/protocol.h>
27 #include <net/tcp.h>
28 #include <linux/stat.h>
29 #include <linux/proc_fs.h>
30 #include <linux/seq_file.h>
31 #include <linux/mutex.h>
32 
33 #include <net/ip_vs.h>
34 
EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

/* Serializes all writers of the per-netns app list and proto app tables. */
static DEFINE_MUTEX(__ip_vs_app_mutex);
40 
/*
 *	Get an ip_vs_app object: pin the module that owns @app.
 *	Returns non-zero on success, 0 if the module is being unloaded.
 */
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
	return try_module_get(app->module);
}
48 
49 
/* Drop the module reference taken by ip_vs_app_get(). */
static inline void ip_vs_app_put(struct ip_vs_app *app)
{
	module_put(app->module);
}
54 
/* Free an app incarnation together with its private timeout table. */
static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
{
	kfree(inc->timeout_table);
	kfree(inc);
}
60 
/* RCU callback: free the incarnation once no softirq readers remain. */
static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
{
	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);

	ip_vs_app_inc_destroy(inc);
}
67 
68 /*
69  *	Allocate/initialize app incarnation and register it in proto apps.
70  */
71 static int
72 ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
73 		  __u16 port)
74 {
75 	struct ip_vs_protocol *pp;
76 	struct ip_vs_app *inc;
77 	int ret;
78 
79 	if (!(pp = ip_vs_proto_get(proto)))
80 		return -EPROTONOSUPPORT;
81 
82 	if (!pp->unregister_app)
83 		return -EOPNOTSUPP;
84 
85 	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
86 	if (!inc)
87 		return -ENOMEM;
88 	INIT_LIST_HEAD(&inc->p_list);
89 	INIT_LIST_HEAD(&inc->incs_list);
90 	inc->app = app;
91 	inc->port = htons(port);
92 	atomic_set(&inc->usecnt, 0);
93 
94 	if (app->timeouts) {
95 		inc->timeout_table =
96 			ip_vs_create_timeout_table(app->timeouts,
97 						   app->timeouts_size);
98 		if (!inc->timeout_table) {
99 			ret = -ENOMEM;
100 			goto out;
101 		}
102 	}
103 
104 	ret = pp->register_app(ipvs, inc);
105 	if (ret)
106 		goto out;
107 
108 	list_add(&inc->a_list, &app->incs_list);
109 	IP_VS_DBG(9, "%s App %s:%u registered\n",
110 		  pp->name, inc->name, ntohs(inc->port));
111 
112 	return 0;
113 
114   out:
115 	ip_vs_app_inc_destroy(inc);
116 	return ret;
117 }
118 
119 
120 /*
121  *	Release app incarnation
122  */
123 static void
124 ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
125 {
126 	struct ip_vs_protocol *pp;
127 
128 	if (!(pp = ip_vs_proto_get(inc->protocol)))
129 		return;
130 
131 	if (pp->unregister_app)
132 		pp->unregister_app(ipvs, inc);
133 
134 	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
135 		  pp->name, inc->name, ntohs(inc->port));
136 
137 	list_del(&inc->a_list);
138 
139 	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
140 }
141 
142 
143 /*
144  *	Get reference to app inc (only called from softirq)
145  *
146  */
147 int ip_vs_app_inc_get(struct ip_vs_app *inc)
148 {
149 	int result;
150 
151 	result = ip_vs_app_get(inc->app);
152 	if (result)
153 		atomic_inc(&inc->usecnt);
154 	return result;
155 }
156 
157 
/*
 *	Put the app inc (only called from timer or net softirq).
 *	Drops the usecnt and the module reference taken by
 *	ip_vs_app_inc_get().
 */
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
	atomic_dec(&inc->usecnt);
	ip_vs_app_put(inc->app);
}
166 
167 
168 /*
169  *	Register an application incarnation in protocol applications
170  */
171 int
172 register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
173 		       __u16 port)
174 {
175 	int result;
176 
177 	mutex_lock(&__ip_vs_app_mutex);
178 
179 	result = ip_vs_app_inc_new(ipvs, app, proto, port);
180 
181 	mutex_unlock(&__ip_vs_app_mutex);
182 
183 	return result;
184 }
185 
186 
187 /* Register application for netns */
188 struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
189 {
190 	struct ip_vs_app *a;
191 	int err = 0;
192 
193 	mutex_lock(&__ip_vs_app_mutex);
194 
195 	/* increase the module use count */
196 	if (!ip_vs_use_count_inc()) {
197 		err = -ENOENT;
198 		goto out_unlock;
199 	}
200 
201 	list_for_each_entry(a, &ipvs->app_list, a_list) {
202 		if (!strcmp(app->name, a->name)) {
203 			err = -EEXIST;
204 			/* decrease the module use count */
205 			ip_vs_use_count_dec();
206 			goto out_unlock;
207 		}
208 	}
209 	a = kmemdup(app, sizeof(*app), GFP_KERNEL);
210 	if (!a) {
211 		err = -ENOMEM;
212 		/* decrease the module use count */
213 		ip_vs_use_count_dec();
214 		goto out_unlock;
215 	}
216 	INIT_LIST_HEAD(&a->incs_list);
217 	list_add(&a->a_list, &ipvs->app_list);
218 
219 out_unlock:
220 	mutex_unlock(&__ip_vs_app_mutex);
221 
222 	return err ? ERR_PTR(err) : a;
223 }
224 
225 
/*
 *	ip_vs_app unregistration routine
 *	We are sure there are no app incarnations attached to services
 *	Caller should use synchronize_rcu() or rcu_barrier()
 */
void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
	struct ip_vs_app *a, *anxt, *inc, *nxt;

	mutex_lock(&__ip_vs_app_mutex);

	list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
		/* A NULL @app means "unregister all" (netns cleanup) */
		if (app && strcmp(app->name, a->name))
			continue;
		/* Release every per-port incarnation of this application */
		list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
			ip_vs_app_inc_release(ipvs, inc);
		}

		list_del(&a->a_list);
		kfree(a);

		/* decrease the module use count */
		ip_vs_use_count_dec();
	}

	mutex_unlock(&__ip_vs_app_mutex);
}
253 
254 
/*
 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor).
 *	Delegates to the protocol-specific binding hook.
 */
int ip_vs_bind_app(struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp)
{
	return pp->app_conn_bind(cp);
}
263 
264 
265 /*
266  *	Unbind cp from application incarnation (called by cp destructor)
267  */
268 void ip_vs_unbind_app(struct ip_vs_conn *cp)
269 {
270 	struct ip_vs_app *inc = cp->app;
271 
272 	if (!inc)
273 		return;
274 
275 	if (inc->unbind_conn)
276 		inc->unbind_conn(inc, cp);
277 	if (inc->done_conn)
278 		inc->done_conn(inc, cp);
279 	ip_vs_app_inc_put(inc);
280 	cp->app = NULL;
281 }
282 
283 
284 /*
285  *	Fixes th->seq based on ip_vs_seq info.
286  */
287 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
288 {
289 	__u32 seq = ntohl(th->seq);
290 
291 	/*
292 	 *	Adjust seq with delta-offset for all packets after
293 	 *	the most recent resized pkt seq and with previous_delta offset
294 	 *	for all packets	before most recent resized pkt seq.
295 	 */
296 	if (vseq->delta || vseq->previous_delta) {
297 		if(after(seq, vseq->init_seq)) {
298 			th->seq = htonl(seq + vseq->delta);
299 			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
300 				  __func__, vseq->delta);
301 		} else {
302 			th->seq = htonl(seq + vseq->previous_delta);
303 			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
304 				  __func__, vseq->previous_delta);
305 		}
306 	}
307 }
308 
309 
310 /*
311  *	Fixes th->ack_seq based on ip_vs_seq info.
312  */
313 static inline void
314 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
315 {
316 	__u32 ack_seq = ntohl(th->ack_seq);
317 
318 	/*
319 	 * Adjust ack_seq with delta-offset for
320 	 * the packets AFTER most recent resized pkt has caused a shift
321 	 * for packets before most recent resized pkt, use previous_delta
322 	 */
323 	if (vseq->delta || vseq->previous_delta) {
324 		/* since ack_seq is the number of octet that is expected
325 		   to receive next, so compare it with init_seq+delta */
326 		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
327 			th->ack_seq = htonl(ack_seq - vseq->delta);
328 			IP_VS_DBG(9, "%s(): subtracted delta "
329 				  "(%d) from ack_seq\n", __func__, vseq->delta);
330 
331 		} else {
332 			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
333 			IP_VS_DBG(9, "%s(): subtracted "
334 				  "previous_delta (%d) from ack_seq\n",
335 				  __func__, vseq->previous_delta);
336 		}
337 	}
338 }
339 
340 
/*
 *	Updates ip_vs_seq if pkt has been resized
 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
				 unsigned int flag, __u32 seq, int diff)
{
	/* spinlock is to keep updating cp->flags atomic */
	spin_lock_bh(&cp->lock);
	/* Record a new resize point for the first resize ever seen (flag
	 * not yet set) or for a packet beyond the last recorded one. */
	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
		vseq->previous_delta = vseq->delta;
		vseq->delta += diff;
		vseq->init_seq = seq;
		cp->flags |= flag;
	}
	spin_unlock_bh(&cp->lock);
}
358 
359 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
360 				  struct ip_vs_app *app,
361 				  struct ip_vs_iphdr *ipvsh)
362 {
363 	int diff;
364 	const unsigned int tcp_offset = ip_hdrlen(skb);
365 	struct tcphdr *th;
366 	__u32 seq;
367 
368 	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
369 		return 0;
370 
371 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
372 
373 	/*
374 	 *	Remember seq number in case this pkt gets resized
375 	 */
376 	seq = ntohl(th->seq);
377 
378 	/*
379 	 *	Fix seq stuff if flagged as so.
380 	 */
381 	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
382 		vs_fix_seq(&cp->out_seq, th);
383 	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
384 		vs_fix_ack_seq(&cp->in_seq, th);
385 
386 	/*
387 	 *	Call private output hook function
388 	 */
389 	if (app->pkt_out == NULL)
390 		return 1;
391 
392 	if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
393 		return 0;
394 
395 	/*
396 	 *	Update ip_vs seq stuff if len has changed.
397 	 */
398 	if (diff != 0)
399 		vs_seq_update(cp, &cp->out_seq,
400 			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
401 
402 	return 1;
403 }
404 
405 /*
406  *	Output pkt hook. Will call bound ip_vs_app specific function
407  *	called by ipvs packet handler, assumes previously checked cp!=NULL
408  *	returns false if it can't handle packet (oom)
409  */
410 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
411 		      struct ip_vs_iphdr *ipvsh)
412 {
413 	struct ip_vs_app *app;
414 
415 	/*
416 	 *	check if application module is bound to
417 	 *	this ip_vs_conn.
418 	 */
419 	if ((app = cp->app) == NULL)
420 		return 1;
421 
422 	/* TCP is complicated */
423 	if (cp->protocol == IPPROTO_TCP)
424 		return app_tcp_pkt_out(cp, skb, app, ipvsh);
425 
426 	/*
427 	 *	Call private output hook function
428 	 */
429 	if (app->pkt_out == NULL)
430 		return 1;
431 
432 	return app->pkt_out(app, cp, skb, NULL, ipvsh);
433 }
434 
435 
436 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
437 				 struct ip_vs_app *app,
438 				 struct ip_vs_iphdr *ipvsh)
439 {
440 	int diff;
441 	const unsigned int tcp_offset = ip_hdrlen(skb);
442 	struct tcphdr *th;
443 	__u32 seq;
444 
445 	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
446 		return 0;
447 
448 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
449 
450 	/*
451 	 *	Remember seq number in case this pkt gets resized
452 	 */
453 	seq = ntohl(th->seq);
454 
455 	/*
456 	 *	Fix seq stuff if flagged as so.
457 	 */
458 	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
459 		vs_fix_seq(&cp->in_seq, th);
460 	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
461 		vs_fix_ack_seq(&cp->out_seq, th);
462 
463 	/*
464 	 *	Call private input hook function
465 	 */
466 	if (app->pkt_in == NULL)
467 		return 1;
468 
469 	if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
470 		return 0;
471 
472 	/*
473 	 *	Update ip_vs seq stuff if len has changed.
474 	 */
475 	if (diff != 0)
476 		vs_seq_update(cp, &cp->in_seq,
477 			      IP_VS_CONN_F_IN_SEQ, seq, diff);
478 
479 	return 1;
480 }
481 
482 /*
483  *	Input pkt hook. Will call bound ip_vs_app specific function
484  *	called by ipvs packet handler, assumes previously checked cp!=NULL.
485  *	returns false if can't handle packet (oom).
486  */
487 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
488 		     struct ip_vs_iphdr *ipvsh)
489 {
490 	struct ip_vs_app *app;
491 
492 	/*
493 	 *	check if application module is bound to
494 	 *	this ip_vs_conn.
495 	 */
496 	if ((app = cp->app) == NULL)
497 		return 1;
498 
499 	/* TCP is complicated */
500 	if (cp->protocol == IPPROTO_TCP)
501 		return app_tcp_pkt_in(cp, skb, app, ipvsh);
502 
503 	/*
504 	 *	Call private input hook function
505 	 */
506 	if (app->pkt_in == NULL)
507 		return 1;
508 
509 	return app->pkt_in(app, cp, skb, NULL, ipvsh);
510 }
511 
512 
513 #ifdef CONFIG_PROC_FS
514 /*
515  *	/proc/net/ip_vs_app entry function
516  */
517 
518 static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
519 {
520 	struct ip_vs_app *app, *inc;
521 
522 	list_for_each_entry(app, &ipvs->app_list, a_list) {
523 		list_for_each_entry(inc, &app->incs_list, a_list) {
524 			if (pos-- == 0)
525 				return inc;
526 		}
527 	}
528 	return NULL;
529 
530 }
531 
532 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
533 {
534 	struct net *net = seq_file_net(seq);
535 	struct netns_ipvs *ipvs = net_ipvs(net);
536 
537 	mutex_lock(&__ip_vs_app_mutex);
538 
539 	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
540 }
541 
/* seq_file next: advance to the following incarnation, crossing over to
 * the next application (skipping apps with no incarnations) as needed. */
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip_vs_app *inc, *app;
	struct list_head *e;
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	inc = v;
	app = inc->app;

	/* More incarnations remain in the current application? */
	if ((e = inc->a_list.next) != &app->incs_list)
		return list_entry(e, struct ip_vs_app, a_list);

	/* go on to next application */
	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
		app = list_entry(e, struct ip_vs_app, a_list);
		/* yields the app's first incarnation, if it has any */
		list_for_each_entry(inc, &app->incs_list, a_list) {
			return inc;
		}
	}
	return NULL;
}
568 
/* seq_file stop: release the mutex taken in ip_vs_app_seq_start(). */
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
	mutex_unlock(&__ip_vs_app_mutex);
}
573 
574 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
575 {
576 	if (v == SEQ_START_TOKEN)
577 		seq_puts(seq, "prot port    usecnt name\n");
578 	else {
579 		const struct ip_vs_app *inc = v;
580 
581 		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
582 			   ip_vs_proto_name(inc->protocol),
583 			   ntohs(inc->port),
584 			   atomic_read(&inc->usecnt),
585 			   inc->name);
586 	}
587 	return 0;
588 }
589 
/* seq_file iterator for /proc/net/ip_vs_app */
static const struct seq_operations ip_vs_app_seq_ops = {
	.start = ip_vs_app_seq_start,
	.next  = ip_vs_app_seq_next,
	.stop  = ip_vs_app_seq_stop,
	.show  = ip_vs_app_seq_show,
};
596 #endif
597 
/* Per-netns init: set up the app list and /proc/net/ip_vs_app.
 * Returns 0 on success or -ENOMEM when the proc entry can't be made. */
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
	INIT_LIST_HEAD(&ipvs->app_list);
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
			     &ip_vs_app_seq_ops,
			     sizeof(struct seq_net_private)))
		return -ENOMEM;
#endif
	return 0;
}
609 
/* Per-netns cleanup: unregister every app, then remove the proc entry. */
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
	unregister_ip_vs_app(ipvs, NULL /* all */);
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
#endif
}
617