// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ip_vs_app.c: Application module support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
 * is that ip_vs_app module handles the reverse direction (incoming requests
 * and outgoing responses).
 *
 *		IP_MASQ_APP application masquerading module
 *
 * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
 */

#define pr_fmt(fmt) "IPVS: " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <linux/stat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>

#include <net/ip_vs.h>

EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

/* Serializes all (un)registration and /proc iteration of the per-netns
 * app_list and each app's incs_list. */
static DEFINE_MUTEX(__ip_vs_app_mutex);

/*
 *	Get an ip_vs_app object
 *	Takes a reference on the module that owns @app; returns non-zero on
 *	success, 0 if the module is going away (try_module_get semantics).
 */
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
	return try_module_get(app->module);
}


/* Drop the module reference taken by ip_vs_app_get(). */
static inline void ip_vs_app_put(struct ip_vs_app *app)
{
	module_put(app->module);
}

/* Free an app incarnation and its (possibly NULL) timeout table. */
static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
{
	kfree(inc->timeout_table);
	kfree(inc);
}

/* RCU callback: destroy an incarnation after readers are done with it. */
static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
{
	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);

	ip_vs_app_inc_destroy(inc);
}

/*
 *	Allocate/initialize app incarnation and register it in proto apps.
 *
 *	@port is in host byte order; it is stored in network order in
 *	inc->port.  Called with __ip_vs_app_mutex held (via
 *	register_ip_vs_app_inc).  On any failure the partially built
 *	incarnation is freed before returning.
 */
static int
ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
		  __u16 port)
{
	struct ip_vs_protocol *pp;
	struct ip_vs_app *inc;
	int ret;

	if (!(pp = ip_vs_proto_get(proto)))
		return -EPROTONOSUPPORT;

	/* NOTE(review): unregister_app is tested here but register_app is
	 * what gets called below — presumably protocols define both or
	 * neither; confirm against the ip_vs_protocol implementations. */
	if (!pp->unregister_app)
		return -EOPNOTSUPP;

	/* The incarnation starts as a copy of the template app. */
	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
	if (!inc)
		return -ENOMEM;
	INIT_LIST_HEAD(&inc->p_list);
	INIT_LIST_HEAD(&inc->incs_list);
	inc->app = app;
	inc->port = htons(port);
	atomic_set(&inc->usecnt, 0);

	if (app->timeouts) {
		inc->timeout_table =
			ip_vs_create_timeout_table(app->timeouts,
						   app->timeouts_size);
		if (!inc->timeout_table) {
			ret = -ENOMEM;
			goto out;
		}
	}

	ret = pp->register_app(ipvs, inc);
	if (ret)
		goto out;

	list_add(&inc->a_list, &app->incs_list);
	IP_VS_DBG(9, "%s App %s:%u registered\n",
		  pp->name, inc->name, ntohs(inc->port));

	return 0;

  out:
	ip_vs_app_inc_destroy(inc);
	return ret;
}


/*
 *	Release app incarnation
 *	Unregisters it from the protocol, unlinks it from the app's
 *	incs_list, and defers the actual free by one RCU grace period so
 *	softirq readers holding the inc stay safe.  Called with
 *	__ip_vs_app_mutex held (via unregister_ip_vs_app).
 */
static void
ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
	struct ip_vs_protocol *pp;

	if (!(pp = ip_vs_proto_get(inc->protocol)))
		return;

	if (pp->unregister_app)
		pp->unregister_app(ipvs, inc);

	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
		  pp->name, inc->name, ntohs(inc->port));

	list_del(&inc->a_list);

	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
}


/*
 *	Get reference to app inc (only called from softirq)
 *	Pins the owning module first; the usecnt bump is only done when that
 *	succeeds.  Returns non-zero on success.
 */
int ip_vs_app_inc_get(struct ip_vs_app *inc)
{
	int result;

	result = ip_vs_app_get(inc->app);
	if (result)
		atomic_inc(&inc->usecnt);
	return result;
}


/*
 *	Put the app inc (only called from timer or net softirq)
 *	Reverses ip_vs_app_inc_get(): drops usecnt then the module ref.
 */
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
	atomic_dec(&inc->usecnt);
	ip_vs_app_put(inc->app);
}


/*
 *	Register an application incarnation in protocol applications
 *	Thin mutex-taking wrapper around ip_vs_app_inc_new(); @port is in
 *	host byte order.  Returns 0 or a negative errno.
 */
int
register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
		       __u16 port)
{
	int result;

	mutex_lock(&__ip_vs_app_mutex);

	result = ip_vs_app_inc_new(ipvs, app, proto, port);

	mutex_unlock(&__ip_vs_app_mutex);

	return result;
}


/* Register application for netns
 * Copies @app into a new per-netns entry keyed by name.  Returns the new
 * entry on success, ERR_PTR(-EEXIST) for a duplicate name,
 * ERR_PTR(-ENOENT) if the IPVS module is unloading, ERR_PTR(-ENOMEM) on
 * allocation failure.  Holds a module use count for each registered app.
 */
struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
	struct ip_vs_app *a;
	int err = 0;

	mutex_lock(&__ip_vs_app_mutex);

	/* increase the module use count */
	if (!ip_vs_use_count_inc()) {
		err = -ENOENT;
		goto out_unlock;
	}

	list_for_each_entry(a, &ipvs->app_list, a_list) {
		if (!strcmp(app->name, a->name)) {
			err = -EEXIST;
			/* decrease the module use count */
			ip_vs_use_count_dec();
			goto out_unlock;
		}
	}
	a = kmemdup(app, sizeof(*app), GFP_KERNEL);
	if (!a) {
		err = -ENOMEM;
		/* decrease the module use count */
		ip_vs_use_count_dec();
		goto out_unlock;
	}
	INIT_LIST_HEAD(&a->incs_list);
	list_add(&a->a_list, &ipvs->app_list);

out_unlock:
	mutex_unlock(&__ip_vs_app_mutex);

	return err ? ERR_PTR(err) : a;
}


/*
 *	ip_vs_app unregistration routine
 *	We are sure there are no app incarnations attached to services
 *	Caller should use synchronize_rcu() or rcu_barrier()
 *
 *	@app == NULL matches every registered application (used by the
 *	netns cleanup path).  Each released app drops one module use count,
 *	balancing register_ip_vs_app().
 */
void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
	struct ip_vs_app *a, *anxt, *inc, *nxt;

	mutex_lock(&__ip_vs_app_mutex);

	list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
		if (app && strcmp(app->name, a->name))
			continue;
		list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
			ip_vs_app_inc_release(ipvs, inc);
		}

		list_del(&a->a_list);
		kfree(a);

		/* decrease the module use count */
		ip_vs_use_count_dec();
	}

	mutex_unlock(&__ip_vs_app_mutex);
}


/*
 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 *	Delegates to the protocol's app_conn_bind hook.
 */
int ip_vs_bind_app(struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp)
{
	return pp->app_conn_bind(cp);
}


/*
 *	Unbind cp from application incarnation (called by cp destructor)
 *	Runs the optional unbind_conn/done_conn hooks, drops the inc
 *	reference taken at bind time, and clears cp->app.
 */
void ip_vs_unbind_app(struct ip_vs_conn *cp)
{
	struct ip_vs_app *inc = cp->app;

	if (!inc)
		return;

	if (inc->unbind_conn)
		inc->unbind_conn(inc, cp);
	if (inc->done_conn)
		inc->done_conn(inc, cp);
	ip_vs_app_inc_put(inc);
	cp->app = NULL;
}


/*
 *	Fixes th->seq based on ip_vs_seq info.
 */
static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 seq = ntohl(th->seq);

	/*
	 *	Adjust seq with delta-offset for all packets after
	 *	the most recent resized pkt seq and with previous_delta offset
	 *	for all packets	before most recent resized pkt seq.
	 */
	if (vseq->delta || vseq->previous_delta) {
		if(after(seq, vseq->init_seq)) {
			th->seq = htonl(seq + vseq->delta);
			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
				  __func__, vseq->delta);
		} else {
			th->seq = htonl(seq + vseq->previous_delta);
			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Fixes th->ack_seq based on ip_vs_seq info.
 *	Mirror of vs_fix_seq() for the reverse direction: the peer's ack
 *	must be shifted back by the delta we applied on the way out.
 */
static inline void
vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 ack_seq = ntohl(th->ack_seq);

	/*
	 * Adjust ack_seq with delta-offset for
	 * the packets AFTER most recent resized pkt has caused a shift
	 * for packets before most recent resized pkt, use previous_delta
	 */
	if (vseq->delta || vseq->previous_delta) {
		/* since ack_seq is the number of octet that is expected
		   to receive next, so compare it with init_seq+delta */
		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
			th->ack_seq = htonl(ack_seq - vseq->delta);
			IP_VS_DBG(9, "%s(): subtracted delta "
				  "(%d) from ack_seq\n", __func__, vseq->delta);

		} else {
			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
			IP_VS_DBG(9, "%s(): subtracted "
				  "previous_delta (%d) from ack_seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Updates ip_vs_seq if pkt has been resized
 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
 *	Only the first update, or one for a later @seq, rolls the deltas
 *	forward; @flag marks the direction (IP_VS_CONN_F_{IN,OUT}_SEQ).
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
				 unsigned int flag, __u32 seq, int diff)
{
	/* spinlock is to keep updating cp->flags atomic */
	spin_lock_bh(&cp->lock);
	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
		vseq->previous_delta = vseq->delta;
		vseq->delta += diff;
		vseq->init_seq = seq;
		cp->flags |= flag;
	}
	spin_unlock_bh(&cp->lock);
}

/*
 *	TCP output helper: fix seq/ack from stored deltas, run the app's
 *	pkt_out hook, and record any size change it made.
 *	Returns 0 if the skb could not be made writable or the hook failed,
 *	1 otherwise.
 */
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
				  struct ip_vs_app *app,
				  struct ip_vs_iphdr *ipvsh)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_seq(&cp->out_seq, th);
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_ack_seq(&cp->in_seq, th);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->out_seq,
			      IP_VS_CONN_F_OUT_SEQ, seq, diff);

	return 1;
}

/*
 *	Output pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL
 *	returns false if it can't handle packet (oom)
 */
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
		      struct ip_vs_iphdr *ipvsh)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_out(cp, skb, app, ipvsh);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	return app->pkt_out(app, cp, skb, NULL, ipvsh);
}


/*
 *	TCP input helper: mirror of app_tcp_pkt_out() with the seq/ack
 *	roles swapped (in_seq fixes seq, out_seq fixes ack) and the app's
 *	pkt_in hook invoked instead.
 *	Returns 0 if the skb could not be made writable or the hook failed,
 *	1 otherwise.
 */
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
				 struct ip_vs_app *app,
				 struct ip_vs_iphdr *ipvsh)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_seq(&cp->in_seq, th);
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_ack_seq(&cp->out_seq, th);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->in_seq,
			      IP_VS_CONN_F_IN_SEQ, seq, diff);

	return 1;
}

/*
 *	Input pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL.
 *	returns false if can't handle packet (oom).
 */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
		     struct ip_vs_iphdr *ipvsh)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_in(cp, skb, app, ipvsh);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	return app->pkt_in(app, cp, skb, NULL, ipvsh);
}


#ifdef CONFIG_PROC_FS
/*
 *	/proc/net/ip_vs_app entry function
 */

/* Return the @pos'th incarnation across all apps, or NULL if out of
 * range.  Caller holds __ip_vs_app_mutex. */
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
	struct ip_vs_app *app, *inc;

	list_for_each_entry(app, &ipvs->app_list, a_list) {
		list_for_each_entry(inc, &app->incs_list, a_list) {
			if (pos-- == 0)
				return inc;
		}
	}
	return NULL;

}

/* seq_file start: takes the app mutex (released in ->stop). */
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_app_mutex);

	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}

/* seq_file next: advance to the next incarnation, crossing into the
 * following app's incs_list when the current one is exhausted. */
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip_vs_app *inc, *app;
	struct list_head *e;
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	inc = v;
	app = inc->app;

	if ((e = inc->a_list.next) != &app->incs_list)
		return list_entry(e, struct ip_vs_app, a_list);

	/* go on to next application */
	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
		app = list_entry(e, struct ip_vs_app, a_list);
		list_for_each_entry(inc, &app->incs_list, a_list) {
			return inc;
		}
	}
	return NULL;
}

/* seq_file stop: drops the mutex taken in ->start. */
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
	mutex_unlock(&__ip_vs_app_mutex);
}

/* Emit the header line or one incarnation row. */
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "prot port usecnt name\n");
	else {
		const struct ip_vs_app *inc = v;

		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
			   ip_vs_proto_name(inc->protocol),
			   ntohs(inc->port),
			   atomic_read(&inc->usecnt),
			   inc->name);
	}
	return 0;
}

static const struct seq_operations ip_vs_app_seq_ops = {
	.start = ip_vs_app_seq_start,
	.next  = ip_vs_app_seq_next,
	.stop  = ip_vs_app_seq_stop,
	.show  = ip_vs_app_seq_show,
};
#endif

/* Per-netns init: empty app list plus the /proc/net/ip_vs_app entry. */
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
	INIT_LIST_HEAD(&ipvs->app_list);
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
			     &ip_vs_app_seq_ops,
			     sizeof(struct seq_net_private)))
		return -ENOMEM;
#endif
	return 0;
}

/* Per-netns teardown: unregister every app, then remove the proc entry. */
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
	unregister_ip_vs_app(ipvs, NULL /* all */);
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
#endif
}