1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * ip_vs_proto.c: transport protocol load balancing support for IPVS
4 *
5 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
6 * Julian Anastasov <ja@ssi.bg>
7 *
8 * Changes:
9 */
10
11 #define pr_fmt(fmt) "IPVS: " fmt
12
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/skbuff.h>
16 #include <linux/gfp.h>
17 #include <linux/in.h>
18 #include <linux/ip.h>
19 #include <net/protocol.h>
20 #include <net/tcp.h>
21 #include <net/udp.h>
22 #include <linux/stat.h>
23 #include <linux/proc_fs.h>
24
25 #include <net/ip_vs.h>
26
27
28 /*
29 * IPVS protocols can only be registered/unregistered when the ipvs
30 * module is loaded/unloaded, so no lock is needed in accessing the
31 * ipvs protocol table.
32 */
33
/*
 *	Hashing of protocol numbers into table buckets.
 *	The hash is a plain bit mask with (size - 1), which is why the
 *	table size must be a power of 2.
 */
#define IP_VS_PROTO_TAB_SIZE 32
#define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1))

/* Registered protocols: one chain (linked through ->next) per hash bucket */
static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
38
39 /* States for conn templates: NONE or words separated with ",", max 15 chars */
40 static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
41 [IP_VS_CTPL_S_NONE] = "NONE",
42 [IP_VS_CTPL_S_ASSURED] = "ASSURED",
43 };
44
45 /*
46 * register an ipvs protocol
47 */
register_ip_vs_protocol(struct ip_vs_protocol * pp)48 static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
49 {
50 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
51
52 pp->next = ip_vs_proto_table[hash];
53 ip_vs_proto_table[hash] = pp;
54
55 if (pp->init != NULL)
56 pp->init(pp);
57
58 return 0;
59 }
60
61 /*
62 * register an ipvs protocols netns related data
63 */
64 static int
register_ip_vs_proto_netns(struct netns_ipvs * ipvs,struct ip_vs_protocol * pp)65 register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp)
66 {
67 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
68 struct ip_vs_proto_data *pd =
69 kzalloc_obj(struct ip_vs_proto_data);
70
71 if (!pd)
72 return -ENOMEM;
73
74 pd->pp = pp; /* For speed issues */
75 pd->next = ipvs->proto_data_table[hash];
76 ipvs->proto_data_table[hash] = pd;
77 atomic_set(&pd->appcnt, 0); /* Init app counter */
78
79 if (pp->init_netns != NULL) {
80 int ret = pp->init_netns(ipvs, pd);
81 if (ret) {
82 /* unlink an free proto data */
83 ipvs->proto_data_table[hash] = pd->next;
84 kfree(pd);
85 return ret;
86 }
87 }
88
89 return 0;
90 }
91
92 /*
93 * unregister an ipvs protocol
94 */
unregister_ip_vs_protocol(struct ip_vs_protocol * pp)95 static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
96 {
97 struct ip_vs_protocol **pp_p;
98 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
99
100 pp_p = &ip_vs_proto_table[hash];
101 for (; *pp_p; pp_p = &(*pp_p)->next) {
102 if (*pp_p == pp) {
103 *pp_p = pp->next;
104 if (pp->exit != NULL)
105 pp->exit(pp);
106 return 0;
107 }
108 }
109
110 return -ESRCH;
111 }
112
113 /*
114 * unregister an ipvs protocols netns data
115 */
116 static int
unregister_ip_vs_proto_netns(struct netns_ipvs * ipvs,struct ip_vs_proto_data * pd)117 unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
118 {
119 struct ip_vs_proto_data **pd_p;
120 unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol);
121
122 pd_p = &ipvs->proto_data_table[hash];
123 for (; *pd_p; pd_p = &(*pd_p)->next) {
124 if (*pd_p == pd) {
125 *pd_p = pd->next;
126 if (pd->pp->exit_netns != NULL)
127 pd->pp->exit_netns(ipvs, pd);
128 kfree(pd);
129 return 0;
130 }
131 }
132
133 return -ESRCH;
134 }
135
136 /*
137 * get ip_vs_protocol object by its proto.
138 */
ip_vs_proto_get(unsigned short proto)139 struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
140 {
141 struct ip_vs_protocol *pp;
142 unsigned int hash = IP_VS_PROTO_HASH(proto);
143
144 for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) {
145 if (pp->protocol == proto)
146 return pp;
147 }
148
149 return NULL;
150 }
151 EXPORT_SYMBOL(ip_vs_proto_get);
152
153 /*
154 * get ip_vs_protocol object data by netns and proto
155 */
156 struct ip_vs_proto_data *
ip_vs_proto_data_get(struct netns_ipvs * ipvs,unsigned short proto)157 ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
158 {
159 struct ip_vs_proto_data *pd;
160 unsigned int hash = IP_VS_PROTO_HASH(proto);
161
162 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
163 if (pd->pp->protocol == proto)
164 return pd;
165 }
166
167 return NULL;
168 }
169 EXPORT_SYMBOL(ip_vs_proto_data_get);
170
171 /*
172 * Propagate event for state change to all protocols
173 */
ip_vs_protocol_timeout_change(struct netns_ipvs * ipvs,int flags)174 void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
175 {
176 struct ip_vs_proto_data *pd;
177 int i;
178
179 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
180 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
181 if (pd->pp->timeout_change)
182 pd->pp->timeout_change(pd, flags);
183 }
184 }
185 }
186
187
188 int *
ip_vs_create_timeout_table(int * table,int size)189 ip_vs_create_timeout_table(int *table, int size)
190 {
191 return kmemdup(table, size, GFP_KERNEL);
192 }
193
194
ip_vs_state_name(const struct ip_vs_conn * cp)195 const char *ip_vs_state_name(const struct ip_vs_conn *cp)
196 {
197 unsigned int state = cp->state;
198 struct ip_vs_protocol *pp;
199
200 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
201
202 if (state >= IP_VS_CTPL_S_LAST)
203 return "ERR!";
204 return ip_vs_ctpl_state_name_table[state] ? : "?";
205 }
206 pp = ip_vs_proto_get(cp->protocol);
207 if (pp == NULL || pp->state_name == NULL)
208 return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
209 return pp->state_name(state);
210 }
211
212
213 static void
ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol * pp,const struct sk_buff * skb,int offset,const char * msg)214 ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
215 const struct sk_buff *skb,
216 int offset,
217 const char *msg)
218 {
219 char buf[128];
220 struct iphdr _iph, *ih;
221
222 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
223 if (ih == NULL)
224 sprintf(buf, "TRUNCATED");
225 else if (ih->frag_off & htons(IP_OFFSET))
226 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
227 else {
228 __be16 _ports[2], *pptr;
229
230 pptr = skb_header_pointer(skb, offset + ih->ihl*4,
231 sizeof(_ports), _ports);
232 if (pptr == NULL)
233 sprintf(buf, "TRUNCATED %pI4->%pI4",
234 &ih->saddr, &ih->daddr);
235 else
236 sprintf(buf, "%pI4:%u->%pI4:%u",
237 &ih->saddr, ntohs(pptr[0]),
238 &ih->daddr, ntohs(pptr[1]));
239 }
240
241 pr_debug("%s: %s %s\n", msg, pp->name, buf);
242 }
243
244 #ifdef CONFIG_IP_VS_IPV6
245 static void
ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol * pp,const struct sk_buff * skb,int offset,const char * msg)246 ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
247 const struct sk_buff *skb,
248 int offset,
249 const char *msg)
250 {
251 char buf[192];
252 struct ipv6hdr _iph, *ih;
253
254 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
255 if (ih == NULL)
256 sprintf(buf, "TRUNCATED");
257 else if (ih->nexthdr == IPPROTO_FRAGMENT)
258 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr);
259 else {
260 __be16 _ports[2], *pptr;
261
262 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
263 sizeof(_ports), _ports);
264 if (pptr == NULL)
265 sprintf(buf, "TRUNCATED %pI6c->%pI6c",
266 &ih->saddr, &ih->daddr);
267 else
268 sprintf(buf, "%pI6c:%u->%pI6c:%u",
269 &ih->saddr, ntohs(pptr[0]),
270 &ih->daddr, ntohs(pptr[1]));
271 }
272
273 pr_debug("%s: %s %s\n", msg, pp->name, buf);
274 }
275 #endif
276
277
/*
 *	Log a description of a packet, dispatching on the address family:
 *	AF_INET6 goes to the IPv6 variant when CONFIG_IP_VS_IPV6 is set,
 *	everything else is handled by the IPv4 variant.
 */
void
ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			  const struct sk_buff *skb,
			  int offset,
			  const char *msg)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
		return;
	}
#endif
	ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
}
291
292 /*
293 * per network name-space init
294 */
ip_vs_protocol_net_init(struct netns_ipvs * ipvs)295 int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs)
296 {
297 int i, ret;
298 static struct ip_vs_protocol *protos[] = {
299 #ifdef CONFIG_IP_VS_PROTO_TCP
300 &ip_vs_protocol_tcp,
301 #endif
302 #ifdef CONFIG_IP_VS_PROTO_UDP
303 &ip_vs_protocol_udp,
304 #endif
305 #ifdef CONFIG_IP_VS_PROTO_SCTP
306 &ip_vs_protocol_sctp,
307 #endif
308 #ifdef CONFIG_IP_VS_PROTO_AH
309 &ip_vs_protocol_ah,
310 #endif
311 #ifdef CONFIG_IP_VS_PROTO_ESP
312 &ip_vs_protocol_esp,
313 #endif
314 };
315
316 for (i = 0; i < ARRAY_SIZE(protos); i++) {
317 ret = register_ip_vs_proto_netns(ipvs, protos[i]);
318 if (ret < 0)
319 goto cleanup;
320 }
321 return 0;
322
323 cleanup:
324 ip_vs_protocol_net_cleanup(ipvs);
325 return ret;
326 }
327
ip_vs_protocol_net_cleanup(struct netns_ipvs * ipvs)328 void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
329 {
330 struct ip_vs_proto_data *pd;
331 int i;
332
333 /* unregister all the ipvs proto data for this netns */
334 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
335 while ((pd = ipvs->proto_data_table[i]) != NULL)
336 unregister_ip_vs_proto_netns(ipvs, pd);
337 }
338 }
339
ip_vs_protocol_init(void)340 int __init ip_vs_protocol_init(void)
341 {
342 char protocols[64] = { 0 };
343 #define REGISTER_PROTOCOL(p) \
344 do { \
345 register_ip_vs_protocol(p); \
346 strcat(protocols, ", "); \
347 strcat(protocols, (p)->name); \
348 } while (0)
349
350 #ifdef CONFIG_IP_VS_PROTO_TCP
351 REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
352 #endif
353 #ifdef CONFIG_IP_VS_PROTO_UDP
354 REGISTER_PROTOCOL(&ip_vs_protocol_udp);
355 #endif
356 #ifdef CONFIG_IP_VS_PROTO_SCTP
357 REGISTER_PROTOCOL(&ip_vs_protocol_sctp);
358 #endif
359 #ifdef CONFIG_IP_VS_PROTO_AH
360 REGISTER_PROTOCOL(&ip_vs_protocol_ah);
361 #endif
362 #ifdef CONFIG_IP_VS_PROTO_ESP
363 REGISTER_PROTOCOL(&ip_vs_protocol_esp);
364 #endif
365 pr_info("Registered protocols (%s)\n", &protocols[2]);
366
367 return 0;
368 }
369
370
ip_vs_protocol_cleanup(void)371 void ip_vs_protocol_cleanup(void)
372 {
373 struct ip_vs_protocol *pp;
374 int i;
375
376 /* unregister all the ipvs protocols */
377 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
378 while ((pp = ip_vs_proto_table[i]) != NULL)
379 unregister_ip_vs_protocol(pp);
380 }
381 }
382