1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * ip_vs_proto.c: transport protocol load balancing support for IPVS 4 * 5 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 6 * Julian Anastasov <ja@ssi.bg> 7 * 8 * Changes: 9 */ 10 11 #define pr_fmt(fmt) "IPVS: " fmt 12 13 #include <linux/module.h> 14 #include <linux/kernel.h> 15 #include <linux/skbuff.h> 16 #include <linux/gfp.h> 17 #include <linux/in.h> 18 #include <linux/ip.h> 19 #include <net/protocol.h> 20 #include <net/tcp.h> 21 #include <net/udp.h> 22 #include <linux/stat.h> 23 #include <linux/proc_fs.h> 24 25 #include <net/ip_vs.h> 26 27 28 /* 29 * IPVS protocols can only be registered/unregistered when the ipvs 30 * module is loaded/unloaded, so no lock is needed in accessing the 31 * ipvs protocol table. 32 */ 33 34 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ 35 #define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1)) 36 37 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; 38 39 /* States for conn templates: NONE or words separated with ",", max 15 chars */ 40 static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = { 41 [IP_VS_CTPL_S_NONE] = "NONE", 42 [IP_VS_CTPL_S_ASSURED] = "ASSURED", 43 }; 44 45 /* 46 * register an ipvs protocol 47 */ 48 static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) 49 { 50 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 51 52 pp->next = ip_vs_proto_table[hash]; 53 ip_vs_proto_table[hash] = pp; 54 55 if (pp->init != NULL) 56 pp->init(pp); 57 58 return 0; 59 } 60 61 /* 62 * register an ipvs protocols netns related data 63 */ 64 static int 65 register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) 66 { 67 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 68 struct ip_vs_proto_data *pd = 69 kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); 70 71 if (!pd) 72 return -ENOMEM; 73 74 pd->pp = pp; /* For speed issues */ 75 pd->next = ipvs->proto_data_table[hash]; 76 ipvs->proto_data_table[hash] = pd; 77 atomic_set(&pd->appcnt, 0); /* Init app counter */ 78 79 if (pp->init_netns != NULL) { 80 int ret = pp->init_netns(ipvs, pd); 81 if (ret) { 82 /* unlink an free proto data */ 83 ipvs->proto_data_table[hash] = pd->next; 84 kfree(pd); 85 return ret; 86 } 87 } 88 89 return 0; 90 } 91 92 /* 93 * unregister an ipvs protocol 94 */ 95 static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) 96 { 97 struct ip_vs_protocol **pp_p; 98 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 99 100 pp_p = &ip_vs_proto_table[hash]; 101 for (; *pp_p; pp_p = &(*pp_p)->next) { 102 if (*pp_p == pp) { 103 *pp_p = pp->next; 104 if (pp->exit != NULL) 105 pp->exit(pp); 106 return 0; 107 } 108 } 109 110 return -ESRCH; 111 } 112 113 /* 114 * unregister an ipvs protocols netns data 115 */ 116 static int 117 unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) 118 { 119 struct ip_vs_proto_data **pd_p; 120 unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); 121 122 pd_p = &ipvs->proto_data_table[hash]; 123 for (; *pd_p; pd_p = &(*pd_p)->next) { 124 if (*pd_p == pd) { 125 *pd_p = pd->next; 126 if (pd->pp->exit_netns != NULL) 127 pd->pp->exit_netns(ipvs, pd); 128 kfree(pd); 129 return 0; 130 } 131 } 132 133 return -ESRCH; 134 } 135 136 /* 137 * get ip_vs_protocol object by its proto. 138 */ 139 struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) 140 { 141 struct ip_vs_protocol *pp; 142 unsigned int hash = IP_VS_PROTO_HASH(proto); 143 144 for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { 145 if (pp->protocol == proto) 146 return pp; 147 } 148 149 return NULL; 150 } 151 EXPORT_SYMBOL(ip_vs_proto_get); 152 153 /* 154 * get ip_vs_protocol object data by netns and proto 155 */ 156 struct ip_vs_proto_data * 157 ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) 158 { 159 struct ip_vs_proto_data *pd; 160 unsigned int hash = IP_VS_PROTO_HASH(proto); 161 162 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { 163 if (pd->pp->protocol == proto) 164 return pd; 165 } 166 167 return NULL; 168 } 169 EXPORT_SYMBOL(ip_vs_proto_data_get); 170 171 /* 172 * Propagate event for state change to all protocols 173 */ 174 void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) 175 { 176 struct ip_vs_proto_data *pd; 177 int i; 178 179 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 180 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { 181 if (pd->pp->timeout_change) 182 pd->pp->timeout_change(pd, flags); 183 } 184 } 185 } 186 187 188 int * 189 ip_vs_create_timeout_table(int *table, int size) 190 { 191 return kmemdup(table, size, GFP_KERNEL); 192 } 193 194 195 const char *ip_vs_state_name(const struct ip_vs_conn *cp) 196 { 197 unsigned int state = cp->state; 198 struct ip_vs_protocol *pp; 199 200 if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 201 202 if (state >= IP_VS_CTPL_S_LAST) 203 return "ERR!"; 204 return ip_vs_ctpl_state_name_table[state] ? : "?"; 205 } 206 pp = ip_vs_proto_get(cp->protocol); 207 if (pp == NULL || pp->state_name == NULL) 208 return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!"; 209 return pp->state_name(state); 210 } 211 212 213 static void 214 ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, 215 const struct sk_buff *skb, 216 int offset, 217 const char *msg) 218 { 219 char buf[128]; 220 struct iphdr _iph, *ih; 221 222 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 223 if (ih == NULL) 224 sprintf(buf, "TRUNCATED"); 225 else if (ih->frag_off & htons(IP_OFFSET)) 226 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 227 else { 228 __be16 _ports[2], *pptr; 229 230 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 231 sizeof(_ports), _ports); 232 if (pptr == NULL) 233 sprintf(buf, "TRUNCATED %pI4->%pI4", 234 &ih->saddr, &ih->daddr); 235 else 236 sprintf(buf, "%pI4:%u->%pI4:%u", 237 &ih->saddr, ntohs(pptr[0]), 238 &ih->daddr, ntohs(pptr[1])); 239 } 240 241 pr_debug("%s: %s %s\n", msg, pp->name, buf); 242 } 243 244 #ifdef CONFIG_IP_VS_IPV6 245 static void 246 ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, 247 const struct sk_buff *skb, 248 int offset, 249 const char *msg) 250 { 251 char buf[192]; 252 struct ipv6hdr _iph, *ih; 253 254 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 255 if (ih == NULL) 256 sprintf(buf, "TRUNCATED"); 257 else if (ih->nexthdr == IPPROTO_FRAGMENT) 258 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr); 259 else { 260 __be16 _ports[2], *pptr; 261 262 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 263 sizeof(_ports), _ports); 264 if (pptr == NULL) 265 sprintf(buf, "TRUNCATED %pI6c->%pI6c", 266 &ih->saddr, &ih->daddr); 267 else 268 sprintf(buf, "%pI6c:%u->%pI6c:%u", 269 &ih->saddr, ntohs(pptr[0]), 270 &ih->daddr, ntohs(pptr[1])); 271 } 272 273 pr_debug("%s: %s %s\n", msg, pp->name, buf); 274 } 275 #endif 276 277 278 void 279 ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, 280 const struct sk_buff *skb, 281 int offset, 282 const char *msg) 283 { 284 #ifdef CONFIG_IP_VS_IPV6 285 if (af == AF_INET6) 286 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 287 else 288 #endif 289 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 290 } 291 292 /* 293 * per network name-space init 294 */ 295 int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) 296 { 297 int i, ret; 298 static struct ip_vs_protocol *protos[] = { 299 #ifdef CONFIG_IP_VS_PROTO_TCP 300 &ip_vs_protocol_tcp, 301 #endif 302 #ifdef CONFIG_IP_VS_PROTO_UDP 303 &ip_vs_protocol_udp, 304 #endif 305 #ifdef CONFIG_IP_VS_PROTO_SCTP 306 &ip_vs_protocol_sctp, 307 #endif 308 #ifdef CONFIG_IP_VS_PROTO_AH 309 &ip_vs_protocol_ah, 310 #endif 311 #ifdef CONFIG_IP_VS_PROTO_ESP 312 &ip_vs_protocol_esp, 313 #endif 314 }; 315 316 for (i = 0; i < ARRAY_SIZE(protos); i++) { 317 ret = register_ip_vs_proto_netns(ipvs, protos[i]); 318 if (ret < 0) 319 goto cleanup; 320 } 321 return 0; 322 323 cleanup: 324 ip_vs_protocol_net_cleanup(ipvs); 325 return ret; 326 } 327 328 void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) 329 { 330 struct ip_vs_proto_data *pd; 331 int i; 332 333 /* unregister all the ipvs proto data for this netns */ 334 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 335 while ((pd = ipvs->proto_data_table[i]) != NULL) 336 unregister_ip_vs_proto_netns(ipvs, pd); 337 } 338 } 339 340 int __init ip_vs_protocol_init(void) 341 { 342 char protocols[64] = { 0 }; 343 #define REGISTER_PROTOCOL(p) \ 344 do { \ 345 register_ip_vs_protocol(p); \ 346 strcat(protocols, ", "); \ 347 strcat(protocols, (p)->name); \ 348 } while (0) 349 350 #ifdef CONFIG_IP_VS_PROTO_TCP 351 REGISTER_PROTOCOL(&ip_vs_protocol_tcp); 352 #endif 353 #ifdef CONFIG_IP_VS_PROTO_UDP 354 REGISTER_PROTOCOL(&ip_vs_protocol_udp); 355 #endif 356 #ifdef CONFIG_IP_VS_PROTO_SCTP 357 REGISTER_PROTOCOL(&ip_vs_protocol_sctp); 358 #endif 359 #ifdef CONFIG_IP_VS_PROTO_AH 360 REGISTER_PROTOCOL(&ip_vs_protocol_ah); 361 #endif 362 #ifdef CONFIG_IP_VS_PROTO_ESP 363 REGISTER_PROTOCOL(&ip_vs_protocol_esp); 364 #endif 365 pr_info("Registered protocols (%s)\n", &protocols[2]); 366 367 return 0; 368 } 369 370 371 void ip_vs_protocol_cleanup(void) 372 { 373 struct ip_vs_protocol *pp; 374 int i; 375 376 /* unregister all the ipvs protocols */ 377 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 378 while ((pp = ip_vs_proto_table[i]) != NULL) 379 unregister_ip_vs_protocol(pp); 380 } 381 } 382