1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/ip.h>
4 #include <linux/sctp.h>
5 #include <net/ip.h>
6 #include <net/ip6_checksum.h>
7 #include <linux/netfilter.h>
8 #include <linux/netfilter_ipv4.h>
9 #include <net/sctp/checksum.h>
10 #include <net/ip_vs.h>
11
/*
 * Forward declaration: the SNAT/DNAT handlers below call sctp_csum_check()
 * before its definition appears later in this file.
 */
static int
sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
		unsigned int sctphoff);
15
16 static int
sctp_conn_schedule(struct netns_ipvs * ipvs,int af,struct sk_buff * skb,struct ip_vs_proto_data * pd,int * verdict,struct ip_vs_conn ** cpp,struct ip_vs_iphdr * iph)17 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
18 struct ip_vs_proto_data *pd,
19 int *verdict, struct ip_vs_conn **cpp,
20 struct ip_vs_iphdr *iph)
21 {
22 struct ip_vs_service *svc;
23 struct sctp_chunkhdr _schunkh, *sch;
24 struct sctphdr *sh, _sctph;
25 __be16 _ports[2], *ports = NULL;
26
27 if (likely(!ip_vs_iph_icmp(iph))) {
28 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
29 if (sh) {
30 sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
31 sizeof(_schunkh), &_schunkh);
32 if (sch) {
33 if (sch->type == SCTP_CID_ABORT ||
34 !(sysctl_sloppy_sctp(ipvs) ||
35 sch->type == SCTP_CID_INIT))
36 return 1;
37 ports = &sh->source;
38 }
39 }
40 } else {
41 ports = skb_header_pointer(
42 skb, iph->len, sizeof(_ports), &_ports);
43 }
44
45 if (!ports) {
46 *verdict = NF_DROP;
47 return 0;
48 }
49
50 if (likely(!ip_vs_iph_inverse(iph)))
51 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
52 &iph->daddr, ports[1]);
53 else
54 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
55 &iph->saddr, ports[0]);
56 if (svc) {
57 int ignored;
58
59 if (ip_vs_todrop(ipvs)) {
60 /*
61 * It seems that we are very loaded.
62 * We have to drop this packet :(
63 */
64 *verdict = NF_DROP;
65 return 0;
66 }
67 /*
68 * Let the virtual server select a real server for the
69 * incoming connection, and create a connection entry.
70 */
71 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
72 if (!*cpp && ignored <= 0) {
73 if (!ignored)
74 *verdict = ip_vs_leave(svc, skb, pd, iph);
75 else
76 *verdict = NF_DROP;
77 return 0;
78 }
79 }
80 /* NF_ACCEPT */
81 return 1;
82 }
83
sctp_nat_csum(struct sk_buff * skb,struct sctphdr * sctph,unsigned int sctphoff)84 static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
85 unsigned int sctphoff)
86 {
87 sctph->checksum = sctp_compute_cksum(skb, sctphoff);
88 skb->ip_summed = CHECKSUM_UNNECESSARY;
89 }
90
91 static int
sctp_snat_handler(struct sk_buff * skb,struct ip_vs_protocol * pp,struct ip_vs_conn * cp,struct ip_vs_iphdr * iph)92 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
93 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
94 {
95 struct sctphdr *sctph;
96 unsigned int sctphoff = iph->len;
97 bool payload_csum = false;
98
99 #ifdef CONFIG_IP_VS_IPV6
100 if (cp->af == AF_INET6 && iph->fragoffs)
101 return 1;
102 #endif
103
104 /* csum_check requires unshared skb */
105 if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
106 return 0;
107
108 if (unlikely(cp->app != NULL)) {
109 int ret;
110
111 /* Some checks before mangling */
112 if (!sctp_csum_check(cp->af, skb, pp, sctphoff))
113 return 0;
114
115 /* Call application helper if needed */
116 ret = ip_vs_app_pkt_out(cp, skb, iph);
117 if (ret == 0)
118 return 0;
119 /* ret=2: csum update is needed after payload mangling */
120 if (ret == 2)
121 payload_csum = true;
122 }
123
124 sctph = (void *) skb_network_header(skb) + sctphoff;
125
126 /* Only update csum if we really have to */
127 if (sctph->source != cp->vport || payload_csum ||
128 skb->ip_summed == CHECKSUM_PARTIAL) {
129 sctph->source = cp->vport;
130 if (!skb_is_gso(skb))
131 sctp_nat_csum(skb, sctph, sctphoff);
132 } else {
133 skb->ip_summed = CHECKSUM_UNNECESSARY;
134 }
135
136 return 1;
137 }
138
139 static int
sctp_dnat_handler(struct sk_buff * skb,struct ip_vs_protocol * pp,struct ip_vs_conn * cp,struct ip_vs_iphdr * iph)140 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
141 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
142 {
143 struct sctphdr *sctph;
144 unsigned int sctphoff = iph->len;
145 bool payload_csum = false;
146
147 #ifdef CONFIG_IP_VS_IPV6
148 if (cp->af == AF_INET6 && iph->fragoffs)
149 return 1;
150 #endif
151
152 /* csum_check requires unshared skb */
153 if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
154 return 0;
155
156 if (unlikely(cp->app != NULL)) {
157 int ret;
158
159 /* Some checks before mangling */
160 if (!sctp_csum_check(cp->af, skb, pp, sctphoff))
161 return 0;
162
163 /* Call application helper if needed */
164 ret = ip_vs_app_pkt_in(cp, skb, iph);
165 if (ret == 0)
166 return 0;
167 /* ret=2: csum update is needed after payload mangling */
168 if (ret == 2)
169 payload_csum = true;
170 }
171
172 sctph = (void *) skb_network_header(skb) + sctphoff;
173
174 /* Only update csum if we really have to */
175 if (sctph->dest != cp->dport || payload_csum ||
176 (skb->ip_summed == CHECKSUM_PARTIAL &&
177 !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
178 sctph->dest = cp->dport;
179 if (!skb_is_gso(skb))
180 sctp_nat_csum(skb, sctph, sctphoff);
181 } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
182 skb->ip_summed = CHECKSUM_UNNECESSARY;
183 }
184
185 return 1;
186 }
187
188 static int
sctp_csum_check(int af,struct sk_buff * skb,struct ip_vs_protocol * pp,unsigned int sctphoff)189 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
190 unsigned int sctphoff)
191 {
192 struct sctphdr *sh;
193 __le32 cmp, val;
194
195 sh = (struct sctphdr *)(skb->data + sctphoff);
196 cmp = sh->checksum;
197 val = sctp_compute_cksum(skb, sctphoff);
198
199 if (val != cmp) {
200 /* CRC failure, dump it. */
201 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
202 "Failed checksum for");
203 return 0;
204 }
205 return 1;
206 }
207
/*
 * Events fed into the SCTP state machine.  Each value is used as the
 * second index of sctp_states[][][], so the order here must match the
 * row order of that table.  IP_VS_SCTP_EVENT_LAST is the number of events.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA = 0,		/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,
	IP_VS_SCTP_EVENT_LAST		/* count of events, not an event */
};
221
222 /* RFC 2960, 3.2 Chunk Field Descriptions */
223 static __u8 sctp_events[] = {
224 [SCTP_CID_DATA] = IP_VS_SCTP_DATA,
225 [SCTP_CID_INIT] = IP_VS_SCTP_INIT,
226 [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK,
227 [SCTP_CID_SACK] = IP_VS_SCTP_DATA,
228 [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA,
229 [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA,
230 [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT,
231 [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN,
232 [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK,
233 [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR,
234 [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO,
235 [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK,
236 [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA,
237 [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA,
238 [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE,
239 };
240
241 /* SCTP States:
242 * See RFC 2960, 4. SCTP Association State Diagram
243 *
244 * New states (not in diagram):
245 * - INIT1 state: use shorter timeout for dropped INIT packets
246 * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
247 * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
248 *
249 * The states are as seen in real server. In the diagram, INIT1, INIT,
250 * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
251 *
252 * States as per packets from client (C) and server (S):
253 *
254 * Setup of client connection:
255 * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
256 * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
257 * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
258 * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
259 *
260 * Setup of server connection:
261 * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
262 * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
263 * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
264 */
265
/* Short aliases for the IP_VS_SCTP_S_* states, used by sctp_states[] below */
#define sNO IP_VS_SCTP_S_NONE
#define sI1 IP_VS_SCTP_S_INIT1
#define sIN IP_VS_SCTP_S_INIT
#define sCS IP_VS_SCTP_S_COOKIE_SENT
#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
#define sCW IP_VS_SCTP_S_COOKIE_WAIT
#define sCO IP_VS_SCTP_S_COOKIE
#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
#define sES IP_VS_SCTP_S_ESTABLISHED
#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
#define sRJ IP_VS_SCTP_S_REJECTED
#define sCL IP_VS_SCTP_S_CLOSED
280
/*
 * State transition table, indexed as
 *	sctp_states[direction][event][current state] -> next state
 *
 * Row labels are the events of enum ipvs_sctp_event_t in declaration
 * order: d (DATA), i (INIT), i_a (INIT_ACK), c_e (COOKIE_ECHO),
 * c_a (COOKIE_ACK), s (SHUTDOWN), s_a (SHUTDOWN_ACK),
 * s_c (SHUTDOWN_COMPLETE), err (ERROR), ab (ABORT).
 * Column headers name the current state.
 */
static const __u8 sctp_states
	[IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
	{ /* INPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* OUTPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* INPUT-ONLY */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
};
323
/* Timeout shared by most non-established states: (60 + 1) * HZ */
#define IP_VS_SCTP_MAX_RTO	((60 + 1) * HZ)

/*
 * Timeout table[state]
 *
 * Default per-state connection timeouts.  __ip_vs_sctp_init() copies this
 * table into pd->timeout_table, and set_sctp_state() falls back to it
 * directly when no protocol data is available.
 * NOTE(review): the "(0 + 3 + 1)" terms presumably spell out an RTO
 * computation - confirm against the SCTP RFC before changing them.
 */
static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
	[IP_VS_SCTP_S_NONE]			= 2 * HZ,
	[IP_VS_SCTP_S_INIT1]			= (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_INIT]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_SENT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_REPLIED]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_WAIT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_ECHOED]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_ESTABLISHED]		= 15 * 60 * HZ,
	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_REJECTED]			= (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_CLOSED]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_LAST]			= 2 * HZ,
};
344
345 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
346 [IP_VS_SCTP_S_NONE] = "NONE",
347 [IP_VS_SCTP_S_INIT1] = "INIT1",
348 [IP_VS_SCTP_S_INIT] = "INIT",
349 [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT",
350 [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED",
351 [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT",
352 [IP_VS_SCTP_S_COOKIE] = "COOKIE",
353 [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED",
354 [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED",
355 [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT",
356 [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED",
357 [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT",
358 [IP_VS_SCTP_S_REJECTED] = "REJECTED",
359 [IP_VS_SCTP_S_CLOSED] = "CLOSED",
360 [IP_VS_SCTP_S_LAST] = "BUG!",
361 };
362
363
sctp_state_name(int state)364 static const char *sctp_state_name(int state)
365 {
366 if (state >= IP_VS_SCTP_S_LAST)
367 return "ERR!";
368 if (sctp_state_name_table[state])
369 return sctp_state_name_table[state];
370 return "?";
371 }
372
373 static inline void
set_sctp_state(struct ip_vs_proto_data * pd,struct ip_vs_conn * cp,int direction,const struct sk_buff * skb)374 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
375 int direction, const struct sk_buff *skb)
376 {
377 struct sctp_chunkhdr _sctpch, *sch;
378 unsigned char chunk_type;
379 int event, next_state;
380 int ihl, cofs;
381
382 #ifdef CONFIG_IP_VS_IPV6
383 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
384 #else
385 ihl = ip_hdrlen(skb);
386 #endif
387
388 cofs = ihl + sizeof(struct sctphdr);
389 sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
390 if (sch == NULL)
391 return;
392
393 chunk_type = sch->type;
394 /*
395 * Section 3: Multiple chunks can be bundled into one SCTP packet
396 * up to the MTU size, except for the INIT, INIT ACK, and
397 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
398 * any other chunk in a packet.
399 *
400 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
401 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
402 * bundled with an ABORT, but they MUST be placed before the ABORT
403 * in the SCTP packet or they will be ignored by the receiver.
404 */
405 if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
406 (sch->type == SCTP_CID_COOKIE_ACK)) {
407 int clen = ntohs(sch->length);
408
409 if (clen >= sizeof(_sctpch)) {
410 sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
411 sizeof(_sctpch), &_sctpch);
412 if (sch && sch->type == SCTP_CID_ABORT)
413 chunk_type = sch->type;
414 }
415 }
416
417 event = (chunk_type < sizeof(sctp_events)) ?
418 sctp_events[chunk_type] : IP_VS_SCTP_DATA;
419
420 /* Update direction to INPUT_ONLY if necessary
421 * or delete NO_OUTPUT flag if output packet detected
422 */
423 if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
424 if (direction == IP_VS_DIR_OUTPUT)
425 cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
426 else
427 direction = IP_VS_DIR_INPUT_ONLY;
428 }
429
430 next_state = sctp_states[direction][event][cp->state];
431
432 if (next_state != cp->state) {
433 struct ip_vs_dest *dest = cp->dest;
434
435 IP_VS_DBG_BUF(8, "%s %s %s:%d->"
436 "%s:%d state: %s->%s conn->refcnt:%d\n",
437 pd->pp->name,
438 ((direction == IP_VS_DIR_OUTPUT) ?
439 "output " : "input "),
440 IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
441 ntohs(cp->dport),
442 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
443 ntohs(cp->cport),
444 sctp_state_name(cp->state),
445 sctp_state_name(next_state),
446 refcount_read(&cp->refcnt));
447 if (dest) {
448 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
449 (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
450 atomic_dec(&dest->activeconns);
451 atomic_inc(&dest->inactconns);
452 cp->flags |= IP_VS_CONN_F_INACTIVE;
453 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
454 (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
455 atomic_inc(&dest->activeconns);
456 atomic_dec(&dest->inactconns);
457 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
458 }
459 }
460 if (next_state == IP_VS_SCTP_S_ESTABLISHED)
461 ip_vs_control_assure_ct(cp);
462 }
463 if (likely(pd))
464 cp->timeout = pd->timeout_table[cp->state = next_state];
465 else /* What to do ? */
466 cp->timeout = sctp_timeouts[cp->state = next_state];
467 }
468
469 static void
sctp_state_transition(struct ip_vs_conn * cp,int direction,const struct sk_buff * skb,struct ip_vs_proto_data * pd)470 sctp_state_transition(struct ip_vs_conn *cp, int direction,
471 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
472 {
473 spin_lock_bh(&cp->lock);
474 set_sctp_state(pd, cp, direction, skb);
475 spin_unlock_bh(&cp->lock);
476 }
477
sctp_app_hashkey(__be16 port)478 static inline __u16 sctp_app_hashkey(__be16 port)
479 {
480 return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
481 & SCTP_APP_TAB_MASK;
482 }
483
sctp_register_app(struct netns_ipvs * ipvs,struct ip_vs_app * inc)484 static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
485 {
486 struct ip_vs_app *i;
487 __u16 hash;
488 __be16 port = inc->port;
489 int ret = 0;
490 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
491
492 hash = sctp_app_hashkey(port);
493
494 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
495 if (i->port == port) {
496 ret = -EEXIST;
497 goto out;
498 }
499 }
500 list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
501 atomic_inc(&pd->appcnt);
502 out:
503
504 return ret;
505 }
506
sctp_unregister_app(struct netns_ipvs * ipvs,struct ip_vs_app * inc)507 static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
508 {
509 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
510
511 atomic_dec(&pd->appcnt);
512 list_del_rcu(&inc->p_list);
513 }
514
sctp_app_conn_bind(struct ip_vs_conn * cp)515 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
516 {
517 struct netns_ipvs *ipvs = cp->ipvs;
518 int hash;
519 struct ip_vs_app *inc;
520 int result = 0;
521
522 /* Default binding: bind app only for NAT */
523 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
524 return 0;
525 /* Lookup application incarnations and bind the right one */
526 hash = sctp_app_hashkey(cp->vport);
527
528 list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
529 if (inc->port == cp->vport) {
530 if (unlikely(!ip_vs_app_inc_get(inc)))
531 break;
532
533 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
534 "%s:%u to app %s on port %u\n",
535 __func__,
536 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
537 ntohs(cp->cport),
538 IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
539 ntohs(cp->vport),
540 inc->name, ntohs(inc->port));
541 cp->app = inc;
542 if (inc->init_conn)
543 result = inc->init_conn(inc, cp);
544 break;
545 }
546 }
547
548 return result;
549 }
550
/* ---------------------------------------------
 *   Timeouts are per network namespace now.
 * ---------------------------------------------
 */
__ip_vs_sctp_init(struct netns_ipvs * ipvs,struct ip_vs_proto_data * pd)555 static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
556 {
557 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
558 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
559 sizeof(sctp_timeouts));
560 if (!pd->timeout_table)
561 return -ENOMEM;
562 return 0;
563 }
564
__ip_vs_sctp_exit(struct netns_ipvs * ipvs,struct ip_vs_proto_data * pd)565 static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
566 {
567 kfree(pd->timeout_table);
568 }
569
/*
 * SCTP protocol descriptor: wires the handlers defined in this file, plus
 * the shared ip_vs_conn_*_get_proto and ip_vs_tcpudp_debug_packet helpers,
 * into the IPVS protocol operations table.  Hooks left NULL are not
 * provided for SCTP.
 */
struct ip_vs_protocol ip_vs_protocol_sctp = {
	.name			= "SCTP",
	.protocol		= IPPROTO_SCTP,
	.num_states		= IP_VS_SCTP_S_LAST,
	.dont_defrag		= 0,
	.init			= NULL,
	.exit			= NULL,
	.init_netns		= __ip_vs_sctp_init,	/* per-netns setup */
	.exit_netns		= __ip_vs_sctp_exit,	/* per-netns teardown */
	.register_app		= sctp_register_app,
	.unregister_app		= sctp_unregister_app,
	.conn_schedule		= sctp_conn_schedule,
	.conn_in_get		= ip_vs_conn_in_get_proto,
	.conn_out_get		= ip_vs_conn_out_get_proto,
	.snat_handler		= sctp_snat_handler,
	.dnat_handler		= sctp_dnat_handler,
	.state_name		= sctp_state_name,
	.state_transition	= sctp_state_transition,
	.app_conn_bind		= sctp_app_conn_bind,
	.debug_packet		= ip_vs_tcpudp_debug_packet,
	.timeout_change		= NULL,
};
592