1
2 /*
3 * Copyright (C) 2012 by Darren Reed.
4 *
5 * See the IPFILTER.LICENCE file for details on licencing.
6 *
7 * Copyright 2008 Sun Microsystems.
8 *
9 * $Id$
10 */
11 #if defined(KERNEL) || defined(_KERNEL)
12 # undef KERNEL
13 # undef _KERNEL
14 # define KERNEL 1
15 # define _KERNEL 1
16 #endif
17 #include <sys/errno.h>
18 #include <sys/types.h>
19 #include <sys/param.h>
20 #include <sys/file.h>
21 #if defined(_KERNEL) && defined(__FreeBSD__) && \
22 !defined(KLD_MODULE)
23 #include "opt_inet6.h"
24 #endif
25 #if !defined(_KERNEL) && !defined(__KERNEL__)
26 # include <stdio.h>
27 # include <stdlib.h>
28 # include <string.h>
29 # define _KERNEL
30 # include <sys/uio.h>
31 # undef _KERNEL
32 #endif
33 #if defined(_KERNEL) && defined(__FreeBSD__)
34 # include <sys/filio.h>
35 # include <sys/fcntl.h>
36 #else
37 # include <sys/ioctl.h>
38 #endif
39 #include <sys/time.h>
40 # include <sys/protosw.h>
41 #include <sys/socket.h>
42 #if defined(_KERNEL)
43 # include <sys/systm.h>
44 # if !defined(__SVR4)
45 # include <sys/mbuf.h>
46 # endif
47 #endif
48 #if defined(__SVR4)
49 # include <sys/filio.h>
50 # include <sys/byteorder.h>
51 # ifdef _KERNEL
52 # include <sys/dditypes.h>
53 # endif
54 # include <sys/stream.h>
55 # include <sys/kmem.h>
56 #endif
57
58 #include <net/if.h>
59 #ifdef sun
60 # include <net/af.h>
61 #endif
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
66 # include <netinet/tcp_fsm.h>
67 #include <netinet/udp.h>
68 #include <netinet/ip_icmp.h>
69 #if !defined(_KERNEL)
70 # include "ipf.h"
71 #endif
72 #include "netinet/ip_compat.h"
73 #include "netinet/ip_fil.h"
74 #include "netinet/ip_nat.h"
75 #include "netinet/ip_frag.h"
76 #include "netinet/ip_state.h"
77 #include "netinet/ip_proxy.h"
78 #include "netinet/ip_lookup.h"
79 #include "netinet/ip_dstlist.h"
80 #include "netinet/ip_sync.h"
81 #ifdef USE_INET6
82 #include <netinet/icmp6.h>
83 #endif
84 #ifdef __FreeBSD__
85 # include <sys/malloc.h>
86 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
87 # include <sys/libkern.h>
88 # include <sys/systm.h>
89 # endif
90 #endif
91 /* END OF INCLUDES */
92
93
94
95
96 static ipftuneable_t ipf_state_tuneables[] = {
97 { { (void *)offsetof(ipf_state_softc_t, ipf_state_max) },
98 "state_max", 1, 0x7fffffff,
99 stsizeof(ipf_state_softc_t, ipf_state_max),
100 0, NULL, NULL },
101 { { (void *)offsetof(ipf_state_softc_t, ipf_state_size) },
102 "state_size", 1, 0x7fffffff,
103 stsizeof(ipf_state_softc_t, ipf_state_size),
104 0, NULL, ipf_state_rehash },
105 { { (void *)offsetof(ipf_state_softc_t, ipf_state_lock) },
106 "state_lock", 0, 1,
107 stsizeof(ipf_state_softc_t, ipf_state_lock),
108 IPFT_RDONLY, NULL, NULL },
109 { { (void *)offsetof(ipf_state_softc_t, ipf_state_maxbucket) },
110 "state_maxbucket", 1, 0x7fffffff,
111 stsizeof(ipf_state_softc_t, ipf_state_maxbucket),
112 0, NULL, NULL },
113 { { (void *)offsetof(ipf_state_softc_t, ipf_state_logging) },
114 "state_logging",0, 1,
115 stsizeof(ipf_state_softc_t, ipf_state_logging),
116 0, NULL, NULL },
117 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_high) },
118 "state_wm_high",2, 100,
119 stsizeof(ipf_state_softc_t, ipf_state_wm_high),
120 0, NULL, NULL },
121 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_low) },
122 "state_wm_low", 1, 99,
123 stsizeof(ipf_state_softc_t, ipf_state_wm_low),
124 0, NULL, NULL },
125 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_freq) },
126 "state_wm_freq",2, 999999,
127 stsizeof(ipf_state_softc_t, ipf_state_wm_freq),
128 0, NULL, NULL },
129 { { NULL },
130 NULL, 0, 0,
131 0,
132 0, NULL, NULL }
133 };
134
/*
 * Counter shorthands.  Each one expands to an access through a variable
 * named "softs", which therefore has to be in scope at the point of use.
 *   SINCL(x)         - hand softs->x to ATOMIC_INCL
 *   SBUMP(x)         - plain post-increment of softs->x
 *   SBUMPD(x, y)     - post-increment softs->x.y, then invoke DT(y)
 *   SBUMPDX(x, y, z) - post-increment softs->x.y, then invoke DT(z)
 */
#define	SINCL(x)		ATOMIC_INCL(softs->x)
#define	SBUMP(x)		(softs->x)++
#define	SBUMPD(x, y)		do { (softs->x.y)++; DT(y); } while (0)
#define	SBUMPDX(x, y, z)	do { (softs->x.y)++; DT(z); } while (0)
139
140 #ifdef USE_INET6
141 static ipstate_t *ipf_checkicmp6matchingstate(fr_info_t *);
142 #endif
143 static int ipf_allowstateicmp(fr_info_t *, ipstate_t *, i6addr_t *);
144 static ipstate_t *ipf_matchsrcdst(fr_info_t *, ipstate_t *, i6addr_t *,
145 i6addr_t *, tcphdr_t *, u_32_t);
146 static ipstate_t *ipf_checkicmpmatchingstate(fr_info_t *);
147 static int ipf_state_flush_entry(ipf_main_softc_t *, void *);
148 static ips_stat_t *ipf_state_stats(ipf_main_softc_t *);
149 static int ipf_state_del(ipf_main_softc_t *, ipstate_t *, int);
150 static int ipf_state_remove(ipf_main_softc_t *, caddr_t);
151 static int ipf_state_match(ipstate_t *is1, ipstate_t *is2);
152 static int ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2);
153 static int ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2);
154 static int ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2);
155 static int ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2);
156 static int ipf_state_matchports(udpinfo_t *is1, udpinfo_t *is2);
157 static int ipf_state_matcharray(ipstate_t *, int *, u_long);
158 static void ipf_ipsmove(ipf_state_softc_t *, ipstate_t *, u_int);
159 static int ipf_state_tcp(ipf_main_softc_t *, ipf_state_softc_t *,
160 fr_info_t *, tcphdr_t *, ipstate_t *);
161 static int ipf_tcpoptions(ipf_state_softc_t *, fr_info_t *,
162 tcphdr_t *, tcpdata_t *);
163 static ipstate_t *ipf_state_clone(fr_info_t *, tcphdr_t *, ipstate_t *);
164 static void ipf_fixinisn(fr_info_t *, ipstate_t *);
165 static void ipf_fixoutisn(fr_info_t *, ipstate_t *);
166 static void ipf_checknewisn(fr_info_t *, ipstate_t *);
167 static int ipf_state_iter(ipf_main_softc_t *, ipftoken_t *,
168 ipfgeniter_t *, ipfobj_t *);
169 static int ipf_state_gettable(ipf_main_softc_t *, ipf_state_softc_t *,
170 char *);
171 static int ipf_state_tcpinwindow(struct fr_info *, struct tcpdata *,
172 struct tcpdata *, tcphdr_t *, int);
173
174 static int ipf_state_getent(ipf_main_softc_t *, ipf_state_softc_t *,
175 caddr_t);
176 static int ipf_state_putent(ipf_main_softc_t *, ipf_state_softc_t *,
177 caddr_t);
178
/*
 * ONE_DAY is 86400 passed through IPF_TTLVAL and FIVE_DAYS is five times
 * that value.
 *
 * DOUBLE_HASH(x) adds to x the seed stored at index (x % ipf_state_size)
 * and reduces the sum modulo ipf_state_size.  It reads both fields through
 * a variable named "softs" that must be in scope, and it evaluates x
 * twice, so x must not have side effects.
 */
#define	ONE_DAY		IPF_TTLVAL(1 * 86400)	/* 1 day */
#define	FIVE_DAYS	(5 * ONE_DAY)
#define	DOUBLE_HASH(x)	\
	(((x) + softs->ipf_state_seed[(x) % softs->ipf_state_size]) % \
	 softs->ipf_state_size)
183
184
/* ------------------------------------------------------------------------ */
/* Function:    ipf_state_main_load                                         */
/* Returns:     int - always 0 (success)                                    */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* Module-load hook kept as a placeholder so the call flow elsewhere stays  */
/* obvious; it does no work and always reports success.                     */
/* ------------------------------------------------------------------------ */
int
ipf_state_main_load(void)
{
	return 0;
}
198
199
/* ------------------------------------------------------------------------ */
/* Function:    ipf_state_main_unload                                       */
/* Returns:     int - always 0 (success)                                    */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* Module-unload hook kept as a placeholder so the call flow elsewhere stays*/
/* obvious; it does no work and always reports success.                     */
/* ------------------------------------------------------------------------ */
int
ipf_state_main_unload(void)
{
	return 0;
}
213
214
215 /* ------------------------------------------------------------------------ */
216 /* Function: ipf_state_soft_create */
217 /* Returns: void * - NULL = failure, else pointer to soft context */
218 /* Parameters: softc(I) - pointer to soft context main structure */
219 /* */
220 /* Create a new state soft context structure and populate it with the list */
221 /* of tunables and other default settings. */
222 /* ------------------------------------------------------------------------ */
223 void *
ipf_state_soft_create(ipf_main_softc_t * softc)224 ipf_state_soft_create(ipf_main_softc_t *softc)
225 {
226 ipf_state_softc_t *softs;
227
228 KMALLOC(softs, ipf_state_softc_t *);
229 if (softs == NULL)
230 return (NULL);
231
232 bzero((char *)softs, sizeof(*softs));
233
234 softs->ipf_state_tune = ipf_tune_array_copy(softs,
235 sizeof(ipf_state_tuneables),
236 ipf_state_tuneables);
237 if (softs->ipf_state_tune == NULL) {
238 ipf_state_soft_destroy(softc, softs);
239 return (NULL);
240 }
241 if (ipf_tune_array_link(softc, softs->ipf_state_tune) == -1) {
242 ipf_state_soft_destroy(softc, softs);
243 return (NULL);
244 }
245
246 #ifdef IPFILTER_LOG
247 softs->ipf_state_logging = 1;
248 #else
249 softs->ipf_state_logging = 0;
250 #endif
251 softs->ipf_state_size = IPSTATE_SIZE,
252 softs->ipf_state_maxbucket = 0;
253 softs->ipf_state_wm_freq = IPF_TTLVAL(10);
254 softs->ipf_state_max = IPSTATE_MAX;
255 softs->ipf_state_wm_last = 0;
256 softs->ipf_state_wm_high = 99;
257 softs->ipf_state_wm_low = 90;
258 softs->ipf_state_inited = 0;
259 softs->ipf_state_lock = 0;
260 softs->ipf_state_doflush = 0;
261
262 return (softs);
263 }
264
265
266 /* ------------------------------------------------------------------------ */
267 /* Function: ipf_state_soft_destroy */
268 /* Returns: Nil */
269 /* Parameters: softc(I) - pointer to soft context main structure */
270 /* arg(I) - pointer to local context to use */
271 /* */
272 /* Undo only what we did in soft create: unlink and free the tunables and */
273 /* free the soft context structure itself. */
274 /* ------------------------------------------------------------------------ */
275 void
ipf_state_soft_destroy(ipf_main_softc_t * softc,void * arg)276 ipf_state_soft_destroy(ipf_main_softc_t *softc, void *arg)
277 {
278 ipf_state_softc_t *softs = arg;
279
280 if (softs->ipf_state_tune != NULL) {
281 ipf_tune_array_unlink(softc, softs->ipf_state_tune);
282 KFREES(softs->ipf_state_tune, sizeof(ipf_state_tuneables));
283 softs->ipf_state_tune = NULL;
284 }
285
286 KFREE(softs);
287 }
288
289 static void *
ipf_state_seed_alloc(u_int state_size,u_int state_max)290 ipf_state_seed_alloc(u_int state_size, u_int state_max)
291 {
292 u_int i;
293 u_long *state_seed;
294 KMALLOCS(state_seed, u_long *, state_size * sizeof(*state_seed));
295 if (state_seed == NULL)
296 return (NULL);
297
298 for (i = 0; i < state_size; i++) {
299 /*
300 * XXX - ipf_state_seed[X] should be a random number of sorts.
301 */
302 #ifdef __FreeBSD__
303 state_seed[i] = arc4random();
304 #else
305 state_seed[i] = ((u_long)state_seed + i) * state_size;
306 state_seed[i] ^= 0xa5a55a5a;
307 state_seed[i] *= (u_long)state_seed;
308 state_seed[i] ^= 0x5a5aa5a5;
309 state_seed[i] *= state_max;
310 #endif
311 }
312 return (state_seed);
313 }
314
315
316 /* ------------------------------------------------------------------------ */
317 /* Function: ipf_state_soft_init */
318 /* Returns: int - 0 == success, -1 == failure */
319 /* Parameters: softc(I) - pointer to soft context main structure */
320 /* arg(I) - pointer to local context to use */
321 /* */
322 /* Initialise the state soft context structure so it is ready for use. */
323 /* This involves: */
324 /* - allocating a hash table and zero'ing it out */
325 /* - building a secondary table of seeds for double hashing to make it more */
326 /* difficult to attempt to attack the hash table itself (for DoS) */
327 /* - initialise all of the timeout queues, including a table for TCP, some */
328 /* pairs of query/response for UDP and other IP protocols (typically the */
329 /* reply queue has a shorter timeout than the query) */
330 /* ------------------------------------------------------------------------ */
331 int
ipf_state_soft_init(ipf_main_softc_t * softc,void * arg)332 ipf_state_soft_init(ipf_main_softc_t *softc, void *arg)
333 {
334 ipf_state_softc_t *softs = arg;
335 int i;
336
337 KMALLOCS(softs->ipf_state_table,
338 ipstate_t **, softs->ipf_state_size * sizeof(ipstate_t *));
339 if (softs->ipf_state_table == NULL)
340 return (-1);
341
342 bzero((char *)softs->ipf_state_table,
343 softs->ipf_state_size * sizeof(ipstate_t *));
344
345 softs->ipf_state_seed = ipf_state_seed_alloc(softs->ipf_state_size,
346 softs->ipf_state_max);
347 if (softs->ipf_state_seed == NULL)
348 return (-2);
349
350 KMALLOCS(softs->ipf_state_stats.iss_bucketlen, u_int *,
351 softs->ipf_state_size * sizeof(u_int));
352 if (softs->ipf_state_stats.iss_bucketlen == NULL)
353 return (-3);
354
355 bzero((char *)softs->ipf_state_stats.iss_bucketlen,
356 softs->ipf_state_size * sizeof(u_int));
357
358 if (softs->ipf_state_maxbucket == 0) {
359 for (i = softs->ipf_state_size; i > 0; i >>= 1)
360 softs->ipf_state_maxbucket++;
361 softs->ipf_state_maxbucket *= 2;
362 }
363
364 ipf_sttab_init(softc, softs->ipf_state_tcptq);
365 softs->ipf_state_stats.iss_tcptab = softs->ipf_state_tcptq;
366 softs->ipf_state_tcptq[IPF_TCP_NSTATES - 1].ifq_next =
367 &softs->ipf_state_udptq;
368
369 IPFTQ_INIT(&softs->ipf_state_udptq, softc->ipf_udptimeout,
370 "ipftq udp tab");
371 softs->ipf_state_udptq.ifq_next = &softs->ipf_state_udpacktq;
372
373 IPFTQ_INIT(&softs->ipf_state_udpacktq, softc->ipf_udpacktimeout,
374 "ipftq udpack tab");
375 softs->ipf_state_udpacktq.ifq_next = &softs->ipf_state_icmptq;
376
377 IPFTQ_INIT(&softs->ipf_state_icmptq, softc->ipf_icmptimeout,
378 "ipftq icmp tab");
379 softs->ipf_state_icmptq.ifq_next = &softs->ipf_state_icmpacktq;
380
381 IPFTQ_INIT(&softs->ipf_state_icmpacktq, softc->ipf_icmpacktimeout,
382 "ipftq icmpack tab");
383 softs->ipf_state_icmpacktq.ifq_next = &softs->ipf_state_iptq;
384
385 IPFTQ_INIT(&softs->ipf_state_iptq, softc->ipf_iptimeout,
386 "ipftq iptimeout tab");
387 softs->ipf_state_iptq.ifq_next = &softs->ipf_state_pending;
388
389 IPFTQ_INIT(&softs->ipf_state_pending, IPF_HZ_DIVIDE, "ipftq pending");
390 softs->ipf_state_pending.ifq_next = &softs->ipf_state_deletetq;
391
392 IPFTQ_INIT(&softs->ipf_state_deletetq, 1, "ipftq delete");
393 softs->ipf_state_deletetq.ifq_next = NULL;
394
395 MUTEX_INIT(&softs->ipf_stinsert, "ipf state insert mutex");
396
397
398 softs->ipf_state_wm_last = softc->ipf_ticks;
399 softs->ipf_state_inited = 1;
400
401 return (0);
402 }
403
404
405 /* ------------------------------------------------------------------------ */
406 /* Function: ipf_state_soft_fini */
407 /* Returns: int - 0 = success, -1 = failure */
408 /* Parameters: softc(I) - pointer to soft context main structure */
409 /* arg(I) - pointer to local context to use */
410 /* */
411 /* Release and destroy any resources acquired or initialised so that */
412 /* IPFilter can be unloaded or re-initialised. */
413 /* ------------------------------------------------------------------------ */
414 int
ipf_state_soft_fini(ipf_main_softc_t * softc,void * arg)415 ipf_state_soft_fini(ipf_main_softc_t *softc, void *arg)
416 {
417 ipf_state_softc_t *softs = arg;
418 ipftq_t *ifq, *ifqnext;
419 ipstate_t *is;
420
421 while ((is = softs->ipf_state_list) != NULL)
422 ipf_state_del(softc, is, ISL_UNLOAD);
423
424 /*
425 * Proxy timeout queues are not cleaned here because although they
426 * exist on the state list, appr_unload is called after
427 * ipf_state_unload and the proxies actually are responsible for them
428 * being created. Should the proxy timeouts have their own list?
429 * There's no real justification as this is the only complication.
430 */
431 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
432 ifqnext = ifq->ifq_next;
433
434 if (ipf_deletetimeoutqueue(ifq) == 0)
435 ipf_freetimeoutqueue(softc, ifq);
436 }
437
438 softs->ipf_state_stats.iss_inuse = 0;
439 softs->ipf_state_stats.iss_active = 0;
440
441 if (softs->ipf_state_inited == 1) {
442 softs->ipf_state_inited = 0;
443 ipf_sttab_destroy(softs->ipf_state_tcptq);
444 MUTEX_DESTROY(&softs->ipf_state_udptq.ifq_lock);
445 MUTEX_DESTROY(&softs->ipf_state_icmptq.ifq_lock);
446 MUTEX_DESTROY(&softs->ipf_state_udpacktq.ifq_lock);
447 MUTEX_DESTROY(&softs->ipf_state_icmpacktq.ifq_lock);
448 MUTEX_DESTROY(&softs->ipf_state_iptq.ifq_lock);
449 MUTEX_DESTROY(&softs->ipf_state_deletetq.ifq_lock);
450 MUTEX_DESTROY(&softs->ipf_state_pending.ifq_lock);
451 MUTEX_DESTROY(&softs->ipf_stinsert);
452 }
453
454 if (softs->ipf_state_table != NULL) {
455 KFREES(softs->ipf_state_table,
456 softs->ipf_state_size * sizeof(*softs->ipf_state_table));
457 softs->ipf_state_table = NULL;
458 }
459
460 if (softs->ipf_state_seed != NULL) {
461 KFREES(softs->ipf_state_seed,
462 softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
463 softs->ipf_state_seed = NULL;
464 }
465
466 if (softs->ipf_state_stats.iss_bucketlen != NULL) {
467 KFREES(softs->ipf_state_stats.iss_bucketlen,
468 softs->ipf_state_size * sizeof(u_int));
469 softs->ipf_state_stats.iss_bucketlen = NULL;
470 }
471
472 return (0);
473 }
474
475
476 /* ------------------------------------------------------------------------ */
477 /* Function: ipf_state_setlock */
478 /* Returns: Nil */
479 /* Parameters: arg(I) - pointer to local context to use */
480 /* tmp(I) - new value for lock */
481 /* */
482 /* Stub function that allows for external manipulation of ipf_state_lock */
483 /* ------------------------------------------------------------------------ */
484 void
ipf_state_setlock(void * arg,int tmp)485 ipf_state_setlock(void *arg, int tmp)
486 {
487 ipf_state_softc_t *softs = arg;
488
489 softs->ipf_state_lock = tmp;
490 }
491
492
493 /* ------------------------------------------------------------------------ */
494 /* Function: ipf_state_stats */
495 /* Returns: ips_state_t* - pointer to state stats structure */
496 /* Parameters: softc(I) - pointer to soft context main structure */
497 /* */
498 /* Put all the current numbers and pointers into a single struct and return */
499 /* a pointer to it. */
500 /* ------------------------------------------------------------------------ */
501 static ips_stat_t *
ipf_state_stats(ipf_main_softc_t * softc)502 ipf_state_stats(ipf_main_softc_t *softc)
503 {
504 ipf_state_softc_t *softs = softc->ipf_state_soft;
505 ips_stat_t *issp = &softs->ipf_state_stats;
506
507 issp->iss_state_size = softs->ipf_state_size;
508 issp->iss_state_max = softs->ipf_state_max;
509 issp->iss_table = softs->ipf_state_table;
510 issp->iss_list = softs->ipf_state_list;
511 issp->iss_ticks = softc->ipf_ticks;
512
513 #ifdef IPFILTER_LOGGING
514 issp->iss_log_ok = ipf_log_logok(softc, IPF_LOGSTATE);
515 issp->iss_log_fail = ipf_log_failures(softc, IPF_LOGSTATE);
516 #else
517 issp->iss_log_ok = 0;
518 issp->iss_log_fail = 0;
519 #endif
520 return (issp);
521 }
522
523 /* ------------------------------------------------------------------------ */
524 /* Function: ipf_state_remove */
525 /* Returns: int - 0 == success, != 0 == failure */
526 /* Parameters: softc(I) - pointer to soft context main structure */
527 /* data(I) - pointer to state structure to delete from table */
528 /* */
529 /* Search for a state structure that matches the one passed, according to */
530 /* the IP addresses and other protocol specific information. */
531 /* ------------------------------------------------------------------------ */
532 static int
ipf_state_remove(ipf_main_softc_t * softc,caddr_t data)533 ipf_state_remove(ipf_main_softc_t *softc, caddr_t data)
534 {
535 ipf_state_softc_t *softs = softc->ipf_state_soft;
536 ipstate_t *sp, st;
537 int error;
538
539 sp = &st;
540 error = ipf_inobj(softc, data, NULL, &st, IPFOBJ_IPSTATE);
541 if (error)
542 return (EFAULT);
543
544 WRITE_ENTER(&softc->ipf_state);
545 for (sp = softs->ipf_state_list; sp; sp = sp->is_next)
546 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
547 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src,
548 sizeof(st.is_src)) &&
549 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst,
550 sizeof(st.is_dst)) &&
551 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps,
552 sizeof(st.is_ps))) {
553 ipf_state_del(softc, sp, ISL_REMOVE);
554 RWLOCK_EXIT(&softc->ipf_state);
555 return (0);
556 }
557 RWLOCK_EXIT(&softc->ipf_state);
558
559 IPFERROR(100001);
560 return (ESRCH);
561 }
562
563
564 /* ------------------------------------------------------------------------ */
565 /* Function: ipf_state_ioctl */
566 /* Returns: int - 0 == success, != 0 == failure */
567 /* Parameters: softc(I) - pointer to soft context main structure */
568 /* data(I) - pointer to ioctl data */
569 /* cmd(I) - ioctl command integer */
570 /* mode(I) - file mode bits used with open */
571 /* uid(I) - uid of process making the ioctl call */
572 /* ctx(I) - pointer specific to context of the call */
573 /* */
574 /* Processes an ioctl call made to operate on the IP Filter state device. */
575 /* ------------------------------------------------------------------------ */
576 int
ipf_state_ioctl(ipf_main_softc_t * softc,caddr_t data,ioctlcmd_t cmd,int mode,int uid,void * ctx)577 ipf_state_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd,
578 int mode, int uid, void *ctx)
579 {
580 ipf_state_softc_t *softs = softc->ipf_state_soft;
581 int arg, ret, error = 0;
582 SPL_INT(s);
583
584 switch (cmd)
585 {
586 /*
587 * Delete an entry from the state table.
588 */
589 case SIOCDELST :
590 error = ipf_state_remove(softc, data);
591 break;
592
593 /*
594 * Flush the state table
595 */
596 case SIOCIPFFL :
597 error = BCOPYIN(data, &arg, sizeof(arg));
598 if (error != 0) {
599 IPFERROR(100002);
600 error = EFAULT;
601
602 } else {
603 WRITE_ENTER(&softc->ipf_state);
604 ret = ipf_state_flush(softc, arg, 4);
605 RWLOCK_EXIT(&softc->ipf_state);
606
607 error = BCOPYOUT(&ret, data, sizeof(ret));
608 if (error != 0) {
609 IPFERROR(100003);
610 error = EFAULT;
611 }
612 }
613 break;
614
615 #ifdef USE_INET6
616 case SIOCIPFL6 :
617 error = BCOPYIN(data, &arg, sizeof(arg));
618 if (error != 0) {
619 IPFERROR(100004);
620 error = EFAULT;
621
622 } else {
623 WRITE_ENTER(&softc->ipf_state);
624 ret = ipf_state_flush(softc, arg, 6);
625 RWLOCK_EXIT(&softc->ipf_state);
626
627 error = BCOPYOUT(&ret, data, sizeof(ret));
628 if (error != 0) {
629 IPFERROR(100005);
630 error = EFAULT;
631 }
632 }
633 break;
634 #endif
635
636 case SIOCMATCHFLUSH :
637 WRITE_ENTER(&softc->ipf_state);
638 error = ipf_state_matchflush(softc, data);
639 RWLOCK_EXIT(&softc->ipf_state);
640 break;
641
642 #ifdef IPFILTER_LOG
643 /*
644 * Flush the state log.
645 */
646 case SIOCIPFFB :
647 if (!(mode & FWRITE)) {
648 IPFERROR(100008);
649 error = EPERM;
650 } else {
651 int tmp;
652
653 tmp = ipf_log_clear(softc, IPL_LOGSTATE);
654 error = BCOPYOUT(&tmp, data, sizeof(tmp));
655 if (error != 0) {
656 IPFERROR(100009);
657 error = EFAULT;
658 }
659 }
660 break;
661
662 /*
663 * Turn logging of state information on/off.
664 */
665 case SIOCSETLG :
666 if (!(mode & FWRITE)) {
667 IPFERROR(100010);
668 error = EPERM;
669 } else {
670 error = BCOPYIN(data, &softs->ipf_state_logging,
671 sizeof(softs->ipf_state_logging));
672 if (error != 0) {
673 IPFERROR(100011);
674 error = EFAULT;
675 }
676 }
677 break;
678
679 /*
680 * Return the current state of logging.
681 */
682 case SIOCGETLG :
683 error = BCOPYOUT(&softs->ipf_state_logging, data,
684 sizeof(softs->ipf_state_logging));
685 if (error != 0) {
686 IPFERROR(100012);
687 error = EFAULT;
688 }
689 break;
690
691 /*
692 * Return the number of bytes currently waiting to be read.
693 */
694 case FIONREAD :
695 arg = ipf_log_bytesused(softc, IPL_LOGSTATE);
696 error = BCOPYOUT(&arg, data, sizeof(arg));
697 if (error != 0) {
698 IPFERROR(100013);
699 error = EFAULT;
700 }
701 break;
702 #endif
703
704 /*
705 * Get the current state statistics.
706 */
707 case SIOCGETFS :
708 error = ipf_outobj(softc, data, ipf_state_stats(softc),
709 IPFOBJ_STATESTAT);
710 break;
711
712 /*
713 * Lock/Unlock the state table. (Locking prevents any changes, which
714 * means no packets match).
715 */
716 case SIOCSTLCK :
717 if (!(mode & FWRITE)) {
718 IPFERROR(100014);
719 error = EPERM;
720 } else {
721 error = ipf_lock(data, &softs->ipf_state_lock);
722 }
723 break;
724
725 /*
726 * Add an entry to the current state table.
727 */
728 case SIOCSTPUT :
729 if (!softs->ipf_state_lock || !(mode &FWRITE)) {
730 IPFERROR(100015);
731 error = EACCES;
732 break;
733 }
734 error = ipf_state_putent(softc, softs, data);
735 break;
736
737 /*
738 * Get a state table entry.
739 */
740 case SIOCSTGET :
741 if (!softs->ipf_state_lock) {
742 IPFERROR(100016);
743 error = EACCES;
744 break;
745 }
746 error = ipf_state_getent(softc, softs, data);
747 break;
748
749 case SIOCGENITER :
750 {
751 ipftoken_t *token;
752 ipfgeniter_t iter;
753 ipfobj_t obj;
754
755 error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER);
756 if (error != 0)
757 break;
758
759 SPL_SCHED(s);
760 token = ipf_token_find(softc, IPFGENITER_STATE, uid, ctx);
761 if (token != NULL) {
762 error = ipf_state_iter(softc, token, &iter, &obj);
763 WRITE_ENTER(&softc->ipf_tokens);
764 ipf_token_deref(softc, token);
765 RWLOCK_EXIT(&softc->ipf_tokens);
766 } else {
767 IPFERROR(100018);
768 error = ESRCH;
769 }
770 SPL_X(s);
771 break;
772 }
773
774 case SIOCGTABL :
775 error = ipf_state_gettable(softc, softs, data);
776 break;
777
778 case SIOCIPFDELTOK :
779 error = BCOPYIN(data, &arg, sizeof(arg));
780 if (error != 0) {
781 IPFERROR(100019);
782 error = EFAULT;
783 } else {
784 SPL_SCHED(s);
785 error = ipf_token_del(softc, arg, uid, ctx);
786 SPL_X(s);
787 }
788 break;
789
790 case SIOCGTQTAB :
791 error = ipf_outobj(softc, data, softs->ipf_state_tcptq,
792 IPFOBJ_STATETQTAB);
793 break;
794
795 default :
796 IPFERROR(100020);
797 error = EINVAL;
798 break;
799 }
800 return (error);
801 }
802
803
804 /* ------------------------------------------------------------------------ */
805 /* Function: ipf_state_getent */
806 /* Returns: int - 0 == success, != 0 == failure */
807 /* Parameters: softc(I) - pointer to soft context main structure */
808 /* softs(I) - pointer to state context structure */
809 /* data(I) - pointer to state structure to retrieve from table*/
810 /* */
811 /* Copy out state information from the kernel to a user space process. If */
812 /* there is a filter rule associated with the state entry, copy that out */
813 /* as well. The entry to copy out is taken from the value of "ips_next" in */
814 /* the struct passed in and if not null and not found in the list of current*/
815 /* state entries, the retrieval fails. */
816 /* ------------------------------------------------------------------------ */
817 static int
ipf_state_getent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,caddr_t data)818 ipf_state_getent(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
819 caddr_t data)
820 {
821 ipstate_t *is, *isn;
822 ipstate_save_t ips;
823 int error;
824
825 error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
826 if (error)
827 return (EFAULT);
828
829 READ_ENTER(&softc->ipf_state);
830 isn = ips.ips_next;
831 if (isn == NULL) {
832 isn = softs->ipf_state_list;
833 if (isn == NULL) {
834 if (ips.ips_next == NULL) {
835 RWLOCK_EXIT(&softc->ipf_state);
836 IPFERROR(100021);
837 return (ENOENT);
838 }
839 return (0);
840 }
841 } else {
842 /*
843 * Make sure the pointer we're copying from exists in the
844 * current list of entries. Security precaution to prevent
845 * copying of random kernel data.
846 */
847 for (is = softs->ipf_state_list; is; is = is->is_next)
848 if (is == isn)
849 break;
850 if (!is) {
851 RWLOCK_EXIT(&softc->ipf_state);
852 IPFERROR(100022);
853 return (ESRCH);
854 }
855 }
856 ips.ips_next = isn->is_next;
857 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
858 ips.ips_rule = isn->is_rule;
859 if (isn->is_rule != NULL)
860 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
861 sizeof(ips.ips_fr));
862 RWLOCK_EXIT(&softc->ipf_state);
863 error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
864 return (error);
865 }
866
867
868 /* ------------------------------------------------------------------------ */
869 /* Function: ipf_state_putent */
870 /* Returns: int - 0 == success, != 0 == failure */
871 /* Parameters: softc(I) - pointer to soft context main structure */
872 /* softs(I) - pointer to state context structure */
873 /* data(I) - pointer to state information struct */
874 /* */
875 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */
876 /* the state table. If the state info. includes a pointer to a filter rule */
877 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
878 /* output. */
879 /* ------------------------------------------------------------------------ */
880 int
ipf_state_putent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,caddr_t data)881 ipf_state_putent(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
882 caddr_t data)
883 {
884 ipstate_t *is, *isn;
885 ipstate_save_t ips;
886 int error, i;
887 frentry_t *fr;
888 char *name;
889
890 error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
891 if (error != 0)
892 return (error);
893
894 KMALLOC(isn, ipstate_t *);
895 if (isn == NULL) {
896 IPFERROR(100023);
897 return (ENOMEM);
898 }
899
900 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
901 bzero((char *)isn, offsetof(struct ipstate, is_pkts));
902 isn->is_sti.tqe_pnext = NULL;
903 isn->is_sti.tqe_next = NULL;
904 isn->is_sti.tqe_ifq = NULL;
905 isn->is_sti.tqe_parent = isn;
906 isn->is_ifp[0] = NULL;
907 isn->is_ifp[1] = NULL;
908 isn->is_ifp[2] = NULL;
909 isn->is_ifp[3] = NULL;
910 isn->is_sync = NULL;
911 fr = ips.ips_rule;
912
913 if (fr == NULL) {
914 int inserr;
915
916 READ_ENTER(&softc->ipf_state);
917 inserr = ipf_state_insert(softc, isn, 0);
918 MUTEX_EXIT(&isn->is_lock);
919 RWLOCK_EXIT(&softc->ipf_state);
920
921 return (inserr);
922 }
923
924 if (isn->is_flags & SI_NEWFR) {
925 KMALLOC(fr, frentry_t *);
926 if (fr == NULL) {
927 KFREE(isn);
928 IPFERROR(100024);
929 return (ENOMEM);
930 }
931 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
932 isn->is_rule = fr;
933 ips.ips_is.is_rule = fr;
934 MUTEX_NUKE(&fr->fr_lock);
935 MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
936
937 /*
938 * Look up all the interface names in the rule.
939 */
940 for (i = 0; i < FR_NUM(fr->fr_ifnames); i++) {
941 if (fr->fr_ifnames[i] == -1) {
942 fr->fr_ifas[i] = NULL;
943 continue;
944 }
945 name = FR_NAME(fr, fr_ifnames[i]);
946 fr->fr_ifas[i] = ipf_resolvenic(softc, name,
947 fr->fr_family);
948 }
949
950 for (i = 0; i < FR_NUM(isn->is_ifname); i++) {
951 name = isn->is_ifname[i];
952 isn->is_ifp[i] = ipf_resolvenic(softc, name,
953 isn->is_v);
954 }
955
956 fr->fr_ref = 0;
957 fr->fr_dsize = 0;
958 fr->fr_data = NULL;
959 fr->fr_type = FR_T_NONE;
960
961 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[0],
962 fr->fr_family);
963 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[1],
964 fr->fr_family);
965 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_dif,
966 fr->fr_family);
967
968 /*
969 * send a copy back to userland of what we ended up
970 * to allow for verification.
971 */
972 error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
973 if (error != 0) {
974 KFREE(isn);
975 MUTEX_DESTROY(&fr->fr_lock);
976 KFREE(fr);
977 IPFERROR(100025);
978 return (EFAULT);
979 }
980 READ_ENTER(&softc->ipf_state);
981 error = ipf_state_insert(softc, isn, 0);
982 MUTEX_EXIT(&isn->is_lock);
983 RWLOCK_EXIT(&softc->ipf_state);
984
985 } else {
986 READ_ENTER(&softc->ipf_state);
987 for (is = softs->ipf_state_list; is; is = is->is_next)
988 if (is->is_rule == fr) {
989 error = ipf_state_insert(softc, isn, 0);
990 MUTEX_EXIT(&isn->is_lock);
991 break;
992 }
993
994 if (is == NULL) {
995 KFREE(isn);
996 isn = NULL;
997 }
998 RWLOCK_EXIT(&softc->ipf_state);
999
1000 if (isn == NULL) {
1001 IPFERROR(100033);
1002 error = ESRCH;
1003 }
1004 }
1005
1006 return (error);
1007 }
1008
1009
1010 /* ------------------------------------------------------------------------ */
1011 /* Function: ipf_state_insert */
1012 /* Returns: int - 0 == success, -1 == failure */
1013 /* Parameters: softc(I) - pointer to soft context main structure */
1014 /* Parameters: is(I) - pointer to state structure */
1015 /* rev(I) - flag indicating direction of packet */
1016 /* */
1017 /* Inserts a state structure into the hash table (for lookups) and the list */
1018 /* of state entries (for enumeration). Resolves all of the interface names */
1019 /* to pointers and adjusts running stats for the hash table as appropriate. */
1020 /* */
1021 /* This function can fail if the filter rule has had a population policy of */
1022 /* IP addresses used with stateful filtering assigned to it. */
1023 /* */
1024 /* Locking: it is assumed that some kind of lock on ipf_state is held. */
1025 /* Exits with is_lock initialised and held - *EVEN IF ERROR*. */
1026 /* ------------------------------------------------------------------------ */
1027 int
ipf_state_insert(ipf_main_softc_t * softc,ipstate_t * is,int rev)1028 ipf_state_insert(ipf_main_softc_t *softc, ipstate_t *is, int rev)
1029 {
1030 ipf_state_softc_t *softs = softc->ipf_state_soft;
1031 frentry_t *fr;
1032 u_int hv;
1033 int i;
1034
1035 /*
1036 * Look up all the interface names in the state entry.
1037 */
1038 for (i = 0; i < FR_NUM(is->is_ifp); i++) {
1039 if (is->is_ifp[i] != NULL)
1040 continue;
1041 is->is_ifp[i] = ipf_resolvenic(softc, is->is_ifname[i],
1042 is->is_v);
1043 }
1044
1045 /*
1046 * If we could trust is_hv, then the modulus would not be needed,
1047 * but when running with IPFILTER_SYNC, this stops bad values.
1048 */
1049 hv = is->is_hv % softs->ipf_state_size;
1050 /* TRACE is, hv */
1051 is->is_hv = hv;
1052
1053 /*
1054 * We need to get both of these locks...the first because it is
1055 * possible that once the insert is complete another packet might
1056 * come along, match the entry and want to update it.
1057 */
1058 MUTEX_INIT(&is->is_lock, "ipf state entry");
1059 MUTEX_ENTER(&is->is_lock);
1060 MUTEX_ENTER(&softs->ipf_stinsert);
1061
1062 fr = is->is_rule;
1063 if (fr != NULL) {
1064 if ((fr->fr_srctrack.ht_max_nodes != 0) &&
1065 (ipf_ht_node_add(softc, &fr->fr_srctrack,
1066 is->is_family, &is->is_src) == -1)) {
1067 SBUMPD(ipf_state_stats, iss_max_track);
1068 MUTEX_EXIT(&softs->ipf_stinsert);
1069 return (-1);
1070 }
1071
1072 MUTEX_ENTER(&fr->fr_lock);
1073 fr->fr_ref++;
1074 MUTEX_EXIT(&fr->fr_lock);
1075 fr->fr_statecnt++;
1076 }
1077
1078 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
1079 DT(iss_wild_plus_one);
1080 SINCL(ipf_state_stats.iss_wild);
1081 }
1082
1083 SBUMP(ipf_state_stats.iss_proto[is->is_p]);
1084 SBUMP(ipf_state_stats.iss_active_proto[is->is_p]);
1085
1086 /*
1087 * add into list table.
1088 */
1089 if (softs->ipf_state_list != NULL)
1090 softs->ipf_state_list->is_pnext = &is->is_next;
1091 is->is_pnext = &softs->ipf_state_list;
1092 is->is_next = softs->ipf_state_list;
1093 softs->ipf_state_list = is;
1094
1095 if (softs->ipf_state_table[hv] != NULL)
1096 softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
1097 else
1098 softs->ipf_state_stats.iss_inuse++;
1099 is->is_phnext = softs->ipf_state_table + hv;
1100 is->is_hnext = softs->ipf_state_table[hv];
1101 softs->ipf_state_table[hv] = is;
1102 softs->ipf_state_stats.iss_bucketlen[hv]++;
1103 softs->ipf_state_stats.iss_active++;
1104 MUTEX_EXIT(&softs->ipf_stinsert);
1105
1106 ipf_state_setqueue(softc, is, rev);
1107
1108 return (0);
1109 }
1110
1111
1112 /* ------------------------------------------------------------------------ */
1113 /* Function: ipf_state_matchipv4addrs */
1114 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
1115 /* 0 no match */
1116 /* Parameters: is1, is2 pointers to states we are checking */
1117 /* */
1118 /* Function matches IPv4 addresses it returns strong match for ICMP proto */
1119 /* even there is only reverse match */
1120 /* ------------------------------------------------------------------------ */
1121 static int
ipf_state_matchipv4addrs(ipstate_t * is1,ipstate_t * is2)1122 ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2)
1123 {
1124 int rv;
1125
1126 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr)
1127 rv = 2;
1128 else if (is1->is_saddr == is2->is_daddr &&
1129 is1->is_daddr == is2->is_saddr) {
1130 /* force strong match for ICMP protocol */
1131 rv = (is1->is_p == IPPROTO_ICMP) ? 2 : 1;
1132 }
1133 else
1134 rv = 0;
1135
1136 return (rv);
1137 }
1138
1139
1140 /* ------------------------------------------------------------------------ */
1141 /* Function: ipf_state_matchipv6addrs */
1142 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
1143 /* 0 no match */
1144 /* Parameters: is1, is2 pointers to states we are checking */
1145 /* */
1146 /* Function matches IPv6 addresses it returns strong match for ICMP proto */
1147 /* even there is only reverse match */
1148 /* ------------------------------------------------------------------------ */
1149 static int
ipf_state_matchipv6addrs(ipstate_t * is1,ipstate_t * is2)1150 ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2)
1151 {
1152 int rv;
1153
1154 if (IP6_EQ(&is1->is_src, &is2->is_src) &&
1155 IP6_EQ(&is1->is_dst, &is2->is_dst))
1156 rv = 2;
1157 else if (IP6_EQ(&is1->is_src, &is2->is_dst) &&
1158 IP6_EQ(&is1->is_dst, &is2->is_src)) {
1159 /* force strong match for ICMPv6 protocol */
1160 rv = (is1->is_p == IPPROTO_ICMPV6) ? 2 : 1;
1161 }
1162 else
1163 rv = 0;
1164
1165 return (rv);
1166 }
1167
1168
1169 /* ------------------------------------------------------------------------ */
1170 /* Function: ipf_state_matchaddresses */
1171 /* Returns: int - 2 addresses match, 1 reverse match, zero no match */
1172 /* Parameters: is1, is2 pointers to states we are checking */
1173 /* */
1174 /* function retruns true if two pairs of addresses belong to single */
1175 /* connection. suppose there are two endpoints: */
1176 /* endpoint1 1.1.1.1 */
1177 /* endpoint2 1.1.1.2 */
1178 /* */
1179 /* the state is established by packet flying from .1 to .2 so we see: */
1180 /* is1->src = 1.1.1.1 */
1181 /* is1->dst = 1.1.1.2 */
1182 /* now endpoint 1.1.1.2 sends answer */
1183 /* retreives is1 record created by first packat and compares it with is2 */
1184 /* temporal record, is2 is initialized as follows: */
1185 /* is2->src = 1.1.1.2 */
1186 /* is2->dst = 1.1.1.1 */
1187 /* in this case 1 will be returned */
1188 /* */
1189 /* the ipf_matchaddresses() assumes those two records to be same. of course */
1190 /* the ipf_matchaddresses() also assume records are same in case you pass */
1191 /* identical arguments (i.e. ipf_matchaddress(is1, is1) would return 2 */
1192 /* ------------------------------------------------------------------------ */
1193 static int
ipf_state_matchaddresses(ipstate_t * is1,ipstate_t * is2)1194 ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2)
1195 {
1196 int rv;
1197
1198 if (is1->is_v == 4) {
1199 rv = ipf_state_matchipv4addrs(is1, is2);
1200 }
1201 else {
1202 rv = ipf_state_matchipv6addrs(is1, is2);
1203 }
1204
1205 return (rv);
1206 }
1207
1208
1209 /* ------------------------------------------------------------------------ */
1210 /* Function: ipf_matchports */
1211 /* Returns: int - 2 match, 1 rverse match, 0 no match */
1212 /* Parameters: ppairs1, ppairs - src, dst ports we want to match */
1213 /* */
1214 /* performs the same match for isps members as for addresses */
1215 /* ------------------------------------------------------------------------ */
1216 static int
ipf_state_matchports(udpinfo_t * ppairs1,udpinfo_t * ppairs2)1217 ipf_state_matchports(udpinfo_t *ppairs1, udpinfo_t *ppairs2)
1218 {
1219 int rv;
1220
1221 if (ppairs1->us_sport == ppairs2->us_sport &&
1222 ppairs1->us_dport == ppairs2->us_dport)
1223 rv = 2;
1224 else if (ppairs1->us_sport == ppairs2->us_dport &&
1225 ppairs1->us_dport == ppairs2->us_sport)
1226 rv = 1;
1227 else
1228 rv = 0;
1229
1230 return (rv);
1231 }
1232
1233
1234 /* ------------------------------------------------------------------------ */
1235 /* Function: ipf_matchisps */
1236 /* Returns: int - nonzero if isps members match, 0 nomatch */
1237 /* Parameters: is1, is2 - states we want to match */
1238 /* */
1239 /* performs the same match for isps members as for addresses */
1240 /* ------------------------------------------------------------------------ */
1241 static int
ipf_state_matchisps(ipstate_t * is1,ipstate_t * is2)1242 ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2)
1243 {
1244 int rv;
1245
1246 if (is1->is_p == is2->is_p) {
1247 switch (is1->is_p)
1248 {
1249 case IPPROTO_TCP :
1250 case IPPROTO_UDP :
1251 case IPPROTO_GRE :
1252 /* greinfo_t can be also interpreted as port pair */
1253 rv = ipf_state_matchports(&is1->is_ps.is_us,
1254 &is2->is_ps.is_us);
1255 break;
1256
1257 case IPPROTO_ICMP :
1258 case IPPROTO_ICMPV6 :
1259 /* force strong match for ICMP datagram. */
1260 if (bcmp(&is1->is_ps, &is2->is_ps,
1261 sizeof(icmpinfo_t)) == 0) {
1262 rv = 2;
1263 } else {
1264 rv = 0;
1265 }
1266 break;
1267
1268 default:
1269 rv = 0;
1270 }
1271 } else {
1272 rv = 0;
1273 }
1274
1275 return (rv);
1276 }
1277
1278
1279 /* ------------------------------------------------------------------------ */
1280 /* Function: ipf_state_match */
1281 /* Returns: int - nonzero match, zero no match */
1282 /* Parameters: is1, is2 - states we want to match */
1283 /* */
1284 /* ------------------------------------------------------------------------ */
1285 static int
ipf_state_match(ipstate_t * is1,ipstate_t * is2)1286 ipf_state_match(ipstate_t *is1, ipstate_t *is2)
1287 {
1288 int rv;
1289 int amatch;
1290 int pomatch;
1291
1292 if (bcmp(&is1->is_pass, &is2->is_pass,
1293 offsetof(struct ipstate, is_authmsk) -
1294 offsetof(struct ipstate, is_pass)) == 0) {
1295
1296 pomatch = ipf_state_matchisps(is1, is2);
1297 amatch = ipf_state_matchaddresses(is1, is2);
1298 rv = (amatch != 0) && (amatch == pomatch);
1299 } else {
1300 rv = 0;
1301 }
1302
1303 return (rv);
1304 }
1305
1306 /* ------------------------------------------------------------------------ */
1307 /* Function: ipf_state_add */
1308 /* Returns: ipstate_t - 0 = success */
1309 /* Parameters: softc(I) - pointer to soft context main structure */
1310 /* fin(I) - pointer to packet information */
1311 /* stsave(O) - pointer to place to save pointer to created */
1312 /* state structure. */
1313 /* flags(I) - flags to use when creating the structure */
1314 /* */
1315 /* Creates a new IP state structure from the packet information collected. */
1316 /* Inserts it into the state table and appends to the bottom of the active */
1317 /* list. If the capacity of the table has reached the maximum allowed then */
1318 /* the call will fail and a flush is scheduled for the next timeout call. */
1319 /* */
1320 /* NOTE: The use of stsave to point to nat_state will result in memory */
1321 /* corruption. It should only be used to point to objects that will */
1322 /* either outlive this (not expired) or will deref the ip_state_t */
1323 /* when they are deleted. */
1324 /* ------------------------------------------------------------------------ */
1325 int
ipf_state_add(ipf_main_softc_t * softc,fr_info_t * fin,ipstate_t ** stsave,u_int flags)1326 ipf_state_add(ipf_main_softc_t *softc, fr_info_t *fin, ipstate_t **stsave,
1327 u_int flags)
1328 {
1329 ipf_state_softc_t *softs = softc->ipf_state_soft;
1330 ipstate_t *is, ips;
1331 struct icmp *ic;
1332 u_int pass, hv;
1333 frentry_t *fr;
1334 tcphdr_t *tcp;
1335 frdest_t *fdp;
1336 int out;
1337
1338 /*
1339 * If a locally created packet is trying to egress but it
1340 * does not match because of this lock, it is likely that
1341 * the policy will block it and return network unreachable further
1342 * up the stack. To mitigate this error, EAGAIN is returned instead,
1343 * telling the IP stack to try sending this packet again later.
1344 */
1345 if (softs->ipf_state_lock) {
1346 SBUMPD(ipf_state_stats, iss_add_locked);
1347 fin->fin_error = EAGAIN;
1348 return (-1);
1349 }
1350
1351 if (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)) {
1352 SBUMPD(ipf_state_stats, iss_add_bad);
1353 return (-1);
1354 }
1355
1356 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) {
1357 SBUMPD(ipf_state_stats, iss_add_oow);
1358 return (-1);
1359 }
1360
1361 if ((softs->ipf_state_stats.iss_active * 100 / softs->ipf_state_max) >
1362 softs->ipf_state_wm_high) {
1363 softs->ipf_state_doflush = 1;
1364 }
1365
1366 /*
1367 * If a "keep state" rule has reached the maximum number of references
1368 * to it, then schedule an automatic flush in case we can clear out
1369 * some "dead old wood". Note that because the lock isn't held on
1370 * fr it is possible that we could overflow. The cost of overflowing
1371 * is being ignored here as the number by which it can overflow is
1372 * a product of the number of simultaneous threads that could be
1373 * executing in here, so a limit of 100 won't result in 200, but could
1374 * result in 101 or 102.
1375 */
1376 fr = fin->fin_fr;
1377 if (fr != NULL) {
1378 if ((softs->ipf_state_stats.iss_active >=
1379 softs->ipf_state_max) && (fr->fr_statemax == 0)) {
1380 SBUMPD(ipf_state_stats, iss_max);
1381 return (1);
1382 }
1383 if ((fr->fr_statemax != 0) &&
1384 (fr->fr_statecnt >= fr->fr_statemax)) {
1385 SBUMPD(ipf_state_stats, iss_max_ref);
1386 return (2);
1387 }
1388 }
1389
1390 is = &ips;
1391 if (fr == NULL) {
1392 pass = softc->ipf_flags;
1393 is->is_tag = FR_NOLOGTAG;
1394 } else {
1395 pass = fr->fr_flags;
1396 }
1397
1398 ic = NULL;
1399 tcp = NULL;
1400 out = fin->fin_out;
1401 bzero((char *)is, sizeof(*is));
1402 is->is_die = 1 + softc->ipf_ticks;
1403 /*
1404 * We want to check everything that is a property of this packet,
1405 * but we don't (automatically) care about its fragment status as
1406 * this may change.
1407 */
1408 is->is_pass = pass;
1409 is->is_v = fin->fin_v;
1410 is->is_sec = fin->fin_secmsk;
1411 is->is_secmsk = 0xffff;
1412 is->is_auth = fin->fin_auth;
1413 is->is_authmsk = 0xffff;
1414 is->is_family = fin->fin_family;
1415 is->is_opt[0] = fin->fin_optmsk;
1416 is->is_optmsk[0] = 0xffffffff;
1417 if (is->is_v == 6) {
1418 is->is_opt[0] &= ~0x8;
1419 is->is_optmsk[0] &= ~0x8;
1420 }
1421
1422 /*
1423 * Copy and calculate...
1424 */
1425 hv = (is->is_p = fin->fin_fi.fi_p);
1426 is->is_src = fin->fin_fi.fi_src;
1427 hv += is->is_saddr;
1428 is->is_dst = fin->fin_fi.fi_dst;
1429 hv += is->is_daddr;
1430 #ifdef USE_INET6
1431 if (fin->fin_v == 6) {
1432 /*
1433 * For ICMPv6, we check to see if the destination address is
1434 * a multicast address. If it is, do not include it in the
1435 * calculation of the hash because the correct reply will come
1436 * back from a real address, not a multicast address.
1437 */
1438 if ((is->is_p == IPPROTO_ICMPV6) &&
1439 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
1440 /*
1441 * So you can do keep state with neighbour discovery.
1442 *
1443 * Here we could use the address from the neighbour
1444 * solicit message to put in the state structure and
1445 * we could use that without a wildcard flag too...
1446 */
1447 flags |= SI_W_DADDR;
1448 hv -= is->is_daddr;
1449 } else {
1450 hv += is->is_dst.i6[1];
1451 hv += is->is_dst.i6[2];
1452 hv += is->is_dst.i6[3];
1453 }
1454 hv += is->is_src.i6[1];
1455 hv += is->is_src.i6[2];
1456 hv += is->is_src.i6[3];
1457 }
1458 #endif
1459 if ((fin->fin_v == 4) &&
1460 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
1461 flags |= SI_W_DADDR;
1462 hv -= is->is_daddr;
1463 }
1464
1465 switch (is->is_p)
1466 {
1467 #ifdef USE_INET6
1468 case IPPROTO_ICMPV6 :
1469 ic = fin->fin_dp;
1470
1471 switch (ic->icmp_type)
1472 {
1473 case ICMP6_ECHO_REQUEST :
1474 hv += (is->is_icmp.ici_id = ic->icmp_id);
1475 /*FALLTHROUGH*/
1476 case ICMP6_MEMBERSHIP_QUERY :
1477 case ND_ROUTER_SOLICIT :
1478 case ND_NEIGHBOR_SOLICIT :
1479 case ICMP6_NI_QUERY :
1480 is->is_icmp.ici_type = ic->icmp_type;
1481 break;
1482 default :
1483 SBUMPD(ipf_state_stats, iss_icmp6_notquery);
1484 return (-2);
1485 }
1486 break;
1487 #endif
1488 case IPPROTO_ICMP :
1489 ic = fin->fin_dp;
1490
1491 switch (ic->icmp_type)
1492 {
1493 case ICMP_ECHO :
1494 case ICMP_TSTAMP :
1495 case ICMP_IREQ :
1496 case ICMP_MASKREQ :
1497 is->is_icmp.ici_type = ic->icmp_type;
1498 hv += (is->is_icmp.ici_id = ic->icmp_id);
1499 break;
1500 default :
1501 SBUMPD(ipf_state_stats, iss_icmp_notquery);
1502 return (-3);
1503 }
1504 break;
1505
1506 #if 0
1507 case IPPROTO_GRE :
1508 gre = fin->fin_dp;
1509
1510 is->is_gre.gs_flags = gre->gr_flags;
1511 is->is_gre.gs_ptype = gre->gr_ptype;
1512 if (GRE_REV(is->is_gre.gs_flags) == 1) {
1513 is->is_call[0] = fin->fin_data[0];
1514 is->is_call[1] = fin->fin_data[1];
1515 }
1516 break;
1517 #endif
1518
1519 case IPPROTO_TCP :
1520 tcp = fin->fin_dp;
1521
1522 if (tcp_get_flags(tcp) & TH_RST) {
1523 SBUMPD(ipf_state_stats, iss_tcp_rstadd);
1524 return (-4);
1525 }
1526
1527 /* TRACE is, flags, hv */
1528
1529 /*
1530 * The endian of the ports doesn't matter, but the ack and
1531 * sequence numbers do as we do mathematics on them later.
1532 */
1533 is->is_sport = htons(fin->fin_data[0]);
1534 is->is_dport = htons(fin->fin_data[1]);
1535 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1536 hv += is->is_sport;
1537 hv += is->is_dport;
1538 }
1539
1540 /* TRACE is, flags, hv */
1541
1542 /*
1543 * If this is a real packet then initialise fields in the
1544 * state information structure from the TCP header information.
1545 */
1546
1547 is->is_maxdwin = 1;
1548 is->is_maxswin = ntohs(tcp->th_win);
1549 if (is->is_maxswin == 0)
1550 is->is_maxswin = 1;
1551
1552 if ((fin->fin_flx & FI_IGNORE) == 0) {
1553 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
1554 (TCP_OFF(tcp) << 2) +
1555 ((tcp_get_flags(tcp) & TH_SYN) ? 1 : 0) +
1556 ((tcp_get_flags(tcp) & TH_FIN) ? 1 : 0);
1557 is->is_maxsend = is->is_send;
1558
1559 /*
1560 * Window scale option is only present in
1561 * SYN/SYN-ACK packet.
1562 */
1563 if ((tcp_get_flags(tcp) & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
1564 TH_SYN &&
1565 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
1566 if (ipf_tcpoptions(softs, fin, tcp,
1567 &is->is_tcp.ts_data[0]) == -1) {
1568 fin->fin_flx |= FI_BAD;
1569 DT1(ipf_fi_bad_tcpoptions_th_fin_ack_ecnall, fr_info_t *, fin);
1570 }
1571 }
1572
1573 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
1574 ipf_checknewisn(fin, is);
1575 ipf_fixoutisn(fin, is);
1576 }
1577
1578 if ((tcp_get_flags(tcp) & TH_OPENING) == TH_SYN)
1579 flags |= IS_TCPFSM;
1580 else {
1581 is->is_maxdwin = is->is_maxswin * 2;
1582 is->is_dend = ntohl(tcp->th_ack);
1583 is->is_maxdend = ntohl(tcp->th_ack);
1584 is->is_maxdwin *= 2;
1585 }
1586 }
1587
1588 /*
1589 * If we're creating state for a starting connection, start
1590 * the timer on it as we'll never see an error if it fails
1591 * to connect.
1592 */
1593 break;
1594
1595 case IPPROTO_UDP :
1596 tcp = fin->fin_dp;
1597
1598 is->is_sport = htons(fin->fin_data[0]);
1599 is->is_dport = htons(fin->fin_data[1]);
1600 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1601 hv += tcp->th_dport;
1602 hv += tcp->th_sport;
1603 }
1604 break;
1605
1606 default :
1607 break;
1608 }
1609 hv = DOUBLE_HASH(hv);
1610 is->is_hv = hv;
1611
1612 /*
1613 * Look for identical state.
1614 */
1615 for (is = softs->ipf_state_table[hv % softs->ipf_state_size];
1616 is != NULL; is = is->is_hnext) {
1617 if (ipf_state_match(&ips, is) == 1)
1618 break;
1619 }
1620 if (is != NULL) {
1621 SBUMPD(ipf_state_stats, iss_add_dup);
1622 return (3);
1623 }
1624
1625 if (softs->ipf_state_stats.iss_bucketlen[hv] >=
1626 softs->ipf_state_maxbucket) {
1627 SBUMPD(ipf_state_stats, iss_bucket_full);
1628 return (4);
1629 }
1630
1631 /*
1632 * No existing state; create new
1633 */
1634 KMALLOC(is, ipstate_t *);
1635 if (is == NULL) {
1636 SBUMPD(ipf_state_stats, iss_nomem);
1637 return (5);
1638 }
1639 bcopy((char *)&ips, (char *)is, sizeof(*is));
1640 is->is_flags = flags & IS_INHERITED;
1641 is->is_rulen = fin->fin_rule;
1642 is->is_rule = fr;
1643
1644 /*
1645 * Do not do the modulus here, it is done in ipf_state_insert().
1646 */
1647 if (fr != NULL) {
1648 ipftq_t *tq;
1649
1650 (void) strncpy(is->is_group, FR_NAME(fr, fr_group),
1651 FR_GROUPLEN);
1652 if (fr->fr_age[0] != 0) {
1653 tq = ipf_addtimeoutqueue(softc,
1654 &softs->ipf_state_usertq,
1655 fr->fr_age[0]);
1656 is->is_tqehead[0] = tq;
1657 is->is_sti.tqe_flags |= TQE_RULEBASED;
1658 }
1659 if (fr->fr_age[1] != 0) {
1660 tq = ipf_addtimeoutqueue(softc,
1661 &softs->ipf_state_usertq,
1662 fr->fr_age[1]);
1663 is->is_tqehead[1] = tq;
1664 is->is_sti.tqe_flags |= TQE_RULEBASED;
1665 }
1666
1667 is->is_tag = fr->fr_logtag;
1668 }
1669
1670 /*
1671 * It may seem strange to set is_ref to 2, but if stsave is not NULL
1672 * then a copy of the pointer is being stored somewhere else and in
1673 * the end, it will expect to be able to do something with it.
1674 */
1675 is->is_me = stsave;
1676 if (stsave != NULL) {
1677 *stsave = is;
1678 is->is_ref = 2;
1679 } else {
1680 is->is_ref = 1;
1681 }
1682 is->is_pkts[0] = 0, is->is_bytes[0] = 0;
1683 is->is_pkts[1] = 0, is->is_bytes[1] = 0;
1684 is->is_pkts[2] = 0, is->is_bytes[2] = 0;
1685 is->is_pkts[3] = 0, is->is_bytes[3] = 0;
1686 if ((fin->fin_flx & FI_IGNORE) == 0) {
1687 is->is_pkts[out] = 1;
1688 fin->fin_pktnum = 1;
1689 is->is_bytes[out] = fin->fin_plen;
1690 is->is_flx[out][0] = fin->fin_flx & FI_CMP;
1691 is->is_flx[out][0] &= ~FI_OOW;
1692 }
1693
1694 if (pass & FR_STLOOSE)
1695 is->is_flags |= IS_LOOSE;
1696
1697 if (pass & FR_STSTRICT)
1698 is->is_flags |= IS_STRICT;
1699
1700 if (pass & FR_STATESYNC)
1701 is->is_flags |= IS_STATESYNC;
1702
1703 if (pass & FR_LOGFIRST)
1704 is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
1705
1706 READ_ENTER(&softc->ipf_state);
1707
1708 if (ipf_state_insert(softc, is, fin->fin_rev) == -1) {
1709 RWLOCK_EXIT(&softc->ipf_state);
1710 /*
1711 * This is a bit more manual than it should be but
1712 * ipf_state_del cannot be called.
1713 */
1714 MUTEX_EXIT(&is->is_lock);
1715 MUTEX_DESTROY(&is->is_lock);
1716 if (is->is_tqehead[0] != NULL) {
1717 if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
1718 ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
1719 is->is_tqehead[0] = NULL;
1720 }
1721 if (is->is_tqehead[1] != NULL) {
1722 if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
1723 ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
1724 is->is_tqehead[1] = NULL;
1725 }
1726 KFREE(is);
1727 return (-1);
1728 }
1729
1730 /*
1731 * Filling in the interface name is after the insert so that an
1732 * event (such as add/delete) of an interface that is referenced
1733 * by this rule will see this state entry.
1734 */
1735 if (fr != NULL) {
1736 /*
1737 * The name '-' is special for network interfaces and causes
1738 * a NULL name to be present, always, allowing packets to
1739 * match it, regardless of their interface.
1740 */
1741 if ((fin->fin_ifp == NULL) ||
1742 (fr->fr_ifnames[out << 1] != -1 &&
1743 fr->fr_names[fr->fr_ifnames[out << 1] + 0] == '-' &&
1744 fr->fr_names[fr->fr_ifnames[out << 1] + 1] == '\0')) {
1745 is->is_ifp[out << 1] = fr->fr_ifas[0];
1746 strncpy(is->is_ifname[out << 1],
1747 FR_NAME(fr, fr_ifnames[0]),
1748 sizeof(fr->fr_ifnames[0]));
1749 } else {
1750 is->is_ifp[out << 1] = fin->fin_ifp;
1751 COPYIFNAME(fin->fin_v, fin->fin_ifp,
1752 is->is_ifname[out << 1]);
1753 }
1754
1755 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
1756 if (fr->fr_ifnames[1] != -1) {
1757 strncpy(is->is_ifname[(out << 1) + 1],
1758 FR_NAME(fr, fr_ifnames[1]),
1759 sizeof(fr->fr_ifnames[1]));
1760 }
1761
1762 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
1763 if (fr->fr_ifnames[2] != -1) {
1764 strncpy(is->is_ifname[((1 - out) << 1)],
1765 FR_NAME(fr, fr_ifnames[2]),
1766 sizeof(fr->fr_ifnames[2]));
1767 }
1768
1769 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
1770 if (fr->fr_ifnames[3] != -1) {
1771 strncpy(is->is_ifname[((1 - out) << 1) + 1],
1772 FR_NAME(fr, fr_ifnames[3]),
1773 sizeof(fr->fr_ifnames[3]));
1774 }
1775 } else {
1776 if (fin->fin_ifp != NULL) {
1777 is->is_ifp[out << 1] = fin->fin_ifp;
1778 COPYIFNAME(fin->fin_v, fin->fin_ifp,
1779 is->is_ifname[out << 1]);
1780 }
1781 }
1782
1783 if (fin->fin_p == IPPROTO_TCP) {
1784 /*
1785 * If we're creating state for a starting connection, start the
1786 * timer on it as we'll never see an error if it fails to
1787 * connect.
1788 */
1789 (void) ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
1790 is->is_flags, 2);
1791 }
1792 MUTEX_EXIT(&is->is_lock);
1793 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
1794 is->is_sync = ipf_sync_new(softc, SMC_STATE, fin, is);
1795 if (softs->ipf_state_logging)
1796 ipf_state_log(softc, is, ISL_NEW);
1797
1798 RWLOCK_EXIT(&softc->ipf_state);
1799
1800 fin->fin_flx |= FI_STATE;
1801 if (fin->fin_flx & FI_FRAG)
1802 (void) ipf_frag_new(softc, fin, pass);
1803
1804 fdp = &fr->fr_tifs[0];
1805 if (fdp->fd_type == FRD_DSTLIST) {
1806 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1807 &is->is_tifs[0]);
1808 } else {
1809 bcopy(fdp, &is->is_tifs[0], sizeof(*fdp));
1810 }
1811
1812 fdp = &fr->fr_tifs[1];
1813 if (fdp->fd_type == FRD_DSTLIST) {
1814 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1815 &is->is_tifs[1]);
1816 } else {
1817 bcopy(fdp, &is->is_tifs[1], sizeof(*fdp));
1818 }
1819 fin->fin_tif = &is->is_tifs[fin->fin_rev];
1820
1821 fdp = &fr->fr_dif;
1822 if (fdp->fd_type == FRD_DSTLIST) {
1823 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1824 &is->is_dif);
1825 } else {
1826 bcopy(fdp, &is->is_dif, sizeof(*fdp));
1827 }
1828 fin->fin_dif = &is->is_dif;
1829
1830 return (0);
1831 }
1832
1833
1834 /* ------------------------------------------------------------------------ */
1835 /* Function: ipf_tcpoptions */
1836 /* Returns: int - 1 == packet matches state entry, 0 == it does not, */
1837 /* -1 == packet has bad TCP options data */
1838 /* Parameters: softs(I) - pointer to state context structure */
1839 /* fin(I) - pointer to packet information */
1840 /* tcp(I) - pointer to TCP packet header */
1841 /* td(I) - pointer to TCP data held as part of the state */
1842 /* */
1843 /* Look after the TCP header for any options and deal with those that are */
1844 /* present. Record details about those that we recogise. */
1845 /* ------------------------------------------------------------------------ */
1846 static int
ipf_tcpoptions(ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,tcpdata_t * td)1847 ipf_tcpoptions(ipf_state_softc_t *softs, fr_info_t *fin, tcphdr_t *tcp,
1848 tcpdata_t *td)
1849 {
1850 int off, mlen, ol, i, len, retval;
1851 char buf[64], *s, opt;
1852 mb_t *m = NULL;
1853
1854 len = (TCP_OFF(tcp) << 2);
1855 if (fin->fin_dlen < len) {
1856 SBUMPD(ipf_state_stats, iss_tcp_toosmall);
1857 return (0);
1858 }
1859 len -= sizeof(*tcp);
1860
1861 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff;
1862
1863 m = fin->fin_m;
1864 mlen = MSGDSIZE(m) - off;
1865 if (len > mlen) {
1866 len = mlen;
1867 retval = 0;
1868 } else {
1869 retval = 1;
1870 }
1871
1872 COPYDATA(m, off, len, buf);
1873
1874 for (s = buf; len > 0; ) {
1875 opt = *s;
1876 if (opt == TCPOPT_EOL)
1877 break;
1878 else if (opt == TCPOPT_NOP)
1879 ol = 1;
1880 else {
1881 if (len < 2)
1882 break;
1883 ol = (int)*(s + 1);
1884 if (ol < 2 || ol > len)
1885 break;
1886
1887 /*
1888 * Extract the TCP options we are interested in out of
1889 * the header and store them in the tcpdata struct.
1890 */
1891 switch (opt)
1892 {
1893 case TCPOPT_WINDOW :
1894 if (ol == TCPOLEN_WINDOW) {
1895 i = (int)*(s + 2);
1896 if (i > TCP_WSCALE_MAX)
1897 i = TCP_WSCALE_MAX;
1898 else if (i < 0)
1899 i = 0;
1900 td->td_winscale = i;
1901 td->td_winflags |= TCP_WSCALE_SEEN|
1902 TCP_WSCALE_FIRST;
1903 } else
1904 retval = -1;
1905 break;
1906 case TCPOPT_MAXSEG :
1907 /*
1908 * So, if we wanted to set the TCP MAXSEG,
1909 * it should be done here...
1910 */
1911 if (ol == TCPOLEN_MAXSEG) {
1912 i = (int)*(s + 2);
1913 i <<= 8;
1914 i += (int)*(s + 3);
1915 td->td_maxseg = i;
1916 } else
1917 retval = -1;
1918 break;
1919 case TCPOPT_SACK_PERMITTED :
1920 if (ol == TCPOLEN_SACK_PERMITTED)
1921 td->td_winflags |= TCP_SACK_PERMIT;
1922 else
1923 retval = -1;
1924 break;
1925 }
1926 }
1927 len -= ol;
1928 s += ol;
1929 }
1930 if (retval == -1) {
1931 SBUMPD(ipf_state_stats, iss_tcp_badopt);
1932 }
1933 return (retval);
1934 }
1935
1936
1937 /* ------------------------------------------------------------------------ */
1938 /* Function: ipf_state_tcp */
1939 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1940 /* Parameters: softc(I) - pointer to soft context main structure */
1941 /* softs(I) - pointer to state context structure */
1942 /* fin(I) - pointer to packet information */
1943 /* tcp(I) - pointer to TCP packet header */
1944 /* is(I) - pointer to master state structure */
1945 /* */
1946 /* Check to see if a packet with TCP headers fits within the TCP window. */
1947 /* Change timeout depending on whether new packet is a SYN-ACK returning */
1948 /* for a SYN or a RST or FIN which indicate time to close up shop. */
1949 /* ------------------------------------------------------------------------ */
1950 static int
ipf_state_tcp(ipf_main_softc_t * softc,ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)1951 ipf_state_tcp(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
1952 fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
1953 {
1954 tcpdata_t *fdata, *tdata;
1955 int source, ret, flags;
1956
1957 source = !fin->fin_rev;
1958 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
1959 (ntohs(is->is_sport) != fin->fin_data[0]))
1960 source = 0;
1961 fdata = &is->is_tcp.ts_data[!source];
1962 tdata = &is->is_tcp.ts_data[source];
1963
1964 MUTEX_ENTER(&is->is_lock);
1965
1966 /*
1967 * If a SYN packet is received for a connection that is on the way out
1968 * but hasn't yet departed then advance this session along the way.
1969 */
1970 if ((tcp_get_flags(tcp) & TH_OPENING) == TH_SYN) {
1971 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
1972 (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
1973 is->is_state[!source] = IPF_TCPS_CLOSED;
1974 ipf_movequeue(softc->ipf_ticks, &is->is_sti,
1975 is->is_sti.tqe_ifq,
1976 &softs->ipf_state_deletetq);
1977 MUTEX_EXIT(&is->is_lock);
1978 DT1(iss_tcp_closing, ipstate_t *, is);
1979 SBUMP(ipf_state_stats.iss_tcp_closing);
1980 return (0);
1981 }
1982 }
1983
1984 if (is->is_flags & IS_LOOSE)
1985 ret = 1;
1986 else
1987 ret = ipf_state_tcpinwindow(fin, fdata, tdata, tcp,
1988 is->is_flags);
1989 if (ret > 0) {
1990 /*
1991 * Nearing end of connection, start timeout.
1992 */
1993 ret = ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
1994 is->is_flags, ret);
1995 if (ret == 0) {
1996 MUTEX_EXIT(&is->is_lock);
1997 DT2(iss_tcp_fsm, fr_info_t *, fin, ipstate_t *, is);
1998 SBUMP(ipf_state_stats.iss_tcp_fsm);
1999 return (0);
2000 }
2001
2002 if (softs->ipf_state_logging > 4)
2003 ipf_state_log(softc, is, ISL_STATECHANGE);
2004
2005 /*
2006 * set s0's as appropriate. Use syn-ack packet as it
2007 * contains both pieces of required information.
2008 */
2009 /*
2010 * Window scale option is only present in SYN/SYN-ACK packet.
2011 * Compare with ~TH_FIN to mask out T/TCP setups.
2012 */
2013 flags = tcp_get_flags(tcp) & ~(TH_FIN|TH_ECNALL);
2014 if (flags == (TH_SYN|TH_ACK)) {
2015 is->is_s0[source] = ntohl(tcp->th_ack);
2016 is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
2017 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2018 if (ipf_tcpoptions(softs, fin, tcp,
2019 fdata) == -1) {
2020 fin->fin_flx |= FI_BAD;
2021 DT1(ipf_fi_bad_winscale_syn_ack, fr_info_t *, fin);
2022 }
2023 }
2024 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2025 ipf_checknewisn(fin, is);
2026 } else if (flags == TH_SYN) {
2027 is->is_s0[source] = ntohl(tcp->th_seq) + 1;
2028 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2029 if (ipf_tcpoptions(softs, fin, tcp,
2030 fdata) == -1) {
2031 fin->fin_flx |= FI_BAD;
2032 DT1(ipf_fi_bad_winscale_syn, fr_info_t *, fin);
2033 }
2034 }
2035
2036 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2037 ipf_checknewisn(fin, is);
2038
2039 }
2040 ret = 1;
2041 } else {
2042 DT2(iss_tcp_oow, fr_info_t *, fin, ipstate_t *, is);
2043 SBUMP(ipf_state_stats.iss_tcp_oow);
2044 ret = 0;
2045 }
2046 MUTEX_EXIT(&is->is_lock);
2047 return (ret);
2048 }
2049
2050
2051 /* ------------------------------------------------------------------------ */
2052 /* Function: ipf_checknewisn */
2053 /* Returns: Nil */
2054 /* Parameters: fin(I) - pointer to packet information */
2055 /* is(I) - pointer to master state structure */
2056 /* */
2057 /* Check to see if this TCP connection is expecting and needs a new */
2058 /* sequence number for a particular direction of the connection. */
2059 /* */
2060 /* NOTE: This does not actually change the sequence numbers, only gets new */
2061 /* one ready. */
2062 /* ------------------------------------------------------------------------ */
2063 static void
ipf_checknewisn(fr_info_t * fin,ipstate_t * is)2064 ipf_checknewisn(fr_info_t *fin, ipstate_t *is)
2065 {
2066 u_32_t sumd, old, new;
2067 tcphdr_t *tcp;
2068 int i;
2069
2070 i = fin->fin_rev;
2071 tcp = fin->fin_dp;
2072
2073 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
2074 ((i == 1) && !(is->is_flags & IS_ISNACK))) {
2075 old = ntohl(tcp->th_seq);
2076 new = ipf_newisn(fin);
2077 is->is_isninc[i] = new - old;
2078 CALC_SUMD(old, new, sumd);
2079 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);
2080
2081 is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
2082 }
2083 }
2084
2085
2086 /* ------------------------------------------------------------------------ */
2087 /* Function: ipf_state_tcpinwindow */
2088 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */
2089 /* Parameters: fin(I) - pointer to packet information */
2090 /* fdata(I) - pointer to tcp state informatio (forward) */
2091 /* tdata(I) - pointer to tcp state informatio (reverse) */
2092 /* tcp(I) - pointer to TCP packet header */
2093 /* */
2094 /* Given a packet has matched addresses and ports, check to see if it is */
2095 /* within the TCP data window. In a show of generosity, allow packets that */
2096 /* are within the window space behind the current sequence # as well. */
2097 /* ------------------------------------------------------------------------ */
2098 static int
ipf_state_tcpinwindow(fr_info_t * fin,tcpdata_t * fdata,tcpdata_t * tdata,tcphdr_t * tcp,int flags)2099 ipf_state_tcpinwindow(fr_info_t *fin, tcpdata_t *fdata, tcpdata_t *tdata,
2100 tcphdr_t *tcp, int flags)
2101 {
2102 ipf_main_softc_t *softc = fin->fin_main_soft;
2103 ipf_state_softc_t *softs = softc->ipf_state_soft;
2104 tcp_seq seq, ack, end;
2105 int ackskew, tcpflags;
2106 u_32_t win, maxwin;
2107 int dsize, inseq;
2108
2109 /*
2110 * Find difference between last checked packet and this packet.
2111 */
2112 tcpflags = tcp_get_flags(tcp);
2113 seq = ntohl(tcp->th_seq);
2114 ack = ntohl(tcp->th_ack);
2115 if (tcpflags & TH_SYN)
2116 win = ntohs(tcp->th_win);
2117 else
2118 win = ntohs(tcp->th_win) << fdata->td_winscale;
2119
2120 /*
2121 * A window of 0 produces undesirable behaviour from this function.
2122 */
2123 if (win == 0)
2124 win = 1;
2125
2126 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2127 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);
2128
2129 /*
2130 * if window scaling is present, the scaling is only allowed
2131 * for windows not in the first SYN packet. In that packet the
2132 * window is 65535 to specify the largest window possible
2133 * for receivers not implementing the window scale option.
2134 * Currently, we do not assume TTCP here. That means that
2135 * if we see a second packet from a host (after the initial
2136 * SYN), we can assume that the receiver of the SYN did
2137 * already send back the SYN/ACK (and thus that we know if
2138 * the receiver also does window scaling)
2139 */
2140 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
2141 fdata->td_winflags &= ~TCP_WSCALE_FIRST;
2142 fdata->td_maxwin = win;
2143 }
2144
2145 end = seq + dsize;
2146
2147 if ((fdata->td_end == 0) &&
2148 (!(flags & IS_TCPFSM) ||
2149 ((tcpflags & TH_OPENING) == TH_OPENING))) {
2150 /*
2151 * Must be a (outgoing) SYN-ACK in reply to a SYN.
2152 */
2153 fdata->td_end = end - 1;
2154 fdata->td_maxwin = 1;
2155 fdata->td_maxend = end + win;
2156 }
2157
2158 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */
2159 ack = tdata->td_end;
2160 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
2161 (ack == 0)) {
2162 /* gross hack to get around certain broken tcp stacks */
2163 ack = tdata->td_end;
2164 }
2165
2166 maxwin = tdata->td_maxwin;
2167 ackskew = tdata->td_end - ack;
2168
2169 /*
2170 * Strict sequencing only allows in-order delivery.
2171 */
2172 if ((flags & IS_STRICT) != 0) {
2173 if (seq != fdata->td_end) {
2174 DT2(iss_tcp_struct, tcpdata_t *, fdata, int, seq);
2175 SBUMP(ipf_state_stats.iss_tcp_strict);
2176 fin->fin_flx |= FI_OOW;
2177 return (0);
2178 }
2179 }
2180
2181 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0)
2182 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0)
2183 inseq = 0;
2184 if ((SEQ_GE(fdata->td_maxend, end)) &&
2185 (SEQ_GE(seq, fdata->td_end - maxwin)) &&
2186 /* XXX what about big packets */
2187 #define MAXACKWINDOW 66000
2188 (-ackskew <= (MAXACKWINDOW)) &&
2189 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
2190 inseq = 1;
2191 /*
2192 * Microsoft Windows will send the next packet to the right of the
2193 * window if SACK is in use.
2194 */
2195 } else if ((seq == fdata->td_maxend) && (ackskew == 0) &&
2196 (fdata->td_winflags & TCP_SACK_PERMIT) &&
2197 (tdata->td_winflags & TCP_SACK_PERMIT)) {
2198 DT2(iss_sinsack, tcpdata_t *, fdata, int, seq);
2199 SBUMP(ipf_state_stats.iss_winsack);
2200 inseq = 1;
2201 /*
2202 * Sometimes a TCP RST will be generated with only the ACK field
2203 * set to non-zero.
2204 */
2205 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) &&
2206 (ackskew >= -1) && (ackskew <= 1)) {
2207 inseq = 1;
2208 } else if (!(flags & IS_TCPFSM)) {
2209 if (!(fdata->td_winflags &
2210 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) {
2211 /*
2212 * No TCPFSM and no window scaling, so make some
2213 * extra guesses.
2214 */
2215 if ((seq == fdata->td_maxend) && (ackskew == 0))
2216 inseq = 1;
2217 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin))
2218 inseq = 1;
2219 }
2220 }
2221
2222 /* TRACE(inseq, fdata, tdata, seq, end, ack, ackskew, win, maxwin) */
2223
2224 if (inseq) {
2225 /* if ackskew < 0 then this should be due to fragmented
2226 * packets. There is no way to know the length of the
2227 * total packet in advance.
2228 * We do know the total length from the fragment cache though.
2229 * Note however that there might be more sessions with
2230 * exactly the same source and destination parameters in the
2231 * state cache (and source and destination is the only stuff
2232 * that is saved in the fragment cache). Note further that
2233 * some TCP connections in the state cache are hashed with
2234 * sport and dport as well which makes it not worthwhile to
2235 * look for them.
2236 * Thus, when ackskew is negative but still seems to belong
2237 * to this session, we bump up the destinations end value.
2238 */
2239 if (ackskew < 0)
2240 tdata->td_end = ack;
2241
2242 /* update max window seen */
2243 if (fdata->td_maxwin < win)
2244 fdata->td_maxwin = win;
2245 if (SEQ_GT(end, fdata->td_end))
2246 fdata->td_end = end;
2247 if (SEQ_GE(ack + win, tdata->td_maxend))
2248 tdata->td_maxend = ack + win;
2249 return (1);
2250 }
2251 SBUMP(ipf_state_stats.iss_oow);
2252 fin->fin_flx |= FI_OOW;
2253 return (0);
2254 }
2255
2256
2257 /* ------------------------------------------------------------------------ */
2258 /* Function: ipf_state_clone */
2259 /* Returns: ipstate_t* - NULL == cloning failed, */
2260 /* else pointer to new state structure */
2261 /* Parameters: fin(I) - pointer to packet information */
2262 /* tcp(I) - pointer to TCP/UDP header */
2263 /* is(I) - pointer to master state structure */
2264 /* */
2265 /* Create a "duplcate" state table entry from the master. */
2266 /* ------------------------------------------------------------------------ */
2267 static ipstate_t *
ipf_state_clone(fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)2268 ipf_state_clone(fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
2269 {
2270 ipf_main_softc_t *softc = fin->fin_main_soft;
2271 ipf_state_softc_t *softs = softc->ipf_state_soft;
2272 ipstate_t *clone;
2273 u_32_t send;
2274
2275 if (softs->ipf_state_stats.iss_active == softs->ipf_state_max) {
2276 SBUMPD(ipf_state_stats, iss_max);
2277 softs->ipf_state_doflush = 1;
2278 return (NULL);
2279 }
2280 KMALLOC(clone, ipstate_t *);
2281 if (clone == NULL) {
2282 SBUMPD(ipf_state_stats, iss_clone_nomem);
2283 return (NULL);
2284 }
2285 bcopy((char *)is, (char *)clone, sizeof(*clone));
2286
2287 MUTEX_NUKE(&clone->is_lock);
2288 /*
2289 * It has not yet been placed on any timeout queue, so make sure
2290 * all of that data is zero'd out.
2291 */
2292 clone->is_sti.tqe_pnext = NULL;
2293 clone->is_sti.tqe_next = NULL;
2294 clone->is_sti.tqe_ifq = NULL;
2295 clone->is_sti.tqe_parent = clone;
2296
2297 clone->is_die = ONE_DAY + softc->ipf_ticks;
2298 clone->is_state[0] = 0;
2299 clone->is_state[1] = 0;
2300 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2301 ((tcp_get_flags(tcp) & TH_SYN) ? 1 : 0) +
2302 ((tcp_get_flags(tcp) & TH_FIN) ? 1 : 0);
2303
2304 if (fin->fin_rev == 1) {
2305 clone->is_dend = send;
2306 clone->is_maxdend = send;
2307 clone->is_send = 0;
2308 clone->is_maxswin = 1;
2309 clone->is_maxdwin = ntohs(tcp->th_win);
2310 if (clone->is_maxdwin == 0)
2311 clone->is_maxdwin = 1;
2312 } else {
2313 clone->is_send = send;
2314 clone->is_maxsend = send;
2315 clone->is_dend = 0;
2316 clone->is_maxdwin = 1;
2317 clone->is_maxswin = ntohs(tcp->th_win);
2318 if (clone->is_maxswin == 0)
2319 clone->is_maxswin = 1;
2320 }
2321
2322 clone->is_flags &= ~SI_CLONE;
2323 clone->is_flags |= SI_CLONED;
2324 if (ipf_state_insert(softc, clone, fin->fin_rev) == -1) {
2325 KFREE(clone);
2326 return (NULL);
2327 }
2328
2329 clone->is_ref = 1;
2330 if (clone->is_p == IPPROTO_TCP) {
2331 (void) ipf_tcp_age(&clone->is_sti, fin, softs->ipf_state_tcptq,
2332 clone->is_flags, 2);
2333 }
2334 MUTEX_EXIT(&clone->is_lock);
2335 if (is->is_flags & IS_STATESYNC)
2336 clone->is_sync = ipf_sync_new(softc, SMC_STATE, fin, clone);
2337 DT2(iss_clone, ipstate_t *, is, ipstate_t *, clone);
2338 SBUMP(ipf_state_stats.iss_cloned);
2339 return (clone);
2340 }
2341
2342
2343 /* ------------------------------------------------------------------------ */
2344 /* Function: ipf_matchsrcdst */
2345 /* Returns: Nil */
2346 /* Parameters: fin(I) - pointer to packet information */
2347 /* is(I) - pointer to state structure */
2348 /* src(I) - pointer to source address */
2349 /* dst(I) - pointer to destination address */
2350 /* tcp(I) - pointer to TCP/UDP header */
2351 /* cmask(I) - mask of FI_* bits to check */
2352 /* */
2353 /* Match a state table entry against an IP packet. The logic below is that */
2354 /* ret gets set to one if the match succeeds, else remains 0. If it is */
2355 /* still 0 after the test. no match. */
2356 /* ------------------------------------------------------------------------ */
2357 static ipstate_t *
ipf_matchsrcdst(fr_info_t * fin,ipstate_t * is,i6addr_t * src,i6addr_t * dst,tcphdr_t * tcp,u_32_t cmask)2358 ipf_matchsrcdst(fr_info_t *fin, ipstate_t *is, i6addr_t *src, i6addr_t *dst,
2359 tcphdr_t *tcp, u_32_t cmask)
2360 {
2361 ipf_main_softc_t *softc = fin->fin_main_soft;
2362 ipf_state_softc_t *softs = softc->ipf_state_soft;
2363 int ret = 0, rev, out, flags, flx = 0, idx;
2364 u_short sp, dp;
2365 u_32_t cflx;
2366 void *ifp;
2367
2368 /*
2369 * If a connection is about to be deleted, no packets
2370 * are allowed to match it.
2371 */
2372 if (is->is_sti.tqe_ifq == &softs->ipf_state_deletetq)
2373 return (NULL);
2374
2375 rev = IP6_NEQ(&is->is_dst, dst);
2376 ifp = fin->fin_ifp;
2377 out = fin->fin_out;
2378 flags = is->is_flags;
2379 sp = 0;
2380 dp = 0;
2381
2382 if (tcp != NULL) {
2383 sp = htons(fin->fin_sport);
2384 dp = ntohs(fin->fin_dport);
2385 }
2386 if (!rev) {
2387 if (tcp != NULL) {
2388 if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
2389 rev = 1;
2390 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
2391 rev = 1;
2392 }
2393 }
2394
2395 idx = (out << 1) + rev;
2396
2397 /*
2398 * If the interface for this 'direction' is set, make sure it matches.
2399 * An interface name that is not set matches any, as does a name of *.
2400 */
2401 if ((is->is_ifp[idx] == ifp) || (is->is_ifp[idx] == NULL &&
2402 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '-' ||
2403 *is->is_ifname[idx] == '*')))
2404 ret = 1;
2405
2406 if (ret == 0) {
2407 DT2(iss_lookup_badifp, fr_info_t *, fin, ipstate_t *, is);
2408 SBUMP(ipf_state_stats.iss_lookup_badifp);
2409 /* TRACE is, out, rev, idx */
2410 return (NULL);
2411 }
2412 ret = 0;
2413
2414 /*
2415 * Match addresses and ports.
2416 */
2417 if (rev == 0) {
2418 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
2419 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
2420 if (tcp) {
2421 if ((sp == is->is_sport || flags & SI_W_SPORT)
2422 &&
2423 (dp == is->is_dport || flags & SI_W_DPORT))
2424 ret = 1;
2425 } else {
2426 ret = 1;
2427 }
2428 }
2429 } else {
2430 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
2431 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
2432 if (tcp) {
2433 if ((dp == is->is_sport || flags & SI_W_SPORT)
2434 &&
2435 (sp == is->is_dport || flags & SI_W_DPORT))
2436 ret = 1;
2437 } else {
2438 ret = 1;
2439 }
2440 }
2441 }
2442
2443 if (ret == 0) {
2444 SBUMP(ipf_state_stats.iss_lookup_badport);
2445 DT2(iss_lookup_badport, fr_info_t *, fin, ipstate_t *, is);
2446 /* TRACE rev, is, sp, dp, src, dst */
2447 return (NULL);
2448 }
2449
2450 /*
2451 * Whether or not this should be here, is questionable, but the aim
2452 * is to get this out of the main line.
2453 */
2454 if (tcp == NULL)
2455 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);
2456
2457 /*
2458 * Only one of the source or destination address can be flaged as a
2459 * wildcard. Fill in the missing address, if set.
2460 * For IPv6, if the address being copied in is multicast, then
2461 * don't reset the wild flag - multicast causes it to be set in the
2462 * first place!
2463 */
2464 if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
2465 fr_ip_t *fi = &fin->fin_fi;
2466
2467 if ((flags & SI_W_SADDR) != 0) {
2468 if (rev == 0) {
2469 is->is_src = fi->fi_src;
2470 is->is_flags &= ~SI_W_SADDR;
2471 } else {
2472 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2473 is->is_src = fi->fi_dst;
2474 is->is_flags &= ~SI_W_SADDR;
2475 }
2476 }
2477 } else if ((flags & SI_W_DADDR) != 0) {
2478 if (rev == 0) {
2479 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2480 is->is_dst = fi->fi_dst;
2481 is->is_flags &= ~SI_W_DADDR;
2482 }
2483 } else {
2484 is->is_dst = fi->fi_src;
2485 is->is_flags &= ~SI_W_DADDR;
2486 }
2487 }
2488 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
2489 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2490 }
2491 }
2492
2493 flx = fin->fin_flx & cmask;
2494 cflx = is->is_flx[out][rev];
2495
2496 /*
2497 * Match up any flags set from IP options.
2498 */
2499 if ((cflx && (flx != (cflx & cmask))) ||
2500 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
2501 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
2502 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) {
2503 SBUMPD(ipf_state_stats, iss_miss_mask);
2504 return (NULL);
2505 }
2506
2507 if ((fin->fin_flx & FI_IGNORE) != 0) {
2508 fin->fin_rev = rev;
2509 return (is);
2510 }
2511
2512 /*
2513 * Only one of the source or destination port can be flagged as a
2514 * wildcard. When filling it in, fill in a copy of the matched entry
2515 * if it has the cloning flag set.
2516 */
2517 if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
2518 if ((flags & SI_CLONE) != 0) {
2519 ipstate_t *clone;
2520
2521 clone = ipf_state_clone(fin, tcp, is);
2522 if (clone == NULL)
2523 return (NULL);
2524 is = clone;
2525 } else {
2526 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2527 }
2528
2529 if ((flags & SI_W_SPORT) != 0) {
2530 if (rev == 0) {
2531 is->is_sport = sp;
2532 is->is_send = ntohl(tcp->th_seq);
2533 } else {
2534 is->is_sport = dp;
2535 is->is_send = ntohl(tcp->th_ack);
2536 }
2537 is->is_maxsend = is->is_send + 1;
2538 } else if ((flags & SI_W_DPORT) != 0) {
2539 if (rev == 0) {
2540 is->is_dport = dp;
2541 is->is_dend = ntohl(tcp->th_ack);
2542 } else {
2543 is->is_dport = sp;
2544 is->is_dend = ntohl(tcp->th_seq);
2545 }
2546 is->is_maxdend = is->is_dend + 1;
2547 }
2548 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
2549 if ((flags & SI_CLONED) && softs->ipf_state_logging)
2550 ipf_state_log(softc, is, ISL_CLONE);
2551 }
2552
2553 ret = -1;
2554
2555 if (is->is_flx[out][rev] == 0) {
2556 is->is_flx[out][rev] = flx;
2557 if (rev == 1 && is->is_optmsk[1] == 0) {
2558 is->is_opt[1] = fin->fin_optmsk;
2559 is->is_optmsk[1] = 0xffffffff;
2560 if (is->is_v == 6) {
2561 is->is_opt[1] &= ~0x8;
2562 is->is_optmsk[1] &= ~0x8;
2563 }
2564 }
2565 }
2566
2567 /*
2568 * Check if the interface name for this "direction" is set and if not,
2569 * fill it in.
2570 */
2571 if (is->is_ifp[idx] == NULL &&
2572 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
2573 is->is_ifp[idx] = ifp;
2574 COPYIFNAME(fin->fin_v, ifp, is->is_ifname[idx]);
2575 }
2576 fin->fin_rev = rev;
2577 return (is);
2578 }
2579
2580
2581 /* ------------------------------------------------------------------------ */
2582 /* Function: ipf_checkicmpmatchingstate */
2583 /* Returns: Nil */
2584 /* Parameters: fin(I) - pointer to packet information */
2585 /* */
2586 /* If we've got an ICMP error message, using the information stored in the */
2587 /* ICMP packet, look for a matching state table entry. */
2588 /* */
2589 /* If we return NULL then no lock on ipf_state is held. */
2590 /* If we return non-null then a read-lock on ipf_state is held. */
2591 /* ------------------------------------------------------------------------ */
2592 static ipstate_t *
ipf_checkicmpmatchingstate(fr_info_t * fin)2593 ipf_checkicmpmatchingstate(fr_info_t *fin)
2594 {
2595 ipf_main_softc_t *softc = fin->fin_main_soft;
2596 ipf_state_softc_t *softs = softc->ipf_state_soft;
2597 ipstate_t *is, **isp;
2598 i6addr_t dst, src;
2599 struct icmp *ic;
2600 u_short savelen;
2601 icmphdr_t *icmp;
2602 fr_info_t ofin;
2603 tcphdr_t *tcp;
2604 int len;
2605 u_char pr;
2606 ip_t *oip;
2607 u_int hv;
2608
2609 /*
2610 * Does it at least have the return (basic) IP header ?
2611 * Is it an actual recognised ICMP error type?
2612 * Only a basic IP header (no options) should be with
2613 * an ICMP error header.
2614 */
2615 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
2616 (fin->fin_plen < ICMPERR_MINPKTLEN) ||
2617 !(fin->fin_flx & FI_ICMPERR)) {
2618 SBUMPD(ipf_state_stats, iss_icmp_bad);
2619 return (NULL);
2620 }
2621 ic = fin->fin_dp;
2622
2623 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
2624 /*
2625 * Check if the at least the old IP header (with options) and
2626 * 8 bytes of payload is present.
2627 */
2628 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) {
2629 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2630 return (NULL);
2631 }
2632
2633 /*
2634 * Sanity Checks.
2635 */
2636 len = fin->fin_dlen - ICMPERR_ICMPHLEN;
2637 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) {
2638 DT2(iss_icmp_len, fr_info_t *, fin, struct ip*, oip);
2639 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2640 return (NULL);
2641 }
2642
2643 /*
2644 * Is the buffer big enough for all of it ? It's the size of the IP
2645 * header claimed in the encapsulated part which is of concern. It
2646 * may be too big to be in this buffer but not so big that it's
2647 * outside the ICMP packet, leading to TCP deref's causing problems.
2648 * This is possible because we don't know how big oip_hl is when we
2649 * do the pullup early in ipf_check() and thus can't guarantee it is
2650 * all here now.
2651 */
2652 #ifdef _KERNEL
2653 {
2654 mb_t *m;
2655
2656 m = fin->fin_m;
2657 # if SOLARIS
2658 if ((char *)oip + len > (char *)m->b_wptr) {
2659 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_2);
2660 return (NULL);
2661 }
2662 # else
2663 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) {
2664 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_3);
2665 return (NULL);
2666 }
2667 # endif
2668 }
2669 #endif
2670
2671 bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
2672
2673 /*
2674 * in the IPv4 case we must zero the i6addr union otherwise
2675 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
2676 * of the 'junk' in the unused part of the union
2677 */
2678 bzero((char *)&src, sizeof(src));
2679 bzero((char *)&dst, sizeof(dst));
2680
2681 /*
2682 * we make an fin entry to be able to feed it to
2683 * matchsrcdst note that not all fields are encessary
2684 * but this is the cleanest way. Note further we fill
2685 * in fin_mp such that if someone uses it we'll get
2686 * a kernel panic. ipf_matchsrcdst does not use this.
2687 *
2688 * watch out here, as ip is in host order and oip in network
2689 * order. Any change we make must be undone afterwards, like
2690 * oip->ip_len.
2691 */
2692 savelen = oip->ip_len;
2693 oip->ip_len = htons(len);
2694
2695 ofin.fin_flx = FI_NOCKSUM;
2696 ofin.fin_v = 4;
2697 ofin.fin_ip = oip;
2698 ofin.fin_m = NULL; /* if dereferenced, panic XXX */
2699 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
2700 (void) ipf_makefrip(IP_HL(oip) << 2, oip, &ofin);
2701 ofin.fin_ifp = fin->fin_ifp;
2702 ofin.fin_out = !fin->fin_out;
2703
2704 hv = (pr = oip->ip_p);
2705 src.in4 = oip->ip_src;
2706 hv += src.in4.s_addr;
2707 dst.in4 = oip->ip_dst;
2708 hv += dst.in4.s_addr;
2709
2710 /*
2711 * Reset the short and bad flag here because in ipf_matchsrcdst()
2712 * the flags for the current packet (fin_flx) are compared against
2713 * those for the existing session.
2714 */
2715 ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
2716
2717 /*
2718 * Put old values of ip_len back as we don't know
2719 * if we have to forward the packet or process it again.
2720 */
2721 oip->ip_len = savelen;
2722
2723 switch (oip->ip_p)
2724 {
2725 case IPPROTO_ICMP :
2726 /*
2727 * an ICMP error can only be generated as a result of an
2728 * ICMP query, not as the response on an ICMP error
2729 *
2730 * XXX theoretically ICMP_ECHOREP and the other reply's are
2731 * ICMP query's as well, but adding them here seems strange XXX
2732 */
2733 if ((ofin.fin_flx & FI_ICMPERR) != 0) {
2734 DT1(iss_icmp_icmperr, fr_info_t *, &ofin);
2735 SBUMP(ipf_state_stats.iss_icmp_icmperr);
2736 return (NULL);
2737 }
2738
2739 /*
2740 * perform a lookup of the ICMP packet in the state table
2741 */
2742 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2743 hv += icmp->icmp_id;
2744 hv = DOUBLE_HASH(hv);
2745
2746 READ_ENTER(&softc->ipf_state);
2747 for (isp = &softs->ipf_state_table[hv];
2748 ((is = *isp) != NULL); ) {
2749 isp = &is->is_hnext;
2750 if ((is->is_p != pr) || (is->is_v != 4))
2751 continue;
2752 if (is->is_pass & FR_NOICMPERR)
2753 continue;
2754
2755 is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2756 NULL, FI_ICMPCMP);
2757 if ((is != NULL) && !ipf_allowstateicmp(fin, is, &src))
2758 return (is);
2759 }
2760 RWLOCK_EXIT(&softc->ipf_state);
2761 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_1);
2762 return (NULL);
2763 case IPPROTO_TCP :
2764 case IPPROTO_UDP :
2765 break;
2766 default :
2767 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_2);
2768 return (NULL);
2769 }
2770
2771 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2772
2773 hv += tcp->th_dport;
2774 hv += tcp->th_sport;
2775 hv = DOUBLE_HASH(hv);
2776
2777 READ_ENTER(&softc->ipf_state);
2778 for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
2779 isp = &is->is_hnext;
2780 /*
2781 * Only allow this icmp though if the
2782 * encapsulated packet was allowed through the
2783 * other way around. Note that the minimal amount
2784 * of info present does not allow for checking against
2785 * tcp internals such as seq and ack numbers. Only the
2786 * ports are known to be present and can be even if the
2787 * short flag is set.
2788 */
2789 if ((is->is_p == pr) && (is->is_v == 4) &&
2790 (is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2791 tcp, FI_ICMPCMP))) {
2792 if (ipf_allowstateicmp(fin, is, &src) == 0)
2793 return (is);
2794 }
2795 }
2796 RWLOCK_EXIT(&softc->ipf_state);
2797 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_3);
2798 return (NULL);
2799 }
2800
2801
2802 /* ------------------------------------------------------------------------ */
2803 /* Function: ipf_allowstateicmp */
2804 /* Returns: int - 1 = packet denied, 0 = packet allowed */
2805 /* Parameters: fin(I) - pointer to packet information */
2806 /* is(I) - pointer to state table entry */
2807 /* src(I) - source address to check permission for */
2808 /* */
2809 /* For an ICMP packet that has so far matched a state table entry, check if */
2810 /* there are any further refinements that might mean we want to block this */
2811 /* packet. This code isn't specific to either IPv4 or IPv6. */
2812 /* ------------------------------------------------------------------------ */
2813 static int
ipf_allowstateicmp(fr_info_t * fin,ipstate_t * is,i6addr_t * src)2814 ipf_allowstateicmp(fr_info_t *fin, ipstate_t *is, i6addr_t *src)
2815 {
2816 ipf_main_softc_t *softc = fin->fin_main_soft;
2817 ipf_state_softc_t *softs = softc->ipf_state_soft;
2818 frentry_t *savefr;
2819 frentry_t *fr;
2820 u_32_t ipass;
2821 int backward;
2822 int oi;
2823 int i;
2824
2825 fr = is->is_rule;
2826 if (fr != NULL && fr->fr_icmpgrp != NULL) {
2827 savefr = fin->fin_fr;
2828 fin->fin_fr = fr->fr_icmpgrp->fg_start;
2829
2830 ipass = ipf_scanlist(fin, softc->ipf_pass);
2831 fin->fin_fr = savefr;
2832 if (FR_ISBLOCK(ipass)) {
2833 SBUMPD(ipf_state_stats, iss_icmp_headblock);
2834 return (1);
2835 }
2836 }
2837
2838 /*
2839 * i : the index of this packet (the icmp unreachable)
2840 * oi : the index of the original packet found in the
2841 * icmp header (i.e. the packet causing this icmp)
2842 * backward : original packet was backward compared to
2843 * the state
2844 */
2845 backward = IP6_NEQ(&is->is_src, src);
2846 fin->fin_rev = !backward;
2847 i = (!backward << 1) + fin->fin_out;
2848 oi = (backward << 1) + !fin->fin_out;
2849
2850 if (is->is_pass & FR_NOICMPERR) {
2851 SBUMPD(ipf_state_stats, iss_icmp_banned);
2852 return (1);
2853 }
2854 if (is->is_icmppkts[i] > is->is_pkts[oi]) {
2855 SBUMPD(ipf_state_stats, iss_icmp_toomany);
2856 return (1);
2857 }
2858
2859 DT2(iss_icmp_hits, fr_info_t *, fin, ipstate_t *, is);
2860 SBUMP(ipf_state_stats.iss_icmp_hits);
2861 is->is_icmppkts[i]++;
2862
2863 /*
2864 * we deliberately do not touch the timeouts
2865 * for the accompanying state table entry.
2866 * It remains to be seen if that is correct. XXX
2867 */
2868 return (0);
2869 }
2870
2871
2872 /* ------------------------------------------------------------------------ */
2873 /* Function: ipf_ipsmove */
2874 /* Returns: Nil */
2875 /* Parameters: is(I) - pointer to state table entry */
2876 /* hv(I) - new hash value for state table entry */
2877 /* Write Locks: ipf_state */
2878 /* */
2879 /* Move a state entry from one position in the hash table to another. */
2880 /* ------------------------------------------------------------------------ */
2881 static void
ipf_ipsmove(ipf_state_softc_t * softs,ipstate_t * is,u_int hv)2882 ipf_ipsmove(ipf_state_softc_t *softs, ipstate_t *is, u_int hv)
2883 {
2884 ipstate_t **isp;
2885 u_int hvm;
2886
2887 hvm = is->is_hv;
2888
2889 /* TRACE is, is_hv, hvm */
2890
2891 /*
2892 * Remove the hash from the old location...
2893 */
2894 isp = is->is_phnext;
2895 if (is->is_hnext)
2896 is->is_hnext->is_phnext = isp;
2897 *isp = is->is_hnext;
2898 if (softs->ipf_state_table[hvm] == NULL)
2899 softs->ipf_state_stats.iss_inuse--;
2900 softs->ipf_state_stats.iss_bucketlen[hvm]--;
2901
2902 /*
2903 * ...and put the hash in the new one.
2904 */
2905 hvm = DOUBLE_HASH(hv);
2906 is->is_hv = hvm;
2907
2908 /* TRACE is, hv, is_hv, hvm */
2909
2910 isp = &softs->ipf_state_table[hvm];
2911 if (*isp)
2912 (*isp)->is_phnext = &is->is_hnext;
2913 else
2914 softs->ipf_state_stats.iss_inuse++;
2915 softs->ipf_state_stats.iss_bucketlen[hvm]++;
2916 is->is_phnext = isp;
2917 is->is_hnext = *isp;
2918 *isp = is;
2919 }
2920
2921
2922 /* ------------------------------------------------------------------------ */
2923 /* Function: ipf_state_lookup */
2924 /* Returns: ipstate_t* - NULL == no matching state found, */
2925 /* else pointer to state information is returned */
2926 /* Parameters: fin(I) - pointer to packet information */
2927 /* tcp(I) - pointer to TCP/UDP header. */
2928 /* ifqp(O) - pointer for storing tailq timeout */
2929 /* */
2930 /* Search the state table for a matching entry to the packet described by */
2931 /* the contents of *fin. For certain protocols, when a match is found the */
2932 /* timeout queue is also selected and stored in ifpq if it is non-NULL. */
2933 /* */
2934 /* If we return NULL then no lock on ipf_state is held. */
2935 /* If we return non-null then a read-lock on ipf_state is held. */
2936 /* ------------------------------------------------------------------------ */
2937 ipstate_t *
ipf_state_lookup(fr_info_t * fin,tcphdr_t * tcp,ipftq_t ** ifqp)2938 ipf_state_lookup(fr_info_t *fin, tcphdr_t *tcp, ipftq_t **ifqp)
2939 {
2940 ipf_main_softc_t *softc = fin->fin_main_soft;
2941 ipf_state_softc_t *softs = softc->ipf_state_soft;
2942 u_int hv, hvm, pr, v, tryagain;
2943 ipstate_t *is, **isp;
2944 u_short dport, sport;
2945 i6addr_t src, dst;
2946 struct icmp *ic;
2947 ipftq_t *ifq;
2948 int oow;
2949
2950 is = NULL;
2951 ifq = NULL;
2952 tcp = fin->fin_dp;
2953 ic = (struct icmp *)tcp;
2954 hv = (pr = fin->fin_fi.fi_p);
2955 src = fin->fin_fi.fi_src;
2956 dst = fin->fin_fi.fi_dst;
2957 hv += src.in4.s_addr;
2958 hv += dst.in4.s_addr;
2959
2960 v = fin->fin_fi.fi_v;
2961 #ifdef USE_INET6
2962 if (v == 6) {
2963 hv += fin->fin_fi.fi_src.i6[1];
2964 hv += fin->fin_fi.fi_src.i6[2];
2965 hv += fin->fin_fi.fi_src.i6[3];
2966
2967 if ((fin->fin_p == IPPROTO_ICMPV6) &&
2968 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
2969 hv -= dst.in4.s_addr;
2970 } else {
2971 hv += fin->fin_fi.fi_dst.i6[1];
2972 hv += fin->fin_fi.fi_dst.i6[2];
2973 hv += fin->fin_fi.fi_dst.i6[3];
2974 }
2975 }
2976 #endif
2977 if ((v == 4) &&
2978 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
2979 if (fin->fin_out == 0) {
2980 hv -= src.in4.s_addr;
2981 } else {
2982 hv -= dst.in4.s_addr;
2983 }
2984 }
2985
2986 /* TRACE fin_saddr, fin_daddr, hv */
2987
2988 /*
2989 * Search the hash table for matching packet header info.
2990 */
2991 switch (pr)
2992 {
2993 #ifdef USE_INET6
2994 case IPPROTO_ICMPV6 :
2995 tryagain = 0;
2996 if (v == 6) {
2997 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
2998 (ic->icmp_type == ICMP6_ECHO_REPLY)) {
2999 hv += ic->icmp_id;
3000 }
3001 }
3002 READ_ENTER(&softc->ipf_state);
3003 icmp6again:
3004 hvm = DOUBLE_HASH(hv);
3005 for (isp = &softs->ipf_state_table[hvm];
3006 ((is = *isp) != NULL); ) {
3007 isp = &is->is_hnext;
3008 if ((is->is_p != pr) || (is->is_v != v))
3009 continue;
3010 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3011 if (is != NULL &&
3012 ipf_matchicmpqueryreply(v, &is->is_icmp,
3013 ic, fin->fin_rev)) {
3014 if (fin->fin_rev)
3015 ifq = &softs->ipf_state_icmpacktq;
3016 else
3017 ifq = &softs->ipf_state_icmptq;
3018 break;
3019 }
3020 }
3021
3022 if (is != NULL) {
3023 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
3024 hv += fin->fin_fi.fi_src.i6[0];
3025 hv += fin->fin_fi.fi_src.i6[1];
3026 hv += fin->fin_fi.fi_src.i6[2];
3027 hv += fin->fin_fi.fi_src.i6[3];
3028 ipf_ipsmove(softs, is, hv);
3029 MUTEX_DOWNGRADE(&softc->ipf_state);
3030 }
3031 break;
3032 }
3033 RWLOCK_EXIT(&softc->ipf_state);
3034
3035 /*
3036 * No matching icmp state entry. Perhaps this is a
3037 * response to another state entry.
3038 *
3039 * XXX With some ICMP6 packets, the "other" address is already
3040 * in the packet, after the ICMP6 header, and this could be
3041 * used in place of the multicast address. However, taking
3042 * advantage of this requires some significant code changes
3043 * to handle the specific types where that is the case.
3044 */
3045 if ((softs->ipf_state_stats.iss_wild != 0) &&
3046 ((fin->fin_flx & FI_NOWILD) == 0) &&
3047 (v == 6) && (tryagain == 0)) {
3048 hv -= fin->fin_fi.fi_src.i6[0];
3049 hv -= fin->fin_fi.fi_src.i6[1];
3050 hv -= fin->fin_fi.fi_src.i6[2];
3051 hv -= fin->fin_fi.fi_src.i6[3];
3052 tryagain = 1;
3053 WRITE_ENTER(&softc->ipf_state);
3054 goto icmp6again;
3055 }
3056
3057 is = ipf_checkicmp6matchingstate(fin);
3058 if (is != NULL)
3059 return (is);
3060 break;
3061 #endif
3062
3063 case IPPROTO_ICMP :
3064 if (v == 4) {
3065 hv += ic->icmp_id;
3066 }
3067 hv = DOUBLE_HASH(hv);
3068 READ_ENTER(&softc->ipf_state);
3069 for (isp = &softs->ipf_state_table[hv];
3070 ((is = *isp) != NULL); ) {
3071 isp = &is->is_hnext;
3072 if ((is->is_p != pr) || (is->is_v != v))
3073 continue;
3074 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3075 if ((is != NULL) &&
3076 (ic->icmp_id == is->is_icmp.ici_id) &&
3077 ipf_matchicmpqueryreply(v, &is->is_icmp,
3078 ic, fin->fin_rev)) {
3079 if (fin->fin_rev)
3080 ifq = &softs->ipf_state_icmpacktq;
3081 else
3082 ifq = &softs->ipf_state_icmptq;
3083 break;
3084 }
3085 }
3086 if (is == NULL) {
3087 RWLOCK_EXIT(&softc->ipf_state);
3088 }
3089 break;
3090
3091 case IPPROTO_TCP :
3092 case IPPROTO_UDP :
3093 ifqp = NULL;
3094 sport = htons(fin->fin_data[0]);
3095 hv += sport;
3096 dport = htons(fin->fin_data[1]);
3097 hv += dport;
3098 oow = 0;
3099 tryagain = 0;
3100 READ_ENTER(&softc->ipf_state);
3101 retry_tcpudp:
3102 hvm = DOUBLE_HASH(hv);
3103
3104 /* TRACE hv, hvm */
3105
3106 for (isp = &softs->ipf_state_table[hvm];
3107 ((is = *isp) != NULL); ) {
3108 isp = &is->is_hnext;
3109 if ((is->is_p != pr) || (is->is_v != v))
3110 continue;
3111 fin->fin_flx &= ~FI_OOW;
3112 is = ipf_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
3113 if (is != NULL) {
3114 if (pr == IPPROTO_TCP) {
3115 if (!ipf_state_tcp(softc, softs, fin,
3116 tcp, is)) {
3117 oow |= fin->fin_flx & FI_OOW;
3118 continue;
3119 }
3120 }
3121 break;
3122 }
3123 }
3124 if (is != NULL) {
3125 if (tryagain &&
3126 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
3127 hv += dport;
3128 hv += sport;
3129 ipf_ipsmove(softs, is, hv);
3130 MUTEX_DOWNGRADE(&softc->ipf_state);
3131 }
3132 break;
3133 }
3134 RWLOCK_EXIT(&softc->ipf_state);
3135
3136 if ((softs->ipf_state_stats.iss_wild != 0) &&
3137 ((fin->fin_flx & FI_NOWILD) == 0)) {
3138 if (tryagain == 0) {
3139 hv -= dport;
3140 hv -= sport;
3141 } else if (tryagain == 1) {
3142 hv = fin->fin_fi.fi_p;
3143 /*
3144 * If we try to pretend this is a reply to a
3145 * multicast/broadcast packet then we need to
3146 * exclude part of the address from the hash
3147 * calculation.
3148 */
3149 if (fin->fin_out == 0) {
3150 hv += src.in4.s_addr;
3151 } else {
3152 hv += dst.in4.s_addr;
3153 }
3154 hv += dport;
3155 hv += sport;
3156 }
3157 tryagain++;
3158 if (tryagain <= 2) {
3159 WRITE_ENTER(&softc->ipf_state);
3160 goto retry_tcpudp;
3161 }
3162 }
3163 fin->fin_flx |= oow;
3164 break;
3165
3166 #if 0
3167 case IPPROTO_GRE :
3168 gre = fin->fin_dp;
3169 if (GRE_REV(gre->gr_flags) == 1) {
3170 hv += gre->gr_call;
3171 }
3172 /* FALLTHROUGH */
3173 #endif
3174 default :
3175 ifqp = NULL;
3176 hvm = DOUBLE_HASH(hv);
3177 READ_ENTER(&softc->ipf_state);
3178 for (isp = &softs->ipf_state_table[hvm];
3179 ((is = *isp) != NULL); ) {
3180 isp = &is->is_hnext;
3181 if ((is->is_p != pr) || (is->is_v != v))
3182 continue;
3183 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3184 if (is != NULL) {
3185 ifq = &softs->ipf_state_iptq;
3186 break;
3187 }
3188 }
3189 if (is == NULL) {
3190 RWLOCK_EXIT(&softc->ipf_state);
3191 }
3192 break;
3193 }
3194
3195 if (is != NULL) {
3196 if (((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
3197 (is->is_tqehead[fin->fin_rev] != NULL))
3198 ifq = is->is_tqehead[fin->fin_rev];
3199 if (ifq != NULL && ifqp != NULL)
3200 *ifqp = ifq;
3201 } else {
3202 SBUMP(ipf_state_stats.iss_lookup_miss);
3203 }
3204 return (is);
3205 }
3206
3207
3208 /* ------------------------------------------------------------------------ */
3209 /* Function: ipf_state_check */
3210 /* Returns: frentry_t* - NULL == search failed, */
3211 /* else pointer to rule for matching state */
3212 /* Parameters: fin(I) - pointer to packet information */
3213 /* passp(I) - pointer to filtering result flags */
3214 /* */
3215 /* Check if a packet is associated with an entry in the state table. */
3216 /* ------------------------------------------------------------------------ */
3217 frentry_t *
ipf_state_check(fr_info_t * fin,u_32_t * passp)3218 ipf_state_check(fr_info_t *fin, u_32_t *passp)
3219 {
3220 ipf_main_softc_t *softc = fin->fin_main_soft;
3221 ipf_state_softc_t *softs = softc->ipf_state_soft;
3222 ipftqent_t *tqe;
3223 ipstate_t *is;
3224 frentry_t *fr;
3225 tcphdr_t *tcp;
3226 ipftq_t *ifq;
3227 u_int pass;
3228 int inout;
3229
3230 if (softs->ipf_state_lock || (softs->ipf_state_list == NULL))
3231 return (NULL);
3232
3233 if (fin->fin_flx & (FI_SHORT|FI_FRAGBODY|FI_BAD)) {
3234 SBUMPD(ipf_state_stats, iss_check_bad);
3235 return (NULL);
3236 }
3237
3238 if ((fin->fin_flx & FI_TCPUDP) ||
3239 (fin->fin_fi.fi_p == IPPROTO_ICMP)
3240 #ifdef USE_INET6
3241 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
3242 #endif
3243 )
3244 tcp = fin->fin_dp;
3245 else
3246 tcp = NULL;
3247
3248 ifq = NULL;
3249 /*
3250 * Search the hash table for matching packet header info.
3251 */
3252 is = ipf_state_lookup(fin, tcp, &ifq);
3253
3254 switch (fin->fin_p)
3255 {
3256 #ifdef USE_INET6
3257 case IPPROTO_ICMPV6 :
3258 if (is != NULL)
3259 break;
3260 if (fin->fin_v == 6) {
3261 is = ipf_checkicmp6matchingstate(fin);
3262 }
3263 break;
3264 #endif
3265 case IPPROTO_ICMP :
3266 if (is != NULL)
3267 break;
3268 /*
3269 * No matching icmp state entry. Perhaps this is a
3270 * response to another state entry.
3271 */
3272 is = ipf_checkicmpmatchingstate(fin);
3273 break;
3274
3275 case IPPROTO_TCP :
3276 if (is == NULL)
3277 break;
3278
3279 if (is->is_pass & FR_NEWISN) {
3280 if (fin->fin_out == 0)
3281 ipf_fixinisn(fin, is);
3282 else if (fin->fin_out == 1)
3283 ipf_fixoutisn(fin, is);
3284 }
3285 break;
3286 default :
3287 if (fin->fin_rev)
3288 ifq = &softs->ipf_state_udpacktq;
3289 else
3290 ifq = &softs->ipf_state_udptq;
3291 break;
3292 }
3293 if (is == NULL) {
3294 SBUMP(ipf_state_stats.iss_check_miss);
3295 return (NULL);
3296 }
3297
3298 fr = is->is_rule;
3299 if (fr != NULL) {
3300 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
3301 if (fin->fin_nattag == NULL) {
3302 RWLOCK_EXIT(&softc->ipf_state);
3303 SBUMPD(ipf_state_stats, iss_check_notag);
3304 return (NULL);
3305 }
3306 if (ipf_matchtag(&fr->fr_nattag, fin->fin_nattag)!=0) {
3307 RWLOCK_EXIT(&softc->ipf_state);
3308 SBUMPD(ipf_state_stats, iss_check_nattag);
3309 return (NULL);
3310 }
3311 }
3312 (void) strncpy(fin->fin_group, FR_NAME(fr, fr_group),
3313 FR_GROUPLEN);
3314 fin->fin_icode = fr->fr_icode;
3315 }
3316
3317 fin->fin_rule = is->is_rulen;
3318 fin->fin_fr = fr;
3319
3320 /*
3321 * If this packet is a fragment and the rule says to track fragments,
3322 * then create a new fragment cache entry.
3323 */
3324 if (fin->fin_flx & FI_FRAG && FR_ISPASS(is->is_pass) &&
3325 is->is_pass & FR_KEEPFRAG)
3326 (void) ipf_frag_new(softc, fin, is->is_pass);
3327
3328 /*
3329 * For TCP packets, ifq == NULL. For all others, check if this new
3330 * queue is different to the last one it was on and move it if so.
3331 */
3332 tqe = &is->is_sti;
3333 if ((tqe->tqe_flags & TQE_RULEBASED) != 0)
3334 ifq = is->is_tqehead[fin->fin_rev];
3335
3336 MUTEX_ENTER(&is->is_lock);
3337
3338 if (ifq != NULL)
3339 ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq, ifq);
3340
3341 inout = (fin->fin_rev << 1) + fin->fin_out;
3342 is->is_pkts[inout]++;
3343 is->is_bytes[inout] += fin->fin_plen;
3344 fin->fin_pktnum = is->is_pkts[inout] + is->is_icmppkts[inout];
3345
3346 MUTEX_EXIT(&is->is_lock);
3347
3348 pass = is->is_pass;
3349
3350 if (is->is_flags & IS_STATESYNC)
3351 ipf_sync_update(softc, SMC_STATE, fin, is->is_sync);
3352
3353 RWLOCK_EXIT(&softc->ipf_state);
3354
3355 SBUMP(ipf_state_stats.iss_hits);
3356
3357 fin->fin_dif = &is->is_dif;
3358 fin->fin_tif = &is->is_tifs[fin->fin_rev];
3359 fin->fin_flx |= FI_STATE;
3360 if ((pass & FR_LOGFIRST) != 0)
3361 pass &= ~(FR_LOGFIRST|FR_LOG);
3362 *passp = pass;
3363 return (fr);
3364 }
3365
3366
3367 /* ------------------------------------------------------------------------ */
3368 /* Function: ipf_fixoutisn */
3369 /* Returns: Nil */
3370 /* Parameters: fin(I) - pointer to packet information */
3371 /* is(I) - pointer to master state structure */
3372 /* */
3373 /* Called only for outbound packets, adjusts the sequence number and the */
3374 /* TCP checksum to match that change. */
3375 /* ------------------------------------------------------------------------ */
3376 static void
ipf_fixoutisn(fr_info_t * fin,ipstate_t * is)3377 ipf_fixoutisn(fr_info_t *fin, ipstate_t *is)
3378 {
3379 tcphdr_t *tcp;
3380 int rev;
3381 u_32_t seq;
3382
3383 tcp = fin->fin_dp;
3384 rev = fin->fin_rev;
3385 if ((is->is_flags & IS_ISNSYN) != 0) {
3386 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3387 seq = ntohl(tcp->th_seq);
3388 seq += is->is_isninc[0];
3389 tcp->th_seq = htonl(seq);
3390 ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3391 }
3392 }
3393 if ((is->is_flags & IS_ISNACK) != 0) {
3394 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3395 seq = ntohl(tcp->th_seq);
3396 seq += is->is_isninc[1];
3397 tcp->th_seq = htonl(seq);
3398 ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3399 }
3400 }
3401 }
3402
3403
3404 /* ------------------------------------------------------------------------ */
3405 /* Function: ipf_fixinisn */
3406 /* Returns: Nil */
3407 /* Parameters: fin(I) - pointer to packet information */
3408 /* is(I) - pointer to master state structure */
3409 /* */
3410 /* Called only for inbound packets, adjusts the acknowledge number and the */
3411 /* TCP checksum to match that change. */
3412 /* ------------------------------------------------------------------------ */
3413 static void
ipf_fixinisn(fr_info_t * fin,ipstate_t * is)3414 ipf_fixinisn(fr_info_t *fin, ipstate_t *is)
3415 {
3416 tcphdr_t *tcp;
3417 int rev;
3418 u_32_t ack;
3419
3420 tcp = fin->fin_dp;
3421 rev = fin->fin_rev;
3422 if ((is->is_flags & IS_ISNSYN) != 0) {
3423 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3424 ack = ntohl(tcp->th_ack);
3425 ack -= is->is_isninc[0];
3426 tcp->th_ack = htonl(ack);
3427 ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3428 }
3429 }
3430 if ((is->is_flags & IS_ISNACK) != 0) {
3431 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3432 ack = ntohl(tcp->th_ack);
3433 ack -= is->is_isninc[1];
3434 tcp->th_ack = htonl(ack);
3435 ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3436 }
3437 }
3438 }
3439
3440
3441 /* ------------------------------------------------------------------------ */
3442 /* Function: ipf_state_sync */
3443 /* Returns: Nil */
3444 /* Parameters: softc(I) - pointer to soft context main structure */
3445 /* ifp(I) - pointer to interface */
3446 /* */
3447 /* Walk through all state entries and if an interface pointer match is */
3448 /* found then look it up again, based on its name in case the pointer has */
3449 /* changed since last time. */
3450 /* */
3451 /* If ifp is passed in as being non-null then we are only doing updates for */
3452 /* existing, matching, uses of it. */
3453 /* ------------------------------------------------------------------------ */
3454 void
ipf_state_sync(ipf_main_softc_t * softc,void * ifp)3455 ipf_state_sync(ipf_main_softc_t *softc, void *ifp)
3456 {
3457 ipf_state_softc_t *softs = softc->ipf_state_soft;
3458 ipstate_t *is;
3459 int i;
3460
3461 if (softc->ipf_running <= 0)
3462 return;
3463
3464 WRITE_ENTER(&softc->ipf_state);
3465
3466 if (softc->ipf_running <= 0) {
3467 RWLOCK_EXIT(&softc->ipf_state);
3468 return;
3469 }
3470
3471 for (is = softs->ipf_state_list; is; is = is->is_next) {
3472 /*
3473 * Look up all the interface names in the state entry.
3474 */
3475 for (i = 0; i < FR_NUM(is->is_ifp); i++) {
3476 if (ifp == NULL || ifp == is->is_ifp[i])
3477 is->is_ifp[i] = ipf_resolvenic(softc,
3478 is->is_ifname[i],
3479 is->is_v);
3480 }
3481 }
3482 RWLOCK_EXIT(&softc->ipf_state);
3483 }
3484
3485
3486 /* ------------------------------------------------------------------------ */
3487 /* Function: ipf_state_del */
3488 /* Returns: int - 0 = deleted, else refernce count on active struct */
3489 /* Parameters: softc(I) - pointer to soft context main structure */
3490 /* is(I) - pointer to state structure to delete */
3491 /* why(I) - if not 0, log reason why it was deleted */
3492 /* Write Locks: ipf_state */
3493 /* */
3494 /* Deletes a state entry from the enumerated list as well as the hash table */
3495 /* and timeout queue lists. Make adjustments to hash table statistics and */
3496 /* global counters as required. */
3497 /* ------------------------------------------------------------------------ */
3498 static int
ipf_state_del(ipf_main_softc_t * softc,ipstate_t * is,int why)3499 ipf_state_del(ipf_main_softc_t *softc, ipstate_t *is, int why)
3500 {
3501 ipf_state_softc_t *softs = softc->ipf_state_soft;
3502 int orphan = 1;
3503 frentry_t *fr;
3504
3505 /*
3506 * Since we want to delete this, remove it from the state table,
3507 * where it can be found & used, first.
3508 */
3509 if (is->is_phnext != NULL) {
3510 *is->is_phnext = is->is_hnext;
3511 if (is->is_hnext != NULL)
3512 is->is_hnext->is_phnext = is->is_phnext;
3513 if (softs->ipf_state_table[is->is_hv] == NULL)
3514 softs->ipf_state_stats.iss_inuse--;
3515 softs->ipf_state_stats.iss_bucketlen[is->is_hv]--;
3516
3517 is->is_phnext = NULL;
3518 is->is_hnext = NULL;
3519 orphan = 0;
3520 }
3521
3522 /*
3523 * Because ipf_state_stats.iss_wild is a count of entries in the state
3524 * table that have wildcard flags set, only decerement it once
3525 * and do it here.
3526 */
3527 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
3528 if (!(is->is_flags & SI_CLONED)) {
3529 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
3530 }
3531 is->is_flags &= ~(SI_WILDP|SI_WILDA);
3532 }
3533
3534 /*
3535 * Next, remove it from the timeout queue it is in.
3536 */
3537 if (is->is_sti.tqe_ifq != NULL)
3538 ipf_deletequeueentry(&is->is_sti);
3539
3540 /*
3541 * If it is still in use by something else, do not go any further,
3542 * but note that at this point it is now an orphan. How can this
3543 * be? ipf_state_flush() calls ipf_delete() directly because it wants
3544 * to empty the table out and if something has a hold on a state
3545 * entry (such as ipfstat), it'll do the deref path that'll bring
3546 * us back here to do the real delete & free.
3547 */
3548 MUTEX_ENTER(&is->is_lock);
3549 if (is->is_me != NULL) {
3550 *is->is_me = NULL;
3551 is->is_me = NULL;
3552 is->is_ref--;
3553 }
3554 is->is_ref--;
3555 if (is->is_ref > 0) {
3556 int refs;
3557
3558 refs = is->is_ref;
3559 MUTEX_EXIT(&is->is_lock);
3560 if (!orphan)
3561 softs->ipf_state_stats.iss_orphan++;
3562 return (refs);
3563 }
3564
3565 fr = is->is_rule;
3566 is->is_rule = NULL;
3567 if (fr != NULL) {
3568 if (fr->fr_srctrack.ht_max_nodes != 0) {
3569 (void) ipf_ht_node_del(&fr->fr_srctrack,
3570 is->is_family, &is->is_src);
3571 }
3572 }
3573
3574 ASSERT(is->is_ref == 0);
3575 MUTEX_EXIT(&is->is_lock);
3576
3577 if (is->is_tqehead[0] != NULL) {
3578 if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
3579 ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
3580 }
3581 if (is->is_tqehead[1] != NULL) {
3582 if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
3583 ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
3584 }
3585
3586 if (is->is_sync)
3587 ipf_sync_del_state(softc->ipf_sync_soft, is->is_sync);
3588
3589 /*
3590 * Now remove it from the linked list of known states
3591 */
3592 if (is->is_pnext != NULL) {
3593 *is->is_pnext = is->is_next;
3594
3595 if (is->is_next != NULL)
3596 is->is_next->is_pnext = is->is_pnext;
3597
3598 is->is_pnext = NULL;
3599 is->is_next = NULL;
3600 }
3601
3602 if (softs->ipf_state_logging != 0 && why != 0)
3603 ipf_state_log(softc, is, why);
3604
3605 if (is->is_p == IPPROTO_TCP)
3606 softs->ipf_state_stats.iss_fin++;
3607 else
3608 softs->ipf_state_stats.iss_expire++;
3609 if (orphan)
3610 softs->ipf_state_stats.iss_orphan--;
3611
3612 if (fr != NULL) {
3613 fr->fr_statecnt--;
3614 (void) ipf_derefrule(softc, &fr);
3615 }
3616
3617 softs->ipf_state_stats.iss_active_proto[is->is_p]--;
3618
3619 MUTEX_DESTROY(&is->is_lock);
3620 KFREE(is);
3621 softs->ipf_state_stats.iss_active--;
3622
3623 return (0);
3624 }
3625
3626
3627 /* ------------------------------------------------------------------------ */
3628 /* Function: ipf_state_expire */
3629 /* Returns: Nil */
3630 /* Parameters: softc(I) - pointer to soft context main structure */
3631 /* */
3632 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */
3633 /* used here is to keep the queue sorted with the oldest things at the top */
3634 /* and the youngest at the bottom. So if the top one doesn't need to be */
3635 /* expired then neither will any under it. */
3636 /* ------------------------------------------------------------------------ */
3637 void
ipf_state_expire(ipf_main_softc_t * softc)3638 ipf_state_expire(ipf_main_softc_t *softc)
3639 {
3640 ipf_state_softc_t *softs = softc->ipf_state_soft;
3641 ipftq_t *ifq, *ifqnext;
3642 ipftqent_t *tqe, *tqn;
3643 ipstate_t *is;
3644 SPL_INT(s);
3645
3646 SPL_NET(s);
3647 WRITE_ENTER(&softc->ipf_state);
3648 for (ifq = softs->ipf_state_tcptq; ifq != NULL; ifq = ifq->ifq_next)
3649 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3650 if (tqe->tqe_die > softc->ipf_ticks)
3651 break;
3652 tqn = tqe->tqe_next;
3653 is = tqe->tqe_parent;
3654 ipf_state_del(softc, is, ISL_EXPIRE);
3655 }
3656
3657 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3658 ifqnext = ifq->ifq_next;
3659
3660 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3661 if (tqe->tqe_die > softc->ipf_ticks)
3662 break;
3663 tqn = tqe->tqe_next;
3664 is = tqe->tqe_parent;
3665 ipf_state_del(softc, is, ISL_EXPIRE);
3666 }
3667 }
3668
3669 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3670 ifqnext = ifq->ifq_next;
3671
3672 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
3673 (ifq->ifq_ref == 0)) {
3674 ipf_freetimeoutqueue(softc, ifq);
3675 }
3676 }
3677
3678 if (softs->ipf_state_doflush) {
3679 (void) ipf_state_flush(softc, 2, 0);
3680 softs->ipf_state_doflush = 0;
3681 softs->ipf_state_wm_last = softc->ipf_ticks;
3682 }
3683
3684 RWLOCK_EXIT(&softc->ipf_state);
3685 SPL_X(s);
3686 }
3687
3688
3689 /* ------------------------------------------------------------------------ */
3690 /* Function: ipf_state_flush */
3691 /* Returns: int - 0 == success, -1 == failure */
3692 /* Parameters: softc(I) - pointer to soft context main structure */
3693 /* which(I) - which flush action to perform */
3694 /* proto(I) - which protocol to flush (0 == ALL) */
3695 /* Write Locks: ipf_state */
3696 /* */
3697 /* Flush state tables. Three actions currently defined: */
3698 /* which == 0 : flush all state table entries */
3699 /* which == 1 : flush TCP connections which have started to close but are */
3700 /* stuck for some reason. */
3701 /* which == 2 : flush TCP connections which have been idle for a long time, */
3702 /* starting at > 4 days idle and working back in successive half-*/
3703 /* days to at most 12 hours old. If this fails to free enough */
3704 /* slots then work backwards in half hour slots to 30 minutes. */
3705 /* If that too fails, then work backwards in 30 second intervals */
3706 /* for the last 30 minutes to at worst 30 seconds idle. */
3707 /* ------------------------------------------------------------------------ */
3708 int
ipf_state_flush(ipf_main_softc_t * softc,int which,int proto)3709 ipf_state_flush(ipf_main_softc_t *softc, int which, int proto)
3710 {
3711 ipf_state_softc_t *softs = softc->ipf_state_soft;
3712 ipftqent_t *tqe, *tqn;
3713 ipstate_t *is, **isp;
3714 ipftq_t *ifq;
3715 int removed;
3716 SPL_INT(s);
3717
3718 removed = 0;
3719
3720 SPL_NET(s);
3721
3722 switch (which)
3723 {
3724 case 0 :
3725 SBUMP(ipf_state_stats.iss_flush_all);
3726 /*
3727 * Style 0 flush removes everything...
3728 */
3729 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3730 if ((proto != 0) && (is->is_v != proto)) {
3731 isp = &is->is_next;
3732 continue;
3733 }
3734 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3735 removed++;
3736 else
3737 isp = &is->is_next;
3738 }
3739 break;
3740
3741 case 1 :
3742 SBUMP(ipf_state_stats.iss_flush_closing);
3743 /*
3744 * Since we're only interested in things that are closing,
3745 * we can start with the appropriate timeout queue.
3746 */
3747 for (ifq = softs->ipf_state_tcptq + IPF_TCPS_CLOSE_WAIT;
3748 ifq != NULL; ifq = ifq->ifq_next) {
3749
3750 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3751 tqn = tqe->tqe_next;
3752 is = tqe->tqe_parent;
3753 if (is->is_p != IPPROTO_TCP)
3754 break;
3755 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3756 removed++;
3757 }
3758 }
3759
3760 /*
3761 * Also need to look through the user defined queues.
3762 */
3763 for (ifq = softs->ipf_state_usertq; ifq != NULL;
3764 ifq = ifq->ifq_next) {
3765 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3766 tqn = tqe->tqe_next;
3767 is = tqe->tqe_parent;
3768 if (is->is_p != IPPROTO_TCP)
3769 continue;
3770
3771 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
3772 (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
3773 if (ipf_state_del(softc, is,
3774 ISL_FLUSH) == 0)
3775 removed++;
3776 }
3777 }
3778 }
3779 break;
3780
3781 case 2 :
3782 break;
3783
3784 /*
3785 * Args 5-11 correspond to flushing those particular states
3786 * for TCP connections.
3787 */
3788 case IPF_TCPS_CLOSE_WAIT :
3789 case IPF_TCPS_FIN_WAIT_1 :
3790 case IPF_TCPS_CLOSING :
3791 case IPF_TCPS_LAST_ACK :
3792 case IPF_TCPS_FIN_WAIT_2 :
3793 case IPF_TCPS_TIME_WAIT :
3794 case IPF_TCPS_CLOSED :
3795 SBUMP(ipf_state_stats.iss_flush_queue);
3796 tqn = softs->ipf_state_tcptq[which].ifq_head;
3797 while (tqn != NULL) {
3798 tqe = tqn;
3799 tqn = tqe->tqe_next;
3800 is = tqe->tqe_parent;
3801 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3802 removed++;
3803 }
3804 break;
3805
3806 default :
3807 if (which < 30)
3808 break;
3809
3810 SBUMP(ipf_state_stats.iss_flush_state);
3811 /*
3812 * Take a large arbitrary number to mean the number of seconds
3813 * for which which consider to be the maximum value we'll allow
3814 * the expiration to be.
3815 */
3816 which = IPF_TTLVAL(which);
3817 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3818 if ((proto == 0) || (is->is_v == proto)) {
3819 if (softc->ipf_ticks - is->is_touched > which) {
3820 if (ipf_state_del(softc, is,
3821 ISL_FLUSH) == 0) {
3822 removed++;
3823 continue;
3824 }
3825 }
3826 }
3827 isp = &is->is_next;
3828 }
3829 break;
3830 }
3831
3832 if (which != 2) {
3833 SPL_X(s);
3834 return (removed);
3835 }
3836
3837 SBUMP(ipf_state_stats.iss_flush_timeout);
3838 /*
3839 * Asked to remove inactive entries because the table is full, try
3840 * again, 3 times, if first attempt failed with a different criteria
3841 * each time. The order tried in must be in decreasing age.
3842 * Another alternative is to implement random drop and drop N entries
3843 * at random until N have been freed up.
3844 */
3845 if (softc->ipf_ticks - softs->ipf_state_wm_last >
3846 softs->ipf_state_wm_freq) {
3847 removed = ipf_queueflush(softc, ipf_state_flush_entry,
3848 softs->ipf_state_tcptq,
3849 softs->ipf_state_usertq,
3850 &softs->ipf_state_stats.iss_active,
3851 softs->ipf_state_size,
3852 softs->ipf_state_wm_low);
3853 softs->ipf_state_wm_last = softc->ipf_ticks;
3854 }
3855
3856 SPL_X(s);
3857 return (removed);
3858 }
3859
3860
3861 /* ------------------------------------------------------------------------ */
3862 /* Function: ipf_state_flush_entry */
3863 /* Returns: int - 0 = entry deleted, else not deleted */
3864 /* Parameters: softc(I) - pointer to soft context main structure */
3865 /* entry(I) - pointer to state structure to delete */
3866 /* Write Locks: ipf_state */
3867 /* */
3868 /* This function is a stepping stone between ipf_queueflush() and */
3869 /* ipf_state_del(). It is used so we can provide a uniform interface via */
3870 /* the ipf_queueflush() function. */
3871 /* ------------------------------------------------------------------------ */
3872 static int
ipf_state_flush_entry(ipf_main_softc_t * softc,void * entry)3873 ipf_state_flush_entry(ipf_main_softc_t *softc, void *entry)
3874 {
3875 return (ipf_state_del(softc, entry, ISL_FLUSH));
3876 }
3877
3878
3879 /* ------------------------------------------------------------------------ */
3880 /* Function: ipf_tcp_age */
3881 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */
3882 /* Parameters: tqe(I) - pointer to timeout queue information */
3883 /* fin(I) - pointer to packet information */
3884 /* tqtab(I) - TCP timeout queue table this is in */
3885 /* flags(I) - flags from state/NAT entry */
3886 /* ok(I) - can we advance state */
3887 /* */
3888 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */
3889 /* */
3890 /* - (try to) base state transitions on real evidence only, */
3891 /* i.e. packets that are sent and have been received by ipfilter; */
3892 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */
3893 /* */
3894 /* - deal with half-closed connections correctly; */
3895 /* */
3896 /* - store the state of the source in state[0] such that ipfstat */
3897 /* displays the state as source/dest instead of dest/source; the calls */
3898 /* to ipf_tcp_age have been changed accordingly. */
3899 /* */
3900 /* Internal Parameters: */
3901 /* */
3902 /* state[0] = state of source (host that initiated connection) */
3903 /* state[1] = state of dest (host that accepted the connection) */
3904 /* */
3905 /* dir == 0 : a packet from source to dest */
3906 /* dir == 1 : a packet from dest to source */
3907 /* */
3908 /* A typical procession for a connection is as follows: */
3909 /* */
3910 /* +--------------+-------------------+ */
3911 /* | Side '0' | Side '1' | */
3912 /* +--------------+-------------------+ */
3913 /* | 0 -> 1 (SYN) | | */
3914 /* | | 0 -> 2 (SYN-ACK) | */
3915 /* | 1 -> 3 (ACK) | | */
3916 /* | | 2 -> 4 (ACK-PUSH) | */
3917 /* | 3 -> 4 (ACK) | | */
3918 /* | ... | ... | */
3919 /* | | 4 -> 6 (FIN-ACK) | */
3920 /* | 4 -> 5 (ACK) | | */
3921 /* | | 6 -> 6 (ACK-PUSH) | */
3922 /* | 5 -> 5 (ACK) | | */
3923 /* | 5 -> 8 (FIN) | | */
3924 /* | | 6 -> 10 (ACK) | */
3925 /* +--------------+-------------------+ */
3926 /* */
3927 /* Locking: it is assumed that the parent of the tqe structure is locked. */
3928 /* ------------------------------------------------------------------------ */
3929 int
ipf_tcp_age(ipftqent_t * tqe,fr_info_t * fin,ipftq_t * tqtab,int flags,int ok)3930 ipf_tcp_age(ipftqent_t *tqe, fr_info_t *fin, ipftq_t *tqtab, int flags, int ok)
3931 {
3932 ipf_main_softc_t *softc = fin->fin_main_soft;
3933 int dlen, ostate, nstate, rval, dir;
3934 u_char tcpflags;
3935 tcphdr_t *tcp;
3936
3937 tcp = fin->fin_dp;
3938
3939 rval = 0;
3940 dir = fin->fin_rev;
3941 tcpflags = tcp_get_flags(tcp);
3942 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
3943 ostate = tqe->tqe_state[1 - dir];
3944 nstate = tqe->tqe_state[dir];
3945
3946 if (tcpflags & TH_RST) {
3947 if (!(tcpflags & TH_PUSH) && !dlen)
3948 nstate = IPF_TCPS_CLOSED;
3949 else
3950 nstate = IPF_TCPS_CLOSE_WAIT;
3951
3952 if (ostate <= IPF_TCPS_ESTABLISHED) {
3953 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT;
3954 }
3955 rval = 1;
3956 } else {
3957 switch (nstate)
3958 {
3959 case IPF_TCPS_LISTEN: /* 0 */
3960 if ((tcpflags & TH_OPENING) == TH_OPENING) {
3961 /*
3962 * 'dir' received an S and sends SA in
3963 * response, LISTEN -> SYN_RECEIVED
3964 */
3965 nstate = IPF_TCPS_SYN_RECEIVED;
3966 rval = 1;
3967 } else if ((tcpflags & TH_OPENING) == TH_SYN) {
3968 /* 'dir' sent S, LISTEN -> SYN_SENT */
3969 nstate = IPF_TCPS_SYN_SENT;
3970 rval = 1;
3971 }
3972 /*
3973 * the next piece of code makes it possible to get
3974 * already established connections into the state table
3975 * after a restart or reload of the filter rules; this
3976 * does not work when a strict 'flags S keep state' is
3977 * used for tcp connections of course
3978 */
3979 if (((flags & IS_TCPFSM) == 0) &&
3980 ((tcpflags & TH_ACKMASK) == TH_ACK)) {
3981 /*
3982 * we saw an A, guess 'dir' is in ESTABLISHED
3983 * mode
3984 */
3985 switch (ostate)
3986 {
3987 case IPF_TCPS_LISTEN :
3988 case IPF_TCPS_SYN_RECEIVED :
3989 nstate = IPF_TCPS_HALF_ESTAB;
3990 rval = 1;
3991 break;
3992 case IPF_TCPS_HALF_ESTAB :
3993 case IPF_TCPS_ESTABLISHED :
3994 nstate = IPF_TCPS_ESTABLISHED;
3995 rval = 1;
3996 break;
3997 default :
3998 break;
3999 }
4000 }
4001 /*
4002 * TODO: besides regular ACK packets we can have other
4003 * packets as well; it is yet to be determined how we
4004 * should initialize the states in those cases
4005 */
4006 break;
4007
4008 case IPF_TCPS_SYN_SENT: /* 1 */
4009 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
4010 /*
4011 * A retransmitted SYN packet. We do not reset
4012 * the timeout here to ipf_tcptimeout because a
4013 * connection connect timeout does not renew
4014 * after every packet that is sent. We need to
4015 * set rval so as to indicate the packet has
4016 * passed the check for its flags being valid
4017 * in the TCP FSM. Setting rval to 2 has the
4018 * result of not resetting the timeout.
4019 */
4020 rval = 2;
4021 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
4022 TH_ACK) {
4023 /*
4024 * we see an A from 'dir' which is in SYN_SENT
4025 * state: 'dir' sent an A in response to an SA
4026 * which it received, SYN_SENT -> ESTABLISHED
4027 */
4028 nstate = IPF_TCPS_ESTABLISHED;
4029 rval = 1;
4030 } else if (tcpflags & TH_FIN) {
4031 /*
4032 * we see an F from 'dir' which is in SYN_SENT
4033 * state and wants to close its side of the
4034 * connection; SYN_SENT -> FIN_WAIT_1
4035 */
4036 nstate = IPF_TCPS_FIN_WAIT_1;
4037 rval = 1;
4038 } else if ((tcpflags & TH_OPENING) == TH_OPENING) {
4039 /*
4040 * we see an SA from 'dir' which is already in
4041 * SYN_SENT state, this means we have a
4042 * simultaneous open; SYN_SENT -> SYN_RECEIVED
4043 */
4044 nstate = IPF_TCPS_SYN_RECEIVED;
4045 rval = 1;
4046 }
4047 break;
4048
4049 case IPF_TCPS_SYN_RECEIVED: /* 2 */
4050 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
4051 /*
4052 * we see an A from 'dir' which was in
4053 * SYN_RECEIVED state so it must now be in
4054 * established state, SYN_RECEIVED ->
4055 * ESTABLISHED
4056 */
4057 nstate = IPF_TCPS_ESTABLISHED;
4058 rval = 1;
4059 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
4060 TH_OPENING) {
4061 /*
4062 * We see an SA from 'dir' which is already in
4063 * SYN_RECEIVED state.
4064 */
4065 rval = 2;
4066 } else if (tcpflags & TH_FIN) {
4067 /*
4068 * we see an F from 'dir' which is in
4069 * SYN_RECEIVED state and wants to close its
4070 * side of the connection; SYN_RECEIVED ->
4071 * FIN_WAIT_1
4072 */
4073 nstate = IPF_TCPS_FIN_WAIT_1;
4074 rval = 1;
4075 }
4076 break;
4077
4078 case IPF_TCPS_HALF_ESTAB: /* 3 */
4079 if (tcpflags & TH_FIN) {
4080 nstate = IPF_TCPS_FIN_WAIT_1;
4081 rval = 1;
4082 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) {
4083 /*
4084 * If we've picked up a connection in mid
4085 * flight, we could be looking at a follow on
4086 * packet from the same direction as the one
4087 * that created this state. Recognise it but
4088 * do not advance the entire connection's
4089 * state.
4090 */
4091 switch (ostate)
4092 {
4093 case IPF_TCPS_LISTEN :
4094 case IPF_TCPS_SYN_SENT :
4095 case IPF_TCPS_SYN_RECEIVED :
4096 rval = 1;
4097 break;
4098 case IPF_TCPS_HALF_ESTAB :
4099 case IPF_TCPS_ESTABLISHED :
4100 nstate = IPF_TCPS_ESTABLISHED;
4101 rval = 1;
4102 break;
4103 default :
4104 break;
4105 }
4106 }
4107 break;
4108
4109 case IPF_TCPS_ESTABLISHED: /* 4 */
4110 rval = 1;
4111 if (tcpflags & TH_FIN) {
4112 /*
4113 * 'dir' closed its side of the connection;
4114 * this gives us a half-closed connection;
4115 * ESTABLISHED -> FIN_WAIT_1
4116 */
4117 if (ostate == IPF_TCPS_FIN_WAIT_1) {
4118 nstate = IPF_TCPS_CLOSING;
4119 } else {
4120 nstate = IPF_TCPS_FIN_WAIT_1;
4121 }
4122 } else if (tcpflags & TH_ACK) {
4123 /*
4124 * an ACK, should we exclude other flags here?
4125 */
4126 if (ostate == IPF_TCPS_FIN_WAIT_1) {
4127 /*
4128 * We know the other side did an active
4129 * close, so we are ACKing the recvd
4130 * FIN packet (does the window matching
4131 * code guarantee this?) and go into
4132 * CLOSE_WAIT state; this gives us a
4133 * half-closed connection
4134 */
4135 nstate = IPF_TCPS_CLOSE_WAIT;
4136 } else if (ostate < IPF_TCPS_CLOSE_WAIT) {
4137 /*
4138 * still a fully established
4139 * connection reset timeout
4140 */
4141 nstate = IPF_TCPS_ESTABLISHED;
4142 }
4143 }
4144 break;
4145
4146 case IPF_TCPS_CLOSE_WAIT: /* 5 */
4147 rval = 1;
4148 if (tcpflags & TH_FIN) {
4149 /*
4150 * application closed and 'dir' sent a FIN,
4151 * we're now going into LAST_ACK state
4152 */
4153 nstate = IPF_TCPS_LAST_ACK;
4154 } else {
4155 /*
4156 * we remain in CLOSE_WAIT because the other
4157 * side has closed already and we did not
4158 * close our side yet; reset timeout
4159 */
4160 nstate = IPF_TCPS_CLOSE_WAIT;
4161 }
4162 break;
4163
4164 case IPF_TCPS_FIN_WAIT_1: /* 6 */
4165 rval = 1;
4166 if ((tcpflags & TH_ACK) &&
4167 ostate > IPF_TCPS_CLOSE_WAIT) {
4168 /*
4169 * if the other side is not active anymore
4170 * it has sent us a FIN packet that we are
4171 * ack'ing now with an ACK; this means both
4172 * sides have now closed the connection and
4173 * we go into TIME_WAIT
4174 */
4175 /*
4176 * XXX: how do we know we really are ACKing
4177 * the FIN packet here? does the window code
4178 * guarantee that?
4179 */
4180 nstate = IPF_TCPS_LAST_ACK;
4181 } else {
4182 /*
4183 * we closed our side of the connection
4184 * already but the other side is still active
4185 * (ESTABLISHED/CLOSE_WAIT); continue with
4186 * this half-closed connection
4187 */
4188 nstate = IPF_TCPS_FIN_WAIT_1;
4189 }
4190 break;
4191
4192 case IPF_TCPS_CLOSING: /* 7 */
4193 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) {
4194 nstate = IPF_TCPS_TIME_WAIT;
4195 }
4196 rval = 1;
4197 break;
4198
4199 case IPF_TCPS_LAST_ACK: /* 8 */
4200 if (tcpflags & TH_ACK) {
4201 rval = 1;
4202 }
4203 /*
4204 * we cannot detect when we go out of LAST_ACK state
4205 * to CLOSED because that is based on the reception
4206 * of ACK packets; ipfilter can only detect that a
4207 * packet has been sent by a host
4208 */
4209 break;
4210
4211 case IPF_TCPS_FIN_WAIT_2: /* 9 */
4212 /* NOT USED */
4213 break;
4214
4215 case IPF_TCPS_TIME_WAIT: /* 10 */
4216 /* we're in 2MSL timeout now */
4217 if (ostate == IPF_TCPS_LAST_ACK) {
4218 nstate = IPF_TCPS_CLOSED;
4219 rval = 1;
4220 } else {
4221 rval = 2;
4222 }
4223 break;
4224
4225 case IPF_TCPS_CLOSED: /* 11 */
4226 rval = 2;
4227 break;
4228
4229 default :
4230 #if !defined(_KERNEL)
4231 abort();
4232 #endif
4233 break;
4234 }
4235 }
4236
4237 /*
4238 * If rval == 2 then do not update the queue position, but treat the
4239 * packet as being ok.
4240 */
4241 if (rval == 2)
4242 rval = 1;
4243 else if (rval == 1) {
4244 if (ok)
4245 tqe->tqe_state[dir] = nstate;
4246 if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
4247 ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq,
4248 tqtab + nstate);
4249 }
4250
4251 return (rval);
4252 }
4253
4254
4255 /* ------------------------------------------------------------------------ */
4256 /* Function: ipf_state_log */
4257 /* Returns: Nil */
4258 /* Parameters: softc(I) - pointer to soft context main structure */
4259 /* is(I) - pointer to state structure */
4260 /* type(I) - type of log entry to create */
4261 /* */
4262 /* Creates a state table log entry using the state structure and type info. */
4263 /* passed in. Log packet/byte counts, source/destination address and other */
4264 /* protocol specific information. */
4265 /* ------------------------------------------------------------------------ */
4266 void
ipf_state_log(ipf_main_softc_t * softc,struct ipstate * is,u_int type)4267 ipf_state_log(ipf_main_softc_t *softc, struct ipstate *is, u_int type)
4268 {
4269 #ifdef IPFILTER_LOG
4270 struct ipslog ipsl;
4271 size_t sizes[1];
4272 void *items[1];
4273 int types[1];
4274
4275 /*
4276 * Copy information out of the ipstate_t structure and into the
4277 * structure used for logging.
4278 */
4279 ipsl.isl_type = type;
4280 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
4281 ipsl.isl_bytes[0] = is->is_bytes[0];
4282 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
4283 ipsl.isl_bytes[1] = is->is_bytes[1];
4284 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
4285 ipsl.isl_bytes[2] = is->is_bytes[2];
4286 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
4287 ipsl.isl_bytes[3] = is->is_bytes[3];
4288 ipsl.isl_src = is->is_src;
4289 ipsl.isl_dst = is->is_dst;
4290 ipsl.isl_p = is->is_p;
4291 ipsl.isl_v = is->is_v;
4292 ipsl.isl_flags = is->is_flags;
4293 ipsl.isl_tag = is->is_tag;
4294 ipsl.isl_rulen = is->is_rulen;
4295 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
4296
4297 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
4298 ipsl.isl_sport = is->is_sport;
4299 ipsl.isl_dport = is->is_dport;
4300 if (ipsl.isl_p == IPPROTO_TCP) {
4301 ipsl.isl_state[0] = is->is_state[0];
4302 ipsl.isl_state[1] = is->is_state[1];
4303 }
4304 } else if (ipsl.isl_p == IPPROTO_ICMP) {
4305 ipsl.isl_itype = is->is_icmp.ici_type;
4306 } else if (ipsl.isl_p == IPPROTO_ICMPV6) {
4307 ipsl.isl_itype = is->is_icmp.ici_type;
4308 } else {
4309 ipsl.isl_ps.isl_filler[0] = 0;
4310 ipsl.isl_ps.isl_filler[1] = 0;
4311 }
4312
4313 items[0] = &ipsl;
4314 sizes[0] = sizeof(ipsl);
4315 types[0] = 0;
4316
4317 (void) ipf_log_items(softc, IPL_LOGSTATE, NULL, items, sizes, types, 1);
4318 #endif
4319 }
4320
4321
4322 #ifdef USE_INET6
4323 /* ------------------------------------------------------------------------ */
4324 /* Function: ipf_checkicmp6matchingstate */
4325 /* Returns: ipstate_t* - NULL == no match found, */
4326 /* else pointer to matching state entry */
4327 /* Parameters: fin(I) - pointer to packet information */
4328 /* Locks: NULL == no locks, else Read Lock on ipf_state */
4329 /* */
4330 /* If we've got an ICMPv6 error message, using the information stored in */
4331 /* the ICMPv6 packet, look for a matching state table entry. */
4332 /* ------------------------------------------------------------------------ */
4333 static ipstate_t *
ipf_checkicmp6matchingstate(fr_info_t * fin)4334 ipf_checkicmp6matchingstate(fr_info_t *fin)
4335 {
4336 ipf_main_softc_t *softc = fin->fin_main_soft;
4337 ipf_state_softc_t *softs = softc->ipf_state_soft;
4338 struct icmp6_hdr *ic6, *oic;
4339 ipstate_t *is, **isp;
4340 u_short sport, dport;
4341 i6addr_t dst, src;
4342 u_short savelen;
4343 icmpinfo_t *ic;
4344 fr_info_t ofin;
4345 tcphdr_t *tcp;
4346 ip6_t *oip6;
4347 u_char pr;
4348 u_int hv;
4349
4350 /*
4351 * Does it at least have the return (basic) IP header ?
4352 * Is it an actual recognised ICMP error type?
4353 * Only a basic IP header (no options) should be with
4354 * an ICMP error header.
4355 */
4356 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
4357 !(fin->fin_flx & FI_ICMPERR)) {
4358 SBUMPD(ipf_state_stats, iss_icmp_bad);
4359 return (NULL);
4360 }
4361
4362 ic6 = fin->fin_dp;
4363
4364 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
4365 if (fin->fin_plen < sizeof(*oip6)) {
4366 SBUMPD(ipf_state_stats, iss_icmp_short);
4367 return (NULL);
4368 }
4369
4370 bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
4371 ofin.fin_v = 6;
4372 ofin.fin_ifp = fin->fin_ifp;
4373 ofin.fin_out = !fin->fin_out;
4374 ofin.fin_m = NULL; /* if dereferenced, panic XXX */
4375 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
4376
4377 /*
4378 * We make a fin entry to be able to feed it to
4379 * matchsrcdst. Note that not all fields are necessary
4380 * but this is the cleanest way. Note further we fill
4381 * in fin_mp such that if someone uses it we'll get
4382 * a kernel panic. ipf_matchsrcdst does not use this.
4383 *
4384 * watch out here, as ip is in host order and oip6 in network
4385 * order. Any change we make must be undone afterwards.
4386 */
4387 savelen = oip6->ip6_plen;
4388 oip6->ip6_plen = htons(fin->fin_dlen - ICMPERR_ICMPHLEN);
4389 ofin.fin_flx = FI_NOCKSUM;
4390 ofin.fin_ip = (ip_t *)oip6;
4391 (void) ipf_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
4392 ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
4393 oip6->ip6_plen = savelen;
4394 pr = ofin.fin_p;
4395
4396 /*
4397 * an ICMP error can never generate an ICMP error in response.
4398 */
4399 if (ofin.fin_flx & FI_ICMPERR) {
4400 DT1(iss_icmp6_icmperr, fr_info_t *, &ofin);
4401 SBUMP(ipf_state_stats.iss_icmp6_icmperr);
4402 return (NULL);
4403 }
4404
4405 if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
4406 oic = ofin.fin_dp;
4407 /*
4408 * an ICMP error can only be generated as a result of an
4409 * ICMP query, not as the response on an ICMP error
4410 *
4411 * XXX theoretically ICMP_ECHOREP and the other reply's are
4412 * ICMP query's as well, but adding them here seems strange XXX
4413 */
4414 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) {
4415 DT1(iss_icmp6_notinfo, fr_info_t *, &ofin);
4416 SBUMP(ipf_state_stats.iss_icmp6_notinfo);
4417 return (NULL);
4418 }
4419
4420 /*
4421 * perform a lookup of the ICMP packet in the state table
4422 */
4423 hv = (pr = oip6->ip6_nxt);
4424 src.in6 = oip6->ip6_src;
4425 hv += src.in4.s_addr;
4426 dst.in6 = oip6->ip6_dst;
4427 hv += dst.in4.s_addr;
4428 hv += oic->icmp6_id;
4429 hv += oic->icmp6_seq;
4430 hv = DOUBLE_HASH(hv);
4431
4432 READ_ENTER(&softc->ipf_state);
4433 for (isp = &softs->ipf_state_table[hv];
4434 ((is = *isp) != NULL); ) {
4435 ic = &is->is_icmp;
4436 isp = &is->is_hnext;
4437 if ((is->is_p == pr) &&
4438 !(is->is_pass & FR_NOICMPERR) &&
4439 (oic->icmp6_id == ic->ici_id) &&
4440 (oic->icmp6_seq == ic->ici_seq) &&
4441 (is = ipf_matchsrcdst(&ofin, is, &src,
4442 &dst, NULL, FI_ICMPCMP))) {
4443 /*
4444 * in the state table ICMP query's are stored
4445 * with the type of the corresponding ICMP
4446 * response. Correct here
4447 */
4448 if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
4449 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
4450 (ic->ici_type - 1 == oic->icmp6_type )) {
4451 if (!ipf_allowstateicmp(fin, is, &src))
4452 return (is);
4453 }
4454 }
4455 }
4456 RWLOCK_EXIT(&softc->ipf_state);
4457 SBUMPD(ipf_state_stats, iss_icmp6_miss);
4458 return (NULL);
4459 }
4460
4461 hv = (pr = oip6->ip6_nxt);
4462 src.in6 = oip6->ip6_src;
4463 hv += src.i6[0];
4464 hv += src.i6[1];
4465 hv += src.i6[2];
4466 hv += src.i6[3];
4467 dst.in6 = oip6->ip6_dst;
4468 hv += dst.i6[0];
4469 hv += dst.i6[1];
4470 hv += dst.i6[2];
4471 hv += dst.i6[3];
4472
4473 tcp = NULL;
4474
4475 switch (oip6->ip6_nxt)
4476 {
4477 case IPPROTO_TCP :
4478 case IPPROTO_UDP :
4479 tcp = (tcphdr_t *)(oip6 + 1);
4480 dport = tcp->th_dport;
4481 sport = tcp->th_sport;
4482 hv += dport;
4483 hv += sport;
4484 break;
4485
4486 case IPPROTO_ICMPV6 :
4487 oic = (struct icmp6_hdr *)(oip6 + 1);
4488 hv += oic->icmp6_id;
4489 hv += oic->icmp6_seq;
4490 break;
4491
4492 default :
4493 break;
4494 }
4495
4496 hv = DOUBLE_HASH(hv);
4497
4498 READ_ENTER(&softc->ipf_state);
4499 for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
4500 isp = &is->is_hnext;
4501 /*
4502 * Only allow this icmp though if the
4503 * encapsulated packet was allowed through the
4504 * other way around. Note that the minimal amount
4505 * of info present does not allow for checking against
4506 * tcp internals such as seq and ack numbers.
4507 */
4508 if ((is->is_p != pr) || (is->is_v != 6) ||
4509 (is->is_pass & FR_NOICMPERR))
4510 continue;
4511 is = ipf_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
4512 if ((is != NULL) && (ipf_allowstateicmp(fin, is, &src) == 0))
4513 return (is);
4514 }
4515 RWLOCK_EXIT(&softc->ipf_state);
4516 SBUMPD(ipf_state_stats, iss_icmp_miss);
4517 return (NULL);
4518 }
4519 #endif
4520
4521
4522 /* ------------------------------------------------------------------------ */
4523 /* Function: ipf_sttab_init */
4524 /* Returns: Nil */
4525 /* Parameters: softc(I) - pointer to soft context main structure */
4526 /* tqp(I) - pointer to an array of timeout queues for TCP */
4527 /* */
4528 /* Initialise the array of timeout queues for TCP. */
4529 /* ------------------------------------------------------------------------ */
4530 void
ipf_sttab_init(ipf_main_softc_t * softc,ipftq_t * tqp)4531 ipf_sttab_init(ipf_main_softc_t *softc, ipftq_t *tqp)
4532 {
4533 int i;
4534
4535 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
4536 IPFTQ_INIT(&tqp[i], 0, "ipftq tcp tab");
4537 tqp[i].ifq_next = tqp + i + 1;
4538 }
4539 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
4540 tqp[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcpclosed;
4541 tqp[IPF_TCPS_LISTEN].ifq_ttl = softc->ipf_tcptimeout;
4542 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = softc->ipf_tcpsynsent;
4543 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = softc->ipf_tcpsynrecv;
4544 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = softc->ipf_tcpidletimeout;
4545 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = softc->ipf_tcphalfclosed;
4546 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = softc->ipf_tcphalfclosed;
4547 tqp[IPF_TCPS_CLOSING].ifq_ttl = softc->ipf_tcptimeout;
4548 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = softc->ipf_tcplastack;
4549 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = softc->ipf_tcpclosewait;
4550 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = softc->ipf_tcptimewait;
4551 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = softc->ipf_tcptimeout;
4552 }
4553
4554
4555 /* ------------------------------------------------------------------------ */
4556 /* Function: ipf_sttab_destroy */
4557 /* Returns: Nil */
4558 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
4559 /* */
4560 /* Do whatever is necessary to "destroy" each of the entries in the array */
4561 /* of timeout queues for TCP. */
4562 /* ------------------------------------------------------------------------ */
4563 void
ipf_sttab_destroy(ipftq_t * tqp)4564 ipf_sttab_destroy(ipftq_t *tqp)
4565 {
4566 int i;
4567
4568 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--)
4569 MUTEX_DESTROY(&tqp[i].ifq_lock);
4570 }
4571
4572
4573 /* ------------------------------------------------------------------------ */
4574 /* Function: ipf_state_deref */
4575 /* Returns: Nil */
4576 /* Parameters: softc(I) - pointer to soft context main structure */
4577 /* isp(I) - pointer to pointer to state table entry */
4578 /* */
4579 /* Decrement the reference counter for this state table entry and free it */
4580 /* if there are no more things using it. */
4581 /* */
4582 /* This function is only called when cleaning up after increasing is_ref by */
4583 /* one earlier in the 'code path' so if is_ref is 1 when entering, we do */
4584 /* have an orphan, otherwise not. However there is a possible race between */
4585 /* the entry being deleted via flushing with an ioctl call (that calls the */
4586 /* delete function directly) and the tail end of packet processing so we */
4587 /* need to grab is_lock before doing the check to synchronise the two code */
4588 /* paths. */
4589 /* */
4590 /* When operating in userland (ipftest), we have no timers to clear a state */
4591 /* entry. Therefore, we make a few simple tests before deleting an entry */
4592 /* outright. We compare states on each side looking for a combination of */
4593 /* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK. Then we factor */
4594 /* in packet direction with the interface list to make sure we don't */
4595 /* prematurely delete an entry on a final inbound packet that we're also */
4596 /* supposed to route elsewhere. */
4597 /* */
4598 /* Internal parameters: */
4599 /* state[0] = state of source (host that initiated connection) */
4600 /* state[1] = state of dest (host that accepted the connection) */
4601 /* */
4602 /* dir == 0 : a packet from source to dest */
4603 /* dir == 1 : a packet from dest to source */
4604 /* ------------------------------------------------------------------------ */
4605 void
ipf_state_deref(ipf_main_softc_t * softc,ipstate_t ** isp)4606 ipf_state_deref(ipf_main_softc_t *softc, ipstate_t **isp)
4607 {
4608 ipstate_t *is = *isp;
4609
4610 is = *isp;
4611 *isp = NULL;
4612
4613 MUTEX_ENTER(&is->is_lock);
4614 if (is->is_ref > 1) {
4615 is->is_ref--;
4616 MUTEX_EXIT(&is->is_lock);
4617 #ifndef _KERNEL
4618 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
4619 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
4620 ipf_state_del(softc, is, ISL_EXPIRE);
4621 }
4622 #endif
4623 return;
4624 }
4625 MUTEX_EXIT(&is->is_lock);
4626
4627 WRITE_ENTER(&softc->ipf_state);
4628 ipf_state_del(softc, is, ISL_ORPHAN);
4629 RWLOCK_EXIT(&softc->ipf_state);
4630 }
4631
4632
4633 /* ------------------------------------------------------------------------ */
4634 /* Function: ipf_state_setqueue */
4635 /* Returns: Nil */
4636 /* Parameters: softc(I) - pointer to soft context main structure */
4637 /* is(I) - pointer to state structure */
4638 /* rev(I) - forward(0) or reverse(1) direction */
4639 /* Locks: ipf_state (read or write) */
4640 /* */
4641 /* Put the state entry on its default queue entry, using rev as a helper in */
4642 /* determining which queue it should be placed on. */
4643 /* ------------------------------------------------------------------------ */
4644 void
ipf_state_setqueue(ipf_main_softc_t * softc,ipstate_t * is,int rev)4645 ipf_state_setqueue(ipf_main_softc_t *softc, ipstate_t *is, int rev)
4646 {
4647 ipf_state_softc_t *softs = softc->ipf_state_soft;
4648 ipftq_t *oifq, *nifq;
4649
4650 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
4651 nifq = is->is_tqehead[rev];
4652 else
4653 nifq = NULL;
4654
4655 if (nifq == NULL) {
4656 switch (is->is_p)
4657 {
4658 #ifdef USE_INET6
4659 case IPPROTO_ICMPV6 :
4660 if (rev == 1)
4661 nifq = &softs->ipf_state_icmpacktq;
4662 else
4663 nifq = &softs->ipf_state_icmptq;
4664 break;
4665 #endif
4666 case IPPROTO_ICMP :
4667 if (rev == 1)
4668 nifq = &softs->ipf_state_icmpacktq;
4669 else
4670 nifq = &softs->ipf_state_icmptq;
4671 break;
4672 case IPPROTO_TCP :
4673 nifq = softs->ipf_state_tcptq + is->is_state[rev];
4674 break;
4675
4676 case IPPROTO_UDP :
4677 if (rev == 1)
4678 nifq = &softs->ipf_state_udpacktq;
4679 else
4680 nifq = &softs->ipf_state_udptq;
4681 break;
4682
4683 default :
4684 nifq = &softs->ipf_state_iptq;
4685 break;
4686 }
4687 }
4688
4689 oifq = is->is_sti.tqe_ifq;
4690 /*
4691 * If it's currently on a timeout queue, move it from one queue to
4692 * another, else put it on the end of the newly determined queue.
4693 */
4694 if (oifq != NULL)
4695 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq, nifq);
4696 else
4697 ipf_queueappend(softc->ipf_ticks, &is->is_sti, nifq, is);
4698 return;
4699 }
4700
4701
4702 /* ------------------------------------------------------------------------ */
4703 /* Function: ipf_state_iter */
4704 /* Returns: int - 0 == success, else error */
4705 /* Parameters: softc(I) - pointer to main soft context */
4706 /* token(I) - pointer to ipftoken structure */
4707 /* itp(I) - pointer to ipfgeniter structure */
4708 /* obj(I) - pointer to data description structure */
4709 /* */
4710 /* This function handles the SIOCGENITER ioctl for the state tables and */
4711 /* walks through the list of entries in the state table list (softs->ipf_state_list.) */
4712 /* ------------------------------------------------------------------------ */
4713 static int
ipf_state_iter(ipf_main_softc_t * softc,ipftoken_t * token,ipfgeniter_t * itp,ipfobj_t * obj)4714 ipf_state_iter(ipf_main_softc_t *softc, ipftoken_t *token, ipfgeniter_t *itp,
4715 ipfobj_t *obj)
4716 {
4717 ipf_state_softc_t *softs = softc->ipf_state_soft;
4718 ipstate_t *is, *next, zero;
4719 int error;
4720
4721 if (itp->igi_data == NULL) {
4722 IPFERROR(100026);
4723 return (EFAULT);
4724 }
4725
4726 if (itp->igi_nitems < 1) {
4727 IPFERROR(100027);
4728 return (ENOSPC);
4729 }
4730
4731 if (itp->igi_type != IPFGENITER_STATE) {
4732 IPFERROR(100028);
4733 return (EINVAL);
4734 }
4735
4736 is = token->ipt_data;
4737 if (is == (void *)-1) {
4738 IPFERROR(100029);
4739 return (ESRCH);
4740 }
4741
4742 error = 0;
4743 obj->ipfo_type = IPFOBJ_IPSTATE;
4744 obj->ipfo_size = sizeof(ipstate_t);
4745
4746 READ_ENTER(&softc->ipf_state);
4747
4748 is = token->ipt_data;
4749 if (is == NULL) {
4750 next = softs->ipf_state_list;
4751 } else {
4752 next = is->is_next;
4753 }
4754
4755 /*
4756 * If we find a state entry to use, bump its reference count so that
4757 * it can be used for is_next when we come back.
4758 */
4759 if (next != NULL) {
4760 MUTEX_ENTER(&next->is_lock);
4761 next->is_ref++;
4762 MUTEX_EXIT(&next->is_lock);
4763 token->ipt_data = next;
4764 } else {
4765 bzero(&zero, sizeof(zero));
4766 next = &zero;
4767 token->ipt_data = NULL;
4768 }
4769 if (next->is_next == NULL)
4770 ipf_token_mark_complete(token);
4771
4772 RWLOCK_EXIT(&softc->ipf_state);
4773
4774 obj->ipfo_ptr = itp->igi_data;
4775 error = ipf_outobjk(softc, obj, next);
4776 if (is != NULL)
4777 ipf_state_deref(softc, &is);
4778
4779 return (error);
4780 }
4781
4782
4783 /* ------------------------------------------------------------------------ */
4784 /* Function: ipf_state_gettable */
4785 /* Returns: int - 0 = success, else error */
4786 /* Parameters: softc(I) - pointer to main soft context */
4787 /* softs(I) - pointer to state context structure */
4788 /* data(I) - pointer to ioctl data */
4789 /* */
4790 /* This function handles ioctl requests for tables of state information. */
4791 /* At present the only table it deals with is the hash bucket statistics. */
4792 /* ------------------------------------------------------------------------ */
4793 static int
ipf_state_gettable(ipf_main_softc_t * softc,ipf_state_softc_t * softs,char * data)4794 ipf_state_gettable(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
4795 char *data)
4796 {
4797 ipftable_t table;
4798 int error;
4799
4800 error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE);
4801 if (error != 0)
4802 return (error);
4803
4804 if (table.ita_type != IPFTABLE_BUCKETS) {
4805 IPFERROR(100031);
4806 return (EINVAL);
4807 }
4808
4809 error = COPYOUT(softs->ipf_state_stats.iss_bucketlen, table.ita_table,
4810 softs->ipf_state_size * sizeof(u_int));
4811 if (error != 0) {
4812 IPFERROR(100032);
4813 error = EFAULT;
4814 }
4815 return (error);
4816 }
4817
4818
4819 /* ------------------------------------------------------------------------ */
4820 /* Function: ipf_state_setpending */
4821 /* Returns: Nil */
4822 /* Parameters: softc(I) - pointer to main soft context */
4823 /* is(I) - pointer to state structure */
4824 /* Locks: ipf_state (read or write) */
4825 /* */
4826 /* Put the state entry on to the pending queue - this queue has a very */
4827 /* short lifetime where items are put that can't be deleted straight away */
4828 /* because of locking issues but we want to delete them ASAP, anyway. */
4829 /* ------------------------------------------------------------------------ */
4830 void
ipf_state_setpending(ipf_main_softc_t * softc,ipstate_t * is)4831 ipf_state_setpending(ipf_main_softc_t *softc, ipstate_t *is)
4832 {
4833 ipf_state_softc_t *softs = softc->ipf_state_soft;
4834 ipftq_t *oifq;
4835
4836 oifq = is->is_sti.tqe_ifq;
4837 if (oifq != NULL)
4838 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq,
4839 &softs->ipf_state_pending);
4840 else
4841 ipf_queueappend(softc->ipf_ticks, &is->is_sti,
4842 &softs->ipf_state_pending, is);
4843
4844 MUTEX_ENTER(&is->is_lock);
4845 if (is->is_me != NULL) {
4846 *is->is_me = NULL;
4847 is->is_me = NULL;
4848 is->is_ref--;
4849 }
4850 MUTEX_EXIT(&is->is_lock);
4851 }
4852
4853
4854 /* ------------------------------------------------------------------------ */
4855 /* Function: ipf_state_matchflush */
4856 /* Returns: int - 0 = success, else error */
4857 /* Parameters: softc(I) - pointer to main soft context */
4858 /* data(I) - pointer to state structure */
4859 /* Locks: ipf_state (read or write) */
4860 /* */
4861 /* Flush all entries from the list of state entries that match the */
4862 /* properties in the array loaded. */
4863 /* ------------------------------------------------------------------------ */
4864 int
ipf_state_matchflush(ipf_main_softc_t * softc,caddr_t data)4865 ipf_state_matchflush(ipf_main_softc_t *softc, caddr_t data)
4866 {
4867 ipf_state_softc_t *softs = softc->ipf_state_soft;
4868 int *array, flushed, error;
4869 ipstate_t *state, *statenext;
4870 ipfobj_t obj;
4871
4872 error = ipf_matcharray_load(softc, data, &obj, &array);
4873 if (error != 0)
4874 return (error);
4875
4876 flushed = 0;
4877
4878 for (state = softs->ipf_state_list; state != NULL; state = statenext) {
4879 statenext = state->is_next;
4880 if (ipf_state_matcharray(state, array, softc->ipf_ticks) == 0) {
4881 ipf_state_del(softc, state, ISL_FLUSH);
4882 flushed++;
4883 }
4884 }
4885
4886 obj.ipfo_retval = flushed;
4887 error = BCOPYOUT(&obj, data, sizeof(obj));
4888
4889 KFREES(array, array[0] * sizeof(*array));
4890
4891 return (error);
4892 }
4893
4894
4895 /* ------------------------------------------------------------------------ */
4896 /* Function: ipf_state_matcharray */
4897 /* Returns: int - 0 = no match, 1 = match */
4898 /* Parameters: state(I) - pointer to state structure */
4899 /* array(I) - pointer to ipf matching expression */
4900 /* ticks(I) - current value of ipfilter tick timer */
4901 /* Locks: ipf_state (read or write) */
4902 /* */
4903 /* Compare a state entry with the match array passed in and return a value */
4904 /* to indicate whether or not the matching was successful. */
4905 /* ------------------------------------------------------------------------ */
4906 static int
ipf_state_matcharray(ipstate_t * state,int * array,u_long ticks)4907 ipf_state_matcharray(ipstate_t *state, int *array, u_long ticks)
4908 {
4909 int i, n, *x, rv, p;
4910 ipfexp_t *e;
4911
4912 rv = 0;
4913 n = array[0];
4914 x = array + 1;
4915
4916 for (; n > 0; x += 3 + x[3], rv = 0) {
4917 e = (ipfexp_t *)x;
4918 n -= e->ipfe_size;
4919 if (x[0] == IPF_EXP_END)
4920 break;
4921
4922 /*
4923 * If we need to match the protocol and that doesn't match,
4924 * don't even both with the instruction array.
4925 */
4926 p = e->ipfe_cmd >> 16;
4927 if ((p != 0) && (p != state->is_p))
4928 break;
4929
4930 switch (e->ipfe_cmd)
4931 {
4932 case IPF_EXP_IP_PR :
4933 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4934 rv |= (state->is_p == e->ipfe_arg0[i]);
4935 }
4936 break;
4937
4938 case IPF_EXP_IP_SRCADDR :
4939 if (state->is_v != 4)
4940 break;
4941 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4942 rv |= ((state->is_saddr &
4943 e->ipfe_arg0[i * 2 + 1]) ==
4944 e->ipfe_arg0[i * 2]);
4945 }
4946 break;
4947
4948 case IPF_EXP_IP_DSTADDR :
4949 if (state->is_v != 4)
4950 break;
4951 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4952 rv |= ((state->is_daddr &
4953 e->ipfe_arg0[i * 2 + 1]) ==
4954 e->ipfe_arg0[i * 2]);
4955 }
4956 break;
4957
4958 case IPF_EXP_IP_ADDR :
4959 if (state->is_v != 4)
4960 break;
4961 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4962 rv |= ((state->is_saddr &
4963 e->ipfe_arg0[i * 2 + 1]) ==
4964 e->ipfe_arg0[i * 2]) ||
4965 ((state->is_daddr &
4966 e->ipfe_arg0[i * 2 + 1]) ==
4967 e->ipfe_arg0[i * 2]);
4968 }
4969 break;
4970
4971 #ifdef USE_INET6
4972 case IPF_EXP_IP6_SRCADDR :
4973 if (state->is_v != 6)
4974 break;
4975 for (i = 0; !rv && i < x[3]; i++) {
4976 rv |= IP6_MASKEQ(&state->is_src.in6,
4977 &e->ipfe_arg0[i * 8 + 4],
4978 &e->ipfe_arg0[i * 8]);
4979 }
4980 break;
4981
4982 case IPF_EXP_IP6_DSTADDR :
4983 if (state->is_v != 6)
4984 break;
4985 for (i = 0; !rv && i < x[3]; i++) {
4986 rv |= IP6_MASKEQ(&state->is_dst.in6,
4987 &e->ipfe_arg0[i * 8 + 4],
4988 &e->ipfe_arg0[i * 8]);
4989 }
4990 break;
4991
4992 case IPF_EXP_IP6_ADDR :
4993 if (state->is_v != 6)
4994 break;
4995 for (i = 0; !rv && i < x[3]; i++) {
4996 rv |= IP6_MASKEQ(&state->is_src.in6,
4997 &e->ipfe_arg0[i * 8 + 4],
4998 &e->ipfe_arg0[i * 8]) ||
4999 IP6_MASKEQ(&state->is_dst.in6,
5000 &e->ipfe_arg0[i * 8 + 4],
5001 &e->ipfe_arg0[i * 8]);
5002 }
5003 break;
5004 #endif
5005
5006 case IPF_EXP_UDP_PORT :
5007 case IPF_EXP_TCP_PORT :
5008 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5009 rv |= (state->is_sport == e->ipfe_arg0[i]) ||
5010 (state->is_dport == e->ipfe_arg0[i]);
5011 }
5012 break;
5013
5014 case IPF_EXP_UDP_SPORT :
5015 case IPF_EXP_TCP_SPORT :
5016 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5017 rv |= (state->is_sport == e->ipfe_arg0[i]);
5018 }
5019 break;
5020
5021 case IPF_EXP_UDP_DPORT :
5022 case IPF_EXP_TCP_DPORT :
5023 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5024 rv |= (state->is_dport == e->ipfe_arg0[i]);
5025 }
5026 break;
5027
5028 case IPF_EXP_TCP_STATE :
5029 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5030 rv |= (state->is_state[0] == e->ipfe_arg0[i]) ||
5031 (state->is_state[1] == e->ipfe_arg0[i]);
5032 }
5033 break;
5034
5035 case IPF_EXP_IDLE_GT :
5036 rv |= (ticks - state->is_touched > e->ipfe_arg0[0]);
5037 break;
5038 }
5039
5040 /*
5041 * Factor in doing a negative match.
5042 */
5043 rv ^= e->ipfe_not;
5044
5045 if (rv == 0)
5046 break;
5047 }
5048
5049 return (rv);
5050 }
5051
5052
5053 /* ------------------------------------------------------------------------ */
5054 /* Function: ipf_state_settimeout */
5055 /* Returns: int 0 = success, else failure */
5056 /* Parameters: softc(I) - pointer to main soft context */
5057 /* t(I) - pointer to tuneable being changed */
5058 /* p(I) - pointer to the new value */
5059 /* */
5060 /* Sets a timeout value for one of the many timeout queues. We find the */
5061 /* correct queue using a somewhat manual process of comparing the timeout */
5062 /* names for each specific value available and calling ipf_apply_timeout on */
5063 /* that queue so that all of the items on it are updated accordingly. */
5064 /* ------------------------------------------------------------------------ */
5065 int
ipf_state_settimeout(struct ipf_main_softc_s * softc,ipftuneable_t * t,ipftuneval_t * p)5066 ipf_state_settimeout(struct ipf_main_softc_s *softc, ipftuneable_t *t,
5067 ipftuneval_t *p)
5068 {
5069 ipf_state_softc_t *softs = softc->ipf_state_soft;
5070
5071 /*
5072 * In case there is nothing to do...
5073 */
5074 if (*t->ipft_pint == p->ipftu_int)
5075 return (0);
5076
5077 if (!strncmp(t->ipft_name, "tcp_", 4))
5078 return (ipf_settimeout_tcp(t, p, softs->ipf_state_tcptq));
5079
5080 if (!strcmp(t->ipft_name, "udp_timeout")) {
5081 ipf_apply_timeout(&softs->ipf_state_udptq, p->ipftu_int);
5082 } else if (!strcmp(t->ipft_name, "udp_ack_timeout")) {
5083 ipf_apply_timeout(&softs->ipf_state_udpacktq, p->ipftu_int);
5084 } else if (!strcmp(t->ipft_name, "icmp_timeout")) {
5085 ipf_apply_timeout(&softs->ipf_state_icmptq, p->ipftu_int);
5086 } else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) {
5087 ipf_apply_timeout(&softs->ipf_state_icmpacktq, p->ipftu_int);
5088 } else if (!strcmp(t->ipft_name, "ip_timeout")) {
5089 ipf_apply_timeout(&softs->ipf_state_iptq, p->ipftu_int);
5090 } else {
5091 IPFERROR(100034);
5092 return (ESRCH);
5093 }
5094
5095 /*
5096 * Update the tuneable being set.
5097 */
5098 *t->ipft_pint = p->ipftu_int;
5099
5100 return (0);
5101 }
5102
5103
5104 /* ------------------------------------------------------------------------ */
5105 /* Function: ipf_state_rehash */
5106 /* Returns: int 0 = success, else failure */
5107 /* Parameters: softc(I) - pointer to main soft context */
5108 /* t(I) - pointer to tuneable being changed */
5109 /* p(I) - pointer to the new value */
5110 /* */
5111 /* To change the size of the state hash table at runtime, a new table has */
5112 /* to be allocated and then all of the existing entries put in it, bumping */
5113 /* up the bucketlength for it as we go along. */
5114 /* ------------------------------------------------------------------------ */
5115 int
ipf_state_rehash(ipf_main_softc_t * softc,ipftuneable_t * t,ipftuneval_t * p)5116 ipf_state_rehash(ipf_main_softc_t *softc, ipftuneable_t *t, ipftuneval_t *p)
5117 {
5118 ipf_state_softc_t *softs = softc->ipf_state_soft;
5119 ipstate_t **newtab, *is;
5120 u_long *newseed;
5121 u_int *bucketlens;
5122 u_int maxbucket;
5123 u_int newsize;
5124 u_int hv;
5125 int i;
5126
5127 newsize = p->ipftu_int;
5128 /*
5129 * In case there is nothing to do...
5130 */
5131 if (newsize == softs->ipf_state_size)
5132 return (0);
5133
5134 KMALLOCS(newtab, ipstate_t **, newsize * sizeof(ipstate_t *));
5135 if (newtab == NULL) {
5136 IPFERROR(100035);
5137 return (ENOMEM);
5138 }
5139
5140 KMALLOCS(bucketlens, u_int *, newsize * sizeof(u_int));
5141 if (bucketlens == NULL) {
5142 KFREES(newtab, newsize * sizeof(*softs->ipf_state_table));
5143 IPFERROR(100036);
5144 return (ENOMEM);
5145 }
5146
5147 newseed = ipf_state_seed_alloc(newsize, softs->ipf_state_max);
5148 if (newseed == NULL) {
5149 KFREES(bucketlens, newsize * sizeof(*bucketlens));
5150 KFREES(newtab, newsize * sizeof(*newtab));
5151 IPFERROR(100037);
5152 return (ENOMEM);
5153 }
5154
5155 for (maxbucket = 0, i = newsize; i > 0; i >>= 1)
5156 maxbucket++;
5157 maxbucket *= 2;
5158
5159 bzero((char *)newtab, newsize * sizeof(ipstate_t *));
5160 bzero((char *)bucketlens, newsize * sizeof(u_int));
5161
5162 WRITE_ENTER(&softc->ipf_state);
5163
5164 if (softs->ipf_state_table != NULL) {
5165 KFREES(softs->ipf_state_table,
5166 softs->ipf_state_size * sizeof(*softs->ipf_state_table));
5167 }
5168 softs->ipf_state_table = newtab;
5169
5170 if (softs->ipf_state_seed != NULL) {
5171 KFREES(softs->ipf_state_seed,
5172 softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
5173 }
5174 softs->ipf_state_seed = newseed;
5175
5176 if (softs->ipf_state_stats.iss_bucketlen != NULL) {
5177 KFREES(softs->ipf_state_stats.iss_bucketlen,
5178 softs->ipf_state_size * sizeof(u_int));
5179 }
5180 softs->ipf_state_stats.iss_bucketlen = bucketlens;
5181 softs->ipf_state_maxbucket = maxbucket;
5182 softs->ipf_state_size = newsize;
5183
5184 /*
5185 * Walk through the entire list of state table entries and put them
5186 * in the new state table, somewhere. Because we have a new table,
5187 * we need to restart the counter of how many chains are in use.
5188 */
5189 softs->ipf_state_stats.iss_inuse = 0;
5190 for (is = softs->ipf_state_list; is != NULL; is = is->is_next) {
5191 is->is_hnext = NULL;
5192 is->is_phnext = NULL;
5193 hv = is->is_hv % softs->ipf_state_size;
5194
5195 if (softs->ipf_state_table[hv] != NULL)
5196 softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
5197 else
5198 softs->ipf_state_stats.iss_inuse++;
5199 is->is_phnext = softs->ipf_state_table + hv;
5200 is->is_hnext = softs->ipf_state_table[hv];
5201 softs->ipf_state_table[hv] = is;
5202 softs->ipf_state_stats.iss_bucketlen[hv]++;
5203 }
5204 RWLOCK_EXIT(&softc->ipf_state);
5205
5206 return (0);
5207 }
5208
5209
5210 /* ------------------------------------------------------------------------ */
5211 /* Function: ipf_state_add_tq */
5212 /* Returns: ipftq_t * - NULL = failure, else pointer to new timeout */
5213 /* queue */
5214 /* Parameters: softc(I) - pointer to main soft context */
5215 /* ttl(I) - pointer to the ttl for the new queue */
5216 /* */
5217 /* Request a pointer to a timeout queue that has a ttl as given by the */
5218 /* value being passed in. The timeout queue is added tot the list of those */
5219 /* used internally for stateful filtering. */
5220 /* ------------------------------------------------------------------------ */
5221 ipftq_t *
ipf_state_add_tq(ipf_main_softc_t * softc,int ttl)5222 ipf_state_add_tq(ipf_main_softc_t *softc, int ttl)
5223 {
5224 ipf_state_softc_t *softs = softc->ipf_state_soft;
5225
5226 return (ipf_addtimeoutqueue(softc, &softs->ipf_state_usertq, ttl));
5227 }
5228
5229
5230 #ifndef _KERNEL
5231 /*
5232 * Display the built up state table rules and mapping entries.
5233 */
5234 void
ipf_state_dump(ipf_main_softc_t * softc,void * arg)5235 ipf_state_dump(ipf_main_softc_t *softc, void *arg)
5236 {
5237 ipf_state_softc_t *softs = arg;
5238 ipstate_t *ips;
5239
5240 printf("List of active state sessions:\n");
5241 for (ips = softs->ipf_state_list; ips != NULL; )
5242 ips = printstate(ips, opts & (OPT_DEBUG|OPT_VERBOSE),
5243 softc->ipf_ticks);
5244 }
5245 #endif
5246