1
2 /*
3 * Copyright (C) 2012 by Darren Reed.
4 *
5 * See the IPFILTER.LICENCE file for details on licencing.
6 *
7 * Copyright 2008 Sun Microsystems.
8 *
9 * $Id$
10 */
11 #if defined(KERNEL) || defined(_KERNEL)
12 # undef KERNEL
13 # undef _KERNEL
14 # define KERNEL 1
15 # define _KERNEL 1
16 #endif
17 #include <sys/errno.h>
18 #include <sys/types.h>
19 #include <sys/param.h>
20 #include <sys/file.h>
21 #if defined(_KERNEL) && defined(__FreeBSD__) && \
22 !defined(KLD_MODULE)
23 #include "opt_inet6.h"
24 #endif
25 #if !defined(_KERNEL) && !defined(__KERNEL__)
26 # include <stdio.h>
27 # include <stdlib.h>
28 # include <string.h>
29 # define _KERNEL
30 # include <sys/uio.h>
31 # undef _KERNEL
32 #endif
33 #if defined(_KERNEL) && defined(__FreeBSD__)
34 # include <sys/filio.h>
35 # include <sys/fcntl.h>
36 #else
37 # include <sys/ioctl.h>
38 #endif
39 #include <sys/time.h>
40 # include <sys/protosw.h>
41 #include <sys/socket.h>
42 #if defined(_KERNEL)
43 # include <sys/systm.h>
44 # if !defined(__SVR4)
45 # include <sys/mbuf.h>
46 # endif
47 #endif
48 #if defined(__SVR4)
49 # include <sys/filio.h>
50 # include <sys/byteorder.h>
51 # ifdef _KERNEL
52 # include <sys/dditypes.h>
53 # endif
54 # include <sys/stream.h>
55 # include <sys/kmem.h>
56 #endif
57
58 #include <net/if.h>
59 #ifdef sun
60 # include <net/af.h>
61 #endif
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
66 # include <netinet/tcp_fsm.h>
67 #include <netinet/udp.h>
68 #include <netinet/ip_icmp.h>
69 #if !defined(_KERNEL)
70 # include "ipf.h"
71 #endif
72 #include "netinet/ip_compat.h"
73 #include "netinet/ip_fil.h"
74 #include "netinet/ip_nat.h"
75 #include "netinet/ip_frag.h"
76 #include "netinet/ip_state.h"
77 #include "netinet/ip_proxy.h"
78 #include "netinet/ip_lookup.h"
79 #include "netinet/ip_dstlist.h"
80 #include "netinet/ip_sync.h"
81 #ifdef USE_INET6
82 #include <netinet/icmp6.h>
83 #endif
84 #ifdef __FreeBSD__
85 # include <sys/malloc.h>
86 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
87 # include <sys/libkern.h>
88 # include <sys/systm.h>
89 # endif
90 #endif
91 /* END OF INCLUDES */
92
93
94
95
96 static ipftuneable_t ipf_state_tuneables[] = {
97 { { (void *)offsetof(ipf_state_softc_t, ipf_state_max) },
98 "state_max", 1, 0x7fffffff,
99 stsizeof(ipf_state_softc_t, ipf_state_max),
100 0, NULL, NULL },
101 { { (void *)offsetof(ipf_state_softc_t, ipf_state_size) },
102 "state_size", 1, 0x7fffffff,
103 stsizeof(ipf_state_softc_t, ipf_state_size),
104 0, NULL, ipf_state_rehash },
105 { { (void *)offsetof(ipf_state_softc_t, ipf_state_lock) },
106 "state_lock", 0, 1,
107 stsizeof(ipf_state_softc_t, ipf_state_lock),
108 IPFT_RDONLY, NULL, NULL },
109 { { (void *)offsetof(ipf_state_softc_t, ipf_state_maxbucket) },
110 "state_maxbucket", 1, 0x7fffffff,
111 stsizeof(ipf_state_softc_t, ipf_state_maxbucket),
112 0, NULL, NULL },
113 { { (void *)offsetof(ipf_state_softc_t, ipf_state_logging) },
114 "state_logging",0, 1,
115 stsizeof(ipf_state_softc_t, ipf_state_logging),
116 0, NULL, NULL },
117 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_high) },
118 "state_wm_high",2, 100,
119 stsizeof(ipf_state_softc_t, ipf_state_wm_high),
120 0, NULL, NULL },
121 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_low) },
122 "state_wm_low", 1, 99,
123 stsizeof(ipf_state_softc_t, ipf_state_wm_low),
124 0, NULL, NULL },
125 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_freq) },
126 "state_wm_freq",2, 999999,
127 stsizeof(ipf_state_softc_t, ipf_state_wm_freq),
128 0, NULL, NULL },
129 { { NULL },
130 NULL, 0, 0,
131 0,
132 0, NULL, NULL }
133 };
134
/*
 * Counter helpers.  Each one names a member through "softs", so a local
 * variable called softs must be in scope wherever they are used.
 *
 *   SINCL(x)         - increments softs->x via ATOMIC_INCL()
 *   SBUMP(x)         - plain post-increment of softs->x
 *   SBUMPD(x, y)     - post-increments softs->x.y, then invokes DT(y)
 *   SBUMPDX(x, y, z) - post-increments softs->x.y, then invokes DT(z)
 */
#define	SINCL(x)		ATOMIC_INCL(softs->x)
#define	SBUMP(x)		(softs->x)++
#define	SBUMPD(x, y)							\
	do {								\
		(softs->x.y)++;						\
		DT(y);							\
	} while (0)
#define	SBUMPDX(x, y, z)						\
	do {								\
		(softs->x.y)++;						\
		DT(z);							\
	} while (0)
139
140 #ifdef USE_INET6
141 static ipstate_t *ipf_checkicmp6matchingstate(fr_info_t *);
142 #endif
143 static int ipf_allowstateicmp(fr_info_t *, ipstate_t *, i6addr_t *);
144 static ipstate_t *ipf_matchsrcdst(fr_info_t *, ipstate_t *, i6addr_t *,
145 i6addr_t *, tcphdr_t *, u_32_t);
146 static ipstate_t *ipf_checkicmpmatchingstate(fr_info_t *);
147 static int ipf_state_flush_entry(ipf_main_softc_t *, void *);
148 static ips_stat_t *ipf_state_stats(ipf_main_softc_t *);
149 static int ipf_state_del(ipf_main_softc_t *, ipstate_t *, int);
150 static int ipf_state_remove(ipf_main_softc_t *, caddr_t);
151 static int ipf_state_match(ipstate_t *is1, ipstate_t *is2);
152 static int ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2);
153 static int ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2);
154 static int ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2);
155 static int ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2);
156 static int ipf_state_matchports(udpinfo_t *is1, udpinfo_t *is2);
157 static int ipf_state_matcharray(ipstate_t *, int *, u_long);
158 static void ipf_ipsmove(ipf_state_softc_t *, ipstate_t *, u_int);
159 static int ipf_state_tcp(ipf_main_softc_t *, ipf_state_softc_t *,
160 fr_info_t *, tcphdr_t *, ipstate_t *);
161 static int ipf_tcpoptions(ipf_state_softc_t *, fr_info_t *,
162 tcphdr_t *, tcpdata_t *);
163 static ipstate_t *ipf_state_clone(fr_info_t *, tcphdr_t *, ipstate_t *);
164 static void ipf_fixinisn(fr_info_t *, ipstate_t *);
165 static void ipf_fixoutisn(fr_info_t *, ipstate_t *);
166 static void ipf_checknewisn(fr_info_t *, ipstate_t *);
167 static int ipf_state_iter(ipf_main_softc_t *, ipftoken_t *,
168 ipfgeniter_t *, ipfobj_t *);
169 static int ipf_state_gettable(ipf_main_softc_t *, ipf_state_softc_t *,
170 char *);
171 static int ipf_state_tcpinwindow(struct fr_info *, struct tcpdata *,
172 struct tcpdata *, tcphdr_t *, int);
173
174 static int ipf_state_getent(ipf_main_softc_t *, ipf_state_softc_t *,
175 caddr_t);
176 static int ipf_state_putent(ipf_main_softc_t *, ipf_state_softc_t *,
177 caddr_t);
178
/* Timeout constants, both expressed through IPF_TTLVAL(). */
#define	ONE_DAY		IPF_TTLVAL(1 * 86400)	/* 1 day */
#define	FIVE_DAYS	(5 * ONE_DAY)

/*
 * Second hashing step: add the seed stored at index (x % ipf_state_size)
 * to x and reduce the sum modulo ipf_state_size.  Evaluates x twice and
 * needs a local variable named softs in scope.
 */
#define	DOUBLE_HASH(x)							\
	(((x) + softs->ipf_state_seed[(x) %				\
	    softs->ipf_state_size]) % softs->ipf_state_size)
183
184
185 /* ------------------------------------------------------------------------ */
186 /* Function: ipf_state_main_load */
187 /* Returns: int - 0 == success, -1 == failure */
188 /* Parameters: Nil */
189 /* */
190 /* A null-op function that exists as a placeholder so that the flow in */
191 /* other functions is obvious. */
192 /* ------------------------------------------------------------------------ */
int
ipf_state_main_load(void)
{
	/* Nothing to set up at load time, so this always reports success. */
	return (0);
}
198
199
200 /* ------------------------------------------------------------------------ */
201 /* Function: ipf_state_main_unload */
202 /* Returns: int - 0 == success, -1 == failure */
203 /* Parameters: Nil */
204 /* */
205 /* A null-op function that exists as a placeholder so that the flow in */
206 /* other functions is obvious. */
207 /* ------------------------------------------------------------------------ */
int
ipf_state_main_unload(void)
{
	/* Nothing to tear down at unload time, so this always reports success. */
	return (0);
}
213
214
215 /* ------------------------------------------------------------------------ */
216 /* Function: ipf_state_soft_create */
217 /* Returns: void * - NULL = failure, else pointer to soft context */
218 /* Parameters: softc(I) - pointer to soft context main structure */
219 /* */
220 /* Create a new state soft context structure and populate it with the list */
221 /* of tunables and other default settings. */
222 /* ------------------------------------------------------------------------ */
223 void *
ipf_state_soft_create(ipf_main_softc_t * softc)224 ipf_state_soft_create(ipf_main_softc_t *softc)
225 {
226 ipf_state_softc_t *softs;
227
228 KMALLOC(softs, ipf_state_softc_t *);
229 if (softs == NULL)
230 return (NULL);
231
232 bzero((char *)softs, sizeof(*softs));
233
234 softs->ipf_state_tune = ipf_tune_array_copy(softs,
235 sizeof(ipf_state_tuneables),
236 ipf_state_tuneables);
237 if (softs->ipf_state_tune == NULL) {
238 ipf_state_soft_destroy(softc, softs);
239 return (NULL);
240 }
241 if (ipf_tune_array_link(softc, softs->ipf_state_tune) == -1) {
242 ipf_state_soft_destroy(softc, softs);
243 return (NULL);
244 }
245
246 #ifdef IPFILTER_LOG
247 softs->ipf_state_logging = 1;
248 #else
249 softs->ipf_state_logging = 0;
250 #endif
251 softs->ipf_state_size = IPSTATE_SIZE,
252 softs->ipf_state_maxbucket = 0;
253 softs->ipf_state_wm_freq = IPF_TTLVAL(10);
254 softs->ipf_state_max = IPSTATE_MAX;
255 softs->ipf_state_wm_last = 0;
256 softs->ipf_state_wm_high = 99;
257 softs->ipf_state_wm_low = 90;
258 softs->ipf_state_inited = 0;
259 softs->ipf_state_lock = 0;
260 softs->ipf_state_doflush = 0;
261
262 return (softs);
263 }
264
265
266 /* ------------------------------------------------------------------------ */
267 /* Function: ipf_state_soft_destroy */
268 /* Returns: Nil */
269 /* Parameters: softc(I) - pointer to soft context main structure */
270 /* arg(I) - pointer to local context to use */
271 /* */
272 /* Undo only what we did in soft create: unlink and free the tunables and */
273 /* free the soft context structure itself. */
274 /* ------------------------------------------------------------------------ */
275 void
ipf_state_soft_destroy(ipf_main_softc_t * softc,void * arg)276 ipf_state_soft_destroy(ipf_main_softc_t *softc, void *arg)
277 {
278 ipf_state_softc_t *softs = arg;
279
280 if (softs->ipf_state_tune != NULL) {
281 ipf_tune_array_unlink(softc, softs->ipf_state_tune);
282 KFREES(softs->ipf_state_tune, sizeof(ipf_state_tuneables));
283 softs->ipf_state_tune = NULL;
284 }
285
286 KFREE(softs);
287 }
288
289 static void *
ipf_state_seed_alloc(u_int state_size,u_int state_max)290 ipf_state_seed_alloc(u_int state_size, u_int state_max)
291 {
292 u_int i;
293 u_long *state_seed;
294 KMALLOCS(state_seed, u_long *, state_size * sizeof(*state_seed));
295 if (state_seed == NULL)
296 return (NULL);
297
298 for (i = 0; i < state_size; i++) {
299 /*
300 * XXX - ipf_state_seed[X] should be a random number of sorts.
301 */
302 #ifdef __FreeBSD__
303 state_seed[i] = arc4random();
304 #else
305 state_seed[i] = ((u_long)state_seed + i) * state_size;
306 state_seed[i] ^= 0xa5a55a5a;
307 state_seed[i] *= (u_long)state_seed;
308 state_seed[i] ^= 0x5a5aa5a5;
309 state_seed[i] *= state_max;
310 #endif
311 }
312 return (state_seed);
313 }
314
315
316 /* ------------------------------------------------------------------------ */
317 /* Function: ipf_state_soft_init */
/* Returns:     int - 0 == success, < 0 == failure (-1, -2 or -3)           */
319 /* Parameters: softc(I) - pointer to soft context main structure */
320 /* arg(I) - pointer to local context to use */
321 /* */
322 /* Initialise the state soft context structure so it is ready for use. */
323 /* This involves: */
324 /* - allocating a hash table and zero'ing it out */
325 /* - building a secondary table of seeds for double hashing to make it more */
326 /* difficult to attempt to attack the hash table itself (for DoS) */
327 /* - initialise all of the timeout queues, including a table for TCP, some */
328 /* pairs of query/response for UDP and other IP protocols (typically the */
329 /* reply queue has a shorter timeout than the query) */
330 /* ------------------------------------------------------------------------ */
331 int
ipf_state_soft_init(ipf_main_softc_t * softc,void * arg)332 ipf_state_soft_init(ipf_main_softc_t *softc, void *arg)
333 {
334 ipf_state_softc_t *softs = arg;
335 int i;
336
337 KMALLOCS(softs->ipf_state_table,
338 ipstate_t **, softs->ipf_state_size * sizeof(ipstate_t *));
339 if (softs->ipf_state_table == NULL)
340 return (-1);
341
342 bzero((char *)softs->ipf_state_table,
343 softs->ipf_state_size * sizeof(ipstate_t *));
344
345 softs->ipf_state_seed = ipf_state_seed_alloc(softs->ipf_state_size,
346 softs->ipf_state_max);
347 if (softs->ipf_state_seed == NULL)
348 return (-2);
349
350 KMALLOCS(softs->ipf_state_stats.iss_bucketlen, u_int *,
351 softs->ipf_state_size * sizeof(u_int));
352 if (softs->ipf_state_stats.iss_bucketlen == NULL)
353 return (-3);
354
355 bzero((char *)softs->ipf_state_stats.iss_bucketlen,
356 softs->ipf_state_size * sizeof(u_int));
357
358 if (softs->ipf_state_maxbucket == 0) {
359 for (i = softs->ipf_state_size; i > 0; i >>= 1)
360 softs->ipf_state_maxbucket++;
361 softs->ipf_state_maxbucket *= 2;
362 }
363
364 ipf_sttab_init(softc, softs->ipf_state_tcptq);
365 softs->ipf_state_stats.iss_tcptab = softs->ipf_state_tcptq;
366 softs->ipf_state_tcptq[IPF_TCP_NSTATES - 1].ifq_next =
367 &softs->ipf_state_udptq;
368
369 IPFTQ_INIT(&softs->ipf_state_udptq, softc->ipf_udptimeout,
370 "ipftq udp tab");
371 softs->ipf_state_udptq.ifq_next = &softs->ipf_state_udpacktq;
372
373 IPFTQ_INIT(&softs->ipf_state_udpacktq, softc->ipf_udpacktimeout,
374 "ipftq udpack tab");
375 softs->ipf_state_udpacktq.ifq_next = &softs->ipf_state_icmptq;
376
377 IPFTQ_INIT(&softs->ipf_state_icmptq, softc->ipf_icmptimeout,
378 "ipftq icmp tab");
379 softs->ipf_state_icmptq.ifq_next = &softs->ipf_state_icmpacktq;
380
381 IPFTQ_INIT(&softs->ipf_state_icmpacktq, softc->ipf_icmpacktimeout,
382 "ipftq icmpack tab");
383 softs->ipf_state_icmpacktq.ifq_next = &softs->ipf_state_iptq;
384
385 IPFTQ_INIT(&softs->ipf_state_iptq, softc->ipf_iptimeout,
386 "ipftq iptimeout tab");
387 softs->ipf_state_iptq.ifq_next = &softs->ipf_state_pending;
388
389 IPFTQ_INIT(&softs->ipf_state_pending, IPF_HZ_DIVIDE, "ipftq pending");
390 softs->ipf_state_pending.ifq_next = &softs->ipf_state_deletetq;
391
392 IPFTQ_INIT(&softs->ipf_state_deletetq, 1, "ipftq delete");
393 softs->ipf_state_deletetq.ifq_next = NULL;
394
395 MUTEX_INIT(&softs->ipf_stinsert, "ipf state insert mutex");
396
397
398 softs->ipf_state_wm_last = softc->ipf_ticks;
399 softs->ipf_state_inited = 1;
400
401 return (0);
402 }
403
404
405 /* ------------------------------------------------------------------------ */
406 /* Function: ipf_state_soft_fini */
407 /* Returns: int - 0 = success, -1 = failure */
408 /* Parameters: softc(I) - pointer to soft context main structure */
409 /* arg(I) - pointer to local context to use */
410 /* */
411 /* Release and destroy any resources acquired or initialised so that */
412 /* IPFilter can be unloaded or re-initialised. */
413 /* ------------------------------------------------------------------------ */
414 int
ipf_state_soft_fini(ipf_main_softc_t * softc,void * arg)415 ipf_state_soft_fini(ipf_main_softc_t *softc, void *arg)
416 {
417 ipf_state_softc_t *softs = arg;
418 ipftq_t *ifq, *ifqnext;
419 ipstate_t *is;
420
421 while ((is = softs->ipf_state_list) != NULL)
422 ipf_state_del(softc, is, ISL_UNLOAD);
423
424 /*
425 * Proxy timeout queues are not cleaned here because although they
426 * exist on the state list, appr_unload is called after
427 * ipf_state_unload and the proxies actually are responsible for them
428 * being created. Should the proxy timeouts have their own list?
429 * There's no real justification as this is the only complication.
430 */
431 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
432 ifqnext = ifq->ifq_next;
433
434 if (ipf_deletetimeoutqueue(ifq) == 0)
435 ipf_freetimeoutqueue(softc, ifq);
436 }
437
438 softs->ipf_state_stats.iss_inuse = 0;
439 softs->ipf_state_stats.iss_active = 0;
440
441 if (softs->ipf_state_inited == 1) {
442 softs->ipf_state_inited = 0;
443 ipf_sttab_destroy(softs->ipf_state_tcptq);
444 MUTEX_DESTROY(&softs->ipf_state_udptq.ifq_lock);
445 MUTEX_DESTROY(&softs->ipf_state_icmptq.ifq_lock);
446 MUTEX_DESTROY(&softs->ipf_state_udpacktq.ifq_lock);
447 MUTEX_DESTROY(&softs->ipf_state_icmpacktq.ifq_lock);
448 MUTEX_DESTROY(&softs->ipf_state_iptq.ifq_lock);
449 MUTEX_DESTROY(&softs->ipf_state_deletetq.ifq_lock);
450 MUTEX_DESTROY(&softs->ipf_state_pending.ifq_lock);
451 MUTEX_DESTROY(&softs->ipf_stinsert);
452 }
453
454 if (softs->ipf_state_table != NULL) {
455 KFREES(softs->ipf_state_table,
456 softs->ipf_state_size * sizeof(*softs->ipf_state_table));
457 softs->ipf_state_table = NULL;
458 }
459
460 if (softs->ipf_state_seed != NULL) {
461 KFREES(softs->ipf_state_seed,
462 softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
463 softs->ipf_state_seed = NULL;
464 }
465
466 if (softs->ipf_state_stats.iss_bucketlen != NULL) {
467 KFREES(softs->ipf_state_stats.iss_bucketlen,
468 softs->ipf_state_size * sizeof(u_int));
469 softs->ipf_state_stats.iss_bucketlen = NULL;
470 }
471
472 return (0);
473 }
474
475
476 /* ------------------------------------------------------------------------ */
477 /* Function: ipf_state_setlock */
478 /* Returns: Nil */
479 /* Parameters: arg(I) - pointer to local context to use */
480 /* tmp(I) - new value for lock */
481 /* */
482 /* Stub function that allows for external manipulation of ipf_state_lock */
483 /* ------------------------------------------------------------------------ */
484 void
ipf_state_setlock(void * arg,int tmp)485 ipf_state_setlock(void *arg, int tmp)
486 {
487 ipf_state_softc_t *softs = arg;
488
489 softs->ipf_state_lock = tmp;
490 }
491
492
493 /* ------------------------------------------------------------------------ */
494 /* Function: ipf_state_stats */
/* Returns:     ips_stat_t * - pointer to state stats structure             */
496 /* Parameters: softc(I) - pointer to soft context main structure */
497 /* */
498 /* Put all the current numbers and pointers into a single struct and return */
499 /* a pointer to it. */
500 /* ------------------------------------------------------------------------ */
501 static ips_stat_t *
ipf_state_stats(ipf_main_softc_t * softc)502 ipf_state_stats(ipf_main_softc_t *softc)
503 {
504 ipf_state_softc_t *softs = softc->ipf_state_soft;
505 ips_stat_t *issp = &softs->ipf_state_stats;
506
507 issp->iss_state_size = softs->ipf_state_size;
508 issp->iss_state_max = softs->ipf_state_max;
509 issp->iss_table = softs->ipf_state_table;
510 issp->iss_list = softs->ipf_state_list;
511 issp->iss_ticks = softc->ipf_ticks;
512
513 #ifdef IPFILTER_LOGGING
514 issp->iss_log_ok = ipf_log_logok(softc, IPF_LOGSTATE);
515 issp->iss_log_fail = ipf_log_failures(softc, IPF_LOGSTATE);
516 #else
517 issp->iss_log_ok = 0;
518 issp->iss_log_fail = 0;
519 #endif
520 return (issp);
521 }
522
523 /* ------------------------------------------------------------------------ */
524 /* Function: ipf_state_remove */
525 /* Returns: int - 0 == success, != 0 == failure */
526 /* Parameters: softc(I) - pointer to soft context main structure */
527 /* data(I) - pointer to state structure to delete from table */
528 /* */
529 /* Search for a state structure that matches the one passed, according to */
530 /* the IP addresses and other protocol specific information. */
531 /* ------------------------------------------------------------------------ */
532 static int
ipf_state_remove(ipf_main_softc_t * softc,caddr_t data)533 ipf_state_remove(ipf_main_softc_t *softc, caddr_t data)
534 {
535 ipf_state_softc_t *softs = softc->ipf_state_soft;
536 ipstate_t *sp, st;
537 int error;
538
539 sp = &st;
540 error = ipf_inobj(softc, data, NULL, &st, IPFOBJ_IPSTATE);
541 if (error)
542 return (EFAULT);
543
544 WRITE_ENTER(&softc->ipf_state);
545 for (sp = softs->ipf_state_list; sp; sp = sp->is_next)
546 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
547 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src,
548 sizeof(st.is_src)) &&
549 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst,
550 sizeof(st.is_dst)) &&
551 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps,
552 sizeof(st.is_ps))) {
553 ipf_state_del(softc, sp, ISL_REMOVE);
554 RWLOCK_EXIT(&softc->ipf_state);
555 return (0);
556 }
557 RWLOCK_EXIT(&softc->ipf_state);
558
559 IPFERROR(100001);
560 return (ESRCH);
561 }
562
563
564 /* ------------------------------------------------------------------------ */
565 /* Function: ipf_state_ioctl */
566 /* Returns: int - 0 == success, != 0 == failure */
567 /* Parameters: softc(I) - pointer to soft context main structure */
568 /* data(I) - pointer to ioctl data */
569 /* cmd(I) - ioctl command integer */
570 /* mode(I) - file mode bits used with open */
571 /* uid(I) - uid of process making the ioctl call */
572 /* ctx(I) - pointer specific to context of the call */
573 /* */
574 /* Processes an ioctl call made to operate on the IP Filter state device. */
575 /* ------------------------------------------------------------------------ */
576 int
ipf_state_ioctl(ipf_main_softc_t * softc,caddr_t data,ioctlcmd_t cmd,int mode,int uid,void * ctx)577 ipf_state_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd,
578 int mode, int uid, void *ctx)
579 {
580 ipf_state_softc_t *softs = softc->ipf_state_soft;
581 int arg, ret, error = 0;
582 SPL_INT(s);
583
584 switch (cmd)
585 {
586 /*
587 * Delete an entry from the state table.
588 */
589 case SIOCDELST :
590 error = ipf_state_remove(softc, data);
591 break;
592
593 /*
594 * Flush the state table
595 */
596 case SIOCIPFFL :
597 error = BCOPYIN(data, &arg, sizeof(arg));
598 if (error != 0) {
599 IPFERROR(100002);
600 error = EFAULT;
601
602 } else {
603 WRITE_ENTER(&softc->ipf_state);
604 ret = ipf_state_flush(softc, arg, 4);
605 RWLOCK_EXIT(&softc->ipf_state);
606
607 error = BCOPYOUT(&ret, data, sizeof(ret));
608 if (error != 0) {
609 IPFERROR(100003);
610 error = EFAULT;
611 }
612 }
613 break;
614
615 #ifdef USE_INET6
616 case SIOCIPFL6 :
617 error = BCOPYIN(data, &arg, sizeof(arg));
618 if (error != 0) {
619 IPFERROR(100004);
620 error = EFAULT;
621
622 } else {
623 WRITE_ENTER(&softc->ipf_state);
624 ret = ipf_state_flush(softc, arg, 6);
625 RWLOCK_EXIT(&softc->ipf_state);
626
627 error = BCOPYOUT(&ret, data, sizeof(ret));
628 if (error != 0) {
629 IPFERROR(100005);
630 error = EFAULT;
631 }
632 }
633 break;
634 #endif
635
636 case SIOCMATCHFLUSH :
637 WRITE_ENTER(&softc->ipf_state);
638 error = ipf_state_matchflush(softc, data);
639 RWLOCK_EXIT(&softc->ipf_state);
640 break;
641
642 #ifdef IPFILTER_LOG
643 /*
644 * Flush the state log.
645 */
646 case SIOCIPFFB :
647 if (!(mode & FWRITE)) {
648 IPFERROR(100008);
649 error = EPERM;
650 } else {
651 int tmp;
652
653 tmp = ipf_log_clear(softc, IPL_LOGSTATE);
654 error = BCOPYOUT(&tmp, data, sizeof(tmp));
655 if (error != 0) {
656 IPFERROR(100009);
657 error = EFAULT;
658 }
659 }
660 break;
661
662 /*
663 * Turn logging of state information on/off.
664 */
665 case SIOCSETLG :
666 if (!(mode & FWRITE)) {
667 IPFERROR(100010);
668 error = EPERM;
669 } else {
670 error = BCOPYIN(data, &softs->ipf_state_logging,
671 sizeof(softs->ipf_state_logging));
672 if (error != 0) {
673 IPFERROR(100011);
674 error = EFAULT;
675 }
676 }
677 break;
678
679 /*
680 * Return the current state of logging.
681 */
682 case SIOCGETLG :
683 error = BCOPYOUT(&softs->ipf_state_logging, data,
684 sizeof(softs->ipf_state_logging));
685 if (error != 0) {
686 IPFERROR(100012);
687 error = EFAULT;
688 }
689 break;
690
691 /*
692 * Return the number of bytes currently waiting to be read.
693 */
694 case FIONREAD :
695 arg = ipf_log_bytesused(softc, IPL_LOGSTATE);
696 error = BCOPYOUT(&arg, data, sizeof(arg));
697 if (error != 0) {
698 IPFERROR(100013);
699 error = EFAULT;
700 }
701 break;
702 #endif
703
704 /*
705 * Get the current state statistics.
706 */
707 case SIOCGETFS :
708 error = ipf_outobj(softc, data, ipf_state_stats(softc),
709 IPFOBJ_STATESTAT);
710 break;
711
712 /*
713 * Lock/Unlock the state table. (Locking prevents any changes, which
714 * means no packets match).
715 */
716 case SIOCSTLCK :
717 if (!(mode & FWRITE)) {
718 IPFERROR(100014);
719 error = EPERM;
720 } else {
721 error = ipf_lock(data, &softs->ipf_state_lock);
722 }
723 break;
724
725 /*
726 * Add an entry to the current state table.
727 */
728 case SIOCSTPUT :
729 if (!softs->ipf_state_lock || !(mode &FWRITE)) {
730 IPFERROR(100015);
731 error = EACCES;
732 break;
733 }
734 error = ipf_state_putent(softc, softs, data);
735 break;
736
737 /*
738 * Get a state table entry.
739 */
740 case SIOCSTGET :
741 if (!softs->ipf_state_lock) {
742 IPFERROR(100016);
743 error = EACCES;
744 break;
745 }
746 error = ipf_state_getent(softc, softs, data);
747 break;
748
749 case SIOCGENITER :
750 {
751 ipftoken_t *token;
752 ipfgeniter_t iter;
753 ipfobj_t obj;
754
755 error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER);
756 if (error != 0)
757 break;
758
759 SPL_SCHED(s);
760 token = ipf_token_find(softc, IPFGENITER_STATE, uid, ctx);
761 if (token != NULL) {
762 error = ipf_state_iter(softc, token, &iter, &obj);
763 WRITE_ENTER(&softc->ipf_tokens);
764 ipf_token_deref(softc, token);
765 RWLOCK_EXIT(&softc->ipf_tokens);
766 } else {
767 IPFERROR(100018);
768 error = ESRCH;
769 }
770 SPL_X(s);
771 break;
772 }
773
774 case SIOCGTABL :
775 error = ipf_state_gettable(softc, softs, data);
776 break;
777
778 case SIOCIPFDELTOK :
779 error = BCOPYIN(data, &arg, sizeof(arg));
780 if (error != 0) {
781 IPFERROR(100019);
782 error = EFAULT;
783 } else {
784 SPL_SCHED(s);
785 error = ipf_token_del(softc, arg, uid, ctx);
786 SPL_X(s);
787 }
788 break;
789
790 case SIOCGTQTAB :
791 error = ipf_outobj(softc, data, softs->ipf_state_tcptq,
792 IPFOBJ_STATETQTAB);
793 break;
794
795 default :
796 IPFERROR(100020);
797 error = EINVAL;
798 break;
799 }
800 return (error);
801 }
802
803
804 /* ------------------------------------------------------------------------ */
805 /* Function: ipf_state_getent */
806 /* Returns: int - 0 == success, != 0 == failure */
807 /* Parameters: softc(I) - pointer to soft context main structure */
808 /* softs(I) - pointer to state context structure */
809 /* data(I) - pointer to state structure to retrieve from table*/
810 /* */
811 /* Copy out state information from the kernel to a user space process. If */
812 /* there is a filter rule associated with the state entry, copy that out */
813 /* as well. The entry to copy out is taken from the value of "ips_next" in */
814 /* the struct passed in and if not null and not found in the list of current*/
815 /* state entries, the retrieval fails. */
816 /* ------------------------------------------------------------------------ */
817 static int
ipf_state_getent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,caddr_t data)818 ipf_state_getent(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
819 caddr_t data)
820 {
821 ipstate_t *is, *isn;
822 ipstate_save_t ips;
823 int error;
824
825 error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
826 if (error)
827 return (EFAULT);
828
829 READ_ENTER(&softc->ipf_state);
830 isn = ips.ips_next;
831 if (isn == NULL) {
832 isn = softs->ipf_state_list;
833 if (isn == NULL) {
834 if (ips.ips_next == NULL) {
835 RWLOCK_EXIT(&softc->ipf_state);
836 IPFERROR(100021);
837 return (ENOENT);
838 }
839 return (0);
840 }
841 } else {
842 /*
843 * Make sure the pointer we're copying from exists in the
844 * current list of entries. Security precaution to prevent
845 * copying of random kernel data.
846 */
847 for (is = softs->ipf_state_list; is; is = is->is_next)
848 if (is == isn)
849 break;
850 if (!is) {
851 RWLOCK_EXIT(&softc->ipf_state);
852 IPFERROR(100022);
853 return (ESRCH);
854 }
855 }
856 ips.ips_next = isn->is_next;
857 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
858 ips.ips_rule = isn->is_rule;
859 if (isn->is_rule != NULL)
860 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
861 sizeof(ips.ips_fr));
862 RWLOCK_EXIT(&softc->ipf_state);
863 error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
864 return (error);
865 }
866
867
868 /* ------------------------------------------------------------------------ */
869 /* Function: ipf_state_putent */
870 /* Returns: int - 0 == success, != 0 == failure */
871 /* Parameters: softc(I) - pointer to soft context main structure */
872 /* softs(I) - pointer to state context structure */
873 /* data(I) - pointer to state information struct */
874 /* */
875 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */
876 /* the state table. If the state info. includes a pointer to a filter rule */
877 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
/* output).                                                                 */
879 /* ------------------------------------------------------------------------ */
880 int
ipf_state_putent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,caddr_t data)881 ipf_state_putent(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
882 caddr_t data)
883 {
884 ipstate_t *is, *isn;
885 ipstate_save_t ips;
886 int error, out, i;
887 frentry_t *fr;
888 char *name;
889
890 error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
891 if (error != 0)
892 return (error);
893
894 KMALLOC(isn, ipstate_t *);
895 if (isn == NULL) {
896 IPFERROR(100023);
897 return (ENOMEM);
898 }
899
900 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
901 bzero((char *)isn, offsetof(struct ipstate, is_pkts));
902 isn->is_sti.tqe_pnext = NULL;
903 isn->is_sti.tqe_next = NULL;
904 isn->is_sti.tqe_ifq = NULL;
905 isn->is_sti.tqe_parent = isn;
906 isn->is_ifp[0] = NULL;
907 isn->is_ifp[1] = NULL;
908 isn->is_ifp[2] = NULL;
909 isn->is_ifp[3] = NULL;
910 isn->is_sync = NULL;
911 fr = ips.ips_rule;
912
913 if (fr == NULL) {
914 int inserr;
915
916 READ_ENTER(&softc->ipf_state);
917 inserr = ipf_state_insert(softc, isn, 0);
918 MUTEX_EXIT(&isn->is_lock);
919 RWLOCK_EXIT(&softc->ipf_state);
920
921 return (inserr);
922 }
923
924 if (isn->is_flags & SI_NEWFR) {
925 KMALLOC(fr, frentry_t *);
926 if (fr == NULL) {
927 KFREE(isn);
928 IPFERROR(100024);
929 return (ENOMEM);
930 }
931 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
932 out = fr->fr_flags & FR_OUTQUE ? 1 : 0;
933 isn->is_rule = fr;
934 ips.ips_is.is_rule = fr;
935 MUTEX_NUKE(&fr->fr_lock);
936 MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
937
938 /*
939 * Look up all the interface names in the rule.
940 */
941 for (i = 0; i < FR_NUM(fr->fr_ifnames); i++) {
942 if (fr->fr_ifnames[i] == -1) {
943 fr->fr_ifas[i] = NULL;
944 continue;
945 }
946 name = FR_NAME(fr, fr_ifnames[i]);
947 fr->fr_ifas[i] = ipf_resolvenic(softc, name,
948 fr->fr_family);
949 }
950
951 for (i = 0; i < FR_NUM(isn->is_ifname); i++) {
952 name = isn->is_ifname[i];
953 isn->is_ifp[i] = ipf_resolvenic(softc, name,
954 isn->is_v);
955 }
956
957 fr->fr_ref = 0;
958 fr->fr_dsize = 0;
959 fr->fr_data = NULL;
960 fr->fr_type = FR_T_NONE;
961
962 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[0],
963 fr->fr_family);
964 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[1],
965 fr->fr_family);
966 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_dif,
967 fr->fr_family);
968
969 /*
970 * send a copy back to userland of what we ended up
971 * to allow for verification.
972 */
973 error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
974 if (error != 0) {
975 KFREE(isn);
976 MUTEX_DESTROY(&fr->fr_lock);
977 KFREE(fr);
978 IPFERROR(100025);
979 return (EFAULT);
980 }
981 READ_ENTER(&softc->ipf_state);
982 error = ipf_state_insert(softc, isn, 0);
983 MUTEX_EXIT(&isn->is_lock);
984 RWLOCK_EXIT(&softc->ipf_state);
985
986 } else {
987 READ_ENTER(&softc->ipf_state);
988 for (is = softs->ipf_state_list; is; is = is->is_next)
989 if (is->is_rule == fr) {
990 error = ipf_state_insert(softc, isn, 0);
991 MUTEX_EXIT(&isn->is_lock);
992 break;
993 }
994
995 if (is == NULL) {
996 KFREE(isn);
997 isn = NULL;
998 }
999 RWLOCK_EXIT(&softc->ipf_state);
1000
1001 if (isn == NULL) {
1002 IPFERROR(100033);
1003 error = ESRCH;
1004 }
1005 }
1006
1007 return (error);
1008 }
1009
1010
1011 /* ------------------------------------------------------------------------ */
1012 /* Function: ipf_state_insert */
1013 /* Returns: int - 0 == success, -1 == failure */
1014 /* Parameters: softc(I) - pointer to soft context main structure */
/*              is(I)    - pointer to state structure                       */
1016 /* rev(I) - flag indicating direction of packet */
1017 /* */
1018 /* Inserts a state structure into the hash table (for lookups) and the list */
1019 /* of state entries (for enumeration). Resolves all of the interface names */
1020 /* to pointers and adjusts running stats for the hash table as appropriate. */
1021 /* */
1022 /* This function can fail if the filter rule has had a population policy of */
1023 /* IP addresses used with stateful filtering assigned to it. */
1024 /* */
1025 /* Locking: it is assumed that some kind of lock on ipf_state is held. */
1026 /* Exits with is_lock initialised and held - *EVEN IF ERROR*. */
1027 /* ------------------------------------------------------------------------ */
1028 int
ipf_state_insert(ipf_main_softc_t * softc,ipstate_t * is,int rev)1029 ipf_state_insert(ipf_main_softc_t *softc, ipstate_t *is, int rev)
1030 {
1031 ipf_state_softc_t *softs = softc->ipf_state_soft;
1032 frentry_t *fr;
1033 u_int hv;
1034 int i;
1035
1036 /*
1037 * Look up all the interface names in the state entry.
1038 */
1039 for (i = 0; i < FR_NUM(is->is_ifp); i++) {
1040 if (is->is_ifp[i] != NULL)
1041 continue;
1042 is->is_ifp[i] = ipf_resolvenic(softc, is->is_ifname[i],
1043 is->is_v);
1044 }
1045
1046 /*
1047 * If we could trust is_hv, then the modulus would not be needed,
1048 * but when running with IPFILTER_SYNC, this stops bad values.
1049 */
1050 hv = is->is_hv % softs->ipf_state_size;
1051 /* TRACE is, hv */
1052 is->is_hv = hv;
1053
1054 /*
1055 * We need to get both of these locks...the first because it is
1056 * possible that once the insert is complete another packet might
1057 * come along, match the entry and want to update it.
1058 */
1059 MUTEX_INIT(&is->is_lock, "ipf state entry");
1060 MUTEX_ENTER(&is->is_lock);
1061 MUTEX_ENTER(&softs->ipf_stinsert);
1062
1063 fr = is->is_rule;
1064 if (fr != NULL) {
1065 if ((fr->fr_srctrack.ht_max_nodes != 0) &&
1066 (ipf_ht_node_add(softc, &fr->fr_srctrack,
1067 is->is_family, &is->is_src) == -1)) {
1068 SBUMPD(ipf_state_stats, iss_max_track);
1069 MUTEX_EXIT(&softs->ipf_stinsert);
1070 return (-1);
1071 }
1072
1073 MUTEX_ENTER(&fr->fr_lock);
1074 fr->fr_ref++;
1075 MUTEX_EXIT(&fr->fr_lock);
1076 fr->fr_statecnt++;
1077 }
1078
1079 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
1080 DT(iss_wild_plus_one);
1081 SINCL(ipf_state_stats.iss_wild);
1082 }
1083
1084 SBUMP(ipf_state_stats.iss_proto[is->is_p]);
1085 SBUMP(ipf_state_stats.iss_active_proto[is->is_p]);
1086
1087 /*
1088 * add into list table.
1089 */
1090 if (softs->ipf_state_list != NULL)
1091 softs->ipf_state_list->is_pnext = &is->is_next;
1092 is->is_pnext = &softs->ipf_state_list;
1093 is->is_next = softs->ipf_state_list;
1094 softs->ipf_state_list = is;
1095
1096 if (softs->ipf_state_table[hv] != NULL)
1097 softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
1098 else
1099 softs->ipf_state_stats.iss_inuse++;
1100 is->is_phnext = softs->ipf_state_table + hv;
1101 is->is_hnext = softs->ipf_state_table[hv];
1102 softs->ipf_state_table[hv] = is;
1103 softs->ipf_state_stats.iss_bucketlen[hv]++;
1104 softs->ipf_state_stats.iss_active++;
1105 MUTEX_EXIT(&softs->ipf_stinsert);
1106
1107 ipf_state_setqueue(softc, is, rev);
1108
1109 return (0);
1110 }
1111
1112
1113 /* ------------------------------------------------------------------------ */
1114 /* Function: ipf_state_matchipv4addrs */
1115 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
1116 /* 0 no match */
1117 /* Parameters: is1, is2 pointers to states we are checking */
1118 /* */
1119 /* Function matches IPv4 addresses it returns strong match for ICMP proto */
1120 /* even there is only reverse match */
1121 /* ------------------------------------------------------------------------ */
1122 static int
ipf_state_matchipv4addrs(ipstate_t * is1,ipstate_t * is2)1123 ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2)
1124 {
1125 int rv;
1126
1127 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr)
1128 rv = 2;
1129 else if (is1->is_saddr == is2->is_daddr &&
1130 is1->is_daddr == is2->is_saddr) {
1131 /* force strong match for ICMP protocol */
1132 rv = (is1->is_p == IPPROTO_ICMP) ? 2 : 1;
1133 }
1134 else
1135 rv = 0;
1136
1137 return (rv);
1138 }
1139
1140
1141 /* ------------------------------------------------------------------------ */
1142 /* Function: ipf_state_matchipv6addrs */
1143 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
1144 /* 0 no match */
1145 /* Parameters: is1, is2 pointers to states we are checking */
1146 /* */
1147 /* Function matches IPv6 addresses it returns strong match for ICMP proto */
1148 /* even there is only reverse match */
1149 /* ------------------------------------------------------------------------ */
1150 static int
ipf_state_matchipv6addrs(ipstate_t * is1,ipstate_t * is2)1151 ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2)
1152 {
1153 int rv;
1154
1155 if (IP6_EQ(&is1->is_src, &is2->is_src) &&
1156 IP6_EQ(&is1->is_dst, &is2->is_dst))
1157 rv = 2;
1158 else if (IP6_EQ(&is1->is_src, &is2->is_dst) &&
1159 IP6_EQ(&is1->is_dst, &is2->is_src)) {
1160 /* force strong match for ICMPv6 protocol */
1161 rv = (is1->is_p == IPPROTO_ICMPV6) ? 2 : 1;
1162 }
1163 else
1164 rv = 0;
1165
1166 return (rv);
1167 }
1168
1169
1170 /* ------------------------------------------------------------------------ */
1171 /* Function: ipf_state_matchaddresses */
1172 /* Returns: int - 2 addresses match, 1 reverse match, zero no match */
1173 /* Parameters: is1, is2 pointers to states we are checking */
1174 /* */
1175 /* function retruns true if two pairs of addresses belong to single */
1176 /* connection. suppose there are two endpoints: */
1177 /* endpoint1 1.1.1.1 */
1178 /* endpoint2 1.1.1.2 */
1179 /* */
1180 /* the state is established by packet flying from .1 to .2 so we see: */
1181 /* is1->src = 1.1.1.1 */
1182 /* is1->dst = 1.1.1.2 */
1183 /* now endpoint 1.1.1.2 sends answer */
1184 /* retreives is1 record created by first packat and compares it with is2 */
1185 /* temporal record, is2 is initialized as follows: */
1186 /* is2->src = 1.1.1.2 */
1187 /* is2->dst = 1.1.1.1 */
1188 /* in this case 1 will be returned */
1189 /* */
1190 /* the ipf_matchaddresses() assumes those two records to be same. of course */
1191 /* the ipf_matchaddresses() also assume records are same in case you pass */
1192 /* identical arguments (i.e. ipf_matchaddress(is1, is1) would return 2 */
1193 /* ------------------------------------------------------------------------ */
1194 static int
ipf_state_matchaddresses(ipstate_t * is1,ipstate_t * is2)1195 ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2)
1196 {
1197 int rv;
1198
1199 if (is1->is_v == 4) {
1200 rv = ipf_state_matchipv4addrs(is1, is2);
1201 }
1202 else {
1203 rv = ipf_state_matchipv6addrs(is1, is2);
1204 }
1205
1206 return (rv);
1207 }
1208
1209
1210 /* ------------------------------------------------------------------------ */
1211 /* Function: ipf_matchports */
1212 /* Returns: int - 2 match, 1 rverse match, 0 no match */
1213 /* Parameters: ppairs1, ppairs - src, dst ports we want to match */
1214 /* */
1215 /* performs the same match for isps members as for addresses */
1216 /* ------------------------------------------------------------------------ */
1217 static int
ipf_state_matchports(udpinfo_t * ppairs1,udpinfo_t * ppairs2)1218 ipf_state_matchports(udpinfo_t *ppairs1, udpinfo_t *ppairs2)
1219 {
1220 int rv;
1221
1222 if (ppairs1->us_sport == ppairs2->us_sport &&
1223 ppairs1->us_dport == ppairs2->us_dport)
1224 rv = 2;
1225 else if (ppairs1->us_sport == ppairs2->us_dport &&
1226 ppairs1->us_dport == ppairs2->us_sport)
1227 rv = 1;
1228 else
1229 rv = 0;
1230
1231 return (rv);
1232 }
1233
1234
1235 /* ------------------------------------------------------------------------ */
1236 /* Function: ipf_matchisps */
1237 /* Returns: int - nonzero if isps members match, 0 nomatch */
1238 /* Parameters: is1, is2 - states we want to match */
1239 /* */
1240 /* performs the same match for isps members as for addresses */
1241 /* ------------------------------------------------------------------------ */
1242 static int
ipf_state_matchisps(ipstate_t * is1,ipstate_t * is2)1243 ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2)
1244 {
1245 int rv;
1246
1247 if (is1->is_p == is2->is_p) {
1248 switch (is1->is_p)
1249 {
1250 case IPPROTO_TCP :
1251 case IPPROTO_UDP :
1252 case IPPROTO_GRE :
1253 /* greinfo_t can be also interpreted as port pair */
1254 rv = ipf_state_matchports(&is1->is_ps.is_us,
1255 &is2->is_ps.is_us);
1256 break;
1257
1258 case IPPROTO_ICMP :
1259 case IPPROTO_ICMPV6 :
1260 /* force strong match for ICMP datagram. */
1261 if (bcmp(&is1->is_ps, &is2->is_ps,
1262 sizeof(icmpinfo_t)) == 0) {
1263 rv = 2;
1264 } else {
1265 rv = 0;
1266 }
1267 break;
1268
1269 default:
1270 rv = 0;
1271 }
1272 } else {
1273 rv = 0;
1274 }
1275
1276 return (rv);
1277 }
1278
1279
1280 /* ------------------------------------------------------------------------ */
1281 /* Function: ipf_state_match */
1282 /* Returns: int - nonzero match, zero no match */
1283 /* Parameters: is1, is2 - states we want to match */
1284 /* */
1285 /* ------------------------------------------------------------------------ */
1286 static int
ipf_state_match(ipstate_t * is1,ipstate_t * is2)1287 ipf_state_match(ipstate_t *is1, ipstate_t *is2)
1288 {
1289 int rv;
1290 int amatch;
1291 int pomatch;
1292
1293 if (bcmp(&is1->is_pass, &is2->is_pass,
1294 offsetof(struct ipstate, is_authmsk) -
1295 offsetof(struct ipstate, is_pass)) == 0) {
1296
1297 pomatch = ipf_state_matchisps(is1, is2);
1298 amatch = ipf_state_matchaddresses(is1, is2);
1299 rv = (amatch != 0) && (amatch == pomatch);
1300 } else {
1301 rv = 0;
1302 }
1303
1304 return (rv);
1305 }
1306
1307 /* ------------------------------------------------------------------------ */
1308 /* Function: ipf_state_add */
1309 /* Returns: ipstate_t - 0 = success */
1310 /* Parameters: softc(I) - pointer to soft context main structure */
1311 /* fin(I) - pointer to packet information */
1312 /* stsave(O) - pointer to place to save pointer to created */
1313 /* state structure. */
1314 /* flags(I) - flags to use when creating the structure */
1315 /* */
1316 /* Creates a new IP state structure from the packet information collected. */
1317 /* Inserts it into the state table and appends to the bottom of the active */
1318 /* list. If the capacity of the table has reached the maximum allowed then */
1319 /* the call will fail and a flush is scheduled for the next timeout call. */
1320 /* */
1321 /* NOTE: The use of stsave to point to nat_state will result in memory */
1322 /* corruption. It should only be used to point to objects that will */
1323 /* either outlive this (not expired) or will deref the ip_state_t */
1324 /* when they are deleted. */
1325 /* ------------------------------------------------------------------------ */
1326 int
ipf_state_add(ipf_main_softc_t * softc,fr_info_t * fin,ipstate_t ** stsave,u_int flags)1327 ipf_state_add(ipf_main_softc_t *softc, fr_info_t *fin, ipstate_t **stsave,
1328 u_int flags)
1329 {
1330 ipf_state_softc_t *softs = softc->ipf_state_soft;
1331 ipstate_t *is, ips;
1332 struct icmp *ic;
1333 u_int pass, hv;
1334 frentry_t *fr;
1335 tcphdr_t *tcp;
1336 frdest_t *fdp;
1337 int out;
1338
1339 /*
1340 * If a locally created packet is trying to egress but it
1341 * does not match because of this lock, it is likely that
1342 * the policy will block it and return network unreachable further
1343 * up the stack. To mitigate this error, EAGAIN is returned instead,
1344 * telling the IP stack to try sending this packet again later.
1345 */
1346 if (softs->ipf_state_lock) {
1347 SBUMPD(ipf_state_stats, iss_add_locked);
1348 fin->fin_error = EAGAIN;
1349 return (-1);
1350 }
1351
1352 if (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)) {
1353 SBUMPD(ipf_state_stats, iss_add_bad);
1354 return (-1);
1355 }
1356
1357 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) {
1358 SBUMPD(ipf_state_stats, iss_add_oow);
1359 return (-1);
1360 }
1361
1362 if ((softs->ipf_state_stats.iss_active * 100 / softs->ipf_state_max) >
1363 softs->ipf_state_wm_high) {
1364 softs->ipf_state_doflush = 1;
1365 }
1366
1367 /*
1368 * If a "keep state" rule has reached the maximum number of references
1369 * to it, then schedule an automatic flush in case we can clear out
1370 * some "dead old wood". Note that because the lock isn't held on
1371 * fr it is possible that we could overflow. The cost of overflowing
1372 * is being ignored here as the number by which it can overflow is
1373 * a product of the number of simultaneous threads that could be
1374 * executing in here, so a limit of 100 won't result in 200, but could
1375 * result in 101 or 102.
1376 */
1377 fr = fin->fin_fr;
1378 if (fr != NULL) {
1379 if ((softs->ipf_state_stats.iss_active >=
1380 softs->ipf_state_max) && (fr->fr_statemax == 0)) {
1381 SBUMPD(ipf_state_stats, iss_max);
1382 return (1);
1383 }
1384 if ((fr->fr_statemax != 0) &&
1385 (fr->fr_statecnt >= fr->fr_statemax)) {
1386 SBUMPD(ipf_state_stats, iss_max_ref);
1387 return (2);
1388 }
1389 }
1390
1391 is = &ips;
1392 if (fr == NULL) {
1393 pass = softc->ipf_flags;
1394 is->is_tag = FR_NOLOGTAG;
1395 } else {
1396 pass = fr->fr_flags;
1397 }
1398
1399 ic = NULL;
1400 tcp = NULL;
1401 out = fin->fin_out;
1402 bzero((char *)is, sizeof(*is));
1403 is->is_die = 1 + softc->ipf_ticks;
1404 /*
1405 * We want to check everything that is a property of this packet,
1406 * but we don't (automatically) care about its fragment status as
1407 * this may change.
1408 */
1409 is->is_pass = pass;
1410 is->is_v = fin->fin_v;
1411 is->is_sec = fin->fin_secmsk;
1412 is->is_secmsk = 0xffff;
1413 is->is_auth = fin->fin_auth;
1414 is->is_authmsk = 0xffff;
1415 is->is_family = fin->fin_family;
1416 is->is_opt[0] = fin->fin_optmsk;
1417 is->is_optmsk[0] = 0xffffffff;
1418 if (is->is_v == 6) {
1419 is->is_opt[0] &= ~0x8;
1420 is->is_optmsk[0] &= ~0x8;
1421 }
1422
1423 /*
1424 * Copy and calculate...
1425 */
1426 hv = (is->is_p = fin->fin_fi.fi_p);
1427 is->is_src = fin->fin_fi.fi_src;
1428 hv += is->is_saddr;
1429 is->is_dst = fin->fin_fi.fi_dst;
1430 hv += is->is_daddr;
1431 #ifdef USE_INET6
1432 if (fin->fin_v == 6) {
1433 /*
1434 * For ICMPv6, we check to see if the destination address is
1435 * a multicast address. If it is, do not include it in the
1436 * calculation of the hash because the correct reply will come
1437 * back from a real address, not a multicast address.
1438 */
1439 if ((is->is_p == IPPROTO_ICMPV6) &&
1440 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
1441 /*
1442 * So you can do keep state with neighbour discovery.
1443 *
1444 * Here we could use the address from the neighbour
1445 * solicit message to put in the state structure and
1446 * we could use that without a wildcard flag too...
1447 */
1448 flags |= SI_W_DADDR;
1449 hv -= is->is_daddr;
1450 } else {
1451 hv += is->is_dst.i6[1];
1452 hv += is->is_dst.i6[2];
1453 hv += is->is_dst.i6[3];
1454 }
1455 hv += is->is_src.i6[1];
1456 hv += is->is_src.i6[2];
1457 hv += is->is_src.i6[3];
1458 }
1459 #endif
1460 if ((fin->fin_v == 4) &&
1461 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
1462 flags |= SI_W_DADDR;
1463 hv -= is->is_daddr;
1464 }
1465
1466 switch (is->is_p)
1467 {
1468 #ifdef USE_INET6
1469 case IPPROTO_ICMPV6 :
1470 ic = fin->fin_dp;
1471
1472 switch (ic->icmp_type)
1473 {
1474 case ICMP6_ECHO_REQUEST :
1475 hv += (is->is_icmp.ici_id = ic->icmp_id);
1476 /*FALLTHROUGH*/
1477 case ICMP6_MEMBERSHIP_QUERY :
1478 case ND_ROUTER_SOLICIT :
1479 case ND_NEIGHBOR_SOLICIT :
1480 case ICMP6_NI_QUERY :
1481 is->is_icmp.ici_type = ic->icmp_type;
1482 break;
1483 default :
1484 SBUMPD(ipf_state_stats, iss_icmp6_notquery);
1485 return (-2);
1486 }
1487 break;
1488 #endif
1489 case IPPROTO_ICMP :
1490 ic = fin->fin_dp;
1491
1492 switch (ic->icmp_type)
1493 {
1494 case ICMP_ECHO :
1495 case ICMP_TSTAMP :
1496 case ICMP_IREQ :
1497 case ICMP_MASKREQ :
1498 is->is_icmp.ici_type = ic->icmp_type;
1499 hv += (is->is_icmp.ici_id = ic->icmp_id);
1500 break;
1501 default :
1502 SBUMPD(ipf_state_stats, iss_icmp_notquery);
1503 return (-3);
1504 }
1505 break;
1506
1507 #if 0
1508 case IPPROTO_GRE :
1509 gre = fin->fin_dp;
1510
1511 is->is_gre.gs_flags = gre->gr_flags;
1512 is->is_gre.gs_ptype = gre->gr_ptype;
1513 if (GRE_REV(is->is_gre.gs_flags) == 1) {
1514 is->is_call[0] = fin->fin_data[0];
1515 is->is_call[1] = fin->fin_data[1];
1516 }
1517 break;
1518 #endif
1519
1520 case IPPROTO_TCP :
1521 tcp = fin->fin_dp;
1522
1523 if (tcp->th_flags & TH_RST) {
1524 SBUMPD(ipf_state_stats, iss_tcp_rstadd);
1525 return (-4);
1526 }
1527
1528 /* TRACE is, flags, hv */
1529
1530 /*
1531 * The endian of the ports doesn't matter, but the ack and
1532 * sequence numbers do as we do mathematics on them later.
1533 */
1534 is->is_sport = htons(fin->fin_data[0]);
1535 is->is_dport = htons(fin->fin_data[1]);
1536 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1537 hv += is->is_sport;
1538 hv += is->is_dport;
1539 }
1540
1541 /* TRACE is, flags, hv */
1542
1543 /*
1544 * If this is a real packet then initialise fields in the
1545 * state information structure from the TCP header information.
1546 */
1547
1548 is->is_maxdwin = 1;
1549 is->is_maxswin = ntohs(tcp->th_win);
1550 if (is->is_maxswin == 0)
1551 is->is_maxswin = 1;
1552
1553 if ((fin->fin_flx & FI_IGNORE) == 0) {
1554 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
1555 (TCP_OFF(tcp) << 2) +
1556 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
1557 ((tcp->th_flags & TH_FIN) ? 1 : 0);
1558 is->is_maxsend = is->is_send;
1559
1560 /*
1561 * Window scale option is only present in
1562 * SYN/SYN-ACK packet.
1563 */
1564 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
1565 TH_SYN &&
1566 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
1567 if (ipf_tcpoptions(softs, fin, tcp,
1568 &is->is_tcp.ts_data[0]) == -1) {
1569 fin->fin_flx |= FI_BAD;
1570 DT1(ipf_fi_bad_tcpoptions_th_fin_ack_ecnall, fr_info_t *, fin);
1571 }
1572 }
1573
1574 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
1575 ipf_checknewisn(fin, is);
1576 ipf_fixoutisn(fin, is);
1577 }
1578
1579 if ((tcp->th_flags & TH_OPENING) == TH_SYN)
1580 flags |= IS_TCPFSM;
1581 else {
1582 is->is_maxdwin = is->is_maxswin * 2;
1583 is->is_dend = ntohl(tcp->th_ack);
1584 is->is_maxdend = ntohl(tcp->th_ack);
1585 is->is_maxdwin *= 2;
1586 }
1587 }
1588
1589 /*
1590 * If we're creating state for a starting connection, start
1591 * the timer on it as we'll never see an error if it fails
1592 * to connect.
1593 */
1594 break;
1595
1596 case IPPROTO_UDP :
1597 tcp = fin->fin_dp;
1598
1599 is->is_sport = htons(fin->fin_data[0]);
1600 is->is_dport = htons(fin->fin_data[1]);
1601 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1602 hv += tcp->th_dport;
1603 hv += tcp->th_sport;
1604 }
1605 break;
1606
1607 default :
1608 break;
1609 }
1610 hv = DOUBLE_HASH(hv);
1611 is->is_hv = hv;
1612
1613 /*
1614 * Look for identical state.
1615 */
1616 for (is = softs->ipf_state_table[hv % softs->ipf_state_size];
1617 is != NULL; is = is->is_hnext) {
1618 if (ipf_state_match(&ips, is) == 1)
1619 break;
1620 }
1621 if (is != NULL) {
1622 SBUMPD(ipf_state_stats, iss_add_dup);
1623 return (3);
1624 }
1625
1626 if (softs->ipf_state_stats.iss_bucketlen[hv] >=
1627 softs->ipf_state_maxbucket) {
1628 SBUMPD(ipf_state_stats, iss_bucket_full);
1629 return (4);
1630 }
1631
1632 /*
1633 * No existing state; create new
1634 */
1635 KMALLOC(is, ipstate_t *);
1636 if (is == NULL) {
1637 SBUMPD(ipf_state_stats, iss_nomem);
1638 return (5);
1639 }
1640 bcopy((char *)&ips, (char *)is, sizeof(*is));
1641 is->is_flags = flags & IS_INHERITED;
1642 is->is_rulen = fin->fin_rule;
1643 is->is_rule = fr;
1644
1645 /*
1646 * Do not do the modulus here, it is done in ipf_state_insert().
1647 */
1648 if (fr != NULL) {
1649 ipftq_t *tq;
1650
1651 (void) strncpy(is->is_group, FR_NAME(fr, fr_group),
1652 FR_GROUPLEN);
1653 if (fr->fr_age[0] != 0) {
1654 tq = ipf_addtimeoutqueue(softc,
1655 &softs->ipf_state_usertq,
1656 fr->fr_age[0]);
1657 is->is_tqehead[0] = tq;
1658 is->is_sti.tqe_flags |= TQE_RULEBASED;
1659 }
1660 if (fr->fr_age[1] != 0) {
1661 tq = ipf_addtimeoutqueue(softc,
1662 &softs->ipf_state_usertq,
1663 fr->fr_age[1]);
1664 is->is_tqehead[1] = tq;
1665 is->is_sti.tqe_flags |= TQE_RULEBASED;
1666 }
1667
1668 is->is_tag = fr->fr_logtag;
1669 }
1670
1671 /*
1672 * It may seem strange to set is_ref to 2, but if stsave is not NULL
1673 * then a copy of the pointer is being stored somewhere else and in
1674 * the end, it will expect to be able to do something with it.
1675 */
1676 is->is_me = stsave;
1677 if (stsave != NULL) {
1678 *stsave = is;
1679 is->is_ref = 2;
1680 } else {
1681 is->is_ref = 1;
1682 }
1683 is->is_pkts[0] = 0, is->is_bytes[0] = 0;
1684 is->is_pkts[1] = 0, is->is_bytes[1] = 0;
1685 is->is_pkts[2] = 0, is->is_bytes[2] = 0;
1686 is->is_pkts[3] = 0, is->is_bytes[3] = 0;
1687 if ((fin->fin_flx & FI_IGNORE) == 0) {
1688 is->is_pkts[out] = 1;
1689 fin->fin_pktnum = 1;
1690 is->is_bytes[out] = fin->fin_plen;
1691 is->is_flx[out][0] = fin->fin_flx & FI_CMP;
1692 is->is_flx[out][0] &= ~FI_OOW;
1693 }
1694
1695 if (pass & FR_STLOOSE)
1696 is->is_flags |= IS_LOOSE;
1697
1698 if (pass & FR_STSTRICT)
1699 is->is_flags |= IS_STRICT;
1700
1701 if (pass & FR_STATESYNC)
1702 is->is_flags |= IS_STATESYNC;
1703
1704 if (pass & FR_LOGFIRST)
1705 is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
1706
1707 READ_ENTER(&softc->ipf_state);
1708
1709 if (ipf_state_insert(softc, is, fin->fin_rev) == -1) {
1710 RWLOCK_EXIT(&softc->ipf_state);
1711 /*
1712 * This is a bit more manual than it should be but
1713 * ipf_state_del cannot be called.
1714 */
1715 MUTEX_EXIT(&is->is_lock);
1716 MUTEX_DESTROY(&is->is_lock);
1717 if (is->is_tqehead[0] != NULL) {
1718 if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
1719 ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
1720 is->is_tqehead[0] = NULL;
1721 }
1722 if (is->is_tqehead[1] != NULL) {
1723 if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
1724 ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
1725 is->is_tqehead[1] = NULL;
1726 }
1727 KFREE(is);
1728 return (-1);
1729 }
1730
1731 /*
1732 * Filling in the interface name is after the insert so that an
1733 * event (such as add/delete) of an interface that is referenced
1734 * by this rule will see this state entry.
1735 */
1736 if (fr != NULL) {
1737 /*
1738 * The name '-' is special for network interfaces and causes
1739 * a NULL name to be present, always, allowing packets to
1740 * match it, regardless of their interface.
1741 */
1742 if ((fin->fin_ifp == NULL) ||
1743 (fr->fr_ifnames[out << 1] != -1 &&
1744 fr->fr_names[fr->fr_ifnames[out << 1] + 0] == '-' &&
1745 fr->fr_names[fr->fr_ifnames[out << 1] + 1] == '\0')) {
1746 is->is_ifp[out << 1] = fr->fr_ifas[0];
1747 strncpy(is->is_ifname[out << 1],
1748 FR_NAME(fr, fr_ifnames[0]),
1749 sizeof(fr->fr_ifnames[0]));
1750 } else {
1751 is->is_ifp[out << 1] = fin->fin_ifp;
1752 COPYIFNAME(fin->fin_v, fin->fin_ifp,
1753 is->is_ifname[out << 1]);
1754 }
1755
1756 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
1757 if (fr->fr_ifnames[1] != -1) {
1758 strncpy(is->is_ifname[(out << 1) + 1],
1759 FR_NAME(fr, fr_ifnames[1]),
1760 sizeof(fr->fr_ifnames[1]));
1761 }
1762
1763 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
1764 if (fr->fr_ifnames[2] != -1) {
1765 strncpy(is->is_ifname[((1 - out) << 1)],
1766 FR_NAME(fr, fr_ifnames[2]),
1767 sizeof(fr->fr_ifnames[2]));
1768 }
1769
1770 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
1771 if (fr->fr_ifnames[3] != -1) {
1772 strncpy(is->is_ifname[((1 - out) << 1) + 1],
1773 FR_NAME(fr, fr_ifnames[3]),
1774 sizeof(fr->fr_ifnames[3]));
1775 }
1776 } else {
1777 if (fin->fin_ifp != NULL) {
1778 is->is_ifp[out << 1] = fin->fin_ifp;
1779 COPYIFNAME(fin->fin_v, fin->fin_ifp,
1780 is->is_ifname[out << 1]);
1781 }
1782 }
1783
1784 if (fin->fin_p == IPPROTO_TCP) {
1785 /*
1786 * If we're creating state for a starting connection, start the
1787 * timer on it as we'll never see an error if it fails to
1788 * connect.
1789 */
1790 (void) ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
1791 is->is_flags, 2);
1792 }
1793 MUTEX_EXIT(&is->is_lock);
1794 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
1795 is->is_sync = ipf_sync_new(softc, SMC_STATE, fin, is);
1796 if (softs->ipf_state_logging)
1797 ipf_state_log(softc, is, ISL_NEW);
1798
1799 RWLOCK_EXIT(&softc->ipf_state);
1800
1801 fin->fin_flx |= FI_STATE;
1802 if (fin->fin_flx & FI_FRAG)
1803 (void) ipf_frag_new(softc, fin, pass);
1804
1805 fdp = &fr->fr_tifs[0];
1806 if (fdp->fd_type == FRD_DSTLIST) {
1807 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1808 &is->is_tifs[0]);
1809 } else {
1810 bcopy(fdp, &is->is_tifs[0], sizeof(*fdp));
1811 }
1812
1813 fdp = &fr->fr_tifs[1];
1814 if (fdp->fd_type == FRD_DSTLIST) {
1815 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1816 &is->is_tifs[1]);
1817 } else {
1818 bcopy(fdp, &is->is_tifs[1], sizeof(*fdp));
1819 }
1820 fin->fin_tif = &is->is_tifs[fin->fin_rev];
1821
1822 fdp = &fr->fr_dif;
1823 if (fdp->fd_type == FRD_DSTLIST) {
1824 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1825 &is->is_dif);
1826 } else {
1827 bcopy(fdp, &is->is_dif, sizeof(*fdp));
1828 }
1829 fin->fin_dif = &is->is_dif;
1830
1831 return (0);
1832 }
1833
1834
1835 /* ------------------------------------------------------------------------ */
1836 /* Function: ipf_tcpoptions */
1837 /* Returns: int - 1 == packet matches state entry, 0 == it does not, */
1838 /* -1 == packet has bad TCP options data */
1839 /* Parameters: softs(I) - pointer to state context structure */
1840 /* fin(I) - pointer to packet information */
1841 /* tcp(I) - pointer to TCP packet header */
1842 /* td(I) - pointer to TCP data held as part of the state */
1843 /* */
1844 /* Look after the TCP header for any options and deal with those that are */
1845 /* present. Record details about those that we recogise. */
1846 /* ------------------------------------------------------------------------ */
1847 static int
ipf_tcpoptions(ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,tcpdata_t * td)1848 ipf_tcpoptions(ipf_state_softc_t *softs, fr_info_t *fin, tcphdr_t *tcp,
1849 tcpdata_t *td)
1850 {
1851 int off, mlen, ol, i, len, retval;
1852 char buf[64], *s, opt;
1853 mb_t *m = NULL;
1854
1855 len = (TCP_OFF(tcp) << 2);
1856 if (fin->fin_dlen < len) {
1857 SBUMPD(ipf_state_stats, iss_tcp_toosmall);
1858 return (0);
1859 }
1860 len -= sizeof(*tcp);
1861
1862 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff;
1863
1864 m = fin->fin_m;
1865 mlen = MSGDSIZE(m) - off;
1866 if (len > mlen) {
1867 len = mlen;
1868 retval = 0;
1869 } else {
1870 retval = 1;
1871 }
1872
1873 COPYDATA(m, off, len, buf);
1874
1875 for (s = buf; len > 0; ) {
1876 opt = *s;
1877 if (opt == TCPOPT_EOL)
1878 break;
1879 else if (opt == TCPOPT_NOP)
1880 ol = 1;
1881 else {
1882 if (len < 2)
1883 break;
1884 ol = (int)*(s + 1);
1885 if (ol < 2 || ol > len)
1886 break;
1887
1888 /*
1889 * Extract the TCP options we are interested in out of
1890 * the header and store them in the tcpdata struct.
1891 */
1892 switch (opt)
1893 {
1894 case TCPOPT_WINDOW :
1895 if (ol == TCPOLEN_WINDOW) {
1896 i = (int)*(s + 2);
1897 if (i > TCP_WSCALE_MAX)
1898 i = TCP_WSCALE_MAX;
1899 else if (i < 0)
1900 i = 0;
1901 td->td_winscale = i;
1902 td->td_winflags |= TCP_WSCALE_SEEN|
1903 TCP_WSCALE_FIRST;
1904 } else
1905 retval = -1;
1906 break;
1907 case TCPOPT_MAXSEG :
1908 /*
1909 * So, if we wanted to set the TCP MAXSEG,
1910 * it should be done here...
1911 */
1912 if (ol == TCPOLEN_MAXSEG) {
1913 i = (int)*(s + 2);
1914 i <<= 8;
1915 i += (int)*(s + 3);
1916 td->td_maxseg = i;
1917 } else
1918 retval = -1;
1919 break;
1920 case TCPOPT_SACK_PERMITTED :
1921 if (ol == TCPOLEN_SACK_PERMITTED)
1922 td->td_winflags |= TCP_SACK_PERMIT;
1923 else
1924 retval = -1;
1925 break;
1926 }
1927 }
1928 len -= ol;
1929 s += ol;
1930 }
1931 if (retval == -1) {
1932 SBUMPD(ipf_state_stats, iss_tcp_badopt);
1933 }
1934 return (retval);
1935 }
1936
1937
1938 /* ------------------------------------------------------------------------ */
1939 /* Function: ipf_state_tcp */
1940 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1941 /* Parameters: softc(I) - pointer to soft context main structure */
1942 /* softs(I) - pointer to state context structure */
1943 /* fin(I) - pointer to packet information */
1944 /* tcp(I) - pointer to TCP packet header */
1945 /* is(I) - pointer to master state structure */
1946 /* */
1947 /* Check to see if a packet with TCP headers fits within the TCP window. */
1948 /* Change timeout depending on whether new packet is a SYN-ACK returning */
1949 /* for a SYN or a RST or FIN which indicate time to close up shop. */
1950 /* ------------------------------------------------------------------------ */
1951 static int
ipf_state_tcp(ipf_main_softc_t * softc,ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)1952 ipf_state_tcp(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
1953 fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
1954 {
1955 tcpdata_t *fdata, *tdata;
1956 int source, ret, flags;
1957
1958 source = !fin->fin_rev;
1959 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
1960 (ntohs(is->is_sport) != fin->fin_data[0]))
1961 source = 0;
1962 fdata = &is->is_tcp.ts_data[!source];
1963 tdata = &is->is_tcp.ts_data[source];
1964
1965 MUTEX_ENTER(&is->is_lock);
1966
1967 /*
1968 * If a SYN packet is received for a connection that is on the way out
1969 * but hasn't yet departed then advance this session along the way.
1970 */
1971 if ((tcp->th_flags & TH_OPENING) == TH_SYN) {
1972 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
1973 (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
1974 is->is_state[!source] = IPF_TCPS_CLOSED;
1975 ipf_movequeue(softc->ipf_ticks, &is->is_sti,
1976 is->is_sti.tqe_ifq,
1977 &softs->ipf_state_deletetq);
1978 MUTEX_EXIT(&is->is_lock);
1979 DT1(iss_tcp_closing, ipstate_t *, is);
1980 SBUMP(ipf_state_stats.iss_tcp_closing);
1981 return (0);
1982 }
1983 }
1984
1985 if (is->is_flags & IS_LOOSE)
1986 ret = 1;
1987 else
1988 ret = ipf_state_tcpinwindow(fin, fdata, tdata, tcp,
1989 is->is_flags);
1990 if (ret > 0) {
1991 /*
1992 * Nearing end of connection, start timeout.
1993 */
1994 ret = ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
1995 is->is_flags, ret);
1996 if (ret == 0) {
1997 MUTEX_EXIT(&is->is_lock);
1998 DT2(iss_tcp_fsm, fr_info_t *, fin, ipstate_t *, is);
1999 SBUMP(ipf_state_stats.iss_tcp_fsm);
2000 return (0);
2001 }
2002
2003 if (softs->ipf_state_logging > 4)
2004 ipf_state_log(softc, is, ISL_STATECHANGE);
2005
2006 /*
2007 * set s0's as appropriate. Use syn-ack packet as it
2008 * contains both pieces of required information.
2009 */
2010 /*
2011 * Window scale option is only present in SYN/SYN-ACK packet.
2012 * Compare with ~TH_FIN to mask out T/TCP setups.
2013 */
2014 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
2015 if (flags == (TH_SYN|TH_ACK)) {
2016 is->is_s0[source] = ntohl(tcp->th_ack);
2017 is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
2018 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2019 if (ipf_tcpoptions(softs, fin, tcp,
2020 fdata) == -1) {
2021 fin->fin_flx |= FI_BAD;
2022 DT1(ipf_fi_bad_winscale_syn_ack, fr_info_t *, fin);
2023 }
2024 }
2025 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2026 ipf_checknewisn(fin, is);
2027 } else if (flags == TH_SYN) {
2028 is->is_s0[source] = ntohl(tcp->th_seq) + 1;
2029 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2030 if (ipf_tcpoptions(softs, fin, tcp,
2031 fdata) == -1) {
2032 fin->fin_flx |= FI_BAD;
2033 DT1(ipf_fi_bad_winscale_syn, fr_info_t *, fin);
2034 }
2035 }
2036
2037 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2038 ipf_checknewisn(fin, is);
2039
2040 }
2041 ret = 1;
2042 } else {
2043 DT2(iss_tcp_oow, fr_info_t *, fin, ipstate_t *, is);
2044 SBUMP(ipf_state_stats.iss_tcp_oow);
2045 ret = 0;
2046 }
2047 MUTEX_EXIT(&is->is_lock);
2048 return (ret);
2049 }
2050
2051
2052 /* ------------------------------------------------------------------------ */
2053 /* Function: ipf_checknewisn */
2054 /* Returns: Nil */
2055 /* Parameters: fin(I) - pointer to packet information */
2056 /* is(I) - pointer to master state structure */
2057 /* */
2058 /* Check to see if this TCP connection is expecting and needs a new */
2059 /* sequence number for a particular direction of the connection. */
2060 /* */
2061 /* NOTE: This does not actually change the sequence numbers, only gets new */
2062 /* one ready. */
2063 /* ------------------------------------------------------------------------ */
2064 static void
ipf_checknewisn(fr_info_t * fin,ipstate_t * is)2065 ipf_checknewisn(fr_info_t *fin, ipstate_t *is)
2066 {
2067 u_32_t sumd, old, new;
2068 tcphdr_t *tcp;
2069 int i;
2070
2071 i = fin->fin_rev;
2072 tcp = fin->fin_dp;
2073
2074 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
2075 ((i == 1) && !(is->is_flags & IS_ISNACK))) {
2076 old = ntohl(tcp->th_seq);
2077 new = ipf_newisn(fin);
2078 is->is_isninc[i] = new - old;
2079 CALC_SUMD(old, new, sumd);
2080 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);
2081
2082 is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
2083 }
2084 }
2085
2086
2087 /* ------------------------------------------------------------------------ */
2088 /* Function: ipf_state_tcpinwindow */
2089 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */
2090 /* Parameters: fin(I) - pointer to packet information */
2091 /* fdata(I) - pointer to tcp state informatio (forward) */
2092 /* tdata(I) - pointer to tcp state informatio (reverse) */
2093 /* tcp(I) - pointer to TCP packet header */
2094 /* */
2095 /* Given a packet has matched addresses and ports, check to see if it is */
2096 /* within the TCP data window. In a show of generosity, allow packets that */
2097 /* are within the window space behind the current sequence # as well. */
2098 /* ------------------------------------------------------------------------ */
2099 static int
ipf_state_tcpinwindow(fr_info_t * fin,tcpdata_t * fdata,tcpdata_t * tdata,tcphdr_t * tcp,int flags)2100 ipf_state_tcpinwindow(fr_info_t *fin, tcpdata_t *fdata, tcpdata_t *tdata,
2101 tcphdr_t *tcp, int flags)
2102 {
2103 ipf_main_softc_t *softc = fin->fin_main_soft;
2104 ipf_state_softc_t *softs = softc->ipf_state_soft;
2105 tcp_seq seq, ack, end;
2106 int ackskew, tcpflags;
2107 u_32_t win, maxwin;
2108 int dsize, inseq;
2109
2110 /*
2111 * Find difference between last checked packet and this packet.
2112 */
2113 tcpflags = tcp->th_flags;
2114 seq = ntohl(tcp->th_seq);
2115 ack = ntohl(tcp->th_ack);
2116 if (tcpflags & TH_SYN)
2117 win = ntohs(tcp->th_win);
2118 else
2119 win = ntohs(tcp->th_win) << fdata->td_winscale;
2120
2121 /*
2122 * A window of 0 produces undesirable behaviour from this function.
2123 */
2124 if (win == 0)
2125 win = 1;
2126
2127 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2128 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);
2129
2130 /*
2131 * if window scaling is present, the scaling is only allowed
2132 * for windows not in the first SYN packet. In that packet the
2133 * window is 65535 to specify the largest window possible
2134 * for receivers not implementing the window scale option.
2135 * Currently, we do not assume TTCP here. That means that
2136 * if we see a second packet from a host (after the initial
2137 * SYN), we can assume that the receiver of the SYN did
2138 * already send back the SYN/ACK (and thus that we know if
2139 * the receiver also does window scaling)
2140 */
2141 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
2142 fdata->td_winflags &= ~TCP_WSCALE_FIRST;
2143 fdata->td_maxwin = win;
2144 }
2145
2146 end = seq + dsize;
2147
2148 if ((fdata->td_end == 0) &&
2149 (!(flags & IS_TCPFSM) ||
2150 ((tcpflags & TH_OPENING) == TH_OPENING))) {
2151 /*
2152 * Must be a (outgoing) SYN-ACK in reply to a SYN.
2153 */
2154 fdata->td_end = end - 1;
2155 fdata->td_maxwin = 1;
2156 fdata->td_maxend = end + win;
2157 }
2158
2159 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */
2160 ack = tdata->td_end;
2161 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
2162 (ack == 0)) {
2163 /* gross hack to get around certain broken tcp stacks */
2164 ack = tdata->td_end;
2165 }
2166
2167 maxwin = tdata->td_maxwin;
2168 ackskew = tdata->td_end - ack;
2169
2170 /*
2171 * Strict sequencing only allows in-order delivery.
2172 */
2173 if ((flags & IS_STRICT) != 0) {
2174 if (seq != fdata->td_end) {
2175 DT2(iss_tcp_struct, tcpdata_t *, fdata, int, seq);
2176 SBUMP(ipf_state_stats.iss_tcp_strict);
2177 fin->fin_flx |= FI_OOW;
2178 return (0);
2179 }
2180 }
2181
2182 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0)
2183 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0)
2184 inseq = 0;
2185 if ((SEQ_GE(fdata->td_maxend, end)) &&
2186 (SEQ_GE(seq, fdata->td_end - maxwin)) &&
2187 /* XXX what about big packets */
2188 #define MAXACKWINDOW 66000
2189 (-ackskew <= (MAXACKWINDOW)) &&
2190 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
2191 inseq = 1;
2192 /*
2193 * Microsoft Windows will send the next packet to the right of the
2194 * window if SACK is in use.
2195 */
2196 } else if ((seq == fdata->td_maxend) && (ackskew == 0) &&
2197 (fdata->td_winflags & TCP_SACK_PERMIT) &&
2198 (tdata->td_winflags & TCP_SACK_PERMIT)) {
2199 DT2(iss_sinsack, tcpdata_t *, fdata, int, seq);
2200 SBUMP(ipf_state_stats.iss_winsack);
2201 inseq = 1;
2202 /*
2203 * Sometimes a TCP RST will be generated with only the ACK field
2204 * set to non-zero.
2205 */
2206 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) &&
2207 (ackskew >= -1) && (ackskew <= 1)) {
2208 inseq = 1;
2209 } else if (!(flags & IS_TCPFSM)) {
2210 int i;
2211
2212 i = (fin->fin_rev << 1) + fin->fin_out;
2213
2214 #if 0
2215 if (is_pkts[i]0 == 0) {
2216 /*
2217 * Picking up a connection in the middle, the "next"
2218 * packet seen from a direction that is new should be
2219 * accepted, even if it appears out of sequence.
2220 */
2221 inseq = 1;
2222 } else
2223 #endif
2224 if (!(fdata->td_winflags &
2225 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) {
2226 /*
2227 * No TCPFSM and no window scaling, so make some
2228 * extra guesses.
2229 */
2230 if ((seq == fdata->td_maxend) && (ackskew == 0))
2231 inseq = 1;
2232 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin))
2233 inseq = 1;
2234 }
2235 }
2236
2237 /* TRACE(inseq, fdata, tdata, seq, end, ack, ackskew, win, maxwin) */
2238
2239 if (inseq) {
2240 /* if ackskew < 0 then this should be due to fragmented
2241 * packets. There is no way to know the length of the
2242 * total packet in advance.
2243 * We do know the total length from the fragment cache though.
2244 * Note however that there might be more sessions with
2245 * exactly the same source and destination parameters in the
2246 * state cache (and source and destination is the only stuff
2247 * that is saved in the fragment cache). Note further that
2248 * some TCP connections in the state cache are hashed with
2249 * sport and dport as well which makes it not worthwhile to
2250 * look for them.
2251 * Thus, when ackskew is negative but still seems to belong
2252 * to this session, we bump up the destinations end value.
2253 */
2254 if (ackskew < 0)
2255 tdata->td_end = ack;
2256
2257 /* update max window seen */
2258 if (fdata->td_maxwin < win)
2259 fdata->td_maxwin = win;
2260 if (SEQ_GT(end, fdata->td_end))
2261 fdata->td_end = end;
2262 if (SEQ_GE(ack + win, tdata->td_maxend))
2263 tdata->td_maxend = ack + win;
2264 return (1);
2265 }
2266 SBUMP(ipf_state_stats.iss_oow);
2267 fin->fin_flx |= FI_OOW;
2268 return (0);
2269 }
2270
2271
2272 /* ------------------------------------------------------------------------ */
2273 /* Function: ipf_state_clone */
2274 /* Returns: ipstate_t* - NULL == cloning failed, */
2275 /* else pointer to new state structure */
2276 /* Parameters: fin(I) - pointer to packet information */
2277 /* tcp(I) - pointer to TCP/UDP header */
2278 /* is(I) - pointer to master state structure */
2279 /* */
2280 /* Create a "duplcate" state table entry from the master. */
2281 /* ------------------------------------------------------------------------ */
2282 static ipstate_t *
ipf_state_clone(fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)2283 ipf_state_clone(fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
2284 {
2285 ipf_main_softc_t *softc = fin->fin_main_soft;
2286 ipf_state_softc_t *softs = softc->ipf_state_soft;
2287 ipstate_t *clone;
2288 u_32_t send;
2289
2290 if (softs->ipf_state_stats.iss_active == softs->ipf_state_max) {
2291 SBUMPD(ipf_state_stats, iss_max);
2292 softs->ipf_state_doflush = 1;
2293 return (NULL);
2294 }
2295 KMALLOC(clone, ipstate_t *);
2296 if (clone == NULL) {
2297 SBUMPD(ipf_state_stats, iss_clone_nomem);
2298 return (NULL);
2299 }
2300 bcopy((char *)is, (char *)clone, sizeof(*clone));
2301
2302 MUTEX_NUKE(&clone->is_lock);
2303 /*
2304 * It has not yet been placed on any timeout queue, so make sure
2305 * all of that data is zero'd out.
2306 */
2307 clone->is_sti.tqe_pnext = NULL;
2308 clone->is_sti.tqe_next = NULL;
2309 clone->is_sti.tqe_ifq = NULL;
2310 clone->is_sti.tqe_parent = clone;
2311
2312 clone->is_die = ONE_DAY + softc->ipf_ticks;
2313 clone->is_state[0] = 0;
2314 clone->is_state[1] = 0;
2315 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2316 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
2317 ((tcp->th_flags & TH_FIN) ? 1 : 0);
2318
2319 if (fin->fin_rev == 1) {
2320 clone->is_dend = send;
2321 clone->is_maxdend = send;
2322 clone->is_send = 0;
2323 clone->is_maxswin = 1;
2324 clone->is_maxdwin = ntohs(tcp->th_win);
2325 if (clone->is_maxdwin == 0)
2326 clone->is_maxdwin = 1;
2327 } else {
2328 clone->is_send = send;
2329 clone->is_maxsend = send;
2330 clone->is_dend = 0;
2331 clone->is_maxdwin = 1;
2332 clone->is_maxswin = ntohs(tcp->th_win);
2333 if (clone->is_maxswin == 0)
2334 clone->is_maxswin = 1;
2335 }
2336
2337 clone->is_flags &= ~SI_CLONE;
2338 clone->is_flags |= SI_CLONED;
2339 if (ipf_state_insert(softc, clone, fin->fin_rev) == -1) {
2340 KFREE(clone);
2341 return (NULL);
2342 }
2343
2344 clone->is_ref = 1;
2345 if (clone->is_p == IPPROTO_TCP) {
2346 (void) ipf_tcp_age(&clone->is_sti, fin, softs->ipf_state_tcptq,
2347 clone->is_flags, 2);
2348 }
2349 MUTEX_EXIT(&clone->is_lock);
2350 if (is->is_flags & IS_STATESYNC)
2351 clone->is_sync = ipf_sync_new(softc, SMC_STATE, fin, clone);
2352 DT2(iss_clone, ipstate_t *, is, ipstate_t *, clone);
2353 SBUMP(ipf_state_stats.iss_cloned);
2354 return (clone);
2355 }
2356
2357
2358 /* ------------------------------------------------------------------------ */
2359 /* Function: ipf_matchsrcdst */
2360 /* Returns: Nil */
2361 /* Parameters: fin(I) - pointer to packet information */
2362 /* is(I) - pointer to state structure */
2363 /* src(I) - pointer to source address */
2364 /* dst(I) - pointer to destination address */
2365 /* tcp(I) - pointer to TCP/UDP header */
2366 /* cmask(I) - mask of FI_* bits to check */
2367 /* */
2368 /* Match a state table entry against an IP packet. The logic below is that */
2369 /* ret gets set to one if the match succeeds, else remains 0. If it is */
2370 /* still 0 after the test. no match. */
2371 /* ------------------------------------------------------------------------ */
2372 static ipstate_t *
ipf_matchsrcdst(fr_info_t * fin,ipstate_t * is,i6addr_t * src,i6addr_t * dst,tcphdr_t * tcp,u_32_t cmask)2373 ipf_matchsrcdst(fr_info_t *fin, ipstate_t *is, i6addr_t *src, i6addr_t *dst,
2374 tcphdr_t *tcp, u_32_t cmask)
2375 {
2376 ipf_main_softc_t *softc = fin->fin_main_soft;
2377 ipf_state_softc_t *softs = softc->ipf_state_soft;
2378 int ret = 0, rev, out, flags, flx = 0, idx;
2379 u_short sp, dp;
2380 u_32_t cflx;
2381 void *ifp;
2382
2383 /*
2384 * If a connection is about to be deleted, no packets
2385 * are allowed to match it.
2386 */
2387 if (is->is_sti.tqe_ifq == &softs->ipf_state_deletetq)
2388 return (NULL);
2389
2390 rev = IP6_NEQ(&is->is_dst, dst);
2391 ifp = fin->fin_ifp;
2392 out = fin->fin_out;
2393 flags = is->is_flags;
2394 sp = 0;
2395 dp = 0;
2396
2397 if (tcp != NULL) {
2398 sp = htons(fin->fin_sport);
2399 dp = ntohs(fin->fin_dport);
2400 }
2401 if (!rev) {
2402 if (tcp != NULL) {
2403 if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
2404 rev = 1;
2405 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
2406 rev = 1;
2407 }
2408 }
2409
2410 idx = (out << 1) + rev;
2411
2412 /*
2413 * If the interface for this 'direction' is set, make sure it matches.
2414 * An interface name that is not set matches any, as does a name of *.
2415 */
2416 if ((is->is_ifp[idx] == ifp) || (is->is_ifp[idx] == NULL &&
2417 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '-' ||
2418 *is->is_ifname[idx] == '*')))
2419 ret = 1;
2420
2421 if (ret == 0) {
2422 DT2(iss_lookup_badifp, fr_info_t *, fin, ipstate_t *, is);
2423 SBUMP(ipf_state_stats.iss_lookup_badifp);
2424 /* TRACE is, out, rev, idx */
2425 return (NULL);
2426 }
2427 ret = 0;
2428
2429 /*
2430 * Match addresses and ports.
2431 */
2432 if (rev == 0) {
2433 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
2434 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
2435 if (tcp) {
2436 if ((sp == is->is_sport || flags & SI_W_SPORT)
2437 &&
2438 (dp == is->is_dport || flags & SI_W_DPORT))
2439 ret = 1;
2440 } else {
2441 ret = 1;
2442 }
2443 }
2444 } else {
2445 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
2446 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
2447 if (tcp) {
2448 if ((dp == is->is_sport || flags & SI_W_SPORT)
2449 &&
2450 (sp == is->is_dport || flags & SI_W_DPORT))
2451 ret = 1;
2452 } else {
2453 ret = 1;
2454 }
2455 }
2456 }
2457
2458 if (ret == 0) {
2459 SBUMP(ipf_state_stats.iss_lookup_badport);
2460 DT2(iss_lookup_badport, fr_info_t *, fin, ipstate_t *, is);
2461 /* TRACE rev, is, sp, dp, src, dst */
2462 return (NULL);
2463 }
2464
2465 /*
2466 * Whether or not this should be here, is questionable, but the aim
2467 * is to get this out of the main line.
2468 */
2469 if (tcp == NULL)
2470 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);
2471
2472 /*
2473 * Only one of the source or destination address can be flaged as a
2474 * wildcard. Fill in the missing address, if set.
2475 * For IPv6, if the address being copied in is multicast, then
2476 * don't reset the wild flag - multicast causes it to be set in the
2477 * first place!
2478 */
2479 if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
2480 fr_ip_t *fi = &fin->fin_fi;
2481
2482 if ((flags & SI_W_SADDR) != 0) {
2483 if (rev == 0) {
2484 is->is_src = fi->fi_src;
2485 is->is_flags &= ~SI_W_SADDR;
2486 } else {
2487 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2488 is->is_src = fi->fi_dst;
2489 is->is_flags &= ~SI_W_SADDR;
2490 }
2491 }
2492 } else if ((flags & SI_W_DADDR) != 0) {
2493 if (rev == 0) {
2494 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2495 is->is_dst = fi->fi_dst;
2496 is->is_flags &= ~SI_W_DADDR;
2497 }
2498 } else {
2499 is->is_dst = fi->fi_src;
2500 is->is_flags &= ~SI_W_DADDR;
2501 }
2502 }
2503 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
2504 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2505 }
2506 }
2507
2508 flx = fin->fin_flx & cmask;
2509 cflx = is->is_flx[out][rev];
2510
2511 /*
2512 * Match up any flags set from IP options.
2513 */
2514 if ((cflx && (flx != (cflx & cmask))) ||
2515 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
2516 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
2517 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) {
2518 SBUMPD(ipf_state_stats, iss_miss_mask);
2519 return (NULL);
2520 }
2521
2522 if ((fin->fin_flx & FI_IGNORE) != 0) {
2523 fin->fin_rev = rev;
2524 return (is);
2525 }
2526
2527 /*
2528 * Only one of the source or destination port can be flagged as a
2529 * wildcard. When filling it in, fill in a copy of the matched entry
2530 * if it has the cloning flag set.
2531 */
2532 if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
2533 if ((flags & SI_CLONE) != 0) {
2534 ipstate_t *clone;
2535
2536 clone = ipf_state_clone(fin, tcp, is);
2537 if (clone == NULL)
2538 return (NULL);
2539 is = clone;
2540 } else {
2541 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2542 }
2543
2544 if ((flags & SI_W_SPORT) != 0) {
2545 if (rev == 0) {
2546 is->is_sport = sp;
2547 is->is_send = ntohl(tcp->th_seq);
2548 } else {
2549 is->is_sport = dp;
2550 is->is_send = ntohl(tcp->th_ack);
2551 }
2552 is->is_maxsend = is->is_send + 1;
2553 } else if ((flags & SI_W_DPORT) != 0) {
2554 if (rev == 0) {
2555 is->is_dport = dp;
2556 is->is_dend = ntohl(tcp->th_ack);
2557 } else {
2558 is->is_dport = sp;
2559 is->is_dend = ntohl(tcp->th_seq);
2560 }
2561 is->is_maxdend = is->is_dend + 1;
2562 }
2563 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
2564 if ((flags & SI_CLONED) && softs->ipf_state_logging)
2565 ipf_state_log(softc, is, ISL_CLONE);
2566 }
2567
2568 ret = -1;
2569
2570 if (is->is_flx[out][rev] == 0) {
2571 is->is_flx[out][rev] = flx;
2572 if (rev == 1 && is->is_optmsk[1] == 0) {
2573 is->is_opt[1] = fin->fin_optmsk;
2574 is->is_optmsk[1] = 0xffffffff;
2575 if (is->is_v == 6) {
2576 is->is_opt[1] &= ~0x8;
2577 is->is_optmsk[1] &= ~0x8;
2578 }
2579 }
2580 }
2581
2582 /*
2583 * Check if the interface name for this "direction" is set and if not,
2584 * fill it in.
2585 */
2586 if (is->is_ifp[idx] == NULL &&
2587 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
2588 is->is_ifp[idx] = ifp;
2589 COPYIFNAME(fin->fin_v, ifp, is->is_ifname[idx]);
2590 }
2591 fin->fin_rev = rev;
2592 return (is);
2593 }
2594
2595
2596 /* ------------------------------------------------------------------------ */
2597 /* Function: ipf_checkicmpmatchingstate */
2598 /* Returns: Nil */
2599 /* Parameters: fin(I) - pointer to packet information */
2600 /* */
2601 /* If we've got an ICMP error message, using the information stored in the */
2602 /* ICMP packet, look for a matching state table entry. */
2603 /* */
2604 /* If we return NULL then no lock on ipf_state is held. */
2605 /* If we return non-null then a read-lock on ipf_state is held. */
2606 /* ------------------------------------------------------------------------ */
2607 static ipstate_t *
ipf_checkicmpmatchingstate(fr_info_t * fin)2608 ipf_checkicmpmatchingstate(fr_info_t *fin)
2609 {
2610 ipf_main_softc_t *softc = fin->fin_main_soft;
2611 ipf_state_softc_t *softs = softc->ipf_state_soft;
2612 ipstate_t *is, **isp;
2613 i6addr_t dst, src;
2614 struct icmp *ic;
2615 u_short savelen;
2616 icmphdr_t *icmp;
2617 fr_info_t ofin;
2618 tcphdr_t *tcp;
2619 int type, len;
2620 u_char pr;
2621 ip_t *oip;
2622 u_int hv;
2623
2624 /*
2625 * Does it at least have the return (basic) IP header ?
2626 * Is it an actual recognised ICMP error type?
2627 * Only a basic IP header (no options) should be with
2628 * an ICMP error header.
2629 */
2630 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
2631 (fin->fin_plen < ICMPERR_MINPKTLEN) ||
2632 !(fin->fin_flx & FI_ICMPERR)) {
2633 SBUMPD(ipf_state_stats, iss_icmp_bad);
2634 return (NULL);
2635 }
2636 ic = fin->fin_dp;
2637 type = ic->icmp_type;
2638
2639 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
2640 /*
2641 * Check if the at least the old IP header (with options) and
2642 * 8 bytes of payload is present.
2643 */
2644 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) {
2645 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2646 return (NULL);
2647 }
2648
2649 /*
2650 * Sanity Checks.
2651 */
2652 len = fin->fin_dlen - ICMPERR_ICMPHLEN;
2653 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) {
2654 DT2(iss_icmp_len, fr_info_t *, fin, struct ip*, oip);
2655 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2656 return (NULL);
2657 }
2658
2659 /*
2660 * Is the buffer big enough for all of it ? It's the size of the IP
2661 * header claimed in the encapsulated part which is of concern. It
2662 * may be too big to be in this buffer but not so big that it's
2663 * outside the ICMP packet, leading to TCP deref's causing problems.
2664 * This is possible because we don't know how big oip_hl is when we
2665 * do the pullup early in ipf_check() and thus can't guarantee it is
2666 * all here now.
2667 */
2668 #ifdef _KERNEL
2669 {
2670 mb_t *m;
2671
2672 m = fin->fin_m;
2673 # if SOLARIS
2674 if ((char *)oip + len > (char *)m->b_wptr) {
2675 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_2);
2676 return (NULL);
2677 }
2678 # else
2679 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) {
2680 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_3);
2681 return (NULL);
2682 }
2683 # endif
2684 }
2685 #endif
2686
2687 bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
2688
2689 /*
2690 * in the IPv4 case we must zero the i6addr union otherwise
2691 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
2692 * of the 'junk' in the unused part of the union
2693 */
2694 bzero((char *)&src, sizeof(src));
2695 bzero((char *)&dst, sizeof(dst));
2696
2697 /*
2698 * we make an fin entry to be able to feed it to
2699 * matchsrcdst note that not all fields are encessary
2700 * but this is the cleanest way. Note further we fill
2701 * in fin_mp such that if someone uses it we'll get
2702 * a kernel panic. ipf_matchsrcdst does not use this.
2703 *
2704 * watch out here, as ip is in host order and oip in network
2705 * order. Any change we make must be undone afterwards, like
2706 * oip->ip_len.
2707 */
2708 savelen = oip->ip_len;
2709 oip->ip_len = htons(len);
2710
2711 ofin.fin_flx = FI_NOCKSUM;
2712 ofin.fin_v = 4;
2713 ofin.fin_ip = oip;
2714 ofin.fin_m = NULL; /* if dereferenced, panic XXX */
2715 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
2716 (void) ipf_makefrip(IP_HL(oip) << 2, oip, &ofin);
2717 ofin.fin_ifp = fin->fin_ifp;
2718 ofin.fin_out = !fin->fin_out;
2719
2720 hv = (pr = oip->ip_p);
2721 src.in4 = oip->ip_src;
2722 hv += src.in4.s_addr;
2723 dst.in4 = oip->ip_dst;
2724 hv += dst.in4.s_addr;
2725
2726 /*
2727 * Reset the short and bad flag here because in ipf_matchsrcdst()
2728 * the flags for the current packet (fin_flx) are compared against
2729 * those for the existing session.
2730 */
2731 ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
2732
2733 /*
2734 * Put old values of ip_len back as we don't know
2735 * if we have to forward the packet or process it again.
2736 */
2737 oip->ip_len = savelen;
2738
2739 switch (oip->ip_p)
2740 {
2741 case IPPROTO_ICMP :
2742 /*
2743 * an ICMP error can only be generated as a result of an
2744 * ICMP query, not as the response on an ICMP error
2745 *
2746 * XXX theoretically ICMP_ECHOREP and the other reply's are
2747 * ICMP query's as well, but adding them here seems strange XXX
2748 */
2749 if ((ofin.fin_flx & FI_ICMPERR) != 0) {
2750 DT1(iss_icmp_icmperr, fr_info_t *, &ofin);
2751 SBUMP(ipf_state_stats.iss_icmp_icmperr);
2752 return (NULL);
2753 }
2754
2755 /*
2756 * perform a lookup of the ICMP packet in the state table
2757 */
2758 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2759 hv += icmp->icmp_id;
2760 hv = DOUBLE_HASH(hv);
2761
2762 READ_ENTER(&softc->ipf_state);
2763 for (isp = &softs->ipf_state_table[hv];
2764 ((is = *isp) != NULL); ) {
2765 isp = &is->is_hnext;
2766 if ((is->is_p != pr) || (is->is_v != 4))
2767 continue;
2768 if (is->is_pass & FR_NOICMPERR)
2769 continue;
2770
2771 is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2772 NULL, FI_ICMPCMP);
2773 if ((is != NULL) && !ipf_allowstateicmp(fin, is, &src))
2774 return (is);
2775 }
2776 RWLOCK_EXIT(&softc->ipf_state);
2777 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_1);
2778 return (NULL);
2779 case IPPROTO_TCP :
2780 case IPPROTO_UDP :
2781 break;
2782 default :
2783 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_2);
2784 return (NULL);
2785 }
2786
2787 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2788
2789 hv += tcp->th_dport;
2790 hv += tcp->th_sport;
2791 hv = DOUBLE_HASH(hv);
2792
2793 READ_ENTER(&softc->ipf_state);
2794 for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
2795 isp = &is->is_hnext;
2796 /*
2797 * Only allow this icmp though if the
2798 * encapsulated packet was allowed through the
2799 * other way around. Note that the minimal amount
2800 * of info present does not allow for checking against
2801 * tcp internals such as seq and ack numbers. Only the
2802 * ports are known to be present and can be even if the
2803 * short flag is set.
2804 */
2805 if ((is->is_p == pr) && (is->is_v == 4) &&
2806 (is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2807 tcp, FI_ICMPCMP))) {
2808 if (ipf_allowstateicmp(fin, is, &src) == 0)
2809 return (is);
2810 }
2811 }
2812 RWLOCK_EXIT(&softc->ipf_state);
2813 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_3);
2814 return (NULL);
2815 }
2816
2817
2818 /* ------------------------------------------------------------------------ */
2819 /* Function: ipf_allowstateicmp */
2820 /* Returns: int - 1 = packet denied, 0 = packet allowed */
2821 /* Parameters: fin(I) - pointer to packet information */
2822 /* is(I) - pointer to state table entry */
2823 /* src(I) - source address to check permission for */
2824 /* */
2825 /* For an ICMP packet that has so far matched a state table entry, check if */
2826 /* there are any further refinements that might mean we want to block this */
2827 /* packet. This code isn't specific to either IPv4 or IPv6. */
2828 /* ------------------------------------------------------------------------ */
2829 static int
ipf_allowstateicmp(fr_info_t * fin,ipstate_t * is,i6addr_t * src)2830 ipf_allowstateicmp(fr_info_t *fin, ipstate_t *is, i6addr_t *src)
2831 {
2832 ipf_main_softc_t *softc = fin->fin_main_soft;
2833 ipf_state_softc_t *softs = softc->ipf_state_soft;
2834 frentry_t *savefr;
2835 frentry_t *fr;
2836 u_32_t ipass;
2837 int backward;
2838 int oi;
2839 int i;
2840
2841 fr = is->is_rule;
2842 if (fr != NULL && fr->fr_icmpgrp != NULL) {
2843 savefr = fin->fin_fr;
2844 fin->fin_fr = fr->fr_icmpgrp->fg_start;
2845
2846 ipass = ipf_scanlist(fin, softc->ipf_pass);
2847 fin->fin_fr = savefr;
2848 if (FR_ISBLOCK(ipass)) {
2849 SBUMPD(ipf_state_stats, iss_icmp_headblock);
2850 return (1);
2851 }
2852 }
2853
2854 /*
2855 * i : the index of this packet (the icmp unreachable)
2856 * oi : the index of the original packet found in the
2857 * icmp header (i.e. the packet causing this icmp)
2858 * backward : original packet was backward compared to
2859 * the state
2860 */
2861 backward = IP6_NEQ(&is->is_src, src);
2862 fin->fin_rev = !backward;
2863 i = (!backward << 1) + fin->fin_out;
2864 oi = (backward << 1) + !fin->fin_out;
2865
2866 if (is->is_pass & FR_NOICMPERR) {
2867 SBUMPD(ipf_state_stats, iss_icmp_banned);
2868 return (1);
2869 }
2870 if (is->is_icmppkts[i] > is->is_pkts[oi]) {
2871 SBUMPD(ipf_state_stats, iss_icmp_toomany);
2872 return (1);
2873 }
2874
2875 DT2(iss_icmp_hits, fr_info_t *, fin, ipstate_t *, is);
2876 SBUMP(ipf_state_stats.iss_icmp_hits);
2877 is->is_icmppkts[i]++;
2878
2879 /*
2880 * we deliberately do not touch the timeouts
2881 * for the accompanying state table entry.
2882 * It remains to be seen if that is correct. XXX
2883 */
2884 return (0);
2885 }
2886
2887
2888 /* ------------------------------------------------------------------------ */
2889 /* Function: ipf_ipsmove */
2890 /* Returns: Nil */
2891 /* Parameters: is(I) - pointer to state table entry */
2892 /* hv(I) - new hash value for state table entry */
2893 /* Write Locks: ipf_state */
2894 /* */
2895 /* Move a state entry from one position in the hash table to another. */
2896 /* ------------------------------------------------------------------------ */
2897 static void
ipf_ipsmove(ipf_state_softc_t * softs,ipstate_t * is,u_int hv)2898 ipf_ipsmove(ipf_state_softc_t *softs, ipstate_t *is, u_int hv)
2899 {
2900 ipstate_t **isp;
2901 u_int hvm;
2902
2903 hvm = is->is_hv;
2904
2905 /* TRACE is, is_hv, hvm */
2906
2907 /*
2908 * Remove the hash from the old location...
2909 */
2910 isp = is->is_phnext;
2911 if (is->is_hnext)
2912 is->is_hnext->is_phnext = isp;
2913 *isp = is->is_hnext;
2914 if (softs->ipf_state_table[hvm] == NULL)
2915 softs->ipf_state_stats.iss_inuse--;
2916 softs->ipf_state_stats.iss_bucketlen[hvm]--;
2917
2918 /*
2919 * ...and put the hash in the new one.
2920 */
2921 hvm = DOUBLE_HASH(hv);
2922 is->is_hv = hvm;
2923
2924 /* TRACE is, hv, is_hv, hvm */
2925
2926 isp = &softs->ipf_state_table[hvm];
2927 if (*isp)
2928 (*isp)->is_phnext = &is->is_hnext;
2929 else
2930 softs->ipf_state_stats.iss_inuse++;
2931 softs->ipf_state_stats.iss_bucketlen[hvm]++;
2932 is->is_phnext = isp;
2933 is->is_hnext = *isp;
2934 *isp = is;
2935 }
2936
2937
2938 /* ------------------------------------------------------------------------ */
2939 /* Function: ipf_state_lookup */
2940 /* Returns: ipstate_t* - NULL == no matching state found, */
2941 /* else pointer to state information is returned */
2942 /* Parameters: fin(I) - pointer to packet information */
2943 /* tcp(I) - pointer to TCP/UDP header. */
2944 /* ifqp(O) - pointer for storing tailq timeout */
2945 /* */
2946 /* Search the state table for a matching entry to the packet described by */
2947 /* the contents of *fin. For certain protocols, when a match is found the */
2948 /* timeout queue is also selected and stored in ifpq if it is non-NULL. */
2949 /* */
2950 /* If we return NULL then no lock on ipf_state is held. */
2951 /* If we return non-null then a read-lock on ipf_state is held. */
2952 /* ------------------------------------------------------------------------ */
2953 ipstate_t *
ipf_state_lookup(fr_info_t * fin,tcphdr_t * tcp,ipftq_t ** ifqp)2954 ipf_state_lookup(fr_info_t *fin, tcphdr_t *tcp, ipftq_t **ifqp)
2955 {
2956 ipf_main_softc_t *softc = fin->fin_main_soft;
2957 ipf_state_softc_t *softs = softc->ipf_state_soft;
2958 u_int hv, hvm, pr, v, tryagain;
2959 ipstate_t *is, **isp;
2960 u_short dport, sport;
2961 i6addr_t src, dst;
2962 struct icmp *ic;
2963 ipftq_t *ifq;
2964 int oow;
2965
2966 is = NULL;
2967 ifq = NULL;
2968 tcp = fin->fin_dp;
2969 ic = (struct icmp *)tcp;
2970 hv = (pr = fin->fin_fi.fi_p);
2971 src = fin->fin_fi.fi_src;
2972 dst = fin->fin_fi.fi_dst;
2973 hv += src.in4.s_addr;
2974 hv += dst.in4.s_addr;
2975
2976 v = fin->fin_fi.fi_v;
2977 #ifdef USE_INET6
2978 if (v == 6) {
2979 hv += fin->fin_fi.fi_src.i6[1];
2980 hv += fin->fin_fi.fi_src.i6[2];
2981 hv += fin->fin_fi.fi_src.i6[3];
2982
2983 if ((fin->fin_p == IPPROTO_ICMPV6) &&
2984 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
2985 hv -= dst.in4.s_addr;
2986 } else {
2987 hv += fin->fin_fi.fi_dst.i6[1];
2988 hv += fin->fin_fi.fi_dst.i6[2];
2989 hv += fin->fin_fi.fi_dst.i6[3];
2990 }
2991 }
2992 #endif
2993 if ((v == 4) &&
2994 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
2995 if (fin->fin_out == 0) {
2996 hv -= src.in4.s_addr;
2997 } else {
2998 hv -= dst.in4.s_addr;
2999 }
3000 }
3001
3002 /* TRACE fin_saddr, fin_daddr, hv */
3003
3004 /*
3005 * Search the hash table for matching packet header info.
3006 */
3007 switch (pr)
3008 {
3009 #ifdef USE_INET6
3010 case IPPROTO_ICMPV6 :
3011 tryagain = 0;
3012 if (v == 6) {
3013 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
3014 (ic->icmp_type == ICMP6_ECHO_REPLY)) {
3015 hv += ic->icmp_id;
3016 }
3017 }
3018 READ_ENTER(&softc->ipf_state);
3019 icmp6again:
3020 hvm = DOUBLE_HASH(hv);
3021 for (isp = &softs->ipf_state_table[hvm];
3022 ((is = *isp) != NULL); ) {
3023 isp = &is->is_hnext;
3024 if ((is->is_p != pr) || (is->is_v != v))
3025 continue;
3026 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3027 if (is != NULL &&
3028 ipf_matchicmpqueryreply(v, &is->is_icmp,
3029 ic, fin->fin_rev)) {
3030 if (fin->fin_rev)
3031 ifq = &softs->ipf_state_icmpacktq;
3032 else
3033 ifq = &softs->ipf_state_icmptq;
3034 break;
3035 }
3036 }
3037
3038 if (is != NULL) {
3039 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
3040 hv += fin->fin_fi.fi_src.i6[0];
3041 hv += fin->fin_fi.fi_src.i6[1];
3042 hv += fin->fin_fi.fi_src.i6[2];
3043 hv += fin->fin_fi.fi_src.i6[3];
3044 ipf_ipsmove(softs, is, hv);
3045 MUTEX_DOWNGRADE(&softc->ipf_state);
3046 }
3047 break;
3048 }
3049 RWLOCK_EXIT(&softc->ipf_state);
3050
3051 /*
3052 * No matching icmp state entry. Perhaps this is a
3053 * response to another state entry.
3054 *
3055 * XXX With some ICMP6 packets, the "other" address is already
3056 * in the packet, after the ICMP6 header, and this could be
3057 * used in place of the multicast address. However, taking
3058 * advantage of this requires some significant code changes
3059 * to handle the specific types where that is the case.
3060 */
3061 if ((softs->ipf_state_stats.iss_wild != 0) &&
3062 ((fin->fin_flx & FI_NOWILD) == 0) &&
3063 (v == 6) && (tryagain == 0)) {
3064 hv -= fin->fin_fi.fi_src.i6[0];
3065 hv -= fin->fin_fi.fi_src.i6[1];
3066 hv -= fin->fin_fi.fi_src.i6[2];
3067 hv -= fin->fin_fi.fi_src.i6[3];
3068 tryagain = 1;
3069 WRITE_ENTER(&softc->ipf_state);
3070 goto icmp6again;
3071 }
3072
3073 is = ipf_checkicmp6matchingstate(fin);
3074 if (is != NULL)
3075 return (is);
3076 break;
3077 #endif
3078
3079 case IPPROTO_ICMP :
3080 if (v == 4) {
3081 hv += ic->icmp_id;
3082 }
3083 hv = DOUBLE_HASH(hv);
3084 READ_ENTER(&softc->ipf_state);
3085 for (isp = &softs->ipf_state_table[hv];
3086 ((is = *isp) != NULL); ) {
3087 isp = &is->is_hnext;
3088 if ((is->is_p != pr) || (is->is_v != v))
3089 continue;
3090 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3091 if ((is != NULL) &&
3092 (ic->icmp_id == is->is_icmp.ici_id) &&
3093 ipf_matchicmpqueryreply(v, &is->is_icmp,
3094 ic, fin->fin_rev)) {
3095 if (fin->fin_rev)
3096 ifq = &softs->ipf_state_icmpacktq;
3097 else
3098 ifq = &softs->ipf_state_icmptq;
3099 break;
3100 }
3101 }
3102 if (is == NULL) {
3103 RWLOCK_EXIT(&softc->ipf_state);
3104 }
3105 break;
3106
3107 case IPPROTO_TCP :
3108 case IPPROTO_UDP :
3109 ifqp = NULL;
3110 sport = htons(fin->fin_data[0]);
3111 hv += sport;
3112 dport = htons(fin->fin_data[1]);
3113 hv += dport;
3114 oow = 0;
3115 tryagain = 0;
3116 READ_ENTER(&softc->ipf_state);
3117 retry_tcpudp:
3118 hvm = DOUBLE_HASH(hv);
3119
3120 /* TRACE hv, hvm */
3121
3122 for (isp = &softs->ipf_state_table[hvm];
3123 ((is = *isp) != NULL); ) {
3124 isp = &is->is_hnext;
3125 if ((is->is_p != pr) || (is->is_v != v))
3126 continue;
3127 fin->fin_flx &= ~FI_OOW;
3128 is = ipf_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
3129 if (is != NULL) {
3130 if (pr == IPPROTO_TCP) {
3131 if (!ipf_state_tcp(softc, softs, fin,
3132 tcp, is)) {
3133 oow |= fin->fin_flx & FI_OOW;
3134 continue;
3135 }
3136 }
3137 break;
3138 }
3139 }
3140 if (is != NULL) {
3141 if (tryagain &&
3142 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
3143 hv += dport;
3144 hv += sport;
3145 ipf_ipsmove(softs, is, hv);
3146 MUTEX_DOWNGRADE(&softc->ipf_state);
3147 }
3148 break;
3149 }
3150 RWLOCK_EXIT(&softc->ipf_state);
3151
3152 if ((softs->ipf_state_stats.iss_wild != 0) &&
3153 ((fin->fin_flx & FI_NOWILD) == 0)) {
3154 if (tryagain == 0) {
3155 hv -= dport;
3156 hv -= sport;
3157 } else if (tryagain == 1) {
3158 hv = fin->fin_fi.fi_p;
3159 /*
3160 * If we try to pretend this is a reply to a
3161 * multicast/broadcast packet then we need to
3162 * exclude part of the address from the hash
3163 * calculation.
3164 */
3165 if (fin->fin_out == 0) {
3166 hv += src.in4.s_addr;
3167 } else {
3168 hv += dst.in4.s_addr;
3169 }
3170 hv += dport;
3171 hv += sport;
3172 }
3173 tryagain++;
3174 if (tryagain <= 2) {
3175 WRITE_ENTER(&softc->ipf_state);
3176 goto retry_tcpudp;
3177 }
3178 }
3179 fin->fin_flx |= oow;
3180 break;
3181
3182 #if 0
3183 case IPPROTO_GRE :
3184 gre = fin->fin_dp;
3185 if (GRE_REV(gre->gr_flags) == 1) {
3186 hv += gre->gr_call;
3187 }
3188 /* FALLTHROUGH */
3189 #endif
3190 default :
3191 ifqp = NULL;
3192 hvm = DOUBLE_HASH(hv);
3193 READ_ENTER(&softc->ipf_state);
3194 for (isp = &softs->ipf_state_table[hvm];
3195 ((is = *isp) != NULL); ) {
3196 isp = &is->is_hnext;
3197 if ((is->is_p != pr) || (is->is_v != v))
3198 continue;
3199 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3200 if (is != NULL) {
3201 ifq = &softs->ipf_state_iptq;
3202 break;
3203 }
3204 }
3205 if (is == NULL) {
3206 RWLOCK_EXIT(&softc->ipf_state);
3207 }
3208 break;
3209 }
3210
3211 if (is != NULL) {
3212 if (((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
3213 (is->is_tqehead[fin->fin_rev] != NULL))
3214 ifq = is->is_tqehead[fin->fin_rev];
3215 if (ifq != NULL && ifqp != NULL)
3216 *ifqp = ifq;
3217 } else {
3218 SBUMP(ipf_state_stats.iss_lookup_miss);
3219 }
3220 return (is);
3221 }
3222
3223
3224 /* ------------------------------------------------------------------------ */
3225 /* Function: ipf_state_check */
3226 /* Returns: frentry_t* - NULL == search failed, */
3227 /* else pointer to rule for matching state */
3228 /* Parameters: fin(I) - pointer to packet information */
3229 /* passp(I) - pointer to filtering result flags */
3230 /* */
3231 /* Check if a packet is associated with an entry in the state table. */
3232 /* ------------------------------------------------------------------------ */
3233 frentry_t *
ipf_state_check(fr_info_t * fin,u_32_t * passp)3234 ipf_state_check(fr_info_t *fin, u_32_t *passp)
3235 {
3236 ipf_main_softc_t *softc = fin->fin_main_soft;
3237 ipf_state_softc_t *softs = softc->ipf_state_soft;
3238 ipftqent_t *tqe;
3239 ipstate_t *is;
3240 frentry_t *fr;
3241 tcphdr_t *tcp;
3242 ipftq_t *ifq;
3243 u_int pass;
3244 int inout;
3245
3246 if (softs->ipf_state_lock || (softs->ipf_state_list == NULL))
3247 return (NULL);
3248
3249 if (fin->fin_flx & (FI_SHORT|FI_FRAGBODY|FI_BAD)) {
3250 SBUMPD(ipf_state_stats, iss_check_bad);
3251 return (NULL);
3252 }
3253
3254 if ((fin->fin_flx & FI_TCPUDP) ||
3255 (fin->fin_fi.fi_p == IPPROTO_ICMP)
3256 #ifdef USE_INET6
3257 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
3258 #endif
3259 )
3260 tcp = fin->fin_dp;
3261 else
3262 tcp = NULL;
3263
3264 ifq = NULL;
3265 /*
3266 * Search the hash table for matching packet header info.
3267 */
3268 is = ipf_state_lookup(fin, tcp, &ifq);
3269
3270 switch (fin->fin_p)
3271 {
3272 #ifdef USE_INET6
3273 case IPPROTO_ICMPV6 :
3274 if (is != NULL)
3275 break;
3276 if (fin->fin_v == 6) {
3277 is = ipf_checkicmp6matchingstate(fin);
3278 }
3279 break;
3280 #endif
3281 case IPPROTO_ICMP :
3282 if (is != NULL)
3283 break;
3284 /*
3285 * No matching icmp state entry. Perhaps this is a
3286 * response to another state entry.
3287 */
3288 is = ipf_checkicmpmatchingstate(fin);
3289 break;
3290
3291 case IPPROTO_TCP :
3292 if (is == NULL)
3293 break;
3294
3295 if (is->is_pass & FR_NEWISN) {
3296 if (fin->fin_out == 0)
3297 ipf_fixinisn(fin, is);
3298 else if (fin->fin_out == 1)
3299 ipf_fixoutisn(fin, is);
3300 }
3301 break;
3302 default :
3303 if (fin->fin_rev)
3304 ifq = &softs->ipf_state_udpacktq;
3305 else
3306 ifq = &softs->ipf_state_udptq;
3307 break;
3308 }
3309 if (is == NULL) {
3310 SBUMP(ipf_state_stats.iss_check_miss);
3311 return (NULL);
3312 }
3313
3314 fr = is->is_rule;
3315 if (fr != NULL) {
3316 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
3317 if (fin->fin_nattag == NULL) {
3318 RWLOCK_EXIT(&softc->ipf_state);
3319 SBUMPD(ipf_state_stats, iss_check_notag);
3320 return (NULL);
3321 }
3322 if (ipf_matchtag(&fr->fr_nattag, fin->fin_nattag)!=0) {
3323 RWLOCK_EXIT(&softc->ipf_state);
3324 SBUMPD(ipf_state_stats, iss_check_nattag);
3325 return (NULL);
3326 }
3327 }
3328 (void) strncpy(fin->fin_group, FR_NAME(fr, fr_group),
3329 FR_GROUPLEN);
3330 fin->fin_icode = fr->fr_icode;
3331 }
3332
3333 fin->fin_rule = is->is_rulen;
3334 fin->fin_fr = fr;
3335
3336 /*
3337 * If this packet is a fragment and the rule says to track fragments,
3338 * then create a new fragment cache entry.
3339 */
3340 if (fin->fin_flx & FI_FRAG && FR_ISPASS(is->is_pass) &&
3341 is->is_pass & FR_KEEPFRAG)
3342 (void) ipf_frag_new(softc, fin, is->is_pass);
3343
3344 /*
3345 * For TCP packets, ifq == NULL. For all others, check if this new
3346 * queue is different to the last one it was on and move it if so.
3347 */
3348 tqe = &is->is_sti;
3349 if ((tqe->tqe_flags & TQE_RULEBASED) != 0)
3350 ifq = is->is_tqehead[fin->fin_rev];
3351
3352 MUTEX_ENTER(&is->is_lock);
3353
3354 if (ifq != NULL)
3355 ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq, ifq);
3356
3357 inout = (fin->fin_rev << 1) + fin->fin_out;
3358 is->is_pkts[inout]++;
3359 is->is_bytes[inout] += fin->fin_plen;
3360 fin->fin_pktnum = is->is_pkts[inout] + is->is_icmppkts[inout];
3361
3362 MUTEX_EXIT(&is->is_lock);
3363
3364 pass = is->is_pass;
3365
3366 if (is->is_flags & IS_STATESYNC)
3367 ipf_sync_update(softc, SMC_STATE, fin, is->is_sync);
3368
3369 RWLOCK_EXIT(&softc->ipf_state);
3370
3371 SBUMP(ipf_state_stats.iss_hits);
3372
3373 fin->fin_dif = &is->is_dif;
3374 fin->fin_tif = &is->is_tifs[fin->fin_rev];
3375 fin->fin_flx |= FI_STATE;
3376 if ((pass & FR_LOGFIRST) != 0)
3377 pass &= ~(FR_LOGFIRST|FR_LOG);
3378 *passp = pass;
3379 return (fr);
3380 }
3381
3382
3383 /* ------------------------------------------------------------------------ */
3384 /* Function: ipf_fixoutisn */
3385 /* Returns: Nil */
3386 /* Parameters: fin(I) - pointer to packet information */
3387 /* is(I) - pointer to master state structure */
3388 /* */
3389 /* Called only for outbound packets, adjusts the sequence number and the */
3390 /* TCP checksum to match that change. */
3391 /* ------------------------------------------------------------------------ */
3392 static void
ipf_fixoutisn(fr_info_t * fin,ipstate_t * is)3393 ipf_fixoutisn(fr_info_t *fin, ipstate_t *is)
3394 {
3395 tcphdr_t *tcp;
3396 int rev;
3397 u_32_t seq;
3398
3399 tcp = fin->fin_dp;
3400 rev = fin->fin_rev;
3401 if ((is->is_flags & IS_ISNSYN) != 0) {
3402 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3403 seq = ntohl(tcp->th_seq);
3404 seq += is->is_isninc[0];
3405 tcp->th_seq = htonl(seq);
3406 ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3407 }
3408 }
3409 if ((is->is_flags & IS_ISNACK) != 0) {
3410 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3411 seq = ntohl(tcp->th_seq);
3412 seq += is->is_isninc[1];
3413 tcp->th_seq = htonl(seq);
3414 ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3415 }
3416 }
3417 }
3418
3419
3420 /* ------------------------------------------------------------------------ */
3421 /* Function: ipf_fixinisn */
3422 /* Returns: Nil */
3423 /* Parameters: fin(I) - pointer to packet information */
3424 /* is(I) - pointer to master state structure */
3425 /* */
3426 /* Called only for inbound packets, adjusts the acknowledge number and the */
3427 /* TCP checksum to match that change. */
3428 /* ------------------------------------------------------------------------ */
3429 static void
ipf_fixinisn(fr_info_t * fin,ipstate_t * is)3430 ipf_fixinisn(fr_info_t *fin, ipstate_t *is)
3431 {
3432 tcphdr_t *tcp;
3433 int rev;
3434 u_32_t ack;
3435
3436 tcp = fin->fin_dp;
3437 rev = fin->fin_rev;
3438 if ((is->is_flags & IS_ISNSYN) != 0) {
3439 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3440 ack = ntohl(tcp->th_ack);
3441 ack -= is->is_isninc[0];
3442 tcp->th_ack = htonl(ack);
3443 ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3444 }
3445 }
3446 if ((is->is_flags & IS_ISNACK) != 0) {
3447 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3448 ack = ntohl(tcp->th_ack);
3449 ack -= is->is_isninc[1];
3450 tcp->th_ack = htonl(ack);
3451 ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3452 }
3453 }
3454 }
3455
3456
3457 /* ------------------------------------------------------------------------ */
3458 /* Function: ipf_state_sync */
3459 /* Returns: Nil */
3460 /* Parameters: softc(I) - pointer to soft context main structure */
3461 /* ifp(I) - pointer to interface */
3462 /* */
3463 /* Walk through all state entries and if an interface pointer match is */
3464 /* found then look it up again, based on its name in case the pointer has */
3465 /* changed since last time. */
3466 /* */
3467 /* If ifp is passed in as being non-null then we are only doing updates for */
3468 /* existing, matching, uses of it. */
3469 /* ------------------------------------------------------------------------ */
3470 void
ipf_state_sync(ipf_main_softc_t * softc,void * ifp)3471 ipf_state_sync(ipf_main_softc_t *softc, void *ifp)
3472 {
3473 ipf_state_softc_t *softs = softc->ipf_state_soft;
3474 ipstate_t *is;
3475 int i;
3476
3477 if (softc->ipf_running <= 0)
3478 return;
3479
3480 WRITE_ENTER(&softc->ipf_state);
3481
3482 if (softc->ipf_running <= 0) {
3483 RWLOCK_EXIT(&softc->ipf_state);
3484 return;
3485 }
3486
3487 for (is = softs->ipf_state_list; is; is = is->is_next) {
3488 /*
3489 * Look up all the interface names in the state entry.
3490 */
3491 for (i = 0; i < FR_NUM(is->is_ifp); i++) {
3492 if (ifp == NULL || ifp == is->is_ifp[i])
3493 is->is_ifp[i] = ipf_resolvenic(softc,
3494 is->is_ifname[i],
3495 is->is_v);
3496 }
3497 }
3498 RWLOCK_EXIT(&softc->ipf_state);
3499 }
3500
3501
3502 /* ------------------------------------------------------------------------ */
3503 /* Function: ipf_state_del */
3504 /* Returns: int - 0 = deleted, else refernce count on active struct */
3505 /* Parameters: softc(I) - pointer to soft context main structure */
3506 /* is(I) - pointer to state structure to delete */
3507 /* why(I) - if not 0, log reason why it was deleted */
3508 /* Write Locks: ipf_state */
3509 /* */
3510 /* Deletes a state entry from the enumerated list as well as the hash table */
3511 /* and timeout queue lists. Make adjustments to hash table statistics and */
3512 /* global counters as required. */
3513 /* ------------------------------------------------------------------------ */
3514 static int
ipf_state_del(ipf_main_softc_t * softc,ipstate_t * is,int why)3515 ipf_state_del(ipf_main_softc_t *softc, ipstate_t *is, int why)
3516 {
3517 ipf_state_softc_t *softs = softc->ipf_state_soft;
3518 int orphan = 1;
3519 frentry_t *fr;
3520
3521 /*
3522 * Since we want to delete this, remove it from the state table,
3523 * where it can be found & used, first.
3524 */
3525 if (is->is_phnext != NULL) {
3526 *is->is_phnext = is->is_hnext;
3527 if (is->is_hnext != NULL)
3528 is->is_hnext->is_phnext = is->is_phnext;
3529 if (softs->ipf_state_table[is->is_hv] == NULL)
3530 softs->ipf_state_stats.iss_inuse--;
3531 softs->ipf_state_stats.iss_bucketlen[is->is_hv]--;
3532
3533 is->is_phnext = NULL;
3534 is->is_hnext = NULL;
3535 orphan = 0;
3536 }
3537
3538 /*
3539 * Because ipf_state_stats.iss_wild is a count of entries in the state
3540 * table that have wildcard flags set, only decerement it once
3541 * and do it here.
3542 */
3543 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
3544 if (!(is->is_flags & SI_CLONED)) {
3545 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
3546 }
3547 is->is_flags &= ~(SI_WILDP|SI_WILDA);
3548 }
3549
3550 /*
3551 * Next, remove it from the timeout queue it is in.
3552 */
3553 if (is->is_sti.tqe_ifq != NULL)
3554 ipf_deletequeueentry(&is->is_sti);
3555
3556 /*
3557 * If it is still in use by something else, do not go any further,
3558 * but note that at this point it is now an orphan. How can this
3559 * be? ipf_state_flush() calls ipf_delete() directly because it wants
3560 * to empty the table out and if something has a hold on a state
3561 * entry (such as ipfstat), it'll do the deref path that'll bring
3562 * us back here to do the real delete & free.
3563 */
3564 MUTEX_ENTER(&is->is_lock);
3565 if (is->is_me != NULL) {
3566 *is->is_me = NULL;
3567 is->is_me = NULL;
3568 is->is_ref--;
3569 }
3570 is->is_ref--;
3571 if (is->is_ref > 0) {
3572 int refs;
3573
3574 refs = is->is_ref;
3575 MUTEX_EXIT(&is->is_lock);
3576 if (!orphan)
3577 softs->ipf_state_stats.iss_orphan++;
3578 return (refs);
3579 }
3580
3581 fr = is->is_rule;
3582 is->is_rule = NULL;
3583 if (fr != NULL) {
3584 if (fr->fr_srctrack.ht_max_nodes != 0) {
3585 (void) ipf_ht_node_del(&fr->fr_srctrack,
3586 is->is_family, &is->is_src);
3587 }
3588 }
3589
3590 ASSERT(is->is_ref == 0);
3591 MUTEX_EXIT(&is->is_lock);
3592
3593 if (is->is_tqehead[0] != NULL) {
3594 if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
3595 ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
3596 }
3597 if (is->is_tqehead[1] != NULL) {
3598 if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
3599 ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
3600 }
3601
3602 if (is->is_sync)
3603 ipf_sync_del_state(softc->ipf_sync_soft, is->is_sync);
3604
3605 /*
3606 * Now remove it from the linked list of known states
3607 */
3608 if (is->is_pnext != NULL) {
3609 *is->is_pnext = is->is_next;
3610
3611 if (is->is_next != NULL)
3612 is->is_next->is_pnext = is->is_pnext;
3613
3614 is->is_pnext = NULL;
3615 is->is_next = NULL;
3616 }
3617
3618 if (softs->ipf_state_logging != 0 && why != 0)
3619 ipf_state_log(softc, is, why);
3620
3621 if (is->is_p == IPPROTO_TCP)
3622 softs->ipf_state_stats.iss_fin++;
3623 else
3624 softs->ipf_state_stats.iss_expire++;
3625 if (orphan)
3626 softs->ipf_state_stats.iss_orphan--;
3627
3628 if (fr != NULL) {
3629 fr->fr_statecnt--;
3630 (void) ipf_derefrule(softc, &fr);
3631 }
3632
3633 softs->ipf_state_stats.iss_active_proto[is->is_p]--;
3634
3635 MUTEX_DESTROY(&is->is_lock);
3636 KFREE(is);
3637 softs->ipf_state_stats.iss_active--;
3638
3639 return (0);
3640 }
3641
3642
3643 /* ------------------------------------------------------------------------ */
3644 /* Function: ipf_state_expire */
3645 /* Returns: Nil */
3646 /* Parameters: softc(I) - pointer to soft context main structure */
3647 /* */
3648 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */
3649 /* used here is to keep the queue sorted with the oldest things at the top */
3650 /* and the youngest at the bottom. So if the top one doesn't need to be */
3651 /* expired then neither will any under it. */
3652 /* ------------------------------------------------------------------------ */
3653 void
ipf_state_expire(ipf_main_softc_t * softc)3654 ipf_state_expire(ipf_main_softc_t *softc)
3655 {
3656 ipf_state_softc_t *softs = softc->ipf_state_soft;
3657 ipftq_t *ifq, *ifqnext;
3658 ipftqent_t *tqe, *tqn;
3659 ipstate_t *is;
3660 SPL_INT(s);
3661
3662 SPL_NET(s);
3663 WRITE_ENTER(&softc->ipf_state);
3664 for (ifq = softs->ipf_state_tcptq; ifq != NULL; ifq = ifq->ifq_next)
3665 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3666 if (tqe->tqe_die > softc->ipf_ticks)
3667 break;
3668 tqn = tqe->tqe_next;
3669 is = tqe->tqe_parent;
3670 ipf_state_del(softc, is, ISL_EXPIRE);
3671 }
3672
3673 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3674 ifqnext = ifq->ifq_next;
3675
3676 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3677 if (tqe->tqe_die > softc->ipf_ticks)
3678 break;
3679 tqn = tqe->tqe_next;
3680 is = tqe->tqe_parent;
3681 ipf_state_del(softc, is, ISL_EXPIRE);
3682 }
3683 }
3684
3685 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3686 ifqnext = ifq->ifq_next;
3687
3688 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
3689 (ifq->ifq_ref == 0)) {
3690 ipf_freetimeoutqueue(softc, ifq);
3691 }
3692 }
3693
3694 if (softs->ipf_state_doflush) {
3695 (void) ipf_state_flush(softc, 2, 0);
3696 softs->ipf_state_doflush = 0;
3697 softs->ipf_state_wm_last = softc->ipf_ticks;
3698 }
3699
3700 RWLOCK_EXIT(&softc->ipf_state);
3701 SPL_X(s);
3702 }
3703
3704
3705 /* ------------------------------------------------------------------------ */
3706 /* Function: ipf_state_flush */
3707 /* Returns: int - 0 == success, -1 == failure */
3708 /* Parameters: softc(I) - pointer to soft context main structure */
3709 /* which(I) - which flush action to perform */
3710 /* proto(I) - which protocol to flush (0 == ALL) */
3711 /* Write Locks: ipf_state */
3712 /* */
3713 /* Flush state tables. Three actions currently defined: */
3714 /* which == 0 : flush all state table entries */
3715 /* which == 1 : flush TCP connections which have started to close but are */
3716 /* stuck for some reason. */
3717 /* which == 2 : flush TCP connections which have been idle for a long time, */
3718 /* starting at > 4 days idle and working back in successive half-*/
3719 /* days to at most 12 hours old. If this fails to free enough */
3720 /* slots then work backwards in half hour slots to 30 minutes. */
3721 /* If that too fails, then work backwards in 30 second intervals */
3722 /* for the last 30 minutes to at worst 30 seconds idle. */
3723 /* ------------------------------------------------------------------------ */
3724 int
ipf_state_flush(ipf_main_softc_t * softc,int which,int proto)3725 ipf_state_flush(ipf_main_softc_t *softc, int which, int proto)
3726 {
3727 ipf_state_softc_t *softs = softc->ipf_state_soft;
3728 ipftqent_t *tqe, *tqn;
3729 ipstate_t *is, **isp;
3730 ipftq_t *ifq;
3731 int removed;
3732 SPL_INT(s);
3733
3734 removed = 0;
3735
3736 SPL_NET(s);
3737
3738 switch (which)
3739 {
3740 case 0 :
3741 SBUMP(ipf_state_stats.iss_flush_all);
3742 /*
3743 * Style 0 flush removes everything...
3744 */
3745 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3746 if ((proto != 0) && (is->is_v != proto)) {
3747 isp = &is->is_next;
3748 continue;
3749 }
3750 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3751 removed++;
3752 else
3753 isp = &is->is_next;
3754 }
3755 break;
3756
3757 case 1 :
3758 SBUMP(ipf_state_stats.iss_flush_closing);
3759 /*
3760 * Since we're only interested in things that are closing,
3761 * we can start with the appropriate timeout queue.
3762 */
3763 for (ifq = softs->ipf_state_tcptq + IPF_TCPS_CLOSE_WAIT;
3764 ifq != NULL; ifq = ifq->ifq_next) {
3765
3766 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3767 tqn = tqe->tqe_next;
3768 is = tqe->tqe_parent;
3769 if (is->is_p != IPPROTO_TCP)
3770 break;
3771 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3772 removed++;
3773 }
3774 }
3775
3776 /*
3777 * Also need to look through the user defined queues.
3778 */
3779 for (ifq = softs->ipf_state_usertq; ifq != NULL;
3780 ifq = ifq->ifq_next) {
3781 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3782 tqn = tqe->tqe_next;
3783 is = tqe->tqe_parent;
3784 if (is->is_p != IPPROTO_TCP)
3785 continue;
3786
3787 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
3788 (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
3789 if (ipf_state_del(softc, is,
3790 ISL_FLUSH) == 0)
3791 removed++;
3792 }
3793 }
3794 }
3795 break;
3796
3797 case 2 :
3798 break;
3799
3800 /*
3801 * Args 5-11 correspond to flushing those particular states
3802 * for TCP connections.
3803 */
3804 case IPF_TCPS_CLOSE_WAIT :
3805 case IPF_TCPS_FIN_WAIT_1 :
3806 case IPF_TCPS_CLOSING :
3807 case IPF_TCPS_LAST_ACK :
3808 case IPF_TCPS_FIN_WAIT_2 :
3809 case IPF_TCPS_TIME_WAIT :
3810 case IPF_TCPS_CLOSED :
3811 SBUMP(ipf_state_stats.iss_flush_queue);
3812 tqn = softs->ipf_state_tcptq[which].ifq_head;
3813 while (tqn != NULL) {
3814 tqe = tqn;
3815 tqn = tqe->tqe_next;
3816 is = tqe->tqe_parent;
3817 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3818 removed++;
3819 }
3820 break;
3821
3822 default :
3823 if (which < 30)
3824 break;
3825
3826 SBUMP(ipf_state_stats.iss_flush_state);
3827 /*
3828 * Take a large arbitrary number to mean the number of seconds
3829 * for which which consider to be the maximum value we'll allow
3830 * the expiration to be.
3831 */
3832 which = IPF_TTLVAL(which);
3833 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3834 if ((proto == 0) || (is->is_v == proto)) {
3835 if (softc->ipf_ticks - is->is_touched > which) {
3836 if (ipf_state_del(softc, is,
3837 ISL_FLUSH) == 0) {
3838 removed++;
3839 continue;
3840 }
3841 }
3842 }
3843 isp = &is->is_next;
3844 }
3845 break;
3846 }
3847
3848 if (which != 2) {
3849 SPL_X(s);
3850 return (removed);
3851 }
3852
3853 SBUMP(ipf_state_stats.iss_flush_timeout);
3854 /*
3855 * Asked to remove inactive entries because the table is full, try
3856 * again, 3 times, if first attempt failed with a different criteria
3857 * each time. The order tried in must be in decreasing age.
3858 * Another alternative is to implement random drop and drop N entries
3859 * at random until N have been freed up.
3860 */
3861 if (softc->ipf_ticks - softs->ipf_state_wm_last >
3862 softs->ipf_state_wm_freq) {
3863 removed = ipf_queueflush(softc, ipf_state_flush_entry,
3864 softs->ipf_state_tcptq,
3865 softs->ipf_state_usertq,
3866 &softs->ipf_state_stats.iss_active,
3867 softs->ipf_state_size,
3868 softs->ipf_state_wm_low);
3869 softs->ipf_state_wm_last = softc->ipf_ticks;
3870 }
3871
3872 SPL_X(s);
3873 return (removed);
3874 }
3875
3876
3877 /* ------------------------------------------------------------------------ */
3878 /* Function: ipf_state_flush_entry */
3879 /* Returns: int - 0 = entry deleted, else not deleted */
3880 /* Parameters: softc(I) - pointer to soft context main structure */
3881 /* entry(I) - pointer to state structure to delete */
3882 /* Write Locks: ipf_state */
3883 /* */
3884 /* This function is a stepping stone between ipf_queueflush() and */
3885 /* ipf_state_del(). It is used so we can provide a uniform interface via */
3886 /* the ipf_queueflush() function. */
3887 /* ------------------------------------------------------------------------ */
3888 static int
ipf_state_flush_entry(ipf_main_softc_t * softc,void * entry)3889 ipf_state_flush_entry(ipf_main_softc_t *softc, void *entry)
3890 {
3891 return (ipf_state_del(softc, entry, ISL_FLUSH));
3892 }
3893
3894
3895 /* ------------------------------------------------------------------------ */
3896 /* Function: ipf_tcp_age */
3897 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */
3898 /* Parameters: tqe(I) - pointer to timeout queue information */
3899 /* fin(I) - pointer to packet information */
3900 /* tqtab(I) - TCP timeout queue table this is in */
3901 /* flags(I) - flags from state/NAT entry */
3902 /* ok(I) - can we advance state */
3903 /* */
3904 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */
3905 /* */
3906 /* - (try to) base state transitions on real evidence only, */
3907 /* i.e. packets that are sent and have been received by ipfilter; */
3908 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */
3909 /* */
3910 /* - deal with half-closed connections correctly; */
3911 /* */
3912 /* - store the state of the source in state[0] such that ipfstat */
3913 /* displays the state as source/dest instead of dest/source; the calls */
3914 /* to ipf_tcp_age have been changed accordingly. */
3915 /* */
3916 /* Internal Parameters: */
3917 /* */
3918 /* state[0] = state of source (host that initiated connection) */
3919 /* state[1] = state of dest (host that accepted the connection) */
3920 /* */
3921 /* dir == 0 : a packet from source to dest */
3922 /* dir == 1 : a packet from dest to source */
3923 /* */
3924 /* A typical procession for a connection is as follows: */
3925 /* */
3926 /* +--------------+-------------------+ */
3927 /* | Side '0' | Side '1' | */
3928 /* +--------------+-------------------+ */
3929 /* | 0 -> 1 (SYN) | | */
3930 /* | | 0 -> 2 (SYN-ACK) | */
3931 /* | 1 -> 3 (ACK) | | */
3932 /* | | 2 -> 4 (ACK-PUSH) | */
3933 /* | 3 -> 4 (ACK) | | */
3934 /* | ... | ... | */
3935 /* | | 4 -> 6 (FIN-ACK) | */
3936 /* | 4 -> 5 (ACK) | | */
3937 /* | | 6 -> 6 (ACK-PUSH) | */
3938 /* | 5 -> 5 (ACK) | | */
3939 /* | 5 -> 8 (FIN) | | */
3940 /* | | 6 -> 10 (ACK) | */
3941 /* +--------------+-------------------+ */
3942 /* */
3943 /* Locking: it is assumed that the parent of the tqe structure is locked. */
3944 /* ------------------------------------------------------------------------ */
3945 int
ipf_tcp_age(ipftqent_t * tqe,fr_info_t * fin,ipftq_t * tqtab,int flags,int ok)3946 ipf_tcp_age(ipftqent_t *tqe, fr_info_t *fin, ipftq_t *tqtab, int flags, int ok)
3947 {
3948 ipf_main_softc_t *softc = fin->fin_main_soft;
3949 int dlen, ostate, nstate, rval, dir;
3950 u_char tcpflags;
3951 tcphdr_t *tcp;
3952
3953 tcp = fin->fin_dp;
3954
3955 rval = 0;
3956 dir = fin->fin_rev;
3957 tcpflags = tcp->th_flags;
3958 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
3959 ostate = tqe->tqe_state[1 - dir];
3960 nstate = tqe->tqe_state[dir];
3961
3962 if (tcpflags & TH_RST) {
3963 if (!(tcpflags & TH_PUSH) && !dlen)
3964 nstate = IPF_TCPS_CLOSED;
3965 else
3966 nstate = IPF_TCPS_CLOSE_WAIT;
3967
3968 if (ostate <= IPF_TCPS_ESTABLISHED) {
3969 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT;
3970 }
3971 rval = 1;
3972 } else {
3973 switch (nstate)
3974 {
3975 case IPF_TCPS_LISTEN: /* 0 */
3976 if ((tcpflags & TH_OPENING) == TH_OPENING) {
3977 /*
3978 * 'dir' received an S and sends SA in
3979 * response, LISTEN -> SYN_RECEIVED
3980 */
3981 nstate = IPF_TCPS_SYN_RECEIVED;
3982 rval = 1;
3983 } else if ((tcpflags & TH_OPENING) == TH_SYN) {
3984 /* 'dir' sent S, LISTEN -> SYN_SENT */
3985 nstate = IPF_TCPS_SYN_SENT;
3986 rval = 1;
3987 }
3988 /*
3989 * the next piece of code makes it possible to get
3990 * already established connections into the state table
3991 * after a restart or reload of the filter rules; this
3992 * does not work when a strict 'flags S keep state' is
3993 * used for tcp connections of course
3994 */
3995 if (((flags & IS_TCPFSM) == 0) &&
3996 ((tcpflags & TH_ACKMASK) == TH_ACK)) {
3997 /*
3998 * we saw an A, guess 'dir' is in ESTABLISHED
3999 * mode
4000 */
4001 switch (ostate)
4002 {
4003 case IPF_TCPS_LISTEN :
4004 case IPF_TCPS_SYN_RECEIVED :
4005 nstate = IPF_TCPS_HALF_ESTAB;
4006 rval = 1;
4007 break;
4008 case IPF_TCPS_HALF_ESTAB :
4009 case IPF_TCPS_ESTABLISHED :
4010 nstate = IPF_TCPS_ESTABLISHED;
4011 rval = 1;
4012 break;
4013 default :
4014 break;
4015 }
4016 }
4017 /*
4018 * TODO: besides regular ACK packets we can have other
4019 * packets as well; it is yet to be determined how we
4020 * should initialize the states in those cases
4021 */
4022 break;
4023
4024 case IPF_TCPS_SYN_SENT: /* 1 */
4025 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
4026 /*
4027 * A retransmitted SYN packet. We do not reset
4028 * the timeout here to ipf_tcptimeout because a
4029 * connection connect timeout does not renew
4030 * after every packet that is sent. We need to
4031 * set rval so as to indicate the packet has
4032 * passed the check for its flags being valid
4033 * in the TCP FSM. Setting rval to 2 has the
4034 * result of not resetting the timeout.
4035 */
4036 rval = 2;
4037 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
4038 TH_ACK) {
4039 /*
4040 * we see an A from 'dir' which is in SYN_SENT
4041 * state: 'dir' sent an A in response to an SA
4042 * which it received, SYN_SENT -> ESTABLISHED
4043 */
4044 nstate = IPF_TCPS_ESTABLISHED;
4045 rval = 1;
4046 } else if (tcpflags & TH_FIN) {
4047 /*
4048 * we see an F from 'dir' which is in SYN_SENT
4049 * state and wants to close its side of the
4050 * connection; SYN_SENT -> FIN_WAIT_1
4051 */
4052 nstate = IPF_TCPS_FIN_WAIT_1;
4053 rval = 1;
4054 } else if ((tcpflags & TH_OPENING) == TH_OPENING) {
4055 /*
4056 * we see an SA from 'dir' which is already in
4057 * SYN_SENT state, this means we have a
4058 * simultaneous open; SYN_SENT -> SYN_RECEIVED
4059 */
4060 nstate = IPF_TCPS_SYN_RECEIVED;
4061 rval = 1;
4062 }
4063 break;
4064
4065 case IPF_TCPS_SYN_RECEIVED: /* 2 */
4066 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
4067 /*
4068 * we see an A from 'dir' which was in
4069 * SYN_RECEIVED state so it must now be in
4070 * established state, SYN_RECEIVED ->
4071 * ESTABLISHED
4072 */
4073 nstate = IPF_TCPS_ESTABLISHED;
4074 rval = 1;
4075 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
4076 TH_OPENING) {
4077 /*
4078 * We see an SA from 'dir' which is already in
4079 * SYN_RECEIVED state.
4080 */
4081 rval = 2;
4082 } else if (tcpflags & TH_FIN) {
4083 /*
4084 * we see an F from 'dir' which is in
4085 * SYN_RECEIVED state and wants to close its
4086 * side of the connection; SYN_RECEIVED ->
4087 * FIN_WAIT_1
4088 */
4089 nstate = IPF_TCPS_FIN_WAIT_1;
4090 rval = 1;
4091 }
4092 break;
4093
4094 case IPF_TCPS_HALF_ESTAB: /* 3 */
4095 if (tcpflags & TH_FIN) {
4096 nstate = IPF_TCPS_FIN_WAIT_1;
4097 rval = 1;
4098 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) {
4099 /*
4100 * If we've picked up a connection in mid
4101 * flight, we could be looking at a follow on
4102 * packet from the same direction as the one
4103 * that created this state. Recognise it but
4104 * do not advance the entire connection's
4105 * state.
4106 */
4107 switch (ostate)
4108 {
4109 case IPF_TCPS_LISTEN :
4110 case IPF_TCPS_SYN_SENT :
4111 case IPF_TCPS_SYN_RECEIVED :
4112 rval = 1;
4113 break;
4114 case IPF_TCPS_HALF_ESTAB :
4115 case IPF_TCPS_ESTABLISHED :
4116 nstate = IPF_TCPS_ESTABLISHED;
4117 rval = 1;
4118 break;
4119 default :
4120 break;
4121 }
4122 }
4123 break;
4124
4125 case IPF_TCPS_ESTABLISHED: /* 4 */
4126 rval = 1;
4127 if (tcpflags & TH_FIN) {
4128 /*
4129 * 'dir' closed its side of the connection;
4130 * this gives us a half-closed connection;
4131 * ESTABLISHED -> FIN_WAIT_1
4132 */
4133 if (ostate == IPF_TCPS_FIN_WAIT_1) {
4134 nstate = IPF_TCPS_CLOSING;
4135 } else {
4136 nstate = IPF_TCPS_FIN_WAIT_1;
4137 }
4138 } else if (tcpflags & TH_ACK) {
4139 /*
4140 * an ACK, should we exclude other flags here?
4141 */
4142 if (ostate == IPF_TCPS_FIN_WAIT_1) {
4143 /*
4144 * We know the other side did an active
4145 * close, so we are ACKing the recvd
4146 * FIN packet (does the window matching
4147 * code guarantee this?) and go into
4148 * CLOSE_WAIT state; this gives us a
4149 * half-closed connection
4150 */
4151 nstate = IPF_TCPS_CLOSE_WAIT;
4152 } else if (ostate < IPF_TCPS_CLOSE_WAIT) {
4153 /*
4154 * still a fully established
4155 * connection reset timeout
4156 */
4157 nstate = IPF_TCPS_ESTABLISHED;
4158 }
4159 }
4160 break;
4161
4162 case IPF_TCPS_CLOSE_WAIT: /* 5 */
4163 rval = 1;
4164 if (tcpflags & TH_FIN) {
4165 /*
4166 * application closed and 'dir' sent a FIN,
4167 * we're now going into LAST_ACK state
4168 */
4169 nstate = IPF_TCPS_LAST_ACK;
4170 } else {
4171 /*
4172 * we remain in CLOSE_WAIT because the other
4173 * side has closed already and we did not
4174 * close our side yet; reset timeout
4175 */
4176 nstate = IPF_TCPS_CLOSE_WAIT;
4177 }
4178 break;
4179
4180 case IPF_TCPS_FIN_WAIT_1: /* 6 */
4181 rval = 1;
4182 if ((tcpflags & TH_ACK) &&
4183 ostate > IPF_TCPS_CLOSE_WAIT) {
4184 /*
4185 * if the other side is not active anymore
4186 * it has sent us a FIN packet that we are
4187 * ack'ing now with an ACK; this means both
4188 * sides have now closed the connection and
4189 * we go into TIME_WAIT
4190 */
4191 /*
4192 * XXX: how do we know we really are ACKing
4193 * the FIN packet here? does the window code
4194 * guarantee that?
4195 */
4196 nstate = IPF_TCPS_LAST_ACK;
4197 } else {
4198 /*
4199 * we closed our side of the connection
4200 * already but the other side is still active
4201 * (ESTABLISHED/CLOSE_WAIT); continue with
4202 * this half-closed connection
4203 */
4204 nstate = IPF_TCPS_FIN_WAIT_1;
4205 }
4206 break;
4207
4208 case IPF_TCPS_CLOSING: /* 7 */
4209 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) {
4210 nstate = IPF_TCPS_TIME_WAIT;
4211 }
4212 rval = 1;
4213 break;
4214
4215 case IPF_TCPS_LAST_ACK: /* 8 */
4216 if (tcpflags & TH_ACK) {
4217 rval = 1;
4218 }
4219 /*
4220 * we cannot detect when we go out of LAST_ACK state
4221 * to CLOSED because that is based on the reception
4222 * of ACK packets; ipfilter can only detect that a
4223 * packet has been sent by a host
4224 */
4225 break;
4226
4227 case IPF_TCPS_FIN_WAIT_2: /* 9 */
4228 /* NOT USED */
4229 break;
4230
4231 case IPF_TCPS_TIME_WAIT: /* 10 */
4232 /* we're in 2MSL timeout now */
4233 if (ostate == IPF_TCPS_LAST_ACK) {
4234 nstate = IPF_TCPS_CLOSED;
4235 rval = 1;
4236 } else {
4237 rval = 2;
4238 }
4239 break;
4240
4241 case IPF_TCPS_CLOSED: /* 11 */
4242 rval = 2;
4243 break;
4244
4245 default :
4246 #if !defined(_KERNEL)
4247 abort();
4248 #endif
4249 break;
4250 }
4251 }
4252
4253 /*
4254 * If rval == 2 then do not update the queue position, but treat the
4255 * packet as being ok.
4256 */
4257 if (rval == 2)
4258 rval = 1;
4259 else if (rval == 1) {
4260 if (ok)
4261 tqe->tqe_state[dir] = nstate;
4262 if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
4263 ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq,
4264 tqtab + nstate);
4265 }
4266
4267 return (rval);
4268 }
4269
4270
4271 /* ------------------------------------------------------------------------ */
4272 /* Function: ipf_state_log */
4273 /* Returns: Nil */
4274 /* Parameters: softc(I) - pointer to soft context main structure */
4275 /* is(I) - pointer to state structure */
4276 /* type(I) - type of log entry to create */
4277 /* */
4278 /* Creates a state table log entry using the state structure and type info. */
4279 /* passed in. Log packet/byte counts, source/destination address and other */
4280 /* protocol specific information. */
4281 /* ------------------------------------------------------------------------ */
4282 void
ipf_state_log(ipf_main_softc_t * softc,struct ipstate * is,u_int type)4283 ipf_state_log(ipf_main_softc_t *softc, struct ipstate *is, u_int type)
4284 {
4285 #ifdef IPFILTER_LOG
4286 struct ipslog ipsl;
4287 size_t sizes[1];
4288 void *items[1];
4289 int types[1];
4290
4291 /*
4292 * Copy information out of the ipstate_t structure and into the
4293 * structure used for logging.
4294 */
4295 ipsl.isl_type = type;
4296 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
4297 ipsl.isl_bytes[0] = is->is_bytes[0];
4298 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
4299 ipsl.isl_bytes[1] = is->is_bytes[1];
4300 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
4301 ipsl.isl_bytes[2] = is->is_bytes[2];
4302 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
4303 ipsl.isl_bytes[3] = is->is_bytes[3];
4304 ipsl.isl_src = is->is_src;
4305 ipsl.isl_dst = is->is_dst;
4306 ipsl.isl_p = is->is_p;
4307 ipsl.isl_v = is->is_v;
4308 ipsl.isl_flags = is->is_flags;
4309 ipsl.isl_tag = is->is_tag;
4310 ipsl.isl_rulen = is->is_rulen;
4311 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
4312
4313 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
4314 ipsl.isl_sport = is->is_sport;
4315 ipsl.isl_dport = is->is_dport;
4316 if (ipsl.isl_p == IPPROTO_TCP) {
4317 ipsl.isl_state[0] = is->is_state[0];
4318 ipsl.isl_state[1] = is->is_state[1];
4319 }
4320 } else if (ipsl.isl_p == IPPROTO_ICMP) {
4321 ipsl.isl_itype = is->is_icmp.ici_type;
4322 } else if (ipsl.isl_p == IPPROTO_ICMPV6) {
4323 ipsl.isl_itype = is->is_icmp.ici_type;
4324 } else {
4325 ipsl.isl_ps.isl_filler[0] = 0;
4326 ipsl.isl_ps.isl_filler[1] = 0;
4327 }
4328
4329 items[0] = &ipsl;
4330 sizes[0] = sizeof(ipsl);
4331 types[0] = 0;
4332
4333 (void) ipf_log_items(softc, IPL_LOGSTATE, NULL, items, sizes, types, 1);
4334 #endif
4335 }
4336
4337
#ifdef USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    ipf_checkicmp6matchingstate                                 */
/* Returns:     ipstate_t* - NULL == no match found,                        */
/*                           else pointer to matching state entry           */
/* Parameters:  fin(I) - pointer to packet information                      */
/* Locks:       NULL == no locks, else Read Lock on ipf_state               */
/*                                                                          */
/* Given an ICMPv6 error message, rebuild packet information for the IPv6   */
/* header embedded in it and search the state table for the entry that      */
/* the embedded packet belongs to.  A match is only returned when           */
/* ipf_allowstateicmp() returns 0 for it.                                   */
/* ------------------------------------------------------------------------ */
static ipstate_t *
ipf_checkicmp6matchingstate(fr_info_t *fin)
{
	ipf_main_softc_t *softc = fin->fin_main_soft;
	ipf_state_softc_t *softs = softc->ipf_state_soft;
	struct icmp6_hdr *ic6, *oic;
	ipstate_t *is, *cur;
	i6addr_t dst, src;
	u_short savelen;
	icmpinfo_t *ic;
	fr_info_t ofin;
	tcphdr_t *tcp;
	ip6_t *oip6;
	u_char pr;
	u_int hv;
	int i;

	/*
	 * Only IPv6 packets that have been flagged as ICMP errors and that
	 * are at least ICMP6ERR_MINPKTLEN long are of interest.
	 */
	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR)) {
		SBUMPD(ipf_state_stats, iss_icmp_bad);
		return (NULL);
	}

	/*
	 * The header of the packet that triggered the error follows the
	 * ICMPv6 error header.
	 *
	 * NOTE(review): this length test compares the whole packet length
	 * with the size of one IPv6 header only; confirm that the embedded
	 * header (and the transport bytes read further down) are known to
	 * be present by the time we get here.
	 */
	ic6 = fin->fin_dp;
	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip6)) {
		SBUMPD(ipf_state_stats, iss_icmp_short);
		return (NULL);
	}

	/*
	 * Build a second fr_info_t describing the embedded packet so that it
	 * can be given to ipf_matchsrcdst().  It starts as a copy of fin,
	 * with the direction reversed.  The mbuf pointers are cleared so
	 * that any attempt to use them is caught immediately.
	 */
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
	ofin.fin_v = 6;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */

	/*
	 * The payload length in the embedded header is replaced with the
	 * amount of data that follows the ICMPv6 error header while
	 * ipf_makefrip() looks at it, and restored straight afterwards
	 * so the packet itself is left unchanged.
	 */
	savelen = oip6->ip6_plen;
	oip6->ip6_plen = htons(fin->fin_dlen - ICMPERR_ICMPHLEN);
	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_ip = (ip_t *)oip6;
	(void) ipf_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
	oip6->ip6_plen = savelen;

	/*
	 * an ICMP error can never generate an ICMP error in response.
	 */
	if (ofin.fin_flx & FI_ICMPERR) {
		DT1(iss_icmp6_icmperr, fr_info_t *, &ofin);
		SBUMP(ipf_state_stats.iss_icmp6_icmperr);
		return (NULL);
	}

	/*
	 * Both lookups below key on the embedded packet's next header
	 * value and its addresses.
	 */
	pr = oip6->ip6_nxt;
	src.in6 = oip6->ip6_src;
	dst.in6 = oip6->ip6_dst;

	if (pr == IPPROTO_ICMPV6) {
		oic = ofin.fin_dp;
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) {
			DT1(iss_icmp6_notinfo, fr_info_t *, &ofin);
			SBUMP(ipf_state_stats.iss_icmp6_notinfo);
			return (NULL);
		}

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		hv = pr;
		hv += src.in4.s_addr;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv = DOUBLE_HASH(hv);

		READ_ENTER(&softc->ipf_state);
		for (cur = softs->ipf_state_table[hv]; cur != NULL;
		     cur = cur->is_hnext) {
			ic = &cur->is_icmp;
			if (cur->is_p != pr)
				continue;
			if (cur->is_pass & FR_NOICMPERR)
				continue;
			if ((oic->icmp6_id != ic->ici_id) ||
			    (oic->icmp6_seq != ic->ici_seq))
				continue;
			is = ipf_matchsrcdst(&ofin, cur, &src, &dst, NULL,
					     FI_ICMPCMP);
			if (is == NULL)
				continue;
			/*
			 * in the state table ICMP query's are stored
			 * with the type of the corresponding ICMP
			 * response. Correct here
			 */
			if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
			     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
			    (ic->ici_type - 1 == oic->icmp6_type)) {
				/* Returns with the read lock still held. */
				if (!ipf_allowstateicmp(fin, is, &src))
					return (is);
			}
		}
		RWLOCK_EXIT(&softc->ipf_state);
		SBUMPD(ipf_state_stats, iss_icmp6_miss);
		return (NULL);
	}

	/*
	 * Anything other than ICMPv6: hash on the protocol, all four words
	 * of each address and, for TCP and UDP, both port numbers.
	 */
	hv = pr;
	for (i = 0; i < 4; i++)
		hv += src.i6[i];
	for (i = 0; i < 4; i++)
		hv += dst.i6[i];

	tcp = NULL;
	if ((pr == IPPROTO_TCP) || (pr == IPPROTO_UDP)) {
		tcp = (tcphdr_t *)(oip6 + 1);
		hv += tcp->th_dport;
		hv += tcp->th_sport;
	}

	hv = DOUBLE_HASH(hv);

	READ_ENTER(&softc->ipf_state);
	for (cur = softs->ipf_state_table[hv]; cur != NULL;
	     cur = cur->is_hnext) {
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.
		 */
		if ((cur->is_p != pr) || (cur->is_v != 6) ||
		    (cur->is_pass & FR_NOICMPERR))
			continue;
		is = ipf_matchsrcdst(&ofin, cur, &src, &dst, tcp, FI_ICMPCMP);
		/* Returns with the read lock still held. */
		if ((is != NULL) && (ipf_allowstateicmp(fin, is, &src) == 0))
			return (is);
	}
	RWLOCK_EXIT(&softc->ipf_state);
	SBUMPD(ipf_state_stats, iss_icmp_miss);
	return (NULL);
}
#endif
4538
4539
4540 /* ------------------------------------------------------------------------ */
4541 /* Function: ipf_sttab_init */
4542 /* Returns: Nil */
4543 /* Parameters: softc(I) - pointer to soft context main structure */
4544 /* tqp(I) - pointer to an array of timeout queues for TCP */
4545 /* */
4546 /* Initialise the array of timeout queues for TCP. */
4547 /* ------------------------------------------------------------------------ */
4548 void
ipf_sttab_init(ipf_main_softc_t * softc,ipftq_t * tqp)4549 ipf_sttab_init(ipf_main_softc_t *softc, ipftq_t *tqp)
4550 {
4551 int i;
4552
4553 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
4554 IPFTQ_INIT(&tqp[i], 0, "ipftq tcp tab");
4555 tqp[i].ifq_next = tqp + i + 1;
4556 }
4557 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
4558 tqp[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcpclosed;
4559 tqp[IPF_TCPS_LISTEN].ifq_ttl = softc->ipf_tcptimeout;
4560 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = softc->ipf_tcpsynsent;
4561 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = softc->ipf_tcpsynrecv;
4562 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = softc->ipf_tcpidletimeout;
4563 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = softc->ipf_tcphalfclosed;
4564 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = softc->ipf_tcphalfclosed;
4565 tqp[IPF_TCPS_CLOSING].ifq_ttl = softc->ipf_tcptimeout;
4566 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = softc->ipf_tcplastack;
4567 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = softc->ipf_tcpclosewait;
4568 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = softc->ipf_tcptimewait;
4569 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = softc->ipf_tcptimeout;
4570 }
4571
4572
4573 /* ------------------------------------------------------------------------ */
4574 /* Function: ipf_sttab_destroy */
4575 /* Returns: Nil */
4576 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
4577 /* */
4578 /* Do whatever is necessary to "destroy" each of the entries in the array */
4579 /* of timeout queues for TCP. */
4580 /* ------------------------------------------------------------------------ */
4581 void
ipf_sttab_destroy(ipftq_t * tqp)4582 ipf_sttab_destroy(ipftq_t *tqp)
4583 {
4584 int i;
4585
4586 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--)
4587 MUTEX_DESTROY(&tqp[i].ifq_lock);
4588 }
4589
4590
4591 /* ------------------------------------------------------------------------ */
4592 /* Function: ipf_state_deref */
4593 /* Returns: Nil */
4594 /* Parameters: softc(I) - pointer to soft context main structure */
4595 /* isp(I) - pointer to pointer to state table entry */
4596 /* */
4597 /* Decrement the reference counter for this state table entry and free it */
4598 /* if there are no more things using it. */
4599 /* */
4600 /* This function is only called when cleaning up after increasing is_ref by */
4601 /* one earlier in the 'code path' so if is_ref is 1 when entering, we do */
4602 /* have an orphan, otherwise not. However there is a possible race between */
4603 /* the entry being deleted via flushing with an ioctl call (that calls the */
4604 /* delete function directly) and the tail end of packet processing so we */
4605 /* need to grab is_lock before doing the check to synchronise the two code */
4606 /* paths. */
4607 /* */
4608 /* When operating in userland (ipftest), we have no timers to clear a state */
4609 /* entry. Therefore, we make a few simple tests before deleting an entry */
4610 /* outright. We compare states on each side looking for a combination of */
4611 /* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK. Then we factor */
4612 /* in packet direction with the interface list to make sure we don't */
4613 /* prematurely delete an entry on a final inbound packet that's we're also */
4614 /* supposed to route elsewhere. */
4615 /* */
4616 /* Internal parameters: */
4617 /* state[0] = state of source (host that initiated connection) */
4618 /* state[1] = state of dest (host that accepted the connection) */
4619 /* */
4620 /* dir == 0 : a packet from source to dest */
4621 /* dir == 1 : a packet from dest to source */
4622 /* ------------------------------------------------------------------------ */
4623 void
ipf_state_deref(ipf_main_softc_t * softc,ipstate_t ** isp)4624 ipf_state_deref(ipf_main_softc_t *softc, ipstate_t **isp)
4625 {
4626 ipstate_t *is = *isp;
4627
4628 is = *isp;
4629 *isp = NULL;
4630
4631 MUTEX_ENTER(&is->is_lock);
4632 if (is->is_ref > 1) {
4633 is->is_ref--;
4634 MUTEX_EXIT(&is->is_lock);
4635 #ifndef _KERNEL
4636 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
4637 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
4638 ipf_state_del(softc, is, ISL_EXPIRE);
4639 }
4640 #endif
4641 return;
4642 }
4643 MUTEX_EXIT(&is->is_lock);
4644
4645 WRITE_ENTER(&softc->ipf_state);
4646 ipf_state_del(softc, is, ISL_ORPHAN);
4647 RWLOCK_EXIT(&softc->ipf_state);
4648 }
4649
4650
4651 /* ------------------------------------------------------------------------ */
4652 /* Function: ipf_state_setqueue */
4653 /* Returns: Nil */
4654 /* Parameters: softc(I) - pointer to soft context main structure */
4655 /* is(I) - pointer to state structure */
4656 /* rev(I) - forward(0) or reverse(1) direction */
4657 /* Locks: ipf_state (read or write) */
4658 /* */
4659 /* Put the state entry on its default queue entry, using rev as a helped in */
4660 /* determining which queue it should be placed on. */
4661 /* ------------------------------------------------------------------------ */
4662 void
ipf_state_setqueue(ipf_main_softc_t * softc,ipstate_t * is,int rev)4663 ipf_state_setqueue(ipf_main_softc_t *softc, ipstate_t *is, int rev)
4664 {
4665 ipf_state_softc_t *softs = softc->ipf_state_soft;
4666 ipftq_t *oifq, *nifq;
4667
4668 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
4669 nifq = is->is_tqehead[rev];
4670 else
4671 nifq = NULL;
4672
4673 if (nifq == NULL) {
4674 switch (is->is_p)
4675 {
4676 #ifdef USE_INET6
4677 case IPPROTO_ICMPV6 :
4678 if (rev == 1)
4679 nifq = &softs->ipf_state_icmpacktq;
4680 else
4681 nifq = &softs->ipf_state_icmptq;
4682 break;
4683 #endif
4684 case IPPROTO_ICMP :
4685 if (rev == 1)
4686 nifq = &softs->ipf_state_icmpacktq;
4687 else
4688 nifq = &softs->ipf_state_icmptq;
4689 break;
4690 case IPPROTO_TCP :
4691 nifq = softs->ipf_state_tcptq + is->is_state[rev];
4692 break;
4693
4694 case IPPROTO_UDP :
4695 if (rev == 1)
4696 nifq = &softs->ipf_state_udpacktq;
4697 else
4698 nifq = &softs->ipf_state_udptq;
4699 break;
4700
4701 default :
4702 nifq = &softs->ipf_state_iptq;
4703 break;
4704 }
4705 }
4706
4707 oifq = is->is_sti.tqe_ifq;
4708 /*
4709 * If it's currently on a timeout queue, move it from one queue to
4710 * another, else put it on the end of the newly determined queue.
4711 */
4712 if (oifq != NULL)
4713 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq, nifq);
4714 else
4715 ipf_queueappend(softc->ipf_ticks, &is->is_sti, nifq, is);
4716 return;
4717 }
4718
4719
4720 /* ------------------------------------------------------------------------ */
4721 /* Function: ipf_state_iter */
4722 /* Returns: int - 0 == success, else error */
4723 /* Parameters: softc(I) - pointer to main soft context */
4724 /* token(I) - pointer to ipftoken structure */
4725 /* itp(I) - pointer to ipfgeniter structure */
4726 /* obj(I) - pointer to data description structure */
4727 /* */
4728 /* This function handles the SIOCGENITER ioctl for the state tables and */
4729 /* walks through the list of entries in the state table list (softs->ipf_state_list.) */
4730 /* ------------------------------------------------------------------------ */
4731 static int
ipf_state_iter(ipf_main_softc_t * softc,ipftoken_t * token,ipfgeniter_t * itp,ipfobj_t * obj)4732 ipf_state_iter(ipf_main_softc_t *softc, ipftoken_t *token, ipfgeniter_t *itp,
4733 ipfobj_t *obj)
4734 {
4735 ipf_state_softc_t *softs = softc->ipf_state_soft;
4736 ipstate_t *is, *next, zero;
4737 int error;
4738
4739 if (itp->igi_data == NULL) {
4740 IPFERROR(100026);
4741 return (EFAULT);
4742 }
4743
4744 if (itp->igi_nitems < 1) {
4745 IPFERROR(100027);
4746 return (ENOSPC);
4747 }
4748
4749 if (itp->igi_type != IPFGENITER_STATE) {
4750 IPFERROR(100028);
4751 return (EINVAL);
4752 }
4753
4754 is = token->ipt_data;
4755 if (is == (void *)-1) {
4756 IPFERROR(100029);
4757 return (ESRCH);
4758 }
4759
4760 error = 0;
4761 obj->ipfo_type = IPFOBJ_IPSTATE;
4762 obj->ipfo_size = sizeof(ipstate_t);
4763
4764 READ_ENTER(&softc->ipf_state);
4765
4766 is = token->ipt_data;
4767 if (is == NULL) {
4768 next = softs->ipf_state_list;
4769 } else {
4770 next = is->is_next;
4771 }
4772
4773 /*
4774 * If we find a state entry to use, bump its reference count so that
4775 * it can be used for is_next when we come back.
4776 */
4777 if (next != NULL) {
4778 MUTEX_ENTER(&next->is_lock);
4779 next->is_ref++;
4780 MUTEX_EXIT(&next->is_lock);
4781 token->ipt_data = next;
4782 } else {
4783 bzero(&zero, sizeof(zero));
4784 next = &zero;
4785 token->ipt_data = NULL;
4786 }
4787 if (next->is_next == NULL)
4788 ipf_token_mark_complete(token);
4789
4790 RWLOCK_EXIT(&softc->ipf_state);
4791
4792 obj->ipfo_ptr = itp->igi_data;
4793 error = ipf_outobjk(softc, obj, next);
4794 if (is != NULL)
4795 ipf_state_deref(softc, &is);
4796
4797 return (error);
4798 }
4799
4800
4801 /* ------------------------------------------------------------------------ */
4802 /* Function: ipf_state_gettable */
4803 /* Returns: int - 0 = success, else error */
4804 /* Parameters: softc(I) - pointer to main soft context */
4805 /* softs(I) - pointer to state context structure */
4806 /* data(I) - pointer to ioctl data */
4807 /* */
4808 /* This function handles ioctl requests for tables of state information. */
4809 /* At present the only table it deals with is the hash bucket statistics. */
4810 /* ------------------------------------------------------------------------ */
4811 static int
ipf_state_gettable(ipf_main_softc_t * softc,ipf_state_softc_t * softs,char * data)4812 ipf_state_gettable(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
4813 char *data)
4814 {
4815 ipftable_t table;
4816 int error;
4817
4818 error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE);
4819 if (error != 0)
4820 return (error);
4821
4822 if (table.ita_type != IPFTABLE_BUCKETS) {
4823 IPFERROR(100031);
4824 return (EINVAL);
4825 }
4826
4827 error = COPYOUT(softs->ipf_state_stats.iss_bucketlen, table.ita_table,
4828 softs->ipf_state_size * sizeof(u_int));
4829 if (error != 0) {
4830 IPFERROR(100032);
4831 error = EFAULT;
4832 }
4833 return (error);
4834 }
4835
4836
4837 /* ------------------------------------------------------------------------ */
4838 /* Function: ipf_state_setpending */
4839 /* Returns: Nil */
4840 /* Parameters: softc(I) - pointer to main soft context */
4841 /* is(I) - pointer to state structure */
4842 /* Locks: ipf_state (read or write) */
4843 /* */
4844 /* Put the state entry on to the pending queue - this queue has a very */
4845 /* short lifetime where items are put that can't be deleted straight away */
4846 /* because of locking issues but we want to delete them ASAP, anyway. */
4847 /* ------------------------------------------------------------------------ */
4848 void
ipf_state_setpending(ipf_main_softc_t * softc,ipstate_t * is)4849 ipf_state_setpending(ipf_main_softc_t *softc, ipstate_t *is)
4850 {
4851 ipf_state_softc_t *softs = softc->ipf_state_soft;
4852 ipftq_t *oifq;
4853
4854 oifq = is->is_sti.tqe_ifq;
4855 if (oifq != NULL)
4856 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq,
4857 &softs->ipf_state_pending);
4858 else
4859 ipf_queueappend(softc->ipf_ticks, &is->is_sti,
4860 &softs->ipf_state_pending, is);
4861
4862 MUTEX_ENTER(&is->is_lock);
4863 if (is->is_me != NULL) {
4864 *is->is_me = NULL;
4865 is->is_me = NULL;
4866 is->is_ref--;
4867 }
4868 MUTEX_EXIT(&is->is_lock);
4869 }
4870
4871
4872 /* ------------------------------------------------------------------------ */
4873 /* Function: ipf_state_matchflush */
4874 /* Returns: Nil */
4875 /* Parameters: softc(I) - pointer to main soft context */
4876 /* data(I) - pointer to state structure */
4877 /* Locks: ipf_state (read or write) */
4878 /* */
4879 /* Flush all entries from the list of state entries that match the */
4880 /* properties in the array loaded. */
4881 /* ------------------------------------------------------------------------ */
4882 int
ipf_state_matchflush(ipf_main_softc_t * softc,caddr_t data)4883 ipf_state_matchflush(ipf_main_softc_t *softc, caddr_t data)
4884 {
4885 ipf_state_softc_t *softs = softc->ipf_state_soft;
4886 int *array, flushed, error;
4887 ipstate_t *state, *statenext;
4888 ipfobj_t obj;
4889
4890 error = ipf_matcharray_load(softc, data, &obj, &array);
4891 if (error != 0)
4892 return (error);
4893
4894 flushed = 0;
4895
4896 for (state = softs->ipf_state_list; state != NULL; state = statenext) {
4897 statenext = state->is_next;
4898 if (ipf_state_matcharray(state, array, softc->ipf_ticks) == 0) {
4899 ipf_state_del(softc, state, ISL_FLUSH);
4900 flushed++;
4901 }
4902 }
4903
4904 obj.ipfo_retval = flushed;
4905 error = BCOPYOUT(&obj, data, sizeof(obj));
4906
4907 KFREES(array, array[0] * sizeof(*array));
4908
4909 return (error);
4910 }
4911
4912
4913 /* ------------------------------------------------------------------------ */
4914 /* Function: ipf_state_matcharray */
4915 /* Returns: int - 0 = no match, 1 = match */
4916 /* Parameters: state(I) - pointer to state structure */
4917 /* array(I) - pointer to ipf matching expression */
4918 /* ticks(I) - current value of ipfilter tick timer */
4919 /* Locks: ipf_state (read or write) */
4920 /* */
4921 /* Compare a state entry with the match array passed in and return a value */
4922 /* to indicate whether or not the matching was successful. */
4923 /* ------------------------------------------------------------------------ */
4924 static int
ipf_state_matcharray(ipstate_t * state,int * array,u_long ticks)4925 ipf_state_matcharray(ipstate_t *state, int *array, u_long ticks)
4926 {
4927 int i, n, *x, rv, p;
4928 ipfexp_t *e;
4929
4930 rv = 0;
4931 n = array[0];
4932 x = array + 1;
4933
4934 for (; n > 0; x += 3 + x[3], rv = 0) {
4935 e = (ipfexp_t *)x;
4936 n -= e->ipfe_size;
4937 if (x[0] == IPF_EXP_END)
4938 break;
4939
4940 /*
4941 * If we need to match the protocol and that doesn't match,
4942 * don't even both with the instruction array.
4943 */
4944 p = e->ipfe_cmd >> 16;
4945 if ((p != 0) && (p != state->is_p))
4946 break;
4947
4948 switch (e->ipfe_cmd)
4949 {
4950 case IPF_EXP_IP_PR :
4951 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4952 rv |= (state->is_p == e->ipfe_arg0[i]);
4953 }
4954 break;
4955
4956 case IPF_EXP_IP_SRCADDR :
4957 if (state->is_v != 4)
4958 break;
4959 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4960 rv |= ((state->is_saddr &
4961 e->ipfe_arg0[i * 2 + 1]) ==
4962 e->ipfe_arg0[i * 2]);
4963 }
4964 break;
4965
4966 case IPF_EXP_IP_DSTADDR :
4967 if (state->is_v != 4)
4968 break;
4969 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4970 rv |= ((state->is_daddr &
4971 e->ipfe_arg0[i * 2 + 1]) ==
4972 e->ipfe_arg0[i * 2]);
4973 }
4974 break;
4975
4976 case IPF_EXP_IP_ADDR :
4977 if (state->is_v != 4)
4978 break;
4979 for (i = 0; !rv && i < e->ipfe_narg; i++) {
4980 rv |= ((state->is_saddr &
4981 e->ipfe_arg0[i * 2 + 1]) ==
4982 e->ipfe_arg0[i * 2]) ||
4983 ((state->is_daddr &
4984 e->ipfe_arg0[i * 2 + 1]) ==
4985 e->ipfe_arg0[i * 2]);
4986 }
4987 break;
4988
4989 #ifdef USE_INET6
4990 case IPF_EXP_IP6_SRCADDR :
4991 if (state->is_v != 6)
4992 break;
4993 for (i = 0; !rv && i < x[3]; i++) {
4994 rv |= IP6_MASKEQ(&state->is_src.in6,
4995 &e->ipfe_arg0[i * 8 + 4],
4996 &e->ipfe_arg0[i * 8]);
4997 }
4998 break;
4999
5000 case IPF_EXP_IP6_DSTADDR :
5001 if (state->is_v != 6)
5002 break;
5003 for (i = 0; !rv && i < x[3]; i++) {
5004 rv |= IP6_MASKEQ(&state->is_dst.in6,
5005 &e->ipfe_arg0[i * 8 + 4],
5006 &e->ipfe_arg0[i * 8]);
5007 }
5008 break;
5009
5010 case IPF_EXP_IP6_ADDR :
5011 if (state->is_v != 6)
5012 break;
5013 for (i = 0; !rv && i < x[3]; i++) {
5014 rv |= IP6_MASKEQ(&state->is_src.in6,
5015 &e->ipfe_arg0[i * 8 + 4],
5016 &e->ipfe_arg0[i * 8]) ||
5017 IP6_MASKEQ(&state->is_dst.in6,
5018 &e->ipfe_arg0[i * 8 + 4],
5019 &e->ipfe_arg0[i * 8]);
5020 }
5021 break;
5022 #endif
5023
5024 case IPF_EXP_UDP_PORT :
5025 case IPF_EXP_TCP_PORT :
5026 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5027 rv |= (state->is_sport == e->ipfe_arg0[i]) ||
5028 (state->is_dport == e->ipfe_arg0[i]);
5029 }
5030 break;
5031
5032 case IPF_EXP_UDP_SPORT :
5033 case IPF_EXP_TCP_SPORT :
5034 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5035 rv |= (state->is_sport == e->ipfe_arg0[i]);
5036 }
5037 break;
5038
5039 case IPF_EXP_UDP_DPORT :
5040 case IPF_EXP_TCP_DPORT :
5041 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5042 rv |= (state->is_dport == e->ipfe_arg0[i]);
5043 }
5044 break;
5045
5046 case IPF_EXP_TCP_STATE :
5047 for (i = 0; !rv && i < e->ipfe_narg; i++) {
5048 rv |= (state->is_state[0] == e->ipfe_arg0[i]) ||
5049 (state->is_state[1] == e->ipfe_arg0[i]);
5050 }
5051 break;
5052
5053 case IPF_EXP_IDLE_GT :
5054 rv |= (ticks - state->is_touched > e->ipfe_arg0[0]);
5055 break;
5056 }
5057
5058 /*
5059 * Factor in doing a negative match.
5060 */
5061 rv ^= e->ipfe_not;
5062
5063 if (rv == 0)
5064 break;
5065 }
5066
5067 return (rv);
5068 }
5069
5070
5071 /* ------------------------------------------------------------------------ */
5072 /* Function: ipf_state_settimeout */
5073 /* Returns: int 0 = success, else failure */
5074 /* Parameters: softc(I) - pointer to main soft context */
5075 /* t(I) - pointer to tuneable being changed */
5076 /* p(I) - pointer to the new value */
5077 /* */
5078 /* Sets a timeout value for one of the many timeout queues. We find the */
5079 /* correct queue using a somewhat manual process of comparing the timeout */
5080 /* names for each specific value available and calling ipf_apply_timeout on */
5081 /* that queue so that all of the items on it are updated accordingly. */
5082 /* ------------------------------------------------------------------------ */
5083 int
ipf_state_settimeout(struct ipf_main_softc_s * softc,ipftuneable_t * t,ipftuneval_t * p)5084 ipf_state_settimeout(struct ipf_main_softc_s *softc, ipftuneable_t *t,
5085 ipftuneval_t *p)
5086 {
5087 ipf_state_softc_t *softs = softc->ipf_state_soft;
5088
5089 /*
5090 * In case there is nothing to do...
5091 */
5092 if (*t->ipft_pint == p->ipftu_int)
5093 return (0);
5094
5095 if (!strncmp(t->ipft_name, "tcp_", 4))
5096 return (ipf_settimeout_tcp(t, p, softs->ipf_state_tcptq));
5097
5098 if (!strcmp(t->ipft_name, "udp_timeout")) {
5099 ipf_apply_timeout(&softs->ipf_state_udptq, p->ipftu_int);
5100 } else if (!strcmp(t->ipft_name, "udp_ack_timeout")) {
5101 ipf_apply_timeout(&softs->ipf_state_udpacktq, p->ipftu_int);
5102 } else if (!strcmp(t->ipft_name, "icmp_timeout")) {
5103 ipf_apply_timeout(&softs->ipf_state_icmptq, p->ipftu_int);
5104 } else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) {
5105 ipf_apply_timeout(&softs->ipf_state_icmpacktq, p->ipftu_int);
5106 } else if (!strcmp(t->ipft_name, "ip_timeout")) {
5107 ipf_apply_timeout(&softs->ipf_state_iptq, p->ipftu_int);
5108 } else {
5109 IPFERROR(100034);
5110 return (ESRCH);
5111 }
5112
5113 /*
5114 * Update the tuneable being set.
5115 */
5116 *t->ipft_pint = p->ipftu_int;
5117
5118 return (0);
5119 }
5120
5121
5122 /* ------------------------------------------------------------------------ */
5123 /* Function: ipf_state_rehash */
5124 /* Returns: int 0 = success, else failure */
5125 /* Parameters: softc(I) - pointer to main soft context */
5126 /* t(I) - pointer to tuneable being changed */
5127 /* p(I) - pointer to the new value */
5128 /* */
5129 /* To change the size of the state hash table at runtime, a new table has */
5130 /* to be allocated and then all of the existing entries put in it, bumping */
5131 /* up the bucketlength for it as we go along. */
5132 /* ------------------------------------------------------------------------ */
5133 int
ipf_state_rehash(ipf_main_softc_t * softc,ipftuneable_t * t,ipftuneval_t * p)5134 ipf_state_rehash(ipf_main_softc_t *softc, ipftuneable_t *t, ipftuneval_t *p)
5135 {
5136 ipf_state_softc_t *softs = softc->ipf_state_soft;
5137 ipstate_t **newtab, *is;
5138 u_long *newseed;
5139 u_int *bucketlens;
5140 u_int maxbucket;
5141 u_int newsize;
5142 u_int hv;
5143 int i;
5144
5145 newsize = p->ipftu_int;
5146 /*
5147 * In case there is nothing to do...
5148 */
5149 if (newsize == softs->ipf_state_size)
5150 return (0);
5151
5152 KMALLOCS(newtab, ipstate_t **, newsize * sizeof(ipstate_t *));
5153 if (newtab == NULL) {
5154 IPFERROR(100035);
5155 return (ENOMEM);
5156 }
5157
5158 KMALLOCS(bucketlens, u_int *, newsize * sizeof(u_int));
5159 if (bucketlens == NULL) {
5160 KFREES(newtab, newsize * sizeof(*softs->ipf_state_table));
5161 IPFERROR(100036);
5162 return (ENOMEM);
5163 }
5164
5165 newseed = ipf_state_seed_alloc(newsize, softs->ipf_state_max);
5166 if (newseed == NULL) {
5167 KFREES(bucketlens, newsize * sizeof(*bucketlens));
5168 KFREES(newtab, newsize * sizeof(*newtab));
5169 IPFERROR(100037);
5170 return (ENOMEM);
5171 }
5172
5173 for (maxbucket = 0, i = newsize; i > 0; i >>= 1)
5174 maxbucket++;
5175 maxbucket *= 2;
5176
5177 bzero((char *)newtab, newsize * sizeof(ipstate_t *));
5178 bzero((char *)bucketlens, newsize * sizeof(u_int));
5179
5180 WRITE_ENTER(&softc->ipf_state);
5181
5182 if (softs->ipf_state_table != NULL) {
5183 KFREES(softs->ipf_state_table,
5184 softs->ipf_state_size * sizeof(*softs->ipf_state_table));
5185 }
5186 softs->ipf_state_table = newtab;
5187
5188 if (softs->ipf_state_seed != NULL) {
5189 KFREES(softs->ipf_state_seed,
5190 softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
5191 }
5192 softs->ipf_state_seed = newseed;
5193
5194 if (softs->ipf_state_stats.iss_bucketlen != NULL) {
5195 KFREES(softs->ipf_state_stats.iss_bucketlen,
5196 softs->ipf_state_size * sizeof(u_int));
5197 }
5198 softs->ipf_state_stats.iss_bucketlen = bucketlens;
5199 softs->ipf_state_maxbucket = maxbucket;
5200 softs->ipf_state_size = newsize;
5201
5202 /*
5203 * Walk through the entire list of state table entries and put them
5204 * in the new state table, somewhere. Because we have a new table,
5205 * we need to restart the counter of how many chains are in use.
5206 */
5207 softs->ipf_state_stats.iss_inuse = 0;
5208 for (is = softs->ipf_state_list; is != NULL; is = is->is_next) {
5209 is->is_hnext = NULL;
5210 is->is_phnext = NULL;
5211 hv = is->is_hv % softs->ipf_state_size;
5212
5213 if (softs->ipf_state_table[hv] != NULL)
5214 softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
5215 else
5216 softs->ipf_state_stats.iss_inuse++;
5217 is->is_phnext = softs->ipf_state_table + hv;
5218 is->is_hnext = softs->ipf_state_table[hv];
5219 softs->ipf_state_table[hv] = is;
5220 softs->ipf_state_stats.iss_bucketlen[hv]++;
5221 }
5222 RWLOCK_EXIT(&softc->ipf_state);
5223
5224 return (0);
5225 }
5226
5227
5228 /* ------------------------------------------------------------------------ */
5229 /* Function: ipf_state_add_tq */
5230 /* Returns: ipftq_t * - NULL = failure, else pointer to new timeout */
5231 /* queue */
5232 /* Parameters: softc(I) - pointer to main soft context */
5233 /* ttl(I) - pointer to the ttl for the new queue */
5234 /* */
5235 /* Request a pointer to a timeout queue that has a ttl as given by the */
5236 /* value being passed in. The timeout queue is added tot the list of those */
5237 /* used internally for stateful filtering. */
5238 /* ------------------------------------------------------------------------ */
5239 ipftq_t *
ipf_state_add_tq(ipf_main_softc_t * softc,int ttl)5240 ipf_state_add_tq(ipf_main_softc_t *softc, int ttl)
5241 {
5242 ipf_state_softc_t *softs = softc->ipf_state_soft;
5243
5244 return (ipf_addtimeoutqueue(softc, &softs->ipf_state_usertq, ttl));
5245 }
5246
5247
5248 #ifndef _KERNEL
5249 /*
5250 * Display the built up state table rules and mapping entries.
5251 */
5252 void
ipf_state_dump(ipf_main_softc_t * softc,void * arg)5253 ipf_state_dump(ipf_main_softc_t *softc, void *arg)
5254 {
5255 ipf_state_softc_t *softs = arg;
5256 ipstate_t *ips;
5257
5258 printf("List of active state sessions:\n");
5259 for (ips = softs->ipf_state_list; ips != NULL; )
5260 ips = printstate(ips, opts & (OPT_DEBUG|OPT_VERBOSE),
5261 softc->ipf_ticks);
5262 }
5263 #endif
5264