1 /*
2 * Copyright (C) 1993-2001, 2003 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7 *
8 * Copyright 2018 Joyent, Inc.
9 */
10
11 #if !defined(lint)
12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed";
13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
14 #endif
15
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/param.h>
19 #include <sys/cpuvar.h>
20 #include <sys/open.h>
21 #include <sys/ioctl.h>
22 #include <sys/filio.h>
23 #include <sys/systm.h>
24 #include <sys/strsubr.h>
25 #include <sys/strsun.h>
26 #include <sys/cred.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/mac_provider.h>
32 #include <sys/mkdev.h>
33 #include <sys/protosw.h>
34 #include <sys/socket.h>
35 #include <sys/dditypes.h>
36 #include <sys/cmn_err.h>
37 #include <sys/zone.h>
38 #include <net/if.h>
39 #include <net/af.h>
40 #include <net/route.h>
41 #include <netinet/in.h>
42 #include <netinet/in_systm.h>
43 #include <netinet/ip.h>
44 #include <netinet/ip_var.h>
45 #include <netinet/tcp.h>
46 #include <netinet/udp.h>
47 #include <netinet/tcpip.h>
48 #include <netinet/ip_icmp.h>
49 #include "netinet/ip_compat.h"
50 #ifdef USE_INET6
51 # include <netinet/icmp6.h>
52 #endif
53 #include "netinet/ip_fil.h"
54 #include "netinet/ip_nat.h"
55 #include "netinet/ip_frag.h"
56 #include "netinet/ip_state.h"
57 #include "netinet/ip_auth.h"
58 #include "netinet/ip_proxy.h"
59 #include "netinet/ipf_stack.h"
60 #ifdef IPFILTER_LOOKUP
61 # include "netinet/ip_lookup.h"
62 #endif
63 #include <inet/ip_ire.h>
64
65 #include <sys/md5.h>
66 #include <sys/neti.h>
67
68 static int frzerostats __P((caddr_t, ipf_stack_t *));
69 static int fr_setipfloopback __P((int, ipf_stack_t *));
70 static int fr_enableipf __P((ipf_stack_t *, int));
71 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
72 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
73 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
74 static int ipf_hook __P((hook_data_t, int, int, void *));
75 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
76 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
77 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
78 void *));
79 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
80 static int ipf_hook4 __P((hook_data_t, int, int, void *));
81 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
82 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
83 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
84 void *));
85 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
86 void *));
87 static int ipf_hook6 __P((hook_data_t, int, int, void *));
88
89 static int ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
90 static int ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
91 void *));
92
93 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
94 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
95
96 static int ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
97 const char *, const char *, const char *));
98 static int ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
99 const char *, const char *, const char *));
100
101 #if SOLARIS2 < 10
102 #if SOLARIS2 >= 7
103 u_int *ip_ttl_ptr = NULL;
104 u_int *ip_mtudisc = NULL;
105 # if SOLARIS2 >= 8
106 int *ip_forwarding = NULL;
107 u_int *ip6_forwarding = NULL;
108 # else
109 u_int *ip_forwarding = NULL;
110 # endif
111 #else
112 u_long *ip_ttl_ptr = NULL;
113 u_long *ip_mtudisc = NULL;
114 u_long *ip_forwarding = NULL;
115 #endif
116 #endif
117
118 vmem_t *ipf_minor; /* minor number arena */
119 void *ipf_state; /* DDI state */
120
121 /*
122 * GZ-controlled and per-zone stacks:
123 *
124 * For each non-global zone, we create two ipf stacks: the per-zone stack and
125 * the GZ-controlled stack. The per-zone stack can be controlled and observed
126 * from inside the zone or from the global zone. The GZ-controlled stack can
127 * only be controlled and observed from the global zone (though the rules
128 * still only affect that non-global zone).
129 *
130 * The two hooks are always arranged so that the GZ-controlled stack is always
131 * "outermost" with respect to the zone. The traffic flow then looks like
132 * this:
133 *
134 * Inbound:
135 *
136 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
137 *
138 * Outbound:
139 *
140 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
141 */
142
/*
 * Names under which the hooks are registered.  Each hook has two names:
 * the plain one is used by a per-zone stack and the "_gz" one by a
 * GZ-controlled stack (selected on ifs_gz_controlled).  The name of the
 * counterpart is also used as the ordering hint value for the hook.
 */

/* IPv4 hook names */
char	*hook4_nicevents =	"ipfilter_hook4_nicevents";
char	*hook4_nicevents_gz =	"ipfilter_hook4_nicevents_gz";
char	*hook4_in =		"ipfilter_hook4_in";
char	*hook4_in_gz =		"ipfilter_hook4_in_gz";
char	*hook4_out =		"ipfilter_hook4_out";
char	*hook4_out_gz =		"ipfilter_hook4_out_gz";
char	*hook4_loop_in =	"ipfilter_hook4_loop_in";
char	*hook4_loop_in_gz =	"ipfilter_hook4_loop_in_gz";
char	*hook4_loop_out =	"ipfilter_hook4_loop_out";
char	*hook4_loop_out_gz =	"ipfilter_hook4_loop_out_gz";

/* IPv6 hook names */
char	*hook6_nicevents =	"ipfilter_hook6_nicevents";
char	*hook6_nicevents_gz =	"ipfilter_hook6_nicevents_gz";
char	*hook6_in =		"ipfilter_hook6_in";
char	*hook6_in_gz =		"ipfilter_hook6_in_gz";
char	*hook6_out =		"ipfilter_hook6_out";
char	*hook6_out_gz =		"ipfilter_hook6_out_gz";
char	*hook6_loop_in =	"ipfilter_hook6_loop_in";
char	*hook6_loop_in_gz =	"ipfilter_hook6_loop_in_gz";
char	*hook6_loop_out =	"ipfilter_hook6_loop_out";
char	*hook6_loop_out_gz =	"ipfilter_hook6_loop_out_gz";

/* viona hook names */
char	*hook_viona_in =	"ipfilter_hookviona_in";
char	*hook_viona_in_gz =	"ipfilter_hookviona_in_gz";
char	*hook_viona_out =	"ipfilter_hookviona_out";
char	*hook_viona_out_gz =	"ipfilter_hookviona_out_gz";
172
173 /* ------------------------------------------------------------------------ */
174 /* Function: ipldetach */
175 /* Returns: int - 0 == success, else error. */
176 /* Parameters: Nil */
177 /* */
178 /* This function is responsible for undoing anything that might have been */
179 /* done in a call to iplattach(). It must be able to clean up from a call */
180 /* to iplattach() that did not succeed. Why might that happen? Someone */
181 /* configures a table to be so large that we cannot allocate enough memory */
182 /* for it. */
183 /* ------------------------------------------------------------------------ */
ipldetach(ifs)184 int ipldetach(ifs)
185 ipf_stack_t *ifs;
186 {
187
188 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
189
190 #if SOLARIS2 < 10
191
192 if (ifs->ifs_fr_control_forwarding & 2) {
193 if (ip_forwarding != NULL)
194 *ip_forwarding = 0;
195 #if SOLARIS2 >= 8
196 if (ip6_forwarding != NULL)
197 *ip6_forwarding = 0;
198 #endif
199 }
200 #endif
201
202 /*
203 * This lock needs to be dropped around the net_hook_unregister calls
204 * because we can deadlock here with:
205 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
206 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
207 */
208 RWLOCK_EXIT(&ifs->ifs_ipf_global);
209
210 #define UNDO_HOOK(_f, _b, _e, _h) \
211 do { \
212 if (ifs->_f != NULL) { \
213 if (ifs->_b) { \
214 int tmp = net_hook_unregister(ifs->_f, \
215 _e, ifs->_h); \
216 ifs->_b = (tmp != 0 && tmp != ENXIO); \
217 if (!ifs->_b && ifs->_h != NULL) { \
218 hook_free(ifs->_h); \
219 ifs->_h = NULL; \
220 } \
221 } else if (ifs->_h != NULL) { \
222 hook_free(ifs->_h); \
223 ifs->_h = NULL; \
224 } \
225 } \
226 _NOTE(CONSTCOND) \
227 } while (0)
228
229 /*
230 * Remove IPv6 Hooks
231 */
232 if (ifs->ifs_ipf_ipv6 != NULL) {
233 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
234 NH_PHYSICAL_IN, ifs_ipfhook6_in);
235 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
236 NH_PHYSICAL_OUT, ifs_ipfhook6_out);
237 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
238 NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
239 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
240 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
241 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
242 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
243
244 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
245 goto detach_failed;
246 ifs->ifs_ipf_ipv6 = NULL;
247 }
248
249 /*
250 * Remove IPv4 Hooks
251 */
252 if (ifs->ifs_ipf_ipv4 != NULL) {
253 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
254 NH_PHYSICAL_IN, ifs_ipfhook4_in);
255 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
256 NH_PHYSICAL_OUT, ifs_ipfhook4_out);
257 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
258 NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
259 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
260 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
261 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
262 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
263
264 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
265 goto detach_failed;
266 ifs->ifs_ipf_ipv4 = NULL;
267 }
268
269 /*
270 * Remove notification of viona hooks
271 */
272 net_instance_notify_unregister(ifs->ifs_netid,
273 ipf_hook_instance_notify);
274
275 #undef UNDO_HOOK
276
277 /*
278 * Normally, viona will unregister itself before ipldetach() is called,
279 * so these will be no-ops, but out of caution, we try to make sure
280 * we've removed any of our references.
281 */
282 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
283 NH_PHYSICAL_IN);
284 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
285 NH_PHYSICAL_OUT);
286
287 {
288 char netidstr[12]; /* Large enough for INT_MAX + NUL */
289 (void) snprintf(netidstr, sizeof (netidstr), "%d",
290 ifs->ifs_netid);
291
292 /*
293 * The notify callbacks expect the netid value passed as a
294 * string in the third argument. To prevent confusion if
295 * traced, we pass the same value the nethook framework would
296 * pass, even though the callback does not currently use the
297 * value.
298 */
299 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
300 NULL, Hn_VIONA);
301 }
302
303 #ifdef IPFDEBUG
304 cmn_err(CE_CONT, "ipldetach()\n");
305 #endif
306
307 WRITE_ENTER(&ifs->ifs_ipf_global);
308 fr_deinitialise(ifs);
309
310 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
311 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
312
313 if (ifs->ifs_ipf_locks_done == 1) {
314 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
315 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
316 RW_DESTROY(&ifs->ifs_ipf_tokens);
317 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
318 ifs->ifs_ipf_locks_done = 0;
319 }
320
321 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
322 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
323 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
324 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
325 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
326 return -1;
327
328 return 0;
329
330 detach_failed:
331 WRITE_ENTER(&ifs->ifs_ipf_global);
332 return -1;
333 }
334
iplattach(ifs)335 int iplattach(ifs)
336 ipf_stack_t *ifs;
337 {
338 #if SOLARIS2 < 10
339 int i;
340 #endif
341 netid_t id = ifs->ifs_netid;
342
343 #ifdef IPFDEBUG
344 cmn_err(CE_CONT, "iplattach()\n");
345 #endif
346
347 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
348 ifs->ifs_fr_flags = IPF_LOGGING;
349 #ifdef _KERNEL
350 ifs->ifs_fr_update_ipid = 0;
351 #else
352 ifs->ifs_fr_update_ipid = 1;
353 #endif
354 ifs->ifs_fr_minttl = 4;
355 ifs->ifs_fr_icmpminfragmtu = 68;
356 #if defined(IPFILTER_DEFAULT_BLOCK)
357 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
358 #else
359 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
360 #endif
361
362 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
363 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
364 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
365 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
366 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
367 ifs->ifs_ipf_locks_done = 1;
368
369 if (fr_initialise(ifs) < 0)
370 return -1;
371
372 /*
373 * For incoming packets, we want the GZ-controlled hooks to run before
374 * the per-zone hooks, regardless of what order they're are installed.
375 * See the "GZ-controlled and per-zone stacks" comment block at the top
376 * of this file.
377 */
378 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
379 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
380 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \
381 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
382
383 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
384 hook4_nicevents, hook4_nicevents_gz, ifs);
385 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
386 hook4_in, hook4_in_gz, ifs);
387 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
388 hook4_loop_in, hook4_loop_in_gz, ifs);
389
390 /*
391 * For outgoing packets, we want the GZ-controlled hooks to run after
392 * the per-zone hooks, regardless of what order they're are installed.
393 * See the "GZ-controlled and per-zone stacks" comment block at the top
394 * of this file.
395 */
396 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
397 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
398 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \
399 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
400
401 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
402 hook4_out, hook4_out_gz, ifs);
403 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
404 hook4_loop_out, hook4_loop_out_gz, ifs);
405
406 /*
407 * If we hold this lock over all of the net_hook_register calls, we
408 * can cause a deadlock to occur with the following lock ordering:
409 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
410 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
411 */
412 RWLOCK_EXIT(&ifs->ifs_ipf_global);
413
414 /*
415 * Add IPv4 hooks
416 */
417 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
418 if (ifs->ifs_ipf_ipv4 == NULL)
419 goto hookup_failed;
420
421 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
422 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
423 if (!ifs->ifs_hook4_nic_events)
424 goto hookup_failed;
425
426 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
427 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
428 if (!ifs->ifs_hook4_physical_in)
429 goto hookup_failed;
430
431 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
432 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
433 if (!ifs->ifs_hook4_physical_out)
434 goto hookup_failed;
435
436 if (ifs->ifs_ipf_loopback) {
437 ifs->ifs_hook4_loopback_in = (net_hook_register(
438 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
439 ifs->ifs_ipfhook4_loop_in) == 0);
440 if (!ifs->ifs_hook4_loopback_in)
441 goto hookup_failed;
442
443 ifs->ifs_hook4_loopback_out = (net_hook_register(
444 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
445 ifs->ifs_ipfhook4_loop_out) == 0);
446 if (!ifs->ifs_hook4_loopback_out)
447 goto hookup_failed;
448 }
449
450 /*
451 * Add IPv6 hooks
452 */
453 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
454 if (ifs->ifs_ipf_ipv6 == NULL)
455 goto hookup_failed;
456
457 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
458 hook6_nicevents, hook6_nicevents_gz, ifs);
459 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
460 hook6_in, hook6_in_gz, ifs);
461 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
462 hook6_loop_in, hook6_loop_in_gz, ifs);
463 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
464 hook6_out, hook6_out_gz, ifs);
465 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
466 hook6_loop_out, hook6_loop_out_gz, ifs);
467
468 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
469 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
470 if (!ifs->ifs_hook6_nic_events)
471 goto hookup_failed;
472
473 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
474 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
475 if (!ifs->ifs_hook6_physical_in)
476 goto hookup_failed;
477
478 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
479 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
480 if (!ifs->ifs_hook6_physical_out)
481 goto hookup_failed;
482
483 if (ifs->ifs_ipf_loopback) {
484 ifs->ifs_hook6_loopback_in = (net_hook_register(
485 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
486 ifs->ifs_ipfhook6_loop_in) == 0);
487 if (!ifs->ifs_hook6_loopback_in)
488 goto hookup_failed;
489
490 ifs->ifs_hook6_loopback_out = (net_hook_register(
491 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
492 ifs->ifs_ipfhook6_loop_out) == 0);
493 if (!ifs->ifs_hook6_loopback_out)
494 goto hookup_failed;
495 }
496
497 /*
498 * VIONA INET hooks. While the nethook framework allows us to register
499 * hooks for events that haven't been registered yet, we instead
500 * register and unregister our hooks in response to notifications
501 * about the viona hooks from the nethook framework. This prevents
502 * problems when the viona module gets unloaded while the ipf module
503 * does not. If we do not unregister our hooks after the viona module
504 * is unloaded, the viona module cannot later re-register them if it
505 * gets reloaded. As the ip, vnd, and ipf modules are rarely unloaded
506 * even on DEBUG kernels, they do not experience this issue.
507 */
508 if (net_instance_notify_register(id, ipf_hook_instance_notify,
509 ifs) != 0)
510 goto hookup_failed;
511
512 /*
513 * Reacquire ipf_global, now it is safe.
514 */
515 WRITE_ENTER(&ifs->ifs_ipf_global);
516
517 /* Do not use private interface ip_params_arr[] in Solaris 10 */
518 #if SOLARIS2 < 10
519
520 #if SOLARIS2 >= 8
521 ip_forwarding = &ip_g_forward;
522 #endif
523 /*
524 * XXX - There is no terminator for this array, so it is not possible
525 * to tell if what we are looking for is missing and go off the end
526 * of the array.
527 */
528
529 #if SOLARIS2 <= 8
530 for (i = 0; ; i++) {
531 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
532 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
533 } else if (!strcmp(ip_param_arr[i].ip_param_name,
534 "ip_path_mtu_discovery")) {
535 ip_mtudisc = &ip_param_arr[i].ip_param_value;
536 }
537 #if SOLARIS2 < 8
538 else if (!strcmp(ip_param_arr[i].ip_param_name,
539 "ip_forwarding")) {
540 ip_forwarding = &ip_param_arr[i].ip_param_value;
541 }
542 #else
543 else if (!strcmp(ip_param_arr[i].ip_param_name,
544 "ip6_forwarding")) {
545 ip6_forwarding = &ip_param_arr[i].ip_param_value;
546 }
547 #endif
548
549 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
550 #if SOLARIS2 >= 8
551 ip6_forwarding != NULL &&
552 #endif
553 ip_forwarding != NULL)
554 break;
555 }
556 #endif
557
558 if (ifs->ifs_fr_control_forwarding & 1) {
559 if (ip_forwarding != NULL)
560 *ip_forwarding = 1;
561 #if SOLARIS2 >= 8
562 if (ip6_forwarding != NULL)
563 *ip6_forwarding = 1;
564 #endif
565 }
566
567 #endif
568
569 return 0;
570 hookup_failed:
571 WRITE_ENTER(&ifs->ifs_ipf_global);
572 return -1;
573 }
574
575 /* ------------------------------------------------------------------------ */
576 /*
577 * Called whenever a nethook protocol is registered or unregistered. Currently
578 * only used to add or remove the hooks for viona.
579 *
580 * While the function signature requires returning int, nothing
581 * in usr/src/uts/common/io/hook.c that invokes the callbacks
582 * captures the return value (nor is there currently any documentation
583 * on what return values should be). For now at least, we'll return 0
584 * on success (or 'not applicable') or an error value. Even if the
585 * nethook framework doesn't use the return address, it can be observed via
586 * dtrace if needed.
587 */
588 static int
ipf_hook_protocol_notify(hook_notify_cmd_t command,void * arg,const char * name,const char * dummy __unused,const char * he_name)589 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
590 const char *name, const char *dummy __unused, const char *he_name)
591 {
592 ipf_stack_t *ifs = arg;
593 hook_t **hookpp;
594 char *hook_name, *hint_name;
595 hook_func_t hookfn;
596 boolean_t *hookedp;
597 hook_hint_t hint;
598 boolean_t out;
599 int ret = 0;
600
601 const boolean_t gz = ifs->ifs_gz_controlled;
602
603 /* We currently only care about viona hooks notifications */
604 if (strcmp(name, Hn_VIONA) != 0)
605 return (0);
606
607 if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
608 out = B_FALSE;
609 } else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
610 out = B_TRUE;
611 } else {
612 /*
613 * If we've added more hook events to viona, we must add
614 * the corresponding handling here (even if it's just to
615 * ignore it) to prevent the firewall from not working as
616 * intended.
617 */
618 cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
619 he_name);
620
621 return (0);
622 }
623
624 if (out) {
625 hookpp = &ifs->ifs_ipfhookviona_out;
626 hookfn = ipf_hookviona_out;
627 hookedp = &ifs->ifs_hookviona_physical_out;
628 name = gz ? hook_viona_out_gz : hook_viona_out;
629 hint = gz ? HH_AFTER : HH_BEFORE;
630 hint_name = gz ? hook_viona_out : hook_viona_out_gz;
631 } else {
632 hookpp = &ifs->ifs_ipfhookviona_in;
633 hookfn = ipf_hookviona_in;
634 hookedp = &ifs->ifs_hookviona_physical_in;
635 name = gz ? hook_viona_in_gz : hook_viona_in;
636 hint = gz ? HH_BEFORE : HH_AFTER;
637 hint_name = gz ? hook_viona_in : hook_viona_in_gz;
638 }
639
640 switch (command) {
641 default:
642 case HN_NONE:
643 break;
644 case HN_REGISTER:
645 HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
646 (*hookpp)->h_hint = hint;
647 (*hookpp)->h_hintvalue = (uintptr_t)hint_name;
648 ret = net_hook_register(ifs->ifs_ipf_viona,
649 (char *)he_name, *hookpp);
650 if (ret != 0) {
651 cmn_err(CE_NOTE, "%s: could not register hook "
652 "(hook family=%s hook=%s) err=%d", __func__,
653 name, he_name, ret);
654 *hookedp = B_FALSE;
655 return (ret);
656 }
657 *hookedp = B_TRUE;
658 break;
659 case HN_UNREGISTER:
660 if (ifs->ifs_ipf_viona == NULL)
661 break;
662
663 ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
664 (char *)he_name, *hookpp) : 0;
665 if ((ret == 0 || ret == ENXIO)) {
666 if (*hookpp != NULL) {
667 hook_free(*hookpp);
668 *hookpp = NULL;
669 }
670 *hookedp = B_FALSE;
671 }
672 break;
673 }
674
675 return (ret);
676 }
677
678 /*
679 * Called whenever a new nethook instance is created. Currently only used
680 * with the Hn_VIONA nethooks. Similar to ipf_hook_protocol_notify, the out
681 * function signature must return an int, though the result is never used.
682 * We elect to return 0 on success (or not applicable) or a non-zero value
683 * on error.
684 */
685 static int
ipf_hook_instance_notify(hook_notify_cmd_t command,void * arg,const char * netid,const char * dummy __unused,const char * instance)686 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
687 const char *netid, const char *dummy __unused, const char *instance)
688 {
689 ipf_stack_t *ifs = arg;
690 int ret = 0;
691
692 /* We currently only care about viona hooks */
693 if (strcmp(instance, Hn_VIONA) != 0)
694 return (0);
695
696 switch (command) {
697 case HN_NONE:
698 default:
699 return (0);
700 case HN_REGISTER:
701 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
702 NHF_VIONA);
703
704 if (ifs->ifs_ipf_viona == NULL)
705 return (EPROTONOSUPPORT);
706
707 ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
708 ipf_hook_protocol_notify, ifs);
709 VERIFY(ret == 0 || ret == ESHUTDOWN);
710 break;
711 case HN_UNREGISTER:
712 if (ifs->ifs_ipf_viona == NULL)
713 break;
714 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
715 ipf_hook_protocol_notify));
716 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
717 ifs->ifs_ipf_viona = NULL;
718 break;
719 }
720
721 return (ret);
722 }
723
fr_setipfloopback(set,ifs)724 static int fr_setipfloopback(set, ifs)
725 int set;
726 ipf_stack_t *ifs;
727 {
728 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
729 return EFAULT;
730
731 if (set && !ifs->ifs_ipf_loopback) {
732 ifs->ifs_ipf_loopback = 1;
733
734 ifs->ifs_hook4_loopback_in = (net_hook_register(
735 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
736 ifs->ifs_ipfhook4_loop_in) == 0);
737 if (!ifs->ifs_hook4_loopback_in)
738 return EINVAL;
739
740 ifs->ifs_hook4_loopback_out = (net_hook_register(
741 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
742 ifs->ifs_ipfhook4_loop_out) == 0);
743 if (!ifs->ifs_hook4_loopback_out)
744 return EINVAL;
745
746 ifs->ifs_hook6_loopback_in = (net_hook_register(
747 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
748 ifs->ifs_ipfhook6_loop_in) == 0);
749 if (!ifs->ifs_hook6_loopback_in)
750 return EINVAL;
751
752 ifs->ifs_hook6_loopback_out = (net_hook_register(
753 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
754 ifs->ifs_ipfhook6_loop_out) == 0);
755 if (!ifs->ifs_hook6_loopback_out)
756 return EINVAL;
757
758 } else if (!set && ifs->ifs_ipf_loopback) {
759 ifs->ifs_ipf_loopback = 0;
760
761 ifs->ifs_hook4_loopback_in =
762 (net_hook_unregister(ifs->ifs_ipf_ipv4,
763 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
764 if (ifs->ifs_hook4_loopback_in)
765 return EBUSY;
766
767 ifs->ifs_hook4_loopback_out =
768 (net_hook_unregister(ifs->ifs_ipf_ipv4,
769 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
770 if (ifs->ifs_hook4_loopback_out)
771 return EBUSY;
772
773 ifs->ifs_hook6_loopback_in =
774 (net_hook_unregister(ifs->ifs_ipf_ipv6,
775 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
776 if (ifs->ifs_hook6_loopback_in)
777 return EBUSY;
778
779 ifs->ifs_hook6_loopback_out =
780 (net_hook_unregister(ifs->ifs_ipf_ipv6,
781 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
782 if (ifs->ifs_hook6_loopback_out)
783 return EBUSY;
784 }
785 return 0;
786 }
787
788
789 /*
790 * Filter ioctl interface.
791 */
792 /*ARGSUSED*/
iplioctl(dev,cmd,data,mode,cp,rp)793 int iplioctl(dev, cmd, data, mode, cp, rp)
794 dev_t dev;
795 int cmd;
796 #if SOLARIS2 >= 7
797 intptr_t data;
798 #else
799 int *data;
800 #endif
801 int mode;
802 cred_t *cp;
803 int *rp;
804 {
805 int error = 0, tmp;
806 friostat_t fio;
807 minor_t unit;
808 u_int enable;
809 ipf_stack_t *ifs;
810 zoneid_t zid;
811 ipf_devstate_t *isp;
812
813 #ifdef IPFDEBUG
814 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
815 dev, cmd, data, mode, cp, rp);
816 #endif
817 unit = getminor(dev);
818
819 isp = ddi_get_soft_state(ipf_state, unit);
820 if (isp == NULL)
821 return ENXIO;
822 unit = isp->ipfs_minor;
823
824 zid = crgetzoneid(cp);
825 if (cmd == SIOCIPFZONESET) {
826 if (zid == GLOBAL_ZONEID)
827 return fr_setzoneid(isp, (caddr_t) data);
828 return EACCES;
829 }
830
831 /*
832 * ipf_find_stack returns with a read lock on ifs_ipf_global
833 */
834 ifs = ipf_find_stack(zid, isp);
835 if (ifs == NULL)
836 return ENXIO;
837
838 if (ifs->ifs_fr_running <= 0) {
839 if (unit != IPL_LOGIPF) {
840 RWLOCK_EXIT(&ifs->ifs_ipf_global);
841 return EIO;
842 }
843 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
844 cmd != SIOCIPFSET && cmd != SIOCFRENB &&
845 cmd != SIOCGETFS && cmd != SIOCGETFF) {
846 RWLOCK_EXIT(&ifs->ifs_ipf_global);
847 return EIO;
848 }
849 }
850
851 if (ifs->ifs_fr_enable_active != 0) {
852 RWLOCK_EXIT(&ifs->ifs_ipf_global);
853 return EBUSY;
854 }
855
856 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
857 curproc, ifs);
858 if (error != -1) {
859 RWLOCK_EXIT(&ifs->ifs_ipf_global);
860 return error;
861 }
862 error = 0;
863
864 switch (cmd)
865 {
866 case SIOCFRENB :
867 if (!(mode & FWRITE))
868 error = EPERM;
869 else {
870 error = COPYIN((caddr_t)data, (caddr_t)&enable,
871 sizeof(enable));
872 if (error != 0) {
873 error = EFAULT;
874 break;
875 }
876
877 RWLOCK_EXIT(&ifs->ifs_ipf_global);
878 WRITE_ENTER(&ifs->ifs_ipf_global);
879
880 /*
881 * We must recheck fr_enable_active here, since we've
882 * dropped ifs_ipf_global from R in order to get it
883 * exclusively.
884 */
885 if (ifs->ifs_fr_enable_active == 0) {
886 ifs->ifs_fr_enable_active = 1;
887 error = fr_enableipf(ifs, enable);
888 ifs->ifs_fr_enable_active = 0;
889 }
890 }
891 break;
892 case SIOCIPFSET :
893 if (!(mode & FWRITE)) {
894 error = EPERM;
895 break;
896 }
897 /* FALLTHRU */
898 case SIOCIPFGETNEXT :
899 case SIOCIPFGET :
900 error = fr_ipftune(cmd, (void *)data, ifs);
901 break;
902 case SIOCSETFF :
903 if (!(mode & FWRITE))
904 error = EPERM;
905 else {
906 error = COPYIN((caddr_t)data,
907 (caddr_t)&ifs->ifs_fr_flags,
908 sizeof(ifs->ifs_fr_flags));
909 if (error != 0)
910 error = EFAULT;
911 }
912 break;
913 case SIOCIPFLP :
914 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
915 sizeof(tmp));
916 if (error != 0)
917 error = EFAULT;
918 else
919 error = fr_setipfloopback(tmp, ifs);
920 break;
921 case SIOCGETFF :
922 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
923 sizeof(ifs->ifs_fr_flags));
924 if (error != 0)
925 error = EFAULT;
926 break;
927 case SIOCFUNCL :
928 error = fr_resolvefunc((void *)data);
929 break;
930 case SIOCINAFR :
931 case SIOCRMAFR :
932 case SIOCADAFR :
933 case SIOCZRLST :
934 if (!(mode & FWRITE))
935 error = EPERM;
936 else
937 error = frrequest(unit, cmd, (caddr_t)data,
938 ifs->ifs_fr_active, 1, ifs);
939 break;
940 case SIOCINIFR :
941 case SIOCRMIFR :
942 case SIOCADIFR :
943 if (!(mode & FWRITE))
944 error = EPERM;
945 else
946 error = frrequest(unit, cmd, (caddr_t)data,
947 1 - ifs->ifs_fr_active, 1, ifs);
948 break;
949 case SIOCSWAPA :
950 if (!(mode & FWRITE))
951 error = EPERM;
952 else {
953 WRITE_ENTER(&ifs->ifs_ipf_mutex);
954 bzero((char *)ifs->ifs_frcache,
955 sizeof (ifs->ifs_frcache));
956 error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
957 (caddr_t)data,
958 sizeof(ifs->ifs_fr_active));
959 if (error != 0)
960 error = EFAULT;
961 else
962 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
963 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
964 }
965 break;
966 case SIOCGETFS :
967 fr_getstat(&fio, ifs);
968 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
969 break;
970 case SIOCFRZST :
971 if (!(mode & FWRITE))
972 error = EPERM;
973 else
974 error = fr_zerostats((caddr_t)data, ifs);
975 break;
976 case SIOCIPFFL :
977 if (!(mode & FWRITE))
978 error = EPERM;
979 else {
980 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
981 sizeof(tmp));
982 if (!error) {
983 tmp = frflush(unit, 4, tmp, ifs);
984 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
985 sizeof(tmp));
986 if (error != 0)
987 error = EFAULT;
988 } else
989 error = EFAULT;
990 }
991 break;
992 #ifdef USE_INET6
993 case SIOCIPFL6 :
994 if (!(mode & FWRITE))
995 error = EPERM;
996 else {
997 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
998 sizeof(tmp));
999 if (!error) {
1000 tmp = frflush(unit, 6, tmp, ifs);
1001 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1002 sizeof(tmp));
1003 if (error != 0)
1004 error = EFAULT;
1005 } else
1006 error = EFAULT;
1007 }
1008 break;
1009 #endif
1010 case SIOCSTLCK :
1011 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1012 if (error == 0) {
1013 ifs->ifs_fr_state_lock = tmp;
1014 ifs->ifs_fr_nat_lock = tmp;
1015 ifs->ifs_fr_frag_lock = tmp;
1016 ifs->ifs_fr_auth_lock = tmp;
1017 } else
1018 error = EFAULT;
1019 break;
1020 #ifdef IPFILTER_LOG
1021 case SIOCIPFFB :
1022 if (!(mode & FWRITE))
1023 error = EPERM;
1024 else {
1025 tmp = ipflog_clear(unit, ifs);
1026 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1027 sizeof(tmp));
1028 if (error)
1029 error = EFAULT;
1030 }
1031 break;
1032 #endif /* IPFILTER_LOG */
1033 case SIOCFRSYN :
1034 if (!(mode & FWRITE))
1035 error = EPERM;
1036 else {
1037 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1038 WRITE_ENTER(&ifs->ifs_ipf_global);
1039
1040 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1041 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1042 fr_nataddrsync(0, NULL, NULL, ifs);
1043 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1044 error = 0;
1045 }
1046 break;
1047 case SIOCGFRST :
1048 error = fr_outobj((void *)data, fr_fragstats(ifs),
1049 IPFOBJ_FRAGSTAT);
1050 break;
1051 case FIONREAD :
1052 #ifdef IPFILTER_LOG
1053 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
1054
1055 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
1056 if (error != 0)
1057 error = EFAULT;
1058 #endif
1059 break;
1060 case SIOCIPFITER :
1061 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
1062 curproc, ifs);
1063 break;
1064
1065 case SIOCGENITER :
1066 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
1067 curproc, ifs);
1068 break;
1069
1070 case SIOCIPFDELTOK :
1071 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1072 if (error != 0) {
1073 error = EFAULT;
1074 } else {
1075 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
1076 }
1077 break;
1078
1079 default :
1080 #ifdef IPFDEBUG
1081 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
1082 cmd, (void *)data);
1083 #endif
1084 error = EINVAL;
1085 break;
1086 }
1087 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1088 return error;
1089 }
1090
1091
fr_enableipf(ifs,enable)1092 static int fr_enableipf(ifs, enable)
1093 ipf_stack_t *ifs;
1094 int enable;
1095 {
1096 int error;
1097
1098 if (!enable) {
1099 error = ipldetach(ifs);
1100 if (error == 0)
1101 ifs->ifs_fr_running = -1;
1102 return error;
1103 }
1104
1105 if (ifs->ifs_fr_running > 0)
1106 return 0;
1107
1108 error = iplattach(ifs);
1109 if (error == 0) {
1110 if (ifs->ifs_fr_timer_id == NULL) {
1111 int hz = drv_usectohz(500000);
1112
1113 ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
1114 (void *)ifs,
1115 hz);
1116 }
1117 ifs->ifs_fr_running = 1;
1118 } else {
1119 (void) ipldetach(ifs);
1120 }
1121 return error;
1122 }
1123
1124
get_unit(name,v,ifs)1125 phy_if_t get_unit(name, v, ifs)
1126 char *name;
1127 int v;
1128 ipf_stack_t *ifs;
1129 {
1130 net_handle_t nif;
1131
1132 if (v == 4)
1133 nif = ifs->ifs_ipf_ipv4;
1134 else if (v == 6)
1135 nif = ifs->ifs_ipf_ipv6;
1136 else
1137 return 0;
1138
1139 return (net_phylookup(nif, name));
1140 }
1141
1142 /*
1143 * routines below for saving IP headers to buffer
1144 */
1145 /*ARGSUSED*/
iplopen(devp,flags,otype,cred)1146 int iplopen(devp, flags, otype, cred)
1147 dev_t *devp;
1148 int flags, otype;
1149 cred_t *cred;
1150 {
1151 ipf_devstate_t *isp;
1152 minor_t min = getminor(*devp);
1153 minor_t minor;
1154
1155 #ifdef IPFDEBUG
1156 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
1157 #endif
1158 if (!(otype & OTYP_CHR))
1159 return ENXIO;
1160
1161 if (IPL_LOGMAX < min)
1162 return ENXIO;
1163
1164 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
1165 VM_BESTFIT | VM_SLEEP);
1166
1167 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
1168 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
1169 return ENXIO;
1170 }
1171
1172 *devp = makedevice(getmajor(*devp), minor);
1173 isp = ddi_get_soft_state(ipf_state, minor);
1174 VERIFY(isp != NULL);
1175
1176 isp->ipfs_minor = min;
1177 isp->ipfs_zoneid = IPFS_ZONE_UNSET;
1178
1179 return 0;
1180 }
1181
1182
1183 /*ARGSUSED*/
iplclose(dev,flags,otype,cred)1184 int iplclose(dev, flags, otype, cred)
1185 dev_t dev;
1186 int flags, otype;
1187 cred_t *cred;
1188 {
1189 minor_t min = getminor(dev);
1190
1191 #ifdef IPFDEBUG
1192 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
1193 #endif
1194
1195 if (IPL_LOGMAX < min)
1196 return ENXIO;
1197
1198 ddi_soft_state_free(ipf_state, min);
1199 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
1200
1201 return 0;
1202 }
1203
1204 #ifdef IPFILTER_LOG
1205 /*
1206 * iplread/ipllog
1207 * both of these must operate with at least splnet() lest they be
1208 * called during packet processing and cause an inconsistancy to appear in
1209 * the filter lists.
1210 */
1211 /*ARGSUSED*/
iplread(dev,uio,cp)1212 int iplread(dev, uio, cp)
1213 dev_t dev;
1214 register struct uio *uio;
1215 cred_t *cp;
1216 {
1217 ipf_stack_t *ifs;
1218 int ret;
1219 minor_t unit;
1220 ipf_devstate_t *isp;
1221
1222 unit = getminor(dev);
1223 isp = ddi_get_soft_state(ipf_state, unit);
1224 if (isp == NULL)
1225 return ENXIO;
1226 unit = isp->ipfs_minor;
1227
1228
1229 /*
1230 * ipf_find_stack returns with a read lock on ifs_ipf_global
1231 */
1232 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1233 if (ifs == NULL)
1234 return ENXIO;
1235
1236 # ifdef IPFDEBUG
1237 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1238 # endif
1239
1240 if (ifs->ifs_fr_running < 1) {
1241 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1242 return EIO;
1243 }
1244
1245 # ifdef IPFILTER_SYNC
1246 if (unit == IPL_LOGSYNC) {
1247 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1248 return ipfsync_read(uio);
1249 }
1250 # endif
1251
1252 ret = ipflog_read(unit, uio, ifs);
1253 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1254 return ret;
1255 }
1256 #endif /* IPFILTER_LOG */
1257
1258
1259 /*
1260 * iplread/ipllog
1261 * both of these must operate with at least splnet() lest they be
1262 * called during packet processing and cause an inconsistancy to appear in
1263 * the filter lists.
1264 */
iplwrite(dev,uio,cp)1265 int iplwrite(dev, uio, cp)
1266 dev_t dev;
1267 register struct uio *uio;
1268 cred_t *cp;
1269 {
1270 ipf_stack_t *ifs;
1271 minor_t unit;
1272 ipf_devstate_t *isp;
1273
1274 unit = getminor(dev);
1275 isp = ddi_get_soft_state(ipf_state, unit);
1276 if (isp == NULL)
1277 return ENXIO;
1278 unit = isp->ipfs_minor;
1279
1280 /*
1281 * ipf_find_stack returns with a read lock on ifs_ipf_global
1282 */
1283 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1284 if (ifs == NULL)
1285 return ENXIO;
1286
1287 #ifdef IPFDEBUG
1288 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1289 #endif
1290
1291 if (ifs->ifs_fr_running < 1) {
1292 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1293 return EIO;
1294 }
1295
1296 #ifdef IPFILTER_SYNC
1297 if (getminor(dev) == IPL_LOGSYNC) {
1298 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1299 return ipfsync_write(uio);
1300 }
1301 #endif /* IPFILTER_SYNC */
1302 dev = dev; /* LINT */
1303 uio = uio; /* LINT */
1304 cp = cp; /* LINT */
1305 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1306 return ENXIO;
1307 }
1308
1309
1310 /*
1311 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1312 * requires a large amount of setting up and isn't any more efficient.
1313 */
fr_send_reset(fin)1314 int fr_send_reset(fin)
1315 fr_info_t *fin;
1316 {
1317 tcphdr_t *tcp, *tcp2;
1318 int tlen, hlen;
1319 mblk_t *m;
1320 #ifdef USE_INET6
1321 ip6_t *ip6;
1322 #endif
1323 ip_t *ip;
1324
1325 tcp = fin->fin_dp;
1326 if (tcp->th_flags & TH_RST)
1327 return -1;
1328
1329 #ifndef IPFILTER_CKSUM
1330 if (fr_checkl4sum(fin) == -1)
1331 return -1;
1332 #endif
1333
1334 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1335 #ifdef USE_INET6
1336 if (fin->fin_v == 6)
1337 hlen = sizeof(ip6_t);
1338 else
1339 #endif
1340 hlen = sizeof(ip_t);
1341 hlen += sizeof(*tcp2);
1342 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1343 return -1;
1344
1345 m->b_rptr += 64;
1346 MTYPE(m) = M_DATA;
1347 m->b_wptr = m->b_rptr + hlen;
1348 ip = (ip_t *)m->b_rptr;
1349 bzero((char *)ip, hlen);
1350 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1351 tcp2->th_dport = tcp->th_sport;
1352 tcp2->th_sport = tcp->th_dport;
1353 if (tcp->th_flags & TH_ACK) {
1354 tcp2->th_seq = tcp->th_ack;
1355 tcp2->th_flags = TH_RST;
1356 } else {
1357 tcp2->th_ack = ntohl(tcp->th_seq);
1358 tcp2->th_ack += tlen;
1359 tcp2->th_ack = htonl(tcp2->th_ack);
1360 tcp2->th_flags = TH_RST|TH_ACK;
1361 }
1362 tcp2->th_off = sizeof(struct tcphdr) >> 2;
1363
1364 ip->ip_v = fin->fin_v;
1365 #ifdef USE_INET6
1366 if (fin->fin_v == 6) {
1367 ip6 = (ip6_t *)m->b_rptr;
1368 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1369 ip6->ip6_src = fin->fin_dst6.in6;
1370 ip6->ip6_dst = fin->fin_src6.in6;
1371 ip6->ip6_plen = htons(sizeof(*tcp));
1372 ip6->ip6_nxt = IPPROTO_TCP;
1373 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1374 } else
1375 #endif
1376 {
1377 ip->ip_src.s_addr = fin->fin_daddr;
1378 ip->ip_dst.s_addr = fin->fin_saddr;
1379 ip->ip_id = fr_nextipid(fin);
1380 ip->ip_hl = sizeof(*ip) >> 2;
1381 ip->ip_p = IPPROTO_TCP;
1382 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1383 ip->ip_tos = fin->fin_ip->ip_tos;
1384 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1385 }
1386 return fr_send_ip(fin, m, &m);
1387 }
1388
1389 /*
1390 * Function: fr_send_ip
1391 * Returns: 0: success
1392 * -1: failed
1393 * Parameters:
1394 * fin: packet information
1395 * m: the message block where ip head starts
1396 *
1397 * Send a new packet through the IP stack.
1398 *
1399 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1400 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1401 * function).
1402 *
1403 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1404 * in by this function.
1405 *
1406 * All other portions of the packet must be in on-the-wire format.
1407 */
1408 /*ARGSUSED*/
fr_send_ip(fin,m,mpp)1409 static int fr_send_ip(fin, m, mpp)
1410 fr_info_t *fin;
1411 mblk_t *m, **mpp;
1412 {
1413 qpktinfo_t qpi, *qpip;
1414 fr_info_t fnew;
1415 ip_t *ip;
1416 int i, hlen;
1417 ipf_stack_t *ifs = fin->fin_ifs;
1418
1419 ip = (ip_t *)m->b_rptr;
1420 bzero((char *)&fnew, sizeof(fnew));
1421
1422 #ifdef USE_INET6
1423 if (fin->fin_v == 6) {
1424 ip6_t *ip6;
1425
1426 ip6 = (ip6_t *)ip;
1427 ip6->ip6_vfc = 0x60;
1428 ip6->ip6_hlim = 127;
1429 fnew.fin_v = 6;
1430 hlen = sizeof(*ip6);
1431 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1432 } else
1433 #endif
1434 {
1435 fnew.fin_v = 4;
1436 #if SOLARIS2 >= 10
1437 ip->ip_ttl = 255;
1438 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1439 ip->ip_off = htons(IP_DF);
1440 #else
1441 if (ip_ttl_ptr != NULL)
1442 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1443 else
1444 ip->ip_ttl = 63;
1445 if (ip_mtudisc != NULL)
1446 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1447 else
1448 ip->ip_off = htons(IP_DF);
1449 #endif
1450 /*
1451 * The dance with byte order and ip_len/ip_off is because in
1452 * fr_fastroute, it expects them to be in host byte order but
1453 * ipf_cksum expects them to be in network byte order.
1454 */
1455 ip->ip_len = htons(ip->ip_len);
1456 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1457 ip->ip_len = ntohs(ip->ip_len);
1458 ip->ip_off = ntohs(ip->ip_off);
1459 hlen = sizeof(*ip);
1460 fnew.fin_plen = ip->ip_len;
1461 }
1462
1463 qpip = fin->fin_qpi;
1464 qpi.qpi_off = 0;
1465 qpi.qpi_ill = qpip->qpi_ill;
1466 qpi.qpi_m = m;
1467 qpi.qpi_data = ip;
1468 fnew.fin_qpi = &qpi;
1469 fnew.fin_ifp = fin->fin_ifp;
1470 fnew.fin_flx = FI_NOCKSUM | FI_GENERATED;
1471 fnew.fin_m = m;
1472 fnew.fin_qfm = m;
1473 fnew.fin_ip = ip;
1474 fnew.fin_mp = mpp;
1475 fnew.fin_hlen = hlen;
1476 fnew.fin_dp = (char *)ip + hlen;
1477 fnew.fin_ifs = fin->fin_ifs;
1478 (void) fr_makefrip(hlen, ip, &fnew);
1479
1480 i = fr_fastroute(m, mpp, &fnew, NULL);
1481 return i;
1482 }
1483
1484
fr_send_icmp_err(type,fin,dst)1485 int fr_send_icmp_err(type, fin, dst)
1486 int type;
1487 fr_info_t *fin;
1488 int dst;
1489 {
1490 struct in_addr dst4;
1491 struct icmp *icmp;
1492 qpktinfo_t *qpi;
1493 int hlen, code;
1494 phy_if_t phy;
1495 u_short sz;
1496 #ifdef USE_INET6
1497 mblk_t *mb;
1498 #endif
1499 mblk_t *m;
1500 #ifdef USE_INET6
1501 ip6_t *ip6;
1502 #endif
1503 ip_t *ip;
1504 ipf_stack_t *ifs = fin->fin_ifs;
1505
1506 if ((type < 0) || (type > ICMP_MAXTYPE))
1507 return -1;
1508
1509 code = fin->fin_icode;
1510 #ifdef USE_INET6
1511 if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1512 return -1;
1513 #endif
1514
1515 #ifndef IPFILTER_CKSUM
1516 if (fr_checkl4sum(fin) == -1)
1517 return -1;
1518 #endif
1519
1520 qpi = fin->fin_qpi;
1521
1522 #ifdef USE_INET6
1523 mb = fin->fin_qfm;
1524
1525 if (fin->fin_v == 6) {
1526 sz = sizeof(ip6_t);
1527 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1528 hlen = sizeof(ip6_t);
1529 type = icmptoicmp6types[type];
1530 if (type == ICMP6_DST_UNREACH)
1531 code = icmptoicmp6unreach[code];
1532 } else
1533 #endif
1534 {
1535 if ((fin->fin_p == IPPROTO_ICMP) &&
1536 !(fin->fin_flx & FI_SHORT))
1537 switch (ntohs(fin->fin_data[0]) >> 8)
1538 {
1539 case ICMP_ECHO :
1540 case ICMP_TSTAMP :
1541 case ICMP_IREQ :
1542 case ICMP_MASKREQ :
1543 break;
1544 default :
1545 return 0;
1546 }
1547
1548 sz = sizeof(ip_t) * 2;
1549 sz += 8; /* 64 bits of data */
1550 hlen = sizeof(ip_t);
1551 }
1552
1553 sz += offsetof(struct icmp, icmp_ip);
1554 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1555 return -1;
1556 MTYPE(m) = M_DATA;
1557 m->b_rptr += 64;
1558 m->b_wptr = m->b_rptr + sz;
1559 bzero((char *)m->b_rptr, (size_t)sz);
1560 ip = (ip_t *)m->b_rptr;
1561 ip->ip_v = fin->fin_v;
1562 icmp = (struct icmp *)(m->b_rptr + hlen);
1563 icmp->icmp_type = type & 0xff;
1564 icmp->icmp_code = code & 0xff;
1565 phy = (phy_if_t)qpi->qpi_ill;
1566 if (type == ICMP_UNREACH && (phy != 0) &&
1567 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1568 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1569
1570 #ifdef USE_INET6
1571 if (fin->fin_v == 6) {
1572 struct in6_addr dst6;
1573 int csz;
1574
1575 if (dst == 0) {
1576 ipf_stack_t *ifs = fin->fin_ifs;
1577
1578 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1579 (void *)&dst6, NULL, ifs) == -1) {
1580 FREE_MB_T(m);
1581 return -1;
1582 }
1583 } else
1584 dst6 = fin->fin_dst6.in6;
1585
1586 csz = sz;
1587 sz -= sizeof(ip6_t);
1588 ip6 = (ip6_t *)m->b_rptr;
1589 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1590 ip6->ip6_plen = htons((u_short)sz);
1591 ip6->ip6_nxt = IPPROTO_ICMPV6;
1592 ip6->ip6_src = dst6;
1593 ip6->ip6_dst = fin->fin_src6.in6;
1594 sz -= offsetof(struct icmp, icmp_ip);
1595 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1596 icmp->icmp_cksum = csz - sizeof(ip6_t);
1597 } else
1598 #endif
1599 {
1600 ip->ip_hl = sizeof(*ip) >> 2;
1601 ip->ip_p = IPPROTO_ICMP;
1602 ip->ip_id = fin->fin_ip->ip_id;
1603 ip->ip_tos = fin->fin_ip->ip_tos;
1604 ip->ip_len = (u_short)sz;
1605 if (dst == 0) {
1606 ipf_stack_t *ifs = fin->fin_ifs;
1607
1608 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1609 (void *)&dst4, NULL, ifs) == -1) {
1610 FREE_MB_T(m);
1611 return -1;
1612 }
1613 } else {
1614 dst4 = fin->fin_dst;
1615 }
1616 ip->ip_src = dst4;
1617 ip->ip_dst = fin->fin_src;
1618 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1619 sizeof(*fin->fin_ip));
1620 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1621 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1622 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1623 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1624 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1625 sz - sizeof(ip_t));
1626 }
1627
1628 /*
1629 * Need to exit out of these so we don't recursively call rw_enter
1630 * from fr_qout.
1631 */
1632 return fr_send_ip(fin, m, &m);
1633 }
1634
1635 #include <sys/time.h>
1636 #include <sys/varargs.h>
1637
1638 #ifndef _KERNEL
1639 #include <stdio.h>
1640 #endif
1641
1642 /*
1643 * Return the first IP Address associated with an interface
1644 * For IPv6, we walk through the list of logical interfaces and return
1645 * the address of the first one that isn't a link-local interface.
1646 * We can't assume that it is :1 because another link-local address
1647 * may have been assigned there.
1648 */
1649 /*ARGSUSED*/
fr_ifpaddr(v,atype,ifptr,inp,inpmask,ifs)1650 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1651 int v, atype;
1652 void *ifptr;
1653 struct in_addr *inp, *inpmask;
1654 ipf_stack_t *ifs;
1655 {
1656 struct sockaddr_in6 v6addr[2];
1657 struct sockaddr_in v4addr[2];
1658 net_ifaddr_t type[2];
1659 net_handle_t net_data;
1660 phy_if_t phyif;
1661 void *array;
1662
1663 switch (v)
1664 {
1665 case 4:
1666 net_data = ifs->ifs_ipf_ipv4;
1667 array = v4addr;
1668 break;
1669 case 6:
1670 net_data = ifs->ifs_ipf_ipv6;
1671 array = v6addr;
1672 break;
1673 default:
1674 net_data = NULL;
1675 break;
1676 }
1677
1678 if (net_data == NULL)
1679 return -1;
1680
1681 phyif = (phy_if_t)ifptr;
1682
1683 switch (atype)
1684 {
1685 case FRI_PEERADDR :
1686 type[0] = NA_PEER;
1687 break;
1688
1689 case FRI_BROADCAST :
1690 type[0] = NA_BROADCAST;
1691 break;
1692
1693 default :
1694 type[0] = NA_ADDRESS;
1695 break;
1696 }
1697
1698 type[1] = NA_NETMASK;
1699
1700 if (v == 6) {
1701 lif_if_t idx = 0;
1702
1703 do {
1704 idx = net_lifgetnext(net_data, phyif, idx);
1705 if (net_getlifaddr(net_data, phyif, idx, 2, type,
1706 array) < 0)
1707 return -1;
1708 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1709 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1710 break;
1711 } while (idx != 0);
1712
1713 if (idx == 0)
1714 return -1;
1715
1716 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1717 inp, inpmask);
1718 }
1719
1720 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1721 return -1;
1722
1723 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1724 }
1725
1726
fr_newisn(fin)1727 u_32_t fr_newisn(fin)
1728 fr_info_t *fin;
1729 {
1730 static int iss_seq_off = 0;
1731 u_char hash[16];
1732 u_32_t newiss;
1733 MD5_CTX ctx;
1734 ipf_stack_t *ifs = fin->fin_ifs;
1735
1736 /*
1737 * Compute the base value of the ISS. It is a hash
1738 * of (saddr, sport, daddr, dport, secret).
1739 */
1740 MD5Init(&ctx);
1741
1742 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1743 sizeof(fin->fin_fi.fi_src));
1744 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1745 sizeof(fin->fin_fi.fi_dst));
1746 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1747
1748 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1749
1750 MD5Final(hash, &ctx);
1751
1752 bcopy(hash, &newiss, sizeof(newiss));
1753
1754 /*
1755 * Now increment our "timer", and add it in to
1756 * the computed value.
1757 *
1758 * XXX Use `addin'?
1759 * XXX TCP_ISSINCR too large to use?
1760 */
1761 iss_seq_off += 0x00010000;
1762 newiss += iss_seq_off;
1763 return newiss;
1764 }
1765
1766
1767 /* ------------------------------------------------------------------------ */
1768 /* Function: fr_nextipid */
1769 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */
1770 /* Parameters: fin(I) - pointer to packet information */
1771 /* */
1772 /* Returns the next IPv4 ID to use for this packet. */
1773 /* ------------------------------------------------------------------------ */
fr_nextipid(fin)1774 u_short fr_nextipid(fin)
1775 fr_info_t *fin;
1776 {
1777 static u_short ipid = 0;
1778 u_short id;
1779 ipf_stack_t *ifs = fin->fin_ifs;
1780
1781 MUTEX_ENTER(&ifs->ifs_ipf_rw);
1782 if (fin->fin_pktnum != 0) {
1783 id = fin->fin_pktnum & 0xffff;
1784 } else {
1785 id = ipid++;
1786 }
1787 MUTEX_EXIT(&ifs->ifs_ipf_rw);
1788
1789 return id;
1790 }
1791
1792
1793 #ifndef IPFILTER_CKSUM
1794 /* ARGSUSED */
1795 #endif
fr_checkv4sum(fin)1796 INLINE void fr_checkv4sum(fin)
1797 fr_info_t *fin;
1798 {
1799 #ifdef IPFILTER_CKSUM
1800 if (fr_checkl4sum(fin) == -1)
1801 fin->fin_flx |= FI_BAD;
1802 #endif
1803 }
1804
1805
#ifdef	USE_INET6
/*
 * When built with IPFILTER_CKSUM, verify the layer 4 checksum of an IPv6
 * packet and flag the packet FI_BAD if fr_checkl4sum() reports -1.
 * Without IPFILTER_CKSUM this does nothing.
 */
# ifndef	IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	int sumok = (fr_checkl4sum(fin) != -1);

	if (!sumok)
		fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1819
1820
1821 #if (SOLARIS2 < 7)
fr_slowtimer()1822 void fr_slowtimer()
1823 #else
1824 /*ARGSUSED*/
1825 void fr_slowtimer __P((void *arg))
1826 #endif
1827 {
1828 ipf_stack_t *ifs = arg;
1829
1830 READ_ENTER(&ifs->ifs_ipf_global);
1831 if (ifs->ifs_fr_running != 1) {
1832 ifs->ifs_fr_timer_id = NULL;
1833 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1834 return;
1835 }
1836 ipf_expiretokens(ifs);
1837 fr_fragexpire(ifs);
1838 fr_timeoutstate(ifs);
1839 fr_natexpire(ifs);
1840 fr_authexpire(ifs);
1841 ifs->ifs_fr_ticks++;
1842 if (ifs->ifs_fr_running == 1)
1843 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1844 drv_usectohz(500000));
1845 else
1846 ifs->ifs_fr_timer_id = NULL;
1847 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1848 }
1849
1850
1851 /* ------------------------------------------------------------------------ */
1852 /* Function: fr_pullup */
1853 /* Returns: NULL == pullup failed, else pointer to protocol header */
1854 /* Parameters: m(I) - pointer to buffer where data packet starts */
1855 /* fin(I) - pointer to packet information */
1856 /* len(I) - number of bytes to pullup */
1857 /* */
1858 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1859 /* single buffer for ease of access. Operating system native functions are */
1860 /* used to manage buffers - if necessary. If the entire packet ends up in */
1861 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */
1862 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */
1863 /* and ONLY if the pullup succeeds. */
1864 /* */
1865 /* We assume that 'min' is a pointer to a buffer that is part of the chain */
1866 /* of buffers that starts at *fin->fin_mp. */
1867 /* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	/* Already coalesced: the current header pointer is still good. */
	ip = (char *)fin->fin_ip;
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	/*
	 * Remember where fin_dp sits relative to the IP header so it can be
	 * re-derived once the header may have moved.
	 */
	ipoff = fin->fin_ipoff;
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	/* Only touch the mblk if it holds fewer than len bytes past ipoff. */
	if (M_LEN(m) < len + ipoff) {

		/*
		 * If the IP header offset is not a multiple of 4, try to
		 * move b_rptr back by "inc" bytes so that the offset, as
		 * seen from the new b_rptr, becomes one.  This is only done
		 * when there is room before b_rptr in the data block;
		 * otherwise inc stays 0 and nothing is adjusted.
		 */
		int inc = 0;

		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		m1 = m;
		m2 = m->b_prev;		/* saved, restored after the pullup */

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Save the hardware checksum information of the message so
		 * that it can be put back on the mblk that heads the message
		 * after the pullup.
		 */
		mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			/*
			 * Pullup failed: count it, free the whole packet and
			 * clear every pointer into it.
			 */
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		mac_hcksum_set(m, start, stuff, end, value, flags);

		/* Undo the b_prev and b_rptr adjustments made above. */
		m->b_prev = m2;
		m->b_rptr += inc;
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	/* Success: count it and refresh the header/data pointers. */
	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	/* The whole packet was requested, so it now sits in one buffer. */
	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}
1963
1964
1965 /*
1966 * Function: fr_verifysrc
1967 * Returns: int (really boolean)
1968 * Parameters: fin - packet information
1969 *
1970 * Check whether the packet has a valid source address for the interface on
1971 * which the packet arrived, implementing the "fr_chksrc" feature.
1972 * Returns true iff the packet's source address is valid.
1973 */
fr_verifysrc(fin)1974 int fr_verifysrc(fin)
1975 fr_info_t *fin;
1976 {
1977 net_handle_t net_data_p;
1978 phy_if_t phy_ifdata_routeto;
1979 struct sockaddr sin;
1980 ipf_stack_t *ifs = fin->fin_ifs;
1981
1982 if (fin->fin_v == 4) {
1983 net_data_p = ifs->ifs_ipf_ipv4;
1984 } else if (fin->fin_v == 6) {
1985 net_data_p = ifs->ifs_ipf_ipv6;
1986 } else {
1987 return (0);
1988 }
1989
1990 /* Get the index corresponding to the if name */
1991 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1992 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1993 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1994
1995 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1996 }
1997
1998 /*
1999 * Return true only if forwarding is enabled on the interface.
2000 */
2001 static int
fr_forwarding_enabled(phy_if_t phyif,net_handle_t ndp)2002 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
2003 {
2004 lif_if_t lif;
2005
2006 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
2007 lif = net_lifgetnext(ndp, phyif, lif)) {
2008 int res;
2009 uint64_t flags;
2010
2011 res = net_getlifflags(ndp, phyif, lif, &flags);
2012 if (res != 0)
2013 return (0);
2014 if (flags & IFF_ROUTER)
2015 return (1);
2016 }
2017
2018 return (0);
2019 }
2020
/*
 * Function:	fr_fastroute
 * Returns:	 0: success;
 *		-1: failed
 * Parameters:
 *	mb: the message block where the IP header starts
 *	mpp: the pointer to the pointer of the original
 *		packet message
 *	fin: packet information
 *	fdp: destination interface information
 *		if it is NULL, no interface information is provided.
 *
 * This function is for fastroute/to/dup-to rules and for packets generated
 * by the filter itself.  It picks the outgoing interface and next hop and
 * hands the packet to net_inject() with NI_DIRECT_OUT.
 * The destination address depends on the following conditions:
 *	1: for a fastroute rule, fdp is passed in as NULL, so the
 *	   destination address is the IP packet's destination address and
 *	   the interface is looked up with net_routeto()
 *	2: for a to/dup-to rule, the interface and address stored in fdp
 *	   (fd_ifp, fd_ip/fd_ip6) are used
 *
 * Unless the packet is flagged FI_GENERATED, both the interface in fin_ifp
 * and the outgoing interface must have forwarding enabled.
 *
 * On the failures that reach the bad_fastroute label, mb is freed here.
 * The early -1 returns and the net_inject() failure return do not free it.
 */
int fr_fastroute(mb, mpp, fin, fdp)
mblk_t *mb, **mpp;
fr_info_t *fin;
frdest_t *fdp;
{
	net_handle_t net_data_p;
	net_inject_t *inj;
	mblk_t *mp = NULL;
	frentry_t *fr = fin->fin_fr;
	qpktinfo_t *qpi;
	ip_t *ip;

	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct sockaddr *sinp;
	ipf_stack_t *ifs = fin->fin_ifs;
#ifndef	sparc
	u_short __iplen, __ipoff;
#endif

	if (fin->fin_v == 4) {
		net_data_p = ifs->ifs_ipf_ipv4;
	} else if (fin->fin_v == 6) {
		net_data_p = ifs->ifs_ipf_ipv6;
	} else {
		return (-1);
	}

	/* Check the src here, fin_ifp is the src interface. */
	if (!(fin->fin_flx & FI_GENERATED) &&
	    !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p)) {
		return (-1);
	}

	inj = net_inject_alloc(NETINFO_VERSION);
	if (inj == NULL)
		return -1;

	ip = fin->fin_ip;
	qpi = fin->fin_qpi;

	/*
	 * If this is a duplicate mblk then we want ip to point at that
	 * data, not the original, if and only if it is already pointing at
	 * the current mblk data.
	 *
	 * Otherwise, if it's not a duplicate, and we're not already pointing
	 * at the current mblk data, then we want to ensure that the data
	 * points at ip.
	 */

	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
		ip = (ip_t *)mb->b_rptr;
	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
		qpi->qpi_m->b_rptr = (uchar_t *)ip;
		qpi->qpi_off = 0;
	}

	/*
	 * If there is another M_PROTO, we don't want it
	 */
	if (*mpp != mb) {
		mp = unlinkb(*mpp);
		freeb(*mpp);
		*mpp = mp;
	}

	/*
	 * sin and sin6 are typed views of the same zeroed destination
	 * address inside the inject request.
	 */
	sinp = (struct sockaddr *)&inj->ni_addr;
	sin = (struct sockaddr_in *)sinp;
	sin6 = (struct sockaddr_in6 *)sinp;
	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
	inj->ni_packet = mb;

	/*
	 * In case we're here due to "to <if>" being used with
	 * "keep state", check that we're going in the correct
	 * direction.
	 */
	if (fdp != NULL) {
		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
		    (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
			goto bad_fastroute;
		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
		if (fin->fin_v == 4) {
			sin->sin_addr = fdp->fd_ip;
		} else {
			sin6->sin6_addr = fdp->fd_ip6.in6;
		}
	} else {
		/* No rule destination: route on the packet's own address. */
		if (fin->fin_v == 4) {
			sin->sin_addr = ip->ip_dst;
		} else {
			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
		}
		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
	}

	/* we're checking the destination here */
	if (!(fin->fin_flx & FI_GENERATED) &&
	    !fr_forwarding_enabled(inj->ni_physical, net_data_p)) {
		goto bad_fastroute;
	}

	/*
	 * Clear the hardware checksum flags from packets that we are doing
	 * input processing on as leaving them set will cause the outgoing
	 * NIC (if it supports hardware checksum) to calculate them anew,
	 * using the old (correct) checksums as the pseudo value to start
	 * from.
	 */
	if (fin->fin_out == 0) {
		DB_CKSUMFLAGS(mb) = 0;
	}

	*mpp = mb;

	/*
	 * An inbound packet is about to leave on another interface: run the
	 * outbound accounting, state and NAT processing on it with fin
	 * temporarily describing an outbound packet on that interface.
	 * Note that fin_out/fin_ifp are not restored if the NAT check fails.
	 */
	if (fin->fin_out == 0) {
		void *saveifp;
		u_32_t pass;

		saveifp = fin->fin_ifp;
		fin->fin_ifp = (void *)inj->ni_physical;
		fin->fin_flx &= ~FI_STATE;
		fin->fin_out = 1;
		(void) fr_acctpkt(fin, &pass);
		fin->fin_fr = NULL;
		if (!fr || !(fr->fr_flags & FR_RETMASK))
			(void) fr_checkstate(fin, &pass);
		if (fr_checknatout(fin, NULL) == -1)
			goto bad_fastroute;
		fin->fin_out = 0;
		fin->fin_ifp = saveifp;
	}
#ifndef	sparc
	/*
	 * ip_len and ip_off have been kept in host byte order up to here;
	 * convert them to network byte order before the packet leaves.
	 */
	if (fin->fin_v == 4) {
		__iplen = (u_short)ip->ip_len,
		__ipoff = (u_short)ip->ip_off;

		ip->ip_len = htons(__iplen);
		ip->ip_off = htons(__ipoff);
	}
#endif

	if (net_data_p) {
		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
			net_inject_free(inj);
			return (-1);
		}
	}

	/* ifs_fr_frouteok[0] counts successes, [1] counts failures. */
	ifs->ifs_fr_frouteok[0]++;
	net_inject_free(inj);
	return 0;
bad_fastroute:
	net_inject_free(inj);
	freemsg(mb);
	ifs->ifs_fr_frouteok[1]++;
	return -1;
}
2203
2204
2205 /* ------------------------------------------------------------------------ */
2206 /* Function: ipf_hook4_out */
2207 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2208 /* Parameters: event(I) - pointer to event */
2209 /* info(I) - pointer to hook information for firewalling */
2210 /* */
2211 /* Calling ipf_hook. */
2212 /* ------------------------------------------------------------------------ */
2213 /*ARGSUSED*/
ipf_hook4_out(hook_event_token_t token,hook_data_t info,void * arg)2214 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2215 {
2216 return ipf_hook(info, 1, 0, arg);
2217 }
2218 /*ARGSUSED*/
ipf_hook6_out(hook_event_token_t token,hook_data_t info,void * arg)2219 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2220 {
2221 return ipf_hook6(info, 1, 0, arg);
2222 }
2223
2224 /* ------------------------------------------------------------------------ */
2225 /* Function: ipf_hook4_in */
2226 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2227 /* Parameters: event(I) - pointer to event */
2228 /* info(I) - pointer to hook information for firewalling */
2229 /* */
2230 /* Calling ipf_hook. */
2231 /* ------------------------------------------------------------------------ */
2232 /*ARGSUSED*/
ipf_hook4_in(hook_event_token_t token,hook_data_t info,void * arg)2233 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2234 {
2235 return ipf_hook(info, 0, 0, arg);
2236 }
2237 /*ARGSUSED*/
ipf_hook6_in(hook_event_token_t token,hook_data_t info,void * arg)2238 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2239 {
2240 return ipf_hook6(info, 0, 0, arg);
2241 }
2242
2243
2244 /* ------------------------------------------------------------------------ */
2245 /* Function: ipf_hook4_loop_out */
2246 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2247 /* Parameters: event(I) - pointer to event */
2248 /* info(I) - pointer to hook information for firewalling */
2249 /* */
2250 /* Calling ipf_hook. */
2251 /* ------------------------------------------------------------------------ */
2252 /*ARGSUSED*/
ipf_hook4_loop_out(hook_event_token_t token,hook_data_t info,void * arg)2253 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2254 {
2255 return ipf_hook(info, 1, FI_NOCKSUM, arg);
2256 }
2257 /*ARGSUSED*/
ipf_hook6_loop_out(hook_event_token_t token,hook_data_t info,void * arg)2258 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2259 {
2260 return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2261 }
2262
2263 /* Static constants used by ipf_hook_ether */
2264 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
2265 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
2266 };
2267 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
2268 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
2269
2270 /* ------------------------------------------------------------------------ */
2271 /* Function: ipf_hook_ether */
2272 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2273 /* Parameters: token(I) - pointer to event */
2274 /* info(I) - pointer to hook information for firewalling */
2275 /* */
2276 /* The ipf_hook_ether hook is currently private to illumos. It represents */
2277 /* a layer 2 datapath generally used by virtual machines. Currently the */
2278 /* hook is only used by the viona driver to pass along L2 frames for */
2279 /* inspection. It requires that the L2 ethernet header is contained within */
2280 /* a single dblk_t (however layers above the L2 header have no restrctions */
2281 /* in ipf). ipf does not currently support filtering on L2 fields (e.g. */
2282 /* filtering on a MAC address or ethertype), however virtual machines do */
2283 /* not have native IP stack instances where ipf traditionally hooks in. */
2284 /* Instead this entry point is used to determine if the packet is unicast, */
2285 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the */
2286 /* traditional ip hooks for filtering. Non IPv4 or non IPv6 packets are */
2287 /* not subject to examination. */
2288 /* ------------------------------------------------------------------------ */
ipf_hook_ether(hook_event_token_t token,hook_data_t info,void * arg,boolean_t out)2289 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
2290 boolean_t out)
2291 {
2292 struct ether_header *ethp;
2293 hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
2294 mblk_t *mp;
2295 size_t offset, len;
2296 uint16_t etype;
2297 boolean_t v6;
2298
2299 /*
2300 * viona will only pass us mblks with the L2 header contained in a
2301 * single data block.
2302 */
2303 mp = *hpe->hpe_mp;
2304 len = MBLKL(mp);
2305
2306 VERIFY3S(len, >=, sizeof (struct ether_header));
2307
2308 ethp = (struct ether_header *)mp->b_rptr;
2309 if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
2310 struct ether_vlan_header *evh =
2311 (struct ether_vlan_header *)ethp;
2312
2313 VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
2314
2315 etype = ntohs(evh->ether_type);
2316 offset = sizeof (*evh);
2317 } else {
2318 offset = sizeof (*ethp);
2319 }
2320
2321 /*
2322 * ipf only support filtering IPv4 and IPv6. Ignore other types.
2323 */
2324 if (etype == ETHERTYPE_IP)
2325 v6 = B_FALSE;
2326 else if (etype == ETHERTYPE_IPV6)
2327 v6 = B_TRUE;
2328 else
2329 return (0);
2330
2331 if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
2332 hpe->hpe_flags |= HPE_BROADCAST;
2333 else if (bcmp(ipf_eth_ipv4_mcast, ethp,
2334 sizeof (ipf_eth_ipv4_mcast)) == 0)
2335 hpe->hpe_flags |= HPE_MULTICAST;
2336 else if (bcmp(ipf_eth_ipv6_mcast, ethp,
2337 sizeof (ipf_eth_ipv6_mcast)) == 0)
2338 hpe->hpe_flags |= HPE_MULTICAST;
2339
2340 /* Find the start of the IPv4 or IPv6 header */
2341 for (; offset >= len; len = MBLKL(mp)) {
2342 offset -= len;
2343 mp = mp->b_cont;
2344 if (mp == NULL) {
2345 freemsg(*hpe->hpe_mp);
2346 *hpe->hpe_mp = NULL;
2347 return (-1);
2348 }
2349 }
2350 hpe->hpe_mb = mp;
2351 hpe->hpe_hdr = mp->b_rptr + offset;
2352
2353 return (v6 ? ipf_hook6(info, out, 0, arg) :
2354 ipf_hook(info, out, 0, arg));
2355 }
2356
2357 /* ------------------------------------------------------------------------ */
2358 /* Function: ipf_hookviona_{in,out} */
2359 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2360 /* Parameters: event(I) - pointer to event */
2361 /* info(I) - pointer to hook information for firewalling */
2362 /* */
2363 /* The viona hooks are private hooks to illumos. They represents a layer 2 */
2364 /* datapath generally used to implement virtual machines. */
2365 /* along L2 packets. */
2366 /* */
2367 /* They end up calling the appropriate traditional ip hooks. */
2368 /* ------------------------------------------------------------------------ */
2369 int
ipf_hookviona_in(hook_event_token_t token,hook_data_t info,void * arg)2370 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
2371 {
2372 return (ipf_hook_ether(token, info, arg, B_FALSE));
2373 }
2374
2375 int
ipf_hookviona_out(hook_event_token_t token,hook_data_t info,void * arg)2376 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
2377 {
2378 return (ipf_hook_ether(token, info, arg, B_TRUE));
2379 }
2380
2381 /* ------------------------------------------------------------------------ */
2382 /* Function: ipf_hook4_loop_in */
2383 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2384 /* Parameters: event(I) - pointer to event */
2385 /* info(I) - pointer to hook information for firewalling */
2386 /* */
2387 /* Calling ipf_hook. */
2388 /* ------------------------------------------------------------------------ */
2389 /*ARGSUSED*/
ipf_hook4_loop_in(hook_event_token_t token,hook_data_t info,void * arg)2390 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2391 {
2392 return ipf_hook(info, 0, FI_NOCKSUM, arg);
2393 }
2394 /*ARGSUSED*/
ipf_hook6_loop_in(hook_event_token_t token,hook_data_t info,void * arg)2395 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2396 {
2397 return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2398 }
2399
2400 /* ------------------------------------------------------------------------ */
2401 /* Function: ipf_hook */
2402 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2403 /* Parameters: info(I) - pointer to hook information for firewalling */
2404 /* out(I) - whether packet is going in or out */
2405 /* loopback(I) - whether packet is a loopback packet or not */
2406 /* */
2407 /* Stepping stone function between the IP mainline and IPFilter. Extracts */
2408 /* parameters out of the info structure and forms them up to be useful for */
2409 /* calling ipfilter. */
2410 /* ------------------------------------------------------------------------ */
ipf_hook(hook_data_t info,int out,int loopback,void * arg)2411 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2412 {
2413 hook_pkt_event_t *fw;
2414 ipf_stack_t *ifs;
2415 qpktinfo_t qpi;
2416 int rval, hlen;
2417 u_short swap;
2418 phy_if_t phy;
2419 ip_t *ip;
2420
2421 ifs = arg;
2422 fw = (hook_pkt_event_t *)info;
2423
2424 ASSERT(fw != NULL);
2425 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2426
2427 ip = fw->hpe_hdr;
2428 swap = ntohs(ip->ip_len);
2429 ip->ip_len = swap;
2430 swap = ntohs(ip->ip_off);
2431 ip->ip_off = swap;
2432 hlen = IPH_HDR_LENGTH(ip);
2433
2434 qpi.qpi_m = fw->hpe_mb;
2435 qpi.qpi_data = fw->hpe_hdr;
2436 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2437 qpi.qpi_ill = (void *)phy;
2438 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2439 if (qpi.qpi_flags)
2440 qpi.qpi_flags |= FI_MBCAST;
2441 qpi.qpi_flags |= loopback;
2442
2443 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2444 &qpi, fw->hpe_mp, ifs);
2445
2446 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2447 if (rval == 0 && *(fw->hpe_mp) == NULL)
2448 rval = 1;
2449
2450 /* Notify IP the packet mblk_t and IP header pointers. */
2451 fw->hpe_mb = qpi.qpi_m;
2452 fw->hpe_hdr = qpi.qpi_data;
2453 if (rval == 0) {
2454 ip = qpi.qpi_data;
2455 swap = ntohs(ip->ip_len);
2456 ip->ip_len = swap;
2457 swap = ntohs(ip->ip_off);
2458 ip->ip_off = swap;
2459 }
2460 return rval;
2461
2462 }
/*
 * Function:    ipf_hook6
 * Returns:     int - 0 == packet ok, else problem, free packet if not done
 * Parameters:  info(I)     - pointer to hook information for firewalling
 *              out(I)      - 0 for an inbound packet, non-zero for outbound
 *              loopback(I) - extra flags OR'ed into the packet flags (the
 *                            loopback hooks pass FI_NOCKSUM here)
 *              arg(I)      - opaque pointer handed to fr_check() as its
 *                            last argument
 *
 * IPv6 counterpart of ipf_hook().  The header length given to fr_check()
 * is always sizeof (ip6_t), and no header fields are byte swapped.
 */
int
ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
{
	hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
	qpktinfo_t qpi;
	phy_if_t phy;
	int hlen, rval;

	ASSERT(hpe != NULL);

	/* Inbound packets are filtered on the receiving interface. */
	if (out == 0)
		phy = hpe->hpe_ifp;
	else
		phy = hpe->hpe_ofp;

	hlen = sizeof (ip6_t);

	/* Describe the packet to fr_check(). */
	qpi.qpi_m = hpe->hpe_mb;
	qpi.qpi_data = hpe->hpe_hdr;
	qpi.qpi_off = (char *)qpi.qpi_data - (char *)hpe->hpe_mb->b_rptr;
	qpi.qpi_ill = (void *)phy;
	qpi.qpi_flags = hpe->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
	if (qpi.qpi_flags != 0)
		qpi.qpi_flags |= FI_MBCAST;
	qpi.qpi_flags |= loopback;

	rval = fr_check(hpe->hpe_hdr, hlen, qpi.qpi_ill, out,
	    &qpi, hpe->hpe_mp, arg);

	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
	if (rval == 0 && *(hpe->hpe_mp) == NULL)
		rval = 1;

	/* Notify IP the packet mblk_t and IP header pointers. */
	hpe->hpe_mb = qpi.qpi_m;
	hpe->hpe_hdr = qpi.qpi_data;

	return (rval);
}
2498
2499
2500 /* ------------------------------------------------------------------------ */
2501 /* Function: ipf_nic_event_v4 */
2502 /* Returns: int - 0 == no problems encountered */
2503 /* Parameters: event(I) - pointer to event */
2504 /* info(I) - pointer to information about a NIC event */
2505 /* */
2506 /* Function to receive asynchronous NIC events from IP */
2507 /* ------------------------------------------------------------------------ */
2508 /*ARGSUSED*/
ipf_nic_event_v4(hook_event_token_t event,hook_data_t info,void * arg)2509 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2510 {
2511 struct sockaddr_in *sin;
2512 hook_nic_event_t *hn;
2513 ipf_stack_t *ifs = arg;
2514 void *new_ifp = NULL;
2515
2516 if (ifs->ifs_fr_running <= 0)
2517 return (0);
2518
2519 hn = (hook_nic_event_t *)info;
2520
2521 switch (hn->hne_event)
2522 {
2523 case NE_PLUMB :
2524 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2525 ifs);
2526 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2527 hn->hne_data, ifs);
2528 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2529 hn->hne_data, ifs);
2530 break;
2531
2532 case NE_UNPLUMB :
2533 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2534 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2535 ifs);
2536 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2537 break;
2538
2539 case NE_ADDRESS_CHANGE :
2540 /*
2541 * We only respond to events for logical interface 0 because
2542 * IPFilter only uses the first address given to a network
2543 * interface. We check for hne_lif==1 because the netinfo
2544 * code maps adds 1 to the lif number so that it can return
2545 * 0 to indicate "no more lifs" when walking them.
2546 */
2547 if (hn->hne_lif == 1) {
2548 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2549 ifs);
2550 sin = hn->hne_data;
2551 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2552 ifs);
2553 }
2554 break;
2555
2556 #if SOLARIS2 >= 10
2557 case NE_IFINDEX_CHANGE :
2558 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2559
2560 if (hn->hne_data != NULL) {
2561 /*
2562 * The netinfo passes interface index as int (hne_data should be
2563 * handled as a pointer to int), which is always 32bit. We need to
2564 * convert it to void pointer here, since interfaces are
2565 * represented as pointers to void in IPF. The pointers are 64 bits
2566 * long on 64bit platforms. Doing something like
2567 * (void *)((int) x)
2568 * will throw warning:
2569 * "cast to pointer from integer of different size"
2570 * during 64bit compilation.
2571 *
2572 * The line below uses (size_t) to typecast int to
2573 * size_t, which might be 64bit/32bit (depending
2574 * on architecture). Once we have proper 64bit/32bit
2575 * type (size_t), we can safely convert it to void pointer.
2576 */
2577 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2578 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2579 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2580 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2581 }
2582 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2583 break;
2584 #endif
2585
2586 default :
2587 break;
2588 }
2589
2590 return 0;
2591 }
2592
2593
2594 /* ------------------------------------------------------------------------ */
2595 /* Function: ipf_nic_event_v6 */
2596 /* Returns: int - 0 == no problems encountered */
2597 /* Parameters: event(I) - pointer to event */
2598 /* info(I) - pointer to information about a NIC event */
2599 /* */
2600 /* Function to receive asynchronous NIC events from IP */
2601 /* ------------------------------------------------------------------------ */
2602 /*ARGSUSED*/
ipf_nic_event_v6(hook_event_token_t event,hook_data_t info,void * arg)2603 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2604 {
2605 struct sockaddr_in6 *sin6;
2606 hook_nic_event_t *hn;
2607 ipf_stack_t *ifs = arg;
2608 void *new_ifp = NULL;
2609
2610 if (ifs->ifs_fr_running <= 0)
2611 return (0);
2612
2613 hn = (hook_nic_event_t *)info;
2614
2615 switch (hn->hne_event)
2616 {
2617 case NE_PLUMB :
2618 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2619 hn->hne_data, ifs);
2620 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2621 hn->hne_data, ifs);
2622 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2623 hn->hne_data, ifs);
2624 break;
2625
2626 case NE_UNPLUMB :
2627 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2628 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2629 ifs);
2630 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2631 break;
2632
2633 case NE_ADDRESS_CHANGE :
2634 if (hn->hne_lif == 1) {
2635 sin6 = hn->hne_data;
2636 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2637 ifs);
2638 }
2639 break;
2640
2641 #if SOLARIS2 >= 10
2642 case NE_IFINDEX_CHANGE :
2643 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2644 if (hn->hne_data != NULL) {
2645 /*
2646 * The netinfo passes interface index as int (hne_data should be
2647 * handled as a pointer to int), which is always 32bit. We need to
2648 * convert it to void pointer here, since interfaces are
2649 * represented as pointers to void in IPF. The pointers are 64 bits
2650 * long on 64bit platforms. Doing something like
2651 * (void *)((int) x)
2652 * will throw warning:
2653 * "cast to pointer from integer of different size"
2654 * during 64bit compilation.
2655 *
2656 * The line below uses (size_t) to typecast int to
2657 * size_t, which might be 64bit/32bit (depending
2658 * on architecture). Once we have proper 64bit/32bit
2659 * type (size_t), we can safely convert it to void pointer.
2660 */
2661 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2662 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2663 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2664 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2665 }
2666 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2667 break;
2668 #endif
2669
2670 default :
2671 break;
2672 }
2673
2674 return 0;
2675 }
2676
2677 /*
2678 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2679 * are needed in Solaris kernel only. We don't need them in
2680 * ipftest to pretend the ICMP/RST packet was sent as a response.
2681 */
2682 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2683 /* ------------------------------------------------------------------------ */
2684 /* Function: fr_make_rst */
2685 /* Returns: int - 0 on success, -1 on failure */
2686 /* Parameters: fin(I) - pointer to packet information */
2687 /* */
2688 /* We must alter the original mblks passed to IPF from IP stack via */
2689 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */
2690 /* IPF can basicaly do only these things with mblk representing the packet: */
2691 /* leave it as it is (pass the packet) */
2692 /* */
2693 /* discard it (block the packet) */
2694 /* */
2695 /* alter it (i.e. NAT) */
2696 /* */
2697 /* As you can see IPF can not simply discard the mblk and supply a new one */
2698 /* instead to IP stack via FW_HOOKS. */
2699 /* */
2700 /* The return-rst action for packets coming via NIC is handled as follows: */
2701 /* mblk with packet is discarded */
2702 /* */
2703 /* new mblk with RST response is constructed and injected to network */
2704 /* */
2705 /* IPF can't inject packets to loopback interface, this is just another */
2706 /* limitation we have to deal with here. The only option to send RST */
2707 /* response to offending TCP packet coming via loopback is to alter it. */
2708 /* */
2709 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */
2710 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */
2711 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */
2712 /* ------------------------------------------------------------------------ */
fr_make_rst(fin)2713 int fr_make_rst(fin)
2714 fr_info_t *fin;
2715 {
2716 uint16_t tmp_port;
2717 int rv = -1;
2718 uint32_t old_ack;
2719 tcphdr_t *tcp = NULL;
2720 struct in_addr tmp_src;
2721 #ifdef USE_INET6
2722 struct in6_addr tmp_src6;
2723 #endif
2724
2725 ASSERT(fin->fin_p == IPPROTO_TCP);
2726
2727 /*
2728 * We do not need to adjust chksum, since it is not being checked by
2729 * Solaris IP stack for loopback clients.
2730 */
2731 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2732 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2733
2734 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2735 /* Swap IPv4 addresses. */
2736 tmp_src = fin->fin_ip->ip_src;
2737 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2738 fin->fin_ip->ip_dst = tmp_src;
2739
2740 rv = 0;
2741 }
2742 else
2743 tcp = NULL;
2744 }
2745 #ifdef USE_INET6
2746 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2747 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2748 /*
2749 * We are relying on fact the next header is TCP, which is true
2750 * for regular TCP packets coming in over loopback.
2751 */
2752 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2753 /* Swap IPv6 addresses. */
2754 tmp_src6 = fin->fin_ip6->ip6_src;
2755 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2756 fin->fin_ip6->ip6_dst = tmp_src6;
2757
2758 rv = 0;
2759 }
2760 else
2761 tcp = NULL;
2762 }
2763 #endif
2764
2765 if (tcp != NULL) {
2766 /*
2767 * Adjust TCP header:
2768 * swap ports,
2769 * set flags,
2770 * set correct ACK number
2771 */
2772 tmp_port = tcp->th_sport;
2773 tcp->th_sport = tcp->th_dport;
2774 tcp->th_dport = tmp_port;
2775 old_ack = tcp->th_ack;
2776 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2777 tcp->th_seq = old_ack;
2778 tcp->th_flags = TH_RST | TH_ACK;
2779 }
2780
2781 return (rv);
2782 }
2783
2784 /* ------------------------------------------------------------------------ */
2785 /* Function: fr_make_icmp_v4 */
2786 /* Returns: int - 0 on success, -1 on failure */
2787 /* Parameters: fin(I) - pointer to packet information */
2788 /* */
2789 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2790 /* what is going to happen here and why. Once you read the comment there, */
2791 /* continue here with next paragraph. */
2792 /* */
2793 /* To turn IPv4 packet into ICMPv4 response packet, these things must */
2794 /* happen here: */
2795 /* (1) Original mblk is copied (duplicated). */
2796 /* */
2797 /* (2) ICMP header is created. */
2798 /* */
2799 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */
2800 /* data ready then. */
2801 /* */
2802 /* (4) Swap IP addresses in original mblk and adjust IP header data. */
2803 /* */
2804 /* (5) The mblk containing original packet is trimmed to contain IP */
2805 /* header only and ICMP chksum is computed. */
2806 /* */
2807 /* (6) The ICMP header we have from (3) is linked to original mblk, */
2808 /* which now contains new IP header. If original packet was spread */
2809 /* over several mblks, only the first mblk is kept. */
2810 /* ------------------------------------------------------------------------ */
fr_make_icmp_v4(fin)2811 static int fr_make_icmp_v4(fin)
2812 fr_info_t *fin;
2813 {
2814 struct in_addr tmp_src;
2815 tcphdr_t *tcp;
2816 struct icmp *icmp;
2817 mblk_t *mblk_icmp;
2818 mblk_t *mblk_ip;
2819 size_t icmp_pld_len; /* octets to append to ICMP header */
2820 size_t orig_iphdr_len; /* length of IP header only */
2821 uint32_t sum;
2822 uint16_t *buf;
2823 int len;
2824
2825
2826 if (fin->fin_v != 4)
2827 return (-1);
2828
2829 /*
2830 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2831 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2832 */
2833 tcp = (tcphdr_t *) fin->fin_dp;
2834
2835 if ((fin->fin_p == IPPROTO_TCP) &&
2836 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2837 return (-1);
2838
2839 /*
2840 * Step (1)
2841 *
2842 * Make copy of original mblk.
2843 *
2844 * We want to copy as much data as necessary, not less, not more. The
2845 * ICMPv4 payload length for unreachable messages is:
2846 * original IP header + 8 bytes of L4 (if there are any).
2847 *
2848 * We determine if there are at least 8 bytes of L4 data following IP
2849 * header first.
2850 */
2851 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2852 ICMPERR_ICMPHLEN : fin->fin_dlen;
2853 /*
2854 * Since we don't want to copy more data than necessary, we must trim
2855 * the original mblk here. The right way (STREAMish) would be to use
2856 * adjmsg() to trim it. However we would have to calculate the length
2857 * argument for adjmsg() from pointers we already have here.
2858 *
2859 * Since we have pointers and offsets, it's faster and easier for
2860 * us to just adjust pointers by hand instead of using adjmsg().
2861 */
2862 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2863 fin->fin_m->b_wptr += icmp_pld_len;
2864 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2865
2866 /*
2867 * Also we don't want to copy any L2 stuff, which might precede IP
2868 * header, so we have have to set b_rptr to point to the start of IP
2869 * header.
2870 */
2871 fin->fin_m->b_rptr += fin->fin_ipoff;
2872 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2873 return (-1);
2874 fin->fin_m->b_rptr -= fin->fin_ipoff;
2875
2876 /*
2877 * Step (2)
2878 *
2879 * Create an ICMP header, which will be appened to original mblk later.
2880 * ICMP header is just another mblk.
2881 */
2882 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2883 if (mblk_icmp == NULL) {
2884 FREE_MB_T(mblk_ip);
2885 return (-1);
2886 }
2887
2888 MTYPE(mblk_icmp) = M_DATA;
2889 icmp = (struct icmp *) mblk_icmp->b_wptr;
2890 icmp->icmp_type = ICMP_UNREACH;
2891 icmp->icmp_code = fin->fin_icode & 0xFF;
2892 icmp->icmp_void = 0;
2893 icmp->icmp_cksum = 0;
2894 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2895
2896 /*
2897 * Step (3)
2898 *
2899 * Complete ICMP packet - link ICMP header with L4 data from original
2900 * IP packet.
2901 */
2902 linkb(mblk_icmp, mblk_ip);
2903
2904 /*
2905 * Step (4)
2906 *
2907 * Swap IP addresses and change IP header fields accordingly in
2908 * original IP packet.
2909 *
2910 * There is a rule option return-icmp as a dest for physical
2911 * interfaces. This option becomes useless for loopback, since IPF box
2912 * uses same address as a loopback destination. We ignore the option
2913 * here, the ICMP packet will always look like as it would have been
2914 * sent from the original destination host.
2915 */
2916 tmp_src = fin->fin_ip->ip_src;
2917 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2918 fin->fin_ip->ip_dst = tmp_src;
2919 fin->fin_ip->ip_p = IPPROTO_ICMP;
2920 fin->fin_ip->ip_sum = 0;
2921
2922 /*
2923 * Step (5)
2924 *
2925 * We trim the orignal mblk to hold IP header only.
2926 */
2927 fin->fin_m->b_wptr = fin->fin_dp;
2928 orig_iphdr_len = fin->fin_m->b_wptr -
2929 (fin->fin_m->b_rptr + fin->fin_ipoff);
2930 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2931 orig_iphdr_len);
2932
2933 /*
2934 * ICMP chksum calculation. The data we are calculating chksum for are
2935 * spread over two mblks, therefore we have to use two for loops.
2936 *
2937 * First for loop computes chksum part for ICMP header.
2938 */
2939 buf = (uint16_t *) icmp;
2940 len = ICMPERR_ICMPHLEN;
2941 for (sum = 0; len > 1; len -= 2)
2942 sum += *buf++;
2943
2944 /*
2945 * Here we add chksum part for ICMP payload.
2946 */
2947 len = icmp_pld_len;
2948 buf = (uint16_t *) mblk_ip->b_rptr;
2949 for (; len > 1; len -= 2)
2950 sum += *buf++;
2951
2952 /*
2953 * Chksum is done.
2954 */
2955 sum = (sum >> 16) + (sum & 0xffff);
2956 sum += (sum >> 16);
2957 icmp->icmp_cksum = ~sum;
2958
2959 /*
2960 * Step (6)
2961 *
2962 * Release all packet mblks, except the first one.
2963 */
2964 if (fin->fin_m->b_cont != NULL) {
2965 FREE_MB_T(fin->fin_m->b_cont);
2966 }
2967
2968 /*
2969 * Append ICMP payload to first mblk, which already contains new IP
2970 * header.
2971 */
2972 linkb(fin->fin_m, mblk_icmp);
2973
2974 return (0);
2975 }
2976
2977 #ifdef USE_INET6
2978 /* ------------------------------------------------------------------------ */
2979 /* Function: fr_make_icmp_v6 */
2980 /* Returns: int - 0 on success, -1 on failure */
2981 /* Parameters: fin(I) - pointer to packet information */
2982 /* */
2983 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2984 /* what and why is going to happen here. Once you read the comment there, */
2985 /* continue here with next paragraph. */
2986 /* */
2987 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */
2988 /* The algorithm is fairly simple: */
2989 /* 1) We need to get copy of complete mblk. */
2990 /* */
2991 /* 2) New ICMPv6 header is created. */
2992 /* */
2993 /* 3) The copy of original mblk with packet is linked to ICMPv6 */
2994 /* header. */
2995 /* */
2996 /* 4) The checksum must be adjusted. */
2997 /* */
2998 /* 5) IP addresses in original mblk are swapped and IP header data */
2999 /* are adjusted (protocol number). */
3000 /* */
3001 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */
3002 /* linked with the ICMPv6 data we got from (3). */
3003 /* ------------------------------------------------------------------------ */
fr_make_icmp_v6(fin)3004 static int fr_make_icmp_v6(fin)
3005 fr_info_t *fin;
3006 {
3007 struct icmp6_hdr *icmp6;
3008 tcphdr_t *tcp;
3009 struct in6_addr tmp_src6;
3010 size_t icmp_pld_len;
3011 mblk_t *mblk_ip, *mblk_icmp;
3012
3013 if (fin->fin_v != 6)
3014 return (-1);
3015
3016 /*
3017 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
3018 * IP stack. If it is not SYN/FIN, then we must drop it silently.
3019 */
3020 tcp = (tcphdr_t *) fin->fin_dp;
3021
3022 if ((fin->fin_p == IPPROTO_TCP) &&
3023 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
3024 return (-1);
3025
3026 /*
3027 * Step (1)
3028 *
3029 * We need to copy complete packet in case of IPv6, no trimming is
3030 * needed (except the L2 headers).
3031 */
3032 icmp_pld_len = M_LEN(fin->fin_m);
3033 fin->fin_m->b_rptr += fin->fin_ipoff;
3034 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3035 return (-1);
3036 fin->fin_m->b_rptr -= fin->fin_ipoff;
3037
3038 /*
3039 * Step (2)
3040 *
3041 * Allocate and create ICMP header.
3042 */
3043 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
3044 BPRI_HI);
3045
3046 if (mblk_icmp == NULL)
3047 return (-1);
3048
3049 MTYPE(mblk_icmp) = M_DATA;
3050 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr;
3051 icmp6->icmp6_type = ICMP6_DST_UNREACH;
3052 icmp6->icmp6_code = fin->fin_icode & 0xFF;
3053 icmp6->icmp6_data32[0] = 0;
3054 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
3055
3056 /*
3057 * Step (3)
3058 *
3059 * Link the copy of IP packet to ICMP header.
3060 */
3061 linkb(mblk_icmp, mblk_ip);
3062
3063 /*
3064 * Step (4)
3065 *
3066 * Calculate chksum - this is much more easier task than in case of
3067 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
3068 * We are making compensation just for change of packet length.
3069 */
3070 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
3071
3072 /*
3073 * Step (5)
3074 *
3075 * Swap IP addresses.
3076 */
3077 tmp_src6 = fin->fin_ip6->ip6_src;
3078 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
3079 fin->fin_ip6->ip6_dst = tmp_src6;
3080
3081 /*
3082 * and adjust IP header data.
3083 */
3084 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
3085 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
3086
3087 /*
3088 * Step (6)
3089 *
3090 * We must release all linked mblks from original packet and keep only
3091 * the first mblk with IP header to link ICMP data.
3092 */
3093 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
3094
3095 if (fin->fin_m->b_cont != NULL) {
3096 FREE_MB_T(fin->fin_m->b_cont);
3097 }
3098
3099 /*
3100 * Append ICMP payload to IP header.
3101 */
3102 linkb(fin->fin_m, mblk_icmp);
3103
3104 return (0);
3105 }
3106 #endif /* USE_INET6 */
3107
3108 /* ------------------------------------------------------------------------ */
3109 /* Function: fr_make_icmp */
3110 /* Returns: int - 0 on success, -1 on failure */
3111 /* Parameters: fin(I) - pointer to packet information */
3112 /* */
3113 /* We must alter the original mblks passed to IPF from IP stack via */
3114 /* FW_HOOKS. The reasons why we must alter packet are discussed within */
3115 /* comment at fr_make_rst() function. */
3116 /* */
3117 /* The fr_make_icmp() function acts as a wrapper, which passes the code */
3118 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */
3119 /* protocol version. However there are some details, which are common to */
3120 /* both IP versions. The details are going to be explained here. */
3121 /* */
3122 /* The packet looks as follows: */
3123 /* xxx | IP hdr | IP payload ... | */
3124 /* ^ ^ ^ ^ */
3125 /* | | | | */
3126 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
3127 /* | | | */
3128 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */
3129 /* | | */
3130 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */
3131 /* | of loopback) */
3132 /* | */
3133 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */
3134 /* */
3135 /* All relevant IP headers are pulled up into the first mblk. It happened */
3136 /* well in advance before the matching rule was found (the rule, which took */
3137 /* us here, to fr_make_icmp() function). */
3138 /* */
3139 /* Both functions will turn packet passed in fin->fin_m mblk into a new */
3140 /* packet. New packet will be represented as chain of mblks. */
3141 /* orig mblk |- b_cont ---. */
3142 /* ^ `-> ICMP hdr |- b_cont--. */
3143 /* | ^ `-> duped orig mblk */
3144 /* | | ^ */
3145 /* `- The original mblk | | */
3146 /* will be trimmed to | | */
3147 /* to contain IP header | | */
3148 /* only | | */
3149 /* | | */
3150 /* `- This is newly | */
3151 /* allocated mblk to | */
3152 /* hold ICMPv6 data. | */
3153 /* | */
3154 /* | */
3155 /* | */
3156 /* This is the copy of original mblk, it will contain -' */
/* original IP packet in case of ICMPv6. In case of */
3158 /* ICMPv4 it will contain up to 8 bytes of IP payload */
3159 /* (TCP/UDP/L4) data from original packet. */
3160 /* ------------------------------------------------------------------------ */
fr_make_icmp(fin)3161 int fr_make_icmp(fin)
3162 fr_info_t *fin;
3163 {
3164 int rv;
3165
3166 if (fin->fin_v == 4)
3167 rv = fr_make_icmp_v4(fin);
3168 #ifdef USE_INET6
3169 else if (fin->fin_v == 6)
3170 rv = fr_make_icmp_v6(fin);
3171 #endif
3172 else
3173 rv = -1;
3174
3175 return (rv);
3176 }
3177
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - sum of the 16-bit words of buf                  */
/* Parameters:  buf - pointer to buf we want to sum up                      */
/*              len - length of buffer buf in bytes                         */
/*                                                                          */
/* Sums buffer buf as a sequence of 16-bit words. The result is used for    */
/* chksum calculation; the caller folds it to 16 bits. The buf argument     */
/* must be aligned. A trailing odd byte is summed as if it were followed    */
/* by a zero byte. Carries are folded back in whenever the accumulator      */
/* gets close to wrapping, so arbitrarily long buffers sum correctly.       */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	const uint16_t *wp = buf;
	uint32_t sum = 0;

	for (; len > 1; len -= 2) {
		sum += *wp++;
		/*
		 * Once the top bit is set the next additions could wrap the
		 * 32-bit accumulator and silently drop carries. Folding the
		 * high half into the low half keeps the one's complement
		 * value intact. Sums that never reach the top bit are
		 * returned exactly as before.
		 */
		if (sum & 0x80000000U)
			sum = (sum & 0xFFFF) + (sum >> 16);
	}

	/*
	 * Odd length: the last byte is the first byte of a 16-bit word whose
	 * second byte is zero. htons() puts it into the same byte position a
	 * direct 16-bit load would have used on this machine.
	 */
	if (len == 1)
		sum += htons((*(const unsigned char *)wp) << 8);

	return (sum);
}
3204
3205 /* ------------------------------------------------------------------------ */
3206 /* Function: fr_calc_chksum */
3207 /* Returns: void */
3208 /* Parameters: fin - pointer to fr_info_t instance with packet data */
3209 /* pkt - pointer to duplicated packet */
3210 /* */
3211 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */
3212 /* versions. */
3213 /* ------------------------------------------------------------------------ */
fr_calc_chksum(fin,pkt)3214 void fr_calc_chksum(fin, pkt)
3215 fr_info_t *fin;
3216 mb_t *pkt;
3217 {
3218 struct pseudo_hdr {
3219 union {
3220 struct in_addr in4;
3221 #ifdef USE_INET6
3222 struct in6_addr in6;
3223 #endif
3224 } src_addr;
3225 union {
3226 struct in_addr in4;
3227 #ifdef USE_INET6
3228 struct in6_addr in6;
3229 #endif
3230 } dst_addr;
3231 char zero;
3232 char proto;
3233 uint16_t len;
3234 } phdr;
3235 uint32_t sum, ip_sum;
3236 void *buf;
3237 uint16_t *l4_csum_p;
3238 tcphdr_t *tcp;
3239 udphdr_t *udp;
3240 icmphdr_t *icmp;
3241 #ifdef USE_INET6
3242 struct icmp6_hdr *icmp6;
3243 #endif
3244 ip_t *ip;
3245 unsigned int len;
3246 int pld_len;
3247
3248 /*
3249 * We need to pullup the packet to the single continuous buffer to avoid
3250 * potential misaligment of b_rptr member in mblk chain.
3251 */
3252 if (pullupmsg(pkt, -1) == 0) {
3253 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
3254 " will not be computed by IPF");
3255 return;
3256 }
3257
3258 /*
3259 * It is guaranteed IP header starts right at b_rptr, because we are
3260 * working with a copy of the original packet.
3261 *
3262 * Compute pseudo header chksum for TCP and UDP.
3263 */
3264 if ((fin->fin_p == IPPROTO_UDP) ||
3265 (fin->fin_p == IPPROTO_TCP)) {
3266 bzero(&phdr, sizeof (phdr));
3267 #ifdef USE_INET6
3268 if (fin->fin_v == 6) {
3269 phdr.src_addr.in6 = fin->fin_srcip6;
3270 phdr.dst_addr.in6 = fin->fin_dstip6;
3271 } else {
3272 phdr.src_addr.in4 = fin->fin_src;
3273 phdr.dst_addr.in4 = fin->fin_dst;
3274 }
3275 #else
3276 phdr.src_addr.in4 = fin->fin_src;
3277 phdr.dst_addr.in4 = fin->fin_dst;
3278 #endif
3279 phdr.zero = (char) 0;
3280 phdr.proto = fin->fin_p;
3281 phdr.len = htons((uint16_t)fin->fin_dlen);
3282 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
3283 } else {
3284 sum = 0;
3285 }
3286
3287 /*
3288 * Set pointer to the L4 chksum field in the packet, set buf pointer to
3289 * the L4 header start.
3290 */
3291 switch (fin->fin_p) {
3292 case IPPROTO_UDP:
3293 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3294 l4_csum_p = &udp->uh_sum;
3295 buf = udp;
3296 break;
3297 case IPPROTO_TCP:
3298 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3299 l4_csum_p = &tcp->th_sum;
3300 buf = tcp;
3301 break;
3302 case IPPROTO_ICMP:
3303 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3304 l4_csum_p = &icmp->icmp_cksum;
3305 buf = icmp;
3306 break;
3307 #ifdef USE_INET6
3308 case IPPROTO_ICMPV6:
3309 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
3310 l4_csum_p = &icmp6->icmp6_cksum;
3311 buf = icmp6;
3312 break;
3313 #endif
3314 default:
3315 l4_csum_p = NULL;
3316 }
3317
3318 /*
3319 * Compute L4 chksum if needed.
3320 */
3321 if (l4_csum_p != NULL) {
3322 *l4_csum_p = (uint16_t)0;
3323 pld_len = fin->fin_dlen;
3324 len = pkt->b_wptr - (unsigned char *)buf;
3325 ASSERT(len == pld_len);
3326 /*
3327 * Add payload sum to pseudoheader sum.
3328 */
3329 sum += fr_buf_sum(buf, len);
3330 while (sum >> 16)
3331 sum = (sum & 0xFFFF) + (sum >> 16);
3332
3333 *l4_csum_p = ~((uint16_t)sum);
3334 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3335 }
3336
3337 /*
3338 * The IP header chksum is needed just for IPv4.
3339 */
3340 if (fin->fin_v == 4) {
3341 /*
3342 * Compute IPv4 header chksum.
3343 */
3344 ip = (ip_t *)pkt->b_rptr;
3345 ip->ip_sum = (uint16_t)0;
3346 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3347 while (ip_sum >> 16)
3348 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3349
3350 ip->ip_sum = ~((uint16_t)ip_sum);
3351 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3352 }
3353
3354 return;
3355 }
3356
3357 #endif /* _KERNEL && SOLARIS2 >= 10 */
3358