1 /* 2 * Copyright (C) 1993-2001, 2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 * 8 * Copyright 2018 Joyent, Inc. 9 */ 10 11 #if !defined(lint) 12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed"; 13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $"; 14 #endif 15 16 #include <sys/types.h> 17 #include <sys/errno.h> 18 #include <sys/param.h> 19 #include <sys/cpuvar.h> 20 #include <sys/open.h> 21 #include <sys/ioctl.h> 22 #include <sys/filio.h> 23 #include <sys/systm.h> 24 #include <sys/strsubr.h> 25 #include <sys/strsun.h> 26 #include <sys/cred.h> 27 #include <sys/ddi.h> 28 #include <sys/sunddi.h> 29 #include <sys/ksynch.h> 30 #include <sys/kmem.h> 31 #include <sys/mac_provider.h> 32 #include <sys/mkdev.h> 33 #include <sys/protosw.h> 34 #include <sys/socket.h> 35 #include <sys/dditypes.h> 36 #include <sys/cmn_err.h> 37 #include <sys/zone.h> 38 #include <net/if.h> 39 #include <net/af.h> 40 #include <net/route.h> 41 #include <netinet/in.h> 42 #include <netinet/in_systm.h> 43 #include <netinet/ip.h> 44 #include <netinet/ip_var.h> 45 #include <netinet/tcp.h> 46 #include <netinet/udp.h> 47 #include <netinet/tcpip.h> 48 #include <netinet/ip_icmp.h> 49 #include "netinet/ip_compat.h" 50 #ifdef USE_INET6 51 # include <netinet/icmp6.h> 52 #endif 53 #include "netinet/ip_fil.h" 54 #include "netinet/ip_nat.h" 55 #include "netinet/ip_frag.h" 56 #include "netinet/ip_state.h" 57 #include "netinet/ip_auth.h" 58 #include "netinet/ip_proxy.h" 59 #include "netinet/ipf_stack.h" 60 #ifdef IPFILTER_LOOKUP 61 # include "netinet/ip_lookup.h" 62 #endif 63 #include <inet/ip_ire.h> 64 65 #include <sys/md5.h> 66 #include <sys/neti.h> 67 68 static int frzerostats __P((caddr_t, ipf_stack_t *)); 69 static int fr_setipfloopback __P((int, ipf_stack_t *)); 70 static int fr_enableipf 
__P((ipf_stack_t *, int)); 71 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp)); 72 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *)); 73 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *)); 74 static int ipf_hook __P((hook_data_t, int, int, void *)); 75 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *)); 76 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *)); 77 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t, 78 void *)); 79 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *)); 80 static int ipf_hook4 __P((hook_data_t, int, int, void *)); 81 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *)); 82 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *)); 83 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t, 84 void *)); 85 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t, 86 void *)); 87 static int ipf_hook6 __P((hook_data_t, int, int, void *)); 88 89 static int ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *)); 90 static int ipf_hookviona_out __P((hook_event_token_t, hook_data_t, 91 void *)); 92 93 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 94 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); 95 96 static int ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *, 97 const char *, const char *, const char *)); 98 static int ipf_hook_instance_notify __P((hook_notify_cmd_t, void *, 99 const char *, const char *, const char *)); 100 101 #if SOLARIS2 < 10 102 #if SOLARIS2 >= 7 103 u_int *ip_ttl_ptr = NULL; 104 u_int *ip_mtudisc = NULL; 105 # if SOLARIS2 >= 8 106 int *ip_forwarding = NULL; 107 u_int *ip6_forwarding = NULL; 108 # else 109 u_int *ip_forwarding = NULL; 110 # endif 111 #else 112 u_long *ip_ttl_ptr = NULL; 113 u_long *ip_mtudisc = NULL; 114 u_long *ip_forwarding = NULL; 115 
#endif 116 #endif 117 118 vmem_t *ipf_minor; /* minor number arena */ 119 void *ipf_state; /* DDI state */ 120 121 /* 122 * GZ-controlled and per-zone stacks: 123 * 124 * For each non-global zone, we create two ipf stacks: the per-zone stack and 125 * the GZ-controlled stack. The per-zone stack can be controlled and observed 126 * from inside the zone or from the global zone. The GZ-controlled stack can 127 * only be controlled and observed from the global zone (though the rules 128 * still only affect that non-global zone). 129 * 130 * The two hooks are always arranged so that the GZ-controlled stack is always 131 * "outermost" with respect to the zone. The traffic flow then looks like 132 * this: 133 * 134 * Inbound: 135 * 136 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone 137 * 138 * Outbound: 139 * 140 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone 141 */ 142 143 /* IPv4 hook names */ 144 char *hook4_nicevents = "ipfilter_hook4_nicevents"; 145 char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz"; 146 char *hook4_in = "ipfilter_hook4_in"; 147 char *hook4_in_gz = "ipfilter_hook4_in_gz"; 148 char *hook4_out = "ipfilter_hook4_out"; 149 char *hook4_out_gz = "ipfilter_hook4_out_gz"; 150 char *hook4_loop_in = "ipfilter_hook4_loop_in"; 151 char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz"; 152 char *hook4_loop_out = "ipfilter_hook4_loop_out"; 153 char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz"; 154 155 /* IPv6 hook names */ 156 char *hook6_nicevents = "ipfilter_hook6_nicevents"; 157 char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz"; 158 char *hook6_in = "ipfilter_hook6_in"; 159 char *hook6_in_gz = "ipfilter_hook6_in_gz"; 160 char *hook6_out = "ipfilter_hook6_out"; 161 char *hook6_out_gz = "ipfilter_hook6_out_gz"; 162 char *hook6_loop_in = "ipfilter_hook6_loop_in"; 163 char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz"; 164 char *hook6_loop_out = "ipfilter_hook6_loop_out"; 165 char *hook6_loop_out_gz = 
"ipfilter_hook6_loop_out_gz"; 166 167 /* viona hook names */ 168 char *hook_viona_in = "ipfilter_hookviona_in"; 169 char *hook_viona_in_gz = "ipfilter_hookviona_in_gz"; 170 char *hook_viona_out = "ipfilter_hookviona_out"; 171 char *hook_viona_out_gz = "ipfilter_hookviona_out_gz"; 172 173 /* ------------------------------------------------------------------------ */ 174 /* Function: ipldetach */ 175 /* Returns: int - 0 == success, else error. */ 176 /* Parameters: Nil */ 177 /* */ 178 /* This function is responsible for undoing anything that might have been */ 179 /* done in a call to iplattach(). It must be able to clean up from a call */ 180 /* to iplattach() that did not succeed. Why might that happen? Someone */ 181 /* configures a table to be so large that we cannot allocate enough memory */ 182 /* for it. */ 183 /* ------------------------------------------------------------------------ */ 184 int ipldetach(ifs) 185 ipf_stack_t *ifs; 186 { 187 188 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 189 190 #if SOLARIS2 < 10 191 192 if (ifs->ifs_fr_control_forwarding & 2) { 193 if (ip_forwarding != NULL) 194 *ip_forwarding = 0; 195 #if SOLARIS2 >= 8 196 if (ip6_forwarding != NULL) 197 *ip6_forwarding = 0; 198 #endif 199 } 200 #endif 201 202 /* 203 * This lock needs to be dropped around the net_hook_unregister calls 204 * because we can deadlock here with: 205 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 206 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running) 207 */ 208 RWLOCK_EXIT(&ifs->ifs_ipf_global); 209 210 #define UNDO_HOOK(_f, _b, _e, _h) \ 211 do { \ 212 if (ifs->_f != NULL) { \ 213 if (ifs->_b) { \ 214 int tmp = net_hook_unregister(ifs->_f, \ 215 _e, ifs->_h); \ 216 ifs->_b = (tmp != 0 && tmp != ENXIO); \ 217 if (!ifs->_b && ifs->_h != NULL) { \ 218 hook_free(ifs->_h); \ 219 ifs->_h = NULL; \ 220 } \ 221 } else if (ifs->_h != NULL) { \ 222 hook_free(ifs->_h); \ 223 ifs->_h = NULL; \ 224 } \ 225 } \ 226 _NOTE(CONSTCOND) 
\ 227 } while (0) 228 229 /* 230 * Remove IPv6 Hooks 231 */ 232 if (ifs->ifs_ipf_ipv6 != NULL) { 233 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in, 234 NH_PHYSICAL_IN, ifs_ipfhook6_in); 235 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out, 236 NH_PHYSICAL_OUT, ifs_ipfhook6_out); 237 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events, 238 NH_NIC_EVENTS, ifs_ipfhook6_nicevents); 239 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in, 240 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in); 241 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out, 242 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out); 243 244 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0) 245 goto detach_failed; 246 ifs->ifs_ipf_ipv6 = NULL; 247 } 248 249 /* 250 * Remove IPv4 Hooks 251 */ 252 if (ifs->ifs_ipf_ipv4 != NULL) { 253 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in, 254 NH_PHYSICAL_IN, ifs_ipfhook4_in); 255 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out, 256 NH_PHYSICAL_OUT, ifs_ipfhook4_out); 257 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events, 258 NH_NIC_EVENTS, ifs_ipfhook4_nicevents); 259 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in, 260 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in); 261 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out, 262 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out); 263 264 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0) 265 goto detach_failed; 266 ifs->ifs_ipf_ipv4 = NULL; 267 } 268 269 /* 270 * Remove notification of viona hooks 271 */ 272 net_instance_notify_unregister(ifs->ifs_netid, 273 ipf_hook_instance_notify); 274 275 #undef UNDO_HOOK 276 277 /* 278 * Normally, viona will unregister itself before ipldetach() is called, 279 * so these will be no-ops, but out of caution, we try to make sure 280 * we've removed any of our references. 
281 */ 282 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL, 283 NH_PHYSICAL_IN); 284 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL, 285 NH_PHYSICAL_OUT); 286 287 { 288 char netidstr[12]; /* Large enough for INT_MAX + NUL */ 289 (void) snprintf(netidstr, sizeof (netidstr), "%d", 290 ifs->ifs_netid); 291 292 /* 293 * The notify callbacks expect the netid value passed as a 294 * string in the third argument. To prevent confusion if 295 * traced, we pass the same value the nethook framework would 296 * pass, even though the callback does not currently use the 297 * value. 298 */ 299 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr, 300 NULL, Hn_VIONA); 301 } 302 303 #ifdef IPFDEBUG 304 cmn_err(CE_CONT, "ipldetach()\n"); 305 #endif 306 307 WRITE_ENTER(&ifs->ifs_ipf_global); 308 fr_deinitialise(ifs); 309 310 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); 311 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); 312 313 if (ifs->ifs_ipf_locks_done == 1) { 314 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock); 315 MUTEX_DESTROY(&ifs->ifs_ipf_rw); 316 RW_DESTROY(&ifs->ifs_ipf_tokens); 317 RW_DESTROY(&ifs->ifs_ipf_ipidfrag); 318 ifs->ifs_ipf_locks_done = 0; 319 } 320 321 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out || 322 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in || 323 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events || 324 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out || 325 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out) 326 return -1; 327 328 return 0; 329 330 detach_failed: 331 WRITE_ENTER(&ifs->ifs_ipf_global); 332 return -1; 333 } 334 335 int iplattach(ifs) 336 ipf_stack_t *ifs; 337 { 338 #if SOLARIS2 < 10 339 int i; 340 #endif 341 netid_t id = ifs->ifs_netid; 342 343 #ifdef IPFDEBUG 344 cmn_err(CE_CONT, "iplattach()\n"); 345 #endif 346 347 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); 348 ifs->ifs_fr_flags = IPF_LOGGING; 349 #ifdef 
_KERNEL 350 ifs->ifs_fr_update_ipid = 0; 351 #else 352 ifs->ifs_fr_update_ipid = 1; 353 #endif 354 ifs->ifs_fr_minttl = 4; 355 ifs->ifs_fr_icmpminfragmtu = 68; 356 #if defined(IPFILTER_DEFAULT_BLOCK) 357 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; 358 #else 359 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; 360 #endif 361 362 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); 363 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); 364 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex"); 365 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); 366 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock"); 367 ifs->ifs_ipf_locks_done = 1; 368 369 if (fr_initialise(ifs) < 0) 370 return -1; 371 372 /* 373 * For incoming packets, we want the GZ-controlled hooks to run before 374 * the per-zone hooks, regardless of what order they're are installed. 375 * See the "GZ-controlled and per-zone stacks" comment block at the top 376 * of this file. 377 */ 378 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \ 379 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 380 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \ 381 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 382 383 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, 384 hook4_nicevents, hook4_nicevents_gz, ifs); 385 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in, 386 hook4_in, hook4_in_gz, ifs); 387 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, 388 hook4_loop_in, hook4_loop_in_gz, ifs); 389 390 /* 391 * For outgoing packets, we want the GZ-controlled hooks to run after 392 * the per-zone hooks, regardless of what order they're are installed. 393 * See the "GZ-controlled and per-zone stacks" comment block at the top 394 * of this file. 395 */ 396 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \ 397 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ 398 (x)->h_hint = ifs->ifs_gz_controlled ? 
HH_AFTER : HH_BEFORE; \ 399 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); 400 401 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out, 402 hook4_out, hook4_out_gz, ifs); 403 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, 404 hook4_loop_out, hook4_loop_out_gz, ifs); 405 406 /* 407 * If we hold this lock over all of the net_hook_register calls, we 408 * can cause a deadlock to occur with the following lock ordering: 409 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 410 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path) 411 */ 412 RWLOCK_EXIT(&ifs->ifs_ipf_global); 413 414 /* 415 * Add IPv4 hooks 416 */ 417 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET); 418 if (ifs->ifs_ipf_ipv4 == NULL) 419 goto hookup_failed; 420 421 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4, 422 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0); 423 if (!ifs->ifs_hook4_nic_events) 424 goto hookup_failed; 425 426 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4, 427 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0); 428 if (!ifs->ifs_hook4_physical_in) 429 goto hookup_failed; 430 431 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4, 432 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0); 433 if (!ifs->ifs_hook4_physical_out) 434 goto hookup_failed; 435 436 if (ifs->ifs_ipf_loopback) { 437 ifs->ifs_hook4_loopback_in = (net_hook_register( 438 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 439 ifs->ifs_ipfhook4_loop_in) == 0); 440 if (!ifs->ifs_hook4_loopback_in) 441 goto hookup_failed; 442 443 ifs->ifs_hook4_loopback_out = (net_hook_register( 444 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 445 ifs->ifs_ipfhook4_loop_out) == 0); 446 if (!ifs->ifs_hook4_loopback_out) 447 goto hookup_failed; 448 } 449 450 /* 451 * Add IPv6 hooks 452 */ 453 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6); 454 if (ifs->ifs_ipf_ipv6 == NULL) 455 goto hookup_failed; 456 457 
HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, 458 hook6_nicevents, hook6_nicevents_gz, ifs); 459 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in, 460 hook6_in, hook6_in_gz, ifs); 461 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, 462 hook6_loop_in, hook6_loop_in_gz, ifs); 463 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out, 464 hook6_out, hook6_out_gz, ifs); 465 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, 466 hook6_loop_out, hook6_loop_out_gz, ifs); 467 468 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6, 469 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0); 470 if (!ifs->ifs_hook6_nic_events) 471 goto hookup_failed; 472 473 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6, 474 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0); 475 if (!ifs->ifs_hook6_physical_in) 476 goto hookup_failed; 477 478 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6, 479 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0); 480 if (!ifs->ifs_hook6_physical_out) 481 goto hookup_failed; 482 483 if (ifs->ifs_ipf_loopback) { 484 ifs->ifs_hook6_loopback_in = (net_hook_register( 485 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 486 ifs->ifs_ipfhook6_loop_in) == 0); 487 if (!ifs->ifs_hook6_loopback_in) 488 goto hookup_failed; 489 490 ifs->ifs_hook6_loopback_out = (net_hook_register( 491 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 492 ifs->ifs_ipfhook6_loop_out) == 0); 493 if (!ifs->ifs_hook6_loopback_out) 494 goto hookup_failed; 495 } 496 497 /* 498 * VIONA INET hooks. While the nethook framework allows us to register 499 * hooks for events that haven't been registered yet, we instead 500 * register and unregister our hooks in response to notifications 501 * about the viona hooks from the nethook framework. This prevents 502 * problems when the viona module gets unloaded while the ipf module 503 * does not. 
If we do not unregister our hooks after the viona module 504 * is unloaded, the viona module cannot later re-register them if it 505 * gets reloaded. As the ip, vnd, and ipf modules are rarely unloaded 506 * even on DEBUG kernels, they do not experience this issue. 507 */ 508 if (net_instance_notify_register(id, ipf_hook_instance_notify, 509 ifs) != 0) 510 goto hookup_failed; 511 512 /* 513 * Reacquire ipf_global, now it is safe. 514 */ 515 WRITE_ENTER(&ifs->ifs_ipf_global); 516 517 /* Do not use private interface ip_params_arr[] in Solaris 10 */ 518 #if SOLARIS2 < 10 519 520 #if SOLARIS2 >= 8 521 ip_forwarding = &ip_g_forward; 522 #endif 523 /* 524 * XXX - There is no terminator for this array, so it is not possible 525 * to tell if what we are looking for is missing and go off the end 526 * of the array. 527 */ 528 529 #if SOLARIS2 <= 8 530 for (i = 0; ; i++) { 531 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) { 532 ip_ttl_ptr = &ip_param_arr[i].ip_param_value; 533 } else if (!strcmp(ip_param_arr[i].ip_param_name, 534 "ip_path_mtu_discovery")) { 535 ip_mtudisc = &ip_param_arr[i].ip_param_value; 536 } 537 #if SOLARIS2 < 8 538 else if (!strcmp(ip_param_arr[i].ip_param_name, 539 "ip_forwarding")) { 540 ip_forwarding = &ip_param_arr[i].ip_param_value; 541 } 542 #else 543 else if (!strcmp(ip_param_arr[i].ip_param_name, 544 "ip6_forwarding")) { 545 ip6_forwarding = &ip_param_arr[i].ip_param_value; 546 } 547 #endif 548 549 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL && 550 #if SOLARIS2 >= 8 551 ip6_forwarding != NULL && 552 #endif 553 ip_forwarding != NULL) 554 break; 555 } 556 #endif 557 558 if (ifs->ifs_fr_control_forwarding & 1) { 559 if (ip_forwarding != NULL) 560 *ip_forwarding = 1; 561 #if SOLARIS2 >= 8 562 if (ip6_forwarding != NULL) 563 *ip6_forwarding = 1; 564 #endif 565 } 566 567 #endif 568 569 return 0; 570 hookup_failed: 571 WRITE_ENTER(&ifs->ifs_ipf_global); 572 return -1; 573 } 574 575 /* 
------------------------------------------------------------------------ */ 576 /* 577 * Called whenever a nethook protocol is registered or unregistered. Currently 578 * only used to add or remove the hooks for viona. 579 * 580 * While the function signature requires returning int, nothing 581 * in usr/src/uts/common/io/hook.c that invokes the callbacks 582 * captures the return value (nor is there currently any documentation 583 * on what return values should be). For now at least, we'll return 0 584 * on success (or 'not applicable') or an error value. Even if the 585 * nethook framework doesn't use the return address, it can be observed via 586 * dtrace if needed. 587 */ 588 static int 589 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg, 590 const char *name, const char *dummy __unused, const char *he_name) 591 { 592 ipf_stack_t *ifs = arg; 593 hook_t **hookpp; 594 char *hook_name, *hint_name; 595 hook_func_t hookfn; 596 boolean_t *hookedp; 597 hook_hint_t hint; 598 boolean_t out; 599 int ret = 0; 600 601 const boolean_t gz = ifs->ifs_gz_controlled; 602 603 /* We currently only care about viona hooks notifications */ 604 if (strcmp(name, Hn_VIONA) != 0) 605 return (0); 606 607 if (strcmp(he_name, NH_PHYSICAL_IN) == 0) { 608 out = B_FALSE; 609 } else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) { 610 out = B_TRUE; 611 } else { 612 /* 613 * If we've added more hook events to viona, we must add 614 * the corresponding handling here (even if it's just to 615 * ignore it) to prevent the firewall from not working as 616 * intended. 617 */ 618 cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__, 619 he_name); 620 621 return (0); 622 } 623 624 if (out) { 625 hookpp = &ifs->ifs_ipfhookviona_out; 626 hookfn = ipf_hookviona_out; 627 hookedp = &ifs->ifs_hookviona_physical_out; 628 name = gz ? hook_viona_out_gz : hook_viona_out; 629 hint = gz ? HH_AFTER : HH_BEFORE; 630 hint_name = gz ? 
hook_viona_out : hook_viona_out_gz; 631 } else { 632 hookpp = &ifs->ifs_ipfhookviona_in; 633 hookfn = ipf_hookviona_in; 634 hookedp = &ifs->ifs_hookviona_physical_in; 635 name = gz ? hook_viona_in_gz : hook_viona_in; 636 hint = gz ? HH_BEFORE : HH_AFTER; 637 hint_name = gz ? hook_viona_in : hook_viona_in_gz; 638 } 639 640 switch (command) { 641 default: 642 case HN_NONE: 643 break; 644 case HN_REGISTER: 645 HOOK_INIT(*hookpp, hookfn, (char *)name, ifs); 646 (*hookpp)->h_hint = hint; 647 (*hookpp)->h_hintvalue = (uintptr_t)hint_name; 648 ret = net_hook_register(ifs->ifs_ipf_viona, 649 (char *)he_name, *hookpp); 650 if (ret != 0) { 651 cmn_err(CE_NOTE, "%s: could not register hook " 652 "(hook family=%s hook=%s) err=%d", __func__, 653 name, he_name, ret); 654 *hookedp = B_FALSE; 655 return (ret); 656 } 657 *hookedp = B_TRUE; 658 break; 659 case HN_UNREGISTER: 660 if (ifs->ifs_ipf_viona == NULL) 661 break; 662 663 ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona, 664 (char *)he_name, *hookpp) : 0; 665 if ((ret == 0 || ret == ENXIO)) { 666 if (*hookpp != NULL) { 667 hook_free(*hookpp); 668 *hookpp = NULL; 669 } 670 *hookedp = B_FALSE; 671 } 672 break; 673 } 674 675 return (ret); 676 } 677 678 /* 679 * Called whenever a new nethook instance is created. Currently only used 680 * with the Hn_VIONA nethooks. Similar to ipf_hook_protocol_notify, the out 681 * function signature must return an int, though the result is never used. 682 * We elect to return 0 on success (or not applicable) or a non-zero value 683 * on error. 
684 */ 685 static int 686 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg, 687 const char *netid, const char *dummy __unused, const char *instance) 688 { 689 ipf_stack_t *ifs = arg; 690 int ret = 0; 691 692 /* We currently only care about viona hooks */ 693 if (strcmp(instance, Hn_VIONA) != 0) 694 return (0); 695 696 switch (command) { 697 case HN_NONE: 698 default: 699 return (0); 700 case HN_REGISTER: 701 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid, 702 NHF_VIONA); 703 704 if (ifs->ifs_ipf_viona == NULL) 705 return (EPROTONOSUPPORT); 706 707 ret = net_protocol_notify_register(ifs->ifs_ipf_viona, 708 ipf_hook_protocol_notify, ifs); 709 VERIFY(ret == 0 || ret == ESHUTDOWN); 710 break; 711 case HN_UNREGISTER: 712 if (ifs->ifs_ipf_viona == NULL) 713 break; 714 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona, 715 ipf_hook_protocol_notify)); 716 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona)); 717 ifs->ifs_ipf_viona = NULL; 718 break; 719 } 720 721 return (ret); 722 } 723 724 static int fr_setipfloopback(set, ifs) 725 int set; 726 ipf_stack_t *ifs; 727 { 728 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL) 729 return EFAULT; 730 731 if (set && !ifs->ifs_ipf_loopback) { 732 ifs->ifs_ipf_loopback = 1; 733 734 ifs->ifs_hook4_loopback_in = (net_hook_register( 735 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 736 ifs->ifs_ipfhook4_loop_in) == 0); 737 if (!ifs->ifs_hook4_loopback_in) 738 return EINVAL; 739 740 ifs->ifs_hook4_loopback_out = (net_hook_register( 741 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 742 ifs->ifs_ipfhook4_loop_out) == 0); 743 if (!ifs->ifs_hook4_loopback_out) 744 return EINVAL; 745 746 ifs->ifs_hook6_loopback_in = (net_hook_register( 747 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 748 ifs->ifs_ipfhook6_loop_in) == 0); 749 if (!ifs->ifs_hook6_loopback_in) 750 return EINVAL; 751 752 ifs->ifs_hook6_loopback_out = (net_hook_register( 753 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 754 ifs->ifs_ipfhook6_loop_out) == 0); 755 if 
(!ifs->ifs_hook6_loopback_out) 756 return EINVAL; 757 758 } else if (!set && ifs->ifs_ipf_loopback) { 759 ifs->ifs_ipf_loopback = 0; 760 761 ifs->ifs_hook4_loopback_in = 762 (net_hook_unregister(ifs->ifs_ipf_ipv4, 763 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 764 if (ifs->ifs_hook4_loopback_in) 765 return EBUSY; 766 767 ifs->ifs_hook4_loopback_out = 768 (net_hook_unregister(ifs->ifs_ipf_ipv4, 769 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0); 770 if (ifs->ifs_hook4_loopback_out) 771 return EBUSY; 772 773 ifs->ifs_hook6_loopback_in = 774 (net_hook_unregister(ifs->ifs_ipf_ipv6, 775 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 776 if (ifs->ifs_hook6_loopback_in) 777 return EBUSY; 778 779 ifs->ifs_hook6_loopback_out = 780 (net_hook_unregister(ifs->ifs_ipf_ipv6, 781 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0); 782 if (ifs->ifs_hook6_loopback_out) 783 return EBUSY; 784 } 785 return 0; 786 } 787 788 789 /* 790 * Filter ioctl interface. 791 */ 792 /*ARGSUSED*/ 793 int iplioctl(dev, cmd, data, mode, cp, rp) 794 dev_t dev; 795 int cmd; 796 #if SOLARIS2 >= 7 797 intptr_t data; 798 #else 799 int *data; 800 #endif 801 int mode; 802 cred_t *cp; 803 int *rp; 804 { 805 int error = 0, tmp; 806 friostat_t fio; 807 minor_t unit; 808 u_int enable; 809 ipf_stack_t *ifs; 810 zoneid_t zid; 811 ipf_devstate_t *isp; 812 813 #ifdef IPFDEBUG 814 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n", 815 dev, cmd, data, mode, cp, rp); 816 #endif 817 unit = getminor(dev); 818 819 isp = ddi_get_soft_state(ipf_state, unit); 820 if (isp == NULL) 821 return ENXIO; 822 unit = isp->ipfs_minor; 823 824 zid = crgetzoneid(cp); 825 if (cmd == SIOCIPFZONESET) { 826 if (zid == GLOBAL_ZONEID) 827 return fr_setzoneid(isp, (caddr_t) data); 828 return EACCES; 829 } 830 831 /* 832 * ipf_find_stack returns with a read lock on ifs_ipf_global 833 */ 834 ifs = ipf_find_stack(zid, isp); 835 if (ifs == NULL) 836 return ENXIO; 837 838 if (ifs->ifs_fr_running <= 0) { 839 if (unit != IPL_LOGIPF) { 
840 RWLOCK_EXIT(&ifs->ifs_ipf_global); 841 return EIO; 842 } 843 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && 844 cmd != SIOCIPFSET && cmd != SIOCFRENB && 845 cmd != SIOCGETFS && cmd != SIOCGETFF) { 846 RWLOCK_EXIT(&ifs->ifs_ipf_global); 847 return EIO; 848 } 849 } 850 851 if (ifs->ifs_fr_enable_active != 0) { 852 RWLOCK_EXIT(&ifs->ifs_ipf_global); 853 return EBUSY; 854 } 855 856 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp), 857 curproc, ifs); 858 if (error != -1) { 859 RWLOCK_EXIT(&ifs->ifs_ipf_global); 860 return error; 861 } 862 error = 0; 863 864 switch (cmd) 865 { 866 case SIOCFRENB : 867 if (!(mode & FWRITE)) 868 error = EPERM; 869 else { 870 error = COPYIN((caddr_t)data, (caddr_t)&enable, 871 sizeof(enable)); 872 if (error != 0) { 873 error = EFAULT; 874 break; 875 } 876 877 RWLOCK_EXIT(&ifs->ifs_ipf_global); 878 WRITE_ENTER(&ifs->ifs_ipf_global); 879 880 /* 881 * We must recheck fr_enable_active here, since we've 882 * dropped ifs_ipf_global from R in order to get it 883 * exclusively. 
884 */ 885 if (ifs->ifs_fr_enable_active == 0) { 886 ifs->ifs_fr_enable_active = 1; 887 error = fr_enableipf(ifs, enable); 888 ifs->ifs_fr_enable_active = 0; 889 } 890 } 891 break; 892 case SIOCIPFSET : 893 if (!(mode & FWRITE)) { 894 error = EPERM; 895 break; 896 } 897 /* FALLTHRU */ 898 case SIOCIPFGETNEXT : 899 case SIOCIPFGET : 900 error = fr_ipftune(cmd, (void *)data, ifs); 901 break; 902 case SIOCSETFF : 903 if (!(mode & FWRITE)) 904 error = EPERM; 905 else { 906 error = COPYIN((caddr_t)data, 907 (caddr_t)&ifs->ifs_fr_flags, 908 sizeof(ifs->ifs_fr_flags)); 909 if (error != 0) 910 error = EFAULT; 911 } 912 break; 913 case SIOCIPFLP : 914 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 915 sizeof(tmp)); 916 if (error != 0) 917 error = EFAULT; 918 else 919 error = fr_setipfloopback(tmp, ifs); 920 break; 921 case SIOCGETFF : 922 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data, 923 sizeof(ifs->ifs_fr_flags)); 924 if (error != 0) 925 error = EFAULT; 926 break; 927 case SIOCFUNCL : 928 error = fr_resolvefunc((void *)data); 929 break; 930 case SIOCINAFR : 931 case SIOCRMAFR : 932 case SIOCADAFR : 933 case SIOCZRLST : 934 if (!(mode & FWRITE)) 935 error = EPERM; 936 else 937 error = frrequest(unit, cmd, (caddr_t)data, 938 ifs->ifs_fr_active, 1, ifs); 939 break; 940 case SIOCINIFR : 941 case SIOCRMIFR : 942 case SIOCADIFR : 943 if (!(mode & FWRITE)) 944 error = EPERM; 945 else 946 error = frrequest(unit, cmd, (caddr_t)data, 947 1 - ifs->ifs_fr_active, 1, ifs); 948 break; 949 case SIOCSWAPA : 950 if (!(mode & FWRITE)) 951 error = EPERM; 952 else { 953 WRITE_ENTER(&ifs->ifs_ipf_mutex); 954 bzero((char *)ifs->ifs_frcache, 955 sizeof (ifs->ifs_frcache)); 956 error = COPYOUT((caddr_t)&ifs->ifs_fr_active, 957 (caddr_t)data, 958 sizeof(ifs->ifs_fr_active)); 959 if (error != 0) 960 error = EFAULT; 961 else 962 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active; 963 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 964 } 965 break; 966 case SIOCGETFS : 967 fr_getstat(&fio, ifs); 968 error = 
fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT); 969 break; 970 case SIOCFRZST : 971 if (!(mode & FWRITE)) 972 error = EPERM; 973 else 974 error = fr_zerostats((caddr_t)data, ifs); 975 break; 976 case SIOCIPFFL : 977 if (!(mode & FWRITE)) 978 error = EPERM; 979 else { 980 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 981 sizeof(tmp)); 982 if (!error) { 983 tmp = frflush(unit, 4, tmp, ifs); 984 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 985 sizeof(tmp)); 986 if (error != 0) 987 error = EFAULT; 988 } else 989 error = EFAULT; 990 } 991 break; 992 #ifdef USE_INET6 993 case SIOCIPFL6 : 994 if (!(mode & FWRITE)) 995 error = EPERM; 996 else { 997 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 998 sizeof(tmp)); 999 if (!error) { 1000 tmp = frflush(unit, 6, tmp, ifs); 1001 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 1002 sizeof(tmp)); 1003 if (error != 0) 1004 error = EFAULT; 1005 } else 1006 error = EFAULT; 1007 } 1008 break; 1009 #endif 1010 case SIOCSTLCK : 1011 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 1012 if (error == 0) { 1013 ifs->ifs_fr_state_lock = tmp; 1014 ifs->ifs_fr_nat_lock = tmp; 1015 ifs->ifs_fr_frag_lock = tmp; 1016 ifs->ifs_fr_auth_lock = tmp; 1017 } else 1018 error = EFAULT; 1019 break; 1020 #ifdef IPFILTER_LOG 1021 case SIOCIPFFB : 1022 if (!(mode & FWRITE)) 1023 error = EPERM; 1024 else { 1025 tmp = ipflog_clear(unit, ifs); 1026 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 1027 sizeof(tmp)); 1028 if (error) 1029 error = EFAULT; 1030 } 1031 break; 1032 #endif /* IPFILTER_LOG */ 1033 case SIOCFRSYN : 1034 if (!(mode & FWRITE)) 1035 error = EPERM; 1036 else { 1037 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1038 WRITE_ENTER(&ifs->ifs_ipf_global); 1039 1040 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 1041 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 1042 fr_nataddrsync(0, NULL, NULL, ifs); 1043 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 1044 error = 0; 1045 } 1046 break; 1047 case SIOCGFRST : 1048 error = fr_outobj((void *)data, 
fr_fragstats(ifs), 1049 IPFOBJ_FRAGSTAT); 1050 break; 1051 case FIONREAD : 1052 #ifdef IPFILTER_LOG 1053 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF]; 1054 1055 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); 1056 if (error != 0) 1057 error = EFAULT; 1058 #endif 1059 break; 1060 case SIOCIPFITER : 1061 error = ipf_frruleiter((caddr_t)data, crgetuid(cp), 1062 curproc, ifs); 1063 break; 1064 1065 case SIOCGENITER : 1066 error = ipf_genericiter((caddr_t)data, crgetuid(cp), 1067 curproc, ifs); 1068 break; 1069 1070 case SIOCIPFDELTOK : 1071 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 1072 if (error != 0) { 1073 error = EFAULT; 1074 } else { 1075 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs); 1076 } 1077 break; 1078 1079 default : 1080 #ifdef IPFDEBUG 1081 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", 1082 cmd, (void *)data); 1083 #endif 1084 error = EINVAL; 1085 break; 1086 } 1087 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1088 return error; 1089 } 1090 1091 1092 static int fr_enableipf(ifs, enable) 1093 ipf_stack_t *ifs; 1094 int enable; 1095 { 1096 int error; 1097 1098 if (!enable) { 1099 error = ipldetach(ifs); 1100 if (error == 0) 1101 ifs->ifs_fr_running = -1; 1102 return error; 1103 } 1104 1105 if (ifs->ifs_fr_running > 0) 1106 return 0; 1107 1108 error = iplattach(ifs); 1109 if (error == 0) { 1110 if (ifs->ifs_fr_timer_id == NULL) { 1111 int hz = drv_usectohz(500000); 1112 1113 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, 1114 (void *)ifs, 1115 hz); 1116 } 1117 ifs->ifs_fr_running = 1; 1118 } else { 1119 (void) ipldetach(ifs); 1120 } 1121 return error; 1122 } 1123 1124 1125 phy_if_t get_unit(name, v, ifs) 1126 char *name; 1127 int v; 1128 ipf_stack_t *ifs; 1129 { 1130 net_handle_t nif; 1131 1132 if (v == 4) 1133 nif = ifs->ifs_ipf_ipv4; 1134 else if (v == 6) 1135 nif = ifs->ifs_ipf_ipv6; 1136 else 1137 return 0; 1138 1139 return (net_phylookup(nif, name)); 1140 } 1141 1142 /* 1143 * routines below for saving IP headers to buffer 1144 */ 
1145 /*ARGSUSED*/ 1146 int iplopen(devp, flags, otype, cred) 1147 dev_t *devp; 1148 int flags, otype; 1149 cred_t *cred; 1150 { 1151 ipf_devstate_t *isp; 1152 minor_t min = getminor(*devp); 1153 minor_t minor; 1154 1155 #ifdef IPFDEBUG 1156 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred); 1157 #endif 1158 if (!(otype & OTYP_CHR)) 1159 return ENXIO; 1160 1161 if (IPL_LOGMAX < min) 1162 return ENXIO; 1163 1164 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1, 1165 VM_BESTFIT | VM_SLEEP); 1166 1167 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) { 1168 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1); 1169 return ENXIO; 1170 } 1171 1172 *devp = makedevice(getmajor(*devp), minor); 1173 isp = ddi_get_soft_state(ipf_state, minor); 1174 VERIFY(isp != NULL); 1175 1176 isp->ipfs_minor = min; 1177 isp->ipfs_zoneid = IPFS_ZONE_UNSET; 1178 1179 return 0; 1180 } 1181 1182 1183 /*ARGSUSED*/ 1184 int iplclose(dev, flags, otype, cred) 1185 dev_t dev; 1186 int flags, otype; 1187 cred_t *cred; 1188 { 1189 minor_t min = getminor(dev); 1190 1191 #ifdef IPFDEBUG 1192 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred); 1193 #endif 1194 1195 if (IPL_LOGMAX < min) 1196 return ENXIO; 1197 1198 ddi_soft_state_free(ipf_state, min); 1199 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1); 1200 1201 return 0; 1202 } 1203 1204 #ifdef IPFILTER_LOG 1205 /* 1206 * iplread/ipllog 1207 * both of these must operate with at least splnet() lest they be 1208 * called during packet processing and cause an inconsistancy to appear in 1209 * the filter lists. 
1210 */ 1211 /*ARGSUSED*/ 1212 int iplread(dev, uio, cp) 1213 dev_t dev; 1214 register struct uio *uio; 1215 cred_t *cp; 1216 { 1217 ipf_stack_t *ifs; 1218 int ret; 1219 minor_t unit; 1220 ipf_devstate_t *isp; 1221 1222 unit = getminor(dev); 1223 isp = ddi_get_soft_state(ipf_state, unit); 1224 if (isp == NULL) 1225 return ENXIO; 1226 unit = isp->ipfs_minor; 1227 1228 1229 /* 1230 * ipf_find_stack returns with a read lock on ifs_ipf_global 1231 */ 1232 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1233 if (ifs == NULL) 1234 return ENXIO; 1235 1236 # ifdef IPFDEBUG 1237 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); 1238 # endif 1239 1240 if (ifs->ifs_fr_running < 1) { 1241 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1242 return EIO; 1243 } 1244 1245 # ifdef IPFILTER_SYNC 1246 if (unit == IPL_LOGSYNC) { 1247 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1248 return ipfsync_read(uio); 1249 } 1250 # endif 1251 1252 ret = ipflog_read(unit, uio, ifs); 1253 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1254 return ret; 1255 } 1256 #endif /* IPFILTER_LOG */ 1257 1258 1259 /* 1260 * iplread/ipllog 1261 * both of these must operate with at least splnet() lest they be 1262 * called during packet processing and cause an inconsistancy to appear in 1263 * the filter lists. 
1264 */ 1265 int iplwrite(dev, uio, cp) 1266 dev_t dev; 1267 register struct uio *uio; 1268 cred_t *cp; 1269 { 1270 ipf_stack_t *ifs; 1271 minor_t unit; 1272 ipf_devstate_t *isp; 1273 1274 unit = getminor(dev); 1275 isp = ddi_get_soft_state(ipf_state, unit); 1276 if (isp == NULL) 1277 return ENXIO; 1278 unit = isp->ipfs_minor; 1279 1280 /* 1281 * ipf_find_stack returns with a read lock on ifs_ipf_global 1282 */ 1283 ifs = ipf_find_stack(crgetzoneid(cp), isp); 1284 if (ifs == NULL) 1285 return ENXIO; 1286 1287 #ifdef IPFDEBUG 1288 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); 1289 #endif 1290 1291 if (ifs->ifs_fr_running < 1) { 1292 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1293 return EIO; 1294 } 1295 1296 #ifdef IPFILTER_SYNC 1297 if (getminor(dev) == IPL_LOGSYNC) { 1298 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1299 return ipfsync_write(uio); 1300 } 1301 #endif /* IPFILTER_SYNC */ 1302 dev = dev; /* LINT */ 1303 uio = uio; /* LINT */ 1304 cp = cp; /* LINT */ 1305 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1306 return ENXIO; 1307 } 1308 1309 1310 /* 1311 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that 1312 * requires a large amount of setting up and isn't any more efficient. 1313 */ 1314 int fr_send_reset(fin) 1315 fr_info_t *fin; 1316 { 1317 tcphdr_t *tcp, *tcp2; 1318 int tlen, hlen; 1319 mblk_t *m; 1320 #ifdef USE_INET6 1321 ip6_t *ip6; 1322 #endif 1323 ip_t *ip; 1324 1325 tcp = fin->fin_dp; 1326 if (tcp->th_flags & TH_RST) 1327 return -1; 1328 1329 #ifndef IPFILTER_CKSUM 1330 if (fr_checkl4sum(fin) == -1) 1331 return -1; 1332 #endif 1333 1334 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 
1 : 0; 1335 #ifdef USE_INET6 1336 if (fin->fin_v == 6) 1337 hlen = sizeof(ip6_t); 1338 else 1339 #endif 1340 hlen = sizeof(ip_t); 1341 hlen += sizeof(*tcp2); 1342 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL) 1343 return -1; 1344 1345 m->b_rptr += 64; 1346 MTYPE(m) = M_DATA; 1347 m->b_wptr = m->b_rptr + hlen; 1348 ip = (ip_t *)m->b_rptr; 1349 bzero((char *)ip, hlen); 1350 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2)); 1351 tcp2->th_dport = tcp->th_sport; 1352 tcp2->th_sport = tcp->th_dport; 1353 if (tcp->th_flags & TH_ACK) { 1354 tcp2->th_seq = tcp->th_ack; 1355 tcp2->th_flags = TH_RST; 1356 } else { 1357 tcp2->th_ack = ntohl(tcp->th_seq); 1358 tcp2->th_ack += tlen; 1359 tcp2->th_ack = htonl(tcp2->th_ack); 1360 tcp2->th_flags = TH_RST|TH_ACK; 1361 } 1362 tcp2->th_off = sizeof(struct tcphdr) >> 2; 1363 1364 ip->ip_v = fin->fin_v; 1365 #ifdef USE_INET6 1366 if (fin->fin_v == 6) { 1367 ip6 = (ip6_t *)m->b_rptr; 1368 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1369 ip6->ip6_src = fin->fin_dst6.in6; 1370 ip6->ip6_dst = fin->fin_src6.in6; 1371 ip6->ip6_plen = htons(sizeof(*tcp)); 1372 ip6->ip6_nxt = IPPROTO_TCP; 1373 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2); 1374 } else 1375 #endif 1376 { 1377 ip->ip_src.s_addr = fin->fin_daddr; 1378 ip->ip_dst.s_addr = fin->fin_saddr; 1379 ip->ip_id = fr_nextipid(fin); 1380 ip->ip_hl = sizeof(*ip) >> 2; 1381 ip->ip_p = IPPROTO_TCP; 1382 ip->ip_len = sizeof(*ip) + sizeof(*tcp); 1383 ip->ip_tos = fin->fin_ip->ip_tos; 1384 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2); 1385 } 1386 return fr_send_ip(fin, m, &m); 1387 } 1388 1389 /* 1390 * Function: fr_send_ip 1391 * Returns: 0: success 1392 * -1: failed 1393 * Parameters: 1394 * fin: packet information 1395 * m: the message block where ip head starts 1396 * 1397 * Send a new packet through the IP stack. 
 *
 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
 * function).
 *
 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
 * in by this function.
 *
 * All other portions of the packet must be in on-the-wire format.
 *
 * NOTE(review): the body below only writes ip_ttl, ip_off and ip_sum for
 * IPv4 and ip6_vfc and ip6_hlim for IPv6; ip_v and ip6_flow are set by the
 * callers in this file before they get here.
 */
/*ARGSUSED*/
static int fr_send_ip(fin, m, mpp)
fr_info_t *fin;
mblk_t *m, **mpp;
{
	qpktinfo_t qpi, *qpip;
	fr_info_t fnew;
	ip_t *ip;
	int i, hlen;
	ipf_stack_t *ifs = fin->fin_ifs;

	ip = (ip_t *)m->b_rptr;
	/* fnew describes the generated packet; start from a clean slate */
	bzero((char *)&fnew, sizeof(fnew));

#ifdef USE_INET6
	if (fin->fin_v == 6) {
		ip6_t *ip6;

		ip6 = (ip6_t *)ip;
		ip6->ip6_vfc = 0x60;
		ip6->ip6_hlim = 127;
		fnew.fin_v = 6;
		hlen = sizeof(*ip6);
		/* ip6_plen excludes the fixed header, fin_plen includes it */
		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
	} else
#endif
	{
		fnew.fin_v = 4;
#if SOLARIS2 >= 10
		ip->ip_ttl = 255;
		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
			ip->ip_off = htons(IP_DF);
#else
		if (ip_ttl_ptr != NULL)
			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
		else
			ip->ip_ttl = 63;
		if (ip_mtudisc != NULL)
			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
		else
			ip->ip_off = htons(IP_DF);
#endif
		/*
		 * The dance with byte order and ip_len/ip_off is because in
		 * fr_fastroute, it expects them to be in host byte order but
		 * ipf_cksum expects them to be in network byte order.
		 */
		ip->ip_len = htons(ip->ip_len);
		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);
		hlen = sizeof(*ip);
		fnew.fin_plen = ip->ip_len;
	}

	/*
	 * Describe the new packet: same interface as the packet that
	 * triggered it, flagged as generated by ipf and as not needing
	 * checksum verification.
	 */
	qpip = fin->fin_qpi;
	qpi.qpi_off = 0;
	qpi.qpi_ill = qpip->qpi_ill;
	qpi.qpi_m = m;
	qpi.qpi_data = ip;
	fnew.fin_qpi = &qpi;
	fnew.fin_ifp = fin->fin_ifp;
	fnew.fin_flx = FI_NOCKSUM | FI_GENERATED;
	fnew.fin_m = m;
	fnew.fin_qfm = m;
	fnew.fin_ip = ip;
	fnew.fin_mp = mpp;
	fnew.fin_hlen = hlen;
	fnew.fin_dp = (char *)ip + hlen;
	fnew.fin_ifs = fin->fin_ifs;
	(void) fr_makefrip(hlen, ip, &fnew);

	/* fdp == NULL: route on the packet's own destination address */
	i = fr_fastroute(m, mpp, &fnew, NULL);
	return i;
}


/*
 * fr_send_icmp_err - build an ICMP (or, for IPv6 packets, ICMPv6) error of
 * the given type in response to the packet described by fin and send it
 * with fr_send_ip().  The code comes from fin->fin_icode.  If dst is 0 the
 * source address of the error is obtained with fr_ifpaddr() for the
 * interface in fin->fin_qpi->qpi_ill, otherwise the offending packet's
 * destination address is used.
 *
 * Returns -1 when type/code are out of range, when (without
 * IPFILTER_CKSUM) the layer 4 checksum check fails, when no message block
 * can be allocated or when no source address can be found; returns 0
 * without sending anything when the IPv4 packet is an ICMP message other
 * than an echo, timestamp, information or mask request; otherwise returns
 * the result of fr_send_ip().
 */
int fr_send_icmp_err(type, fin, dst)
int type;
fr_info_t *fin;
int dst;
{
	struct in_addr dst4;
	struct icmp *icmp;
	qpktinfo_t *qpi;
	int hlen, code;
	phy_if_t phy;
	u_short sz;
#ifdef USE_INET6
	mblk_t *mb;
#endif
	mblk_t *m;
#ifdef USE_INET6
	ip6_t *ip6;
#endif
	ip_t *ip;
	ipf_stack_t *ifs = fin->fin_ifs;

	if ((type < 0) || (type > ICMP_MAXTYPE))
		return -1;

	code = fin->fin_icode;
#ifdef USE_INET6
	/* code indexes icmptoicmp6unreach[] below */
	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
		return -1;
#endif

#ifndef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		return -1;
#endif

	qpi = fin->fin_qpi;

#ifdef USE_INET6
	mb = fin->fin_qfm;

	if (fin->fin_v == 6) {
		/* quote at most 512 bytes of the first block of the packet */
		sz = sizeof(ip6_t);
		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
		hlen = sizeof(ip6_t);
		type = icmptoicmp6types[type];
		if (type == ICMP6_DST_UNREACH)
			code = icmptoicmp6unreach[code];
	} else
#endif
	{
		/*
		 * For ICMP packets only the request types listed below are
		 * answered with an error; anything else is silently left
		 * alone.
		 */
		if ((fin->fin_p == IPPROTO_ICMP) &&
		    !(fin->fin_flx & FI_SHORT))
			switch (ntohs(fin->fin_data[0]) >> 8)
			{
			case ICMP_ECHO :
			case ICMP_TSTAMP :
			case ICMP_IREQ :
			case ICMP_MASKREQ :
				break;
			default :
				return 0;
			}

		/* outer IP header + quoted IP header + 8 bytes of payload */
		sz = sizeof(ip_t) * 2;
		sz += 8;		/* 64 bits of data */
		hlen = sizeof(ip_t);
	}

	sz += offsetof(struct icmp, icmp_ip);
	/* 64 spare bytes are left in front of the IP header */
	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
		return -1;
	MTYPE(m) = M_DATA;
	m->b_rptr += 64;
	m->b_wptr = m->b_rptr + sz;
	bzero((char *)m->b_rptr, (size_t)sz);
	ip = (ip_t *)m->b_rptr;
	ip->ip_v = fin->fin_v;
	icmp = (struct icmp *)(m->b_rptr + hlen);
	icmp->icmp_type = type & 0xff;
	icmp->icmp_code = code & 0xff;
	phy = (phy_if_t)qpi->qpi_ill;
	/* "fragmentation needed" errors carry the MTU of the interface */
	if (type == ICMP_UNREACH && (phy != 0) &&
	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );

#ifdef USE_INET6
	if (fin->fin_v == 6) {
		struct in6_addr dst6;
		int csz;

		if (dst == 0) {
			/* NOTE(review): shadows the function-level ifs */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
				       (void *)&dst6, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else
			dst6 = fin->fin_dst6.in6;

		csz = sz;
		sz -= sizeof(ip6_t);
		ip6 = (ip6_t *)m->b_rptr;
		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
		ip6->ip6_plen = htons((u_short)sz);
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		ip6->ip6_src = dst6;
		ip6->ip6_dst = fin->fin_src6.in6;
		sz -= offsetof(struct icmp, icmp_ip);
		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
		/*
		 * NOTE(review): the checksum field is loaded with the ICMPv6
		 * length rather than a computed checksum - presumably it is
		 * completed further down the output path; confirm.
		 */
		icmp->icmp_cksum = csz - sizeof(ip6_t);
	} else
#endif
	{
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_p = IPPROTO_ICMP;
		ip->ip_id = fin->fin_ip->ip_id;
		ip->ip_tos = fin->fin_ip->ip_tos;
		/* host byte order, as fr_send_ip() expects */
		ip->ip_len = (u_short)sz;
		if (dst == 0) {
			/* NOTE(review): shadows the function-level ifs */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
				       (void *)&dst4, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else {
			dst4 = fin->fin_dst;
		}
		ip->ip_src = dst4;
		ip->ip_dst = fin->fin_src;
		/*
		 * Quote the offending packet: its IP header (without
		 * options) followed by the 8 bytes that come after its full
		 * header.
		 */
		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
		      sizeof(*fin->fin_ip));
		bcopy((char *)fin->fin_ip + fin->fin_hlen,
		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
					     sz - sizeof(ip_t));
	}

	/*
	 * Need to exit out of these so we don't recursively call rw_enter
	 * from fr_qout.
	 */
	return fr_send_ip(fin, m, &m);
}

#include <sys/time.h>
#include <sys/varargs.h>

#ifndef _KERNEL
#include <stdio.h>
#endif

/*
 * Return the first IP Address associated with an interface
 * For IPv6, we walk through the list of logical interfaces and return
 * the address of the first one that isn't a link-local interface.
 * We can't assume that it is :1 because another link-local address
 * may have been assigned there.
1648 */ 1649 /*ARGSUSED*/ 1650 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) 1651 int v, atype; 1652 void *ifptr; 1653 struct in_addr *inp, *inpmask; 1654 ipf_stack_t *ifs; 1655 { 1656 struct sockaddr_in6 v6addr[2]; 1657 struct sockaddr_in v4addr[2]; 1658 net_ifaddr_t type[2]; 1659 net_handle_t net_data; 1660 phy_if_t phyif; 1661 void *array; 1662 1663 switch (v) 1664 { 1665 case 4: 1666 net_data = ifs->ifs_ipf_ipv4; 1667 array = v4addr; 1668 break; 1669 case 6: 1670 net_data = ifs->ifs_ipf_ipv6; 1671 array = v6addr; 1672 break; 1673 default: 1674 net_data = NULL; 1675 break; 1676 } 1677 1678 if (net_data == NULL) 1679 return -1; 1680 1681 phyif = (phy_if_t)ifptr; 1682 1683 switch (atype) 1684 { 1685 case FRI_PEERADDR : 1686 type[0] = NA_PEER; 1687 break; 1688 1689 case FRI_BROADCAST : 1690 type[0] = NA_BROADCAST; 1691 break; 1692 1693 default : 1694 type[0] = NA_ADDRESS; 1695 break; 1696 } 1697 1698 type[1] = NA_NETMASK; 1699 1700 if (v == 6) { 1701 lif_if_t idx = 0; 1702 1703 do { 1704 idx = net_lifgetnext(net_data, phyif, idx); 1705 if (net_getlifaddr(net_data, phyif, idx, 2, type, 1706 array) < 0) 1707 return -1; 1708 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) && 1709 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr)) 1710 break; 1711 } while (idx != 0); 1712 1713 if (idx == 0) 1714 return -1; 1715 1716 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1], 1717 inp, inpmask); 1718 } 1719 1720 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0) 1721 return -1; 1722 1723 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask); 1724 } 1725 1726 1727 u_32_t fr_newisn(fin) 1728 fr_info_t *fin; 1729 { 1730 static int iss_seq_off = 0; 1731 u_char hash[16]; 1732 u_32_t newiss; 1733 MD5_CTX ctx; 1734 ipf_stack_t *ifs = fin->fin_ifs; 1735 1736 /* 1737 * Compute the base value of the ISS. It is a hash 1738 * of (saddr, sport, daddr, dport, secret). 
1739 */ 1740 MD5Init(&ctx); 1741 1742 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src, 1743 sizeof(fin->fin_fi.fi_src)); 1744 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst, 1745 sizeof(fin->fin_fi.fi_dst)); 1746 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); 1747 1748 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret)); 1749 1750 MD5Final(hash, &ctx); 1751 1752 bcopy(hash, &newiss, sizeof(newiss)); 1753 1754 /* 1755 * Now increment our "timer", and add it in to 1756 * the computed value. 1757 * 1758 * XXX Use `addin'? 1759 * XXX TCP_ISSINCR too large to use? 1760 */ 1761 iss_seq_off += 0x00010000; 1762 newiss += iss_seq_off; 1763 return newiss; 1764 } 1765 1766 1767 /* ------------------------------------------------------------------------ */ 1768 /* Function: fr_nextipid */ 1769 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */ 1770 /* Parameters: fin(I) - pointer to packet information */ 1771 /* */ 1772 /* Returns the next IPv4 ID to use for this packet. 
*/ 1773 /* ------------------------------------------------------------------------ */ 1774 u_short fr_nextipid(fin) 1775 fr_info_t *fin; 1776 { 1777 static u_short ipid = 0; 1778 u_short id; 1779 ipf_stack_t *ifs = fin->fin_ifs; 1780 1781 MUTEX_ENTER(&ifs->ifs_ipf_rw); 1782 if (fin->fin_pktnum != 0) { 1783 id = fin->fin_pktnum & 0xffff; 1784 } else { 1785 id = ipid++; 1786 } 1787 MUTEX_EXIT(&ifs->ifs_ipf_rw); 1788 1789 return id; 1790 } 1791 1792 1793 #ifndef IPFILTER_CKSUM 1794 /* ARGSUSED */ 1795 #endif 1796 INLINE void fr_checkv4sum(fin) 1797 fr_info_t *fin; 1798 { 1799 #ifdef IPFILTER_CKSUM 1800 if (fr_checkl4sum(fin) == -1) 1801 fin->fin_flx |= FI_BAD; 1802 #endif 1803 } 1804 1805 1806 #ifdef USE_INET6 1807 # ifndef IPFILTER_CKSUM 1808 /* ARGSUSED */ 1809 # endif 1810 INLINE void fr_checkv6sum(fin) 1811 fr_info_t *fin; 1812 { 1813 # ifdef IPFILTER_CKSUM 1814 if (fr_checkl4sum(fin) == -1) 1815 fin->fin_flx |= FI_BAD; 1816 # endif 1817 } 1818 #endif /* USE_INET6 */ 1819 1820 1821 #if (SOLARIS2 < 7) 1822 void fr_slowtimer() 1823 #else 1824 /*ARGSUSED*/ 1825 void fr_slowtimer __P((void *arg)) 1826 #endif 1827 { 1828 ipf_stack_t *ifs = arg; 1829 1830 READ_ENTER(&ifs->ifs_ipf_global); 1831 if (ifs->ifs_fr_running != 1) { 1832 ifs->ifs_fr_timer_id = NULL; 1833 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1834 return; 1835 } 1836 ipf_expiretokens(ifs); 1837 fr_fragexpire(ifs); 1838 fr_timeoutstate(ifs); 1839 fr_natexpire(ifs); 1840 fr_authexpire(ifs); 1841 ifs->ifs_fr_ticks++; 1842 if (ifs->ifs_fr_running == 1) 1843 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, 1844 drv_usectohz(500000)); 1845 else 1846 ifs->ifs_fr_timer_id = NULL; 1847 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1848 } 1849 1850 1851 /* ------------------------------------------------------------------------ */ 1852 /* Function: fr_pullup */ 1853 /* Returns: NULL == pullup failed, else pointer to protocol header */ 1854 /* Parameters: m(I) - pointer to buffer where data packet starts */ 1855 /* fin(I) - pointer to 
packet information */
/*              len(I) - number of bytes to pullup                          */
/*                                                                          */
/* Attempt to move at least len bytes (from the start of the buffer) into a */
/* single buffer for ease of access.  Operating system native functions are */
/* used to manage buffers - if necessary.  If the entire packet ends up in  */
/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
/* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
/* and ONLY if the pullup succeeds.                                         */
/*                                                                          */
/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
/* of buffers that starts at *fin->fin_mp.                                  */
/*                                                                          */
/* On failure the whole message at *fin->fin_mp is freed and fin_mp, fin_m, */
/* fin_ip, fin_dp and qpi_data are all cleared.                             */
/* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	ip = (char *)fin->fin_ip;
	/* already coalesced: nothing to pull up */
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	ipoff = fin->fin_ipoff;
	/* remember where fin_dp sits relative to the IP header */
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	if (M_LEN(m) < len + ipoff) {

		/*
		 * pfil_precheck ensures the IP header is on a 32bit
		 * aligned address so simply fail if that isn't currently
		 * the case (should never happen).
		 *
		 * NOTE(review): the code below does not fail; when ipoff is
		 * not a multiple of 4 it moves b_rptr back by "inc" bytes
		 * (if the data block has that much room) for the duration
		 * of the pullup and restores it afterwards.
		 */
		int inc = 0;

		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		m1 = m;
		m2 = m->b_prev;

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Need to preserve checksum information: save it here and
		 * put it back on m once the message has been pulled up.
		 */
		mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			/* fr_pull[1] counts failed pullups */
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		mac_hcksum_set(m, start, stuff, end, value, flags);

		/* restore the saved b_prev and undo the alignment shift */
		m->b_prev = m2;
		m->b_rptr += inc;
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	/* fr_pull[0] counts calls that get this far */
	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	/* the whole packet was asked for, so it is now in one buffer */
	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}


/*
 * Function:	fr_verifysrc
 * Returns:	int (really boolean)
 * Parameters:	fin - packet information
 *
 * Check whether the packet has a valid source address for the interface on
 * which the packet arrived, implementing the "fr_chksrc" feature.
 * Returns true iff the packet's source address is valid.
1973 */ 1974 int fr_verifysrc(fin) 1975 fr_info_t *fin; 1976 { 1977 net_handle_t net_data_p; 1978 phy_if_t phy_ifdata_routeto; 1979 struct sockaddr sin; 1980 ipf_stack_t *ifs = fin->fin_ifs; 1981 1982 if (fin->fin_v == 4) { 1983 net_data_p = ifs->ifs_ipf_ipv4; 1984 } else if (fin->fin_v == 6) { 1985 net_data_p = ifs->ifs_ipf_ipv6; 1986 } else { 1987 return (0); 1988 } 1989 1990 /* Get the index corresponding to the if name */ 1991 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1992 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr)); 1993 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL); 1994 1995 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 1996 } 1997 1998 /* 1999 * Return true only if forwarding is enabled on the interface. 2000 */ 2001 static int 2002 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp) 2003 { 2004 lif_if_t lif; 2005 2006 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0; 2007 lif = net_lifgetnext(ndp, phyif, lif)) { 2008 int res; 2009 uint64_t flags; 2010 2011 res = net_getlifflags(ndp, phyif, lif, &flags); 2012 if (res != 0) 2013 return (0); 2014 if (flags & IFF_ROUTER) 2015 return (1); 2016 } 2017 2018 return (0); 2019 } 2020 2021 /* 2022 * Function: fr_fastroute 2023 * Returns: 0: success; 2024 * -1: failed 2025 * Parameters: 2026 * mb: the message block where ip head starts 2027 * mpp: the pointer to the pointer of the orignal 2028 * packet message 2029 * fin: packet information 2030 * fdp: destination interface information 2031 * if it is NULL, no interface information provided. 2032 * 2033 * This function is for fastroute/to/dup-to rules. It calls 2034 * pfil_make_lay2_packet to search route, make lay-2 header 2035 * ,and identify output queue for the IP packet. 
 * The destination address depends on the following conditions:
 * 1: for fastroute rule, fdp is passed in as NULL, so the
 *	destination address is the IP Packet's destination address
 * 2: for to/dup-to rule, if an ip address is specified after
 *	the interface name, this address is used as the destination
 *	address. Otherwise IP Packet's destination address is used
 *
 * NOTE(review): the body below sends the packet with net_inject() and,
 * when fdp is not NULL, always takes the address from fdp - confirm that
 * the description above still matches.
 */
int fr_fastroute(mb, mpp, fin, fdp)
mblk_t *mb, **mpp;
fr_info_t *fin;
frdest_t *fdp;
{
	net_handle_t net_data_p;
	net_inject_t *inj;
	mblk_t *mp = NULL;
	frentry_t *fr = fin->fin_fr;
	qpktinfo_t *qpi;
	ip_t *ip;

	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct sockaddr *sinp;
	ipf_stack_t *ifs = fin->fin_ifs;
#ifndef sparc
	u_short __iplen, __ipoff;
#endif

	if (fin->fin_v == 4) {
		net_data_p = ifs->ifs_ipf_ipv4;
	} else if (fin->fin_v == 6) {
		net_data_p = ifs->ifs_ipf_ipv6;
	} else {
		return (-1);
	}

	/*
	 * Check the src here, fin_ifp is the src interface.
	 * Packets generated by ipf itself are exempt from the check.
	 */
	if (!(fin->fin_flx & FI_GENERATED) &&
	    !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p)) {
		return (-1);
	}

	inj = net_inject_alloc(NETINFO_VERSION);
	if (inj == NULL)
		return -1;

	ip = fin->fin_ip;
	qpi = fin->fin_qpi;

	/*
	 * If this is a duplicate mblk then we want ip to point at that
	 * data, not the original, if and only if it is already pointing at
	 * the current mblk data.
	 *
	 * Otherwise, if it's not a duplicate, and we're not already pointing
	 * at the current mblk data, then we want to ensure that the data
	 * points at ip.
	 */

	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
		ip = (ip_t *)mb->b_rptr;
	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
		qpi->qpi_m->b_rptr = (uchar_t *)ip;
		qpi->qpi_off = 0;
	}

	/*
	 * If there is another M_PROTO, we don't want it
	 */
	if (*mpp != mb) {
		mp = unlinkb(*mpp);
		freeb(*mpp);
		*mpp = mp;
	}

	/* sin and sin6 are two views of the address inside the request */
	sinp = (struct sockaddr *)&inj->ni_addr;
	sin = (struct sockaddr_in *)sinp;
	sin6 = (struct sockaddr_in6 *)sinp;
	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
	inj->ni_packet = mb;

	/*
	 * In case we're here due to "to <if>" being used with
	 * "keep state", check that we're going in the correct
	 * direction.
	 */
	if (fdp != NULL) {
		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
		    (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
			goto bad_fastroute;
		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
		if (fin->fin_v == 4) {
			sin->sin_addr = fdp->fd_ip;
		} else {
			sin6->sin6_addr = fdp->fd_ip6.in6;
		}
	} else {
		/* no explicit destination: route on the packet's own */
		if (fin->fin_v == 4) {
			sin->sin_addr = ip->ip_dst;
		} else {
			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
		}
		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
	}

	/* we're checking the destination here */
	if (!(fin->fin_flx & FI_GENERATED) &&
	    !fr_forwarding_enabled(inj->ni_physical, net_data_p)) {
		goto bad_fastroute;
	}

	/*
	 * Clear the hardware checksum flags from packets that we are doing
	 * input processing on as leaving them set will cause the outgoing
	 * NIC (if it supports hardware checksum) to calculate them anew,
	 * using the old (correct) checksums as the pseudo value to start
	 * from.
	 */
	if (fin->fin_out == 0) {
		DB_CKSUMFLAGS(mb) = 0;
	}

	*mpp = mb;

	/*
	 * An inbound packet leaves through another interface without going
	 * through the outbound hook, so run the outbound accounting, state
	 * and NAT processing on it here, with fin temporarily made to
	 * describe an outbound packet on the destination interface.
	 */
	if (fin->fin_out == 0) {
		void *saveifp;
		u_32_t pass;

		saveifp = fin->fin_ifp;
		fin->fin_ifp = (void *)inj->ni_physical;
		fin->fin_flx &= ~FI_STATE;
		fin->fin_out = 1;
		(void) fr_acctpkt(fin, &pass);
		fin->fin_fr = NULL;
		if (!fr || !(fr->fr_flags & FR_RETMASK))
			(void) fr_checkstate(fin, &pass);
		/*
		 * NOTE(review): on this failure path fin_out and fin_ifp
		 * are not put back before leaving.
		 */
		if (fr_checknatout(fin, NULL) == -1)
			goto bad_fastroute;
		fin->fin_out = 0;
		fin->fin_ifp = saveifp;
	}
#ifndef sparc
	/* ip_len/ip_off are in host byte order here; put them on the wire */
	if (fin->fin_v == 4) {
		__iplen = (u_short)ip->ip_len,
		__ipoff = (u_short)ip->ip_off;

		ip->ip_len = htons(__iplen);
		ip->ip_off = htons(__ipoff);
	}
#endif

	if (net_data_p) {
		/*
		 * NOTE(review): mb is not freed here when net_inject()
		 * fails - presumably net_inject() disposes of it; confirm.
		 */
		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
			net_inject_free(inj);
			return (-1);
		}
	}

	/* ifs_fr_frouteok[0] counts successes, [1] failures */
	ifs->ifs_fr_frouteok[0]++;
	net_inject_free(inj);
	return 0;
bad_fastroute:
	net_inject_free(inj);
	freemsg(mb);
	ifs->ifs_fr_frouteok[1]++;
	return -1;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook4_out                                               */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* Calling ipf_hook.
*/ 2212 /* ------------------------------------------------------------------------ */ 2213 /*ARGSUSED*/ 2214 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg) 2215 { 2216 return ipf_hook(info, 1, 0, arg); 2217 } 2218 /*ARGSUSED*/ 2219 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg) 2220 { 2221 return ipf_hook6(info, 1, 0, arg); 2222 } 2223 2224 /* ------------------------------------------------------------------------ */ 2225 /* Function: ipf_hook4_in */ 2226 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2227 /* Parameters: event(I) - pointer to event */ 2228 /* info(I) - pointer to hook information for firewalling */ 2229 /* */ 2230 /* Calling ipf_hook. */ 2231 /* ------------------------------------------------------------------------ */ 2232 /*ARGSUSED*/ 2233 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg) 2234 { 2235 return ipf_hook(info, 0, 0, arg); 2236 } 2237 /*ARGSUSED*/ 2238 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg) 2239 { 2240 return ipf_hook6(info, 0, 0, arg); 2241 } 2242 2243 2244 /* ------------------------------------------------------------------------ */ 2245 /* Function: ipf_hook4_loop_out */ 2246 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2247 /* Parameters: event(I) - pointer to event */ 2248 /* info(I) - pointer to hook information for firewalling */ 2249 /* */ 2250 /* Calling ipf_hook. 
*/ 2251 /* ------------------------------------------------------------------------ */ 2252 /*ARGSUSED*/ 2253 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2254 { 2255 return ipf_hook(info, 1, FI_NOCKSUM, arg); 2256 } 2257 /*ARGSUSED*/ 2258 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 2259 { 2260 return ipf_hook6(info, 1, FI_NOCKSUM, arg); 2261 } 2262 2263 /* Static constants used by ipf_hook_ether */ 2264 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = { 2265 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF 2266 }; 2267 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E }; 2268 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 }; 2269 2270 /* ------------------------------------------------------------------------ */ 2271 /* Function: ipf_hook_ether */ 2272 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2273 /* Parameters: token(I) - pointer to event */ 2274 /* info(I) - pointer to hook information for firewalling */ 2275 /* */ 2276 /* The ipf_hook_ether hook is currently private to illumos. It represents */ 2277 /* a layer 2 datapath generally used by virtual machines. Currently the */ 2278 /* hook is only used by the viona driver to pass along L2 frames for */ 2279 /* inspection. It requires that the L2 ethernet header is contained within */ 2280 /* a single dblk_t (however layers above the L2 header have no restrctions */ 2281 /* in ipf). ipf does not currently support filtering on L2 fields (e.g. */ 2282 /* filtering on a MAC address or ethertype), however virtual machines do */ 2283 /* not have native IP stack instances where ipf traditionally hooks in. */ 2284 /* Instead this entry point is used to determine if the packet is unicast, */ 2285 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the */ 2286 /* traditional ip hooks for filtering. Non IPv4 or non IPv6 packets are */ 2287 /* not subject to examination. 
*/ 2288 /* ------------------------------------------------------------------------ */ 2289 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg, 2290 boolean_t out) 2291 { 2292 struct ether_header *ethp; 2293 hook_pkt_event_t *hpe = (hook_pkt_event_t *)info; 2294 mblk_t *mp; 2295 size_t offset, len; 2296 uint16_t etype; 2297 boolean_t v6; 2298 2299 /* 2300 * viona will only pass us mblks with the L2 header contained in a 2301 * single data block. 2302 */ 2303 mp = *hpe->hpe_mp; 2304 len = MBLKL(mp); 2305 2306 VERIFY3S(len, >=, sizeof (struct ether_header)); 2307 2308 ethp = (struct ether_header *)mp->b_rptr; 2309 if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) { 2310 struct ether_vlan_header *evh = 2311 (struct ether_vlan_header *)ethp; 2312 2313 VERIFY3S(len, >=, sizeof (struct ether_vlan_header)); 2314 2315 etype = ntohs(evh->ether_type); 2316 offset = sizeof (*evh); 2317 } else { 2318 offset = sizeof (*ethp); 2319 } 2320 2321 /* 2322 * ipf only support filtering IPv4 and IPv6. Ignore other types. 2323 */ 2324 if (etype == ETHERTYPE_IP) 2325 v6 = B_FALSE; 2326 else if (etype == ETHERTYPE_IPV6) 2327 v6 = B_TRUE; 2328 else 2329 return (0); 2330 2331 if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0) 2332 hpe->hpe_flags |= HPE_BROADCAST; 2333 else if (bcmp(ipf_eth_ipv4_mcast, ethp, 2334 sizeof (ipf_eth_ipv4_mcast)) == 0) 2335 hpe->hpe_flags |= HPE_MULTICAST; 2336 else if (bcmp(ipf_eth_ipv6_mcast, ethp, 2337 sizeof (ipf_eth_ipv6_mcast)) == 0) 2338 hpe->hpe_flags |= HPE_MULTICAST; 2339 2340 /* Find the start of the IPv4 or IPv6 header */ 2341 for (; offset >= len; len = MBLKL(mp)) { 2342 offset -= len; 2343 mp = mp->b_cont; 2344 if (mp == NULL) { 2345 freemsg(*hpe->hpe_mp); 2346 *hpe->hpe_mp = NULL; 2347 return (-1); 2348 } 2349 } 2350 hpe->hpe_mb = mp; 2351 hpe->hpe_hdr = mp->b_rptr + offset; 2352 2353 return (v6 ? 
ipf_hook6(info, out, 0, arg) : 2354 ipf_hook(info, out, 0, arg)); 2355 } 2356 2357 /* ------------------------------------------------------------------------ */ 2358 /* Function: ipf_hookviona_{in,out} */ 2359 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2360 /* Parameters: event(I) - pointer to event */ 2361 /* info(I) - pointer to hook information for firewalling */ 2362 /* */ 2363 /* The viona hooks are private hooks to illumos. They represents a layer 2 */ 2364 /* datapath generally used to implement virtual machines. */ 2365 /* along L2 packets. */ 2366 /* */ 2367 /* They end up calling the appropriate traditional ip hooks. */ 2368 /* ------------------------------------------------------------------------ */ 2369 int 2370 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg) 2371 { 2372 return (ipf_hook_ether(token, info, arg, B_FALSE)); 2373 } 2374 2375 int 2376 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg) 2377 { 2378 return (ipf_hook_ether(token, info, arg, B_TRUE)); 2379 } 2380 2381 /* ------------------------------------------------------------------------ */ 2382 /* Function: ipf_hook4_loop_in */ 2383 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2384 /* Parameters: event(I) - pointer to event */ 2385 /* info(I) - pointer to hook information for firewalling */ 2386 /* */ 2387 /* Calling ipf_hook. 
*/ 2388 /* ------------------------------------------------------------------------ */ 2389 /*ARGSUSED*/ 2390 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2391 { 2392 return ipf_hook(info, 0, FI_NOCKSUM, arg); 2393 } 2394 /*ARGSUSED*/ 2395 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 2396 { 2397 return ipf_hook6(info, 0, FI_NOCKSUM, arg); 2398 } 2399 2400 /* ------------------------------------------------------------------------ */ 2401 /* Function: ipf_hook */ 2402 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 2403 /* Parameters: info(I) - pointer to hook information for firewalling */ 2404 /* out(I) - whether packet is going in or out */ 2405 /* loopback(I) - whether packet is a loopback packet or not */ 2406 /* */ 2407 /* Stepping stone function between the IP mainline and IPFilter. Extracts */ 2408 /* parameters out of the info structure and forms them up to be useful for */ 2409 /* calling ipfilter. */ 2410 /* ------------------------------------------------------------------------ */ 2411 int ipf_hook(hook_data_t info, int out, int loopback, void *arg) 2412 { 2413 hook_pkt_event_t *fw; 2414 ipf_stack_t *ifs; 2415 qpktinfo_t qpi; 2416 int rval, hlen; 2417 u_short swap; 2418 phy_if_t phy; 2419 ip_t *ip; 2420 2421 ifs = arg; 2422 fw = (hook_pkt_event_t *)info; 2423 2424 ASSERT(fw != NULL); 2425 phy = (out == 0) ? 
fw->hpe_ifp : fw->hpe_ofp; 2426 2427 ip = fw->hpe_hdr; 2428 swap = ntohs(ip->ip_len); 2429 ip->ip_len = swap; 2430 swap = ntohs(ip->ip_off); 2431 ip->ip_off = swap; 2432 hlen = IPH_HDR_LENGTH(ip); 2433 2434 qpi.qpi_m = fw->hpe_mb; 2435 qpi.qpi_data = fw->hpe_hdr; 2436 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2437 qpi.qpi_ill = (void *)phy; 2438 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2439 if (qpi.qpi_flags) 2440 qpi.qpi_flags |= FI_MBCAST; 2441 qpi.qpi_flags |= loopback; 2442 2443 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2444 &qpi, fw->hpe_mp, ifs); 2445 2446 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2447 if (rval == 0 && *(fw->hpe_mp) == NULL) 2448 rval = 1; 2449 2450 /* Notify IP the packet mblk_t and IP header pointers. */ 2451 fw->hpe_mb = qpi.qpi_m; 2452 fw->hpe_hdr = qpi.qpi_data; 2453 if (rval == 0) { 2454 ip = qpi.qpi_data; 2455 swap = ntohs(ip->ip_len); 2456 ip->ip_len = swap; 2457 swap = ntohs(ip->ip_off); 2458 ip->ip_off = swap; 2459 } 2460 return rval; 2461 2462 } 2463 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg) 2464 { 2465 hook_pkt_event_t *fw; 2466 int rval, hlen; 2467 qpktinfo_t qpi; 2468 phy_if_t phy; 2469 2470 fw = (hook_pkt_event_t *)info; 2471 2472 ASSERT(fw != NULL); 2473 phy = (out == 0) ? 
fw->hpe_ifp : fw->hpe_ofp; 2474 2475 hlen = sizeof (ip6_t); 2476 2477 qpi.qpi_m = fw->hpe_mb; 2478 qpi.qpi_data = fw->hpe_hdr; 2479 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 2480 qpi.qpi_ill = (void *)phy; 2481 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 2482 if (qpi.qpi_flags) 2483 qpi.qpi_flags |= FI_MBCAST; 2484 qpi.qpi_flags |= loopback; 2485 2486 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 2487 &qpi, fw->hpe_mp, arg); 2488 2489 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 2490 if (rval == 0 && *(fw->hpe_mp) == NULL) 2491 rval = 1; 2492 2493 /* Notify IP the packet mblk_t and IP header pointers. */ 2494 fw->hpe_mb = qpi.qpi_m; 2495 fw->hpe_hdr = qpi.qpi_data; 2496 return rval; 2497 } 2498 2499 2500 /* ------------------------------------------------------------------------ */ 2501 /* Function: ipf_nic_event_v4 */ 2502 /* Returns: int - 0 == no problems encountered */ 2503 /* Parameters: event(I) - pointer to event */ 2504 /* info(I) - pointer to information about a NIC event */ 2505 /* */ 2506 /* Function to receive asynchronous NIC events from IP */ 2507 /* ------------------------------------------------------------------------ */ 2508 /*ARGSUSED*/ 2509 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg) 2510 { 2511 struct sockaddr_in *sin; 2512 hook_nic_event_t *hn; 2513 ipf_stack_t *ifs = arg; 2514 void *new_ifp = NULL; 2515 2516 if (ifs->ifs_fr_running <= 0) 2517 return (0); 2518 2519 hn = (hook_nic_event_t *)info; 2520 2521 switch (hn->hne_event) 2522 { 2523 case NE_PLUMB : 2524 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data, 2525 ifs); 2526 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2527 hn->hne_data, ifs); 2528 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2529 hn->hne_data, ifs); 2530 break; 2531 2532 case NE_UNPLUMB : 2533 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2534 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void 
*)hn->hne_nic, NULL, 2535 ifs); 2536 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2537 break; 2538 2539 case NE_ADDRESS_CHANGE : 2540 /* 2541 * We only respond to events for logical interface 0 because 2542 * IPFilter only uses the first address given to a network 2543 * interface. We check for hne_lif==1 because the netinfo 2544 * code maps adds 1 to the lif number so that it can return 2545 * 0 to indicate "no more lifs" when walking them. 2546 */ 2547 if (hn->hne_lif == 1) { 2548 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL, 2549 ifs); 2550 sin = hn->hne_data; 2551 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr, 2552 ifs); 2553 } 2554 break; 2555 2556 #if SOLARIS2 >= 10 2557 case NE_IFINDEX_CHANGE : 2558 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2559 2560 if (hn->hne_data != NULL) { 2561 /* 2562 * The netinfo passes interface index as int (hne_data should be 2563 * handled as a pointer to int), which is always 32bit. We need to 2564 * convert it to void pointer here, since interfaces are 2565 * represented as pointers to void in IPF. The pointers are 64 bits 2566 * long on 64bit platforms. Doing something like 2567 * (void *)((int) x) 2568 * will throw warning: 2569 * "cast to pointer from integer of different size" 2570 * during 64bit compilation. 2571 * 2572 * The line below uses (size_t) to typecast int to 2573 * size_t, which might be 64bit/32bit (depending 2574 * on architecture). Once we have proper 64bit/32bit 2575 * type (size_t), we can safely convert it to void pointer. 
2576 */ 2577 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2578 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2579 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2580 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2581 } 2582 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2583 break; 2584 #endif 2585 2586 default : 2587 break; 2588 } 2589 2590 return 0; 2591 } 2592 2593 2594 /* ------------------------------------------------------------------------ */ 2595 /* Function: ipf_nic_event_v6 */ 2596 /* Returns: int - 0 == no problems encountered */ 2597 /* Parameters: event(I) - pointer to event */ 2598 /* info(I) - pointer to information about a NIC event */ 2599 /* */ 2600 /* Function to receive asynchronous NIC events from IP */ 2601 /* ------------------------------------------------------------------------ */ 2602 /*ARGSUSED*/ 2603 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg) 2604 { 2605 struct sockaddr_in6 *sin6; 2606 hook_nic_event_t *hn; 2607 ipf_stack_t *ifs = arg; 2608 void *new_ifp = NULL; 2609 2610 if (ifs->ifs_fr_running <= 0) 2611 return (0); 2612 2613 hn = (hook_nic_event_t *)info; 2614 2615 switch (hn->hne_event) 2616 { 2617 case NE_PLUMB : 2618 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2619 hn->hne_data, ifs); 2620 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2621 hn->hne_data, ifs); 2622 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2623 hn->hne_data, ifs); 2624 break; 2625 2626 case NE_UNPLUMB : 2627 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2628 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, 2629 ifs); 2630 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2631 break; 2632 2633 case NE_ADDRESS_CHANGE : 2634 if (hn->hne_lif == 1) { 2635 sin6 = hn->hne_data; 2636 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr, 2637 ifs); 2638 } 2639 break; 2640 2641 #if SOLARIS2 >= 10 2642 case NE_IFINDEX_CHANGE : 2643 WRITE_ENTER(&ifs->ifs_ipf_mutex); 
2644 if (hn->hne_data != NULL) { 2645 /* 2646 * The netinfo passes interface index as int (hne_data should be 2647 * handled as a pointer to int), which is always 32bit. We need to 2648 * convert it to void pointer here, since interfaces are 2649 * represented as pointers to void in IPF. The pointers are 64 bits 2650 * long on 64bit platforms. Doing something like 2651 * (void *)((int) x) 2652 * will throw warning: 2653 * "cast to pointer from integer of different size" 2654 * during 64bit compilation. 2655 * 2656 * The line below uses (size_t) to typecast int to 2657 * size_t, which might be 64bit/32bit (depending 2658 * on architecture). Once we have proper 64bit/32bit 2659 * type (size_t), we can safely convert it to void pointer. 2660 */ 2661 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2662 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2663 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2664 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2665 } 2666 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2667 break; 2668 #endif 2669 2670 default : 2671 break; 2672 } 2673 2674 return 0; 2675 } 2676 2677 /* 2678 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6() 2679 * are needed in Solaris kernel only. We don't need them in 2680 * ipftest to pretend the ICMP/RST packet was sent as a response. 2681 */ 2682 #if defined(_KERNEL) && (SOLARIS2 >= 10) 2683 /* ------------------------------------------------------------------------ */ 2684 /* Function: fr_make_rst */ 2685 /* Returns: int - 0 on success, -1 on failure */ 2686 /* Parameters: fin(I) - pointer to packet information */ 2687 /* */ 2688 /* We must alter the original mblks passed to IPF from IP stack via */ 2689 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. 
*/ 2690 /* IPF can basicaly do only these things with mblk representing the packet: */ 2691 /* leave it as it is (pass the packet) */ 2692 /* */ 2693 /* discard it (block the packet) */ 2694 /* */ 2695 /* alter it (i.e. NAT) */ 2696 /* */ 2697 /* As you can see IPF can not simply discard the mblk and supply a new one */ 2698 /* instead to IP stack via FW_HOOKS. */ 2699 /* */ 2700 /* The return-rst action for packets coming via NIC is handled as follows: */ 2701 /* mblk with packet is discarded */ 2702 /* */ 2703 /* new mblk with RST response is constructed and injected to network */ 2704 /* */ 2705 /* IPF can't inject packets to loopback interface, this is just another */ 2706 /* limitation we have to deal with here. The only option to send RST */ 2707 /* response to offending TCP packet coming via loopback is to alter it. */ 2708 /* */ 2709 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */ 2710 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */ 2711 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */ 2712 /* ------------------------------------------------------------------------ */ 2713 int fr_make_rst(fin) 2714 fr_info_t *fin; 2715 { 2716 uint16_t tmp_port; 2717 int rv = -1; 2718 uint32_t old_ack; 2719 tcphdr_t *tcp = NULL; 2720 struct in_addr tmp_src; 2721 #ifdef USE_INET6 2722 struct in6_addr tmp_src6; 2723 #endif 2724 2725 ASSERT(fin->fin_p == IPPROTO_TCP); 2726 2727 /* 2728 * We do not need to adjust chksum, since it is not being checked by 2729 * Solaris IP stack for loopback clients. 2730 */ 2731 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) && 2732 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2733 2734 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2735 /* Swap IPv4 addresses. 
*/ 2736 tmp_src = fin->fin_ip->ip_src; 2737 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2738 fin->fin_ip->ip_dst = tmp_src; 2739 2740 rv = 0; 2741 } 2742 else 2743 tcp = NULL; 2744 } 2745 #ifdef USE_INET6 2746 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) && 2747 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2748 /* 2749 * We are relying on fact the next header is TCP, which is true 2750 * for regular TCP packets coming in over loopback. 2751 */ 2752 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2753 /* Swap IPv6 addresses. */ 2754 tmp_src6 = fin->fin_ip6->ip6_src; 2755 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2756 fin->fin_ip6->ip6_dst = tmp_src6; 2757 2758 rv = 0; 2759 } 2760 else 2761 tcp = NULL; 2762 } 2763 #endif 2764 2765 if (tcp != NULL) { 2766 /* 2767 * Adjust TCP header: 2768 * swap ports, 2769 * set flags, 2770 * set correct ACK number 2771 */ 2772 tmp_port = tcp->th_sport; 2773 tcp->th_sport = tcp->th_dport; 2774 tcp->th_dport = tmp_port; 2775 old_ack = tcp->th_ack; 2776 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1); 2777 tcp->th_seq = old_ack; 2778 tcp->th_flags = TH_RST | TH_ACK; 2779 } 2780 2781 return (rv); 2782 } 2783 2784 /* ------------------------------------------------------------------------ */ 2785 /* Function: fr_make_icmp_v4 */ 2786 /* Returns: int - 0 on success, -1 on failure */ 2787 /* Parameters: fin(I) - pointer to packet information */ 2788 /* */ 2789 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2790 /* what is going to happen here and why. Once you read the comment there, */ 2791 /* continue here with next paragraph. */ 2792 /* */ 2793 /* To turn IPv4 packet into ICMPv4 response packet, these things must */ 2794 /* happen here: */ 2795 /* (1) Original mblk is copied (duplicated). */ 2796 /* */ 2797 /* (2) ICMP header is created. */ 2798 /* */ 2799 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */ 2800 /* data ready then. 
*/ 2801 /* */ 2802 /* (4) Swap IP addresses in original mblk and adjust IP header data. */ 2803 /* */ 2804 /* (5) The mblk containing original packet is trimmed to contain IP */ 2805 /* header only and ICMP chksum is computed. */ 2806 /* */ 2807 /* (6) The ICMP header we have from (3) is linked to original mblk, */ 2808 /* which now contains new IP header. If original packet was spread */ 2809 /* over several mblks, only the first mblk is kept. */ 2810 /* ------------------------------------------------------------------------ */ 2811 static int fr_make_icmp_v4(fin) 2812 fr_info_t *fin; 2813 { 2814 struct in_addr tmp_src; 2815 tcphdr_t *tcp; 2816 struct icmp *icmp; 2817 mblk_t *mblk_icmp; 2818 mblk_t *mblk_ip; 2819 size_t icmp_pld_len; /* octets to append to ICMP header */ 2820 size_t orig_iphdr_len; /* length of IP header only */ 2821 uint32_t sum; 2822 uint16_t *buf; 2823 int len; 2824 2825 2826 if (fin->fin_v != 4) 2827 return (-1); 2828 2829 /* 2830 * If we are dealing with TCP, then packet must be SYN/FIN to be routed 2831 * by IP stack. If it is not SYN/FIN, then we must drop it silently. 2832 */ 2833 tcp = (tcphdr_t *) fin->fin_dp; 2834 2835 if ((fin->fin_p == IPPROTO_TCP) && 2836 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2837 return (-1); 2838 2839 /* 2840 * Step (1) 2841 * 2842 * Make copy of original mblk. 2843 * 2844 * We want to copy as much data as necessary, not less, not more. The 2845 * ICMPv4 payload length for unreachable messages is: 2846 * original IP header + 8 bytes of L4 (if there are any). 2847 * 2848 * We determine if there are at least 8 bytes of L4 data following IP 2849 * header first. 2850 */ 2851 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ? 2852 ICMPERR_ICMPHLEN : fin->fin_dlen; 2853 /* 2854 * Since we don't want to copy more data than necessary, we must trim 2855 * the original mblk here. The right way (STREAMish) would be to use 2856 * adjmsg() to trim it. 
However we would have to calculate the length 2857 * argument for adjmsg() from pointers we already have here. 2858 * 2859 * Since we have pointers and offsets, it's faster and easier for 2860 * us to just adjust pointers by hand instead of using adjmsg(). 2861 */ 2862 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp; 2863 fin->fin_m->b_wptr += icmp_pld_len; 2864 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip; 2865 2866 /* 2867 * Also we don't want to copy any L2 stuff, which might precede IP 2868 * header, so we have have to set b_rptr to point to the start of IP 2869 * header. 2870 */ 2871 fin->fin_m->b_rptr += fin->fin_ipoff; 2872 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2873 return (-1); 2874 fin->fin_m->b_rptr -= fin->fin_ipoff; 2875 2876 /* 2877 * Step (2) 2878 * 2879 * Create an ICMP header, which will be appened to original mblk later. 2880 * ICMP header is just another mblk. 2881 */ 2882 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI); 2883 if (mblk_icmp == NULL) { 2884 FREE_MB_T(mblk_ip); 2885 return (-1); 2886 } 2887 2888 MTYPE(mblk_icmp) = M_DATA; 2889 icmp = (struct icmp *) mblk_icmp->b_wptr; 2890 icmp->icmp_type = ICMP_UNREACH; 2891 icmp->icmp_code = fin->fin_icode & 0xFF; 2892 icmp->icmp_void = 0; 2893 icmp->icmp_cksum = 0; 2894 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN; 2895 2896 /* 2897 * Step (3) 2898 * 2899 * Complete ICMP packet - link ICMP header with L4 data from original 2900 * IP packet. 2901 */ 2902 linkb(mblk_icmp, mblk_ip); 2903 2904 /* 2905 * Step (4) 2906 * 2907 * Swap IP addresses and change IP header fields accordingly in 2908 * original IP packet. 2909 * 2910 * There is a rule option return-icmp as a dest for physical 2911 * interfaces. This option becomes useless for loopback, since IPF box 2912 * uses same address as a loopback destination. We ignore the option 2913 * here, the ICMP packet will always look like as it would have been 2914 * sent from the original destination host. 
2915 */ 2916 tmp_src = fin->fin_ip->ip_src; 2917 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2918 fin->fin_ip->ip_dst = tmp_src; 2919 fin->fin_ip->ip_p = IPPROTO_ICMP; 2920 fin->fin_ip->ip_sum = 0; 2921 2922 /* 2923 * Step (5) 2924 * 2925 * We trim the orignal mblk to hold IP header only. 2926 */ 2927 fin->fin_m->b_wptr = fin->fin_dp; 2928 orig_iphdr_len = fin->fin_m->b_wptr - 2929 (fin->fin_m->b_rptr + fin->fin_ipoff); 2930 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN + 2931 orig_iphdr_len); 2932 2933 /* 2934 * ICMP chksum calculation. The data we are calculating chksum for are 2935 * spread over two mblks, therefore we have to use two for loops. 2936 * 2937 * First for loop computes chksum part for ICMP header. 2938 */ 2939 buf = (uint16_t *) icmp; 2940 len = ICMPERR_ICMPHLEN; 2941 for (sum = 0; len > 1; len -= 2) 2942 sum += *buf++; 2943 2944 /* 2945 * Here we add chksum part for ICMP payload. 2946 */ 2947 len = icmp_pld_len; 2948 buf = (uint16_t *) mblk_ip->b_rptr; 2949 for (; len > 1; len -= 2) 2950 sum += *buf++; 2951 2952 /* 2953 * Chksum is done. 2954 */ 2955 sum = (sum >> 16) + (sum & 0xffff); 2956 sum += (sum >> 16); 2957 icmp->icmp_cksum = ~sum; 2958 2959 /* 2960 * Step (6) 2961 * 2962 * Release all packet mblks, except the first one. 2963 */ 2964 if (fin->fin_m->b_cont != NULL) { 2965 FREE_MB_T(fin->fin_m->b_cont); 2966 } 2967 2968 /* 2969 * Append ICMP payload to first mblk, which already contains new IP 2970 * header. 2971 */ 2972 linkb(fin->fin_m, mblk_icmp); 2973 2974 return (0); 2975 } 2976 2977 #ifdef USE_INET6 2978 /* ------------------------------------------------------------------------ */ 2979 /* Function: fr_make_icmp_v6 */ 2980 /* Returns: int - 0 on success, -1 on failure */ 2981 /* Parameters: fin(I) - pointer to packet information */ 2982 /* */ 2983 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2984 /* what and why is going to happen here. 
Once you read the comment there, */ 2985 /* continue here with next paragraph. */ 2986 /* */ 2987 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */ 2988 /* The algorithm is fairly simple: */ 2989 /* 1) We need to get copy of complete mblk. */ 2990 /* */ 2991 /* 2) New ICMPv6 header is created. */ 2992 /* */ 2993 /* 3) The copy of original mblk with packet is linked to ICMPv6 */ 2994 /* header. */ 2995 /* */ 2996 /* 4) The checksum must be adjusted. */ 2997 /* */ 2998 /* 5) IP addresses in original mblk are swapped and IP header data */ 2999 /* are adjusted (protocol number). */ 3000 /* */ 3001 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */ 3002 /* linked with the ICMPv6 data we got from (3). */ 3003 /* ------------------------------------------------------------------------ */ 3004 static int fr_make_icmp_v6(fin) 3005 fr_info_t *fin; 3006 { 3007 struct icmp6_hdr *icmp6; 3008 tcphdr_t *tcp; 3009 struct in6_addr tmp_src6; 3010 size_t icmp_pld_len; 3011 mblk_t *mblk_ip, *mblk_icmp; 3012 3013 if (fin->fin_v != 6) 3014 return (-1); 3015 3016 /* 3017 * If we are dealing with TCP, then packet must SYN/FIN to be routed by 3018 * IP stack. If it is not SYN/FIN, then we must drop it silently. 3019 */ 3020 tcp = (tcphdr_t *) fin->fin_dp; 3021 3022 if ((fin->fin_p == IPPROTO_TCP) && 3023 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 3024 return (-1); 3025 3026 /* 3027 * Step (1) 3028 * 3029 * We need to copy complete packet in case of IPv6, no trimming is 3030 * needed (except the L2 headers). 3031 */ 3032 icmp_pld_len = M_LEN(fin->fin_m); 3033 fin->fin_m->b_rptr += fin->fin_ipoff; 3034 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 3035 return (-1); 3036 fin->fin_m->b_rptr -= fin->fin_ipoff; 3037 3038 /* 3039 * Step (2) 3040 * 3041 * Allocate and create ICMP header. 
3042 */ 3043 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr), 3044 BPRI_HI); 3045 3046 if (mblk_icmp == NULL) 3047 return (-1); 3048 3049 MTYPE(mblk_icmp) = M_DATA; 3050 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr; 3051 icmp6->icmp6_type = ICMP6_DST_UNREACH; 3052 icmp6->icmp6_code = fin->fin_icode & 0xFF; 3053 icmp6->icmp6_data32[0] = 0; 3054 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr); 3055 3056 /* 3057 * Step (3) 3058 * 3059 * Link the copy of IP packet to ICMP header. 3060 */ 3061 linkb(mblk_icmp, mblk_ip); 3062 3063 /* 3064 * Step (4) 3065 * 3066 * Calculate chksum - this is much more easier task than in case of 3067 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length. 3068 * We are making compensation just for change of packet length. 3069 */ 3070 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr); 3071 3072 /* 3073 * Step (5) 3074 * 3075 * Swap IP addresses. 3076 */ 3077 tmp_src6 = fin->fin_ip6->ip6_src; 3078 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 3079 fin->fin_ip6->ip6_dst = tmp_src6; 3080 3081 /* 3082 * and adjust IP header data. 3083 */ 3084 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6; 3085 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr)); 3086 3087 /* 3088 * Step (6) 3089 * 3090 * We must release all linked mblks from original packet and keep only 3091 * the first mblk with IP header to link ICMP data. 3092 */ 3093 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t); 3094 3095 if (fin->fin_m->b_cont != NULL) { 3096 FREE_MB_T(fin->fin_m->b_cont); 3097 } 3098 3099 /* 3100 * Append ICMP payload to IP header. 
3101 */ 3102 linkb(fin->fin_m, mblk_icmp); 3103 3104 return (0); 3105 } 3106 #endif /* USE_INET6 */ 3107 3108 /* ------------------------------------------------------------------------ */ 3109 /* Function: fr_make_icmp */ 3110 /* Returns: int - 0 on success, -1 on failure */ 3111 /* Parameters: fin(I) - pointer to packet information */ 3112 /* */ 3113 /* We must alter the original mblks passed to IPF from IP stack via */ 3114 /* FW_HOOKS. The reasons why we must alter packet are discussed within */ 3115 /* comment at fr_make_rst() function. */ 3116 /* */ 3117 /* The fr_make_icmp() function acts as a wrapper, which passes the code */ 3118 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */ 3119 /* protocol version. However there are some details, which are common to */ 3120 /* both IP versions. The details are going to be explained here. */ 3121 /* */ 3122 /* The packet looks as follows: */ 3123 /* xxx | IP hdr | IP payload ... | */ 3124 /* ^ ^ ^ ^ */ 3125 /* | | | | */ 3126 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */ 3127 /* | | | */ 3128 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */ 3129 /* | | */ 3130 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */ 3131 /* | of loopback) */ 3132 /* | */ 3133 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */ 3134 /* */ 3135 /* All relevant IP headers are pulled up into the first mblk. It happened */ 3136 /* well in advance before the matching rule was found (the rule, which took */ 3137 /* us here, to fr_make_icmp() function). */ 3138 /* */ 3139 /* Both functions will turn packet passed in fin->fin_m mblk into a new */ 3140 /* packet. New packet will be represented as chain of mblks. */ 3141 /* orig mblk |- b_cont ---. */ 3142 /* ^ `-> ICMP hdr |- b_cont--. 
*/ 3143 /* | ^ `-> duped orig mblk */ 3144 /* | | ^ */ 3145 /* `- The original mblk | | */ 3146 /* will be trimmed to | | */ 3147 /* to contain IP header | | */ 3148 /* only | | */ 3149 /* | | */ 3150 /* `- This is newly | */ 3151 /* allocated mblk to | */ 3152 /* hold ICMPv6 data. | */ 3153 /* | */ 3154 /* | */ 3155 /* | */ 3156 /* This is the copy of original mblk, it will contain -' */ 3157 /* orignal IP packet in case of ICMPv6. In case of */ 3158 /* ICMPv4 it will contain up to 8 bytes of IP payload */ 3159 /* (TCP/UDP/L4) data from original packet. */ 3160 /* ------------------------------------------------------------------------ */ 3161 int fr_make_icmp(fin) 3162 fr_info_t *fin; 3163 { 3164 int rv; 3165 3166 if (fin->fin_v == 4) 3167 rv = fr_make_icmp_v4(fin); 3168 #ifdef USE_INET6 3169 else if (fin->fin_v == 6) 3170 rv = fr_make_icmp_v6(fin); 3171 #endif 3172 else 3173 rv = -1; 3174 3175 return (rv); 3176 } 3177 3178 /* ------------------------------------------------------------------------ */ 3179 /* Function: fr_buf_sum */ 3180 /* Returns: unsigned int - sum of buffer buf */ 3181 /* Parameters: buf - pointer to buf we want to sum up */ 3182 /* len - length of buffer buf */ 3183 /* */ 3184 /* Sums buffer buf. The result is used for chksum calculation. The buf */ 3185 /* argument must be aligned. 
*/ 3186 /* ------------------------------------------------------------------------ */ 3187 static uint32_t fr_buf_sum(buf, len) 3188 const void *buf; 3189 unsigned int len; 3190 { 3191 uint32_t sum = 0; 3192 uint16_t *b = (uint16_t *)buf; 3193 3194 while (len > 1) { 3195 sum += *b++; 3196 len -= 2; 3197 } 3198 3199 if (len == 1) 3200 sum += htons((*(unsigned char *)b) << 8); 3201 3202 return (sum); 3203 } 3204 3205 /* ------------------------------------------------------------------------ */ 3206 /* Function: fr_calc_chksum */ 3207 /* Returns: void */ 3208 /* Parameters: fin - pointer to fr_info_t instance with packet data */ 3209 /* pkt - pointer to duplicated packet */ 3210 /* */ 3211 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */ 3212 /* versions. */ 3213 /* ------------------------------------------------------------------------ */ 3214 void fr_calc_chksum(fin, pkt) 3215 fr_info_t *fin; 3216 mb_t *pkt; 3217 { 3218 struct pseudo_hdr { 3219 union { 3220 struct in_addr in4; 3221 #ifdef USE_INET6 3222 struct in6_addr in6; 3223 #endif 3224 } src_addr; 3225 union { 3226 struct in_addr in4; 3227 #ifdef USE_INET6 3228 struct in6_addr in6; 3229 #endif 3230 } dst_addr; 3231 char zero; 3232 char proto; 3233 uint16_t len; 3234 } phdr; 3235 uint32_t sum, ip_sum; 3236 void *buf; 3237 uint16_t *l4_csum_p; 3238 tcphdr_t *tcp; 3239 udphdr_t *udp; 3240 icmphdr_t *icmp; 3241 #ifdef USE_INET6 3242 struct icmp6_hdr *icmp6; 3243 #endif 3244 ip_t *ip; 3245 unsigned int len; 3246 int pld_len; 3247 3248 /* 3249 * We need to pullup the packet to the single continuous buffer to avoid 3250 * potential misaligment of b_rptr member in mblk chain. 3251 */ 3252 if (pullupmsg(pkt, -1) == 0) { 3253 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum" 3254 " will not be computed by IPF"); 3255 return; 3256 } 3257 3258 /* 3259 * It is guaranteed IP header starts right at b_rptr, because we are 3260 * working with a copy of the original packet. 
3261 * 3262 * Compute pseudo header chksum for TCP and UDP. 3263 */ 3264 if ((fin->fin_p == IPPROTO_UDP) || 3265 (fin->fin_p == IPPROTO_TCP)) { 3266 bzero(&phdr, sizeof (phdr)); 3267 #ifdef USE_INET6 3268 if (fin->fin_v == 6) { 3269 phdr.src_addr.in6 = fin->fin_srcip6; 3270 phdr.dst_addr.in6 = fin->fin_dstip6; 3271 } else { 3272 phdr.src_addr.in4 = fin->fin_src; 3273 phdr.dst_addr.in4 = fin->fin_dst; 3274 } 3275 #else 3276 phdr.src_addr.in4 = fin->fin_src; 3277 phdr.dst_addr.in4 = fin->fin_dst; 3278 #endif 3279 phdr.zero = (char) 0; 3280 phdr.proto = fin->fin_p; 3281 phdr.len = htons((uint16_t)fin->fin_dlen); 3282 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr)); 3283 } else { 3284 sum = 0; 3285 } 3286 3287 /* 3288 * Set pointer to the L4 chksum field in the packet, set buf pointer to 3289 * the L4 header start. 3290 */ 3291 switch (fin->fin_p) { 3292 case IPPROTO_UDP: 3293 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen); 3294 l4_csum_p = &udp->uh_sum; 3295 buf = udp; 3296 break; 3297 case IPPROTO_TCP: 3298 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen); 3299 l4_csum_p = &tcp->th_sum; 3300 buf = tcp; 3301 break; 3302 case IPPROTO_ICMP: 3303 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen); 3304 l4_csum_p = &icmp->icmp_cksum; 3305 buf = icmp; 3306 break; 3307 #ifdef USE_INET6 3308 case IPPROTO_ICMPV6: 3309 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen); 3310 l4_csum_p = &icmp6->icmp6_cksum; 3311 buf = icmp6; 3312 break; 3313 #endif 3314 default: 3315 l4_csum_p = NULL; 3316 } 3317 3318 /* 3319 * Compute L4 chksum if needed. 3320 */ 3321 if (l4_csum_p != NULL) { 3322 *l4_csum_p = (uint16_t)0; 3323 pld_len = fin->fin_dlen; 3324 len = pkt->b_wptr - (unsigned char *)buf; 3325 ASSERT(len == pld_len); 3326 /* 3327 * Add payload sum to pseudoheader sum. 
3328 */ 3329 sum += fr_buf_sum(buf, len); 3330 while (sum >> 16) 3331 sum = (sum & 0xFFFF) + (sum >> 16); 3332 3333 *l4_csum_p = ~((uint16_t)sum); 3334 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p); 3335 } 3336 3337 /* 3338 * The IP header chksum is needed just for IPv4. 3339 */ 3340 if (fin->fin_v == 4) { 3341 /* 3342 * Compute IPv4 header chksum. 3343 */ 3344 ip = (ip_t *)pkt->b_rptr; 3345 ip->ip_sum = (uint16_t)0; 3346 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen); 3347 while (ip_sum >> 16) 3348 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16); 3349 3350 ip->ip_sum = ~((uint16_t)ip_sum); 3351 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum); 3352 } 3353 3354 return; 3355 } 3356 3357 #endif /* _KERNEL && SOLARIS2 >= 10 */ 3358